js_regex 3.7.2 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ec1645dcb85514957cbc0d1d08d973c44386e7ae554b83ceb56be807178a08d
4
- data.tar.gz: cebf15dc34ca2e2f6ed1e57cc9a4719ee79c652070d8d0b687dae9278ed18e8d
3
+ metadata.gz: 8971658980813740deb03ece3c5af5bfdfd7412f0630fc4a3f172e4c06b11c52
4
+ data.tar.gz: bdafa3639a230b1ec1ac4661828050d99339eb18ec1768fa2a6f1b5e69d95f1b
5
5
  SHA512:
6
- metadata.gz: 74eb573819cf814ca8c196dae4a6ddbb139b941947197cb115153ec749a89fe5189b1296693f86596ded4a882f81858b877b1514ff346289510dce351df8224f
7
- data.tar.gz: 07e5743c335d15bcadfd465df576e32a5b9c6c27918b16c0282f1ac19ecf5ba7634fdf6b9a528963742316ce1a15f174098934da61d2c0a8975f639e1827a4b0
6
+ metadata.gz: 13abdf7b41485194f05cce79751ca60e8b1f9fc864b17f58294650df9f9e485a889b9571d847bf564aa12b709fd572c53f27a5a2900e3dc8bfa765f522b58e62
7
+ data.tar.gz: 31941c0d7a4842fdea84d5f649f3df30c54e8da24e6dbf722cff3e48661ae89646117f66f33ee419c0fc0e393b11f284cb565751b4f77aa5eb0bbbc8d38d903d
@@ -1,8 +1,8 @@
1
1
  class JsRegex
2
2
  #
3
- # This class acts as a facade, passing a regex to the converters.
3
+ # This class acts as a facade, passing a Regexp to the Converters.
4
4
  #
5
- # ::of returns a source String, options String, and warnings Array.
5
+ # ::of returns a source String, options String, warnings Array, target String.
6
6
  #
7
7
  class Conversion
8
8
  require 'regexp_parser'
@@ -10,28 +10,33 @@ class JsRegex
10
10
  require_relative 'error'
11
11
  require_relative 'node'
12
12
  require_relative 'second_pass'
13
+ require_relative 'target'
13
14
 
14
15
  class << self
15
- def of(input, options: nil)
16
- source, warnings = convert_source(input)
17
- options_string = convert_options(input, options)
18
- [source, options_string, warnings]
16
+ def of(input, options: nil, target: Target::ES2009)
17
+ target = Target.cast(target)
18
+ source, warnings, extra_opts = convert_source(input, target)
19
+ options_string = convert_options(input, options, extra_opts)
20
+ [source, options_string, warnings, target]
19
21
  end
20
22
 
21
23
  private
22
24
 
23
- def convert_source(input)
25
+ def convert_source(input, target)
24
26
  tree = Regexp::Parser.parse(input)
25
- context = Converter::Context.new(case_insensitive_root: tree.i?)
27
+ context = Converter::Context.new(
28
+ case_insensitive_root: tree.i?,
29
+ target: target,
30
+ )
26
31
  converted_tree = Converter.convert(tree, context)
27
32
  final_tree = SecondPass.call(converted_tree)
28
- [final_tree.to_s, context.warnings]
33
+ [final_tree.to_s, context.warnings, context.required_options]
29
34
  rescue Regexp::Parser::Error => e
30
35
  raise e.extend(JsRegex::Error)
31
36
  end
32
37
 
33
- def convert_options(input, custom_options)
34
- options = custom_options.to_s.scan(/[gimuy]/)
38
+ def convert_options(input, custom_options, required_options)
39
+ options = custom_options.to_s.scan(/[gimsuy]/) + required_options
35
40
  if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero?
36
41
  options << 'i'
37
42
  end
@@ -13,17 +13,39 @@ class JsRegex
13
13
  when :bol, :bos then '^'
14
14
  when :eol, :eos then '$'
15
15
  when :eos_ob_eol then '(?=\n?$)'
16
- when :word_boundary then pass_boundary_with_warning('\b')
17
- when :nonword_boundary then pass_boundary_with_warning('\B')
16
+ when :word_boundary then convert_boundary
17
+ when :nonword_boundary then convert_nonboundary
18
18
  else
19
19
  warn_of_unsupported_feature
20
20
  end
21
21
  end
22
22
 
23
- def pass_boundary_with_warning(boundary)
24
- warn_of("The anchor '#{boundary}' at index #{expression.ts} "\
25
- 'only works at ASCII word boundaries in JavaScript.')
26
- boundary
23
+ def convert_boundary
24
+ if context.es_2018_or_higher? && context.enable_u_option
25
+ BOUNDARY_EXPANSION
26
+ else
27
+ pass_boundary_with_warning
28
+ end
29
+ end
30
+
31
+ def convert_nonboundary
32
+ if context.es_2018_or_higher? && context.enable_u_option
33
+ NONBOUNDARY_EXPANSION
34
+ else
35
+ pass_boundary_with_warning
36
+ end
37
+ end
38
+
39
+ # This is an approximation to the word boundary behavior in Ruby, c.f.
40
+ # https://github.com/ruby/ruby/blob/08476c45/tool/enc-unicode.rb#L130
41
+ W = '\d\p{L}\p{M}\p{Pc}'
42
+ BOUNDARY_EXPANSION = "(?:(?<=[#{W}])(?=[^#{W}]|$)|(?<=[^#{W}]|^)(?=[#{W}]))"
43
+ NONBOUNDARY_EXPANSION = "(?<=[#{W}])(?=[#{W}])"
44
+
45
+ def pass_boundary_with_warning
46
+ warn_of("The anchor '#{data}' at index #{expression.ts} only works "\
47
+ 'at ASCII word boundaries with targets below ES2018".')
48
+ pass_through
27
49
  end
28
50
  end
29
51
  end
@@ -14,13 +14,24 @@ class JsRegex
14
14
  def convert_data
15
15
  case subtype
16
16
  when :lookahead, :nlookahead
17
- build_group(head: pass_through, capturing: false)
17
+ keep_as_is
18
+ when :lookbehind
19
+ return keep_as_is if context.es_2018_or_higher?
20
+
21
+ warn_of_unsupported_feature('lookbehind', min_target: Target::ES2018)
22
+ build_passive_group
18
23
  when :nlookbehind
19
- warn_of_unsupported_feature('negative lookbehind assertion')
20
- else # :lookbehind, ...
21
- build_unsupported_group
24
+ return keep_as_is if context.es_2018_or_higher?
25
+
26
+ warn_of_unsupported_feature('negative lookbehind', min_target: Target::ES2018)
27
+ else
28
+ warn_of_unsupported_feature
22
29
  end
23
30
  end
31
+
32
+ def keep_as_is
33
+ build_group(head: pass_through, capturing: false)
34
+ end
24
35
  end
25
36
  end
26
37
  end
@@ -10,16 +10,30 @@ class JsRegex
10
10
 
11
11
  def convert_data
12
12
  case subtype
13
- when :name_ref, :number, :number_ref, :number_rel_ref then convert_ref
14
- when :name_call, :number_call, :number_rel_call then convert_call
13
+ when :name_ref then convert_name_ref
14
+ when :number, :number_ref, :number_rel_ref then convert_to_plain_num_ref
15
+ when :name_call, :number_call, :number_rel_call then convert_call
15
16
  else # name_recursion_ref, number_recursion_ref, ...
16
17
  warn_of_unsupported_feature
17
18
  end
18
19
  end
19
20
 
20
- def convert_ref
21
- position = context.new_capturing_group_position(target_position)
22
- Node.new('\\', Node.new(position.to_s, type: :backref_num))
21
+ def convert_name_ref
22
+ if context.es_2018_or_higher?
23
+ # ES 2018+ supports named backrefs, but only the angled-bracket syntax
24
+ Node.new("\\k<#{expression.name}>", reference: new_position, type: :backref)
25
+ else
26
+ convert_to_plain_num_ref
27
+ end
28
+ end
29
+
30
+ def convert_to_plain_num_ref
31
+ position = new_position
32
+ Node.new("\\#{position}", reference: position, type: :backref)
33
+ end
34
+
35
+ def new_position
36
+ context.new_capturing_group_position(target_position)
23
37
  end
24
38
 
25
39
  def target_position
@@ -31,7 +45,10 @@ class JsRegex
31
45
  return warn_of_unsupported_feature('whole-pattern recursion')
32
46
  end
33
47
  context.increment_local_capturing_group_count
34
- convert_expression(expression.referenced_expression.unquantified_clone)
48
+ target_copy = expression.referenced_expression.unquantified_clone
49
+ # avoid "Duplicate capture group name" error in JS
50
+ target_copy.token = :capture if target_copy.is?(:named, :group)
51
+ convert_expression(target_copy)
35
52
  end
36
53
  end
37
54
  end
@@ -51,10 +51,14 @@ class JsRegex
51
51
  Converter.convert(expression, context)
52
52
  end
53
53
 
54
- def warn_of_unsupported_feature(description = nil)
54
+ def warn_of_unsupported_feature(description = nil, min_target: nil)
55
55
  description ||= "#{subtype} #{expression.type}".tr('_', ' ')
56
- full_desc = "#{description} '#{expression}'"
57
- warn_of("Dropped unsupported #{full_desc} at index #{expression.ts}")
56
+ full_text = "Dropped unsupported #{description} '#{expression}' "\
57
+ "at index #{expression.ts}"
58
+ if min_target
59
+ full_text += " (requires at least `target: '#{min_target}'`)"
60
+ end
61
+ warn_of(full_text)
58
62
  drop
59
63
  end
60
64
 
@@ -68,11 +72,11 @@ class JsRegex
68
72
  alias drop_without_warning drop
69
73
 
70
74
  def wrap_in_backrefed_lookahead(content)
71
- backref_num = context.capturing_group_count + 1
72
- backref_num_node = Node.new(backref_num.to_s, type: :backref_num)
75
+ number = context.capturing_group_count + 1
76
+ backref_node = Node.new("\\#{number}", reference: number, type: :backref)
73
77
  context.increment_local_capturing_group_count
74
78
  # an empty passive group (?:) is appended as literal digits may follow
75
- Node.new('(?=(', *content, '))\\', backref_num_node, '(?:)')
79
+ Node.new('(?=(', *content, '))', backref_node, '(?:)')
76
80
  end
77
81
  end
78
82
  end
@@ -10,12 +10,12 @@ class JsRegex
10
10
 
11
11
  def convert_data
12
12
  case subtype
13
- when :open then mark_conditional
13
+ when :open then mark_conditional_for_second_pass
14
14
  else warn_of_unsupported_feature
15
15
  end
16
16
  end
17
17
 
18
- def mark_conditional
18
+ def mark_conditional_for_second_pass
19
19
  reference = expression.referenced_expression.number
20
20
  node = Node.new('(?:', reference: reference, type: :conditional)
21
21
  expression.branches.each do |branch|
@@ -11,12 +11,36 @@ class JsRegex
11
11
  :in_atomic_group,
12
12
  :warnings
13
13
 
14
- def initialize(case_insensitive_root: false)
14
+ def initialize(case_insensitive_root: false, target: nil)
15
15
  self.added_capturing_groups_after_group = Hash.new(0)
16
16
  self.capturing_group_count = 0
17
17
  self.warnings = []
18
+ self.required_options_hash = {}
18
19
 
19
20
  self.case_insensitive_root = case_insensitive_root
21
+ self.target = target
22
+ end
23
+
24
+ # target context
25
+
26
+ def es_2015_or_higher?
27
+ target >= Target::ES2015
28
+ end
29
+
30
+ def es_2018_or_higher?
31
+ target >= Target::ES2018
32
+ end
33
+
34
+ # these methods allow appending options to the final Conversion output
35
+
36
+ def enable_u_option
37
+ return false unless es_2015_or_higher?
38
+
39
+ required_options_hash['u'] = true
40
+ end
41
+
42
+ def required_options
43
+ required_options_hash.keys
20
44
  end
21
45
 
22
46
  # group context
@@ -54,7 +78,9 @@ class JsRegex
54
78
 
55
79
  private
56
80
 
57
- attr_accessor :added_capturing_groups_after_group
81
+ attr_accessor :added_capturing_groups_after_group,
82
+ :required_options_hash,
83
+ :target
58
84
 
59
85
  attr_writer :capturing_group_count,
60
86
  :case_insensitive_root,
@@ -41,7 +41,7 @@ class JsRegex
41
41
  when :control, :meta_sequence
42
42
  unicode_escape_codepoint
43
43
  when :literal
44
- LiteralConverter.convert_data(expression.char)
44
+ LiteralConverter.convert_data(expression.char, context)
45
45
  when *ESCAPES_SHARED_BY_RUBY_AND_JS
46
46
  pass_through
47
47
  when :bell, :escape, :octal
@@ -52,11 +52,19 @@ class JsRegex
52
52
  end
53
53
 
54
54
  def convert_codepoint_list
55
- expression.chars.each_with_object(Node.new) do |char, node|
56
- node << LiteralConverter.convert_data(Regexp.escape(char))
55
+ if context.enable_u_option
56
+ split_codepoint_list
57
+ else
58
+ expression.chars.each_with_object(Node.new) do |char, node|
59
+ node << LiteralConverter.convert_data(Regexp.escape(char), context)
60
+ end
57
61
  end
58
62
  end
59
63
 
64
+ def split_codepoint_list
65
+ expression.codepoints.map { |cp| "\\u{#{cp.to_s(16).upcase}}" }.join
66
+ end
67
+
60
68
  def unicode_escape_codepoint
61
69
  "\\u#{expression.codepoint.to_s(16).upcase.rjust(4, '0')}"
62
70
  end
@@ -10,19 +10,30 @@ class JsRegex
10
10
 
11
11
  def convert_data
12
12
  case subtype
13
- when :capture, :named then build_group
13
+ when :capture then build_group
14
+ when :named then build_named_group
14
15
  when :atomic then emulate_atomic_group
15
16
  when :comment then drop_without_warning
16
17
  when :options, :options_switch then build_options_group
17
18
  when :passive then build_passive_group
18
19
  when :absence then build_absence_group_if_simple
19
- else build_unsupported_group
20
+ else warn_of_unsupported_feature
21
+ end
22
+ end
23
+
24
+ def build_named_group
25
+ if context.es_2018_or_higher?
26
+ # ES 2018+ supports named groups, but only the angled-bracket syntax
27
+ build_group(head: "(?<#{expression.name}>")
28
+ else
29
+ build_group
20
30
  end
21
31
  end
22
32
 
23
33
  def emulate_atomic_group
24
34
  if context.in_atomic_group
25
- build_unsupported_group('nested atomic group')
35
+ warn_of_unsupported_feature('nested atomic group')
36
+ build_passive_group
26
37
  else
27
38
  context.start_atomic_group
28
39
  result = wrap_in_backrefed_lookahead(convert_subexpressions)
@@ -68,11 +79,6 @@ class JsRegex
68
79
  build_group(head: head, tail: tail, capturing: false)
69
80
  end
70
81
 
71
- def build_unsupported_group(description = nil)
72
- warn_of_unsupported_feature(description)
73
- build_passive_group
74
- end
75
-
76
82
  def build_group(opts = {})
77
83
  head = opts[:head] || '('
78
84
  tail = opts[:tail] || ')'
@@ -0,0 +1,24 @@
1
+ require_relative 'base'
2
+
3
+ class JsRegex
4
+ module Converter
5
+ #
6
+ # Template class implementation.
7
+ #
8
+ class KeepConverter < JsRegex::Converter::Base
9
+ private
10
+
11
+ def convert_data
12
+ if context.es_2018_or_higher?
13
+ if expression.level.zero?
14
+ Node.new(type: :keep_mark) # mark for conversion in SecondPass
15
+ else
16
+ warn_of_unsupported_feature('nested keep mark')
17
+ end
18
+ else
19
+ warn_of_unsupported_feature('keep mark', min_target: Target::ES2018)
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -7,11 +7,15 @@ class JsRegex
7
7
  #
8
8
  class LiteralConverter < JsRegex::Converter::Base
9
9
  class << self
10
- ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{FFFFF}]/
10
+ ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
11
11
 
12
- def convert_data(data)
12
+ def convert_data(data, context)
13
13
  if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
14
- convert_astral_data(data)
14
+ if context.enable_u_option
15
+ escape_incompatible_bmp_literals(data)
16
+ else
17
+ convert_astral_data(data)
18
+ end
15
19
  else
16
20
  escape_incompatible_bmp_literals(data)
17
21
  end
@@ -41,7 +45,7 @@ class JsRegex
41
45
  private
42
46
 
43
47
  def convert_data
44
- result = self.class.convert_data(data)
48
+ result = self.class.convert_data(data, context)
45
49
  if context.case_insensitive_root && !expression.case_insensitive?
46
50
  warn_of_unsupported_feature('nested case-sensitive literal')
47
51
  elsif !context.case_insensitive_root && expression.case_insensitive?
@@ -8,6 +8,16 @@ class JsRegex
8
8
  class MetaConverter < JsRegex::Converter::Base
9
9
  DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\n\uD800-\uDFFF])'
10
10
  ML_DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\uD800-\uDFFF])'
11
+ # Possible improvements for dot conversion:
12
+ #
13
+ # In ES2015, the 'u' flag allows dots to match astral chars. Unfortunately
14
+ # the dot keeps matching lone surrogates even with this flag, so the use
15
+ # of an expansion is still necessary to get the same behavior as in Ruby.
16
+ #
17
+ # ES2018 has the dotall flag 's', but it is tricky to use in conversions.
18
+ # 's' activates matching of BOTH astral chars and "\n", whereas the dot in
19
+ # Ruby doesn't match "\n" by default, and even with the 'm' flag set on
20
+ # the root, subexps might still exclude "\n" like so: /.(?-m:.)./m
11
21
 
12
22
  private
13
23
 
@@ -10,9 +10,24 @@ class JsRegex
10
10
  # codepoints matched by the property and build a set string from them.
11
11
  #
12
12
  class PropertyConverter < JsRegex::Converter::Base
13
+ # A map of normalized Ruby property names to names supported by ES2018+.
14
+ def self.map
15
+ @map ||= File.read("#{__dir__}/property_map.csv").scan(/(.+),(.+)/).to_h
16
+ end
17
+
13
18
  private
14
19
 
15
20
  def convert_data
21
+ if context.es_2018_or_higher? &&
22
+ (prop_name_in_js = self.class.map[subtype.to_s.tr('_', '')])
23
+ context.enable_u_option
24
+ "\\#{expression.negative? ? 'P' : 'p'}{#{prop_name_in_js}}"
25
+ else
26
+ build_character_set
27
+ end
28
+ end
29
+
30
+ def build_character_set
16
31
  content = CharacterSet.of_expression(expression)
17
32
 
18
33
  if expression.case_insensitive? && !context.case_insensitive_root
@@ -0,0 +1,171 @@
1
+ # THIS FILE IS GENERATED BY $ rake build_prop_map - DO NOT EDIT
2
+ ascii,ASCII
3
+ asciihexdigit,ASCII_Hex_Digit
4
+ adlam,Script=Adlam
5
+ anatolianhieroglyphs,Script=Anatolian_Hieroglyphs
6
+ armenian,Script=Armenian
7
+ avestan,Script=Avestan
8
+ bamum,Script=Bamum
9
+ bassavah,Script=Bassa_Vah
10
+ batak,Script=Batak
11
+ bengali,Script=Bengali
12
+ bhaiksuki,Script=Bhaiksuki
13
+ bidicontrol,Bidi_Control
14
+ bopomofo,Script=Bopomofo
15
+ braille,Script=Braille
16
+ buginese,Script=Buginese
17
+ buhid,Script=Buhid
18
+ carian,Script=Carian
19
+ caucasianalbanian,Script=Caucasian_Albanian
20
+ chakma,Script=Chakma
21
+ cham,Script=Cham
22
+ cherokee,Script=Cherokee
23
+ chorasmian,Script=Chorasmian
24
+ connectorpunctuation,Connector_Punctuation
25
+ control,Control
26
+ coptic,Script=Coptic
27
+ cuneiform,Script=Cuneiform
28
+ cypriot,Script=Cypriot
29
+ cyrillic,Script=Cyrillic
30
+ deprecated,Deprecated
31
+ deseret,Script=Deseret
32
+ devanagari,Script=Devanagari
33
+ divesakuru,Script=Dives_Akuru
34
+ dogra,Script=Dogra
35
+ duployan,Script=Duployan
36
+ egyptianhieroglyphs,Script=Egyptian_Hieroglyphs
37
+ elbasan,Script=Elbasan
38
+ elymaic,Script=Elymaic
39
+ emojicomponent,Emoji_Component
40
+ emojimodifier,Emoji_Modifier
41
+ enclosingmark,Enclosing_Mark
42
+ finalpunctuation,Final_Punctuation
43
+ georgian,Script=Georgian
44
+ gothic,Script=Gothic
45
+ grantha,Script=Grantha
46
+ greek,Script=Greek
47
+ gujarati,Script=Gujarati
48
+ gunjalagondi,Script=Gunjala_Gondi
49
+ gurmukhi,Script=Gurmukhi
50
+ hangul,Script=Hangul
51
+ hanifirohingya,Script=Hanifi_Rohingya
52
+ hanunoo,Script=Hanunoo
53
+ hatran,Script=Hatran
54
+ hebrew,Script=Hebrew
55
+ hexdigit,Hex_Digit
56
+ idsbinaryoperator,IDS_Binary_Operator
57
+ idstrinaryoperator,IDS_Trinary_Operator
58
+ imperialaramaic,Script=Imperial_Aramaic
59
+ initialpunctuation,Initial_Punctuation
60
+ inscriptionalpahlavi,Script=Inscriptional_Pahlavi
61
+ inscriptionalparthian,Script=Inscriptional_Parthian
62
+ javanese,Script=Javanese
63
+ joincontrol,Join_Control
64
+ kayahli,Script=Kayah_Li
65
+ kharoshthi,Script=Kharoshthi
66
+ khitansmallscript,Script=Khitan_Small_Script
67
+ khmer,Script=Khmer
68
+ khojki,Script=Khojki
69
+ khudawadi,Script=Khudawadi
70
+ lao,Script=Lao
71
+ lepcha,Script=Lepcha
72
+ letternumber,Letter_Number
73
+ limbu,Script=Limbu
74
+ lineseparator,Line_Separator
75
+ lineara,Script=Linear_A
76
+ linearb,Script=Linear_B
77
+ lisu,Script=Lisu
78
+ logicalorderexception,Logical_Order_Exception
79
+ lycian,Script=Lycian
80
+ lydian,Script=Lydian
81
+ mahajani,Script=Mahajani
82
+ makasar,Script=Makasar
83
+ malayalam,Script=Malayalam
84
+ mandaic,Script=Mandaic
85
+ manichaean,Script=Manichaean
86
+ marchen,Script=Marchen
87
+ masaramgondi,Script=Masaram_Gondi
88
+ math,Math
89
+ mathsymbol,Math_Symbol
90
+ medefaidrin,Script=Medefaidrin
91
+ meeteimayek,Script=Meetei_Mayek
92
+ mendekikakui,Script=Mende_Kikakui
93
+ meroiticcursive,Script=Meroitic_Cursive
94
+ meroitichieroglyphs,Script=Meroitic_Hieroglyphs
95
+ miao,Script=Miao
96
+ modi,Script=Modi
97
+ mro,Script=Mro
98
+ multani,Script=Multani
99
+ myanmar,Script=Myanmar
100
+ nabataean,Script=Nabataean
101
+ nandinagari,Script=Nandinagari
102
+ newtailue,Script=New_Tai_Lue
103
+ newa,Script=Newa
104
+ nko,Script=Nko
105
+ noncharactercodepoint,Noncharacter_Code_Point
106
+ nushu,Script=Nushu
107
+ nyiakengpuachuehmong,Script=Nyiakeng_Puachue_Hmong
108
+ ogham,Script=Ogham
109
+ olchiki,Script=Ol_Chiki
110
+ oldhungarian,Script=Old_Hungarian
111
+ olditalic,Script=Old_Italic
112
+ oldnortharabian,Script=Old_North_Arabian
113
+ oldpermic,Script=Old_Permic
114
+ oldpersian,Script=Old_Persian
115
+ oldsogdian,Script=Old_Sogdian
116
+ oldsoutharabian,Script=Old_South_Arabian
117
+ oldturkic,Script=Old_Turkic
118
+ oriya,Script=Oriya
119
+ osage,Script=Osage
120
+ osmanya,Script=Osmanya
121
+ othernumber,Other_Number
122
+ pahawhhmong,Script=Pahawh_Hmong
123
+ palmyrene,Script=Palmyrene
124
+ paragraphseparator,Paragraph_Separator
125
+ patternsyntax,Pattern_Syntax
126
+ patternwhitespace,Pattern_White_Space
127
+ paucinhau,Script=Pau_Cin_Hau
128
+ phagspa,Script=Phags_Pa
129
+ phoenician,Script=Phoenician
130
+ privateuse,Private_Use
131
+ psalterpahlavi,Script=Psalter_Pahlavi
132
+ quotationmark,Quotation_Mark
133
+ radical,Radical
134
+ regionalindicator,Regional_Indicator
135
+ rejang,Script=Rejang
136
+ runic,Script=Runic
137
+ samaritan,Script=Samaritan
138
+ saurashtra,Script=Saurashtra
139
+ separator,Separator
140
+ sharada,Script=Sharada
141
+ shavian,Script=Shavian
142
+ siddham,Script=Siddham
143
+ signwriting,Script=SignWriting
144
+ sinhala,Script=Sinhala
145
+ sogdian,Script=Sogdian
146
+ sorasompeng,Script=Sora_Sompeng
147
+ soyombo,Script=Soyombo
148
+ spaceseparator,Space_Separator
149
+ sundanese,Script=Sundanese
150
+ sylotinagri,Script=Syloti_Nagri
151
+ syriac,Script=Syriac
152
+ tagbanwa,Script=Tagbanwa
153
+ taile,Script=Tai_Le
154
+ taitham,Script=Tai_Tham
155
+ taiviet,Script=Tai_Viet
156
+ tamil,Script=Tamil
157
+ tangut,Script=Tangut
158
+ thaana,Script=Thaana
159
+ thai,Script=Thai
160
+ tibetan,Script=Tibetan
161
+ tifinagh,Script=Tifinagh
162
+ tirhuta,Script=Tirhuta
163
+ titlecaseletter,Titlecase_Letter
164
+ ugaritic,Script=Ugaritic
165
+ vai,Script=Vai
166
+ wancho,Script=Wancho
167
+ warangciti,Script=Warang_Citi
168
+ whitespace,White_Space
169
+ yezidi,Script=Yezidi
170
+ yi,Script=Yi
171
+ zanabazarsquare,Script=Zanabazar_Square
@@ -26,7 +26,12 @@ class JsRegex
26
26
  warn_of_unsupported_feature('nested case-sensitive set')
27
27
  end
28
28
 
29
- content.to_s_with_surrogate_ranges
29
+ if context.es_2015_or_higher?
30
+ context.enable_u_option if content.astral_part?
31
+ content.to_s(format: 'es6', in_brackets: true)
32
+ else
33
+ content.to_s_with_surrogate_ranges
34
+ end
30
35
  end
31
36
 
32
37
  def directly_compatible?
@@ -41,8 +46,8 @@ class JsRegex
41
46
  def child_directly_compatible?(exp)
42
47
  case exp.type
43
48
  when :literal
44
- # surrogate pair substitution needed if astral
45
- exp.text.ord <= 0xFFFF
49
+ # surrogate pair substitution needed on ES2009 if astral
50
+ exp.text.ord <= 0xFFFF || context.enable_u_option
46
51
  when :set
47
52
  # conversion needed for nested sets, intersections
48
53
  exp.token.equal?(:range)
@@ -6,9 +6,11 @@ class JsRegex
6
6
  # Template class implementation.
7
7
  #
8
8
  class TypeConverter < JsRegex::Converter::Base
9
- HEX_EXPANSION = '[0-9A-Fa-f]'
10
- NONHEX_EXPANSION = '[^0-9A-Fa-f]'
11
- LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
9
+ HEX_EXPANSION = '[0-9A-Fa-f]'
10
+ NONHEX_EXPANSION = '[^0-9A-Fa-f]'
11
+ ES2018_HEX_EXPANSION = '\p{AHex}'
12
+ ES2018_NONHEX_EXPANSION = '\P{AHex}'
13
+ LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
12
14
 
13
15
  def self.directly_compatible?(expression)
14
16
  case expression.token
@@ -23,8 +25,8 @@ class JsRegex
23
25
 
24
26
  def convert_data
25
27
  case subtype
26
- when :hex then HEX_EXPANSION
27
- when :nonhex then NONHEX_EXPANSION
28
+ when :hex then hex_expansion
29
+ when :nonhex then nonhex_expansion
28
30
  when :linebreak then LINEBREAK_EXPANSION
29
31
  when :digit, :space, :word
30
32
  return pass_through if self.class.directly_compatible?(expression)
@@ -37,6 +39,22 @@ class JsRegex
37
39
  end
38
40
  end
39
41
 
42
+ def hex_expansion
43
+ if context.es_2018_or_higher? && context.enable_u_option
44
+ ES2018_HEX_EXPANSION
45
+ else
46
+ HEX_EXPANSION
47
+ end
48
+ end
49
+
50
+ def nonhex_expansion
51
+ if context.es_2018_or_higher? && context.enable_u_option
52
+ ES2018_NONHEX_EXPANSION
53
+ else
54
+ NONHEX_EXPANSION
55
+ end
56
+ end
57
+
40
58
  def negative_set_substitution
41
59
  # ::of_expression returns an inverted set for negative expressions,
42
60
  # so we need to un-invert before wrapping in [^ and ]. Kinda lame.
@@ -13,6 +13,7 @@ class JsRegex
13
13
  expression: SubexpressionConverter,
14
14
  free_space: FreespaceConverter,
15
15
  group: GroupConverter,
16
+ keep: KeepConverter,
16
17
  literal: LiteralConverter,
17
18
  meta: MetaConverter,
18
19
  nonproperty: PropertyConverter,
data/lib/js_regex/node.rb CHANGED
@@ -9,10 +9,11 @@ class JsRegex
9
9
  attr_reader :children, :quantifier, :reference, :type
10
10
 
11
11
  TYPES = %i[
12
- backref_num
12
+ backref
13
13
  captured_group
14
14
  conditional
15
15
  dropped
16
+ keep_mark
16
17
  plain
17
18
  ].freeze
18
19
 
@@ -46,7 +47,7 @@ class JsRegex
46
47
  case type
47
48
  when :dropped
48
49
  ''
49
- when :backref_num, :captured_group, :plain
50
+ when :backref, :captured_group, :plain
50
51
  children.join << quantifier.to_s
51
52
  else
52
53
  raise TypeError.new(
@@ -59,6 +60,7 @@ class JsRegex
59
60
  self.children = attrs.fetch(:children) if attrs.key?(:children)
60
61
  self.quantifier = attrs.fetch(:quantifier) if attrs.key?(:quantifier)
61
62
  self.type = attrs.fetch(:type) if attrs.key?(:type)
63
+ self
62
64
  end
63
65
 
64
66
  private
@@ -6,12 +6,26 @@ class JsRegex
6
6
  module SecondPass
7
7
  class << self
8
8
  def call(tree)
9
+ substitute_root_level_keep_mark(tree)
9
10
  alternate_conditional_permutations(tree)
10
11
  tree
11
12
  end
12
13
 
13
14
  private
14
15
 
16
+ def substitute_root_level_keep_mark(tree)
17
+ keep_mark_index = nil
18
+ tree.children.each.with_index do |child, i|
19
+ break keep_mark_index = i if child.type == :keep_mark
20
+ end
21
+ return unless keep_mark_index
22
+
23
+ pre = tree.children[0...keep_mark_index]
24
+ post = tree.children[(keep_mark_index + 1)..-1]
25
+ lookbehind = Node.new('(?<=', *pre, ')')
26
+ tree.update(children: [lookbehind, *post])
27
+ end
28
+
15
29
  def alternate_conditional_permutations(tree)
16
30
  permutations = conditional_tree_permutations(tree)
17
31
  return if permutations.empty?
@@ -23,16 +37,16 @@ class JsRegex
23
37
  end
24
38
 
25
39
  def conditional_tree_permutations(tree)
26
- all_conds = conditions(tree)
27
- return [] if all_conds.empty?
40
+ conds = conditions(tree)
41
+ return [] if conds.empty?
28
42
 
29
43
  caps_per_branch = captured_group_count(tree)
30
44
 
31
- condition_permutations(all_conds).map.with_index do |truthy_conds, i|
45
+ condition_permutations(conds).map.with_index do |truthy_conds, i|
32
46
  tree_permutation = tree.clone
33
47
  # find referenced groups and conditionals and make one-sided
34
48
  crawl(tree_permutation) do |node|
35
- build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
49
+ build_permutation(node, conds, truthy_conds, caps_per_branch, i)
36
50
  end
37
51
  end
38
52
  end
@@ -63,16 +77,30 @@ class JsRegex
63
77
  end
64
78
  end
65
79
 
66
- def build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
80
+ def build_permutation(node, conds, truthy_conds, caps_per_branch, i)
67
81
  truthy = truthy_conds.include?(node.reference)
68
82
 
69
- if node.type.equal?(:captured_group) &&
70
- all_conds.include?(node.reference)
71
- adapt_referenced_group_to_permutation(node, truthy)
72
- elsif node.type.equal?(:conditional)
73
- adapt_conditional_to_permutation(node, truthy)
74
- elsif node.type.equal?(:backref_num)
83
+ case node.type
84
+ when :backref
85
+ # We cannot use named groups or backrefs in the conditional expansion,
86
+ # their repetition would cause a "Duplicate capture group name" error in JS.
87
+ node.update(children: [
88
+ node.children.first.sub(/k<.*>/, node.reference.to_s)
89
+ ])
90
+ # backref numbers need to be incremented for subsequent "branches"
75
91
  adapt_backref_to_permutation(node, caps_per_branch, i)
92
+ when :captured_group
93
+ # Remove name, c.f. :backref handling.
94
+ node.update(children: [
95
+ node.children.first.sub(/\?<.*>/, ''),
96
+ *node.children[1..-1]
97
+ ])
98
+ # if the group is referenced by any condition, modulate its quantity
99
+ if conds.include?(node.reference)
100
+ adapt_referenced_group_to_permutation(node, truthy)
101
+ end
102
+ when :conditional
103
+ adapt_conditional_to_permutation(node, truthy)
76
104
  end
77
105
  end
78
106
 
@@ -91,8 +119,8 @@ class JsRegex
91
119
  end
92
120
 
93
121
  def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
94
- new_num = backref_node.children[0].to_i + caps_per_branch * i
95
- backref_node.update(children: [new_num.to_s])
122
+ new_num = backref_node.reference + caps_per_branch * i
123
+ backref_node.update(children: ["\\#{new_num}"])
96
124
  end
97
125
 
98
126
  def min_quantify(node)
@@ -0,0 +1,19 @@
1
+ class JsRegex
2
+ module Target
3
+ ES2009 = 'ES2009'
4
+ ES2015 = 'ES2015'
5
+ ES2018 = 'ES2018'
6
+ SUPPORTED = [ES2009, ES2015, ES2018].freeze
7
+
8
+ def self.cast(arg)
9
+ return ES2009 if arg.nil?
10
+
11
+ normalized_arg = arg.to_s.upcase
12
+ return normalized_arg if SUPPORTED.include?(normalized_arg)
13
+
14
+ raise ArgumentError.new(
15
+ "Unknown target: #{arg.inspect}. Try one of #{SUPPORTED}."
16
+ ).extend(JsRegex::Error)
17
+ end
18
+ end
19
+ end
@@ -1,3 +1,3 @@
1
1
  class JsRegex
2
- VERSION = '3.7.2'
2
+ VERSION = '3.8.0'
3
3
  end
data/lib/js_regex.rb CHANGED
@@ -12,10 +12,10 @@ class JsRegex
12
12
  require_relative File.join('js_regex', 'version')
13
13
  require 'json'
14
14
 
15
- attr_reader :source, :options, :warnings
15
+ attr_reader :source, :options, :warnings, :target
16
16
 
17
- def initialize(ruby_regex, options: nil)
18
- @source, @options, @warnings = Conversion.of(ruby_regex, options: options)
17
+ def initialize(ruby_regex, **kwargs)
18
+ @source, @options, @warnings, @target = Conversion.of(ruby_regex, **kwargs)
19
19
  end
20
20
 
21
21
  def to_h
@@ -30,8 +30,8 @@ class JsRegex
30
30
  "/#{source.empty? ? '(?:)' : source}/#{options}"
31
31
  end
32
32
 
33
- def self.new!(ruby_regex, options: nil)
34
- js_regex = new(ruby_regex, options: options)
33
+ def self.new!(ruby_regex, **kwargs)
34
+ js_regex = new(ruby_regex, **kwargs)
35
35
  if js_regex.warnings.any?
36
36
  raise StandardError.new(
37
37
  "Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.2
4
+ version: 3.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-27 00:00:00.000000000 Z
11
+ date: 2022-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: character_set
@@ -72,9 +72,11 @@ files:
72
72
  - lib/js_regex/converter/escape_converter.rb
73
73
  - lib/js_regex/converter/freespace_converter.rb
74
74
  - lib/js_regex/converter/group_converter.rb
75
+ - lib/js_regex/converter/keep_converter.rb
75
76
  - lib/js_regex/converter/literal_converter.rb
76
77
  - lib/js_regex/converter/meta_converter.rb
77
78
  - lib/js_regex/converter/property_converter.rb
79
+ - lib/js_regex/converter/property_map.csv
78
80
  - lib/js_regex/converter/set_converter.rb
79
81
  - lib/js_regex/converter/subexpression_converter.rb
80
82
  - lib/js_regex/converter/type_converter.rb
@@ -82,6 +84,7 @@ files:
82
84
  - lib/js_regex/error.rb
83
85
  - lib/js_regex/node.rb
84
86
  - lib/js_regex/second_pass.rb
87
+ - lib/js_regex/target.rb
85
88
  - lib/js_regex/version.rb
86
89
  homepage: https://github.com/jaynetics/js_regex
87
90
  licenses: