js_regex 3.7.2 → 3.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ec1645dcb85514957cbc0d1d08d973c44386e7ae554b83ceb56be807178a08d
4
- data.tar.gz: cebf15dc34ca2e2f6ed1e57cc9a4719ee79c652070d8d0b687dae9278ed18e8d
3
+ metadata.gz: 8971658980813740deb03ece3c5af5bfdfd7412f0630fc4a3f172e4c06b11c52
4
+ data.tar.gz: bdafa3639a230b1ec1ac4661828050d99339eb18ec1768fa2a6f1b5e69d95f1b
5
5
  SHA512:
6
- metadata.gz: 74eb573819cf814ca8c196dae4a6ddbb139b941947197cb115153ec749a89fe5189b1296693f86596ded4a882f81858b877b1514ff346289510dce351df8224f
7
- data.tar.gz: 07e5743c335d15bcadfd465df576e32a5b9c6c27918b16c0282f1ac19ecf5ba7634fdf6b9a528963742316ce1a15f174098934da61d2c0a8975f639e1827a4b0
6
+ metadata.gz: 13abdf7b41485194f05cce79751ca60e8b1f9fc864b17f58294650df9f9e485a889b9571d847bf564aa12b709fd572c53f27a5a2900e3dc8bfa765f522b58e62
7
+ data.tar.gz: 31941c0d7a4842fdea84d5f649f3df30c54e8da24e6dbf722cff3e48661ae89646117f66f33ee419c0fc0e393b11f284cb565751b4f77aa5eb0bbbc8d38d903d
@@ -1,8 +1,8 @@
1
1
  class JsRegex
2
2
  #
3
- # This class acts as a facade, passing a regex to the converters.
3
+ # This class acts as a facade, passing a Regexp to the Converters.
4
4
  #
5
- # ::of returns a source String, options String, and warnings Array.
5
+ # ::of returns a source String, options String, warnings Array, target String.
6
6
  #
7
7
  class Conversion
8
8
  require 'regexp_parser'
@@ -10,28 +10,33 @@ class JsRegex
10
10
  require_relative 'error'
11
11
  require_relative 'node'
12
12
  require_relative 'second_pass'
13
+ require_relative 'target'
13
14
 
14
15
  class << self
15
- def of(input, options: nil)
16
- source, warnings = convert_source(input)
17
- options_string = convert_options(input, options)
18
- [source, options_string, warnings]
16
+ def of(input, options: nil, target: Target::ES2009)
17
+ target = Target.cast(target)
18
+ source, warnings, extra_opts = convert_source(input, target)
19
+ options_string = convert_options(input, options, extra_opts)
20
+ [source, options_string, warnings, target]
19
21
  end
20
22
 
21
23
  private
22
24
 
23
- def convert_source(input)
25
+ def convert_source(input, target)
24
26
  tree = Regexp::Parser.parse(input)
25
- context = Converter::Context.new(case_insensitive_root: tree.i?)
27
+ context = Converter::Context.new(
28
+ case_insensitive_root: tree.i?,
29
+ target: target,
30
+ )
26
31
  converted_tree = Converter.convert(tree, context)
27
32
  final_tree = SecondPass.call(converted_tree)
28
- [final_tree.to_s, context.warnings]
33
+ [final_tree.to_s, context.warnings, context.required_options]
29
34
  rescue Regexp::Parser::Error => e
30
35
  raise e.extend(JsRegex::Error)
31
36
  end
32
37
 
33
- def convert_options(input, custom_options)
34
- options = custom_options.to_s.scan(/[gimuy]/)
38
+ def convert_options(input, custom_options, required_options)
39
+ options = custom_options.to_s.scan(/[gimsuy]/) + required_options
35
40
  if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero?
36
41
  options << 'i'
37
42
  end
@@ -13,17 +13,39 @@ class JsRegex
13
13
  when :bol, :bos then '^'
14
14
  when :eol, :eos then '$'
15
15
  when :eos_ob_eol then '(?=\n?$)'
16
- when :word_boundary then pass_boundary_with_warning('\b')
17
- when :nonword_boundary then pass_boundary_with_warning('\B')
16
+ when :word_boundary then convert_boundary
17
+ when :nonword_boundary then convert_nonboundary
18
18
  else
19
19
  warn_of_unsupported_feature
20
20
  end
21
21
  end
22
22
 
23
- def pass_boundary_with_warning(boundary)
24
- warn_of("The anchor '#{boundary}' at index #{expression.ts} "\
25
- 'only works at ASCII word boundaries in JavaScript.')
26
- boundary
23
+ def convert_boundary
24
+ if context.es_2018_or_higher? && context.enable_u_option
25
+ BOUNDARY_EXPANSION
26
+ else
27
+ pass_boundary_with_warning
28
+ end
29
+ end
30
+
31
+ def convert_nonboundary
32
+ if context.es_2018_or_higher? && context.enable_u_option
33
+ NONBOUNDARY_EXPANSION
34
+ else
35
+ pass_boundary_with_warning
36
+ end
37
+ end
38
+
39
+ # This is an approximation to the word boundary behavior in Ruby, c.f.
40
+ # https://github.com/ruby/ruby/blob/08476c45/tool/enc-unicode.rb#L130
41
+ W = '\d\p{L}\p{M}\p{Pc}'
42
+ BOUNDARY_EXPANSION = "(?:(?<=[#{W}])(?=[^#{W}]|$)|(?<=[^#{W}]|^)(?=[#{W}]))"
43
+ NONBOUNDARY_EXPANSION = "(?<=[#{W}])(?=[#{W}])"
44
+
45
+ def pass_boundary_with_warning
46
+ warn_of("The anchor '#{data}' at index #{expression.ts} only works "\
47
+ 'at ASCII word boundaries with targets below ES2018".')
48
+ pass_through
27
49
  end
28
50
  end
29
51
  end
@@ -14,13 +14,24 @@ class JsRegex
14
14
  def convert_data
15
15
  case subtype
16
16
  when :lookahead, :nlookahead
17
- build_group(head: pass_through, capturing: false)
17
+ keep_as_is
18
+ when :lookbehind
19
+ return keep_as_is if context.es_2018_or_higher?
20
+
21
+ warn_of_unsupported_feature('lookbehind', min_target: Target::ES2018)
22
+ build_passive_group
18
23
  when :nlookbehind
19
- warn_of_unsupported_feature('negative lookbehind assertion')
20
- else # :lookbehind, ...
21
- build_unsupported_group
24
+ return keep_as_is if context.es_2018_or_higher?
25
+
26
+ warn_of_unsupported_feature('negative lookbehind', min_target: Target::ES2018)
27
+ else
28
+ warn_of_unsupported_feature
22
29
  end
23
30
  end
31
+
32
+ def keep_as_is
33
+ build_group(head: pass_through, capturing: false)
34
+ end
24
35
  end
25
36
  end
26
37
  end
@@ -10,16 +10,30 @@ class JsRegex
10
10
 
11
11
  def convert_data
12
12
  case subtype
13
- when :name_ref, :number, :number_ref, :number_rel_ref then convert_ref
14
- when :name_call, :number_call, :number_rel_call then convert_call
13
+ when :name_ref then convert_name_ref
14
+ when :number, :number_ref, :number_rel_ref then convert_to_plain_num_ref
15
+ when :name_call, :number_call, :number_rel_call then convert_call
15
16
  else # name_recursion_ref, number_recursion_ref, ...
16
17
  warn_of_unsupported_feature
17
18
  end
18
19
  end
19
20
 
20
- def convert_ref
21
- position = context.new_capturing_group_position(target_position)
22
- Node.new('\\', Node.new(position.to_s, type: :backref_num))
21
+ def convert_name_ref
22
+ if context.es_2018_or_higher?
23
+ # ES 2018+ supports named backrefs, but only the angled-bracket syntax
24
+ Node.new("\\k<#{expression.name}>", reference: new_position, type: :backref)
25
+ else
26
+ convert_to_plain_num_ref
27
+ end
28
+ end
29
+
30
+ def convert_to_plain_num_ref
31
+ position = new_position
32
+ Node.new("\\#{position}", reference: position, type: :backref)
33
+ end
34
+
35
+ def new_position
36
+ context.new_capturing_group_position(target_position)
23
37
  end
24
38
 
25
39
  def target_position
@@ -31,7 +45,10 @@ class JsRegex
31
45
  return warn_of_unsupported_feature('whole-pattern recursion')
32
46
  end
33
47
  context.increment_local_capturing_group_count
34
- convert_expression(expression.referenced_expression.unquantified_clone)
48
+ target_copy = expression.referenced_expression.unquantified_clone
49
+ # avoid "Duplicate capture group name" error in JS
50
+ target_copy.token = :capture if target_copy.is?(:named, :group)
51
+ convert_expression(target_copy)
35
52
  end
36
53
  end
37
54
  end
@@ -51,10 +51,14 @@ class JsRegex
51
51
  Converter.convert(expression, context)
52
52
  end
53
53
 
54
- def warn_of_unsupported_feature(description = nil)
54
+ def warn_of_unsupported_feature(description = nil, min_target: nil)
55
55
  description ||= "#{subtype} #{expression.type}".tr('_', ' ')
56
- full_desc = "#{description} '#{expression}'"
57
- warn_of("Dropped unsupported #{full_desc} at index #{expression.ts}")
56
+ full_text = "Dropped unsupported #{description} '#{expression}' "\
57
+ "at index #{expression.ts}"
58
+ if min_target
59
+ full_text += " (requires at least `target: '#{min_target}'`)"
60
+ end
61
+ warn_of(full_text)
58
62
  drop
59
63
  end
60
64
 
@@ -68,11 +72,11 @@ class JsRegex
68
72
  alias drop_without_warning drop
69
73
 
70
74
  def wrap_in_backrefed_lookahead(content)
71
- backref_num = context.capturing_group_count + 1
72
- backref_num_node = Node.new(backref_num.to_s, type: :backref_num)
75
+ number = context.capturing_group_count + 1
76
+ backref_node = Node.new("\\#{number}", reference: number, type: :backref)
73
77
  context.increment_local_capturing_group_count
74
78
  # an empty passive group (?:) is appended as literal digits may follow
75
- Node.new('(?=(', *content, '))\\', backref_num_node, '(?:)')
79
+ Node.new('(?=(', *content, '))', backref_node, '(?:)')
76
80
  end
77
81
  end
78
82
  end
@@ -10,12 +10,12 @@ class JsRegex
10
10
 
11
11
  def convert_data
12
12
  case subtype
13
- when :open then mark_conditional
13
+ when :open then mark_conditional_for_second_pass
14
14
  else warn_of_unsupported_feature
15
15
  end
16
16
  end
17
17
 
18
- def mark_conditional
18
+ def mark_conditional_for_second_pass
19
19
  reference = expression.referenced_expression.number
20
20
  node = Node.new('(?:', reference: reference, type: :conditional)
21
21
  expression.branches.each do |branch|
@@ -11,12 +11,36 @@ class JsRegex
11
11
  :in_atomic_group,
12
12
  :warnings
13
13
 
14
- def initialize(case_insensitive_root: false)
14
+ def initialize(case_insensitive_root: false, target: nil)
15
15
  self.added_capturing_groups_after_group = Hash.new(0)
16
16
  self.capturing_group_count = 0
17
17
  self.warnings = []
18
+ self.required_options_hash = {}
18
19
 
19
20
  self.case_insensitive_root = case_insensitive_root
21
+ self.target = target
22
+ end
23
+
24
+ # target context
25
+
26
+ def es_2015_or_higher?
27
+ target >= Target::ES2015
28
+ end
29
+
30
+ def es_2018_or_higher?
31
+ target >= Target::ES2018
32
+ end
33
+
34
+ # these methods allow appending options to the final Conversion output
35
+
36
+ def enable_u_option
37
+ return false unless es_2015_or_higher?
38
+
39
+ required_options_hash['u'] = true
40
+ end
41
+
42
+ def required_options
43
+ required_options_hash.keys
20
44
  end
21
45
 
22
46
  # group context
@@ -54,7 +78,9 @@ class JsRegex
54
78
 
55
79
  private
56
80
 
57
- attr_accessor :added_capturing_groups_after_group
81
+ attr_accessor :added_capturing_groups_after_group,
82
+ :required_options_hash,
83
+ :target
58
84
 
59
85
  attr_writer :capturing_group_count,
60
86
  :case_insensitive_root,
@@ -41,7 +41,7 @@ class JsRegex
41
41
  when :control, :meta_sequence
42
42
  unicode_escape_codepoint
43
43
  when :literal
44
- LiteralConverter.convert_data(expression.char)
44
+ LiteralConverter.convert_data(expression.char, context)
45
45
  when *ESCAPES_SHARED_BY_RUBY_AND_JS
46
46
  pass_through
47
47
  when :bell, :escape, :octal
@@ -52,11 +52,19 @@ class JsRegex
52
52
  end
53
53
 
54
54
  def convert_codepoint_list
55
- expression.chars.each_with_object(Node.new) do |char, node|
56
- node << LiteralConverter.convert_data(Regexp.escape(char))
55
+ if context.enable_u_option
56
+ split_codepoint_list
57
+ else
58
+ expression.chars.each_with_object(Node.new) do |char, node|
59
+ node << LiteralConverter.convert_data(Regexp.escape(char), context)
60
+ end
57
61
  end
58
62
  end
59
63
 
64
+ def split_codepoint_list
65
+ expression.codepoints.map { |cp| "\\u{#{cp.to_s(16).upcase}}" }.join
66
+ end
67
+
60
68
  def unicode_escape_codepoint
61
69
  "\\u#{expression.codepoint.to_s(16).upcase.rjust(4, '0')}"
62
70
  end
@@ -10,19 +10,30 @@ class JsRegex
10
10
 
11
11
  def convert_data
12
12
  case subtype
13
- when :capture, :named then build_group
13
+ when :capture then build_group
14
+ when :named then build_named_group
14
15
  when :atomic then emulate_atomic_group
15
16
  when :comment then drop_without_warning
16
17
  when :options, :options_switch then build_options_group
17
18
  when :passive then build_passive_group
18
19
  when :absence then build_absence_group_if_simple
19
- else build_unsupported_group
20
+ else warn_of_unsupported_feature
21
+ end
22
+ end
23
+
24
+ def build_named_group
25
+ if context.es_2018_or_higher?
26
+ # ES 2018+ supports named groups, but only the angled-bracket syntax
27
+ build_group(head: "(?<#{expression.name}>")
28
+ else
29
+ build_group
20
30
  end
21
31
  end
22
32
 
23
33
  def emulate_atomic_group
24
34
  if context.in_atomic_group
25
- build_unsupported_group('nested atomic group')
35
+ warn_of_unsupported_feature('nested atomic group')
36
+ build_passive_group
26
37
  else
27
38
  context.start_atomic_group
28
39
  result = wrap_in_backrefed_lookahead(convert_subexpressions)
@@ -68,11 +79,6 @@ class JsRegex
68
79
  build_group(head: head, tail: tail, capturing: false)
69
80
  end
70
81
 
71
- def build_unsupported_group(description = nil)
72
- warn_of_unsupported_feature(description)
73
- build_passive_group
74
- end
75
-
76
82
  def build_group(opts = {})
77
83
  head = opts[:head] || '('
78
84
  tail = opts[:tail] || ')'
@@ -0,0 +1,24 @@
1
+ require_relative 'base'
2
+
3
+ class JsRegex
4
+ module Converter
5
+ #
6
+ # Template class implementation.
7
+ #
8
+ class KeepConverter < JsRegex::Converter::Base
9
+ private
10
+
11
+ def convert_data
12
+ if context.es_2018_or_higher?
13
+ if expression.level.zero?
14
+ Node.new(type: :keep_mark) # mark for conversion in SecondPass
15
+ else
16
+ warn_of_unsupported_feature('nested keep mark')
17
+ end
18
+ else
19
+ warn_of_unsupported_feature('keep mark', min_target: Target::ES2018)
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -7,11 +7,15 @@ class JsRegex
7
7
  #
8
8
  class LiteralConverter < JsRegex::Converter::Base
9
9
  class << self
10
- ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{FFFFF}]/
10
+ ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
11
11
 
12
- def convert_data(data)
12
+ def convert_data(data, context)
13
13
  if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
14
- convert_astral_data(data)
14
+ if context.enable_u_option
15
+ escape_incompatible_bmp_literals(data)
16
+ else
17
+ convert_astral_data(data)
18
+ end
15
19
  else
16
20
  escape_incompatible_bmp_literals(data)
17
21
  end
@@ -41,7 +45,7 @@ class JsRegex
41
45
  private
42
46
 
43
47
  def convert_data
44
- result = self.class.convert_data(data)
48
+ result = self.class.convert_data(data, context)
45
49
  if context.case_insensitive_root && !expression.case_insensitive?
46
50
  warn_of_unsupported_feature('nested case-sensitive literal')
47
51
  elsif !context.case_insensitive_root && expression.case_insensitive?
@@ -8,6 +8,16 @@ class JsRegex
8
8
  class MetaConverter < JsRegex::Converter::Base
9
9
  DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\n\uD800-\uDFFF])'
10
10
  ML_DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\uD800-\uDFFF])'
11
+ # Possible improvements for dot conversion:
12
+ #
13
+ # In ES2015, the 'u' flag allows dots to match astral chars. Unfortunately
14
+ # the dot keeps matching lone surrogates even with this flag, so the use
15
+ # of an expansion is still necessary to get the same behavior as in Ruby.
16
+ #
17
+ # ES2018 has the dotall flag 's', but it is tricky to use in conversions.
18
+ # 's' activates matching of BOTH astral chars and "\n", whereas the dot in
19
+ # Ruby doesn't match "\n" by default, and even with the 'm' flag set on
20
+ # the root, subexps might still exclude "\n" like so: /.(?-m:.)./m
11
21
 
12
22
  private
13
23
 
@@ -10,9 +10,24 @@ class JsRegex
10
10
  # codepoints matched by the property and build a set string from them.
11
11
  #
12
12
  class PropertyConverter < JsRegex::Converter::Base
13
+ # A map of normalized Ruby property names to names supported by ES2018+.
14
+ def self.map
15
+ @map ||= File.read("#{__dir__}/property_map.csv").scan(/(.+),(.+)/).to_h
16
+ end
17
+
13
18
  private
14
19
 
15
20
  def convert_data
21
+ if context.es_2018_or_higher? &&
22
+ (prop_name_in_js = self.class.map[subtype.to_s.tr('_', '')])
23
+ context.enable_u_option
24
+ "\\#{expression.negative? ? 'P' : 'p'}{#{prop_name_in_js}}"
25
+ else
26
+ build_character_set
27
+ end
28
+ end
29
+
30
+ def build_character_set
16
31
  content = CharacterSet.of_expression(expression)
17
32
 
18
33
  if expression.case_insensitive? && !context.case_insensitive_root
@@ -0,0 +1,171 @@
1
+ # THIS FILE IS GENERATED BY $ rake build_prop_map - DO NOT EDIT
2
+ ascii,ASCII
3
+ asciihexdigit,ASCII_Hex_Digit
4
+ adlam,Script=Adlam
5
+ anatolianhieroglyphs,Script=Anatolian_Hieroglyphs
6
+ armenian,Script=Armenian
7
+ avestan,Script=Avestan
8
+ bamum,Script=Bamum
9
+ bassavah,Script=Bassa_Vah
10
+ batak,Script=Batak
11
+ bengali,Script=Bengali
12
+ bhaiksuki,Script=Bhaiksuki
13
+ bidicontrol,Bidi_Control
14
+ bopomofo,Script=Bopomofo
15
+ braille,Script=Braille
16
+ buginese,Script=Buginese
17
+ buhid,Script=Buhid
18
+ carian,Script=Carian
19
+ caucasianalbanian,Script=Caucasian_Albanian
20
+ chakma,Script=Chakma
21
+ cham,Script=Cham
22
+ cherokee,Script=Cherokee
23
+ chorasmian,Script=Chorasmian
24
+ connectorpunctuation,Connector_Punctuation
25
+ control,Control
26
+ coptic,Script=Coptic
27
+ cuneiform,Script=Cuneiform
28
+ cypriot,Script=Cypriot
29
+ cyrillic,Script=Cyrillic
30
+ deprecated,Deprecated
31
+ deseret,Script=Deseret
32
+ devanagari,Script=Devanagari
33
+ divesakuru,Script=Dives_Akuru
34
+ dogra,Script=Dogra
35
+ duployan,Script=Duployan
36
+ egyptianhieroglyphs,Script=Egyptian_Hieroglyphs
37
+ elbasan,Script=Elbasan
38
+ elymaic,Script=Elymaic
39
+ emojicomponent,Emoji_Component
40
+ emojimodifier,Emoji_Modifier
41
+ enclosingmark,Enclosing_Mark
42
+ finalpunctuation,Final_Punctuation
43
+ georgian,Script=Georgian
44
+ gothic,Script=Gothic
45
+ grantha,Script=Grantha
46
+ greek,Script=Greek
47
+ gujarati,Script=Gujarati
48
+ gunjalagondi,Script=Gunjala_Gondi
49
+ gurmukhi,Script=Gurmukhi
50
+ hangul,Script=Hangul
51
+ hanifirohingya,Script=Hanifi_Rohingya
52
+ hanunoo,Script=Hanunoo
53
+ hatran,Script=Hatran
54
+ hebrew,Script=Hebrew
55
+ hexdigit,Hex_Digit
56
+ idsbinaryoperator,IDS_Binary_Operator
57
+ idstrinaryoperator,IDS_Trinary_Operator
58
+ imperialaramaic,Script=Imperial_Aramaic
59
+ initialpunctuation,Initial_Punctuation
60
+ inscriptionalpahlavi,Script=Inscriptional_Pahlavi
61
+ inscriptionalparthian,Script=Inscriptional_Parthian
62
+ javanese,Script=Javanese
63
+ joincontrol,Join_Control
64
+ kayahli,Script=Kayah_Li
65
+ kharoshthi,Script=Kharoshthi
66
+ khitansmallscript,Script=Khitan_Small_Script
67
+ khmer,Script=Khmer
68
+ khojki,Script=Khojki
69
+ khudawadi,Script=Khudawadi
70
+ lao,Script=Lao
71
+ lepcha,Script=Lepcha
72
+ letternumber,Letter_Number
73
+ limbu,Script=Limbu
74
+ lineseparator,Line_Separator
75
+ lineara,Script=Linear_A
76
+ linearb,Script=Linear_B
77
+ lisu,Script=Lisu
78
+ logicalorderexception,Logical_Order_Exception
79
+ lycian,Script=Lycian
80
+ lydian,Script=Lydian
81
+ mahajani,Script=Mahajani
82
+ makasar,Script=Makasar
83
+ malayalam,Script=Malayalam
84
+ mandaic,Script=Mandaic
85
+ manichaean,Script=Manichaean
86
+ marchen,Script=Marchen
87
+ masaramgondi,Script=Masaram_Gondi
88
+ math,Math
89
+ mathsymbol,Math_Symbol
90
+ medefaidrin,Script=Medefaidrin
91
+ meeteimayek,Script=Meetei_Mayek
92
+ mendekikakui,Script=Mende_Kikakui
93
+ meroiticcursive,Script=Meroitic_Cursive
94
+ meroitichieroglyphs,Script=Meroitic_Hieroglyphs
95
+ miao,Script=Miao
96
+ modi,Script=Modi
97
+ mro,Script=Mro
98
+ multani,Script=Multani
99
+ myanmar,Script=Myanmar
100
+ nabataean,Script=Nabataean
101
+ nandinagari,Script=Nandinagari
102
+ newtailue,Script=New_Tai_Lue
103
+ newa,Script=Newa
104
+ nko,Script=Nko
105
+ noncharactercodepoint,Noncharacter_Code_Point
106
+ nushu,Script=Nushu
107
+ nyiakengpuachuehmong,Script=Nyiakeng_Puachue_Hmong
108
+ ogham,Script=Ogham
109
+ olchiki,Script=Ol_Chiki
110
+ oldhungarian,Script=Old_Hungarian
111
+ olditalic,Script=Old_Italic
112
+ oldnortharabian,Script=Old_North_Arabian
113
+ oldpermic,Script=Old_Permic
114
+ oldpersian,Script=Old_Persian
115
+ oldsogdian,Script=Old_Sogdian
116
+ oldsoutharabian,Script=Old_South_Arabian
117
+ oldturkic,Script=Old_Turkic
118
+ oriya,Script=Oriya
119
+ osage,Script=Osage
120
+ osmanya,Script=Osmanya
121
+ othernumber,Other_Number
122
+ pahawhhmong,Script=Pahawh_Hmong
123
+ palmyrene,Script=Palmyrene
124
+ paragraphseparator,Paragraph_Separator
125
+ patternsyntax,Pattern_Syntax
126
+ patternwhitespace,Pattern_White_Space
127
+ paucinhau,Script=Pau_Cin_Hau
128
+ phagspa,Script=Phags_Pa
129
+ phoenician,Script=Phoenician
130
+ privateuse,Private_Use
131
+ psalterpahlavi,Script=Psalter_Pahlavi
132
+ quotationmark,Quotation_Mark
133
+ radical,Radical
134
+ regionalindicator,Regional_Indicator
135
+ rejang,Script=Rejang
136
+ runic,Script=Runic
137
+ samaritan,Script=Samaritan
138
+ saurashtra,Script=Saurashtra
139
+ separator,Separator
140
+ sharada,Script=Sharada
141
+ shavian,Script=Shavian
142
+ siddham,Script=Siddham
143
+ signwriting,Script=SignWriting
144
+ sinhala,Script=Sinhala
145
+ sogdian,Script=Sogdian
146
+ sorasompeng,Script=Sora_Sompeng
147
+ soyombo,Script=Soyombo
148
+ spaceseparator,Space_Separator
149
+ sundanese,Script=Sundanese
150
+ sylotinagri,Script=Syloti_Nagri
151
+ syriac,Script=Syriac
152
+ tagbanwa,Script=Tagbanwa
153
+ taile,Script=Tai_Le
154
+ taitham,Script=Tai_Tham
155
+ taiviet,Script=Tai_Viet
156
+ tamil,Script=Tamil
157
+ tangut,Script=Tangut
158
+ thaana,Script=Thaana
159
+ thai,Script=Thai
160
+ tibetan,Script=Tibetan
161
+ tifinagh,Script=Tifinagh
162
+ tirhuta,Script=Tirhuta
163
+ titlecaseletter,Titlecase_Letter
164
+ ugaritic,Script=Ugaritic
165
+ vai,Script=Vai
166
+ wancho,Script=Wancho
167
+ warangciti,Script=Warang_Citi
168
+ whitespace,White_Space
169
+ yezidi,Script=Yezidi
170
+ yi,Script=Yi
171
+ zanabazarsquare,Script=Zanabazar_Square
@@ -26,7 +26,12 @@ class JsRegex
26
26
  warn_of_unsupported_feature('nested case-sensitive set')
27
27
  end
28
28
 
29
- content.to_s_with_surrogate_ranges
29
+ if context.es_2015_or_higher?
30
+ context.enable_u_option if content.astral_part?
31
+ content.to_s(format: 'es6', in_brackets: true)
32
+ else
33
+ content.to_s_with_surrogate_ranges
34
+ end
30
35
  end
31
36
 
32
37
  def directly_compatible?
@@ -41,8 +46,8 @@ class JsRegex
41
46
  def child_directly_compatible?(exp)
42
47
  case exp.type
43
48
  when :literal
44
- # surrogate pair substitution needed if astral
45
- exp.text.ord <= 0xFFFF
49
+ # surrogate pair substitution needed on ES2009 if astral
50
+ exp.text.ord <= 0xFFFF || context.enable_u_option
46
51
  when :set
47
52
  # conversion needed for nested sets, intersections
48
53
  exp.token.equal?(:range)
@@ -6,9 +6,11 @@ class JsRegex
6
6
  # Template class implementation.
7
7
  #
8
8
  class TypeConverter < JsRegex::Converter::Base
9
- HEX_EXPANSION = '[0-9A-Fa-f]'
10
- NONHEX_EXPANSION = '[^0-9A-Fa-f]'
11
- LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
9
+ HEX_EXPANSION = '[0-9A-Fa-f]'
10
+ NONHEX_EXPANSION = '[^0-9A-Fa-f]'
11
+ ES2018_HEX_EXPANSION = '\p{AHex}'
12
+ ES2018_NONHEX_EXPANSION = '\P{AHex}'
13
+ LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
12
14
 
13
15
  def self.directly_compatible?(expression)
14
16
  case expression.token
@@ -23,8 +25,8 @@ class JsRegex
23
25
 
24
26
  def convert_data
25
27
  case subtype
26
- when :hex then HEX_EXPANSION
27
- when :nonhex then NONHEX_EXPANSION
28
+ when :hex then hex_expansion
29
+ when :nonhex then nonhex_expansion
28
30
  when :linebreak then LINEBREAK_EXPANSION
29
31
  when :digit, :space, :word
30
32
  return pass_through if self.class.directly_compatible?(expression)
@@ -37,6 +39,22 @@ class JsRegex
37
39
  end
38
40
  end
39
41
 
42
+ def hex_expansion
43
+ if context.es_2018_or_higher? && context.enable_u_option
44
+ ES2018_HEX_EXPANSION
45
+ else
46
+ HEX_EXPANSION
47
+ end
48
+ end
49
+
50
+ def nonhex_expansion
51
+ if context.es_2018_or_higher? && context.enable_u_option
52
+ ES2018_NONHEX_EXPANSION
53
+ else
54
+ NONHEX_EXPANSION
55
+ end
56
+ end
57
+
40
58
  def negative_set_substitution
41
59
  # ::of_expression returns an inverted set for negative expressions,
42
60
  # so we need to un-invert before wrapping in [^ and ]. Kinda lame.
@@ -13,6 +13,7 @@ class JsRegex
13
13
  expression: SubexpressionConverter,
14
14
  free_space: FreespaceConverter,
15
15
  group: GroupConverter,
16
+ keep: KeepConverter,
16
17
  literal: LiteralConverter,
17
18
  meta: MetaConverter,
18
19
  nonproperty: PropertyConverter,
data/lib/js_regex/node.rb CHANGED
@@ -9,10 +9,11 @@ class JsRegex
9
9
  attr_reader :children, :quantifier, :reference, :type
10
10
 
11
11
  TYPES = %i[
12
- backref_num
12
+ backref
13
13
  captured_group
14
14
  conditional
15
15
  dropped
16
+ keep_mark
16
17
  plain
17
18
  ].freeze
18
19
 
@@ -46,7 +47,7 @@ class JsRegex
46
47
  case type
47
48
  when :dropped
48
49
  ''
49
- when :backref_num, :captured_group, :plain
50
+ when :backref, :captured_group, :plain
50
51
  children.join << quantifier.to_s
51
52
  else
52
53
  raise TypeError.new(
@@ -59,6 +60,7 @@ class JsRegex
59
60
  self.children = attrs.fetch(:children) if attrs.key?(:children)
60
61
  self.quantifier = attrs.fetch(:quantifier) if attrs.key?(:quantifier)
61
62
  self.type = attrs.fetch(:type) if attrs.key?(:type)
63
+ self
62
64
  end
63
65
 
64
66
  private
@@ -6,12 +6,26 @@ class JsRegex
6
6
  module SecondPass
7
7
  class << self
8
8
  def call(tree)
9
+ substitute_root_level_keep_mark(tree)
9
10
  alternate_conditional_permutations(tree)
10
11
  tree
11
12
  end
12
13
 
13
14
  private
14
15
 
16
+ def substitute_root_level_keep_mark(tree)
17
+ keep_mark_index = nil
18
+ tree.children.each.with_index do |child, i|
19
+ break keep_mark_index = i if child.type == :keep_mark
20
+ end
21
+ return unless keep_mark_index
22
+
23
+ pre = tree.children[0...keep_mark_index]
24
+ post = tree.children[(keep_mark_index + 1)..-1]
25
+ lookbehind = Node.new('(?<=', *pre, ')')
26
+ tree.update(children: [lookbehind, *post])
27
+ end
28
+
15
29
  def alternate_conditional_permutations(tree)
16
30
  permutations = conditional_tree_permutations(tree)
17
31
  return if permutations.empty?
@@ -23,16 +37,16 @@ class JsRegex
23
37
  end
24
38
 
25
39
  def conditional_tree_permutations(tree)
26
- all_conds = conditions(tree)
27
- return [] if all_conds.empty?
40
+ conds = conditions(tree)
41
+ return [] if conds.empty?
28
42
 
29
43
  caps_per_branch = captured_group_count(tree)
30
44
 
31
- condition_permutations(all_conds).map.with_index do |truthy_conds, i|
45
+ condition_permutations(conds).map.with_index do |truthy_conds, i|
32
46
  tree_permutation = tree.clone
33
47
  # find referenced groups and conditionals and make one-sided
34
48
  crawl(tree_permutation) do |node|
35
- build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
49
+ build_permutation(node, conds, truthy_conds, caps_per_branch, i)
36
50
  end
37
51
  end
38
52
  end
@@ -63,16 +77,30 @@ class JsRegex
63
77
  end
64
78
  end
65
79
 
66
- def build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
80
+ def build_permutation(node, conds, truthy_conds, caps_per_branch, i)
67
81
  truthy = truthy_conds.include?(node.reference)
68
82
 
69
- if node.type.equal?(:captured_group) &&
70
- all_conds.include?(node.reference)
71
- adapt_referenced_group_to_permutation(node, truthy)
72
- elsif node.type.equal?(:conditional)
73
- adapt_conditional_to_permutation(node, truthy)
74
- elsif node.type.equal?(:backref_num)
83
+ case node.type
84
+ when :backref
85
+ # We cannot use named groups or backrefs in the conditional expansion,
86
+ # their repetition would cause a "Duplicate capture group name" error in JS.
87
+ node.update(children: [
88
+ node.children.first.sub(/k<.*>/, node.reference.to_s)
89
+ ])
90
+ # backref numbers need to be incremented for subsequent "branches"
75
91
  adapt_backref_to_permutation(node, caps_per_branch, i)
92
+ when :captured_group
93
+ # Remove name, c.f. :backref handling.
94
+ node.update(children: [
95
+ node.children.first.sub(/\?<.*>/, ''),
96
+ *node.children[1..-1]
97
+ ])
98
+ # if the group is referenced by any condition, modulate its quantity
99
+ if conds.include?(node.reference)
100
+ adapt_referenced_group_to_permutation(node, truthy)
101
+ end
102
+ when :conditional
103
+ adapt_conditional_to_permutation(node, truthy)
76
104
  end
77
105
  end
78
106
 
@@ -91,8 +119,8 @@ class JsRegex
91
119
  end
92
120
 
93
121
  def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
94
- new_num = backref_node.children[0].to_i + caps_per_branch * i
95
- backref_node.update(children: [new_num.to_s])
122
+ new_num = backref_node.reference + caps_per_branch * i
123
+ backref_node.update(children: ["\\#{new_num}"])
96
124
  end
97
125
 
98
126
  def min_quantify(node)
@@ -0,0 +1,19 @@
1
+ class JsRegex
2
+ module Target
3
+ ES2009 = 'ES2009'
4
+ ES2015 = 'ES2015'
5
+ ES2018 = 'ES2018'
6
+ SUPPORTED = [ES2009, ES2015, ES2018].freeze
7
+
8
+ def self.cast(arg)
9
+ return ES2009 if arg.nil?
10
+
11
+ normalized_arg = arg.to_s.upcase
12
+ return normalized_arg if SUPPORTED.include?(normalized_arg)
13
+
14
+ raise ArgumentError.new(
15
+ "Unknown target: #{arg.inspect}. Try one of #{SUPPORTED}."
16
+ ).extend(JsRegex::Error)
17
+ end
18
+ end
19
+ end
@@ -1,3 +1,3 @@
1
1
  class JsRegex
2
- VERSION = '3.7.2'
2
+ VERSION = '3.8.0'
3
3
  end
data/lib/js_regex.rb CHANGED
@@ -12,10 +12,10 @@ class JsRegex
12
12
  require_relative File.join('js_regex', 'version')
13
13
  require 'json'
14
14
 
15
- attr_reader :source, :options, :warnings
15
+ attr_reader :source, :options, :warnings, :target
16
16
 
17
- def initialize(ruby_regex, options: nil)
18
- @source, @options, @warnings = Conversion.of(ruby_regex, options: options)
17
+ def initialize(ruby_regex, **kwargs)
18
+ @source, @options, @warnings, @target = Conversion.of(ruby_regex, **kwargs)
19
19
  end
20
20
 
21
21
  def to_h
@@ -30,8 +30,8 @@ class JsRegex
30
30
  "/#{source.empty? ? '(?:)' : source}/#{options}"
31
31
  end
32
32
 
33
- def self.new!(ruby_regex, options: nil)
34
- js_regex = new(ruby_regex, options: options)
33
+ def self.new!(ruby_regex, **kwargs)
34
+ js_regex = new(ruby_regex, **kwargs)
35
35
  if js_regex.warnings.any?
36
36
  raise StandardError.new(
37
37
  "Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.2
4
+ version: 3.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-27 00:00:00.000000000 Z
11
+ date: 2022-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: character_set
@@ -72,9 +72,11 @@ files:
72
72
  - lib/js_regex/converter/escape_converter.rb
73
73
  - lib/js_regex/converter/freespace_converter.rb
74
74
  - lib/js_regex/converter/group_converter.rb
75
+ - lib/js_regex/converter/keep_converter.rb
75
76
  - lib/js_regex/converter/literal_converter.rb
76
77
  - lib/js_regex/converter/meta_converter.rb
77
78
  - lib/js_regex/converter/property_converter.rb
79
+ - lib/js_regex/converter/property_map.csv
78
80
  - lib/js_regex/converter/set_converter.rb
79
81
  - lib/js_regex/converter/subexpression_converter.rb
80
82
  - lib/js_regex/converter/type_converter.rb
@@ -82,6 +84,7 @@ files:
82
84
  - lib/js_regex/error.rb
83
85
  - lib/js_regex/node.rb
84
86
  - lib/js_regex/second_pass.rb
87
+ - lib/js_regex/target.rb
85
88
  - lib/js_regex/version.rb
86
89
  homepage: https://github.com/jaynetics/js_regex
87
90
  licenses: