js_regex 3.12.0 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c6bfe5a3631b78df8e258b46c12f9c89eeabefa5ce0967760cd7cc9c152ca8e
4
- data.tar.gz: d05300ee0f1496adbfa455533af89d8a42af7808af14fdb1f6c569bfcde5913f
3
+ metadata.gz: 81da8ab5dab32613d0486bb8b1d0c9926b1cd5dcf5cbf1dae7f7c7b86b695718
4
+ data.tar.gz: ad87e55f4c1834b237d8f18318fae065625e1e94f7645158ec4a7450967e5b69
5
5
  SHA512:
6
- metadata.gz: e7ed1e47c10c775151a4ccfbae673c9d122354841a20b7d18aed2e545b81ce9c87584026b2d7433c83be42d30c5e1c9ca823f7f31186cea4278273d3698c2a12
7
- data.tar.gz: 9d1e0e0f2ae0865e69262c8eefef4999cbcf877a270eb642ee7f115a3ff77c10c0ca8c6fdb60093fba5244e54bca0de58c1514ad28a1c6c21378e3c64d00b378
6
+ metadata.gz: 5b6e9d0eea5aa5656d3eaae3dd0ad769a21ef8a32d7304237afbe965266e8a8e8229c256a9110ce1573b2faf52ef112f354fcf84af61183b1f5aeb79035869f5
7
+ data.tar.gz: e546194d186650cc46aed54b73d92e6320251cb2954e213859fe3e5d4e06e04f502f8e0b5fe7a331db12578b59678484e184485ce4397dd61391677fe4766cd2
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class JsRegex
2
4
  #
3
5
  # This class acts as a facade, passing a Regexp to the Converters.
@@ -37,7 +39,7 @@ class JsRegex
37
39
  end
38
40
 
39
41
  def convert_options(input, custom_options, required_options)
40
- options = custom_options.to_s.scan(/[gimsuy]/) + required_options
42
+ options = custom_options.to_s.scan(/[dgimsuvy]/) + required_options
41
43
  if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero?
42
44
  options << 'i'
43
45
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -10,8 +12,10 @@ class JsRegex
10
12
 
11
13
  def convert_data
12
14
  case subtype
13
- when :bol, :bos then '^'
14
- when :eol, :eos then '$'
15
+ when :bol then convert_bol
16
+ when :bos then '^'
17
+ when :eol then '(?=$|\n)'
18
+ when :eos then '$'
15
19
  when :eos_ob_eol then '(?=\n?$)'
16
20
  when :word_boundary then convert_boundary
17
21
  when :nonword_boundary then convert_nonboundary
@@ -20,6 +24,15 @@ class JsRegex
20
24
  end
21
25
  end
22
26
 
27
+ def convert_bol
28
+ if context.es_2018_or_higher?
29
+ '(?<=^|\n(?!$))'
30
+ else
31
+ # TODO: warn in v4.0.0, or drop ES2009 & ES2015 support
32
+ '^'
33
+ end
34
+ end
35
+
23
36
  def convert_boundary
24
37
  if context.es_2018_or_higher? && context.enable_u_option
25
38
  BOUNDARY_EXPANSION
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
  require_relative 'group_converter'
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -19,17 +21,39 @@ class JsRegex
19
21
  end
20
22
 
21
23
  def convert_name_ref
22
- if context.es_2018_or_higher?
23
- # ES 2018+ supports named backrefs, but only the angled-bracket syntax
24
- Node.new("\\k<#{expression.name}>", reference: new_position, type: :backref)
24
+ # Check if this is a multiplexed named group reference
25
+ if expression.referenced_expressions.count > 1
26
+ convert_multiplexed_name_ref
25
27
  else
28
+ # Always use numeric backrefs since we convert all named groups to numbered
29
+ # (see comment in GroupConverter)
26
30
  convert_to_plain_num_ref
27
31
  end
28
32
  end
29
33
 
30
34
  def convert_to_plain_num_ref
31
35
  position = new_position
32
- Node.new("\\#{position}", reference: position, type: :backref)
36
+
37
+ # Check if this backreference refers to a group that was recursively called
38
+ original_group = target_position
39
+ if (recursive_position = context.get_recursive_group_position(original_group))
40
+ # Use the position of the group created by the recursive call
41
+ position = recursive_position
42
+ end
43
+
44
+ text = "\\#{position}#{'(?:)' if expression.x?}"
45
+ Node.new(text, reference: position, type: :backref)
46
+ end
47
+
48
+ def convert_multiplexed_name_ref
49
+ # Create alternation of all groups with the same name
50
+ positions = expression.referenced_expressions.map do |ref_exp|
51
+ context.new_capturing_group_position(ref_exp.number)
52
+ end
53
+
54
+ # Build alternation like (?:\1|\2)
55
+ alternation = positions.map { |pos| "\\#{pos}" }.join('|')
56
+ Node.new("(?:#{alternation})")
33
57
  end
34
58
 
35
59
  def new_position
@@ -43,18 +67,53 @@ class JsRegex
43
67
  def convert_call
44
68
  if context.recursions(expression) >= 5
45
69
  warn_of("Recursion for '#{expression}' curtailed at 5 levels")
46
- return ''
70
+ return drop
47
71
  end
48
72
 
49
73
  context.count_recursion(expression)
74
+
75
+ # Track groups before the wrapper group is added
76
+ groups_before_wrapper = context.capturing_group_count
77
+
50
78
  context.increment_local_capturing_group_count
51
79
  target_copy = expression.referenced_expression.unquantified_clone
52
80
  # avoid "Duplicate capture group name" error in JS
53
81
  target_copy.token = :capture if target_copy.is?(:named, :group)
82
+ context.start_subexp_recursion
54
83
  result = convert_expression(target_copy)
55
- # wrap in group if it is a full-pattern recursion
84
+ context.end_subexp_recursion
85
+
86
+ # Track all groups created during this recursive call
87
+ # This handles both the directly called group and any nested groups within it
88
+ # Get all group numbers from the referenced expression
89
+ original_groups = collect_group_numbers(expression.referenced_expression)
90
+
91
+ # The first new group number is groups_before_wrapper + 1
92
+ # (the wrapper group from increment_local_capturing_group_count doesn't appear in output)
93
+ first_new_group = groups_before_wrapper + 1
94
+
95
+ # Map each original group to its corresponding new group
96
+ # For example, if we recursively called group 1 which contains group 2,
97
+ # and this created groups 3 and 4, then:
98
+ # - group 1 -> group 3
99
+ # - group 2 -> group 4
100
+ original_groups.each_with_index do |old_group_num, index|
101
+ new_group_num = first_new_group + index
102
+ context.track_recursive_group_call(old_group_num, new_group_num)
103
+ end
104
+
105
+ # wrap in passive group if it is a full-pattern recursion
56
106
  expression.reference == 0 ? Node.new('(?:', result, ')') : result
57
107
  end
108
+
109
+ def collect_group_numbers(exp)
110
+ return [] if exp.terminal?
111
+
112
+ numbers = []
113
+ numbers << exp.number if exp.capturing?
114
+ exp.each_expression { |sub| numbers += collect_group_numbers(sub) }
115
+ numbers
116
+ end
58
117
  end
59
118
  end
60
119
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class JsRegex
2
4
  module Converter
3
5
  #
@@ -44,7 +46,9 @@ class JsRegex
44
46
  end
45
47
 
46
48
  def convert_subexpressions
47
- Node.new(*expression.map { |subexp| convert_expression(subexp) })
49
+ # mark alternation and conditional branches for processing in second pass
50
+ type = expression.is?(:sequence) ? :branch : :plain
51
+ Node.new(*expression.map { |subexp| convert_expression(subexp) }, type: type)
48
52
  end
49
53
 
50
54
  def convert_expression(expression)
@@ -78,9 +82,15 @@ class JsRegex
78
82
  def wrap_in_backrefed_lookahead(content)
79
83
  number = context.capturing_group_count + 1
80
84
  backref_node = Node.new("\\#{number}", reference: number, type: :backref)
85
+ backrefed_group = Node.new('(', *content, ')', reference: number, type: :captured_group)
81
86
  context.increment_local_capturing_group_count
82
- # an empty passive group (?:) is appended as literal digits may follow
83
- Node.new('(?=(', *content, '))', backref_node, '(?:)')
87
+ # The surrounding group is added so that quantifiers apply to the whole.
88
+ # Without it, `(?:)` would need to be appended as literal digits may follow.
89
+ Node.new('(?:(?=', backrefed_group, ')', backref_node, ')')
90
+ end
91
+
92
+ def unmatchable_substitution
93
+ '(?!)'
84
94
  end
85
95
  end
86
96
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class JsRegex
2
4
  module Converter
3
5
  #
@@ -10,6 +12,7 @@ class JsRegex
10
12
  :case_insensitive_root,
11
13
  :fail_fast,
12
14
  :in_atomic_group,
15
+ :in_subexp_recursion,
13
16
  :warnings
14
17
 
15
18
  def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
@@ -17,8 +20,10 @@ class JsRegex
17
20
  self.capturing_group_count = 0
18
21
  self.fail_fast = fail_fast
19
22
  self.recursions_per_expression = {}
23
+ self.recursion_stack = []
20
24
  self.required_options_hash = {}
21
25
  self.warnings = []
26
+ self.recursive_group_map = {}
22
27
 
23
28
  self.case_insensitive_root = case_insensitive_root
24
29
  self.target = target
@@ -70,17 +75,34 @@ class JsRegex
70
75
  end
71
76
 
72
77
  def recursions(exp)
73
- recursions_per_expression[recursion_id(exp)] || 0
78
+ # Count recursions in the current stack path only
79
+ recursion_stack.count { |e| recursion_id(e) == recursion_id(exp) }
74
80
  end
75
81
 
76
82
  def count_recursion(exp)
77
- recursions_per_expression[recursion_id(exp)] = recursions(exp) + 1
83
+ recursion_stack.push(exp)
78
84
  end
79
85
 
80
86
  def recursion_id(exp)
81
87
  [exp.class, exp.starts_at]
82
88
  end
83
89
 
90
+ def start_subexp_recursion
91
+ self.in_subexp_recursion = true
92
+ self.recursion_start_group_count = capturing_group_count
93
+ end
94
+
95
+ def end_subexp_recursion
96
+ self.in_subexp_recursion = false
97
+ # Pop the last recursion from stack when exiting
98
+ recursion_stack.pop if recursion_stack.any?
99
+ end
100
+
101
+ # Get the number of groups at the start of the current recursion
102
+ def recursion_start_group_count
103
+ self.recursion_start_group_count || 0
104
+ end
105
+
84
106
  # takes and returns 1-indexed group positions.
85
107
  # new is different from old if capturing groups were added in between.
86
108
  def new_capturing_group_position(old_position)
@@ -95,17 +117,31 @@ class JsRegex
95
117
  capturing_group_count - total_added_capturing_groups
96
118
  end
97
119
 
120
+ # Track that a group was created by a recursive call
121
+ def track_recursive_group_call(original_group_num, new_group_num)
122
+ recursive_group_map[original_group_num] = new_group_num
123
+ end
124
+
125
+ # Get the group number created by a recursive call
126
+ def get_recursive_group_position(original_group_num)
127
+ recursive_group_map[original_group_num]
128
+ end
129
+
98
130
  private
99
131
 
100
132
  attr_accessor :added_capturing_groups_after_group,
101
133
  :recursions_per_expression,
134
+ :recursion_stack,
102
135
  :required_options_hash,
136
+ :recursive_group_map,
103
137
  :target
104
138
 
105
139
  attr_writer :capturing_group_count,
106
140
  :case_insensitive_root,
107
141
  :fail_fast,
108
142
  :in_atomic_group,
143
+ :in_subexp_recursion,
144
+ :recursion_start_group_count,
109
145
  :warnings
110
146
 
111
147
  def total_added_capturing_groups
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
  require_relative 'literal_converter'
3
5
 
@@ -38,14 +40,14 @@ class JsRegex
38
40
  case subtype
39
41
  when :codepoint_list
40
42
  convert_codepoint_list
41
- when :control, :meta_sequence
43
+ when :control, :meta_sequence, :utf8_hex
42
44
  unicode_escape_codepoint
43
45
  when :literal
44
46
  LiteralConverter.convert_data(expression.char, context)
47
+ when :bell, :escape, :hex, :octal
48
+ hex_escape_codepoint
45
49
  when *ESCAPES_SHARED_BY_RUBY_AND_JS
46
50
  pass_through
47
- when :bell, :escape, :octal
48
- hex_escape_codepoint
49
51
  else
50
52
  warn_of_unsupported_feature
51
53
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -22,12 +24,10 @@ class JsRegex
22
24
  end
23
25
 
24
26
  def build_named_group
25
- if context.es_2018_or_higher?
26
- # ES 2018+ supports named groups, but only the angled-bracket syntax
27
- build_group(head: "(?<#{expression.name}>")
28
- else
29
- build_group
30
- end
27
+ # Always convert named groups to numbered groups. ES2018+ supports named
28
+ # groups, but can not handle repeated names in multiplexing or conditional
29
+ # expansion scenarios.
30
+ build_group
31
31
  end
32
32
 
33
33
  def emulate_atomic_group
@@ -69,10 +69,6 @@ class JsRegex
69
69
  expression.empty?
70
70
  end
71
71
 
72
- def unmatchable_substitution
73
- '(?!)'
74
- end
75
-
76
72
  def build_absence_group
77
73
  head = "(?:(?:.|\\n){,#{expression.inner_match_length.min - 1}}|(?:(?!"
78
74
  tail = ')(?:.|\n))*)'
@@ -84,7 +80,7 @@ class JsRegex
84
80
  tail = opts[:tail] || ')'
85
81
  return Node.new(*wrap(head, tail)) if opts[:capturing].equal?(false)
86
82
 
87
- context.capture_group
83
+ context.capture_group unless context.in_subexp_recursion
88
84
  ref = expression.number
89
85
  Node.new(*wrap(head, tail), reference: ref, type: :captured_group)
90
86
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -6,8 +8,8 @@ class JsRegex
6
8
  # Template class implementation.
7
9
  #
8
10
  class LiteralConverter < JsRegex::Converter::Base
9
- ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
10
- LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/
11
+ ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/.freeze
12
+ LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/.freeze
11
13
 
12
14
  class << self
13
15
  def convert_data(data, context)
@@ -59,7 +61,7 @@ class JsRegex
59
61
  result
60
62
  end
61
63
 
62
- HAS_CASE_PATTERN = /[\p{lower}\p{upper}]/
64
+ HAS_CASE_PATTERN = /[\p{lower}\p{upper}]/.freeze
63
65
 
64
66
  def handle_locally_case_insensitive_literal(literal)
65
67
  literal =~ HAS_CASE_PATTERN ? case_insensitivize(literal) : literal
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
  require 'character_set'
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
  require_relative 'escape_converter'
3
5
  require_relative 'type_converter'
@@ -41,11 +43,7 @@ class JsRegex
41
43
  def simple_convert_child(exp)
42
44
  case exp.type
43
45
  when :literal
44
- return false if !context.u? &&
45
- exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
46
- !context.enable_u_option
47
-
48
- LiteralConverter.escape_incompatible_bmp_literals(exp.text)
46
+ simple_convert_literal_child(exp)
49
47
  when :set
50
48
  # full conversion is needed for nested sets and intersections
51
49
  exp.token.equal?(:range) && exp.expressions.map do |op|
@@ -67,7 +65,20 @@ class JsRegex
67
65
  end
68
66
  end
69
67
 
70
- SET_SPECIFIC_ESCAPES_PATTERN = /[\^\-]/
68
+ def simple_convert_literal_child(exp)
69
+ if !context.u? &&
70
+ exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
71
+ !context.enable_u_option
72
+ false
73
+ elsif SET_LITERALS_REQUIRING_ESCAPE_PATTERN.match?(exp.text)
74
+ "\\#{exp.text}"
75
+ else
76
+ LiteralConverter.escape_incompatible_bmp_literals(exp.text)
77
+ end
78
+ end
79
+
80
+ SET_LITERALS_REQUIRING_ESCAPE_PATTERN = Regexp.union(%w<( ) [ ] { } / - |>)
81
+ SET_SPECIFIC_ESCAPES_PATTERN = /[\^\-]/.freeze
71
82
  CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
72
83
  EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS
73
84
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -10,10 +12,37 @@ class JsRegex
10
12
  NONHEX_EXPANSION = '[^0-9A-Fa-f]'
11
13
  I_MODE_HEX_EXPANSION = '[0-9A-F]'
12
14
  I_MODE_NONHEX_EXPANSION = '[^0-9A-F]'
15
+ LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
13
16
  ES2018_HEX_EXPANSION = '\p{AHex}'
14
17
  ES2018_NONHEX_EXPANSION = '\P{AHex}'
15
- ES2018_XGRAPHEME_EXPANSION = '[\P{M}\P{Lm}](?:(?:[\u035C\u0361]\P{M}\p{M}*)|\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*'
16
- LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
18
+ # partially taken from https://unicode.org/reports/tr51/#EBNF_and_Regex
19
+ ES2018_XGRAPHEME_EXPANSION = <<-'REGEXP'.gsub(/\s+/, '')
20
+ (?:
21
+ \r\n
22
+ |
23
+ \p{RI}\p{RI}
24
+ |
25
+ \p{Emoji}
26
+ (?:
27
+ \p{EMod}
28
+ |
29
+ \uFE0F\u20E3?
30
+ |
31
+ [\u{E0020}-\u{E007E}]+\u{E007F}
32
+ )?
33
+ (?:
34
+ \u200D
35
+ (?:
36
+ \p{RI}\p{RI}
37
+ |
38
+ \p{Emoji}(?:\p{EMod}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?
39
+ )
40
+ )*
41
+ |
42
+ [\P{M}\P{Lm}](?:\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*
43
+ )
44
+ REGEXP
45
+
17
46
 
18
47
  def self.directly_compatible?(expression, _context = nil)
19
48
  case expression.token
@@ -30,7 +59,7 @@ class JsRegex
30
59
  case subtype
31
60
  when :hex then hex_expansion
32
61
  when :nonhex then nonhex_expansion
33
- when :linebreak then LINEBREAK_EXPANSION
62
+ when :linebreak then linebreak_expansion
34
63
  when :xgrapheme then xgrapheme
35
64
  when :digit, :space, :word
36
65
  return pass_through if self.class.directly_compatible?(expression)
@@ -63,6 +92,10 @@ class JsRegex
63
92
  end
64
93
  end
65
94
 
95
+ def linebreak_expansion
96
+ wrap_in_backrefed_lookahead(LINEBREAK_EXPANSION)
97
+ end
98
+
66
99
  def negative_set_substitution
67
100
  # ::of_expression returns an inverted set for negative expressions,
68
101
  # so we need to un-invert before wrapping in [^ and ]. Kinda lame.
@@ -79,7 +112,7 @@ class JsRegex
79
112
 
80
113
  def xgrapheme
81
114
  if context.es_2018_or_higher? && context.enable_u_option
82
- ES2018_XGRAPHEME_EXPANSION
115
+ wrap_in_backrefed_lookahead(ES2018_XGRAPHEME_EXPANSION)
83
116
  else
84
117
  warn_of_unsupported_feature
85
118
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'base'
2
4
 
3
5
  class JsRegex
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class JsRegex
2
4
  module Converter
3
5
  Dir[File.join(__dir__, 'converter', '*.rb')].sort.each do |file|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class JsRegex
2
4
  # This is mixed into errors, e.g. those thrown by the parser,
3
5
  # allowing to `rescue JsRegex::Error`.
data/lib/js_regex/node.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class JsRegex
2
4
  #
3
5
  # Converter#convert result. Represents a branch or leaf node with an optional
@@ -10,6 +12,7 @@ class JsRegex
10
12
 
11
13
  TYPES = %i[
12
14
  backref
15
+ branch
13
16
  captured_group
14
17
  conditional
15
18
  dropped
@@ -47,7 +50,7 @@ class JsRegex
47
50
  case type
48
51
  when :dropped
49
52
  ''
50
- when :backref, :captured_group, :plain
53
+ when :backref, :branch, :captured_group, :plain
51
54
  children.join << quantifier.to_s
52
55
  else
53
56
  raise TypeError.new(
@@ -63,6 +66,10 @@ class JsRegex
63
66
  self
64
67
  end
65
68
 
69
+ def optional?
70
+ quantifier && quantifier.min == 0
71
+ end
72
+
66
73
  private
67
74
 
68
75
  TypeError = Class.new(::TypeError).extend(JsRegex::Error)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class JsRegex
2
4
  #
3
5
  # After conversion of a full Regexp::Expression tree, this
@@ -8,6 +10,7 @@ class JsRegex
8
10
  def call(tree)
9
11
  substitute_root_level_keep_mark(tree)
10
12
  alternate_conditional_permutations(tree)
13
+ handle_non_participating_backrefs(tree)
11
14
  tree
12
15
  end
13
16
 
@@ -26,12 +29,49 @@ class JsRegex
26
29
  tree.update(children: [lookbehind, *post])
27
30
  end
28
31
 
32
+ def handle_non_participating_backrefs(tree)
33
+ level = 0
34
+ completed_group_numbers = {}
35
+ group_branches = {}
36
+ branch_stack = []
37
+
38
+ crawl(tree, true) do |node, event|
39
+ case [node.type, event]
40
+ when [:branch, :enter]
41
+ branch_stack.push(node)
42
+ when [:branch, :exit]
43
+ branch_stack.pop
44
+ when [:captured_group, :enter]
45
+ level += 1
46
+ when [:captured_group, :exit]
47
+ unless node.optional? # ignore optional groups
48
+ group_branches[node.reference] = branch_stack.last
49
+ end
50
+ number = level
51
+ number += 1 while completed_group_numbers[number]
52
+ completed_group_numbers[number] = true
53
+ level -= 1
54
+ when [:backref, :exit]
55
+ ref_branch = group_branches[node.reference]
56
+ current_branch = branch_stack.last
57
+
58
+ # make bad backrefs non-matchable
59
+ references_other_branch =
60
+ ref_branch && current_branch && ref_branch != current_branch
61
+ forward_reference = !completed_group_numbers[node.reference]
62
+ if references_other_branch || forward_reference
63
+ node.update(type: :plain, children: ['(?!)'])
64
+ end
65
+ end
66
+ end
67
+ end
68
+
29
69
  def alternate_conditional_permutations(tree)
30
70
  permutations = conditional_tree_permutations(tree)
31
71
  return if permutations.empty?
32
72
 
33
73
  alternatives = permutations.map.with_index do |variant, i|
34
- Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')')
74
+ Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')', type: :branch)
35
75
  end
36
76
  tree.update(children: alternatives)
37
77
  end
@@ -48,13 +88,16 @@ class JsRegex
48
88
  crawl(tree_permutation) do |node|
49
89
  build_permutation(node, conds, truthy_conds, caps_per_branch, i)
50
90
  end
91
+ tree_permutation
51
92
  end
52
93
  end
53
94
 
54
- def crawl(node, &block)
95
+ def crawl(node, trace = false, &block)
55
96
  return if node.instance_of?(String)
56
- yield(node)
57
- node.children.each { |child| crawl(child, &block) }
97
+
98
+ trace ? yield(node, :enter) : yield(node)
99
+ node.children.each { |child| crawl(child, trace, &block) }
100
+ trace && yield(node, :exit)
58
101
  end
59
102
 
60
103
  def conditions(tree)
@@ -90,11 +133,6 @@ class JsRegex
90
133
  # backref numbers need to be incremented for subsequent "branches"
91
134
  adapt_backref_to_permutation(node, caps_per_branch, i)
92
135
  when :captured_group
93
- # Remove name, c.f. :backref handling.
94
- node.update(children: [
95
- node.children.first.sub(/\?<.*>/, ''),
96
- *node.children[1..-1]
97
- ])
98
136
  # if the group is referenced by any condition, modulate its quantity
99
137
  if conds.include?(node.reference)
100
138
  adapt_referenced_group_to_permutation(node, truthy)
@@ -124,8 +162,9 @@ class JsRegex
124
162
  end
125
163
 
126
164
  def min_quantify(node)
127
- return if guarantees_at_least_one_match?(qtf = node.quantifier)
165
+ return unless node.optional?
128
166
 
167
+ qtf = node.quantifier
129
168
  if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
130
169
  node.update(quantifier: nil)
131
170
  else
@@ -135,10 +174,6 @@ class JsRegex
135
174
  end
136
175
  end
137
176
 
138
- def guarantees_at_least_one_match?(quantifier)
139
- quantifier.nil? || quantifier.min > 0
140
- end
141
-
142
177
  def null_quantify(node)
143
178
  null_quantifier = Regexp::Expression::Quantifier.construct(text: '{0}')
144
179
  node.update(quantifier: null_quantifier)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class JsRegex
2
4
  module Target
3
5
  ES2009 = 'ES2009'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class JsRegex
2
- VERSION = '3.12.0'
4
+ VERSION = '3.14.0'
3
5
  end
data/lib/js_regex.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # JsRegex converts ::Regexp instances to JavaScript.
2
4
  #
3
5
  # Usage:
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.12.0
4
+ version: 3.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-11-02 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: character_set
@@ -28,22 +27,16 @@ dependencies:
28
27
  name: regexp_parser
29
28
  requirement: !ruby/object:Gem::Requirement
30
29
  requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: 2.6.2
34
- - - "<"
30
+ - - "~>"
35
31
  - !ruby/object:Gem::Version
36
- version: 3.0.0
32
+ version: '2.11'
37
33
  type: :runtime
38
34
  prerelease: false
39
35
  version_requirements: !ruby/object:Gem::Requirement
40
36
  requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- version: 2.6.2
44
- - - "<"
37
+ - - "~>"
45
38
  - !ruby/object:Gem::Version
46
- version: 3.0.0
39
+ version: '2.11'
47
40
  - !ruby/object:Gem::Dependency
48
41
  name: regexp_property_values
49
42
  requirement: !ruby/object:Gem::Requirement
@@ -96,7 +89,6 @@ homepage: https://github.com/jaynetics/js_regex
96
89
  licenses:
97
90
  - MIT
98
91
  metadata: {}
99
- post_install_message:
100
92
  rdoc_options: []
101
93
  require_paths:
102
94
  - lib
@@ -111,8 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
103
  - !ruby/object:Gem::Version
112
104
  version: '0'
113
105
  requirements: []
114
- rubygems_version: 3.5.16
115
- signing_key:
106
+ rubygems_version: 3.6.7
116
107
  specification_version: 4
117
108
  summary: Converts Ruby regexes to JavaScript regexes.
118
109
  test_files: []