regexp_parser 2.4.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +98 -42
  3. data/README.md +46 -30
  4. data/lib/regexp_parser/expression/base.rb +17 -9
  5. data/lib/regexp_parser/expression/classes/backreference.rb +19 -2
  6. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
  7. data/lib/regexp_parser/expression/classes/conditional.rb +8 -0
  8. data/lib/regexp_parser/expression/classes/escape_sequence.rb +1 -1
  9. data/lib/regexp_parser/expression/classes/group.rb +10 -0
  10. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  11. data/lib/regexp_parser/expression/classes/root.rb +3 -5
  12. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
  13. data/lib/regexp_parser/expression/methods/construct.rb +43 -0
  14. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  15. data/lib/regexp_parser/expression/methods/match_length.rb +9 -5
  16. data/lib/regexp_parser/expression/methods/traverse.rb +6 -3
  17. data/lib/regexp_parser/expression/quantifier.rb +6 -5
  18. data/lib/regexp_parser/expression/sequence.rb +6 -21
  19. data/lib/regexp_parser/expression/shared.rb +20 -3
  20. data/lib/regexp_parser/expression/subexpression.rb +4 -1
  21. data/lib/regexp_parser/expression.rb +4 -2
  22. data/lib/regexp_parser/lexer.rb +61 -29
  23. data/lib/regexp_parser/parser.rb +36 -26
  24. data/lib/regexp_parser/scanner/property.rl +1 -1
  25. data/lib/regexp_parser/scanner/scanner.rl +57 -42
  26. data/lib/regexp_parser/scanner.rb +873 -823
  27. data/lib/regexp_parser/syntax/token/escape.rb +1 -1
  28. data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
  29. data/lib/regexp_parser/syntax/versions.rb +2 -0
  30. data/lib/regexp_parser/version.rb +1 -1
  31. metadata +7 -5
@@ -0,0 +1,43 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation, e.g. "atomic group", "hex escape", "word type", ..
4
+ def human_name
5
+ [token, type].compact.join(' ').tr('_', ' ')
6
+ end
7
+ end
8
+
9
+ Alternation.class_eval { def human_name; 'alternation' end }
10
+ Alternative.class_eval { def human_name; 'alternative' end }
11
+ Anchor::BOL.class_eval { def human_name; 'beginning of line' end }
12
+ Anchor::BOS.class_eval { def human_name; 'beginning of string' end }
13
+ Anchor::EOL.class_eval { def human_name; 'end of line' end }
14
+ Anchor::EOS.class_eval { def human_name; 'end of string' end }
15
+ Anchor::EOSobEOL.class_eval { def human_name; 'newline-ready end of string' end }
16
+ Anchor::MatchStart.class_eval { def human_name; 'match start' end }
17
+ Anchor::NonWordBoundary.class_eval { def human_name; 'no word boundary' end }
18
+ Anchor::WordBoundary.class_eval { def human_name; 'word boundary' end }
19
+ Assertion::Lookahead.class_eval { def human_name; 'lookahead' end }
20
+ Assertion::Lookbehind.class_eval { def human_name; 'lookbehind' end }
21
+ Assertion::NegativeLookahead.class_eval { def human_name; 'negative lookahead' end }
22
+ Assertion::NegativeLookbehind.class_eval { def human_name; 'negative lookbehind' end }
23
+ Backreference::Name.class_eval { def human_name; 'backreference by name' end }
24
+ Backreference::NameCall.class_eval { def human_name; 'subexpression call by name' end }
25
+ Backreference::Number.class_eval { def human_name; 'backreference' end }
26
+ Backreference::NumberRelative.class_eval { def human_name; 'relative backreference' end }
27
+ Backreference::NumberCall.class_eval { def human_name; 'subexpression call' end }
28
+ Backreference::NumberCallRelative.class_eval { def human_name; 'relative subexpression call' end }
29
+ CharacterSet::IntersectedSequence.class_eval { def human_name; 'intersected sequence' end }
30
+ CharacterSet::Intersection.class_eval { def human_name; 'intersection' end }
31
+ CharacterSet::Range.class_eval { def human_name; 'character range' end }
32
+ CharacterType::Any.class_eval { def human_name; 'match-all' end }
33
+ Comment.class_eval { def human_name; 'comment' end }
34
+ Conditional::Branch.class_eval { def human_name; 'conditional branch' end }
35
+ Conditional::Condition.class_eval { def human_name; 'condition' end }
36
+ Conditional::Expression.class_eval { def human_name; 'conditional' end }
37
+ Group::Capture.class_eval { def human_name; "capture group #{number}" end }
38
+ Group::Named.class_eval { def human_name; 'named capture group' end }
39
+ Keep::Mark.class_eval { def human_name; 'keep-mark lookbehind' end }
40
+ Literal.class_eval { def human_name; 'literal' end }
41
+ Root.class_eval { def human_name; 'root' end }
42
+ WhiteSpace.class_eval { def human_name; 'free space' end }
43
+ end
@@ -63,16 +63,20 @@ class Regexp::MatchLength
63
63
  end
64
64
 
65
65
  def to_re
66
- "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
66
+ /(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
67
67
  end
68
68
 
69
69
  private
70
70
 
71
71
  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
72
72
 
73
- def test_regexp
74
- @test_regexp ||= Regexp.new("^#{to_re}$").tap do |regexp|
75
- regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
73
+ if Regexp.method_defined?(:match?) # ruby >= 2.4
74
+ def test_regexp
75
+ @test_regexp ||= /^#{to_re}$/
76
+ end
77
+ else
78
+ def test_regexp
79
+ @test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
76
80
  end
77
81
  end
78
82
  end
@@ -112,7 +116,7 @@ module Regexp::Expression
112
116
  end
113
117
 
114
118
  def inner_match_length
115
- dummy = Regexp::Expression::Root.build
119
+ dummy = Regexp::Expression::Root.construct
116
120
  dummy.expressions = expressions.map(&:clone)
117
121
  dummy.quantifier = quantifier && quantifier.clone
118
122
  dummy.match_length
@@ -36,11 +36,14 @@ module Regexp::Expression
36
36
 
37
37
  # Iterates over the expressions of this expression as an array, passing
38
38
  # the expression and its index within its parent to the given block.
39
- def each_expression(include_self = false)
39
+ def each_expression(include_self = false, &block)
40
40
  return enum_for(__method__, include_self) unless block_given?
41
41
 
42
- traverse(include_self) do |event, exp, index|
43
- yield(exp, index) unless event == :exit
42
+ block.call(self, 0) if include_self
43
+
44
+ each_with_index do |exp, index|
45
+ block.call(exp, index)
46
+ exp.each_expression(&block) unless exp.terminal?
44
47
  end
45
48
  end
46
49
 
@@ -14,7 +14,7 @@ module Regexp::Expression
14
14
  deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
15
15
 
16
16
  init_from_token_and_options(*args)
17
- @mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
17
+ @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
18
18
  @min, @max = minmax
19
19
  # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
20
  self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
@@ -44,10 +44,11 @@ module Regexp::Expression
44
44
  def deprecated_old_init(token, text, min, max, mode = :greedy)
45
45
  warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
46
  "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
- "Please pass a Regexp::Token instead, e.g. replace `type, text, min, max, mode` "\
48
- "with `::Regexp::Token.new(:quantifier, type, text)`. min, max, and mode "\
49
- "will be derived automatically. \nThis is consistent with how Expression::Base "\
50
- "instances are created."
47
+ "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
48
+ "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
49
+ "will be derived automatically.\n"\
50
+ "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
51
+ "This is consistent with how Expression::Base instances are created. "
51
52
  @token = token
52
53
  @text = text
53
54
  @min = min
@@ -7,31 +7,16 @@ module Regexp::Expression
7
7
  # branches, and CharacterSet::Intersection intersected sequences.
8
8
  class Sequence < Regexp::Expression::Subexpression
9
9
  class << self
10
- def add_to(subexpression, params = {}, active_opts = {})
11
- sequence = at_levels(
12
- subexpression.level,
13
- subexpression.set_level,
14
- params[:conditional_level] || subexpression.conditional_level
10
+ def add_to(exp, params = {}, active_opts = {})
11
+ sequence = construct(
12
+ level: exp.level,
13
+ set_level: exp.set_level,
14
+ conditional_level: params[:conditional_level] || exp.conditional_level,
15
15
  )
16
- sequence.nesting_level = subexpression.nesting_level + 1
17
16
  sequence.options = active_opts
18
- subexpression.expressions << sequence
17
+ exp.expressions << sequence
19
18
  sequence
20
19
  end
21
-
22
- def at_levels(level, set_level, conditional_level)
23
- token = Regexp::Token.new(
24
- :expression,
25
- :sequence,
26
- '',
27
- nil, # ts
28
- nil, # te
29
- level,
30
- set_level,
31
- conditional_level
32
- )
33
- new(token)
34
- end
35
20
  end
36
21
 
37
22
  def starts_at
@@ -1,12 +1,16 @@
1
1
  module Regexp::Expression
2
2
  module Shared
3
+ module ClassMethods; end # filled in ./methods/*.rb
4
+
3
5
  def self.included(mod)
4
6
  mod.class_eval do
7
+ extend Shared::ClassMethods
8
+
5
9
  attr_accessor :type, :token, :text, :ts, :te,
6
10
  :level, :set_level, :conditional_level,
7
- :options, :quantifier
11
+ :options
8
12
 
9
- attr_reader :nesting_level
13
+ attr_reader :nesting_level, :quantifier
10
14
  end
11
15
  end
12
16
 
@@ -60,6 +64,10 @@ module Regexp::Expression
60
64
  !quantifier.nil?
61
65
  end
62
66
 
67
+ def optional?
68
+ quantified? && quantifier.min == 0
69
+ end
70
+
63
71
  def offset
64
72
  [starts_at, full_length]
65
73
  end
@@ -69,7 +77,11 @@ module Regexp::Expression
69
77
  end
70
78
 
71
79
  def terminal?
72
- !respond_to?(:expressions)
80
+ true # overridden to be false in Expression::Subexpression
81
+ end
82
+
83
+ def referential?
84
+ false # overridden to be true e.g. in Expression::Backreference::Base
73
85
  end
74
86
 
75
87
  def nesting_level=(lvl)
@@ -77,5 +89,10 @@ module Regexp::Expression
77
89
  quantifier && quantifier.nesting_level = lvl
78
90
  terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
79
91
  end
92
+
93
+ def quantifier=(qtf)
94
+ @quantifier = qtf
95
+ @repetitions = nil # clear memoized value
96
+ end
80
97
  end
81
98
  end
@@ -19,7 +19,6 @@ module Regexp::Expression
19
19
  if exp.is_a?(WhiteSpace) && last && last.is_a?(WhiteSpace)
20
20
  last.merge(exp)
21
21
  else
22
- exp.nesting_level = nesting_level + 1
23
22
  expressions << exp
24
23
  end
25
24
  end
@@ -53,6 +52,10 @@ module Regexp::Expression
53
52
  )
54
53
  end
55
54
 
55
+ def terminal?
56
+ false
57
+ end
58
+
56
59
  private
57
60
 
58
61
  def intersperse(expressions, separator)
@@ -13,6 +13,7 @@ require 'regexp_parser/expression/classes/backreference'
13
13
  require 'regexp_parser/expression/classes/character_set'
14
14
  require 'regexp_parser/expression/classes/character_set/intersection'
15
15
  require 'regexp_parser/expression/classes/character_set/range'
16
+ require 'regexp_parser/expression/classes/character_type'
16
17
  require 'regexp_parser/expression/classes/conditional'
17
18
  require 'regexp_parser/expression/classes/escape_sequence'
18
19
  require 'regexp_parser/expression/classes/free_space'
@@ -20,10 +21,11 @@ require 'regexp_parser/expression/classes/group'
20
21
  require 'regexp_parser/expression/classes/keep'
21
22
  require 'regexp_parser/expression/classes/literal'
22
23
  require 'regexp_parser/expression/classes/posix_class'
23
- require 'regexp_parser/expression/classes/property'
24
24
  require 'regexp_parser/expression/classes/root'
25
- require 'regexp_parser/expression/classes/type'
25
+ require 'regexp_parser/expression/classes/unicode_property'
26
26
 
27
+ require 'regexp_parser/expression/methods/construct'
28
+ require 'regexp_parser/expression/methods/human_name'
27
29
  require 'regexp_parser/expression/methods/match'
28
30
  require 'regexp_parser/expression/methods/match_length'
29
31
  require 'regexp_parser/expression/methods/options'
@@ -13,50 +13,68 @@ class Regexp::Lexer
13
13
 
14
14
  CONDITION_TOKENS = %i[condition condition_close].freeze
15
15
 
16
- def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
17
- new.lex(input, syntax, options: options, &block)
16
+ def self.lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
17
+ new.lex(input, syntax, options: options, collect_tokens: collect_tokens, &block)
18
18
  end
19
19
 
20
- def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
21
- syntax = Regexp::Syntax.for(syntax)
20
+ def lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
21
+ syntax = syntax ? Regexp::Syntax.for(syntax) : Regexp::Syntax::CURRENT
22
22
 
23
+ self.block = block
24
+ self.collect_tokens = collect_tokens
23
25
  self.tokens = []
26
+ self.prev_token = nil
27
+ self.preprev_token = nil
24
28
  self.nesting = 0
25
29
  self.set_nesting = 0
26
30
  self.conditional_nesting = 0
27
31
  self.shift = 0
28
32
 
29
- last = nil
30
- Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te|
33
+ Regexp::Scanner.scan(input, options: options, collect_tokens: false) do |type, token, text, ts, te|
31
34
  type, token = *syntax.normalize(type, token)
32
35
  syntax.check! type, token
33
36
 
34
37
  ascend(type, token)
35
38
 
36
- if type == :quantifier and last
37
- break_literal(last) if last.type == :literal
38
- break_codepoint_list(last) if last.token == :codepoint_list
39
+ if (last = prev_token) &&
40
+ type == :quantifier &&
41
+ (
42
+ (last.type == :literal && (parts = break_literal(last))) ||
43
+ (last.token == :codepoint_list && (parts = break_codepoint_list(last)))
44
+ )
45
+ emit(parts[0])
46
+ last = parts[1]
39
47
  end
40
48
 
41
49
  current = Regexp::Token.new(type, token, text, ts + shift, te + shift,
42
50
  nesting, set_nesting, conditional_nesting)
43
51
 
44
- current = merge_condition(current) if type == :conditional and
45
- CONDITION_TOKENS.include?(token)
46
-
47
- last.next = current if last
48
- current.previous = last if last
52
+ if type == :conditional && CONDITION_TOKENS.include?(token)
53
+ current = merge_condition(current, last)
54
+ elsif last
55
+ last.next = current
56
+ current.previous = last
57
+ emit(last)
58
+ end
49
59
 
50
- tokens << current
51
- last = current
60
+ self.preprev_token = last
61
+ self.prev_token = current
52
62
 
53
63
  descend(type, token)
54
64
  end
55
65
 
56
- if block_given?
57
- tokens.map { |t| block.call(t) }
66
+ emit(prev_token) if prev_token
67
+
68
+ collect_tokens ? tokens : nil
69
+ end
70
+
71
+ def emit(token)
72
+ if block
73
+ # TODO: in v3.0.0, remove `collect_tokens:` kwarg and only collect w/o block
74
+ res = block.call(token)
75
+ tokens << res if collect_tokens
58
76
  else
59
- tokens
77
+ tokens << token
60
78
  end
61
79
  end
62
80
 
@@ -66,7 +84,9 @@ class Regexp::Lexer
66
84
 
67
85
  private
68
86
 
69
- attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting, :shift
87
+ attr_accessor :block,
88
+ :collect_tokens, :tokens, :prev_token, :preprev_token,
89
+ :nesting, :set_nesting, :conditional_nesting, :shift
70
90
 
71
91
  def ascend(type, token)
72
92
  case type
@@ -96,34 +116,46 @@ class Regexp::Lexer
96
116
  lead, last, _ = token.text.partition(/.\z/mu)
97
117
  return if lead.empty?
98
118
 
99
- tokens.pop
100
- tokens << Regexp::Token.new(:literal, :literal, lead,
119
+ token_1 = Regexp::Token.new(:literal, :literal, lead,
101
120
  token.ts, (token.te - last.length),
102
121
  nesting, set_nesting, conditional_nesting)
103
- tokens << Regexp::Token.new(:literal, :literal, last,
122
+ token_2 = Regexp::Token.new(:literal, :literal, last,
104
123
  (token.ts + lead.length), token.te,
105
124
  nesting, set_nesting, conditional_nesting)
125
+
126
+ token_1.previous = preprev_token
127
+ token_1.next = token_2
128
+ token_2.previous = token_1 # .next will be set by #lex
129
+ [token_1, token_2]
106
130
  end
107
131
 
132
+ # if a codepoint list is followed by a quantifier, that quantifier applies
133
+ # to the last codepoint, e.g. /\u{61 62 63}{3}/ =~ 'abccc'
134
+ # c.f. #break_literal.
108
135
  def break_codepoint_list(token)
109
136
  lead, _, tail = token.text.rpartition(' ')
110
137
  return if lead.empty?
111
138
 
112
- tokens.pop
113
- tokens << Regexp::Token.new(:escape, :codepoint_list, lead + '}',
139
+ token_1 = Regexp::Token.new(:escape, :codepoint_list, lead + '}',
114
140
  token.ts, (token.te - tail.length),
115
141
  nesting, set_nesting, conditional_nesting)
116
- tokens << Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
142
+ token_2 = Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
117
143
  (token.ts + lead.length + 1), (token.te + 3),
118
144
  nesting, set_nesting, conditional_nesting)
119
145
 
120
146
  self.shift = shift + 3 # one space less, but extra \, u, {, and }
147
+
148
+ token_1.previous = preprev_token
149
+ token_1.next = token_2
150
+ token_2.previous = token_1 # .next will be set by #lex
151
+ [token_1, token_2]
121
152
  end
122
153
 
123
- def merge_condition(current)
124
- last = tokens.pop
125
- Regexp::Token.new(:conditional, :condition, last.text + current.text,
154
+ def merge_condition(current, last)
155
+ token = Regexp::Token.new(:conditional, :condition, last.text + current.text,
126
156
  last.ts, current.te, nesting, set_nesting, conditional_nesting)
157
+ token.previous = preprev_token # .next will be set by #lex
158
+ token
127
159
  end
128
160
 
129
161
  end # module Regexp::Lexer
@@ -18,12 +18,12 @@ class Regexp::Parser
18
18
  end
19
19
  end
20
20
 
21
- def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
21
+ def self.parse(input, syntax = nil, options: nil, &block)
22
22
  new.parse(input, syntax, options: options, &block)
23
23
  end
24
24
 
25
- def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
26
- root = Root.build(extract_options(input, options))
25
+ def parse(input, syntax = nil, options: nil, &block)
26
+ root = Root.construct(options: extract_options(input, options))
27
27
 
28
28
  self.root = root
29
29
  self.node = root
@@ -35,7 +35,7 @@ class Regexp::Parser
35
35
 
36
36
  self.captured_group_counts = Hash.new(0)
37
37
 
38
- Regexp::Lexer.scan(input, syntax, options: options) do |token|
38
+ Regexp::Lexer.scan(input, syntax, options: options, collect_tokens: false) do |token|
39
39
  parse_token(token)
40
40
  end
41
41
 
@@ -200,11 +200,11 @@ class Regexp::Parser
200
200
  end
201
201
 
202
202
  def captured_group_count_at_level
203
- captured_group_counts[node.level]
203
+ captured_group_counts[node]
204
204
  end
205
205
 
206
206
  def count_captured_group
207
- captured_group_counts[node.level] += 1
207
+ captured_group_counts[node] += 1
208
208
  end
209
209
 
210
210
  def close_group
@@ -235,7 +235,15 @@ class Regexp::Parser
235
235
  when :number, :number_ref
236
236
  node << Backreference::Number.new(token, active_opts)
237
237
  when :number_recursion_ref
238
- node << Backreference::NumberRecursionLevel.new(token, active_opts)
238
+ node << Backreference::NumberRecursionLevel.new(token, active_opts).tap do |exp|
239
+ # TODO: should split off new token number_recursion_rel_ref and new
240
+ # class NumberRelativeRecursionLevel in v3.0.0 to get rid of this
241
+ if exp.text =~ /[<'][+-]/
242
+ assign_effective_number(exp)
243
+ else
244
+ exp.effective_number = exp.number
245
+ end
246
+ end
239
247
  when :number_call
240
248
  node << Backreference::NumberCall.new(token, active_opts)
241
249
  when :number_rel_ref
@@ -254,6 +262,8 @@ class Regexp::Parser
254
262
  def assign_effective_number(exp)
255
263
  exp.effective_number =
256
264
  exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
265
+ exp.effective_number > 0 ||
266
+ raise(ParserError, "Invalid reference: #{exp.reference}")
257
267
  end
258
268
 
259
269
  def conditional(token)
@@ -369,7 +379,7 @@ class Regexp::Parser
369
379
  end
370
380
 
371
381
  def sequence_operation(klass, token)
372
- unless node.is_a?(klass)
382
+ unless node.instance_of?(klass)
373
383
  operator = klass.new(token, active_opts)
374
384
  sequence = operator.add_sequence(active_opts)
375
385
  sequence.expressions = node.expressions
@@ -475,17 +485,14 @@ class Regexp::Parser
475
485
  # description of the problem: https://github.com/ammar/regexp_parser/issues/3
476
486
  # rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
477
487
  if target_node.quantified?
478
- new_token = Regexp::Token.new(
479
- :group,
480
- :passive,
481
- '', # text (none because this group is implicit)
482
- target_node.ts,
483
- nil, # te (unused)
484
- target_node.level,
485
- target_node.set_level,
486
- target_node.conditional_level
488
+ new_group = Group::Passive.construct(
489
+ token: :passive,
490
+ ts: target_node.ts,
491
+ level: target_node.level,
492
+ set_level: target_node.set_level,
493
+ conditional_level: target_node.conditional_level,
494
+ options: active_opts,
487
495
  )
488
- new_group = Group::Passive.new(new_token, active_opts)
489
496
  new_group.implicit = true
490
497
  new_group << target_node
491
498
  increase_group_level(target_node)
@@ -534,7 +541,7 @@ class Regexp::Parser
534
541
 
535
542
  def range(token)
536
543
  exp = CharacterSet::Range.new(token, active_opts)
537
- scope = node.last.is_a?(CharacterSet::IntersectedSequence) ? node.last : node
544
+ scope = node.last.instance_of?(CharacterSet::IntersectedSequence) ? node.last : node
538
545
  exp << scope.expressions.pop
539
546
  nest(exp)
540
547
  end
@@ -561,7 +568,7 @@ class Regexp::Parser
561
568
  end
562
569
 
563
570
  def close_completed_character_set_range
564
- decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
571
+ decrease_nesting if node.instance_of?(CharacterSet::Range) && node.complete?
565
572
  end
566
573
 
567
574
  def active_opts
@@ -572,15 +579,18 @@ class Regexp::Parser
572
579
  # an instance of Backreference::Number, its #referenced_expression is set to
573
580
  # the instance of Group::Capture that it refers to via its number.
574
581
  def assign_referenced_expressions
575
- targets = {}
576
- # find all referencable expressions
582
+ # find all referencable and refering expressions
583
+ targets = { 0 => root }
584
+ referrers = []
577
585
  root.each_expression do |exp|
578
586
  exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
587
+ referrers << exp if exp.referential?
579
588
  end
580
- # assign them to any refering expressions
581
- root.each_expression do |exp|
582
- exp.respond_to?(:reference) &&
583
- exp.referenced_expression = targets[exp.reference]
589
+ # assign reference expression to refering expressions
590
+ # (in a second iteration because there might be forward references)
591
+ referrers.each do |exp|
592
+ exp.referenced_expression = targets[exp.reference] ||
593
+ raise(ParserError, "Invalid reference: #{exp.reference}")
584
594
  end
585
595
  end
586
596
  end # module Regexp::Parser
@@ -17,7 +17,7 @@
17
17
  text = copy(data, ts-1, te)
18
18
  type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
19
19
 
20
- name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
20
+ name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
21
21
 
22
22
  token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
23
  validation_error(:property, name) unless token