regexp_parser 2.7.0 → 2.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/LICENSE +1 -1
  4. data/lib/regexp_parser/expression/base.rb +0 -7
  5. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  6. data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
  7. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  8. data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
  9. data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
  10. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  11. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  13. data/lib/regexp_parser/expression/classes/keep.rb +1 -1
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  18. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  19. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  20. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  21. data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
  22. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  23. data/lib/regexp_parser/expression/sequence.rb +5 -9
  24. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  25. data/lib/regexp_parser/expression/shared.rb +37 -24
  26. data/lib/regexp_parser/expression/subexpression.rb +20 -18
  27. data/lib/regexp_parser/expression.rb +34 -31
  28. data/lib/regexp_parser/lexer.rb +15 -7
  29. data/lib/regexp_parser/parser.rb +91 -91
  30. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  31. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  32. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  33. data/lib/regexp_parser/scanner/properties/long.csv +29 -0
  34. data/lib/regexp_parser/scanner/properties/short.csv +3 -0
  35. data/lib/regexp_parser/scanner/property.rl +1 -1
  36. data/lib/regexp_parser/scanner/scanner.rl +44 -130
  37. data/lib/regexp_parser/scanner.rb +1096 -1297
  38. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  39. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  40. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  41. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  42. data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
  43. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  44. data/lib/regexp_parser/syntax/token.rb +13 -13
  45. data/lib/regexp_parser/syntax/versions.rb +1 -1
  46. data/lib/regexp_parser/syntax.rb +1 -1
  47. data/lib/regexp_parser/version.rb +1 -1
  48. data/lib/regexp_parser.rb +6 -6
  49. data/regexp_parser.gemspec +5 -5
  50. metadata +14 -8
  51. data/CHANGELOG.md +0 -632
  52. data/README.md +0 -503
@@ -5,21 +5,16 @@ module Regexp::Expression
5
5
  alias :operands :expressions
6
6
  alias :operator :text
7
7
 
8
- def starts_at
9
- expressions.first.starts_at
8
+ def ts
9
+ (head = expressions.first) ? head.ts : @ts
10
10
  end
11
- alias :ts :starts_at
12
11
 
13
12
  def <<(exp)
14
13
  expressions.last << exp
15
14
  end
16
15
 
17
- def add_sequence(active_opts = {})
18
- self.class::OPERAND.add_to(self, {}, active_opts)
19
- end
20
-
21
- def parts
22
- intersperse(expressions, text.dup)
16
+ def add_sequence(active_opts = {}, params = { ts: 0 })
17
+ self.class::OPERAND.add_to(self, params, active_opts)
23
18
  end
24
19
  end
25
20
  end
@@ -8,7 +8,8 @@ module Regexp::Expression
8
8
 
9
9
  attr_accessor :type, :token, :text, :ts, :te,
10
10
  :level, :set_level, :conditional_level,
11
- :options
11
+ :options, :parent,
12
+ :custom_to_s_handling, :pre_quantifier_decorations
12
13
 
13
14
  attr_reader :nesting_level, :quantifier
14
15
  end
@@ -32,6 +33,10 @@ module Regexp::Expression
32
33
  self.text = orig.text.dup if orig.text
33
34
  self.options = orig.options.dup if orig.options
34
35
  self.quantifier = orig.quantifier.clone if orig.quantifier
36
+ self.parent = nil # updated by Subexpression#initialize_copy
37
+ if orig.pre_quantifier_decorations
38
+ self.pre_quantifier_decorations = orig.pre_quantifier_decorations.map(&:dup)
39
+ end
35
40
  super
36
41
  end
37
42
 
@@ -39,35 +44,51 @@ module Regexp::Expression
39
44
  ts
40
45
  end
41
46
 
47
+ def ends_at(include_quantifier = true)
48
+ ts + (include_quantifier ? full_length : base_length)
49
+ end
50
+
42
51
  def base_length
43
52
  to_s(:base).length
44
53
  end
45
54
 
46
55
  def full_length
47
- to_s.length
48
- end
49
-
56
+ to_s(:original).length
57
+ end
58
+
59
+ # #to_s reproduces the original source, as an unparser would.
60
+ #
61
+ # It takes an optional format argument.
62
+ #
63
+ # Example:
64
+ #
65
+ # lit = Regexp::Parser.parse(/a +/x)[0]
66
+ #
67
+ # lit.to_s # => 'a+' # default; with quantifier
68
+ # lit.to_s(:full) # => 'a+' # default; with quantifier
69
+ # lit.to_s(:base) # => 'a' # without quantifier
70
+ # lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
71
+ #
50
72
  def to_s(format = :full)
51
- "#{parts.join}#{quantifier_affix(format)}"
73
+ base = parts.each_with_object(''.dup) do |part, buff|
74
+ if part.instance_of?(String)
75
+ buff << part
76
+ elsif !part.custom_to_s_handling
77
+ buff << part.to_s(:original)
78
+ end
79
+ end
80
+ "#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
52
81
  end
53
82
  alias :to_str :to_s
54
83
 
55
- def parts
56
- [text.dup]
84
+ def pre_quantifier_decoration(expression_format = :original)
85
+ pre_quantifier_decorations.to_a.join if expression_format == :original
57
86
  end
58
87
 
59
- def quantifier_affix(expression_format)
88
+ def quantifier_affix(expression_format = :full)
60
89
  quantifier.to_s if quantified? && expression_format != :base
61
90
  end
62
91
 
63
- def quantified?
64
- !quantifier.nil?
65
- end
66
-
67
- def optional?
68
- quantified? && quantifier.min == 0
69
- end
70
-
71
92
  def offset
72
93
  [starts_at, full_length]
73
94
  end
@@ -76,14 +97,6 @@ module Regexp::Expression
76
97
  '@%d+%d' % offset
77
98
  end
78
99
 
79
- def terminal?
80
- true # overridden to be false in Expression::Subexpression
81
- end
82
-
83
- def referential?
84
- false # overridden to be true e.g. in Expression::Backreference::Base
85
- end
86
-
87
100
  def nesting_level=(lvl)
88
101
  @nesting_level = lvl
89
102
  quantifier && quantifier.nesting_level = lvl
@@ -11,16 +11,15 @@ module Regexp::Expression
11
11
 
12
12
  # Override base method to clone the expressions as well.
13
13
  def initialize_copy(orig)
14
- self.expressions = orig.expressions.map(&:clone)
14
+ self.expressions = orig.expressions.map do |exp|
15
+ exp.clone.tap { |copy| copy.parent = self }
16
+ end
15
17
  super
16
18
  end
17
19
 
18
20
  def <<(exp)
19
- if exp.is_a?(WhiteSpace) && last && last.is_a?(WhiteSpace)
20
- last.merge(exp)
21
- else
22
- expressions << exp
23
- end
21
+ exp.parent = self
22
+ expressions << exp
24
23
  end
25
24
 
26
25
  %w[[] at each empty? fetch index join last length values_at].each do |method|
@@ -38,11 +37,7 @@ module Regexp::Expression
38
37
  end
39
38
 
40
39
  def te
41
- ts + to_s.length
42
- end
43
-
44
- def parts
45
- expressions
40
+ ts + base_length
46
41
  end
47
42
 
48
43
  def to_h
@@ -52,14 +47,21 @@ module Regexp::Expression
52
47
  )
53
48
  end
54
49
 
55
- def terminal?
56
- false
57
- end
58
-
59
- private
50
+ def extract_quantifier_target(quantifier_description)
51
+ pre_quantifier_decorations = []
52
+ target = expressions.reverse.find do |exp|
53
+ if exp.decorative?
54
+ exp.custom_to_s_handling = true
55
+ pre_quantifier_decorations << exp.text
56
+ next
57
+ end
58
+ exp
59
+ end
60
+ target or raise Regexp::Parser::ParserError,
61
+ "No valid target found for '#{quantifier_description}' quantifier"
60
62
 
61
- def intersperse(expressions, separator)
62
- expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
63
+ target.pre_quantifier_decorations = pre_quantifier_decorations
64
+ target
63
65
  end
64
66
  end
65
67
  end
@@ -1,34 +1,37 @@
1
- require 'regexp_parser/error'
1
+ require_relative 'error'
2
2
 
3
- require 'regexp_parser/expression/shared'
4
- require 'regexp_parser/expression/base'
5
- require 'regexp_parser/expression/quantifier'
6
- require 'regexp_parser/expression/subexpression'
7
- require 'regexp_parser/expression/sequence'
8
- require 'regexp_parser/expression/sequence_operation'
3
+ require_relative 'expression/shared'
4
+ require_relative 'expression/base'
5
+ require_relative 'expression/quantifier'
6
+ require_relative 'expression/subexpression'
7
+ require_relative 'expression/sequence'
8
+ require_relative 'expression/sequence_operation'
9
9
 
10
- require 'regexp_parser/expression/classes/alternation'
11
- require 'regexp_parser/expression/classes/anchor'
12
- require 'regexp_parser/expression/classes/backreference'
13
- require 'regexp_parser/expression/classes/character_set'
14
- require 'regexp_parser/expression/classes/character_set/intersection'
15
- require 'regexp_parser/expression/classes/character_set/range'
16
- require 'regexp_parser/expression/classes/character_type'
17
- require 'regexp_parser/expression/classes/conditional'
18
- require 'regexp_parser/expression/classes/escape_sequence'
19
- require 'regexp_parser/expression/classes/free_space'
20
- require 'regexp_parser/expression/classes/group'
21
- require 'regexp_parser/expression/classes/keep'
22
- require 'regexp_parser/expression/classes/literal'
23
- require 'regexp_parser/expression/classes/posix_class'
24
- require 'regexp_parser/expression/classes/root'
25
- require 'regexp_parser/expression/classes/unicode_property'
10
+ require_relative 'expression/classes/alternation'
11
+ require_relative 'expression/classes/anchor'
12
+ require_relative 'expression/classes/backreference'
13
+ require_relative 'expression/classes/character_set'
14
+ require_relative 'expression/classes/character_set/intersection'
15
+ require_relative 'expression/classes/character_set/range'
16
+ require_relative 'expression/classes/character_type'
17
+ require_relative 'expression/classes/conditional'
18
+ require_relative 'expression/classes/escape_sequence'
19
+ require_relative 'expression/classes/free_space'
20
+ require_relative 'expression/classes/group'
21
+ require_relative 'expression/classes/keep'
22
+ require_relative 'expression/classes/literal'
23
+ require_relative 'expression/classes/posix_class'
24
+ require_relative 'expression/classes/root'
25
+ require_relative 'expression/classes/unicode_property'
26
26
 
27
- require 'regexp_parser/expression/methods/construct'
28
- require 'regexp_parser/expression/methods/human_name'
29
- require 'regexp_parser/expression/methods/match'
30
- require 'regexp_parser/expression/methods/match_length'
31
- require 'regexp_parser/expression/methods/options'
32
- require 'regexp_parser/expression/methods/strfregexp'
33
- require 'regexp_parser/expression/methods/tests'
34
- require 'regexp_parser/expression/methods/traverse'
27
+ require_relative 'expression/methods/construct'
28
+ require_relative 'expression/methods/human_name'
29
+ require_relative 'expression/methods/match'
30
+ require_relative 'expression/methods/match_length'
31
+ require_relative 'expression/methods/negative'
32
+ require_relative 'expression/methods/options'
33
+ require_relative 'expression/methods/parts'
34
+ require_relative 'expression/methods/printing'
35
+ require_relative 'expression/methods/strfregexp'
36
+ require_relative 'expression/methods/tests'
37
+ require_relative 'expression/methods/traverse'
@@ -6,7 +6,7 @@ class Regexp::Lexer
6
6
 
7
7
  OPENING_TOKENS = %i[
8
8
  capture passive lookahead nlookahead lookbehind nlookbehind
9
- atomic options options_switch named absence
9
+ atomic options options_switch named absence open
10
10
  ].freeze
11
11
 
12
12
  CLOSING_TOKENS = %i[close].freeze
@@ -89,24 +89,32 @@ class Regexp::Lexer
89
89
  :nesting, :set_nesting, :conditional_nesting, :shift
90
90
 
91
91
  def ascend(type, token)
92
+ return unless CLOSING_TOKENS.include?(token)
93
+
92
94
  case type
93
95
  when :group, :assertion
94
- self.nesting = nesting - 1 if CLOSING_TOKENS.include?(token)
96
+ self.nesting = nesting - 1
95
97
  when :set
96
- self.set_nesting = set_nesting - 1 if token == :close
98
+ self.set_nesting = set_nesting - 1
97
99
  when :conditional
98
- self.conditional_nesting = conditional_nesting - 1 if token == :close
100
+ self.conditional_nesting = conditional_nesting - 1
101
+ else
102
+ raise "unhandled nesting type #{type}"
99
103
  end
100
104
  end
101
105
 
102
106
  def descend(type, token)
107
+ return unless OPENING_TOKENS.include?(token)
108
+
103
109
  case type
104
110
  when :group, :assertion
105
- self.nesting = nesting + 1 if OPENING_TOKENS.include?(token)
111
+ self.nesting = nesting + 1
106
112
  when :set
107
- self.set_nesting = set_nesting + 1 if token == :open
113
+ self.set_nesting = set_nesting + 1
108
114
  when :conditional
109
- self.conditional_nesting = conditional_nesting + 1 if token == :open
115
+ self.conditional_nesting = conditional_nesting + 1
116
+ else
117
+ raise "unhandled nesting type #{type}"
110
118
  end
111
119
  end
112
120
 
@@ -1,5 +1,5 @@
1
- require 'regexp_parser/error'
2
- require 'regexp_parser/expression'
1
+ require_relative 'error'
2
+ require_relative 'expression'
3
3
 
4
4
  class Regexp::Parser
5
5
  include Regexp::Expression
@@ -232,7 +232,7 @@ class Regexp::Parser
232
232
  node << Backreference::NameRecursionLevel.new(token, active_opts)
233
233
  when :name_call
234
234
  node << Backreference::NameCall.new(token, active_opts)
235
- when :number, :number_ref
235
+ when :number, :number_ref # TODO: split in v3.0.0
236
236
  node << Backreference::Number.new(token, active_opts)
237
237
  when :number_recursion_ref
238
238
  node << Backreference::NumberRecursionLevel.new(token, active_opts).tap do |exp|
@@ -272,9 +272,9 @@ class Regexp::Parser
272
272
  nest_conditional(Conditional::Expression.new(token, active_opts))
273
273
  when :condition
274
274
  conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts)
275
- conditional_nesting.last.add_sequence(active_opts)
275
+ conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
276
276
  when :separator
277
- conditional_nesting.last.add_sequence(active_opts)
277
+ conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
278
278
  self.node = conditional_nesting.last.branches.last
279
279
  when :close
280
280
  conditional_nesting.pop
@@ -322,6 +322,7 @@ class Regexp::Parser
322
322
 
323
323
  when :control
324
324
  if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
325
+ # TODO: emit :meta_control_sequence token in v3.0.0
325
326
  node << EscapeSequence::MetaControl.new(token, active_opts)
326
327
  else
327
328
  node << EscapeSequence::Control.new(token, active_opts)
@@ -329,6 +330,7 @@ class Regexp::Parser
329
330
 
330
331
  when :meta_sequence
331
332
  if token.text =~ /\A\\M-\\[Cc]/
333
+ # TODO: emit :meta_control_sequence token in v3.0.0:
332
334
  node << EscapeSequence::MetaControl.new(token, active_opts)
333
335
  else
334
336
  node << EscapeSequence::Meta.new(token, active_opts)
@@ -349,11 +351,7 @@ class Regexp::Parser
349
351
  when :comment
350
352
  node << Comment.new(token, active_opts)
351
353
  when :whitespace
352
- if node.last.is_a?(WhiteSpace)
353
- node.last.merge(WhiteSpace.new(token, active_opts))
354
- else
355
- node << WhiteSpace.new(token, active_opts)
356
- end
354
+ node << WhiteSpace.new(token, active_opts)
357
355
  else
358
356
  raise UnknownTokenError.new('FreeSpace', token)
359
357
  end
@@ -381,96 +379,97 @@ class Regexp::Parser
381
379
  def sequence_operation(klass, token)
382
380
  unless node.instance_of?(klass)
383
381
  operator = klass.new(token, active_opts)
384
- sequence = operator.add_sequence(active_opts)
382
+ sequence = operator.add_sequence(active_opts, { ts: token.ts })
385
383
  sequence.expressions = node.expressions
386
384
  node.expressions = []
387
385
  nest(operator)
388
386
  end
389
- node.add_sequence(active_opts)
387
+ node.add_sequence(active_opts, { ts: token.te })
390
388
  end
391
389
 
392
390
  def posixclass(token)
393
391
  node << PosixClass.new(token, active_opts)
394
392
  end
395
393
 
396
- include Regexp::Expression::UnicodeProperty
397
- UPTokens = Regexp::Syntax::Token::UnicodeProperty
394
+ UP = Regexp::Expression::Property
395
+ UPTokens = Regexp::Syntax::Token::Property
398
396
 
399
397
  def property(token)
400
398
  case token.token
401
- when :alnum; node << Alnum.new(token, active_opts)
402
- when :alpha; node << Alpha.new(token, active_opts)
403
- when :ascii; node << Ascii.new(token, active_opts)
404
- when :blank; node << Blank.new(token, active_opts)
405
- when :cntrl; node << Cntrl.new(token, active_opts)
406
- when :digit; node << Digit.new(token, active_opts)
407
- when :graph; node << Graph.new(token, active_opts)
408
- when :lower; node << Lower.new(token, active_opts)
409
- when :print; node << Print.new(token, active_opts)
410
- when :punct; node << Punct.new(token, active_opts)
411
- when :space; node << Space.new(token, active_opts)
412
- when :upper; node << Upper.new(token, active_opts)
413
- when :word; node << Word.new(token, active_opts)
414
- when :xdigit; node << Xdigit.new(token, active_opts)
415
- when :xposixpunct; node << XPosixPunct.new(token, active_opts)
399
+ when :alnum; node << UP::Alnum.new(token, active_opts)
400
+ when :alpha; node << UP::Alpha.new(token, active_opts)
401
+ when :ascii; node << UP::Ascii.new(token, active_opts)
402
+ when :blank; node << UP::Blank.new(token, active_opts)
403
+ when :cntrl; node << UP::Cntrl.new(token, active_opts)
404
+ when :digit; node << UP::Digit.new(token, active_opts)
405
+ when :graph; node << UP::Graph.new(token, active_opts)
406
+ when :lower; node << UP::Lower.new(token, active_opts)
407
+ when :print; node << UP::Print.new(token, active_opts)
408
+ when :punct; node << UP::Punct.new(token, active_opts)
409
+ when :space; node << UP::Space.new(token, active_opts)
410
+ when :upper; node << UP::Upper.new(token, active_opts)
411
+ when :word; node << UP::Word.new(token, active_opts)
412
+ when :xdigit; node << UP::Xdigit.new(token, active_opts)
413
+ when :xposixpunct; node << UP::XPosixPunct.new(token, active_opts)
416
414
 
417
415
  # only in Oniguruma (old rubies)
418
- when :newline; node << Newline.new(token, active_opts)
419
-
420
- when :any; node << Any.new(token, active_opts)
421
- when :assigned; node << Assigned.new(token, active_opts)
422
-
423
- when :letter; node << Letter::Any.new(token, active_opts)
424
- when :cased_letter; node << Letter::Cased.new(token, active_opts)
425
- when :uppercase_letter; node << Letter::Uppercase.new(token, active_opts)
426
- when :lowercase_letter; node << Letter::Lowercase.new(token, active_opts)
427
- when :titlecase_letter; node << Letter::Titlecase.new(token, active_opts)
428
- when :modifier_letter; node << Letter::Modifier.new(token, active_opts)
429
- when :other_letter; node << Letter::Other.new(token, active_opts)
430
-
431
- when :mark; node << Mark::Any.new(token, active_opts)
432
- when :combining_mark; node << Mark::Combining.new(token, active_opts)
433
- when :nonspacing_mark; node << Mark::Nonspacing.new(token, active_opts)
434
- when :spacing_mark; node << Mark::Spacing.new(token, active_opts)
435
- when :enclosing_mark; node << Mark::Enclosing.new(token, active_opts)
436
-
437
- when :number; node << Number::Any.new(token, active_opts)
438
- when :decimal_number; node << Number::Decimal.new(token, active_opts)
439
- when :letter_number; node << Number::Letter.new(token, active_opts)
440
- when :other_number; node << Number::Other.new(token, active_opts)
441
-
442
- when :punctuation; node << Punctuation::Any.new(token, active_opts)
443
- when :connector_punctuation; node << Punctuation::Connector.new(token, active_opts)
444
- when :dash_punctuation; node << Punctuation::Dash.new(token, active_opts)
445
- when :open_punctuation; node << Punctuation::Open.new(token, active_opts)
446
- when :close_punctuation; node << Punctuation::Close.new(token, active_opts)
447
- when :initial_punctuation; node << Punctuation::Initial.new(token, active_opts)
448
- when :final_punctuation; node << Punctuation::Final.new(token, active_opts)
449
- when :other_punctuation; node << Punctuation::Other.new(token, active_opts)
450
-
451
- when :separator; node << Separator::Any.new(token, active_opts)
452
- when :space_separator; node << Separator::Space.new(token, active_opts)
453
- when :line_separator; node << Separator::Line.new(token, active_opts)
454
- when :paragraph_separator; node << Separator::Paragraph.new(token, active_opts)
455
-
456
- when :symbol; node << Symbol::Any.new(token, active_opts)
457
- when :math_symbol; node << Symbol::Math.new(token, active_opts)
458
- when :currency_symbol; node << Symbol::Currency.new(token, active_opts)
459
- when :modifier_symbol; node << Symbol::Modifier.new(token, active_opts)
460
- when :other_symbol; node << Symbol::Other.new(token, active_opts)
461
-
462
- when :other; node << Codepoint::Any.new(token, active_opts)
463
- when :control; node << Codepoint::Control.new(token, active_opts)
464
- when :format; node << Codepoint::Format.new(token, active_opts)
465
- when :surrogate; node << Codepoint::Surrogate.new(token, active_opts)
466
- when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
467
- when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
468
-
469
- when *UPTokens::Age; node << Age.new(token, active_opts)
470
- when *UPTokens::Derived; node << Derived.new(token, active_opts)
471
- when *UPTokens::Emoji; node << Emoji.new(token, active_opts)
472
- when *UPTokens::Script; node << Script.new(token, active_opts)
473
- when *UPTokens::UnicodeBlock; node << Block.new(token, active_opts)
416
+ when :newline; node << UP::Newline.new(token, active_opts)
417
+
418
+ when :any; node << UP::Any.new(token, active_opts)
419
+ when :assigned; node << UP::Assigned.new(token, active_opts)
420
+
421
+ when :letter; node << UP::Letter::Any.new(token, active_opts)
422
+ when :cased_letter; node << UP::Letter::Cased.new(token, active_opts)
423
+ when :uppercase_letter; node << UP::Letter::Uppercase.new(token, active_opts)
424
+ when :lowercase_letter; node << UP::Letter::Lowercase.new(token, active_opts)
425
+ when :titlecase_letter; node << UP::Letter::Titlecase.new(token, active_opts)
426
+ when :modifier_letter; node << UP::Letter::Modifier.new(token, active_opts)
427
+ when :other_letter; node << UP::Letter::Other.new(token, active_opts)
428
+
429
+ when :mark; node << UP::Mark::Any.new(token, active_opts)
430
+ when :combining_mark; node << UP::Mark::Combining.new(token, active_opts)
431
+ when :nonspacing_mark; node << UP::Mark::Nonspacing.new(token, active_opts)
432
+ when :spacing_mark; node << UP::Mark::Spacing.new(token, active_opts)
433
+ when :enclosing_mark; node << UP::Mark::Enclosing.new(token, active_opts)
434
+
435
+ when :number; node << UP::Number::Any.new(token, active_opts)
436
+ when :decimal_number; node << UP::Number::Decimal.new(token, active_opts)
437
+ when :letter_number; node << UP::Number::Letter.new(token, active_opts)
438
+ when :other_number; node << UP::Number::Other.new(token, active_opts)
439
+
440
+ when :punctuation; node << UP::Punctuation::Any.new(token, active_opts)
441
+ when :connector_punctuation; node << UP::Punctuation::Connector.new(token, active_opts)
442
+ when :dash_punctuation; node << UP::Punctuation::Dash.new(token, active_opts)
443
+ when :open_punctuation; node << UP::Punctuation::Open.new(token, active_opts)
444
+ when :close_punctuation; node << UP::Punctuation::Close.new(token, active_opts)
445
+ when :initial_punctuation; node << UP::Punctuation::Initial.new(token, active_opts)
446
+ when :final_punctuation; node << UP::Punctuation::Final.new(token, active_opts)
447
+ when :other_punctuation; node << UP::Punctuation::Other.new(token, active_opts)
448
+
449
+ when :separator; node << UP::Separator::Any.new(token, active_opts)
450
+ when :space_separator; node << UP::Separator::Space.new(token, active_opts)
451
+ when :line_separator; node << UP::Separator::Line.new(token, active_opts)
452
+ when :paragraph_separator; node << UP::Separator::Paragraph.new(token, active_opts)
453
+
454
+ when :symbol; node << UP::Symbol::Any.new(token, active_opts)
455
+ when :math_symbol; node << UP::Symbol::Math.new(token, active_opts)
456
+ when :currency_symbol; node << UP::Symbol::Currency.new(token, active_opts)
457
+ when :modifier_symbol; node << UP::Symbol::Modifier.new(token, active_opts)
458
+ when :other_symbol; node << UP::Symbol::Other.new(token, active_opts)
459
+
460
+ when :other; node << UP::Codepoint::Any.new(token, active_opts)
461
+ when :control; node << UP::Codepoint::Control.new(token, active_opts)
462
+ when :format; node << UP::Codepoint::Format.new(token, active_opts)
463
+ when :surrogate; node << UP::Codepoint::Surrogate.new(token, active_opts)
464
+ when :private_use; node << UP::Codepoint::PrivateUse.new(token, active_opts)
465
+ when :unassigned; node << UP::Codepoint::Unassigned.new(token, active_opts)
466
+
467
+ when *UPTokens::Age; node << UP::Age.new(token, active_opts)
468
+ when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
469
+ when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
470
+ when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
471
+ when *UPTokens::Script; node << UP::Script.new(token, active_opts)
472
+ when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
474
473
 
475
474
  else
476
475
  raise UnknownTokenError.new('UnicodeProperty', token)
@@ -478,8 +477,7 @@ class Regexp::Parser
478
477
  end
479
478
 
480
479
  def quantifier(token)
481
- target_node = node.expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
482
- target_node or raise ParserError, "No valid target found for '#{token.text}'"
480
+ target_node = node.extract_quantifier_target(token.text)
483
481
 
484
482
  # in case of chained quantifiers, wrap target in an implicit passive group
485
483
  # description of the problem: https://github.com/ammar/regexp_parser/issues/3
@@ -527,6 +525,8 @@ class Regexp::Parser
527
525
  end
528
526
 
529
527
  def open_set(token)
528
+ # TODO: this and Quantifier are the only cases where Expression#token
529
+ # does not match the scanner/lexer output. Fix in v3.0.0.
530
530
  token.token = :character
531
531
  nest(CharacterSet.new(token, active_opts))
532
532
  end
@@ -575,22 +575,22 @@ class Regexp::Parser
575
575
  options_stack.last
576
576
  end
577
577
 
578
- # Assigns referenced expressions to refering expressions, e.g. if there is
578
+ # Assigns referenced expressions to referring expressions, e.g. if there is
579
579
  # an instance of Backreference::Number, its #referenced_expression is set to
580
580
  # the instance of Group::Capture that it refers to via its number.
581
581
  def assign_referenced_expressions
582
- # find all referencable and refering expressions
582
+ # find all referenceable and referring expressions
583
583
  targets = { 0 => root }
584
584
  referrers = []
585
585
  root.each_expression do |exp|
586
586
  exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
587
587
  referrers << exp if exp.referential?
588
588
  end
589
- # assign reference expression to refering expressions
589
+ # assign reference expression to referring expressions
590
590
  # (in a second iteration because there might be forward references)
591
591
  referrers.each do |exp|
592
592
  exp.referenced_expression = targets[exp.reference] ||
593
- raise(ParserError, "Invalid reference: #{exp.reference}")
593
+ raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
594
594
  end
595
595
  end
596
596
  end # module Regexp::Parser
@@ -0,0 +1,8 @@
1
+ class Regexp::Scanner
2
+ # Unexpected end of pattern
3
+ class PrematureEndError < ScannerError
4
+ def initialize(where = '')
5
+ super "Premature end of pattern at #{where}"
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,6 @@
1
+ require_relative '../../../regexp_parser/error'
2
+
3
+ class Regexp::Scanner
4
+ # General scanner error (catch all)
5
+ class ScannerError < Regexp::Parser::Error; end
6
+ end