regexp_parser 2.7.0 → 2.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +5 -5
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/keep.rb +1 -1
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
- data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -9
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -24
- data/lib/regexp_parser/expression/subexpression.rb +20 -18
- data/lib/regexp_parser/expression.rb +34 -31
- data/lib/regexp_parser/lexer.rb +15 -7
- data/lib/regexp_parser/parser.rb +91 -91
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +29 -0
- data/lib/regexp_parser/scanner/properties/short.csv +3 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +44 -130
- data/lib/regexp_parser/scanner.rb +1096 -1297
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +13 -13
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +6 -6
- data/regexp_parser.gemspec +5 -5
- metadata +14 -8
- data/CHANGELOG.md +0 -632
- data/README.md +0 -503
@@ -5,21 +5,16 @@ module Regexp::Expression
|
|
5
5
|
alias :operands :expressions
|
6
6
|
alias :operator :text
|
7
7
|
|
8
|
-
def
|
9
|
-
expressions.first.
|
8
|
+
def ts
|
9
|
+
(head = expressions.first) ? head.ts : @ts
|
10
10
|
end
|
11
|
-
alias :ts :starts_at
|
12
11
|
|
13
12
|
def <<(exp)
|
14
13
|
expressions.last << exp
|
15
14
|
end
|
16
15
|
|
17
|
-
def add_sequence(active_opts = {})
|
18
|
-
self.class::OPERAND.add_to(self,
|
19
|
-
end
|
20
|
-
|
21
|
-
def parts
|
22
|
-
intersperse(expressions, text.dup)
|
16
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
17
|
+
self.class::OPERAND.add_to(self, params, active_opts)
|
23
18
|
end
|
24
19
|
end
|
25
20
|
end
|
@@ -8,7 +8,8 @@ module Regexp::Expression
|
|
8
8
|
|
9
9
|
attr_accessor :type, :token, :text, :ts, :te,
|
10
10
|
:level, :set_level, :conditional_level,
|
11
|
-
:options
|
11
|
+
:options, :parent,
|
12
|
+
:custom_to_s_handling, :pre_quantifier_decorations
|
12
13
|
|
13
14
|
attr_reader :nesting_level, :quantifier
|
14
15
|
end
|
@@ -32,6 +33,10 @@ module Regexp::Expression
|
|
32
33
|
self.text = orig.text.dup if orig.text
|
33
34
|
self.options = orig.options.dup if orig.options
|
34
35
|
self.quantifier = orig.quantifier.clone if orig.quantifier
|
36
|
+
self.parent = nil # updated by Subexpression#initialize_copy
|
37
|
+
if orig.pre_quantifier_decorations
|
38
|
+
self.pre_quantifier_decorations = orig.pre_quantifier_decorations.map(&:dup)
|
39
|
+
end
|
35
40
|
super
|
36
41
|
end
|
37
42
|
|
@@ -39,35 +44,51 @@ module Regexp::Expression
|
|
39
44
|
ts
|
40
45
|
end
|
41
46
|
|
47
|
+
def ends_at(include_quantifier = true)
|
48
|
+
ts + (include_quantifier ? full_length : base_length)
|
49
|
+
end
|
50
|
+
|
42
51
|
def base_length
|
43
52
|
to_s(:base).length
|
44
53
|
end
|
45
54
|
|
46
55
|
def full_length
|
47
|
-
to_s.length
|
48
|
-
end
|
49
|
-
|
56
|
+
to_s(:original).length
|
57
|
+
end
|
58
|
+
|
59
|
+
# #to_s reproduces the original source, as an unparser would.
|
60
|
+
#
|
61
|
+
# It takes an optional format argument.
|
62
|
+
#
|
63
|
+
# Example:
|
64
|
+
#
|
65
|
+
# lit = Regexp::Parser.parse(/a +/x)[0]
|
66
|
+
#
|
67
|
+
# lit.to_s # => 'a+' # default; with quantifier
|
68
|
+
# lit.to_s(:full) # => 'a+' # default; with quantifier
|
69
|
+
# lit.to_s(:base) # => 'a' # without quantifier
|
70
|
+
# lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
|
71
|
+
#
|
50
72
|
def to_s(format = :full)
|
51
|
-
|
73
|
+
base = parts.each_with_object(''.dup) do |part, buff|
|
74
|
+
if part.instance_of?(String)
|
75
|
+
buff << part
|
76
|
+
elsif !part.custom_to_s_handling
|
77
|
+
buff << part.to_s(:original)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
"#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
|
52
81
|
end
|
53
82
|
alias :to_str :to_s
|
54
83
|
|
55
|
-
def
|
56
|
-
|
84
|
+
def pre_quantifier_decoration(expression_format = :original)
|
85
|
+
pre_quantifier_decorations.to_a.join if expression_format == :original
|
57
86
|
end
|
58
87
|
|
59
|
-
def quantifier_affix(expression_format)
|
88
|
+
def quantifier_affix(expression_format = :full)
|
60
89
|
quantifier.to_s if quantified? && expression_format != :base
|
61
90
|
end
|
62
91
|
|
63
|
-
def quantified?
|
64
|
-
!quantifier.nil?
|
65
|
-
end
|
66
|
-
|
67
|
-
def optional?
|
68
|
-
quantified? && quantifier.min == 0
|
69
|
-
end
|
70
|
-
|
71
92
|
def offset
|
72
93
|
[starts_at, full_length]
|
73
94
|
end
|
@@ -76,14 +97,6 @@ module Regexp::Expression
|
|
76
97
|
'@%d+%d' % offset
|
77
98
|
end
|
78
99
|
|
79
|
-
def terminal?
|
80
|
-
true # overridden to be false in Expression::Subexpression
|
81
|
-
end
|
82
|
-
|
83
|
-
def referential?
|
84
|
-
false # overridden to be true e.g. in Expression::Backreference::Base
|
85
|
-
end
|
86
|
-
|
87
100
|
def nesting_level=(lvl)
|
88
101
|
@nesting_level = lvl
|
89
102
|
quantifier && quantifier.nesting_level = lvl
|
@@ -11,16 +11,15 @@ module Regexp::Expression
|
|
11
11
|
|
12
12
|
# Override base method to clone the expressions as well.
|
13
13
|
def initialize_copy(orig)
|
14
|
-
self.expressions = orig.expressions.map
|
14
|
+
self.expressions = orig.expressions.map do |exp|
|
15
|
+
exp.clone.tap { |copy| copy.parent = self }
|
16
|
+
end
|
15
17
|
super
|
16
18
|
end
|
17
19
|
|
18
20
|
def <<(exp)
|
19
|
-
|
20
|
-
|
21
|
-
else
|
22
|
-
expressions << exp
|
23
|
-
end
|
21
|
+
exp.parent = self
|
22
|
+
expressions << exp
|
24
23
|
end
|
25
24
|
|
26
25
|
%w[[] at each empty? fetch index join last length values_at].each do |method|
|
@@ -38,11 +37,7 @@ module Regexp::Expression
|
|
38
37
|
end
|
39
38
|
|
40
39
|
def te
|
41
|
-
ts +
|
42
|
-
end
|
43
|
-
|
44
|
-
def parts
|
45
|
-
expressions
|
40
|
+
ts + base_length
|
46
41
|
end
|
47
42
|
|
48
43
|
def to_h
|
@@ -52,14 +47,21 @@ module Regexp::Expression
|
|
52
47
|
)
|
53
48
|
end
|
54
49
|
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
50
|
+
def extract_quantifier_target(quantifier_description)
|
51
|
+
pre_quantifier_decorations = []
|
52
|
+
target = expressions.reverse.find do |exp|
|
53
|
+
if exp.decorative?
|
54
|
+
exp.custom_to_s_handling = true
|
55
|
+
pre_quantifier_decorations << exp.text
|
56
|
+
next
|
57
|
+
end
|
58
|
+
exp
|
59
|
+
end
|
60
|
+
target or raise Regexp::Parser::ParserError,
|
61
|
+
"No valid target found for '#{quantifier_description}' quantifier"
|
60
62
|
|
61
|
-
|
62
|
-
|
63
|
+
target.pre_quantifier_decorations = pre_quantifier_decorations
|
64
|
+
target
|
63
65
|
end
|
64
66
|
end
|
65
67
|
end
|
@@ -1,34 +1,37 @@
|
|
1
|
-
|
1
|
+
require_relative 'error'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
require_relative 'expression/shared'
|
4
|
+
require_relative 'expression/base'
|
5
|
+
require_relative 'expression/quantifier'
|
6
|
+
require_relative 'expression/subexpression'
|
7
|
+
require_relative 'expression/sequence'
|
8
|
+
require_relative 'expression/sequence_operation'
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
10
|
+
require_relative 'expression/classes/alternation'
|
11
|
+
require_relative 'expression/classes/anchor'
|
12
|
+
require_relative 'expression/classes/backreference'
|
13
|
+
require_relative 'expression/classes/character_set'
|
14
|
+
require_relative 'expression/classes/character_set/intersection'
|
15
|
+
require_relative 'expression/classes/character_set/range'
|
16
|
+
require_relative 'expression/classes/character_type'
|
17
|
+
require_relative 'expression/classes/conditional'
|
18
|
+
require_relative 'expression/classes/escape_sequence'
|
19
|
+
require_relative 'expression/classes/free_space'
|
20
|
+
require_relative 'expression/classes/group'
|
21
|
+
require_relative 'expression/classes/keep'
|
22
|
+
require_relative 'expression/classes/literal'
|
23
|
+
require_relative 'expression/classes/posix_class'
|
24
|
+
require_relative 'expression/classes/root'
|
25
|
+
require_relative 'expression/classes/unicode_property'
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
27
|
+
require_relative 'expression/methods/construct'
|
28
|
+
require_relative 'expression/methods/human_name'
|
29
|
+
require_relative 'expression/methods/match'
|
30
|
+
require_relative 'expression/methods/match_length'
|
31
|
+
require_relative 'expression/methods/negative'
|
32
|
+
require_relative 'expression/methods/options'
|
33
|
+
require_relative 'expression/methods/parts'
|
34
|
+
require_relative 'expression/methods/printing'
|
35
|
+
require_relative 'expression/methods/strfregexp'
|
36
|
+
require_relative 'expression/methods/tests'
|
37
|
+
require_relative 'expression/methods/traverse'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -6,7 +6,7 @@ class Regexp::Lexer
|
|
6
6
|
|
7
7
|
OPENING_TOKENS = %i[
|
8
8
|
capture passive lookahead nlookahead lookbehind nlookbehind
|
9
|
-
atomic options options_switch named absence
|
9
|
+
atomic options options_switch named absence open
|
10
10
|
].freeze
|
11
11
|
|
12
12
|
CLOSING_TOKENS = %i[close].freeze
|
@@ -89,24 +89,32 @@ class Regexp::Lexer
|
|
89
89
|
:nesting, :set_nesting, :conditional_nesting, :shift
|
90
90
|
|
91
91
|
def ascend(type, token)
|
92
|
+
return unless CLOSING_TOKENS.include?(token)
|
93
|
+
|
92
94
|
case type
|
93
95
|
when :group, :assertion
|
94
|
-
self.nesting = nesting - 1
|
96
|
+
self.nesting = nesting - 1
|
95
97
|
when :set
|
96
|
-
self.set_nesting = set_nesting - 1
|
98
|
+
self.set_nesting = set_nesting - 1
|
97
99
|
when :conditional
|
98
|
-
self.conditional_nesting = conditional_nesting - 1
|
100
|
+
self.conditional_nesting = conditional_nesting - 1
|
101
|
+
else
|
102
|
+
raise "unhandled nesting type #{type}"
|
99
103
|
end
|
100
104
|
end
|
101
105
|
|
102
106
|
def descend(type, token)
|
107
|
+
return unless OPENING_TOKENS.include?(token)
|
108
|
+
|
103
109
|
case type
|
104
110
|
when :group, :assertion
|
105
|
-
self.nesting = nesting + 1
|
111
|
+
self.nesting = nesting + 1
|
106
112
|
when :set
|
107
|
-
self.set_nesting = set_nesting + 1
|
113
|
+
self.set_nesting = set_nesting + 1
|
108
114
|
when :conditional
|
109
|
-
self.conditional_nesting = conditional_nesting + 1
|
115
|
+
self.conditional_nesting = conditional_nesting + 1
|
116
|
+
else
|
117
|
+
raise "unhandled nesting type #{type}"
|
110
118
|
end
|
111
119
|
end
|
112
120
|
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative 'error'
|
2
|
+
require_relative 'expression'
|
3
3
|
|
4
4
|
class Regexp::Parser
|
5
5
|
include Regexp::Expression
|
@@ -232,7 +232,7 @@ class Regexp::Parser
|
|
232
232
|
node << Backreference::NameRecursionLevel.new(token, active_opts)
|
233
233
|
when :name_call
|
234
234
|
node << Backreference::NameCall.new(token, active_opts)
|
235
|
-
when :number, :number_ref
|
235
|
+
when :number, :number_ref # TODO: split in v3.0.0
|
236
236
|
node << Backreference::Number.new(token, active_opts)
|
237
237
|
when :number_recursion_ref
|
238
238
|
node << Backreference::NumberRecursionLevel.new(token, active_opts).tap do |exp|
|
@@ -272,9 +272,9 @@ class Regexp::Parser
|
|
272
272
|
nest_conditional(Conditional::Expression.new(token, active_opts))
|
273
273
|
when :condition
|
274
274
|
conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts)
|
275
|
-
conditional_nesting.last.add_sequence(active_opts)
|
275
|
+
conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
|
276
276
|
when :separator
|
277
|
-
conditional_nesting.last.add_sequence(active_opts)
|
277
|
+
conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
|
278
278
|
self.node = conditional_nesting.last.branches.last
|
279
279
|
when :close
|
280
280
|
conditional_nesting.pop
|
@@ -322,6 +322,7 @@ class Regexp::Parser
|
|
322
322
|
|
323
323
|
when :control
|
324
324
|
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
325
|
+
# TODO: emit :meta_control_sequence token in v3.0.0
|
325
326
|
node << EscapeSequence::MetaControl.new(token, active_opts)
|
326
327
|
else
|
327
328
|
node << EscapeSequence::Control.new(token, active_opts)
|
@@ -329,6 +330,7 @@ class Regexp::Parser
|
|
329
330
|
|
330
331
|
when :meta_sequence
|
331
332
|
if token.text =~ /\A\\M-\\[Cc]/
|
333
|
+
# TODO: emit :meta_control_sequence token in v3.0.0:
|
332
334
|
node << EscapeSequence::MetaControl.new(token, active_opts)
|
333
335
|
else
|
334
336
|
node << EscapeSequence::Meta.new(token, active_opts)
|
@@ -349,11 +351,7 @@ class Regexp::Parser
|
|
349
351
|
when :comment
|
350
352
|
node << Comment.new(token, active_opts)
|
351
353
|
when :whitespace
|
352
|
-
|
353
|
-
node.last.merge(WhiteSpace.new(token, active_opts))
|
354
|
-
else
|
355
|
-
node << WhiteSpace.new(token, active_opts)
|
356
|
-
end
|
354
|
+
node << WhiteSpace.new(token, active_opts)
|
357
355
|
else
|
358
356
|
raise UnknownTokenError.new('FreeSpace', token)
|
359
357
|
end
|
@@ -381,96 +379,97 @@ class Regexp::Parser
|
|
381
379
|
def sequence_operation(klass, token)
|
382
380
|
unless node.instance_of?(klass)
|
383
381
|
operator = klass.new(token, active_opts)
|
384
|
-
sequence = operator.add_sequence(active_opts)
|
382
|
+
sequence = operator.add_sequence(active_opts, { ts: token.ts })
|
385
383
|
sequence.expressions = node.expressions
|
386
384
|
node.expressions = []
|
387
385
|
nest(operator)
|
388
386
|
end
|
389
|
-
node.add_sequence(active_opts)
|
387
|
+
node.add_sequence(active_opts, { ts: token.te })
|
390
388
|
end
|
391
389
|
|
392
390
|
def posixclass(token)
|
393
391
|
node << PosixClass.new(token, active_opts)
|
394
392
|
end
|
395
393
|
|
396
|
-
|
397
|
-
UPTokens = Regexp::Syntax::Token::
|
394
|
+
UP = Regexp::Expression::Property
|
395
|
+
UPTokens = Regexp::Syntax::Token::Property
|
398
396
|
|
399
397
|
def property(token)
|
400
398
|
case token.token
|
401
|
-
when :alnum; node << Alnum.new(token, active_opts)
|
402
|
-
when :alpha; node << Alpha.new(token, active_opts)
|
403
|
-
when :ascii; node << Ascii.new(token, active_opts)
|
404
|
-
when :blank; node << Blank.new(token, active_opts)
|
405
|
-
when :cntrl; node << Cntrl.new(token, active_opts)
|
406
|
-
when :digit; node << Digit.new(token, active_opts)
|
407
|
-
when :graph; node << Graph.new(token, active_opts)
|
408
|
-
when :lower; node << Lower.new(token, active_opts)
|
409
|
-
when :print; node << Print.new(token, active_opts)
|
410
|
-
when :punct; node << Punct.new(token, active_opts)
|
411
|
-
when :space; node << Space.new(token, active_opts)
|
412
|
-
when :upper; node << Upper.new(token, active_opts)
|
413
|
-
when :word; node << Word.new(token, active_opts)
|
414
|
-
when :xdigit; node << Xdigit.new(token, active_opts)
|
415
|
-
when :xposixpunct; node << XPosixPunct.new(token, active_opts)
|
399
|
+
when :alnum; node << UP::Alnum.new(token, active_opts)
|
400
|
+
when :alpha; node << UP::Alpha.new(token, active_opts)
|
401
|
+
when :ascii; node << UP::Ascii.new(token, active_opts)
|
402
|
+
when :blank; node << UP::Blank.new(token, active_opts)
|
403
|
+
when :cntrl; node << UP::Cntrl.new(token, active_opts)
|
404
|
+
when :digit; node << UP::Digit.new(token, active_opts)
|
405
|
+
when :graph; node << UP::Graph.new(token, active_opts)
|
406
|
+
when :lower; node << UP::Lower.new(token, active_opts)
|
407
|
+
when :print; node << UP::Print.new(token, active_opts)
|
408
|
+
when :punct; node << UP::Punct.new(token, active_opts)
|
409
|
+
when :space; node << UP::Space.new(token, active_opts)
|
410
|
+
when :upper; node << UP::Upper.new(token, active_opts)
|
411
|
+
when :word; node << UP::Word.new(token, active_opts)
|
412
|
+
when :xdigit; node << UP::Xdigit.new(token, active_opts)
|
413
|
+
when :xposixpunct; node << UP::XPosixPunct.new(token, active_opts)
|
416
414
|
|
417
415
|
# only in Oniguruma (old rubies)
|
418
|
-
when :newline; node << Newline.new(token, active_opts)
|
419
|
-
|
420
|
-
when :any; node << Any.new(token, active_opts)
|
421
|
-
when :assigned; node << Assigned.new(token, active_opts)
|
422
|
-
|
423
|
-
when :letter; node << Letter::Any.new(token, active_opts)
|
424
|
-
when :cased_letter; node << Letter::Cased.new(token, active_opts)
|
425
|
-
when :uppercase_letter; node << Letter::Uppercase.new(token, active_opts)
|
426
|
-
when :lowercase_letter; node << Letter::Lowercase.new(token, active_opts)
|
427
|
-
when :titlecase_letter; node << Letter::Titlecase.new(token, active_opts)
|
428
|
-
when :modifier_letter; node << Letter::Modifier.new(token, active_opts)
|
429
|
-
when :other_letter; node << Letter::Other.new(token, active_opts)
|
430
|
-
|
431
|
-
when :mark; node << Mark::Any.new(token, active_opts)
|
432
|
-
when :combining_mark; node << Mark::Combining.new(token, active_opts)
|
433
|
-
when :nonspacing_mark; node << Mark::Nonspacing.new(token, active_opts)
|
434
|
-
when :spacing_mark; node << Mark::Spacing.new(token, active_opts)
|
435
|
-
when :enclosing_mark; node << Mark::Enclosing.new(token, active_opts)
|
436
|
-
|
437
|
-
when :number; node << Number::Any.new(token, active_opts)
|
438
|
-
when :decimal_number; node << Number::Decimal.new(token, active_opts)
|
439
|
-
when :letter_number; node << Number::Letter.new(token, active_opts)
|
440
|
-
when :other_number; node << Number::Other.new(token, active_opts)
|
441
|
-
|
442
|
-
when :punctuation; node << Punctuation::Any.new(token, active_opts)
|
443
|
-
when :connector_punctuation; node << Punctuation::Connector.new(token, active_opts)
|
444
|
-
when :dash_punctuation; node << Punctuation::Dash.new(token, active_opts)
|
445
|
-
when :open_punctuation; node << Punctuation::Open.new(token, active_opts)
|
446
|
-
when :close_punctuation; node << Punctuation::Close.new(token, active_opts)
|
447
|
-
when :initial_punctuation; node << Punctuation::Initial.new(token, active_opts)
|
448
|
-
when :final_punctuation; node << Punctuation::Final.new(token, active_opts)
|
449
|
-
when :other_punctuation; node << Punctuation::Other.new(token, active_opts)
|
450
|
-
|
451
|
-
when :separator; node << Separator::Any.new(token, active_opts)
|
452
|
-
when :space_separator; node << Separator::Space.new(token, active_opts)
|
453
|
-
when :line_separator; node << Separator::Line.new(token, active_opts)
|
454
|
-
when :paragraph_separator; node << Separator::Paragraph.new(token, active_opts)
|
455
|
-
|
456
|
-
when :symbol; node << Symbol::Any.new(token, active_opts)
|
457
|
-
when :math_symbol; node << Symbol::Math.new(token, active_opts)
|
458
|
-
when :currency_symbol; node << Symbol::Currency.new(token, active_opts)
|
459
|
-
when :modifier_symbol; node << Symbol::Modifier.new(token, active_opts)
|
460
|
-
when :other_symbol; node << Symbol::Other.new(token, active_opts)
|
461
|
-
|
462
|
-
when :other; node << Codepoint::Any.new(token, active_opts)
|
463
|
-
when :control; node << Codepoint::Control.new(token, active_opts)
|
464
|
-
when :format; node << Codepoint::Format.new(token, active_opts)
|
465
|
-
when :surrogate; node << Codepoint::Surrogate.new(token, active_opts)
|
466
|
-
when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
|
467
|
-
when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
|
468
|
-
|
469
|
-
when *UPTokens::Age; node << Age.new(token, active_opts)
|
470
|
-
when *UPTokens::Derived; node << Derived.new(token, active_opts)
|
471
|
-
when *UPTokens::Emoji; node << Emoji.new(token, active_opts)
|
472
|
-
when *UPTokens::
|
473
|
-
when *UPTokens::
|
416
|
+
when :newline; node << UP::Newline.new(token, active_opts)
|
417
|
+
|
418
|
+
when :any; node << UP::Any.new(token, active_opts)
|
419
|
+
when :assigned; node << UP::Assigned.new(token, active_opts)
|
420
|
+
|
421
|
+
when :letter; node << UP::Letter::Any.new(token, active_opts)
|
422
|
+
when :cased_letter; node << UP::Letter::Cased.new(token, active_opts)
|
423
|
+
when :uppercase_letter; node << UP::Letter::Uppercase.new(token, active_opts)
|
424
|
+
when :lowercase_letter; node << UP::Letter::Lowercase.new(token, active_opts)
|
425
|
+
when :titlecase_letter; node << UP::Letter::Titlecase.new(token, active_opts)
|
426
|
+
when :modifier_letter; node << UP::Letter::Modifier.new(token, active_opts)
|
427
|
+
when :other_letter; node << UP::Letter::Other.new(token, active_opts)
|
428
|
+
|
429
|
+
when :mark; node << UP::Mark::Any.new(token, active_opts)
|
430
|
+
when :combining_mark; node << UP::Mark::Combining.new(token, active_opts)
|
431
|
+
when :nonspacing_mark; node << UP::Mark::Nonspacing.new(token, active_opts)
|
432
|
+
when :spacing_mark; node << UP::Mark::Spacing.new(token, active_opts)
|
433
|
+
when :enclosing_mark; node << UP::Mark::Enclosing.new(token, active_opts)
|
434
|
+
|
435
|
+
when :number; node << UP::Number::Any.new(token, active_opts)
|
436
|
+
when :decimal_number; node << UP::Number::Decimal.new(token, active_opts)
|
437
|
+
when :letter_number; node << UP::Number::Letter.new(token, active_opts)
|
438
|
+
when :other_number; node << UP::Number::Other.new(token, active_opts)
|
439
|
+
|
440
|
+
when :punctuation; node << UP::Punctuation::Any.new(token, active_opts)
|
441
|
+
when :connector_punctuation; node << UP::Punctuation::Connector.new(token, active_opts)
|
442
|
+
when :dash_punctuation; node << UP::Punctuation::Dash.new(token, active_opts)
|
443
|
+
when :open_punctuation; node << UP::Punctuation::Open.new(token, active_opts)
|
444
|
+
when :close_punctuation; node << UP::Punctuation::Close.new(token, active_opts)
|
445
|
+
when :initial_punctuation; node << UP::Punctuation::Initial.new(token, active_opts)
|
446
|
+
when :final_punctuation; node << UP::Punctuation::Final.new(token, active_opts)
|
447
|
+
when :other_punctuation; node << UP::Punctuation::Other.new(token, active_opts)
|
448
|
+
|
449
|
+
when :separator; node << UP::Separator::Any.new(token, active_opts)
|
450
|
+
when :space_separator; node << UP::Separator::Space.new(token, active_opts)
|
451
|
+
when :line_separator; node << UP::Separator::Line.new(token, active_opts)
|
452
|
+
when :paragraph_separator; node << UP::Separator::Paragraph.new(token, active_opts)
|
453
|
+
|
454
|
+
when :symbol; node << UP::Symbol::Any.new(token, active_opts)
|
455
|
+
when :math_symbol; node << UP::Symbol::Math.new(token, active_opts)
|
456
|
+
when :currency_symbol; node << UP::Symbol::Currency.new(token, active_opts)
|
457
|
+
when :modifier_symbol; node << UP::Symbol::Modifier.new(token, active_opts)
|
458
|
+
when :other_symbol; node << UP::Symbol::Other.new(token, active_opts)
|
459
|
+
|
460
|
+
when :other; node << UP::Codepoint::Any.new(token, active_opts)
|
461
|
+
when :control; node << UP::Codepoint::Control.new(token, active_opts)
|
462
|
+
when :format; node << UP::Codepoint::Format.new(token, active_opts)
|
463
|
+
when :surrogate; node << UP::Codepoint::Surrogate.new(token, active_opts)
|
464
|
+
when :private_use; node << UP::Codepoint::PrivateUse.new(token, active_opts)
|
465
|
+
when :unassigned; node << UP::Codepoint::Unassigned.new(token, active_opts)
|
466
|
+
|
467
|
+
when *UPTokens::Age; node << UP::Age.new(token, active_opts)
|
468
|
+
when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
|
469
|
+
when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
|
470
|
+
when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
|
471
|
+
when *UPTokens::Script; node << UP::Script.new(token, active_opts)
|
472
|
+
when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
|
474
473
|
|
475
474
|
else
|
476
475
|
raise UnknownTokenError.new('UnicodeProperty', token)
|
@@ -478,8 +477,7 @@ class Regexp::Parser
|
|
478
477
|
end
|
479
478
|
|
480
479
|
def quantifier(token)
|
481
|
-
target_node = node.
|
482
|
-
target_node or raise ParserError, "No valid target found for '#{token.text}'"
|
480
|
+
target_node = node.extract_quantifier_target(token.text)
|
483
481
|
|
484
482
|
# in case of chained quantifiers, wrap target in an implicit passive group
|
485
483
|
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
|
@@ -527,6 +525,8 @@ class Regexp::Parser
|
|
527
525
|
end
|
528
526
|
|
529
527
|
def open_set(token)
|
528
|
+
# TODO: this and Quantifier are the only cases where Expression#token
|
529
|
+
# does not match the scanner/lexer output. Fix in v3.0.0.
|
530
530
|
token.token = :character
|
531
531
|
nest(CharacterSet.new(token, active_opts))
|
532
532
|
end
|
@@ -575,22 +575,22 @@ class Regexp::Parser
|
|
575
575
|
options_stack.last
|
576
576
|
end
|
577
577
|
|
578
|
-
# Assigns referenced expressions to
|
578
|
+
# Assigns referenced expressions to referring expressions, e.g. if there is
|
579
579
|
# an instance of Backreference::Number, its #referenced_expression is set to
|
580
580
|
# the instance of Group::Capture that it refers to via its number.
|
581
581
|
def assign_referenced_expressions
|
582
|
-
# find all
|
582
|
+
# find all referenceable and referring expressions
|
583
583
|
targets = { 0 => root }
|
584
584
|
referrers = []
|
585
585
|
root.each_expression do |exp|
|
586
586
|
exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
|
587
587
|
referrers << exp if exp.referential?
|
588
588
|
end
|
589
|
-
# assign reference expression to
|
589
|
+
# assign reference expression to referring expressions
|
590
590
|
# (in a second iteration because there might be forward references)
|
591
591
|
referrers.each do |exp|
|
592
592
|
exp.referenced_expression = targets[exp.reference] ||
|
593
|
-
raise(ParserError, "Invalid reference
|
593
|
+
raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
|
594
594
|
end
|
595
595
|
end
|
596
596
|
end # module Regexp::Parser
|