regexp_parser 2.7.0 → 2.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +5 -5
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/keep.rb +1 -1
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
- data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -9
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -24
- data/lib/regexp_parser/expression/subexpression.rb +20 -18
- data/lib/regexp_parser/expression.rb +34 -31
- data/lib/regexp_parser/lexer.rb +15 -7
- data/lib/regexp_parser/parser.rb +91 -91
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +29 -0
- data/lib/regexp_parser/scanner/properties/short.csv +3 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +44 -130
- data/lib/regexp_parser/scanner.rb +1096 -1297
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +13 -13
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +6 -6
- data/regexp_parser.gemspec +5 -5
- metadata +14 -8
- data/CHANGELOG.md +0 -632
- data/README.md +0 -503
@@ -5,21 +5,16 @@ module Regexp::Expression
|
|
5
5
|
alias :operands :expressions
|
6
6
|
alias :operator :text
|
7
7
|
|
8
|
-
def
|
9
|
-
expressions.first.
|
8
|
+
def ts
|
9
|
+
(head = expressions.first) ? head.ts : @ts
|
10
10
|
end
|
11
|
-
alias :ts :starts_at
|
12
11
|
|
13
12
|
def <<(exp)
|
14
13
|
expressions.last << exp
|
15
14
|
end
|
16
15
|
|
17
|
-
def add_sequence(active_opts = {})
|
18
|
-
self.class::OPERAND.add_to(self,
|
19
|
-
end
|
20
|
-
|
21
|
-
def parts
|
22
|
-
intersperse(expressions, text.dup)
|
16
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
17
|
+
self.class::OPERAND.add_to(self, params, active_opts)
|
23
18
|
end
|
24
19
|
end
|
25
20
|
end
|
@@ -8,7 +8,8 @@ module Regexp::Expression
|
|
8
8
|
|
9
9
|
attr_accessor :type, :token, :text, :ts, :te,
|
10
10
|
:level, :set_level, :conditional_level,
|
11
|
-
:options
|
11
|
+
:options, :parent,
|
12
|
+
:custom_to_s_handling, :pre_quantifier_decorations
|
12
13
|
|
13
14
|
attr_reader :nesting_level, :quantifier
|
14
15
|
end
|
@@ -32,6 +33,10 @@ module Regexp::Expression
|
|
32
33
|
self.text = orig.text.dup if orig.text
|
33
34
|
self.options = orig.options.dup if orig.options
|
34
35
|
self.quantifier = orig.quantifier.clone if orig.quantifier
|
36
|
+
self.parent = nil # updated by Subexpression#initialize_copy
|
37
|
+
if orig.pre_quantifier_decorations
|
38
|
+
self.pre_quantifier_decorations = orig.pre_quantifier_decorations.map(&:dup)
|
39
|
+
end
|
35
40
|
super
|
36
41
|
end
|
37
42
|
|
@@ -39,35 +44,51 @@ module Regexp::Expression
|
|
39
44
|
ts
|
40
45
|
end
|
41
46
|
|
47
|
+
def ends_at(include_quantifier = true)
|
48
|
+
ts + (include_quantifier ? full_length : base_length)
|
49
|
+
end
|
50
|
+
|
42
51
|
def base_length
|
43
52
|
to_s(:base).length
|
44
53
|
end
|
45
54
|
|
46
55
|
def full_length
|
47
|
-
to_s.length
|
48
|
-
end
|
49
|
-
|
56
|
+
to_s(:original).length
|
57
|
+
end
|
58
|
+
|
59
|
+
# #to_s reproduces the original source, as an unparser would.
|
60
|
+
#
|
61
|
+
# It takes an optional format argument.
|
62
|
+
#
|
63
|
+
# Example:
|
64
|
+
#
|
65
|
+
# lit = Regexp::Parser.parse(/a +/x)[0]
|
66
|
+
#
|
67
|
+
# lit.to_s # => 'a+' # default; with quantifier
|
68
|
+
# lit.to_s(:full) # => 'a+' # default; with quantifier
|
69
|
+
# lit.to_s(:base) # => 'a' # without quantifier
|
70
|
+
# lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
|
71
|
+
#
|
50
72
|
def to_s(format = :full)
|
51
|
-
|
73
|
+
base = parts.each_with_object(''.dup) do |part, buff|
|
74
|
+
if part.instance_of?(String)
|
75
|
+
buff << part
|
76
|
+
elsif !part.custom_to_s_handling
|
77
|
+
buff << part.to_s(:original)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
"#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
|
52
81
|
end
|
53
82
|
alias :to_str :to_s
|
54
83
|
|
55
|
-
def
|
56
|
-
|
84
|
+
def pre_quantifier_decoration(expression_format = :original)
|
85
|
+
pre_quantifier_decorations.to_a.join if expression_format == :original
|
57
86
|
end
|
58
87
|
|
59
|
-
def quantifier_affix(expression_format)
|
88
|
+
def quantifier_affix(expression_format = :full)
|
60
89
|
quantifier.to_s if quantified? && expression_format != :base
|
61
90
|
end
|
62
91
|
|
63
|
-
def quantified?
|
64
|
-
!quantifier.nil?
|
65
|
-
end
|
66
|
-
|
67
|
-
def optional?
|
68
|
-
quantified? && quantifier.min == 0
|
69
|
-
end
|
70
|
-
|
71
92
|
def offset
|
72
93
|
[starts_at, full_length]
|
73
94
|
end
|
@@ -76,14 +97,6 @@ module Regexp::Expression
|
|
76
97
|
'@%d+%d' % offset
|
77
98
|
end
|
78
99
|
|
79
|
-
def terminal?
|
80
|
-
true # overridden to be false in Expression::Subexpression
|
81
|
-
end
|
82
|
-
|
83
|
-
def referential?
|
84
|
-
false # overridden to be true e.g. in Expression::Backreference::Base
|
85
|
-
end
|
86
|
-
|
87
100
|
def nesting_level=(lvl)
|
88
101
|
@nesting_level = lvl
|
89
102
|
quantifier && quantifier.nesting_level = lvl
|
@@ -11,16 +11,15 @@ module Regexp::Expression
|
|
11
11
|
|
12
12
|
# Override base method to clone the expressions as well.
|
13
13
|
def initialize_copy(orig)
|
14
|
-
self.expressions = orig.expressions.map
|
14
|
+
self.expressions = orig.expressions.map do |exp|
|
15
|
+
exp.clone.tap { |copy| copy.parent = self }
|
16
|
+
end
|
15
17
|
super
|
16
18
|
end
|
17
19
|
|
18
20
|
def <<(exp)
|
19
|
-
|
20
|
-
|
21
|
-
else
|
22
|
-
expressions << exp
|
23
|
-
end
|
21
|
+
exp.parent = self
|
22
|
+
expressions << exp
|
24
23
|
end
|
25
24
|
|
26
25
|
%w[[] at each empty? fetch index join last length values_at].each do |method|
|
@@ -38,11 +37,7 @@ module Regexp::Expression
|
|
38
37
|
end
|
39
38
|
|
40
39
|
def te
|
41
|
-
ts +
|
42
|
-
end
|
43
|
-
|
44
|
-
def parts
|
45
|
-
expressions
|
40
|
+
ts + base_length
|
46
41
|
end
|
47
42
|
|
48
43
|
def to_h
|
@@ -52,14 +47,21 @@ module Regexp::Expression
|
|
52
47
|
)
|
53
48
|
end
|
54
49
|
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
50
|
+
def extract_quantifier_target(quantifier_description)
|
51
|
+
pre_quantifier_decorations = []
|
52
|
+
target = expressions.reverse.find do |exp|
|
53
|
+
if exp.decorative?
|
54
|
+
exp.custom_to_s_handling = true
|
55
|
+
pre_quantifier_decorations << exp.text
|
56
|
+
next
|
57
|
+
end
|
58
|
+
exp
|
59
|
+
end
|
60
|
+
target or raise Regexp::Parser::ParserError,
|
61
|
+
"No valid target found for '#{quantifier_description}' quantifier"
|
60
62
|
|
61
|
-
|
62
|
-
|
63
|
+
target.pre_quantifier_decorations = pre_quantifier_decorations
|
64
|
+
target
|
63
65
|
end
|
64
66
|
end
|
65
67
|
end
|
@@ -1,34 +1,37 @@
|
|
1
|
-
|
1
|
+
require_relative 'error'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
require_relative 'expression/shared'
|
4
|
+
require_relative 'expression/base'
|
5
|
+
require_relative 'expression/quantifier'
|
6
|
+
require_relative 'expression/subexpression'
|
7
|
+
require_relative 'expression/sequence'
|
8
|
+
require_relative 'expression/sequence_operation'
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
10
|
+
require_relative 'expression/classes/alternation'
|
11
|
+
require_relative 'expression/classes/anchor'
|
12
|
+
require_relative 'expression/classes/backreference'
|
13
|
+
require_relative 'expression/classes/character_set'
|
14
|
+
require_relative 'expression/classes/character_set/intersection'
|
15
|
+
require_relative 'expression/classes/character_set/range'
|
16
|
+
require_relative 'expression/classes/character_type'
|
17
|
+
require_relative 'expression/classes/conditional'
|
18
|
+
require_relative 'expression/classes/escape_sequence'
|
19
|
+
require_relative 'expression/classes/free_space'
|
20
|
+
require_relative 'expression/classes/group'
|
21
|
+
require_relative 'expression/classes/keep'
|
22
|
+
require_relative 'expression/classes/literal'
|
23
|
+
require_relative 'expression/classes/posix_class'
|
24
|
+
require_relative 'expression/classes/root'
|
25
|
+
require_relative 'expression/classes/unicode_property'
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
27
|
+
require_relative 'expression/methods/construct'
|
28
|
+
require_relative 'expression/methods/human_name'
|
29
|
+
require_relative 'expression/methods/match'
|
30
|
+
require_relative 'expression/methods/match_length'
|
31
|
+
require_relative 'expression/methods/negative'
|
32
|
+
require_relative 'expression/methods/options'
|
33
|
+
require_relative 'expression/methods/parts'
|
34
|
+
require_relative 'expression/methods/printing'
|
35
|
+
require_relative 'expression/methods/strfregexp'
|
36
|
+
require_relative 'expression/methods/tests'
|
37
|
+
require_relative 'expression/methods/traverse'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -6,7 +6,7 @@ class Regexp::Lexer
|
|
6
6
|
|
7
7
|
OPENING_TOKENS = %i[
|
8
8
|
capture passive lookahead nlookahead lookbehind nlookbehind
|
9
|
-
atomic options options_switch named absence
|
9
|
+
atomic options options_switch named absence open
|
10
10
|
].freeze
|
11
11
|
|
12
12
|
CLOSING_TOKENS = %i[close].freeze
|
@@ -89,24 +89,32 @@ class Regexp::Lexer
|
|
89
89
|
:nesting, :set_nesting, :conditional_nesting, :shift
|
90
90
|
|
91
91
|
def ascend(type, token)
|
92
|
+
return unless CLOSING_TOKENS.include?(token)
|
93
|
+
|
92
94
|
case type
|
93
95
|
when :group, :assertion
|
94
|
-
self.nesting = nesting - 1
|
96
|
+
self.nesting = nesting - 1
|
95
97
|
when :set
|
96
|
-
self.set_nesting = set_nesting - 1
|
98
|
+
self.set_nesting = set_nesting - 1
|
97
99
|
when :conditional
|
98
|
-
self.conditional_nesting = conditional_nesting - 1
|
100
|
+
self.conditional_nesting = conditional_nesting - 1
|
101
|
+
else
|
102
|
+
raise "unhandled nesting type #{type}"
|
99
103
|
end
|
100
104
|
end
|
101
105
|
|
102
106
|
def descend(type, token)
|
107
|
+
return unless OPENING_TOKENS.include?(token)
|
108
|
+
|
103
109
|
case type
|
104
110
|
when :group, :assertion
|
105
|
-
self.nesting = nesting + 1
|
111
|
+
self.nesting = nesting + 1
|
106
112
|
when :set
|
107
|
-
self.set_nesting = set_nesting + 1
|
113
|
+
self.set_nesting = set_nesting + 1
|
108
114
|
when :conditional
|
109
|
-
self.conditional_nesting = conditional_nesting + 1
|
115
|
+
self.conditional_nesting = conditional_nesting + 1
|
116
|
+
else
|
117
|
+
raise "unhandled nesting type #{type}"
|
110
118
|
end
|
111
119
|
end
|
112
120
|
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative 'error'
|
2
|
+
require_relative 'expression'
|
3
3
|
|
4
4
|
class Regexp::Parser
|
5
5
|
include Regexp::Expression
|
@@ -232,7 +232,7 @@ class Regexp::Parser
|
|
232
232
|
node << Backreference::NameRecursionLevel.new(token, active_opts)
|
233
233
|
when :name_call
|
234
234
|
node << Backreference::NameCall.new(token, active_opts)
|
235
|
-
when :number, :number_ref
|
235
|
+
when :number, :number_ref # TODO: split in v3.0.0
|
236
236
|
node << Backreference::Number.new(token, active_opts)
|
237
237
|
when :number_recursion_ref
|
238
238
|
node << Backreference::NumberRecursionLevel.new(token, active_opts).tap do |exp|
|
@@ -272,9 +272,9 @@ class Regexp::Parser
|
|
272
272
|
nest_conditional(Conditional::Expression.new(token, active_opts))
|
273
273
|
when :condition
|
274
274
|
conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts)
|
275
|
-
conditional_nesting.last.add_sequence(active_opts)
|
275
|
+
conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
|
276
276
|
when :separator
|
277
|
-
conditional_nesting.last.add_sequence(active_opts)
|
277
|
+
conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
|
278
278
|
self.node = conditional_nesting.last.branches.last
|
279
279
|
when :close
|
280
280
|
conditional_nesting.pop
|
@@ -322,6 +322,7 @@ class Regexp::Parser
|
|
322
322
|
|
323
323
|
when :control
|
324
324
|
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
325
|
+
# TODO: emit :meta_control_sequence token in v3.0.0
|
325
326
|
node << EscapeSequence::MetaControl.new(token, active_opts)
|
326
327
|
else
|
327
328
|
node << EscapeSequence::Control.new(token, active_opts)
|
@@ -329,6 +330,7 @@ class Regexp::Parser
|
|
329
330
|
|
330
331
|
when :meta_sequence
|
331
332
|
if token.text =~ /\A\\M-\\[Cc]/
|
333
|
+
# TODO: emit :meta_control_sequence token in v3.0.0:
|
332
334
|
node << EscapeSequence::MetaControl.new(token, active_opts)
|
333
335
|
else
|
334
336
|
node << EscapeSequence::Meta.new(token, active_opts)
|
@@ -349,11 +351,7 @@ class Regexp::Parser
|
|
349
351
|
when :comment
|
350
352
|
node << Comment.new(token, active_opts)
|
351
353
|
when :whitespace
|
352
|
-
|
353
|
-
node.last.merge(WhiteSpace.new(token, active_opts))
|
354
|
-
else
|
355
|
-
node << WhiteSpace.new(token, active_opts)
|
356
|
-
end
|
354
|
+
node << WhiteSpace.new(token, active_opts)
|
357
355
|
else
|
358
356
|
raise UnknownTokenError.new('FreeSpace', token)
|
359
357
|
end
|
@@ -381,96 +379,97 @@ class Regexp::Parser
|
|
381
379
|
def sequence_operation(klass, token)
|
382
380
|
unless node.instance_of?(klass)
|
383
381
|
operator = klass.new(token, active_opts)
|
384
|
-
sequence = operator.add_sequence(active_opts)
|
382
|
+
sequence = operator.add_sequence(active_opts, { ts: token.ts })
|
385
383
|
sequence.expressions = node.expressions
|
386
384
|
node.expressions = []
|
387
385
|
nest(operator)
|
388
386
|
end
|
389
|
-
node.add_sequence(active_opts)
|
387
|
+
node.add_sequence(active_opts, { ts: token.te })
|
390
388
|
end
|
391
389
|
|
392
390
|
def posixclass(token)
|
393
391
|
node << PosixClass.new(token, active_opts)
|
394
392
|
end
|
395
393
|
|
396
|
-
|
397
|
-
UPTokens = Regexp::Syntax::Token::
|
394
|
+
UP = Regexp::Expression::Property
|
395
|
+
UPTokens = Regexp::Syntax::Token::Property
|
398
396
|
|
399
397
|
def property(token)
|
400
398
|
case token.token
|
401
|
-
when :alnum; node << Alnum.new(token, active_opts)
|
402
|
-
when :alpha; node << Alpha.new(token, active_opts)
|
403
|
-
when :ascii; node << Ascii.new(token, active_opts)
|
404
|
-
when :blank; node << Blank.new(token, active_opts)
|
405
|
-
when :cntrl; node << Cntrl.new(token, active_opts)
|
406
|
-
when :digit; node << Digit.new(token, active_opts)
|
407
|
-
when :graph; node << Graph.new(token, active_opts)
|
408
|
-
when :lower; node << Lower.new(token, active_opts)
|
409
|
-
when :print; node << Print.new(token, active_opts)
|
410
|
-
when :punct; node << Punct.new(token, active_opts)
|
411
|
-
when :space; node << Space.new(token, active_opts)
|
412
|
-
when :upper; node << Upper.new(token, active_opts)
|
413
|
-
when :word; node << Word.new(token, active_opts)
|
414
|
-
when :xdigit; node << Xdigit.new(token, active_opts)
|
415
|
-
when :xposixpunct; node << XPosixPunct.new(token, active_opts)
|
399
|
+
when :alnum; node << UP::Alnum.new(token, active_opts)
|
400
|
+
when :alpha; node << UP::Alpha.new(token, active_opts)
|
401
|
+
when :ascii; node << UP::Ascii.new(token, active_opts)
|
402
|
+
when :blank; node << UP::Blank.new(token, active_opts)
|
403
|
+
when :cntrl; node << UP::Cntrl.new(token, active_opts)
|
404
|
+
when :digit; node << UP::Digit.new(token, active_opts)
|
405
|
+
when :graph; node << UP::Graph.new(token, active_opts)
|
406
|
+
when :lower; node << UP::Lower.new(token, active_opts)
|
407
|
+
when :print; node << UP::Print.new(token, active_opts)
|
408
|
+
when :punct; node << UP::Punct.new(token, active_opts)
|
409
|
+
when :space; node << UP::Space.new(token, active_opts)
|
410
|
+
when :upper; node << UP::Upper.new(token, active_opts)
|
411
|
+
when :word; node << UP::Word.new(token, active_opts)
|
412
|
+
when :xdigit; node << UP::Xdigit.new(token, active_opts)
|
413
|
+
when :xposixpunct; node << UP::XPosixPunct.new(token, active_opts)
|
416
414
|
|
417
415
|
# only in Oniguruma (old rubies)
|
418
|
-
when :newline; node << Newline.new(token, active_opts)
|
419
|
-
|
420
|
-
when :any; node << Any.new(token, active_opts)
|
421
|
-
when :assigned; node << Assigned.new(token, active_opts)
|
422
|
-
|
423
|
-
when :letter; node << Letter::Any.new(token, active_opts)
|
424
|
-
when :cased_letter; node << Letter::Cased.new(token, active_opts)
|
425
|
-
when :uppercase_letter; node << Letter::Uppercase.new(token, active_opts)
|
426
|
-
when :lowercase_letter; node << Letter::Lowercase.new(token, active_opts)
|
427
|
-
when :titlecase_letter; node << Letter::Titlecase.new(token, active_opts)
|
428
|
-
when :modifier_letter; node << Letter::Modifier.new(token, active_opts)
|
429
|
-
when :other_letter; node << Letter::Other.new(token, active_opts)
|
430
|
-
|
431
|
-
when :mark; node << Mark::Any.new(token, active_opts)
|
432
|
-
when :combining_mark; node << Mark::Combining.new(token, active_opts)
|
433
|
-
when :nonspacing_mark; node << Mark::Nonspacing.new(token, active_opts)
|
434
|
-
when :spacing_mark; node << Mark::Spacing.new(token, active_opts)
|
435
|
-
when :enclosing_mark; node << Mark::Enclosing.new(token, active_opts)
|
436
|
-
|
437
|
-
when :number; node << Number::Any.new(token, active_opts)
|
438
|
-
when :decimal_number; node << Number::Decimal.new(token, active_opts)
|
439
|
-
when :letter_number; node << Number::Letter.new(token, active_opts)
|
440
|
-
when :other_number; node << Number::Other.new(token, active_opts)
|
441
|
-
|
442
|
-
when :punctuation; node << Punctuation::Any.new(token, active_opts)
|
443
|
-
when :connector_punctuation; node << Punctuation::Connector.new(token, active_opts)
|
444
|
-
when :dash_punctuation; node << Punctuation::Dash.new(token, active_opts)
|
445
|
-
when :open_punctuation; node << Punctuation::Open.new(token, active_opts)
|
446
|
-
when :close_punctuation; node << Punctuation::Close.new(token, active_opts)
|
447
|
-
when :initial_punctuation; node << Punctuation::Initial.new(token, active_opts)
|
448
|
-
when :final_punctuation; node << Punctuation::Final.new(token, active_opts)
|
449
|
-
when :other_punctuation; node << Punctuation::Other.new(token, active_opts)
|
450
|
-
|
451
|
-
when :separator; node << Separator::Any.new(token, active_opts)
|
452
|
-
when :space_separator; node << Separator::Space.new(token, active_opts)
|
453
|
-
when :line_separator; node << Separator::Line.new(token, active_opts)
|
454
|
-
when :paragraph_separator; node << Separator::Paragraph.new(token, active_opts)
|
455
|
-
|
456
|
-
when :symbol; node << Symbol::Any.new(token, active_opts)
|
457
|
-
when :math_symbol; node << Symbol::Math.new(token, active_opts)
|
458
|
-
when :currency_symbol; node << Symbol::Currency.new(token, active_opts)
|
459
|
-
when :modifier_symbol; node << Symbol::Modifier.new(token, active_opts)
|
460
|
-
when :other_symbol; node << Symbol::Other.new(token, active_opts)
|
461
|
-
|
462
|
-
when :other; node << Codepoint::Any.new(token, active_opts)
|
463
|
-
when :control; node << Codepoint::Control.new(token, active_opts)
|
464
|
-
when :format; node << Codepoint::Format.new(token, active_opts)
|
465
|
-
when :surrogate; node << Codepoint::Surrogate.new(token, active_opts)
|
466
|
-
when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
|
467
|
-
when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
|
468
|
-
|
469
|
-
when *UPTokens::Age; node << Age.new(token, active_opts)
|
470
|
-
when *UPTokens::Derived; node << Derived.new(token, active_opts)
|
471
|
-
when *UPTokens::Emoji; node << Emoji.new(token, active_opts)
|
472
|
-
when *UPTokens::
|
473
|
-
when *UPTokens::
|
416
|
+
when :newline; node << UP::Newline.new(token, active_opts)
|
417
|
+
|
418
|
+
when :any; node << UP::Any.new(token, active_opts)
|
419
|
+
when :assigned; node << UP::Assigned.new(token, active_opts)
|
420
|
+
|
421
|
+
when :letter; node << UP::Letter::Any.new(token, active_opts)
|
422
|
+
when :cased_letter; node << UP::Letter::Cased.new(token, active_opts)
|
423
|
+
when :uppercase_letter; node << UP::Letter::Uppercase.new(token, active_opts)
|
424
|
+
when :lowercase_letter; node << UP::Letter::Lowercase.new(token, active_opts)
|
425
|
+
when :titlecase_letter; node << UP::Letter::Titlecase.new(token, active_opts)
|
426
|
+
when :modifier_letter; node << UP::Letter::Modifier.new(token, active_opts)
|
427
|
+
when :other_letter; node << UP::Letter::Other.new(token, active_opts)
|
428
|
+
|
429
|
+
when :mark; node << UP::Mark::Any.new(token, active_opts)
|
430
|
+
when :combining_mark; node << UP::Mark::Combining.new(token, active_opts)
|
431
|
+
when :nonspacing_mark; node << UP::Mark::Nonspacing.new(token, active_opts)
|
432
|
+
when :spacing_mark; node << UP::Mark::Spacing.new(token, active_opts)
|
433
|
+
when :enclosing_mark; node << UP::Mark::Enclosing.new(token, active_opts)
|
434
|
+
|
435
|
+
when :number; node << UP::Number::Any.new(token, active_opts)
|
436
|
+
when :decimal_number; node << UP::Number::Decimal.new(token, active_opts)
|
437
|
+
when :letter_number; node << UP::Number::Letter.new(token, active_opts)
|
438
|
+
when :other_number; node << UP::Number::Other.new(token, active_opts)
|
439
|
+
|
440
|
+
when :punctuation; node << UP::Punctuation::Any.new(token, active_opts)
|
441
|
+
when :connector_punctuation; node << UP::Punctuation::Connector.new(token, active_opts)
|
442
|
+
when :dash_punctuation; node << UP::Punctuation::Dash.new(token, active_opts)
|
443
|
+
when :open_punctuation; node << UP::Punctuation::Open.new(token, active_opts)
|
444
|
+
when :close_punctuation; node << UP::Punctuation::Close.new(token, active_opts)
|
445
|
+
when :initial_punctuation; node << UP::Punctuation::Initial.new(token, active_opts)
|
446
|
+
when :final_punctuation; node << UP::Punctuation::Final.new(token, active_opts)
|
447
|
+
when :other_punctuation; node << UP::Punctuation::Other.new(token, active_opts)
|
448
|
+
|
449
|
+
when :separator; node << UP::Separator::Any.new(token, active_opts)
|
450
|
+
when :space_separator; node << UP::Separator::Space.new(token, active_opts)
|
451
|
+
when :line_separator; node << UP::Separator::Line.new(token, active_opts)
|
452
|
+
when :paragraph_separator; node << UP::Separator::Paragraph.new(token, active_opts)
|
453
|
+
|
454
|
+
when :symbol; node << UP::Symbol::Any.new(token, active_opts)
|
455
|
+
when :math_symbol; node << UP::Symbol::Math.new(token, active_opts)
|
456
|
+
when :currency_symbol; node << UP::Symbol::Currency.new(token, active_opts)
|
457
|
+
when :modifier_symbol; node << UP::Symbol::Modifier.new(token, active_opts)
|
458
|
+
when :other_symbol; node << UP::Symbol::Other.new(token, active_opts)
|
459
|
+
|
460
|
+
when :other; node << UP::Codepoint::Any.new(token, active_opts)
|
461
|
+
when :control; node << UP::Codepoint::Control.new(token, active_opts)
|
462
|
+
when :format; node << UP::Codepoint::Format.new(token, active_opts)
|
463
|
+
when :surrogate; node << UP::Codepoint::Surrogate.new(token, active_opts)
|
464
|
+
when :private_use; node << UP::Codepoint::PrivateUse.new(token, active_opts)
|
465
|
+
when :unassigned; node << UP::Codepoint::Unassigned.new(token, active_opts)
|
466
|
+
|
467
|
+
when *UPTokens::Age; node << UP::Age.new(token, active_opts)
|
468
|
+
when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
|
469
|
+
when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
|
470
|
+
when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
|
471
|
+
when *UPTokens::Script; node << UP::Script.new(token, active_opts)
|
472
|
+
when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
|
474
473
|
|
475
474
|
else
|
476
475
|
raise UnknownTokenError.new('UnicodeProperty', token)
|
@@ -478,8 +477,7 @@ class Regexp::Parser
|
|
478
477
|
end
|
479
478
|
|
480
479
|
def quantifier(token)
|
481
|
-
target_node = node.
|
482
|
-
target_node or raise ParserError, "No valid target found for '#{token.text}'"
|
480
|
+
target_node = node.extract_quantifier_target(token.text)
|
483
481
|
|
484
482
|
# in case of chained quantifiers, wrap target in an implicit passive group
|
485
483
|
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
|
@@ -527,6 +525,8 @@ class Regexp::Parser
|
|
527
525
|
end
|
528
526
|
|
529
527
|
def open_set(token)
|
528
|
+
# TODO: this and Quantifier are the only cases where Expression#token
|
529
|
+
# does not match the scanner/lexer output. Fix in v3.0.0.
|
530
530
|
token.token = :character
|
531
531
|
nest(CharacterSet.new(token, active_opts))
|
532
532
|
end
|
@@ -575,22 +575,22 @@ class Regexp::Parser
|
|
575
575
|
options_stack.last
|
576
576
|
end
|
577
577
|
|
578
|
-
# Assigns referenced expressions to
|
578
|
+
# Assigns referenced expressions to referring expressions, e.g. if there is
|
579
579
|
# an instance of Backreference::Number, its #referenced_expression is set to
|
580
580
|
# the instance of Group::Capture that it refers to via its number.
|
581
581
|
def assign_referenced_expressions
|
582
|
-
# find all
|
582
|
+
# find all referenceable and referring expressions
|
583
583
|
targets = { 0 => root }
|
584
584
|
referrers = []
|
585
585
|
root.each_expression do |exp|
|
586
586
|
exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
|
587
587
|
referrers << exp if exp.referential?
|
588
588
|
end
|
589
|
-
# assign reference expression to
|
589
|
+
# assign reference expression to referring expressions
|
590
590
|
# (in a second iteration because there might be forward references)
|
591
591
|
referrers.each do |exp|
|
592
592
|
exp.referenced_expression = targets[exp.reference] ||
|
593
|
-
raise(ParserError, "Invalid reference
|
593
|
+
raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
|
594
594
|
end
|
595
595
|
end
|
596
596
|
end # module Regexp::Parser
|