regexp_parser 2.1.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +94 -6
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +40 -30
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +1 -1
- data/lib/regexp_parser/expression/base.rb +75 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +1 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -2
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +2 -2
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
- data/lib/regexp_parser/expression/classes/group.rb +6 -6
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/root.rb +3 -6
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -2
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +10 -1
- data/lib/regexp_parser/expression/quantifier.rb +41 -23
- data/lib/regexp_parser/expression/sequence.rb +9 -24
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
- data/lib/regexp_parser/expression/shared.rb +85 -0
- data/lib/regexp_parser/expression/subexpression.rb +11 -8
- data/lib/regexp_parser/expression.rb +10 -132
- data/lib/regexp_parser/lexer.rb +8 -6
- data/lib/regexp_parser/parser.rb +21 -72
- data/lib/regexp_parser/scanner/properties/long.csv +622 -0
- data/lib/regexp_parser/scanner/properties/short.csv +246 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +48 -35
- data/lib/regexp_parser/scanner.rb +735 -801
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +91 -66
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +717 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +37 -166
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -104
- data/spec/expression/clone_spec.rb +0 -152
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -108
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -64
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -60
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/options_spec.rb +0 -28
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -68
- data/spec/parser/refcalls_spec.rb +0 -117
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -64
- data/spec/scanner/free_space_spec.rb +0 -165
- data/spec/scanner/groups_spec.rb +0 -61
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -39
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/options_spec.rb +0 -36
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -25
- data/spec/scanner/refcalls_spec.rb +0 -55
- data/spec/scanner/sets_spec.rb +0 -151
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -16
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
data/lib/regexp_parser/parser.rb
CHANGED
@@ -23,7 +23,7 @@ class Regexp::Parser
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
26
|
-
root = Root.
|
26
|
+
root = Root.construct(options: extract_options(input, options))
|
27
27
|
|
28
28
|
self.root = root
|
29
29
|
self.node = root
|
@@ -39,6 +39,9 @@ class Regexp::Parser
|
|
39
39
|
parse_token(token)
|
40
40
|
end
|
41
41
|
|
42
|
+
# Trigger recursive setting of #nesting_level, which reflects how deep
|
43
|
+
# a node is in the tree. Do this at the end to account for tree rewrites.
|
44
|
+
root.nesting_level = 0
|
42
45
|
assign_referenced_expressions
|
43
46
|
|
44
47
|
if block_given?
|
@@ -197,11 +200,11 @@ class Regexp::Parser
|
|
197
200
|
end
|
198
201
|
|
199
202
|
def captured_group_count_at_level
|
200
|
-
captured_group_counts[node
|
203
|
+
captured_group_counts[node]
|
201
204
|
end
|
202
205
|
|
203
206
|
def count_captured_group
|
204
|
-
captured_group_counts[node
|
207
|
+
captured_group_counts[node] += 1
|
205
208
|
end
|
206
209
|
|
207
210
|
def close_group
|
@@ -286,17 +289,9 @@ class Regexp::Parser
|
|
286
289
|
def nest(exp)
|
287
290
|
nesting.push(exp)
|
288
291
|
node << exp
|
289
|
-
update_transplanted_subtree(exp, node)
|
290
292
|
self.node = exp
|
291
293
|
end
|
292
294
|
|
293
|
-
# subtrees are transplanted to build Alternations, Intersections, Ranges
|
294
|
-
def update_transplanted_subtree(exp, new_parent)
|
295
|
-
exp.nesting_level = new_parent.nesting_level + 1
|
296
|
-
exp.respond_to?(:each) &&
|
297
|
-
exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
|
298
|
-
end
|
299
|
-
|
300
295
|
def escape(token)
|
301
296
|
case token.token
|
302
297
|
|
@@ -480,79 +475,33 @@ class Regexp::Parser
|
|
480
475
|
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
|
481
476
|
# rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
|
482
477
|
if target_node.quantified?
|
483
|
-
|
484
|
-
:
|
485
|
-
:
|
486
|
-
|
487
|
-
target_node.
|
488
|
-
|
489
|
-
|
490
|
-
target_node.set_level,
|
491
|
-
target_node.conditional_level
|
478
|
+
new_group = Group::Passive.construct(
|
479
|
+
token: :passive,
|
480
|
+
ts: target_node.ts,
|
481
|
+
level: target_node.level,
|
482
|
+
set_level: target_node.set_level,
|
483
|
+
conditional_level: target_node.conditional_level,
|
484
|
+
options: active_opts,
|
492
485
|
)
|
493
|
-
new_group = Group::Passive.new(new_token, active_opts)
|
494
486
|
new_group.implicit = true
|
495
487
|
new_group << target_node
|
496
|
-
|
488
|
+
increase_group_level(target_node)
|
497
489
|
node.expressions[node.expressions.index(target_node)] = new_group
|
498
490
|
target_node = new_group
|
499
491
|
end
|
500
492
|
|
501
|
-
|
502
|
-
|
503
|
-
target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
|
504
|
-
when :zero_or_one_reluctant
|
505
|
-
target_node.quantify(:zero_or_one, token.text, 0, 1, :reluctant)
|
506
|
-
when :zero_or_one_possessive
|
507
|
-
target_node.quantify(:zero_or_one, token.text, 0, 1, :possessive)
|
508
|
-
|
509
|
-
when :zero_or_more
|
510
|
-
target_node.quantify(:zero_or_more, token.text, 0, -1, :greedy)
|
511
|
-
when :zero_or_more_reluctant
|
512
|
-
target_node.quantify(:zero_or_more, token.text, 0, -1, :reluctant)
|
513
|
-
when :zero_or_more_possessive
|
514
|
-
target_node.quantify(:zero_or_more, token.text, 0, -1, :possessive)
|
515
|
-
|
516
|
-
when :one_or_more
|
517
|
-
target_node.quantify(:one_or_more, token.text, 1, -1, :greedy)
|
518
|
-
when :one_or_more_reluctant
|
519
|
-
target_node.quantify(:one_or_more, token.text, 1, -1, :reluctant)
|
520
|
-
when :one_or_more_possessive
|
521
|
-
target_node.quantify(:one_or_more, token.text, 1, -1, :possessive)
|
522
|
-
|
523
|
-
when :interval
|
524
|
-
interval(target_node, token)
|
525
|
-
|
526
|
-
else
|
493
|
+
unless token.token =~ /\A(?:zero_or_one|zero_or_more|one_or_more|interval)
|
494
|
+
(?:_greedy|_reluctant|_possessive)?\z/x
|
527
495
|
raise UnknownTokenError.new('Quantifier', token)
|
528
496
|
end
|
497
|
+
|
498
|
+
target_node.quantify(token, active_opts)
|
529
499
|
end
|
530
500
|
|
531
|
-
def
|
501
|
+
def increase_group_level(exp)
|
532
502
|
exp.level += 1
|
533
|
-
exp.
|
534
|
-
|
535
|
-
|
536
|
-
def interval(target_node, token)
|
537
|
-
text = token.text
|
538
|
-
mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
|
539
|
-
case mchr
|
540
|
-
when '?'
|
541
|
-
range_text = text[0...-1]
|
542
|
-
mode = :reluctant
|
543
|
-
when '+'
|
544
|
-
range_text = text[0...-1]
|
545
|
-
mode = :possessive
|
546
|
-
else
|
547
|
-
range_text = text
|
548
|
-
mode = :greedy
|
549
|
-
end
|
550
|
-
|
551
|
-
range = range_text.gsub(/\{|\}/, '').split(',', 2)
|
552
|
-
min = range[0].empty? ? 0 : range[0]
|
553
|
-
max = range[1] ? (range[1].empty? ? -1 : range[1]) : min
|
554
|
-
|
555
|
-
target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
|
503
|
+
exp.quantifier.level += 1 if exp.quantifier
|
504
|
+
exp.terminal? || exp.each { |subexp| increase_group_level(subexp) }
|
556
505
|
end
|
557
506
|
|
558
507
|
def set(token)
|