regexp_parser 2.0.3 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -3
- data/Gemfile +5 -1
- data/README.md +1 -1
- data/Rakefile +6 -6
- data/lib/regexp_parser.rb +1 -0
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression.rb +1 -1
- data/lib/regexp_parser/expression/classes/backref.rb +5 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +6 -1
- data/lib/regexp_parser/expression/classes/property.rb +1 -1
- data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
- data/lib/regexp_parser/expression/quantifier.rb +1 -1
- data/lib/regexp_parser/expression/sequence.rb +3 -9
- data/lib/regexp_parser/expression/subexpression.rb +1 -1
- data/lib/regexp_parser/parser.rb +281 -332
- data/lib/regexp_parser/scanner.rb +1015 -1003
- data/lib/regexp_parser/scanner/scanner.rl +53 -77
- data/lib/regexp_parser/syntax.rb +6 -6
- data/lib/regexp_parser/syntax/any.rb +1 -1
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/scanner/escapes_spec.rb +1 -1
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +42 -11
- metadata +4 -3
@@ -20,7 +20,7 @@
|
|
20
20
|
|
21
21
|
group_open = '(';
|
22
22
|
group_close = ')';
|
23
|
-
|
23
|
+
parentheses = group_open | group_close;
|
24
24
|
|
25
25
|
set_open = '[';
|
26
26
|
set_close = ']';
|
@@ -58,6 +58,8 @@
|
|
58
58
|
|
59
59
|
meta_sequence = 'M-' . (backslash . ('c' | 'C-'))? . backslash? . any;
|
60
60
|
|
61
|
+
sequence_char = [CMcux];
|
62
|
+
|
61
63
|
zero_or_one = '?' | '??' | '?+';
|
62
64
|
zero_or_more = '*' | '*?' | '*+';
|
63
65
|
one_or_more = '+' | '+?' | '++';
|
@@ -106,11 +108,15 @@
|
|
106
108
|
|
107
109
|
group_named = ('?' . group_name );
|
108
110
|
|
109
|
-
|
110
|
-
|
111
|
+
group_name_backref = 'k' . (('<' . group_name_id_ab? . group_level? '>') |
|
112
|
+
("'" . group_name_id_sq? . group_level? "'"));
|
113
|
+
group_name_call = 'g' . (('<' . group_name_id_ab? . group_level? '>') |
|
114
|
+
("'" . group_name_id_sq? . group_level? "'"));
|
111
115
|
|
112
|
-
|
113
|
-
|
116
|
+
group_number_backref = 'k' . (('<' . group_number . group_level? '>') |
|
117
|
+
("'" . group_number . group_level? "'"));
|
118
|
+
group_number_call = 'g' . (('<' . ((group_number . group_level?) | '0') '>') |
|
119
|
+
("'" . ((group_number . group_level?) | '0') "'"));
|
114
120
|
|
115
121
|
group_type = group_atomic | group_passive | group_absence | group_named;
|
116
122
|
|
@@ -121,7 +127,7 @@
|
|
121
127
|
|
122
128
|
# characters that 'break' a literal
|
123
129
|
meta_char = dot | backslash | alternation |
|
124
|
-
curlies |
|
130
|
+
curlies | parentheses | brackets |
|
125
131
|
line_anchor | quantifier_greedy;
|
126
132
|
|
127
133
|
literal_delimiters = ']' | '}';
|
@@ -130,10 +136,12 @@
|
|
130
136
|
ascii_nonprint = (0x01..0x1f | 0x7f);
|
131
137
|
|
132
138
|
non_literal_escape = char_type_char | anchor_char | escaped_ascii |
|
133
|
-
keep_mark |
|
139
|
+
keep_mark | sequence_char;
|
140
|
+
|
141
|
+
# escapes that also work within a character set
|
142
|
+
set_escape = backslash | brackets | escaped_ascii | property_char |
|
143
|
+
sequence_char | single_codepoint_char_type;
|
134
144
|
|
135
|
-
non_set_escape = (anchor_char - 'b') | group_ref | keep_mark |
|
136
|
-
multi_codepoint_char_type | [0-9cCM];
|
137
145
|
|
138
146
|
# EOF error, used where it can be detected
|
139
147
|
action premature_end_error {
|
@@ -249,16 +257,16 @@
|
|
249
257
|
# set escapes scanner
|
250
258
|
# --------------------------------------------------------------------------
|
251
259
|
set_escape_sequence := |*
|
252
|
-
|
253
|
-
emit(:escape, :literal, copy(data, ts-1, te))
|
254
|
-
fret;
|
255
|
-
};
|
256
|
-
|
257
|
-
any > (escaped_set_alpha, 1) {
|
260
|
+
set_escape > (escaped_set_alpha, 2) {
|
258
261
|
fhold;
|
259
262
|
fnext character_set;
|
260
263
|
fcall escape_sequence;
|
261
264
|
};
|
265
|
+
|
266
|
+
any > (escaped_set_alpha, 1) {
|
267
|
+
emit(:escape, :literal, copy(data, ts-1, te))
|
268
|
+
fret;
|
269
|
+
};
|
262
270
|
*|;
|
263
271
|
|
264
272
|
|
@@ -538,67 +546,35 @@
|
|
538
546
|
|
539
547
|
# Group backreference, named and numbered
|
540
548
|
# ------------------------------------------------------------------------
|
541
|
-
backslash . (
|
549
|
+
backslash . (group_name_backref | group_number_backref) > (backslashed, 4) {
|
542
550
|
case text = copy(data, ts, te)
|
543
|
-
when /^\\(
|
544
|
-
validation_error(:backref, '
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
emit(:backref, :name_ref_sq, text)
|
558
|
-
else
|
559
|
-
emit(:backref, :name_call_sq, text)
|
560
|
-
end
|
561
|
-
|
562
|
-
when /^\\([gk])<\d+>/ # angle-brackets
|
563
|
-
if $1 == 'k'
|
564
|
-
emit(:backref, :number_ref_ab, text)
|
565
|
-
else
|
566
|
-
emit(:backref, :number_call_ab, text)
|
567
|
-
end
|
568
|
-
|
569
|
-
when /^\\([gk])'\d+'/ # single quotes
|
570
|
-
if $1 == 'k'
|
571
|
-
emit(:backref, :number_ref_sq, text)
|
572
|
-
else
|
573
|
-
emit(:backref, :number_call_sq, text)
|
574
|
-
end
|
575
|
-
|
576
|
-
when /^\\(?:g<\+|g<-|(k)<-)\d+>/ # angle-brackets
|
577
|
-
if $1 == 'k'
|
578
|
-
emit(:backref, :number_rel_ref_ab, text)
|
579
|
-
else
|
580
|
-
emit(:backref, :number_rel_call_ab, text)
|
581
|
-
end
|
582
|
-
|
583
|
-
when /^\\(?:g'\+|g'-|(k)'-)\d+'/ # single quotes
|
584
|
-
if $1 == 'k'
|
585
|
-
emit(:backref, :number_rel_ref_sq, text)
|
586
|
-
else
|
587
|
-
emit(:backref, :number_rel_call_sq, text)
|
588
|
-
end
|
589
|
-
|
590
|
-
when /^\\k<[^\p{digit}+\->][^>]*[+\-]\d+>/ # angle-brackets
|
591
|
-
emit(:backref, :name_recursion_ref_ab, text)
|
592
|
-
|
593
|
-
when /^\\k'[^\p{digit}+\-'][^']*[+\-]\d+'/ # single-quotes
|
594
|
-
emit(:backref, :name_recursion_ref_sq, text)
|
595
|
-
|
596
|
-
when /^\\([gk])<[+\-]?\d+[+\-]\d+>/ # angle-brackets
|
597
|
-
emit(:backref, :number_recursion_ref_ab, text)
|
598
|
-
|
599
|
-
when /^\\([gk])'[+\-]?\d+[+\-]\d+'/ # single-quotes
|
600
|
-
emit(:backref, :number_recursion_ref_sq, text)
|
551
|
+
when /^\\k(<>|'')/
|
552
|
+
validation_error(:backref, 'backreference', 'ref ID is empty')
|
553
|
+
when /^\\k(.)[^\p{digit}\-][^+\-]*\D$/
|
554
|
+
emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
|
555
|
+
when /^\\k(.)\d+\D$/
|
556
|
+
emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
|
557
|
+
when /^\\k(.)-\d+\D$/
|
558
|
+
emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
|
559
|
+
when /^\\k(.)[^\p{digit}\-].*[+\-]\d+\D$/
|
560
|
+
emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
|
561
|
+
when /^\\k(.)-?\d+[+\-]\d+\D$/
|
562
|
+
emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
|
563
|
+
end
|
564
|
+
};
|
601
565
|
|
566
|
+
# Group call, named and numbered
|
567
|
+
# ------------------------------------------------------------------------
|
568
|
+
backslash . (group_name_call | group_number_call) > (backslashed, 4) {
|
569
|
+
case text = copy(data, ts, te)
|
570
|
+
when /^\\g(<>|'')/
|
571
|
+
validation_error(:backref, 'subexpression call', 'ref ID is empty')
|
572
|
+
when /^\\g(.)[^\p{digit}+\->][^+\-]*/
|
573
|
+
emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
|
574
|
+
when /^\\g(.)\d+\D$/
|
575
|
+
emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
|
576
|
+
when /^\\g(.)[+-]\d+/
|
577
|
+
emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
|
602
578
|
end
|
603
579
|
};
|
604
580
|
|
@@ -677,10 +653,10 @@
|
|
677
653
|
|
678
654
|
class Regexp::Scanner
|
679
655
|
# General scanner error (catch all)
|
680
|
-
class ScannerError <
|
656
|
+
class ScannerError < Regexp::Parser::Error; end
|
681
657
|
|
682
658
|
# Base for all scanner validation errors
|
683
|
-
class ValidationError <
|
659
|
+
class ValidationError < Regexp::Parser::Error
|
684
660
|
def initialize(reason)
|
685
661
|
super reason
|
686
662
|
end
|
@@ -782,7 +758,7 @@ class Regexp::Scanner
|
|
782
758
|
|
783
759
|
# lazy-load property maps when first needed
|
784
760
|
require 'yaml'
|
785
|
-
PROP_MAPS_DIR = File.
|
761
|
+
PROP_MAPS_DIR = File.join(__dir__, 'scanner', 'properties')
|
786
762
|
|
787
763
|
def self.short_prop_map
|
788
764
|
@short_prop_map ||= YAML.load_file("#{PROP_MAPS_DIR}/short.yml")
|
data/lib/regexp_parser/syntax.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module Regexp::Syntax
|
2
|
-
class SyntaxError <
|
2
|
+
class SyntaxError < Regexp::Parser::Error; end
|
3
3
|
end
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
require_relative 'syntax/tokens'
|
6
|
+
require_relative 'syntax/base'
|
7
|
+
require_relative 'syntax/any'
|
8
|
+
require_relative 'syntax/version_lookup'
|
9
|
+
require_relative 'syntax/versions'
|
@@ -27,8 +27,8 @@ RSpec.describe('Expression#clone') do
|
|
27
27
|
expect(root_2.quantifier.object_id).not_to eq copy_2.quantifier.object_id
|
28
28
|
|
29
29
|
# regression test
|
30
|
-
expect { root_2.clone }.not_to
|
31
|
-
expect { root_2.clone }.not_to
|
30
|
+
expect { root_2.clone }.not_to(change { root_2.quantifier.object_id })
|
31
|
+
expect { root_2.clone }.not_to(change { root_2.quantifier.text.object_id })
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('Subexpression#clone') do
|
@@ -48,7 +48,7 @@ RSpec.describe('Expression#clone') do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
# regression test
|
51
|
-
expect { root.clone }.not_to
|
51
|
+
expect { root.clone }.not_to(change { root.expressions.object_id })
|
52
52
|
end
|
53
53
|
|
54
54
|
specify('Group::Named#clone') do
|
@@ -69,7 +69,39 @@ RSpec.describe('Expression#clone') do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
# regression test
|
72
|
-
expect { root_1.clone }.not_to
|
72
|
+
expect { root_1.clone }.not_to(change { root_1.name.object_id })
|
73
|
+
end
|
74
|
+
|
75
|
+
specify('Group::Options#clone') do
|
76
|
+
root = RP.parse('foo(?i)bar')
|
77
|
+
copy = root.clone
|
78
|
+
|
79
|
+
expect(copy.to_s).to eq root.to_s
|
80
|
+
|
81
|
+
root_1 = root[1]
|
82
|
+
copy_1 = copy[1]
|
83
|
+
|
84
|
+
expect(root_1.option_changes).to eq copy_1.option_changes
|
85
|
+
expect(root_1.option_changes.object_id).not_to eq copy_1.option_changes.object_id
|
86
|
+
|
87
|
+
# regression test
|
88
|
+
expect { root_1.clone }.not_to(change { root_1.option_changes.object_id })
|
89
|
+
end
|
90
|
+
|
91
|
+
specify('Backreference::Base#clone') do
|
92
|
+
root = RP.parse('(foo)\1')
|
93
|
+
copy = root.clone
|
94
|
+
|
95
|
+
expect(copy.to_s).to eq root.to_s
|
96
|
+
|
97
|
+
root_1 = root[1]
|
98
|
+
copy_1 = copy[1]
|
99
|
+
|
100
|
+
expect(root_1.referenced_expression.to_s).to eq copy_1.referenced_expression.to_s
|
101
|
+
expect(root_1.referenced_expression.object_id).not_to eq copy_1.referenced_expression.object_id
|
102
|
+
|
103
|
+
# regression test
|
104
|
+
expect { root_1.clone }.not_to(change { root_1.referenced_expression.object_id })
|
73
105
|
end
|
74
106
|
|
75
107
|
specify('Sequence#clone') do
|
@@ -10,7 +10,7 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
|
|
10
10
|
space = root[0]
|
11
11
|
|
12
12
|
expect(space).to be_instance_of(FreeSpace::WhiteSpace)
|
13
|
-
expect { space.quantify(:dummy, '#') }.to raise_error(
|
13
|
+
expect { space.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
|
14
14
|
end
|
15
15
|
|
16
16
|
specify('comment quantify raises error') do
|
@@ -22,6 +22,6 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
|
|
22
22
|
comment = root[3]
|
23
23
|
|
24
24
|
expect(comment).to be_instance_of(FreeSpace::Comment)
|
25
|
-
expect { comment.quantify(:dummy, '#') }.to raise_error(
|
25
|
+
expect { comment.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
|
26
26
|
end
|
27
27
|
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
|
4
|
-
ML = described_class
|
3
|
+
ML = Regexp::MatchLength
|
5
4
|
|
5
|
+
RSpec.describe(Regexp::MatchLength) do
|
6
6
|
specify('literal') { expect(ML.of(/a/).minmax).to eq [1, 1] }
|
7
7
|
specify('literal sequence') { expect(ML.of(/abc/).minmax).to eq [3, 3] }
|
8
8
|
specify('dot') { expect(ML.of(/./).minmax).to eq [1, 1] }
|
data/spec/lexer/refcalls_spec.rb
CHANGED
@@ -32,6 +32,11 @@ RSpec.describe('RefCall lexing') do
|
|
32
32
|
include_examples 'lex', "(abc)\\g'1'",
|
33
33
|
3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
|
34
34
|
|
35
|
+
include_examples 'lex', '\g<0>',
|
36
|
+
0 => [:backref, :number_call, '\g<0>', 0, 5, 0, 0, 0]
|
37
|
+
include_examples 'lex', "\\g'0'",
|
38
|
+
0 => [:backref, :number_call, "\\g'0'", 0, 5, 0, 0, 0]
|
39
|
+
|
35
40
|
include_examples 'lex', '(abc)\g<-1>',
|
36
41
|
3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
|
37
42
|
include_examples 'lex', "(abc)\\g'-1'",
|
data/spec/parser/all_spec.rb
CHANGED
@@ -34,10 +34,10 @@ RSpec.describe(Regexp::Parser) do
|
|
34
34
|
end
|
35
35
|
|
36
36
|
specify('parse no quantifier target raises error') do
|
37
|
-
expect { RP.parse('?abc') }.to raise_error(
|
37
|
+
expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error)
|
38
38
|
end
|
39
39
|
|
40
40
|
specify('parse sequence no quantifier target raises error') do
|
41
|
-
expect { RP.parse('abc|?def') }.to raise_error(
|
41
|
+
expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error)
|
42
42
|
end
|
43
43
|
end
|
@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
|
|
29
29
|
include_examples 'parse', /(abc)\g'1'/,
|
30
30
|
1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
|
31
31
|
|
32
|
+
include_examples 'parse', '\g<0>',
|
33
|
+
0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
|
34
|
+
include_examples 'parse', "\\g'0'",
|
35
|
+
0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
|
36
|
+
|
32
37
|
include_examples 'parse', /(abc)\g<-1>/,
|
33
38
|
1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
|
34
39
|
include_examples 'parse', /(abc)\g'-1'/,
|
@@ -4,7 +4,7 @@ RSpec.describe('Escape scanning') do
|
|
4
4
|
include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
|
5
5
|
|
6
6
|
# not an escape outside a character set
|
7
|
-
include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1,
|
7
|
+
include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
|
8
8
|
|
9
9
|
include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
|
10
10
|
include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
|
@@ -5,9 +5,19 @@ RSpec.describe('RefCall scanning') do
|
|
5
5
|
include_examples 'scan', '(abc)\1' , 3 => [:backref, :number, '\1', 5, 7]
|
6
6
|
|
7
7
|
# Group back-references, named, numbered, and relative
|
8
|
+
#
|
9
|
+
# NOTE: only \g supports forward-looking references using '+', e.g. \g<+1>
|
10
|
+
# refers to the next group, but \k<+1> refers to a group named '+1'.
|
11
|
+
# Conversely, only \k supports addition or subtraction of a recursion level.
|
12
|
+
# E.g. \k<x+0> refers to a group named 'x' at the current recursion level,
|
13
|
+
# but \g<x+0> refers to a group named 'x+0'.
|
14
|
+
#
|
8
15
|
include_examples 'scan', '(?<X>abc)\k<X>', 3 => [:backref, :name_ref_ab, '\k<X>', 9, 14]
|
9
16
|
include_examples 'scan', "(?<X>abc)\\k'X'", 3 => [:backref, :name_ref_sq, "\\k'X'", 9, 14]
|
10
17
|
|
18
|
+
include_examples 'scan', '(?<+1>abc)\k<+1>', 3 => [:backref, :name_ref_ab, '\k<+1>', 10, 16]
|
19
|
+
include_examples 'scan', "(?<+1>abc)\\k'+1'", 3 => [:backref, :name_ref_sq, "\\k'+1'", 10, 16]
|
20
|
+
|
11
21
|
include_examples 'scan', '(abc)\k<1>', 3 => [:backref, :number_ref_ab, '\k<1>', 5, 10]
|
12
22
|
include_examples 'scan', "(abc)\\k'1'", 3 => [:backref, :number_ref_sq, "\\k'1'", 5, 10]
|
13
23
|
|
@@ -18,9 +28,15 @@ RSpec.describe('RefCall scanning') do
|
|
18
28
|
include_examples 'scan', '(?<X>abc)\g<X>', 3 => [:backref, :name_call_ab, '\g<X>', 9, 14]
|
19
29
|
include_examples 'scan', "(?<X>abc)\\g'X'", 3 => [:backref, :name_call_sq, "\\g'X'", 9, 14]
|
20
30
|
|
31
|
+
include_examples 'scan', '(?<X>abc)\g<X-1>', 3 => [:backref, :name_call_ab, '\g<X-1>', 9, 16]
|
32
|
+
include_examples 'scan', "(?<X>abc)\\g'X-1'", 3 => [:backref, :name_call_sq, "\\g'X-1'", 9, 16]
|
33
|
+
|
21
34
|
include_examples 'scan', '(abc)\g<1>', 3 => [:backref, :number_call_ab, '\g<1>', 5, 10]
|
22
35
|
include_examples 'scan', "(abc)\\g'1'", 3 => [:backref, :number_call_sq, "\\g'1'", 5, 10]
|
23
36
|
|
37
|
+
include_examples 'scan', 'a(b|\g<0>)', 4 => [:backref, :number_call_ab, '\g<0>', 4, 9]
|
38
|
+
include_examples 'scan', "a(b|\\g'0')", 4 => [:backref, :number_call_sq, "\\g'0'", 4, 9]
|
39
|
+
|
24
40
|
include_examples 'scan', '(abc)\g<-1>', 3 => [:backref, :number_rel_call_ab, '\g<-1>', 5, 11]
|
25
41
|
include_examples 'scan', "(abc)\\g'-1'", 3 => [:backref, :number_rel_call_sq, "\\g'-1'", 5, 11]
|
26
42
|
|
@@ -33,4 +49,7 @@ RSpec.describe('RefCall scanning') do
|
|
33
49
|
|
34
50
|
include_examples 'scan', '(abc)\k<1-0>', 3 => [:backref, :number_recursion_ref_ab, '\k<1-0>', 5, 12]
|
35
51
|
include_examples 'scan', "(abc)\\k'1-0'", 3 => [:backref, :number_recursion_ref_sq, "\\k'1-0'", 5, 12]
|
52
|
+
|
53
|
+
include_examples 'scan', '(abc)\k<+1-0>', 3 => [:backref, :name_recursion_ref_ab, '\k<+1-0>', 5, 13]
|
54
|
+
include_examples 'scan', "(abc)\\k'+1-0'", 3 => [:backref, :name_recursion_ref_sq, "\\k'+1-0'", 5, 13]
|
36
55
|
end
|
data/spec/scanner/sets_spec.rb
CHANGED
@@ -6,8 +6,18 @@ RSpec.describe('Set scanning') do
|
|
6
6
|
include_examples 'scan', /[^n]/, 1 => [:set, :negate, '^', 1, 2]
|
7
7
|
|
8
8
|
include_examples 'scan', /[c]/, 1 => [:literal, :literal, 'c', 1, 2]
|
9
|
-
include_examples 'scan', /[
|
10
|
-
|
9
|
+
include_examples 'scan', /[^d]/, 2 => [:literal, :literal, 'd', 2, 3]
|
10
|
+
|
11
|
+
include_examples 'scan', /[\b]/, 1 => [:escape, :backspace, '\b', 1, 3]
|
12
|
+
include_examples 'scan', /[A\bX]/, 2 => [:escape, :backspace, '\b', 2, 4]
|
13
|
+
|
14
|
+
include_examples 'scan', /[\a]/, 1 => [:escape, :bell, '\a', 1, 3]
|
15
|
+
include_examples 'scan', /[\e]/, 1 => [:escape, :escape, '\e', 1, 3]
|
16
|
+
include_examples 'scan', /[\f]/, 1 => [:escape, :form_feed, '\f', 1, 3]
|
17
|
+
include_examples 'scan', /[\n]/, 1 => [:escape, :newline, '\n', 1, 3]
|
18
|
+
include_examples 'scan', /[\r]/, 1 => [:escape, :carriage, '\r', 1, 3]
|
19
|
+
include_examples 'scan', /[\t]/, 1 => [:escape, :tab, '\t', 1, 3]
|
20
|
+
include_examples 'scan', /[\v]/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
|
11
21
|
|
12
22
|
include_examples 'scan', /[.]/, 1 => [:literal, :literal, '.', 1, 2]
|
13
23
|
include_examples 'scan', /[?]/, 1 => [:literal, :literal, '?', 1, 2]
|
@@ -18,22 +28,36 @@ RSpec.describe('Set scanning') do
|
|
18
28
|
include_examples 'scan', /[<]/, 1 => [:literal, :literal, '<', 1, 2]
|
19
29
|
include_examples 'scan', /[>]/, 1 => [:literal, :literal, '>', 1, 2]
|
20
30
|
|
21
|
-
include_examples 'scan',
|
22
|
-
|
23
|
-
include_examples 'scan', '[
|
31
|
+
include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
|
32
|
+
include_examples 'scan', '[\u0040]', 1 => [:escape, :codepoint, '\u0040', 1, 7]
|
33
|
+
include_examples 'scan', '[\u{40}]', 1 => [:escape, :codepoint_list, '\u{40}', 1, 7]
|
34
|
+
include_examples 'scan', '[\c2]', 1 => [:escape, :control, '\c2', 1, 4]
|
35
|
+
include_examples 'scan', '[\C-C]', 1 => [:escape, :control, '\C-C', 1, 5]
|
36
|
+
include_examples 'scan', '[\x20]', 1 => [:escape, :hex, '\x20', 1, 5]
|
37
|
+
include_examples 'scan', '[\M-Z]', 1 => [:escape, :meta_sequence, '\M-Z', 1, 5]
|
38
|
+
include_examples 'scan', '[\M-\C-X]', 1 => [:escape, :meta_sequence, '\M-\C-X', 1, 8]
|
39
|
+
include_examples 'scan', '[\\[]', 1 => [:escape, :set_open, '\[', 1, 3]
|
40
|
+
include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
|
41
|
+
include_examples 'scan', '[a\-]', 2 => [:escape, :literal, '\-', 2, 4]
|
42
|
+
include_examples 'scan', '[\-c]', 1 => [:escape, :literal, '\-', 1, 3]
|
43
|
+
include_examples 'scan', '[\.]', 1 => [:escape, :literal, '\.', 1, 3]
|
44
|
+
include_examples 'scan', '[\?]', 1 => [:escape, :literal, '\?', 1, 3]
|
45
|
+
include_examples 'scan', '[\*]', 1 => [:escape, :literal, '\*', 1, 3]
|
46
|
+
include_examples 'scan', '[\+]', 1 => [:escape, :literal, '\+', 1, 3]
|
47
|
+
include_examples 'scan', '[\|]', 1 => [:escape, :literal, '\|', 1, 3]
|
48
|
+
include_examples 'scan', '[\{]', 1 => [:escape, :literal, '\{', 1, 3]
|
49
|
+
include_examples 'scan', '[\}]', 1 => [:escape, :literal, '\}', 1, 3]
|
50
|
+
include_examples 'scan', '[\(]', 1 => [:escape, :literal, '\(', 1, 3]
|
51
|
+
include_examples 'scan', '[\)]', 1 => [:escape, :literal, '\)', 1, 3]
|
24
52
|
include_examples 'scan', '[\!]', 1 => [:escape, :literal, '\!', 1, 3]
|
25
53
|
include_examples 'scan', '[\#]', 1 => [:escape, :literal, '\#', 1, 3]
|
26
|
-
include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
|
27
|
-
include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
|
28
54
|
include_examples 'scan', '[\A]', 1 => [:escape, :literal, '\A', 1, 3]
|
29
55
|
include_examples 'scan', '[\z]', 1 => [:escape, :literal, '\z', 1, 3]
|
30
56
|
include_examples 'scan', '[\g]', 1 => [:escape, :literal, '\g', 1, 3]
|
31
57
|
include_examples 'scan', '[\K]', 1 => [:escape, :literal, '\K', 1, 3]
|
32
58
|
include_examples 'scan', '[\R]', 1 => [:escape, :literal, '\R', 1, 3]
|
33
59
|
include_examples 'scan', '[\X]', 1 => [:escape, :literal, '\X', 1, 3]
|
34
|
-
include_examples 'scan', '[\c2]', 1 => [:escape, :literal, '\c', 1, 3]
|
35
60
|
include_examples 'scan', '[\B]', 1 => [:escape, :literal, '\B', 1, 3]
|
36
|
-
include_examples 'scan', '[a\-c]', 2 => [:escape, :literal, '\-', 2, 4]
|
37
61
|
|
38
62
|
include_examples 'scan', /[\d]/, 1 => [:type, :digit, '\d', 1, 3]
|
39
63
|
include_examples 'scan', /[\da-z]/, 1 => [:type, :digit, '\d', 1, 3]
|
@@ -54,8 +78,14 @@ RSpec.describe('Set scanning') do
|
|
54
78
|
include_examples 'scan', /[a-b-]/, 4 => [:literal, :literal, '-', 4, 5]
|
55
79
|
include_examples 'scan', /[-a]/, 1 => [:literal, :literal, '-', 1, 2]
|
56
80
|
include_examples 'scan', /[a-c^]/, 4 => [:literal, :literal, '^', 4, 5]
|
57
|
-
include_examples 'scan', /[a-bd-f]/, 2 => [:set,
|
58
|
-
include_examples 'scan', /[a-cd-f]/, 5 => [:set,
|
81
|
+
include_examples 'scan', /[a-bd-f]/, 2 => [:set, :range, '-', 2, 3]
|
82
|
+
include_examples 'scan', /[a-cd-f]/, 5 => [:set, :range, '-', 5, 6]
|
83
|
+
# this is a buggy range, it matches only `c`, but not `a`, `b` or `-`
|
84
|
+
include_examples 'scan', /[a-[c]]/, 2 => [:set, :range, '-', 2, 3]
|
85
|
+
# these are not ranges, they match `a`, `c` and `-` (or non-`-` if negated)
|
86
|
+
include_examples 'scan', /[[a]-[c]]/, 4 => [:literal, :literal, '-', 4, 5]
|
87
|
+
include_examples 'scan', /[[a]-c]/, 4 => [:literal, :literal, '-', 4, 5]
|
88
|
+
include_examples 'scan', /[^-c]/, 2 => [:literal, :literal, '-', 2, 3]
|
59
89
|
|
60
90
|
include_examples 'scan', /[a[:digit:]c]/, 2 => [:posixclass, :digit, '[:digit:]', 2, 11]
|
61
91
|
include_examples 'scan', /[[:digit:][:space:]]/, 2 => [:posixclass, :space, '[:space:]', 10, 19]
|
@@ -64,6 +94,7 @@ RSpec.describe('Set scanning') do
|
|
64
94
|
include_examples 'scan', /[a-d&&g-h]/, 4 => [:set, :intersection, '&&', 4, 6]
|
65
95
|
include_examples 'scan', /[a&&]/, 2 => [:set, :intersection, '&&', 2, 4]
|
66
96
|
include_examples 'scan', /[&&z]/, 1 => [:set, :intersection, '&&', 1, 3]
|
97
|
+
include_examples 'scan', /[&&]/, 1 => [:set, :intersection, '&&', 1, 3]
|
67
98
|
|
68
99
|
include_examples 'scan', /[a\p{digit}c]/, 2 => [:property, :digit, '\p{digit}', 2, 11]
|
69
100
|
include_examples 'scan', /[a\P{digit}c]/, 2 => [:nonproperty, :digit, '\P{digit}', 2, 11]
|