regexp_parser 1.8.2 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +93 -0
- data/Gemfile +6 -1
- data/README.md +1 -4
- data/Rakefile +8 -8
- data/lib/regexp_parser.rb +1 -0
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression.rb +5 -18
- data/lib/regexp_parser/expression/classes/backref.rb +5 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -2
- data/lib/regexp_parser/expression/classes/group.rb +28 -3
- data/lib/regexp_parser/expression/classes/property.rb +1 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -16
- data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/expression/quantifier.rb +10 -1
- data/lib/regexp_parser/expression/sequence.rb +3 -19
- data/lib/regexp_parser/expression/subexpression.rb +1 -1
- data/lib/regexp_parser/lexer.rb +2 -2
- data/lib/regexp_parser/parser.rb +306 -332
- data/lib/regexp_parser/scanner.rb +1272 -1338
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +206 -238
- data/lib/regexp_parser/syntax.rb +7 -7
- data/lib/regexp_parser/syntax/any.rb +3 -3
- data/lib/regexp_parser/syntax/base.rb +1 -1
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/expression/base_spec.rb +10 -0
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/expression/subexpression_spec.rb +1 -1
- data/spec/expression/to_s_spec.rb +39 -31
- data/spec/lexer/literals_spec.rb +24 -49
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/errors_spec.rb +1 -1
- data/spec/parser/escapes_spec.rb +1 -1
- data/spec/parser/quantifiers_spec.rb +16 -0
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/parser/set/ranges_spec.rb +3 -3
- data/spec/scanner/escapes_spec.rb +8 -1
- data/spec/scanner/groups_spec.rb +10 -1
- data/spec/scanner/literals_spec.rb +28 -38
- data/spec/scanner/quantifiers_spec.rb +18 -13
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +65 -16
- data/spec/spec_helper.rb +1 -0
- metadata +4 -7
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
@@ -11,6 +11,7 @@ RSpec.describe('Quantifier parsing') do
|
|
11
11
|
expect(exp.quantifier.min).to eq min
|
12
12
|
expect(exp.quantifier.max).to eq max
|
13
13
|
expect(exp.quantifier.mode).to eq mode
|
14
|
+
expect(exp.quantifier.text).to eq text
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
@@ -37,6 +38,21 @@ RSpec.describe('Quantifier parsing') do
|
|
37
38
|
include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
|
38
39
|
include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
|
39
40
|
|
41
|
+
# special case: exps with chained quantifiers are wrapped in implicit passive groups
|
42
|
+
include_examples 'parse', /a+{2}{3}/,
|
43
|
+
0 => [
|
44
|
+
:group, :passive, Group::Passive, implicit?: true, level: 0,
|
45
|
+
quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
|
46
|
+
],
|
47
|
+
[0, 0] => [
|
48
|
+
:group, :passive, Group::Passive, implicit?: true, level: 1,
|
49
|
+
quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
|
50
|
+
],
|
51
|
+
[0, 0, 0] => [
|
52
|
+
:literal, :literal, Literal, text: 'a', level: 2,
|
53
|
+
quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
|
54
|
+
]
|
55
|
+
|
40
56
|
specify('mode-checking methods') do
|
41
57
|
exp = RP.parse(/a??/).first
|
42
58
|
|
@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
|
|
29
29
|
include_examples 'parse', /(abc)\g'1'/,
|
30
30
|
1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
|
31
31
|
|
32
|
+
include_examples 'parse', '\g<0>',
|
33
|
+
0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
|
34
|
+
include_examples 'parse', "\\g'0'",
|
35
|
+
0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
|
36
|
+
|
32
37
|
include_examples 'parse', /(abc)\g<-1>/,
|
33
38
|
1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
|
34
39
|
include_examples 'parse', /(abc)\g'-1'/,
|
@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
specify('parse set range hex') do
|
20
|
-
root = RP.parse('[\\x00-\\
|
20
|
+
root = RP.parse('[\\x00-\\x22]')
|
21
21
|
set = root[0]
|
22
22
|
range = set[0]
|
23
23
|
|
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
26
26
|
expect(range.count).to eq 2
|
27
27
|
expect(range.first.to_s).to eq '\\x00'
|
28
28
|
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
|
-
expect(range.last.to_s).to eq '\\
|
29
|
+
expect(range.last.to_s).to eq '\\x22'
|
30
30
|
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set).to match
|
31
|
+
expect(set).to match "\x11"
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('parse set range unicode') do
|
@@ -4,14 +4,20 @@ RSpec.describe('Escape scanning') do
|
|
4
4
|
include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
|
5
5
|
|
6
6
|
# not an escape outside a character set
|
7
|
-
include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1,
|
7
|
+
include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
|
8
8
|
|
9
9
|
include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
|
10
10
|
include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
|
11
11
|
include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
|
12
12
|
include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
|
13
13
|
|
14
|
+
# ineffectual literal escapes
|
15
|
+
# these cause "Unknown escape" warnings in Ruby for ascii chars,
|
16
|
+
# and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
|
17
|
+
# In terms of matching, Ruby treats them both like non-escaped literals.
|
14
18
|
include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
|
19
|
+
include_examples 'scan', 'a\üc', 1 => [:escape, :literal, '\ü', 1, 3]
|
20
|
+
include_examples 'scan', 'a\😋c', 1 => [:escape, :literal, '\😋', 1, 3]
|
15
21
|
|
16
22
|
# these incomplete ref/call sequences are treated as literal escapes by Ruby
|
17
23
|
include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
|
@@ -21,6 +27,7 @@ RSpec.describe('Escape scanning') do
|
|
21
27
|
include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
|
22
28
|
include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
|
23
29
|
|
30
|
+
include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
|
24
31
|
include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
|
25
32
|
include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
|
26
33
|
|
data/spec/scanner/groups_spec.rb
CHANGED
@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
|
|
5
5
|
include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
|
6
6
|
include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
|
7
7
|
|
8
|
+
# Named groups
|
9
|
+
# only names that start with a hyphen or digit (ascii or other) are invalid
|
8
10
|
include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
|
9
11
|
include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
|
10
|
-
|
11
12
|
include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
|
12
13
|
include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
|
14
|
+
include_examples 'scan', '(?<name-1>abc)', 0 => [:group, :named_ab, '(?<name-1>', 0,10]
|
15
|
+
include_examples 'scan', "(?'name-1'abc)", 0 => [:group, :named_sq, "(?'name-1'", 0,10]
|
16
|
+
include_examples 'scan', "(?<name'1>abc)", 0 => [:group, :named_ab, "(?<name'1>", 0,10]
|
17
|
+
include_examples 'scan', "(?'name>1'abc)", 0 => [:group, :named_sq, "(?'name>1'", 0,10]
|
18
|
+
include_examples 'scan', '(?<üüuuüü>abc)', 0 => [:group, :named_ab, '(?<üüuuüü>', 0,10]
|
19
|
+
include_examples 'scan', "(?'üüuuüü'abc)", 0 => [:group, :named_sq, "(?'üüuuüü'", 0,10]
|
20
|
+
include_examples 'scan', "(?<😋1234😋>abc)", 0 => [:group, :named_ab, "(?<😋1234😋>", 0,10]
|
21
|
+
include_examples 'scan', "(?'😋1234😋'abc)", 0 => [:group, :named_sq, "(?'😋1234😋'", 0,10]
|
13
22
|
|
14
23
|
include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
|
15
24
|
include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
|
@@ -2,48 +2,38 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
RSpec.describe('UTF8 scanning') do
|
4
4
|
# ascii, single byte characters
|
5
|
-
include_examples 'scan', 'a',
|
5
|
+
include_examples 'scan', 'a',
|
6
|
+
0 => [:literal, :literal, 'a', 0, 1]
|
6
7
|
|
7
|
-
include_examples 'scan', 'ab+',
|
8
|
-
|
8
|
+
include_examples 'scan', 'ab+',
|
9
|
+
0 => [:literal, :literal, 'ab', 0, 2],
|
10
|
+
1 => [:quantifier, :one_or_more, '+', 2, 3]
|
9
11
|
|
10
|
-
# 2 byte wide characters
|
11
|
-
include_examples 'scan', '
|
12
|
-
|
13
|
-
include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
|
14
|
-
include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
|
15
|
-
|
16
|
-
include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
|
17
|
-
include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
|
18
|
-
include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
|
19
|
-
include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
|
20
|
-
|
21
|
-
include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
|
22
|
-
include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
|
23
|
-
include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
|
24
|
-
include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
|
25
|
-
include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
|
26
|
-
include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
|
27
|
-
include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
|
12
|
+
# 2 byte wide characters
|
13
|
+
include_examples 'scan', 'äöü',
|
14
|
+
0 => [:literal, :literal, 'äöü', 0, 3]
|
28
15
|
|
29
16
|
# 3 byte wide characters, Japanese
|
30
|
-
include_examples 'scan', 'ab?れます+cd',
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
17
|
+
include_examples 'scan', 'ab?れます+cd',
|
18
|
+
0 => [:literal, :literal, 'ab', 0, 2],
|
19
|
+
1 => [:quantifier, :zero_or_one, '?', 2, 3],
|
20
|
+
2 => [:literal, :literal, 'れます', 3, 6],
|
21
|
+
3 => [:quantifier, :one_or_more, '+', 6, 7],
|
22
|
+
4 => [:literal, :literal, 'cd', 7, 9]
|
35
23
|
|
36
24
|
# 4 byte wide characters, Osmanya
|
37
|
-
include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
include_examples 'scan', 'mu𝄞?si*𝄫c+',
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
25
|
+
include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
|
26
|
+
0 => [:literal, :literal, '𐒀𐒁', 0, 2],
|
27
|
+
1 => [:quantifier, :zero_or_one, '?', 2, 3],
|
28
|
+
2 => [:literal, :literal, '𐒂ab', 3, 6],
|
29
|
+
3 => [:quantifier, :one_or_more, '+', 6, 7],
|
30
|
+
4 => [:literal, :literal, '𐒃', 7, 8]
|
31
|
+
|
32
|
+
include_examples 'scan', 'mu𝄞?si*𝄫c+',
|
33
|
+
0 => [:literal, :literal, 'mu𝄞', 0, 3],
|
34
|
+
1 => [:quantifier, :zero_or_one, '?', 3, 4],
|
35
|
+
2 => [:literal, :literal, 'si', 4, 6],
|
36
|
+
3 => [:quantifier, :zero_or_more, '*', 6, 7],
|
37
|
+
4 => [:literal, :literal, '𝄫c', 7, 9],
|
38
|
+
5 => [:quantifier, :one_or_more, '+', 9, 10]
|
49
39
|
end
|
@@ -1,20 +1,25 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('Quantifier scanning') do
|
4
|
-
include_examples 'scan', 'a?',
|
5
|
-
include_examples 'scan', 'a??',
|
6
|
-
include_examples 'scan', 'a?+',
|
4
|
+
include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
|
5
|
+
include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
|
6
|
+
include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]
|
7
7
|
|
8
|
-
include_examples 'scan', 'a*',
|
9
|
-
include_examples 'scan', 'a*?',
|
10
|
-
include_examples 'scan', 'a*+',
|
8
|
+
include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
|
9
|
+
include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
|
10
|
+
include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]
|
11
11
|
|
12
|
-
include_examples 'scan', 'a+',
|
13
|
-
include_examples 'scan', 'a+?',
|
14
|
-
include_examples 'scan', 'a++',
|
12
|
+
include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
|
13
|
+
include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
|
14
|
+
include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]
|
15
15
|
|
16
|
-
include_examples 'scan', 'a{2}',
|
17
|
-
include_examples 'scan', 'a{2,}',
|
18
|
-
include_examples 'scan', 'a{,2}',
|
19
|
-
include_examples 'scan', 'a{2,4}',
|
16
|
+
include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
|
17
|
+
include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
|
18
|
+
include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
|
19
|
+
include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]
|
20
|
+
|
21
|
+
# special case: chained quantifiers
|
22
|
+
include_examples 'scan', 'a+{2}{3}', 1 => [:quantifier, :one_or_more, '+', 1, 2]
|
23
|
+
include_examples 'scan', 'a+{2}{3}', 2 => [:quantifier, :interval, '{2}', 2, 5]
|
24
|
+
include_examples 'scan', 'a+{2}{3}', 3 => [:quantifier, :interval, '{3}', 5, 8]
|
20
25
|
end
|
@@ -5,9 +5,19 @@ RSpec.describe('RefCall scanning') do
|
|
5
5
|
include_examples 'scan', '(abc)\1' , 3 => [:backref, :number, '\1', 5, 7]
|
6
6
|
|
7
7
|
# Group back-references, named, numbered, and relative
|
8
|
+
#
|
9
|
+
# NOTE: only \g supports forward-looking references using '+', e.g. \g<+1>
|
10
|
+
# refers to the next group, but \k<+1> refers to a group named '+1'.
|
11
|
+
# Inversely, only \k supports addition or subtraction of a recursion level.
|
12
|
+
# E.g. \k<x+0> refers to a group named 'x' at the current recursion level,
|
13
|
+
# but \g<x+0> refers to a group named 'x+0'.
|
14
|
+
#
|
8
15
|
include_examples 'scan', '(?<X>abc)\k<X>', 3 => [:backref, :name_ref_ab, '\k<X>', 9, 14]
|
9
16
|
include_examples 'scan', "(?<X>abc)\\k'X'", 3 => [:backref, :name_ref_sq, "\\k'X'", 9, 14]
|
10
17
|
|
18
|
+
include_examples 'scan', '(?<+1>abc)\k<+1>', 3 => [:backref, :name_ref_ab, '\k<+1>', 10, 16]
|
19
|
+
include_examples 'scan', "(?<+1>abc)\\k'+1'", 3 => [:backref, :name_ref_sq, "\\k'+1'", 10, 16]
|
20
|
+
|
11
21
|
include_examples 'scan', '(abc)\k<1>', 3 => [:backref, :number_ref_ab, '\k<1>', 5, 10]
|
12
22
|
include_examples 'scan', "(abc)\\k'1'", 3 => [:backref, :number_ref_sq, "\\k'1'", 5, 10]
|
13
23
|
|
@@ -18,9 +28,15 @@ RSpec.describe('RefCall scanning') do
|
|
18
28
|
include_examples 'scan', '(?<X>abc)\g<X>', 3 => [:backref, :name_call_ab, '\g<X>', 9, 14]
|
19
29
|
include_examples 'scan', "(?<X>abc)\\g'X'", 3 => [:backref, :name_call_sq, "\\g'X'", 9, 14]
|
20
30
|
|
31
|
+
include_examples 'scan', '(?<X>abc)\g<X-1>', 3 => [:backref, :name_call_ab, '\g<X-1>', 9, 16]
|
32
|
+
include_examples 'scan', "(?<X>abc)\\g'X-1'", 3 => [:backref, :name_call_sq, "\\g'X-1'", 9, 16]
|
33
|
+
|
21
34
|
include_examples 'scan', '(abc)\g<1>', 3 => [:backref, :number_call_ab, '\g<1>', 5, 10]
|
22
35
|
include_examples 'scan', "(abc)\\g'1'", 3 => [:backref, :number_call_sq, "\\g'1'", 5, 10]
|
23
36
|
|
37
|
+
include_examples 'scan', 'a(b|\g<0>)', 4 => [:backref, :number_call_ab, '\g<0>', 4, 9]
|
38
|
+
include_examples 'scan', "a(b|\\g'0')", 4 => [:backref, :number_call_sq, "\\g'0'", 4, 9]
|
39
|
+
|
24
40
|
include_examples 'scan', '(abc)\g<-1>', 3 => [:backref, :number_rel_call_ab, '\g<-1>', 5, 11]
|
25
41
|
include_examples 'scan', "(abc)\\g'-1'", 3 => [:backref, :number_rel_call_sq, "\\g'-1'", 5, 11]
|
26
42
|
|
@@ -33,4 +49,7 @@ RSpec.describe('RefCall scanning') do
|
|
33
49
|
|
34
50
|
include_examples 'scan', '(abc)\k<1-0>', 3 => [:backref, :number_recursion_ref_ab, '\k<1-0>', 5, 12]
|
35
51
|
include_examples 'scan', "(abc)\\k'1-0'", 3 => [:backref, :number_recursion_ref_sq, "\\k'1-0'", 5, 12]
|
52
|
+
|
53
|
+
include_examples 'scan', '(abc)\k<+1-0>', 3 => [:backref, :name_recursion_ref_ab, '\k<+1-0>', 5, 13]
|
54
|
+
include_examples 'scan', "(abc)\\k'+1-0'", 3 => [:backref, :name_recursion_ref_sq, "\\k'+1-0'", 5, 13]
|
36
55
|
end
|
data/spec/scanner/sets_spec.rb
CHANGED
@@ -6,8 +6,18 @@ RSpec.describe('Set scanning') do
|
|
6
6
|
include_examples 'scan', /[^n]/, 1 => [:set, :negate, '^', 1, 2]
|
7
7
|
|
8
8
|
include_examples 'scan', /[c]/, 1 => [:literal, :literal, 'c', 1, 2]
|
9
|
-
include_examples 'scan', /[
|
10
|
-
|
9
|
+
include_examples 'scan', /[^d]/, 2 => [:literal, :literal, 'd', 2, 3]
|
10
|
+
|
11
|
+
include_examples 'scan', /[\b]/, 1 => [:escape, :backspace, '\b', 1, 3]
|
12
|
+
include_examples 'scan', /[A\bX]/, 2 => [:escape, :backspace, '\b', 2, 4]
|
13
|
+
|
14
|
+
include_examples 'scan', /[\a]/, 1 => [:escape, :bell, '\a', 1, 3]
|
15
|
+
include_examples 'scan', /[\e]/, 1 => [:escape, :escape, '\e', 1, 3]
|
16
|
+
include_examples 'scan', /[\f]/, 1 => [:escape, :form_feed, '\f', 1, 3]
|
17
|
+
include_examples 'scan', /[\n]/, 1 => [:escape, :newline, '\n', 1, 3]
|
18
|
+
include_examples 'scan', /[\r]/, 1 => [:escape, :carriage, '\r', 1, 3]
|
19
|
+
include_examples 'scan', /[\t]/, 1 => [:escape, :tab, '\t', 1, 3]
|
20
|
+
include_examples 'scan', /[\v]/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
|
11
21
|
|
12
22
|
include_examples 'scan', /[.]/, 1 => [:literal, :literal, '.', 1, 2]
|
13
23
|
include_examples 'scan', /[?]/, 1 => [:literal, :literal, '?', 1, 2]
|
@@ -18,24 +28,36 @@ RSpec.describe('Set scanning') do
|
|
18
28
|
include_examples 'scan', /[<]/, 1 => [:literal, :literal, '<', 1, 2]
|
19
29
|
include_examples 'scan', /[>]/, 1 => [:literal, :literal, '>', 1, 2]
|
20
30
|
|
21
|
-
include_examples 'scan',
|
22
|
-
|
23
|
-
include_examples 'scan',
|
24
|
-
|
25
|
-
include_examples 'scan', '[
|
31
|
+
include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
|
32
|
+
include_examples 'scan', '[\u0040]', 1 => [:escape, :codepoint, '\u0040', 1, 7]
|
33
|
+
include_examples 'scan', '[\u{40}]', 1 => [:escape, :codepoint_list, '\u{40}', 1, 7]
|
34
|
+
include_examples 'scan', '[\c2]', 1 => [:escape, :control, '\c2', 1, 4]
|
35
|
+
include_examples 'scan', '[\C-C]', 1 => [:escape, :control, '\C-C', 1, 5]
|
36
|
+
include_examples 'scan', '[\x20]', 1 => [:escape, :hex, '\x20', 1, 5]
|
37
|
+
include_examples 'scan', '[\M-Z]', 1 => [:escape, :meta_sequence, '\M-Z', 1, 5]
|
38
|
+
include_examples 'scan', '[\M-\C-X]', 1 => [:escape, :meta_sequence, '\M-\C-X', 1, 8]
|
39
|
+
include_examples 'scan', '[\\[]', 1 => [:escape, :set_open, '\[', 1, 3]
|
40
|
+
include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
|
41
|
+
include_examples 'scan', '[a\-]', 2 => [:escape, :literal, '\-', 2, 4]
|
42
|
+
include_examples 'scan', '[\-c]', 1 => [:escape, :literal, '\-', 1, 3]
|
43
|
+
include_examples 'scan', '[\.]', 1 => [:escape, :literal, '\.', 1, 3]
|
44
|
+
include_examples 'scan', '[\?]', 1 => [:escape, :literal, '\?', 1, 3]
|
45
|
+
include_examples 'scan', '[\*]', 1 => [:escape, :literal, '\*', 1, 3]
|
46
|
+
include_examples 'scan', '[\+]', 1 => [:escape, :literal, '\+', 1, 3]
|
47
|
+
include_examples 'scan', '[\|]', 1 => [:escape, :literal, '\|', 1, 3]
|
48
|
+
include_examples 'scan', '[\{]', 1 => [:escape, :literal, '\{', 1, 3]
|
49
|
+
include_examples 'scan', '[\}]', 1 => [:escape, :literal, '\}', 1, 3]
|
50
|
+
include_examples 'scan', '[\(]', 1 => [:escape, :literal, '\(', 1, 3]
|
51
|
+
include_examples 'scan', '[\)]', 1 => [:escape, :literal, '\)', 1, 3]
|
26
52
|
include_examples 'scan', '[\!]', 1 => [:escape, :literal, '\!', 1, 3]
|
27
53
|
include_examples 'scan', '[\#]', 1 => [:escape, :literal, '\#', 1, 3]
|
28
|
-
include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
|
29
|
-
include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
|
30
54
|
include_examples 'scan', '[\A]', 1 => [:escape, :literal, '\A', 1, 3]
|
31
55
|
include_examples 'scan', '[\z]', 1 => [:escape, :literal, '\z', 1, 3]
|
32
56
|
include_examples 'scan', '[\g]', 1 => [:escape, :literal, '\g', 1, 3]
|
33
57
|
include_examples 'scan', '[\K]', 1 => [:escape, :literal, '\K', 1, 3]
|
34
58
|
include_examples 'scan', '[\R]', 1 => [:escape, :literal, '\R', 1, 3]
|
35
59
|
include_examples 'scan', '[\X]', 1 => [:escape, :literal, '\X', 1, 3]
|
36
|
-
include_examples 'scan', '[\c2]', 1 => [:escape, :literal, '\c', 1, 3]
|
37
60
|
include_examples 'scan', '[\B]', 1 => [:escape, :literal, '\B', 1, 3]
|
38
|
-
include_examples 'scan', '[a\-c]', 2 => [:escape, :literal, '\-', 2, 4]
|
39
61
|
|
40
62
|
include_examples 'scan', /[\d]/, 1 => [:type, :digit, '\d', 1, 3]
|
41
63
|
include_examples 'scan', /[\da-z]/, 1 => [:type, :digit, '\d', 1, 3]
|
@@ -56,19 +78,23 @@ RSpec.describe('Set scanning') do
|
|
56
78
|
include_examples 'scan', /[a-b-]/, 4 => [:literal, :literal, '-', 4, 5]
|
57
79
|
include_examples 'scan', /[-a]/, 1 => [:literal, :literal, '-', 1, 2]
|
58
80
|
include_examples 'scan', /[a-c^]/, 4 => [:literal, :literal, '^', 4, 5]
|
59
|
-
include_examples 'scan', /[a-bd-f]/, 2 => [:set,
|
60
|
-
include_examples 'scan', /[a-cd-f]/, 5 => [:set,
|
81
|
+
include_examples 'scan', /[a-bd-f]/, 2 => [:set, :range, '-', 2, 3]
|
82
|
+
include_examples 'scan', /[a-cd-f]/, 5 => [:set, :range, '-', 5, 6]
|
83
|
+
# this is a buggy range, it matches only `c`, but not `a`, `b` or `-`
|
84
|
+
include_examples 'scan', /[a-[c]]/, 2 => [:set, :range, '-', 2, 3]
|
85
|
+
# these are not ranges, they match `a`, `c` and `-` (or non-`-` if negated)
|
86
|
+
include_examples 'scan', /[[a]-[c]]/, 4 => [:literal, :literal, '-', 4, 5]
|
87
|
+
include_examples 'scan', /[[a]-c]/, 4 => [:literal, :literal, '-', 4, 5]
|
88
|
+
include_examples 'scan', /[^-c]/, 2 => [:literal, :literal, '-', 2, 3]
|
61
89
|
|
62
90
|
include_examples 'scan', /[a[:digit:]c]/, 2 => [:posixclass, :digit, '[:digit:]', 2, 11]
|
63
91
|
include_examples 'scan', /[[:digit:][:space:]]/, 2 => [:posixclass, :space, '[:space:]', 10, 19]
|
64
92
|
include_examples 'scan', /[[:^digit:]]/, 1 => [:nonposixclass, :digit, '[:^digit:]', 1, 11]
|
65
93
|
|
66
|
-
include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
|
67
|
-
include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
|
68
|
-
|
69
94
|
include_examples 'scan', /[a-d&&g-h]/, 4 => [:set, :intersection, '&&', 4, 6]
|
70
95
|
include_examples 'scan', /[a&&]/, 2 => [:set, :intersection, '&&', 2, 4]
|
71
96
|
include_examples 'scan', /[&&z]/, 1 => [:set, :intersection, '&&', 1, 3]
|
97
|
+
include_examples 'scan', /[&&]/, 1 => [:set, :intersection, '&&', 1, 3]
|
72
98
|
|
73
99
|
include_examples 'scan', /[a\p{digit}c]/, 2 => [:property, :digit, '\p{digit}', 2, 11]
|
74
100
|
include_examples 'scan', /[a\P{digit}c]/, 2 => [:nonproperty, :digit, '\P{digit}', 2, 11]
|
@@ -90,6 +116,29 @@ RSpec.describe('Set scanning') do
|
|
90
116
|
8 => [:set, :range, '-', 9, 10],
|
91
117
|
10=> [:set, :close, ']', 11, 12]
|
92
118
|
|
119
|
+
# Collations/collating sequences and character equivalents are not enabled
|
120
|
+
# in Ruby at the moment. If they ever are, enable them in the scanner,
|
121
|
+
# add them to a new syntax version, and handle them in the parser. Until then,
|
122
|
+
# expect them to be scanned as regular subsets containing literals.
|
123
|
+
# include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
|
124
|
+
# include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
|
125
|
+
include_examples 'scan', '[a[.a-b.]c]',
|
126
|
+
2 => [:set, :open, '[', 2, 3],
|
127
|
+
3 => [:literal, :literal, '.', 3, 4],
|
128
|
+
4 => [:literal, :literal, 'a', 4, 5]
|
129
|
+
include_examples 'scan', '[a[=e=]c]',
|
130
|
+
2 => [:set, :open, '[', 2, 3],
|
131
|
+
3 => [:literal, :literal, '=', 3, 4],
|
132
|
+
4 => [:literal, :literal, 'e', 4, 5]
|
133
|
+
|
134
|
+
# multi-byte characters should not affect indices
|
135
|
+
include_examples 'scan', /[れます]/,
|
136
|
+
0 => [:set, :open, '[', 0, 1],
|
137
|
+
1 => [:literal, :literal, 'れ', 1, 2],
|
138
|
+
2 => [:literal, :literal, 'ま', 2, 3],
|
139
|
+
3 => [:literal, :literal, 'す', 3, 4],
|
140
|
+
4 => [:set, :close, ']', 4, 5]
|
141
|
+
|
93
142
|
specify('set literal encoding') do
|
94
143
|
text = RS.scan('[a]')[1][2].to_s
|
95
144
|
expect(text).to eq 'a'
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -23,6 +23,7 @@ files:
|
|
23
23
|
- README.md
|
24
24
|
- Rakefile
|
25
25
|
- lib/regexp_parser.rb
|
26
|
+
- lib/regexp_parser/error.rb
|
26
27
|
- lib/regexp_parser/expression.rb
|
27
28
|
- lib/regexp_parser/expression/classes/alternation.rb
|
28
29
|
- lib/regexp_parser/expression/classes/anchor.rb
|
@@ -102,8 +103,6 @@ files:
|
|
102
103
|
- spec/expression/methods/tests_spec.rb
|
103
104
|
- spec/expression/methods/traverse_spec.rb
|
104
105
|
- spec/expression/options_spec.rb
|
105
|
-
- spec/expression/root_spec.rb
|
106
|
-
- spec/expression/sequence_spec.rb
|
107
106
|
- spec/expression/subexpression_spec.rb
|
108
107
|
- spec/expression/to_h_spec.rb
|
109
108
|
- spec/expression/to_s_spec.rb
|
@@ -185,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
185
184
|
- !ruby/object:Gem::Version
|
186
185
|
version: '0'
|
187
186
|
requirements: []
|
188
|
-
rubygems_version: 3.2.
|
187
|
+
rubygems_version: 3.2.3
|
189
188
|
signing_key:
|
190
189
|
specification_version: 4
|
191
190
|
summary: Scanner, lexer, parser for ruby's regular expressions
|
@@ -200,8 +199,6 @@ test_files:
|
|
200
199
|
- spec/expression/methods/tests_spec.rb
|
201
200
|
- spec/expression/methods/traverse_spec.rb
|
202
201
|
- spec/expression/options_spec.rb
|
203
|
-
- spec/expression/root_spec.rb
|
204
|
-
- spec/expression/sequence_spec.rb
|
205
202
|
- spec/expression/subexpression_spec.rb
|
206
203
|
- spec/expression/to_h_spec.rb
|
207
204
|
- spec/expression/to_s_spec.rb
|