regexp_parser 1.7.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +80 -1
- data/README.md +24 -12
- data/lib/regexp_parser/expression.rb +10 -19
- data/lib/regexp_parser/expression/classes/group.rb +17 -2
- data/lib/regexp_parser/expression/classes/root.rb +4 -16
- data/lib/regexp_parser/expression/quantifier.rb +9 -0
- data/lib/regexp_parser/expression/sequence.rb +0 -10
- data/lib/regexp_parser/lexer.rb +6 -6
- data/lib/regexp_parser/parser.rb +45 -12
- data/lib/regexp_parser/scanner.rb +1305 -1193
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +194 -171
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +1 -1
- data/spec/expression/base_spec.rb +10 -0
- data/spec/expression/to_s_spec.rb +16 -0
- data/spec/lexer/delimiters_spec.rb +68 -0
- data/spec/lexer/literals_spec.rb +24 -49
- data/spec/parser/escapes_spec.rb +1 -1
- data/spec/parser/options_spec.rb +28 -0
- data/spec/parser/quantifiers_spec.rb +16 -0
- data/spec/parser/set/ranges_spec.rb +3 -3
- data/spec/scanner/delimiters_spec.rb +52 -0
- data/spec/scanner/errors_spec.rb +0 -1
- data/spec/scanner/escapes_spec.rb +10 -0
- data/spec/scanner/free_space_spec.rb +32 -0
- data/spec/scanner/literals_spec.rb +28 -38
- data/spec/scanner/options_spec.rb +36 -0
- data/spec/scanner/quantifiers_spec.rb +18 -13
- data/spec/scanner/sets_spec.rb +8 -2
- metadata +65 -61
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
@@ -74,9 +74,9 @@ module Regexp::Syntax
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def warn_if_future_version(const_name)
|
77
|
-
return if comparable_version(const_name) < comparable_version('
|
77
|
+
return if comparable_version(const_name) < comparable_version('4.0.0')
|
78
78
|
|
79
|
-
warn('This library has only been tested up to Ruby
|
79
|
+
warn('This library has only been tested up to Ruby 3.x, '\
|
80
80
|
"but you are running with #{const_get(const_name).inspect}")
|
81
81
|
end
|
82
82
|
end
|
data/regexp_parser.gemspec
CHANGED
@@ -91,4 +91,14 @@ RSpec.describe(Regexp::Expression::Base) do
|
|
91
91
|
expect(RP.parse(/a*/)[0].repetitions).to eq 0..(Float::INFINITY)
|
92
92
|
expect(RP.parse(/a+/)[0].repetitions).to eq 1..(Float::INFINITY)
|
93
93
|
end
|
94
|
+
|
95
|
+
specify('#base_length') do
|
96
|
+
expect(RP.parse(/(aa)/)[0].base_length).to eq 4
|
97
|
+
expect(RP.parse(/(aa){42}/)[0].base_length).to eq 4
|
98
|
+
end
|
99
|
+
|
100
|
+
specify('#full_length') do
|
101
|
+
expect(RP.parse(/(aa)/)[0].full_length).to eq 4
|
102
|
+
expect(RP.parse(/(aa){42}/)[0].full_length).to eq 8
|
103
|
+
end
|
94
104
|
end
|
@@ -97,4 +97,20 @@ RSpec.describe('Expression#to_s') do
|
|
97
97
|
|
98
98
|
expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
|
99
99
|
end
|
100
|
+
|
101
|
+
# special case: implicit groups used for chained quantifiers produce no parens
|
102
|
+
specify 'chained quantifiers #to_s' do
|
103
|
+
pattern = /a+{1}{2}/
|
104
|
+
root = RP.parse(pattern)
|
105
|
+
expect(root.to_s).to eq 'a+{1}{2}'
|
106
|
+
end
|
107
|
+
|
108
|
+
# regression test for https://github.com/ammar/regexp_parser/issues/74
|
109
|
+
specify('non-ascii comment') do
|
110
|
+
pattern = '(?x) 😋 # 😋'
|
111
|
+
root = RP.parse(pattern)
|
112
|
+
expect(root.last).to be_a Regexp::Expression::Comment
|
113
|
+
expect(root.last.to_s).to eq '# 😋'
|
114
|
+
expect(root.to_s).to eq pattern
|
115
|
+
end
|
100
116
|
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('Literal delimiter lexing') do
|
4
|
+
include_examples 'lex', '}',
|
5
|
+
0 => [:literal, :literal, '}', 0, 1, 0, 0, 0]
|
6
|
+
|
7
|
+
include_examples 'lex', '}}',
|
8
|
+
0 => [:literal, :literal, '}}', 0, 2, 0, 0, 0]
|
9
|
+
|
10
|
+
include_examples 'lex', '{',
|
11
|
+
0 => [:literal, :literal, '{', 0, 1, 0, 0, 0]
|
12
|
+
|
13
|
+
include_examples 'lex', '{{',
|
14
|
+
0 => [:literal, :literal, '{{', 0, 2, 0, 0, 0]
|
15
|
+
|
16
|
+
include_examples 'lex', '{}',
|
17
|
+
0 => [:literal, :literal, '{}', 0, 2, 0, 0, 0]
|
18
|
+
|
19
|
+
include_examples 'lex', '}{',
|
20
|
+
0 => [:literal, :literal, '}{', 0, 2, 0, 0, 0]
|
21
|
+
|
22
|
+
include_examples 'lex', '}{+',
|
23
|
+
0 => [:literal, :literal, '}', 0, 1, 0, 0, 0],
|
24
|
+
1 => [:literal, :literal, '{', 1, 2, 0, 0, 0],
|
25
|
+
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
|
26
|
+
|
27
|
+
include_examples 'lex', '{{var}}',
|
28
|
+
0 => [:literal, :literal, '{{var}}', 0, 7, 0, 0, 0]
|
29
|
+
|
30
|
+
include_examples 'lex', 'a{b}c',
|
31
|
+
0 => [:literal, :literal, 'a{b}c', 0, 5, 0, 0, 0]
|
32
|
+
|
33
|
+
include_examples 'lex', 'a{1,2',
|
34
|
+
0 => [:literal, :literal, 'a{1,2', 0, 5, 0, 0, 0]
|
35
|
+
|
36
|
+
include_examples 'lex', '({.+})',
|
37
|
+
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
38
|
+
1 => [:literal, :literal, '{', 1, 2, 1, 0, 0],
|
39
|
+
2 => [:meta, :dot, '.', 2, 3, 1, 0, 0],
|
40
|
+
3 => [:quantifier, :one_or_more, '+', 3, 4, 1, 0, 0],
|
41
|
+
4 => [:literal, :literal, '}', 4, 5, 1, 0, 0],
|
42
|
+
5 => [:group, :close, ')', 5, 6, 0, 0, 0]
|
43
|
+
|
44
|
+
include_examples 'lex', ']',
|
45
|
+
0 => [:literal, :literal, ']', 0, 1, 0, 0, 0]
|
46
|
+
|
47
|
+
include_examples 'lex', ']]',
|
48
|
+
0 => [:literal, :literal, ']]', 0, 2, 0, 0, 0]
|
49
|
+
|
50
|
+
include_examples 'lex', ']\[',
|
51
|
+
0 => [:literal, :literal, ']', 0, 1, 0, 0, 0],
|
52
|
+
1 => [:escape, :set_open, '\[', 1, 3, 0, 0, 0]
|
53
|
+
|
54
|
+
include_examples 'lex', '()',
|
55
|
+
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
56
|
+
1 => [:group, :close, ')', 1, 2, 0, 0, 0]
|
57
|
+
|
58
|
+
include_examples 'lex', '{abc:.+}}}[^}]]}',
|
59
|
+
0 => [:literal, :literal, '{abc:', 0, 5, 0, 0, 0],
|
60
|
+
1 => [:meta, :dot, '.', 5, 6, 0, 0, 0],
|
61
|
+
2 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
62
|
+
3 => [:literal, :literal, '}}}', 7, 10, 0, 0, 0],
|
63
|
+
4 => [:set, :open, '[', 10, 11, 0, 0, 0],
|
64
|
+
5 => [:set, :negate, '^', 11, 12, 0, 1, 0],
|
65
|
+
6 => [:literal, :literal, '}', 12, 13, 0, 1, 0],
|
66
|
+
7 => [:set, :close, ']', 13, 14, 0, 0, 0],
|
67
|
+
8 => [:literal, :literal, ']}', 14, 16, 0, 0, 0]
|
68
|
+
end
|
data/spec/lexer/literals_spec.rb
CHANGED
@@ -10,67 +10,42 @@ RSpec.describe('Literal lexing') do
|
|
10
10
|
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
11
11
|
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
|
12
12
|
|
13
|
-
# 2 byte wide characters
|
14
|
-
include_examples 'lex', '
|
15
|
-
0 => [:literal, :literal, '
|
16
|
-
|
17
|
-
|
18
|
-
0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0]
|
19
|
-
|
20
|
-
include_examples 'lex', 'aاbبت?',
|
21
|
-
0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
|
22
|
-
1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
|
23
|
-
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
|
24
|
-
|
25
|
-
include_examples 'lex', 'aا?bبcت+',
|
26
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
27
|
-
1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
|
28
|
-
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
29
|
-
3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
|
30
|
-
4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
|
31
|
-
5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
|
32
|
-
|
33
|
-
include_examples 'lex', 'a(اbب+)cت?',
|
34
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
35
|
-
1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
|
36
|
-
2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
|
37
|
-
3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
|
38
|
-
4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
|
39
|
-
5 => [:group, :close, ')', 8, 9, 0, 0, 0],
|
40
|
-
6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
|
41
|
-
7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
|
42
|
-
8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
|
13
|
+
# 2 byte wide characters
|
14
|
+
include_examples 'lex', 'äöü+',
|
15
|
+
0 => [:literal, :literal, 'äö', 0, 2, 0, 0, 0],
|
16
|
+
1 => [:literal, :literal, 'ü', 2, 3, 0, 0, 0],
|
17
|
+
2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
|
43
18
|
|
44
19
|
# 3 byte wide characters, Japanese
|
45
20
|
include_examples 'lex', 'ab?れます+cd',
|
46
21
|
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
47
22
|
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
48
23
|
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
49
|
-
3 => [:literal, :literal, 'れま', 3,
|
50
|
-
4 => [:literal, :literal, 'す',
|
51
|
-
5 => [:quantifier, :one_or_more, '+',
|
52
|
-
6 => [:literal, :literal, 'cd',
|
24
|
+
3 => [:literal, :literal, 'れま', 3, 5, 0, 0, 0],
|
25
|
+
4 => [:literal, :literal, 'す', 5, 6, 0, 0, 0],
|
26
|
+
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
27
|
+
6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
|
53
28
|
|
54
29
|
# 4 byte wide characters, Osmanya
|
55
30
|
include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
|
56
|
-
0 => [:literal, :literal, '𐒀', 0,
|
57
|
-
1 => [:literal, :literal, '𐒁',
|
58
|
-
2 => [:quantifier, :zero_or_one, '?',
|
59
|
-
3 => [:literal, :literal, '𐒂a',
|
60
|
-
4 => [:literal, :literal, 'b',
|
61
|
-
5 => [:quantifier, :one_or_more, '+',
|
62
|
-
6 => [:literal, :literal, '𐒃',
|
31
|
+
0 => [:literal, :literal, '𐒀', 0, 1, 0, 0, 0],
|
32
|
+
1 => [:literal, :literal, '𐒁', 1, 2, 0, 0, 0],
|
33
|
+
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
34
|
+
3 => [:literal, :literal, '𐒂a', 3, 5, 0, 0, 0],
|
35
|
+
4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
|
36
|
+
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
37
|
+
6 => [:literal, :literal, '𐒃', 7, 8, 0, 0, 0]
|
63
38
|
|
64
39
|
include_examples 'lex', 'mu𝄞?si*𝄫c+',
|
65
40
|
0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
|
66
|
-
1 => [:literal, :literal, '𝄞', 2,
|
67
|
-
2 => [:quantifier, :zero_or_one, '?',
|
68
|
-
3 => [:literal, :literal, 's',
|
69
|
-
4 => [:literal, :literal, 'i',
|
70
|
-
5 => [:quantifier, :zero_or_more, '*',
|
71
|
-
6 => [:literal, :literal, '𝄫',
|
72
|
-
7 => [:literal, :literal, 'c',
|
73
|
-
8 => [:quantifier, :one_or_more, '+',
|
41
|
+
1 => [:literal, :literal, '𝄞', 2, 3, 0, 0, 0],
|
42
|
+
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
43
|
+
3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
|
44
|
+
4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
|
45
|
+
5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
|
46
|
+
6 => [:literal, :literal, '𝄫', 7, 8, 0, 0, 0],
|
47
|
+
7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
|
48
|
+
8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
|
74
49
|
|
75
50
|
specify('lex single 2 byte char') do
|
76
51
|
tokens = RL.lex("\u0627+")
|
data/spec/parser/escapes_spec.rb
CHANGED
@@ -25,7 +25,7 @@ RSpec.describe('EscapeSequence parsing') do
|
|
25
25
|
include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
26
26
|
include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
27
27
|
|
28
|
-
|
28
|
+
# hex escapes
|
29
29
|
include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
|
30
30
|
|
31
31
|
# octal escapes
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('passing options to parse') do
|
4
|
+
it 'raises if if parsing from a Regexp and options are passed' do
|
5
|
+
expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
|
6
|
+
ArgumentError,
|
7
|
+
'options cannot be supplied unless parsing a String'
|
8
|
+
)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'sets options if parsing from a String' do
|
12
|
+
root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
|
13
|
+
|
14
|
+
expect(root.options).to eq(m: true, x: true)
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'allows options to not be supplied when parsing from a Regexp' do
|
18
|
+
root = RP.parse(/a+/ix)
|
19
|
+
|
20
|
+
expect(root.options).to eq(i: true, x: true)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'has an empty option-hash when parsing from a String and passing no options' do
|
24
|
+
root = RP.parse('a+')
|
25
|
+
|
26
|
+
expect(root.options).to be_empty
|
27
|
+
end
|
28
|
+
end
|
@@ -35,6 +35,22 @@ RSpec.describe('Quantifier parsing') do
|
|
35
35
|
include_examples 'quantifier', /a{4}b/, '{4}', :greedy, :interval, 4, 4
|
36
36
|
include_examples 'quantifier', /a{4}?b/, '{4}?', :reluctant, :interval, 4, 4
|
37
37
|
include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
|
38
|
+
include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
|
39
|
+
|
40
|
+
# special case: exps with chained quantifiers are wrapped in implicit passive groups
|
41
|
+
include_examples 'parse', /a+{2}{3}/,
|
42
|
+
0 => [
|
43
|
+
:group, :passive, Group::Passive, implicit?: true, level: 0,
|
44
|
+
quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
|
45
|
+
],
|
46
|
+
[0, 0] => [
|
47
|
+
:group, :passive, Group::Passive, implicit?: true, level: 1,
|
48
|
+
quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
|
49
|
+
],
|
50
|
+
[0, 0, 0] => [
|
51
|
+
:literal, :literal, Literal, text: 'a', level: 2,
|
52
|
+
quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
|
53
|
+
]
|
38
54
|
|
39
55
|
specify('mode-checking methods') do
|
40
56
|
exp = RP.parse(/a??/).first
|
@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
specify('parse set range hex') do
|
20
|
-
root = RP.parse('[\\x00-\\
|
20
|
+
root = RP.parse('[\\x00-\\x22]')
|
21
21
|
set = root[0]
|
22
22
|
range = set[0]
|
23
23
|
|
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
26
26
|
expect(range.count).to eq 2
|
27
27
|
expect(range.first.to_s).to eq '\\x00'
|
28
28
|
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
|
-
expect(range.last.to_s).to eq '\\
|
29
|
+
expect(range.last.to_s).to eq '\\x22'
|
30
30
|
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set).to match
|
31
|
+
expect(set).to match "\x11"
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('parse set range unicode') do
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('Literal delimiter scanning') do
|
4
|
+
include_examples 'scan', '}',
|
5
|
+
0 => [:literal, :literal, '}', 0, 1]
|
6
|
+
|
7
|
+
include_examples 'scan', '}}',
|
8
|
+
0 => [:literal, :literal, '}}', 0, 2]
|
9
|
+
|
10
|
+
include_examples 'scan', '{',
|
11
|
+
0 => [:literal, :literal, '{', 0, 1]
|
12
|
+
|
13
|
+
include_examples 'scan', '{{',
|
14
|
+
0 => [:literal, :literal, '{{', 0, 2]
|
15
|
+
|
16
|
+
include_examples 'scan', '{}',
|
17
|
+
0 => [:literal, :literal, '{}', 0, 2]
|
18
|
+
|
19
|
+
include_examples 'scan', '}{',
|
20
|
+
0 => [:literal, :literal, '}{', 0, 2]
|
21
|
+
|
22
|
+
include_examples 'scan', '}{+',
|
23
|
+
0 => [:literal, :literal, '}{', 0, 2]
|
24
|
+
|
25
|
+
include_examples 'scan', '{{var}}',
|
26
|
+
0 => [:literal, :literal, '{{var}}', 0, 7]
|
27
|
+
|
28
|
+
include_examples 'scan', 'a{1,2',
|
29
|
+
0 => [:literal, :literal, 'a{1,2', 0, 5]
|
30
|
+
|
31
|
+
include_examples 'scan', '({.+})',
|
32
|
+
0 => [:group, :capture, '(', 0, 1],
|
33
|
+
1 => [:literal, :literal, '{', 1, 2],
|
34
|
+
2 => [:meta, :dot, '.', 2, 3],
|
35
|
+
3 => [:quantifier, :one_or_more, '+', 3, 4],
|
36
|
+
4 => [:literal, :literal, '}', 4, 5],
|
37
|
+
5 => [:group, :close, ')', 5, 6]
|
38
|
+
|
39
|
+
include_examples 'scan', ']',
|
40
|
+
0 => [:literal, :literal, ']', 0, 1]
|
41
|
+
|
42
|
+
include_examples 'scan', ']]',
|
43
|
+
0 => [:literal, :literal, ']]', 0, 2]
|
44
|
+
|
45
|
+
include_examples 'scan', ']\[',
|
46
|
+
0 => [:literal, :literal, ']', 0, 1],
|
47
|
+
1 => [:escape, :set_open, '\[', 1, 3]
|
48
|
+
|
49
|
+
include_examples 'scan', '()',
|
50
|
+
0 => [:group, :capture, '(', 0, 1],
|
51
|
+
1 => [:group, :close, ')', 1, 2]
|
52
|
+
end
|
data/spec/scanner/errors_spec.rb
CHANGED
@@ -10,7 +10,6 @@ RSpec.describe(Regexp::Scanner) do
|
|
10
10
|
include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[a'
|
11
11
|
include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[[:alpha:]'
|
12
12
|
include_examples 'scan error', RS::PrematureEndError, 'unbalanced group', '(abc'
|
13
|
-
include_examples 'scan error', RS::PrematureEndError, 'unbalanced interval', 'a{1,2'
|
14
13
|
include_examples 'scan error', RS::PrematureEndError, 'eof in property', '\p{asci'
|
15
14
|
include_examples 'scan error', RS::PrematureEndError, 'incomplete property', '\p{ascii abc'
|
16
15
|
include_examples 'scan error', RS::PrematureEndError, 'eof options', '(?mix'
|
@@ -11,7 +11,17 @@ RSpec.describe('Escape scanning') do
|
|
11
11
|
include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
|
12
12
|
include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
|
13
13
|
|
14
|
+
# ineffectual literal escapes
|
15
|
+
# these cause "Unknown escape" warnings in Ruby for ascii chars,
|
16
|
+
# and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
|
17
|
+
# In terms of matching, Ruby treats them both like non-escaped literals.
|
14
18
|
include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
|
19
|
+
include_examples 'scan', 'a\üc', 1 => [:escape, :literal, '\ü', 1, 3]
|
20
|
+
include_examples 'scan', 'a\😋c', 1 => [:escape, :literal, '\😋', 1, 3]
|
21
|
+
|
22
|
+
# these incomplete ref/call sequences are treated as literal escapes by Ruby
|
23
|
+
include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
|
24
|
+
include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
|
15
25
|
|
16
26
|
include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
|
17
27
|
include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
|
@@ -39,6 +39,17 @@ RSpec.describe('FreeSpace scanning') do
|
|
39
39
|
11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
|
40
40
|
17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
|
41
41
|
29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
|
42
|
+
|
43
|
+
# single line / no trailing newline (c.f. issue #66)
|
44
|
+
include_examples 'scan', /a # b/x,
|
45
|
+
0 => [:literal, :literal, 'a', 0, 1],
|
46
|
+
1 => [:free_space, :whitespace, ' ', 1, 2],
|
47
|
+
2 => [:free_space, :comment, "# b", 2, 5]
|
48
|
+
|
49
|
+
# without spaces (c.f. issue #66)
|
50
|
+
include_examples 'scan', /a#b/x,
|
51
|
+
0 => [:literal, :literal, 'a', 0, 1],
|
52
|
+
1 => [:free_space, :comment, "#b", 1, 3]
|
42
53
|
end
|
43
54
|
|
44
55
|
describe('scan free space inlined') do
|
@@ -130,4 +141,25 @@ RSpec.describe('FreeSpace scanning') do
|
|
130
141
|
26 => [:literal, :literal, 'i j', 35, 38],
|
131
142
|
27 => [:group, :close, ')', 38, 39]
|
132
143
|
end
|
144
|
+
|
145
|
+
describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70
|
146
|
+
include_examples 'scan', /a#bcd/,
|
147
|
+
0 => [:literal, :literal, 'a#bcd', 0, 5]
|
148
|
+
include_examples 'scan', /a # bcd/,
|
149
|
+
0 => [:literal, :literal, 'a # bcd', 0, 7]
|
150
|
+
|
151
|
+
include_examples 'scan', /a#\d/,
|
152
|
+
0 => [:literal, :literal, 'a#', 0, 2],
|
153
|
+
1 => [:type, :digit, '\d', 2, 4]
|
154
|
+
include_examples 'scan', /a # \d/,
|
155
|
+
0 => [:literal, :literal, 'a # ', 0, 4],
|
156
|
+
1 => [:type, :digit, '\d', 4, 6]
|
157
|
+
|
158
|
+
include_examples 'scan', /a#()/,
|
159
|
+
0 => [:literal, :literal, 'a#', 0, 2],
|
160
|
+
1 => [:group, :capture, '(', 2, 3]
|
161
|
+
include_examples 'scan', /a # ()/,
|
162
|
+
0 => [:literal, :literal, 'a # ', 0, 4],
|
163
|
+
1 => [:group, :capture, '(', 4, 5]
|
164
|
+
end
|
133
165
|
end
|
@@ -2,48 +2,38 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
RSpec.describe('UTF8 scanning') do
|
4
4
|
# ascii, single byte characters
|
5
|
-
include_examples 'scan', 'a',
|
5
|
+
include_examples 'scan', 'a',
|
6
|
+
0 => [:literal, :literal, 'a', 0, 1]
|
6
7
|
|
7
|
-
include_examples 'scan', 'ab+',
|
8
|
-
|
8
|
+
include_examples 'scan', 'ab+',
|
9
|
+
0 => [:literal, :literal, 'ab', 0, 2],
|
10
|
+
1 => [:quantifier, :one_or_more, '+', 2, 3]
|
9
11
|
|
10
|
-
# 2 byte wide characters
|
11
|
-
include_examples 'scan', '
|
12
|
-
|
13
|
-
include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
|
14
|
-
include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
|
15
|
-
|
16
|
-
include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
|
17
|
-
include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
|
18
|
-
include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
|
19
|
-
include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
|
20
|
-
|
21
|
-
include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
|
22
|
-
include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
|
23
|
-
include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
|
24
|
-
include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
|
25
|
-
include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
|
26
|
-
include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
|
27
|
-
include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
|
12
|
+
# 2 byte wide characters
|
13
|
+
include_examples 'scan', 'äöü',
|
14
|
+
0 => [:literal, :literal, 'äöü', 0, 3]
|
28
15
|
|
29
16
|
# 3 byte wide characters, Japanese
|
30
|
-
include_examples 'scan', 'ab?れます+cd',
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
17
|
+
include_examples 'scan', 'ab?れます+cd',
|
18
|
+
0 => [:literal, :literal, 'ab', 0, 2],
|
19
|
+
1 => [:quantifier, :zero_or_one, '?', 2, 3],
|
20
|
+
2 => [:literal, :literal, 'れます', 3, 6],
|
21
|
+
3 => [:quantifier, :one_or_more, '+', 6, 7],
|
22
|
+
4 => [:literal, :literal, 'cd', 7, 9]
|
35
23
|
|
36
24
|
# 4 byte wide characters, Osmanya
|
37
|
-
include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
include_examples 'scan', 'mu𝄞?si*𝄫c+',
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
25
|
+
include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
|
26
|
+
0 => [:literal, :literal, '𐒀𐒁', 0, 2],
|
27
|
+
1 => [:quantifier, :zero_or_one, '?', 2, 3],
|
28
|
+
2 => [:literal, :literal, '𐒂ab', 3, 6],
|
29
|
+
3 => [:quantifier, :one_or_more, '+', 6, 7],
|
30
|
+
4 => [:literal, :literal, '𐒃', 7, 8]
|
31
|
+
|
32
|
+
include_examples 'scan', 'mu𝄞?si*𝄫c+',
|
33
|
+
0 => [:literal, :literal, 'mu𝄞', 0, 3],
|
34
|
+
1 => [:quantifier, :zero_or_one, '?', 3, 4],
|
35
|
+
2 => [:literal, :literal, 'si', 4, 6],
|
36
|
+
3 => [:quantifier, :zero_or_more, '*', 6, 7],
|
37
|
+
4 => [:literal, :literal, '𝄫c', 7, 9],
|
38
|
+
5 => [:quantifier, :one_or_more, '+', 9, 10]
|
49
39
|
end
|