regexp_parser 1.7.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +83 -0
- data/README.md +23 -11
- data/lib/regexp_parser/expression.rb +10 -19
- data/lib/regexp_parser/expression/classes/group.rb +17 -2
- data/lib/regexp_parser/expression/classes/root.rb +4 -16
- data/lib/regexp_parser/expression/quantifier.rb +9 -0
- data/lib/regexp_parser/expression/sequence.rb +0 -10
- data/lib/regexp_parser/lexer.rb +6 -6
- data/lib/regexp_parser/parser.rb +45 -12
- data/lib/regexp_parser/scanner.rb +1264 -1280
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +195 -194
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +1 -1
- data/spec/expression/base_spec.rb +10 -0
- data/spec/expression/to_s_spec.rb +16 -0
- data/spec/lexer/literals_spec.rb +24 -49
- data/spec/parser/escapes_spec.rb +1 -1
- data/spec/parser/options_spec.rb +28 -0
- data/spec/parser/quantifiers_spec.rb +15 -0
- data/spec/parser/set/ranges_spec.rb +3 -3
- data/spec/scanner/escapes_spec.rb +11 -0
- data/spec/scanner/free_space_spec.rb +32 -0
- data/spec/scanner/groups_spec.rb +10 -1
- data/spec/scanner/literals_spec.rb +28 -38
- data/spec/scanner/options_spec.rb +36 -0
- data/spec/scanner/quantifiers_spec.rb +18 -13
- data/spec/scanner/sets_spec.rb +8 -2
- metadata +60 -60
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
@@ -74,9 +74,9 @@ module Regexp::Syntax
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def warn_if_future_version(const_name)
|
77
|
-
return if comparable_version(const_name) < comparable_version('
|
77
|
+
return if comparable_version(const_name) < comparable_version('4.0.0')
|
78
78
|
|
79
|
-
warn('This library has only been tested up to Ruby
|
79
|
+
warn('This library has only been tested up to Ruby 3.x, '\
|
80
80
|
"but you are running with #{const_get(const_name).inspect}")
|
81
81
|
end
|
82
82
|
end
|
data/regexp_parser.gemspec
CHANGED
@@ -91,4 +91,14 @@ RSpec.describe(Regexp::Expression::Base) do
|
|
91
91
|
expect(RP.parse(/a*/)[0].repetitions).to eq 0..(Float::INFINITY)
|
92
92
|
expect(RP.parse(/a+/)[0].repetitions).to eq 1..(Float::INFINITY)
|
93
93
|
end
|
94
|
+
|
95
|
+
specify('#base_length') do
|
96
|
+
expect(RP.parse(/(aa)/)[0].base_length).to eq 4
|
97
|
+
expect(RP.parse(/(aa){42}/)[0].base_length).to eq 4
|
98
|
+
end
|
99
|
+
|
100
|
+
specify('#full_length') do
|
101
|
+
expect(RP.parse(/(aa)/)[0].full_length).to eq 4
|
102
|
+
expect(RP.parse(/(aa){42}/)[0].full_length).to eq 8
|
103
|
+
end
|
94
104
|
end
|
@@ -97,4 +97,20 @@ RSpec.describe('Expression#to_s') do
|
|
97
97
|
|
98
98
|
expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
|
99
99
|
end
|
100
|
+
|
101
|
+
# special case: implicit groups used for chained quantifiers produce no parens
|
102
|
+
specify 'chained quantifiers #to_s' do
|
103
|
+
pattern = /a+{1}{2}/
|
104
|
+
root = RP.parse(pattern)
|
105
|
+
expect(root.to_s).to eq 'a+{1}{2}'
|
106
|
+
end
|
107
|
+
|
108
|
+
# regression test for https://github.com/ammar/regexp_parser/issues/74
|
109
|
+
specify('non-ascii comment') do
|
110
|
+
pattern = '(?x) ๐ # ๐'
|
111
|
+
root = RP.parse(pattern)
|
112
|
+
expect(root.last).to be_a Regexp::Expression::Comment
|
113
|
+
expect(root.last.to_s).to eq '# ๐'
|
114
|
+
expect(root.to_s).to eq pattern
|
115
|
+
end
|
100
116
|
end
|
data/spec/lexer/literals_spec.rb
CHANGED
@@ -10,67 +10,42 @@ RSpec.describe('Literal lexing') do
|
|
10
10
|
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
11
11
|
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
|
12
12
|
|
13
|
-
# 2 byte wide characters
|
14
|
-
include_examples 'lex', '
|
15
|
-
0 => [:literal, :literal, '
|
16
|
-
|
17
|
-
|
18
|
-
0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0]
|
19
|
-
|
20
|
-
include_examples 'lex', 'aاbبت?',
|
21
|
-
0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
|
22
|
-
1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
|
23
|
-
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
|
24
|
-
|
25
|
-
include_examples 'lex', 'aا?bبcت+',
|
26
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
27
|
-
1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
|
28
|
-
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
29
|
-
3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
|
30
|
-
4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
|
31
|
-
5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
|
32
|
-
|
33
|
-
include_examples 'lex', 'a(اbب+)cت?',
|
34
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
35
|
-
1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
|
36
|
-
2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
|
37
|
-
3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
|
38
|
-
4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
|
39
|
-
5 => [:group, :close, ')', 8, 9, 0, 0, 0],
|
40
|
-
6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
|
41
|
-
7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
|
42
|
-
8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
|
13
|
+
# 2 byte wide characters
|
14
|
+
include_examples 'lex', 'äöü+',
|
15
|
+
0 => [:literal, :literal, 'äö', 0, 2, 0, 0, 0],
|
16
|
+
1 => [:literal, :literal, 'ü', 2, 3, 0, 0, 0],
|
17
|
+
2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
|
43
18
|
|
44
19
|
# 3 byte wide characters, Japanese
|
45
20
|
include_examples 'lex', 'ab?ใใพใ+cd',
|
46
21
|
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
47
22
|
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
48
23
|
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
49
|
-
3 => [:literal, :literal, 'ใใพ', 3,
|
50
|
-
4 => [:literal, :literal, 'ใ',
|
51
|
-
5 => [:quantifier, :one_or_more, '+',
|
52
|
-
6 => [:literal, :literal, 'cd',
|
24
|
+
3 => [:literal, :literal, 'ใใพ', 3, 5, 0, 0, 0],
|
25
|
+
4 => [:literal, :literal, 'ใ', 5, 6, 0, 0, 0],
|
26
|
+
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
27
|
+
6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
|
53
28
|
|
54
29
|
# 4 byte wide characters, Osmanya
|
55
30
|
include_examples 'lex', '๐๐?๐ab+๐',
|
56
|
-
0 => [:literal, :literal, '๐', 0,
|
57
|
-
1 => [:literal, :literal, '๐',
|
58
|
-
2 => [:quantifier, :zero_or_one, '?',
|
59
|
-
3 => [:literal, :literal, '๐a',
|
60
|
-
4 => [:literal, :literal, 'b',
|
61
|
-
5 => [:quantifier, :one_or_more, '+',
|
62
|
-
6 => [:literal, :literal, '๐',
|
31
|
+
0 => [:literal, :literal, '๐', 0, 1, 0, 0, 0],
|
32
|
+
1 => [:literal, :literal, '๐', 1, 2, 0, 0, 0],
|
33
|
+
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
34
|
+
3 => [:literal, :literal, '๐a', 3, 5, 0, 0, 0],
|
35
|
+
4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
|
36
|
+
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
37
|
+
6 => [:literal, :literal, '๐', 7, 8, 0, 0, 0]
|
63
38
|
|
64
39
|
include_examples 'lex', 'mu๐?si*๐ซc+',
|
65
40
|
0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
|
66
|
-
1 => [:literal, :literal, '๐', 2,
|
67
|
-
2 => [:quantifier, :zero_or_one, '?',
|
68
|
-
3 => [:literal, :literal, 's',
|
69
|
-
4 => [:literal, :literal, 'i',
|
70
|
-
5 => [:quantifier, :zero_or_more, '*',
|
71
|
-
6 => [:literal, :literal, '๐ซ',
|
72
|
-
7 => [:literal, :literal, 'c',
|
73
|
-
8 => [:quantifier, :one_or_more, '+',
|
41
|
+
1 => [:literal, :literal, '๐', 2, 3, 0, 0, 0],
|
42
|
+
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
43
|
+
3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
|
44
|
+
4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
|
45
|
+
5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
|
46
|
+
6 => [:literal, :literal, '๐ซ', 7, 8, 0, 0, 0],
|
47
|
+
7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
|
48
|
+
8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
|
74
49
|
|
75
50
|
specify('lex single 2 byte char') do
|
76
51
|
tokens = RL.lex("\u0627+")
|
data/spec/parser/escapes_spec.rb
CHANGED
@@ -25,7 +25,7 @@ RSpec.describe('EscapeSequence parsing') do
|
|
25
25
|
include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
26
26
|
include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
27
27
|
|
28
|
-
|
28
|
+
# hex escapes
|
29
29
|
include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
|
30
30
|
|
31
31
|
# octal escapes
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('passing options to parse') do
|
4
|
+
it 'raises if if parsing from a Regexp and options are passed' do
|
5
|
+
expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
|
6
|
+
ArgumentError,
|
7
|
+
'options cannot be supplied unless parsing a String'
|
8
|
+
)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'sets options if parsing from a String' do
|
12
|
+
root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
|
13
|
+
|
14
|
+
expect(root.options).to eq(m: true, x: true)
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'allows options to not be supplied when parsing from a Regexp' do
|
18
|
+
root = RP.parse(/a+/ix)
|
19
|
+
|
20
|
+
expect(root.options).to eq(i: true, x: true)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'has an empty option-hash when parsing from a String and passing no options' do
|
24
|
+
root = RP.parse('a+')
|
25
|
+
|
26
|
+
expect(root.options).to be_empty
|
27
|
+
end
|
28
|
+
end
|
@@ -37,6 +37,21 @@ RSpec.describe('Quantifier parsing') do
|
|
37
37
|
include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
|
38
38
|
include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
|
39
39
|
|
40
|
+
# special case: exps with chained quantifiers are wrapped in implicit passive groups
|
41
|
+
include_examples 'parse', /a+{2}{3}/,
|
42
|
+
0 => [
|
43
|
+
:group, :passive, Group::Passive, implicit?: true, level: 0,
|
44
|
+
quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
|
45
|
+
],
|
46
|
+
[0, 0] => [
|
47
|
+
:group, :passive, Group::Passive, implicit?: true, level: 1,
|
48
|
+
quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
|
49
|
+
],
|
50
|
+
[0, 0, 0] => [
|
51
|
+
:literal, :literal, Literal, text: 'a', level: 2,
|
52
|
+
quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
|
53
|
+
]
|
54
|
+
|
40
55
|
specify('mode-checking methods') do
|
41
56
|
exp = RP.parse(/a??/).first
|
42
57
|
|
@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
specify('parse set range hex') do
|
20
|
-
root = RP.parse('[\\x00-\\
|
20
|
+
root = RP.parse('[\\x00-\\x22]')
|
21
21
|
set = root[0]
|
22
22
|
range = set[0]
|
23
23
|
|
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
26
26
|
expect(range.count).to eq 2
|
27
27
|
expect(range.first.to_s).to eq '\\x00'
|
28
28
|
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
|
-
expect(range.last.to_s).to eq '\\
|
29
|
+
expect(range.last.to_s).to eq '\\x22'
|
30
30
|
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set).to match
|
31
|
+
expect(set).to match "\x11"
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('parse set range unicode') do
|
@@ -11,12 +11,23 @@ RSpec.describe('Escape scanning') do
|
|
11
11
|
include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
|
12
12
|
include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
|
13
13
|
|
14
|
+
# ineffectual literal escapes
|
15
|
+
# these cause "Unknown escape" warnings in Ruby for ascii chars,
|
16
|
+
# and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
|
17
|
+
# In terms of matching, Ruby treats them both like non-escaped literals.
|
14
18
|
include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
|
19
|
+
include_examples 'scan', 'a\üc', 1 => [:escape, :literal, '\ü', 1, 3]
|
20
|
+
include_examples 'scan', 'a\๐c', 1 => [:escape, :literal, '\๐', 1, 3]
|
21
|
+
|
22
|
+
# these incomplete ref/call sequences are treated as literal escapes by Ruby
|
23
|
+
include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
|
24
|
+
include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
|
15
25
|
|
16
26
|
include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
|
17
27
|
include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
|
18
28
|
include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
|
19
29
|
|
30
|
+
include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
|
20
31
|
include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
|
21
32
|
include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
|
22
33
|
|
@@ -39,6 +39,17 @@ RSpec.describe('FreeSpace scanning') do
|
|
39
39
|
11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
|
40
40
|
17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
|
41
41
|
29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
|
42
|
+
|
43
|
+
# single line / no trailing newline (c.f. issue #66)
|
44
|
+
include_examples 'scan', /a # b/x,
|
45
|
+
0 => [:literal, :literal, 'a', 0, 1],
|
46
|
+
1 => [:free_space, :whitespace, ' ', 1, 2],
|
47
|
+
2 => [:free_space, :comment, "# b", 2, 5]
|
48
|
+
|
49
|
+
# without spaces (c.f. issue #66)
|
50
|
+
include_examples 'scan', /a#b/x,
|
51
|
+
0 => [:literal, :literal, 'a', 0, 1],
|
52
|
+
1 => [:free_space, :comment, "#b", 1, 3]
|
42
53
|
end
|
43
54
|
|
44
55
|
describe('scan free space inlined') do
|
@@ -130,4 +141,25 @@ RSpec.describe('FreeSpace scanning') do
|
|
130
141
|
26 => [:literal, :literal, 'i j', 35, 38],
|
131
142
|
27 => [:group, :close, ')', 38, 39]
|
132
143
|
end
|
144
|
+
|
145
|
+
describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70
|
146
|
+
include_examples 'scan', /a#bcd/,
|
147
|
+
0 => [:literal, :literal, 'a#bcd', 0, 5]
|
148
|
+
include_examples 'scan', /a # bcd/,
|
149
|
+
0 => [:literal, :literal, 'a # bcd', 0, 7]
|
150
|
+
|
151
|
+
include_examples 'scan', /a#\d/,
|
152
|
+
0 => [:literal, :literal, 'a#', 0, 2],
|
153
|
+
1 => [:type, :digit, '\d', 2, 4]
|
154
|
+
include_examples 'scan', /a # \d/,
|
155
|
+
0 => [:literal, :literal, 'a # ', 0, 4],
|
156
|
+
1 => [:type, :digit, '\d', 4, 6]
|
157
|
+
|
158
|
+
include_examples 'scan', /a#()/,
|
159
|
+
0 => [:literal, :literal, 'a#', 0, 2],
|
160
|
+
1 => [:group, :capture, '(', 2, 3]
|
161
|
+
include_examples 'scan', /a # ()/,
|
162
|
+
0 => [:literal, :literal, 'a # ', 0, 4],
|
163
|
+
1 => [:group, :capture, '(', 4, 5]
|
164
|
+
end
|
133
165
|
end
|
data/spec/scanner/groups_spec.rb
CHANGED
@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
|
|
5
5
|
include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
|
6
6
|
include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
|
7
7
|
|
8
|
+
# Named groups
|
9
|
+
# only names that start with a hyphen or digit (ascii or other) are invalid
|
8
10
|
include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
|
9
11
|
include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
|
10
|
-
|
11
12
|
include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
|
12
13
|
include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
|
14
|
+
include_examples 'scan', '(?<name-1>abc)', 0 => [:group, :named_ab, '(?<name-1>', 0,10]
|
15
|
+
include_examples 'scan', "(?'name-1'abc)", 0 => [:group, :named_sq, "(?'name-1'", 0,10]
|
16
|
+
include_examples 'scan', "(?<name'1>abc)", 0 => [:group, :named_ab, "(?<name'1>", 0,10]
|
17
|
+
include_examples 'scan', "(?'name>1'abc)", 0 => [:group, :named_sq, "(?'name>1'", 0,10]
|
18
|
+
include_examples 'scan', '(?<üüuuüü>abc)', 0 => [:group, :named_ab, '(?<üüuuüü>', 0,10]
|
19
|
+
include_examples 'scan', "(?'üüuuüü'abc)", 0 => [:group, :named_sq, "(?'üüuuüü'", 0,10]
|
20
|
+
include_examples 'scan', "(?<๐1234๐>abc)", 0 => [:group, :named_ab, "(?<๐1234๐>", 0,10]
|
21
|
+
include_examples 'scan', "(?'๐1234๐'abc)", 0 => [:group, :named_sq, "(?'๐1234๐'", 0,10]
|
13
22
|
|
14
23
|
include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
|
15
24
|
include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
|
@@ -2,48 +2,38 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
RSpec.describe('UTF8 scanning') do
|
4
4
|
# ascii, single byte characters
|
5
|
-
include_examples 'scan', 'a',
|
5
|
+
include_examples 'scan', 'a',
|
6
|
+
0 => [:literal, :literal, 'a', 0, 1]
|
6
7
|
|
7
|
-
include_examples 'scan', 'ab+',
|
8
|
-
|
8
|
+
include_examples 'scan', 'ab+',
|
9
|
+
0 => [:literal, :literal, 'ab', 0, 2],
|
10
|
+
1 => [:quantifier, :one_or_more, '+', 2, 3]
|
9
11
|
|
10
|
-
# 2 byte wide characters
|
11
|
-
include_examples 'scan', '
|
12
|
-
|
13
|
-
include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
|
14
|
-
include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
|
15
|
-
|
16
|
-
include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
|
17
|
-
include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
|
18
|
-
include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
|
19
|
-
include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
|
20
|
-
|
21
|
-
include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
|
22
|
-
include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
|
23
|
-
include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
|
24
|
-
include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
|
25
|
-
include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
|
26
|
-
include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
|
27
|
-
include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
|
12
|
+
# 2 byte wide characters
|
13
|
+
include_examples 'scan', 'äöü',
|
14
|
+
0 => [:literal, :literal, 'äöü', 0, 3]
|
28
15
|
|
29
16
|
# 3 byte wide characters, Japanese
|
30
|
-
include_examples 'scan', 'ab?ใใพใ+cd',
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
17
|
+
include_examples 'scan', 'ab?ใใพใ+cd',
|
18
|
+
0 => [:literal, :literal, 'ab', 0, 2],
|
19
|
+
1 => [:quantifier, :zero_or_one, '?', 2, 3],
|
20
|
+
2 => [:literal, :literal, 'ใใพใ', 3, 6],
|
21
|
+
3 => [:quantifier, :one_or_more, '+', 6, 7],
|
22
|
+
4 => [:literal, :literal, 'cd', 7, 9]
|
35
23
|
|
36
24
|
# 4 byte wide characters, Osmanya
|
37
|
-
include_examples 'scan', '๐๐?๐ab+๐',
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
include_examples 'scan', 'mu๐?si*๐ซc+',
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
25
|
+
include_examples 'scan', '๐๐?๐ab+๐',
|
26
|
+
0 => [:literal, :literal, '๐๐', 0, 2],
|
27
|
+
1 => [:quantifier, :zero_or_one, '?', 2, 3],
|
28
|
+
2 => [:literal, :literal, '๐ab', 3, 6],
|
29
|
+
3 => [:quantifier, :one_or_more, '+', 6, 7],
|
30
|
+
4 => [:literal, :literal, '๐', 7, 8]
|
31
|
+
|
32
|
+
include_examples 'scan', 'mu๐?si*๐ซc+',
|
33
|
+
0 => [:literal, :literal, 'mu๐', 0, 3],
|
34
|
+
1 => [:quantifier, :zero_or_one, '?', 3, 4],
|
35
|
+
2 => [:literal, :literal, 'si', 4, 6],
|
36
|
+
3 => [:quantifier, :zero_or_more, '*', 6, 7],
|
37
|
+
4 => [:literal, :literal, '๐ซc', 7, 9],
|
38
|
+
5 => [:quantifier, :one_or_more, '+', 9, 10]
|
49
39
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('passing options to scan') do
|
4
|
+
def expect_type_tokens(tokens, type_tokens)
|
5
|
+
expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'raises if if scanning from a Regexp and options are passed' do
|
9
|
+
expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
|
10
|
+
ArgumentError,
|
11
|
+
'options cannot be supplied unless scanning a String'
|
12
|
+
)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'sets free_spacing based on options if scanning from a String' do
|
16
|
+
expect_type_tokens(
|
17
|
+
RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED),
|
18
|
+
[
|
19
|
+
%i[literal literal],
|
20
|
+
%i[quantifier one_or_more],
|
21
|
+
%i[free_space comment]
|
22
|
+
]
|
23
|
+
)
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'does not set free_spacing if scanning from a String and passing no options' do
|
27
|
+
expect_type_tokens(
|
28
|
+
RS.scan('a+#c'),
|
29
|
+
[
|
30
|
+
%i[literal literal],
|
31
|
+
%i[quantifier one_or_more],
|
32
|
+
%i[literal literal]
|
33
|
+
]
|
34
|
+
)
|
35
|
+
end
|
36
|
+
end
|
@@ -1,20 +1,25 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('Quantifier scanning') do
|
4
|
-
include_examples 'scan', 'a?',
|
5
|
-
include_examples 'scan', 'a??',
|
6
|
-
include_examples 'scan', 'a?+',
|
4
|
+
include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
|
5
|
+
include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
|
6
|
+
include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]
|
7
7
|
|
8
|
-
include_examples 'scan', 'a*',
|
9
|
-
include_examples 'scan', 'a*?',
|
10
|
-
include_examples 'scan', 'a*+',
|
8
|
+
include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
|
9
|
+
include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
|
10
|
+
include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]
|
11
11
|
|
12
|
-
include_examples 'scan', 'a+',
|
13
|
-
include_examples 'scan', 'a+?',
|
14
|
-
include_examples 'scan', 'a++',
|
12
|
+
include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
|
13
|
+
include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
|
14
|
+
include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]
|
15
15
|
|
16
|
-
include_examples 'scan', 'a{2}',
|
17
|
-
include_examples 'scan', 'a{2,}',
|
18
|
-
include_examples 'scan', 'a{,2}',
|
19
|
-
include_examples 'scan', 'a{2,4}',
|
16
|
+
include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
|
17
|
+
include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
|
18
|
+
include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
|
19
|
+
include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]
|
20
|
+
|
21
|
+
# special case: chained quantifiers
|
22
|
+
include_examples 'scan', 'a+{2}{3}', 1 => [:quantifier, :one_or_more, '+', 1, 2]
|
23
|
+
include_examples 'scan', 'a+{2}{3}', 2 => [:quantifier, :interval, '{2}', 2, 5]
|
24
|
+
include_examples 'scan', 'a+{2}{3}', 3 => [:quantifier, :interval, '{3}', 5, 8]
|
20
25
|
end
|