regexp_parser 1.8.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +70 -0
- data/Gemfile +1 -0
- data/README.md +12 -11
- data/Rakefile +2 -2
- data/lib/regexp_parser/expression.rb +10 -19
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +22 -2
- data/lib/regexp_parser/expression/classes/root.rb +4 -16
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/expression/quantifier.rb +9 -0
- data/lib/regexp_parser/expression/sequence.rb +0 -10
- data/lib/regexp_parser/lexer.rb +2 -2
- data/lib/regexp_parser/parser.rb +27 -0
- data/lib/regexp_parser/scanner.rb +1248 -1258
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +175 -181
- data/lib/regexp_parser/syntax/any.rb +2 -2
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +1 -1
- data/spec/expression/base_spec.rb +10 -0
- data/spec/expression/subexpression_spec.rb +1 -1
- data/spec/expression/to_s_spec.rb +39 -31
- data/spec/lexer/literals_spec.rb +24 -49
- data/spec/parser/errors_spec.rb +1 -1
- data/spec/parser/escapes_spec.rb +1 -1
- data/spec/parser/free_space_spec.rb +4 -25
- data/spec/parser/quantifiers_spec.rb +16 -0
- data/spec/parser/set/ranges_spec.rb +3 -3
- data/spec/scanner/escapes_spec.rb +7 -0
- data/spec/scanner/free_space_spec.rb +32 -0
- data/spec/scanner/groups_spec.rb +10 -1
- data/spec/scanner/literals_spec.rb +28 -38
- data/spec/scanner/quantifiers_spec.rb +18 -13
- data/spec/scanner/sets_spec.rb +8 -2
- data/spec/spec_helper.rb +1 -0
- metadata +57 -61
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
@@ -74,9 +74,9 @@ module Regexp::Syntax
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def warn_if_future_version(const_name)
|
77
|
-
return if comparable_version(const_name) < comparable_version('
|
77
|
+
return if comparable_version(const_name) < comparable_version('4.0.0')
|
78
78
|
|
79
|
-
warn('This library has only been tested up to Ruby
|
79
|
+
warn('This library has only been tested up to Ruby 3.x, '\
|
80
80
|
"but you are running with #{const_get(const_name).inspect}")
|
81
81
|
end
|
82
82
|
end
|
data/regexp_parser.gemspec
CHANGED
@@ -91,4 +91,14 @@ RSpec.describe(Regexp::Expression::Base) do
|
|
91
91
|
expect(RP.parse(/a*/)[0].repetitions).to eq 0..(Float::INFINITY)
|
92
92
|
expect(RP.parse(/a+/)[0].repetitions).to eq 1..(Float::INFINITY)
|
93
93
|
end
|
94
|
+
|
95
|
+
specify('#base_length') do
|
96
|
+
expect(RP.parse(/(aa)/)[0].base_length).to eq 4
|
97
|
+
expect(RP.parse(/(aa){42}/)[0].base_length).to eq 4
|
98
|
+
end
|
99
|
+
|
100
|
+
specify('#full_length') do
|
101
|
+
expect(RP.parse(/(aa)/)[0].full_length).to eq 4
|
102
|
+
expect(RP.parse(/(aa){42}/)[0].full_length).to eq 8
|
103
|
+
end
|
94
104
|
end
|
@@ -32,7 +32,7 @@ RSpec.describe(Regexp::Expression::Subexpression) do
|
|
32
32
|
}
|
33
33
|
|
34
34
|
root.each_expression do |exp|
|
35
|
-
next unless expected_nesting_level = tests.delete(exp.to_s)
|
35
|
+
next unless (expected_nesting_level = tests.delete(exp.to_s))
|
36
36
|
expect(expected_nesting_level).to eq exp.nesting_level
|
37
37
|
end
|
38
38
|
|
@@ -1,58 +1,50 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('Expression#to_s') do
|
4
|
-
|
5
|
-
pattern
|
4
|
+
def parse_frozen(pattern, ruby_version = nil)
|
5
|
+
IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
|
6
|
+
end
|
7
|
+
|
8
|
+
def expect_round_trip(pattern, ruby_version = nil)
|
9
|
+
parsed = parse_frozen(pattern, ruby_version)
|
6
10
|
|
7
|
-
expect(
|
11
|
+
expect(parsed.to_s).to eql(pattern)
|
8
12
|
end
|
9
13
|
|
10
|
-
specify('
|
11
|
-
|
14
|
+
specify('literal alternation') do
|
15
|
+
expect_round_trip('abcd|ghij|klmn|pqur')
|
16
|
+
end
|
12
17
|
|
13
|
-
|
18
|
+
specify('quantified alternations') do
|
19
|
+
expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
|
14
20
|
end
|
15
21
|
|
16
22
|
specify('quantified sets') do
|
17
|
-
|
18
|
-
|
19
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
23
|
+
expect_round_trip('[abc]+|[^def]{3,6}')
|
20
24
|
end
|
21
25
|
|
22
26
|
specify('property sets') do
|
23
|
-
|
24
|
-
|
25
|
-
expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
|
27
|
+
expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
|
26
28
|
end
|
27
29
|
|
28
30
|
specify('groups') do
|
29
|
-
|
30
|
-
|
31
|
-
expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
|
31
|
+
expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('assertions') do
|
35
|
-
|
36
|
-
|
37
|
-
expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
|
35
|
+
expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
|
38
36
|
end
|
39
37
|
|
40
38
|
specify('comments') do
|
41
|
-
|
42
|
-
|
43
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
39
|
+
expect_round_trip('(?#start)a(?#middle)b(?#end)')
|
44
40
|
end
|
45
41
|
|
46
42
|
specify('options') do
|
47
|
-
|
48
|
-
|
49
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
43
|
+
expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
|
50
44
|
end
|
51
45
|
|
52
46
|
specify('url') do
|
53
|
-
|
54
|
-
|
55
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
47
|
+
expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
|
56
48
|
end
|
57
49
|
|
58
50
|
specify('multiline source') do
|
@@ -64,7 +56,7 @@ RSpec.describe('Expression#to_s') do
|
|
64
56
|
\z
|
65
57
|
/x
|
66
58
|
|
67
|
-
expect(
|
59
|
+
expect(parse_frozen(multiline).to_s).to eql(multiline.source)
|
68
60
|
end
|
69
61
|
|
70
62
|
specify('multiline #to_s') do
|
@@ -76,7 +68,7 @@ RSpec.describe('Expression#to_s') do
|
|
76
68
|
\z
|
77
69
|
/x
|
78
70
|
|
79
|
-
|
71
|
+
expect_round_trip(multiline.to_s)
|
80
72
|
end
|
81
73
|
|
82
74
|
# Free spacing expressions that use spaces between quantifiers and their
|
@@ -93,8 +85,24 @@ RSpec.describe('Expression#to_s') do
|
|
93
85
|
/x
|
94
86
|
|
95
87
|
str = 'bbbcged'
|
96
|
-
root =
|
88
|
+
root = parse_frozen(multiline)
|
89
|
+
|
90
|
+
expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
|
91
|
+
end
|
92
|
+
|
93
|
+
# special case: implicit groups used for chained quantifiers produce no parens
|
94
|
+
specify 'chained quantifiers #to_s' do
|
95
|
+
pattern = /a+{1}{2}/
|
96
|
+
root = parse_frozen(pattern)
|
97
|
+
expect(root.to_s).to eql('a+{1}{2}')
|
98
|
+
end
|
97
99
|
|
98
|
-
|
100
|
+
# regression test for https://github.com/ammar/regexp_parser/issues/74
|
101
|
+
specify('non-ascii comment') do
|
102
|
+
pattern = '(?x) 😋 # 😋'
|
103
|
+
root = RP.parse(pattern)
|
104
|
+
expect(root.last).to be_a(Regexp::Expression::Comment)
|
105
|
+
expect(root.last.to_s).to eql('# 😋')
|
106
|
+
expect(root.to_s).to eql(pattern)
|
99
107
|
end
|
100
108
|
end
|
data/spec/lexer/literals_spec.rb
CHANGED
@@ -10,67 +10,42 @@ RSpec.describe('Literal lexing') do
|
|
10
10
|
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
11
11
|
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
|
12
12
|
|
13
|
-
# 2 byte wide characters
|
14
|
-
include_examples 'lex', '
|
15
|
-
0 => [:literal, :literal, '
|
16
|
-
|
17
|
-
|
18
|
-
0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0]
|
19
|
-
|
20
|
-
include_examples 'lex', 'aاbبت?',
|
21
|
-
0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
|
22
|
-
1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
|
23
|
-
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
|
24
|
-
|
25
|
-
include_examples 'lex', 'aا?bبcت+',
|
26
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
27
|
-
1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
|
28
|
-
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
29
|
-
3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
|
30
|
-
4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
|
31
|
-
5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
|
32
|
-
|
33
|
-
include_examples 'lex', 'a(اbب+)cت?',
|
34
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
35
|
-
1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
|
36
|
-
2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
|
37
|
-
3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
|
38
|
-
4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
|
39
|
-
5 => [:group, :close, ')', 8, 9, 0, 0, 0],
|
40
|
-
6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
|
41
|
-
7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
|
42
|
-
8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
|
13
|
+
# 2 byte wide characters
|
14
|
+
include_examples 'lex', 'äöü+',
|
15
|
+
0 => [:literal, :literal, 'äö', 0, 2, 0, 0, 0],
|
16
|
+
1 => [:literal, :literal, 'ü', 2, 3, 0, 0, 0],
|
17
|
+
2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
|
43
18
|
|
44
19
|
# 3 byte wide characters, Japanese
|
45
20
|
include_examples 'lex', 'ab?れます+cd',
|
46
21
|
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
47
22
|
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
48
23
|
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
49
|
-
3 => [:literal, :literal, 'れま', 3,
|
50
|
-
4 => [:literal, :literal, 'す',
|
51
|
-
5 => [:quantifier, :one_or_more, '+',
|
52
|
-
6 => [:literal, :literal, 'cd',
|
24
|
+
3 => [:literal, :literal, 'れま', 3, 5, 0, 0, 0],
|
25
|
+
4 => [:literal, :literal, 'す', 5, 6, 0, 0, 0],
|
26
|
+
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
27
|
+
6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
|
53
28
|
|
54
29
|
# 4 byte wide characters, Osmanya
|
55
30
|
include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
|
56
|
-
0 => [:literal, :literal, '𐒀', 0,
|
57
|
-
1 => [:literal, :literal, '𐒁',
|
58
|
-
2 => [:quantifier, :zero_or_one, '?',
|
59
|
-
3 => [:literal, :literal, '𐒂a',
|
60
|
-
4 => [:literal, :literal, 'b',
|
61
|
-
5 => [:quantifier, :one_or_more, '+',
|
62
|
-
6 => [:literal, :literal, '𐒃',
|
31
|
+
0 => [:literal, :literal, '𐒀', 0, 1, 0, 0, 0],
|
32
|
+
1 => [:literal, :literal, '𐒁', 1, 2, 0, 0, 0],
|
33
|
+
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
34
|
+
3 => [:literal, :literal, '𐒂a', 3, 5, 0, 0, 0],
|
35
|
+
4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
|
36
|
+
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
37
|
+
6 => [:literal, :literal, '𐒃', 7, 8, 0, 0, 0]
|
63
38
|
|
64
39
|
include_examples 'lex', 'mu𝄞?si*𝄫c+',
|
65
40
|
0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
|
66
|
-
1 => [:literal, :literal, '𝄞', 2,
|
67
|
-
2 => [:quantifier, :zero_or_one, '?',
|
68
|
-
3 => [:literal, :literal, 's',
|
69
|
-
4 => [:literal, :literal, 'i',
|
70
|
-
5 => [:quantifier, :zero_or_more, '*',
|
71
|
-
6 => [:literal, :literal, '𝄫',
|
72
|
-
7 => [:literal, :literal, 'c',
|
73
|
-
8 => [:quantifier, :one_or_more, '+',
|
41
|
+
1 => [:literal, :literal, '𝄞', 2, 3, 0, 0, 0],
|
42
|
+
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
43
|
+
3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
|
44
|
+
4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
|
45
|
+
5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
|
46
|
+
6 => [:literal, :literal, '𝄫', 7, 8, 0, 0, 0],
|
47
|
+
7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
|
48
|
+
8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
|
74
49
|
|
75
50
|
specify('lex single 2 byte char') do
|
76
51
|
tokens = RL.lex("\u0627+")
|
data/spec/parser/errors_spec.rb
CHANGED
@@ -9,7 +9,7 @@ RSpec.describe('Parsing errors') do
|
|
9
9
|
.to raise_error(Regexp::Parser::UnknownTokenTypeError)
|
10
10
|
end
|
11
11
|
|
12
|
-
RSpec.shared_examples 'UnknownTokenError' do |type
|
12
|
+
RSpec.shared_examples 'UnknownTokenError' do |type|
|
13
13
|
it "raises for unkown tokens of type #{type}" do
|
14
14
|
expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
|
15
15
|
.to raise_error(Regexp::Parser::UnknownTokenError)
|
data/spec/parser/escapes_spec.rb
CHANGED
@@ -25,7 +25,7 @@ RSpec.describe('EscapeSequence parsing') do
|
|
25
25
|
include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
26
26
|
include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
27
27
|
|
28
|
-
|
28
|
+
# hex escapes
|
29
29
|
include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
|
30
30
|
|
31
31
|
# octal escapes
|
@@ -24,34 +24,13 @@ RSpec.describe('FreeSpace parsing') do
|
|
24
24
|
expect(root.first.text).to eq 'a b c d'
|
25
25
|
end
|
26
26
|
|
27
|
-
specify('parse single-line free space comments without spaces') do
|
28
|
-
regexp = /a#b/x
|
29
|
-
|
30
|
-
root = RP.parse(regexp)
|
31
|
-
expect(root.length).to eq 2
|
32
|
-
|
33
|
-
expect(root[0]).to be_instance_of(Literal)
|
34
|
-
expect(root[1]).to be_instance_of(Comment)
|
35
|
-
end
|
36
|
-
|
37
|
-
specify('parse single-line free space comments with spaces') do
|
38
|
-
regexp = /a # b/x
|
39
|
-
|
40
|
-
root = RP.parse(regexp)
|
41
|
-
expect(root.length).to eq 3
|
42
|
-
|
43
|
-
expect(root[0]).to be_instance_of(Literal)
|
44
|
-
expect(root[1]).to be_instance_of(WhiteSpace)
|
45
|
-
expect(root[2]).to be_instance_of(Comment)
|
46
|
-
end
|
47
|
-
|
48
27
|
specify('parse free space comments') do
|
49
28
|
regexp = /
|
50
29
|
a ? # One letter
|
51
30
|
b {2,5} # Another one
|
52
31
|
[c-g] + # A set
|
53
32
|
(h|i|j) | # A group
|
54
|
-
klm
|
33
|
+
klm *
|
55
34
|
nop +
|
56
35
|
/x
|
57
36
|
|
@@ -72,11 +51,11 @@ RSpec.describe('FreeSpace parsing') do
|
|
72
51
|
|
73
52
|
alt_2 = alt.alternatives.last
|
74
53
|
expect(alt_2).to be_instance_of(Alternative)
|
75
|
-
expect(alt_2.length).to eq
|
54
|
+
expect(alt_2.length).to eq 7
|
76
55
|
|
77
|
-
[0, 2,
|
56
|
+
[0, 2, 4, 6].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
|
78
57
|
|
79
|
-
|
58
|
+
expect(alt_2[1]).to be_instance_of(Comment)
|
80
59
|
end
|
81
60
|
|
82
61
|
specify('parse free space nested comments') do
|
@@ -11,6 +11,7 @@ RSpec.describe('Quantifier parsing') do
|
|
11
11
|
expect(exp.quantifier.min).to eq min
|
12
12
|
expect(exp.quantifier.max).to eq max
|
13
13
|
expect(exp.quantifier.mode).to eq mode
|
14
|
+
expect(exp.quantifier.text).to eq text
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
@@ -37,6 +38,21 @@ RSpec.describe('Quantifier parsing') do
|
|
37
38
|
include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
|
38
39
|
include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
|
39
40
|
|
41
|
+
# special case: exps with chained quantifiers are wrapped in implicit passive groups
|
42
|
+
include_examples 'parse', /a+{2}{3}/,
|
43
|
+
0 => [
|
44
|
+
:group, :passive, Group::Passive, implicit?: true, level: 0,
|
45
|
+
quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
|
46
|
+
],
|
47
|
+
[0, 0] => [
|
48
|
+
:group, :passive, Group::Passive, implicit?: true, level: 1,
|
49
|
+
quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
|
50
|
+
],
|
51
|
+
[0, 0, 0] => [
|
52
|
+
:literal, :literal, Literal, text: 'a', level: 2,
|
53
|
+
quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
|
54
|
+
]
|
55
|
+
|
40
56
|
specify('mode-checking methods') do
|
41
57
|
exp = RP.parse(/a??/).first
|
42
58
|
|
@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
specify('parse set range hex') do
|
20
|
-
root = RP.parse('[\\x00-\\
|
20
|
+
root = RP.parse('[\\x00-\\x22]')
|
21
21
|
set = root[0]
|
22
22
|
range = set[0]
|
23
23
|
|
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
26
26
|
expect(range.count).to eq 2
|
27
27
|
expect(range.first.to_s).to eq '\\x00'
|
28
28
|
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
|
-
expect(range.last.to_s).to eq '\\
|
29
|
+
expect(range.last.to_s).to eq '\\x22'
|
30
30
|
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set).to match
|
31
|
+
expect(set).to match "\x11"
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('parse set range unicode') do
|
@@ -11,7 +11,13 @@ RSpec.describe('Escape scanning') do
|
|
11
11
|
include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
|
12
12
|
include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
|
13
13
|
|
14
|
+
# ineffectual literal escapes
|
15
|
+
# these cause "Unknown escape" warnings in Ruby for ascii chars,
|
16
|
+
# and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
|
17
|
+
# In terms of matching, Ruby treats them both like non-escaped literals.
|
14
18
|
include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
|
19
|
+
include_examples 'scan', 'a\üc', 1 => [:escape, :literal, '\ü', 1, 3]
|
20
|
+
include_examples 'scan', 'a\😋c', 1 => [:escape, :literal, '\😋', 1, 3]
|
15
21
|
|
16
22
|
# these incomplete ref/call sequences are treated as literal escapes by Ruby
|
17
23
|
include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
|
@@ -21,6 +27,7 @@ RSpec.describe('Escape scanning') do
|
|
21
27
|
include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
|
22
28
|
include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
|
23
29
|
|
30
|
+
include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
|
24
31
|
include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
|
25
32
|
include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
|
26
33
|
|
@@ -39,6 +39,17 @@ RSpec.describe('FreeSpace scanning') do
|
|
39
39
|
11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
|
40
40
|
17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
|
41
41
|
29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
|
42
|
+
|
43
|
+
# single line / no trailing newline (c.f. issue #66)
|
44
|
+
include_examples 'scan', /a # b/x,
|
45
|
+
0 => [:literal, :literal, 'a', 0, 1],
|
46
|
+
1 => [:free_space, :whitespace, ' ', 1, 2],
|
47
|
+
2 => [:free_space, :comment, "# b", 2, 5]
|
48
|
+
|
49
|
+
# without spaces (c.f. issue #66)
|
50
|
+
include_examples 'scan', /a#b/x,
|
51
|
+
0 => [:literal, :literal, 'a', 0, 1],
|
52
|
+
1 => [:free_space, :comment, "#b", 1, 3]
|
42
53
|
end
|
43
54
|
|
44
55
|
describe('scan free space inlined') do
|
@@ -130,4 +141,25 @@ RSpec.describe('FreeSpace scanning') do
|
|
130
141
|
26 => [:literal, :literal, 'i j', 35, 38],
|
131
142
|
27 => [:group, :close, ')', 38, 39]
|
132
143
|
end
|
144
|
+
|
145
|
+
describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70
|
146
|
+
include_examples 'scan', /a#bcd/,
|
147
|
+
0 => [:literal, :literal, 'a#bcd', 0, 5]
|
148
|
+
include_examples 'scan', /a # bcd/,
|
149
|
+
0 => [:literal, :literal, 'a # bcd', 0, 7]
|
150
|
+
|
151
|
+
include_examples 'scan', /a#\d/,
|
152
|
+
0 => [:literal, :literal, 'a#', 0, 2],
|
153
|
+
1 => [:type, :digit, '\d', 2, 4]
|
154
|
+
include_examples 'scan', /a # \d/,
|
155
|
+
0 => [:literal, :literal, 'a # ', 0, 4],
|
156
|
+
1 => [:type, :digit, '\d', 4, 6]
|
157
|
+
|
158
|
+
include_examples 'scan', /a#()/,
|
159
|
+
0 => [:literal, :literal, 'a#', 0, 2],
|
160
|
+
1 => [:group, :capture, '(', 2, 3]
|
161
|
+
include_examples 'scan', /a # ()/,
|
162
|
+
0 => [:literal, :literal, 'a # ', 0, 4],
|
163
|
+
1 => [:group, :capture, '(', 4, 5]
|
164
|
+
end
|
133
165
|
end
|