regexp_parser 1.8.1 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +70 -0
- data/Gemfile +1 -0
- data/README.md +12 -11
- data/Rakefile +2 -2
- data/lib/regexp_parser/expression.rb +10 -19
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +22 -2
- data/lib/regexp_parser/expression/classes/root.rb +4 -16
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/expression/quantifier.rb +9 -0
- data/lib/regexp_parser/expression/sequence.rb +0 -10
- data/lib/regexp_parser/lexer.rb +2 -2
- data/lib/regexp_parser/parser.rb +27 -2
- data/lib/regexp_parser/scanner.rb +1194 -1272
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +178 -186
- data/lib/regexp_parser/syntax.rb +4 -4
- data/lib/regexp_parser/syntax/any.rb +2 -2
- data/lib/regexp_parser/syntax/base.rb +1 -1
- data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/expression/base_spec.rb +10 -0
- data/spec/expression/subexpression_spec.rb +1 -1
- data/spec/expression/to_s_spec.rb +39 -31
- data/spec/lexer/literals_spec.rb +24 -49
- data/spec/parser/errors_spec.rb +1 -1
- data/spec/parser/escapes_spec.rb +1 -1
- data/spec/parser/quantifiers_spec.rb +16 -0
- data/spec/parser/set/ranges_spec.rb +3 -3
- data/spec/scanner/escapes_spec.rb +7 -0
- data/spec/scanner/groups_spec.rb +10 -1
- data/spec/scanner/literals_spec.rb +28 -38
- data/spec/scanner/quantifiers_spec.rb +18 -13
- data/spec/scanner/sets_spec.rb +23 -5
- data/spec/spec_helper.rb +1 -0
- metadata +56 -60
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
data/lib/regexp_parser/syntax.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
module Regexp::Syntax
|
2
|
+
class SyntaxError < StandardError; end
|
3
|
+
end
|
4
|
+
|
1
5
|
require File.expand_path('../syntax/tokens', __FILE__)
|
2
6
|
require File.expand_path('../syntax/base', __FILE__)
|
3
7
|
require File.expand_path('../syntax/any', __FILE__)
|
4
8
|
require File.expand_path('../syntax/version_lookup', __FILE__)
|
5
9
|
require File.expand_path('../syntax/versions', __FILE__)
|
6
|
-
|
7
|
-
module Regexp::Syntax
|
8
|
-
class SyntaxError < StandardError; end
|
9
|
-
end
|
@@ -3,13 +3,13 @@ module Regexp::Syntax
|
|
3
3
|
VERSION_REGEXP = /#{VERSION_FORMAT}/
|
4
4
|
VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
|
5
5
|
|
6
|
-
class InvalidVersionNameError < SyntaxError
|
6
|
+
class InvalidVersionNameError < Regexp::Syntax::SyntaxError
|
7
7
|
def initialize(name)
|
8
8
|
super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
-
class UnknownSyntaxNameError < SyntaxError
|
12
|
+
class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
|
13
13
|
def initialize(name)
|
14
14
|
super "Unknown syntax name '#{name}'."
|
15
15
|
end
|
@@ -74,9 +74,9 @@ module Regexp::Syntax
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def warn_if_future_version(const_name)
|
77
|
-
return if comparable_version(const_name) < comparable_version('
|
77
|
+
return if comparable_version(const_name) < comparable_version('4.0.0')
|
78
78
|
|
79
|
-
warn('This library has only been tested up to Ruby
|
79
|
+
warn('This library has only been tested up to Ruby 3.x, '\
|
80
80
|
"but you are running with #{const_get(const_name).inspect}")
|
81
81
|
end
|
82
82
|
end
|
@@ -91,4 +91,14 @@ RSpec.describe(Regexp::Expression::Base) do
|
|
91
91
|
expect(RP.parse(/a*/)[0].repetitions).to eq 0..(Float::INFINITY)
|
92
92
|
expect(RP.parse(/a+/)[0].repetitions).to eq 1..(Float::INFINITY)
|
93
93
|
end
|
94
|
+
|
95
|
+
specify('#base_length') do
|
96
|
+
expect(RP.parse(/(aa)/)[0].base_length).to eq 4
|
97
|
+
expect(RP.parse(/(aa){42}/)[0].base_length).to eq 4
|
98
|
+
end
|
99
|
+
|
100
|
+
specify('#full_length') do
|
101
|
+
expect(RP.parse(/(aa)/)[0].full_length).to eq 4
|
102
|
+
expect(RP.parse(/(aa){42}/)[0].full_length).to eq 8
|
103
|
+
end
|
94
104
|
end
|
@@ -32,7 +32,7 @@ RSpec.describe(Regexp::Expression::Subexpression) do
|
|
32
32
|
}
|
33
33
|
|
34
34
|
root.each_expression do |exp|
|
35
|
-
next unless expected_nesting_level = tests.delete(exp.to_s)
|
35
|
+
next unless (expected_nesting_level = tests.delete(exp.to_s))
|
36
36
|
expect(expected_nesting_level).to eq exp.nesting_level
|
37
37
|
end
|
38
38
|
|
@@ -1,58 +1,50 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('Expression#to_s') do
|
4
|
-
|
5
|
-
pattern
|
4
|
+
def parse_frozen(pattern, ruby_version = nil)
|
5
|
+
IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
|
6
|
+
end
|
7
|
+
|
8
|
+
def expect_round_trip(pattern, ruby_version = nil)
|
9
|
+
parsed = parse_frozen(pattern, ruby_version)
|
6
10
|
|
7
|
-
expect(
|
11
|
+
expect(parsed.to_s).to eql(pattern)
|
8
12
|
end
|
9
13
|
|
10
|
-
specify('
|
11
|
-
|
14
|
+
specify('literal alternation') do
|
15
|
+
expect_round_trip('abcd|ghij|klmn|pqur')
|
16
|
+
end
|
12
17
|
|
13
|
-
|
18
|
+
specify('quantified alternations') do
|
19
|
+
expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
|
14
20
|
end
|
15
21
|
|
16
22
|
specify('quantified sets') do
|
17
|
-
|
18
|
-
|
19
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
23
|
+
expect_round_trip('[abc]+|[^def]{3,6}')
|
20
24
|
end
|
21
25
|
|
22
26
|
specify('property sets') do
|
23
|
-
|
24
|
-
|
25
|
-
expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
|
27
|
+
expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
|
26
28
|
end
|
27
29
|
|
28
30
|
specify('groups') do
|
29
|
-
|
30
|
-
|
31
|
-
expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
|
31
|
+
expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('assertions') do
|
35
|
-
|
36
|
-
|
37
|
-
expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
|
35
|
+
expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
|
38
36
|
end
|
39
37
|
|
40
38
|
specify('comments') do
|
41
|
-
|
42
|
-
|
43
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
39
|
+
expect_round_trip('(?#start)a(?#middle)b(?#end)')
|
44
40
|
end
|
45
41
|
|
46
42
|
specify('options') do
|
47
|
-
|
48
|
-
|
49
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
43
|
+
expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
|
50
44
|
end
|
51
45
|
|
52
46
|
specify('url') do
|
53
|
-
|
54
|
-
|
55
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
47
|
+
expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
|
56
48
|
end
|
57
49
|
|
58
50
|
specify('multiline source') do
|
@@ -64,7 +56,7 @@ RSpec.describe('Expression#to_s') do
|
|
64
56
|
\z
|
65
57
|
/x
|
66
58
|
|
67
|
-
expect(
|
59
|
+
expect(parse_frozen(multiline).to_s).to eql(multiline.source)
|
68
60
|
end
|
69
61
|
|
70
62
|
specify('multiline #to_s') do
|
@@ -76,7 +68,7 @@ RSpec.describe('Expression#to_s') do
|
|
76
68
|
\z
|
77
69
|
/x
|
78
70
|
|
79
|
-
|
71
|
+
expect_round_trip(multiline.to_s)
|
80
72
|
end
|
81
73
|
|
82
74
|
# Free spacing expressions that use spaces between quantifiers and their
|
@@ -93,8 +85,24 @@ RSpec.describe('Expression#to_s') do
|
|
93
85
|
/x
|
94
86
|
|
95
87
|
str = 'bbbcged'
|
96
|
-
root =
|
88
|
+
root = parse_frozen(multiline)
|
89
|
+
|
90
|
+
expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
|
91
|
+
end
|
92
|
+
|
93
|
+
# special case: implicit groups used for chained quantifiers produce no parens
|
94
|
+
specify 'chained quantifiers #to_s' do
|
95
|
+
pattern = /a+{1}{2}/
|
96
|
+
root = parse_frozen(pattern)
|
97
|
+
expect(root.to_s).to eql('a+{1}{2}')
|
98
|
+
end
|
97
99
|
|
98
|
-
|
100
|
+
# regression test for https://github.com/ammar/regexp_parser/issues/74
|
101
|
+
specify('non-ascii comment') do
|
102
|
+
pattern = '(?x) 😋 # 😋'
|
103
|
+
root = RP.parse(pattern)
|
104
|
+
expect(root.last).to be_a(Regexp::Expression::Comment)
|
105
|
+
expect(root.last.to_s).to eql('# 😋')
|
106
|
+
expect(root.to_s).to eql(pattern)
|
99
107
|
end
|
100
108
|
end
|
data/spec/lexer/literals_spec.rb
CHANGED
@@ -10,67 +10,42 @@ RSpec.describe('Literal lexing') do
|
|
10
10
|
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
11
11
|
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
|
12
12
|
|
13
|
-
# 2 byte wide characters
|
14
|
-
include_examples 'lex', '
|
15
|
-
0 => [:literal, :literal, '
|
16
|
-
|
17
|
-
|
18
|
-
0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0]
|
19
|
-
|
20
|
-
include_examples 'lex', 'aاbبت?',
|
21
|
-
0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
|
22
|
-
1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
|
23
|
-
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
|
24
|
-
|
25
|
-
include_examples 'lex', 'aا?bبcت+',
|
26
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
27
|
-
1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
|
28
|
-
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
29
|
-
3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
|
30
|
-
4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
|
31
|
-
5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
|
32
|
-
|
33
|
-
include_examples 'lex', 'a(اbب+)cت?',
|
34
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
35
|
-
1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
|
36
|
-
2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
|
37
|
-
3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
|
38
|
-
4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
|
39
|
-
5 => [:group, :close, ')', 8, 9, 0, 0, 0],
|
40
|
-
6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
|
41
|
-
7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
|
42
|
-
8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
|
13
|
+
# 2 byte wide characters
|
14
|
+
include_examples 'lex', 'äöü+',
|
15
|
+
0 => [:literal, :literal, 'äö', 0, 2, 0, 0, 0],
|
16
|
+
1 => [:literal, :literal, 'ü', 2, 3, 0, 0, 0],
|
17
|
+
2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
|
43
18
|
|
44
19
|
# 3 byte wide characters, Japanese
|
45
20
|
include_examples 'lex', 'ab?れます+cd',
|
46
21
|
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
47
22
|
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
48
23
|
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
49
|
-
3 => [:literal, :literal, 'れま', 3,
|
50
|
-
4 => [:literal, :literal, 'す',
|
51
|
-
5 => [:quantifier, :one_or_more, '+',
|
52
|
-
6 => [:literal, :literal, 'cd',
|
24
|
+
3 => [:literal, :literal, 'れま', 3, 5, 0, 0, 0],
|
25
|
+
4 => [:literal, :literal, 'す', 5, 6, 0, 0, 0],
|
26
|
+
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
27
|
+
6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
|
53
28
|
|
54
29
|
# 4 byte wide characters, Osmanya
|
55
30
|
include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
|
56
|
-
0 => [:literal, :literal, '𐒀', 0,
|
57
|
-
1 => [:literal, :literal, '𐒁',
|
58
|
-
2 => [:quantifier, :zero_or_one, '?',
|
59
|
-
3 => [:literal, :literal, '𐒂a',
|
60
|
-
4 => [:literal, :literal, 'b',
|
61
|
-
5 => [:quantifier, :one_or_more, '+',
|
62
|
-
6 => [:literal, :literal, '𐒃',
|
31
|
+
0 => [:literal, :literal, '𐒀', 0, 1, 0, 0, 0],
|
32
|
+
1 => [:literal, :literal, '𐒁', 1, 2, 0, 0, 0],
|
33
|
+
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
34
|
+
3 => [:literal, :literal, '𐒂a', 3, 5, 0, 0, 0],
|
35
|
+
4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
|
36
|
+
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
37
|
+
6 => [:literal, :literal, '𐒃', 7, 8, 0, 0, 0]
|
63
38
|
|
64
39
|
include_examples 'lex', 'mu𝄞?si*𝄫c+',
|
65
40
|
0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
|
66
|
-
1 => [:literal, :literal, '𝄞', 2,
|
67
|
-
2 => [:quantifier, :zero_or_one, '?',
|
68
|
-
3 => [:literal, :literal, 's',
|
69
|
-
4 => [:literal, :literal, 'i',
|
70
|
-
5 => [:quantifier, :zero_or_more, '*',
|
71
|
-
6 => [:literal, :literal, '𝄫',
|
72
|
-
7 => [:literal, :literal, 'c',
|
73
|
-
8 => [:quantifier, :one_or_more, '+',
|
41
|
+
1 => [:literal, :literal, '𝄞', 2, 3, 0, 0, 0],
|
42
|
+
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
43
|
+
3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
|
44
|
+
4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
|
45
|
+
5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
|
46
|
+
6 => [:literal, :literal, '𝄫', 7, 8, 0, 0, 0],
|
47
|
+
7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
|
48
|
+
8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
|
74
49
|
|
75
50
|
specify('lex single 2 byte char') do
|
76
51
|
tokens = RL.lex("\u0627+")
|
data/spec/parser/errors_spec.rb
CHANGED
@@ -9,7 +9,7 @@ RSpec.describe('Parsing errors') do
|
|
9
9
|
.to raise_error(Regexp::Parser::UnknownTokenTypeError)
|
10
10
|
end
|
11
11
|
|
12
|
-
RSpec.shared_examples 'UnknownTokenError' do |type
|
12
|
+
RSpec.shared_examples 'UnknownTokenError' do |type|
|
13
13
|
it "raises for unkown tokens of type #{type}" do
|
14
14
|
expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
|
15
15
|
.to raise_error(Regexp::Parser::UnknownTokenError)
|
data/spec/parser/escapes_spec.rb
CHANGED
@@ -25,7 +25,7 @@ RSpec.describe('EscapeSequence parsing') do
|
|
25
25
|
include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
26
26
|
include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
27
27
|
|
28
|
-
|
28
|
+
# hex escapes
|
29
29
|
include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
|
30
30
|
|
31
31
|
# octal escapes
|
@@ -11,6 +11,7 @@ RSpec.describe('Quantifier parsing') do
|
|
11
11
|
expect(exp.quantifier.min).to eq min
|
12
12
|
expect(exp.quantifier.max).to eq max
|
13
13
|
expect(exp.quantifier.mode).to eq mode
|
14
|
+
expect(exp.quantifier.text).to eq text
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
@@ -37,6 +38,21 @@ RSpec.describe('Quantifier parsing') do
|
|
37
38
|
include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
|
38
39
|
include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
|
39
40
|
|
41
|
+
# special case: exps with chained quantifiers are wrapped in implicit passive groups
|
42
|
+
include_examples 'parse', /a+{2}{3}/,
|
43
|
+
0 => [
|
44
|
+
:group, :passive, Group::Passive, implicit?: true, level: 0,
|
45
|
+
quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
|
46
|
+
],
|
47
|
+
[0, 0] => [
|
48
|
+
:group, :passive, Group::Passive, implicit?: true, level: 1,
|
49
|
+
quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
|
50
|
+
],
|
51
|
+
[0, 0, 0] => [
|
52
|
+
:literal, :literal, Literal, text: 'a', level: 2,
|
53
|
+
quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
|
54
|
+
]
|
55
|
+
|
40
56
|
specify('mode-checking methods') do
|
41
57
|
exp = RP.parse(/a??/).first
|
42
58
|
|
@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
specify('parse set range hex') do
|
20
|
-
root = RP.parse('[\\x00-\\
|
20
|
+
root = RP.parse('[\\x00-\\x22]')
|
21
21
|
set = root[0]
|
22
22
|
range = set[0]
|
23
23
|
|
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
26
26
|
expect(range.count).to eq 2
|
27
27
|
expect(range.first.to_s).to eq '\\x00'
|
28
28
|
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
|
-
expect(range.last.to_s).to eq '\\
|
29
|
+
expect(range.last.to_s).to eq '\\x22'
|
30
30
|
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set).to match
|
31
|
+
expect(set).to match "\x11"
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('parse set range unicode') do
|
@@ -11,7 +11,13 @@ RSpec.describe('Escape scanning') do
|
|
11
11
|
include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
|
12
12
|
include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
|
13
13
|
|
14
|
+
# ineffectual literal escapes
|
15
|
+
# these cause "Unknown escape" warnings in Ruby for ascii chars,
|
16
|
+
# and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
|
17
|
+
# In terms of matching, Ruby treats them both like non-escaped literals.
|
14
18
|
include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
|
19
|
+
include_examples 'scan', 'a\üc', 1 => [:escape, :literal, '\ü', 1, 3]
|
20
|
+
include_examples 'scan', 'a\😋c', 1 => [:escape, :literal, '\😋', 1, 3]
|
15
21
|
|
16
22
|
# these incomplete ref/call sequences are treated as literal escapes by Ruby
|
17
23
|
include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
|
@@ -21,6 +27,7 @@ RSpec.describe('Escape scanning') do
|
|
21
27
|
include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
|
22
28
|
include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
|
23
29
|
|
30
|
+
include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
|
24
31
|
include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
|
25
32
|
include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
|
26
33
|
|
data/spec/scanner/groups_spec.rb
CHANGED
@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
|
|
5
5
|
include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
|
6
6
|
include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
|
7
7
|
|
8
|
+
# Named groups
|
9
|
+
# only names that start with a hyphen or digit (ascii or other) are invalid
|
8
10
|
include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
|
9
11
|
include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
|
10
|
-
|
11
12
|
include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
|
12
13
|
include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
|
14
|
+
include_examples 'scan', '(?<name-1>abc)', 0 => [:group, :named_ab, '(?<name-1>', 0,10]
|
15
|
+
include_examples 'scan', "(?'name-1'abc)", 0 => [:group, :named_sq, "(?'name-1'", 0,10]
|
16
|
+
include_examples 'scan', "(?<name'1>abc)", 0 => [:group, :named_ab, "(?<name'1>", 0,10]
|
17
|
+
include_examples 'scan', "(?'name>1'abc)", 0 => [:group, :named_sq, "(?'name>1'", 0,10]
|
18
|
+
include_examples 'scan', '(?<üüuuüü>abc)', 0 => [:group, :named_ab, '(?<üüuuüü>', 0,10]
|
19
|
+
include_examples 'scan', "(?'üüuuüü'abc)", 0 => [:group, :named_sq, "(?'üüuuüü'", 0,10]
|
20
|
+
include_examples 'scan', "(?<😋1234😋>abc)", 0 => [:group, :named_ab, "(?<😋1234😋>", 0,10]
|
21
|
+
include_examples 'scan', "(?'😋1234😋'abc)", 0 => [:group, :named_sq, "(?'😋1234😋'", 0,10]
|
13
22
|
|
14
23
|
include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
|
15
24
|
include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
|
@@ -2,48 +2,38 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
RSpec.describe('UTF8 scanning') do
|
4
4
|
# ascii, single byte characters
|
5
|
-
include_examples 'scan', 'a',
|
5
|
+
include_examples 'scan', 'a',
|
6
|
+
0 => [:literal, :literal, 'a', 0, 1]
|
6
7
|
|
7
|
-
include_examples 'scan', 'ab+',
|
8
|
-
|
8
|
+
include_examples 'scan', 'ab+',
|
9
|
+
0 => [:literal, :literal, 'ab', 0, 2],
|
10
|
+
1 => [:quantifier, :one_or_more, '+', 2, 3]
|
9
11
|
|
10
|
-
# 2 byte wide characters
|
11
|
-
include_examples 'scan', '
|
12
|
-
|
13
|
-
include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
|
14
|
-
include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
|
15
|
-
|
16
|
-
include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
|
17
|
-
include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
|
18
|
-
include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
|
19
|
-
include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
|
20
|
-
|
21
|
-
include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
|
22
|
-
include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
|
23
|
-
include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
|
24
|
-
include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
|
25
|
-
include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
|
26
|
-
include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
|
27
|
-
include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
|
12
|
+
# 2 byte wide characters
|
13
|
+
include_examples 'scan', 'äöü',
|
14
|
+
0 => [:literal, :literal, 'äöü', 0, 3]
|
28
15
|
|
29
16
|
# 3 byte wide characters, Japanese
|
30
|
-
include_examples 'scan', 'ab?れます+cd',
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
17
|
+
include_examples 'scan', 'ab?れます+cd',
|
18
|
+
0 => [:literal, :literal, 'ab', 0, 2],
|
19
|
+
1 => [:quantifier, :zero_or_one, '?', 2, 3],
|
20
|
+
2 => [:literal, :literal, 'れます', 3, 6],
|
21
|
+
3 => [:quantifier, :one_or_more, '+', 6, 7],
|
22
|
+
4 => [:literal, :literal, 'cd', 7, 9]
|
35
23
|
|
36
24
|
# 4 byte wide characters, Osmanya
|
37
|
-
include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
include_examples 'scan', 'mu𝄞?si*𝄫c+',
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
25
|
+
include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
|
26
|
+
0 => [:literal, :literal, '𐒀𐒁', 0, 2],
|
27
|
+
1 => [:quantifier, :zero_or_one, '?', 2, 3],
|
28
|
+
2 => [:literal, :literal, '𐒂ab', 3, 6],
|
29
|
+
3 => [:quantifier, :one_or_more, '+', 6, 7],
|
30
|
+
4 => [:literal, :literal, '𐒃', 7, 8]
|
31
|
+
|
32
|
+
include_examples 'scan', 'mu𝄞?si*𝄫c+',
|
33
|
+
0 => [:literal, :literal, 'mu𝄞', 0, 3],
|
34
|
+
1 => [:quantifier, :zero_or_one, '?', 3, 4],
|
35
|
+
2 => [:literal, :literal, 'si', 4, 6],
|
36
|
+
3 => [:quantifier, :zero_or_more, '*', 6, 7],
|
37
|
+
4 => [:literal, :literal, '𝄫c', 7, 9],
|
38
|
+
5 => [:quantifier, :one_or_more, '+', 9, 10]
|
49
39
|
end
|