regexp_parser 2.2.0 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -2
- data/LICENSE +1 -1
- data/README.md +2 -2
- data/Rakefile +5 -8
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +12 -7
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/scanner/properties/long.csv +604 -0
- data/lib/regexp_parser/scanner/properties/short.csv +242 -0
- data/lib/regexp_parser/scanner/scanner.rl +6 -4
- data/lib/regexp_parser/scanner.rb +126 -124
- data/lib/regexp_parser/syntax/base.rb +3 -5
- data/lib/regexp_parser/syntax/token/backreference.rb +7 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +20 -22
- metadata +11 -143
- data/lib/regexp_parser/scanner/properties/long.yml +0 -607
- data/lib/regexp_parser/scanner/properties/short.yml +0 -245
- data/spec/expression/base_spec.rb +0 -104
- data/spec/expression/clone_spec.rb +0 -152
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -108
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -64
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -60
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -133
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/options_spec.rb +0 -28
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -117
- data/spec/parser/quantifiers_spec.rb +0 -68
- data/spec/parser/refcalls_spec.rb +0 -117
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -121
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -73
- data/spec/scanner/free_space_spec.rb +0 -165
- data/spec/scanner/groups_spec.rb +0 -61
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -39
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/options_spec.rb +0 -36
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -25
- data/spec/scanner/refcalls_spec.rb +0 -55
- data/spec/scanner/sets_spec.rb +0 -151
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -28
- data/spec/support/capturing_stderr.rb +0 -9
- data/spec/support/shared_examples.rb +0 -77
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -38
- data/spec/token/token_spec.rb +0 -85
@@ -1,128 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Expression#options') do
|
4
|
-
it 'returns a hash of options/flags that affect the expression' do
|
5
|
-
exp = RP.parse(/a/ix)[0]
|
6
|
-
expect(exp).to be_a Literal
|
7
|
-
expect(exp.options).to eq(i: true, x: true)
|
8
|
-
end
|
9
|
-
|
10
|
-
it 'includes options that are locally enabled via special groups' do
|
11
|
-
exp = RP.parse(/(?x)(?m:a)/i)[1][0]
|
12
|
-
expect(exp).to be_a Literal
|
13
|
-
expect(exp.options).to eq(i: true, m: true, x: true)
|
14
|
-
end
|
15
|
-
|
16
|
-
it 'excludes locally disabled options' do
|
17
|
-
exp = RP.parse(/(?x)(?-im:a)/i)[1][0]
|
18
|
-
expect(exp).to be_a Literal
|
19
|
-
expect(exp.options).to eq(x: true)
|
20
|
-
end
|
21
|
-
|
22
|
-
it 'gives correct precedence to negative options' do
|
23
|
-
# Negative options have precedence. E.g. /(?i-i)a/ is case-sensitive.
|
24
|
-
regexp = /(?i-i:a)/
|
25
|
-
expect(regexp).to match 'a'
|
26
|
-
expect(regexp).not_to match 'A'
|
27
|
-
|
28
|
-
exp = RP.parse(regexp)[0][0]
|
29
|
-
expect(exp).to be_a Literal
|
30
|
-
expect(exp.options).to eq({})
|
31
|
-
end
|
32
|
-
|
33
|
-
it 'correctly handles multiple negative option parts' do
|
34
|
-
regexp = /(?--m--mx--) . /mx
|
35
|
-
expect(regexp).to match ' . '
|
36
|
-
expect(regexp).not_to match '.'
|
37
|
-
expect(regexp).not_to match "\n"
|
38
|
-
|
39
|
-
exp = RP.parse(regexp)[2]
|
40
|
-
expect(exp.options).to eq({})
|
41
|
-
end
|
42
|
-
|
43
|
-
it 'gives correct precedence when encountering multiple encoding flags' do
|
44
|
-
# Any encoding flag overrides all previous encoding flags. If there are
|
45
|
-
# multiple encoding flags in an options string, the last one wins.
|
46
|
-
# E.g. /(?dau)\w/ matches UTF8 chars but /(?dua)\w/ only ASCII chars.
|
47
|
-
regexp1 = /(?dau)\w/
|
48
|
-
regexp2 = /(?dua)\w/
|
49
|
-
expect(regexp1).to match 'ü'
|
50
|
-
expect(regexp2).not_to match 'ü'
|
51
|
-
|
52
|
-
exp1 = RP.parse(regexp1)[1]
|
53
|
-
exp2 = RP.parse(regexp2)[1]
|
54
|
-
expect(exp1.options).to eq(u: true)
|
55
|
-
expect(exp2.options).to eq(a: true)
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'is accessible via shortcuts' do
|
59
|
-
exp = Root.build
|
60
|
-
|
61
|
-
expect { exp.options[:i] = true }
|
62
|
-
.to change { exp.i? }.from(false).to(true)
|
63
|
-
.and change { exp.ignore_case? }.from(false).to(true)
|
64
|
-
.and change { exp.case_insensitive? }.from(false).to(true)
|
65
|
-
|
66
|
-
expect { exp.options[:m] = true }
|
67
|
-
.to change { exp.m? }.from(false).to(true)
|
68
|
-
.and change { exp.multiline? }.from(false).to(true)
|
69
|
-
|
70
|
-
expect { exp.options[:x] = true }
|
71
|
-
.to change { exp.x? }.from(false).to(true)
|
72
|
-
.and change { exp.extended? }.from(false).to(true)
|
73
|
-
.and change { exp.free_spacing? }.from(false).to(true)
|
74
|
-
|
75
|
-
expect { exp.options[:a] = true }
|
76
|
-
.to change { exp.a? }.from(false).to(true)
|
77
|
-
.and change { exp.ascii_classes? }.from(false).to(true)
|
78
|
-
|
79
|
-
expect { exp.options[:d] = true }
|
80
|
-
.to change { exp.d? }.from(false).to(true)
|
81
|
-
.and change { exp.default_classes? }.from(false).to(true)
|
82
|
-
|
83
|
-
expect { exp.options[:u] = true }
|
84
|
-
.to change { exp.u? }.from(false).to(true)
|
85
|
-
.and change { exp.unicode_classes? }.from(false).to(true)
|
86
|
-
end
|
87
|
-
|
88
|
-
RSpec.shared_examples '#options' do |regexp, path, klass|
|
89
|
-
it "works for expression class #{klass}" do
|
90
|
-
exp = RP.parse(/#{regexp.source}/i).dig(*path)
|
91
|
-
expect(exp).to be_a(klass)
|
92
|
-
expect(exp).to be_i
|
93
|
-
expect(exp).not_to be_x
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
include_examples '#options', //, [], Root
|
98
|
-
include_examples '#options', /a/, [0], Literal
|
99
|
-
include_examples '#options', /\A/, [0], Anchor::Base
|
100
|
-
include_examples '#options', /\d/, [0], CharacterType::Base
|
101
|
-
include_examples '#options', /\n/, [0], EscapeSequence::Base
|
102
|
-
include_examples '#options', /\K/, [0], Keep::Mark
|
103
|
-
include_examples '#options', /./, [0], CharacterType::Any
|
104
|
-
include_examples '#options', /(a)/, [0], Group::Base
|
105
|
-
include_examples '#options', /(a)/, [0, 0], Literal
|
106
|
-
include_examples '#options', /(?=a)/, [0], Assertion::Base
|
107
|
-
include_examples '#options', /(?=a)/, [0, 0], Literal
|
108
|
-
include_examples '#options', /(a|b)/, [0], Group::Base
|
109
|
-
include_examples '#options', /(a|b)/, [0, 0], Alternation
|
110
|
-
include_examples '#options', /(a|b)/, [0, 0, 0], Alternative
|
111
|
-
include_examples '#options', /(a|b)/, [0, 0, 0, 0], Literal
|
112
|
-
include_examples '#options', /(a)\1/, [1], Backreference::Base
|
113
|
-
include_examples '#options', /(a)\k<1>/, [1], Backreference::Number
|
114
|
-
include_examples '#options', /(a)\g<1>/, [1], Backreference::NumberCall
|
115
|
-
include_examples '#options', /[a]/, [0], CharacterSet
|
116
|
-
include_examples '#options', /[a]/, [0, 0], Literal
|
117
|
-
include_examples '#options', /[a-z]/, [0, 0], CharacterSet::Range
|
118
|
-
include_examples '#options', /[a-z]/, [0, 0, 0], Literal
|
119
|
-
include_examples '#options', /[a&&z]/, [0, 0], CharacterSet::Intersection
|
120
|
-
include_examples '#options', /[a&&z]/, [0, 0, 0], CharacterSet::IntersectedSequence
|
121
|
-
include_examples '#options', /[a&&z]/, [0, 0, 0, 0], Literal
|
122
|
-
include_examples '#options', /[[:ascii:]]/, [0, 0], PosixClass
|
123
|
-
include_examples '#options', /\p{word}/, [0], UnicodeProperty::Base
|
124
|
-
include_examples '#options', /(a)(?(1)b|c)/, [1], Conditional::Expression
|
125
|
-
include_examples '#options', /(a)(?(1)b|c)/, [1, 0], Conditional::Condition
|
126
|
-
include_examples '#options', /(a)(?(1)b|c)/, [1, 1], Conditional::Branch
|
127
|
-
include_examples '#options', /(a)(?(1)b|c)/, [1, 1, 0], Literal
|
128
|
-
end
|
@@ -1,50 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe(Regexp::Expression::Subexpression) do
|
4
|
-
specify('#ts, #te') do
|
5
|
-
regx = /abcd|ghij|klmn|pqur/
|
6
|
-
root = RP.parse(regx)
|
7
|
-
|
8
|
-
alt = root.first
|
9
|
-
|
10
|
-
{ 0 => [0, 4], 1 => [5, 9], 2 => [10, 14], 3 => [15, 19] }.each do |index, span|
|
11
|
-
sequence = alt[index]
|
12
|
-
|
13
|
-
expect(sequence.ts).to eq span[0]
|
14
|
-
expect(sequence.te).to eq span[1]
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
specify('#nesting_level') do
|
19
|
-
root = RP.parse(/a(b(\d|[ef-g[h]]))/)
|
20
|
-
|
21
|
-
tests = {
|
22
|
-
'a' => 1,
|
23
|
-
'b' => 2,
|
24
|
-
'\d|[ef-g[h]]' => 3, # alternation
|
25
|
-
'\d' => 4, # first alternative
|
26
|
-
'[ef-g[h]]' => 4, # second alternative
|
27
|
-
'e' => 5,
|
28
|
-
'f-g' => 5,
|
29
|
-
'f' => 6,
|
30
|
-
'g' => 6,
|
31
|
-
'h' => 6,
|
32
|
-
}
|
33
|
-
|
34
|
-
root.each_expression do |exp|
|
35
|
-
next unless (expected_nesting_level = tests.delete(exp.to_s))
|
36
|
-
expect(expected_nesting_level).to eq exp.nesting_level
|
37
|
-
end
|
38
|
-
|
39
|
-
expect(tests).to be_empty
|
40
|
-
end
|
41
|
-
|
42
|
-
specify('#dig') do
|
43
|
-
root = RP.parse(/(((a)))/)
|
44
|
-
|
45
|
-
expect(root.dig(0).to_s).to eq '(((a)))'
|
46
|
-
expect(root.dig(0, 0, 0, 0).to_s).to eq 'a'
|
47
|
-
expect(root.dig(0, 0, 0, 0, 0)).to be_nil
|
48
|
-
expect(root.dig(3, 7)).to be_nil
|
49
|
-
end
|
50
|
-
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Expression#to_h') do
|
4
|
-
specify('Root#to_h') do
|
5
|
-
root = RP.parse('abc')
|
6
|
-
|
7
|
-
hash = root.to_h
|
8
|
-
|
9
|
-
expect(token: :root, type: :expression, text: 'abc', starts_at: 0, length: 3, quantifier: nil, options: {}, level: nil, set_level: nil, conditional_level: nil, expressions: [{ token: :literal, type: :literal, text: 'abc', starts_at: 0, length: 3, quantifier: nil, options: {}, level: 0, set_level: 0, conditional_level: 0 }]).to eq hash
|
10
|
-
end
|
11
|
-
|
12
|
-
specify('Quantifier#to_h') do
|
13
|
-
root = RP.parse('a{2,4}')
|
14
|
-
exp = root.expressions.at(0)
|
15
|
-
|
16
|
-
hash = exp.quantifier.to_h
|
17
|
-
|
18
|
-
expect(max: 4, min: 2, mode: :greedy, text: '{2,4}', token: :interval).to eq hash
|
19
|
-
end
|
20
|
-
|
21
|
-
specify('Conditional#to_h') do
|
22
|
-
root = RP.parse('(?<A>a)(?(<A>)b|c)', 'ruby/2.0')
|
23
|
-
|
24
|
-
expect { root.to_h }.not_to(raise_error)
|
25
|
-
end
|
26
|
-
end
|
@@ -1,108 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Expression#to_s') do
|
4
|
-
def parse_frozen(pattern, ruby_version = nil)
|
5
|
-
IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
|
6
|
-
end
|
7
|
-
|
8
|
-
def expect_round_trip(pattern, ruby_version = nil)
|
9
|
-
parsed = parse_frozen(pattern, ruby_version)
|
10
|
-
|
11
|
-
expect(parsed.to_s).to eql(pattern)
|
12
|
-
end
|
13
|
-
|
14
|
-
specify('literal alternation') do
|
15
|
-
expect_round_trip('abcd|ghij|klmn|pqur')
|
16
|
-
end
|
17
|
-
|
18
|
-
specify('quantified alternations') do
|
19
|
-
expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
|
20
|
-
end
|
21
|
-
|
22
|
-
specify('quantified sets') do
|
23
|
-
expect_round_trip('[abc]+|[^def]{3,6}')
|
24
|
-
end
|
25
|
-
|
26
|
-
specify('property sets') do
|
27
|
-
expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
|
28
|
-
end
|
29
|
-
|
30
|
-
specify('groups') do
|
31
|
-
expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
|
32
|
-
end
|
33
|
-
|
34
|
-
specify('assertions') do
|
35
|
-
expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
|
36
|
-
end
|
37
|
-
|
38
|
-
specify('comments') do
|
39
|
-
expect_round_trip('(?#start)a(?#middle)b(?#end)')
|
40
|
-
end
|
41
|
-
|
42
|
-
specify('options') do
|
43
|
-
expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
|
44
|
-
end
|
45
|
-
|
46
|
-
specify('url') do
|
47
|
-
expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
|
48
|
-
end
|
49
|
-
|
50
|
-
specify('multiline source') do
|
51
|
-
multiline = /
|
52
|
-
\A
|
53
|
-
a? # One letter
|
54
|
-
b{2,5} # Another one
|
55
|
-
[c-g]+ # A set
|
56
|
-
\z
|
57
|
-
/x
|
58
|
-
|
59
|
-
expect(parse_frozen(multiline).to_s).to eql(multiline.source)
|
60
|
-
end
|
61
|
-
|
62
|
-
specify('multiline #to_s') do
|
63
|
-
multiline = /
|
64
|
-
\A
|
65
|
-
a? # One letter
|
66
|
-
b{2,5} # Another one
|
67
|
-
[c-g]+ # A set
|
68
|
-
\z
|
69
|
-
/x
|
70
|
-
|
71
|
-
expect_round_trip(multiline.to_s)
|
72
|
-
end
|
73
|
-
|
74
|
-
# Free spacing expressions that use spaces between quantifiers and their
|
75
|
-
# targets do not produce identical results due to the way quantifiers are
|
76
|
-
# applied to expressions (members, not nodes) and the merging of consecutive
|
77
|
-
# space nodes. This tests that they produce equivalent results.
|
78
|
-
specify('multiline equivalence') do
|
79
|
-
multiline = /
|
80
|
-
\A
|
81
|
-
a ? # One letter
|
82
|
-
b {2,5} # Another one
|
83
|
-
[c-g] + # A set
|
84
|
-
\z
|
85
|
-
/x
|
86
|
-
|
87
|
-
str = 'bbbcged'
|
88
|
-
root = parse_frozen(multiline)
|
89
|
-
|
90
|
-
expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
|
91
|
-
end
|
92
|
-
|
93
|
-
# special case: implicit groups used for chained quantifiers produce no parens
|
94
|
-
specify 'chained quantifiers #to_s' do
|
95
|
-
pattern = /a+{1}{2}/
|
96
|
-
root = parse_frozen(pattern)
|
97
|
-
expect(root.to_s).to eql('a+{1}{2}')
|
98
|
-
end
|
99
|
-
|
100
|
-
# regression test for https://github.com/ammar/regexp_parser/issues/74
|
101
|
-
specify('non-ascii comment') do
|
102
|
-
pattern = '(?x) 😋 # 😋'
|
103
|
-
root = RP.parse(pattern)
|
104
|
-
expect(root.last).to be_a(Regexp::Expression::Comment)
|
105
|
-
expect(root.last.to_s).to eql('# 😋')
|
106
|
-
expect(root.to_s).to eql(pattern)
|
107
|
-
end
|
108
|
-
end
|
data/spec/lexer/all_spec.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe(Regexp::Lexer) do
|
4
|
-
specify('lexer returns an array') do
|
5
|
-
expect(RL.lex('abc')).to be_instance_of(Array)
|
6
|
-
end
|
7
|
-
|
8
|
-
specify('lexer returns tokens') do
|
9
|
-
tokens = RL.lex('^abc+[^one]{2,3}\\b\\d\\\\C-C$')
|
10
|
-
expect(tokens).to all(be_a Regexp::Token)
|
11
|
-
expect(tokens.map { |token| token.to_a.length }).to all(eq 8)
|
12
|
-
end
|
13
|
-
|
14
|
-
specify('lexer token count') do
|
15
|
-
tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
|
16
|
-
expect(tokens.length).to eq 28
|
17
|
-
end
|
18
|
-
|
19
|
-
specify('lexer scan alias') do
|
20
|
-
expect(RL.scan(/a|b|c/)).to eq RL.lex(/a|b|c/)
|
21
|
-
end
|
22
|
-
end
|
@@ -1,53 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Conditional lexing') do
|
4
|
-
include_examples 'lex', /(?<A>a)(?(<A>)b|c)/,
|
5
|
-
3 => [:conditional, :open, '(?', 7, 9, 0, 0, 0],
|
6
|
-
4 => [:conditional, :condition, '(<A>)', 9, 14, 0, 0, 1],
|
7
|
-
6 => [:conditional, :separator, '|', 15, 16, 0, 0, 1],
|
8
|
-
8 => [:conditional, :close, ')', 17, 18, 0, 0, 0]
|
9
|
-
|
10
|
-
include_examples 'lex', /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/,
|
11
|
-
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
12
|
-
1 => [:group, :named, '(?<A>', 1, 6, 1, 0, 0],
|
13
|
-
5 => [:conditional, :open, '(?', 13, 15, 2, 0, 0],
|
14
|
-
6 => [:conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
|
15
|
-
8 => [:conditional, :separator, '|', 21, 22, 2, 0, 1],
|
16
|
-
10 => [:conditional, :open, '(?', 23, 25, 3, 0, 1],
|
17
|
-
11 => [:conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
|
18
|
-
12 => [:set, :open, '[', 30, 31, 3, 0, 2],
|
19
|
-
13 => [:literal, :literal, 'e', 31, 32, 3, 1, 2],
|
20
|
-
14 => [:set, :range, '-', 32, 33, 3, 1, 2],
|
21
|
-
15 => [:literal, :literal, 'g', 33, 34, 3, 1, 2],
|
22
|
-
16 => [:set, :close, ']', 34, 35, 3, 0, 2],
|
23
|
-
17 => [:conditional, :separator, '|', 35, 36, 3, 0, 2],
|
24
|
-
23 => [:conditional, :close, ')', 41, 42, 3, 0, 1],
|
25
|
-
25 => [:conditional, :close, ')', 43, 44, 2, 0, 0],
|
26
|
-
26 => [:group, :close, ')', 44, 45, 1, 0, 0],
|
27
|
-
27 => [:group, :close, ')', 45, 46, 0, 0, 0]
|
28
|
-
|
29
|
-
include_examples 'lex', /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/,
|
30
|
-
9 => [:conditional, :open, '(?', 9, 11, 0, 0, 0],
|
31
|
-
10 => [:conditional, :condition, '(1)', 11, 14, 0, 0, 1],
|
32
|
-
11 => [:conditional, :open, '(?', 14, 16, 0, 0, 1],
|
33
|
-
12 => [:conditional, :condition, '(2)', 16, 19, 0, 0, 2],
|
34
|
-
13 => [:conditional, :open, '(?', 19, 21, 0, 0, 2],
|
35
|
-
14 => [:conditional, :condition, '(3)', 21, 24, 0, 0, 3],
|
36
|
-
16 => [:conditional, :separator, '|', 25, 26, 0, 0, 3],
|
37
|
-
18 => [:conditional, :close, ')', 27, 28, 0, 0, 2],
|
38
|
-
19 => [:conditional, :close, ')', 28, 29, 0, 0, 1],
|
39
|
-
20 => [:conditional, :separator, '|', 29, 30, 0, 0, 1],
|
40
|
-
21 => [:conditional, :open, '(?', 30, 32, 0, 0, 1],
|
41
|
-
22 => [:conditional, :condition, '(3)', 32, 35, 0, 0, 2],
|
42
|
-
23 => [:conditional, :open, '(?', 35, 37, 0, 0, 2],
|
43
|
-
24 => [:conditional, :condition, '(2)', 37, 40, 0, 0, 3],
|
44
|
-
26 => [:conditional, :separator, '|', 41, 42, 0, 0, 3],
|
45
|
-
28 => [:conditional, :close, ')', 43, 44, 0, 0, 2],
|
46
|
-
29 => [:conditional, :separator, '|', 44, 45, 0, 0, 2],
|
47
|
-
30 => [:conditional, :open, '(?', 45, 47, 0, 0, 2],
|
48
|
-
31 => [:conditional, :condition, '(1)', 47, 50, 0, 0, 3],
|
49
|
-
33 => [:conditional, :separator, '|', 51, 52, 0, 0, 3],
|
50
|
-
35 => [:conditional, :close, ')', 53, 54, 0, 0, 2],
|
51
|
-
36 => [:conditional, :close, ')', 54, 55, 0, 0, 1],
|
52
|
-
37 => [:conditional, :close, ')', 55, 56, 0, 0, 0]
|
53
|
-
end
|
@@ -1,68 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Literal delimiter lexing') do
|
4
|
-
include_examples 'lex', '}',
|
5
|
-
0 => [:literal, :literal, '}', 0, 1, 0, 0, 0]
|
6
|
-
|
7
|
-
include_examples 'lex', '}}',
|
8
|
-
0 => [:literal, :literal, '}}', 0, 2, 0, 0, 0]
|
9
|
-
|
10
|
-
include_examples 'lex', '{',
|
11
|
-
0 => [:literal, :literal, '{', 0, 1, 0, 0, 0]
|
12
|
-
|
13
|
-
include_examples 'lex', '{{',
|
14
|
-
0 => [:literal, :literal, '{{', 0, 2, 0, 0, 0]
|
15
|
-
|
16
|
-
include_examples 'lex', '{}',
|
17
|
-
0 => [:literal, :literal, '{}', 0, 2, 0, 0, 0]
|
18
|
-
|
19
|
-
include_examples 'lex', '}{',
|
20
|
-
0 => [:literal, :literal, '}{', 0, 2, 0, 0, 0]
|
21
|
-
|
22
|
-
include_examples 'lex', '}{+',
|
23
|
-
0 => [:literal, :literal, '}', 0, 1, 0, 0, 0],
|
24
|
-
1 => [:literal, :literal, '{', 1, 2, 0, 0, 0],
|
25
|
-
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
|
26
|
-
|
27
|
-
include_examples 'lex', '{{var}}',
|
28
|
-
0 => [:literal, :literal, '{{var}}', 0, 7, 0, 0, 0]
|
29
|
-
|
30
|
-
include_examples 'lex', 'a{b}c',
|
31
|
-
0 => [:literal, :literal, 'a{b}c', 0, 5, 0, 0, 0]
|
32
|
-
|
33
|
-
include_examples 'lex', 'a{1,2',
|
34
|
-
0 => [:literal, :literal, 'a{1,2', 0, 5, 0, 0, 0]
|
35
|
-
|
36
|
-
include_examples 'lex', '({.+})',
|
37
|
-
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
38
|
-
1 => [:literal, :literal, '{', 1, 2, 1, 0, 0],
|
39
|
-
2 => [:meta, :dot, '.', 2, 3, 1, 0, 0],
|
40
|
-
3 => [:quantifier, :one_or_more, '+', 3, 4, 1, 0, 0],
|
41
|
-
4 => [:literal, :literal, '}', 4, 5, 1, 0, 0],
|
42
|
-
5 => [:group, :close, ')', 5, 6, 0, 0, 0]
|
43
|
-
|
44
|
-
include_examples 'lex', ']',
|
45
|
-
0 => [:literal, :literal, ']', 0, 1, 0, 0, 0]
|
46
|
-
|
47
|
-
include_examples 'lex', ']]',
|
48
|
-
0 => [:literal, :literal, ']]', 0, 2, 0, 0, 0]
|
49
|
-
|
50
|
-
include_examples 'lex', ']\[',
|
51
|
-
0 => [:literal, :literal, ']', 0, 1, 0, 0, 0],
|
52
|
-
1 => [:escape, :set_open, '\[', 1, 3, 0, 0, 0]
|
53
|
-
|
54
|
-
include_examples 'lex', '()',
|
55
|
-
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
56
|
-
1 => [:group, :close, ')', 1, 2, 0, 0, 0]
|
57
|
-
|
58
|
-
include_examples 'lex', '{abc:.+}}}[^}]]}',
|
59
|
-
0 => [:literal, :literal, '{abc:', 0, 5, 0, 0, 0],
|
60
|
-
1 => [:meta, :dot, '.', 5, 6, 0, 0, 0],
|
61
|
-
2 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
62
|
-
3 => [:literal, :literal, '}}}', 7, 10, 0, 0, 0],
|
63
|
-
4 => [:set, :open, '[', 10, 11, 0, 0, 0],
|
64
|
-
5 => [:set, :negate, '^', 11, 12, 0, 1, 0],
|
65
|
-
6 => [:literal, :literal, '}', 12, 13, 0, 1, 0],
|
66
|
-
7 => [:set, :close, ']', 13, 14, 0, 0, 0],
|
67
|
-
8 => [:literal, :literal, ']}', 14, 16, 0, 0, 0]
|
68
|
-
end
|
data/spec/lexer/escapes_spec.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Escape lexing') do
|
4
|
-
include_examples 'lex', '\u{62}',
|
5
|
-
0 => [:escape, :codepoint_list, '\u{62}', 0, 6, 0, 0, 0]
|
6
|
-
|
7
|
-
include_examples 'lex', '\u{62 63 64}',
|
8
|
-
0 => [:escape, :codepoint_list, '\u{62 63 64}', 0, 12, 0, 0, 0]
|
9
|
-
|
10
|
-
include_examples 'lex', '\u{62 63 64}+',
|
11
|
-
0 => [:escape, :codepoint_list, '\u{62 63}', 0, 9, 0, 0, 0],
|
12
|
-
1 => [:escape, :codepoint_list, '\u{64}', 9, 15, 0, 0, 0],
|
13
|
-
2 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
|
14
|
-
end
|
data/spec/lexer/keep_spec.rb
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Keep lexing') do
|
4
|
-
include_examples 'lex', /ab\Kcd/,
|
5
|
-
1 => [:keep, :mark, '\K', 2, 4, 0, 0, 0]
|
6
|
-
|
7
|
-
include_examples 'lex', /(a\Kb)|(c\\\Kd)ef/,
|
8
|
-
2 => [:keep, :mark, '\K', 2, 4, 1, 0, 0],
|
9
|
-
9 => [:keep, :mark, '\K', 11, 13, 1, 0, 0]
|
10
|
-
end
|
data/spec/lexer/literals_spec.rb
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Literal lexing') do
|
4
|
-
# ascii, single byte characters
|
5
|
-
include_examples 'lex', 'a',
|
6
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0]
|
7
|
-
|
8
|
-
include_examples 'lex', 'ab+',
|
9
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
10
|
-
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
11
|
-
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
|
12
|
-
|
13
|
-
# 2 byte wide characters
|
14
|
-
include_examples 'lex', 'äöü+',
|
15
|
-
0 => [:literal, :literal, 'äö', 0, 2, 0, 0, 0],
|
16
|
-
1 => [:literal, :literal, 'ü', 2, 3, 0, 0, 0],
|
17
|
-
2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
|
18
|
-
|
19
|
-
# 3 byte wide characters, Japanese
|
20
|
-
include_examples 'lex', 'ab?れます+cd',
|
21
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
22
|
-
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
23
|
-
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
24
|
-
3 => [:literal, :literal, 'れま', 3, 5, 0, 0, 0],
|
25
|
-
4 => [:literal, :literal, 'す', 5, 6, 0, 0, 0],
|
26
|
-
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
27
|
-
6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
|
28
|
-
|
29
|
-
# 4 byte wide characters, Osmanya
|
30
|
-
include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
|
31
|
-
0 => [:literal, :literal, '𐒀', 0, 1, 0, 0, 0],
|
32
|
-
1 => [:literal, :literal, '𐒁', 1, 2, 0, 0, 0],
|
33
|
-
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
34
|
-
3 => [:literal, :literal, '𐒂a', 3, 5, 0, 0, 0],
|
35
|
-
4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
|
36
|
-
5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
|
37
|
-
6 => [:literal, :literal, '𐒃', 7, 8, 0, 0, 0]
|
38
|
-
|
39
|
-
include_examples 'lex', 'mu𝄞?si*𝄫c+',
|
40
|
-
0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
|
41
|
-
1 => [:literal, :literal, '𝄞', 2, 3, 0, 0, 0],
|
42
|
-
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
43
|
-
3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
|
44
|
-
4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
|
45
|
-
5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
|
46
|
-
6 => [:literal, :literal, '𝄫', 7, 8, 0, 0, 0],
|
47
|
-
7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
|
48
|
-
8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
|
49
|
-
|
50
|
-
specify('lex single 2 byte char') do
|
51
|
-
tokens = RL.lex("\u0627+")
|
52
|
-
expect(tokens.count).to eq 2
|
53
|
-
end
|
54
|
-
|
55
|
-
specify('lex single 3 byte char') do
|
56
|
-
tokens = RL.lex("\u308C+")
|
57
|
-
expect(tokens.count).to eq 2
|
58
|
-
end
|
59
|
-
|
60
|
-
specify('lex single 4 byte char') do
|
61
|
-
tokens = RL.lex("\u{1D11E}+")
|
62
|
-
expect(tokens.count).to eq 2
|
63
|
-
end
|
64
|
-
end
|
data/spec/lexer/nesting_spec.rb
DELETED
@@ -1,99 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Nesting lexing') do
|
4
|
-
include_examples 'lex', /(((b)))/,
|
5
|
-
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
6
|
-
1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
|
7
|
-
2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
|
8
|
-
3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
|
9
|
-
4 => [:group, :close, ')', 4, 5, 2, 0, 0],
|
10
|
-
5 => [:group, :close, ')', 5, 6, 1, 0, 0],
|
11
|
-
6 => [:group, :close, ')', 6, 7, 0, 0, 0]
|
12
|
-
|
13
|
-
include_examples 'lex', /(\((b)\))/,
|
14
|
-
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
15
|
-
1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
|
16
|
-
2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
|
17
|
-
3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
|
18
|
-
4 => [:group, :close, ')', 5, 6, 1, 0, 0],
|
19
|
-
5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
|
20
|
-
6 => [:group, :close, ')', 8, 9, 0, 0, 0]
|
21
|
-
|
22
|
-
include_examples 'lex', /(?>a(?>b(?>c)))/,
|
23
|
-
0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
|
24
|
-
2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
|
25
|
-
4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
|
26
|
-
6 => [:group, :close, ')', 12, 13, 2, 0, 0],
|
27
|
-
7 => [:group, :close, ')', 13, 14, 1, 0, 0],
|
28
|
-
8 => [:group, :close, ')', 14, 15, 0, 0, 0]
|
29
|
-
|
30
|
-
include_examples 'lex', /(?:a(?:b(?:c)))/,
|
31
|
-
0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
|
32
|
-
2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
|
33
|
-
4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
|
34
|
-
6 => [:group, :close, ')', 12, 13, 2, 0, 0],
|
35
|
-
7 => [:group, :close, ')', 13, 14, 1, 0, 0],
|
36
|
-
8 => [:group, :close, ')', 14, 15, 0, 0, 0]
|
37
|
-
|
38
|
-
include_examples 'lex', /(?=a(?!b(?<=c(?<!d))))/,
|
39
|
-
0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
|
40
|
-
2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
|
41
|
-
4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
|
42
|
-
6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
|
43
|
-
8 => [:group, :close, ')', 18, 19, 3, 0, 0],
|
44
|
-
9 => [:group, :close, ')', 19, 20, 2, 0, 0],
|
45
|
-
10 => [:group, :close, ')', 20, 21, 1, 0, 0],
|
46
|
-
11 => [:group, :close, ')', 21, 22, 0, 0, 0]
|
47
|
-
|
48
|
-
include_examples 'lex', /((?#a)b(?#c)d(?#e))/,
|
49
|
-
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
50
|
-
1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
|
51
|
-
3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
|
52
|
-
5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
|
53
|
-
6 => [:group, :close, ')', 18, 19, 0, 0, 0]
|
54
|
-
|
55
|
-
include_examples 'lex', /a[b-e]f/,
|
56
|
-
1 => [:set, :open, '[', 1, 2, 0, 0, 0],
|
57
|
-
2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
|
58
|
-
3 => [:set, :range, '-', 3, 4, 0, 1, 0],
|
59
|
-
4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
|
60
|
-
5 => [:set, :close, ']', 5, 6, 0, 0, 0]
|
61
|
-
|
62
|
-
include_examples 'lex', '[[:word:]&&[^c]z]',
|
63
|
-
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
64
|
-
1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
|
65
|
-
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
66
|
-
3 => [:set, :open, '[', 11, 12, 0, 1, 0],
|
67
|
-
4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
|
68
|
-
5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
|
69
|
-
6 => [:set, :close, ']', 14, 15, 0, 1, 0],
|
70
|
-
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
71
|
-
8 => [:set, :close, ']', 16, 17, 0, 0, 0]
|
72
|
-
|
73
|
-
include_examples 'lex', '[\p{word}&&[^c]z]',
|
74
|
-
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
75
|
-
1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
|
76
|
-
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
77
|
-
3 => [:set, :open, '[', 11, 12, 0, 1, 0],
|
78
|
-
4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
|
79
|
-
5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
|
80
|
-
6 => [:set, :close, ']', 14, 15, 0, 1, 0],
|
81
|
-
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
82
|
-
8 => [:set, :close, ']', 16, 17, 0, 0, 0]
|
83
|
-
|
84
|
-
include_examples 'lex', /[a[b[c[d-g]]]]/,
|
85
|
-
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
86
|
-
1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
|
87
|
-
2 => [:set, :open, '[', 2, 3, 0, 1, 0],
|
88
|
-
3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
|
89
|
-
4 => [:set, :open, '[', 4, 5, 0, 2, 0],
|
90
|
-
5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
|
91
|
-
6 => [:set, :open, '[', 6, 7, 0, 3, 0],
|
92
|
-
7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
|
93
|
-
8 => [:set, :range, '-', 8, 9, 0, 4, 0],
|
94
|
-
9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
|
95
|
-
10 => [:set, :close, ']', 10, 11, 0, 3, 0],
|
96
|
-
11 => [:set, :close, ']', 11, 12, 0, 2, 0],
|
97
|
-
12 => [:set, :close, ']', 12, 13, 0, 1, 0],
|
98
|
-
13 => [:set, :close, ']', 13, 14, 0, 0, 0]
|
99
|
-
end
|