regexp_parser 1.7.1 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +157 -1
- data/Gemfile +6 -1
- data/LICENSE +1 -1
- data/README.md +38 -32
- data/Rakefile +18 -27
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -4
- data/lib/regexp_parser/expression/classes/group.rb +28 -3
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +4 -17
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/expression/quantifier.rb +11 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -20
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -139
- data/lib/regexp_parser/lexer.rb +13 -11
- data/lib/regexp_parser/parser.rb +325 -344
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/properties/long.csv +604 -0
- data/lib/regexp_parser/scanner/properties/short.csv +242 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +235 -255
- data/lib/regexp_parser/scanner.rb +1324 -1387
- data/lib/regexp_parser/syntax/any.rb +4 -6
- data/lib/regexp_parser/syntax/base.rb +13 -15
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +34 -165
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -52
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
data/spec/parser/alternation_spec.rb
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Alternation parsing') do
|
4
|
-
let(:root) { RP.parse('(ab??|cd*|ef+)*|(gh|ij|kl)?') }
|
5
|
-
|
6
|
-
specify('parse alternation root') do
|
7
|
-
e = root[0]
|
8
|
-
expect(e).to be_a(Alternation)
|
9
|
-
end
|
10
|
-
|
11
|
-
specify('parse alternation alts') do
|
12
|
-
alts = root[0].alternatives
|
13
|
-
|
14
|
-
expect(alts[0]).to be_a(Alternative)
|
15
|
-
expect(alts[1]).to be_a(Alternative)
|
16
|
-
|
17
|
-
expect(alts[0][0]).to be_a(Group::Capture)
|
18
|
-
expect(alts[1][0]).to be_a(Group::Capture)
|
19
|
-
|
20
|
-
expect(alts.length).to eq 2
|
21
|
-
end
|
22
|
-
|
23
|
-
specify('parse alternation nested') do
|
24
|
-
e = root[0].alternatives[0][0][0]
|
25
|
-
|
26
|
-
expect(e).to be_a(Alternation)
|
27
|
-
end
|
28
|
-
|
29
|
-
specify('parse alternation nested sequence') do
|
30
|
-
alts = root[0][0]
|
31
|
-
nested = alts[0][0][0]
|
32
|
-
|
33
|
-
expect(nested).to be_a(Alternative)
|
34
|
-
|
35
|
-
expect(nested[0]).to be_a(Literal)
|
36
|
-
expect(nested[1]).to be_a(Literal)
|
37
|
-
expect(nested.expressions.length).to eq 2
|
38
|
-
end
|
39
|
-
|
40
|
-
specify('parse alternation nested groups') do
|
41
|
-
root = RP.parse('(i|ey|([ougfd]+)|(ney))')
|
42
|
-
|
43
|
-
alts = root[0][0].alternatives
|
44
|
-
expect(alts.length).to eq 4
|
45
|
-
end
|
46
|
-
|
47
|
-
specify('parse alternation grouped alts') do
|
48
|
-
root = RP.parse('ca((n)|(t)|(ll)|(b))')
|
49
|
-
|
50
|
-
alts = root[1][0].alternatives
|
51
|
-
|
52
|
-
expect(alts.length).to eq 4
|
53
|
-
|
54
|
-
expect(alts[0]).to be_a(Alternative)
|
55
|
-
expect(alts[1]).to be_a(Alternative)
|
56
|
-
expect(alts[2]).to be_a(Alternative)
|
57
|
-
expect(alts[3]).to be_a(Alternative)
|
58
|
-
end
|
59
|
-
|
60
|
-
specify('parse alternation nested grouped alts') do
|
61
|
-
root = RP.parse('ca((n|t)|(ll|b))')
|
62
|
-
|
63
|
-
alts = root[1][0].alternatives
|
64
|
-
|
65
|
-
expect(alts.length).to eq 2
|
66
|
-
|
67
|
-
expect(alts[0]).to be_a(Alternative)
|
68
|
-
expect(alts[1]).to be_a(Alternative)
|
69
|
-
|
70
|
-
subalts = root[1][0][0][0][0].alternatives
|
71
|
-
|
72
|
-
expect(alts.length).to eq 2
|
73
|
-
|
74
|
-
expect(subalts[0]).to be_a(Alternative)
|
75
|
-
expect(subalts[1]).to be_a(Alternative)
|
76
|
-
end
|
77
|
-
|
78
|
-
specify('parse alternation continues after nesting') do
|
79
|
-
root = RP.parse(/a|(b)c/)
|
80
|
-
|
81
|
-
seq = root[0][1].expressions
|
82
|
-
|
83
|
-
expect(seq.length).to eq 2
|
84
|
-
|
85
|
-
expect(seq[0]).to be_a(Group::Capture)
|
86
|
-
expect(seq[1]).to be_a(Literal)
|
87
|
-
end
|
88
|
-
end
|
data/spec/parser/anchors_spec.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Anchor parsing') do
|
4
|
-
include_examples 'parse', /^a/, 0 => [:anchor, :bol, Anchor::BOL]
|
5
|
-
include_examples 'parse', /a$/, 1 => [:anchor, :eol, Anchor::EOL]
|
6
|
-
|
7
|
-
include_examples 'parse', /\Aa/, 0 => [:anchor, :bos, Anchor::BOS]
|
8
|
-
include_examples 'parse', /a\z/, 1 => [:anchor, :eos, Anchor::EOS]
|
9
|
-
include_examples 'parse', /a\Z/, 1 => [:anchor, :eos_ob_eol, Anchor::EOSobEOL]
|
10
|
-
|
11
|
-
include_examples 'parse', /a\b/, 1 => [:anchor, :word_boundary, Anchor::WordBoundary]
|
12
|
-
include_examples 'parse', /a\B/, 1 => [:anchor, :nonword_boundary, Anchor::NonWordBoundary]
|
13
|
-
|
14
|
-
include_examples 'parse', /a\G/, 1 => [:anchor, :match_start, Anchor::MatchStart]
|
15
|
-
|
16
|
-
include_examples 'parse', /\\A/, 0 => [:escape, :backslash, EscapeSequence::Literal]
|
17
|
-
end
|
data/spec/parser/conditionals_spec.rb
DELETED
@@ -1,179 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Conditional parsing') do
|
4
|
-
specify('parse conditional') do
|
5
|
-
regexp = /(?<A>a)(?(<A>)T|F)/
|
6
|
-
|
7
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
8
|
-
exp = root[1]
|
9
|
-
|
10
|
-
expect(exp).to be_a(Conditional::Expression)
|
11
|
-
|
12
|
-
expect(exp.type).to eq :conditional
|
13
|
-
expect(exp.token).to eq :open
|
14
|
-
expect(exp.to_s).to eq '(?(<A>)T|F)'
|
15
|
-
expect(exp.reference).to eq 'A'
|
16
|
-
end
|
17
|
-
|
18
|
-
specify('parse conditional condition') do
|
19
|
-
regexp = /(?<A>a)(?(<A>)T|F)/
|
20
|
-
|
21
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
22
|
-
exp = root[1].condition
|
23
|
-
|
24
|
-
expect(exp).to be_a(Conditional::Condition)
|
25
|
-
|
26
|
-
expect(exp.type).to eq :conditional
|
27
|
-
expect(exp.token).to eq :condition
|
28
|
-
expect(exp.to_s).to eq '(<A>)'
|
29
|
-
expect(exp.reference).to eq 'A'
|
30
|
-
expect(exp.referenced_expression.to_s).to eq '(?<A>a)'
|
31
|
-
end
|
32
|
-
|
33
|
-
specify('parse conditional condition with number ref') do
|
34
|
-
regexp = /(a)(?(1)T|F)/
|
35
|
-
|
36
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
37
|
-
exp = root[1].condition
|
38
|
-
|
39
|
-
expect(exp).to be_a(Conditional::Condition)
|
40
|
-
|
41
|
-
expect(exp.type).to eq :conditional
|
42
|
-
expect(exp.token).to eq :condition
|
43
|
-
expect(exp.to_s).to eq '(1)'
|
44
|
-
expect(exp.reference).to eq 1
|
45
|
-
expect(exp.referenced_expression.to_s).to eq '(a)'
|
46
|
-
end
|
47
|
-
|
48
|
-
specify('parse conditional nested groups') do
|
49
|
-
regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
|
50
|
-
|
51
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
52
|
-
|
53
|
-
expect(root.to_s).to eq regexp.source
|
54
|
-
|
55
|
-
group = root.first
|
56
|
-
expect(group).to be_instance_of(Group::Capture)
|
57
|
-
|
58
|
-
alt = group.first
|
59
|
-
expect(alt).to be_instance_of(Alternation)
|
60
|
-
expect(alt.length).to eq 3
|
61
|
-
|
62
|
-
expect(alt.map(&:first)).to all(be_a Group::Capture)
|
63
|
-
|
64
|
-
subgroup = alt[2].first
|
65
|
-
conditional = subgroup.first
|
66
|
-
|
67
|
-
expect(conditional).to be_instance_of(Conditional::Expression)
|
68
|
-
expect(conditional.length).to eq 3
|
69
|
-
|
70
|
-
expect(conditional[0]).to be_instance_of(Conditional::Condition)
|
71
|
-
expect(conditional[0].to_s).to eq '(2)'
|
72
|
-
|
73
|
-
condition = conditional.condition
|
74
|
-
expect(condition).to be_instance_of(Conditional::Condition)
|
75
|
-
expect(condition.to_s).to eq '(2)'
|
76
|
-
|
77
|
-
branches = conditional.branches
|
78
|
-
expect(branches.length).to eq 2
|
79
|
-
expect(branches).to be_instance_of(Array)
|
80
|
-
end
|
81
|
-
|
82
|
-
specify('parse conditional nested') do
|
83
|
-
regexp = /(a(b(c(d)(e))))(?(1)(?(2)d|(?(3)e|f))|(?(4)(?(5)g|h)))/
|
84
|
-
|
85
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
86
|
-
|
87
|
-
expect(root.to_s).to eq regexp.source
|
88
|
-
|
89
|
-
{
|
90
|
-
1 => [2, root[1]],
|
91
|
-
2 => [2, root[1][1][0]],
|
92
|
-
3 => [2, root[1][1][0][2][0]],
|
93
|
-
4 => [1, root[1][2][0]],
|
94
|
-
5 => [2, root[1][2][0][1][0]]
|
95
|
-
}.each do |index, example|
|
96
|
-
branch_count, exp = example
|
97
|
-
|
98
|
-
expect(exp).to be_instance_of(Conditional::Expression)
|
99
|
-
expect(exp.condition.to_s).to eq "(#{index})"
|
100
|
-
expect(exp.branches.length).to eq branch_count
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
specify('parse conditional nested alternation') do
|
105
|
-
regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
|
106
|
-
|
107
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
108
|
-
|
109
|
-
expect(root.to_s).to eq regexp.source
|
110
|
-
|
111
|
-
expect(root.first).to be_instance_of(Alternation)
|
112
|
-
|
113
|
-
[
|
114
|
-
[3, 'b|c|d', root[0][0][1][1][0][0]],
|
115
|
-
[3, 'e|f|g', root[0][0][1][2][0][0]],
|
116
|
-
[3, 'i|j|k', root[0][0][3][1][0][0]],
|
117
|
-
[3, 'l|m|n', root[0][0][3][2][0][0]]
|
118
|
-
].each do |example|
|
119
|
-
alt_count, alt_text, exp = example
|
120
|
-
|
121
|
-
expect(exp).to be_instance_of(Alternation)
|
122
|
-
expect(exp.to_s).to eq alt_text
|
123
|
-
expect(exp.alternatives.length).to eq alt_count
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
specify('parse conditional extra separator') do
|
128
|
-
regexp = /(?<A>a)(?(<A>)T|)/
|
129
|
-
|
130
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
131
|
-
branches = root[1].branches
|
132
|
-
|
133
|
-
expect(branches.length).to eq 2
|
134
|
-
|
135
|
-
seq_1, seq_2 = branches
|
136
|
-
|
137
|
-
[seq_1, seq_2].each do |seq|
|
138
|
-
expect(seq).to be_a(Sequence)
|
139
|
-
|
140
|
-
expect(seq.type).to eq :expression
|
141
|
-
expect(seq.token).to eq :sequence
|
142
|
-
end
|
143
|
-
|
144
|
-
expect(seq_1.to_s).to eq 'T'
|
145
|
-
expect(seq_2.to_s).to eq ''
|
146
|
-
end
|
147
|
-
|
148
|
-
specify('parse conditional quantified') do
|
149
|
-
regexp = /(foo)(?(1)\d|(\w)){42}/
|
150
|
-
|
151
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
152
|
-
conditional = root[1]
|
153
|
-
|
154
|
-
expect(conditional).to be_quantified
|
155
|
-
expect(conditional.quantifier.to_s).to eq '{42}'
|
156
|
-
expect(conditional.to_s).to eq '(?(1)\\d|(\\w)){42}'
|
157
|
-
expect(conditional.branches.any?(&:quantified?)).to be false
|
158
|
-
end
|
159
|
-
|
160
|
-
specify('parse conditional branch content quantified') do
|
161
|
-
regexp = /(foo)(?(1)\d{23}|(\w){42})/
|
162
|
-
|
163
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
164
|
-
conditional = root[1]
|
165
|
-
|
166
|
-
expect(conditional).not_to be_quantified
|
167
|
-
expect(conditional.branches.any?(&:quantified?)).to be false
|
168
|
-
expect(conditional.branches[0][0]).to be_quantified
|
169
|
-
expect(conditional.branches[0][0].quantifier.to_s).to eq '{23}'
|
170
|
-
expect(conditional.branches[1][0]).to be_quantified
|
171
|
-
expect(conditional.branches[1][0].quantifier.to_s).to eq '{42}'
|
172
|
-
end
|
173
|
-
|
174
|
-
specify('parse conditional excessive branches') do
|
175
|
-
regexp = '(?<A>a)(?(<A>)T|F|X)'
|
176
|
-
|
177
|
-
expect { RP.parse(regexp, 'ruby/2.0') }.to raise_error(Conditional::TooManyBranches)
|
178
|
-
end
|
179
|
-
end
|
data/spec/parser/errors_spec.rb
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Parsing errors') do
|
4
|
-
let(:parser) { Regexp::Parser.new }
|
5
|
-
before { parser.parse(/foo/) } # initializes ivars
|
6
|
-
|
7
|
-
it('raises UnknownTokenTypeError for unknown token types') do
|
8
|
-
expect { parser.send(:parse_token, Regexp::Token.new(:foo, :bar)) }
|
9
|
-
.to raise_error(Regexp::Parser::UnknownTokenTypeError)
|
10
|
-
end
|
11
|
-
|
12
|
-
RSpec.shared_examples 'UnknownTokenError' do |type, token|
|
13
|
-
it "raises for unkown tokens of type #{type}" do
|
14
|
-
expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
|
15
|
-
.to raise_error(Regexp::Parser::UnknownTokenError)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
include_examples 'UnknownTokenError', :anchor
|
20
|
-
include_examples 'UnknownTokenError', :backref
|
21
|
-
include_examples 'UnknownTokenError', :conditional
|
22
|
-
include_examples 'UnknownTokenError', :free_space
|
23
|
-
include_examples 'UnknownTokenError', :group
|
24
|
-
include_examples 'UnknownTokenError', :meta
|
25
|
-
include_examples 'UnknownTokenError', :nonproperty
|
26
|
-
include_examples 'UnknownTokenError', :property
|
27
|
-
include_examples 'UnknownTokenError', :quantifier
|
28
|
-
include_examples 'UnknownTokenError', :set
|
29
|
-
include_examples 'UnknownTokenError', :type
|
30
|
-
end
|
data/spec/parser/escapes_spec.rb
DELETED
@@ -1,121 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('EscapeSequence parsing') do
|
4
|
-
include_examples 'parse', /a\ac/, 1 => [:escape, :bell, EscapeSequence::Bell]
|
5
|
-
include_examples 'parse', /a\ec/, 1 => [:escape, :escape, EscapeSequence::AsciiEscape]
|
6
|
-
include_examples 'parse', /a\fc/, 1 => [:escape, :form_feed, EscapeSequence::FormFeed]
|
7
|
-
include_examples 'parse', /a\nc/, 1 => [:escape, :newline, EscapeSequence::Newline]
|
8
|
-
include_examples 'parse', /a\rc/, 1 => [:escape, :carriage, EscapeSequence::Return]
|
9
|
-
include_examples 'parse', /a\tc/, 1 => [:escape, :tab, EscapeSequence::Tab]
|
10
|
-
include_examples 'parse', /a\vc/, 1 => [:escape, :vertical_tab, EscapeSequence::VerticalTab]
|
11
|
-
|
12
|
-
# meta character escapes
|
13
|
-
include_examples 'parse', /a\.c/, 1 => [:escape, :dot, EscapeSequence::Literal]
|
14
|
-
include_examples 'parse', /a\?c/, 1 => [:escape, :zero_or_one, EscapeSequence::Literal]
|
15
|
-
include_examples 'parse', /a\*c/, 1 => [:escape, :zero_or_more, EscapeSequence::Literal]
|
16
|
-
include_examples 'parse', /a\+c/, 1 => [:escape, :one_or_more, EscapeSequence::Literal]
|
17
|
-
include_examples 'parse', /a\|c/, 1 => [:escape, :alternation, EscapeSequence::Literal]
|
18
|
-
include_examples 'parse', /a\(c/, 1 => [:escape, :group_open, EscapeSequence::Literal]
|
19
|
-
include_examples 'parse', /a\)c/, 1 => [:escape, :group_close, EscapeSequence::Literal]
|
20
|
-
include_examples 'parse', /a\{c/, 1 => [:escape, :interval_open, EscapeSequence::Literal]
|
21
|
-
include_examples 'parse', /a\}c/, 1 => [:escape, :interval_close, EscapeSequence::Literal]
|
22
|
-
|
23
|
-
# unicode escapes
|
24
|
-
include_examples 'parse', /a\u0640/, 1 => [:escape, :codepoint, EscapeSequence::Codepoint]
|
25
|
-
include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
26
|
-
include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
|
27
|
-
|
28
|
-
# hex escapes
|
29
|
-
include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
|
30
|
-
|
31
|
-
# octal escapes
|
32
|
-
include_examples 'parse', /a\177/n, 1 => [:escape, :octal, EscapeSequence::Octal]
|
33
|
-
|
34
|
-
specify('parse chars and codepoints') do
|
35
|
-
root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
|
36
|
-
|
37
|
-
expect(root[0].char).to eq "\n"
|
38
|
-
expect(root[0].codepoint).to eq 10
|
39
|
-
|
40
|
-
expect(root[1].char).to eq '?'
|
41
|
-
expect(root[1].codepoint).to eq 63
|
42
|
-
|
43
|
-
expect(root[2].char).to eq 'A'
|
44
|
-
expect(root[2].codepoint).to eq 65
|
45
|
-
|
46
|
-
expect(root[3].char).to eq 'B'
|
47
|
-
expect(root[3].codepoint).to eq 66
|
48
|
-
|
49
|
-
expect(root[4].char).to eq 'C'
|
50
|
-
expect(root[4].codepoint).to eq 67
|
51
|
-
|
52
|
-
expect(root[5].chars).to eq %w[D E]
|
53
|
-
expect(root[5].codepoints).to eq [68, 69]
|
54
|
-
|
55
|
-
expect { root[5].char }.to raise_error(/#chars/)
|
56
|
-
expect { root[5].codepoint }.to raise_error(/#codepoints/)
|
57
|
-
end
|
58
|
-
|
59
|
-
specify('parse escape control sequence lower') do
|
60
|
-
root = RP.parse(/a\\\c2b/)
|
61
|
-
|
62
|
-
expect(root[2]).to be_instance_of(EscapeSequence::Control)
|
63
|
-
expect(root[2].text).to eq '\\c2'
|
64
|
-
expect(root[2].char).to eq "\x12"
|
65
|
-
expect(root[2].codepoint).to eq 18
|
66
|
-
end
|
67
|
-
|
68
|
-
specify('parse escape control sequence upper') do
|
69
|
-
root = RP.parse(/\d\\\C-C\w/)
|
70
|
-
|
71
|
-
expect(root[2]).to be_instance_of(EscapeSequence::Control)
|
72
|
-
expect(root[2].text).to eq '\\C-C'
|
73
|
-
expect(root[2].char).to eq "\x03"
|
74
|
-
expect(root[2].codepoint).to eq 3
|
75
|
-
end
|
76
|
-
|
77
|
-
specify('parse escape meta sequence') do
|
78
|
-
root = RP.parse(/\Z\\\M-Z/n)
|
79
|
-
|
80
|
-
expect(root[2]).to be_instance_of(EscapeSequence::Meta)
|
81
|
-
expect(root[2].text).to eq '\\M-Z'
|
82
|
-
expect(root[2].char).to eq "\u00DA"
|
83
|
-
expect(root[2].codepoint).to eq 218
|
84
|
-
end
|
85
|
-
|
86
|
-
specify('parse escape meta control sequence') do
|
87
|
-
root = RP.parse(/\A\\\M-\C-X/n)
|
88
|
-
|
89
|
-
expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
|
90
|
-
expect(root[2].text).to eq '\\M-\\C-X'
|
91
|
-
expect(root[2].char).to eq "\u0098"
|
92
|
-
expect(root[2].codepoint).to eq 152
|
93
|
-
end
|
94
|
-
|
95
|
-
specify('parse lower c meta control sequence') do
|
96
|
-
root = RP.parse(/\A\\\M-\cX/n)
|
97
|
-
|
98
|
-
expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
|
99
|
-
expect(root[2].text).to eq '\\M-\\cX'
|
100
|
-
expect(root[2].char).to eq "\u0098"
|
101
|
-
expect(root[2].codepoint).to eq 152
|
102
|
-
end
|
103
|
-
|
104
|
-
specify('parse escape reverse meta control sequence') do
|
105
|
-
root = RP.parse(/\A\\\C-\M-X/n)
|
106
|
-
|
107
|
-
expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
|
108
|
-
expect(root[2].text).to eq '\\C-\\M-X'
|
109
|
-
expect(root[2].char).to eq "\u0098"
|
110
|
-
expect(root[2].codepoint).to eq 152
|
111
|
-
end
|
112
|
-
|
113
|
-
specify('parse escape reverse lower c meta control sequence') do
|
114
|
-
root = RP.parse(/\A\\\c\M-X/n)
|
115
|
-
|
116
|
-
expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
|
117
|
-
expect(root[2].text).to eq '\\c\\M-X'
|
118
|
-
expect(root[2].char).to eq "\u0098"
|
119
|
-
expect(root[2].codepoint).to eq 152
|
120
|
-
end
|
121
|
-
end
|
data/spec/parser/free_space_spec.rb
DELETED
@@ -1,130 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('FreeSpace parsing') do
|
4
|
-
specify('parse free space spaces') do
|
5
|
-
regexp = /a ? b * c + d{2,4}/x
|
6
|
-
root = RP.parse(regexp)
|
7
|
-
|
8
|
-
0.upto(6) do |i|
|
9
|
-
if i.odd?
|
10
|
-
expect(root[i]).to be_instance_of(WhiteSpace)
|
11
|
-
expect(root[i].text).to eq ' '
|
12
|
-
else
|
13
|
-
expect(root[i]).to be_instance_of(Literal)
|
14
|
-
expect(root[i]).to be_quantified
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
specify('parse non free space literals') do
|
20
|
-
regexp = /a b c d/
|
21
|
-
root = RP.parse(regexp)
|
22
|
-
|
23
|
-
expect(root.first).to be_instance_of(Literal)
|
24
|
-
expect(root.first.text).to eq 'a b c d'
|
25
|
-
end
|
26
|
-
|
27
|
-
specify('parse free space comments') do
|
28
|
-
regexp = /
|
29
|
-
a ? # One letter
|
30
|
-
b {2,5} # Another one
|
31
|
-
[c-g] + # A set
|
32
|
-
(h|i|j) | # A group
|
33
|
-
klm *
|
34
|
-
nop +
|
35
|
-
/x
|
36
|
-
|
37
|
-
root = RP.parse(regexp)
|
38
|
-
|
39
|
-
alt = root.first
|
40
|
-
expect(alt).to be_instance_of(Alternation)
|
41
|
-
|
42
|
-
alt_1 = alt.alternatives.first
|
43
|
-
expect(alt_1).to be_instance_of(Alternative)
|
44
|
-
expect(alt_1.length).to eq 15
|
45
|
-
|
46
|
-
[0, 2, 4, 6, 8, 12, 14].each do |i|
|
47
|
-
expect(alt_1[i]).to be_instance_of(WhiteSpace)
|
48
|
-
end
|
49
|
-
|
50
|
-
[3, 7, 11].each { |i| expect(alt_1[i].class).to eq Comment }
|
51
|
-
|
52
|
-
alt_2 = alt.alternatives.last
|
53
|
-
expect(alt_2).to be_instance_of(Alternative)
|
54
|
-
expect(alt_2.length).to eq 7
|
55
|
-
|
56
|
-
[0, 2, 4, 6].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
|
57
|
-
|
58
|
-
expect(alt_2[1]).to be_instance_of(Comment)
|
59
|
-
end
|
60
|
-
|
61
|
-
specify('parse free space nested comments') do
|
62
|
-
regexp = /
|
63
|
-
# Group one
|
64
|
-
(
|
65
|
-
abc # Comment one
|
66
|
-
\d? # Optional \d
|
67
|
-
)+
|
68
|
-
|
69
|
-
# Group two
|
70
|
-
(
|
71
|
-
def # Comment two
|
72
|
-
\s? # Optional \s
|
73
|
-
)?
|
74
|
-
/x
|
75
|
-
|
76
|
-
root = RP.parse(regexp)
|
77
|
-
|
78
|
-
top_comment_1 = root[1]
|
79
|
-
expect(top_comment_1).to be_instance_of(Comment)
|
80
|
-
expect(top_comment_1.text).to eq "# Group one\n"
|
81
|
-
expect(top_comment_1.starts_at).to eq 7
|
82
|
-
|
83
|
-
top_comment_2 = root[5]
|
84
|
-
expect(top_comment_2).to be_instance_of(Comment)
|
85
|
-
expect(top_comment_2.text).to eq "# Group two\n"
|
86
|
-
expect(top_comment_2.starts_at).to eq 95
|
87
|
-
|
88
|
-
[3, 7].each do |g,|
|
89
|
-
group = root[g]
|
90
|
-
|
91
|
-
[3, 7].each do |c|
|
92
|
-
comment = group[c]
|
93
|
-
expect(comment).to be_instance_of(Comment)
|
94
|
-
expect(comment.text.length).to eq 14
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
specify('parse free space quantifiers') do
|
100
|
-
regexp = /
|
101
|
-
a
|
102
|
-
# comment 1
|
103
|
-
?
|
104
|
-
(
|
105
|
-
b # comment 2
|
106
|
-
# comment 3
|
107
|
-
+
|
108
|
-
)
|
109
|
-
# comment 4
|
110
|
-
*
|
111
|
-
/x
|
112
|
-
|
113
|
-
root = RP.parse(regexp)
|
114
|
-
|
115
|
-
literal_1 = root[1]
|
116
|
-
expect(literal_1).to be_instance_of(Literal)
|
117
|
-
expect(literal_1).to be_quantified
|
118
|
-
expect(literal_1.quantifier.token).to eq :zero_or_one
|
119
|
-
|
120
|
-
group = root[5]
|
121
|
-
expect(group).to be_instance_of(Group::Capture)
|
122
|
-
expect(group).to be_quantified
|
123
|
-
expect(group.quantifier.token).to eq :zero_or_more
|
124
|
-
|
125
|
-
literal_2 = group[1]
|
126
|
-
expect(literal_2).to be_instance_of(Literal)
|
127
|
-
expect(literal_2).to be_quantified
|
128
|
-
expect(literal_2.quantifier.token).to eq :one_or_more
|
129
|
-
end
|
130
|
-
end
|
data/spec/parser/groups_spec.rb
DELETED
@@ -1,108 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Group parsing') do
|
4
|
-
include_examples 'parse', /(?=abc)(?!def)/,
|
5
|
-
0 => [:assertion, :lookahead, Assertion::Lookahead],
|
6
|
-
1 => [:assertion, :nlookahead, Assertion::NegativeLookahead]
|
7
|
-
|
8
|
-
include_examples 'parse', /(?<=abc)(?<!def)/,
|
9
|
-
0 => [:assertion, :lookbehind, Assertion::Lookbehind],
|
10
|
-
1 => [:assertion, :nlookbehind, Assertion::NegativeLookbehind]
|
11
|
-
|
12
|
-
include_examples 'parse', /a(?# is for apple)b(?# for boy)c(?# cat)/,
|
13
|
-
1 => [:group, :comment, Group::Comment],
|
14
|
-
3 => [:group, :comment, Group::Comment],
|
15
|
-
5 => [:group, :comment, Group::Comment]
|
16
|
-
|
17
|
-
if ruby_version_at_least('2.4.1')
|
18
|
-
include_examples 'parse', 'a(?~b)c(?~d)e',
|
19
|
-
1 => [:group, :absence, Group::Absence],
|
20
|
-
3 => [:group, :absence, Group::Absence]
|
21
|
-
end
|
22
|
-
|
23
|
-
include_examples 'parse', /(?m:a)/,
|
24
|
-
0 => [:group, :options, Group::Options, options: { m: true }, option_changes: { m: true }]
|
25
|
-
|
26
|
-
# self-defeating group option
|
27
|
-
include_examples 'parse', /(?m-m:a)/,
|
28
|
-
0 => [:group, :options, Group::Options, options: {}, option_changes: { m: false }]
|
29
|
-
|
30
|
-
# activate one option in nested group
|
31
|
-
include_examples 'parse', /(?x-mi:a(?m:b))/,
|
32
|
-
0 => [:group, :options, Group::Options, options: { x: true }, option_changes: { i: false, m: false, x: true }],
|
33
|
-
[0, 1] => [:group, :options, Group::Options, options: { m: true, x: true }, option_changes: { m: true }]
|
34
|
-
|
35
|
-
# deactivate one option in nested group
|
36
|
-
include_examples 'parse', /(?ix-m:a(?-i:b))/,
|
37
|
-
0 => [:group, :options, Group::Options, options: { i: true, x: true }, option_changes: { i: true, m: false, x: true }],
|
38
|
-
[0, 1] => [:group, :options, Group::Options, options: { x: true }, option_changes: { i: false }]
|
39
|
-
|
40
|
-
# invert all options in nested group
|
41
|
-
include_examples 'parse', /(?xi-m:a(?m-ix:b))/,
|
42
|
-
0 => [:group, :options, Group::Options, options: { i: true, x: true }, option_changes: { i: true, m: false, x: true }],
|
43
|
-
[0, 1] => [:group, :options, Group::Options, options: { m: true }, option_changes: { i: false, m: true, x: false }]
|
44
|
-
|
45
|
-
# nested options affect literal subexpressions
|
46
|
-
include_examples 'parse', /(?x-mi:a(?m:b))/,
|
47
|
-
[0, 0] => [:literal, :literal, Literal, text: 'a', options: { x: true }],
|
48
|
-
[0, 1, 0] => [:literal, :literal, Literal, text: 'b', options: { m: true, x: true }]
|
49
|
-
|
50
|
-
# option switching group
|
51
|
-
include_examples 'parse', /a(?i-m)b/m,
|
52
|
-
0 => [:literal, :literal, Literal, text: 'a', options: { m: true }],
|
53
|
-
1 => [:group, :options_switch, Group::Options, options: { i: true }, option_changes: { i: true, m: false }],
|
54
|
-
2 => [:literal, :literal, Literal, text: 'b', options: { i: true }]
|
55
|
-
|
56
|
-
# option switch in group
|
57
|
-
include_examples 'parse', /(a(?i-m)b)c/m,
|
58
|
-
0 => [:group, :capture, Group::Capture, options: { m: true }],
|
59
|
-
[0, 0] => [:literal, :literal, Literal, text: 'a', options: { m: true }],
|
60
|
-
[0, 1] => [:group, :options_switch, Group::Options, options: { i: true }, option_changes: { i: true, m: false }],
|
61
|
-
[0, 2] => [:literal, :literal, Literal, text: 'b', options: { i: true }],
|
62
|
-
1 => [:literal, :literal, Literal, text: 'c', options: { m: true }]
|
63
|
-
|
64
|
-
# nested option switch in group
|
65
|
-
include_examples 'parse', /((?i-m)(a(?-i)b))/m,
|
66
|
-
[0, 1] => [:group, :capture, Group::Capture, options: { i: true }],
|
67
|
-
[0, 1, 0] => [:literal, :literal, Literal, text: 'a', options: { i: true }],
|
68
|
-
[0, 1, 1] => [:group, :options_switch, Group::Options, options: {}, option_changes: { i: false }],
|
69
|
-
[0, 1, 2] => [:literal, :literal, Literal, text: 'b', options: {}]
|
70
|
-
|
71
|
-
# options dau
|
72
|
-
include_examples 'parse', /(?dua:abc)/,
|
73
|
-
0 => [:group, :options, Group::Options, options: { a: true }, option_changes: { a: true }]
|
74
|
-
|
75
|
-
# nested options dau
|
76
|
-
include_examples 'parse', /(?u:a(?d:b))/,
|
77
|
-
0 => [:group, :options, Group::Options, options: { u: true }, option_changes: { u: true }],
|
78
|
-
[0, 1] => [:group, :options, Group::Options, options: { d: true }, option_changes: { d: true, u: false }],
|
79
|
-
[0, 1, 0] => [:literal, :literal, Literal, text: 'b', options: { d: true }]
|
80
|
-
|
81
|
-
# nested options da
|
82
|
-
include_examples 'parse', /(?di-xm:a(?da-x:b))/,
|
83
|
-
0 => [:group, :options, Group::Options, options: { d: true, i:true }],
|
84
|
-
[0, 1] => [:group, :options, Group::Options, options: { a: true, i: true }, option_changes: { a: true, d: false, x: false}],
|
85
|
-
[0, 1, 0] => [:literal, :literal, Literal, text: 'b', options: { a: true, i: true }]
|
86
|
-
|
87
|
-
specify('parse group number') do
|
88
|
-
root = RP.parse(/(a)(?=b)((?:c)(d|(e)))/)
|
89
|
-
|
90
|
-
expect(root[0].number).to eq 1
|
91
|
-
expect(root[1]).not_to respond_to(:number)
|
92
|
-
expect(root[2].number).to eq 2
|
93
|
-
expect(root[2][0]).not_to respond_to(:number)
|
94
|
-
expect(root[2][1].number).to eq 3
|
95
|
-
expect(root[2][1][0][1][0].number).to eq 4
|
96
|
-
end
|
97
|
-
|
98
|
-
specify('parse group number at level') do
|
99
|
-
root = RP.parse(/(a)(?=b)((?:c)(d|(e)))/)
|
100
|
-
|
101
|
-
expect(root[0].number_at_level).to eq 1
|
102
|
-
expect(root[1]).not_to respond_to(:number_at_level)
|
103
|
-
expect(root[2].number_at_level).to eq 2
|
104
|
-
expect(root[2][0]).not_to respond_to(:number_at_level)
|
105
|
-
expect(root[2][1].number_at_level).to eq 1
|
106
|
-
expect(root[2][1][0][1][0].number_at_level).to eq 1
|
107
|
-
end
|
108
|
-
end
|
data/spec/parser/posix_classes_spec.rb
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('PosixClass parsing') do
|
4
|
-
include_examples 'parse', /[[:word:]]/, [0, 0] => [:posixclass, :word, PosixClass,
|
5
|
-
name: 'word', text: '[:word:]', negative?: false]
|
6
|
-
include_examples 'parse', /[[:^word:]]/, [0, 0] => [:nonposixclass, :word, PosixClass,
|
7
|
-
name: 'word', text: '[:^word:]', negative?: true]
|
8
|
-
end
|