regexp_parser 1.7.1 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +157 -1
- data/Gemfile +6 -1
- data/LICENSE +1 -1
- data/README.md +38 -32
- data/Rakefile +18 -27
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -4
- data/lib/regexp_parser/expression/classes/group.rb +28 -3
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +4 -17
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/expression/quantifier.rb +11 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -20
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -139
- data/lib/regexp_parser/lexer.rb +13 -11
- data/lib/regexp_parser/parser.rb +325 -344
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/properties/long.csv +604 -0
- data/lib/regexp_parser/scanner/properties/short.csv +242 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +235 -255
- data/lib/regexp_parser/scanner.rb +1324 -1387
- data/lib/regexp_parser/syntax/any.rb +4 -6
- data/lib/regexp_parser/syntax/base.rb +13 -15
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +34 -165
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -52
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
@@ -1,115 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Property parsing') do
|
4
|
-
example_props = [
|
5
|
-
'Alnum',
|
6
|
-
'Any',
|
7
|
-
'Age=1.1',
|
8
|
-
'Dash',
|
9
|
-
'di',
|
10
|
-
'Default_Ignorable_Code_Point',
|
11
|
-
'Math',
|
12
|
-
'Noncharacter-Code_Point', # test dash
|
13
|
-
'sd',
|
14
|
-
'Soft Dotted', # test whitespace
|
15
|
-
'sterm',
|
16
|
-
'xidc',
|
17
|
-
'XID_Continue',
|
18
|
-
'Emoji',
|
19
|
-
'InChessSymbols'
|
20
|
-
]
|
21
|
-
|
22
|
-
example_props.each do |name|
|
23
|
-
it("parses property #{name}") do
|
24
|
-
exp = RP.parse("ab\\p{#{name}}", '*').last
|
25
|
-
|
26
|
-
expect(exp).to be_a(UnicodeProperty::Base)
|
27
|
-
expect(exp.type).to eq :property
|
28
|
-
expect(exp.name).to eq name
|
29
|
-
end
|
30
|
-
|
31
|
-
it("parses nonproperty #{name}") do
|
32
|
-
exp = RP.parse("ab\\P{#{name}}", '*').last
|
33
|
-
|
34
|
-
expect(exp).to be_a(UnicodeProperty::Base)
|
35
|
-
expect(exp.type).to eq :nonproperty
|
36
|
-
expect(exp.name).to eq name
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
specify('parse all properties of current ruby') do
|
41
|
-
unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
|
42
|
-
RP.parse("\\p{#{prop}}") rescue false
|
43
|
-
end
|
44
|
-
expect(unsupported).to be_empty
|
45
|
-
end
|
46
|
-
|
47
|
-
specify('parse property negative') do
|
48
|
-
root = RP.parse('ab\p{L}cd', 'ruby/1.9')
|
49
|
-
expect(root[1]).not_to be_negative
|
50
|
-
end
|
51
|
-
|
52
|
-
specify('parse nonproperty negative') do
|
53
|
-
root = RP.parse('ab\P{L}cd', 'ruby/1.9')
|
54
|
-
expect(root[1]).to be_negative
|
55
|
-
end
|
56
|
-
|
57
|
-
specify('parse caret nonproperty negative') do
|
58
|
-
root = RP.parse('ab\p{^L}cd', 'ruby/1.9')
|
59
|
-
expect(root[1]).to be_negative
|
60
|
-
end
|
61
|
-
|
62
|
-
specify('parse double negated property negative') do
|
63
|
-
root = RP.parse('ab\P{^L}cd', 'ruby/1.9')
|
64
|
-
expect(root[1]).not_to be_negative
|
65
|
-
end
|
66
|
-
|
67
|
-
specify('parse property shortcut') do
|
68
|
-
expect(RP.parse('\p{lowercase_letter}')[0].shortcut).to eq 'll'
|
69
|
-
expect(RP.parse('\p{sc}')[0].shortcut).to eq 'sc'
|
70
|
-
expect(RP.parse('\p{in_bengali}')[0].shortcut).to be_nil
|
71
|
-
end
|
72
|
-
|
73
|
-
specify('parse property age') do
|
74
|
-
root = RP.parse('ab\p{age=5.2}cd', 'ruby/1.9')
|
75
|
-
expect(root[1]).to be_a(UnicodeProperty::Age)
|
76
|
-
end
|
77
|
-
|
78
|
-
specify('parse property derived') do
|
79
|
-
root = RP.parse('ab\p{Math}cd', 'ruby/1.9')
|
80
|
-
expect(root[1]).to be_a(UnicodeProperty::Derived)
|
81
|
-
end
|
82
|
-
|
83
|
-
specify('parse property script') do
|
84
|
-
root = RP.parse('ab\p{Hiragana}cd', 'ruby/1.9')
|
85
|
-
expect(root[1]).to be_a(UnicodeProperty::Script)
|
86
|
-
end
|
87
|
-
|
88
|
-
specify('parse property script V1 9 3') do
|
89
|
-
root = RP.parse('ab\p{Brahmi}cd', 'ruby/1.9.3')
|
90
|
-
expect(root[1]).to be_a(UnicodeProperty::Script)
|
91
|
-
end
|
92
|
-
|
93
|
-
specify('parse property script V2 2 0') do
|
94
|
-
root = RP.parse('ab\p{Caucasian_Albanian}cd', 'ruby/2.2')
|
95
|
-
expect(root[1]).to be_a(UnicodeProperty::Script)
|
96
|
-
end
|
97
|
-
|
98
|
-
specify('parse property block') do
|
99
|
-
root = RP.parse('ab\p{InArmenian}cd', 'ruby/1.9')
|
100
|
-
expect(root[1]).to be_a(UnicodeProperty::Block)
|
101
|
-
end
|
102
|
-
|
103
|
-
specify('parse property following literal') do
|
104
|
-
root = RP.parse('ab\p{Lu}cd', 'ruby/1.9')
|
105
|
-
expect(root[2]).to be_a(Literal)
|
106
|
-
end
|
107
|
-
|
108
|
-
specify('parse abandoned newline property') do
|
109
|
-
root = RP.parse('\p{newline}', 'ruby/1.9')
|
110
|
-
expect(root.expressions.last).to be_a(UnicodeProperty::Base)
|
111
|
-
|
112
|
-
expect { RP.parse('\p{newline}', 'ruby/2.0') }
|
113
|
-
.to raise_error(Regexp::Syntax::NotImplementedError)
|
114
|
-
end
|
115
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Quantifier parsing') do
|
4
|
-
RSpec.shared_examples 'quantifier' do |pattern, text, mode, token, min, max|
|
5
|
-
it "parses the quantifier in #{pattern} as #{mode} #{token}" do
|
6
|
-
root = RP.parse(pattern, '*')
|
7
|
-
exp = root[0]
|
8
|
-
|
9
|
-
expect(exp).to be_quantified
|
10
|
-
expect(exp.quantifier.token).to eq token
|
11
|
-
expect(exp.quantifier.min).to eq min
|
12
|
-
expect(exp.quantifier.max).to eq max
|
13
|
-
expect(exp.quantifier.mode).to eq mode
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
include_examples 'quantifier', /a?b/, '?', :greedy, :zero_or_one, 0, 1
|
18
|
-
include_examples 'quantifier', /a??b/, '??', :reluctant, :zero_or_one, 0, 1
|
19
|
-
include_examples 'quantifier', /a?+b/, '?+', :possessive, :zero_or_one, 0, 1
|
20
|
-
include_examples 'quantifier', /a*b/, '*', :greedy, :zero_or_more, 0, -1
|
21
|
-
include_examples 'quantifier', /a*?b/, '*?', :reluctant, :zero_or_more, 0, -1
|
22
|
-
include_examples 'quantifier', /a*+b/, '*+', :possessive, :zero_or_more, 0, -1
|
23
|
-
include_examples 'quantifier', /a+b/, '+', :greedy, :one_or_more, 1, -1
|
24
|
-
include_examples 'quantifier', /a+?b/, '+?', :reluctant, :one_or_more, 1, -1
|
25
|
-
include_examples 'quantifier', /a++b/, '++', :possessive, :one_or_more, 1, -1
|
26
|
-
include_examples 'quantifier', /a{2,4}b/, '{2,4}', :greedy, :interval, 2, 4
|
27
|
-
include_examples 'quantifier', /a{2,4}?b/, '{2,4}?', :reluctant, :interval, 2, 4
|
28
|
-
include_examples 'quantifier', /a{2,4}+b/, '{2,4}+', :possessive, :interval, 2, 4
|
29
|
-
include_examples 'quantifier', /a{2,}b/, '{2,}', :greedy, :interval, 2, -1
|
30
|
-
include_examples 'quantifier', /a{2,}?b/, '{2,}?', :reluctant, :interval, 2, -1
|
31
|
-
include_examples 'quantifier', /a{2,}+b/, '{2,}+', :possessive, :interval, 2, -1
|
32
|
-
include_examples 'quantifier', /a{,3}b/, '{,3}', :greedy, :interval, 0, 3
|
33
|
-
include_examples 'quantifier', /a{,3}?b/, '{,3}?', :reluctant, :interval, 0, 3
|
34
|
-
include_examples 'quantifier', /a{,3}+b/, '{,3}+', :possessive, :interval, 0, 3
|
35
|
-
include_examples 'quantifier', /a{4}b/, '{4}', :greedy, :interval, 4, 4
|
36
|
-
include_examples 'quantifier', /a{4}?b/, '{4}?', :reluctant, :interval, 4, 4
|
37
|
-
include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
|
38
|
-
include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
|
39
|
-
|
40
|
-
specify('mode-checking methods') do
|
41
|
-
exp = RP.parse(/a??/).first
|
42
|
-
|
43
|
-
expect(exp).to be_reluctant
|
44
|
-
expect(exp).to be_lazy
|
45
|
-
expect(exp).not_to be_greedy
|
46
|
-
expect(exp).not_to be_possessive
|
47
|
-
expect(exp.quantifier).to be_reluctant
|
48
|
-
expect(exp.quantifier).to be_lazy
|
49
|
-
expect(exp.quantifier).not_to be_greedy
|
50
|
-
expect(exp.quantifier).not_to be_possessive
|
51
|
-
end
|
52
|
-
end
|
@@ -1,112 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('Refcall parsing') do
|
4
|
-
include_examples 'parse', /(abc)\1/,
|
5
|
-
1 => [:backref, :number, Backreference::Number, number: 1]
|
6
|
-
|
7
|
-
include_examples 'parse', /(?<X>abc)\k<X>/,
|
8
|
-
1 => [:backref, :name_ref, Backreference::Name, name: 'X']
|
9
|
-
include_examples 'parse', /(?<X>abc)\k'X'/,
|
10
|
-
1 => [:backref, :name_ref, Backreference::Name, name: 'X']
|
11
|
-
|
12
|
-
include_examples 'parse', /(abc)\k<1>/,
|
13
|
-
1 => [:backref, :number_ref, Backreference::Number, number: 1]
|
14
|
-
include_examples 'parse', /(abc)\k'1'/,
|
15
|
-
1 => [:backref, :number_ref, Backreference::Number, number: 1]
|
16
|
-
|
17
|
-
include_examples 'parse', /(abc)\k<-1>/,
|
18
|
-
1 => [:backref, :number_rel_ref, Backreference::NumberRelative, number: -1]
|
19
|
-
include_examples 'parse', /(abc)\k'-1'/,
|
20
|
-
1 => [:backref, :number_rel_ref, Backreference::NumberRelative, number: -1]
|
21
|
-
|
22
|
-
include_examples 'parse', /(?<X>abc)\g<X>/,
|
23
|
-
1 => [:backref, :name_call, Backreference::NameCall, name: 'X']
|
24
|
-
include_examples 'parse', /(?<X>abc)\g'X'/,
|
25
|
-
1 => [:backref, :name_call, Backreference::NameCall, name: 'X']
|
26
|
-
|
27
|
-
include_examples 'parse', /(abc)\g<1>/,
|
28
|
-
1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
|
29
|
-
include_examples 'parse', /(abc)\g'1'/,
|
30
|
-
1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
|
31
|
-
|
32
|
-
include_examples 'parse', /(abc)\g<-1>/,
|
33
|
-
1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
|
34
|
-
include_examples 'parse', /(abc)\g'-1'/,
|
35
|
-
1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
|
36
|
-
|
37
|
-
include_examples 'parse', /\g<+1>(abc)/,
|
38
|
-
0 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: 1]
|
39
|
-
include_examples 'parse', /\g'+1'(abc)/,
|
40
|
-
0 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: 1]
|
41
|
-
|
42
|
-
include_examples 'parse', /(?<X>abc)\k<X-0>/,
|
43
|
-
1 => [:backref, :name_recursion_ref, Backreference::NameRecursionLevel,
|
44
|
-
name: 'X', recursion_level: 0]
|
45
|
-
include_examples 'parse', /(?<X>abc)\k'X-0'/,
|
46
|
-
1 => [:backref, :name_recursion_ref, Backreference::NameRecursionLevel,
|
47
|
-
name: 'X', recursion_level: 0]
|
48
|
-
|
49
|
-
include_examples 'parse', /(abc)\k<1-0>/,
|
50
|
-
1 => [:backref, :number_recursion_ref, Backreference::NumberRecursionLevel,
|
51
|
-
number: 1, recursion_level: 0]
|
52
|
-
include_examples 'parse', /(abc)\k'1-0'/,
|
53
|
-
1 => [:backref, :number_recursion_ref, Backreference::NumberRecursionLevel,
|
54
|
-
number: 1, recursion_level: 0]
|
55
|
-
include_examples 'parse', /(abc)\k'-1+0'/,
|
56
|
-
1 => [:backref, :number_recursion_ref, Backreference::NumberRecursionLevel,
|
57
|
-
number: -1, recursion_level: 0]
|
58
|
-
include_examples 'parse', /(abc)\k'1+1'/,
|
59
|
-
1 => [:backref, :number_recursion_ref, Backreference::NumberRecursionLevel,
|
60
|
-
number: 1, recursion_level: 1]
|
61
|
-
include_examples 'parse', /(abc)\k'1-1'/,
|
62
|
-
1 => [:backref, :number_recursion_ref, Backreference::NumberRecursionLevel,
|
63
|
-
number: 1, recursion_level: -1]
|
64
|
-
|
65
|
-
specify('parse backref effective_number') do
|
66
|
-
root = RP.parse('(abc)(def)\\k<-1>(ghi)\\k<-3>\\k<-1>', 'ruby/1.9')
|
67
|
-
exp1 = root[2]
|
68
|
-
exp2 = root[4]
|
69
|
-
exp3 = root[5]
|
70
|
-
|
71
|
-
expect([exp1, exp2, exp3]).to all be_instance_of(Backreference::NumberRelative)
|
72
|
-
expect(exp1.effective_number).to eq 2
|
73
|
-
expect(exp2.effective_number).to eq 1
|
74
|
-
expect(exp3.effective_number).to eq 3
|
75
|
-
end
|
76
|
-
|
77
|
-
specify('parse backref referenced_expression') do
|
78
|
-
root = RP.parse('(abc)(def)\\k<-1>(ghi)\\k<-3>\\k<-1>', 'ruby/1.9')
|
79
|
-
exp1 = root[2]
|
80
|
-
exp2 = root[4]
|
81
|
-
exp3 = root[5]
|
82
|
-
|
83
|
-
expect([exp1, exp2, exp3]).to all be_instance_of(Backreference::NumberRelative)
|
84
|
-
expect(exp1.referenced_expression.to_s).to eq '(def)'
|
85
|
-
expect(exp2.referenced_expression.to_s).to eq '(abc)'
|
86
|
-
expect(exp3.referenced_expression.to_s).to eq '(ghi)'
|
87
|
-
end
|
88
|
-
|
89
|
-
specify('parse backref call effective_number') do
|
90
|
-
root = RP.parse('\\g<+1>(abc)\\g<+2>(def)(ghi)\\g<-2>', 'ruby/1.9')
|
91
|
-
exp1 = root[0]
|
92
|
-
exp2 = root[2]
|
93
|
-
exp3 = root[5]
|
94
|
-
|
95
|
-
expect([exp1, exp2, exp3]).to all be_instance_of(Backreference::NumberCallRelative)
|
96
|
-
expect(exp1.effective_number).to eq 1
|
97
|
-
expect(exp2.effective_number).to eq 3
|
98
|
-
expect(exp3.effective_number).to eq 2
|
99
|
-
end
|
100
|
-
|
101
|
-
specify('parse backref call referenced_expression') do
|
102
|
-
root = RP.parse('\\g<+1>(abc)\\g<+2>(def)(ghi)\\g<-2>', 'ruby/1.9')
|
103
|
-
exp1 = root[0]
|
104
|
-
exp2 = root[2]
|
105
|
-
exp3 = root[5]
|
106
|
-
|
107
|
-
expect([exp1, exp2, exp3]).to all be_instance_of(Backreference::NumberCallRelative)
|
108
|
-
expect(exp1.referenced_expression.to_s).to eq '(abc)'
|
109
|
-
expect(exp2.referenced_expression.to_s).to eq '(ghi)'
|
110
|
-
expect(exp3.referenced_expression.to_s).to eq '(def)'
|
111
|
-
end
|
112
|
-
end
|
@@ -1,127 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
# edge cases with `...-&&...` and `...&&-...` are checked in test_ranges.rb
|
4
|
-
|
5
|
-
RSpec.describe('CharacterSet::Intersection parsing') do
|
6
|
-
specify('parse set intersection') do
|
7
|
-
root = RP.parse('[a&&z]')
|
8
|
-
set = root[0]
|
9
|
-
ints = set[0]
|
10
|
-
|
11
|
-
expect(set.count).to eq 1
|
12
|
-
expect(ints).to be_instance_of(CharacterSet::Intersection)
|
13
|
-
expect(ints.count).to eq 2
|
14
|
-
|
15
|
-
seq1, seq2 = ints.expressions
|
16
|
-
expect(seq1).to be_instance_of(CharacterSet::IntersectedSequence)
|
17
|
-
expect(seq1.count).to eq 1
|
18
|
-
expect(seq1.first.to_s).to eq 'a'
|
19
|
-
expect(seq1.first).to be_instance_of(Literal)
|
20
|
-
expect(seq2).to be_instance_of(CharacterSet::IntersectedSequence)
|
21
|
-
expect(seq2.count).to eq 1
|
22
|
-
expect(seq2.first.to_s).to eq 'z'
|
23
|
-
expect(seq2.first).to be_instance_of(Literal)
|
24
|
-
|
25
|
-
expect(set).not_to match 'a'
|
26
|
-
expect(set).not_to match '&'
|
27
|
-
expect(set).not_to match 'z'
|
28
|
-
end
|
29
|
-
|
30
|
-
specify('parse set intersection range and subset') do
|
31
|
-
root = RP.parse('[a-z&&[^a]]')
|
32
|
-
set = root[0]
|
33
|
-
ints = set[0]
|
34
|
-
|
35
|
-
expect(set.count).to eq 1
|
36
|
-
expect(ints).to be_instance_of(CharacterSet::Intersection)
|
37
|
-
expect(ints.count).to eq 2
|
38
|
-
|
39
|
-
seq1, seq2 = ints.expressions
|
40
|
-
expect(seq1).to be_instance_of(CharacterSet::IntersectedSequence)
|
41
|
-
expect(seq1.count).to eq 1
|
42
|
-
expect(seq1.first.to_s).to eq 'a-z'
|
43
|
-
expect(seq1.first).to be_instance_of(CharacterSet::Range)
|
44
|
-
expect(seq2).to be_instance_of(CharacterSet::IntersectedSequence)
|
45
|
-
expect(seq2.count).to eq 1
|
46
|
-
expect(seq2.first.to_s).to eq '[^a]'
|
47
|
-
expect(seq2.first).to be_instance_of(CharacterSet)
|
48
|
-
|
49
|
-
expect(set).not_to match 'a'
|
50
|
-
expect(set).not_to match '&'
|
51
|
-
expect(set).to match 'b'
|
52
|
-
end
|
53
|
-
|
54
|
-
specify('parse set intersection trailing range') do
|
55
|
-
root = RP.parse('[a&&a-z]')
|
56
|
-
set = root[0]
|
57
|
-
ints = set[0]
|
58
|
-
|
59
|
-
expect(set.count).to eq 1
|
60
|
-
expect(ints).to be_instance_of(CharacterSet::Intersection)
|
61
|
-
expect(ints.count).to eq 2
|
62
|
-
|
63
|
-
seq1, seq2 = ints.expressions
|
64
|
-
expect(seq1).to be_instance_of(CharacterSet::IntersectedSequence)
|
65
|
-
expect(seq1.count).to eq 1
|
66
|
-
expect(seq1.first.to_s).to eq 'a'
|
67
|
-
expect(seq1.first).to be_instance_of(Literal)
|
68
|
-
expect(seq2).to be_instance_of(CharacterSet::IntersectedSequence)
|
69
|
-
expect(seq2.count).to eq 1
|
70
|
-
expect(seq2.first.to_s).to eq 'a-z'
|
71
|
-
expect(seq2.first).to be_instance_of(CharacterSet::Range)
|
72
|
-
|
73
|
-
expect(set).to match 'a'
|
74
|
-
expect(set).not_to match '&'
|
75
|
-
expect(set).not_to match 'b'
|
76
|
-
end
|
77
|
-
|
78
|
-
specify('parse set intersection type') do
|
79
|
-
root = RP.parse('[a&&\\w]')
|
80
|
-
set = root[0]
|
81
|
-
ints = set[0]
|
82
|
-
|
83
|
-
expect(set.count).to eq 1
|
84
|
-
expect(ints).to be_instance_of(CharacterSet::Intersection)
|
85
|
-
expect(ints.count).to eq 2
|
86
|
-
|
87
|
-
seq1, seq2 = ints.expressions
|
88
|
-
expect(seq1).to be_instance_of(CharacterSet::IntersectedSequence)
|
89
|
-
expect(seq1.count).to eq 1
|
90
|
-
expect(seq1.first.to_s).to eq 'a'
|
91
|
-
expect(seq1.first).to be_instance_of(Literal)
|
92
|
-
expect(seq2).to be_instance_of(CharacterSet::IntersectedSequence)
|
93
|
-
expect(seq2.count).to eq 1
|
94
|
-
expect(seq2.first.to_s).to eq '\\w'
|
95
|
-
expect(seq2.first).to be_instance_of(CharacterType::Word)
|
96
|
-
|
97
|
-
expect(set).to match 'a'
|
98
|
-
expect(set).not_to match '&'
|
99
|
-
expect(set).not_to match 'b'
|
100
|
-
end
|
101
|
-
|
102
|
-
specify('parse set intersection multipart') do
|
103
|
-
root = RP.parse('[\\h&&\\w&&efg]')
|
104
|
-
set = root[0]
|
105
|
-
ints = set[0]
|
106
|
-
|
107
|
-
expect(set.count).to eq 1
|
108
|
-
expect(ints).to be_instance_of(CharacterSet::Intersection)
|
109
|
-
expect(ints.count).to eq 3
|
110
|
-
|
111
|
-
seq1, seq2, seq3 = ints.expressions
|
112
|
-
expect(seq1).to be_instance_of(CharacterSet::IntersectedSequence)
|
113
|
-
expect(seq1.count).to eq 1
|
114
|
-
expect(seq1.first.to_s).to eq '\\h'
|
115
|
-
expect(seq2).to be_instance_of(CharacterSet::IntersectedSequence)
|
116
|
-
expect(seq2.count).to eq 1
|
117
|
-
expect(seq2.first.to_s).to eq '\\w'
|
118
|
-
expect(seq3).to be_instance_of(CharacterSet::IntersectedSequence)
|
119
|
-
expect(seq3.count).to eq 3
|
120
|
-
expect(seq3.to_s).to eq 'efg'
|
121
|
-
|
122
|
-
expect(set).to match 'e'
|
123
|
-
expect(set).to match 'f'
|
124
|
-
expect(set).not_to match 'a'
|
125
|
-
expect(set).not_to match 'g'
|
126
|
-
end
|
127
|
-
end
|
@@ -1,111 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('CharacterSet::Range parsing') do
|
4
|
-
specify('parse set range') do
|
5
|
-
root = RP.parse('[a-z]')
|
6
|
-
set = root[0]
|
7
|
-
range = set[0]
|
8
|
-
|
9
|
-
expect(set.count).to eq 1
|
10
|
-
expect(range).to be_instance_of(CharacterSet::Range)
|
11
|
-
expect(range.count).to eq 2
|
12
|
-
expect(range.first.to_s).to eq 'a'
|
13
|
-
expect(range.first).to be_instance_of(Literal)
|
14
|
-
expect(range.last.to_s).to eq 'z'
|
15
|
-
expect(range.last).to be_instance_of(Literal)
|
16
|
-
expect(set).to match 'm'
|
17
|
-
end
|
18
|
-
|
19
|
-
specify('parse set range hex') do
|
20
|
-
root = RP.parse('[\\x00-\\x99]')
|
21
|
-
set = root[0]
|
22
|
-
range = set[0]
|
23
|
-
|
24
|
-
expect(set.count).to eq 1
|
25
|
-
expect(range).to be_instance_of(CharacterSet::Range)
|
26
|
-
expect(range.count).to eq 2
|
27
|
-
expect(range.first.to_s).to eq '\\x00'
|
28
|
-
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
|
-
expect(range.last.to_s).to eq '\\x99'
|
30
|
-
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set).to match '\\x50'
|
32
|
-
end
|
33
|
-
|
34
|
-
specify('parse set range unicode') do
|
35
|
-
root = RP.parse('[\\u{40 42}-\\u1234]')
|
36
|
-
set = root[0]
|
37
|
-
range = set[0]
|
38
|
-
|
39
|
-
expect(set.count).to eq 1
|
40
|
-
expect(range).to be_instance_of(CharacterSet::Range)
|
41
|
-
expect(range.count).to eq 2
|
42
|
-
expect(range.first.to_s).to eq '\\u{40 42}'
|
43
|
-
expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
|
44
|
-
expect(range.last.to_s).to eq '\\u1234'
|
45
|
-
expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
|
46
|
-
expect(set).to match '\\u600'
|
47
|
-
end
|
48
|
-
|
49
|
-
specify('parse set range edge case leading dash') do
|
50
|
-
root = RP.parse('[--z]')
|
51
|
-
set = root[0]
|
52
|
-
range = set[0]
|
53
|
-
|
54
|
-
expect(set.count).to eq 1
|
55
|
-
expect(range.count).to eq 2
|
56
|
-
expect(set).to match 'a'
|
57
|
-
end
|
58
|
-
|
59
|
-
specify('parse set range edge case trailing dash') do
|
60
|
-
root = RP.parse('[!--]')
|
61
|
-
set = root[0]
|
62
|
-
range = set[0]
|
63
|
-
|
64
|
-
expect(set.count).to eq 1
|
65
|
-
expect(range.count).to eq 2
|
66
|
-
expect(set).to match '$'
|
67
|
-
end
|
68
|
-
|
69
|
-
specify('parse set range edge case leading negate') do
|
70
|
-
root = RP.parse('[^-z]')
|
71
|
-
set = root[0]
|
72
|
-
|
73
|
-
expect(set.count).to eq 2
|
74
|
-
expect(set).to match 'a'
|
75
|
-
expect(set).not_to match 'z'
|
76
|
-
end
|
77
|
-
|
78
|
-
specify('parse set range edge case trailing negate') do
|
79
|
-
root = RP.parse('[!-^]')
|
80
|
-
set = root[0]
|
81
|
-
range = set[0]
|
82
|
-
|
83
|
-
expect(set.count).to eq 1
|
84
|
-
expect(range.count).to eq 2
|
85
|
-
expect(set).to match '$'
|
86
|
-
end
|
87
|
-
|
88
|
-
specify('parse set range edge case leading intersection') do
|
89
|
-
root = RP.parse('[[\\-ab]&&-bc]')
|
90
|
-
set = root[0]
|
91
|
-
|
92
|
-
expect(set.count).to eq 1
|
93
|
-
expect(set.first.last.to_s).to eq '-bc'
|
94
|
-
expect(set).to match '-'
|
95
|
-
expect(set).to match 'b'
|
96
|
-
expect(set).not_to match 'a'
|
97
|
-
expect(set).not_to match 'c'
|
98
|
-
end
|
99
|
-
|
100
|
-
specify('parse set range edge case trailing intersection') do
|
101
|
-
root = RP.parse('[bc-&&[\\-ab]]')
|
102
|
-
set = root[0]
|
103
|
-
|
104
|
-
expect(set.count).to eq 1
|
105
|
-
expect(set.first.first.to_s).to eq 'bc-'
|
106
|
-
expect(set).to match '-'
|
107
|
-
expect(set).to match 'b'
|
108
|
-
expect(set).not_to match 'a'
|
109
|
-
expect(set).not_to match 'c'
|
110
|
-
end
|
111
|
-
end
|
data/spec/parser/sets_spec.rb
DELETED
@@ -1,178 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe('CharacterSet parsing') do
|
4
|
-
specify('parse set basic') do
|
5
|
-
root = RP.parse('[ab]+')
|
6
|
-
exp = root[0]
|
7
|
-
|
8
|
-
expect(exp).to be_instance_of(CharacterSet)
|
9
|
-
expect(exp.count).to eq 2
|
10
|
-
|
11
|
-
expect(exp[0]).to be_instance_of(Literal)
|
12
|
-
expect(exp[0].text).to eq 'a'
|
13
|
-
expect(exp[1]).to be_instance_of(Literal)
|
14
|
-
expect(exp[1].text).to eq 'b'
|
15
|
-
|
16
|
-
expect(exp).to be_quantified
|
17
|
-
expect(exp.quantifier.min).to eq 1
|
18
|
-
expect(exp.quantifier.max).to eq(-1)
|
19
|
-
end
|
20
|
-
|
21
|
-
specify('parse set char type') do
|
22
|
-
root = RP.parse('[a\\dc]')
|
23
|
-
exp = root[0]
|
24
|
-
|
25
|
-
expect(exp).to be_instance_of(CharacterSet)
|
26
|
-
expect(exp.count).to eq 3
|
27
|
-
|
28
|
-
expect(exp[1]).to be_instance_of(CharacterType::Digit)
|
29
|
-
expect(exp[1].text).to eq '\\d'
|
30
|
-
end
|
31
|
-
|
32
|
-
specify('parse set escape sequence backspace') do
|
33
|
-
root = RP.parse('[a\\bc]')
|
34
|
-
exp = root[0]
|
35
|
-
|
36
|
-
expect(exp).to be_instance_of(CharacterSet)
|
37
|
-
expect(exp.count).to eq 3
|
38
|
-
|
39
|
-
expect(exp[1]).to be_instance_of(EscapeSequence::Backspace)
|
40
|
-
expect(exp[1].text).to eq '\\b'
|
41
|
-
|
42
|
-
expect(exp).to match 'a'
|
43
|
-
expect(exp).to match "\b"
|
44
|
-
expect(exp).not_to match 'b'
|
45
|
-
expect(exp).to match 'c'
|
46
|
-
end
|
47
|
-
|
48
|
-
specify('parse set escape sequence hex') do
|
49
|
-
root = RP.parse('[a\\x20c]', :any)
|
50
|
-
exp = root[0]
|
51
|
-
|
52
|
-
expect(exp).to be_instance_of(CharacterSet)
|
53
|
-
expect(exp.count).to eq 3
|
54
|
-
|
55
|
-
expect(exp[1]).to be_instance_of(EscapeSequence::Hex)
|
56
|
-
expect(exp[1].text).to eq '\\x20'
|
57
|
-
end
|
58
|
-
|
59
|
-
specify('parse set escape sequence codepoint') do
|
60
|
-
root = RP.parse('[a\\u0640]')
|
61
|
-
exp = root[0]
|
62
|
-
|
63
|
-
expect(exp).to be_instance_of(CharacterSet)
|
64
|
-
expect(exp.count).to eq 2
|
65
|
-
|
66
|
-
expect(exp[1]).to be_instance_of(EscapeSequence::Codepoint)
|
67
|
-
expect(exp[1].text).to eq '\\u0640'
|
68
|
-
end
|
69
|
-
|
70
|
-
specify('parse set escape sequence codepoint list') do
|
71
|
-
root = RP.parse('[a\\u{41 1F60D}]')
|
72
|
-
exp = root[0]
|
73
|
-
|
74
|
-
expect(exp).to be_instance_of(CharacterSet)
|
75
|
-
expect(exp.count).to eq 2
|
76
|
-
|
77
|
-
expect(exp[1]).to be_instance_of(EscapeSequence::CodepointList)
|
78
|
-
expect(exp[1].text).to eq '\\u{41 1F60D}'
|
79
|
-
end
|
80
|
-
|
81
|
-
specify('parse set posix class') do
|
82
|
-
root = RP.parse('[[:digit:][:^lower:]]+')
|
83
|
-
exp = root[0]
|
84
|
-
|
85
|
-
expect(exp).to be_instance_of(CharacterSet)
|
86
|
-
expect(exp.count).to eq 2
|
87
|
-
|
88
|
-
expect(exp[0]).to be_instance_of(PosixClass)
|
89
|
-
expect(exp[0].text).to eq '[:digit:]'
|
90
|
-
expect(exp[1]).to be_instance_of(PosixClass)
|
91
|
-
expect(exp[1].text).to eq '[:^lower:]'
|
92
|
-
end
|
93
|
-
|
94
|
-
specify('parse set nesting') do
|
95
|
-
root = RP.parse('[a[b[c]d]e]')
|
96
|
-
|
97
|
-
exp = root[0]
|
98
|
-
expect(exp).to be_instance_of(CharacterSet)
|
99
|
-
expect(exp.count).to eq 3
|
100
|
-
expect(exp[0]).to be_instance_of(Literal)
|
101
|
-
expect(exp[2]).to be_instance_of(Literal)
|
102
|
-
|
103
|
-
subset1 = exp[1]
|
104
|
-
expect(subset1).to be_instance_of(CharacterSet)
|
105
|
-
expect(subset1.count).to eq 3
|
106
|
-
expect(subset1[0]).to be_instance_of(Literal)
|
107
|
-
expect(subset1[2]).to be_instance_of(Literal)
|
108
|
-
|
109
|
-
subset2 = subset1[1]
|
110
|
-
expect(subset2).to be_instance_of(CharacterSet)
|
111
|
-
expect(subset2.count).to eq 1
|
112
|
-
expect(subset2[0]).to be_instance_of(Literal)
|
113
|
-
end
|
114
|
-
|
115
|
-
specify('parse set nesting negative') do
|
116
|
-
root = RP.parse('[a[^b[c]]]')
|
117
|
-
exp = root[0]
|
118
|
-
|
119
|
-
expect(exp).to be_instance_of(CharacterSet)
|
120
|
-
expect(exp.count).to eq 2
|
121
|
-
expect(exp[0]).to be_instance_of(Literal)
|
122
|
-
expect(exp).not_to be_negative
|
123
|
-
|
124
|
-
subset1 = exp[1]
|
125
|
-
expect(subset1).to be_instance_of(CharacterSet)
|
126
|
-
expect(subset1.count).to eq 2
|
127
|
-
expect(subset1[0]).to be_instance_of(Literal)
|
128
|
-
expect(subset1).to be_negative
|
129
|
-
|
130
|
-
subset2 = subset1[1]
|
131
|
-
expect(subset2).to be_instance_of(CharacterSet)
|
132
|
-
expect(subset2.count).to eq 1
|
133
|
-
expect(subset2[0]).to be_instance_of(Literal)
|
134
|
-
expect(subset2).not_to be_negative
|
135
|
-
end
|
136
|
-
|
137
|
-
specify('parse set nesting #to_s') do
|
138
|
-
pattern = '[a[b[^c]]]'
|
139
|
-
root = RP.parse(pattern)
|
140
|
-
|
141
|
-
expect(root.to_s).to eq pattern
|
142
|
-
end
|
143
|
-
|
144
|
-
specify('parse set literals are not merged') do
|
145
|
-
root = RP.parse("[#{('a' * 10)}]")
|
146
|
-
exp = root[0]
|
147
|
-
|
148
|
-
expect(exp.count).to eq 10
|
149
|
-
end
|
150
|
-
|
151
|
-
specify('parse set whitespace is not merged') do
|
152
|
-
root = RP.parse("[#{(' ' * 10)}]")
|
153
|
-
exp = root[0]
|
154
|
-
|
155
|
-
expect(exp.count).to eq 10
|
156
|
-
end
|
157
|
-
|
158
|
-
specify('parse set whitespace is not merged in x mode') do
|
159
|
-
root = RP.parse("(?x)[#{(' ' * 10)}]")
|
160
|
-
exp = root[1]
|
161
|
-
|
162
|
-
expect(exp.count).to eq 10
|
163
|
-
end
|
164
|
-
|
165
|
-
specify('parse set collating sequence') do
|
166
|
-
root = RP.parse('[a[.span-ll.]h]', :any)
|
167
|
-
exp = root[0]
|
168
|
-
|
169
|
-
expect(exp[1].to_s).to eq '[.span-ll.]'
|
170
|
-
end
|
171
|
-
|
172
|
-
specify('parse set character equivalents') do
|
173
|
-
root = RP.parse('[a[=e=]h]', :any)
|
174
|
-
exp = root[0]
|
175
|
-
|
176
|
-
expect(exp[1].to_s).to eq '[=e=]'
|
177
|
-
end
|
178
|
-
end
|