regexp_parser 1.3.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +53 -1
- data/Gemfile +3 -3
- data/README.md +10 -14
- data/Rakefile +3 -4
- data/lib/regexp_parser/expression.rb +28 -53
- data/lib/regexp_parser/expression/classes/backref.rb +18 -10
- data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
- data/lib/regexp_parser/expression/classes/escape.rb +0 -4
- data/lib/regexp_parser/expression/classes/group.rb +4 -2
- data/lib/regexp_parser/expression/classes/keep.rb +1 -3
- data/lib/regexp_parser/expression/methods/match.rb +13 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
- data/lib/regexp_parser/expression/methods/options.rb +35 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
- data/lib/regexp_parser/expression/methods/tests.rb +6 -15
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -6
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
- data/lib/regexp_parser/expression/subexpression.rb +3 -5
- data/lib/regexp_parser/lexer.rb +30 -44
- data/lib/regexp_parser/parser.rb +47 -24
- data/lib/regexp_parser/scanner.rb +1159 -1329
- data/lib/regexp_parser/scanner/char_type.rl +0 -3
- data/lib/regexp_parser/scanner/properties/long.yml +34 -1
- data/lib/regexp_parser/scanner/properties/short.yml +12 -0
- data/lib/regexp_parser/scanner/scanner.rl +82 -190
- data/lib/regexp_parser/syntax/tokens.rb +2 -10
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +3 -3
- data/spec/expression/base_spec.rb +94 -0
- data/spec/expression/clone_spec.rb +120 -0
- data/spec/expression/conditional_spec.rb +89 -0
- data/spec/expression/free_space_spec.rb +27 -0
- data/spec/expression/methods/match_length_spec.rb +154 -0
- data/spec/expression/methods/match_spec.rb +25 -0
- data/spec/expression/methods/strfregexp_spec.rb +224 -0
- data/spec/expression/methods/tests_spec.rb +99 -0
- data/spec/expression/methods/traverse_spec.rb +140 -0
- data/spec/expression/options_spec.rb +128 -0
- data/spec/expression/root_spec.rb +9 -0
- data/spec/expression/sequence_spec.rb +9 -0
- data/spec/expression/subexpression_spec.rb +50 -0
- data/spec/expression/to_h_spec.rb +26 -0
- data/spec/expression/to_s_spec.rb +100 -0
- data/spec/lexer/all_spec.rb +22 -0
- data/spec/lexer/conditionals_spec.rb +53 -0
- data/spec/lexer/escapes_spec.rb +14 -0
- data/spec/lexer/keep_spec.rb +10 -0
- data/spec/lexer/literals_spec.rb +89 -0
- data/spec/lexer/nesting_spec.rb +99 -0
- data/spec/lexer/refcalls_spec.rb +55 -0
- data/spec/parser/all_spec.rb +43 -0
- data/spec/parser/alternation_spec.rb +88 -0
- data/spec/parser/anchors_spec.rb +17 -0
- data/spec/parser/conditionals_spec.rb +179 -0
- data/spec/parser/errors_spec.rb +30 -0
- data/spec/parser/escapes_spec.rb +121 -0
- data/spec/parser/free_space_spec.rb +130 -0
- data/spec/parser/groups_spec.rb +108 -0
- data/spec/parser/keep_spec.rb +6 -0
- data/spec/parser/posix_classes_spec.rb +8 -0
- data/spec/parser/properties_spec.rb +115 -0
- data/spec/parser/quantifiers_spec.rb +51 -0
- data/spec/parser/refcalls_spec.rb +112 -0
- data/spec/parser/set/intersections_spec.rb +127 -0
- data/spec/parser/set/ranges_spec.rb +111 -0
- data/spec/parser/sets_spec.rb +178 -0
- data/spec/parser/types_spec.rb +18 -0
- data/spec/scanner/all_spec.rb +18 -0
- data/spec/scanner/anchors_spec.rb +21 -0
- data/spec/scanner/conditionals_spec.rb +128 -0
- data/spec/scanner/errors_spec.rb +68 -0
- data/spec/scanner/escapes_spec.rb +53 -0
- data/spec/scanner/free_space_spec.rb +133 -0
- data/spec/scanner/groups_spec.rb +52 -0
- data/spec/scanner/keep_spec.rb +10 -0
- data/spec/scanner/literals_spec.rb +49 -0
- data/spec/scanner/meta_spec.rb +18 -0
- data/spec/scanner/properties_spec.rb +64 -0
- data/spec/scanner/quantifiers_spec.rb +20 -0
- data/spec/scanner/refcalls_spec.rb +36 -0
- data/spec/scanner/sets_spec.rb +102 -0
- data/spec/scanner/types_spec.rb +14 -0
- data/spec/spec_helper.rb +15 -0
- data/{test → spec}/support/runner.rb +9 -8
- data/spec/support/shared_examples.rb +77 -0
- data/{test → spec}/support/warning_extractor.rb +5 -7
- data/spec/syntax/syntax_spec.rb +48 -0
- data/spec/syntax/syntax_token_map_spec.rb +23 -0
- data/spec/syntax/versions/1.8.6_spec.rb +17 -0
- data/spec/syntax/versions/1.9.1_spec.rb +10 -0
- data/spec/syntax/versions/1.9.3_spec.rb +9 -0
- data/spec/syntax/versions/2.0.0_spec.rb +13 -0
- data/spec/syntax/versions/2.2.0_spec.rb +9 -0
- data/spec/syntax/versions/aliases_spec.rb +37 -0
- data/spec/token/token_spec.rb +85 -0
- metadata +144 -143
- data/test/expression/test_all.rb +0 -12
- data/test/expression/test_base.rb +0 -90
- data/test/expression/test_clone.rb +0 -89
- data/test/expression/test_conditionals.rb +0 -113
- data/test/expression/test_free_space.rb +0 -35
- data/test/expression/test_set.rb +0 -84
- data/test/expression/test_strfregexp.rb +0 -230
- data/test/expression/test_subexpression.rb +0 -58
- data/test/expression/test_tests.rb +0 -99
- data/test/expression/test_to_h.rb +0 -59
- data/test/expression/test_to_s.rb +0 -104
- data/test/expression/test_traverse.rb +0 -161
- data/test/helpers.rb +0 -10
- data/test/lexer/test_all.rb +0 -41
- data/test/lexer/test_conditionals.rb +0 -127
- data/test/lexer/test_keep.rb +0 -24
- data/test/lexer/test_literals.rb +0 -130
- data/test/lexer/test_nesting.rb +0 -132
- data/test/lexer/test_refcalls.rb +0 -56
- data/test/parser/set/test_intersections.rb +0 -127
- data/test/parser/set/test_ranges.rb +0 -111
- data/test/parser/test_all.rb +0 -64
- data/test/parser/test_alternation.rb +0 -92
- data/test/parser/test_anchors.rb +0 -34
- data/test/parser/test_conditionals.rb +0 -187
- data/test/parser/test_errors.rb +0 -63
- data/test/parser/test_escapes.rb +0 -134
- data/test/parser/test_free_space.rb +0 -139
- data/test/parser/test_groups.rb +0 -289
- data/test/parser/test_keep.rb +0 -21
- data/test/parser/test_posix_classes.rb +0 -27
- data/test/parser/test_properties.rb +0 -133
- data/test/parser/test_quantifiers.rb +0 -301
- data/test/parser/test_refcalls.rb +0 -186
- data/test/parser/test_sets.rb +0 -179
- data/test/parser/test_types.rb +0 -50
- data/test/scanner/test_all.rb +0 -38
- data/test/scanner/test_anchors.rb +0 -38
- data/test/scanner/test_conditionals.rb +0 -184
- data/test/scanner/test_errors.rb +0 -91
- data/test/scanner/test_escapes.rb +0 -56
- data/test/scanner/test_free_space.rb +0 -200
- data/test/scanner/test_groups.rb +0 -79
- data/test/scanner/test_keep.rb +0 -35
- data/test/scanner/test_literals.rb +0 -89
- data/test/scanner/test_meta.rb +0 -40
- data/test/scanner/test_properties.rb +0 -312
- data/test/scanner/test_quantifiers.rb +0 -37
- data/test/scanner/test_refcalls.rb +0 -52
- data/test/scanner/test_scripts.rb +0 -53
- data/test/scanner/test_sets.rb +0 -119
- data/test/scanner/test_types.rb +0 -35
- data/test/scanner/test_unicode_blocks.rb +0 -30
- data/test/support/disable_autotest.rb +0 -8
- data/test/syntax/test_all.rb +0 -6
- data/test/syntax/test_syntax.rb +0 -61
- data/test/syntax/test_syntax_token_map.rb +0 -25
- data/test/syntax/versions/test_1.8.rb +0 -55
- data/test/syntax/versions/test_1.9.1.rb +0 -36
- data/test/syntax/versions/test_1.9.3.rb +0 -32
- data/test/syntax/versions/test_2.0.0.rb +0 -37
- data/test/syntax/versions/test_2.2.0.rb +0 -32
- data/test/syntax/versions/test_aliases.rb +0 -129
- data/test/syntax/versions/test_all.rb +0 -5
- data/test/test_all.rb +0 -5
- data/test/token/test_all.rb +0 -2
- data/test/token/test_token.rb +0 -107
data/test/helpers.rb
DELETED
data/test/lexer/test_all.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
%w{
|
4
|
-
literals nesting refcalls
|
5
|
-
}.each do|tc|
|
6
|
-
require File.expand_path("../test_#{tc}", __FILE__)
|
7
|
-
end
|
8
|
-
|
9
|
-
if RUBY_VERSION >= '2.0.0'
|
10
|
-
%w{conditionals keep}.each do|tc|
|
11
|
-
require File.expand_path("../test_#{tc}", __FILE__)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
class TestRegexpLexer < Test::Unit::TestCase
|
16
|
-
|
17
|
-
def test_lexer_returns_an_array
|
18
|
-
assert_instance_of Array, RL.lex('abc')
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_lexer_returns_tokens
|
22
|
-
tokens = RL.lex('^abc+[^one]{2,3}\b\d\\\C-C$')
|
23
|
-
|
24
|
-
assert tokens.all?{ |token| token.kind_of?(Regexp::Token) },
|
25
|
-
"Not all array members are tokens"
|
26
|
-
|
27
|
-
assert tokens.all?{ |token| token.to_a.length == 8 },
|
28
|
-
"Not all tokens have a length of 8"
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_lexer_token_count
|
32
|
-
tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
|
33
|
-
|
34
|
-
assert_equal 28, tokens.length
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_lexer_scan_alias
|
38
|
-
assert_equal RL.lex(/a|b|c/), RL.scan(/a|b|c/)
|
39
|
-
end
|
40
|
-
|
41
|
-
end
|
@@ -1,127 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class LexerConditionals < Test::Unit::TestCase
|
4
|
-
if RUBY_VERSION >= '2.0'
|
5
|
-
|
6
|
-
# Basic lexer output and nesting tests
|
7
|
-
tests = {
|
8
|
-
'(?<A>a)(?(<A>)b|c)' => [3, :conditional, :open, '(?', 7, 9, 0, 0, 0],
|
9
|
-
'(?<B>a)(?(<B>)b|c)' => [4, :conditional, :condition, '(<B>)', 9, 14, 0, 0, 1],
|
10
|
-
'(?<C>a)(?(<C>)b|c)' => [6, :conditional, :separator, '|', 15, 16, 0, 0, 1],
|
11
|
-
'(?<D>a)(?(<D>)b|c)' => [8, :conditional, :close, ')', 17, 18, 0, 0, 0],
|
12
|
-
}
|
13
|
-
|
14
|
-
tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
|
15
|
-
define_method "test_lexer_#{type}_#{token}_#{count}" do
|
16
|
-
tokens = RL.lex(pattern)
|
17
|
-
struct = tokens.at(index)
|
18
|
-
|
19
|
-
assert_equal type, struct.type
|
20
|
-
assert_equal token, struct.token
|
21
|
-
assert_equal text, struct.text
|
22
|
-
assert_equal ts, struct.ts
|
23
|
-
assert_equal te, struct.te
|
24
|
-
assert_equal level, struct.level
|
25
|
-
assert_equal set_level, struct.set_level
|
26
|
-
assert_equal conditional_level, struct.conditional_level
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_lexer_conditional_mixed_nesting
|
31
|
-
regexp = /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/
|
32
|
-
tokens = RL.lex(regexp)
|
33
|
-
|
34
|
-
[
|
35
|
-
[ 0, :group, :capture, '(', 0, 1, 0, 0, 0],
|
36
|
-
[ 1, :group, :named, '(?<A>', 1, 6, 1, 0, 0],
|
37
|
-
|
38
|
-
[ 5, :conditional, :open, '(?', 13, 15, 2, 0, 0],
|
39
|
-
[ 6, :conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
|
40
|
-
[ 8, :conditional, :separator, '|', 21, 22, 2, 0, 1],
|
41
|
-
|
42
|
-
[10, :conditional, :open, '(?', 23, 25, 3, 0, 1],
|
43
|
-
[11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
|
44
|
-
|
45
|
-
[12, :set, :open, '[', 30, 31, 3, 0, 2],
|
46
|
-
[13, :literal, :literal, 'e', 31, 32, 3, 1, 2],
|
47
|
-
[14, :set, :range, '-', 32, 33, 3, 1, 2],
|
48
|
-
[15, :literal, :literal, 'g', 33, 34, 3, 1, 2],
|
49
|
-
[16, :set, :close, ']', 34, 35, 3, 0, 2],
|
50
|
-
|
51
|
-
[17, :conditional, :separator, '|', 35, 36, 3, 0, 2],
|
52
|
-
[23, :conditional, :close, ')', 41, 42, 3, 0, 1],
|
53
|
-
[25, :conditional, :close, ')', 43, 44, 2, 0, 0],
|
54
|
-
|
55
|
-
[26, :group, :close, ')', 44, 45, 1, 0, 0],
|
56
|
-
[27, :group, :close, ')', 45, 46, 0, 0, 0]
|
57
|
-
].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
|
58
|
-
struct = tokens.at(index)
|
59
|
-
|
60
|
-
assert_equal type, struct.type
|
61
|
-
assert_equal token, struct.token
|
62
|
-
assert_equal text, struct.text
|
63
|
-
assert_equal ts, struct.ts
|
64
|
-
assert_equal te, struct.te
|
65
|
-
assert_equal level, struct.level
|
66
|
-
assert_equal set_level, struct.set_level
|
67
|
-
assert_equal conditional_level, struct.conditional_level
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
def test_lexer_conditional_deep_nesting
|
72
|
-
regexp = /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/
|
73
|
-
tokens = RL.lex(regexp)
|
74
|
-
|
75
|
-
[
|
76
|
-
[ 9, :conditional, :open, '(?', 9, 11, 0, 0, 0],
|
77
|
-
[10, :conditional, :condition, '(1)', 11, 14, 0, 0, 1],
|
78
|
-
|
79
|
-
[11, :conditional, :open, '(?', 14, 16, 0, 0, 1],
|
80
|
-
[12, :conditional, :condition, '(2)', 16, 19, 0, 0, 2],
|
81
|
-
|
82
|
-
[13, :conditional, :open, '(?', 19, 21, 0, 0, 2],
|
83
|
-
[14, :conditional, :condition, '(3)', 21, 24, 0, 0, 3],
|
84
|
-
|
85
|
-
[16, :conditional, :separator, '|', 25, 26, 0, 0, 3],
|
86
|
-
|
87
|
-
[18, :conditional, :close, ')', 27, 28, 0, 0, 2],
|
88
|
-
[19, :conditional, :close, ')', 28, 29, 0, 0, 1],
|
89
|
-
|
90
|
-
[20, :conditional, :separator, '|', 29, 30, 0, 0, 1],
|
91
|
-
|
92
|
-
[21, :conditional, :open, '(?', 30, 32, 0, 0, 1],
|
93
|
-
[22, :conditional, :condition, '(3)', 32, 35, 0, 0, 2],
|
94
|
-
|
95
|
-
[23, :conditional, :open, '(?', 35, 37, 0, 0, 2],
|
96
|
-
[24, :conditional, :condition, '(2)', 37, 40, 0, 0, 3],
|
97
|
-
|
98
|
-
[26, :conditional, :separator, '|', 41, 42, 0, 0, 3],
|
99
|
-
|
100
|
-
[28, :conditional, :close, ')', 43, 44, 0, 0, 2],
|
101
|
-
|
102
|
-
[29, :conditional, :separator, '|', 44, 45, 0, 0, 2],
|
103
|
-
|
104
|
-
[30, :conditional, :open, '(?', 45, 47, 0, 0, 2],
|
105
|
-
[31, :conditional, :condition, '(1)', 47, 50, 0, 0, 3],
|
106
|
-
|
107
|
-
[33, :conditional, :separator, '|', 51, 52, 0, 0, 3],
|
108
|
-
|
109
|
-
[35, :conditional, :close, ')', 53, 54, 0, 0, 2],
|
110
|
-
[36, :conditional, :close, ')', 54, 55, 0, 0, 1],
|
111
|
-
[37, :conditional, :close, ')', 55, 56, 0, 0, 0]
|
112
|
-
].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
|
113
|
-
struct = tokens.at(index)
|
114
|
-
|
115
|
-
assert_equal type, struct.type
|
116
|
-
assert_equal token, struct.token
|
117
|
-
assert_equal text, struct.text
|
118
|
-
assert_equal ts, struct.ts
|
119
|
-
assert_equal te, struct.te
|
120
|
-
assert_equal level, struct.level
|
121
|
-
assert_equal set_level, struct.set_level
|
122
|
-
assert_equal conditional_level, struct.conditional_level
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
end # if RUBY_VERSION >= '2.0'
|
127
|
-
end
|
data/test/lexer/test_keep.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class LexerKeep < Test::Unit::TestCase
|
4
|
-
|
5
|
-
def test_lex_keep_token
|
6
|
-
regexp = /ab\Kcd/
|
7
|
-
tokens = RL.lex(regexp)
|
8
|
-
|
9
|
-
assert_equal :keep, tokens[1].type
|
10
|
-
assert_equal :mark, tokens[1].token
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_lex_keep_nested
|
14
|
-
regexp = /(a\Kb)|(c\\\Kd)ef/
|
15
|
-
tokens = RL.lex(regexp)
|
16
|
-
|
17
|
-
assert_equal :keep, tokens[2].type
|
18
|
-
assert_equal :mark, tokens[2].token
|
19
|
-
|
20
|
-
assert_equal :keep, tokens[9].type
|
21
|
-
assert_equal :mark, tokens[9].token
|
22
|
-
end
|
23
|
-
|
24
|
-
end
|
data/test/lexer/test_literals.rb
DELETED
@@ -1,130 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
require File.expand_path("../../helpers", __FILE__)
|
4
|
-
|
5
|
-
class LexerLiterals < Test::Unit::TestCase
|
6
|
-
|
7
|
-
tests = {
|
8
|
-
# ascii, single byte characters
|
9
|
-
'a' => {
|
10
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
11
|
-
},
|
12
|
-
|
13
|
-
'ab+' => {
|
14
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
15
|
-
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
16
|
-
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0],
|
17
|
-
},
|
18
|
-
|
19
|
-
|
20
|
-
# 2 byte wide characters, Arabic
|
21
|
-
'ا' => {
|
22
|
-
0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0],
|
23
|
-
},
|
24
|
-
|
25
|
-
'aاbبcت' => {
|
26
|
-
0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0],
|
27
|
-
},
|
28
|
-
|
29
|
-
'aاbبت?' => {
|
30
|
-
0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
|
31
|
-
1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
|
32
|
-
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
|
33
|
-
},
|
34
|
-
|
35
|
-
'aا?bبcت+' => {
|
36
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
37
|
-
1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
|
38
|
-
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
39
|
-
3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
|
40
|
-
4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
|
41
|
-
5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0],
|
42
|
-
},
|
43
|
-
|
44
|
-
'a(اbب+)cت?' => {
|
45
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
46
|
-
1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
|
47
|
-
2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
|
48
|
-
3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
|
49
|
-
4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
|
50
|
-
5 => [:group, :close, ')', 8, 9, 0, 0, 0],
|
51
|
-
6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
|
52
|
-
7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
|
53
|
-
8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0],
|
54
|
-
},
|
55
|
-
|
56
|
-
|
57
|
-
# 3 byte wide characters, Japanese
|
58
|
-
'ab?れます+cd' => {
|
59
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
60
|
-
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
61
|
-
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
62
|
-
3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
|
63
|
-
4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
|
64
|
-
5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
|
65
|
-
6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0],
|
66
|
-
},
|
67
|
-
|
68
|
-
|
69
|
-
# 4 byte wide characters, Osmanya
|
70
|
-
'𐒀𐒁?𐒂ab+𐒃' => {
|
71
|
-
0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
|
72
|
-
1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
|
73
|
-
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
|
74
|
-
3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
|
75
|
-
4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
|
76
|
-
5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
|
77
|
-
6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0],
|
78
|
-
},
|
79
|
-
|
80
|
-
'mu𝄞?si*𝄫c+' => {
|
81
|
-
0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
|
82
|
-
1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
|
83
|
-
2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
|
84
|
-
3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
|
85
|
-
4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
|
86
|
-
5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
|
87
|
-
6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
|
88
|
-
7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
|
89
|
-
8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
|
90
|
-
},
|
91
|
-
}
|
92
|
-
|
93
|
-
tests.each_with_index do |(pattern, checks), count|
|
94
|
-
define_method "test_lex_literal_runs_#{count}" do
|
95
|
-
tokens = RL.lex(pattern)
|
96
|
-
|
97
|
-
checks.each do |index, (type, token, text, ts, te, level, set_level, conditional_level)|
|
98
|
-
struct = tokens.at(index)
|
99
|
-
|
100
|
-
assert_equal type, struct.type
|
101
|
-
assert_equal token, struct.token
|
102
|
-
assert_equal text, struct.text
|
103
|
-
assert_equal ts, struct.ts
|
104
|
-
assert_equal te, struct.te
|
105
|
-
assert_equal level, struct.level
|
106
|
-
assert_equal set_level, struct.set_level
|
107
|
-
assert_equal conditional_level, struct.conditional_level
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
def test_lex_single_2_byte_char
|
113
|
-
tokens = RL.lex('ا+')
|
114
|
-
|
115
|
-
assert_equal 2, tokens.length
|
116
|
-
end
|
117
|
-
|
118
|
-
def test_lex_single_3_byte_char
|
119
|
-
tokens = RL.lex('れ+')
|
120
|
-
|
121
|
-
assert_equal 2, tokens.length
|
122
|
-
end
|
123
|
-
|
124
|
-
def test_lex_single_4_byte_char
|
125
|
-
tokens = RL.lex('𝄞+')
|
126
|
-
|
127
|
-
assert_equal 2, tokens.length
|
128
|
-
end
|
129
|
-
|
130
|
-
end
|
data/test/lexer/test_nesting.rb
DELETED
@@ -1,132 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class LexerNesting < Test::Unit::TestCase
|
4
|
-
|
5
|
-
tests = {
|
6
|
-
'(((b)))' => {
|
7
|
-
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
8
|
-
1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
|
9
|
-
2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
|
10
|
-
3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
|
11
|
-
4 => [:group, :close, ')', 4, 5, 2, 0, 0],
|
12
|
-
5 => [:group, :close, ')', 5, 6, 1, 0, 0],
|
13
|
-
6 => [:group, :close, ')', 6, 7, 0, 0, 0],
|
14
|
-
},
|
15
|
-
|
16
|
-
'(\((b)\))' => {
|
17
|
-
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
18
|
-
1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
|
19
|
-
2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
|
20
|
-
3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
|
21
|
-
4 => [:group, :close, ')', 5, 6, 1, 0, 0],
|
22
|
-
5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
|
23
|
-
6 => [:group, :close, ')', 8, 9, 0, 0, 0],
|
24
|
-
},
|
25
|
-
|
26
|
-
'(?>a(?>b(?>c)))' => {
|
27
|
-
0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
|
28
|
-
2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
|
29
|
-
4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
|
30
|
-
6 => [:group, :close, ')', 12, 13, 2, 0, 0],
|
31
|
-
7 => [:group, :close, ')', 13, 14, 1, 0, 0],
|
32
|
-
8 => [:group, :close, ')', 14, 15, 0, 0, 0],
|
33
|
-
},
|
34
|
-
|
35
|
-
'(?:a(?:b(?:c)))' => {
|
36
|
-
0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
|
37
|
-
2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
|
38
|
-
4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
|
39
|
-
6 => [:group, :close, ')', 12, 13, 2, 0, 0],
|
40
|
-
7 => [:group, :close, ')', 13, 14, 1, 0, 0],
|
41
|
-
8 => [:group, :close, ')', 14, 15, 0, 0, 0],
|
42
|
-
},
|
43
|
-
|
44
|
-
'(?=a(?!b(?<=c(?<!d))))' => {
|
45
|
-
0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
|
46
|
-
2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
|
47
|
-
4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
|
48
|
-
6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
|
49
|
-
8 => [:group, :close, ')', 18, 19, 3, 0, 0],
|
50
|
-
9 => [:group, :close, ')', 19, 20, 2, 0, 0],
|
51
|
-
10 => [:group, :close, ')', 20, 21, 1, 0, 0],
|
52
|
-
11 => [:group, :close, ')', 21, 22, 0, 0, 0],
|
53
|
-
},
|
54
|
-
|
55
|
-
'((?#a)b(?#c)d(?#e))' => {
|
56
|
-
0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
|
57
|
-
1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
|
58
|
-
3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
|
59
|
-
5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
|
60
|
-
6 => [:group, :close, ')', 18, 19, 0, 0, 0],
|
61
|
-
},
|
62
|
-
|
63
|
-
'a[b-e]f' => {
|
64
|
-
1 => [:set, :open, '[', 1, 2, 0, 0, 0],
|
65
|
-
2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
|
66
|
-
3 => [:set, :range, '-', 3, 4, 0, 1, 0],
|
67
|
-
4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
|
68
|
-
5 => [:set, :close, ']', 5, 6, 0, 0, 0],
|
69
|
-
},
|
70
|
-
|
71
|
-
'[[:word:]&&[^c]z]' => {
|
72
|
-
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
73
|
-
1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
|
74
|
-
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
75
|
-
3 => [:set, :open, '[', 11, 12, 0, 1, 0],
|
76
|
-
4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
|
77
|
-
5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
|
78
|
-
6 => [:set, :close, ']', 14, 15, 0, 1, 0],
|
79
|
-
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
80
|
-
8 => [:set, :close, ']', 16, 17, 0, 0, 0],
|
81
|
-
},
|
82
|
-
|
83
|
-
'[\p{word}&&[^c]z]' => {
|
84
|
-
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
85
|
-
1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
|
86
|
-
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
87
|
-
3 => [:set, :open, '[', 11, 12, 0, 1, 0],
|
88
|
-
4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
|
89
|
-
5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
|
90
|
-
6 => [:set, :close, ']', 14, 15, 0, 1, 0],
|
91
|
-
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
92
|
-
8 => [:set, :close, ']', 16, 17, 0, 0, 0],
|
93
|
-
},
|
94
|
-
|
95
|
-
'[a[b[c[d-g]]]]' => {
|
96
|
-
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
97
|
-
1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
|
98
|
-
2 => [:set, :open, '[', 2, 3, 0, 1, 0],
|
99
|
-
3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
|
100
|
-
4 => [:set, :open, '[', 4, 5, 0, 2, 0],
|
101
|
-
5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
|
102
|
-
6 => [:set, :open, '[', 6, 7, 0, 3, 0],
|
103
|
-
7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
|
104
|
-
8 => [:set, :range, '-', 8, 9, 0, 4, 0],
|
105
|
-
9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
|
106
|
-
10 => [:set, :close, ']', 10, 11, 0, 3, 0],
|
107
|
-
11 => [:set, :close, ']', 11, 12, 0, 2, 0],
|
108
|
-
12 => [:set, :close, ']', 12, 13, 0, 1, 0],
|
109
|
-
13 => [:set, :close, ']', 13, 14, 0, 0, 0],
|
110
|
-
},
|
111
|
-
}
|
112
|
-
|
113
|
-
tests.each_with_index do |(pattern, checks), count|
|
114
|
-
define_method "test_lex_nesting_in_'#{pattern}'_#{count}" do
|
115
|
-
tokens = RL.lex(pattern, 'ruby/1.9')
|
116
|
-
|
117
|
-
checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
|
118
|
-
struct = tokens.at(offset)
|
119
|
-
|
120
|
-
assert_equal type, struct.type
|
121
|
-
assert_equal token, struct.token
|
122
|
-
assert_equal text, struct.text
|
123
|
-
assert_equal ts, struct.ts
|
124
|
-
assert_equal te, struct.te
|
125
|
-
assert_equal level, struct.level
|
126
|
-
assert_equal set_level, struct.set_level
|
127
|
-
assert_equal conditional_level, struct.conditional_level
|
128
|
-
end
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
end
|