regexp_parser 0.1.6 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +57 -0
- data/Gemfile +8 -0
- data/LICENSE +1 -1
- data/README.md +225 -206
- data/Rakefile +9 -3
- data/lib/regexp_parser.rb +7 -11
- data/lib/regexp_parser/expression.rb +72 -14
- data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
- data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
- data/lib/regexp_parser/expression/classes/keep.rb +7 -0
- data/lib/regexp_parser/expression/classes/set.rb +28 -7
- data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
- data/lib/regexp_parser/expression/methods/tests.rb +116 -0
- data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
- data/lib/regexp_parser/expression/quantifier.rb +10 -0
- data/lib/regexp_parser/expression/sequence.rb +45 -0
- data/lib/regexp_parser/expression/subexpression.rb +29 -1
- data/lib/regexp_parser/lexer.rb +31 -8
- data/lib/regexp_parser/parser.rb +118 -45
- data/lib/regexp_parser/scanner.rb +1745 -1404
- data/lib/regexp_parser/scanner/property.rl +57 -3
- data/lib/regexp_parser/scanner/scanner.rl +161 -34
- data/lib/regexp_parser/syntax.rb +12 -2
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
- data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
- data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
- data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
- data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
- data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
- data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +19 -2
- data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
- data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
- data/lib/regexp_parser/token.rb +23 -8
- data/lib/regexp_parser/version.rb +5 -0
- data/regexp_parser.gemspec +35 -0
- data/test/expression/test_all.rb +6 -1
- data/test/expression/test_base.rb +19 -0
- data/test/expression/test_conditionals.rb +114 -0
- data/test/expression/test_free_space.rb +33 -0
- data/test/expression/test_set.rb +61 -0
- data/test/expression/test_strfregexp.rb +214 -0
- data/test/expression/test_subexpression.rb +24 -0
- data/test/expression/test_tests.rb +99 -0
- data/test/expression/test_to_h.rb +48 -0
- data/test/expression/test_to_s.rb +46 -0
- data/test/expression/test_traverse.rb +164 -0
- data/test/lexer/test_all.rb +16 -3
- data/test/lexer/test_conditionals.rb +101 -0
- data/test/lexer/test_keep.rb +24 -0
- data/test/lexer/test_literals.rb +51 -51
- data/test/lexer/test_nesting.rb +62 -62
- data/test/lexer/test_refcalls.rb +18 -20
- data/test/parser/test_all.rb +18 -3
- data/test/parser/test_alternation.rb +11 -14
- data/test/parser/test_conditionals.rb +148 -0
- data/test/parser/test_escapes.rb +29 -5
- data/test/parser/test_free_space.rb +139 -0
- data/test/parser/test_groups.rb +40 -0
- data/test/parser/test_keep.rb +21 -0
- data/test/scanner/test_all.rb +8 -2
- data/test/scanner/test_conditionals.rb +166 -0
- data/test/scanner/test_escapes.rb +8 -5
- data/test/scanner/test_free_space.rb +133 -0
- data/test/scanner/test_groups.rb +28 -0
- data/test/scanner/test_keep.rb +33 -0
- data/test/scanner/test_properties.rb +4 -0
- data/test/scanner/test_scripts.rb +71 -1
- data/test/syntax/ruby/test_1.9.3.rb +2 -2
- data/test/syntax/ruby/test_2.0.0.rb +38 -0
- data/test/syntax/ruby/test_2.2.0.rb +38 -0
- data/test/syntax/ruby/test_all.rb +1 -8
- data/test/syntax/ruby/test_files.rb +104 -0
- data/test/test_all.rb +2 -1
- data/test/token/test_all.rb +2 -0
- data/test/token/test_token.rb +109 -0
- metadata +75 -21
- data/VERSION.yml +0 -5
- data/lib/regexp_parser/ctype.rb +0 -48
- data/test/syntax/ruby/test_2.x.rb +0 -46
@@ -48,4 +48,50 @@ class ExpressionToS < Test::Unit::TestCase
|
|
48
48
|
assert_equal( pattern, RP.parse(pattern).to_s )
|
49
49
|
end
|
50
50
|
|
51
|
+
def test_expression_to_s_multiline_source
|
52
|
+
multiline = %r{
|
53
|
+
\A
|
54
|
+
a? # One letter
|
55
|
+
b{2,5} # Another one
|
56
|
+
[c-g]+ # A set
|
57
|
+
\z
|
58
|
+
}x
|
59
|
+
|
60
|
+
assert_equal( multiline.source, RP.parse(multiline).to_s )
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_expression_to_s_multiline_to_s
|
64
|
+
multiline = %r{
|
65
|
+
\A
|
66
|
+
a? # One letter
|
67
|
+
b{2,5} # Another one
|
68
|
+
[c-g]+ # A set
|
69
|
+
\z
|
70
|
+
}x
|
71
|
+
|
72
|
+
assert_equal( multiline.to_s, RP.parse(multiline.to_s).to_s )
|
73
|
+
end
|
74
|
+
|
75
|
+
# Free spacing expressions that use spaces between quantifiers and their
|
76
|
+
# targets do not produce identical results due to the way quantifiers are
|
77
|
+
# applied to expressions (members, not nodes) and the merging of consecutive
|
78
|
+
# space nodes. This tests that they produce equivalent results.
|
79
|
+
def test_expression_to_s_multiline_equivalence
|
80
|
+
multiline = %r{
|
81
|
+
\A
|
82
|
+
a ? # One letter
|
83
|
+
b {2,5} # Another one
|
84
|
+
[c-g] + # A set
|
85
|
+
\z
|
86
|
+
}x
|
87
|
+
|
88
|
+
str = 'bbbcged'
|
89
|
+
root = RP.parse(multiline)
|
90
|
+
|
91
|
+
assert_equal(
|
92
|
+
multiline.match(str)[0],
|
93
|
+
Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]
|
94
|
+
)
|
95
|
+
end
|
96
|
+
|
51
97
|
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
class SubexpressionTraverse < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def test_subexpression_traverse
|
6
|
+
root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
|
7
|
+
|
8
|
+
enters = 0
|
9
|
+
visits = 0
|
10
|
+
exits = 0
|
11
|
+
|
12
|
+
root.traverse {|event, exp, index|
|
13
|
+
enters += 1 if event == :enter
|
14
|
+
visits += 1 if event == :visit
|
15
|
+
exits += 1 if event == :exit
|
16
|
+
}
|
17
|
+
|
18
|
+
assert_equal( 7, enters )
|
19
|
+
assert_equal( exits, enters )
|
20
|
+
|
21
|
+
assert_equal( 8, visits )
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_subexpression_traverse_include_self
|
25
|
+
root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
|
26
|
+
|
27
|
+
enters = 0
|
28
|
+
visits = 0
|
29
|
+
exits = 0
|
30
|
+
|
31
|
+
root.traverse(true) {|event, exp, index|
|
32
|
+
enters += 1 if event == :enter
|
33
|
+
visits += 1 if event == :visit
|
34
|
+
exits += 1 if event == :exit
|
35
|
+
}
|
36
|
+
|
37
|
+
assert_equal( 8, enters )
|
38
|
+
assert_equal( exits, enters )
|
39
|
+
|
40
|
+
assert_equal( 8, visits )
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_subexpression_walk_alias
|
44
|
+
root = RP.parse(/abc/)
|
45
|
+
|
46
|
+
assert_equal( true, root.respond_to?(:walk) )
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_subexpression_each_expression
|
50
|
+
root = RP.parse(/a(?x:b(c))|g[h-k]/)
|
51
|
+
|
52
|
+
count = 0
|
53
|
+
root.each_expression {|exp, index|
|
54
|
+
count += 1
|
55
|
+
}
|
56
|
+
|
57
|
+
assert_equal( 10, count )
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_subexpression_each_expression_include_self
|
61
|
+
root = RP.parse(/a(?x:b(c))|g[hi]/)
|
62
|
+
|
63
|
+
count = 0
|
64
|
+
root.each_expression(true) {|exp, index|
|
65
|
+
count += 1
|
66
|
+
}
|
67
|
+
|
68
|
+
assert_equal( 11, count )
|
69
|
+
end
|
70
|
+
|
71
|
+
def test_subexpression_each_expression_indices
|
72
|
+
root = RP.parse(/a(b)c/)
|
73
|
+
|
74
|
+
indices = []
|
75
|
+
root.each_expression {|exp, index| indices << index}
|
76
|
+
|
77
|
+
assert_equal( [0, 1, 0, 2], indices )
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_subexpression_each_expression_indices_include_self
|
81
|
+
root = RP.parse(/a(b)c/)
|
82
|
+
|
83
|
+
indices = []
|
84
|
+
root.each_expression(true) {|exp, index| indices << index}
|
85
|
+
|
86
|
+
assert_equal( [0, 0, 1, 0, 2], indices )
|
87
|
+
end
|
88
|
+
|
89
|
+
def test_subexpression_map_without_block
|
90
|
+
root = RP.parse(/a(b([c-e]+))?/)
|
91
|
+
|
92
|
+
array = root.map
|
93
|
+
|
94
|
+
assert_equal( Array, array.class )
|
95
|
+
assert_equal( 5, array.length )
|
96
|
+
|
97
|
+
array.each do |item|
|
98
|
+
assert_equal( Array, item.class )
|
99
|
+
assert_equal( 2, item.length )
|
100
|
+
assert_equal( true, item.first.is_a?(Regexp::Expression::Base) )
|
101
|
+
assert_equal( true, item.last.is_a?(Fixnum) )
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def test_subexpression_map_without_block_include_self
|
106
|
+
root = RP.parse(/a(b([c-e]+))?/)
|
107
|
+
|
108
|
+
array = root.map(true)
|
109
|
+
|
110
|
+
assert_equal( Array, array.class )
|
111
|
+
assert_equal( 6, array.length )
|
112
|
+
end
|
113
|
+
|
114
|
+
def test_subexpression_map_indices
|
115
|
+
root = RP.parse(/a(b([c-e]+))?f*g/)
|
116
|
+
|
117
|
+
indices = root.map {|exp, index| index}
|
118
|
+
|
119
|
+
assert_equal( [0, 1, 0, 1, 0, 2, 3], indices )
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_subexpression_map_indices_include_self
|
123
|
+
root = RP.parse(/a(b([c-e]+))?f*g/)
|
124
|
+
|
125
|
+
indices = root.map(true) {|exp, index| index}
|
126
|
+
|
127
|
+
assert_equal( [0, 0, 1, 0, 1, 0, 2, 3], indices )
|
128
|
+
end
|
129
|
+
|
130
|
+
def test_subexpression_map_expressions
|
131
|
+
root = RP.parse(/a(b(c(d)))/)
|
132
|
+
|
133
|
+
levels = root.map {|exp, index|
|
134
|
+
[exp.level, exp.text] if exp.terminal?
|
135
|
+
}.compact
|
136
|
+
|
137
|
+
assert_equal(
|
138
|
+
[[0, 'a'], [1, 'b'], [2, 'c'], [3, 'd']],
|
139
|
+
levels
|
140
|
+
)
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_subexpression_map_expressions_include_self
|
144
|
+
root = RP.parse(/a(b(c(d)))/)
|
145
|
+
|
146
|
+
levels = root.map(true) {|exp, index|
|
147
|
+
[exp.level, exp.to_s]
|
148
|
+
}.compact
|
149
|
+
|
150
|
+
assert_equal( [
|
151
|
+
[nil, 'a(b(c(d)))'],
|
152
|
+
[0, 'a'],
|
153
|
+
[0, '(b(c(d)))'],
|
154
|
+
[1, 'b'],
|
155
|
+
[1, '(c(d))'],
|
156
|
+
[2, 'c'],
|
157
|
+
[2, '(d)'],
|
158
|
+
[3, 'd']
|
159
|
+
],
|
160
|
+
levels
|
161
|
+
)
|
162
|
+
end
|
163
|
+
|
164
|
+
end
|
data/test/lexer/test_all.rb
CHANGED
@@ -6,21 +6,34 @@ require File.expand_path("../../helpers", __FILE__)
|
|
6
6
|
require File.expand_path("../test_#{tc}", __FILE__)
|
7
7
|
end
|
8
8
|
|
9
|
+
if RUBY_VERSION >= '2.0.0'
|
10
|
+
%w{conditionals keep}.each do|tc|
|
11
|
+
require File.expand_path("../test_#{tc}", __FILE__)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
9
15
|
class TestRegexpLexer < Test::Unit::TestCase
|
10
16
|
|
11
17
|
def test_lexer_returns_an_array
|
12
|
-
assert_instance_of( Array, RL.scan('abc'))
|
18
|
+
assert_instance_of( Array, RL.lex('abc'))
|
13
19
|
end
|
14
20
|
|
15
21
|
def test_lexer_returns_tokens
|
16
|
-
tokens = RL.scan('^abc+[^one]{2,3}\b\d\\\C-C$')
|
22
|
+
tokens = RL.lex('^abc+[^one]{2,3}\b\d\\\C-C$')
|
17
23
|
assert( tokens.all?{|token| token.kind_of?(Regexp::Token)},
|
18
24
|
"Not all array members are tokens")
|
25
|
+
|
26
|
+
assert( tokens.all?{|token| token.to_a.length == 8},
|
27
|
+
"Not all tokens have a length of 8")
|
19
28
|
end
|
20
29
|
|
21
30
|
def test_lexer_token_count
|
22
|
-
tokens = RL.scan(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
|
31
|
+
tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
|
23
32
|
assert_equal( 26, tokens.length )
|
24
33
|
end
|
25
34
|
|
35
|
+
def test_lexer_scan_alias
|
36
|
+
assert_equal( RL.lex(/a|b|c/), RL.scan(/a|b|c/) )
|
37
|
+
end
|
38
|
+
|
26
39
|
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
class LexerConditionals < Test::Unit::TestCase
|
4
|
+
|
5
|
+
if RUBY_VERSION >= '2.0'
|
6
|
+
|
7
|
+
# Basic lexer output and nesting tests
|
8
|
+
tests = {
|
9
|
+
'(?<A>a)(?(<A>)b|c)' => [3, :conditional, :open, '(?', 7, 9, 0, 0, 0],
|
10
|
+
'(?<B>a)(?(<B>)b|c)' => [4, :conditional, :condition, '(<B>)', 9, 14, 0, 0, 1],
|
11
|
+
'(?<C>a)(?(<C>)b|c)' => [6, :conditional, :separator, '|', 15, 16, 0, 0, 1],
|
12
|
+
'(?<D>a)(?(<D>)b|c)' => [8, :conditional, :close, ')', 17, 18, 0, 0, 0],
|
13
|
+
}
|
14
|
+
|
15
|
+
count = 0
|
16
|
+
tests.each do |pattern, test|
|
17
|
+
define_method "test_lexer_#{test[1]}_#{test[2]}_#{count+=1}" do
|
18
|
+
tokens = RL.lex(pattern)
|
19
|
+
assert_equal( test[1,8], tokens[test[0]].to_a)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_lexer_conditional_mixed_nesting
|
24
|
+
regexp = /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/
|
25
|
+
tokens = RL.lex(regexp)
|
26
|
+
|
27
|
+
expected = [
|
28
|
+
[ 0, :group, :capture, '(', 0, 1, 0, 0, 0],
|
29
|
+
[ 1, :group, :named, '(?<A>', 1, 6, 1, 0, 0],
|
30
|
+
|
31
|
+
[ 5, :conditional, :open, '(?', 13, 15, 2, 0, 0],
|
32
|
+
[ 6, :conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
|
33
|
+
[ 8, :conditional, :separator, '|', 21, 22, 2, 0, 1],
|
34
|
+
|
35
|
+
[10, :conditional, :open, '(?', 23, 25, 3, 0, 1],
|
36
|
+
[11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
|
37
|
+
|
38
|
+
[12, :set, :open, '[', 30, 31, 3, 0, 2],
|
39
|
+
[13, :set, :range, 'e-g', 31, 34, 3, 1, 2],
|
40
|
+
[14, :set, :close, ']', 34, 35, 3, 0, 2],
|
41
|
+
|
42
|
+
[15, :conditional, :separator, '|', 35, 36, 3, 0, 2],
|
43
|
+
[19, :conditional, :close, ')', 41, 42, 3, 0, 1],
|
44
|
+
[21, :conditional, :close, ')', 43, 44, 2, 0, 0],
|
45
|
+
|
46
|
+
[22, :group, :close, ')', 44, 45, 1, 0, 0],
|
47
|
+
[23, :group, :close, ')', 45, 46, 0, 0, 0]
|
48
|
+
].each do |test|
|
49
|
+
assert_equal( test[1,8], tokens[test[0]].to_a)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_lexer_conditional_deep_nesting
|
54
|
+
regexp = /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/
|
55
|
+
tokens = RL.lex(regexp)
|
56
|
+
|
57
|
+
expected = [
|
58
|
+
[ 9, :conditional, :open, '(?', 9, 11, 0, 0, 0],
|
59
|
+
[10, :conditional, :condition, '(1)', 11, 14, 0, 0, 1],
|
60
|
+
|
61
|
+
[11, :conditional, :open, '(?', 14, 16, 0, 0, 1],
|
62
|
+
[12, :conditional, :condition, '(2)', 16, 19, 0, 0, 2],
|
63
|
+
|
64
|
+
[13, :conditional, :open, '(?', 19, 21, 0, 0, 2],
|
65
|
+
[14, :conditional, :condition, '(3)', 21, 24, 0, 0, 3],
|
66
|
+
|
67
|
+
[16, :conditional, :separator, '|', 25, 26, 0, 0, 3],
|
68
|
+
|
69
|
+
[18, :conditional, :close, ')', 27, 28, 0, 0, 2],
|
70
|
+
[19, :conditional, :close, ')', 28, 29, 0, 0, 1],
|
71
|
+
|
72
|
+
[20, :conditional, :separator, '|', 29, 30, 0, 0, 1],
|
73
|
+
|
74
|
+
[21, :conditional, :open, '(?', 30, 32, 0, 0, 1],
|
75
|
+
[22, :conditional, :condition, '(3)', 32, 35, 0, 0, 2],
|
76
|
+
|
77
|
+
[23, :conditional, :open, '(?', 35, 37, 0, 0, 2],
|
78
|
+
[24, :conditional, :condition, '(2)', 37, 40, 0, 0, 3],
|
79
|
+
|
80
|
+
[26, :conditional, :separator, '|', 41, 42, 0, 0, 3],
|
81
|
+
|
82
|
+
[28, :conditional, :close, ')', 43, 44, 0, 0, 2],
|
83
|
+
|
84
|
+
[29, :conditional, :separator, '|', 44, 45, 0, 0, 2],
|
85
|
+
|
86
|
+
[30, :conditional, :open, '(?', 45, 47, 0, 0, 2],
|
87
|
+
[31, :conditional, :condition, '(1)', 47, 50, 0, 0, 3],
|
88
|
+
|
89
|
+
[33, :conditional, :separator, '|', 51, 52, 0, 0, 3],
|
90
|
+
|
91
|
+
[35, :conditional, :close, ')', 53, 54, 0, 0, 2],
|
92
|
+
[36, :conditional, :close, ')', 54, 55, 0, 0, 1],
|
93
|
+
[37, :conditional, :close, ')', 55, 56, 0, 0, 0]
|
94
|
+
].each do |test|
|
95
|
+
assert_equal( test[1,8], tokens[test[0]].to_a)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
class LexerKeep < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def test_lex_keep_token
|
6
|
+
regexp = /ab\Kcd/
|
7
|
+
tokens = RL.lex(regexp)
|
8
|
+
|
9
|
+
assert_equal( :keep, tokens[1].type )
|
10
|
+
assert_equal( :mark, tokens[1].token )
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_lex_keep_nested
|
14
|
+
regexp = /(a\Kb)|(c\\\Kd)ef/
|
15
|
+
tokens = RL.lex(regexp)
|
16
|
+
|
17
|
+
assert_equal( :keep, tokens[2].type )
|
18
|
+
assert_equal( :mark, tokens[2].token )
|
19
|
+
|
20
|
+
assert_equal( :keep, tokens[9].type )
|
21
|
+
assert_equal( :mark, tokens[9].token )
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
data/test/lexer/test_literals.rb
CHANGED
@@ -7,86 +7,86 @@ class LexerLiterals < Test::Unit::TestCase
|
|
7
7
|
tests = {
|
8
8
|
# ascii, single byte characters
|
9
9
|
'a' => {
|
10
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0],
|
10
|
+
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
11
11
|
},
|
12
12
|
|
13
13
|
'ab+' => {
|
14
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0],
|
15
|
-
1 => [:literal, :literal, 'b', 1, 2, 0, 0],
|
16
|
-
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0],
|
14
|
+
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
15
|
+
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
16
|
+
2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0],
|
17
17
|
},
|
18
18
|
|
19
19
|
|
20
20
|
# 2 byte wide characters, Arabic
|
21
21
|
'ا' => {
|
22
|
-
0 => [:literal, :literal, 'ا', 0, 2, 0, 0],
|
22
|
+
0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0],
|
23
23
|
},
|
24
24
|
|
25
25
|
'aاbبcت' => {
|
26
|
-
0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0],
|
26
|
+
0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0],
|
27
27
|
},
|
28
28
|
|
29
29
|
'aاbبت?' => {
|
30
|
-
0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0],
|
31
|
-
1 => [:literal, :literal, 'ت', 6, 8, 0, 0],
|
32
|
-
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0],
|
30
|
+
0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
|
31
|
+
1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
|
32
|
+
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
|
33
33
|
},
|
34
34
|
|
35
35
|
'aا?bبcت+' => {
|
36
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0],
|
37
|
-
1 => [:literal, :literal, 'ا', 1, 3, 0, 0],
|
38
|
-
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0],
|
39
|
-
3 => [:literal, :literal, 'bبc', 4, 8, 0, 0],
|
40
|
-
4 => [:literal, :literal, 'ت', 8, 10, 0, 0],
|
41
|
-
5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0],
|
36
|
+
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
37
|
+
1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
|
38
|
+
2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
|
39
|
+
3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
|
40
|
+
4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
|
41
|
+
5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0],
|
42
42
|
},
|
43
43
|
|
44
44
|
'a(اbب+)cت?' => {
|
45
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0],
|
46
|
-
1 => [:group, :capture, '(', 1, 2, 0, 0],
|
47
|
-
2 => [:literal, :literal, 'اb', 2, 5, 1, 0],
|
48
|
-
3 => [:literal, :literal, 'ب', 5, 7, 1, 0],
|
49
|
-
4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0],
|
50
|
-
5 => [:group, :close, ')', 8, 9, 0, 0],
|
51
|
-
6 => [:literal, :literal, 'c', 9, 10, 0, 0],
|
52
|
-
7 => [:literal, :literal, 'ت', 10, 12, 0, 0],
|
53
|
-
8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0],
|
45
|
+
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
46
|
+
1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
|
47
|
+
2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
|
48
|
+
3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
|
49
|
+
4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
|
50
|
+
5 => [:group, :close, ')', 8, 9, 0, 0, 0],
|
51
|
+
6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
|
52
|
+
7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
|
53
|
+
8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0],
|
54
54
|
},
|
55
55
|
|
56
56
|
|
57
57
|
# 3 byte wide characters, Japanese
|
58
58
|
'ab?れます+cd' => {
|
59
|
-
0 => [:literal, :literal, 'a', 0, 1, 0, 0],
|
60
|
-
1 => [:literal, :literal, 'b', 1, 2, 0, 0],
|
61
|
-
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0],
|
62
|
-
3 => [:literal, :literal, 'れま', 3, 9, 0, 0],
|
63
|
-
4 => [:literal, :literal, 'す', 9, 12, 0, 0],
|
64
|
-
5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0],
|
65
|
-
6 => [:literal, :literal, 'cd', 13, 15, 0, 0],
|
59
|
+
0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
|
60
|
+
1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
|
61
|
+
2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
|
62
|
+
3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
|
63
|
+
4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
|
64
|
+
5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
|
65
|
+
6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0],
|
66
66
|
},
|
67
67
|
|
68
68
|
|
69
69
|
# 4 byte wide characters, Osmanya
|
70
70
|
'𐒀𐒁?𐒂ab+𐒃' => {
|
71
|
-
0 => [:literal, :literal, '𐒀', 0, 4, 0, 0],
|
72
|
-
1 => [:literal, :literal, '𐒁', 4, 8, 0, 0],
|
73
|
-
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0],
|
74
|
-
3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0],
|
75
|
-
4 => [:literal, :literal, 'b', 14, 15, 0, 0],
|
76
|
-
5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0],
|
77
|
-
6 => [:literal, :literal, '𐒃', 16, 20, 0, 0],
|
71
|
+
0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
|
72
|
+
1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
|
73
|
+
2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
|
74
|
+
3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
|
75
|
+
4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
|
76
|
+
5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
|
77
|
+
6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0],
|
78
78
|
},
|
79
79
|
|
80
80
|
'mu𝄞?si*𝄫c+' => {
|
81
|
-
0 => [:literal, :literal, 'mu', 0, 2, 0, 0],
|
82
|
-
1 => [:literal, :literal, '𝄞', 2, 6, 0, 0],
|
83
|
-
2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0],
|
84
|
-
3 => [:literal, :literal, 's', 7, 8, 0, 0],
|
85
|
-
4 => [:literal, :literal, 'i', 8, 9, 0, 0],
|
86
|
-
5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0],
|
87
|
-
6 => [:literal, :literal, '𝄫', 10, 14, 0, 0],
|
88
|
-
7 => [:literal, :literal, 'c', 14, 15, 0, 0],
|
89
|
-
8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0],
|
81
|
+
0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
|
82
|
+
1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
|
83
|
+
2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
|
84
|
+
3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
|
85
|
+
4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
|
86
|
+
5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
|
87
|
+
6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
|
88
|
+
7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
|
89
|
+
8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
|
90
90
|
},
|
91
91
|
}
|
92
92
|
|
@@ -94,7 +94,7 @@ class LexerLiterals < Test::Unit::TestCase
|
|
94
94
|
tests.each do |pattern, checks|
|
95
95
|
define_method "test_lex_literal_runs_#{count+=1}" do
|
96
96
|
|
97
|
-
tokens = RL.scan(pattern)
|
97
|
+
tokens = RL.lex(pattern)
|
98
98
|
checks.each do |offset, token|
|
99
99
|
assert_equal( token, tokens[offset].to_a )
|
100
100
|
end
|
@@ -103,17 +103,17 @@ class LexerLiterals < Test::Unit::TestCase
|
|
103
103
|
end
|
104
104
|
|
105
105
|
def test_lex_single_2_byte_char
|
106
|
-
tokens = RL.scan('ا+')
|
106
|
+
tokens = RL.lex('ا+')
|
107
107
|
assert_equal( 2, tokens.length )
|
108
108
|
end
|
109
109
|
|
110
110
|
def test_lex_single_3_byte_char
|
111
|
-
tokens = RL.scan('れ+')
|
111
|
+
tokens = RL.lex('れ+')
|
112
112
|
assert_equal( 2, tokens.length )
|
113
113
|
end
|
114
114
|
|
115
115
|
def test_lex_single_4_byte_char
|
116
|
-
tokens = RL.scan('𝄞+')
|
116
|
+
tokens = RL.lex('𝄞+')
|
117
117
|
assert_equal( 2, tokens.length )
|
118
118
|
end
|
119
119
|
|