regexp_parser 0.1.6 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/ChangeLog +57 -0
  3. data/Gemfile +8 -0
  4. data/LICENSE +1 -1
  5. data/README.md +225 -206
  6. data/Rakefile +9 -3
  7. data/lib/regexp_parser.rb +7 -11
  8. data/lib/regexp_parser/expression.rb +72 -14
  9. data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
  10. data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
  11. data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
  12. data/lib/regexp_parser/expression/classes/keep.rb +7 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +28 -7
  14. data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
  15. data/lib/regexp_parser/expression/methods/tests.rb +116 -0
  16. data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +10 -0
  18. data/lib/regexp_parser/expression/sequence.rb +45 -0
  19. data/lib/regexp_parser/expression/subexpression.rb +29 -1
  20. data/lib/regexp_parser/lexer.rb +31 -8
  21. data/lib/regexp_parser/parser.rb +118 -45
  22. data/lib/regexp_parser/scanner.rb +1745 -1404
  23. data/lib/regexp_parser/scanner/property.rl +57 -3
  24. data/lib/regexp_parser/scanner/scanner.rl +161 -34
  25. data/lib/regexp_parser/syntax.rb +12 -2
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
  27. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
  28. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
  29. data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
  30. data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
  31. data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
  32. data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
  33. data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
  34. data/lib/regexp_parser/syntax/tokens.rb +19 -2
  35. data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
  36. data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
  38. data/lib/regexp_parser/token.rb +23 -8
  39. data/lib/regexp_parser/version.rb +5 -0
  40. data/regexp_parser.gemspec +35 -0
  41. data/test/expression/test_all.rb +6 -1
  42. data/test/expression/test_base.rb +19 -0
  43. data/test/expression/test_conditionals.rb +114 -0
  44. data/test/expression/test_free_space.rb +33 -0
  45. data/test/expression/test_set.rb +61 -0
  46. data/test/expression/test_strfregexp.rb +214 -0
  47. data/test/expression/test_subexpression.rb +24 -0
  48. data/test/expression/test_tests.rb +99 -0
  49. data/test/expression/test_to_h.rb +48 -0
  50. data/test/expression/test_to_s.rb +46 -0
  51. data/test/expression/test_traverse.rb +164 -0
  52. data/test/lexer/test_all.rb +16 -3
  53. data/test/lexer/test_conditionals.rb +101 -0
  54. data/test/lexer/test_keep.rb +24 -0
  55. data/test/lexer/test_literals.rb +51 -51
  56. data/test/lexer/test_nesting.rb +62 -62
  57. data/test/lexer/test_refcalls.rb +18 -20
  58. data/test/parser/test_all.rb +18 -3
  59. data/test/parser/test_alternation.rb +11 -14
  60. data/test/parser/test_conditionals.rb +148 -0
  61. data/test/parser/test_escapes.rb +29 -5
  62. data/test/parser/test_free_space.rb +139 -0
  63. data/test/parser/test_groups.rb +40 -0
  64. data/test/parser/test_keep.rb +21 -0
  65. data/test/scanner/test_all.rb +8 -2
  66. data/test/scanner/test_conditionals.rb +166 -0
  67. data/test/scanner/test_escapes.rb +8 -5
  68. data/test/scanner/test_free_space.rb +133 -0
  69. data/test/scanner/test_groups.rb +28 -0
  70. data/test/scanner/test_keep.rb +33 -0
  71. data/test/scanner/test_properties.rb +4 -0
  72. data/test/scanner/test_scripts.rb +71 -1
  73. data/test/syntax/ruby/test_1.9.3.rb +2 -2
  74. data/test/syntax/ruby/test_2.0.0.rb +38 -0
  75. data/test/syntax/ruby/test_2.2.0.rb +38 -0
  76. data/test/syntax/ruby/test_all.rb +1 -8
  77. data/test/syntax/ruby/test_files.rb +104 -0
  78. data/test/test_all.rb +2 -1
  79. data/test/token/test_all.rb +2 -0
  80. data/test/token/test_token.rb +109 -0
  81. metadata +75 -21
  82. data/VERSION.yml +0 -5
  83. data/lib/regexp_parser/ctype.rb +0 -48
  84. data/test/syntax/ruby/test_2.x.rb +0 -46
@@ -48,4 +48,50 @@ class ExpressionToS < Test::Unit::TestCase
48
48
  assert_equal( pattern, RP.parse(pattern).to_s )
49
49
  end
50
50
 
51
+ def test_expression_to_s_multiline_source
52
+ multiline = %r{
53
+ \A
54
+ a? # One letter
55
+ b{2,5} # Another one
56
+ [c-g]+ # A set
57
+ \z
58
+ }x
59
+
60
+ assert_equal( multiline.source, RP.parse(multiline).to_s )
61
+ end
62
+
63
+ def test_expression_to_s_multiline_to_s
64
+ multiline = %r{
65
+ \A
66
+ a? # One letter
67
+ b{2,5} # Another one
68
+ [c-g]+ # A set
69
+ \z
70
+ }x
71
+
72
+ assert_equal( multiline.to_s, RP.parse(multiline.to_s).to_s )
73
+ end
74
+
75
+ # Free spacing expressions that use spaces between quantifiers and their
76
+ # targets do not produce identical results due to the way quantifiers are
77
+ # applied to expressions (members, not nodes) and the merging of consecutive
78
+ # space nodes. This tests that they produce equivalent results.
79
+ def test_expression_to_s_multiline_equivalence
80
+ multiline = %r{
81
+ \A
82
+ a ? # One letter
83
+ b {2,5} # Another one
84
+ [c-g] + # A set
85
+ \z
86
+ }x
87
+
88
+ str = 'bbbcged'
89
+ root = RP.parse(multiline)
90
+
91
+ assert_equal(
92
+ multiline.match(str)[0],
93
+ Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]
94
+ )
95
+ end
96
+
51
97
  end
@@ -0,0 +1,164 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class SubexpressionTraverse < Test::Unit::TestCase
4
+
5
+ def test_subexpression_traverse
6
+ root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
7
+
8
+ enters = 0
9
+ visits = 0
10
+ exits = 0
11
+
12
+ root.traverse {|event, exp, index|
13
+ enters += 1 if event == :enter
14
+ visits += 1 if event == :visit
15
+ exits += 1 if event == :exit
16
+ }
17
+
18
+ assert_equal( 7, enters )
19
+ assert_equal( exits, enters )
20
+
21
+ assert_equal( 8, visits )
22
+ end
23
+
24
+ def test_subexpression_traverse_include_self
25
+ root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
26
+
27
+ enters = 0
28
+ visits = 0
29
+ exits = 0
30
+
31
+ root.traverse(true) {|event, exp, index|
32
+ enters += 1 if event == :enter
33
+ visits += 1 if event == :visit
34
+ exits += 1 if event == :exit
35
+ }
36
+
37
+ assert_equal( 8, enters )
38
+ assert_equal( exits, enters )
39
+
40
+ assert_equal( 8, visits )
41
+ end
42
+
43
+ def test_subexpression_walk_alias
44
+ root = RP.parse(/abc/)
45
+
46
+ assert_equal( true, root.respond_to?(:walk) )
47
+ end
48
+
49
+ def test_subexpression_each_expression
50
+ root = RP.parse(/a(?x:b(c))|g[h-k]/)
51
+
52
+ count = 0
53
+ root.each_expression {|exp, index|
54
+ count += 1
55
+ }
56
+
57
+ assert_equal( 10, count )
58
+ end
59
+
60
+ def test_subexpression_each_expression_include_self
61
+ root = RP.parse(/a(?x:b(c))|g[hi]/)
62
+
63
+ count = 0
64
+ root.each_expression(true) {|exp, index|
65
+ count += 1
66
+ }
67
+
68
+ assert_equal( 11, count )
69
+ end
70
+
71
+ def test_subexpression_each_expression_indices
72
+ root = RP.parse(/a(b)c/)
73
+
74
+ indices = []
75
+ root.each_expression {|exp, index| indices << index}
76
+
77
+ assert_equal( [0, 1, 0, 2], indices )
78
+ end
79
+
80
+ def test_subexpression_each_expression_indices_include_self
81
+ root = RP.parse(/a(b)c/)
82
+
83
+ indices = []
84
+ root.each_expression(true) {|exp, index| indices << index}
85
+
86
+ assert_equal( [0, 0, 1, 0, 2], indices )
87
+ end
88
+
89
+ def test_subexpression_map_without_block
90
+ root = RP.parse(/a(b([c-e]+))?/)
91
+
92
+ array = root.map
93
+
94
+ assert_equal( Array, array.class )
95
+ assert_equal( 5, array.length )
96
+
97
+ array.each do |item|
98
+ assert_equal( Array, item.class )
99
+ assert_equal( 2, item.length )
100
+ assert_equal( true, item.first.is_a?(Regexp::Expression::Base) )
101
+ assert_equal( true, item.last.is_a?(Fixnum) )
102
+ end
103
+ end
104
+
105
+ def test_subexpression_map_without_block_include_self
106
+ root = RP.parse(/a(b([c-e]+))?/)
107
+
108
+ array = root.map(true)
109
+
110
+ assert_equal( Array, array.class )
111
+ assert_equal( 6, array.length )
112
+ end
113
+
114
+ def test_subexpression_map_indices
115
+ root = RP.parse(/a(b([c-e]+))?f*g/)
116
+
117
+ indices = root.map {|exp, index| index}
118
+
119
+ assert_equal( [0, 1, 0, 1, 0, 2, 3], indices )
120
+ end
121
+
122
+ def test_subexpression_map_indices_include_self
123
+ root = RP.parse(/a(b([c-e]+))?f*g/)
124
+
125
+ indices = root.map(true) {|exp, index| index}
126
+
127
+ assert_equal( [0, 0, 1, 0, 1, 0, 2, 3], indices )
128
+ end
129
+
130
+ def test_subexpression_map_expressions
131
+ root = RP.parse(/a(b(c(d)))/)
132
+
133
+ levels = root.map {|exp, index|
134
+ [exp.level, exp.text] if exp.terminal?
135
+ }.compact
136
+
137
+ assert_equal(
138
+ [[0, 'a'], [1, 'b'], [2, 'c'], [3, 'd']],
139
+ levels
140
+ )
141
+ end
142
+
143
+ def test_subexpression_map_expressions_include_self
144
+ root = RP.parse(/a(b(c(d)))/)
145
+
146
+ levels = root.map(true) {|exp, index|
147
+ [exp.level, exp.to_s]
148
+ }.compact
149
+
150
+ assert_equal( [
151
+ [nil, 'a(b(c(d)))'],
152
+ [0, 'a'],
153
+ [0, '(b(c(d)))'],
154
+ [1, 'b'],
155
+ [1, '(c(d))'],
156
+ [2, 'c'],
157
+ [2, '(d)'],
158
+ [3, 'd']
159
+ ],
160
+ levels
161
+ )
162
+ end
163
+
164
+ end
@@ -6,21 +6,34 @@ require File.expand_path("../../helpers", __FILE__)
6
6
  require File.expand_path("../test_#{tc}", __FILE__)
7
7
  end
8
8
 
9
+ if RUBY_VERSION >= '2.0.0'
10
+ %w{conditionals keep}.each do|tc|
11
+ require File.expand_path("../test_#{tc}", __FILE__)
12
+ end
13
+ end
14
+
9
15
  class TestRegexpLexer < Test::Unit::TestCase
10
16
 
11
17
  def test_lexer_returns_an_array
12
- assert_instance_of( Array, RL.scan('abc'))
18
+ assert_instance_of( Array, RL.lex('abc'))
13
19
  end
14
20
 
15
21
  def test_lexer_returns_tokens
16
- tokens = RL.scan('^abc+[^one]{2,3}\b\d\\\C-C$')
22
+ tokens = RL.lex('^abc+[^one]{2,3}\b\d\\\C-C$')
17
23
  assert( tokens.all?{|token| token.kind_of?(Regexp::Token)},
18
24
  "Not all array members are tokens")
25
+
26
+ assert( tokens.all?{|token| token.to_a.length == 8},
27
+ "Not all tokens have a length of 8")
19
28
  end
20
29
 
21
30
  def test_lexer_token_count
22
- tokens = RL.scan(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
31
+ tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
23
32
  assert_equal( 26, tokens.length )
24
33
  end
25
34
 
35
+ def test_lexer_scan_alias
36
+ assert_equal( RL.lex(/a|b|c/), RL.scan(/a|b|c/) )
37
+ end
38
+
26
39
  end
@@ -0,0 +1,101 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class LexerConditionals < Test::Unit::TestCase
4
+
5
+ if RUBY_VERSION >= '2.0'
6
+
7
+ # Basic lexer output and nesting tests
8
+ tests = {
9
+ '(?<A>a)(?(<A>)b|c)' => [3, :conditional, :open, '(?', 7, 9, 0, 0, 0],
10
+ '(?<B>a)(?(<B>)b|c)' => [4, :conditional, :condition, '(<B>)', 9, 14, 0, 0, 1],
11
+ '(?<C>a)(?(<C>)b|c)' => [6, :conditional, :separator, '|', 15, 16, 0, 0, 1],
12
+ '(?<D>a)(?(<D>)b|c)' => [8, :conditional, :close, ')', 17, 18, 0, 0, 0],
13
+ }
14
+
15
+ count = 0
16
+ tests.each do |pattern, test|
17
+ define_method "test_lexer_#{test[1]}_#{test[2]}_#{count+=1}" do
18
+ tokens = RL.lex(pattern)
19
+ assert_equal( test[1,8], tokens[test[0]].to_a)
20
+ end
21
+ end
22
+
23
+ def test_lexer_conditional_mixed_nesting
24
+ regexp = /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/
25
+ tokens = RL.lex(regexp)
26
+
27
+ expected = [
28
+ [ 0, :group, :capture, '(', 0, 1, 0, 0, 0],
29
+ [ 1, :group, :named, '(?<A>', 1, 6, 1, 0, 0],
30
+
31
+ [ 5, :conditional, :open, '(?', 13, 15, 2, 0, 0],
32
+ [ 6, :conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
33
+ [ 8, :conditional, :separator, '|', 21, 22, 2, 0, 1],
34
+
35
+ [10, :conditional, :open, '(?', 23, 25, 3, 0, 1],
36
+ [11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
37
+
38
+ [12, :set, :open, '[', 30, 31, 3, 0, 2],
39
+ [13, :set, :range, 'e-g', 31, 34, 3, 1, 2],
40
+ [14, :set, :close, ']', 34, 35, 3, 0, 2],
41
+
42
+ [15, :conditional, :separator, '|', 35, 36, 3, 0, 2],
43
+ [19, :conditional, :close, ')', 41, 42, 3, 0, 1],
44
+ [21, :conditional, :close, ')', 43, 44, 2, 0, 0],
45
+
46
+ [22, :group, :close, ')', 44, 45, 1, 0, 0],
47
+ [23, :group, :close, ')', 45, 46, 0, 0, 0]
48
+ ].each do |test|
49
+ assert_equal( test[1,8], tokens[test[0]].to_a)
50
+ end
51
+ end
52
+
53
+ def test_lexer_conditional_deep_nesting
54
+ regexp = /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/
55
+ tokens = RL.lex(regexp)
56
+
57
+ expected = [
58
+ [ 9, :conditional, :open, '(?', 9, 11, 0, 0, 0],
59
+ [10, :conditional, :condition, '(1)', 11, 14, 0, 0, 1],
60
+
61
+ [11, :conditional, :open, '(?', 14, 16, 0, 0, 1],
62
+ [12, :conditional, :condition, '(2)', 16, 19, 0, 0, 2],
63
+
64
+ [13, :conditional, :open, '(?', 19, 21, 0, 0, 2],
65
+ [14, :conditional, :condition, '(3)', 21, 24, 0, 0, 3],
66
+
67
+ [16, :conditional, :separator, '|', 25, 26, 0, 0, 3],
68
+
69
+ [18, :conditional, :close, ')', 27, 28, 0, 0, 2],
70
+ [19, :conditional, :close, ')', 28, 29, 0, 0, 1],
71
+
72
+ [20, :conditional, :separator, '|', 29, 30, 0, 0, 1],
73
+
74
+ [21, :conditional, :open, '(?', 30, 32, 0, 0, 1],
75
+ [22, :conditional, :condition, '(3)', 32, 35, 0, 0, 2],
76
+
77
+ [23, :conditional, :open, '(?', 35, 37, 0, 0, 2],
78
+ [24, :conditional, :condition, '(2)', 37, 40, 0, 0, 3],
79
+
80
+ [26, :conditional, :separator, '|', 41, 42, 0, 0, 3],
81
+
82
+ [28, :conditional, :close, ')', 43, 44, 0, 0, 2],
83
+
84
+ [29, :conditional, :separator, '|', 44, 45, 0, 0, 2],
85
+
86
+ [30, :conditional, :open, '(?', 45, 47, 0, 0, 2],
87
+ [31, :conditional, :condition, '(1)', 47, 50, 0, 0, 3],
88
+
89
+ [33, :conditional, :separator, '|', 51, 52, 0, 0, 3],
90
+
91
+ [35, :conditional, :close, ')', 53, 54, 0, 0, 2],
92
+ [36, :conditional, :close, ')', 54, 55, 0, 0, 1],
93
+ [37, :conditional, :close, ')', 55, 56, 0, 0, 0]
94
+ ].each do |test|
95
+ assert_equal( test[1,8], tokens[test[0]].to_a)
96
+ end
97
+ end
98
+
99
+ end
100
+
101
+ end
@@ -0,0 +1,24 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class LexerKeep < Test::Unit::TestCase
4
+
5
+ def test_lex_keep_token
6
+ regexp = /ab\Kcd/
7
+ tokens = RL.lex(regexp)
8
+
9
+ assert_equal( :keep, tokens[1].type )
10
+ assert_equal( :mark, tokens[1].token )
11
+ end
12
+
13
+ def test_lex_keep_nested
14
+ regexp = /(a\Kb)|(c\\\Kd)ef/
15
+ tokens = RL.lex(regexp)
16
+
17
+ assert_equal( :keep, tokens[2].type )
18
+ assert_equal( :mark, tokens[2].token )
19
+
20
+ assert_equal( :keep, tokens[9].type )
21
+ assert_equal( :mark, tokens[9].token )
22
+ end
23
+
24
+ end
@@ -7,86 +7,86 @@ class LexerLiterals < Test::Unit::TestCase
7
7
  tests = {
8
8
  # ascii, single byte characters
9
9
  'a' => {
10
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
10
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
11
11
  },
12
12
 
13
13
  'ab+' => {
14
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
15
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0],
16
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0],
14
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
15
+ 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
16
+ 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0],
17
17
  },
18
18
 
19
19
 
20
20
  # 2 byte wide characters, Arabic
21
21
  'ا' => {
22
- 0 => [:literal, :literal, 'ا', 0, 2, 0, 0],
22
+ 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0],
23
23
  },
24
24
 
25
25
  'aاbبcت' => {
26
- 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0],
26
+ 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0],
27
27
  },
28
28
 
29
29
  'aاbبت?' => {
30
- 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0],
31
- 1 => [:literal, :literal, 'ت', 6, 8, 0, 0],
32
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0],
30
+ 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
31
+ 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
32
+ 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
33
33
  },
34
34
 
35
35
  'aا?bبcت+' => {
36
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
37
- 1 => [:literal, :literal, 'ا', 1, 3, 0, 0],
38
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0],
39
- 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0],
40
- 4 => [:literal, :literal, 'ت', 8, 10, 0, 0],
41
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0],
36
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
37
+ 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
38
+ 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
39
+ 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
40
+ 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
41
+ 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0],
42
42
  },
43
43
 
44
44
  'a(اbب+)cت?' => {
45
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
46
- 1 => [:group, :capture, '(', 1, 2, 0, 0],
47
- 2 => [:literal, :literal, 'اb', 2, 5, 1, 0],
48
- 3 => [:literal, :literal, 'ب', 5, 7, 1, 0],
49
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0],
50
- 5 => [:group, :close, ')', 8, 9, 0, 0],
51
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0],
52
- 7 => [:literal, :literal, 'ت', 10, 12, 0, 0],
53
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0],
45
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
46
+ 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
47
+ 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
48
+ 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
49
+ 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
50
+ 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
51
+ 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
52
+ 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
53
+ 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0],
54
54
  },
55
55
 
56
56
 
57
57
  # 3 byte wide characters, Japanese
58
58
  'ab?れます+cd' => {
59
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
60
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0],
61
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0],
62
- 3 => [:literal, :literal, 'れま', 3, 9, 0, 0],
63
- 4 => [:literal, :literal, 'す', 9, 12, 0, 0],
64
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0],
65
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0],
59
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
60
+ 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
61
+ 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
62
+ 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
63
+ 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
64
+ 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
65
+ 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0],
66
66
  },
67
67
 
68
68
 
69
69
  # 4 byte wide characters, Osmanya
70
70
  '𐒀𐒁?𐒂ab+𐒃' => {
71
- 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0],
72
- 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0],
73
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0],
74
- 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0],
75
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0],
76
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0],
77
- 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0],
71
+ 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
72
+ 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
73
+ 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
74
+ 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
75
+ 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
76
+ 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
77
+ 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0],
78
78
  },
79
79
 
80
80
  'mu𝄞?si*𝄫c+' => {
81
- 0 => [:literal, :literal, 'mu', 0, 2, 0, 0],
82
- 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0],
83
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0],
84
- 3 => [:literal, :literal, 's', 7, 8, 0, 0],
85
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0],
86
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0],
87
- 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0],
88
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0],
89
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0],
81
+ 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
82
+ 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
83
+ 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
84
+ 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
85
+ 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
86
+ 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
87
+ 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
88
+ 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
89
+ 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
90
90
  },
91
91
  }
92
92
 
@@ -94,7 +94,7 @@ class LexerLiterals < Test::Unit::TestCase
94
94
  tests.each do |pattern, checks|
95
95
  define_method "test_lex_literal_runs_#{count+=1}" do
96
96
 
97
- tokens = RL.scan(pattern)
97
+ tokens = RL.lex(pattern)
98
98
  checks.each do |offset, token|
99
99
  assert_equal( token, tokens[offset].to_a )
100
100
  end
@@ -103,17 +103,17 @@ class LexerLiterals < Test::Unit::TestCase
103
103
  end
104
104
 
105
105
  def test_lex_single_2_byte_char
106
- tokens = RL.scan('ا+')
106
+ tokens = RL.lex('ا+')
107
107
  assert_equal( 2, tokens.length )
108
108
  end
109
109
 
110
110
  def test_lex_single_3_byte_char
111
- tokens = RL.scan('れ+')
111
+ tokens = RL.lex('れ+')
112
112
  assert_equal( 2, tokens.length )
113
113
  end
114
114
 
115
115
  def test_lex_single_4_byte_char
116
- tokens = RL.scan('𝄞+')
116
+ tokens = RL.lex('𝄞+')
117
117
  assert_equal( 2, tokens.length )
118
118
  end
119
119