regexp_parser 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/ChangeLog +57 -0
  3. data/Gemfile +8 -0
  4. data/LICENSE +1 -1
  5. data/README.md +225 -206
  6. data/Rakefile +9 -3
  7. data/lib/regexp_parser.rb +7 -11
  8. data/lib/regexp_parser/expression.rb +72 -14
  9. data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
  10. data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
  11. data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
  12. data/lib/regexp_parser/expression/classes/keep.rb +7 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +28 -7
  14. data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
  15. data/lib/regexp_parser/expression/methods/tests.rb +116 -0
  16. data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +10 -0
  18. data/lib/regexp_parser/expression/sequence.rb +45 -0
  19. data/lib/regexp_parser/expression/subexpression.rb +29 -1
  20. data/lib/regexp_parser/lexer.rb +31 -8
  21. data/lib/regexp_parser/parser.rb +118 -45
  22. data/lib/regexp_parser/scanner.rb +1745 -1404
  23. data/lib/regexp_parser/scanner/property.rl +57 -3
  24. data/lib/regexp_parser/scanner/scanner.rl +161 -34
  25. data/lib/regexp_parser/syntax.rb +12 -2
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
  27. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
  28. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
  29. data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
  30. data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
  31. data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
  32. data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
  33. data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
  34. data/lib/regexp_parser/syntax/tokens.rb +19 -2
  35. data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
  36. data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
  38. data/lib/regexp_parser/token.rb +23 -8
  39. data/lib/regexp_parser/version.rb +5 -0
  40. data/regexp_parser.gemspec +35 -0
  41. data/test/expression/test_all.rb +6 -1
  42. data/test/expression/test_base.rb +19 -0
  43. data/test/expression/test_conditionals.rb +114 -0
  44. data/test/expression/test_free_space.rb +33 -0
  45. data/test/expression/test_set.rb +61 -0
  46. data/test/expression/test_strfregexp.rb +214 -0
  47. data/test/expression/test_subexpression.rb +24 -0
  48. data/test/expression/test_tests.rb +99 -0
  49. data/test/expression/test_to_h.rb +48 -0
  50. data/test/expression/test_to_s.rb +46 -0
  51. data/test/expression/test_traverse.rb +164 -0
  52. data/test/lexer/test_all.rb +16 -3
  53. data/test/lexer/test_conditionals.rb +101 -0
  54. data/test/lexer/test_keep.rb +24 -0
  55. data/test/lexer/test_literals.rb +51 -51
  56. data/test/lexer/test_nesting.rb +62 -62
  57. data/test/lexer/test_refcalls.rb +18 -20
  58. data/test/parser/test_all.rb +18 -3
  59. data/test/parser/test_alternation.rb +11 -14
  60. data/test/parser/test_conditionals.rb +148 -0
  61. data/test/parser/test_escapes.rb +29 -5
  62. data/test/parser/test_free_space.rb +139 -0
  63. data/test/parser/test_groups.rb +40 -0
  64. data/test/parser/test_keep.rb +21 -0
  65. data/test/scanner/test_all.rb +8 -2
  66. data/test/scanner/test_conditionals.rb +166 -0
  67. data/test/scanner/test_escapes.rb +8 -5
  68. data/test/scanner/test_free_space.rb +133 -0
  69. data/test/scanner/test_groups.rb +28 -0
  70. data/test/scanner/test_keep.rb +33 -0
  71. data/test/scanner/test_properties.rb +4 -0
  72. data/test/scanner/test_scripts.rb +71 -1
  73. data/test/syntax/ruby/test_1.9.3.rb +2 -2
  74. data/test/syntax/ruby/test_2.0.0.rb +38 -0
  75. data/test/syntax/ruby/test_2.2.0.rb +38 -0
  76. data/test/syntax/ruby/test_all.rb +1 -8
  77. data/test/syntax/ruby/test_files.rb +104 -0
  78. data/test/test_all.rb +2 -1
  79. data/test/token/test_all.rb +2 -0
  80. data/test/token/test_token.rb +109 -0
  81. metadata +75 -21
  82. data/VERSION.yml +0 -5
  83. data/lib/regexp_parser/ctype.rb +0 -48
  84. data/test/syntax/ruby/test_2.x.rb +0 -46
@@ -48,4 +48,50 @@ class ExpressionToS < Test::Unit::TestCase
48
48
  assert_equal( pattern, RP.parse(pattern).to_s )
49
49
  end
50
50
 
51
+ def test_expression_to_s_multiline_source
52
+ multiline = %r{
53
+ \A
54
+ a? # One letter
55
+ b{2,5} # Another one
56
+ [c-g]+ # A set
57
+ \z
58
+ }x
59
+
60
+ assert_equal( multiline.source, RP.parse(multiline).to_s )
61
+ end
62
+
63
+ def test_expression_to_s_multiline_to_s
64
+ multiline = %r{
65
+ \A
66
+ a? # One letter
67
+ b{2,5} # Another one
68
+ [c-g]+ # A set
69
+ \z
70
+ }x
71
+
72
+ assert_equal( multiline.to_s, RP.parse(multiline.to_s).to_s )
73
+ end
74
+
75
+ # Free spacing expressions that use spaces between quantifiers and their
76
+ # targets do not produce identical results due to the way quantifiers are
77
+ # applied to expressions (members, not nodes) and the merging of consecutive
78
+ # space nodes. This tests that they produce equivalent results.
79
+ def test_expression_to_s_multiline_equivalence
80
+ multiline = %r{
81
+ \A
82
+ a ? # One letter
83
+ b {2,5} # Another one
84
+ [c-g] + # A set
85
+ \z
86
+ }x
87
+
88
+ str = 'bbbcged'
89
+ root = RP.parse(multiline)
90
+
91
+ assert_equal(
92
+ multiline.match(str)[0],
93
+ Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]
94
+ )
95
+ end
96
+
51
97
  end
@@ -0,0 +1,164 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class SubexpressionTraverse < Test::Unit::TestCase
4
+
5
+ def test_subexpression_traverse
6
+ root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
7
+
8
+ enters = 0
9
+ visits = 0
10
+ exits = 0
11
+
12
+ root.traverse {|event, exp, index|
13
+ enters += 1 if event == :enter
14
+ visits += 1 if event == :visit
15
+ exits += 1 if event == :exit
16
+ }
17
+
18
+ assert_equal( 7, enters )
19
+ assert_equal( exits, enters )
20
+
21
+ assert_equal( 8, visits )
22
+ end
23
+
24
+ def test_subexpression_traverse_include_self
25
+ root = RP.parse(/a(b(c(d)))|g[hi]j|klmn/)
26
+
27
+ enters = 0
28
+ visits = 0
29
+ exits = 0
30
+
31
+ root.traverse(true) {|event, exp, index|
32
+ enters += 1 if event == :enter
33
+ visits += 1 if event == :visit
34
+ exits += 1 if event == :exit
35
+ }
36
+
37
+ assert_equal( 8, enters )
38
+ assert_equal( exits, enters )
39
+
40
+ assert_equal( 8, visits )
41
+ end
42
+
43
+ def test_subexpression_walk_alias
44
+ root = RP.parse(/abc/)
45
+
46
+ assert_equal( true, root.respond_to?(:walk) )
47
+ end
48
+
49
+ def test_subexpression_each_expression
50
+ root = RP.parse(/a(?x:b(c))|g[h-k]/)
51
+
52
+ count = 0
53
+ root.each_expression {|exp, index|
54
+ count += 1
55
+ }
56
+
57
+ assert_equal( 10, count )
58
+ end
59
+
60
+ def test_subexpression_each_expression_include_self
61
+ root = RP.parse(/a(?x:b(c))|g[hi]/)
62
+
63
+ count = 0
64
+ root.each_expression(true) {|exp, index|
65
+ count += 1
66
+ }
67
+
68
+ assert_equal( 11, count )
69
+ end
70
+
71
+ def test_subexpression_each_expression_indices
72
+ root = RP.parse(/a(b)c/)
73
+
74
+ indices = []
75
+ root.each_expression {|exp, index| indices << index}
76
+
77
+ assert_equal( [0, 1, 0, 2], indices )
78
+ end
79
+
80
+ def test_subexpression_each_expression_indices_include_self
81
+ root = RP.parse(/a(b)c/)
82
+
83
+ indices = []
84
+ root.each_expression(true) {|exp, index| indices << index}
85
+
86
+ assert_equal( [0, 0, 1, 0, 2], indices )
87
+ end
88
+
89
+ def test_subexpression_map_without_block
90
+ root = RP.parse(/a(b([c-e]+))?/)
91
+
92
+ array = root.map
93
+
94
+ assert_equal( Array, array.class )
95
+ assert_equal( 5, array.length )
96
+
97
+ array.each do |item|
98
+ assert_equal( Array, item.class )
99
+ assert_equal( 2, item.length )
100
+ assert_equal( true, item.first.is_a?(Regexp::Expression::Base) )
101
+ assert_equal( true, item.last.is_a?(Fixnum) )
102
+ end
103
+ end
104
+
105
+ def test_subexpression_map_without_block_include_self
106
+ root = RP.parse(/a(b([c-e]+))?/)
107
+
108
+ array = root.map(true)
109
+
110
+ assert_equal( Array, array.class )
111
+ assert_equal( 6, array.length )
112
+ end
113
+
114
+ def test_subexpression_map_indices
115
+ root = RP.parse(/a(b([c-e]+))?f*g/)
116
+
117
+ indices = root.map {|exp, index| index}
118
+
119
+ assert_equal( [0, 1, 0, 1, 0, 2, 3], indices )
120
+ end
121
+
122
+ def test_subexpression_map_indices_include_self
123
+ root = RP.parse(/a(b([c-e]+))?f*g/)
124
+
125
+ indices = root.map(true) {|exp, index| index}
126
+
127
+ assert_equal( [0, 0, 1, 0, 1, 0, 2, 3], indices )
128
+ end
129
+
130
+ def test_subexpression_map_expressions
131
+ root = RP.parse(/a(b(c(d)))/)
132
+
133
+ levels = root.map {|exp, index|
134
+ [exp.level, exp.text] if exp.terminal?
135
+ }.compact
136
+
137
+ assert_equal(
138
+ [[0, 'a'], [1, 'b'], [2, 'c'], [3, 'd']],
139
+ levels
140
+ )
141
+ end
142
+
143
+ def test_subexpression_map_expressions_include_self
144
+ root = RP.parse(/a(b(c(d)))/)
145
+
146
+ levels = root.map(true) {|exp, index|
147
+ [exp.level, exp.to_s]
148
+ }.compact
149
+
150
+ assert_equal( [
151
+ [nil, 'a(b(c(d)))'],
152
+ [0, 'a'],
153
+ [0, '(b(c(d)))'],
154
+ [1, 'b'],
155
+ [1, '(c(d))'],
156
+ [2, 'c'],
157
+ [2, '(d)'],
158
+ [3, 'd']
159
+ ],
160
+ levels
161
+ )
162
+ end
163
+
164
+ end
@@ -6,21 +6,34 @@ require File.expand_path("../../helpers", __FILE__)
6
6
  require File.expand_path("../test_#{tc}", __FILE__)
7
7
  end
8
8
 
9
+ if RUBY_VERSION >= '2.0.0'
10
+ %w{conditionals keep}.each do|tc|
11
+ require File.expand_path("../test_#{tc}", __FILE__)
12
+ end
13
+ end
14
+
9
15
  class TestRegexpLexer < Test::Unit::TestCase
10
16
 
11
17
  def test_lexer_returns_an_array
12
- assert_instance_of( Array, RL.scan('abc'))
18
+ assert_instance_of( Array, RL.lex('abc'))
13
19
  end
14
20
 
15
21
  def test_lexer_returns_tokens
16
- tokens = RL.scan('^abc+[^one]{2,3}\b\d\\\C-C$')
22
+ tokens = RL.lex('^abc+[^one]{2,3}\b\d\\\C-C$')
17
23
  assert( tokens.all?{|token| token.kind_of?(Regexp::Token)},
18
24
  "Not all array members are tokens")
25
+
26
+ assert( tokens.all?{|token| token.to_a.length == 8},
27
+ "Not all tokens have a length of 8")
19
28
  end
20
29
 
21
30
  def test_lexer_token_count
22
- tokens = RL.scan(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
31
+ tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
23
32
  assert_equal( 26, tokens.length )
24
33
  end
25
34
 
35
+ def test_lexer_scan_alias
36
+ assert_equal( RL.lex(/a|b|c/), RL.scan(/a|b|c/) )
37
+ end
38
+
26
39
  end
@@ -0,0 +1,101 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class LexerConditionals < Test::Unit::TestCase
4
+
5
+ if RUBY_VERSION >= '2.0'
6
+
7
+ # Basic lexer output and nesting tests
8
+ tests = {
9
+ '(?<A>a)(?(<A>)b|c)' => [3, :conditional, :open, '(?', 7, 9, 0, 0, 0],
10
+ '(?<B>a)(?(<B>)b|c)' => [4, :conditional, :condition, '(<B>)', 9, 14, 0, 0, 1],
11
+ '(?<C>a)(?(<C>)b|c)' => [6, :conditional, :separator, '|', 15, 16, 0, 0, 1],
12
+ '(?<D>a)(?(<D>)b|c)' => [8, :conditional, :close, ')', 17, 18, 0, 0, 0],
13
+ }
14
+
15
+ count = 0
16
+ tests.each do |pattern, test|
17
+ define_method "test_lexer_#{test[1]}_#{test[2]}_#{count+=1}" do
18
+ tokens = RL.lex(pattern)
19
+ assert_equal( test[1,8], tokens[test[0]].to_a)
20
+ end
21
+ end
22
+
23
+ def test_lexer_conditional_mixed_nesting
24
+ regexp = /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/
25
+ tokens = RL.lex(regexp)
26
+
27
+ expected = [
28
+ [ 0, :group, :capture, '(', 0, 1, 0, 0, 0],
29
+ [ 1, :group, :named, '(?<A>', 1, 6, 1, 0, 0],
30
+
31
+ [ 5, :conditional, :open, '(?', 13, 15, 2, 0, 0],
32
+ [ 6, :conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
33
+ [ 8, :conditional, :separator, '|', 21, 22, 2, 0, 1],
34
+
35
+ [10, :conditional, :open, '(?', 23, 25, 3, 0, 1],
36
+ [11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
37
+
38
+ [12, :set, :open, '[', 30, 31, 3, 0, 2],
39
+ [13, :set, :range, 'e-g', 31, 34, 3, 1, 2],
40
+ [14, :set, :close, ']', 34, 35, 3, 0, 2],
41
+
42
+ [15, :conditional, :separator, '|', 35, 36, 3, 0, 2],
43
+ [19, :conditional, :close, ')', 41, 42, 3, 0, 1],
44
+ [21, :conditional, :close, ')', 43, 44, 2, 0, 0],
45
+
46
+ [22, :group, :close, ')', 44, 45, 1, 0, 0],
47
+ [23, :group, :close, ')', 45, 46, 0, 0, 0]
48
+ ].each do |test|
49
+ assert_equal( test[1,8], tokens[test[0]].to_a)
50
+ end
51
+ end
52
+
53
+ def test_lexer_conditional_deep_nesting
54
+ regexp = /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/
55
+ tokens = RL.lex(regexp)
56
+
57
+ expected = [
58
+ [ 9, :conditional, :open, '(?', 9, 11, 0, 0, 0],
59
+ [10, :conditional, :condition, '(1)', 11, 14, 0, 0, 1],
60
+
61
+ [11, :conditional, :open, '(?', 14, 16, 0, 0, 1],
62
+ [12, :conditional, :condition, '(2)', 16, 19, 0, 0, 2],
63
+
64
+ [13, :conditional, :open, '(?', 19, 21, 0, 0, 2],
65
+ [14, :conditional, :condition, '(3)', 21, 24, 0, 0, 3],
66
+
67
+ [16, :conditional, :separator, '|', 25, 26, 0, 0, 3],
68
+
69
+ [18, :conditional, :close, ')', 27, 28, 0, 0, 2],
70
+ [19, :conditional, :close, ')', 28, 29, 0, 0, 1],
71
+
72
+ [20, :conditional, :separator, '|', 29, 30, 0, 0, 1],
73
+
74
+ [21, :conditional, :open, '(?', 30, 32, 0, 0, 1],
75
+ [22, :conditional, :condition, '(3)', 32, 35, 0, 0, 2],
76
+
77
+ [23, :conditional, :open, '(?', 35, 37, 0, 0, 2],
78
+ [24, :conditional, :condition, '(2)', 37, 40, 0, 0, 3],
79
+
80
+ [26, :conditional, :separator, '|', 41, 42, 0, 0, 3],
81
+
82
+ [28, :conditional, :close, ')', 43, 44, 0, 0, 2],
83
+
84
+ [29, :conditional, :separator, '|', 44, 45, 0, 0, 2],
85
+
86
+ [30, :conditional, :open, '(?', 45, 47, 0, 0, 2],
87
+ [31, :conditional, :condition, '(1)', 47, 50, 0, 0, 3],
88
+
89
+ [33, :conditional, :separator, '|', 51, 52, 0, 0, 3],
90
+
91
+ [35, :conditional, :close, ')', 53, 54, 0, 0, 2],
92
+ [36, :conditional, :close, ')', 54, 55, 0, 0, 1],
93
+ [37, :conditional, :close, ')', 55, 56, 0, 0, 0]
94
+ ].each do |test|
95
+ assert_equal( test[1,8], tokens[test[0]].to_a)
96
+ end
97
+ end
98
+
99
+ end
100
+
101
+ end
@@ -0,0 +1,24 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class LexerKeep < Test::Unit::TestCase
4
+
5
+ def test_lex_keep_token
6
+ regexp = /ab\Kcd/
7
+ tokens = RL.lex(regexp)
8
+
9
+ assert_equal( :keep, tokens[1].type )
10
+ assert_equal( :mark, tokens[1].token )
11
+ end
12
+
13
+ def test_lex_keep_nested
14
+ regexp = /(a\Kb)|(c\\\Kd)ef/
15
+ tokens = RL.lex(regexp)
16
+
17
+ assert_equal( :keep, tokens[2].type )
18
+ assert_equal( :mark, tokens[2].token )
19
+
20
+ assert_equal( :keep, tokens[9].type )
21
+ assert_equal( :mark, tokens[9].token )
22
+ end
23
+
24
+ end
@@ -7,86 +7,86 @@ class LexerLiterals < Test::Unit::TestCase
7
7
  tests = {
8
8
  # ascii, single byte characters
9
9
  'a' => {
10
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
10
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
11
11
  },
12
12
 
13
13
  'ab+' => {
14
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
15
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0],
16
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0],
14
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
15
+ 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
16
+ 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0],
17
17
  },
18
18
 
19
19
 
20
20
  # 2 byte wide characters, Arabic
21
21
  'ا' => {
22
- 0 => [:literal, :literal, 'ا', 0, 2, 0, 0],
22
+ 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0],
23
23
  },
24
24
 
25
25
  'aاbبcت' => {
26
- 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0],
26
+ 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0],
27
27
  },
28
28
 
29
29
  'aاbبت?' => {
30
- 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0],
31
- 1 => [:literal, :literal, 'ت', 6, 8, 0, 0],
32
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0],
30
+ 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
31
+ 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
32
+ 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
33
33
  },
34
34
 
35
35
  'aا?bبcت+' => {
36
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
37
- 1 => [:literal, :literal, 'ا', 1, 3, 0, 0],
38
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0],
39
- 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0],
40
- 4 => [:literal, :literal, 'ت', 8, 10, 0, 0],
41
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0],
36
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
37
+ 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
38
+ 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
39
+ 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
40
+ 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
41
+ 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0],
42
42
  },
43
43
 
44
44
  'a(اbب+)cت?' => {
45
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
46
- 1 => [:group, :capture, '(', 1, 2, 0, 0],
47
- 2 => [:literal, :literal, 'اb', 2, 5, 1, 0],
48
- 3 => [:literal, :literal, 'ب', 5, 7, 1, 0],
49
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0],
50
- 5 => [:group, :close, ')', 8, 9, 0, 0],
51
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0],
52
- 7 => [:literal, :literal, 'ت', 10, 12, 0, 0],
53
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0],
45
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
46
+ 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
47
+ 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
48
+ 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
49
+ 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
50
+ 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
51
+ 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
52
+ 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
53
+ 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0],
54
54
  },
55
55
 
56
56
 
57
57
  # 3 byte wide characters, Japanese
58
58
  'ab?れます+cd' => {
59
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0],
60
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0],
61
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0],
62
- 3 => [:literal, :literal, 'れま', 3, 9, 0, 0],
63
- 4 => [:literal, :literal, 'す', 9, 12, 0, 0],
64
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0],
65
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0],
59
+ 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
60
+ 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
61
+ 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
62
+ 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
63
+ 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
64
+ 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
65
+ 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0],
66
66
  },
67
67
 
68
68
 
69
69
  # 4 byte wide characters, Osmanya
70
70
  '𐒀𐒁?𐒂ab+𐒃' => {
71
- 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0],
72
- 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0],
73
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0],
74
- 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0],
75
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0],
76
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0],
77
- 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0],
71
+ 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
72
+ 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
73
+ 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
74
+ 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
75
+ 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
76
+ 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
77
+ 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0],
78
78
  },
79
79
 
80
80
  'mu𝄞?si*𝄫c+' => {
81
- 0 => [:literal, :literal, 'mu', 0, 2, 0, 0],
82
- 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0],
83
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0],
84
- 3 => [:literal, :literal, 's', 7, 8, 0, 0],
85
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0],
86
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0],
87
- 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0],
88
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0],
89
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0],
81
+ 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
82
+ 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
83
+ 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
84
+ 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
85
+ 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
86
+ 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
87
+ 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
88
+ 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
89
+ 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
90
90
  },
91
91
  }
92
92
 
@@ -94,7 +94,7 @@ class LexerLiterals < Test::Unit::TestCase
94
94
  tests.each do |pattern, checks|
95
95
  define_method "test_lex_literal_runs_#{count+=1}" do
96
96
 
97
- tokens = RL.scan(pattern)
97
+ tokens = RL.lex(pattern)
98
98
  checks.each do |offset, token|
99
99
  assert_equal( token, tokens[offset].to_a )
100
100
  end
@@ -103,17 +103,17 @@ class LexerLiterals < Test::Unit::TestCase
103
103
  end
104
104
 
105
105
  def test_lex_single_2_byte_char
106
- tokens = RL.scan('ا+')
106
+ tokens = RL.lex('ا+')
107
107
  assert_equal( 2, tokens.length )
108
108
  end
109
109
 
110
110
  def test_lex_single_3_byte_char
111
- tokens = RL.scan('れ+')
111
+ tokens = RL.lex('れ+')
112
112
  assert_equal( 2, tokens.length )
113
113
  end
114
114
 
115
115
  def test_lex_single_4_byte_char
116
- tokens = RL.scan('𝄞+')
116
+ tokens = RL.lex('𝄞+')
117
117
  assert_equal( 2, tokens.length )
118
118
  end
119
119