regexp_parser 1.4.0 → 1.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (171) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -1
  3. data/Gemfile +3 -3
  4. data/README.md +11 -18
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  18. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  19. data/lib/regexp_parser/expression/sequence.rb +3 -6
  20. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  21. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  22. data/lib/regexp_parser/lexer.rb +30 -44
  23. data/lib/regexp_parser/parser.rb +47 -24
  24. data/lib/regexp_parser/scanner.rb +1228 -1367
  25. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  26. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  27. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  28. data/lib/regexp_parser/scanner/scanner.rl +101 -194
  29. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  30. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  31. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  33. data/lib/regexp_parser/version.rb +1 -1
  34. data/regexp_parser.gemspec +2 -2
  35. data/spec/expression/base_spec.rb +94 -0
  36. data/spec/expression/clone_spec.rb +120 -0
  37. data/spec/expression/conditional_spec.rb +89 -0
  38. data/spec/expression/free_space_spec.rb +27 -0
  39. data/spec/expression/methods/match_length_spec.rb +161 -0
  40. data/spec/expression/methods/match_spec.rb +25 -0
  41. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  42. data/spec/expression/methods/tests_spec.rb +99 -0
  43. data/spec/expression/methods/traverse_spec.rb +161 -0
  44. data/spec/expression/options_spec.rb +128 -0
  45. data/spec/expression/root_spec.rb +9 -0
  46. data/spec/expression/sequence_spec.rb +9 -0
  47. data/spec/expression/subexpression_spec.rb +50 -0
  48. data/spec/expression/to_h_spec.rb +26 -0
  49. data/spec/expression/to_s_spec.rb +100 -0
  50. data/spec/lexer/all_spec.rb +22 -0
  51. data/spec/lexer/conditionals_spec.rb +53 -0
  52. data/spec/lexer/delimiters_spec.rb +68 -0
  53. data/spec/lexer/escapes_spec.rb +14 -0
  54. data/spec/lexer/keep_spec.rb +10 -0
  55. data/spec/lexer/literals_spec.rb +89 -0
  56. data/spec/lexer/nesting_spec.rb +99 -0
  57. data/spec/lexer/refcalls_spec.rb +55 -0
  58. data/spec/parser/all_spec.rb +43 -0
  59. data/spec/parser/alternation_spec.rb +88 -0
  60. data/spec/parser/anchors_spec.rb +17 -0
  61. data/spec/parser/conditionals_spec.rb +179 -0
  62. data/spec/parser/errors_spec.rb +30 -0
  63. data/spec/parser/escapes_spec.rb +121 -0
  64. data/spec/parser/free_space_spec.rb +130 -0
  65. data/spec/parser/groups_spec.rb +108 -0
  66. data/spec/parser/keep_spec.rb +6 -0
  67. data/spec/parser/posix_classes_spec.rb +8 -0
  68. data/spec/parser/properties_spec.rb +115 -0
  69. data/spec/parser/quantifiers_spec.rb +52 -0
  70. data/spec/parser/refcalls_spec.rb +112 -0
  71. data/spec/parser/set/intersections_spec.rb +127 -0
  72. data/spec/parser/set/ranges_spec.rb +111 -0
  73. data/spec/parser/sets_spec.rb +178 -0
  74. data/spec/parser/types_spec.rb +18 -0
  75. data/spec/scanner/all_spec.rb +18 -0
  76. data/spec/scanner/anchors_spec.rb +21 -0
  77. data/spec/scanner/conditionals_spec.rb +128 -0
  78. data/spec/scanner/delimiters_spec.rb +52 -0
  79. data/spec/scanner/errors_spec.rb +67 -0
  80. data/spec/scanner/escapes_spec.rb +53 -0
  81. data/spec/scanner/free_space_spec.rb +133 -0
  82. data/spec/scanner/groups_spec.rb +52 -0
  83. data/spec/scanner/keep_spec.rb +10 -0
  84. data/spec/scanner/literals_spec.rb +49 -0
  85. data/spec/scanner/meta_spec.rb +18 -0
  86. data/spec/scanner/properties_spec.rb +64 -0
  87. data/spec/scanner/quantifiers_spec.rb +20 -0
  88. data/spec/scanner/refcalls_spec.rb +36 -0
  89. data/spec/scanner/sets_spec.rb +102 -0
  90. data/spec/scanner/types_spec.rb +14 -0
  91. data/spec/spec_helper.rb +15 -0
  92. data/{test → spec}/support/runner.rb +9 -8
  93. data/spec/support/shared_examples.rb +77 -0
  94. data/{test → spec}/support/warning_extractor.rb +5 -7
  95. data/spec/syntax/syntax_spec.rb +48 -0
  96. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  97. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  98. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  99. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  100. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  101. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  102. data/spec/syntax/versions/aliases_spec.rb +37 -0
  103. data/spec/token/token_spec.rb +85 -0
  104. metadata +149 -144
  105. data/test/expression/test_all.rb +0 -12
  106. data/test/expression/test_base.rb +0 -90
  107. data/test/expression/test_clone.rb +0 -89
  108. data/test/expression/test_conditionals.rb +0 -113
  109. data/test/expression/test_free_space.rb +0 -35
  110. data/test/expression/test_set.rb +0 -84
  111. data/test/expression/test_strfregexp.rb +0 -230
  112. data/test/expression/test_subexpression.rb +0 -58
  113. data/test/expression/test_tests.rb +0 -99
  114. data/test/expression/test_to_h.rb +0 -59
  115. data/test/expression/test_to_s.rb +0 -104
  116. data/test/expression/test_traverse.rb +0 -161
  117. data/test/helpers.rb +0 -10
  118. data/test/lexer/test_all.rb +0 -41
  119. data/test/lexer/test_conditionals.rb +0 -127
  120. data/test/lexer/test_keep.rb +0 -24
  121. data/test/lexer/test_literals.rb +0 -130
  122. data/test/lexer/test_nesting.rb +0 -132
  123. data/test/lexer/test_refcalls.rb +0 -56
  124. data/test/parser/set/test_intersections.rb +0 -127
  125. data/test/parser/set/test_ranges.rb +0 -111
  126. data/test/parser/test_all.rb +0 -64
  127. data/test/parser/test_alternation.rb +0 -92
  128. data/test/parser/test_anchors.rb +0 -34
  129. data/test/parser/test_conditionals.rb +0 -187
  130. data/test/parser/test_errors.rb +0 -63
  131. data/test/parser/test_escapes.rb +0 -134
  132. data/test/parser/test_free_space.rb +0 -139
  133. data/test/parser/test_groups.rb +0 -289
  134. data/test/parser/test_keep.rb +0 -21
  135. data/test/parser/test_posix_classes.rb +0 -27
  136. data/test/parser/test_properties.rb +0 -134
  137. data/test/parser/test_quantifiers.rb +0 -301
  138. data/test/parser/test_refcalls.rb +0 -186
  139. data/test/parser/test_sets.rb +0 -179
  140. data/test/parser/test_types.rb +0 -50
  141. data/test/scanner/test_all.rb +0 -38
  142. data/test/scanner/test_anchors.rb +0 -38
  143. data/test/scanner/test_conditionals.rb +0 -184
  144. data/test/scanner/test_errors.rb +0 -91
  145. data/test/scanner/test_escapes.rb +0 -56
  146. data/test/scanner/test_free_space.rb +0 -200
  147. data/test/scanner/test_groups.rb +0 -79
  148. data/test/scanner/test_keep.rb +0 -35
  149. data/test/scanner/test_literals.rb +0 -89
  150. data/test/scanner/test_meta.rb +0 -40
  151. data/test/scanner/test_properties.rb +0 -312
  152. data/test/scanner/test_quantifiers.rb +0 -37
  153. data/test/scanner/test_refcalls.rb +0 -52
  154. data/test/scanner/test_scripts.rb +0 -53
  155. data/test/scanner/test_sets.rb +0 -119
  156. data/test/scanner/test_types.rb +0 -35
  157. data/test/scanner/test_unicode_blocks.rb +0 -30
  158. data/test/support/disable_autotest.rb +0 -8
  159. data/test/syntax/test_all.rb +0 -6
  160. data/test/syntax/test_syntax.rb +0 -61
  161. data/test/syntax/test_syntax_token_map.rb +0 -25
  162. data/test/syntax/versions/test_1.8.rb +0 -55
  163. data/test/syntax/versions/test_1.9.1.rb +0 -36
  164. data/test/syntax/versions/test_1.9.3.rb +0 -32
  165. data/test/syntax/versions/test_2.0.0.rb +0 -37
  166. data/test/syntax/versions/test_2.2.0.rb +0 -32
  167. data/test/syntax/versions/test_aliases.rb +0 -129
  168. data/test/syntax/versions/test_all.rb +0 -5
  169. data/test/test_all.rb +0 -5
  170. data/test/token/test_all.rb +0 -2
  171. data/test/token/test_token.rb +0 -107
@@ -1,10 +0,0 @@
1
- require "test/unit"
2
- require File.expand_path("../../lib/regexp_parser", __FILE__)
3
- require 'regexp_property_values'
4
-
5
- RS = Regexp::Scanner
6
- RL = Regexp::Lexer
7
- RP = Regexp::Parser
8
- RE = Regexp::Expression
9
-
10
- include Regexp::Expression
@@ -1,41 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- %w{
4
- literals nesting refcalls
5
- }.each do|tc|
6
- require File.expand_path("../test_#{tc}", __FILE__)
7
- end
8
-
9
- if RUBY_VERSION >= '2.0.0'
10
- %w{conditionals keep}.each do|tc|
11
- require File.expand_path("../test_#{tc}", __FILE__)
12
- end
13
- end
14
-
15
- class TestRegexpLexer < Test::Unit::TestCase
16
-
17
- def test_lexer_returns_an_array
18
- assert_instance_of Array, RL.lex('abc')
19
- end
20
-
21
- def test_lexer_returns_tokens
22
- tokens = RL.lex('^abc+[^one]{2,3}\b\d\\\C-C$')
23
-
24
- assert tokens.all?{ |token| token.kind_of?(Regexp::Token) },
25
- "Not all array members are tokens"
26
-
27
- assert tokens.all?{ |token| token.to_a.length == 8 },
28
- "Not all tokens have a length of 8"
29
- end
30
-
31
- def test_lexer_token_count
32
- tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
33
-
34
- assert_equal 28, tokens.length
35
- end
36
-
37
- def test_lexer_scan_alias
38
- assert_equal RL.lex(/a|b|c/), RL.scan(/a|b|c/)
39
- end
40
-
41
- end
@@ -1,127 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class LexerConditionals < Test::Unit::TestCase
4
- if RUBY_VERSION >= '2.0'
5
-
6
- # Basic lexer output and nesting tests
7
- tests = {
8
- '(?<A>a)(?(<A>)b|c)' => [3, :conditional, :open, '(?', 7, 9, 0, 0, 0],
9
- '(?<B>a)(?(<B>)b|c)' => [4, :conditional, :condition, '(<B>)', 9, 14, 0, 0, 1],
10
- '(?<C>a)(?(<C>)b|c)' => [6, :conditional, :separator, '|', 15, 16, 0, 0, 1],
11
- '(?<D>a)(?(<D>)b|c)' => [8, :conditional, :close, ')', 17, 18, 0, 0, 0],
12
- }
13
-
14
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
15
- define_method "test_lexer_#{type}_#{token}_#{count}" do
16
- tokens = RL.lex(pattern)
17
- struct = tokens.at(index)
18
-
19
- assert_equal type, struct.type
20
- assert_equal token, struct.token
21
- assert_equal text, struct.text
22
- assert_equal ts, struct.ts
23
- assert_equal te, struct.te
24
- assert_equal level, struct.level
25
- assert_equal set_level, struct.set_level
26
- assert_equal conditional_level, struct.conditional_level
27
- end
28
- end
29
-
30
- def test_lexer_conditional_mixed_nesting
31
- regexp = /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/
32
- tokens = RL.lex(regexp)
33
-
34
- [
35
- [ 0, :group, :capture, '(', 0, 1, 0, 0, 0],
36
- [ 1, :group, :named, '(?<A>', 1, 6, 1, 0, 0],
37
-
38
- [ 5, :conditional, :open, '(?', 13, 15, 2, 0, 0],
39
- [ 6, :conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
40
- [ 8, :conditional, :separator, '|', 21, 22, 2, 0, 1],
41
-
42
- [10, :conditional, :open, '(?', 23, 25, 3, 0, 1],
43
- [11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
44
-
45
- [12, :set, :open, '[', 30, 31, 3, 0, 2],
46
- [13, :literal, :literal, 'e', 31, 32, 3, 1, 2],
47
- [14, :set, :range, '-', 32, 33, 3, 1, 2],
48
- [15, :literal, :literal, 'g', 33, 34, 3, 1, 2],
49
- [16, :set, :close, ']', 34, 35, 3, 0, 2],
50
-
51
- [17, :conditional, :separator, '|', 35, 36, 3, 0, 2],
52
- [23, :conditional, :close, ')', 41, 42, 3, 0, 1],
53
- [25, :conditional, :close, ')', 43, 44, 2, 0, 0],
54
-
55
- [26, :group, :close, ')', 44, 45, 1, 0, 0],
56
- [27, :group, :close, ')', 45, 46, 0, 0, 0]
57
- ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
58
- struct = tokens.at(index)
59
-
60
- assert_equal type, struct.type
61
- assert_equal token, struct.token
62
- assert_equal text, struct.text
63
- assert_equal ts, struct.ts
64
- assert_equal te, struct.te
65
- assert_equal level, struct.level
66
- assert_equal set_level, struct.set_level
67
- assert_equal conditional_level, struct.conditional_level
68
- end
69
- end
70
-
71
- def test_lexer_conditional_deep_nesting
72
- regexp = /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/
73
- tokens = RL.lex(regexp)
74
-
75
- [
76
- [ 9, :conditional, :open, '(?', 9, 11, 0, 0, 0],
77
- [10, :conditional, :condition, '(1)', 11, 14, 0, 0, 1],
78
-
79
- [11, :conditional, :open, '(?', 14, 16, 0, 0, 1],
80
- [12, :conditional, :condition, '(2)', 16, 19, 0, 0, 2],
81
-
82
- [13, :conditional, :open, '(?', 19, 21, 0, 0, 2],
83
- [14, :conditional, :condition, '(3)', 21, 24, 0, 0, 3],
84
-
85
- [16, :conditional, :separator, '|', 25, 26, 0, 0, 3],
86
-
87
- [18, :conditional, :close, ')', 27, 28, 0, 0, 2],
88
- [19, :conditional, :close, ')', 28, 29, 0, 0, 1],
89
-
90
- [20, :conditional, :separator, '|', 29, 30, 0, 0, 1],
91
-
92
- [21, :conditional, :open, '(?', 30, 32, 0, 0, 1],
93
- [22, :conditional, :condition, '(3)', 32, 35, 0, 0, 2],
94
-
95
- [23, :conditional, :open, '(?', 35, 37, 0, 0, 2],
96
- [24, :conditional, :condition, '(2)', 37, 40, 0, 0, 3],
97
-
98
- [26, :conditional, :separator, '|', 41, 42, 0, 0, 3],
99
-
100
- [28, :conditional, :close, ')', 43, 44, 0, 0, 2],
101
-
102
- [29, :conditional, :separator, '|', 44, 45, 0, 0, 2],
103
-
104
- [30, :conditional, :open, '(?', 45, 47, 0, 0, 2],
105
- [31, :conditional, :condition, '(1)', 47, 50, 0, 0, 3],
106
-
107
- [33, :conditional, :separator, '|', 51, 52, 0, 0, 3],
108
-
109
- [35, :conditional, :close, ')', 53, 54, 0, 0, 2],
110
- [36, :conditional, :close, ')', 54, 55, 0, 0, 1],
111
- [37, :conditional, :close, ')', 55, 56, 0, 0, 0]
112
- ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
113
- struct = tokens.at(index)
114
-
115
- assert_equal type, struct.type
116
- assert_equal token, struct.token
117
- assert_equal text, struct.text
118
- assert_equal ts, struct.ts
119
- assert_equal te, struct.te
120
- assert_equal level, struct.level
121
- assert_equal set_level, struct.set_level
122
- assert_equal conditional_level, struct.conditional_level
123
- end
124
- end
125
-
126
- end # if RUBY_VERSION >= '2.0'
127
- end
@@ -1,24 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class LexerKeep < Test::Unit::TestCase
4
-
5
- def test_lex_keep_token
6
- regexp = /ab\Kcd/
7
- tokens = RL.lex(regexp)
8
-
9
- assert_equal :keep, tokens[1].type
10
- assert_equal :mark, tokens[1].token
11
- end
12
-
13
- def test_lex_keep_nested
14
- regexp = /(a\Kb)|(c\\\Kd)ef/
15
- tokens = RL.lex(regexp)
16
-
17
- assert_equal :keep, tokens[2].type
18
- assert_equal :mark, tokens[2].token
19
-
20
- assert_equal :keep, tokens[9].type
21
- assert_equal :mark, tokens[9].token
22
- end
23
-
24
- end
@@ -1,130 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require File.expand_path("../../helpers", __FILE__)
4
-
5
- class LexerLiterals < Test::Unit::TestCase
6
-
7
- tests = {
8
- # ascii, single byte characters
9
- 'a' => {
10
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
11
- },
12
-
13
- 'ab+' => {
14
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
15
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
16
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0],
17
- },
18
-
19
-
20
- # 2 byte wide characters, Arabic
21
- 'ا' => {
22
- 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0],
23
- },
24
-
25
- 'aاbبcت' => {
26
- 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0],
27
- },
28
-
29
- 'aاbبت?' => {
30
- 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
31
- 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
32
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
33
- },
34
-
35
- 'aا?bبcت+' => {
36
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
37
- 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
38
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
39
- 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
40
- 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
41
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0],
42
- },
43
-
44
- 'a(اbب+)cت?' => {
45
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
46
- 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
47
- 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
48
- 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
49
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
50
- 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
51
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
52
- 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
53
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0],
54
- },
55
-
56
-
57
- # 3 byte wide characters, Japanese
58
- 'ab?れます+cd' => {
59
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
60
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
61
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
62
- 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
63
- 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
64
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
65
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0],
66
- },
67
-
68
-
69
- # 4 byte wide characters, Osmanya
70
- '𐒀𐒁?𐒂ab+𐒃' => {
71
- 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
72
- 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
73
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
74
- 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
75
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
76
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
77
- 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0],
78
- },
79
-
80
- 'mu𝄞?si*𝄫c+' => {
81
- 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
82
- 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
83
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
84
- 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
85
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
86
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
87
- 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
88
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
89
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
90
- },
91
- }
92
-
93
- tests.each_with_index do |(pattern, checks), count|
94
- define_method "test_lex_literal_runs_#{count}" do
95
- tokens = RL.lex(pattern)
96
-
97
- checks.each do |index, (type, token, text, ts, te, level, set_level, conditional_level)|
98
- struct = tokens.at(index)
99
-
100
- assert_equal type, struct.type
101
- assert_equal token, struct.token
102
- assert_equal text, struct.text
103
- assert_equal ts, struct.ts
104
- assert_equal te, struct.te
105
- assert_equal level, struct.level
106
- assert_equal set_level, struct.set_level
107
- assert_equal conditional_level, struct.conditional_level
108
- end
109
- end
110
- end
111
-
112
- def test_lex_single_2_byte_char
113
- tokens = RL.lex('ا+')
114
-
115
- assert_equal 2, tokens.length
116
- end
117
-
118
- def test_lex_single_3_byte_char
119
- tokens = RL.lex('れ+')
120
-
121
- assert_equal 2, tokens.length
122
- end
123
-
124
- def test_lex_single_4_byte_char
125
- tokens = RL.lex('𝄞+')
126
-
127
- assert_equal 2, tokens.length
128
- end
129
-
130
- end
@@ -1,132 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class LexerNesting < Test::Unit::TestCase
4
-
5
- tests = {
6
- '(((b)))' => {
7
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
8
- 1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
9
- 2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
10
- 3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
11
- 4 => [:group, :close, ')', 4, 5, 2, 0, 0],
12
- 5 => [:group, :close, ')', 5, 6, 1, 0, 0],
13
- 6 => [:group, :close, ')', 6, 7, 0, 0, 0],
14
- },
15
-
16
- '(\((b)\))' => {
17
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
18
- 1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
19
- 2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
20
- 3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
21
- 4 => [:group, :close, ')', 5, 6, 1, 0, 0],
22
- 5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
23
- 6 => [:group, :close, ')', 8, 9, 0, 0, 0],
24
- },
25
-
26
- '(?>a(?>b(?>c)))' => {
27
- 0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
28
- 2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
29
- 4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
30
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
31
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
32
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0],
33
- },
34
-
35
- '(?:a(?:b(?:c)))' => {
36
- 0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
37
- 2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
38
- 4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
39
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
40
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
41
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0],
42
- },
43
-
44
- '(?=a(?!b(?<=c(?<!d))))' => {
45
- 0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
46
- 2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
47
- 4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
48
- 6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
49
- 8 => [:group, :close, ')', 18, 19, 3, 0, 0],
50
- 9 => [:group, :close, ')', 19, 20, 2, 0, 0],
51
- 10 => [:group, :close, ')', 20, 21, 1, 0, 0],
52
- 11 => [:group, :close, ')', 21, 22, 0, 0, 0],
53
- },
54
-
55
- '((?#a)b(?#c)d(?#e))' => {
56
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
57
- 1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
58
- 3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
59
- 5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
60
- 6 => [:group, :close, ')', 18, 19, 0, 0, 0],
61
- },
62
-
63
- 'a[b-e]f' => {
64
- 1 => [:set, :open, '[', 1, 2, 0, 0, 0],
65
- 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
66
- 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
67
- 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
68
- 5 => [:set, :close, ']', 5, 6, 0, 0, 0],
69
- },
70
-
71
- '[[:word:]&&[^c]z]' => {
72
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
73
- 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
74
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
75
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
76
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
77
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
78
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
79
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
80
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
81
- },
82
-
83
- '[\p{word}&&[^c]z]' => {
84
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
85
- 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
86
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
87
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
88
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
89
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
90
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
91
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
92
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
93
- },
94
-
95
- '[a[b[c[d-g]]]]' => {
96
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
97
- 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
98
- 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
99
- 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
100
- 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
101
- 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
102
- 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
103
- 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
104
- 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
105
- 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
106
- 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
107
- 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
108
- 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
109
- 13 => [:set, :close, ']', 13, 14, 0, 0, 0],
110
- },
111
- }
112
-
113
- tests.each_with_index do |(pattern, checks), count|
114
- define_method "test_lex_nesting_in_'#{pattern}'_#{count}" do
115
- tokens = RL.lex(pattern, 'ruby/1.9')
116
-
117
- checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
118
- struct = tokens.at(offset)
119
-
120
- assert_equal type, struct.type
121
- assert_equal token, struct.token
122
- assert_equal text, struct.text
123
- assert_equal ts, struct.ts
124
- assert_equal te, struct.te
125
- assert_equal level, struct.level
126
- assert_equal set_level, struct.set_level
127
- assert_equal conditional_level, struct.conditional_level
128
- end
129
- end
130
- end
131
-
132
- end