regexp_parser 1.3.0 → 1.7.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -1
  3. data/Gemfile +3 -3
  4. data/README.md +12 -19
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  18. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  19. data/lib/regexp_parser/expression/sequence.rb +3 -6
  20. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  21. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  22. data/lib/regexp_parser/lexer.rb +30 -44
  23. data/lib/regexp_parser/parser.rb +47 -24
  24. data/lib/regexp_parser/scanner.rb +1228 -1367
  25. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  26. data/lib/regexp_parser/scanner/properties/long.yml +34 -1
  27. data/lib/regexp_parser/scanner/properties/short.yml +12 -0
  28. data/lib/regexp_parser/scanner/scanner.rl +101 -194
  29. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  30. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
  31. data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  33. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  34. data/lib/regexp_parser/version.rb +1 -1
  35. data/regexp_parser.gemspec +3 -3
  36. data/spec/expression/base_spec.rb +94 -0
  37. data/spec/expression/clone_spec.rb +120 -0
  38. data/spec/expression/conditional_spec.rb +89 -0
  39. data/spec/expression/free_space_spec.rb +27 -0
  40. data/spec/expression/methods/match_length_spec.rb +161 -0
  41. data/spec/expression/methods/match_spec.rb +25 -0
  42. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  43. data/spec/expression/methods/tests_spec.rb +99 -0
  44. data/spec/expression/methods/traverse_spec.rb +161 -0
  45. data/spec/expression/options_spec.rb +128 -0
  46. data/spec/expression/root_spec.rb +9 -0
  47. data/spec/expression/sequence_spec.rb +9 -0
  48. data/spec/expression/subexpression_spec.rb +50 -0
  49. data/spec/expression/to_h_spec.rb +26 -0
  50. data/spec/expression/to_s_spec.rb +100 -0
  51. data/spec/lexer/all_spec.rb +22 -0
  52. data/spec/lexer/conditionals_spec.rb +53 -0
  53. data/spec/lexer/delimiters_spec.rb +68 -0
  54. data/spec/lexer/escapes_spec.rb +14 -0
  55. data/spec/lexer/keep_spec.rb +10 -0
  56. data/spec/lexer/literals_spec.rb +89 -0
  57. data/spec/lexer/nesting_spec.rb +99 -0
  58. data/spec/lexer/refcalls_spec.rb +55 -0
  59. data/spec/parser/all_spec.rb +43 -0
  60. data/spec/parser/alternation_spec.rb +88 -0
  61. data/spec/parser/anchors_spec.rb +17 -0
  62. data/spec/parser/conditionals_spec.rb +179 -0
  63. data/spec/parser/errors_spec.rb +30 -0
  64. data/spec/parser/escapes_spec.rb +121 -0
  65. data/spec/parser/free_space_spec.rb +130 -0
  66. data/spec/parser/groups_spec.rb +108 -0
  67. data/spec/parser/keep_spec.rb +6 -0
  68. data/spec/parser/posix_classes_spec.rb +8 -0
  69. data/spec/parser/properties_spec.rb +115 -0
  70. data/spec/parser/quantifiers_spec.rb +52 -0
  71. data/spec/parser/refcalls_spec.rb +112 -0
  72. data/spec/parser/set/intersections_spec.rb +127 -0
  73. data/spec/parser/set/ranges_spec.rb +111 -0
  74. data/spec/parser/sets_spec.rb +178 -0
  75. data/spec/parser/types_spec.rb +18 -0
  76. data/spec/scanner/all_spec.rb +18 -0
  77. data/spec/scanner/anchors_spec.rb +21 -0
  78. data/spec/scanner/conditionals_spec.rb +128 -0
  79. data/spec/scanner/delimiters_spec.rb +52 -0
  80. data/spec/scanner/errors_spec.rb +67 -0
  81. data/spec/scanner/escapes_spec.rb +53 -0
  82. data/spec/scanner/free_space_spec.rb +133 -0
  83. data/spec/scanner/groups_spec.rb +52 -0
  84. data/spec/scanner/keep_spec.rb +10 -0
  85. data/spec/scanner/literals_spec.rb +49 -0
  86. data/spec/scanner/meta_spec.rb +18 -0
  87. data/spec/scanner/properties_spec.rb +64 -0
  88. data/spec/scanner/quantifiers_spec.rb +20 -0
  89. data/spec/scanner/refcalls_spec.rb +36 -0
  90. data/spec/scanner/sets_spec.rb +102 -0
  91. data/spec/scanner/types_spec.rb +14 -0
  92. data/spec/spec_helper.rb +15 -0
  93. data/{test → spec}/support/runner.rb +9 -8
  94. data/spec/support/shared_examples.rb +77 -0
  95. data/{test → spec}/support/warning_extractor.rb +5 -7
  96. data/spec/syntax/syntax_spec.rb +48 -0
  97. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  98. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  99. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  100. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  101. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  102. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  103. data/spec/syntax/versions/aliases_spec.rb +37 -0
  104. data/spec/token/token_spec.rb +85 -0
  105. metadata +151 -146
  106. data/test/expression/test_all.rb +0 -12
  107. data/test/expression/test_base.rb +0 -90
  108. data/test/expression/test_clone.rb +0 -89
  109. data/test/expression/test_conditionals.rb +0 -113
  110. data/test/expression/test_free_space.rb +0 -35
  111. data/test/expression/test_set.rb +0 -84
  112. data/test/expression/test_strfregexp.rb +0 -230
  113. data/test/expression/test_subexpression.rb +0 -58
  114. data/test/expression/test_tests.rb +0 -99
  115. data/test/expression/test_to_h.rb +0 -59
  116. data/test/expression/test_to_s.rb +0 -104
  117. data/test/expression/test_traverse.rb +0 -161
  118. data/test/helpers.rb +0 -10
  119. data/test/lexer/test_all.rb +0 -41
  120. data/test/lexer/test_conditionals.rb +0 -127
  121. data/test/lexer/test_keep.rb +0 -24
  122. data/test/lexer/test_literals.rb +0 -130
  123. data/test/lexer/test_nesting.rb +0 -132
  124. data/test/lexer/test_refcalls.rb +0 -56
  125. data/test/parser/set/test_intersections.rb +0 -127
  126. data/test/parser/set/test_ranges.rb +0 -111
  127. data/test/parser/test_all.rb +0 -64
  128. data/test/parser/test_alternation.rb +0 -92
  129. data/test/parser/test_anchors.rb +0 -34
  130. data/test/parser/test_conditionals.rb +0 -187
  131. data/test/parser/test_errors.rb +0 -63
  132. data/test/parser/test_escapes.rb +0 -134
  133. data/test/parser/test_free_space.rb +0 -139
  134. data/test/parser/test_groups.rb +0 -289
  135. data/test/parser/test_keep.rb +0 -21
  136. data/test/parser/test_posix_classes.rb +0 -27
  137. data/test/parser/test_properties.rb +0 -133
  138. data/test/parser/test_quantifiers.rb +0 -301
  139. data/test/parser/test_refcalls.rb +0 -186
  140. data/test/parser/test_sets.rb +0 -179
  141. data/test/parser/test_types.rb +0 -50
  142. data/test/scanner/test_all.rb +0 -38
  143. data/test/scanner/test_anchors.rb +0 -38
  144. data/test/scanner/test_conditionals.rb +0 -184
  145. data/test/scanner/test_errors.rb +0 -91
  146. data/test/scanner/test_escapes.rb +0 -56
  147. data/test/scanner/test_free_space.rb +0 -200
  148. data/test/scanner/test_groups.rb +0 -79
  149. data/test/scanner/test_keep.rb +0 -35
  150. data/test/scanner/test_literals.rb +0 -89
  151. data/test/scanner/test_meta.rb +0 -40
  152. data/test/scanner/test_properties.rb +0 -312
  153. data/test/scanner/test_quantifiers.rb +0 -37
  154. data/test/scanner/test_refcalls.rb +0 -52
  155. data/test/scanner/test_scripts.rb +0 -53
  156. data/test/scanner/test_sets.rb +0 -119
  157. data/test/scanner/test_types.rb +0 -35
  158. data/test/scanner/test_unicode_blocks.rb +0 -30
  159. data/test/support/disable_autotest.rb +0 -8
  160. data/test/syntax/test_all.rb +0 -6
  161. data/test/syntax/test_syntax.rb +0 -61
  162. data/test/syntax/test_syntax_token_map.rb +0 -25
  163. data/test/syntax/versions/test_1.8.rb +0 -55
  164. data/test/syntax/versions/test_1.9.1.rb +0 -36
  165. data/test/syntax/versions/test_1.9.3.rb +0 -32
  166. data/test/syntax/versions/test_2.0.0.rb +0 -37
  167. data/test/syntax/versions/test_2.2.0.rb +0 -32
  168. data/test/syntax/versions/test_aliases.rb +0 -129
  169. data/test/syntax/versions/test_all.rb +0 -5
  170. data/test/test_all.rb +0 -5
  171. data/test/token/test_all.rb +0 -2
  172. data/test/token/test_token.rb +0 -107
@@ -1,10 +0,0 @@
1
- require "test/unit"
2
- require File.expand_path("../../lib/regexp_parser", __FILE__)
3
- require 'regexp_property_values'
4
-
5
- RS = Regexp::Scanner
6
- RL = Regexp::Lexer
7
- RP = Regexp::Parser
8
- RE = Regexp::Expression
9
-
10
- include Regexp::Expression
@@ -1,41 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- %w{
4
- literals nesting refcalls
5
- }.each do|tc|
6
- require File.expand_path("../test_#{tc}", __FILE__)
7
- end
8
-
9
- if RUBY_VERSION >= '2.0.0'
10
- %w{conditionals keep}.each do|tc|
11
- require File.expand_path("../test_#{tc}", __FILE__)
12
- end
13
- end
14
-
15
- class TestRegexpLexer < Test::Unit::TestCase
16
-
17
- def test_lexer_returns_an_array
18
- assert_instance_of Array, RL.lex('abc')
19
- end
20
-
21
- def test_lexer_returns_tokens
22
- tokens = RL.lex('^abc+[^one]{2,3}\b\d\\\C-C$')
23
-
24
- assert tokens.all?{ |token| token.kind_of?(Regexp::Token) },
25
- "Not all array members are tokens"
26
-
27
- assert tokens.all?{ |token| token.to_a.length == 8 },
28
- "Not all tokens have a length of 8"
29
- end
30
-
31
- def test_lexer_token_count
32
- tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
33
-
34
- assert_equal 28, tokens.length
35
- end
36
-
37
- def test_lexer_scan_alias
38
- assert_equal RL.lex(/a|b|c/), RL.scan(/a|b|c/)
39
- end
40
-
41
- end
@@ -1,127 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class LexerConditionals < Test::Unit::TestCase
4
- if RUBY_VERSION >= '2.0'
5
-
6
- # Basic lexer output and nesting tests
7
- tests = {
8
- '(?<A>a)(?(<A>)b|c)' => [3, :conditional, :open, '(?', 7, 9, 0, 0, 0],
9
- '(?<B>a)(?(<B>)b|c)' => [4, :conditional, :condition, '(<B>)', 9, 14, 0, 0, 1],
10
- '(?<C>a)(?(<C>)b|c)' => [6, :conditional, :separator, '|', 15, 16, 0, 0, 1],
11
- '(?<D>a)(?(<D>)b|c)' => [8, :conditional, :close, ')', 17, 18, 0, 0, 0],
12
- }
13
-
14
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
15
- define_method "test_lexer_#{type}_#{token}_#{count}" do
16
- tokens = RL.lex(pattern)
17
- struct = tokens.at(index)
18
-
19
- assert_equal type, struct.type
20
- assert_equal token, struct.token
21
- assert_equal text, struct.text
22
- assert_equal ts, struct.ts
23
- assert_equal te, struct.te
24
- assert_equal level, struct.level
25
- assert_equal set_level, struct.set_level
26
- assert_equal conditional_level, struct.conditional_level
27
- end
28
- end
29
-
30
- def test_lexer_conditional_mixed_nesting
31
- regexp = /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/
32
- tokens = RL.lex(regexp)
33
-
34
- [
35
- [ 0, :group, :capture, '(', 0, 1, 0, 0, 0],
36
- [ 1, :group, :named, '(?<A>', 1, 6, 1, 0, 0],
37
-
38
- [ 5, :conditional, :open, '(?', 13, 15, 2, 0, 0],
39
- [ 6, :conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
40
- [ 8, :conditional, :separator, '|', 21, 22, 2, 0, 1],
41
-
42
- [10, :conditional, :open, '(?', 23, 25, 3, 0, 1],
43
- [11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
44
-
45
- [12, :set, :open, '[', 30, 31, 3, 0, 2],
46
- [13, :literal, :literal, 'e', 31, 32, 3, 1, 2],
47
- [14, :set, :range, '-', 32, 33, 3, 1, 2],
48
- [15, :literal, :literal, 'g', 33, 34, 3, 1, 2],
49
- [16, :set, :close, ']', 34, 35, 3, 0, 2],
50
-
51
- [17, :conditional, :separator, '|', 35, 36, 3, 0, 2],
52
- [23, :conditional, :close, ')', 41, 42, 3, 0, 1],
53
- [25, :conditional, :close, ')', 43, 44, 2, 0, 0],
54
-
55
- [26, :group, :close, ')', 44, 45, 1, 0, 0],
56
- [27, :group, :close, ')', 45, 46, 0, 0, 0]
57
- ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
58
- struct = tokens.at(index)
59
-
60
- assert_equal type, struct.type
61
- assert_equal token, struct.token
62
- assert_equal text, struct.text
63
- assert_equal ts, struct.ts
64
- assert_equal te, struct.te
65
- assert_equal level, struct.level
66
- assert_equal set_level, struct.set_level
67
- assert_equal conditional_level, struct.conditional_level
68
- end
69
- end
70
-
71
- def test_lexer_conditional_deep_nesting
72
- regexp = /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/
73
- tokens = RL.lex(regexp)
74
-
75
- [
76
- [ 9, :conditional, :open, '(?', 9, 11, 0, 0, 0],
77
- [10, :conditional, :condition, '(1)', 11, 14, 0, 0, 1],
78
-
79
- [11, :conditional, :open, '(?', 14, 16, 0, 0, 1],
80
- [12, :conditional, :condition, '(2)', 16, 19, 0, 0, 2],
81
-
82
- [13, :conditional, :open, '(?', 19, 21, 0, 0, 2],
83
- [14, :conditional, :condition, '(3)', 21, 24, 0, 0, 3],
84
-
85
- [16, :conditional, :separator, '|', 25, 26, 0, 0, 3],
86
-
87
- [18, :conditional, :close, ')', 27, 28, 0, 0, 2],
88
- [19, :conditional, :close, ')', 28, 29, 0, 0, 1],
89
-
90
- [20, :conditional, :separator, '|', 29, 30, 0, 0, 1],
91
-
92
- [21, :conditional, :open, '(?', 30, 32, 0, 0, 1],
93
- [22, :conditional, :condition, '(3)', 32, 35, 0, 0, 2],
94
-
95
- [23, :conditional, :open, '(?', 35, 37, 0, 0, 2],
96
- [24, :conditional, :condition, '(2)', 37, 40, 0, 0, 3],
97
-
98
- [26, :conditional, :separator, '|', 41, 42, 0, 0, 3],
99
-
100
- [28, :conditional, :close, ')', 43, 44, 0, 0, 2],
101
-
102
- [29, :conditional, :separator, '|', 44, 45, 0, 0, 2],
103
-
104
- [30, :conditional, :open, '(?', 45, 47, 0, 0, 2],
105
- [31, :conditional, :condition, '(1)', 47, 50, 0, 0, 3],
106
-
107
- [33, :conditional, :separator, '|', 51, 52, 0, 0, 3],
108
-
109
- [35, :conditional, :close, ')', 53, 54, 0, 0, 2],
110
- [36, :conditional, :close, ')', 54, 55, 0, 0, 1],
111
- [37, :conditional, :close, ')', 55, 56, 0, 0, 0]
112
- ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
113
- struct = tokens.at(index)
114
-
115
- assert_equal type, struct.type
116
- assert_equal token, struct.token
117
- assert_equal text, struct.text
118
- assert_equal ts, struct.ts
119
- assert_equal te, struct.te
120
- assert_equal level, struct.level
121
- assert_equal set_level, struct.set_level
122
- assert_equal conditional_level, struct.conditional_level
123
- end
124
- end
125
-
126
- end # if RUBY_VERSION >= '2.0'
127
- end
@@ -1,24 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class LexerKeep < Test::Unit::TestCase
4
-
5
- def test_lex_keep_token
6
- regexp = /ab\Kcd/
7
- tokens = RL.lex(regexp)
8
-
9
- assert_equal :keep, tokens[1].type
10
- assert_equal :mark, tokens[1].token
11
- end
12
-
13
- def test_lex_keep_nested
14
- regexp = /(a\Kb)|(c\\\Kd)ef/
15
- tokens = RL.lex(regexp)
16
-
17
- assert_equal :keep, tokens[2].type
18
- assert_equal :mark, tokens[2].token
19
-
20
- assert_equal :keep, tokens[9].type
21
- assert_equal :mark, tokens[9].token
22
- end
23
-
24
- end
@@ -1,130 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require File.expand_path("../../helpers", __FILE__)
4
-
5
- class LexerLiterals < Test::Unit::TestCase
6
-
7
- tests = {
8
- # ascii, single byte characters
9
- 'a' => {
10
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
11
- },
12
-
13
- 'ab+' => {
14
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
15
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
16
- 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0],
17
- },
18
-
19
-
20
- # 2 byte wide characters, Arabic
21
- 'ا' => {
22
- 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0],
23
- },
24
-
25
- 'aاbبcت' => {
26
- 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0],
27
- },
28
-
29
- 'aاbبت?' => {
30
- 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
31
- 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
32
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
33
- },
34
-
35
- 'aا?bبcت+' => {
36
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
37
- 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
38
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
39
- 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
40
- 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
41
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0],
42
- },
43
-
44
- 'a(اbب+)cت?' => {
45
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
46
- 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
47
- 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
48
- 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
49
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
50
- 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
51
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
52
- 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
53
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0],
54
- },
55
-
56
-
57
- # 3 byte wide characters, Japanese
58
- 'ab?れます+cd' => {
59
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
60
- 1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
61
- 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
62
- 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
63
- 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
64
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
65
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0],
66
- },
67
-
68
-
69
- # 4 byte wide characters, Osmanya
70
- '𐒀𐒁?𐒂ab+𐒃' => {
71
- 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
72
- 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
73
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
74
- 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
75
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
76
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
77
- 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0],
78
- },
79
-
80
- 'mu𝄞?si*𝄫c+' => {
81
- 0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
82
- 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
83
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
84
- 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
85
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
86
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
87
- 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
88
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
89
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
90
- },
91
- }
92
-
93
- tests.each_with_index do |(pattern, checks), count|
94
- define_method "test_lex_literal_runs_#{count}" do
95
- tokens = RL.lex(pattern)
96
-
97
- checks.each do |index, (type, token, text, ts, te, level, set_level, conditional_level)|
98
- struct = tokens.at(index)
99
-
100
- assert_equal type, struct.type
101
- assert_equal token, struct.token
102
- assert_equal text, struct.text
103
- assert_equal ts, struct.ts
104
- assert_equal te, struct.te
105
- assert_equal level, struct.level
106
- assert_equal set_level, struct.set_level
107
- assert_equal conditional_level, struct.conditional_level
108
- end
109
- end
110
- end
111
-
112
- def test_lex_single_2_byte_char
113
- tokens = RL.lex('ا+')
114
-
115
- assert_equal 2, tokens.length
116
- end
117
-
118
- def test_lex_single_3_byte_char
119
- tokens = RL.lex('れ+')
120
-
121
- assert_equal 2, tokens.length
122
- end
123
-
124
- def test_lex_single_4_byte_char
125
- tokens = RL.lex('𝄞+')
126
-
127
- assert_equal 2, tokens.length
128
- end
129
-
130
- end
@@ -1,132 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class LexerNesting < Test::Unit::TestCase
4
-
5
- tests = {
6
- '(((b)))' => {
7
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
8
- 1 => [:group, :capture, '(', 1, 2, 1, 0, 0],
9
- 2 => [:group, :capture, '(', 2, 3, 2, 0, 0],
10
- 3 => [:literal, :literal, 'b', 3, 4, 3, 0, 0],
11
- 4 => [:group, :close, ')', 4, 5, 2, 0, 0],
12
- 5 => [:group, :close, ')', 5, 6, 1, 0, 0],
13
- 6 => [:group, :close, ')', 6, 7, 0, 0, 0],
14
- },
15
-
16
- '(\((b)\))' => {
17
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
18
- 1 => [:escape, :group_open, '\(', 1, 3, 1, 0, 0],
19
- 2 => [:group, :capture, '(', 3, 4, 1, 0, 0],
20
- 3 => [:literal, :literal, 'b', 4, 5, 2, 0, 0],
21
- 4 => [:group, :close, ')', 5, 6, 1, 0, 0],
22
- 5 => [:escape, :group_close, '\)', 6, 8, 1, 0, 0],
23
- 6 => [:group, :close, ')', 8, 9, 0, 0, 0],
24
- },
25
-
26
- '(?>a(?>b(?>c)))' => {
27
- 0 => [:group, :atomic, '(?>', 0, 3, 0, 0, 0],
28
- 2 => [:group, :atomic, '(?>', 4, 7, 1, 0, 0],
29
- 4 => [:group, :atomic, '(?>', 8, 11, 2, 0, 0],
30
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
31
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
32
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0],
33
- },
34
-
35
- '(?:a(?:b(?:c)))' => {
36
- 0 => [:group, :passive, '(?:', 0, 3, 0, 0, 0],
37
- 2 => [:group, :passive, '(?:', 4, 7, 1, 0, 0],
38
- 4 => [:group, :passive, '(?:', 8, 11, 2, 0, 0],
39
- 6 => [:group, :close, ')', 12, 13, 2, 0, 0],
40
- 7 => [:group, :close, ')', 13, 14, 1, 0, 0],
41
- 8 => [:group, :close, ')', 14, 15, 0, 0, 0],
42
- },
43
-
44
- '(?=a(?!b(?<=c(?<!d))))' => {
45
- 0 => [:assertion, :lookahead, '(?=', 0, 3, 0, 0, 0],
46
- 2 => [:assertion, :nlookahead, '(?!', 4, 7, 1, 0, 0],
47
- 4 => [:assertion, :lookbehind, '(?<=', 8, 12, 2, 0, 0],
48
- 6 => [:assertion, :nlookbehind, '(?<!', 13, 17, 3, 0, 0],
49
- 8 => [:group, :close, ')', 18, 19, 3, 0, 0],
50
- 9 => [:group, :close, ')', 19, 20, 2, 0, 0],
51
- 10 => [:group, :close, ')', 20, 21, 1, 0, 0],
52
- 11 => [:group, :close, ')', 21, 22, 0, 0, 0],
53
- },
54
-
55
- '((?#a)b(?#c)d(?#e))' => {
56
- 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
57
- 1 => [:group, :comment, '(?#a)', 1, 6, 1, 0, 0],
58
- 3 => [:group, :comment, '(?#c)', 7, 12, 1, 0, 0],
59
- 5 => [:group, :comment, '(?#e)', 13, 18, 1, 0, 0],
60
- 6 => [:group, :close, ')', 18, 19, 0, 0, 0],
61
- },
62
-
63
- 'a[b-e]f' => {
64
- 1 => [:set, :open, '[', 1, 2, 0, 0, 0],
65
- 2 => [:literal, :literal, 'b', 2, 3, 0, 1, 0],
66
- 3 => [:set, :range, '-', 3, 4, 0, 1, 0],
67
- 4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
68
- 5 => [:set, :close, ']', 5, 6, 0, 0, 0],
69
- },
70
-
71
- '[[:word:]&&[^c]z]' => {
72
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
73
- 1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
74
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
75
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
76
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
77
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
78
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
79
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
80
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
81
- },
82
-
83
- '[\p{word}&&[^c]z]' => {
84
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
85
- 1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
86
- 2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
87
- 3 => [:set, :open, '[', 11, 12, 0, 1, 0],
88
- 4 => [:set, :negate, '^', 12, 13, 0, 2, 0],
89
- 5 => [:literal, :literal, 'c', 13, 14, 0, 2, 0],
90
- 6 => [:set, :close, ']', 14, 15, 0, 1, 0],
91
- 7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
92
- 8 => [:set, :close, ']', 16, 17, 0, 0, 0],
93
- },
94
-
95
- '[a[b[c[d-g]]]]' => {
96
- 0 => [:set, :open, '[', 0, 1, 0, 0, 0],
97
- 1 => [:literal, :literal, 'a', 1, 2, 0, 1, 0],
98
- 2 => [:set, :open, '[', 2, 3, 0, 1, 0],
99
- 3 => [:literal, :literal, 'b', 3, 4, 0, 2, 0],
100
- 4 => [:set, :open, '[', 4, 5, 0, 2, 0],
101
- 5 => [:literal, :literal, 'c', 5, 6, 0, 3, 0],
102
- 6 => [:set, :open, '[', 6, 7, 0, 3, 0],
103
- 7 => [:literal, :literal, 'd', 7, 8, 0, 4, 0],
104
- 8 => [:set, :range, '-', 8, 9, 0, 4, 0],
105
- 9 => [:literal, :literal, 'g', 9, 10, 0, 4, 0],
106
- 10 => [:set, :close, ']', 10, 11, 0, 3, 0],
107
- 11 => [:set, :close, ']', 11, 12, 0, 2, 0],
108
- 12 => [:set, :close, ']', 12, 13, 0, 1, 0],
109
- 13 => [:set, :close, ']', 13, 14, 0, 0, 0],
110
- },
111
- }
112
-
113
- tests.each_with_index do |(pattern, checks), count|
114
- define_method "test_lex_nesting_in_'#{pattern}'_#{count}" do
115
- tokens = RL.lex(pattern, 'ruby/1.9')
116
-
117
- checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
118
- struct = tokens.at(offset)
119
-
120
- assert_equal type, struct.type
121
- assert_equal token, struct.token
122
- assert_equal text, struct.text
123
- assert_equal ts, struct.ts
124
- assert_equal te, struct.te
125
- assert_equal level, struct.level
126
- assert_equal set_level, struct.set_level
127
- assert_equal conditional_level, struct.conditional_level
128
- end
129
- end
130
- end
131
-
132
- end