regexp_parser 1.4.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -1
  3. data/Gemfile +3 -3
  4. data/README.md +11 -18
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  18. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  19. data/lib/regexp_parser/expression/sequence.rb +3 -6
  20. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  21. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  22. data/lib/regexp_parser/lexer.rb +30 -44
  23. data/lib/regexp_parser/parser.rb +47 -24
  24. data/lib/regexp_parser/scanner.rb +1228 -1367
  25. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  26. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  27. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  28. data/lib/regexp_parser/scanner/scanner.rl +101 -194
  29. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  30. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  31. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  33. data/lib/regexp_parser/version.rb +1 -1
  34. data/regexp_parser.gemspec +2 -2
  35. data/spec/expression/base_spec.rb +94 -0
  36. data/spec/expression/clone_spec.rb +120 -0
  37. data/spec/expression/conditional_spec.rb +89 -0
  38. data/spec/expression/free_space_spec.rb +27 -0
  39. data/spec/expression/methods/match_length_spec.rb +161 -0
  40. data/spec/expression/methods/match_spec.rb +25 -0
  41. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  42. data/spec/expression/methods/tests_spec.rb +99 -0
  43. data/spec/expression/methods/traverse_spec.rb +161 -0
  44. data/spec/expression/options_spec.rb +128 -0
  45. data/spec/expression/root_spec.rb +9 -0
  46. data/spec/expression/sequence_spec.rb +9 -0
  47. data/spec/expression/subexpression_spec.rb +50 -0
  48. data/spec/expression/to_h_spec.rb +26 -0
  49. data/spec/expression/to_s_spec.rb +100 -0
  50. data/spec/lexer/all_spec.rb +22 -0
  51. data/spec/lexer/conditionals_spec.rb +53 -0
  52. data/spec/lexer/delimiters_spec.rb +68 -0
  53. data/spec/lexer/escapes_spec.rb +14 -0
  54. data/spec/lexer/keep_spec.rb +10 -0
  55. data/spec/lexer/literals_spec.rb +89 -0
  56. data/spec/lexer/nesting_spec.rb +99 -0
  57. data/spec/lexer/refcalls_spec.rb +55 -0
  58. data/spec/parser/all_spec.rb +43 -0
  59. data/spec/parser/alternation_spec.rb +88 -0
  60. data/spec/parser/anchors_spec.rb +17 -0
  61. data/spec/parser/conditionals_spec.rb +179 -0
  62. data/spec/parser/errors_spec.rb +30 -0
  63. data/spec/parser/escapes_spec.rb +121 -0
  64. data/spec/parser/free_space_spec.rb +130 -0
  65. data/spec/parser/groups_spec.rb +108 -0
  66. data/spec/parser/keep_spec.rb +6 -0
  67. data/spec/parser/posix_classes_spec.rb +8 -0
  68. data/spec/parser/properties_spec.rb +115 -0
  69. data/spec/parser/quantifiers_spec.rb +52 -0
  70. data/spec/parser/refcalls_spec.rb +112 -0
  71. data/spec/parser/set/intersections_spec.rb +127 -0
  72. data/spec/parser/set/ranges_spec.rb +111 -0
  73. data/spec/parser/sets_spec.rb +178 -0
  74. data/spec/parser/types_spec.rb +18 -0
  75. data/spec/scanner/all_spec.rb +18 -0
  76. data/spec/scanner/anchors_spec.rb +21 -0
  77. data/spec/scanner/conditionals_spec.rb +128 -0
  78. data/spec/scanner/delimiters_spec.rb +52 -0
  79. data/spec/scanner/errors_spec.rb +67 -0
  80. data/spec/scanner/escapes_spec.rb +53 -0
  81. data/spec/scanner/free_space_spec.rb +133 -0
  82. data/spec/scanner/groups_spec.rb +52 -0
  83. data/spec/scanner/keep_spec.rb +10 -0
  84. data/spec/scanner/literals_spec.rb +49 -0
  85. data/spec/scanner/meta_spec.rb +18 -0
  86. data/spec/scanner/properties_spec.rb +64 -0
  87. data/spec/scanner/quantifiers_spec.rb +20 -0
  88. data/spec/scanner/refcalls_spec.rb +36 -0
  89. data/spec/scanner/sets_spec.rb +102 -0
  90. data/spec/scanner/types_spec.rb +14 -0
  91. data/spec/spec_helper.rb +15 -0
  92. data/{test → spec}/support/runner.rb +9 -8
  93. data/spec/support/shared_examples.rb +77 -0
  94. data/{test → spec}/support/warning_extractor.rb +5 -7
  95. data/spec/syntax/syntax_spec.rb +48 -0
  96. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  97. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  98. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  99. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  100. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  101. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  102. data/spec/syntax/versions/aliases_spec.rb +37 -0
  103. data/spec/token/token_spec.rb +85 -0
  104. metadata +149 -144
  105. data/test/expression/test_all.rb +0 -12
  106. data/test/expression/test_base.rb +0 -90
  107. data/test/expression/test_clone.rb +0 -89
  108. data/test/expression/test_conditionals.rb +0 -113
  109. data/test/expression/test_free_space.rb +0 -35
  110. data/test/expression/test_set.rb +0 -84
  111. data/test/expression/test_strfregexp.rb +0 -230
  112. data/test/expression/test_subexpression.rb +0 -58
  113. data/test/expression/test_tests.rb +0 -99
  114. data/test/expression/test_to_h.rb +0 -59
  115. data/test/expression/test_to_s.rb +0 -104
  116. data/test/expression/test_traverse.rb +0 -161
  117. data/test/helpers.rb +0 -10
  118. data/test/lexer/test_all.rb +0 -41
  119. data/test/lexer/test_conditionals.rb +0 -127
  120. data/test/lexer/test_keep.rb +0 -24
  121. data/test/lexer/test_literals.rb +0 -130
  122. data/test/lexer/test_nesting.rb +0 -132
  123. data/test/lexer/test_refcalls.rb +0 -56
  124. data/test/parser/set/test_intersections.rb +0 -127
  125. data/test/parser/set/test_ranges.rb +0 -111
  126. data/test/parser/test_all.rb +0 -64
  127. data/test/parser/test_alternation.rb +0 -92
  128. data/test/parser/test_anchors.rb +0 -34
  129. data/test/parser/test_conditionals.rb +0 -187
  130. data/test/parser/test_errors.rb +0 -63
  131. data/test/parser/test_escapes.rb +0 -134
  132. data/test/parser/test_free_space.rb +0 -139
  133. data/test/parser/test_groups.rb +0 -289
  134. data/test/parser/test_keep.rb +0 -21
  135. data/test/parser/test_posix_classes.rb +0 -27
  136. data/test/parser/test_properties.rb +0 -134
  137. data/test/parser/test_quantifiers.rb +0 -301
  138. data/test/parser/test_refcalls.rb +0 -186
  139. data/test/parser/test_sets.rb +0 -179
  140. data/test/parser/test_types.rb +0 -50
  141. data/test/scanner/test_all.rb +0 -38
  142. data/test/scanner/test_anchors.rb +0 -38
  143. data/test/scanner/test_conditionals.rb +0 -184
  144. data/test/scanner/test_errors.rb +0 -91
  145. data/test/scanner/test_escapes.rb +0 -56
  146. data/test/scanner/test_free_space.rb +0 -200
  147. data/test/scanner/test_groups.rb +0 -79
  148. data/test/scanner/test_keep.rb +0 -35
  149. data/test/scanner/test_literals.rb +0 -89
  150. data/test/scanner/test_meta.rb +0 -40
  151. data/test/scanner/test_properties.rb +0 -312
  152. data/test/scanner/test_quantifiers.rb +0 -37
  153. data/test/scanner/test_refcalls.rb +0 -52
  154. data/test/scanner/test_scripts.rb +0 -53
  155. data/test/scanner/test_sets.rb +0 -119
  156. data/test/scanner/test_types.rb +0 -35
  157. data/test/scanner/test_unicode_blocks.rb +0 -30
  158. data/test/support/disable_autotest.rb +0 -8
  159. data/test/syntax/test_all.rb +0 -6
  160. data/test/syntax/test_syntax.rb +0 -61
  161. data/test/syntax/test_syntax_token_map.rb +0 -25
  162. data/test/syntax/versions/test_1.8.rb +0 -55
  163. data/test/syntax/versions/test_1.9.1.rb +0 -36
  164. data/test/syntax/versions/test_1.9.3.rb +0 -32
  165. data/test/syntax/versions/test_2.0.0.rb +0 -37
  166. data/test/syntax/versions/test_2.2.0.rb +0 -32
  167. data/test/syntax/versions/test_aliases.rb +0 -129
  168. data/test/syntax/versions/test_all.rb +0 -5
  169. data/test/test_all.rb +0 -5
  170. data/test/token/test_all.rb +0 -2
  171. data/test/token/test_token.rb +0 -107
@@ -1,179 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class TestParserSets < Test::Unit::TestCase
4
- def test_parse_set_basic
5
- root = RP.parse('[ab]+')
6
- exp = root[0]
7
-
8
- assert_equal CharacterSet, exp.class
9
- assert_equal 2, exp.count
10
-
11
- assert_equal Literal, exp[0].class
12
- assert_equal 'a', exp[0].text
13
- assert_equal Literal, exp[1].class
14
- assert_equal 'b', exp[1].text
15
-
16
- assert exp.quantified?
17
- assert_equal 1, exp.quantifier.min
18
- assert_equal(-1, exp.quantifier.max)
19
- end
20
-
21
- def test_parse_set_char_type
22
- root = RP.parse('[a\dc]')
23
- exp = root[0]
24
-
25
- assert_equal CharacterSet, exp.class
26
- assert_equal 3, exp.count
27
-
28
- assert_equal CharacterType::Digit, exp[1].class
29
- assert_equal '\d', exp[1].text
30
- end
31
-
32
- def test_parse_set_escape_sequence_backspace
33
- root = RP.parse('[a\bc]')
34
- exp = root[0]
35
-
36
- assert_equal CharacterSet, exp.class
37
- assert_equal 3, exp.count
38
-
39
- assert_equal EscapeSequence::Backspace, exp[1].class
40
- assert_equal '\b', exp[1].text
41
-
42
- assert exp.matches?('a')
43
- assert exp.matches?("\b")
44
- refute exp.matches?('b')
45
- assert exp.matches?('c')
46
- end
47
-
48
- def test_parse_set_escape_sequence_hex
49
- root = RP.parse('[a\x20c]', :any)
50
- exp = root[0]
51
-
52
- assert_equal CharacterSet, exp.class
53
- assert_equal 3, exp.count
54
-
55
- assert_equal EscapeSequence::Hex, exp[1].class
56
- assert_equal '\x20', exp[1].text
57
- end
58
-
59
- def test_parse_set_escape_sequence_codepoint
60
- root = RP.parse('[a\u0640]')
61
- exp = root[0]
62
-
63
- assert_equal CharacterSet, exp.class
64
- assert_equal 2, exp.count
65
-
66
- assert_equal EscapeSequence::Codepoint, exp[1].class
67
- assert_equal '\u0640', exp[1].text
68
- end
69
-
70
- def test_parse_set_escape_sequence_codepoint_list
71
- root = RP.parse('[a\u{41 1F60D}]')
72
- exp = root[0]
73
-
74
- assert_equal CharacterSet, exp.class
75
- assert_equal 2, exp.count
76
-
77
- assert_equal EscapeSequence::CodepointList, exp[1].class
78
- assert_equal '\u{41 1F60D}', exp[1].text
79
- end
80
-
81
- def test_parse_set_posix_class
82
- root = RP.parse('[[:digit:][:^lower:]]+')
83
- exp = root[0]
84
-
85
- assert_equal CharacterSet, exp.class
86
- assert_equal 2, exp.count
87
-
88
- assert_equal PosixClass, exp[0].class
89
- assert_equal '[:digit:]', exp[0].text
90
- assert_equal PosixClass, exp[1].class
91
- assert_equal '[:^lower:]', exp[1].text
92
- end
93
-
94
- def test_parse_set_nesting
95
- root = RP.parse('[a[b[c]d]e]')
96
-
97
- exp = root[0]
98
- assert_equal CharacterSet, exp.class
99
- assert_equal 3, exp.count
100
- assert_equal Literal, exp[0].class
101
- assert_equal Literal, exp[2].class
102
-
103
- subset1 = exp[1]
104
- assert_equal CharacterSet, subset1.class
105
- assert_equal 3, subset1.count
106
- assert_equal Literal, subset1[0].class
107
- assert_equal Literal, subset1[2].class
108
-
109
- subset2 = subset1[1]
110
- assert_equal CharacterSet, subset2.class
111
- assert_equal 1, subset2.count
112
- assert_equal Literal, subset2[0].class
113
- end
114
-
115
- def test_parse_set_nesting_negative
116
- root = RP.parse('[a[^b[c]]]')
117
- exp = root[0]
118
-
119
- assert_equal CharacterSet, exp.class
120
- assert_equal 2, exp.count
121
- assert_equal Literal, exp[0].class
122
- refute exp.negative?
123
-
124
- subset1 = exp[1]
125
- assert_equal CharacterSet, subset1.class
126
- assert_equal 2, subset1.count
127
- assert_equal Literal, subset1[0].class
128
- assert subset1.negative?
129
-
130
- subset2 = subset1[1]
131
- assert_equal CharacterSet, subset2.class
132
- assert_equal 1, subset2.count
133
- assert_equal Literal, subset2[0].class
134
- refute subset2.negative?
135
- end
136
-
137
- def test_parse_set_nesting_to_s
138
- pattern = '[a[b[^c]]]'
139
- root = RP.parse(pattern)
140
-
141
- assert_equal pattern, root.to_s
142
- end
143
-
144
- def test_parse_set_literals_are_not_merged
145
- root = RP.parse("[#{'a' * 10}]")
146
- exp = root[0]
147
-
148
- assert_equal 10, exp.count
149
- end
150
-
151
- def test_parse_set_whitespace_is_not_merged
152
- root = RP.parse("[#{' ' * 10}]")
153
- exp = root[0]
154
-
155
- assert_equal 10, exp.count
156
- end
157
-
158
- def test_parse_set_whitespace_is_not_merged_in_x_mode
159
- root = RP.parse("(?x)[#{' ' * 10}]")
160
- exp = root[1]
161
-
162
- assert_equal 10, exp.count
163
- end
164
-
165
- # TODO: Collations and equivalents need own exp class if they ever get enabled
166
- def test_parse_set_collating_sequence
167
- root = RP.parse('[a[.span-ll.]h]', :any)
168
- exp = root[0]
169
-
170
- assert_equal '[.span-ll.]', exp[1].to_s
171
- end
172
-
173
- def test_parse_set_character_equivalents
174
- root = RP.parse('[a[=e=]h]', :any)
175
- exp = root[0]
176
-
177
- assert_equal '[=e=]', exp[1].to_s
178
- end
179
- end
@@ -1,50 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class TestParserTypes < Test::Unit::TestCase
4
-
5
- tests = {
6
- /a\dc/ => [1, :type, :digit, CharacterType::Digit],
7
- /a\Dc/ => [1, :type, :nondigit, CharacterType::NonDigit],
8
-
9
- /a\sc/ => [1, :type, :space, CharacterType::Space],
10
- /a\Sc/ => [1, :type, :nonspace, CharacterType::NonSpace],
11
-
12
- /a\hc/ => [1, :type, :hex, CharacterType::Hex],
13
- /a\Hc/ => [1, :type, :nonhex, CharacterType::NonHex],
14
-
15
- /a\wc/ => [1, :type, :word, CharacterType::Word],
16
- /a\Wc/ => [1, :type, :nonword, CharacterType::NonWord],
17
- }
18
-
19
- tests.each_with_index do |(pattern, (index, type, token, klass)), count|
20
- define_method "test_parse_type_#{token}_#{count}" do
21
- root = RP.parse(pattern, 'ruby/1.9')
22
- exp = root.expressions.at(index)
23
-
24
- assert exp.is_a?( klass ),
25
- "Expected #{klass}, but got #{exp.class.name}"
26
-
27
- assert_equal type, exp.type
28
- assert_equal token, exp.token
29
- end
30
- end
31
-
32
- tests_2_0 = {
33
- 'a\Rc' => [1, :type, :linebreak, CharacterType::Linebreak],
34
- 'a\Xc' => [1, :type, :xgrapheme, CharacterType::ExtendedGrapheme],
35
- }
36
-
37
- tests_2_0.each_with_index do |(pattern, (index, type, token, klass)), count|
38
- define_method "test_parse_type_#{token}_#{count}" do
39
- root = RP.parse(pattern, 'ruby/2.0')
40
- exp = root.expressions.at(index)
41
-
42
- assert exp.is_a?( klass ),
43
- "Expected #{klass}, but got #{exp.class.name}"
44
-
45
- assert_equal type, exp.type
46
- assert_equal token, exp.token
47
- end
48
- end
49
-
50
- end
@@ -1,38 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- %w[
4
- anchors errors escapes free_space groups literals meta
5
- properties quantifiers refcalls scripts sets types unicode_blocks
6
- ].each do |tc|
7
- require File.expand_path("../test_#{tc}", __FILE__)
8
- end
9
-
10
- if RUBY_VERSION >= '2.0.0'
11
- %w{conditionals keep}.each do|tc|
12
- require File.expand_path("../test_#{tc}", __FILE__)
13
- end
14
- end
15
-
16
- class TestRegexpScanner < Test::Unit::TestCase
17
-
18
- def test_scanner_returns_an_array
19
- assert_instance_of Array, RS.scan('abc')
20
- end
21
-
22
- def test_scanner_returns_tokens_as_arrays
23
- tokens = RS.scan('^abc+[^one]{2,3}\b\d\\\C-C$')
24
-
25
- all_arrays = tokens.all? do |token|
26
- token.kind_of?(Array) and token.length == 5
27
- end
28
-
29
- assert all_arrays, 'Not all tokens are arrays of 5 elements'
30
- end
31
-
32
- def test_scanner_token_count
33
- re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
34
-
35
- assert_equal 28, RS.scan(re).length
36
- end
37
-
38
- end
@@ -1,38 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class ScannerAnchors < Test::Unit::TestCase
4
-
5
- tests = {
6
- '^abc' => [0, :anchor, :bol, '^', 0, 1],
7
- 'abc$' => [1, :anchor, :eol, '$', 3, 4],
8
-
9
- '\Aabc' => [0, :anchor, :bos, '\A', 0, 2],
10
- 'abc\z' => [1, :anchor, :eos, '\z', 3, 5],
11
- 'abc\Z' => [1, :anchor, :eos_ob_eol, '\Z', 3, 5],
12
-
13
- 'a\bc' => [1, :anchor, :word_boundary, '\b', 1, 3],
14
- 'a\Bc' => [1, :anchor, :nonword_boundary, '\B', 1, 3],
15
-
16
- 'a\Gc' => [1, :anchor, :match_start, '\G', 1, 3],
17
-
18
- "\\\\Ac" => [0, :escape, :backslash, '\\\\', 0, 2],
19
- "a\\\\z" => [1, :escape, :backslash, '\\\\', 1, 3],
20
- "a\\\\Z" => [1, :escape, :backslash, '\\\\', 1, 3],
21
- "a\\\\bc" => [1, :escape, :backslash, '\\\\', 1, 3],
22
- "a\\\\Bc" => [1, :escape, :backslash, '\\\\', 1, 3],
23
- }
24
-
25
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
26
- define_method "test_scanner_#{type}_#{token}_#{count}" do
27
- tokens = RS.scan(pattern)
28
- result = tokens[index]
29
-
30
- assert_equal type, result[0]
31
- assert_equal token, result[1]
32
- assert_equal text, result[2]
33
- assert_equal ts, result[3]
34
- assert_equal te, result[4]
35
- end
36
- end
37
-
38
- end
@@ -1,184 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class ScannerConditionals < Test::Unit::TestCase
4
-
5
- # Basic conditional scan token tests
6
- tests = {
7
- /(a)(?(1)T|F)1/ => [3, :conditional, :open, '(?', 3, 5],
8
- /(a)(?(1)T|F)2/ => [4, :conditional, :condition_open, '(', 5, 6],
9
- /(a)(?(1)T|F)3/ => [5, :conditional, :condition, '1', 6, 7],
10
- /(a)(?(1)T|F)4/ => [6, :conditional, :condition_close, ')', 7, 8],
11
- /(a)(?(1)T|F)5/ => [7, :literal, :literal, 'T', 8, 9],
12
- /(a)(?(1)T|F)6/ => [8, :conditional, :separator, '|', 9, 10],
13
- /(a)(?(1)T|F)7/ => [9, :literal, :literal, 'F', 10, 11],
14
- /(a)(?(1)T|F)8/ => [10, :conditional, :close, ')', 11, 12],
15
-
16
- /(a)(?(1)TRUE)9/ => [8, :conditional, :close, ')', 12, 13],
17
-
18
- /(a)(?(1)TRUE|)10/ => [8, :conditional, :separator, '|', 12, 13],
19
- /(a)(?(1)TRUE|)11/ => [9, :conditional, :close, ')', 13, 14],
20
-
21
- /(?<N>A)(?(<N>)T|F)1/ => [5, :conditional, :condition, '<N>', 10, 13],
22
- /(?'N'A)(?('N')T|F)2/ => [5, :conditional, :condition, "'N'", 10, 13],
23
- }
24
-
25
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
26
- define_method "test_scanner_#{type}_#{token}_#{count}" do
27
- tokens = RS.scan(pattern)
28
- result = tokens[index]
29
-
30
- assert_equal type, result[0]
31
- assert_equal token, result[1]
32
- assert_equal text, result[2]
33
- assert_equal ts, result[3]
34
- assert_equal te, result[4]
35
- end
36
- end
37
-
38
- def test_scan_conditional_nested
39
- regexp = /(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))/
40
- tokens = RS.scan(regexp)
41
-
42
- [ [ 0, :group, :capture, '(', 0, 1],
43
- [ 1, :literal, :literal, 'a', 1, 2],
44
- [ 2, :group, :capture, '(', 2, 3],
45
- [ 3, :literal, :literal, 'b', 3, 4],
46
- [ 4, :group, :capture, '(', 4, 5],
47
- [ 5, :literal, :literal, 'c', 5, 6],
48
- [ 6, :group, :close, ')', 6, 7],
49
- [ 7, :group, :close, ')', 7, 8],
50
- [ 8, :group, :close, ')', 8, 9],
51
- [ 9, :conditional, :open, '(?', 9, 11],
52
- [10, :conditional, :condition_open, '(', 11, 12],
53
- [11, :conditional, :condition, '1', 12, 13],
54
- [12, :conditional, :condition_close, ')', 13, 14],
55
- [13, :conditional, :open, '(?', 14, 16],
56
- [14, :conditional, :condition_open, '(', 16, 17],
57
- [15, :conditional, :condition, '2', 17, 18],
58
- [16, :conditional, :condition_close, ')', 18, 19],
59
- [17, :literal, :literal, 'd', 19, 20],
60
- [18, :conditional, :separator, '|', 20, 21],
61
- [19, :conditional, :open, '(?', 21, 23],
62
- [20, :conditional, :condition_open, '(', 23, 24],
63
- [21, :conditional, :condition, '3', 24, 25],
64
- [22, :conditional, :condition_close, ')', 25, 26],
65
- [23, :literal, :literal, 'e', 26, 27],
66
- [24, :conditional, :separator, '|', 27, 28],
67
- [25, :literal, :literal, 'f', 28, 29],
68
- [26, :conditional, :close, ')', 29, 30],
69
- [27, :conditional, :close, ')', 30, 31],
70
- [28, :conditional, :separator, '|', 31, 32],
71
- [29, :conditional, :open, '(?', 32, 34],
72
- [30, :conditional, :condition_open, '(', 34, 35],
73
- [31, :conditional, :condition, '2', 35, 36],
74
- [32, :conditional, :condition_close, ')', 36, 37],
75
- [33, :conditional, :open, '(?', 37, 39],
76
- [34, :conditional, :condition_open, '(', 39, 40],
77
- [35, :conditional, :condition, '1', 40, 41],
78
- [36, :conditional, :condition_close, ')', 41, 42],
79
- [37, :literal, :literal, 'g', 42, 43],
80
- [38, :conditional, :separator, '|', 43, 44],
81
- [39, :literal, :literal, 'h', 44, 45],
82
- [40, :conditional, :close, ')', 45, 46],
83
- [41, :conditional, :close, ')', 46, 47],
84
- [42, :conditional, :close, ')', 47, 48]
85
- ].each do |index, type, token, text, ts, te|
86
- result = tokens[index]
87
-
88
- assert_equal type, result[0]
89
- assert_equal token, result[1]
90
- assert_equal text, result[2]
91
- assert_equal ts, result[3]
92
- assert_equal te, result[4]
93
- end
94
- end
95
-
96
- def test_scan_conditional_nested_groups
97
- regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
98
- tokens = RS.scan(regexp)
99
-
100
- [ [ 0, :group, :capture, '(', 0, 1],
101
- [ 1, :group, :capture, '(', 1, 2],
102
- [ 2, :literal, :literal, 'a', 2, 3],
103
- [ 3, :group, :close, ')', 3, 4],
104
- [ 4, :meta, :alternation, '|', 4, 5],
105
- [ 5, :group, :capture, '(', 5, 6],
106
- [ 6, :literal, :literal, 'b', 6, 7],
107
- [ 7, :group, :close, ')', 7, 8],
108
- [ 8, :meta, :alternation, '|', 8, 9],
109
- [ 9, :group, :capture, '(', 9, 10],
110
- [10, :conditional, :open, '(?', 10, 12],
111
- [11, :conditional, :condition_open, '(', 12, 13],
112
- [12, :conditional, :condition, '2', 13, 14],
113
- [13, :conditional, :condition_close, ')', 14, 15],
114
- [14, :group, :capture, '(', 15, 16],
115
- [15, :literal, :literal, 'c', 16, 17],
116
- [16, :group, :capture, '(', 17, 18],
117
- [17, :literal, :literal, 'd', 18, 19],
118
- [18, :meta, :alternation, '|', 19, 20],
119
- [19, :literal, :literal, 'e', 20, 21],
120
- [20, :group, :close, ')', 21, 22],
121
- [21, :quantifier, :one_or_more, '+', 22, 23],
122
- [22, :group, :close, ')', 23, 24],
123
- [23, :quantifier, :zero_or_one, '?', 24, 25],
124
- [24, :conditional, :separator, '|', 25, 26],
125
- [25, :conditional, :open, '(?', 26, 28],
126
- [26, :conditional, :condition_open, '(', 28, 29],
127
- [27, :conditional, :condition, '3', 29, 30],
128
- [28, :conditional, :condition_close, ')', 30, 31],
129
- [29, :literal, :literal, 'f', 31, 32],
130
- [30, :conditional, :separator, '|', 32, 33],
131
- [31, :conditional, :open, '(?', 33, 35],
132
- [32, :conditional, :condition_open, '(', 35, 36],
133
- [33, :conditional, :condition, '4', 36, 37],
134
- [34, :conditional, :condition_close, ')', 37, 38],
135
- [35, :group, :capture, '(', 38, 39],
136
- [36, :literal, :literal, 'g', 39, 40],
137
- [37, :meta, :alternation, '|', 40, 41],
138
- [38, :group, :capture, '(', 41, 42],
139
- [39, :literal, :literal, 'h', 42, 43],
140
- [40, :group, :close, ')', 43, 44],
141
- [41, :group, :capture, '(', 44, 45],
142
- [42, :literal, :literal, 'i', 45, 46],
143
- [43, :group, :close, ')', 46, 47],
144
- [44, :group, :close, ')', 47, 48],
145
- [45, :conditional, :close, ')', 48, 49],
146
- [46, :conditional, :close, ')', 49, 50],
147
- [47, :conditional, :close, ')', 50, 51],
148
- [48, :group, :close, ')', 51, 52],
149
- [49, :group, :close, ')', 52, 53]
150
- ].each do |index, type, token, text, ts, te|
151
- result = tokens[index]
152
-
153
- assert_equal type, result[0]
154
- assert_equal token, result[1]
155
- assert_equal text, result[2]
156
- assert_equal ts, result[3]
157
- assert_equal te, result[4]
158
- end
159
- end
160
-
161
- def test_scan_conditional_nested_alternation
162
- regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
163
- tokens = RS.scan(regexp)
164
-
165
- [9, 11, 17, 19, 32, 34, 40, 42, 46, 48].each do |index|
166
- result = tokens[index]
167
-
168
- assert_equal :meta, result[0]
169
- assert_equal :alternation, result[1]
170
- assert_equal '|', result[2]
171
- assert_equal 1, result[4] - result[3]
172
- end
173
-
174
- [14, 37].each do |index|
175
- result = tokens[index]
176
-
177
- assert_equal :conditional, result[0]
178
- assert_equal :separator, result[1]
179
- assert_equal '|', result[2]
180
- assert_equal 1, result[4] - result[3]
181
- end
182
- end
183
-
184
- end