regexp_parser 1.3.0 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +65 -1
  3. data/Gemfile +3 -3
  4. data/README.md +10 -14
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  18. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  19. data/lib/regexp_parser/expression/sequence.rb +3 -6
  20. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  21. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  22. data/lib/regexp_parser/lexer.rb +30 -44
  23. data/lib/regexp_parser/parser.rb +47 -24
  24. data/lib/regexp_parser/scanner.rb +1159 -1329
  25. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  26. data/lib/regexp_parser/scanner/properties/long.yml +34 -1
  27. data/lib/regexp_parser/scanner/properties/short.yml +12 -0
  28. data/lib/regexp_parser/scanner/scanner.rl +82 -190
  29. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  30. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
  31. data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  33. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  34. data/lib/regexp_parser/version.rb +1 -1
  35. data/regexp_parser.gemspec +3 -3
  36. data/spec/expression/base_spec.rb +94 -0
  37. data/spec/expression/clone_spec.rb +120 -0
  38. data/spec/expression/conditional_spec.rb +89 -0
  39. data/spec/expression/free_space_spec.rb +27 -0
  40. data/spec/expression/methods/match_length_spec.rb +161 -0
  41. data/spec/expression/methods/match_spec.rb +25 -0
  42. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  43. data/spec/expression/methods/tests_spec.rb +99 -0
  44. data/spec/expression/methods/traverse_spec.rb +161 -0
  45. data/spec/expression/options_spec.rb +128 -0
  46. data/spec/expression/root_spec.rb +9 -0
  47. data/spec/expression/sequence_spec.rb +9 -0
  48. data/spec/expression/subexpression_spec.rb +50 -0
  49. data/spec/expression/to_h_spec.rb +26 -0
  50. data/spec/expression/to_s_spec.rb +100 -0
  51. data/spec/lexer/all_spec.rb +22 -0
  52. data/spec/lexer/conditionals_spec.rb +53 -0
  53. data/spec/lexer/escapes_spec.rb +14 -0
  54. data/spec/lexer/keep_spec.rb +10 -0
  55. data/spec/lexer/literals_spec.rb +89 -0
  56. data/spec/lexer/nesting_spec.rb +99 -0
  57. data/spec/lexer/refcalls_spec.rb +55 -0
  58. data/spec/parser/all_spec.rb +43 -0
  59. data/spec/parser/alternation_spec.rb +88 -0
  60. data/spec/parser/anchors_spec.rb +17 -0
  61. data/spec/parser/conditionals_spec.rb +179 -0
  62. data/spec/parser/errors_spec.rb +30 -0
  63. data/spec/parser/escapes_spec.rb +121 -0
  64. data/spec/parser/free_space_spec.rb +130 -0
  65. data/spec/parser/groups_spec.rb +108 -0
  66. data/spec/parser/keep_spec.rb +6 -0
  67. data/spec/parser/posix_classes_spec.rb +8 -0
  68. data/spec/parser/properties_spec.rb +115 -0
  69. data/spec/parser/quantifiers_spec.rb +51 -0
  70. data/spec/parser/refcalls_spec.rb +112 -0
  71. data/spec/parser/set/intersections_spec.rb +127 -0
  72. data/spec/parser/set/ranges_spec.rb +111 -0
  73. data/spec/parser/sets_spec.rb +178 -0
  74. data/spec/parser/types_spec.rb +18 -0
  75. data/spec/scanner/all_spec.rb +18 -0
  76. data/spec/scanner/anchors_spec.rb +21 -0
  77. data/spec/scanner/conditionals_spec.rb +128 -0
  78. data/spec/scanner/errors_spec.rb +68 -0
  79. data/spec/scanner/escapes_spec.rb +53 -0
  80. data/spec/scanner/free_space_spec.rb +133 -0
  81. data/spec/scanner/groups_spec.rb +52 -0
  82. data/spec/scanner/keep_spec.rb +10 -0
  83. data/spec/scanner/literals_spec.rb +49 -0
  84. data/spec/scanner/meta_spec.rb +18 -0
  85. data/spec/scanner/properties_spec.rb +64 -0
  86. data/spec/scanner/quantifiers_spec.rb +20 -0
  87. data/spec/scanner/refcalls_spec.rb +36 -0
  88. data/spec/scanner/sets_spec.rb +102 -0
  89. data/spec/scanner/types_spec.rb +14 -0
  90. data/spec/spec_helper.rb +15 -0
  91. data/{test → spec}/support/runner.rb +9 -8
  92. data/spec/support/shared_examples.rb +77 -0
  93. data/{test → spec}/support/warning_extractor.rb +5 -7
  94. data/spec/syntax/syntax_spec.rb +48 -0
  95. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  96. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  97. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  98. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  99. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  100. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  101. data/spec/syntax/versions/aliases_spec.rb +37 -0
  102. data/spec/token/token_spec.rb +85 -0
  103. metadata +144 -143
  104. data/test/expression/test_all.rb +0 -12
  105. data/test/expression/test_base.rb +0 -90
  106. data/test/expression/test_clone.rb +0 -89
  107. data/test/expression/test_conditionals.rb +0 -113
  108. data/test/expression/test_free_space.rb +0 -35
  109. data/test/expression/test_set.rb +0 -84
  110. data/test/expression/test_strfregexp.rb +0 -230
  111. data/test/expression/test_subexpression.rb +0 -58
  112. data/test/expression/test_tests.rb +0 -99
  113. data/test/expression/test_to_h.rb +0 -59
  114. data/test/expression/test_to_s.rb +0 -104
  115. data/test/expression/test_traverse.rb +0 -161
  116. data/test/helpers.rb +0 -10
  117. data/test/lexer/test_all.rb +0 -41
  118. data/test/lexer/test_conditionals.rb +0 -127
  119. data/test/lexer/test_keep.rb +0 -24
  120. data/test/lexer/test_literals.rb +0 -130
  121. data/test/lexer/test_nesting.rb +0 -132
  122. data/test/lexer/test_refcalls.rb +0 -56
  123. data/test/parser/set/test_intersections.rb +0 -127
  124. data/test/parser/set/test_ranges.rb +0 -111
  125. data/test/parser/test_all.rb +0 -64
  126. data/test/parser/test_alternation.rb +0 -92
  127. data/test/parser/test_anchors.rb +0 -34
  128. data/test/parser/test_conditionals.rb +0 -187
  129. data/test/parser/test_errors.rb +0 -63
  130. data/test/parser/test_escapes.rb +0 -134
  131. data/test/parser/test_free_space.rb +0 -139
  132. data/test/parser/test_groups.rb +0 -289
  133. data/test/parser/test_keep.rb +0 -21
  134. data/test/parser/test_posix_classes.rb +0 -27
  135. data/test/parser/test_properties.rb +0 -133
  136. data/test/parser/test_quantifiers.rb +0 -301
  137. data/test/parser/test_refcalls.rb +0 -186
  138. data/test/parser/test_sets.rb +0 -179
  139. data/test/parser/test_types.rb +0 -50
  140. data/test/scanner/test_all.rb +0 -38
  141. data/test/scanner/test_anchors.rb +0 -38
  142. data/test/scanner/test_conditionals.rb +0 -184
  143. data/test/scanner/test_errors.rb +0 -91
  144. data/test/scanner/test_escapes.rb +0 -56
  145. data/test/scanner/test_free_space.rb +0 -200
  146. data/test/scanner/test_groups.rb +0 -79
  147. data/test/scanner/test_keep.rb +0 -35
  148. data/test/scanner/test_literals.rb +0 -89
  149. data/test/scanner/test_meta.rb +0 -40
  150. data/test/scanner/test_properties.rb +0 -312
  151. data/test/scanner/test_quantifiers.rb +0 -37
  152. data/test/scanner/test_refcalls.rb +0 -52
  153. data/test/scanner/test_scripts.rb +0 -53
  154. data/test/scanner/test_sets.rb +0 -119
  155. data/test/scanner/test_types.rb +0 -35
  156. data/test/scanner/test_unicode_blocks.rb +0 -30
  157. data/test/support/disable_autotest.rb +0 -8
  158. data/test/syntax/test_all.rb +0 -6
  159. data/test/syntax/test_syntax.rb +0 -61
  160. data/test/syntax/test_syntax_token_map.rb +0 -25
  161. data/test/syntax/versions/test_1.8.rb +0 -55
  162. data/test/syntax/versions/test_1.9.1.rb +0 -36
  163. data/test/syntax/versions/test_1.9.3.rb +0 -32
  164. data/test/syntax/versions/test_2.0.0.rb +0 -37
  165. data/test/syntax/versions/test_2.2.0.rb +0 -32
  166. data/test/syntax/versions/test_aliases.rb +0 -129
  167. data/test/syntax/versions/test_all.rb +0 -5
  168. data/test/test_all.rb +0 -5
  169. data/test/token/test_all.rb +0 -2
  170. data/test/token/test_token.rb +0 -107
@@ -1,179 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class TestParserSets < Test::Unit::TestCase
4
- def test_parse_set_basic
5
- root = RP.parse('[ab]+')
6
- exp = root[0]
7
-
8
- assert_equal CharacterSet, exp.class
9
- assert_equal 2, exp.count
10
-
11
- assert_equal Literal, exp[0].class
12
- assert_equal 'a', exp[0].text
13
- assert_equal Literal, exp[1].class
14
- assert_equal 'b', exp[1].text
15
-
16
- assert exp.quantified?
17
- assert_equal 1, exp.quantifier.min
18
- assert_equal(-1, exp.quantifier.max)
19
- end
20
-
21
- def test_parse_set_char_type
22
- root = RP.parse('[a\dc]')
23
- exp = root[0]
24
-
25
- assert_equal CharacterSet, exp.class
26
- assert_equal 3, exp.count
27
-
28
- assert_equal CharacterType::Digit, exp[1].class
29
- assert_equal '\d', exp[1].text
30
- end
31
-
32
- def test_parse_set_escape_sequence_backspace
33
- root = RP.parse('[a\bc]')
34
- exp = root[0]
35
-
36
- assert_equal CharacterSet, exp.class
37
- assert_equal 3, exp.count
38
-
39
- assert_equal EscapeSequence::Backspace, exp[1].class
40
- assert_equal '\b', exp[1].text
41
-
42
- assert exp.matches?('a')
43
- assert exp.matches?("\b")
44
- refute exp.matches?('b')
45
- assert exp.matches?('c')
46
- end
47
-
48
- def test_parse_set_escape_sequence_hex
49
- root = RP.parse('[a\x20c]', :any)
50
- exp = root[0]
51
-
52
- assert_equal CharacterSet, exp.class
53
- assert_equal 3, exp.count
54
-
55
- assert_equal EscapeSequence::Hex, exp[1].class
56
- assert_equal '\x20', exp[1].text
57
- end
58
-
59
- def test_parse_set_escape_sequence_codepoint
60
- root = RP.parse('[a\u0640]')
61
- exp = root[0]
62
-
63
- assert_equal CharacterSet, exp.class
64
- assert_equal 2, exp.count
65
-
66
- assert_equal EscapeSequence::Codepoint, exp[1].class
67
- assert_equal '\u0640', exp[1].text
68
- end
69
-
70
- def test_parse_set_escape_sequence_codepoint_list
71
- root = RP.parse('[a\u{41 1F60D}]')
72
- exp = root[0]
73
-
74
- assert_equal CharacterSet, exp.class
75
- assert_equal 2, exp.count
76
-
77
- assert_equal EscapeSequence::CodepointList, exp[1].class
78
- assert_equal '\u{41 1F60D}', exp[1].text
79
- end
80
-
81
- def test_parse_set_posix_class
82
- root = RP.parse('[[:digit:][:^lower:]]+')
83
- exp = root[0]
84
-
85
- assert_equal CharacterSet, exp.class
86
- assert_equal 2, exp.count
87
-
88
- assert_equal PosixClass, exp[0].class
89
- assert_equal '[:digit:]', exp[0].text
90
- assert_equal PosixClass, exp[1].class
91
- assert_equal '[:^lower:]', exp[1].text
92
- end
93
-
94
- def test_parse_set_nesting
95
- root = RP.parse('[a[b[c]d]e]')
96
-
97
- exp = root[0]
98
- assert_equal CharacterSet, exp.class
99
- assert_equal 3, exp.count
100
- assert_equal Literal, exp[0].class
101
- assert_equal Literal, exp[2].class
102
-
103
- subset1 = exp[1]
104
- assert_equal CharacterSet, subset1.class
105
- assert_equal 3, subset1.count
106
- assert_equal Literal, subset1[0].class
107
- assert_equal Literal, subset1[2].class
108
-
109
- subset2 = subset1[1]
110
- assert_equal CharacterSet, subset2.class
111
- assert_equal 1, subset2.count
112
- assert_equal Literal, subset2[0].class
113
- end
114
-
115
- def test_parse_set_nesting_negative
116
- root = RP.parse('[a[^b[c]]]')
117
- exp = root[0]
118
-
119
- assert_equal CharacterSet, exp.class
120
- assert_equal 2, exp.count
121
- assert_equal Literal, exp[0].class
122
- refute exp.negative?
123
-
124
- subset1 = exp[1]
125
- assert_equal CharacterSet, subset1.class
126
- assert_equal 2, subset1.count
127
- assert_equal Literal, subset1[0].class
128
- assert subset1.negative?
129
-
130
- subset2 = subset1[1]
131
- assert_equal CharacterSet, subset2.class
132
- assert_equal 1, subset2.count
133
- assert_equal Literal, subset2[0].class
134
- refute subset2.negative?
135
- end
136
-
137
- def test_parse_set_nesting_to_s
138
- pattern = '[a[b[^c]]]'
139
- root = RP.parse(pattern)
140
-
141
- assert_equal pattern, root.to_s
142
- end
143
-
144
- def test_parse_set_literals_are_not_merged
145
- root = RP.parse("[#{'a' * 10}]")
146
- exp = root[0]
147
-
148
- assert_equal 10, exp.count
149
- end
150
-
151
- def test_parse_set_whitespace_is_not_merged
152
- root = RP.parse("[#{' ' * 10}]")
153
- exp = root[0]
154
-
155
- assert_equal 10, exp.count
156
- end
157
-
158
- def test_parse_set_whitespace_is_not_merged_in_x_mode
159
- root = RP.parse("(?x)[#{' ' * 10}]")
160
- exp = root[1]
161
-
162
- assert_equal 10, exp.count
163
- end
164
-
165
- # TODO: Collations and equivalents need own exp class if they ever get enabled
166
- def test_parse_set_collating_sequence
167
- root = RP.parse('[a[.span-ll.]h]', :any)
168
- exp = root[0]
169
-
170
- assert_equal '[.span-ll.]', exp[1].to_s
171
- end
172
-
173
- def test_parse_set_character_equivalents
174
- root = RP.parse('[a[=e=]h]', :any)
175
- exp = root[0]
176
-
177
- assert_equal '[=e=]', exp[1].to_s
178
- end
179
- end
@@ -1,50 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class TestParserTypes < Test::Unit::TestCase
4
-
5
- tests = {
6
- /a\dc/ => [1, :type, :digit, CharacterType::Digit],
7
- /a\Dc/ => [1, :type, :nondigit, CharacterType::NonDigit],
8
-
9
- /a\sc/ => [1, :type, :space, CharacterType::Space],
10
- /a\Sc/ => [1, :type, :nonspace, CharacterType::NonSpace],
11
-
12
- /a\hc/ => [1, :type, :hex, CharacterType::Hex],
13
- /a\Hc/ => [1, :type, :nonhex, CharacterType::NonHex],
14
-
15
- /a\wc/ => [1, :type, :word, CharacterType::Word],
16
- /a\Wc/ => [1, :type, :nonword, CharacterType::NonWord],
17
- }
18
-
19
- tests.each_with_index do |(pattern, (index, type, token, klass)), count|
20
- define_method "test_parse_type_#{token}_#{count}" do
21
- root = RP.parse(pattern, 'ruby/1.9')
22
- exp = root.expressions.at(index)
23
-
24
- assert exp.is_a?( klass ),
25
- "Expected #{klass}, but got #{exp.class.name}"
26
-
27
- assert_equal type, exp.type
28
- assert_equal token, exp.token
29
- end
30
- end
31
-
32
- tests_2_0 = {
33
- 'a\Rc' => [1, :type, :linebreak, CharacterType::Linebreak],
34
- 'a\Xc' => [1, :type, :xgrapheme, CharacterType::ExtendedGrapheme],
35
- }
36
-
37
- tests_2_0.each_with_index do |(pattern, (index, type, token, klass)), count|
38
- define_method "test_parse_type_#{token}_#{count}" do
39
- root = RP.parse(pattern, 'ruby/2.0')
40
- exp = root.expressions.at(index)
41
-
42
- assert exp.is_a?( klass ),
43
- "Expected #{klass}, but got #{exp.class.name}"
44
-
45
- assert_equal type, exp.type
46
- assert_equal token, exp.token
47
- end
48
- end
49
-
50
- end
@@ -1,38 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- %w[
4
- anchors errors escapes free_space groups literals meta
5
- properties quantifiers refcalls scripts sets types unicode_blocks
6
- ].each do |tc|
7
- require File.expand_path("../test_#{tc}", __FILE__)
8
- end
9
-
10
- if RUBY_VERSION >= '2.0.0'
11
- %w{conditionals keep}.each do|tc|
12
- require File.expand_path("../test_#{tc}", __FILE__)
13
- end
14
- end
15
-
16
- class TestRegexpScanner < Test::Unit::TestCase
17
-
18
- def test_scanner_returns_an_array
19
- assert_instance_of Array, RS.scan('abc')
20
- end
21
-
22
- def test_scanner_returns_tokens_as_arrays
23
- tokens = RS.scan('^abc+[^one]{2,3}\b\d\\\C-C$')
24
-
25
- all_arrays = tokens.all? do |token|
26
- token.kind_of?(Array) and token.length == 5
27
- end
28
-
29
- assert all_arrays, 'Not all tokens are arrays of 5 elements'
30
- end
31
-
32
- def test_scanner_token_count
33
- re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
34
-
35
- assert_equal 28, RS.scan(re).length
36
- end
37
-
38
- end
@@ -1,38 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class ScannerAnchors < Test::Unit::TestCase
4
-
5
- tests = {
6
- '^abc' => [0, :anchor, :bol, '^', 0, 1],
7
- 'abc$' => [1, :anchor, :eol, '$', 3, 4],
8
-
9
- '\Aabc' => [0, :anchor, :bos, '\A', 0, 2],
10
- 'abc\z' => [1, :anchor, :eos, '\z', 3, 5],
11
- 'abc\Z' => [1, :anchor, :eos_ob_eol, '\Z', 3, 5],
12
-
13
- 'a\bc' => [1, :anchor, :word_boundary, '\b', 1, 3],
14
- 'a\Bc' => [1, :anchor, :nonword_boundary, '\B', 1, 3],
15
-
16
- 'a\Gc' => [1, :anchor, :match_start, '\G', 1, 3],
17
-
18
- "\\\\Ac" => [0, :escape, :backslash, '\\\\', 0, 2],
19
- "a\\\\z" => [1, :escape, :backslash, '\\\\', 1, 3],
20
- "a\\\\Z" => [1, :escape, :backslash, '\\\\', 1, 3],
21
- "a\\\\bc" => [1, :escape, :backslash, '\\\\', 1, 3],
22
- "a\\\\Bc" => [1, :escape, :backslash, '\\\\', 1, 3],
23
- }
24
-
25
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
26
- define_method "test_scanner_#{type}_#{token}_#{count}" do
27
- tokens = RS.scan(pattern)
28
- result = tokens[index]
29
-
30
- assert_equal type, result[0]
31
- assert_equal token, result[1]
32
- assert_equal text, result[2]
33
- assert_equal ts, result[3]
34
- assert_equal te, result[4]
35
- end
36
- end
37
-
38
- end
@@ -1,184 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class ScannerConditionals < Test::Unit::TestCase
4
-
5
- # Basic conditional scan token tests
6
- tests = {
7
- /(a)(?(1)T|F)1/ => [3, :conditional, :open, '(?', 3, 5],
8
- /(a)(?(1)T|F)2/ => [4, :conditional, :condition_open, '(', 5, 6],
9
- /(a)(?(1)T|F)3/ => [5, :conditional, :condition, '1', 6, 7],
10
- /(a)(?(1)T|F)4/ => [6, :conditional, :condition_close, ')', 7, 8],
11
- /(a)(?(1)T|F)5/ => [7, :literal, :literal, 'T', 8, 9],
12
- /(a)(?(1)T|F)6/ => [8, :conditional, :separator, '|', 9, 10],
13
- /(a)(?(1)T|F)7/ => [9, :literal, :literal, 'F', 10, 11],
14
- /(a)(?(1)T|F)8/ => [10, :conditional, :close, ')', 11, 12],
15
-
16
- /(a)(?(1)TRUE)9/ => [8, :conditional, :close, ')', 12, 13],
17
-
18
- /(a)(?(1)TRUE|)10/ => [8, :conditional, :separator, '|', 12, 13],
19
- /(a)(?(1)TRUE|)11/ => [9, :conditional, :close, ')', 13, 14],
20
-
21
- /(?<N>A)(?(<N>)T|F)1/ => [5, :conditional, :condition, '<N>', 10, 13],
22
- /(?'N'A)(?('N')T|F)2/ => [5, :conditional, :condition, "'N'", 10, 13],
23
- }
24
-
25
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
26
- define_method "test_scanner_#{type}_#{token}_#{count}" do
27
- tokens = RS.scan(pattern)
28
- result = tokens[index]
29
-
30
- assert_equal type, result[0]
31
- assert_equal token, result[1]
32
- assert_equal text, result[2]
33
- assert_equal ts, result[3]
34
- assert_equal te, result[4]
35
- end
36
- end
37
-
38
- def test_scan_conditional_nested
39
- regexp = /(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))/
40
- tokens = RS.scan(regexp)
41
-
42
- [ [ 0, :group, :capture, '(', 0, 1],
43
- [ 1, :literal, :literal, 'a', 1, 2],
44
- [ 2, :group, :capture, '(', 2, 3],
45
- [ 3, :literal, :literal, 'b', 3, 4],
46
- [ 4, :group, :capture, '(', 4, 5],
47
- [ 5, :literal, :literal, 'c', 5, 6],
48
- [ 6, :group, :close, ')', 6, 7],
49
- [ 7, :group, :close, ')', 7, 8],
50
- [ 8, :group, :close, ')', 8, 9],
51
- [ 9, :conditional, :open, '(?', 9, 11],
52
- [10, :conditional, :condition_open, '(', 11, 12],
53
- [11, :conditional, :condition, '1', 12, 13],
54
- [12, :conditional, :condition_close, ')', 13, 14],
55
- [13, :conditional, :open, '(?', 14, 16],
56
- [14, :conditional, :condition_open, '(', 16, 17],
57
- [15, :conditional, :condition, '2', 17, 18],
58
- [16, :conditional, :condition_close, ')', 18, 19],
59
- [17, :literal, :literal, 'd', 19, 20],
60
- [18, :conditional, :separator, '|', 20, 21],
61
- [19, :conditional, :open, '(?', 21, 23],
62
- [20, :conditional, :condition_open, '(', 23, 24],
63
- [21, :conditional, :condition, '3', 24, 25],
64
- [22, :conditional, :condition_close, ')', 25, 26],
65
- [23, :literal, :literal, 'e', 26, 27],
66
- [24, :conditional, :separator, '|', 27, 28],
67
- [25, :literal, :literal, 'f', 28, 29],
68
- [26, :conditional, :close, ')', 29, 30],
69
- [27, :conditional, :close, ')', 30, 31],
70
- [28, :conditional, :separator, '|', 31, 32],
71
- [29, :conditional, :open, '(?', 32, 34],
72
- [30, :conditional, :condition_open, '(', 34, 35],
73
- [31, :conditional, :condition, '2', 35, 36],
74
- [32, :conditional, :condition_close, ')', 36, 37],
75
- [33, :conditional, :open, '(?', 37, 39],
76
- [34, :conditional, :condition_open, '(', 39, 40],
77
- [35, :conditional, :condition, '1', 40, 41],
78
- [36, :conditional, :condition_close, ')', 41, 42],
79
- [37, :literal, :literal, 'g', 42, 43],
80
- [38, :conditional, :separator, '|', 43, 44],
81
- [39, :literal, :literal, 'h', 44, 45],
82
- [40, :conditional, :close, ')', 45, 46],
83
- [41, :conditional, :close, ')', 46, 47],
84
- [42, :conditional, :close, ')', 47, 48]
85
- ].each do |index, type, token, text, ts, te|
86
- result = tokens[index]
87
-
88
- assert_equal type, result[0]
89
- assert_equal token, result[1]
90
- assert_equal text, result[2]
91
- assert_equal ts, result[3]
92
- assert_equal te, result[4]
93
- end
94
- end
95
-
96
- def test_scan_conditional_nested_groups
97
- regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
98
- tokens = RS.scan(regexp)
99
-
100
- [ [ 0, :group, :capture, '(', 0, 1],
101
- [ 1, :group, :capture, '(', 1, 2],
102
- [ 2, :literal, :literal, 'a', 2, 3],
103
- [ 3, :group, :close, ')', 3, 4],
104
- [ 4, :meta, :alternation, '|', 4, 5],
105
- [ 5, :group, :capture, '(', 5, 6],
106
- [ 6, :literal, :literal, 'b', 6, 7],
107
- [ 7, :group, :close, ')', 7, 8],
108
- [ 8, :meta, :alternation, '|', 8, 9],
109
- [ 9, :group, :capture, '(', 9, 10],
110
- [10, :conditional, :open, '(?', 10, 12],
111
- [11, :conditional, :condition_open, '(', 12, 13],
112
- [12, :conditional, :condition, '2', 13, 14],
113
- [13, :conditional, :condition_close, ')', 14, 15],
114
- [14, :group, :capture, '(', 15, 16],
115
- [15, :literal, :literal, 'c', 16, 17],
116
- [16, :group, :capture, '(', 17, 18],
117
- [17, :literal, :literal, 'd', 18, 19],
118
- [18, :meta, :alternation, '|', 19, 20],
119
- [19, :literal, :literal, 'e', 20, 21],
120
- [20, :group, :close, ')', 21, 22],
121
- [21, :quantifier, :one_or_more, '+', 22, 23],
122
- [22, :group, :close, ')', 23, 24],
123
- [23, :quantifier, :zero_or_one, '?', 24, 25],
124
- [24, :conditional, :separator, '|', 25, 26],
125
- [25, :conditional, :open, '(?', 26, 28],
126
- [26, :conditional, :condition_open, '(', 28, 29],
127
- [27, :conditional, :condition, '3', 29, 30],
128
- [28, :conditional, :condition_close, ')', 30, 31],
129
- [29, :literal, :literal, 'f', 31, 32],
130
- [30, :conditional, :separator, '|', 32, 33],
131
- [31, :conditional, :open, '(?', 33, 35],
132
- [32, :conditional, :condition_open, '(', 35, 36],
133
- [33, :conditional, :condition, '4', 36, 37],
134
- [34, :conditional, :condition_close, ')', 37, 38],
135
- [35, :group, :capture, '(', 38, 39],
136
- [36, :literal, :literal, 'g', 39, 40],
137
- [37, :meta, :alternation, '|', 40, 41],
138
- [38, :group, :capture, '(', 41, 42],
139
- [39, :literal, :literal, 'h', 42, 43],
140
- [40, :group, :close, ')', 43, 44],
141
- [41, :group, :capture, '(', 44, 45],
142
- [42, :literal, :literal, 'i', 45, 46],
143
- [43, :group, :close, ')', 46, 47],
144
- [44, :group, :close, ')', 47, 48],
145
- [45, :conditional, :close, ')', 48, 49],
146
- [46, :conditional, :close, ')', 49, 50],
147
- [47, :conditional, :close, ')', 50, 51],
148
- [48, :group, :close, ')', 51, 52],
149
- [49, :group, :close, ')', 52, 53]
150
- ].each do |index, type, token, text, ts, te|
151
- result = tokens[index]
152
-
153
- assert_equal type, result[0]
154
- assert_equal token, result[1]
155
- assert_equal text, result[2]
156
- assert_equal ts, result[3]
157
- assert_equal te, result[4]
158
- end
159
- end
160
-
161
- def test_scan_conditional_nested_alternation
162
- regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
163
- tokens = RS.scan(regexp)
164
-
165
- [9, 11, 17, 19, 32, 34, 40, 42, 46, 48].each do |index|
166
- result = tokens[index]
167
-
168
- assert_equal :meta, result[0]
169
- assert_equal :alternation, result[1]
170
- assert_equal '|', result[2]
171
- assert_equal 1, result[4] - result[3]
172
- end
173
-
174
- [14, 37].each do |index|
175
- result = tokens[index]
176
-
177
- assert_equal :conditional, result[0]
178
- assert_equal :separator, result[1]
179
- assert_equal '|', result[2]
180
- assert_equal 1, result[4] - result[3]
181
- end
182
- end
183
-
184
- end