regexp_parser 1.4.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -1
  3. data/Gemfile +3 -3
  4. data/README.md +11 -18
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  18. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  19. data/lib/regexp_parser/expression/sequence.rb +3 -6
  20. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  21. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  22. data/lib/regexp_parser/lexer.rb +30 -44
  23. data/lib/regexp_parser/parser.rb +47 -24
  24. data/lib/regexp_parser/scanner.rb +1228 -1367
  25. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  26. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  27. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  28. data/lib/regexp_parser/scanner/scanner.rl +101 -194
  29. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  30. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  31. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  33. data/lib/regexp_parser/version.rb +1 -1
  34. data/regexp_parser.gemspec +2 -2
  35. data/spec/expression/base_spec.rb +94 -0
  36. data/spec/expression/clone_spec.rb +120 -0
  37. data/spec/expression/conditional_spec.rb +89 -0
  38. data/spec/expression/free_space_spec.rb +27 -0
  39. data/spec/expression/methods/match_length_spec.rb +161 -0
  40. data/spec/expression/methods/match_spec.rb +25 -0
  41. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  42. data/spec/expression/methods/tests_spec.rb +99 -0
  43. data/spec/expression/methods/traverse_spec.rb +161 -0
  44. data/spec/expression/options_spec.rb +128 -0
  45. data/spec/expression/root_spec.rb +9 -0
  46. data/spec/expression/sequence_spec.rb +9 -0
  47. data/spec/expression/subexpression_spec.rb +50 -0
  48. data/spec/expression/to_h_spec.rb +26 -0
  49. data/spec/expression/to_s_spec.rb +100 -0
  50. data/spec/lexer/all_spec.rb +22 -0
  51. data/spec/lexer/conditionals_spec.rb +53 -0
  52. data/spec/lexer/delimiters_spec.rb +68 -0
  53. data/spec/lexer/escapes_spec.rb +14 -0
  54. data/spec/lexer/keep_spec.rb +10 -0
  55. data/spec/lexer/literals_spec.rb +89 -0
  56. data/spec/lexer/nesting_spec.rb +99 -0
  57. data/spec/lexer/refcalls_spec.rb +55 -0
  58. data/spec/parser/all_spec.rb +43 -0
  59. data/spec/parser/alternation_spec.rb +88 -0
  60. data/spec/parser/anchors_spec.rb +17 -0
  61. data/spec/parser/conditionals_spec.rb +179 -0
  62. data/spec/parser/errors_spec.rb +30 -0
  63. data/spec/parser/escapes_spec.rb +121 -0
  64. data/spec/parser/free_space_spec.rb +130 -0
  65. data/spec/parser/groups_spec.rb +108 -0
  66. data/spec/parser/keep_spec.rb +6 -0
  67. data/spec/parser/posix_classes_spec.rb +8 -0
  68. data/spec/parser/properties_spec.rb +115 -0
  69. data/spec/parser/quantifiers_spec.rb +52 -0
  70. data/spec/parser/refcalls_spec.rb +112 -0
  71. data/spec/parser/set/intersections_spec.rb +127 -0
  72. data/spec/parser/set/ranges_spec.rb +111 -0
  73. data/spec/parser/sets_spec.rb +178 -0
  74. data/spec/parser/types_spec.rb +18 -0
  75. data/spec/scanner/all_spec.rb +18 -0
  76. data/spec/scanner/anchors_spec.rb +21 -0
  77. data/spec/scanner/conditionals_spec.rb +128 -0
  78. data/spec/scanner/delimiters_spec.rb +52 -0
  79. data/spec/scanner/errors_spec.rb +67 -0
  80. data/spec/scanner/escapes_spec.rb +53 -0
  81. data/spec/scanner/free_space_spec.rb +133 -0
  82. data/spec/scanner/groups_spec.rb +52 -0
  83. data/spec/scanner/keep_spec.rb +10 -0
  84. data/spec/scanner/literals_spec.rb +49 -0
  85. data/spec/scanner/meta_spec.rb +18 -0
  86. data/spec/scanner/properties_spec.rb +64 -0
  87. data/spec/scanner/quantifiers_spec.rb +20 -0
  88. data/spec/scanner/refcalls_spec.rb +36 -0
  89. data/spec/scanner/sets_spec.rb +102 -0
  90. data/spec/scanner/types_spec.rb +14 -0
  91. data/spec/spec_helper.rb +15 -0
  92. data/{test → spec}/support/runner.rb +9 -8
  93. data/spec/support/shared_examples.rb +77 -0
  94. data/{test → spec}/support/warning_extractor.rb +5 -7
  95. data/spec/syntax/syntax_spec.rb +48 -0
  96. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  97. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  98. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  99. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  100. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  101. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  102. data/spec/syntax/versions/aliases_spec.rb +37 -0
  103. data/spec/token/token_spec.rb +85 -0
  104. metadata +149 -144
  105. data/test/expression/test_all.rb +0 -12
  106. data/test/expression/test_base.rb +0 -90
  107. data/test/expression/test_clone.rb +0 -89
  108. data/test/expression/test_conditionals.rb +0 -113
  109. data/test/expression/test_free_space.rb +0 -35
  110. data/test/expression/test_set.rb +0 -84
  111. data/test/expression/test_strfregexp.rb +0 -230
  112. data/test/expression/test_subexpression.rb +0 -58
  113. data/test/expression/test_tests.rb +0 -99
  114. data/test/expression/test_to_h.rb +0 -59
  115. data/test/expression/test_to_s.rb +0 -104
  116. data/test/expression/test_traverse.rb +0 -161
  117. data/test/helpers.rb +0 -10
  118. data/test/lexer/test_all.rb +0 -41
  119. data/test/lexer/test_conditionals.rb +0 -127
  120. data/test/lexer/test_keep.rb +0 -24
  121. data/test/lexer/test_literals.rb +0 -130
  122. data/test/lexer/test_nesting.rb +0 -132
  123. data/test/lexer/test_refcalls.rb +0 -56
  124. data/test/parser/set/test_intersections.rb +0 -127
  125. data/test/parser/set/test_ranges.rb +0 -111
  126. data/test/parser/test_all.rb +0 -64
  127. data/test/parser/test_alternation.rb +0 -92
  128. data/test/parser/test_anchors.rb +0 -34
  129. data/test/parser/test_conditionals.rb +0 -187
  130. data/test/parser/test_errors.rb +0 -63
  131. data/test/parser/test_escapes.rb +0 -134
  132. data/test/parser/test_free_space.rb +0 -139
  133. data/test/parser/test_groups.rb +0 -289
  134. data/test/parser/test_keep.rb +0 -21
  135. data/test/parser/test_posix_classes.rb +0 -27
  136. data/test/parser/test_properties.rb +0 -134
  137. data/test/parser/test_quantifiers.rb +0 -301
  138. data/test/parser/test_refcalls.rb +0 -186
  139. data/test/parser/test_sets.rb +0 -179
  140. data/test/parser/test_types.rb +0 -50
  141. data/test/scanner/test_all.rb +0 -38
  142. data/test/scanner/test_anchors.rb +0 -38
  143. data/test/scanner/test_conditionals.rb +0 -184
  144. data/test/scanner/test_errors.rb +0 -91
  145. data/test/scanner/test_escapes.rb +0 -56
  146. data/test/scanner/test_free_space.rb +0 -200
  147. data/test/scanner/test_groups.rb +0 -79
  148. data/test/scanner/test_keep.rb +0 -35
  149. data/test/scanner/test_literals.rb +0 -89
  150. data/test/scanner/test_meta.rb +0 -40
  151. data/test/scanner/test_properties.rb +0 -312
  152. data/test/scanner/test_quantifiers.rb +0 -37
  153. data/test/scanner/test_refcalls.rb +0 -52
  154. data/test/scanner/test_scripts.rb +0 -53
  155. data/test/scanner/test_sets.rb +0 -119
  156. data/test/scanner/test_types.rb +0 -35
  157. data/test/scanner/test_unicode_blocks.rb +0 -30
  158. data/test/support/disable_autotest.rb +0 -8
  159. data/test/syntax/test_all.rb +0 -6
  160. data/test/syntax/test_syntax.rb +0 -61
  161. data/test/syntax/test_syntax_token_map.rb +0 -25
  162. data/test/syntax/versions/test_1.8.rb +0 -55
  163. data/test/syntax/versions/test_1.9.1.rb +0 -36
  164. data/test/syntax/versions/test_1.9.3.rb +0 -32
  165. data/test/syntax/versions/test_2.0.0.rb +0 -37
  166. data/test/syntax/versions/test_2.2.0.rb +0 -32
  167. data/test/syntax/versions/test_aliases.rb +0 -129
  168. data/test/syntax/versions/test_all.rb +0 -5
  169. data/test/test_all.rb +0 -5
  170. data/test/token/test_all.rb +0 -2
  171. data/test/token/test_token.rb +0 -107
@@ -1,34 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class TestParserAnchors < Test::Unit::TestCase
4
-
5
- tests = {
6
- '^a' => [0, :anchor, :bol, Anchor::BOL],
7
- 'a$' => [1, :anchor, :eol, Anchor::EOL],
8
-
9
- '\Aa' => [0, :anchor, :bos, Anchor::BOS],
10
- 'a\z' => [1, :anchor, :eos, Anchor::EOS],
11
- 'a\Z' => [1, :anchor, :eos_ob_eol, Anchor::EOSobEOL],
12
-
13
- 'a\b' => [1, :anchor, :word_boundary, Anchor::WordBoundary],
14
- 'a\B' => [1, :anchor, :nonword_boundary, Anchor::NonWordBoundary],
15
-
16
- 'a\G' => [1, :anchor, :match_start, Anchor::MatchStart],
17
-
18
- "\\\\Aa" => [0, :escape, :backslash, EscapeSequence::Literal],
19
- }
20
-
21
- tests.each_with_index do |(pattern, (index, type, token, klass)), count|
22
- define_method "test_parse_anchor_#{token}_#{count}" do
23
- root = RP.parse(pattern, 'ruby/1.9')
24
- exp = root.expressions.at(index)
25
-
26
- assert exp.is_a?(klass),
27
- "Expected #{klass}, but got #{exp.class.name}"
28
-
29
- assert_equal type, exp.type
30
- assert_equal token, exp.token
31
- end
32
- end
33
-
34
- end
@@ -1,187 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class TestParserConditionals < Test::Unit::TestCase
4
-
5
- def test_parse_conditional
6
- regexp = /(?<A>a)(?(<A>)T|F)/
7
-
8
- root = RP.parse(regexp, 'ruby/2.0')
9
- exp = root.expressions[1]
10
-
11
- assert exp.is_a?(Conditional::Expression),
12
- "Expected Condition, but got #{exp.class.name}"
13
-
14
- assert_equal exp.type, :conditional
15
- assert_equal exp.token, :open
16
- assert_equal exp.text, '(?'
17
- assert_equal exp.reference, 'A'
18
- end
19
-
20
- def test_parse_conditional_condition
21
- regexp = /(?<A>a)(?(<A>)T|F)/
22
-
23
- root = RP.parse(regexp, 'ruby/2.0')
24
- exp = root[1].condition
25
-
26
- assert exp.is_a?(Conditional::Condition),
27
- "Expected Condition, but got #{exp.class.name}"
28
-
29
- assert_equal exp.type, :conditional
30
- assert_equal exp.token, :condition
31
- assert_equal exp.text, '(<A>)'
32
- assert_equal exp.reference, 'A'
33
- end
34
-
35
- def test_parse_conditional_condition_with_number_ref
36
- regexp = /(a)(?(1)T|F)/
37
-
38
- root = RP.parse(regexp, 'ruby/2.0')
39
- exp = root[1].condition
40
-
41
- assert exp.is_a?(Conditional::Condition),
42
- "Expected Condition, but got #{exp.class.name}"
43
-
44
- assert_equal exp.type, :conditional
45
- assert_equal exp.token, :condition
46
- assert_equal exp.text, '(1)'
47
- assert_equal exp.reference, 1
48
- end
49
-
50
- def test_parse_conditional_nested_groups
51
- regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
52
-
53
- root = RP.parse(regexp, 'ruby/2.0')
54
-
55
- assert_equal regexp.source, root.to_s
56
-
57
- group = root.first
58
- assert_equal Group::Capture, group.class
59
-
60
- alt = group.first
61
- assert_equal Alternation, alt.class
62
- assert_equal 3, alt.length
63
-
64
- all_captures = alt.all? do |exp|
65
- exp.first.is_a?(Group::Capture)
66
- end
67
-
68
- assert_equal true, all_captures
69
-
70
- subgroup = alt[2].first
71
- conditional = subgroup.first
72
-
73
- assert_equal Conditional::Expression, conditional.class
74
- assert_equal 3, conditional.length
75
-
76
- assert_equal Conditional::Condition, conditional[0].class
77
- assert_equal '(2)', conditional[0].text
78
-
79
- condition = conditional.condition
80
- assert_equal Conditional::Condition, condition.class
81
- assert_equal '(2)', condition.text
82
-
83
- branches = conditional.branches
84
- assert_equal 2, branches.length
85
- assert_equal Array, branches.class
86
- end
87
-
88
- def test_parse_conditional_nested
89
- regexp = /(a(b(c(d)(e))))(?(1)(?(2)d|(?(3)e|f))|(?(4)(?(5)g|h)))/
90
-
91
- root = RP.parse(regexp, 'ruby/2.0')
92
-
93
- assert_equal regexp.source, root.to_s
94
-
95
- { 1 => [2, root[1]],
96
- 2 => [2, root[1][1][0]],
97
- 3 => [2, root[1][1][0][2][0]],
98
- 4 => [1, root[1][2][0]],
99
- 5 => [2, root[1][2][0][1][0]],
100
- }.each do |index, test|
101
- branch_count, exp = test
102
-
103
- assert_equal Conditional::Expression, exp.class
104
- assert_equal "(#{index})", exp.condition.text
105
- assert_equal branch_count, exp.branches.length
106
- end
107
- end
108
-
109
- def test_parse_conditional_nested_alternation
110
- regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
111
-
112
- root = RP.parse(regexp, 'ruby/2.0')
113
-
114
- assert_equal regexp.source, root.to_s
115
-
116
- assert_equal Alternation, root.first.class
117
-
118
- [ [3, 'b|c|d', root[0][0][1][1][0][0]],
119
- [3, 'e|f|g', root[0][0][1][2][0][0]],
120
- [3, 'i|j|k', root[0][0][3][1][0][0]],
121
- [3, 'l|m|n', root[0][0][3][2][0][0]],
122
- ].each do |test|
123
- alt_count, alt_text, exp = test
124
-
125
- assert_equal Alternation, exp.class
126
- assert_equal alt_text, exp.to_s
127
- assert_equal alt_count, exp.alternatives.length
128
- end
129
- end
130
-
131
- def test_parse_conditional_extra_separator
132
- regexp = /(?<A>a)(?(<A>)T|)/
133
-
134
- root = RP.parse(regexp, 'ruby/2.0')
135
- branches = root[1].branches
136
-
137
- assert_equal 2, branches.length
138
-
139
- seq_1, seq_2 = branches
140
-
141
- [seq_1, seq_2].each do |seq|
142
- assert seq.is_a?( Sequence ),
143
- "Expected Condition, but got #{seq.class.name}"
144
-
145
- assert_equal :expression, seq.type
146
- assert_equal :sequence, seq.token
147
- end
148
-
149
- assert_equal 'T', seq_1.to_s
150
- assert_equal '', seq_2.to_s
151
- end
152
-
153
- def test_parse_conditional_quantified
154
- regexp = /(foo)(?(1)\d|(\w)){42}/
155
-
156
- root = RP.parse(regexp, 'ruby/2.0')
157
- conditional = root[1]
158
-
159
- assert conditional.quantified?
160
- assert_equal '{42}', conditional.quantifier.text
161
- assert_equal '(?(1)\d|(\w)){42}', conditional.to_s
162
- refute conditional.branches.any?(&:quantified?)
163
- end
164
-
165
- def test_parse_conditional_branch_content_quantified
166
- regexp = /(foo)(?(1)\d{23}|(\w){42})/
167
-
168
- root = RP.parse(regexp, 'ruby/2.0')
169
- conditional = root[1]
170
-
171
- refute conditional.quantified?
172
- refute conditional.branches.any?(&:quantified?)
173
- assert conditional.branches[0][0].quantified?
174
- assert_equal '{23}', conditional.branches[0][0].quantifier.text
175
- assert conditional.branches[1][0].quantified?
176
- assert_equal '{42}', conditional.branches[1][0].quantifier.text
177
- end
178
-
179
- # For source (text) expressions only, ruby raises an error otherwise.
180
- def test_parse_conditional_excessive_branches
181
- regexp = '(?<A>a)(?(<A>)T|F|X)'
182
-
183
- assert_raise( Conditional::TooManyBranches ) {
184
- RP.parse(regexp, 'ruby/2.0')
185
- }
186
- end
187
- end
@@ -1,63 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class ParserErrors < Test::Unit::TestCase
4
- def setup
5
- @rp = Regexp::Parser.new
6
- @rp.parse(/foo/)
7
- end
8
-
9
- def test_parser_unknown_token_type
10
- assert_raise( Regexp::Parser::UnknownTokenTypeError ) {
11
- @rp.__send__(:parse_token, Regexp::Token.new(:foo, :bar))
12
- }
13
- end
14
-
15
- def test_parser_unknown_set_token
16
- assert_raise( Regexp::Parser::UnknownTokenError ) {
17
- @rp.__send__(:parse_token, Regexp::Token.new(:set, :foo))
18
- }
19
- end
20
-
21
- def test_parser_unknown_meta_token
22
- assert_raise( Regexp::Parser::UnknownTokenError ) {
23
- @rp.__send__(:parse_token, Regexp::Token.new(:meta, :foo))
24
- }
25
- end
26
-
27
- def test_parser_unknown_character_type_token
28
- assert_raise( Regexp::Parser::UnknownTokenError ) {
29
- @rp.__send__(:parse_token, Regexp::Token.new(:type, :foo))
30
- }
31
- end
32
-
33
- def test_parser_unknown_unicode_property_token
34
- assert_raise( Regexp::Parser::UnknownTokenError ) {
35
- @rp.__send__(:parse_token, Regexp::Token.new(:property, :foo))
36
- }
37
- end
38
-
39
- def test_parser_unknown_unicode_nonproperty_token
40
- assert_raise( Regexp::Parser::UnknownTokenError ) {
41
- @rp.__send__(:parse_token, Regexp::Token.new(:nonproperty, :foo))
42
- }
43
- end
44
-
45
- def test_parser_unknown_anchor_token
46
- assert_raise( Regexp::Parser::UnknownTokenError ) {
47
- @rp.__send__(:parse_token, Regexp::Token.new(:anchor, :foo))
48
- }
49
- end
50
-
51
- def test_parser_unknown_quantifier_token
52
- assert_raise( Regexp::Parser::UnknownTokenError ) {
53
- @rp.__send__(:parse_token, Regexp::Token.new(:quantifier, :foo))
54
- }
55
- end
56
-
57
- def test_parser_unknown_group_open_token
58
- assert_raise( Regexp::Parser::UnknownTokenError ) {
59
- @rp.__send__(:parse_token, Regexp::Token.new(:group, :foo))
60
- }
61
- end
62
-
63
- end
@@ -1,134 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class TestParserEscapes < Test::Unit::TestCase
4
-
5
- tests = {
6
- /a\ac/ => [1, :escape, :bell, EscapeSequence::Bell],
7
- /a\ec/ => [1, :escape, :escape, EscapeSequence::AsciiEscape],
8
- /a\fc/ => [1, :escape, :form_feed, EscapeSequence::FormFeed],
9
- /a\nc/ => [1, :escape, :newline, EscapeSequence::Newline],
10
- /a\rc/ => [1, :escape, :carriage, EscapeSequence::Return],
11
- /a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
12
- /a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],
13
-
14
- # meta character escapes
15
- /a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
16
- /a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
17
- /a\*c/ => [1, :escape, :zero_or_more, EscapeSequence::Literal],
18
- /a\+c/ => [1, :escape, :one_or_more, EscapeSequence::Literal],
19
- /a\|c/ => [1, :escape, :alternation, EscapeSequence::Literal],
20
- /a\(c/ => [1, :escape, :group_open, EscapeSequence::Literal],
21
- /a\)c/ => [1, :escape, :group_close, EscapeSequence::Literal],
22
- /a\{c/ => [1, :escape, :interval_open, EscapeSequence::Literal],
23
- /a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
24
-
25
- # unicode escapes
26
- /a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Codepoint],
27
- /a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
28
- /a\u{10FFFF}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
29
-
30
- # hex escapes
31
- /a\xFF/n => [1, :escape, :hex, EscapeSequence::Hex],
32
-
33
- # octal escapes
34
- /a\177/n => [1, :escape, :octal, EscapeSequence::Octal],
35
- }
36
-
37
- tests.each_with_index do |(pattern, (index, type, token, klass)), count|
38
- define_method "test_parse_escape_#{token}_#{count+=1}" do
39
- root = RP.parse(pattern, 'ruby/1.9')
40
- exp = root.expressions.at(index)
41
-
42
- assert exp.is_a?(klass),
43
- "Expected #{klass}, but got #{exp.class.name}"
44
-
45
- assert_equal type, exp.type
46
- assert_equal token, exp.token
47
- end
48
- end
49
-
50
- def test_parse_chars_and_codepoints
51
- root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
52
-
53
- assert_equal "\n", root[0].char
54
- assert_equal 10, root[0].codepoint
55
-
56
- assert_equal "?", root[1].char
57
- assert_equal 63, root[1].codepoint
58
-
59
- assert_equal "A", root[2].char
60
- assert_equal 65, root[2].codepoint
61
-
62
- assert_equal "B", root[3].char
63
- assert_equal 66, root[3].codepoint
64
-
65
- assert_equal "C", root[4].char
66
- assert_equal 67, root[4].codepoint
67
-
68
- assert_equal ["D", "E"], root[5].chars
69
- assert_equal [68, 69], root[5].codepoints
70
- end
71
-
72
- def test_parse_escape_control_sequence_lower
73
- root = RP.parse(/a\\\c2b/)
74
-
75
- assert_equal EscapeSequence::Control, root[2].class
76
- assert_equal '\\c2', root[2].text
77
- assert_equal "\u0012", root[2].char
78
- assert_equal 18, root[2].codepoint
79
- end
80
-
81
- def test_parse_escape_control_sequence_upper
82
- root = RP.parse(/\d\\\C-C\w/)
83
-
84
- assert_equal EscapeSequence::Control, root[2].class
85
- assert_equal '\\C-C', root[2].text
86
- assert_equal "\u0003", root[2].char
87
- assert_equal 3, root[2].codepoint
88
- end
89
-
90
- def test_parse_escape_meta_sequence
91
- root = RP.parse(/\Z\\\M-Z/n)
92
-
93
- assert_equal EscapeSequence::Meta, root[2].class
94
- assert_equal '\\M-Z', root[2].text
95
- assert_equal "\u00DA", root[2].char
96
- assert_equal 218, root[2].codepoint
97
- end
98
-
99
- def test_parse_escape_meta_control_sequence
100
- root = RP.parse(/\A\\\M-\C-X/n)
101
-
102
- assert_equal EscapeSequence::MetaControl, root[2].class
103
- assert_equal '\\M-\\C-X', root[2].text
104
- assert_equal "\u0098", root[2].char
105
- assert_equal 152, root[2].codepoint
106
- end
107
-
108
- def test_parse_lower_c_meta_control_sequence
109
- root = RP.parse(/\A\\\M-\cX/n)
110
-
111
- assert_equal EscapeSequence::MetaControl, root[2].class
112
- assert_equal '\\M-\\cX', root[2].text
113
- assert_equal "\u0098", root[2].char
114
- assert_equal 152, root[2].codepoint
115
- end
116
-
117
- def test_parse_escape_reverse_meta_control_sequence
118
- root = RP.parse(/\A\\\C-\M-X/n)
119
-
120
- assert_equal EscapeSequence::MetaControl, root[2].class
121
- assert_equal '\\C-\\M-X', root[2].text
122
- assert_equal "\u0098", root[2].char
123
- assert_equal 152, root[2].codepoint
124
- end
125
-
126
- def test_parse_escape_reverse_lower_c_meta_control_sequence
127
- root = RP.parse(/\A\\\c\M-X/n)
128
-
129
- assert_equal EscapeSequence::MetaControl, root[2].class
130
- assert_equal '\\c\\M-X', root[2].text
131
- assert_equal "\u0098", root[2].char
132
- assert_equal 152, root[2].codepoint
133
- end
134
- end
@@ -1,139 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class ParserFreeSpace < Test::Unit::TestCase
4
-
5
- def test_parse_free_space_spaces
6
- regexp = /a ? b * c + d{2,4}/x
7
- root = RP.parse(regexp)
8
-
9
- 0.upto(6) do |i|
10
- if i.odd?
11
- # Consecutive spaces get merged by the parser, thus the two spaces.
12
- assert_equal WhiteSpace, root[i].class
13
- assert_equal ' ', root[i].text
14
- else
15
- assert_equal Literal, root[i].class
16
- assert_equal true, root[i].quantified?
17
- end
18
- end
19
- end
20
-
21
- def test_parse_non_free_space_literals
22
- regexp = /a b c d/
23
- root = RP.parse(regexp)
24
-
25
- assert_equal Literal, root.first.class
26
- assert_equal 'a b c d', root.first.text
27
- end
28
-
29
- def test_parse_free_space_comments
30
- regexp = %r{
31
- a ? # One letter
32
- b {2,5} # Another one
33
- [c-g] + # A set
34
- (h|i|j) | # A group
35
- klm *
36
- nop +
37
- }x
38
-
39
- root = RP.parse(regexp)
40
-
41
- alt = root.first
42
- assert_equal Alternation, alt.class
43
-
44
- alt_1 = alt.alternatives.first
45
- assert_equal Alternative, alt_1.class
46
- assert_equal 15, alt_1.length
47
-
48
- [0, 2, 4, 6, 8, 12, 14].each do |i|
49
- assert_equal WhiteSpace, alt_1[i].class
50
- end
51
-
52
- [3, 7, 11].each do |i|
53
- assert_equal Comment, alt_1[i].class
54
- end
55
-
56
- alt_2 = alt.alternatives.last
57
- assert_equal Alternative, alt_2.class
58
- assert_equal 7, alt_2.length
59
-
60
- [0, 2, 4, 6].each do |i|
61
- assert_equal WhiteSpace, alt_2[i].class
62
- end
63
-
64
- assert_equal Comment, alt_2[1].class
65
- end
66
-
67
- def test_parse_free_space_nested_comments
68
- # Tests depend on spacing and indentation, obviously.
69
- regexp = %r{
70
- # Group one
71
- (
72
- abc # Comment one
73
- \d? # Optional \d
74
- )+
75
-
76
- # Group two
77
- (
78
- def # Comment two
79
- \s? # Optional \s
80
- )?
81
- }x
82
-
83
- root = RP.parse(regexp)
84
-
85
- top_comment_1 = root[1]
86
- assert_equal Comment, top_comment_1.class
87
- assert_equal "# Group one\n", top_comment_1.text
88
- assert_equal 7, top_comment_1.starts_at
89
-
90
- top_comment_2 = root[5]
91
- assert_equal Comment, top_comment_2.class
92
- assert_equal "# Group two\n", top_comment_2.text
93
- assert_equal 95, top_comment_2.starts_at
94
-
95
- # Nested comments
96
- [3, 7].each_with_index do |g, i|
97
- group = root[g]
98
-
99
- [3, 7].each do |c|
100
- comment = group[c]
101
- assert_equal Comment, comment.class
102
- assert_equal 14, comment.text.length
103
- end
104
- end
105
- end
106
-
107
- def test_parse_free_space_quantifiers
108
- regexp = %r{
109
- a
110
- # comment 1
111
- ?
112
- (
113
- b # comment 2
114
- # comment 3
115
- +
116
- )
117
- # comment 4
118
- *
119
- }x
120
-
121
- root = RP.parse(regexp)
122
-
123
- literal_1 = root[1]
124
- assert_equal Literal, literal_1.class
125
- assert_equal true, literal_1.quantified?
126
- assert_equal :zero_or_one, literal_1.quantifier.token
127
-
128
- group = root[5]
129
- assert_equal Group::Capture, group.class
130
- assert_equal true, group.quantified?
131
- assert_equal :zero_or_more, group.quantifier.token
132
-
133
- literal_2 = group[1]
134
- assert_equal Literal, literal_2.class
135
- assert_equal true, literal_2.quantified?
136
- assert_equal :one_or_more, literal_2.quantifier.token
137
- end
138
-
139
- end