regexp_parser 1.3.0 → 1.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -1
- data/Gemfile +3 -3
- data/README.md +12 -19
- data/Rakefile +3 -4
- data/lib/regexp_parser/expression.rb +28 -53
- data/lib/regexp_parser/expression/classes/backref.rb +18 -10
- data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
- data/lib/regexp_parser/expression/classes/escape.rb +0 -4
- data/lib/regexp_parser/expression/classes/group.rb +4 -2
- data/lib/regexp_parser/expression/classes/keep.rb +1 -3
- data/lib/regexp_parser/expression/methods/match.rb +13 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
- data/lib/regexp_parser/expression/methods/options.rb +35 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
- data/lib/regexp_parser/expression/methods/tests.rb +6 -15
- data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -6
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
- data/lib/regexp_parser/expression/subexpression.rb +3 -5
- data/lib/regexp_parser/lexer.rb +30 -44
- data/lib/regexp_parser/parser.rb +47 -24
- data/lib/regexp_parser/scanner.rb +1228 -1367
- data/lib/regexp_parser/scanner/char_type.rl +0 -3
- data/lib/regexp_parser/scanner/properties/long.yml +34 -1
- data/lib/regexp_parser/scanner/properties/short.yml +12 -0
- data/lib/regexp_parser/scanner/scanner.rl +101 -194
- data/lib/regexp_parser/syntax/tokens.rb +2 -10
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +3 -3
- data/spec/expression/base_spec.rb +94 -0
- data/spec/expression/clone_spec.rb +120 -0
- data/spec/expression/conditional_spec.rb +89 -0
- data/spec/expression/free_space_spec.rb +27 -0
- data/spec/expression/methods/match_length_spec.rb +161 -0
- data/spec/expression/methods/match_spec.rb +25 -0
- data/spec/expression/methods/strfregexp_spec.rb +224 -0
- data/spec/expression/methods/tests_spec.rb +99 -0
- data/spec/expression/methods/traverse_spec.rb +161 -0
- data/spec/expression/options_spec.rb +128 -0
- data/spec/expression/root_spec.rb +9 -0
- data/spec/expression/sequence_spec.rb +9 -0
- data/spec/expression/subexpression_spec.rb +50 -0
- data/spec/expression/to_h_spec.rb +26 -0
- data/spec/expression/to_s_spec.rb +100 -0
- data/spec/lexer/all_spec.rb +22 -0
- data/spec/lexer/conditionals_spec.rb +53 -0
- data/spec/lexer/delimiters_spec.rb +68 -0
- data/spec/lexer/escapes_spec.rb +14 -0
- data/spec/lexer/keep_spec.rb +10 -0
- data/spec/lexer/literals_spec.rb +89 -0
- data/spec/lexer/nesting_spec.rb +99 -0
- data/spec/lexer/refcalls_spec.rb +55 -0
- data/spec/parser/all_spec.rb +43 -0
- data/spec/parser/alternation_spec.rb +88 -0
- data/spec/parser/anchors_spec.rb +17 -0
- data/spec/parser/conditionals_spec.rb +179 -0
- data/spec/parser/errors_spec.rb +30 -0
- data/spec/parser/escapes_spec.rb +121 -0
- data/spec/parser/free_space_spec.rb +130 -0
- data/spec/parser/groups_spec.rb +108 -0
- data/spec/parser/keep_spec.rb +6 -0
- data/spec/parser/posix_classes_spec.rb +8 -0
- data/spec/parser/properties_spec.rb +115 -0
- data/spec/parser/quantifiers_spec.rb +52 -0
- data/spec/parser/refcalls_spec.rb +112 -0
- data/spec/parser/set/intersections_spec.rb +127 -0
- data/spec/parser/set/ranges_spec.rb +111 -0
- data/spec/parser/sets_spec.rb +178 -0
- data/spec/parser/types_spec.rb +18 -0
- data/spec/scanner/all_spec.rb +18 -0
- data/spec/scanner/anchors_spec.rb +21 -0
- data/spec/scanner/conditionals_spec.rb +128 -0
- data/spec/scanner/delimiters_spec.rb +52 -0
- data/spec/scanner/errors_spec.rb +67 -0
- data/spec/scanner/escapes_spec.rb +53 -0
- data/spec/scanner/free_space_spec.rb +133 -0
- data/spec/scanner/groups_spec.rb +52 -0
- data/spec/scanner/keep_spec.rb +10 -0
- data/spec/scanner/literals_spec.rb +49 -0
- data/spec/scanner/meta_spec.rb +18 -0
- data/spec/scanner/properties_spec.rb +64 -0
- data/spec/scanner/quantifiers_spec.rb +20 -0
- data/spec/scanner/refcalls_spec.rb +36 -0
- data/spec/scanner/sets_spec.rb +102 -0
- data/spec/scanner/types_spec.rb +14 -0
- data/spec/spec_helper.rb +15 -0
- data/{test → spec}/support/runner.rb +9 -8
- data/spec/support/shared_examples.rb +77 -0
- data/{test → spec}/support/warning_extractor.rb +5 -7
- data/spec/syntax/syntax_spec.rb +48 -0
- data/spec/syntax/syntax_token_map_spec.rb +23 -0
- data/spec/syntax/versions/1.8.6_spec.rb +17 -0
- data/spec/syntax/versions/1.9.1_spec.rb +10 -0
- data/spec/syntax/versions/1.9.3_spec.rb +9 -0
- data/spec/syntax/versions/2.0.0_spec.rb +13 -0
- data/spec/syntax/versions/2.2.0_spec.rb +9 -0
- data/spec/syntax/versions/aliases_spec.rb +37 -0
- data/spec/token/token_spec.rb +85 -0
- metadata +151 -146
- data/test/expression/test_all.rb +0 -12
- data/test/expression/test_base.rb +0 -90
- data/test/expression/test_clone.rb +0 -89
- data/test/expression/test_conditionals.rb +0 -113
- data/test/expression/test_free_space.rb +0 -35
- data/test/expression/test_set.rb +0 -84
- data/test/expression/test_strfregexp.rb +0 -230
- data/test/expression/test_subexpression.rb +0 -58
- data/test/expression/test_tests.rb +0 -99
- data/test/expression/test_to_h.rb +0 -59
- data/test/expression/test_to_s.rb +0 -104
- data/test/expression/test_traverse.rb +0 -161
- data/test/helpers.rb +0 -10
- data/test/lexer/test_all.rb +0 -41
- data/test/lexer/test_conditionals.rb +0 -127
- data/test/lexer/test_keep.rb +0 -24
- data/test/lexer/test_literals.rb +0 -130
- data/test/lexer/test_nesting.rb +0 -132
- data/test/lexer/test_refcalls.rb +0 -56
- data/test/parser/set/test_intersections.rb +0 -127
- data/test/parser/set/test_ranges.rb +0 -111
- data/test/parser/test_all.rb +0 -64
- data/test/parser/test_alternation.rb +0 -92
- data/test/parser/test_anchors.rb +0 -34
- data/test/parser/test_conditionals.rb +0 -187
- data/test/parser/test_errors.rb +0 -63
- data/test/parser/test_escapes.rb +0 -134
- data/test/parser/test_free_space.rb +0 -139
- data/test/parser/test_groups.rb +0 -289
- data/test/parser/test_keep.rb +0 -21
- data/test/parser/test_posix_classes.rb +0 -27
- data/test/parser/test_properties.rb +0 -133
- data/test/parser/test_quantifiers.rb +0 -301
- data/test/parser/test_refcalls.rb +0 -186
- data/test/parser/test_sets.rb +0 -179
- data/test/parser/test_types.rb +0 -50
- data/test/scanner/test_all.rb +0 -38
- data/test/scanner/test_anchors.rb +0 -38
- data/test/scanner/test_conditionals.rb +0 -184
- data/test/scanner/test_errors.rb +0 -91
- data/test/scanner/test_escapes.rb +0 -56
- data/test/scanner/test_free_space.rb +0 -200
- data/test/scanner/test_groups.rb +0 -79
- data/test/scanner/test_keep.rb +0 -35
- data/test/scanner/test_literals.rb +0 -89
- data/test/scanner/test_meta.rb +0 -40
- data/test/scanner/test_properties.rb +0 -312
- data/test/scanner/test_quantifiers.rb +0 -37
- data/test/scanner/test_refcalls.rb +0 -52
- data/test/scanner/test_scripts.rb +0 -53
- data/test/scanner/test_sets.rb +0 -119
- data/test/scanner/test_types.rb +0 -35
- data/test/scanner/test_unicode_blocks.rb +0 -30
- data/test/support/disable_autotest.rb +0 -8
- data/test/syntax/test_all.rb +0 -6
- data/test/syntax/test_syntax.rb +0 -61
- data/test/syntax/test_syntax_token_map.rb +0 -25
- data/test/syntax/versions/test_1.8.rb +0 -55
- data/test/syntax/versions/test_1.9.1.rb +0 -36
- data/test/syntax/versions/test_1.9.3.rb +0 -32
- data/test/syntax/versions/test_2.0.0.rb +0 -37
- data/test/syntax/versions/test_2.2.0.rb +0 -32
- data/test/syntax/versions/test_aliases.rb +0 -129
- data/test/syntax/versions/test_all.rb +0 -5
- data/test/test_all.rb +0 -5
- data/test/token/test_all.rb +0 -2
- data/test/token/test_token.rb +0 -107
data/test/parser/test_anchors.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class TestParserAnchors < Test::Unit::TestCase
|
4
|
-
|
5
|
-
tests = {
|
6
|
-
'^a' => [0, :anchor, :bol, Anchor::BOL],
|
7
|
-
'a$' => [1, :anchor, :eol, Anchor::EOL],
|
8
|
-
|
9
|
-
'\Aa' => [0, :anchor, :bos, Anchor::BOS],
|
10
|
-
'a\z' => [1, :anchor, :eos, Anchor::EOS],
|
11
|
-
'a\Z' => [1, :anchor, :eos_ob_eol, Anchor::EOSobEOL],
|
12
|
-
|
13
|
-
'a\b' => [1, :anchor, :word_boundary, Anchor::WordBoundary],
|
14
|
-
'a\B' => [1, :anchor, :nonword_boundary, Anchor::NonWordBoundary],
|
15
|
-
|
16
|
-
'a\G' => [1, :anchor, :match_start, Anchor::MatchStart],
|
17
|
-
|
18
|
-
"\\\\Aa" => [0, :escape, :backslash, EscapeSequence::Literal],
|
19
|
-
}
|
20
|
-
|
21
|
-
tests.each_with_index do |(pattern, (index, type, token, klass)), count|
|
22
|
-
define_method "test_parse_anchor_#{token}_#{count}" do
|
23
|
-
root = RP.parse(pattern, 'ruby/1.9')
|
24
|
-
exp = root.expressions.at(index)
|
25
|
-
|
26
|
-
assert exp.is_a?(klass),
|
27
|
-
"Expected #{klass}, but got #{exp.class.name}"
|
28
|
-
|
29
|
-
assert_equal type, exp.type
|
30
|
-
assert_equal token, exp.token
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
data/test/parser/test_conditionals.rb
DELETED
@@ -1,187 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class TestParserConditionals < Test::Unit::TestCase
|
4
|
-
|
5
|
-
def test_parse_conditional
|
6
|
-
regexp = /(?<A>a)(?(<A>)T|F)/
|
7
|
-
|
8
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
9
|
-
exp = root.expressions[1]
|
10
|
-
|
11
|
-
assert exp.is_a?(Conditional::Expression),
|
12
|
-
"Expected Condition, but got #{exp.class.name}"
|
13
|
-
|
14
|
-
assert_equal exp.type, :conditional
|
15
|
-
assert_equal exp.token, :open
|
16
|
-
assert_equal exp.text, '(?'
|
17
|
-
assert_equal exp.reference, 'A'
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_parse_conditional_condition
|
21
|
-
regexp = /(?<A>a)(?(<A>)T|F)/
|
22
|
-
|
23
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
24
|
-
exp = root[1].condition
|
25
|
-
|
26
|
-
assert exp.is_a?(Conditional::Condition),
|
27
|
-
"Expected Condition, but got #{exp.class.name}"
|
28
|
-
|
29
|
-
assert_equal exp.type, :conditional
|
30
|
-
assert_equal exp.token, :condition
|
31
|
-
assert_equal exp.text, '(<A>)'
|
32
|
-
assert_equal exp.reference, 'A'
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_parse_conditional_condition_with_number_ref
|
36
|
-
regexp = /(a)(?(1)T|F)/
|
37
|
-
|
38
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
39
|
-
exp = root[1].condition
|
40
|
-
|
41
|
-
assert exp.is_a?(Conditional::Condition),
|
42
|
-
"Expected Condition, but got #{exp.class.name}"
|
43
|
-
|
44
|
-
assert_equal exp.type, :conditional
|
45
|
-
assert_equal exp.token, :condition
|
46
|
-
assert_equal exp.text, '(1)'
|
47
|
-
assert_equal exp.reference, 1
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_parse_conditional_nested_groups
|
51
|
-
regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
|
52
|
-
|
53
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
54
|
-
|
55
|
-
assert_equal regexp.source, root.to_s
|
56
|
-
|
57
|
-
group = root.first
|
58
|
-
assert_equal Group::Capture, group.class
|
59
|
-
|
60
|
-
alt = group.first
|
61
|
-
assert_equal Alternation, alt.class
|
62
|
-
assert_equal 3, alt.length
|
63
|
-
|
64
|
-
all_captures = alt.all? do |exp|
|
65
|
-
exp.first.is_a?(Group::Capture)
|
66
|
-
end
|
67
|
-
|
68
|
-
assert_equal true, all_captures
|
69
|
-
|
70
|
-
subgroup = alt[2].first
|
71
|
-
conditional = subgroup.first
|
72
|
-
|
73
|
-
assert_equal Conditional::Expression, conditional.class
|
74
|
-
assert_equal 3, conditional.length
|
75
|
-
|
76
|
-
assert_equal Conditional::Condition, conditional[0].class
|
77
|
-
assert_equal '(2)', conditional[0].text
|
78
|
-
|
79
|
-
condition = conditional.condition
|
80
|
-
assert_equal Conditional::Condition, condition.class
|
81
|
-
assert_equal '(2)', condition.text
|
82
|
-
|
83
|
-
branches = conditional.branches
|
84
|
-
assert_equal 2, branches.length
|
85
|
-
assert_equal Array, branches.class
|
86
|
-
end
|
87
|
-
|
88
|
-
def test_parse_conditional_nested
|
89
|
-
regexp = /(a(b(c(d)(e))))(?(1)(?(2)d|(?(3)e|f))|(?(4)(?(5)g|h)))/
|
90
|
-
|
91
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
92
|
-
|
93
|
-
assert_equal regexp.source, root.to_s
|
94
|
-
|
95
|
-
{ 1 => [2, root[1]],
|
96
|
-
2 => [2, root[1][1][0]],
|
97
|
-
3 => [2, root[1][1][0][2][0]],
|
98
|
-
4 => [1, root[1][2][0]],
|
99
|
-
5 => [2, root[1][2][0][1][0]],
|
100
|
-
}.each do |index, test|
|
101
|
-
branch_count, exp = test
|
102
|
-
|
103
|
-
assert_equal Conditional::Expression, exp.class
|
104
|
-
assert_equal "(#{index})", exp.condition.text
|
105
|
-
assert_equal branch_count, exp.branches.length
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
def test_parse_conditional_nested_alternation
|
110
|
-
regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
|
111
|
-
|
112
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
113
|
-
|
114
|
-
assert_equal regexp.source, root.to_s
|
115
|
-
|
116
|
-
assert_equal Alternation, root.first.class
|
117
|
-
|
118
|
-
[ [3, 'b|c|d', root[0][0][1][1][0][0]],
|
119
|
-
[3, 'e|f|g', root[0][0][1][2][0][0]],
|
120
|
-
[3, 'i|j|k', root[0][0][3][1][0][0]],
|
121
|
-
[3, 'l|m|n', root[0][0][3][2][0][0]],
|
122
|
-
].each do |test|
|
123
|
-
alt_count, alt_text, exp = test
|
124
|
-
|
125
|
-
assert_equal Alternation, exp.class
|
126
|
-
assert_equal alt_text, exp.to_s
|
127
|
-
assert_equal alt_count, exp.alternatives.length
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
def test_parse_conditional_extra_separator
|
132
|
-
regexp = /(?<A>a)(?(<A>)T|)/
|
133
|
-
|
134
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
135
|
-
branches = root[1].branches
|
136
|
-
|
137
|
-
assert_equal 2, branches.length
|
138
|
-
|
139
|
-
seq_1, seq_2 = branches
|
140
|
-
|
141
|
-
[seq_1, seq_2].each do |seq|
|
142
|
-
assert seq.is_a?( Sequence ),
|
143
|
-
"Expected Condition, but got #{seq.class.name}"
|
144
|
-
|
145
|
-
assert_equal :expression, seq.type
|
146
|
-
assert_equal :sequence, seq.token
|
147
|
-
end
|
148
|
-
|
149
|
-
assert_equal 'T', seq_1.to_s
|
150
|
-
assert_equal '', seq_2.to_s
|
151
|
-
end
|
152
|
-
|
153
|
-
def test_parse_conditional_quantified
|
154
|
-
regexp = /(foo)(?(1)\d|(\w)){42}/
|
155
|
-
|
156
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
157
|
-
conditional = root[1]
|
158
|
-
|
159
|
-
assert conditional.quantified?
|
160
|
-
assert_equal '{42}', conditional.quantifier.text
|
161
|
-
assert_equal '(?(1)\d|(\w)){42}', conditional.to_s
|
162
|
-
refute conditional.branches.any?(&:quantified?)
|
163
|
-
end
|
164
|
-
|
165
|
-
def test_parse_conditional_branch_content_quantified
|
166
|
-
regexp = /(foo)(?(1)\d{23}|(\w){42})/
|
167
|
-
|
168
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
169
|
-
conditional = root[1]
|
170
|
-
|
171
|
-
refute conditional.quantified?
|
172
|
-
refute conditional.branches.any?(&:quantified?)
|
173
|
-
assert conditional.branches[0][0].quantified?
|
174
|
-
assert_equal '{23}', conditional.branches[0][0].quantifier.text
|
175
|
-
assert conditional.branches[1][0].quantified?
|
176
|
-
assert_equal '{42}', conditional.branches[1][0].quantifier.text
|
177
|
-
end
|
178
|
-
|
179
|
-
# For source (text) expressions only, ruby raises an error otherwise.
|
180
|
-
def test_parse_conditional_excessive_branches
|
181
|
-
regexp = '(?<A>a)(?(<A>)T|F|X)'
|
182
|
-
|
183
|
-
assert_raise( Conditional::TooManyBranches ) {
|
184
|
-
RP.parse(regexp, 'ruby/2.0')
|
185
|
-
}
|
186
|
-
end
|
187
|
-
end
|
data/test/parser/test_errors.rb
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class ParserErrors < Test::Unit::TestCase
|
4
|
-
def setup
|
5
|
-
@rp = Regexp::Parser.new
|
6
|
-
@rp.parse(/foo/)
|
7
|
-
end
|
8
|
-
|
9
|
-
def test_parser_unknown_token_type
|
10
|
-
assert_raise( Regexp::Parser::UnknownTokenTypeError ) {
|
11
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:foo, :bar))
|
12
|
-
}
|
13
|
-
end
|
14
|
-
|
15
|
-
def test_parser_unknown_set_token
|
16
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
17
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:set, :foo))
|
18
|
-
}
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_parser_unknown_meta_token
|
22
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
23
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:meta, :foo))
|
24
|
-
}
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_parser_unknown_character_type_token
|
28
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
29
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:type, :foo))
|
30
|
-
}
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_parser_unknown_unicode_property_token
|
34
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
35
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:property, :foo))
|
36
|
-
}
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_parser_unknown_unicode_nonproperty_token
|
40
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
41
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:nonproperty, :foo))
|
42
|
-
}
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_parser_unknown_anchor_token
|
46
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
47
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:anchor, :foo))
|
48
|
-
}
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_parser_unknown_quantifier_token
|
52
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
53
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:quantifier, :foo))
|
54
|
-
}
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_parser_unknown_group_open_token
|
58
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
59
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:group, :foo))
|
60
|
-
}
|
61
|
-
end
|
62
|
-
|
63
|
-
end
|
data/test/parser/test_escapes.rb
DELETED
@@ -1,134 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class TestParserEscapes < Test::Unit::TestCase
|
4
|
-
|
5
|
-
tests = {
|
6
|
-
/a\ac/ => [1, :escape, :bell, EscapeSequence::Bell],
|
7
|
-
/a\ec/ => [1, :escape, :escape, EscapeSequence::AsciiEscape],
|
8
|
-
/a\fc/ => [1, :escape, :form_feed, EscapeSequence::FormFeed],
|
9
|
-
/a\nc/ => [1, :escape, :newline, EscapeSequence::Newline],
|
10
|
-
/a\rc/ => [1, :escape, :carriage, EscapeSequence::Return],
|
11
|
-
/a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
|
12
|
-
/a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],
|
13
|
-
|
14
|
-
# meta character escapes
|
15
|
-
/a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
|
16
|
-
/a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
|
17
|
-
/a\*c/ => [1, :escape, :zero_or_more, EscapeSequence::Literal],
|
18
|
-
/a\+c/ => [1, :escape, :one_or_more, EscapeSequence::Literal],
|
19
|
-
/a\|c/ => [1, :escape, :alternation, EscapeSequence::Literal],
|
20
|
-
/a\(c/ => [1, :escape, :group_open, EscapeSequence::Literal],
|
21
|
-
/a\)c/ => [1, :escape, :group_close, EscapeSequence::Literal],
|
22
|
-
/a\{c/ => [1, :escape, :interval_open, EscapeSequence::Literal],
|
23
|
-
/a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
|
24
|
-
|
25
|
-
# unicode escapes
|
26
|
-
/a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Codepoint],
|
27
|
-
/a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
|
28
|
-
/a\u{10FFFF}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
|
29
|
-
|
30
|
-
# hex escapes
|
31
|
-
/a\xFF/n => [1, :escape, :hex, EscapeSequence::Hex],
|
32
|
-
|
33
|
-
# octal escapes
|
34
|
-
/a\177/n => [1, :escape, :octal, EscapeSequence::Octal],
|
35
|
-
}
|
36
|
-
|
37
|
-
tests.each_with_index do |(pattern, (index, type, token, klass)), count|
|
38
|
-
define_method "test_parse_escape_#{token}_#{count+=1}" do
|
39
|
-
root = RP.parse(pattern, 'ruby/1.9')
|
40
|
-
exp = root.expressions.at(index)
|
41
|
-
|
42
|
-
assert exp.is_a?(klass),
|
43
|
-
"Expected #{klass}, but got #{exp.class.name}"
|
44
|
-
|
45
|
-
assert_equal type, exp.type
|
46
|
-
assert_equal token, exp.token
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_parse_chars_and_codepoints
|
51
|
-
root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
|
52
|
-
|
53
|
-
assert_equal "\n", root[0].char
|
54
|
-
assert_equal 10, root[0].codepoint
|
55
|
-
|
56
|
-
assert_equal "?", root[1].char
|
57
|
-
assert_equal 63, root[1].codepoint
|
58
|
-
|
59
|
-
assert_equal "A", root[2].char
|
60
|
-
assert_equal 65, root[2].codepoint
|
61
|
-
|
62
|
-
assert_equal "B", root[3].char
|
63
|
-
assert_equal 66, root[3].codepoint
|
64
|
-
|
65
|
-
assert_equal "C", root[4].char
|
66
|
-
assert_equal 67, root[4].codepoint
|
67
|
-
|
68
|
-
assert_equal ["D", "E"], root[5].chars
|
69
|
-
assert_equal [68, 69], root[5].codepoints
|
70
|
-
end
|
71
|
-
|
72
|
-
def test_parse_escape_control_sequence_lower
|
73
|
-
root = RP.parse(/a\\\c2b/)
|
74
|
-
|
75
|
-
assert_equal EscapeSequence::Control, root[2].class
|
76
|
-
assert_equal '\\c2', root[2].text
|
77
|
-
assert_equal "\u0012", root[2].char
|
78
|
-
assert_equal 18, root[2].codepoint
|
79
|
-
end
|
80
|
-
|
81
|
-
def test_parse_escape_control_sequence_upper
|
82
|
-
root = RP.parse(/\d\\\C-C\w/)
|
83
|
-
|
84
|
-
assert_equal EscapeSequence::Control, root[2].class
|
85
|
-
assert_equal '\\C-C', root[2].text
|
86
|
-
assert_equal "\u0003", root[2].char
|
87
|
-
assert_equal 3, root[2].codepoint
|
88
|
-
end
|
89
|
-
|
90
|
-
def test_parse_escape_meta_sequence
|
91
|
-
root = RP.parse(/\Z\\\M-Z/n)
|
92
|
-
|
93
|
-
assert_equal EscapeSequence::Meta, root[2].class
|
94
|
-
assert_equal '\\M-Z', root[2].text
|
95
|
-
assert_equal "\u00DA", root[2].char
|
96
|
-
assert_equal 218, root[2].codepoint
|
97
|
-
end
|
98
|
-
|
99
|
-
def test_parse_escape_meta_control_sequence
|
100
|
-
root = RP.parse(/\A\\\M-\C-X/n)
|
101
|
-
|
102
|
-
assert_equal EscapeSequence::MetaControl, root[2].class
|
103
|
-
assert_equal '\\M-\\C-X', root[2].text
|
104
|
-
assert_equal "\u0098", root[2].char
|
105
|
-
assert_equal 152, root[2].codepoint
|
106
|
-
end
|
107
|
-
|
108
|
-
def test_parse_lower_c_meta_control_sequence
|
109
|
-
root = RP.parse(/\A\\\M-\cX/n)
|
110
|
-
|
111
|
-
assert_equal EscapeSequence::MetaControl, root[2].class
|
112
|
-
assert_equal '\\M-\\cX', root[2].text
|
113
|
-
assert_equal "\u0098", root[2].char
|
114
|
-
assert_equal 152, root[2].codepoint
|
115
|
-
end
|
116
|
-
|
117
|
-
def test_parse_escape_reverse_meta_control_sequence
|
118
|
-
root = RP.parse(/\A\\\C-\M-X/n)
|
119
|
-
|
120
|
-
assert_equal EscapeSequence::MetaControl, root[2].class
|
121
|
-
assert_equal '\\C-\\M-X', root[2].text
|
122
|
-
assert_equal "\u0098", root[2].char
|
123
|
-
assert_equal 152, root[2].codepoint
|
124
|
-
end
|
125
|
-
|
126
|
-
def test_parse_escape_reverse_lower_c_meta_control_sequence
|
127
|
-
root = RP.parse(/\A\\\c\M-X/n)
|
128
|
-
|
129
|
-
assert_equal EscapeSequence::MetaControl, root[2].class
|
130
|
-
assert_equal '\\c\\M-X', root[2].text
|
131
|
-
assert_equal "\u0098", root[2].char
|
132
|
-
assert_equal 152, root[2].codepoint
|
133
|
-
end
|
134
|
-
end
|
data/test/parser/test_free_space.rb
DELETED
@@ -1,139 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class ParserFreeSpace < Test::Unit::TestCase
|
4
|
-
|
5
|
-
def test_parse_free_space_spaces
|
6
|
-
regexp = /a ? b * c + d{2,4}/x
|
7
|
-
root = RP.parse(regexp)
|
8
|
-
|
9
|
-
0.upto(6) do |i|
|
10
|
-
if i.odd?
|
11
|
-
# Consecutive spaces get merged by the parser, thus the two spaces.
|
12
|
-
assert_equal WhiteSpace, root[i].class
|
13
|
-
assert_equal ' ', root[i].text
|
14
|
-
else
|
15
|
-
assert_equal Literal, root[i].class
|
16
|
-
assert_equal true, root[i].quantified?
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_parse_non_free_space_literals
|
22
|
-
regexp = /a b c d/
|
23
|
-
root = RP.parse(regexp)
|
24
|
-
|
25
|
-
assert_equal Literal, root.first.class
|
26
|
-
assert_equal 'a b c d', root.first.text
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_parse_free_space_comments
|
30
|
-
regexp = %r{
|
31
|
-
a ? # One letter
|
32
|
-
b {2,5} # Another one
|
33
|
-
[c-g] + # A set
|
34
|
-
(h|i|j) | # A group
|
35
|
-
klm *
|
36
|
-
nop +
|
37
|
-
}x
|
38
|
-
|
39
|
-
root = RP.parse(regexp)
|
40
|
-
|
41
|
-
alt = root.first
|
42
|
-
assert_equal Alternation, alt.class
|
43
|
-
|
44
|
-
alt_1 = alt.alternatives.first
|
45
|
-
assert_equal Alternative, alt_1.class
|
46
|
-
assert_equal 15, alt_1.length
|
47
|
-
|
48
|
-
[0, 2, 4, 6, 8, 12, 14].each do |i|
|
49
|
-
assert_equal WhiteSpace, alt_1[i].class
|
50
|
-
end
|
51
|
-
|
52
|
-
[3, 7, 11].each do |i|
|
53
|
-
assert_equal Comment, alt_1[i].class
|
54
|
-
end
|
55
|
-
|
56
|
-
alt_2 = alt.alternatives.last
|
57
|
-
assert_equal Alternative, alt_2.class
|
58
|
-
assert_equal 7, alt_2.length
|
59
|
-
|
60
|
-
[0, 2, 4, 6].each do |i|
|
61
|
-
assert_equal WhiteSpace, alt_2[i].class
|
62
|
-
end
|
63
|
-
|
64
|
-
assert_equal Comment, alt_2[1].class
|
65
|
-
end
|
66
|
-
|
67
|
-
def test_parse_free_space_nested_comments
|
68
|
-
# Tests depend on spacing and indentation, obviously.
|
69
|
-
regexp = %r{
|
70
|
-
# Group one
|
71
|
-
(
|
72
|
-
abc # Comment one
|
73
|
-
\d? # Optional \d
|
74
|
-
)+
|
75
|
-
|
76
|
-
# Group two
|
77
|
-
(
|
78
|
-
def # Comment two
|
79
|
-
\s? # Optional \s
|
80
|
-
)?
|
81
|
-
}x
|
82
|
-
|
83
|
-
root = RP.parse(regexp)
|
84
|
-
|
85
|
-
top_comment_1 = root[1]
|
86
|
-
assert_equal Comment, top_comment_1.class
|
87
|
-
assert_equal "# Group one\n", top_comment_1.text
|
88
|
-
assert_equal 7, top_comment_1.starts_at
|
89
|
-
|
90
|
-
top_comment_2 = root[5]
|
91
|
-
assert_equal Comment, top_comment_2.class
|
92
|
-
assert_equal "# Group two\n", top_comment_2.text
|
93
|
-
assert_equal 95, top_comment_2.starts_at
|
94
|
-
|
95
|
-
# Nested comments
|
96
|
-
[3, 7].each_with_index do |g, i|
|
97
|
-
group = root[g]
|
98
|
-
|
99
|
-
[3, 7].each do |c|
|
100
|
-
comment = group[c]
|
101
|
-
assert_equal Comment, comment.class
|
102
|
-
assert_equal 14, comment.text.length
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def test_parse_free_space_quantifiers
|
108
|
-
regexp = %r{
|
109
|
-
a
|
110
|
-
# comment 1
|
111
|
-
?
|
112
|
-
(
|
113
|
-
b # comment 2
|
114
|
-
# comment 3
|
115
|
-
+
|
116
|
-
)
|
117
|
-
# comment 4
|
118
|
-
*
|
119
|
-
}x
|
120
|
-
|
121
|
-
root = RP.parse(regexp)
|
122
|
-
|
123
|
-
literal_1 = root[1]
|
124
|
-
assert_equal Literal, literal_1.class
|
125
|
-
assert_equal true, literal_1.quantified?
|
126
|
-
assert_equal :zero_or_one, literal_1.quantifier.token
|
127
|
-
|
128
|
-
group = root[5]
|
129
|
-
assert_equal Group::Capture, group.class
|
130
|
-
assert_equal true, group.quantified?
|
131
|
-
assert_equal :zero_or_more, group.quantifier.token
|
132
|
-
|
133
|
-
literal_2 = group[1]
|
134
|
-
assert_equal Literal, literal_2.class
|
135
|
-
assert_equal true, literal_2.quantified?
|
136
|
-
assert_equal :one_or_more, literal_2.quantifier.token
|
137
|
-
end
|
138
|
-
|
139
|
-
end
|