regexp_parser 1.3.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -1
- data/Gemfile +3 -3
- data/README.md +12 -19
- data/Rakefile +3 -4
- data/lib/regexp_parser/expression.rb +28 -53
- data/lib/regexp_parser/expression/classes/backref.rb +18 -10
- data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
- data/lib/regexp_parser/expression/classes/escape.rb +0 -4
- data/lib/regexp_parser/expression/classes/group.rb +4 -2
- data/lib/regexp_parser/expression/classes/keep.rb +1 -3
- data/lib/regexp_parser/expression/methods/match.rb +13 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
- data/lib/regexp_parser/expression/methods/options.rb +35 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
- data/lib/regexp_parser/expression/methods/tests.rb +6 -15
- data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -6
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
- data/lib/regexp_parser/expression/subexpression.rb +3 -5
- data/lib/regexp_parser/lexer.rb +30 -44
- data/lib/regexp_parser/parser.rb +47 -24
- data/lib/regexp_parser/scanner.rb +1228 -1367
- data/lib/regexp_parser/scanner/char_type.rl +0 -3
- data/lib/regexp_parser/scanner/properties/long.yml +34 -1
- data/lib/regexp_parser/scanner/properties/short.yml +12 -0
- data/lib/regexp_parser/scanner/scanner.rl +101 -194
- data/lib/regexp_parser/syntax/tokens.rb +2 -10
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +3 -3
- data/spec/expression/base_spec.rb +94 -0
- data/spec/expression/clone_spec.rb +120 -0
- data/spec/expression/conditional_spec.rb +89 -0
- data/spec/expression/free_space_spec.rb +27 -0
- data/spec/expression/methods/match_length_spec.rb +161 -0
- data/spec/expression/methods/match_spec.rb +25 -0
- data/spec/expression/methods/strfregexp_spec.rb +224 -0
- data/spec/expression/methods/tests_spec.rb +99 -0
- data/spec/expression/methods/traverse_spec.rb +161 -0
- data/spec/expression/options_spec.rb +128 -0
- data/spec/expression/root_spec.rb +9 -0
- data/spec/expression/sequence_spec.rb +9 -0
- data/spec/expression/subexpression_spec.rb +50 -0
- data/spec/expression/to_h_spec.rb +26 -0
- data/spec/expression/to_s_spec.rb +100 -0
- data/spec/lexer/all_spec.rb +22 -0
- data/spec/lexer/conditionals_spec.rb +53 -0
- data/spec/lexer/delimiters_spec.rb +68 -0
- data/spec/lexer/escapes_spec.rb +14 -0
- data/spec/lexer/keep_spec.rb +10 -0
- data/spec/lexer/literals_spec.rb +89 -0
- data/spec/lexer/nesting_spec.rb +99 -0
- data/spec/lexer/refcalls_spec.rb +55 -0
- data/spec/parser/all_spec.rb +43 -0
- data/spec/parser/alternation_spec.rb +88 -0
- data/spec/parser/anchors_spec.rb +17 -0
- data/spec/parser/conditionals_spec.rb +179 -0
- data/spec/parser/errors_spec.rb +30 -0
- data/spec/parser/escapes_spec.rb +121 -0
- data/spec/parser/free_space_spec.rb +130 -0
- data/spec/parser/groups_spec.rb +108 -0
- data/spec/parser/keep_spec.rb +6 -0
- data/spec/parser/posix_classes_spec.rb +8 -0
- data/spec/parser/properties_spec.rb +115 -0
- data/spec/parser/quantifiers_spec.rb +52 -0
- data/spec/parser/refcalls_spec.rb +112 -0
- data/spec/parser/set/intersections_spec.rb +127 -0
- data/spec/parser/set/ranges_spec.rb +111 -0
- data/spec/parser/sets_spec.rb +178 -0
- data/spec/parser/types_spec.rb +18 -0
- data/spec/scanner/all_spec.rb +18 -0
- data/spec/scanner/anchors_spec.rb +21 -0
- data/spec/scanner/conditionals_spec.rb +128 -0
- data/spec/scanner/delimiters_spec.rb +52 -0
- data/spec/scanner/errors_spec.rb +67 -0
- data/spec/scanner/escapes_spec.rb +53 -0
- data/spec/scanner/free_space_spec.rb +133 -0
- data/spec/scanner/groups_spec.rb +52 -0
- data/spec/scanner/keep_spec.rb +10 -0
- data/spec/scanner/literals_spec.rb +49 -0
- data/spec/scanner/meta_spec.rb +18 -0
- data/spec/scanner/properties_spec.rb +64 -0
- data/spec/scanner/quantifiers_spec.rb +20 -0
- data/spec/scanner/refcalls_spec.rb +36 -0
- data/spec/scanner/sets_spec.rb +102 -0
- data/spec/scanner/types_spec.rb +14 -0
- data/spec/spec_helper.rb +15 -0
- data/{test → spec}/support/runner.rb +9 -8
- data/spec/support/shared_examples.rb +77 -0
- data/{test → spec}/support/warning_extractor.rb +5 -7
- data/spec/syntax/syntax_spec.rb +48 -0
- data/spec/syntax/syntax_token_map_spec.rb +23 -0
- data/spec/syntax/versions/1.8.6_spec.rb +17 -0
- data/spec/syntax/versions/1.9.1_spec.rb +10 -0
- data/spec/syntax/versions/1.9.3_spec.rb +9 -0
- data/spec/syntax/versions/2.0.0_spec.rb +13 -0
- data/spec/syntax/versions/2.2.0_spec.rb +9 -0
- data/spec/syntax/versions/aliases_spec.rb +37 -0
- data/spec/token/token_spec.rb +85 -0
- metadata +151 -146
- data/test/expression/test_all.rb +0 -12
- data/test/expression/test_base.rb +0 -90
- data/test/expression/test_clone.rb +0 -89
- data/test/expression/test_conditionals.rb +0 -113
- data/test/expression/test_free_space.rb +0 -35
- data/test/expression/test_set.rb +0 -84
- data/test/expression/test_strfregexp.rb +0 -230
- data/test/expression/test_subexpression.rb +0 -58
- data/test/expression/test_tests.rb +0 -99
- data/test/expression/test_to_h.rb +0 -59
- data/test/expression/test_to_s.rb +0 -104
- data/test/expression/test_traverse.rb +0 -161
- data/test/helpers.rb +0 -10
- data/test/lexer/test_all.rb +0 -41
- data/test/lexer/test_conditionals.rb +0 -127
- data/test/lexer/test_keep.rb +0 -24
- data/test/lexer/test_literals.rb +0 -130
- data/test/lexer/test_nesting.rb +0 -132
- data/test/lexer/test_refcalls.rb +0 -56
- data/test/parser/set/test_intersections.rb +0 -127
- data/test/parser/set/test_ranges.rb +0 -111
- data/test/parser/test_all.rb +0 -64
- data/test/parser/test_alternation.rb +0 -92
- data/test/parser/test_anchors.rb +0 -34
- data/test/parser/test_conditionals.rb +0 -187
- data/test/parser/test_errors.rb +0 -63
- data/test/parser/test_escapes.rb +0 -134
- data/test/parser/test_free_space.rb +0 -139
- data/test/parser/test_groups.rb +0 -289
- data/test/parser/test_keep.rb +0 -21
- data/test/parser/test_posix_classes.rb +0 -27
- data/test/parser/test_properties.rb +0 -133
- data/test/parser/test_quantifiers.rb +0 -301
- data/test/parser/test_refcalls.rb +0 -186
- data/test/parser/test_sets.rb +0 -179
- data/test/parser/test_types.rb +0 -50
- data/test/scanner/test_all.rb +0 -38
- data/test/scanner/test_anchors.rb +0 -38
- data/test/scanner/test_conditionals.rb +0 -184
- data/test/scanner/test_errors.rb +0 -91
- data/test/scanner/test_escapes.rb +0 -56
- data/test/scanner/test_free_space.rb +0 -200
- data/test/scanner/test_groups.rb +0 -79
- data/test/scanner/test_keep.rb +0 -35
- data/test/scanner/test_literals.rb +0 -89
- data/test/scanner/test_meta.rb +0 -40
- data/test/scanner/test_properties.rb +0 -312
- data/test/scanner/test_quantifiers.rb +0 -37
- data/test/scanner/test_refcalls.rb +0 -52
- data/test/scanner/test_scripts.rb +0 -53
- data/test/scanner/test_sets.rb +0 -119
- data/test/scanner/test_types.rb +0 -35
- data/test/scanner/test_unicode_blocks.rb +0 -30
- data/test/support/disable_autotest.rb +0 -8
- data/test/syntax/test_all.rb +0 -6
- data/test/syntax/test_syntax.rb +0 -61
- data/test/syntax/test_syntax_token_map.rb +0 -25
- data/test/syntax/versions/test_1.8.rb +0 -55
- data/test/syntax/versions/test_1.9.1.rb +0 -36
- data/test/syntax/versions/test_1.9.3.rb +0 -32
- data/test/syntax/versions/test_2.0.0.rb +0 -37
- data/test/syntax/versions/test_2.2.0.rb +0 -32
- data/test/syntax/versions/test_aliases.rb +0 -129
- data/test/syntax/versions/test_all.rb +0 -5
- data/test/test_all.rb +0 -5
- data/test/token/test_all.rb +0 -2
- data/test/token/test_token.rb +0 -107
data/test/parser/test_anchors.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class TestParserAnchors < Test::Unit::TestCase
|
4
|
-
|
5
|
-
tests = {
|
6
|
-
'^a' => [0, :anchor, :bol, Anchor::BOL],
|
7
|
-
'a$' => [1, :anchor, :eol, Anchor::EOL],
|
8
|
-
|
9
|
-
'\Aa' => [0, :anchor, :bos, Anchor::BOS],
|
10
|
-
'a\z' => [1, :anchor, :eos, Anchor::EOS],
|
11
|
-
'a\Z' => [1, :anchor, :eos_ob_eol, Anchor::EOSobEOL],
|
12
|
-
|
13
|
-
'a\b' => [1, :anchor, :word_boundary, Anchor::WordBoundary],
|
14
|
-
'a\B' => [1, :anchor, :nonword_boundary, Anchor::NonWordBoundary],
|
15
|
-
|
16
|
-
'a\G' => [1, :anchor, :match_start, Anchor::MatchStart],
|
17
|
-
|
18
|
-
"\\\\Aa" => [0, :escape, :backslash, EscapeSequence::Literal],
|
19
|
-
}
|
20
|
-
|
21
|
-
tests.each_with_index do |(pattern, (index, type, token, klass)), count|
|
22
|
-
define_method "test_parse_anchor_#{token}_#{count}" do
|
23
|
-
root = RP.parse(pattern, 'ruby/1.9')
|
24
|
-
exp = root.expressions.at(index)
|
25
|
-
|
26
|
-
assert exp.is_a?(klass),
|
27
|
-
"Expected #{klass}, but got #{exp.class.name}"
|
28
|
-
|
29
|
-
assert_equal type, exp.type
|
30
|
-
assert_equal token, exp.token
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
@@ -1,187 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class TestParserConditionals < Test::Unit::TestCase
|
4
|
-
|
5
|
-
def test_parse_conditional
|
6
|
-
regexp = /(?<A>a)(?(<A>)T|F)/
|
7
|
-
|
8
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
9
|
-
exp = root.expressions[1]
|
10
|
-
|
11
|
-
assert exp.is_a?(Conditional::Expression),
|
12
|
-
"Expected Condition, but got #{exp.class.name}"
|
13
|
-
|
14
|
-
assert_equal exp.type, :conditional
|
15
|
-
assert_equal exp.token, :open
|
16
|
-
assert_equal exp.text, '(?'
|
17
|
-
assert_equal exp.reference, 'A'
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_parse_conditional_condition
|
21
|
-
regexp = /(?<A>a)(?(<A>)T|F)/
|
22
|
-
|
23
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
24
|
-
exp = root[1].condition
|
25
|
-
|
26
|
-
assert exp.is_a?(Conditional::Condition),
|
27
|
-
"Expected Condition, but got #{exp.class.name}"
|
28
|
-
|
29
|
-
assert_equal exp.type, :conditional
|
30
|
-
assert_equal exp.token, :condition
|
31
|
-
assert_equal exp.text, '(<A>)'
|
32
|
-
assert_equal exp.reference, 'A'
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_parse_conditional_condition_with_number_ref
|
36
|
-
regexp = /(a)(?(1)T|F)/
|
37
|
-
|
38
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
39
|
-
exp = root[1].condition
|
40
|
-
|
41
|
-
assert exp.is_a?(Conditional::Condition),
|
42
|
-
"Expected Condition, but got #{exp.class.name}"
|
43
|
-
|
44
|
-
assert_equal exp.type, :conditional
|
45
|
-
assert_equal exp.token, :condition
|
46
|
-
assert_equal exp.text, '(1)'
|
47
|
-
assert_equal exp.reference, 1
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_parse_conditional_nested_groups
|
51
|
-
regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
|
52
|
-
|
53
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
54
|
-
|
55
|
-
assert_equal regexp.source, root.to_s
|
56
|
-
|
57
|
-
group = root.first
|
58
|
-
assert_equal Group::Capture, group.class
|
59
|
-
|
60
|
-
alt = group.first
|
61
|
-
assert_equal Alternation, alt.class
|
62
|
-
assert_equal 3, alt.length
|
63
|
-
|
64
|
-
all_captures = alt.all? do |exp|
|
65
|
-
exp.first.is_a?(Group::Capture)
|
66
|
-
end
|
67
|
-
|
68
|
-
assert_equal true, all_captures
|
69
|
-
|
70
|
-
subgroup = alt[2].first
|
71
|
-
conditional = subgroup.first
|
72
|
-
|
73
|
-
assert_equal Conditional::Expression, conditional.class
|
74
|
-
assert_equal 3, conditional.length
|
75
|
-
|
76
|
-
assert_equal Conditional::Condition, conditional[0].class
|
77
|
-
assert_equal '(2)', conditional[0].text
|
78
|
-
|
79
|
-
condition = conditional.condition
|
80
|
-
assert_equal Conditional::Condition, condition.class
|
81
|
-
assert_equal '(2)', condition.text
|
82
|
-
|
83
|
-
branches = conditional.branches
|
84
|
-
assert_equal 2, branches.length
|
85
|
-
assert_equal Array, branches.class
|
86
|
-
end
|
87
|
-
|
88
|
-
def test_parse_conditional_nested
|
89
|
-
regexp = /(a(b(c(d)(e))))(?(1)(?(2)d|(?(3)e|f))|(?(4)(?(5)g|h)))/
|
90
|
-
|
91
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
92
|
-
|
93
|
-
assert_equal regexp.source, root.to_s
|
94
|
-
|
95
|
-
{ 1 => [2, root[1]],
|
96
|
-
2 => [2, root[1][1][0]],
|
97
|
-
3 => [2, root[1][1][0][2][0]],
|
98
|
-
4 => [1, root[1][2][0]],
|
99
|
-
5 => [2, root[1][2][0][1][0]],
|
100
|
-
}.each do |index, test|
|
101
|
-
branch_count, exp = test
|
102
|
-
|
103
|
-
assert_equal Conditional::Expression, exp.class
|
104
|
-
assert_equal "(#{index})", exp.condition.text
|
105
|
-
assert_equal branch_count, exp.branches.length
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
def test_parse_conditional_nested_alternation
|
110
|
-
regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
|
111
|
-
|
112
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
113
|
-
|
114
|
-
assert_equal regexp.source, root.to_s
|
115
|
-
|
116
|
-
assert_equal Alternation, root.first.class
|
117
|
-
|
118
|
-
[ [3, 'b|c|d', root[0][0][1][1][0][0]],
|
119
|
-
[3, 'e|f|g', root[0][0][1][2][0][0]],
|
120
|
-
[3, 'i|j|k', root[0][0][3][1][0][0]],
|
121
|
-
[3, 'l|m|n', root[0][0][3][2][0][0]],
|
122
|
-
].each do |test|
|
123
|
-
alt_count, alt_text, exp = test
|
124
|
-
|
125
|
-
assert_equal Alternation, exp.class
|
126
|
-
assert_equal alt_text, exp.to_s
|
127
|
-
assert_equal alt_count, exp.alternatives.length
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
def test_parse_conditional_extra_separator
|
132
|
-
regexp = /(?<A>a)(?(<A>)T|)/
|
133
|
-
|
134
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
135
|
-
branches = root[1].branches
|
136
|
-
|
137
|
-
assert_equal 2, branches.length
|
138
|
-
|
139
|
-
seq_1, seq_2 = branches
|
140
|
-
|
141
|
-
[seq_1, seq_2].each do |seq|
|
142
|
-
assert seq.is_a?( Sequence ),
|
143
|
-
"Expected Condition, but got #{seq.class.name}"
|
144
|
-
|
145
|
-
assert_equal :expression, seq.type
|
146
|
-
assert_equal :sequence, seq.token
|
147
|
-
end
|
148
|
-
|
149
|
-
assert_equal 'T', seq_1.to_s
|
150
|
-
assert_equal '', seq_2.to_s
|
151
|
-
end
|
152
|
-
|
153
|
-
def test_parse_conditional_quantified
|
154
|
-
regexp = /(foo)(?(1)\d|(\w)){42}/
|
155
|
-
|
156
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
157
|
-
conditional = root[1]
|
158
|
-
|
159
|
-
assert conditional.quantified?
|
160
|
-
assert_equal '{42}', conditional.quantifier.text
|
161
|
-
assert_equal '(?(1)\d|(\w)){42}', conditional.to_s
|
162
|
-
refute conditional.branches.any?(&:quantified?)
|
163
|
-
end
|
164
|
-
|
165
|
-
def test_parse_conditional_branch_content_quantified
|
166
|
-
regexp = /(foo)(?(1)\d{23}|(\w){42})/
|
167
|
-
|
168
|
-
root = RP.parse(regexp, 'ruby/2.0')
|
169
|
-
conditional = root[1]
|
170
|
-
|
171
|
-
refute conditional.quantified?
|
172
|
-
refute conditional.branches.any?(&:quantified?)
|
173
|
-
assert conditional.branches[0][0].quantified?
|
174
|
-
assert_equal '{23}', conditional.branches[0][0].quantifier.text
|
175
|
-
assert conditional.branches[1][0].quantified?
|
176
|
-
assert_equal '{42}', conditional.branches[1][0].quantifier.text
|
177
|
-
end
|
178
|
-
|
179
|
-
# For source (text) expressions only, ruby raises an error otherwise.
|
180
|
-
def test_parse_conditional_excessive_branches
|
181
|
-
regexp = '(?<A>a)(?(<A>)T|F|X)'
|
182
|
-
|
183
|
-
assert_raise( Conditional::TooManyBranches ) {
|
184
|
-
RP.parse(regexp, 'ruby/2.0')
|
185
|
-
}
|
186
|
-
end
|
187
|
-
end
|
data/test/parser/test_errors.rb
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class ParserErrors < Test::Unit::TestCase
|
4
|
-
def setup
|
5
|
-
@rp = Regexp::Parser.new
|
6
|
-
@rp.parse(/foo/)
|
7
|
-
end
|
8
|
-
|
9
|
-
def test_parser_unknown_token_type
|
10
|
-
assert_raise( Regexp::Parser::UnknownTokenTypeError ) {
|
11
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:foo, :bar))
|
12
|
-
}
|
13
|
-
end
|
14
|
-
|
15
|
-
def test_parser_unknown_set_token
|
16
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
17
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:set, :foo))
|
18
|
-
}
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_parser_unknown_meta_token
|
22
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
23
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:meta, :foo))
|
24
|
-
}
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_parser_unknown_character_type_token
|
28
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
29
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:type, :foo))
|
30
|
-
}
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_parser_unknown_unicode_property_token
|
34
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
35
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:property, :foo))
|
36
|
-
}
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_parser_unknown_unicode_nonproperty_token
|
40
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
41
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:nonproperty, :foo))
|
42
|
-
}
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_parser_unknown_anchor_token
|
46
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
47
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:anchor, :foo))
|
48
|
-
}
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_parser_unknown_quantifier_token
|
52
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
53
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:quantifier, :foo))
|
54
|
-
}
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_parser_unknown_group_open_token
|
58
|
-
assert_raise( Regexp::Parser::UnknownTokenError ) {
|
59
|
-
@rp.__send__(:parse_token, Regexp::Token.new(:group, :foo))
|
60
|
-
}
|
61
|
-
end
|
62
|
-
|
63
|
-
end
|
data/test/parser/test_escapes.rb
DELETED
@@ -1,134 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class TestParserEscapes < Test::Unit::TestCase
|
4
|
-
|
5
|
-
tests = {
|
6
|
-
/a\ac/ => [1, :escape, :bell, EscapeSequence::Bell],
|
7
|
-
/a\ec/ => [1, :escape, :escape, EscapeSequence::AsciiEscape],
|
8
|
-
/a\fc/ => [1, :escape, :form_feed, EscapeSequence::FormFeed],
|
9
|
-
/a\nc/ => [1, :escape, :newline, EscapeSequence::Newline],
|
10
|
-
/a\rc/ => [1, :escape, :carriage, EscapeSequence::Return],
|
11
|
-
/a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
|
12
|
-
/a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],
|
13
|
-
|
14
|
-
# meta character escapes
|
15
|
-
/a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
|
16
|
-
/a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
|
17
|
-
/a\*c/ => [1, :escape, :zero_or_more, EscapeSequence::Literal],
|
18
|
-
/a\+c/ => [1, :escape, :one_or_more, EscapeSequence::Literal],
|
19
|
-
/a\|c/ => [1, :escape, :alternation, EscapeSequence::Literal],
|
20
|
-
/a\(c/ => [1, :escape, :group_open, EscapeSequence::Literal],
|
21
|
-
/a\)c/ => [1, :escape, :group_close, EscapeSequence::Literal],
|
22
|
-
/a\{c/ => [1, :escape, :interval_open, EscapeSequence::Literal],
|
23
|
-
/a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
|
24
|
-
|
25
|
-
# unicode escapes
|
26
|
-
/a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Codepoint],
|
27
|
-
/a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
|
28
|
-
/a\u{10FFFF}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
|
29
|
-
|
30
|
-
# hex escapes
|
31
|
-
/a\xFF/n => [1, :escape, :hex, EscapeSequence::Hex],
|
32
|
-
|
33
|
-
# octal escapes
|
34
|
-
/a\177/n => [1, :escape, :octal, EscapeSequence::Octal],
|
35
|
-
}
|
36
|
-
|
37
|
-
tests.each_with_index do |(pattern, (index, type, token, klass)), count|
|
38
|
-
define_method "test_parse_escape_#{token}_#{count+=1}" do
|
39
|
-
root = RP.parse(pattern, 'ruby/1.9')
|
40
|
-
exp = root.expressions.at(index)
|
41
|
-
|
42
|
-
assert exp.is_a?(klass),
|
43
|
-
"Expected #{klass}, but got #{exp.class.name}"
|
44
|
-
|
45
|
-
assert_equal type, exp.type
|
46
|
-
assert_equal token, exp.token
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_parse_chars_and_codepoints
|
51
|
-
root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
|
52
|
-
|
53
|
-
assert_equal "\n", root[0].char
|
54
|
-
assert_equal 10, root[0].codepoint
|
55
|
-
|
56
|
-
assert_equal "?", root[1].char
|
57
|
-
assert_equal 63, root[1].codepoint
|
58
|
-
|
59
|
-
assert_equal "A", root[2].char
|
60
|
-
assert_equal 65, root[2].codepoint
|
61
|
-
|
62
|
-
assert_equal "B", root[3].char
|
63
|
-
assert_equal 66, root[3].codepoint
|
64
|
-
|
65
|
-
assert_equal "C", root[4].char
|
66
|
-
assert_equal 67, root[4].codepoint
|
67
|
-
|
68
|
-
assert_equal ["D", "E"], root[5].chars
|
69
|
-
assert_equal [68, 69], root[5].codepoints
|
70
|
-
end
|
71
|
-
|
72
|
-
def test_parse_escape_control_sequence_lower
|
73
|
-
root = RP.parse(/a\\\c2b/)
|
74
|
-
|
75
|
-
assert_equal EscapeSequence::Control, root[2].class
|
76
|
-
assert_equal '\\c2', root[2].text
|
77
|
-
assert_equal "\u0012", root[2].char
|
78
|
-
assert_equal 18, root[2].codepoint
|
79
|
-
end
|
80
|
-
|
81
|
-
def test_parse_escape_control_sequence_upper
|
82
|
-
root = RP.parse(/\d\\\C-C\w/)
|
83
|
-
|
84
|
-
assert_equal EscapeSequence::Control, root[2].class
|
85
|
-
assert_equal '\\C-C', root[2].text
|
86
|
-
assert_equal "\u0003", root[2].char
|
87
|
-
assert_equal 3, root[2].codepoint
|
88
|
-
end
|
89
|
-
|
90
|
-
def test_parse_escape_meta_sequence
|
91
|
-
root = RP.parse(/\Z\\\M-Z/n)
|
92
|
-
|
93
|
-
assert_equal EscapeSequence::Meta, root[2].class
|
94
|
-
assert_equal '\\M-Z', root[2].text
|
95
|
-
assert_equal "\u00DA", root[2].char
|
96
|
-
assert_equal 218, root[2].codepoint
|
97
|
-
end
|
98
|
-
|
99
|
-
def test_parse_escape_meta_control_sequence
|
100
|
-
root = RP.parse(/\A\\\M-\C-X/n)
|
101
|
-
|
102
|
-
assert_equal EscapeSequence::MetaControl, root[2].class
|
103
|
-
assert_equal '\\M-\\C-X', root[2].text
|
104
|
-
assert_equal "\u0098", root[2].char
|
105
|
-
assert_equal 152, root[2].codepoint
|
106
|
-
end
|
107
|
-
|
108
|
-
def test_parse_lower_c_meta_control_sequence
|
109
|
-
root = RP.parse(/\A\\\M-\cX/n)
|
110
|
-
|
111
|
-
assert_equal EscapeSequence::MetaControl, root[2].class
|
112
|
-
assert_equal '\\M-\\cX', root[2].text
|
113
|
-
assert_equal "\u0098", root[2].char
|
114
|
-
assert_equal 152, root[2].codepoint
|
115
|
-
end
|
116
|
-
|
117
|
-
def test_parse_escape_reverse_meta_control_sequence
|
118
|
-
root = RP.parse(/\A\\\C-\M-X/n)
|
119
|
-
|
120
|
-
assert_equal EscapeSequence::MetaControl, root[2].class
|
121
|
-
assert_equal '\\C-\\M-X', root[2].text
|
122
|
-
assert_equal "\u0098", root[2].char
|
123
|
-
assert_equal 152, root[2].codepoint
|
124
|
-
end
|
125
|
-
|
126
|
-
def test_parse_escape_reverse_lower_c_meta_control_sequence
|
127
|
-
root = RP.parse(/\A\\\c\M-X/n)
|
128
|
-
|
129
|
-
assert_equal EscapeSequence::MetaControl, root[2].class
|
130
|
-
assert_equal '\\c\\M-X', root[2].text
|
131
|
-
assert_equal "\u0098", root[2].char
|
132
|
-
assert_equal 152, root[2].codepoint
|
133
|
-
end
|
134
|
-
end
|
@@ -1,139 +0,0 @@
|
|
1
|
-
require File.expand_path("../../helpers", __FILE__)
|
2
|
-
|
3
|
-
class ParserFreeSpace < Test::Unit::TestCase
|
4
|
-
|
5
|
-
def test_parse_free_space_spaces
|
6
|
-
regexp = /a ? b * c + d{2,4}/x
|
7
|
-
root = RP.parse(regexp)
|
8
|
-
|
9
|
-
0.upto(6) do |i|
|
10
|
-
if i.odd?
|
11
|
-
# Consecutive spaces get merged by the parser, thus the two spaces.
|
12
|
-
assert_equal WhiteSpace, root[i].class
|
13
|
-
assert_equal ' ', root[i].text
|
14
|
-
else
|
15
|
-
assert_equal Literal, root[i].class
|
16
|
-
assert_equal true, root[i].quantified?
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_parse_non_free_space_literals
|
22
|
-
regexp = /a b c d/
|
23
|
-
root = RP.parse(regexp)
|
24
|
-
|
25
|
-
assert_equal Literal, root.first.class
|
26
|
-
assert_equal 'a b c d', root.first.text
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_parse_free_space_comments
|
30
|
-
regexp = %r{
|
31
|
-
a ? # One letter
|
32
|
-
b {2,5} # Another one
|
33
|
-
[c-g] + # A set
|
34
|
-
(h|i|j) | # A group
|
35
|
-
klm *
|
36
|
-
nop +
|
37
|
-
}x
|
38
|
-
|
39
|
-
root = RP.parse(regexp)
|
40
|
-
|
41
|
-
alt = root.first
|
42
|
-
assert_equal Alternation, alt.class
|
43
|
-
|
44
|
-
alt_1 = alt.alternatives.first
|
45
|
-
assert_equal Alternative, alt_1.class
|
46
|
-
assert_equal 15, alt_1.length
|
47
|
-
|
48
|
-
[0, 2, 4, 6, 8, 12, 14].each do |i|
|
49
|
-
assert_equal WhiteSpace, alt_1[i].class
|
50
|
-
end
|
51
|
-
|
52
|
-
[3, 7, 11].each do |i|
|
53
|
-
assert_equal Comment, alt_1[i].class
|
54
|
-
end
|
55
|
-
|
56
|
-
alt_2 = alt.alternatives.last
|
57
|
-
assert_equal Alternative, alt_2.class
|
58
|
-
assert_equal 7, alt_2.length
|
59
|
-
|
60
|
-
[0, 2, 4, 6].each do |i|
|
61
|
-
assert_equal WhiteSpace, alt_2[i].class
|
62
|
-
end
|
63
|
-
|
64
|
-
assert_equal Comment, alt_2[1].class
|
65
|
-
end
|
66
|
-
|
67
|
-
def test_parse_free_space_nested_comments
|
68
|
-
# Tests depend on spacing and indentation, obviously.
|
69
|
-
regexp = %r{
|
70
|
-
# Group one
|
71
|
-
(
|
72
|
-
abc # Comment one
|
73
|
-
\d? # Optional \d
|
74
|
-
)+
|
75
|
-
|
76
|
-
# Group two
|
77
|
-
(
|
78
|
-
def # Comment two
|
79
|
-
\s? # Optional \s
|
80
|
-
)?
|
81
|
-
}x
|
82
|
-
|
83
|
-
root = RP.parse(regexp)
|
84
|
-
|
85
|
-
top_comment_1 = root[1]
|
86
|
-
assert_equal Comment, top_comment_1.class
|
87
|
-
assert_equal "# Group one\n", top_comment_1.text
|
88
|
-
assert_equal 7, top_comment_1.starts_at
|
89
|
-
|
90
|
-
top_comment_2 = root[5]
|
91
|
-
assert_equal Comment, top_comment_2.class
|
92
|
-
assert_equal "# Group two\n", top_comment_2.text
|
93
|
-
assert_equal 95, top_comment_2.starts_at
|
94
|
-
|
95
|
-
# Nested comments
|
96
|
-
[3, 7].each_with_index do |g, i|
|
97
|
-
group = root[g]
|
98
|
-
|
99
|
-
[3, 7].each do |c|
|
100
|
-
comment = group[c]
|
101
|
-
assert_equal Comment, comment.class
|
102
|
-
assert_equal 14, comment.text.length
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def test_parse_free_space_quantifiers
|
108
|
-
regexp = %r{
|
109
|
-
a
|
110
|
-
# comment 1
|
111
|
-
?
|
112
|
-
(
|
113
|
-
b # comment 2
|
114
|
-
# comment 3
|
115
|
-
+
|
116
|
-
)
|
117
|
-
# comment 4
|
118
|
-
*
|
119
|
-
}x
|
120
|
-
|
121
|
-
root = RP.parse(regexp)
|
122
|
-
|
123
|
-
literal_1 = root[1]
|
124
|
-
assert_equal Literal, literal_1.class
|
125
|
-
assert_equal true, literal_1.quantified?
|
126
|
-
assert_equal :zero_or_one, literal_1.quantifier.token
|
127
|
-
|
128
|
-
group = root[5]
|
129
|
-
assert_equal Group::Capture, group.class
|
130
|
-
assert_equal true, group.quantified?
|
131
|
-
assert_equal :zero_or_more, group.quantifier.token
|
132
|
-
|
133
|
-
literal_2 = group[1]
|
134
|
-
assert_equal Literal, literal_2.class
|
135
|
-
assert_equal true, literal_2.quantified?
|
136
|
-
assert_equal :one_or_more, literal_2.quantifier.token
|
137
|
-
end
|
138
|
-
|
139
|
-
end
|