regexp_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +307 -0
- data/Rakefile +91 -0
- data/lib/regexp_parser/ctype.rb +48 -0
- data/lib/regexp_parser/expression/property.rb +108 -0
- data/lib/regexp_parser/expression/set.rb +59 -0
- data/lib/regexp_parser/expression.rb +287 -0
- data/lib/regexp_parser/lexer.rb +105 -0
- data/lib/regexp_parser/parser.rb +417 -0
- data/lib/regexp_parser/scanner/property.rl +534 -0
- data/lib/regexp_parser/scanner/scanner.rl +712 -0
- data/lib/regexp_parser/scanner.rb +3325 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
- data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +332 -0
- data/lib/regexp_parser/syntax.rb +172 -0
- data/lib/regexp_parser.rb +45 -0
- data/test/helpers.rb +8 -0
- data/test/lexer/test_all.rb +26 -0
- data/test/lexer/test_literals.rb +120 -0
- data/test/lexer/test_nesting.rb +107 -0
- data/test/lexer/test_refcalls.rb +45 -0
- data/test/parser/test_all.rb +44 -0
- data/test/parser/test_alternation.rb +46 -0
- data/test/parser/test_anchors.rb +35 -0
- data/test/parser/test_errors.rb +59 -0
- data/test/parser/test_escapes.rb +48 -0
- data/test/parser/test_expression.rb +51 -0
- data/test/parser/test_groups.rb +69 -0
- data/test/parser/test_properties.rb +346 -0
- data/test/parser/test_quantifiers.rb +236 -0
- data/test/parser/test_refcalls.rb +101 -0
- data/test/parser/test_sets.rb +99 -0
- data/test/scanner/test_all.rb +30 -0
- data/test/scanner/test_anchors.rb +35 -0
- data/test/scanner/test_errors.rb +36 -0
- data/test/scanner/test_escapes.rb +49 -0
- data/test/scanner/test_groups.rb +41 -0
- data/test/scanner/test_literals.rb +85 -0
- data/test/scanner/test_meta.rb +36 -0
- data/test/scanner/test_properties.rb +315 -0
- data/test/scanner/test_quantifiers.rb +38 -0
- data/test/scanner/test_refcalls.rb +45 -0
- data/test/scanner/test_scripts.rb +314 -0
- data/test/scanner/test_sets.rb +80 -0
- data/test/scanner/test_types.rb +30 -0
- data/test/syntax/ruby/test_1.8.rb +57 -0
- data/test/syntax/ruby/test_1.9.1.rb +39 -0
- data/test/syntax/ruby/test_1.9.3.rb +38 -0
- data/test/syntax/ruby/test_all.rb +12 -0
- data/test/syntax/test_all.rb +19 -0
- data/test/test_all.rb +4 -0
- metadata +160 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Checks the tokens the lexer produces for group back-references (\k) and
# sub-expression calls (\g) in their named, numbered, relative and
# nesting-level forms.
class LexerRefCalls < Test::Unit::TestCase

  # pattern => [token index, type, token, text, start offset, end offset, 0, 0]
  # The two trailing zeros are compared against the last two members of the
  # scanned token (presumably nesting depths -- confirm against Regexp::Token).
  tests = {
    # Group back-references, named, numbered, and relative
    '(?<X>abc)\k<X>' => [3, :backref, :name_ref, '\k<X>', 9, 14, 0, 0],
    "(?<X>abc)\\k'X'" => [3, :backref, :name_ref, "\\k'X'", 9, 14, 0, 0],

    '(abc)\k<1>' => [3, :backref, :number_ref, '\k<1>', 5, 10, 0, 0],
    "(abc)\\k'1'" => [3, :backref, :number_ref, "\\k'1'", 5, 10, 0, 0],

    '(abc)\k<-1>' => [3, :backref, :number_rel_ref, '\k<-1>', 5, 11, 0, 0],
    "(abc)\\k'-1'" => [3, :backref, :number_rel_ref, "\\k'-1'", 5, 11, 0, 0],

    # Sub-expression invocation, named, numbered, and relative
    '(?<X>abc)\g<X>' => [3, :backref, :name_call, '\g<X>', 9, 14, 0, 0],
    "(?<X>abc)\\g'X'" => [3, :backref, :name_call, "\\g'X'", 9, 14, 0, 0],

    '(abc)\g<1>' => [3, :backref, :number_call, '\g<1>', 5, 10, 0, 0],
    "(abc)\\g'1'" => [3, :backref, :number_call, "\\g'1'", 5, 10, 0, 0],

    '(abc)\g<-1>' => [3, :backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0],
    "(abc)\\g'-1'" => [3, :backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0],

    # Group back-references, with nesting level
    '(?<X>abc)\k<X-0>' => [3, :backref, :name_nest_ref, '\k<X-0>', 9, 16, 0, 0],
    "(?<X>abc)\\k'X-0'" => [3, :backref, :name_nest_ref, "\\k'X-0'", 9, 16, 0, 0],

    '(abc)\k<1-0>' => [3, :backref, :number_nest_ref, '\k<1-0>', 5, 12, 0, 0],
    "(abc)\\k'1-0'" => [3, :backref, :number_nest_ref, "\\k'1-0'", 5, 12, 0, 0],
  }

  # One test method per pattern; the counter keeps generated names unique.
  count = 0
  tests.each do |pattern, test|
    define_method "test_lexer_#{test[1]}_#{test[2]}_#{count+=1}" do

      token = RL.scan(pattern)[test[0]]

      # Everything after the index must match the token's members in order.
      assert_equal( test[1,7], token.to_a )

      # Members 3 and 4 are start and end offsets, so slice with a range.
      # String#[start, length] would treat the end offset as a length and
      # only yields the right text while the token ends the pattern.
      assert_equal( test[3], pattern[token[3]...token[4]] )

    end
  end

end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
%w{
|
4
|
+
alternation anchors errors escapes expression groups properties
|
5
|
+
quantifiers refcalls sets
|
6
|
+
}.each do|tc|
|
7
|
+
require File.expand_path("../test_#{tc}", __FILE__)
|
8
|
+
end
|
9
|
+
|
10
|
+
# Smoke tests for the parser entry point: the root node type, the base class
# of its children, and the node types produced for a small mixed pattern.
class TestParser < Test::Unit::TestCase

  # Parsing a plain string must hand back a Root expression.
  def test_parse_returns_a_root_expression
    assert_instance_of( Regexp::Expression::Root, RP.parse('abc'))
  end

  # Every top-level child of the root must descend from Expression::Base.
  def test_parse_root_contains_expressions
    root = RP.parse(/^a.c+[^one]{2,3}\b\d\\\C-C$/)

    assert( root.expressions.all?{|exp|
      exp.kind_of?(Regexp::Expression::Base)},
      "Not all nodes are instances of Regexp::Expression")
  end

  # too much going on here, it's just for development
  def test_parse_node_types
    root = RP.parse('^(one){2,3}([^d\]efm-qz\,\-]*)(ghi)+$')

    # expressions[1] is the "(one){2,3}" group.
    assert( root.expressions[1].expressions[0].is_a?(Literal),
      "Not a literal node, but should be")

    assert( root.expressions[1].quantified?, "Not quantified, but should be")

    # expressions[2] is the group wrapping the character set; the quantifier
    # sits on the set inside it, so the group itself is not quantified.
    assert( root.expressions[2].expressions[0].is_a?(CharacterSet),
      "Not a character set, but it should be")

    assert_equal( false, root.expressions[2].quantified? )

    # expressions[3] is the "(ghi)+" group.
    assert( root.expressions[3].is_a?(Group::Capture),
      "Not a group, but should be")

    assert_equal( true, root.expressions[3].quantified? )
  end

end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Walks the tree parsed from a pattern with a top-level alternation whose
# branches are groups that contain alternations of their own.
class ParserAlternation < Test::Unit::TestCase

  # TODO: these tests pass, but they show how hard and messy the tree is
  # to navigate

  # Parse the shared pattern afresh for every test.
  def setup
    @root = RP.parse('(ab??|cd*+|ef+)*|(gh|ij|kl)?')
  end

  # The first child of the root is the alternation itself.
  def test_parse_alternation_root
    assert_equal( true, @root.expressions[0].is_a?(Alternation) )
  end

  # Both branches are sequences, each starting with a capture group.
  def test_parse_alternation_alts
    branches = @root.expressions[0].alternatives

    [0, 1].each {|i| assert_equal( true, branches[i].is_a?(Sequence) ) }
    [0, 1].each {|i| assert_equal( true, branches[i][0].is_a?(Group::Capture) ) }

    assert_equal( 2, branches.length )
  end

  # The first capture group holds a nested alternation.
  def test_parse_alternation_nested
    inner = @root[0].alternatives[0][0][0]

    assert_equal( true, inner.is_a?(Alternation) )
  end

  # The first branch of the nested alternation is a two-literal sequence.
  def test_parse_alternation_nested_sequence
    first_branch = @root.expressions[0][0]
    sequence     = first_branch.expressions[0][0][0]

    assert_equal( true, sequence.is_a?(Sequence) )

    parts = sequence.expressions
    assert_equal( true, parts[0].is_a?(Literal) )
    assert_equal( true, parts[1].is_a?(Literal) )
    assert_equal( 2, parts.length )
  end

end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Generates one test per anchor pattern, checking the class, type and token
# of the expression found at a given position under the root.
class TestParserAnchors < Test::Unit::TestCase

  # pattern => [expression index, expected type, expected token, expected class]
  tests = {
    '^a' => [0, :anchor, :beginning_of_line, Anchor::BOL],
    'a$' => [1, :anchor, :end_of_line, Anchor::EOL],

    '\Aa' => [0, :anchor, :bos, Anchor::BOS],
    'a\z' => [1, :anchor, :eos, Anchor::EOS],
    'a\Z' => [1, :anchor, :eos_ob_eol, Anchor::EOSobEOL],

    'a\b' => [1, :anchor, :word_boundary, Anchor::WordBoundary],
    'a\B' => [1, :anchor, :nonword_boundary, Anchor::NonWordBoundary],

    'a\G' => [1, :anchor, :match_start, Anchor::MatchStart],

    # An escaped backslash followed by "Aa" must not be read as \A.
    "\\\\Aa" => [0, :escape, :backslash, EscapeSequence::Literal],
  }

  # The 1-based position in the table keeps generated method names unique.
  tests.each_with_index do |(pattern, (index, type, token, klass)), position|
    define_method "test_parse_anchor_#{token}_#{position + 1}" do
      exp = RP.parse(pattern, 'ruby/1.9').expressions[index]

      assert( exp.is_a?( klass ),
             "Expected #{klass}, but got #{exp.class.name}")

      assert_equal( type, exp.type )
      assert_equal( token, exp.token )
    end
  end

end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Checks that RP.parse_token raises the expected error for a token whose type
# is unknown, and for an unknown token under each known type.
class ParserErrors < Test::Unit::TestCase

  # A type the parser does not know at all.
  def test_parser_unknown_token_type
    assert_raise( Regexp::Parser::UnknownTokenTypeError ) do
      RP.parse_token(Regexp::Token.new(:foo, :bar))
    end
  end

  # [test-name label, token type]; each type is paired with the bogus token
  # :foo and must raise UnknownTokenError.
  [
    ['set',                 :set],
    ['meta',                :meta],
    ['character_type',      :type],
    ['unicode_property',    :property],
    ['unicode_nonproperty', :nonproperty],
    ['anchor',              :anchor],
    ['quantifier',          :quantifier],
    ['group_open',          :group],
  ].each do |label, type|
    define_method "test_parser_unknown_#{label}_token" do
      assert_raise( Regexp::Parser::UnknownTokenError ) do
        RP.parse_token(Regexp::Token.new(type, :foo))
      end
    end
  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Generates one test per escape-sequence pattern, checking the class, type
# and token of the expression found at a given position under the root.
class TestParserEscapes < Test::Unit::TestCase

  # Placeholder: the body is disabled, so this test currently asserts nothing.
  # TODO: enable once control/meta sequences can be parsed.
  def test_parse_control_sequence_short
    #root = RP.parse(/\b\d\\\c2\C-C\M-\C-2/)
  end

  # pattern => [expression index, expected type, expected token, expected class]
  tests = {
    /a\ac/ => [1, :escape, :bell, EscapeSequence::Bell],
    /a\ec/ => [1, :escape, :escape, EscapeSequence::AsciiEscape],
    /a\fc/ => [1, :escape, :form_feed, EscapeSequence::FormFeed],
    /a\nc/ => [1, :escape, :newline, EscapeSequence::Newline],
    /a\rc/ => [1, :escape, :carriage, EscapeSequence::Return],
    /a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
    /a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],

    # special cases
    /a\bc/ => [1, :anchor, :word_boundary, Anchor::WordBoundary],
    /a\sc/ => [1, :type, :space, CharacterType::Space],

    # meta character escapes
    /a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
    /a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
    /a\*c/ => [1, :escape, :zero_or_more, EscapeSequence::Literal],
    /a\+c/ => [1, :escape, :one_or_more, EscapeSequence::Literal],
    /a\|c/ => [1, :escape, :alternation, EscapeSequence::Literal],
    /a\(c/ => [1, :escape, :group_open, EscapeSequence::Literal],
    /a\)c/ => [1, :escape, :group_close, EscapeSequence::Literal],
    /a\{c/ => [1, :escape, :interval_open, EscapeSequence::Literal],
    /a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
  }

  # Names carry the "escape" prefix so failures from this suite are not
  # mistaken for anchor tests; the counter keeps them unique.
  count = 0
  tests.each do |pattern, test|
    define_method "test_parse_escape_#{test[2]}_#{count+=1}" do
      root = RP.parse(pattern, 'ruby/1.9')
      exp  = root.expressions[test[0]]

      assert( exp.is_a?( test[3] ),
             "Expected #{test[3]}, but got #{exp.class.name}")

      assert_equal( test[1], exp.type )
      assert_equal( test[2], exp.token )
    end
  end

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Round-trip tests: the string form of a parsed tree must equal the pattern
# it was parsed from.
class ParserExpression < Test::Unit::TestCase

  def test_parse_expression_to_s_literal_alternation
    assert_round_trip 'abcd|ghij|klmn|pqur'
  end

  def test_parse_expression_to_s_quantified_alternations
    assert_round_trip '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
  end

  def test_parse_expression_to_s_quantified_sets
    assert_round_trip '[abc]+|[^def]{3,6}'
  end

  def test_parse_expression_to_s_property_sets
    assert_round_trip '[\a\b\p{Lu}\P{Z}\c\d]+'
  end

  def test_parse_expression_to_s_groups
    assert_round_trip "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
  end

  def test_parse_expression_to_s_assertions
    assert_round_trip '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
  end

  def test_parse_expression_to_s_comments
    assert_round_trip '(?#start)a(?#middle)b(?#end)'
  end

  def test_parse_expression_to_s_options
    assert_round_trip '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
  end

  def test_parse_expression_to_s_url
    assert_round_trip '(^$)|(^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*'+
                      '\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$)'
  end

  private

  # Parses +pattern+ and asserts that the tree's to_s reproduces it exactly.
  def assert_round_trip(pattern)
    assert_equal( pattern, RP.parse(pattern).to_s )
  end

end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Tests for group parsing: option switches on the root and on nested option
# groups, lookahead/lookbehind assertions, and comment groups.
class TestParserGroups < Test::Unit::TestCase

  # Options carried by Regexp#to_s must be readable from the parsed root.
  def test_parse_root_options_mi
    t = RP.parse((/[abc]/mi).to_s)

    assert_equal( true, t.m? )
    assert_equal( true, t.i? )
    assert_equal( false, t.x? )
  end

  # In each nested-options test, expressions[0].expressions[1] is the inner
  # option group, which must report its own switches rather than the outer
  # group's.
  def test_parse_nested_options_m
    t = RP.parse('(?xi-m:a(?m-ix:b))')

    assert_equal( true, t.expressions[0].expressions[1].m? )
    assert_equal( false, t.expressions[0].expressions[1].i? )
    assert_equal( false, t.expressions[0].expressions[1].x? )
  end

  def test_parse_nested_options_xm
    t = RP.parse(/(?i-xm:a(?mx-i:b))/)

    assert_equal( true, t.expressions[0].expressions[1].m? )
    assert_equal( false, t.expressions[0].expressions[1].i? )
    assert_equal( true, t.expressions[0].expressions[1].x? )
  end

  def test_parse_nested_options_im
    t = RP.parse(/(?x-mi:a(?mi-x:b))/)

    assert_equal( true, t.expressions[0].expressions[1].m? )
    assert_equal( true, t.expressions[0].expressions[1].i? )
    assert_equal( false, t.expressions[0].expressions[1].x? )
  end

  # Positive and negative lookahead groups, in that order.
  def test_parse_lookahead
    t = RP.parse('(?=abc)(?!def)')

    assert( t.expressions[0].is_a?(Assertion::Lookahead),
      "Expected lookahead, but got #{t.expressions[0].class.name}")

    # The message reports the class of the node actually under test.
    assert( t.expressions[1].is_a?(Assertion::NegativeLookahead),
      "Expected negative lookahead, but got #{t.expressions[1].class.name}")
  end

  # Positive and negative lookbehind groups, in that order.
  def test_parse_lookbehind
    t = RP.parse('(?<=abc)(?<!def)')

    assert( t.expressions[0].is_a?(Assertion::Lookbehind),
      "Expected lookbehind, but got #{t.expressions[0].class.name}")

    # The message reports the class of the node actually under test.
    assert( t.expressions[1].is_a?(Assertion::NegativeLookbehind),
      "Expected negative lookbehind, but got #{t.expressions[1].class.name}")
  end

  # Comment groups alternate with the literals, landing at indices 1, 3, 5.
  def test_parse_comment
    t = RP.parse('a(?# is for apple)b(?# for boy)c(?# cat)')

    [1,3,5].each do |i|
      assert( t.expressions[i].is_a?(Group::Comment),
        "Expected comment, but got #{t.expressions[i].class.name}")

      assert_equal( :group, t.expressions[i].type )
      assert_equal( :comment, t.expressions[i].token )
    end
  end

end
|
@@ -0,0 +1,346 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Tests that \p{...} and \P{...} parse into UnicodeProperty expressions with
# the expected type and name, plus a few checks on specific property classes.
class ParserProperties < Test::Unit::TestCase

  # Property names exercised in both the \p{...} and \P{...} forms.
  props = %w[
    Alnum Alpha Any Ascii Blank Cntrl Digit Graph Lower Newline Print Punct
    Space Upper Word Xdigit

    L Letter
    Lu Uppercase_Letter
    Ll Lowercase_Letter
    Lt Titlecase_Letter
    Lm Modifier_Letter
    Lo Other_Letter

    M Mark
    Mn Nonspacing_Mark
    Mc Spacing_Mark
    Me Enclosing_Mark

    N Number
    Nd Decimal_Number
    Nl Letter_Number
    No Other_Number

    P Punctuation
    Pc Connector_Punctuation
    Pd Dash_Punctuation
    Ps Open_Punctuation
    Pe Close_Punctuation
    Pi Initial_Punctuation
    Pf Final_Punctuation
    Po Other_Punctuation

    S Symbol
    Sm Math_Symbol
    Sc Currency_Symbol
    Sk Modifier_Symbol
    So Other_Symbol

    Z Separator
    Zs Space_Separator
    Zl Line_Separator
    Zp Paragraph_Separator

    C Other
    Cc Control
    Cf Format
    Cs Surrogate
    Co Private_Use
    Cn Unassigned

    Age=1.1 Age=2.0 Age=2.1 Age=3.0 Age=3.1 Age=3.2
    Age=4.0 Age=4.1 Age=5.0 Age=5.1 Age=5.2 Age=6.0

    ahex ASCII_Hex_Digit
    Alphabetic
    Cased
    cwcf Changes_When_Casefolded
    cwcm Changes_When_Casemapped
    cwl Changes_When_Lowercased
    cwt Changes_When_Titlecased
    cwu Changes_When_Uppercased
    ci Case_Ignorable
    bidic Bidi_Control
    Dash
    dep Deprecated
    di Default_Ignorable_Code_Point
    dia Diacritic
    ext Extender
    grbase Grapheme_Base
    grext Grapheme_Extend
    grlink Grapheme_Link
    hex Hex_Digit
    Hyphen
    idc ID_Continue
    ideo Ideographic
    ids ID_Start
    idsb IDS_Binary_Operator
    idst IDS_Trinary_Operator
    joinc Join_Control
    loe Logical_Order_Exception
    Lowercase
    Math
    nchar Noncharacter_Code_Point
    oalpha Other_Alphabetic
    odi Other_Default_Ignorable_Code_Point
    ogrext Other_Grapheme_Extend
    oidc Other_ID_Continue
    oids Other_ID_Start
    olower Other_Lowercase
    omath Other_Math
    oupper Other_Uppercase
    patsyn Pattern_Syntax
    patws Pattern_Whitespace
    qmark quotationmark
    radical
    sd Soft_Dotted
    sterm
    term Terminal_Punctuation
    uideo Unified_Ideograph
    Uppercase
    vs Variation_Selector
    wspace whitespace
    xids XID_Start
    xidc XID_Continue
  ]

  # [escape letter, expected expression type]; one test is generated for
  # every mode/property combination.
  [['p', :property], ['P', :nonproperty]].each do |mode, token_type|
    props.each do |property|
      define_method "test_parse_#{token_type}_#{property}" do
        exp = RP.parse("ab\\#{mode}{#{property}}").expressions.last

        assert( exp.is_a?(UnicodeProperty::Base),
               "Expected property, but got #{exp.class.name}")

        assert_equal( token_type, exp.type )
        assert_equal( property, exp.name )
      end
    end
  end

  # \p{...} is reported as not negative.
  def test_parse_property_negative
    prop = RP.parse('ab\p{L}cd').expressions[1]
    assert_equal( false, prop.negative? )
  end

  # \P{...} is reported as negative.
  def test_parse_nonproperty_negative
    prop = RP.parse('ab\P{L}cd').expressions[1]
    assert_equal( true, prop.negative? )
  end

  def test_parse_property_age
    prop = RP.parse('ab\p{age=5.2}cd').expressions[1]

    assert( prop.is_a?(UnicodeProperty::Age),
           "Expected Age property, but got #{prop.class.name}")
  end

  def test_parse_property_derived
    prop = RP.parse('ab\p{Math}cd').expressions[1]

    assert( prop.is_a?(UnicodeProperty::Derived),
           "Expected Derived property, but got #{prop.class.name}")
  end

  def test_parse_property_script
    prop = RP.parse('ab\p{Hiragana}cd').expressions[1]

    assert( prop.is_a?(UnicodeProperty::Script),
           "Expected Script property, but got #{prop.class.name}")
  end

  # Text after a property must still come out as a literal node.
  def test_parse_property_following_literal
    trailing = RP.parse('ab\p{Lu}cd').expressions[2]

    assert( trailing.is_a?(Literal),
           "Expected Literal, but got #{trailing.class.name}")
  end
end
|