regexp_parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +307 -0
- data/Rakefile +91 -0
- data/lib/regexp_parser/ctype.rb +48 -0
- data/lib/regexp_parser/expression/property.rb +108 -0
- data/lib/regexp_parser/expression/set.rb +59 -0
- data/lib/regexp_parser/expression.rb +287 -0
- data/lib/regexp_parser/lexer.rb +105 -0
- data/lib/regexp_parser/parser.rb +417 -0
- data/lib/regexp_parser/scanner/property.rl +534 -0
- data/lib/regexp_parser/scanner/scanner.rl +712 -0
- data/lib/regexp_parser/scanner.rb +3325 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
- data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +332 -0
- data/lib/regexp_parser/syntax.rb +172 -0
- data/lib/regexp_parser.rb +45 -0
- data/test/helpers.rb +8 -0
- data/test/lexer/test_all.rb +26 -0
- data/test/lexer/test_literals.rb +120 -0
- data/test/lexer/test_nesting.rb +107 -0
- data/test/lexer/test_refcalls.rb +45 -0
- data/test/parser/test_all.rb +44 -0
- data/test/parser/test_alternation.rb +46 -0
- data/test/parser/test_anchors.rb +35 -0
- data/test/parser/test_errors.rb +59 -0
- data/test/parser/test_escapes.rb +48 -0
- data/test/parser/test_expression.rb +51 -0
- data/test/parser/test_groups.rb +69 -0
- data/test/parser/test_properties.rb +346 -0
- data/test/parser/test_quantifiers.rb +236 -0
- data/test/parser/test_refcalls.rb +101 -0
- data/test/parser/test_sets.rb +99 -0
- data/test/scanner/test_all.rb +30 -0
- data/test/scanner/test_anchors.rb +35 -0
- data/test/scanner/test_errors.rb +36 -0
- data/test/scanner/test_escapes.rb +49 -0
- data/test/scanner/test_groups.rb +41 -0
- data/test/scanner/test_literals.rb +85 -0
- data/test/scanner/test_meta.rb +36 -0
- data/test/scanner/test_properties.rb +315 -0
- data/test/scanner/test_quantifiers.rb +38 -0
- data/test/scanner/test_refcalls.rb +45 -0
- data/test/scanner/test_scripts.rb +314 -0
- data/test/scanner/test_sets.rb +80 -0
- data/test/scanner/test_types.rb +30 -0
- data/test/syntax/ruby/test_1.8.rb +57 -0
- data/test/syntax/ruby/test_1.9.1.rb +39 -0
- data/test/syntax/ruby/test_1.9.3.rb +38 -0
- data/test/syntax/ruby/test_all.rb +12 -0
- data/test/syntax/test_all.rb +19 -0
- data/test/test_all.rb +4 -0
- metadata +160 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Checks the tokens the lexer produces for group back-references (\k...)
# and sub-expression calls (\g...), using both the <...> and '...'
# delimiter forms.
class LexerRefCalls < Test::Unit::TestCase

  # Maps each pattern to the expectation for one of its tokens:
  #   [token index, type, token, text, start offset, end offset, 0, 0]
  # NOTE(review): the two trailing zeros are compared against the last two
  # fields of the token's #to_a; presumably nesting levels -- confirm
  # against the token definition.
  tests = {
    # Group back-references, named, numbered, and relative
    '(?<X>abc)\k<X>'    => [3, :backref, :name_ref,        '\k<X>',    9, 14, 0, 0],
    "(?<X>abc)\\k'X'"   => [3, :backref, :name_ref,        "\\k'X'",   9, 14, 0, 0],

    '(abc)\k<1>'        => [3, :backref, :number_ref,      '\k<1>',    5, 10, 0, 0],
    "(abc)\\k'1'"       => [3, :backref, :number_ref,      "\\k'1'",   5, 10, 0, 0],

    '(abc)\k<-1>'       => [3, :backref, :number_rel_ref,  '\k<-1>',   5, 11, 0, 0],
    "(abc)\\k'-1'"      => [3, :backref, :number_rel_ref,  "\\k'-1'",  5, 11, 0, 0],

    # Sub-expression invocation, named, numbered, and relative
    '(?<X>abc)\g<X>'    => [3, :backref, :name_call,       '\g<X>',    9, 14, 0, 0],
    "(?<X>abc)\\g'X'"   => [3, :backref, :name_call,       "\\g'X'",   9, 14, 0, 0],

    '(abc)\g<1>'        => [3, :backref, :number_call,     '\g<1>',    5, 10, 0, 0],
    "(abc)\\g'1'"       => [3, :backref, :number_call,     "\\g'1'",   5, 10, 0, 0],

    '(abc)\g<-1>'       => [3, :backref, :number_rel_call, '\g<-1>',   5, 11, 0, 0],
    "(abc)\\g'-1'"      => [3, :backref, :number_rel_call, "\\g'-1'",  5, 11, 0, 0],

    # Group back-references, with nesting level
    '(?<X>abc)\k<X-0>'  => [3, :backref, :name_nest_ref,   '\k<X-0>',  9, 16, 0, 0],
    "(?<X>abc)\\k'X-0'" => [3, :backref, :name_nest_ref,   "\\k'X-0'", 9, 16, 0, 0],

    '(abc)\k<1-0>'      => [3, :backref, :number_nest_ref, '\k<1-0>',  5, 12, 0, 0],
    "(abc)\\k'1-0'"     => [3, :backref, :number_nest_ref, "\\k'1-0'", 5, 12, 0, 0],
  }

  # One test method per table entry; the counter keeps the names unique
  # when several patterns share the same type and token.
  count = 0
  tests.each do |pattern, test|
    define_method "test_lexer_#{test[1]}_#{test[2]}_#{count+=1}" do
      token = RL.scan(pattern)[test[0]]

      assert_equal( test[1,7], token.to_a )

      # Fields 3 and 4 are the offsets where the token text starts and ends
      # in the pattern (e.g. 9 and 14 for the 5-character '\k<X>'), so the
      # text must be extracted with a range. The (start, length) form only
      # gave the right answer because every token here ends the pattern.
      assert_equal( test[3], pattern[token[3]...token[4]] )
    end
  end

end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
%w{
|
4
|
+
alternation anchors errors escapes expression groups properties
|
5
|
+
quantifiers refcalls sets
|
6
|
+
}.each do|tc|
|
7
|
+
require File.expand_path("../test_#{tc}", __FILE__)
|
8
|
+
end
|
9
|
+
|
10
|
+
# Smoke tests for the shape of the tree returned by RP.parse.
class TestParser < Test::Unit::TestCase

  # Parsing a pattern given as a string yields a Root expression.
  def test_parse_returns_a_root_expression
    assert_instance_of( Regexp::Expression::Root, RP.parse('abc'))
  end

  # Every top-level node of a parsed Regexp literal derives from
  # Regexp::Expression::Base.
  def test_parse_root_contains_expressions
    root = RP.parse(/^a.c+[^one]{2,3}\b\d\\\C-C$/)

    assert( root.expressions.all?{|exp|
              exp.kind_of?(Regexp::Expression::Base)},
            "Not all nodes are instances of Regexp::Expression")
  end

  # too much going on here, it's just for development
  def test_parse_node_types
    root = RP.parse('^(one){2,3}([^d\]efm-qz\,\-]*)(ghi)+$')

    # expressions[0] is the leading '^'; the three groups follow it.
    assert( root.expressions[1].expressions[0].is_a?(Literal),
            "Not a literal node, but should be")

    assert( root.expressions[1].quantified?, "Not quantified, but should be")

    assert( root.expressions[2].expressions[0].is_a?(CharacterSet),
            "Not a character set, but it should be")

    # The '*' inside the second group applies to the set, not to the group.
    assert_equal( false, root.expressions[2].quantified? )

    assert( root.expressions[3].is_a?(Group::Capture),
            "Not a group, but should be")

    assert_equal( true, root.expressions[3].quantified? )
  end

end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Exercises the tree built for a pattern with a top-level alternation whose
# branches are capture groups that themselves contain alternations.
class ParserAlternation < Test::Unit::TestCase

  # TODO: these tests pass, but they show how hard and messy the tree is
  # to navigate

  # Parses the shared pattern once per test.
  def setup
    @root = RP.parse('(ab??|cd*+|ef+)*|(gh|ij|kl)?')
  end

  # The first node under the root is the alternation itself.
  def test_parse_alternation_root
    top = @root.expressions[0]
    assert_equal( true, top.is_a?(Alternation) )
  end

  # The top-level alternation has two branches, each a sequence that
  # starts with a capture group.
  def test_parse_alternation_alts
    branches = @root.expressions[0].alternatives

    [0, 1].each do |i|
      assert_equal( true, branches[i].is_a?(Sequence) )
    end

    [0, 1].each do |i|
      assert_equal( true, branches[i][0].is_a?(Group::Capture) )
    end

    assert_equal( 2, branches.length )
  end

  # The first capture group holds an alternation of its own.
  def test_parse_alternation_nested
    inner = @root[0].alternatives[0][0][0]

    assert_equal( true, inner.is_a?(Alternation) )
  end

  # The first branch of the nested alternation is a sequence of exactly
  # two literals.
  def test_parse_alternation_nested_sequence
    first_branch = @root.expressions[0][0]
    sequence     = first_branch.expressions[0][0][0]

    assert_equal( true, sequence.is_a?(Sequence) )

    members = sequence.expressions
    assert_equal( true, members[0].is_a?(Literal) )
    assert_equal( true, members[1].is_a?(Literal) )
    assert_equal( 2, sequence.expressions.length )
  end

end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Checks that each anchor is parsed into the matching Anchor expression
# class with the expected type and token.
class TestParserAnchors < Test::Unit::TestCase

  # pattern => [index of the expression to inspect, expected type,
  #             expected token, expected expression class]
  expectations = {
    '^a'      => [0, :anchor,  :beginning_of_line, Anchor::BOL],
    'a$'      => [1, :anchor,  :end_of_line,       Anchor::EOL],

    '\Aa'     => [0, :anchor,  :bos,               Anchor::BOS],
    'a\z'     => [1, :anchor,  :eos,               Anchor::EOS],
    'a\Z'     => [1, :anchor,  :eos_ob_eol,        Anchor::EOSobEOL],

    'a\b'     => [1, :anchor,  :word_boundary,     Anchor::WordBoundary],
    'a\B'     => [1, :anchor,  :nonword_boundary,  Anchor::NonWordBoundary],

    'a\G'     => [1, :anchor,  :match_start,       Anchor::MatchStart],

    # An escaped backslash followed by 'A' must not be read as \A.
    "\\\\Aa"  => [0, :escape,  :backslash,         EscapeSequence::Literal],
  }

  # Generate one numbered test method per table entry.
  serial = 0
  expectations.each do |pattern, (index, type, token, klass)|
    serial += 1

    define_method "test_parse_anchor_#{token}_#{serial}" do
      exp = RP.parse(pattern, 'ruby/1.9').expressions[index]

      assert( exp.is_a?( klass ),
              "Expected #{klass}, but got #{exp.class.name}")

      assert_equal( type, exp.type )
      assert_equal( token, exp.token )
    end
  end

end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Confirms that RP.parse_token raises the appropriate error when handed a
# token whose type, or whose name within a known type, is not recognized.
class ParserErrors < Test::Unit::TestCase

  # An unrecognized token type raises UnknownTokenTypeError.
  def test_parser_unknown_token_type
    assert_raise( Regexp::Parser::UnknownTokenTypeError ) do
      parse_made_up_token(:foo, :bar)
    end
  end

  # The remaining tests use a known type with an unrecognized token name,
  # which raises UnknownTokenError.

  def test_parser_unknown_set_token
    assert_raise( Regexp::Parser::UnknownTokenError ) do
      parse_made_up_token(:set, :foo)
    end
  end

  def test_parser_unknown_meta_token
    assert_raise( Regexp::Parser::UnknownTokenError ) do
      parse_made_up_token(:meta, :foo)
    end
  end

  def test_parser_unknown_character_type_token
    assert_raise( Regexp::Parser::UnknownTokenError ) do
      parse_made_up_token(:type, :foo)
    end
  end

  def test_parser_unknown_unicode_property_token
    assert_raise( Regexp::Parser::UnknownTokenError ) do
      parse_made_up_token(:property, :foo)
    end
  end

  def test_parser_unknown_unicode_nonproperty_token
    assert_raise( Regexp::Parser::UnknownTokenError ) do
      parse_made_up_token(:nonproperty, :foo)
    end
  end

  def test_parser_unknown_anchor_token
    assert_raise( Regexp::Parser::UnknownTokenError ) do
      parse_made_up_token(:anchor, :foo)
    end
  end

  def test_parser_unknown_quantifier_token
    assert_raise( Regexp::Parser::UnknownTokenError ) do
      parse_made_up_token(:quantifier, :foo)
    end
  end

  def test_parser_unknown_group_open_token
    assert_raise( Regexp::Parser::UnknownTokenError ) do
      parse_made_up_token(:group, :foo)
    end
  end

  private

  # Builds a token from the given type and name and hands it to the parser.
  def parse_made_up_token(type, token)
    RP.parse_token(Regexp::Token.new(type, token))
  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Checks that escape sequences are parsed into the expected expression
# class, type and token.
class TestParserEscapes < Test::Unit::TestCase

  # TODO: placeholder -- the body is commented out, so this test currently
  # passes without asserting anything.
  def test_parse_control_sequence_short
    #root = RP.parse(/\b\d\\\c2\C-C\M-\C-2/)
  end

  # pattern => [index of the expression to inspect, expected type,
  #             expected token, expected expression class]
  tests = {
    /a\ac/    => [1, :escape,  :bell,           EscapeSequence::Bell],
    /a\ec/    => [1, :escape,  :escape,         EscapeSequence::AsciiEscape],
    /a\fc/    => [1, :escape,  :form_feed,      EscapeSequence::FormFeed],
    /a\nc/    => [1, :escape,  :newline,        EscapeSequence::Newline],
    /a\rc/    => [1, :escape,  :carriage,       EscapeSequence::Return],
    /a\tc/    => [1, :escape,  :tab,            EscapeSequence::Tab],
    /a\vc/    => [1, :escape,  :vertical_tab,   EscapeSequence::VerticalTab],

    # special cases
    /a\bc/    => [1, :anchor,  :word_boundary,  Anchor::WordBoundary],
    /a\sc/    => [1, :type,    :space,          CharacterType::Space],

    # meta character escapes
    /a\.c/    => [1, :escape,  :dot,            EscapeSequence::Literal],
    /a\?c/    => [1, :escape,  :zero_or_one,    EscapeSequence::Literal],
    /a\*c/    => [1, :escape,  :zero_or_more,   EscapeSequence::Literal],
    /a\+c/    => [1, :escape,  :one_or_more,    EscapeSequence::Literal],
    /a\|c/    => [1, :escape,  :alternation,    EscapeSequence::Literal],
    /a\(c/    => [1, :escape,  :group_open,     EscapeSequence::Literal],
    /a\)c/    => [1, :escape,  :group_close,    EscapeSequence::Literal],
    /a\{c/    => [1, :escape,  :interval_open,  EscapeSequence::Literal],
    /a\}c/    => [1, :escape,  :interval_close, EscapeSequence::Literal],
  }

  # One numbered test method per table entry. The names carry an "escape"
  # prefix so that failures are reported against this suite's subject
  # rather than appearing to be anchor tests.
  count = 0
  tests.each do |pattern, test|
    define_method "test_parse_escape_#{test[2]}_#{count+=1}" do
      root = RP.parse(pattern, 'ruby/1.9')
      exp  = root.expressions[test[0]]

      assert( exp.is_a?( test[3] ),
              "Expected #{test[3]}, but got #{exp.class.name}")

      assert_equal( test[1], exp.type )
      assert_equal( test[2], exp.token )
    end
  end

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Round-trip tests: converting a parsed expression tree back with #to_s
# must reproduce the original pattern text exactly.
class ParserExpression < Test::Unit::TestCase

  def test_parse_expression_to_s_literal_alternation
    assert_round_trip 'abcd|ghij|klmn|pqur'
  end

  def test_parse_expression_to_s_quantified_alternations
    assert_round_trip '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
  end

  def test_parse_expression_to_s_quantified_sets
    assert_round_trip '[abc]+|[^def]{3,6}'
  end

  def test_parse_expression_to_s_property_sets
    assert_round_trip '[\a\b\p{Lu}\P{Z}\c\d]+'
  end

  def test_parse_expression_to_s_groups
    assert_round_trip "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
  end

  def test_parse_expression_to_s_assertions
    assert_round_trip '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
  end

  def test_parse_expression_to_s_comments
    assert_round_trip '(?#start)a(?#middle)b(?#end)'
  end

  def test_parse_expression_to_s_options
    assert_round_trip '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
  end

  def test_parse_expression_to_s_url
    url = '(^$)|(^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*'+
          '\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$)'
    assert_round_trip url
  end

  private

  # Parses the pattern and asserts that the tree's #to_s equals it.
  def assert_round_trip(pattern)
    assert_equal( pattern, RP.parse(pattern).to_s )
  end

end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Tests for option groups, lookaround assertions and comment groups.
class TestParserGroups < Test::Unit::TestCase

  # Options from a Regexp's #to_s form are visible on the parsed root.
  def test_parse_root_options_mi
    t = RP.parse((/[abc]/mi).to_s)

    assert_equal( true,  t.m? )
    assert_equal( true,  t.i? )
    assert_equal( false, t.x? )
  end

  # In the three nested-option tests the inner group is the second child
  # of the outer group and must report its own option switches.

  def test_parse_nested_options_m
    t = RP.parse('(?xi-m:a(?m-ix:b))')
    inner = t.expressions[0].expressions[1]

    assert_equal( true,  inner.m? )
    assert_equal( false, inner.i? )
    assert_equal( false, inner.x? )
  end

  def test_parse_nested_options_xm
    t = RP.parse(/(?i-xm:a(?mx-i:b))/)
    inner = t.expressions[0].expressions[1]

    assert_equal( true,  inner.m? )
    assert_equal( false, inner.i? )
    assert_equal( true,  inner.x? )
  end

  def test_parse_nested_options_im
    t = RP.parse(/(?x-mi:a(?mi-x:b))/)
    inner = t.expressions[0].expressions[1]

    assert_equal( true,  inner.m? )
    assert_equal( true,  inner.i? )
    assert_equal( false, inner.x? )
  end

  def test_parse_lookahead
    t = RP.parse('(?=abc)(?!def)')

    assert( t.expressions[0].is_a?(Assertion::Lookahead),
            "Expected lookahead, but got #{t.expressions[0].class.name}")

    # Report the class of the node under test (index 1), not the first node.
    assert( t.expressions[1].is_a?(Assertion::NegativeLookahead),
            "Expected negative lookahead, but got #{t.expressions[1].class.name}")
  end

  def test_parse_lookbehind
    t = RP.parse('(?<=abc)(?<!def)')

    assert( t.expressions[0].is_a?(Assertion::Lookbehind),
            "Expected lookbehind, but got #{t.expressions[0].class.name}")

    # Report the class of the node under test (index 1), not the first node.
    assert( t.expressions[1].is_a?(Assertion::NegativeLookbehind),
            "Expected negative lookbehind, but got #{t.expressions[1].class.name}")
  end

  # Comment groups alternate with the literals, so they sit at the odd
  # indexes of the root's expressions.
  def test_parse_comment
    t = RP.parse('a(?# is for apple)b(?# for boy)c(?# cat)')

    [1,3,5].each do |i|
      assert( t.expressions[i].is_a?(Group::Comment),
              "Expected comment, but got #{t.expressions[i].class.name}")

      assert_equal( :group,   t.expressions[i].type )
      assert_equal( :comment, t.expressions[i].token )
    end
  end

end
|
@@ -0,0 +1,346 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Checks parsing of Unicode property escapes, both the positive \p{...}
# and the negated \P{...} forms.
class ParserProperties < Test::Unit::TestCase

  # Property names fed to the generated tests, in the order they are run.
  props = %w{
    Alnum Alpha Any Ascii Blank Cntrl Digit Graph
    Lower Newline Print Punct Space Upper Word Xdigit

    L Letter
    Lu Uppercase_Letter
    Ll Lowercase_Letter
    Lt Titlecase_Letter
    Lm Modifier_Letter
    Lo Other_Letter

    M Mark
    Mn Nonspacing_Mark
    Mc Spacing_Mark
    Me Enclosing_Mark

    N Number
    Nd Decimal_Number
    Nl Letter_Number
    No Other_Number

    P Punctuation
    Pc Connector_Punctuation
    Pd Dash_Punctuation
    Ps Open_Punctuation
    Pe Close_Punctuation
    Pi Initial_Punctuation
    Pf Final_Punctuation
    Po Other_Punctuation

    S Symbol
    Sm Math_Symbol
    Sc Currency_Symbol
    Sk Modifier_Symbol
    So Other_Symbol

    Z Separator
    Zs Space_Separator
    Zl Line_Separator
    Zp Paragraph_Separator

    C Other
    Cc Control
    Cf Format
    Cs Surrogate
    Co Private_Use
    Cn Unassigned

    Age=1.1 Age=2.0 Age=2.1 Age=3.0 Age=3.1 Age=3.2
    Age=4.0 Age=4.1 Age=5.0 Age=5.1 Age=5.2 Age=6.0

    ahex ASCII_Hex_Digit
    Alphabetic
    Cased
    cwcf Changes_When_Casefolded
    cwcm Changes_When_Casemapped
    cwl Changes_When_Lowercased
    cwt Changes_When_Titlecased
    cwu Changes_When_Uppercased
    ci Case_Ignorable
    bidic Bidi_Control
    Dash
    dep Deprecated
    di Default_Ignorable_Code_Point
    dia Diacritic
    ext Extender
    grbase Grapheme_Base
    grext Grapheme_Extend
    grlink Grapheme_Link
    hex Hex_Digit
    Hyphen
    idc ID_Continue
    ideo Ideographic
    ids ID_Start
    idsb IDS_Binary_Operator
    idst IDS_Trinary_Operator
    joinc Join_Control
    loe Logical_Order_Exception
    Lowercase
    Math
    nchar Noncharacter_Code_Point
    oalpha Other_Alphabetic
    odi Other_Default_Ignorable_Code_Point
    ogrext Other_Grapheme_Extend
    oidc Other_ID_Continue
    oids Other_ID_Start
    olower Other_Lowercase
    omath Other_Math
    oupper Other_Uppercase
    patsyn Pattern_Syntax
    patws Pattern_Whitespace
    qmark quotationmark
    radical
    sd Soft_Dotted
    sterm
    term Terminal_Punctuation
    uideo Unified_Ideograph
    Uppercase
    vs Variation_Selector
    wspace whitespace
    xids XID_Start
    xidc XID_Continue
  }

  # Generate one test per (escape letter, property) pair: 'p' produces
  # :property expressions, 'P' produces :nonproperty expressions.
  [['p', :property], ['P', :nonproperty]].each do |mode, token_type|
    props.each do |property|
      define_method "test_parse_#{token_type}_#{property}" do
        t = RP.parse "ab\\#{mode}{#{property}}"

        assert( t.expressions.last.is_a?(UnicodeProperty::Base),
                "Expected property, but got #{t.expressions.last.class.name}")

        assert_equal( token_type, t.expressions.last.type )
        assert_equal( property, t.expressions.last.name )
      end
    end
  end

  # \p{...} is not negative.
  def test_parse_property_negative
    tree = RP.parse 'ab\p{L}cd'
    assert_equal( false, tree.expressions[1].negative? )
  end

  # \P{...} is negative.
  def test_parse_nonproperty_negative
    tree = RP.parse 'ab\P{L}cd'
    assert_equal( true, tree.expressions[1].negative? )
  end

  def test_parse_property_age
    tree = RP.parse 'ab\p{age=5.2}cd'
    node = tree.expressions[1]

    assert( node.is_a?(UnicodeProperty::Age),
            "Expected Age property, but got #{tree.expressions[1].class.name}")
  end

  def test_parse_property_derived
    tree = RP.parse 'ab\p{Math}cd'
    node = tree.expressions[1]

    assert( node.is_a?(UnicodeProperty::Derived),
            "Expected Derived property, but got #{tree.expressions[1].class.name}")
  end

  def test_parse_property_script
    tree = RP.parse 'ab\p{Hiragana}cd'
    node = tree.expressions[1]

    assert( node.is_a?(UnicodeProperty::Script),
            "Expected Script property, but got #{tree.expressions[1].class.name}")
  end

  # The text after a property is parsed as a separate literal.
  def test_parse_property_following_literal
    tree = RP.parse 'ab\p{Lu}cd'
    node = tree.expressions[2]

    assert( node.is_a?(Literal),
            "Expected Literal, but got #{tree.expressions[2].class.name}")
  end

end
|