regexp_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/ChangeLog +4 -0
  2. data/LICENSE +22 -0
  3. data/README.rdoc +307 -0
  4. data/Rakefile +91 -0
  5. data/lib/regexp_parser/ctype.rb +48 -0
  6. data/lib/regexp_parser/expression/property.rb +108 -0
  7. data/lib/regexp_parser/expression/set.rb +59 -0
  8. data/lib/regexp_parser/expression.rb +287 -0
  9. data/lib/regexp_parser/lexer.rb +105 -0
  10. data/lib/regexp_parser/parser.rb +417 -0
  11. data/lib/regexp_parser/scanner/property.rl +534 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +712 -0
  13. data/lib/regexp_parser/scanner.rb +3325 -0
  14. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
  15. data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
  16. data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
  17. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
  18. data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
  19. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
  20. data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
  21. data/lib/regexp_parser/syntax/tokens.rb +332 -0
  22. data/lib/regexp_parser/syntax.rb +172 -0
  23. data/lib/regexp_parser.rb +45 -0
  24. data/test/helpers.rb +8 -0
  25. data/test/lexer/test_all.rb +26 -0
  26. data/test/lexer/test_literals.rb +120 -0
  27. data/test/lexer/test_nesting.rb +107 -0
  28. data/test/lexer/test_refcalls.rb +45 -0
  29. data/test/parser/test_all.rb +44 -0
  30. data/test/parser/test_alternation.rb +46 -0
  31. data/test/parser/test_anchors.rb +35 -0
  32. data/test/parser/test_errors.rb +59 -0
  33. data/test/parser/test_escapes.rb +48 -0
  34. data/test/parser/test_expression.rb +51 -0
  35. data/test/parser/test_groups.rb +69 -0
  36. data/test/parser/test_properties.rb +346 -0
  37. data/test/parser/test_quantifiers.rb +236 -0
  38. data/test/parser/test_refcalls.rb +101 -0
  39. data/test/parser/test_sets.rb +99 -0
  40. data/test/scanner/test_all.rb +30 -0
  41. data/test/scanner/test_anchors.rb +35 -0
  42. data/test/scanner/test_errors.rb +36 -0
  43. data/test/scanner/test_escapes.rb +49 -0
  44. data/test/scanner/test_groups.rb +41 -0
  45. data/test/scanner/test_literals.rb +85 -0
  46. data/test/scanner/test_meta.rb +36 -0
  47. data/test/scanner/test_properties.rb +315 -0
  48. data/test/scanner/test_quantifiers.rb +38 -0
  49. data/test/scanner/test_refcalls.rb +45 -0
  50. data/test/scanner/test_scripts.rb +314 -0
  51. data/test/scanner/test_sets.rb +80 -0
  52. data/test/scanner/test_types.rb +30 -0
  53. data/test/syntax/ruby/test_1.8.rb +57 -0
  54. data/test/syntax/ruby/test_1.9.1.rb +39 -0
  55. data/test/syntax/ruby/test_1.9.3.rb +38 -0
  56. data/test/syntax/ruby/test_all.rb +12 -0
  57. data/test/syntax/test_all.rb +19 -0
  58. data/test/test_all.rb +4 -0
  59. metadata +160 -0
@@ -0,0 +1,45 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Checks the token the lexer produces for each form of group back-reference
# (\k) and sub-expression call (\g).
class LexerRefCalls < Test::Unit::TestCase

  # pattern => [token index, type, token, text, start offset, end offset, 0, 0]
  #
  # Everything after the token index is compared against the token's #to_a.
  # NOTE(review): the two trailing zeros are presumably nesting depths --
  # confirm against the lexer's token definition.
  tests = {
    # Group back-references, named, numbered, and relative
    '(?<X>abc)\k<X>'    => [3, :backref, :name_ref,        '\k<X>',    9, 14, 0, 0],
    "(?<X>abc)\\k'X'"   => [3, :backref, :name_ref,        "\\k'X'",   9, 14, 0, 0],

    '(abc)\k<1>'        => [3, :backref, :number_ref,      '\k<1>',    5, 10, 0, 0],
    "(abc)\\k'1'"       => [3, :backref, :number_ref,      "\\k'1'",   5, 10, 0, 0],

    '(abc)\k<-1>'       => [3, :backref, :number_rel_ref,  '\k<-1>',   5, 11, 0, 0],
    "(abc)\\k'-1'"      => [3, :backref, :number_rel_ref,  "\\k'-1'",  5, 11, 0, 0],

    # Sub-expression invocation, named, numbered, and relative
    '(?<X>abc)\g<X>'    => [3, :backref, :name_call,       '\g<X>',    9, 14, 0, 0],
    "(?<X>abc)\\g'X'"   => [3, :backref, :name_call,       "\\g'X'",   9, 14, 0, 0],

    '(abc)\g<1>'        => [3, :backref, :number_call,     '\g<1>',    5, 10, 0, 0],
    "(abc)\\g'1'"       => [3, :backref, :number_call,     "\\g'1'",   5, 10, 0, 0],

    '(abc)\g<-1>'       => [3, :backref, :number_rel_call, '\g<-1>',   5, 11, 0, 0],
    "(abc)\\g'-1'"      => [3, :backref, :number_rel_call, "\\g'-1'",  5, 11, 0, 0],

    # Group back-references, with nesting level
    '(?<X>abc)\k<X-0>'  => [3, :backref, :name_nest_ref,   '\k<X-0>',  9, 16, 0, 0],
    "(?<X>abc)\\k'X-0'" => [3, :backref, :name_nest_ref,   "\\k'X-0'", 9, 16, 0, 0],

    '(abc)\k<1-0>'      => [3, :backref, :number_nest_ref, '\k<1-0>',  5, 12, 0, 0],
    "(abc)\\k'1-0'"     => [3, :backref, :number_nest_ref, "\\k'1-0'", 5, 12, 0, 0],
  }

  # One test method per table entry; the running number keeps names unique
  # when several patterns share the same type and token.
  tests.each_with_index do |(pattern, expected), index|
    define_method "test_lexer_#{expected[1]}_#{expected[2]}_#{index + 1}" do
      token = RL.scan(pattern)[expected[0]]

      assert_equal(expected[1, 7], token.to_a)

      # Fields 3 and 4 are start/end offsets, so slice with an exclusive
      # range. String#[start, n] would read the end offset as a length and
      # only appear to work while the token sits at the end of the pattern.
      assert_equal(expected[3], pattern[token[3]...token[4]])
    end
  end

end
@@ -0,0 +1,44 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Load every individual parser test file, so that loading this file pulls in
# the whole parser suite.
%w[
  alternation anchors errors escapes expression groups properties
  quantifiers refcalls sets
].each { |name| require File.expand_path("../test_#{name}", __FILE__) }
9
+
10
# Smoke tests for the parser entry point: the type of the returned root and
# the kinds of nodes found in the resulting tree.
class TestParser < Test::Unit::TestCase

  # Parsing a plain string yields a Root expression.
  def test_parse_returns_a_root_expression
    assert_instance_of(Regexp::Expression::Root, RP.parse('abc'))
  end

  # Every top-level child of the root is some kind of expression node.
  def test_parse_root_contains_expressions
    root = RP.parse(/^a.c+[^one]{2,3}\b\d\\\C-C$/)

    assert(root.expressions.all? { |exp| exp.kind_of?(Regexp::Expression::Base) },
           "Not all nodes are instances of Regexp::Expression")
  end

  # too much going on here, it's just for development
  def test_parse_node_types
    root = RP.parse('^(one){2,3}([^d\]efm-qz\,\-]*)(ghi)+$')

    # Top-level children by index: 1 => (one){2,3}, 2 => ([^...]*), 3 => (ghi)+
    first_group, set_group, last_group = root.expressions[1, 3]

    assert(first_group.expressions[0].is_a?(Literal),
           "Not a literal node, but should be")

    assert(first_group.quantified?, "Not quantified, but should be")

    assert(set_group.expressions[0].is_a?(CharacterSet),
           "Not a character set, but it should be")

    # The '*' applies to the set inside the group, not to the group itself.
    assert_equal(false, set_group.quantified?)

    assert(last_group.is_a?(Group::Capture),
           "Not a group, but should be")

    assert_equal(true, last_group.quantified?)
  end

end
@@ -0,0 +1,46 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Exercises the tree the parser builds for top-level and nested alternations.
class ParserAlternation < Test::Unit::TestCase

  # TODO: these tests pass, but they show how hard and messy the tree is
  # to navigate

  # Every test works on the same pattern: two alternatives at the top level,
  # each of them a quantified group that itself contains an alternation.
  def setup
    @root = RP.parse('(ab??|cd*+|ef+)*|(gh|ij|kl)?')
  end

  # The root's only child is the alternation itself.
  def test_parse_alternation_root
    top = @root.expressions[0]
    assert_equal(true, top.is_a?(Alternation))
  end

  # Each top-level alternative is a sequence wrapping a capture group.
  def test_parse_alternation_alts
    branches = @root.expressions[0].alternatives

    [0, 1].each { |i| assert_equal(true, branches[i].is_a?(Sequence)) }
    [0, 1].each { |i| assert_equal(true, branches[i][0].is_a?(Group::Capture)) }

    assert_equal(2, branches.length)
  end

  # The first capture group holds a nested alternation.
  def test_parse_alternation_nested
    inner = @root[0].alternatives[0][0][0]

    assert_equal(true, inner.is_a?(Alternation))
  end

  # The first branch of the nested alternation is a sequence of two literals.
  def test_parse_alternation_nested_sequence
    first_branch = @root.expressions[0][0]
    sequence     = first_branch.expressions[0][0][0]

    assert_equal(true, sequence.is_a?(Sequence))

    [0, 1].each { |i| assert_equal(true, sequence.expressions[i].is_a?(Literal)) }
    assert_equal(2, sequence.expressions.length)
  end

end
@@ -0,0 +1,35 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Checks that each anchor is parsed into the matching Anchor expression class
# with the expected type and token.
class TestParserAnchors < Test::Unit::TestCase

  # pattern => [index of the node under test, type, token, expected class]
  tests = {
    '^a'      => [0, :anchor, :beginning_of_line, Anchor::BOL],
    'a$'      => [1, :anchor, :end_of_line,       Anchor::EOL],

    '\Aa'     => [0, :anchor, :bos,               Anchor::BOS],
    'a\z'     => [1, :anchor, :eos,               Anchor::EOS],
    'a\Z'     => [1, :anchor, :eos_ob_eol,        Anchor::EOSobEOL],

    'a\b'     => [1, :anchor, :word_boundary,     Anchor::WordBoundary],
    'a\B'     => [1, :anchor, :nonword_boundary,  Anchor::NonWordBoundary],

    'a\G'     => [1, :anchor, :match_start,       Anchor::MatchStart],

    # An escaped backslash followed by 'A' must not be read as the \A anchor.
    "\\\\Aa"  => [0, :escape, :backslash,         EscapeSequence::Literal],
  }

  # Generate one test per entry, numbered from 1 in table order.
  tests.each_with_index do |(pattern, expected), position|
    node_index, type, token, klass = expected

    define_method "test_parse_anchor_#{token}_#{position + 1}" do
      node = RP.parse(pattern, 'ruby/1.9').expressions[node_index]

      assert(node.is_a?(klass),
             "Expected #{klass}, but got #{node.class.name}")

      assert_equal(type,  node.type)
      assert_equal(token, node.token)
    end
  end

end
@@ -0,0 +1,59 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Verifies the errors raised by the parser when it is handed a token whose
# type, or whose token within a known type, it does not recognise.
class ParserErrors < Test::Unit::TestCase

  # A token with an unrecognised type raises UnknownTokenTypeError.
  def test_parser_unknown_token_type
    assert_raise(Regexp::Parser::UnknownTokenTypeError) do
      RP.parse_token(Regexp::Token.new(:foo, :bar))
    end
  end

  # The remaining tests pair a known token type with the bogus token :foo.

  def test_parser_unknown_set_token
    assert_unknown_token(:set)
  end

  def test_parser_unknown_meta_token
    assert_unknown_token(:meta)
  end

  def test_parser_unknown_character_type_token
    assert_unknown_token(:type)
  end

  def test_parser_unknown_unicode_property_token
    assert_unknown_token(:property)
  end

  def test_parser_unknown_unicode_nonproperty_token
    assert_unknown_token(:nonproperty)
  end

  def test_parser_unknown_anchor_token
    assert_unknown_token(:anchor)
  end

  def test_parser_unknown_quantifier_token
    assert_unknown_token(:quantifier)
  end

  def test_parser_unknown_group_open_token
    assert_unknown_token(:group)
  end

  private

  # Asserts that parsing the token (type, :foo) raises UnknownTokenError.
  def assert_unknown_token(type)
    assert_raise(Regexp::Parser::UnknownTokenError) do
      RP.parse_token(Regexp::Token.new(type, :foo))
    end
  end

end
@@ -0,0 +1,48 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Checks that backslash sequences are parsed into the expected expression
# class, type and token.
class TestParserEscapes < Test::Unit::TestCase

  # Placeholder: the control-sequence case is not exercised yet, so this test
  # currently asserts nothing.
  def test_parse_control_sequence_short
    #root = RP.parse(/\b\d\\\c2\C-C\M-\C-2/)
  end

  # pattern => [index of the node under test, type, token, expected class]
  tests = {
    /a\ac/ => [1, :escape, :bell,           EscapeSequence::Bell],
    /a\ec/ => [1, :escape, :escape,         EscapeSequence::AsciiEscape],
    /a\fc/ => [1, :escape, :form_feed,      EscapeSequence::FormFeed],
    /a\nc/ => [1, :escape, :newline,        EscapeSequence::Newline],
    /a\rc/ => [1, :escape, :carriage,       EscapeSequence::Return],
    /a\tc/ => [1, :escape, :tab,            EscapeSequence::Tab],
    /a\vc/ => [1, :escape, :vertical_tab,   EscapeSequence::VerticalTab],

    # special cases
    /a\bc/ => [1, :anchor, :word_boundary,  Anchor::WordBoundary],
    /a\sc/ => [1, :type,   :space,          CharacterType::Space],

    # meta character escapes
    /a\.c/ => [1, :escape, :dot,            EscapeSequence::Literal],
    /a\?c/ => [1, :escape, :zero_or_one,    EscapeSequence::Literal],
    /a\*c/ => [1, :escape, :zero_or_more,   EscapeSequence::Literal],
    /a\+c/ => [1, :escape, :one_or_more,    EscapeSequence::Literal],
    /a\|c/ => [1, :escape, :alternation,    EscapeSequence::Literal],
    /a\(c/ => [1, :escape, :group_open,     EscapeSequence::Literal],
    /a\)c/ => [1, :escape, :group_close,    EscapeSequence::Literal],
    /a\{c/ => [1, :escape, :interval_open,  EscapeSequence::Literal],
    /a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
  }

  # Generate one test per entry. The names carry an "escape" prefix so that a
  # failure report points at this suite rather than at the anchor tests.
  tests.each_with_index do |(pattern, expected), position|
    node_index, type, token, klass = expected

    define_method "test_parse_escape_#{token}_#{position + 1}" do
      root = RP.parse(pattern, 'ruby/1.9')
      exp  = root.expressions[node_index]

      assert(exp.is_a?(klass),
             "Expected #{klass}, but got #{exp.class.name}")

      assert_equal(type,  exp.type)
      assert_equal(token, exp.token)
    end
  end

end
@@ -0,0 +1,51 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Round-trip tests: parsing a pattern and calling #to_s on the resulting tree
# must give back the original pattern text.
class ParserExpression < Test::Unit::TestCase

  def test_parse_expression_to_s_literal_alternation
    assert_round_trip('abcd|ghij|klmn|pqur')
  end

  def test_parse_expression_to_s_quantified_alternations
    assert_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
  end

  def test_parse_expression_to_s_quantified_sets
    assert_round_trip('[abc]+|[^def]{3,6}')
  end

  def test_parse_expression_to_s_property_sets
    assert_round_trip('[\a\b\p{Lu}\P{Z}\c\d]+')
  end

  def test_parse_expression_to_s_groups
    assert_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++")
  end

  def test_parse_expression_to_s_assertions
    assert_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?')
  end

  def test_parse_expression_to_s_comments
    assert_round_trip('(?#start)a(?#middle)b(?#end)')
  end

  def test_parse_expression_to_s_options
    assert_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
  end

  def test_parse_expression_to_s_url
    # Split in two only to keep the line length manageable.
    url_pattern = [
      '(^$)|(^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*',
      '\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$)',
    ].join

    assert_round_trip(url_pattern)
  end

  private

  # Asserts that the parsed tree serialises back to exactly +pattern+.
  def assert_round_trip(pattern)
    assert_equal(pattern, RP.parse(pattern).to_s)
  end

end
@@ -0,0 +1,69 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Tests group parsing: option flags on the root and on nested option groups,
# lookahead/lookbehind assertions, and comment groups.
class TestParserGroups < Test::Unit::TestCase

  # Regexp#to_s renders the literal's flags as an options group around the
  # source; those flags are queried on the parsed root.
  def test_parse_root_options_mi
    t = RP.parse((/[abc]/mi).to_s)

    assert_equal(true,  t.m?)
    assert_equal(true,  t.i?)
    assert_equal(false, t.x?)
  end

  # In the three tests below the node under test is the inner options group,
  # i.e. the second child of the outer group; its own switches must win over
  # those of the enclosing group.

  def test_parse_nested_options_m
    t = RP.parse('(?xi-m:a(?m-ix:b))')
    inner = t.expressions[0].expressions[1]

    assert_equal(true,  inner.m?)
    assert_equal(false, inner.i?)
    assert_equal(false, inner.x?)
  end

  def test_parse_nested_options_xm
    t = RP.parse(/(?i-xm:a(?mx-i:b))/)
    inner = t.expressions[0].expressions[1]

    assert_equal(true,  inner.m?)
    assert_equal(false, inner.i?)
    assert_equal(true,  inner.x?)
  end

  def test_parse_nested_options_im
    t = RP.parse(/(?x-mi:a(?mi-x:b))/)
    inner = t.expressions[0].expressions[1]

    assert_equal(true,  inner.m?)
    assert_equal(true,  inner.i?)
    assert_equal(false, inner.x?)
  end

  def test_parse_lookahead
    t = RP.parse('(?=abc)(?!def)')

    assert(t.expressions[0].is_a?(Assertion::Lookahead),
           "Expected lookahead, but got #{t.expressions[0].class.name}")

    # Report the class of the node actually being checked (index 1).
    assert(t.expressions[1].is_a?(Assertion::NegativeLookahead),
           "Expected negative lookahead, but got #{t.expressions[1].class.name}")
  end

  def test_parse_lookbehind
    t = RP.parse('(?<=abc)(?<!def)')

    assert(t.expressions[0].is_a?(Assertion::Lookbehind),
           "Expected lookbehind, but got #{t.expressions[0].class.name}")

    # Report the class of the node actually being checked (index 1).
    assert(t.expressions[1].is_a?(Assertion::NegativeLookbehind),
           "Expected negative lookbehind, but got #{t.expressions[1].class.name}")
  end

  def test_parse_comment
    t = RP.parse('a(?# is for apple)b(?# for boy)c(?# cat)')

    # Literals and comments alternate, so the comments sit at the odd indexes.
    [1, 3, 5].each do |i|
      comment = t.expressions[i]

      assert(comment.is_a?(Group::Comment),
             "Expected comment, but got #{comment.class.name}")

      assert_equal(:group,   comment.type)
      assert_equal(:comment, comment.token)
    end
  end

end
@@ -0,0 +1,346 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
# Tests parsing of \p{...} and \P{...} property expressions: one generated
# test per property name and mode, plus a few targeted checks.
class ParserProperties < Test::Unit::TestCase

  # Escape letter paired with the token type expected for it.
  modes = [['p', :property], ['P', :nonproperty]]

  # Property names to exercise. Short and long spellings are listed next to
  # each other; the order is significant only for readability.
  props = %w[
    Alnum Alpha Any Ascii Blank Cntrl Digit Graph Lower Newline Print Punct
    Space Upper Word Xdigit

    L  Letter
    Lu Uppercase_Letter
    Ll Lowercase_Letter
    Lt Titlecase_Letter
    Lm Modifier_Letter
    Lo Other_Letter

    M  Mark
    Mn Nonspacing_Mark
    Mc Spacing_Mark
    Me Enclosing_Mark

    N  Number
    Nd Decimal_Number
    Nl Letter_Number
    No Other_Number

    P  Punctuation
    Pc Connector_Punctuation
    Pd Dash_Punctuation
    Ps Open_Punctuation
    Pe Close_Punctuation
    Pi Initial_Punctuation
    Pf Final_Punctuation
    Po Other_Punctuation

    S  Symbol
    Sm Math_Symbol
    Sc Currency_Symbol
    Sk Modifier_Symbol
    So Other_Symbol

    Z  Separator
    Zs Space_Separator
    Zl Line_Separator
    Zp Paragraph_Separator

    C  Other
    Cc Control
    Cf Format
    Cs Surrogate
    Co Private_Use
    Cn Unassigned

    Age=1.1 Age=2.0 Age=2.1 Age=3.0 Age=3.1 Age=3.2
    Age=4.0 Age=4.1 Age=5.0 Age=5.1 Age=5.2 Age=6.0

    ahex   ASCII_Hex_Digit
    Alphabetic
    Cased
    cwcf   Changes_When_Casefolded
    cwcm   Changes_When_Casemapped
    cwl    Changes_When_Lowercased
    cwt    Changes_When_Titlecased
    cwu    Changes_When_Uppercased
    ci     Case_Ignorable
    bidic  Bidi_Control
    Dash
    dep    Deprecated
    di     Default_Ignorable_Code_Point
    dia    Diacritic
    ext    Extender
    grbase Grapheme_Base
    grext  Grapheme_Extend
    grlink Grapheme_Link
    hex    Hex_Digit
    Hyphen
    idc    ID_Continue
    ideo   Ideographic
    ids    ID_Start
    idsb   IDS_Binary_Operator
    idst   IDS_Trinary_Operator
    joinc  Join_Control
    loe    Logical_Order_Exception
    Lowercase
    Math
    nchar  Noncharacter_Code_Point
    oalpha Other_Alphabetic
    odi    Other_Default_Ignorable_Code_Point
    ogrext Other_Grapheme_Extend
    oidc   Other_ID_Continue
    oids   Other_ID_Start
    olower Other_Lowercase
    omath  Other_Math
    oupper Other_Uppercase
    patsyn Pattern_Syntax
    patws  Pattern_Whitespace
    qmark  quotationmark
    radical
    sd     Soft_Dotted
    sterm
    term   Terminal_Punctuation
    uideo  Unified_Ideograph
    Uppercase
    vs     Variation_Selector
    wspace whitespace
    xids   XID_Start
    xidc   XID_Continue
  ]

  # Define a test for every (mode, property) combination. The property is the
  # last node of the parsed pattern.
  modes.each do |mode, token_type|
    props.each do |property|
      define_method "test_parse_#{token_type}_#{property}" do
        node = RP.parse("ab\\#{mode}{#{property}}").expressions.last

        assert(node.is_a?(UnicodeProperty::Base),
               "Expected property, but got #{node.class.name}")

        assert_equal(token_type, node.type)
        assert_equal(property,   node.name)
      end
    end
  end

  # \p{...} is the positive form.
  def test_parse_property_negative
    tree = RP.parse('ab\p{L}cd')
    assert_equal(false, tree.expressions[1].negative?)
  end

  # \P{...} is the negated form.
  def test_parse_nonproperty_negative
    tree = RP.parse('ab\P{L}cd')
    assert_equal(true, tree.expressions[1].negative?)
  end

  def test_parse_property_age
    node = RP.parse('ab\p{age=5.2}cd').expressions[1]

    assert(node.is_a?(UnicodeProperty::Age),
           "Expected Age property, but got #{node.class.name}")
  end

  def test_parse_property_derived
    node = RP.parse('ab\p{Math}cd').expressions[1]

    assert(node.is_a?(UnicodeProperty::Derived),
           "Expected Derived property, but got #{node.class.name}")
  end

  def test_parse_property_script
    node = RP.parse('ab\p{Hiragana}cd').expressions[1]

    assert(node.is_a?(UnicodeProperty::Script),
           "Expected Script property, but got #{node.class.name}")
  end

  # Text after the property is parsed as a separate literal node.
  def test_parse_property_following_literal
    node = RP.parse('ab\p{Lu}cd').expressions[2]

    assert(node.is_a?(Literal),
           "Expected Literal, but got #{node.class.name}")
  end
end