regexp_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/ChangeLog +4 -0
  2. data/LICENSE +22 -0
  3. data/README.rdoc +307 -0
  4. data/Rakefile +91 -0
  5. data/lib/regexp_parser/ctype.rb +48 -0
  6. data/lib/regexp_parser/expression/property.rb +108 -0
  7. data/lib/regexp_parser/expression/set.rb +59 -0
  8. data/lib/regexp_parser/expression.rb +287 -0
  9. data/lib/regexp_parser/lexer.rb +105 -0
  10. data/lib/regexp_parser/parser.rb +417 -0
  11. data/lib/regexp_parser/scanner/property.rl +534 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +712 -0
  13. data/lib/regexp_parser/scanner.rb +3325 -0
  14. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
  15. data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
  16. data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
  17. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
  18. data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
  19. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
  20. data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
  21. data/lib/regexp_parser/syntax/tokens.rb +332 -0
  22. data/lib/regexp_parser/syntax.rb +172 -0
  23. data/lib/regexp_parser.rb +45 -0
  24. data/test/helpers.rb +8 -0
  25. data/test/lexer/test_all.rb +26 -0
  26. data/test/lexer/test_literals.rb +120 -0
  27. data/test/lexer/test_nesting.rb +107 -0
  28. data/test/lexer/test_refcalls.rb +45 -0
  29. data/test/parser/test_all.rb +44 -0
  30. data/test/parser/test_alternation.rb +46 -0
  31. data/test/parser/test_anchors.rb +35 -0
  32. data/test/parser/test_errors.rb +59 -0
  33. data/test/parser/test_escapes.rb +48 -0
  34. data/test/parser/test_expression.rb +51 -0
  35. data/test/parser/test_groups.rb +69 -0
  36. data/test/parser/test_properties.rb +346 -0
  37. data/test/parser/test_quantifiers.rb +236 -0
  38. data/test/parser/test_refcalls.rb +101 -0
  39. data/test/parser/test_sets.rb +99 -0
  40. data/test/scanner/test_all.rb +30 -0
  41. data/test/scanner/test_anchors.rb +35 -0
  42. data/test/scanner/test_errors.rb +36 -0
  43. data/test/scanner/test_escapes.rb +49 -0
  44. data/test/scanner/test_groups.rb +41 -0
  45. data/test/scanner/test_literals.rb +85 -0
  46. data/test/scanner/test_meta.rb +36 -0
  47. data/test/scanner/test_properties.rb +315 -0
  48. data/test/scanner/test_quantifiers.rb +38 -0
  49. data/test/scanner/test_refcalls.rb +45 -0
  50. data/test/scanner/test_scripts.rb +314 -0
  51. data/test/scanner/test_sets.rb +80 -0
  52. data/test/scanner/test_types.rb +30 -0
  53. data/test/syntax/ruby/test_1.8.rb +57 -0
  54. data/test/syntax/ruby/test_1.9.1.rb +39 -0
  55. data/test/syntax/ruby/test_1.9.3.rb +38 -0
  56. data/test/syntax/ruby/test_all.rb +12 -0
  57. data/test/syntax/test_all.rb +19 -0
  58. data/test/test_all.rb +4 -0
  59. metadata +160 -0
@@ -0,0 +1,45 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class LexerRefCalls < Test::Unit::TestCase
4
+
5
+ tests = {
6
+ # Group back-references, named, numbered, and relative
7
+ '(?<X>abc)\k<X>' => [3, :backref, :name_ref, '\k<X>', 9, 14, 0, 0],
8
+ "(?<X>abc)\\k'X'" => [3, :backref, :name_ref, "\\k'X'", 9, 14, 0, 0],
9
+
10
+ '(abc)\k<1>' => [3, :backref, :number_ref, '\k<1>', 5, 10, 0, 0],
11
+ "(abc)\\k'1'" => [3, :backref, :number_ref, "\\k'1'", 5, 10, 0, 0],
12
+
13
+ '(abc)\k<-1>' => [3, :backref, :number_rel_ref, '\k<-1>', 5, 11, 0, 0],
14
+ "(abc)\\k'-1'" => [3, :backref, :number_rel_ref, "\\k'-1'", 5, 11, 0, 0],
15
+
16
+ # Sub-expression invocation, named, numbered, and relative
17
+ '(?<X>abc)\g<X>' => [3, :backref, :name_call, '\g<X>', 9, 14, 0, 0],
18
+ "(?<X>abc)\\g'X'" => [3, :backref, :name_call, "\\g'X'", 9, 14, 0, 0],
19
+
20
+ '(abc)\g<1>' => [3, :backref, :number_call, '\g<1>', 5, 10, 0, 0],
21
+ "(abc)\\g'1'" => [3, :backref, :number_call, "\\g'1'", 5, 10, 0, 0],
22
+
23
+ '(abc)\g<-1>' => [3, :backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0],
24
+ "(abc)\\g'-1'" => [3, :backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0],
25
+
26
+ # Group back-references, with nesting level
27
+ '(?<X>abc)\k<X-0>' => [3, :backref, :name_nest_ref, '\k<X-0>', 9, 16, 0, 0],
28
+ "(?<X>abc)\\k'X-0'" => [3, :backref, :name_nest_ref, "\\k'X-0'", 9, 16, 0, 0],
29
+
30
+ '(abc)\k<1-0>' => [3, :backref, :number_nest_ref, '\k<1-0>', 5, 12, 0, 0],
31
+ "(abc)\\k'1-0'" => [3, :backref, :number_nest_ref, "\\k'1-0'", 5, 12, 0, 0],
32
+ }
33
+
34
+ count = 0
35
+ tests.each do |pattern, test|
36
+ define_method "test_lexer_#{test[1]}_#{test[2]}_#{count+=1}" do
37
+
38
+ tokens = RL.scan(pattern)
39
+ assert_equal( test[1,7], tokens[test[0]].to_a)
40
+ assert_equal( test[3], pattern[tokens[test[0]][3], tokens[test[0]][4]])
41
+
42
+ end
43
+ end
44
+
45
+ end
@@ -0,0 +1,44 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ %w{
4
+ alternation anchors errors escapes expression groups properties
5
+ quantifiers refcalls sets
6
+ }.each do|tc|
7
+ require File.expand_path("../test_#{tc}", __FILE__)
8
+ end
9
+
10
+ class TestParser < Test::Unit::TestCase
11
+
12
+ def test_parse_returns_a_root_expression
13
+ assert_instance_of( Regexp::Expression::Root, RP.parse('abc'))
14
+ end
15
+
16
+ def test_parse_root_contains_expressions
17
+ root = RP.parse(/^a.c+[^one]{2,3}\b\d\\\C-C$/)
18
+
19
+ assert( root.expressions.all?{|exp|
20
+ exp.kind_of?(Regexp::Expression::Base)},
21
+ "Not all nodes are instances of Regexp::Expression")
22
+ end
23
+
24
+ # too much going on here, it's just for development
25
+ def test_parse_node_types
26
+ root = RP.parse('^(one){2,3}([^d\]efm-qz\,\-]*)(ghi)+$')
27
+
28
+ assert( root.expressions[1].expressions[0].is_a?(Literal),
29
+ "Not a literal node, but should be")
30
+
31
+ assert( root.expressions[1].quantified?, "Not quanfified, but should be")
32
+
33
+ assert( root.expressions[2].expressions[0].is_a?(CharacterSet),
34
+ "Not a caracter set, but it should be")
35
+
36
+ assert_equal( false, root.expressions[2].quantified? )
37
+
38
+ assert( root.expressions[3].is_a?(Group::Capture),
39
+ "Not a group, but should be")
40
+
41
+ assert_equal( true, root.expressions[3].quantified? )
42
+ end
43
+
44
+ end
@@ -0,0 +1,46 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ParserAlternation < Test::Unit::TestCase
4
+
5
+ # TODO: these tests pass, but they show how hard and messy the tree is
6
+ # to navigate
7
+
8
+ def setup
9
+ @root = RP.parse('(ab??|cd*+|ef+)*|(gh|ij|kl)?')
10
+ end
11
+
12
+ def test_parse_alternation_root
13
+ e = @root.expressions[0]
14
+ assert_equal( true, e.is_a?(Alternation) )
15
+ end
16
+
17
+ def test_parse_alternation_alts
18
+ alts = @root.expressions[0].alternatives
19
+
20
+ assert_equal( true, alts[0].is_a?(Sequence) )
21
+ assert_equal( true, alts[1].is_a?(Sequence) )
22
+
23
+ assert_equal( true, alts[0][0].is_a?(Group::Capture) )
24
+ assert_equal( true, alts[1][0].is_a?(Group::Capture) )
25
+
26
+ assert_equal( 2, alts.length )
27
+ end
28
+
29
+ def test_parse_alternation_nested
30
+ e = @root[0].alternatives[0][0][0]
31
+
32
+ assert_equal( true, e.is_a?(Alternation) )
33
+ end
34
+
35
+ def test_parse_alternation_nested_sequence
36
+ alts = @root.expressions[0][0]
37
+ nested = alts.expressions[0][0][0]
38
+
39
+ assert_equal( true, nested.is_a?(Sequence) )
40
+
41
+ assert_equal( true, nested.expressions[0].is_a?(Literal) )
42
+ assert_equal( true, nested.expressions[1].is_a?(Literal) )
43
+ assert_equal( 2, nested.expressions.length )
44
+ end
45
+
46
+ end
@@ -0,0 +1,35 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class TestParserAnchors < Test::Unit::TestCase
4
+
5
+ tests = {
6
+ '^a' => [0, :anchor, :beginning_of_line, Anchor::BOL],
7
+ 'a$' => [1, :anchor, :end_of_line, Anchor::EOL],
8
+
9
+ '\Aa' => [0, :anchor, :bos, Anchor::BOS],
10
+ 'a\z' => [1, :anchor, :eos, Anchor::EOS],
11
+ 'a\Z' => [1, :anchor, :eos_ob_eol, Anchor::EOSobEOL],
12
+
13
+ 'a\b' => [1, :anchor, :word_boundary, Anchor::WordBoundary],
14
+ 'a\B' => [1, :anchor, :nonword_boundary, Anchor::NonWordBoundary],
15
+
16
+ 'a\G' => [1, :anchor, :match_start, Anchor::MatchStart],
17
+
18
+ "\\\\Aa" => [0, :escape, :backslash, EscapeSequence::Literal],
19
+ }
20
+
21
+ count = 0
22
+ tests.each do |pattern, test|
23
+ define_method "test_parse_anchor_#{test[2]}_#{count+=1}" do
24
+ root = RP.parse(pattern, 'ruby/1.9')
25
+ exp = root.expressions[test[0]]
26
+
27
+ assert( exp.is_a?( test[3] ),
28
+ "Expected #{test[3]}, but got #{exp.class.name}")
29
+
30
+ assert_equal( test[1], exp.type )
31
+ assert_equal( test[2], exp.token )
32
+ end
33
+ end
34
+
35
+ end
@@ -0,0 +1,59 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ParserErrors < Test::Unit::TestCase
4
+
5
+ def test_parser_unknown_token_type
6
+ assert_raise( Regexp::Parser::UnknownTokenTypeError ) {
7
+ RP.parse_token(Regexp::Token.new(:foo, :bar))
8
+ }
9
+ end
10
+
11
+ def test_parser_unknown_set_token
12
+ assert_raise( Regexp::Parser::UnknownTokenError ) {
13
+ RP.parse_token(Regexp::Token.new(:set, :foo))
14
+ }
15
+ end
16
+
17
+ def test_parser_unknown_meta_token
18
+ assert_raise( Regexp::Parser::UnknownTokenError ) {
19
+ RP.parse_token(Regexp::Token.new(:meta, :foo))
20
+ }
21
+ end
22
+
23
+ def test_parser_unknown_character_type_token
24
+ assert_raise( Regexp::Parser::UnknownTokenError ) {
25
+ RP.parse_token(Regexp::Token.new(:type, :foo))
26
+ }
27
+ end
28
+
29
+ def test_parser_unknown_unicode_property_token
30
+ assert_raise( Regexp::Parser::UnknownTokenError ) {
31
+ RP.parse_token(Regexp::Token.new(:property, :foo))
32
+ }
33
+ end
34
+
35
+ def test_parser_unknown_unicode_nonproperty_token
36
+ assert_raise( Regexp::Parser::UnknownTokenError ) {
37
+ RP.parse_token(Regexp::Token.new(:nonproperty, :foo))
38
+ }
39
+ end
40
+
41
+ def test_parser_unknown_anchor_token
42
+ assert_raise( Regexp::Parser::UnknownTokenError ) {
43
+ RP.parse_token(Regexp::Token.new(:anchor, :foo))
44
+ }
45
+ end
46
+
47
+ def test_parser_unknown_quantifier_token
48
+ assert_raise( Regexp::Parser::UnknownTokenError ) {
49
+ RP.parse_token(Regexp::Token.new(:quantifier, :foo))
50
+ }
51
+ end
52
+
53
+ def test_parser_unknown_group_open_token
54
+ assert_raise( Regexp::Parser::UnknownTokenError ) {
55
+ RP.parse_token(Regexp::Token.new(:group, :foo))
56
+ }
57
+ end
58
+
59
+ end
@@ -0,0 +1,48 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class TestParserEscapes < Test::Unit::TestCase
4
+
5
+ def test_parse_control_sequence_short
6
+ # TODO: placeholder, the parse call is disabled so this test asserts nothing: root = RP.parse(/\b\d\\\c2\C-C\M-\C-2/)
7
+ end
8
+
9
+ tests = {
10
+ /a\ac/ => [1, :escape, :bell, EscapeSequence::Bell],
11
+ /a\ec/ => [1, :escape, :escape, EscapeSequence::AsciiEscape],
12
+ /a\fc/ => [1, :escape, :form_feed, EscapeSequence::FormFeed],
13
+ /a\nc/ => [1, :escape, :newline, EscapeSequence::Newline],
14
+ /a\rc/ => [1, :escape, :carriage, EscapeSequence::Return],
15
+ /a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
16
+ /a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],
17
+
18
+ # special cases
19
+ /a\bc/ => [1, :anchor, :word_boundary, Anchor::WordBoundary],
20
+ /a\sc/ => [1, :type, :space, CharacterType::Space],
21
+
22
+ # meta character escapes
23
+ /a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
24
+ /a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
25
+ /a\*c/ => [1, :escape, :zero_or_more, EscapeSequence::Literal],
26
+ /a\+c/ => [1, :escape, :one_or_more, EscapeSequence::Literal],
27
+ /a\|c/ => [1, :escape, :alternation, EscapeSequence::Literal],
28
+ /a\(c/ => [1, :escape, :group_open, EscapeSequence::Literal],
29
+ /a\)c/ => [1, :escape, :group_close, EscapeSequence::Literal],
30
+ /a\{c/ => [1, :escape, :interval_open, EscapeSequence::Literal],
31
+ /a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
32
+ }
33
+
34
+ count = 0
35
+ tests.each do |pattern, test|
36
+ define_method "test_parse_anchor_#{test[2]}_#{count+=1}" do
37
+ root = RP.parse(pattern, 'ruby/1.9')
38
+ exp = root.expressions[test[0]]
39
+
40
+ assert( exp.is_a?( test[3] ),
41
+ "Expected #{test[3]}, but got #{exp.class.name}")
42
+
43
+ assert_equal( test[1], exp.type )
44
+ assert_equal( test[2], exp.token )
45
+ end
46
+ end
47
+
48
+ end
@@ -0,0 +1,51 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ParserExpression < Test::Unit::TestCase
4
+
5
+ def test_parse_expression_to_s_literal_alternation
6
+ pattern = 'abcd|ghij|klmn|pqur'
7
+ assert_equal( pattern, RP.parse(pattern).to_s )
8
+ end
9
+
10
+ def test_parse_expression_to_s_quantified_alternations
11
+ pattern = '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
12
+ assert_equal( pattern, RP.parse(pattern).to_s )
13
+ end
14
+
15
+ def test_parse_expression_to_s_quantified_sets
16
+ pattern = '[abc]+|[^def]{3,6}'
17
+ assert_equal( pattern, RP.parse(pattern).to_s )
18
+ end
19
+
20
+ def test_parse_expression_to_s_property_sets
21
+ pattern = '[\a\b\p{Lu}\P{Z}\c\d]+'
22
+ assert_equal( pattern, RP.parse(pattern).to_s )
23
+ end
24
+
25
+ def test_parse_expression_to_s_groups
26
+ pattern = "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
27
+ assert_equal( pattern, RP.parse(pattern).to_s )
28
+ end
29
+
30
+ def test_parse_expression_to_s_assertions
31
+ pattern = '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
32
+ assert_equal( pattern, RP.parse(pattern).to_s )
33
+ end
34
+
35
+ def test_parse_expression_to_s_comments
36
+ pattern = '(?#start)a(?#middle)b(?#end)'
37
+ assert_equal( pattern, RP.parse(pattern).to_s )
38
+ end
39
+
40
+ def test_parse_expression_to_s_options
41
+ pattern = '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
42
+ assert_equal( pattern, RP.parse(pattern).to_s )
43
+ end
44
+
45
+ def test_parse_expression_to_s_url
46
+ pattern = '(^$)|(^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*'+
47
+ '\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$)'
48
+ assert_equal( pattern, RP.parse(pattern).to_s )
49
+ end
50
+
51
+ end
@@ -0,0 +1,69 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class TestParserGroups < Test::Unit::TestCase
4
+
5
+ def test_parse_root_options_mi
6
+ t = RP.parse((/[abc]/mi).to_s)
7
+
8
+ assert_equal( true, t.m? )
9
+ assert_equal( true, t.i? )
10
+ assert_equal( false, t.x? )
11
+ end
12
+
13
+ def test_parse_nested_options_m
14
+ t = RP.parse('(?xi-m:a(?m-ix:b))')
15
+
16
+ assert_equal( true, t.expressions[0].expressions[1].m? )
17
+ assert_equal( false, t.expressions[0].expressions[1].i? )
18
+ assert_equal( false, t.expressions[0].expressions[1].x? )
19
+ end
20
+
21
+ def test_parse_nested_options_xm
22
+ t = RP.parse(/(?i-xm:a(?mx-i:b))/)
23
+
24
+ assert_equal( true, t.expressions[0].expressions[1].m? )
25
+ assert_equal( false, t.expressions[0].expressions[1].i? )
26
+ assert_equal( true, t.expressions[0].expressions[1].x? )
27
+ end
28
+
29
+ def test_parse_nested_options_im
30
+ t = RP.parse(/(?x-mi:a(?mi-x:b))/)
31
+
32
+ assert_equal( true, t.expressions[0].expressions[1].m? )
33
+ assert_equal( true, t.expressions[0].expressions[1].i? )
34
+ assert_equal( false, t.expressions[0].expressions[1].x? )
35
+ end
36
+
37
+ def test_parse_lookahead
38
+ t = RP.parse('(?=abc)(?!def)')
39
+
40
+ assert( t.expressions[0].is_a?(Assertion::Lookahead),
41
+ "Expected lookahead, but got #{t.expressions[0].class.name}")
42
+
43
+ assert( t.expressions[1].is_a?(Assertion::NegativeLookahead),
44
+ "Expected negative lookahead, but got #{t.expressions[0].class.name}")
45
+ end
46
+
47
+ def test_parse_lookbehind
48
+ t = RP.parse('(?<=abc)(?<!def)')
49
+
50
+ assert( t.expressions[0].is_a?(Assertion::Lookbehind),
51
+ "Expected lookbehind, but got #{t.expressions[0].class.name}")
52
+
53
+ assert( t.expressions[1].is_a?(Assertion::NegativeLookbehind),
54
+ "Expected negative lookbehind, but got #{t.expressions[0].class.name}")
55
+ end
56
+
57
+ def test_parse_comment
58
+ t = RP.parse('a(?# is for apple)b(?# for boy)c(?# cat)')
59
+
60
+ [1,3,5].each do |i|
61
+ assert( t.expressions[i].is_a?(Group::Comment),
62
+ "Expected comment, but got #{t.expressions[i].class.name}")
63
+
64
+ assert_equal( :group, t.expressions[i].type )
65
+ assert_equal( :comment, t.expressions[i].token )
66
+ end
67
+ end
68
+
69
+ end
@@ -0,0 +1,346 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ParserProperties < Test::Unit::TestCase
4
+
5
+ modes = ['p', 'P']
6
+ props = [
7
+ 'Alnum',
8
+ 'Alpha',
9
+ 'Any',
10
+ 'Ascii',
11
+ 'Blank',
12
+ 'Cntrl',
13
+ 'Digit',
14
+ 'Graph',
15
+ 'Lower',
16
+ 'Newline',
17
+ 'Print',
18
+ 'Punct',
19
+ 'Space',
20
+ 'Upper',
21
+ 'Word',
22
+ 'Xdigit',
23
+
24
+ 'L',
25
+ 'Letter',
26
+
27
+ 'Lu',
28
+ 'Uppercase_Letter',
29
+
30
+ 'Ll',
31
+ 'Lowercase_Letter',
32
+
33
+ 'Lt',
34
+ 'Titlecase_Letter',
35
+
36
+ 'Lm',
37
+ 'Modifier_Letter',
38
+
39
+ 'Lo',
40
+ 'Other_Letter',
41
+
42
+ 'M',
43
+ 'Mark',
44
+
45
+ 'Mn',
46
+ 'Nonspacing_Mark',
47
+
48
+ 'Mc',
49
+ 'Spacing_Mark',
50
+
51
+ 'Me',
52
+ 'Enclosing_Mark',
53
+
54
+ 'N',
55
+ 'Number',
56
+
57
+ 'Nd',
58
+ 'Decimal_Number',
59
+
60
+ 'Nl',
61
+ 'Letter_Number',
62
+
63
+ 'No',
64
+ 'Other_Number',
65
+
66
+ 'P',
67
+ 'Punctuation',
68
+
69
+ 'Pc',
70
+ 'Connector_Punctuation',
71
+
72
+ 'Pd',
73
+ 'Dash_Punctuation',
74
+
75
+ 'Ps',
76
+ 'Open_Punctuation',
77
+
78
+ 'Pe',
79
+ 'Close_Punctuation',
80
+
81
+ 'Pi',
82
+ 'Initial_Punctuation',
83
+
84
+ 'Pf',
85
+ 'Final_Punctuation',
86
+
87
+ 'Po',
88
+ 'Other_Punctuation',
89
+
90
+ 'S',
91
+ 'Symbol',
92
+
93
+ 'Sm',
94
+ 'Math_Symbol',
95
+
96
+ 'Sc',
97
+ 'Currency_Symbol',
98
+
99
+ 'Sk',
100
+ 'Modifier_Symbol',
101
+
102
+ 'So',
103
+ 'Other_Symbol',
104
+
105
+ 'Z',
106
+ 'Separator',
107
+
108
+ 'Zs',
109
+ 'Space_Separator',
110
+
111
+ 'Zl',
112
+ 'Line_Separator',
113
+
114
+ 'Zp',
115
+ 'Paragraph_Separator',
116
+
117
+ 'C',
118
+ 'Other',
119
+
120
+ 'Cc',
121
+ 'Control',
122
+
123
+ 'Cf',
124
+ 'Format',
125
+
126
+ 'Cs',
127
+ 'Surrogate',
128
+
129
+ 'Co',
130
+ 'Private_Use',
131
+
132
+ 'Cn',
133
+ 'Unassigned',
134
+
135
+ 'Age=1.1',
136
+ 'Age=2.0',
137
+ 'Age=2.1',
138
+ 'Age=3.0',
139
+ 'Age=3.1',
140
+ 'Age=3.2',
141
+ 'Age=4.0',
142
+ 'Age=4.1',
143
+ 'Age=5.0',
144
+ 'Age=5.1',
145
+ 'Age=5.2',
146
+ 'Age=6.0',
147
+
148
+ 'ahex',
149
+ 'ASCII_Hex_Digit',
150
+
151
+ 'Alphabetic',
152
+
153
+ 'Cased',
154
+
155
+ 'cwcf',
156
+ 'Changes_When_Casefolded',
157
+
158
+ 'cwcm',
159
+ 'Changes_When_Casemapped',
160
+
161
+ 'cwl',
162
+ 'Changes_When_Lowercased',
163
+
164
+ 'cwt',
165
+ 'Changes_When_Titlecased',
166
+
167
+ 'cwu',
168
+ 'Changes_When_Uppercased',
169
+
170
+ 'ci',
171
+ 'Case_Ignorable',
172
+
173
+ 'bidic',
174
+ 'Bidi_Control',
175
+
176
+ 'Dash',
177
+
178
+ 'dep',
179
+ 'Deprecated',
180
+
181
+ 'di',
182
+ 'Default_Ignorable_Code_Point',
183
+
184
+ 'dia',
185
+ 'Diacritic',
186
+
187
+ 'ext',
188
+ 'Extender',
189
+
190
+ 'grbase',
191
+ 'Grapheme_Base',
192
+
193
+ 'grext',
194
+ 'Grapheme_Extend',
195
+
196
+ 'grlink',
197
+ 'Grapheme_Link',
198
+
199
+ 'hex',
200
+ 'Hex_Digit',
201
+
202
+ 'Hyphen',
203
+
204
+ 'idc',
205
+ 'ID_Continue',
206
+
207
+ 'ideo',
208
+ 'Ideographic',
209
+
210
+ 'ids',
211
+ 'ID_Start',
212
+
213
+ 'idsb',
214
+ 'IDS_Binary_Operator',
215
+
216
+ 'idst',
217
+ 'IDS_Trinary_Operator',
218
+
219
+ 'joinc',
220
+ 'Join_Control',
221
+
222
+ 'loe',
223
+ 'Logical_Order_Exception',
224
+
225
+ 'Lowercase',
226
+
227
+ 'Math',
228
+
229
+ 'nchar',
230
+ 'Noncharacter_Code_Point',
231
+
232
+ 'oalpha',
233
+ 'Other_Alphabetic',
234
+
235
+ 'odi',
236
+ 'Other_Default_Ignorable_Code_Point',
237
+
238
+ 'ogrext',
239
+ 'Other_Grapheme_Extend',
240
+
241
+ 'oidc',
242
+ 'Other_ID_Continue',
243
+
244
+ 'oids',
245
+ 'Other_ID_Start',
246
+
247
+ 'olower',
248
+ 'Other_Lowercase',
249
+
250
+ 'omath',
251
+ 'Other_Math',
252
+
253
+ 'oupper',
254
+ 'Other_Uppercase',
255
+
256
+ 'patsyn',
257
+ 'Pattern_Syntax',
258
+
259
+ 'patws',
260
+ 'Pattern_Whitespace',
261
+
262
+ 'qmark',
263
+ 'quotationmark',
264
+
265
+ 'radical',
266
+
267
+ 'sd',
268
+ 'Soft_Dotted',
269
+
270
+ 'sterm',
271
+
272
+ 'term',
273
+ 'Terminal_Punctuation',
274
+
275
+ 'uideo',
276
+ 'Unified_Ideograph',
277
+
278
+ 'Uppercase',
279
+
280
+ 'vs',
281
+ 'Variation_Selector',
282
+
283
+ 'wspace',
284
+ 'whitespace',
285
+
286
+ 'xids',
287
+ 'XID_Start',
288
+
289
+ 'xidc',
290
+ 'XID_Continue',
291
+ ]
292
+
293
+ modes.each do |mode|
294
+ token_type = mode == 'p' ? :property : :nonproperty
295
+
296
+ props.each do |property|
297
+ define_method "test_parse_#{token_type}_#{property}" do
298
+ t = RP.parse "ab\\#{mode}{#{property}}"
299
+
300
+ assert( t.expressions.last.is_a?(UnicodeProperty::Base),
301
+ "Expected property, but got #{t.expressions.last.class.name}")
302
+
303
+ assert_equal( token_type, t.expressions.last.type )
304
+ assert_equal( property, t.expressions.last.name )
305
+ end
306
+ end
307
+ end
308
+
309
+ def test_parse_property_negative
310
+ t = RP.parse 'ab\p{L}cd'
311
+ assert_equal( false, t.expressions[1].negative? )
312
+ end
313
+
314
+ def test_parse_nonproperty_negative
315
+ t = RP.parse 'ab\P{L}cd'
316
+ assert_equal( true, t.expressions[1].negative? )
317
+ end
318
+
319
+ def test_parse_property_age
320
+ t = RP.parse 'ab\p{age=5.2}cd'
321
+
322
+ assert( t.expressions[1].is_a?(UnicodeProperty::Age),
323
+ "Expected Age property, but got #{t.expressions[1].class.name}")
324
+ end
325
+
326
+ def test_parse_property_derived
327
+ t = RP.parse 'ab\p{Math}cd'
328
+
329
+ assert( t.expressions[1].is_a?(UnicodeProperty::Derived),
330
+ "Expected Derived property, but got #{t.expressions[1].class.name}")
331
+ end
332
+
333
+ def test_parse_property_script
334
+ t = RP.parse 'ab\p{Hiragana}cd'
335
+
336
+ assert( t.expressions[1].is_a?(UnicodeProperty::Script),
337
+ "Expected Script property, but got #{t.expressions[1].class.name}")
338
+ end
339
+
340
+ def test_parse_property_following_literal
341
+ t = RP.parse 'ab\p{Lu}cd'
342
+
343
+ assert( t.expressions[2].is_a?(Literal),
344
+ "Expected Literal, but got #{t.expressions[2].class.name}")
345
+ end
346
+ end