regexp_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +307 -0
- data/Rakefile +91 -0
- data/lib/regexp_parser/ctype.rb +48 -0
- data/lib/regexp_parser/expression/property.rb +108 -0
- data/lib/regexp_parser/expression/set.rb +59 -0
- data/lib/regexp_parser/expression.rb +287 -0
- data/lib/regexp_parser/lexer.rb +105 -0
- data/lib/regexp_parser/parser.rb +417 -0
- data/lib/regexp_parser/scanner/property.rl +534 -0
- data/lib/regexp_parser/scanner/scanner.rl +712 -0
- data/lib/regexp_parser/scanner.rb +3325 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
- data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +332 -0
- data/lib/regexp_parser/syntax.rb +172 -0
- data/lib/regexp_parser.rb +45 -0
- data/test/helpers.rb +8 -0
- data/test/lexer/test_all.rb +26 -0
- data/test/lexer/test_literals.rb +120 -0
- data/test/lexer/test_nesting.rb +107 -0
- data/test/lexer/test_refcalls.rb +45 -0
- data/test/parser/test_all.rb +44 -0
- data/test/parser/test_alternation.rb +46 -0
- data/test/parser/test_anchors.rb +35 -0
- data/test/parser/test_errors.rb +59 -0
- data/test/parser/test_escapes.rb +48 -0
- data/test/parser/test_expression.rb +51 -0
- data/test/parser/test_groups.rb +69 -0
- data/test/parser/test_properties.rb +346 -0
- data/test/parser/test_quantifiers.rb +236 -0
- data/test/parser/test_refcalls.rb +101 -0
- data/test/parser/test_sets.rb +99 -0
- data/test/scanner/test_all.rb +30 -0
- data/test/scanner/test_anchors.rb +35 -0
- data/test/scanner/test_errors.rb +36 -0
- data/test/scanner/test_escapes.rb +49 -0
- data/test/scanner/test_groups.rb +41 -0
- data/test/scanner/test_literals.rb +85 -0
- data/test/scanner/test_meta.rb +36 -0
- data/test/scanner/test_properties.rb +315 -0
- data/test/scanner/test_quantifiers.rb +38 -0
- data/test/scanner/test_refcalls.rb +45 -0
- data/test/scanner/test_scripts.rb +314 -0
- data/test/scanner/test_sets.rb +80 -0
- data/test/scanner/test_types.rb +30 -0
- data/test/syntax/ruby/test_1.8.rb +57 -0
- data/test/syntax/ruby/test_1.9.1.rb +39 -0
- data/test/syntax/ruby/test_1.9.3.rb +38 -0
- data/test/syntax/ruby/test_all.rb +12 -0
- data/test/syntax/test_all.rb +19 -0
- data/test/test_all.rb +4 -0
- metadata +160 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests for anchor tokens (line and string anchors, word boundaries
# and \G), plus patterns where an escaped backslash precedes an anchor letter
# and must be reported as a backslash escape instead.
class ScannerAnchors < Test::Unit::TestCase

  # pattern => [index of the token to check, followed by the five values the
  #             scanned token is expected to equal: type, token, text and the
  #             start/end offsets of the text within the pattern]
  cases = {
    '^abc'     => [0, :anchor, :beginning_of_line, '^', 0, 1],
    'abc$'     => [1, :anchor, :end_of_line, '$', 3, 4],

    '\Aabc'    => [0, :anchor, :bos, '\A', 0, 2],
    'abc\z'    => [1, :anchor, :eos, '\z', 3, 5],
    'abc\Z'    => [1, :anchor, :eos_ob_eol, '\Z', 3, 5],

    'a\bc'     => [1, :anchor, :word_boundary, '\b', 1, 3],
    'a\Bc'     => [1, :anchor, :nonword_boundary, '\B', 1, 3],

    'a\Gc'     => [1, :anchor, :match_start, '\G', 1, 3],

    # A literal backslash followed by an anchor letter: the expected token is
    # the backslash escape, not an anchor.
    "\\\\Ac"   => [0, :escape, :backslash, '\\\\', 0, 2],
    "a\\\\z"   => [1, :escape, :backslash, '\\\\', 1, 3],
    "a\\\\Z"   => [1, :escape, :backslash, '\\\\', 1, 3],
    "a\\\\bc"  => [1, :escape, :backslash, '\\\\', 1, 3],
    "a\\\\Bc"  => [1, :escape, :backslash, '\\\\', 1, 3],
  }

  # Generate one test method per case; the trailing sequence number keeps
  # method names unique when type and token repeat.
  cases.each_with_index do |(pattern, expected), index|
    define_method "test_scanner_#{expected[1]}_#{expected[2]}_#{index + 1}" do
      scanned = RS.scan(pattern)

      # expected[0] selects the token; expected[1, 5] is the token itself.
      assert_equal( expected[1, 5], scanned[expected[0]] )
    end
  end

end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests asserting that malformed patterns raise the expected
# Regexp::Scanner error classes.
class ScannerErrors < Test::Unit::TestCase

  # A character set that is opened but never closed.
  def test_scanner_unbalanced_set
    assert_raise( Regexp::Scanner::PrematureEndError ) do
      RS.scan('[[:alpha:]')
    end
  end

  # A group that is opened but never closed.
  def test_scanner_unbalanced_group
    assert_raise( Regexp::Scanner::PrematureEndError ) do
      RS.scan('(abc')
    end
  end

  # An interval quantifier missing its closing brace.
  def test_scanner_unbalanced_interval
    assert_raise( Regexp::Scanner::PrematureEndError ) do
      RS.scan('a{1,2')
    end
  end

  # A property escape missing its closing brace.
  def test_scanner_incomplete_property
    assert_raise( Regexp::Scanner::PrematureEndError ) do
      RS.scan('\p{ascii abc')
    end
  end

  # A well-formed property escape with an unrecognized name.
  def test_scanner_unknown_property
    assert_raise( Regexp::Scanner::UnknownUnicodePropertyError ) do
      RS.scan('\p{foobar}')
    end
  end

  # An options group whose flags are followed by a space.
  def test_scanner_incomplete_options
    assert_raise( Regexp::Scanner::ScannerError ) do
      RS.scan('(?mix abc)')
    end
  end

  # An options group cut off by the end of the pattern.
  def test_scanner_eof_options
    assert_raise( Regexp::Scanner::PrematureEndError ) do
      RS.scan('(?mix')
    end
  end

  # An options group whose flags are followed by an unexpected character.
  def test_scanner_incorrect_options
    assert_raise( Regexp::Scanner::ScannerError ) do
      RS.scan('(?mix^bc')
    end
  end

end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests for escape sequences: control characters, literal escapes,
# octal, hex, codepoint, control and meta sequences.
class ScannerEscapes < Test::Unit::TestCase

  # pattern => [index of the token to check, followed by the five values the
  #             scanned token is expected to equal: type, token, text and the
  #             start/end offsets of the text within the pattern]
  # Patterns are given both as Regexp literals and as strings.
  cases = {
    /c\at/            => [1, :escape, :bell, '\a', 1, 3],

    # not an escape outside a character set
    /c\bt/            => [1, :anchor, :word_boundary, '\b', 1, 3],

    /c\ft/            => [1, :escape, :form_feed, '\f', 1, 3],
    /c\nt/            => [1, :escape, :newline, '\n', 1, 3],
    /c\tt/            => [1, :escape, :tab, '\t', 1, 3],
    /c\vt/            => [1, :escape, :vertical_tab, '\v', 1, 3],

    /c\qt/            => [1, :escape, :literal, '\q', 1, 3],

    # Octal escapes: expected to take three digits after the backslash.
    'a\012c'          => [1, :escape, :octal, '\012', 1, 5],
    'a\0124'          => [1, :escape, :octal, '\012', 1, 5],
    '\712+7'          => [0, :escape, :octal, '\712', 0, 4],

    # Hex escapes: expected to take two digits after \x.
    'a\x24c'          => [1, :escape, :hex, '\x24', 1, 5],
    'a\x0640c'        => [1, :escape, :hex, '\x06', 1, 5],

    'a\x{0640}c'      => [1, :escape, :hex_wide, '\x{0640}', 1, 9],

    'a\u0640c'        => [1, :escape, :codepoint, '\u0640', 1, 7],
    'a\u{0640 0641}c' => [1, :escape, :codepoint_list, '\u{0640 0641}', 1, 14],

    'a\cCc'           => [1, :escape, :control, '\cC', 1, 4],
    'a\C-cc'          => [1, :escape, :control, '\C-c', 1, 5],

    # TODO: verify these escapes
    'a\M-Cc'          => [1, :escape, :meta_sequence, '\M-C', 1, 5],
    'a\M-\C-cc'       => [1, :escape, :meta_sequence, '\M-\C-c', 1, 8],
  }

  # Generate one test method per case; the trailing sequence number keeps
  # method names unique when type and token repeat.
  cases.each_with_index do |(pattern, expected), index|
    define_method "test_scan_#{expected[1]}_#{expected[2]}_#{index + 1}" do
      scanned = RS.scan(pattern)
      actual  = scanned[expected[0]]

      assert_equal( expected[1, 5], actual )
    end
  end

end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests for group tokens: option groups, capturing/atomic/named/
# passive groups, comment groups and look-around assertions.
class ScannerGroups < Test::Unit::TestCase

  # pattern => [index of the token to check, followed by the five values the
  #             scanned token is expected to equal: type, token, text and the
  #             start/end offsets of the text within the pattern]
  tests = {
    # Options
    '(?-mix:abc)'     => [0, :group, :options, '(?-mix:', 0, 7],
    '(?m-ix:abc)'     => [0, :group, :options, '(?m-ix:', 0, 7],
    '(?mi-x:abc)'     => [0, :group, :options, '(?mi-x:', 0, 7],
    '(?mix:abc)'      => [0, :group, :options, '(?mix:', 0, 6],
    '(?mix)'          => [0, :group, :options, '(?mix', 0, 5],

    # Group types
    '(?>abc)'         => [0, :group, :atomic, '(?>', 0, 3],
    '(abc)'           => [0, :group, :capture, '(', 0, 1],
    '(?<name>abc)'    => [0, :group, :named_ab, '(?<name>', 0, 8],
    "(?'name'abc)"    => [0, :group, :named_sq, "(?'name'", 0, 8],
    '(?:abc)'         => [0, :group, :passive, '(?:', 0, 3],

    # Comments
    '(?#abc)'         => [0, :group, :comment, '(?#abc)', 0, 7],

    # Assertions
    '(?=abc)'         => [0, :assertion, :lookahead, '(?=', 0, 3],
    '(?!abc)'         => [0, :assertion, :nlookahead, '(?!', 0, 3],
    '(?<=abc)'        => [0, :assertion, :lookbehind, '(?<=', 0, 4],
    '(?<!abc)'        => [0, :assertion, :nlookbehind, '(?<!', 0, 4],
  }

  # Generate one test method per case; the trailing sequence number keeps
  # method names unique when type and token repeat.
  count = 0
  tests.each do |pattern, test|
    define_method "test_scan_#{test[1]}_#{test[2]}_#{count+=1}" do

      tokens = RS.scan(pattern)
      token  = tokens[test[0]]

      assert_equal( test[1,5], token )

      # Cross-check the reported offsets against the pattern itself.
      # token[3] and token[4] are start and END offsets, so slice with a
      # range: String#[start, length] would read the end offset as a length
      # and only happens to agree when the token starts at offset 0.
      assert_equal( test[3], pattern[token[3]...token[4]] )

    end
  end

end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path("../../helpers", __FILE__)
|
4
|
+
|
5
|
+
# Scanner tests for literal runs containing multi-byte UTF-8 characters,
# mixed with quantifiers and groups. The expected start/end offsets advance
# by the encoded width of each character (1 to 4 bytes).
class ScannerUTF8 < Test::Unit::TestCase

  # pattern => list of expected tokens, in scan order starting at token 0.
  # Each token is [type, token, text, start offset, end offset].
  cases = {
    # ascii, single byte characters
    'a' => [
      [:literal,     :literal,       'a',        0, 1],
    ],

    'ab+' => [
      [:literal,     :literal,       'ab',       0, 2],
      [:quantifier,  :one_or_more,   '+',        2, 3],
    ],

    # 2 byte wide characters, Arabic
    'aاbبcت' => [
      [:literal,     :literal,       'aاbبcت',   0, 9],
    ],

    'aاbبت?' => [
      [:literal,     :literal,       'aاbبت',    0, 8],
      [:quantifier,  :zero_or_one,   '?',        8, 9],
    ],

    'aا?bبcت+' => [
      [:literal,     :literal,       'aا',       0, 3],
      [:quantifier,  :zero_or_one,   '?',        3, 4],
      [:literal,     :literal,       'bبcت',     4, 10],
      [:quantifier,  :one_or_more,   '+',        10, 11],
    ],

    'a(اbب+)cت?' => [
      [:literal,     :literal,       'a',        0, 1],
      [:group,       :capture,       '(',        1, 2],
      [:literal,     :literal,       'اbب',      2, 7],
      [:quantifier,  :one_or_more,   '+',        7, 8],
      [:group,       :close,         ')',        8, 9],
      [:literal,     :literal,       'cت',       9, 12],
      [:quantifier,  :zero_or_one,   '?',        12, 13],
    ],

    # 3 byte wide characters, Japanese
    'ab?れます+cd' => [
      [:literal,     :literal,       'ab',       0, 2],
      [:quantifier,  :zero_or_one,   '?',        2, 3],
      [:literal,     :literal,       'れます',   3, 12],
      [:quantifier,  :one_or_more,   '+',        12, 13],
      [:literal,     :literal,       'cd',       13, 15],
    ],

    # 4 byte wide characters, Osmanya
    '𐒀𐒁?𐒂ab+𐒃' => [
      [:literal,     :literal,       '𐒀𐒁',       0, 8],
      [:quantifier,  :zero_or_one,   '?',        8, 9],
      [:literal,     :literal,       '𐒂ab',      9, 15],
      [:quantifier,  :one_or_more,   '+',        15, 16],
      [:literal,     :literal,       '𐒃',        16, 20],
    ],

    'mu𝄞?si*𝄫c+' => [
      [:literal,     :literal,       'mu𝄞',      0, 6],
      [:quantifier,  :zero_or_one,   '?',        6, 7],
      [:literal,     :literal,       'si',       7, 9],
      [:quantifier,  :zero_or_more,  '*',        9, 10],
      [:literal,     :literal,       '𝄫c',       10, 15],
      [:quantifier,  :one_or_more,   '+',        15, 16],
    ],
  }

  # Generate one numbered test method per pattern; each compares every
  # expected token with the scanned token at the same position.
  cases.each_with_index do |(pattern, expected_tokens), index|
    define_method "test_scan_utf8_runs_#{index + 1}" do
      scanned = RS.scan(pattern)

      expected_tokens.each_with_index do |expected, position|
        assert_equal( expected, scanned[position] )
      end
    end
  end

end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests for the alternation meta character, including escaped pipes
# and a pipe inside a character set.
class ScannerMeta < Test::Unit::TestCase

  # pattern => { token index => expected [type, token, text, start, end] }
  # Only the listed token indexes are checked for each pattern.
  cases = {
    'abc??|def*+|ghi+' => {
      0   => [:literal,     :literal,                   'abc',    0, 3],
      1   => [:quantifier,  :zero_or_one_reluctant,     '??',     3, 5],
      2   => [:meta,        :alternation,               '|',      5, 6],
      3   => [:literal,     :literal,                   'def',    6, 9],
      4   => [:quantifier,  :zero_or_more_possessive,   '*+',     9, 11],
      5   => [:meta,        :alternation,               '|',      11, 12],
    },

    # Escaped pipes are expected as escapes, bare pipes as alternations, and
    # the pipe between brackets as a set member.
    '(a\|b)|(c|d)\|(e[|]f)' => {
      2   => [:escape,      :alternation,               '\|',     2, 4],
      5   => [:meta,        :alternation,               '|',      6, 7],
      8   => [:meta,        :alternation,               '|',      9, 10],
      11  => [:escape,      :alternation,               '\|',     12, 14],
      15  => [:set,         :member,                    '|',      17, 18],
    },
  }

  # Generate one numbered test method per pattern.
  cases.each_with_index do |(pattern, expectations), index|
    define_method "test_scan_meta_alternation_#{index + 1}" do
      scanned = RS.scan(pattern)

      expectations.each do |position, expected|
        assert_equal( expected, scanned[position] )
      end
    end
  end

end
|
@@ -0,0 +1,315 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Scanner tests for property escapes. Every property name below is scanned in
# three forms (\p{Name}, \P{Name} and \p{^Name}), and the resulting token is
# checked for its type and for the expected property symbol.
class ScannerProperties < Test::Unit::TestCase

  # property name as written in the pattern => expected token symbol.
  # Short and long spellings of the same property map to the same symbol.
  properties = {
    'Alnum'                               => :alnum,
    'Alpha'                               => :alpha,
    'Any'                                 => :any,
    'Ascii'                               => :ascii,
    'Blank'                               => :blank,
    'Cntrl'                               => :cntrl,
    'Digit'                               => :digit,
    'Graph'                               => :graph,
    'Lower'                               => :lower,
    'Newline'                             => :newline,
    'Print'                               => :print,
    'Punct'                               => :punct,
    'Space'                               => :space,
    'Upper'                               => :upper,
    'Word'                                => :word,
    'Xdigit'                              => :xdigit,

    'L'                                   => :letter_any,
    'Letter'                              => :letter_any,

    'Lu'                                  => :letter_uppercase,
    'Uppercase_Letter'                    => :letter_uppercase,

    'Ll'                                  => :letter_lowercase,
    'Lowercase_Letter'                    => :letter_lowercase,

    'Lt'                                  => :letter_titlecase,
    'Titlecase_Letter'                    => :letter_titlecase,

    'Lm'                                  => :letter_modifier,
    'Modifier_Letter'                     => :letter_modifier,

    'Lo'                                  => :letter_other,
    'Other_Letter'                        => :letter_other,

    'M'                                   => :mark_any,
    'Mark'                                => :mark_any,

    'Mn'                                  => :mark_nonspacing,
    'Nonspacing_Mark'                     => :mark_nonspacing,

    'Mc'                                  => :mark_spacing,
    'Spacing_Mark'                        => :mark_spacing,

    'Me'                                  => :mark_enclosing,
    'Enclosing_Mark'                      => :mark_enclosing,

    'N'                                   => :number_any,
    'Number'                              => :number_any,

    'Nd'                                  => :number_decimal,
    'Decimal_Number'                      => :number_decimal,

    'Nl'                                  => :number_letter,
    'Letter_Number'                       => :number_letter,

    'No'                                  => :number_other,
    'Other_Number'                        => :number_other,

    'P'                                   => :punct_any,
    'Punctuation'                         => :punct_any,

    'Pc'                                  => :punct_connector,
    'Connector_Punctuation'               => :punct_connector,

    'Pd'                                  => :punct_dash,
    'Dash_Punctuation'                    => :punct_dash,

    'Ps'                                  => :punct_open,
    'Open_Punctuation'                    => :punct_open,

    'Pe'                                  => :punct_close,
    'Close_Punctuation'                   => :punct_close,

    'Pi'                                  => :punct_initial,
    'Initial_Punctuation'                 => :punct_initial,

    'Pf'                                  => :punct_final,
    'Final_Punctuation'                   => :punct_final,

    'Po'                                  => :punct_other,
    'Other_Punctuation'                   => :punct_other,

    'S'                                   => :symbol_any,
    'Symbol'                              => :symbol_any,

    'Sm'                                  => :symbol_math,
    'Math_Symbol'                         => :symbol_math,

    'Sc'                                  => :symbol_currency,
    'Currency_Symbol'                     => :symbol_currency,

    'Sk'                                  => :symbol_modifier,
    'Modifier_Symbol'                     => :symbol_modifier,

    'So'                                  => :symbol_other,
    'Other_Symbol'                        => :symbol_other,

    'Z'                                   => :separator_any,
    'Separator'                           => :separator_any,

    'Zs'                                  => :separator_space,
    'Space_Separator'                     => :separator_space,

    'Zl'                                  => :separator_line,
    'Line_Separator'                      => :separator_line,

    'Zp'                                  => :separator_para,
    'Paragraph_Separator'                 => :separator_para,

    'C'                                   => :other,
    'Other'                               => :other,

    'Cc'                                  => :control,
    'Control'                             => :control,

    'Cf'                                  => :format,
    'Format'                              => :format,

    'Cs'                                  => :surrogate,
    'Surrogate'                           => :surrogate,

    'Co'                                  => :private_use,
    'Private_Use'                         => :private_use,

    'Cn'                                  => :unassigned,
    'Unassigned'                          => :unassigned,

    'Age=1.1'                             => :age_1_1,
    'Age=2.0'                             => :age_2_0,
    'Age=2.1'                             => :age_2_1,
    'Age=3.0'                             => :age_3_0,
    'Age=3.1'                             => :age_3_1,
    'Age=3.2'                             => :age_3_2,
    'Age=4.0'                             => :age_4_0,
    'Age=4.1'                             => :age_4_1,
    'Age=5.0'                             => :age_5_0,
    'Age=5.1'                             => :age_5_1,
    'Age=5.2'                             => :age_5_2,
    'Age=6.0'                             => :age_6_0,

    'ahex'                                => :ascii_hex,
    'ASCII_Hex_Digit'                     => :ascii_hex,

    'Alphabetic'                          => :alphabetic,

    'Cased'                               => :cased,

    'cwcf'                                => :changes_when_casefolded,
    'Changes_When_Casefolded'             => :changes_when_casefolded,

    'cwcm'                                => :changes_when_casemapped,
    'Changes_When_Casemapped'             => :changes_when_casemapped,

    'cwl'                                 => :changes_when_lowercased,
    'Changes_When_Lowercased'             => :changes_when_lowercased,

    'cwt'                                 => :changes_when_titlecased,
    'Changes_When_Titlecased'             => :changes_when_titlecased,

    'cwu'                                 => :changes_when_uppercased,
    'Changes_When_Uppercased'             => :changes_when_uppercased,

    'ci'                                  => :case_ignorable,
    'Case_Ignorable'                      => :case_ignorable,

    'bidic'                               => :bidi_control,
    'Bidi_Control'                        => :bidi_control,

    'Dash'                                => :dash,

    'dep'                                 => :deprecated,
    'Deprecated'                          => :deprecated,

    'di'                                  => :default_ignorable_cp,
    'Default_Ignorable_Code_Point'        => :default_ignorable_cp,

    'dia'                                 => :diacritic,
    'Diacritic'                           => :diacritic,

    'ext'                                 => :extender,
    'Extender'                            => :extender,

    'grbase'                              => :grapheme_base,
    'Grapheme_Base'                       => :grapheme_base,

    'grext'                               => :grapheme_extend,
    'Grapheme_Extend'                     => :grapheme_extend,

    'grlink'                              => :grapheme_link,
    'Grapheme_Link'                       => :grapheme_link,

    'hex'                                 => :hex_digit,
    'Hex_Digit'                           => :hex_digit,

    'Hyphen'                              => :hyphen,

    'idc'                                 => :id_continue,
    'ID_Continue'                         => :id_continue,

    'ideo'                                => :ideographic,
    'Ideographic'                         => :ideographic,

    'ids'                                 => :id_start,
    'ID_Start'                            => :id_start,

    'idsb'                                => :ids_binary_op,
    'IDS_Binary_Operator'                 => :ids_binary_op,

    'idst'                                => :ids_trinary_op,
    'IDS_Trinary_Operator'                => :ids_trinary_op,

    'joinc'                               => :join_control,
    'Join_Control'                        => :join_control,

    'loe'                                 => :logical_order_exception,
    'Logical_Order_Exception'             => :logical_order_exception,

    'Lowercase'                           => :lowercase,

    'Math'                                => :math,

    'nchar'                               => :non_character_cp,
    'Noncharacter_Code_Point'             => :non_character_cp,

    'oalpha'                              => :other_alphabetic,
    'Other_Alphabetic'                    => :other_alphabetic,

    'odi'                                 => :other_default_ignorable_cp,
    'Other_Default_Ignorable_Code_Point'  => :other_default_ignorable_cp,

    'ogrext'                              => :other_grapheme_extended,
    'Other_Grapheme_Extend'               => :other_grapheme_extended,

    'oidc'                                => :other_id_continue,
    'Other_ID_Continue'                   => :other_id_continue,

    'oids'                                => :other_id_start,
    'Other_ID_Start'                      => :other_id_start,

    'olower'                              => :other_lowercase,
    'Other_Lowercase'                     => :other_lowercase,

    'omath'                               => :other_math,
    'Other_Math'                          => :other_math,

    'oupper'                              => :other_uppercase,
    'Other_Uppercase'                     => :other_uppercase,

    'patsyn'                              => :pattern_syntax,
    'Pattern_Syntax'                      => :pattern_syntax,

    'patws'                               => :pattern_whitespace,
    'Pattern_Whitespace'                  => :pattern_whitespace,

    'qmark'                               => :quotation_mark,
    'quotationmark'                       => :quotation_mark,

    'radical'                             => :radical,

    'sd'                                  => :soft_dotted,
    'Soft_Dotted'                         => :soft_dotted,

    'sterm'                               => :sentence_terminal,

    'term'                                => :terminal_punctuation,
    'Terminal_Punctuation'                => :terminal_punctuation,

    'uideo'                               => :unified_ideograph,
    'Unified_Ideograph'                   => :unified_ideograph,

    'Uppercase'                           => :uppercase,

    'vs'                                  => :variation_selector,
    'Variation_Selector'                  => :variation_selector,

    'wspace'                              => :whitespace,
    'whitespace'                          => :whitespace,

    'xids'                                => :xid_start,
    'XID_Start'                           => :xid_start,

    'xidc'                                => :xid_continue,
    'XID_Continue'                        => :xid_continue,
  }

  # The three spellings exercised for every property:
  # [label used in the test name, text placed before the property name,
  #  expected token type]
  variants = [
    ['property',           '\\p{',  :property],
    ['nonproperty',        '\\P{',  :nonproperty],
    ['caret_nonproperty',  '\\p{^', :nonproperty],
  ]

  # A single running counter across all generated methods keeps the names
  # unique even where several property names share one symbol.
  sequence = 0
  properties.each do |property, expected|
    variants.each do |label, opening, type|
      sequence += 1

      define_method "test_scan_#{label}_#{expected}_#{sequence}" do
        # The property escape sits between two literals, so it is token 1.
        token = RS.scan("a#{opening}#{property}}c")[1]

        assert_equal( type, token[0] )
        assert_equal( expected, token[1] )
      end
    end
  end
end
|