regexp_parser 0.5.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
data/test/scanner/test_groups.rb
CHANGED
@@ -4,61 +4,61 @@ class ScannerGroups < Test::Unit::TestCase
|
|
4
4
|
|
5
5
|
tests = {
|
6
6
|
# Options
|
7
|
-
'(?-mix:abc)' => [0, :group, :options,
|
8
|
-
'(?m-ix:abc)' => [0, :group, :options,
|
9
|
-
'(?mi-x:abc)' => [0, :group, :options,
|
10
|
-
'(?mix:abc)' => [0, :group, :options,
|
11
|
-
'(?
|
12
|
-
'(?
|
13
|
-
'(?
|
14
|
-
'(?
|
7
|
+
'(?-mix:abc)' => [0, :group, :options, '(?-mix:', 0, 7],
|
8
|
+
'(?m-ix:abc)' => [0, :group, :options, '(?m-ix:', 0, 7],
|
9
|
+
'(?mi-x:abc)' => [0, :group, :options, '(?mi-x:', 0, 7],
|
10
|
+
'(?mix:abc)' => [0, :group, :options, '(?mix:', 0, 6],
|
11
|
+
'(?m:)' => [0, :group, :options, '(?m:', 0, 4],
|
12
|
+
'(?i:)' => [0, :group, :options, '(?i:', 0, 4],
|
13
|
+
'(?x:)' => [0, :group, :options, '(?x:', 0, 4],
|
14
|
+
'(?mix)' => [0, :group, :options_switch, '(?mix', 0, 5],
|
15
15
|
|
16
16
|
# Group types
|
17
|
-
'(?>abc)' => [0, :group, :atomic,
|
18
|
-
'(abc)' => [0, :group, :capture,
|
17
|
+
'(?>abc)' => [0, :group, :atomic, '(?>', 0, 3],
|
18
|
+
'(abc)' => [0, :group, :capture, '(', 0, 1],
|
19
19
|
|
20
|
-
'(?<name>abc)' => [0, :group, :named_ab,
|
21
|
-
"(?'name'abc)" => [0, :group, :named_sq,
|
20
|
+
'(?<name>abc)' => [0, :group, :named_ab, '(?<name>', 0, 8],
|
21
|
+
"(?'name'abc)" => [0, :group, :named_sq, "(?'name'", 0, 8],
|
22
22
|
|
23
|
-
'(?<name_1>abc)' => [0, :group, :named_ab,
|
24
|
-
"(?'name_1'abc)" => [0, :group, :named_sq,
|
23
|
+
'(?<name_1>abc)' => [0, :group, :named_ab, '(?<name_1>', 0,10],
|
24
|
+
"(?'name_1'abc)" => [0, :group, :named_sq, "(?'name_1'", 0,10],
|
25
25
|
|
26
|
-
'(?:abc)' => [0, :group, :passive,
|
27
|
-
'(?:)' => [0, :group, :passive,
|
28
|
-
'(?::)' => [0, :group, :passive,
|
26
|
+
'(?:abc)' => [0, :group, :passive, '(?:', 0, 3],
|
27
|
+
'(?:)' => [0, :group, :passive, '(?:', 0, 3],
|
28
|
+
'(?::)' => [0, :group, :passive, '(?:', 0, 3],
|
29
29
|
|
30
30
|
# Comments
|
31
|
-
'(?#abc)' => [0, :group, :comment,
|
32
|
-
'(?#)' => [0, :group, :comment,
|
31
|
+
'(?#abc)' => [0, :group, :comment, '(?#abc)', 0, 7],
|
32
|
+
'(?#)' => [0, :group, :comment, '(?#)', 0, 4],
|
33
33
|
|
34
34
|
# Assertions
|
35
|
-
'(?=abc)' => [0, :assertion, :lookahead,
|
36
|
-
'(?!abc)' => [0, :assertion, :nlookahead,
|
37
|
-
'(?<=abc)' => [0, :assertion, :lookbehind,
|
38
|
-
'(?<!abc)' => [0, :assertion, :nlookbehind,
|
35
|
+
'(?=abc)' => [0, :assertion, :lookahead, '(?=', 0, 3],
|
36
|
+
'(?!abc)' => [0, :assertion, :nlookahead, '(?!', 0, 3],
|
37
|
+
'(?<=abc)' => [0, :assertion, :lookbehind, '(?<=', 0, 4],
|
38
|
+
'(?<!abc)' => [0, :assertion, :nlookbehind, '(?<!', 0, 4],
|
39
39
|
}
|
40
40
|
|
41
41
|
if RUBY_VERSION >= '2.0'
|
42
42
|
tests.merge!({
|
43
43
|
# New options
|
44
|
-
'(?d-mix:abc)' => [0, :group, :options,
|
45
|
-
'(?a-mix:abc)' => [0, :group, :options,
|
46
|
-
'(?u-mix:abc)' => [0, :group, :options,
|
47
|
-
'(?da-m:abc)' => [0, :group, :options,
|
48
|
-
'(?du-x:abc)' => [0, :group, :options,
|
49
|
-
'(?dau-i:abc)' => [0, :group, :options,
|
50
|
-
'(?dau:abc)' => [0, :group, :options,
|
51
|
-
'(?
|
52
|
-
'(?
|
53
|
-
'(?
|
54
|
-
'(?
|
44
|
+
'(?d-mix:abc)' => [0, :group, :options, '(?d-mix:', 0, 8],
|
45
|
+
'(?a-mix:abc)' => [0, :group, :options, '(?a-mix:', 0, 8],
|
46
|
+
'(?u-mix:abc)' => [0, :group, :options, '(?u-mix:', 0, 8],
|
47
|
+
'(?da-m:abc)' => [0, :group, :options, '(?da-m:', 0, 7],
|
48
|
+
'(?du-x:abc)' => [0, :group, :options, '(?du-x:', 0, 7],
|
49
|
+
'(?dau-i:abc)' => [0, :group, :options, '(?dau-i:', 0, 8],
|
50
|
+
'(?dau:abc)' => [0, :group, :options, '(?dau:', 0, 6],
|
51
|
+
'(?d:)' => [0, :group, :options, '(?d:', 0, 4],
|
52
|
+
'(?a:)' => [0, :group, :options, '(?a:', 0, 4],
|
53
|
+
'(?u:)' => [0, :group, :options, '(?u:', 0, 4],
|
54
|
+
'(?dau)' => [0, :group, :options_switch, '(?dau', 0, 5],
|
55
55
|
})
|
56
56
|
end
|
57
57
|
|
58
58
|
if RUBY_VERSION >= '2.4.1'
|
59
59
|
tests.merge!({
|
60
60
|
# New absence operator
|
61
|
-
'(?~abc)' => [0, :group, :absence,
|
61
|
+
'(?~abc)' => [0, :group, :absence, '(?~', 0, 3],
|
62
62
|
})
|
63
63
|
end
|
64
64
|
|
data/test/scanner/test_meta.rb
CHANGED
@@ -17,7 +17,7 @@ class ScannerMeta < Test::Unit::TestCase
|
|
17
17
|
5 => [:meta, :alternation, '|', 6, 7],
|
18
18
|
8 => [:meta, :alternation, '|', 9, 10],
|
19
19
|
11 => [:escape, :alternation, '\|', 12, 14],
|
20
|
-
15 => [:
|
20
|
+
15 => [:literal, :literal, '|', 17, 18],
|
21
21
|
},
|
22
22
|
}
|
23
23
|
|
@@ -4,118 +4,107 @@ class ScannerProperties < Test::Unit::TestCase
|
|
4
4
|
|
5
5
|
tests = {
|
6
6
|
'Alnum' => :alnum,
|
7
|
-
|
8
|
-
'Ascii' => :ascii,
|
9
|
-
'Blank' => :blank,
|
10
|
-
'Cntrl' => :cntrl,
|
11
|
-
'Digit' => :digit,
|
12
|
-
'Graph' => :graph,
|
13
|
-
'Lower' => :lower,
|
14
|
-
'Print' => :print,
|
15
|
-
'Punct' => :punct,
|
16
|
-
'Space' => :space,
|
17
|
-
'Upper' => :upper,
|
18
|
-
'Word' => :word,
|
19
|
-
'Xdigit' => :xdigit,
|
7
|
+
|
20
8
|
'XPosixPunct' => :xposixpunct,
|
21
9
|
|
22
10
|
'Newline' => :newline,
|
23
11
|
|
24
12
|
'Any' => :any,
|
13
|
+
|
25
14
|
'Assigned' => :assigned,
|
26
15
|
|
27
|
-
'L' => :
|
28
|
-
'Letter' => :
|
16
|
+
'L' => :letter,
|
17
|
+
'Letter' => :letter,
|
29
18
|
|
30
|
-
'Lu' => :
|
31
|
-
'Uppercase_Letter' => :
|
19
|
+
'Lu' => :uppercase_letter,
|
20
|
+
'Uppercase_Letter' => :uppercase_letter,
|
32
21
|
|
33
|
-
'Ll' => :
|
34
|
-
'Lowercase_Letter' => :
|
22
|
+
'Ll' => :lowercase_letter,
|
23
|
+
'Lowercase_Letter' => :lowercase_letter,
|
35
24
|
|
36
|
-
'Lt' => :
|
37
|
-
'Titlecase_Letter' => :
|
25
|
+
'Lt' => :titlecase_letter,
|
26
|
+
'Titlecase_Letter' => :titlecase_letter,
|
38
27
|
|
39
|
-
'Lm' => :
|
40
|
-
'Modifier_Letter' => :
|
28
|
+
'Lm' => :modifier_letter,
|
29
|
+
'Modifier_Letter' => :modifier_letter,
|
41
30
|
|
42
|
-
'Lo' => :
|
43
|
-
'Other_Letter' => :
|
31
|
+
'Lo' => :other_letter,
|
32
|
+
'Other_Letter' => :other_letter,
|
44
33
|
|
45
|
-
'M' => :
|
46
|
-
'Mark' => :
|
34
|
+
'M' => :mark,
|
35
|
+
'Mark' => :mark,
|
47
36
|
|
48
|
-
'Mn' => :
|
49
|
-
'Nonspacing_Mark' => :
|
37
|
+
'Mn' => :nonspacing_mark,
|
38
|
+
'Nonspacing_Mark' => :nonspacing_mark,
|
50
39
|
|
51
|
-
'Mc' => :
|
52
|
-
'Spacing_Mark' => :
|
40
|
+
'Mc' => :spacing_mark,
|
41
|
+
'Spacing_Mark' => :spacing_mark,
|
53
42
|
|
54
|
-
'Me' => :
|
55
|
-
'Enclosing_Mark' => :
|
43
|
+
'Me' => :enclosing_mark,
|
44
|
+
'Enclosing_Mark' => :enclosing_mark,
|
56
45
|
|
57
|
-
'N' => :
|
58
|
-
'Number' => :
|
46
|
+
'N' => :number,
|
47
|
+
'Number' => :number,
|
59
48
|
|
60
|
-
'Nd' => :
|
61
|
-
'Decimal_Number' => :
|
49
|
+
'Nd' => :decimal_number,
|
50
|
+
'Decimal_Number' => :decimal_number,
|
62
51
|
|
63
|
-
'Nl' => :
|
64
|
-
'Letter_Number' => :
|
52
|
+
'Nl' => :letter_number,
|
53
|
+
'Letter_Number' => :letter_number,
|
65
54
|
|
66
|
-
'No' => :
|
67
|
-
'Other_Number' => :
|
55
|
+
'No' => :other_number,
|
56
|
+
'Other_Number' => :other_number,
|
68
57
|
|
69
|
-
'P' => :
|
70
|
-
'Punctuation' => :
|
58
|
+
'P' => :punctuation,
|
59
|
+
'Punctuation' => :punctuation,
|
71
60
|
|
72
|
-
'Pc' => :
|
73
|
-
'Connector_Punctuation' => :
|
61
|
+
'Pc' => :connector_punctuation,
|
62
|
+
'Connector_Punctuation' => :connector_punctuation,
|
74
63
|
|
75
|
-
'Pd' => :
|
76
|
-
'Dash_Punctuation' => :
|
64
|
+
'Pd' => :dash_punctuation,
|
65
|
+
'Dash_Punctuation' => :dash_punctuation,
|
77
66
|
|
78
|
-
'Ps' => :
|
79
|
-
'Open_Punctuation' => :
|
67
|
+
'Ps' => :open_punctuation,
|
68
|
+
'Open_Punctuation' => :open_punctuation,
|
80
69
|
|
81
|
-
'Pe' => :
|
82
|
-
'Close_Punctuation' => :
|
70
|
+
'Pe' => :close_punctuation,
|
71
|
+
'Close_Punctuation' => :close_punctuation,
|
83
72
|
|
84
|
-
'Pi' => :
|
85
|
-
'Initial_Punctuation' => :
|
73
|
+
'Pi' => :initial_punctuation,
|
74
|
+
'Initial_Punctuation' => :initial_punctuation,
|
86
75
|
|
87
|
-
'Pf' => :
|
88
|
-
'Final_Punctuation' => :
|
76
|
+
'Pf' => :final_punctuation,
|
77
|
+
'Final_Punctuation' => :final_punctuation,
|
89
78
|
|
90
|
-
'Po' => :
|
91
|
-
'Other_Punctuation' => :
|
79
|
+
'Po' => :other_punctuation,
|
80
|
+
'Other_Punctuation' => :other_punctuation,
|
92
81
|
|
93
|
-
'S' => :
|
94
|
-
'Symbol' => :
|
82
|
+
'S' => :symbol,
|
83
|
+
'Symbol' => :symbol,
|
95
84
|
|
96
|
-
'Sm' => :
|
97
|
-
'Math_Symbol' => :
|
85
|
+
'Sm' => :math_symbol,
|
86
|
+
'Math_Symbol' => :math_symbol,
|
98
87
|
|
99
|
-
'Sc' => :
|
100
|
-
'Currency_Symbol' => :
|
88
|
+
'Sc' => :currency_symbol,
|
89
|
+
'Currency_Symbol' => :currency_symbol,
|
101
90
|
|
102
|
-
'Sk' => :
|
103
|
-
'Modifier_Symbol' => :
|
91
|
+
'Sk' => :modifier_symbol,
|
92
|
+
'Modifier_Symbol' => :modifier_symbol,
|
104
93
|
|
105
|
-
'So' => :
|
106
|
-
'Other_Symbol' => :
|
94
|
+
'So' => :other_symbol,
|
95
|
+
'Other_Symbol' => :other_symbol,
|
107
96
|
|
108
|
-
'Z' => :
|
109
|
-
'Separator' => :
|
97
|
+
'Z' => :separator,
|
98
|
+
'Separator' => :separator,
|
110
99
|
|
111
|
-
'Zs' => :
|
112
|
-
'Space_Separator' => :
|
100
|
+
'Zs' => :space_separator,
|
101
|
+
'Space_Separator' => :space_separator,
|
113
102
|
|
114
|
-
'Zl' => :
|
115
|
-
'Line_Separator' => :
|
103
|
+
'Zl' => :line_separator,
|
104
|
+
'Line_Separator' => :line_separator,
|
116
105
|
|
117
|
-
'Zp' => :
|
118
|
-
'Paragraph_Separator' => :
|
106
|
+
'Zp' => :paragraph_separator,
|
107
|
+
'Paragraph_Separator' => :paragraph_separator,
|
119
108
|
|
120
109
|
'C' => :other,
|
121
110
|
'Other' => :other,
|
@@ -135,28 +124,12 @@ class ScannerProperties < Test::Unit::TestCase
|
|
135
124
|
'Cn' => :unassigned,
|
136
125
|
'Unassigned' => :unassigned,
|
137
126
|
|
138
|
-
'Age=1.1' => :
|
139
|
-
'Age=
|
140
|
-
'Age=
|
141
|
-
|
142
|
-
'
|
143
|
-
'
|
144
|
-
'Age=4.0' => :age_4_0,
|
145
|
-
'Age=4.1' => :age_4_1,
|
146
|
-
'Age=5.0' => :age_5_0,
|
147
|
-
'Age=5.1' => :age_5_1,
|
148
|
-
'Age=5.2' => :age_5_2,
|
149
|
-
'Age=6.0' => :age_6_0,
|
150
|
-
'Age=6.1' => :age_6_1,
|
151
|
-
'Age=6.2' => :age_6_2,
|
152
|
-
'Age=6.3' => :age_6_3,
|
153
|
-
'Age=7.0' => :age_7_0,
|
154
|
-
'Age=8.0' => :age_8_0,
|
155
|
-
'Age=9.0' => :age_9_0,
|
156
|
-
'Age=10.0' => :age_10_0,
|
157
|
-
|
158
|
-
'ahex' => :ascii_hex,
|
159
|
-
'ASCII_Hex_Digit' => :ascii_hex,
|
127
|
+
'Age=1.1' => :'age=1.1',
|
128
|
+
'Age=6.0' => :'age=6.0',
|
129
|
+
'Age=10.0' => :'age=10.0',
|
130
|
+
|
131
|
+
'ahex' => :ascii_hex_digit,
|
132
|
+
'ASCII_Hex_Digit' => :ascii_hex_digit,
|
160
133
|
|
161
134
|
'Alphabetic' => :alphabetic,
|
162
135
|
|
@@ -188,8 +161,8 @@ class ScannerProperties < Test::Unit::TestCase
|
|
188
161
|
'dep' => :deprecated,
|
189
162
|
'Deprecated' => :deprecated,
|
190
163
|
|
191
|
-
'di' => :
|
192
|
-
'Default_Ignorable_Code_Point' => :
|
164
|
+
'di' => :default_ignorable_code_point,
|
165
|
+
'Default_Ignorable_Code_Point' => :default_ignorable_code_point,
|
193
166
|
|
194
167
|
'dia' => :diacritic,
|
195
168
|
'Diacritic' => :diacritic,
|
@@ -220,11 +193,11 @@ class ScannerProperties < Test::Unit::TestCase
|
|
220
193
|
'ids' => :id_start,
|
221
194
|
'ID_Start' => :id_start,
|
222
195
|
|
223
|
-
'idsb' => :
|
224
|
-
'IDS_Binary_Operator' => :
|
196
|
+
'idsb' => :ids_binary_operator,
|
197
|
+
'IDS_Binary_Operator' => :ids_binary_operator,
|
225
198
|
|
226
|
-
'idst' => :
|
227
|
-
'IDS_Trinary_Operator' => :
|
199
|
+
'idst' => :ids_trinary_operator,
|
200
|
+
'IDS_Trinary_Operator' => :ids_trinary_operator,
|
228
201
|
|
229
202
|
'joinc' => :join_control,
|
230
203
|
'Join_Control' => :join_control,
|
@@ -236,17 +209,17 @@ class ScannerProperties < Test::Unit::TestCase
|
|
236
209
|
|
237
210
|
'Math' => :math,
|
238
211
|
|
239
|
-
'nchar' => :
|
240
|
-
'Noncharacter_Code_Point' => :
|
212
|
+
'nchar' => :noncharacter_code_point,
|
213
|
+
'Noncharacter_Code_Point' => :noncharacter_code_point,
|
241
214
|
|
242
215
|
'oalpha' => :other_alphabetic,
|
243
216
|
'Other_Alphabetic' => :other_alphabetic,
|
244
217
|
|
245
|
-
'odi' => :
|
246
|
-
'Other_Default_Ignorable_Code_Point' => :
|
218
|
+
'odi' => :other_default_ignorable_code_point,
|
219
|
+
'Other_Default_Ignorable_Code_Point' => :other_default_ignorable_code_point,
|
247
220
|
|
248
|
-
'ogrext' => :
|
249
|
-
'Other_Grapheme_Extend' => :
|
221
|
+
'ogrext' => :other_grapheme_extend,
|
222
|
+
'Other_Grapheme_Extend' => :other_grapheme_extend,
|
250
223
|
|
251
224
|
'oidc' => :other_id_continue,
|
252
225
|
'Other_ID_Continue' => :other_id_continue,
|
@@ -266,8 +239,8 @@ class ScannerProperties < Test::Unit::TestCase
|
|
266
239
|
'patsyn' => :pattern_syntax,
|
267
240
|
'Pattern_Syntax' => :pattern_syntax,
|
268
241
|
|
269
|
-
'patws' => :
|
270
|
-
'Pattern_Whitespace' => :
|
242
|
+
'patws' => :pattern_white_space,
|
243
|
+
'Pattern_Whitespace' => :pattern_white_space,
|
271
244
|
|
272
245
|
'qmark' => :quotation_mark,
|
273
246
|
'quotationmark' => :quotation_mark,
|
@@ -278,7 +251,7 @@ class ScannerProperties < Test::Unit::TestCase
|
|
278
251
|
'Regional_Indicator' => :regional_indicator,
|
279
252
|
|
280
253
|
'sd' => :soft_dotted,
|
281
|
-
'
|
254
|
+
'Soft-Dotted' => :soft_dotted, # test dash spelling
|
282
255
|
|
283
256
|
'sterm' => :sentence_terminal,
|
284
257
|
|
@@ -293,8 +266,8 @@ class ScannerProperties < Test::Unit::TestCase
|
|
293
266
|
'vs' => :variation_selector,
|
294
267
|
'Variation_Selector' => :variation_selector,
|
295
268
|
|
296
|
-
'wspace' => :
|
297
|
-
'whitespace' => :
|
269
|
+
'wspace' => :white_space,
|
270
|
+
'whitespace' => :white_space,
|
298
271
|
|
299
272
|
'xids' => :xid_start,
|
300
273
|
'XID_Start' => :xid_start,
|
@@ -4,34 +4,34 @@ class ScannerRefCalls < Test::Unit::TestCase
|
|
4
4
|
|
5
5
|
tests = {
|
6
6
|
# Traditional numerical group back-reference
|
7
|
-
'(abc)\1' => [3, :backref, :number,
|
7
|
+
'(abc)\1' => [3, :backref, :number, '\1', 5, 7],
|
8
8
|
|
9
9
|
# Group back-references, named, numbered, and relative
|
10
|
-
'(?<X>abc)\k<X>' => [3, :backref, :name_ref_ab,
|
11
|
-
"(?<X>abc)\\k'X'" => [3, :backref, :name_ref_sq,
|
10
|
+
'(?<X>abc)\k<X>' => [3, :backref, :name_ref_ab, '\k<X>', 9, 14],
|
11
|
+
"(?<X>abc)\\k'X'" => [3, :backref, :name_ref_sq, "\\k'X'", 9, 14],
|
12
12
|
|
13
|
-
'(abc)\k<1>' => [3, :backref, :number_ref_ab,
|
14
|
-
"(abc)\\k'1'" => [3, :backref, :number_ref_sq,
|
13
|
+
'(abc)\k<1>' => [3, :backref, :number_ref_ab, '\k<1>', 5, 10],
|
14
|
+
"(abc)\\k'1'" => [3, :backref, :number_ref_sq, "\\k'1'", 5, 10],
|
15
15
|
|
16
|
-
'(abc)\k<-1>' => [3, :backref, :number_rel_ref_ab,
|
17
|
-
"(abc)\\k'-1'" => [3, :backref, :number_rel_ref_sq,
|
16
|
+
'(abc)\k<-1>' => [3, :backref, :number_rel_ref_ab, '\k<-1>', 5, 11],
|
17
|
+
"(abc)\\k'-1'" => [3, :backref, :number_rel_ref_sq, "\\k'-1'", 5, 11],
|
18
18
|
|
19
19
|
# Sub-expression invocation, named, numbered, and relative
|
20
|
-
'(?<X>abc)\g<X>' => [3, :backref, :name_call_ab,
|
21
|
-
"(?<X>abc)\\g'X'" => [3, :backref, :name_call_sq,
|
20
|
+
'(?<X>abc)\g<X>' => [3, :backref, :name_call_ab, '\g<X>', 9, 14],
|
21
|
+
"(?<X>abc)\\g'X'" => [3, :backref, :name_call_sq, "\\g'X'", 9, 14],
|
22
22
|
|
23
|
-
'(abc)\g<1>' => [3, :backref, :number_call_ab,
|
24
|
-
"(abc)\\g'1'" => [3, :backref, :number_call_sq,
|
23
|
+
'(abc)\g<1>' => [3, :backref, :number_call_ab, '\g<1>', 5, 10],
|
24
|
+
"(abc)\\g'1'" => [3, :backref, :number_call_sq, "\\g'1'", 5, 10],
|
25
25
|
|
26
|
-
'(abc)\g<-1>' => [3, :backref, :number_rel_call_ab,
|
27
|
-
"(abc)\\g'-1'" => [3, :backref, :number_rel_call_sq,
|
26
|
+
'(abc)\g<-1>' => [3, :backref, :number_rel_call_ab, '\g<-1>', 5, 11],
|
27
|
+
"(abc)\\g'-1'" => [3, :backref, :number_rel_call_sq, "\\g'-1'", 5, 11],
|
28
28
|
|
29
|
-
# Group back-references, with
|
30
|
-
'(?<X>abc)\k<X-0>' => [3, :backref, :
|
31
|
-
"(?<X>abc)\\k'X-0'" => [3, :backref, :
|
29
|
+
# Group back-references, with recursion level
|
30
|
+
'(?<X>abc)\k<X-0>' => [3, :backref, :name_recursion_ref_ab, '\k<X-0>', 9, 16],
|
31
|
+
"(?<X>abc)\\k'X-0'" => [3, :backref, :name_recursion_ref_sq, "\\k'X-0'", 9, 16],
|
32
32
|
|
33
|
-
'(abc)\k<1-0>' => [3, :backref, :
|
34
|
-
"(abc)\\k'1-0'" => [3, :backref, :
|
33
|
+
'(abc)\k<1-0>' => [3, :backref, :number_recursion_ref_ab, '\k<1-0>', 5, 12],
|
34
|
+
"(abc)\\k'1-0'" => [3, :backref, :number_recursion_ref_sq, "\\k'1-0'", 5, 12],
|
35
35
|
}
|
36
36
|
|
37
37
|
tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
|