regexp_parser 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -4,61 +4,61 @@ class ScannerGroups < Test::Unit::TestCase
4
4
 
5
5
  tests = {
6
6
  # Options
7
- '(?-mix:abc)' => [0, :group, :options, '(?-mix:', 0, 7],
8
- '(?m-ix:abc)' => [0, :group, :options, '(?m-ix:', 0, 7],
9
- '(?mi-x:abc)' => [0, :group, :options, '(?mi-x:', 0, 7],
10
- '(?mix:abc)' => [0, :group, :options, '(?mix:', 0, 6],
11
- '(?mix)' => [0, :group, :options, '(?mix', 0, 5],
12
- '(?m:)' => [0, :group, :options, '(?m:', 0, 4],
13
- '(?i:)' => [0, :group, :options, '(?i:', 0, 4],
14
- '(?x:)' => [0, :group, :options, '(?x:', 0, 4],
7
+ '(?-mix:abc)' => [0, :group, :options, '(?-mix:', 0, 7],
8
+ '(?m-ix:abc)' => [0, :group, :options, '(?m-ix:', 0, 7],
9
+ '(?mi-x:abc)' => [0, :group, :options, '(?mi-x:', 0, 7],
10
+ '(?mix:abc)' => [0, :group, :options, '(?mix:', 0, 6],
11
+ '(?m:)' => [0, :group, :options, '(?m:', 0, 4],
12
+ '(?i:)' => [0, :group, :options, '(?i:', 0, 4],
13
+ '(?x:)' => [0, :group, :options, '(?x:', 0, 4],
14
+ '(?mix)' => [0, :group, :options_switch, '(?mix', 0, 5],
15
15
 
16
16
  # Group types
17
- '(?>abc)' => [0, :group, :atomic, '(?>', 0, 3],
18
- '(abc)' => [0, :group, :capture, '(', 0, 1],
17
+ '(?>abc)' => [0, :group, :atomic, '(?>', 0, 3],
18
+ '(abc)' => [0, :group, :capture, '(', 0, 1],
19
19
 
20
- '(?<name>abc)' => [0, :group, :named_ab, '(?<name>', 0, 8],
21
- "(?'name'abc)" => [0, :group, :named_sq, "(?'name'", 0, 8],
20
+ '(?<name>abc)' => [0, :group, :named_ab, '(?<name>', 0, 8],
21
+ "(?'name'abc)" => [0, :group, :named_sq, "(?'name'", 0, 8],
22
22
 
23
- '(?<name_1>abc)' => [0, :group, :named_ab, '(?<name_1>', 0,10],
24
- "(?'name_1'abc)" => [0, :group, :named_sq, "(?'name_1'", 0,10],
23
+ '(?<name_1>abc)' => [0, :group, :named_ab, '(?<name_1>', 0,10],
24
+ "(?'name_1'abc)" => [0, :group, :named_sq, "(?'name_1'", 0,10],
25
25
 
26
- '(?:abc)' => [0, :group, :passive, '(?:', 0, 3],
27
- '(?:)' => [0, :group, :passive, '(?:', 0, 3],
28
- '(?::)' => [0, :group, :passive, '(?:', 0, 3],
26
+ '(?:abc)' => [0, :group, :passive, '(?:', 0, 3],
27
+ '(?:)' => [0, :group, :passive, '(?:', 0, 3],
28
+ '(?::)' => [0, :group, :passive, '(?:', 0, 3],
29
29
 
30
30
  # Comments
31
- '(?#abc)' => [0, :group, :comment, '(?#abc)', 0, 7],
32
- '(?#)' => [0, :group, :comment, '(?#)', 0, 4],
31
+ '(?#abc)' => [0, :group, :comment, '(?#abc)', 0, 7],
32
+ '(?#)' => [0, :group, :comment, '(?#)', 0, 4],
33
33
 
34
34
  # Assertions
35
- '(?=abc)' => [0, :assertion, :lookahead, '(?=', 0, 3],
36
- '(?!abc)' => [0, :assertion, :nlookahead, '(?!', 0, 3],
37
- '(?<=abc)' => [0, :assertion, :lookbehind, '(?<=', 0, 4],
38
- '(?<!abc)' => [0, :assertion, :nlookbehind, '(?<!', 0, 4],
35
+ '(?=abc)' => [0, :assertion, :lookahead, '(?=', 0, 3],
36
+ '(?!abc)' => [0, :assertion, :nlookahead, '(?!', 0, 3],
37
+ '(?<=abc)' => [0, :assertion, :lookbehind, '(?<=', 0, 4],
38
+ '(?<!abc)' => [0, :assertion, :nlookbehind, '(?<!', 0, 4],
39
39
  }
40
40
 
41
41
  if RUBY_VERSION >= '2.0'
42
42
  tests.merge!({
43
43
  # New options
44
- '(?d-mix:abc)' => [0, :group, :options, '(?d-mix:', 0, 8],
45
- '(?a-mix:abc)' => [0, :group, :options, '(?a-mix:', 0, 8],
46
- '(?u-mix:abc)' => [0, :group, :options, '(?u-mix:', 0, 8],
47
- '(?da-m:abc)' => [0, :group, :options, '(?da-m:', 0, 7],
48
- '(?du-x:abc)' => [0, :group, :options, '(?du-x:', 0, 7],
49
- '(?dau-i:abc)' => [0, :group, :options, '(?dau-i:', 0, 8],
50
- '(?dau:abc)' => [0, :group, :options, '(?dau:', 0, 6],
51
- '(?dau)' => [0, :group, :options, '(?dau', 0, 5],
52
- '(?d:)' => [0, :group, :options, '(?d:', 0, 4],
53
- '(?a:)' => [0, :group, :options, '(?a:', 0, 4],
54
- '(?u:)' => [0, :group, :options, '(?u:', 0, 4],
44
+ '(?d-mix:abc)' => [0, :group, :options, '(?d-mix:', 0, 8],
45
+ '(?a-mix:abc)' => [0, :group, :options, '(?a-mix:', 0, 8],
46
+ '(?u-mix:abc)' => [0, :group, :options, '(?u-mix:', 0, 8],
47
+ '(?da-m:abc)' => [0, :group, :options, '(?da-m:', 0, 7],
48
+ '(?du-x:abc)' => [0, :group, :options, '(?du-x:', 0, 7],
49
+ '(?dau-i:abc)' => [0, :group, :options, '(?dau-i:', 0, 8],
50
+ '(?dau:abc)' => [0, :group, :options, '(?dau:', 0, 6],
51
+ '(?d:)' => [0, :group, :options, '(?d:', 0, 4],
52
+ '(?a:)' => [0, :group, :options, '(?a:', 0, 4],
53
+ '(?u:)' => [0, :group, :options, '(?u:', 0, 4],
54
+ '(?dau)' => [0, :group, :options_switch, '(?dau', 0, 5],
55
55
  })
56
56
  end
57
57
 
58
58
  if RUBY_VERSION >= '2.4.1'
59
59
  tests.merge!({
60
60
  # New absence operator
61
- '(?~abc)' => [0, :group, :absence, '(?~', 0, 3],
61
+ '(?~abc)' => [0, :group, :absence, '(?~', 0, 3],
62
62
  })
63
63
  end
64
64
 
@@ -17,7 +17,7 @@ class ScannerMeta < Test::Unit::TestCase
17
17
  5 => [:meta, :alternation, '|', 6, 7],
18
18
  8 => [:meta, :alternation, '|', 9, 10],
19
19
  11 => [:escape, :alternation, '\|', 12, 14],
20
- 15 => [:set, :member, '|', 17, 18],
20
+ 15 => [:literal, :literal, '|', 17, 18],
21
21
  },
22
22
  }
23
23
 
@@ -4,118 +4,107 @@ class ScannerProperties < Test::Unit::TestCase
4
4
 
5
5
  tests = {
6
6
  'Alnum' => :alnum,
7
- 'Alpha' => :alpha,
8
- 'Ascii' => :ascii,
9
- 'Blank' => :blank,
10
- 'Cntrl' => :cntrl,
11
- 'Digit' => :digit,
12
- 'Graph' => :graph,
13
- 'Lower' => :lower,
14
- 'Print' => :print,
15
- 'Punct' => :punct,
16
- 'Space' => :space,
17
- 'Upper' => :upper,
18
- 'Word' => :word,
19
- 'Xdigit' => :xdigit,
7
+
20
8
  'XPosixPunct' => :xposixpunct,
21
9
 
22
10
  'Newline' => :newline,
23
11
 
24
12
  'Any' => :any,
13
+
25
14
  'Assigned' => :assigned,
26
15
 
27
- 'L' => :letter_any,
28
- 'Letter' => :letter_any,
16
+ 'L' => :letter,
17
+ 'Letter' => :letter,
29
18
 
30
- 'Lu' => :letter_uppercase,
31
- 'Uppercase_Letter' => :letter_uppercase,
19
+ 'Lu' => :uppercase_letter,
20
+ 'Uppercase_Letter' => :uppercase_letter,
32
21
 
33
- 'Ll' => :letter_lowercase,
34
- 'Lowercase_Letter' => :letter_lowercase,
22
+ 'Ll' => :lowercase_letter,
23
+ 'Lowercase_Letter' => :lowercase_letter,
35
24
 
36
- 'Lt' => :letter_titlecase,
37
- 'Titlecase_Letter' => :letter_titlecase,
25
+ 'Lt' => :titlecase_letter,
26
+ 'Titlecase_Letter' => :titlecase_letter,
38
27
 
39
- 'Lm' => :letter_modifier,
40
- 'Modifier_Letter' => :letter_modifier,
28
+ 'Lm' => :modifier_letter,
29
+ 'Modifier_Letter' => :modifier_letter,
41
30
 
42
- 'Lo' => :letter_other,
43
- 'Other_Letter' => :letter_other,
31
+ 'Lo' => :other_letter,
32
+ 'Other_Letter' => :other_letter,
44
33
 
45
- 'M' => :mark_any,
46
- 'Mark' => :mark_any,
34
+ 'M' => :mark,
35
+ 'Mark' => :mark,
47
36
 
48
- 'Mn' => :mark_nonspacing,
49
- 'Nonspacing_Mark' => :mark_nonspacing,
37
+ 'Mn' => :nonspacing_mark,
38
+ 'Nonspacing_Mark' => :nonspacing_mark,
50
39
 
51
- 'Mc' => :mark_spacing,
52
- 'Spacing_Mark' => :mark_spacing,
40
+ 'Mc' => :spacing_mark,
41
+ 'Spacing_Mark' => :spacing_mark,
53
42
 
54
- 'Me' => :mark_enclosing,
55
- 'Enclosing_Mark' => :mark_enclosing,
43
+ 'Me' => :enclosing_mark,
44
+ 'Enclosing_Mark' => :enclosing_mark,
56
45
 
57
- 'N' => :number_any,
58
- 'Number' => :number_any,
46
+ 'N' => :number,
47
+ 'Number' => :number,
59
48
 
60
- 'Nd' => :number_decimal,
61
- 'Decimal_Number' => :number_decimal,
49
+ 'Nd' => :decimal_number,
50
+ 'Decimal_Number' => :decimal_number,
62
51
 
63
- 'Nl' => :number_letter,
64
- 'Letter_Number' => :number_letter,
52
+ 'Nl' => :letter_number,
53
+ 'Letter_Number' => :letter_number,
65
54
 
66
- 'No' => :number_other,
67
- 'Other_Number' => :number_other,
55
+ 'No' => :other_number,
56
+ 'Other_Number' => :other_number,
68
57
 
69
- 'P' => :punct_any,
70
- 'Punctuation' => :punct_any,
58
+ 'P' => :punctuation,
59
+ 'Punctuation' => :punctuation,
71
60
 
72
- 'Pc' => :punct_connector,
73
- 'Connector_Punctuation' => :punct_connector,
61
+ 'Pc' => :connector_punctuation,
62
+ 'Connector_Punctuation' => :connector_punctuation,
74
63
 
75
- 'Pd' => :punct_dash,
76
- 'Dash_Punctuation' => :punct_dash,
64
+ 'Pd' => :dash_punctuation,
65
+ 'Dash_Punctuation' => :dash_punctuation,
77
66
 
78
- 'Ps' => :punct_open,
79
- 'Open_Punctuation' => :punct_open,
67
+ 'Ps' => :open_punctuation,
68
+ 'Open_Punctuation' => :open_punctuation,
80
69
 
81
- 'Pe' => :punct_close,
82
- 'Close_Punctuation' => :punct_close,
70
+ 'Pe' => :close_punctuation,
71
+ 'Close_Punctuation' => :close_punctuation,
83
72
 
84
- 'Pi' => :punct_initial,
85
- 'Initial_Punctuation' => :punct_initial,
73
+ 'Pi' => :initial_punctuation,
74
+ 'Initial_Punctuation' => :initial_punctuation,
86
75
 
87
- 'Pf' => :punct_final,
88
- 'Final_Punctuation' => :punct_final,
76
+ 'Pf' => :final_punctuation,
77
+ 'Final_Punctuation' => :final_punctuation,
89
78
 
90
- 'Po' => :punct_other,
91
- 'Other_Punctuation' => :punct_other,
79
+ 'Po' => :other_punctuation,
80
+ 'Other_Punctuation' => :other_punctuation,
92
81
 
93
- 'S' => :symbol_any,
94
- 'Symbol' => :symbol_any,
82
+ 'S' => :symbol,
83
+ 'Symbol' => :symbol,
95
84
 
96
- 'Sm' => :symbol_math,
97
- 'Math_Symbol' => :symbol_math,
85
+ 'Sm' => :math_symbol,
86
+ 'Math_Symbol' => :math_symbol,
98
87
 
99
- 'Sc' => :symbol_currency,
100
- 'Currency_Symbol' => :symbol_currency,
88
+ 'Sc' => :currency_symbol,
89
+ 'Currency_Symbol' => :currency_symbol,
101
90
 
102
- 'Sk' => :symbol_modifier,
103
- 'Modifier_Symbol' => :symbol_modifier,
91
+ 'Sk' => :modifier_symbol,
92
+ 'Modifier_Symbol' => :modifier_symbol,
104
93
 
105
- 'So' => :symbol_other,
106
- 'Other_Symbol' => :symbol_other,
94
+ 'So' => :other_symbol,
95
+ 'Other_Symbol' => :other_symbol,
107
96
 
108
- 'Z' => :separator_any,
109
- 'Separator' => :separator_any,
97
+ 'Z' => :separator,
98
+ 'Separator' => :separator,
110
99
 
111
- 'Zs' => :separator_space,
112
- 'Space_Separator' => :separator_space,
100
+ 'Zs' => :space_separator,
101
+ 'Space_Separator' => :space_separator,
113
102
 
114
- 'Zl' => :separator_line,
115
- 'Line_Separator' => :separator_line,
103
+ 'Zl' => :line_separator,
104
+ 'Line_Separator' => :line_separator,
116
105
 
117
- 'Zp' => :separator_para,
118
- 'Paragraph_Separator' => :separator_para,
106
+ 'Zp' => :paragraph_separator,
107
+ 'Paragraph_Separator' => :paragraph_separator,
119
108
 
120
109
  'C' => :other,
121
110
  'Other' => :other,
@@ -135,28 +124,12 @@ class ScannerProperties < Test::Unit::TestCase
135
124
  'Cn' => :unassigned,
136
125
  'Unassigned' => :unassigned,
137
126
 
138
- 'Age=1.1' => :age_1_1,
139
- 'Age=2.0' => :age_2_0,
140
- 'Age=2.1' => :age_2_1,
141
- 'Age=3.0' => :age_3_0,
142
- 'Age=3.1' => :age_3_1,
143
- 'Age=3.2' => :age_3_2,
144
- 'Age=4.0' => :age_4_0,
145
- 'Age=4.1' => :age_4_1,
146
- 'Age=5.0' => :age_5_0,
147
- 'Age=5.1' => :age_5_1,
148
- 'Age=5.2' => :age_5_2,
149
- 'Age=6.0' => :age_6_0,
150
- 'Age=6.1' => :age_6_1,
151
- 'Age=6.2' => :age_6_2,
152
- 'Age=6.3' => :age_6_3,
153
- 'Age=7.0' => :age_7_0,
154
- 'Age=8.0' => :age_8_0,
155
- 'Age=9.0' => :age_9_0,
156
- 'Age=10.0' => :age_10_0,
157
-
158
- 'ahex' => :ascii_hex,
159
- 'ASCII_Hex_Digit' => :ascii_hex,
127
+ 'Age=1.1' => :'age=1.1',
128
+ 'Age=6.0' => :'age=6.0',
129
+ 'Age=10.0' => :'age=10.0',
130
+
131
+ 'ahex' => :ascii_hex_digit,
132
+ 'ASCII_Hex_Digit' => :ascii_hex_digit,
160
133
 
161
134
  'Alphabetic' => :alphabetic,
162
135
 
@@ -188,8 +161,8 @@ class ScannerProperties < Test::Unit::TestCase
188
161
  'dep' => :deprecated,
189
162
  'Deprecated' => :deprecated,
190
163
 
191
- 'di' => :default_ignorable_cp,
192
- 'Default_Ignorable_Code_Point' => :default_ignorable_cp,
164
+ 'di' => :default_ignorable_code_point,
165
+ 'Default_Ignorable_Code_Point' => :default_ignorable_code_point,
193
166
 
194
167
  'dia' => :diacritic,
195
168
  'Diacritic' => :diacritic,
@@ -220,11 +193,11 @@ class ScannerProperties < Test::Unit::TestCase
220
193
  'ids' => :id_start,
221
194
  'ID_Start' => :id_start,
222
195
 
223
- 'idsb' => :ids_binary_op,
224
- 'IDS_Binary_Operator' => :ids_binary_op,
196
+ 'idsb' => :ids_binary_operator,
197
+ 'IDS_Binary_Operator' => :ids_binary_operator,
225
198
 
226
- 'idst' => :ids_trinary_op,
227
- 'IDS_Trinary_Operator' => :ids_trinary_op,
199
+ 'idst' => :ids_trinary_operator,
200
+ 'IDS_Trinary_Operator' => :ids_trinary_operator,
228
201
 
229
202
  'joinc' => :join_control,
230
203
  'Join_Control' => :join_control,
@@ -236,17 +209,17 @@ class ScannerProperties < Test::Unit::TestCase
236
209
 
237
210
  'Math' => :math,
238
211
 
239
- 'nchar' => :non_character_cp,
240
- 'Noncharacter_Code_Point' => :non_character_cp,
212
+ 'nchar' => :noncharacter_code_point,
213
+ 'Noncharacter_Code_Point' => :noncharacter_code_point,
241
214
 
242
215
  'oalpha' => :other_alphabetic,
243
216
  'Other_Alphabetic' => :other_alphabetic,
244
217
 
245
- 'odi' => :other_default_ignorable_cp,
246
- 'Other_Default_Ignorable_Code_Point' => :other_default_ignorable_cp,
218
+ 'odi' => :other_default_ignorable_code_point,
219
+ 'Other_Default_Ignorable_Code_Point' => :other_default_ignorable_code_point,
247
220
 
248
- 'ogrext' => :other_grapheme_extended,
249
- 'Other_Grapheme_Extend' => :other_grapheme_extended,
221
+ 'ogrext' => :other_grapheme_extend,
222
+ 'Other_Grapheme_Extend' => :other_grapheme_extend,
250
223
 
251
224
  'oidc' => :other_id_continue,
252
225
  'Other_ID_Continue' => :other_id_continue,
@@ -266,8 +239,8 @@ class ScannerProperties < Test::Unit::TestCase
266
239
  'patsyn' => :pattern_syntax,
267
240
  'Pattern_Syntax' => :pattern_syntax,
268
241
 
269
- 'patws' => :pattern_whitespace,
270
- 'Pattern_Whitespace' => :pattern_whitespace,
242
+ 'patws' => :pattern_white_space,
243
+ 'Pattern_Whitespace' => :pattern_white_space,
271
244
 
272
245
  'qmark' => :quotation_mark,
273
246
  'quotationmark' => :quotation_mark,
@@ -278,7 +251,7 @@ class ScannerProperties < Test::Unit::TestCase
278
251
  'Regional_Indicator' => :regional_indicator,
279
252
 
280
253
  'sd' => :soft_dotted,
281
- 'Soft_Dotted' => :soft_dotted,
254
+ 'Soft-Dotted' => :soft_dotted, # test dash spelling
282
255
 
283
256
  'sterm' => :sentence_terminal,
284
257
 
@@ -293,8 +266,8 @@ class ScannerProperties < Test::Unit::TestCase
293
266
  'vs' => :variation_selector,
294
267
  'Variation_Selector' => :variation_selector,
295
268
 
296
- 'wspace' => :whitespace,
297
- 'whitespace' => :whitespace,
269
+ 'wspace' => :white_space,
270
+ 'whitespace' => :white_space,
298
271
 
299
272
  'xids' => :xid_start,
300
273
  'XID_Start' => :xid_start,
@@ -4,34 +4,34 @@ class ScannerRefCalls < Test::Unit::TestCase
4
4
 
5
5
  tests = {
6
6
  # Traditional numerical group back-reference
7
- '(abc)\1' => [3, :backref, :number, '\1', 5, 7],
7
+ '(abc)\1' => [3, :backref, :number, '\1', 5, 7],
8
8
 
9
9
  # Group back-references, named, numbered, and relative
10
- '(?<X>abc)\k<X>' => [3, :backref, :name_ref_ab, '\k<X>', 9, 14],
11
- "(?<X>abc)\\k'X'" => [3, :backref, :name_ref_sq, "\\k'X'", 9, 14],
10
+ '(?<X>abc)\k<X>' => [3, :backref, :name_ref_ab, '\k<X>', 9, 14],
11
+ "(?<X>abc)\\k'X'" => [3, :backref, :name_ref_sq, "\\k'X'", 9, 14],
12
12
 
13
- '(abc)\k<1>' => [3, :backref, :number_ref_ab, '\k<1>', 5, 10],
14
- "(abc)\\k'1'" => [3, :backref, :number_ref_sq, "\\k'1'", 5, 10],
13
+ '(abc)\k<1>' => [3, :backref, :number_ref_ab, '\k<1>', 5, 10],
14
+ "(abc)\\k'1'" => [3, :backref, :number_ref_sq, "\\k'1'", 5, 10],
15
15
 
16
- '(abc)\k<-1>' => [3, :backref, :number_rel_ref_ab, '\k<-1>', 5, 11],
17
- "(abc)\\k'-1'" => [3, :backref, :number_rel_ref_sq, "\\k'-1'", 5, 11],
16
+ '(abc)\k<-1>' => [3, :backref, :number_rel_ref_ab, '\k<-1>', 5, 11],
17
+ "(abc)\\k'-1'" => [3, :backref, :number_rel_ref_sq, "\\k'-1'", 5, 11],
18
18
 
19
19
  # Sub-expression invocation, named, numbered, and relative
20
- '(?<X>abc)\g<X>' => [3, :backref, :name_call_ab, '\g<X>', 9, 14],
21
- "(?<X>abc)\\g'X'" => [3, :backref, :name_call_sq, "\\g'X'", 9, 14],
20
+ '(?<X>abc)\g<X>' => [3, :backref, :name_call_ab, '\g<X>', 9, 14],
21
+ "(?<X>abc)\\g'X'" => [3, :backref, :name_call_sq, "\\g'X'", 9, 14],
22
22
 
23
- '(abc)\g<1>' => [3, :backref, :number_call_ab, '\g<1>', 5, 10],
24
- "(abc)\\g'1'" => [3, :backref, :number_call_sq, "\\g'1'", 5, 10],
23
+ '(abc)\g<1>' => [3, :backref, :number_call_ab, '\g<1>', 5, 10],
24
+ "(abc)\\g'1'" => [3, :backref, :number_call_sq, "\\g'1'", 5, 10],
25
25
 
26
- '(abc)\g<-1>' => [3, :backref, :number_rel_call_ab, '\g<-1>', 5, 11],
27
- "(abc)\\g'-1'" => [3, :backref, :number_rel_call_sq, "\\g'-1'", 5, 11],
26
+ '(abc)\g<-1>' => [3, :backref, :number_rel_call_ab, '\g<-1>', 5, 11],
27
+ "(abc)\\g'-1'" => [3, :backref, :number_rel_call_sq, "\\g'-1'", 5, 11],
28
28
 
29
- # Group back-references, with nesting level
30
- '(?<X>abc)\k<X-0>' => [3, :backref, :name_nest_ref_ab, '\k<X-0>', 9, 16],
31
- "(?<X>abc)\\k'X-0'" => [3, :backref, :name_nest_ref_sq, "\\k'X-0'", 9, 16],
29
+ # Group back-references, with recursion level
30
+ '(?<X>abc)\k<X-0>' => [3, :backref, :name_recursion_ref_ab, '\k<X-0>', 9, 16],
31
+ "(?<X>abc)\\k'X-0'" => [3, :backref, :name_recursion_ref_sq, "\\k'X-0'", 9, 16],
32
32
 
33
- '(abc)\k<1-0>' => [3, :backref, :number_nest_ref_ab, '\k<1-0>', 5, 12],
34
- "(abc)\\k'1-0'" => [3, :backref, :number_nest_ref_sq, "\\k'1-0'", 5, 12],
33
+ '(abc)\k<1-0>' => [3, :backref, :number_recursion_ref_ab, '\k<1-0>', 5, 12],
34
+ "(abc)\\k'1-0'" => [3, :backref, :number_recursion_ref_sq, "\\k'1-0'", 5, 12],
35
35
  }
36
36
 
37
37
  tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|