regexp_parser 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +7 -0
  2. data/ChangeLog +45 -0
  3. data/Rakefile +12 -44
  4. data/VERSION.yml +5 -0
  5. data/lib/regexp_parser.rb +5 -38
  6. data/lib/regexp_parser/expression.rb +68 -221
  7. data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
  9. data/lib/regexp_parser/expression/classes/backref.rb +42 -0
  10. data/lib/regexp_parser/expression/classes/escape.rb +27 -0
  11. data/lib/regexp_parser/expression/classes/group.rb +67 -0
  12. data/lib/regexp_parser/expression/classes/literal.rb +7 -0
  13. data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
  14. data/lib/regexp_parser/expression/classes/root.rb +26 -0
  15. data/lib/regexp_parser/expression/classes/set.rb +100 -0
  16. data/lib/regexp_parser/expression/classes/type.rb +17 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +26 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +69 -0
  19. data/lib/regexp_parser/lexer.rb +4 -4
  20. data/lib/regexp_parser/parser.rb +31 -13
  21. data/lib/regexp_parser/scanner.rb +1849 -1488
  22. data/lib/regexp_parser/scanner/property.rl +7 -2
  23. data/lib/regexp_parser/scanner/scanner.rl +377 -191
  24. data/lib/regexp_parser/syntax.rb +7 -0
  25. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
  27. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
  28. data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
  29. data/lib/regexp_parser/syntax/tokens.rb +21 -320
  30. data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
  31. data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
  34. data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
  35. data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
  36. data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
  37. data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
  38. data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
  39. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
  40. data/lib/regexp_parser/token.rb +37 -0
  41. data/test/expression/test_all.rb +7 -0
  42. data/test/expression/test_base.rb +72 -0
  43. data/test/expression/test_clone.rb +144 -0
  44. data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
  45. data/test/helpers.rb +1 -0
  46. data/test/parser/test_all.rb +1 -1
  47. data/test/parser/test_alternation.rb +35 -0
  48. data/test/parser/test_anchors.rb +2 -2
  49. data/test/parser/test_refcalls.rb +1 -1
  50. data/test/parser/test_sets.rb +54 -8
  51. data/test/scanner/test_anchors.rb +2 -2
  52. data/test/scanner/test_conditionals.rb +31 -0
  53. data/test/scanner/test_errors.rb +88 -8
  54. data/test/scanner/test_escapes.rb +4 -4
  55. data/test/scanner/test_groups.rb +7 -0
  56. data/test/scanner/test_quoting.rb +29 -0
  57. data/test/scanner/test_sets.rb +1 -0
  58. data/test/syntax/ruby/test_1.8.rb +3 -3
  59. data/test/test_all.rb +1 -1
  60. metadata +62 -48
  61. data/lib/regexp_parser/expression/set.rb +0 -59
@@ -58,7 +58,7 @@
58
58
  unicode_property := |*
59
59
 
60
60
  property_sequence < eof(premature_property_end) {
61
- text = data[ts-1..te-1].pack('c*')
61
+ text = text(data, ts, te, 1).first
62
62
  if in_set
63
63
  type = :set
64
64
  else
@@ -525,9 +525,14 @@
525
525
  self.emit(type, :script_unknown, text, ts-1, te)
526
526
 
527
527
  else
528
- raise UnknownUnicodePropertyError.new(name)
528
+ # Should this really be an error? Or would emitting
529
+ # an :unknown for the property be better?
530
+ #
531
+ # self.emit(type, :unknown, text, ts-1, te)
529
532
 
533
+ raise UnknownUnicodePropertyError.new(name)
530
534
  end
535
+
531
536
  fret;
532
537
  };
533
538
  *|;
@@ -28,6 +28,7 @@
28
28
 
29
29
  class_posix = ('[:' . '^'? . class_name_posix . ':]');
30
30
 
31
+
31
32
  # these are not supported in ruby, and need verification
32
33
  collating_sequence = '[.' . (alpha | [\-])+ . '.]';
33
34
  character_equivalent = '[=' . alpha . '=]';
@@ -41,14 +42,21 @@
41
42
  octal_sequence = [0-7]{1,3};
42
43
 
43
44
  hex_sequence = 'x' . xdigit{1,2};
45
+ hex_sequence_err = 'x' . [^0-9a-fA-F{];
44
46
  wide_hex_sequence = 'x' . '{' . xdigit{1,8} . '}';
45
47
 
48
+ hex_or_not = (xdigit|[^0-9a-fA-F}]); # note closing curly at end
49
+
50
+ wide_hex_seq_invalid = 'x' . '{' . hex_or_not{1,9};
51
+ wide_hex_seq_empty = 'x' . '{' . (space+)? . '}';
52
+
46
53
  codepoint_single = 'u' . xdigit{4};
47
54
  codepoint_list = 'u{' . (xdigit{4} . space?)+'}';
48
55
  codepoint_sequence = codepoint_single | codepoint_list;
49
56
 
50
- control_sequence = ('c' | 'C-') . alpha;
51
- meta_sequence = 'M-' . ((backslash . control_sequence) | alpha);
57
+ control_sequence = ('c' | 'C-');
58
+
59
+ meta_sequence = 'M-' . (backslash . control_sequence)?;
52
60
 
53
61
  zero_or_one = '?' | '??' | '?+';
54
62
  zero_or_more = '*' | '*?' | '*+';
@@ -59,11 +67,11 @@
59
67
  quantifier_possessive = '?+' | '*+' | '++';
60
68
  quantifier_mode = '?' | '+';
61
69
 
62
- quantifier_range = range_open . (digit+)? . ','? . (digit+)? .
70
+ quantifier_interval = range_open . (digit+)? . ','? . (digit+)? .
63
71
  range_close . quantifier_mode?;
64
72
 
65
73
  quantifiers = quantifier_greedy | quantifier_reluctant |
66
- quantifier_possessive | quantifier_range;
74
+ quantifier_possessive | quantifier_interval;
67
75
 
68
76
 
69
77
  group_comment = '?#' . [^)]+ . group_close;
@@ -76,10 +84,10 @@
76
84
  assertion_lookbehind = '?<=';
77
85
  assertion_nlookbehind = '?<!';
78
86
 
79
- group_options = '?' . ([mix]{1,3})? . '-'? . ([mix]{1,3})?;
87
+ group_options = '?' . [\-mix];
80
88
 
81
89
  group_ref = [gk];
82
- group_name = alpha . (alnum+)?;
90
+ group_name = (alnum . (alnum+)?)?;
83
91
  group_number = '-'? . [1-9] . ([0-9]+)?;
84
92
  group_level = [+\-] . [0-9]+;
85
93
 
@@ -113,7 +121,16 @@
113
121
  group_ref | [xucCM];
114
122
 
115
123
  # EOF error, used where it can be detected
116
- action premature_end_error { raise PrematureEndError }
124
+ action premature_end_error {
125
+ text = ts ? copy(data, ts-1..-1) : data.pack('c*')
126
+ raise PrematureEndError.new( text )
127
+ }
128
+
129
+ # Invalid sequence error, used from sequences, like escapes and sets
130
+ action invalid_sequence_error {
131
+ text = ts ? copy(data, ts-1..-1) : data.pack('c*')
132
+ raise InvalidSequenceError.new('sequence', text)
133
+ }
117
134
 
118
135
  # group (nesting) and set open/close actions
119
136
  action group_opened { group_depth += 1; in_group = true }
@@ -127,7 +144,7 @@
127
144
  set_type = set_depth > 1 ? :subset : :set
128
145
  set_depth -= 1; in_set = set_depth > 0 ? true : false
129
146
 
130
- self.emit(set_type, :close, data[ts..te-1].pack('c*'), ts, te)
147
+ emit(set_type, :close, *text(data, ts, te))
131
148
 
132
149
  if set_depth == 0
133
150
  fgoto main;
@@ -140,8 +157,8 @@
140
157
  set_type = set_depth > 1 ? :subset : :set
141
158
  set_depth -= 1; in_set = set_depth > 0 ? true : false
142
159
 
143
- self.emit(set_type, :member, data[ts..te-2].pack('c*'), ts, te)
144
- self.emit(set_type, :close, data[ts+1..te-1].pack('c*'), ts, te)
160
+ emit(set_type, :member, copy(data, ts..te-2), ts, te)
161
+ emit(set_type, :close, copy(data, ts+1..te-1), ts, te)
145
162
 
146
163
  if set_depth == 0
147
164
  fgoto main;
@@ -151,20 +168,20 @@
151
168
  };
152
169
 
153
170
  '^' {
154
- text = data[ts..te-1].pack('c*')
171
+ text = text(data, ts, te).first
155
172
  if @tokens.last[1] == :open
156
- self.emit(set_type, :negate, text, ts, te)
173
+ emit(set_type, :negate, text, ts, te)
157
174
  else
158
- self.emit(set_type, :member, text, ts, te)
175
+ emit(set_type, :member, text, ts, te)
159
176
  end
160
177
  };
161
178
 
162
179
  alnum . '-' . alnum {
163
- self.emit(set_type, :range, data[ts..te-1].pack('c*'), ts, te)
180
+ emit(set_type, :range, *text(data, ts, te))
164
181
  };
165
182
 
166
183
  '&&' {
167
- self.emit(set_type, :intersection, data[ts..te-1].pack('c*'), ts, te)
184
+ emit(set_type, :intersection, *text(data, ts, te))
168
185
  };
169
186
 
170
187
  '\\' {
@@ -175,12 +192,12 @@
175
192
  set_depth += 1; in_set = true
176
193
  set_type = set_depth > 1 ? :subset : :set
177
194
 
178
- self.emit(set_type, :open, data[ts..te-1].pack('c*'), ts, te)
195
+ emit(set_type, :open, *text(data, ts, te))
179
196
  fcall character_set;
180
197
  };
181
198
 
182
199
  class_posix >(open_bracket, 1) @eof(premature_end_error) {
183
- text = data[ts..te-1].pack('c*')
200
+ text = text(data, ts, te).first
184
201
 
185
202
  class_name = text[2..-3]
186
203
  if class_name[0].chr == '^'
@@ -188,21 +205,21 @@
188
205
  end
189
206
 
190
207
  token_sym = "class_#{class_name}".to_sym
191
- self.emit(set_type, token_sym, text, ts, te)
208
+ emit(set_type, token_sym, text, ts, te)
192
209
  };
193
210
 
194
211
  collating_sequence >(open_bracket, 1) @eof(premature_end_error) {
195
- self.emit(set_type, :collation, data[ts..te-1].pack('c*'), ts, te)
212
+ emit(set_type, :collation, *text(data, ts, te))
196
213
  };
197
214
 
198
215
  character_equivalent >(open_bracket, 1) @eof(premature_end_error) {
199
- self.emit(set_type, :equivalent, data[ts..te-1].pack('c*'), ts, te)
216
+ emit(set_type, :equivalent, *text(data, ts, te))
200
217
  };
201
218
 
202
219
  # exclude the closing bracket as a cleaner workaround for dealing with the
203
220
  # ambiguity caused upon exit from the unicode properties machine
204
221
  meta_char -- ']' {
205
- self.emit(set_type, :member, data[ts..te-1].pack('c*'), ts, te)
222
+ emit(set_type, :member, *text(data, ts, te))
206
223
  };
207
224
 
208
225
  any |
@@ -210,48 +227,48 @@
210
227
  utf8_2_byte |
211
228
  utf8_3_byte |
212
229
  utf8_4_byte {
213
- self.emit(set_type, :member, data[ts..te-1].pack('c*'), ts, te)
230
+ emit(set_type, :member, *text(data, ts, te))
214
231
  };
215
232
  *|;
216
233
 
217
234
  # set escapes scanner
218
235
  # --------------------------------------------------------------------------
219
236
  set_escape_sequence := |*
220
- 'b' {
221
- self.emit(set_type, :backspace, data[ts-1..te-1].pack('c*'), ts-1, te)
237
+ 'b' > (escaped_set_alpha, 2) {
238
+ emit(set_type, :backspace, *text(data, ts, te, 1))
222
239
  fret;
223
240
  };
224
241
 
225
242
  char_type {
226
- case text = data[ts-1..te-1].pack('c*')
227
- when '\d'; self.emit(set_type, :type_digit, text, ts-1, te)
228
- when '\D'; self.emit(set_type, :type_nondigit, text, ts-1, te)
229
- when '\h'; self.emit(set_type, :type_hex, text, ts-1, te)
230
- when '\H'; self.emit(set_type, :type_nonhex, text, ts-1, te)
231
- when '\s'; self.emit(set_type, :type_space, text, ts-1, te)
232
- when '\S'; self.emit(set_type, :type_nonspace, text, ts-1, te)
233
- when '\w'; self.emit(set_type, :type_word, text, ts-1, te)
234
- when '\W'; self.emit(set_type, :type_nonword, text, ts-1, te)
243
+ case text = text(data, ts, te, 1).first
244
+ when '\d'; emit(set_type, :type_digit, text, ts-1, te)
245
+ when '\D'; emit(set_type, :type_nondigit, text, ts-1, te)
246
+ when '\h'; emit(set_type, :type_hex, text, ts-1, te)
247
+ when '\H'; emit(set_type, :type_nonhex, text, ts-1, te)
248
+ when '\s'; emit(set_type, :type_space, text, ts-1, te)
249
+ when '\S'; emit(set_type, :type_nonspace, text, ts-1, te)
250
+ when '\w'; emit(set_type, :type_word, text, ts-1, te)
251
+ when '\W'; emit(set_type, :type_nonword, text, ts-1, te)
235
252
  end
236
253
  fret;
237
254
  };
238
255
 
239
256
  hex_sequence . '-\\' . hex_sequence {
240
- self.emit(set_type, :range_hex, data[ts-1..te-1].pack('c*'), ts-1, te)
257
+ emit(set_type, :range_hex, *text(data, ts, te, 1))
241
258
  fret;
242
259
  };
243
260
 
244
261
  hex_sequence {
245
- self.emit(set_type, :member_hex, data[ts-1..te-1].pack('c*'), ts-1, te)
262
+ emit(set_type, :member_hex, *text(data, ts, te, 1))
246
263
  fret;
247
264
  };
248
265
 
249
266
  meta_char | [\\\]\-\,] {
250
- self.emit(set_type, :escape, data[ts-1..te-1].pack('c*'), ts-1, te)
267
+ emit(set_type, :escape, *text(data, ts, te, 1))
251
268
  fret;
252
269
  };
253
270
 
254
- property_char > (escaped_set_alpha, 2) {
271
+ property_char > (escaped_set_alpha, 3) {
255
272
  fhold;
256
273
  fnext character_set;
257
274
  fcall unicode_property;
@@ -264,7 +281,7 @@
264
281
  utf8_2_byte |
265
282
  utf8_3_byte |
266
283
  utf8_4_byte {
267
- self.emit(set_type, :escape, data[ts-1..te-1].pack('c*'), ts-1, te)
284
+ emit(set_type, :escape, *text(data, ts, te, 1))
268
285
  fret;
269
286
  };
270
287
  *|;
@@ -274,33 +291,33 @@
274
291
  # --------------------------------------------------------------------------
275
292
  escape_sequence := |*
276
293
  [1-9] {
277
- text = data[ts-1..te-1].pack('c*')
278
- self.emit(:backref, :number, text, ts-1, te)
294
+ text = text(data, ts, te, 1).first
295
+ emit(:backref, :number, text, ts-1, te)
279
296
  fret;
280
297
  };
281
298
 
282
299
  octal_sequence {
283
- self.emit(:escape, :octal, data[ts-1..te-1].pack('c*'), ts-1, te)
300
+ emit(:escape, :octal, *text(data, ts, te, 1))
284
301
  fret;
285
302
  };
286
303
 
287
304
  meta_char {
288
- case text = data[ts-1..te-1].pack('c*')
289
- when '\.'; self.emit(:escape, :dot, text, ts-1, te)
290
- when '\|'; self.emit(:escape, :alternation, text, ts-1, te)
291
- when '\^'; self.emit(:escape, :beginning_of_line, text, ts-1, te)
292
- when '\$'; self.emit(:escape, :end_of_line, text, ts-1, te)
293
- when '\?'; self.emit(:escape, :zero_or_one, text, ts-1, te)
294
- when '\*'; self.emit(:escape, :zero_or_more, text, ts-1, te)
295
- when '\+'; self.emit(:escape, :one_or_more, text, ts-1, te)
296
- when '\('; self.emit(:escape, :group_open, text, ts-1, te)
297
- when '\)'; self.emit(:escape, :group_close, text, ts-1, te)
298
- when '\{'; self.emit(:escape, :interval_open, text, ts-1, te)
299
- when '\}'; self.emit(:escape, :interval_close, text, ts-1, te)
300
- when '\['; self.emit(:escape, :set_open, text, ts-1, te)
301
- when '\]'; self.emit(:escape, :set_close, text, ts-1, te)
305
+ case text = text(data, ts, te, 1).first
306
+ when '\.'; emit(:escape, :dot, text, ts-1, te)
307
+ when '\|'; emit(:escape, :alternation, text, ts-1, te)
308
+ when '\^'; emit(:escape, :bol, text, ts-1, te)
309
+ when '\$'; emit(:escape, :eol, text, ts-1, te)
310
+ when '\?'; emit(:escape, :zero_or_one, text, ts-1, te)
311
+ when '\*'; emit(:escape, :zero_or_more, text, ts-1, te)
312
+ when '\+'; emit(:escape, :one_or_more, text, ts-1, te)
313
+ when '\('; emit(:escape, :group_open, text, ts-1, te)
314
+ when '\)'; emit(:escape, :group_close, text, ts-1, te)
315
+ when '\{'; emit(:escape, :interval_open, text, ts-1, te)
316
+ when '\}'; emit(:escape, :interval_close, text, ts-1, te)
317
+ when '\['; emit(:escape, :set_open, text, ts-1, te)
318
+ when '\]'; emit(:escape, :set_close, text, ts-1, te)
302
319
  when "\\\\";
303
- self.emit(:escape, :backslash, text, ts-1, te)
320
+ emit(:escape, :backslash, text, ts-1, te)
304
321
  end
305
322
  fret;
306
323
  };
@@ -308,46 +325,76 @@
308
325
  escaped_ascii > (escaped_alpha, 7) {
309
326
  # \b is emitted as backspace only when inside a character set, otherwise
310
327
  # it is a word boundary anchor. A syntax might "normalize" it if needed.
311
- case text = data[ts-1..te-1].pack('c*')
312
- when '\a'; self.emit(:escape, :bell, text, ts-1, te)
313
- when '\e'; self.emit(:escape, :escape, text, ts-1, te)
314
- when '\f'; self.emit(:escape, :form_feed, text, ts-1, te)
315
- when '\n'; self.emit(:escape, :newline, text, ts-1, te)
316
- when '\r'; self.emit(:escape, :carriage, text, ts-1, te)
317
- when '\s'; self.emit(:escape, :space, text, ts-1, te)
318
- when '\t'; self.emit(:escape, :tab, text, ts-1, te)
319
- when '\v'; self.emit(:escape, :vertical_tab, text, ts-1, te)
328
+ case text = text(data, ts, te, 1).first
329
+ when '\a'; emit(:escape, :bell, text, ts-1, te)
330
+ when '\e'; emit(:escape, :escape, text, ts-1, te)
331
+ when '\f'; emit(:escape, :form_feed, text, ts-1, te)
332
+ when '\n'; emit(:escape, :newline, text, ts-1, te)
333
+ when '\r'; emit(:escape, :carriage, text, ts-1, te)
334
+ when '\s'; emit(:escape, :space, text, ts-1, te)
335
+ when '\t'; emit(:escape, :tab, text, ts-1, te)
336
+ when '\v'; emit(:escape, :vertical_tab, text, ts-1, te)
320
337
  end
321
338
  fret;
322
339
  };
323
340
 
324
- codepoint_sequence > (escaped_alpha, 6) {
325
- text = data[ts-1..te-1].pack('c*')
341
+ codepoint_sequence > (escaped_alpha, 6) $eof(premature_end_error) {
342
+ text = text(data, ts, te, 1).first
326
343
  if text[2].chr == '{'
327
- self.emit(:escape, :codepoint_list, text, ts-1, te)
344
+ emit(:escape, :codepoint_list, text, ts-1, te)
328
345
  else
329
- self.emit(:escape, :codepoint, text, ts-1, te)
346
+ emit(:escape, :codepoint, text, ts-1, te)
330
347
  end
331
348
  fret;
332
349
  };
333
350
 
334
- hex_sequence > (escaped_alpha, 5) {
335
- self.emit(:escape, :hex, data[ts-1..te-1].pack('c*'), ts-1, te)
351
+ hex_sequence > (escaped_alpha, 5) $eof(premature_end_error) {
352
+ emit(:escape, :hex, *text(data, ts, te, 1))
353
+ fret;
354
+ };
355
+
356
+ wide_hex_sequence > (escaped_alpha, 5) $eof(premature_end_error) {
357
+ emit(:escape, :hex_wide, *text(data, ts, te, 1))
336
358
  fret;
337
359
  };
338
360
 
339
- wide_hex_sequence > (escaped_alpha, 5) {
340
- self.emit(:escape, :hex_wide, data[ts-1..te-1].pack('c*'), ts-1, te)
361
+ hex_sequence_err @invalid_sequence_error {
341
362
  fret;
342
363
  };
343
364
 
344
- control_sequence > (escaped_alpha, 4) {
345
- self.emit(:escape, :control, data[ts-1..te-1].pack('c*'), ts-1, te)
365
+ (wide_hex_seq_invalid | wide_hex_seq_empty) {
366
+ raise InvalidSequenceError.new("wide hex sequence")
346
367
  fret;
347
368
  };
348
369
 
349
- meta_sequence > (backslashed, 3) {
350
- self.emit(:escape, :meta_sequence, data[ts-1..te-1].pack('c*'), ts-1, te)
370
+ control_sequence >(escaped_alpha, 4) $eof(premature_end_error) {
371
+ if data[te]
372
+ c = data[te].chr
373
+ if c =~ /[\x00-\x7F]/
374
+ emit(:escape, :control, copy(data, ts-1..te), ts-1, te+1)
375
+ p += 1
376
+ else
377
+ raise InvalidSequenceError.new("control sequence")
378
+ end
379
+ else
380
+ raise PrematureEndError.new("control sequence")
381
+ end
382
+ fret;
383
+ };
384
+
385
+ meta_sequence >(backslashed, 3) $eof(premature_end_error) {
386
+ if data[te]
387
+ c = data[te].chr
388
+ if c =~ /[\x00-\x7F]/
389
+ emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1)
390
+ p += 1
391
+ else
392
+ raise InvalidSequenceError.new("meta sequence")
393
+ end
394
+ else
395
+ raise PrematureEndError.new("meta sequence")
396
+ end
397
+ fret;
351
398
  };
352
399
 
353
400
  property_char > (escaped_alpha, 2) {
@@ -357,7 +404,7 @@
357
404
  };
358
405
 
359
406
  (any -- non_literal_escape) > (escaped_alpha, 1) {
360
- self.emit(:escape, :literal, data[ts-1..te-1].pack('c*'), ts-1, te)
407
+ emit(:escape, :literal, *text(data, ts, te, 1))
361
408
  fret;
362
409
  };
363
410
  *|;
@@ -370,32 +417,34 @@
370
417
  # Meta characters
371
418
  # ------------------------------------------------------------------------
372
419
  dot {
373
- self.emit(:meta, :dot, data[ts..te-1].pack('c*'), ts, te)
420
+ emit(:meta, :dot, *text(data, ts, te))
374
421
  };
375
422
 
376
423
  alternation {
377
- self.emit(:meta, :alternation, data[ts..te-1].pack('c*'), ts, te)
424
+ emit(:meta, :alternation, *text(data, ts, te))
378
425
  };
379
426
 
380
427
  # Anchors
381
428
  # ------------------------------------------------------------------------
382
429
  beginning_of_line {
383
- self.emit(:anchor, :beginning_of_line, data[ts..te-1].pack('c*'), ts, te)
430
+ emit(:anchor, :bol, *text(data, ts, te))
384
431
  };
385
432
 
386
433
  end_of_line {
387
- self.emit(:anchor, :end_of_line, data[ts..te-1].pack('c*'), ts, te)
434
+ emit(:anchor, :eol, *text(data, ts, te))
388
435
  };
389
436
 
390
437
  backslash . anchor_char > (backslashed, 3) {
391
- case text = data[ts..te-1].pack('c*')
392
- when '\\A'; self.emit(:anchor, :bos, text, ts, te)
393
- when '\\z'; self.emit(:anchor, :eos, text, ts, te)
394
- when '\\Z'; self.emit(:anchor, :eos_ob_eol, text, ts, te)
395
- when '\\b'; self.emit(:anchor, :word_boundary, text, ts, te)
396
- when '\\B'; self.emit(:anchor, :nonword_boundary, text, ts, te)
397
- when '\\G'; self.emit(:anchor, :match_start, text, ts, te)
398
- else raise ScannerError.new("Unsupported anchor at #{text} (char #{ts})")
438
+ case text = text(data, ts, te).first
439
+ when '\\A'; emit(:anchor, :bos, text, ts, te)
440
+ when '\\z'; emit(:anchor, :eos, text, ts, te)
441
+ when '\\Z'; emit(:anchor, :eos_ob_eol, text, ts, te)
442
+ when '\\b'; emit(:anchor, :word_boundary, text, ts, te)
443
+ when '\\B'; emit(:anchor, :nonword_boundary, text, ts, te)
444
+ when '\\G'; emit(:anchor, :match_start, text, ts, te)
445
+ else
446
+ raise ScannerError.new(
447
+ "Unexpected character in anchor at #{text} (char #{ts})")
399
448
  end
400
449
  };
401
450
 
@@ -406,15 +455,18 @@
406
455
  # \w, \W word, non-word
407
456
  # ------------------------------------------------------------------------
408
457
  backslash . char_type > (backslashed, 2) {
409
- case text = data[ts..te-1].pack('c*')
410
- when '\\d'; self.emit(:type, :digit, text, ts, te)
411
- when '\\D'; self.emit(:type, :nondigit, text, ts, te)
412
- when '\\h'; self.emit(:type, :hex, text, ts, te)
413
- when '\\H'; self.emit(:type, :nonhex, text, ts, te)
414
- when '\\s'; self.emit(:type, :space, text, ts, te)
415
- when '\\S'; self.emit(:type, :nonspace, text, ts, te)
416
- when '\\w'; self.emit(:type, :word, text, ts, te)
417
- when '\\W'; self.emit(:type, :nonword, text, ts, te)
458
+ case text = text(data, ts, te).first
459
+ when '\\d'; emit(:type, :digit, text, ts, te)
460
+ when '\\D'; emit(:type, :nondigit, text, ts, te)
461
+ when '\\h'; emit(:type, :hex, text, ts, te)
462
+ when '\\H'; emit(:type, :nonhex, text, ts, te)
463
+ when '\\s'; emit(:type, :space, text, ts, te)
464
+ when '\\S'; emit(:type, :nonspace, text, ts, te)
465
+ when '\\w'; emit(:type, :word, text, ts, te)
466
+ when '\\W'; emit(:type, :nonword, text, ts, te)
467
+ else
468
+ raise ScannerError.new(
469
+ "Unexpected character in type at #{text} (char #{ts})")
418
470
  end
419
471
  };
420
472
 
@@ -425,7 +477,7 @@
425
477
  set_depth += 1; in_set = true
426
478
  set_type = set_depth > 1 ? :subset : :set
427
479
 
428
- self.emit(set_type, :open, data[ts..te-1].pack('c*'), ts, te)
480
+ emit(set_type, :open, *text(data, ts, te))
429
481
  fcall character_set;
430
482
  };
431
483
 
@@ -435,7 +487,7 @@
435
487
  # correct closing count.
436
488
  # ------------------------------------------------------------------------
437
489
  group_open . group_comment $group_closed {
438
- self.emit(:group, :comment, data[ts..te-1].pack('c*'), ts, te)
490
+ emit(:group, :comment, *text(data, ts, te))
439
491
  };
440
492
 
441
493
  # Expression options:
@@ -447,21 +499,7 @@
447
499
  # (?imx-imx:subexp) option on/off for subexp
448
500
  # ------------------------------------------------------------------------
449
501
  group_open . group_options >group_opened {
450
- # special handling to resolve ambiguity with passive groups
451
- if data[te]
452
- c = data[te].chr
453
- if c == ':' # include the ':'
454
- self.emit(:group, :options, data[ts..te].pack('c*'), ts, te+1)
455
- p += 1
456
- elsif c == ')' # just options by themselves
457
- self.emit(:group, :options, data[ts..te-1].pack('c*'), ts, te)
458
- else
459
- raise ScannerError.new(
460
- "Unexpected '#{c}' in options sequence, ':' or ')' expected")
461
- end
462
- else
463
- raise PrematureEndError.new("options") unless data[te]
464
- end
502
+ p = scan_options(p, data, ts, te)
465
503
  };
466
504
 
467
505
  # Assertions
@@ -471,11 +509,11 @@
471
509
  # (?<!subexp) negative look-behind
472
510
  # ------------------------------------------------------------------------
473
511
  group_open . assertion_type >group_opened {
474
- case text = data[ts..te-1].pack('c*')
475
- when '(?='; self.emit(:assertion, :lookahead, text, ts, te)
476
- when '(?!'; self.emit(:assertion, :nlookahead, text, ts, te)
477
- when '(?<='; self.emit(:assertion, :lookbehind, text, ts, te)
478
- when '(?<!'; self.emit(:assertion, :nlookbehind, text, ts, te)
512
+ case text = text(data, ts, te).first
513
+ when '(?='; emit(:assertion, :lookahead, text, ts, te)
514
+ when '(?!'; emit(:assertion, :nlookahead, text, ts, te)
515
+ when '(?<='; emit(:assertion, :lookbehind, text, ts, te)
516
+ when '(?<!'; emit(:assertion, :nlookbehind, text, ts, te)
479
517
  end
480
518
  };
481
519
 
@@ -487,85 +525,103 @@
487
525
  # (subexp) captured group
488
526
  # ------------------------------------------------------------------------
489
527
  group_open . group_type >group_opened {
490
- case text = data[ts..te-1].pack('c*')
491
- when '(?:'; self.emit(:group, :passive, text, ts, te)
492
- when '(?>'; self.emit(:group, :atomic, text, ts, te)
493
-
494
- when /\(\?<\w+>/
495
- self.emit(:group, :named_ab, text, ts, te)
496
- when /\(\?'\w+'/
497
- self.emit(:group, :named_sq, text, ts, te)
528
+ case text = text(data, ts, te).first
529
+ when '(?:'; emit(:group, :passive, text, ts, te)
530
+ when '(?>'; emit(:group, :atomic, text, ts, te)
531
+
532
+ when /^\(\?<(\w*)>/
533
+ empty_name_error(:group, 'named group (ab)') if $1.empty?
534
+
535
+ emit(:group, :named_ab, text, ts, te)
536
+
537
+ when /^\(\?'(\w*)'/
538
+ empty_name_error(:group, 'named group (sq)') if $1.empty?
539
+
540
+ emit(:group, :named_sq, text, ts, te)
541
+
542
+ else
543
+ raise ScannerError.new(
544
+ "Unknown subexpression group format '#{text}'")
498
545
  end
499
546
  };
500
547
 
501
548
  group_open @group_opened {
502
- text = data[ts..te-1].pack('c*')
503
- self.emit(:group, :capture, text, ts, te)
549
+ text = text(data, ts, te).first
550
+ emit(:group, :capture, text, ts, te)
504
551
  };
505
552
 
506
553
  group_close @group_closed {
507
- self.emit(:group, :close, data[ts..te-1].pack('c*'), ts, te)
554
+ emit(:group, :close, *text(data, ts, te))
508
555
  };
509
556
 
510
557
 
511
- # Group back-reference, named and numbered
558
+ # Group backreference, named and numbered
512
559
  # ------------------------------------------------------------------------
513
560
  backslash . (group_name_ref | group_number_ref) > (backslashed, 4) {
514
- case text = data[ts..te-1].pack('c*')
515
- when /\\([gk])<[^\d-](\w+)?>/ # angle-brackets
561
+ case text = text(data, ts, te).first
562
+ when /^\\([gk])<>/ # angle brackets
563
+ empty_backref_error("ref/call (ab)")
564
+
565
+ when /^\\([gk])''/ # single quotes
566
+ empty_backref_error("ref/call (sq)")
567
+
568
+ when /^\\([gk])<[^\d-](\w+)?>/ # angle-brackets
516
569
  if $1 == 'k'
517
- self.emit(:backref, :name_ref_ab, text, ts, te)
570
+ emit(:backref, :name_ref_ab, text, ts, te)
518
571
  else
519
- self.emit(:backref, :name_call_ab, text, ts, te)
572
+ emit(:backref, :name_call_ab, text, ts, te)
520
573
  end
521
574
 
522
- when /\\([gk])'[^\d-](\w+)?'/ #single quotes
575
+ when /^\\([gk])'[^\d-](\w+)?'/ #single quotes
523
576
  if $1 == 'k'
524
- self.emit(:backref, :name_ref_sq, text, ts, te)
577
+ emit(:backref, :name_ref_sq, text, ts, te)
525
578
  else
526
- self.emit(:backref, :name_call_sq, text, ts, te)
579
+ emit(:backref, :name_call_sq, text, ts, te)
527
580
  end
528
581
 
529
- when /\\([gk])<\d+>/ # angle-brackets
582
+ when /^\\([gk])<\d+>/ # angle-brackets
530
583
  if $1 == 'k'
531
- self.emit(:backref, :number_ref_ab, text, ts, te)
584
+ emit(:backref, :number_ref_ab, text, ts, te)
532
585
  else
533
- self.emit(:backref, :number_call_ab, text, ts, te)
586
+ emit(:backref, :number_call_ab, text, ts, te)
534
587
  end
535
588
 
536
- when /\\([gk])'\d+'/ # single quotes
589
+ when /^\\([gk])'\d+'/ # single quotes
537
590
  if $1 == 'k'
538
- self.emit(:backref, :number_ref_sq, text, ts, te)
591
+ emit(:backref, :number_ref_sq, text, ts, te)
539
592
  else
540
- self.emit(:backref, :number_call_sq, text, ts, te)
593
+ emit(:backref, :number_call_sq, text, ts, te)
541
594
  end
542
595
 
543
- when /\\([gk])<-\d+>/ # angle-brackets
596
+ when /^\\([gk])<-\d+>/ # angle-brackets
544
597
  if $1 == 'k'
545
- self.emit(:backref, :number_rel_ref_ab, text, ts, te)
598
+ emit(:backref, :number_rel_ref_ab, text, ts, te)
546
599
  else
547
- self.emit(:backref, :number_rel_call_ab, text, ts, te)
600
+ emit(:backref, :number_rel_call_ab, text, ts, te)
548
601
  end
549
602
 
550
- when /\\([gk])'-\d+'/ # single quotes
603
+ when /^\\([gk])'-\d+'/ # single quotes
551
604
  if $1 == 'k'
552
- self.emit(:backref, :number_rel_ref_sq, text, ts, te)
605
+ emit(:backref, :number_rel_ref_sq, text, ts, te)
553
606
  else
554
- self.emit(:backref, :number_rel_call_sq, text, ts, te)
607
+ emit(:backref, :number_rel_call_sq, text, ts, te)
555
608
  end
556
609
 
557
- when /\\k<[^\d-](\w+)?[+\-]\d+>/ # angle-brackets
558
- self.emit(:backref, :name_nest_ref_ab, text, ts, te)
610
+ when /^\\k<[^\d-](\w+)?[+\-]\d+>/ # angle-brackets
611
+ emit(:backref, :name_nest_ref_ab, text, ts, te)
559
612
 
560
- when /\\k'[^\d-](\w+)?[+\-]\d+'/ # single-quotes
561
- self.emit(:backref, :name_nest_ref_sq, text, ts, te)
613
+ when /^\\k'[^\d-](\w+)?[+\-]\d+'/ # single-quotes
614
+ emit(:backref, :name_nest_ref_sq, text, ts, te)
562
615
 
563
- when /\\([gk])<\d+[+\-]\d+>/ # angle-brackets
564
- self.emit(:backref, :number_nest_ref_ab, text, ts, te)
616
+ when /^\\([gk])<\d+[+\-]\d+>/ # angle-brackets
617
+ emit(:backref, :number_nest_ref_ab, text, ts, te)
565
618
 
566
- when /\\([gk])'\d+[+\-]\d+'/ # single-quotes
567
- self.emit(:backref, :number_nest_ref_sq, text, ts, te)
619
+ when /^\\([gk])'\d+[+\-]\d+'/ # single-quotes
620
+ emit(:backref, :number_nest_ref_sq, text, ts, te)
568
621
 
622
+ else
623
+ raise ScannerError.new(
624
+ "Unknown backreference format '#{text}'")
569
625
  end
570
626
  };
571
627
 
@@ -573,31 +629,31 @@
573
629
  # Quantifiers
574
630
  # ------------------------------------------------------------------------
575
631
  zero_or_one {
576
- case text = data[ts..te-1].pack('c*')
577
- when '?' ; self.emit(:quantifier, :zero_or_one, text, ts, te)
578
- when '??'; self.emit(:quantifier, :zero_or_one_reluctant, text, ts, te)
579
- when '?+'; self.emit(:quantifier, :zero_or_one_possessive, text, ts, te)
632
+ case text = text(data, ts, te).first
633
+ when '?' ; emit(:quantifier, :zero_or_one, text, ts, te)
634
+ when '??'; emit(:quantifier, :zero_or_one_reluctant, text, ts, te)
635
+ when '?+'; emit(:quantifier, :zero_or_one_possessive, text, ts, te)
580
636
  end
581
637
  };
582
-
638
+
583
639
  zero_or_more {
584
- case text = data[ts..te-1].pack('c*')
585
- when '*' ; self.emit(:quantifier, :zero_or_more, text, ts, te)
586
- when '*?'; self.emit(:quantifier, :zero_or_more_reluctant, text, ts, te)
587
- when '*+'; self.emit(:quantifier, :zero_or_more_possessive, text, ts, te)
640
+ case text = text(data, ts, te).first
641
+ when '*' ; emit(:quantifier, :zero_or_more, text, ts, te)
642
+ when '*?'; emit(:quantifier, :zero_or_more_reluctant, text, ts, te)
643
+ when '*+'; emit(:quantifier, :zero_or_more_possessive, text, ts, te)
588
644
  end
589
645
  };
590
-
646
+
591
647
  one_or_more {
592
- case text = data[ts..te-1].pack('c*')
593
- when '+' ; self.emit(:quantifier, :one_or_more, text, ts, te)
594
- when '+?'; self.emit(:quantifier, :one_or_more_reluctant, text, ts, te)
595
- when '++'; self.emit(:quantifier, :one_or_more_possessive, text, ts, te)
648
+ case text = text(data, ts, te).first
649
+ when '+' ; emit(:quantifier, :one_or_more, text, ts, te)
650
+ when '+?'; emit(:quantifier, :one_or_more_reluctant, text, ts, te)
651
+ when '++'; emit(:quantifier, :one_or_more_possessive, text, ts, te)
596
652
  end
597
653
  };
598
654
 
599
- quantifier_range @err(premature_end_error) {
600
- self.emit(:quantifier, :interval, data[ts..te-1].pack('c*'), ts, te)
655
+ quantifier_interval @err(premature_end_error) {
656
+ emit(:quantifier, :interval, *text(data, ts, te))
601
657
  };
602
658
 
603
659
  # Escaped sequences
@@ -614,35 +670,67 @@
614
670
  utf8_2_byte+ |
615
671
  utf8_3_byte+ |
616
672
  utf8_4_byte+ {
617
- self.append_literal(data, ts, te)
673
+ append_literal(data, ts, te)
618
674
  };
619
675
 
620
676
  *|;
621
677
  }%%
622
678
 
679
+ # THIS IS A GENERATED FILE, DO NOT EDIT DIRECTLY
680
+ # This file was generated from scanner.rl
623
681
 
624
682
  module Regexp::Scanner
625
683
  %% write data;
626
684
 
685
+ # General scanner error (catch all)
627
686
  class ScannerError < StandardError
628
687
  def initialize(what)
629
688
  super what
630
689
  end
631
690
  end
632
691
 
692
+ # Base for all scanner validation errors
693
+ class ValidationError < StandardError
694
+ def initialize(reason)
695
+ super reason
696
+ end
697
+ end
698
+
699
+ # Unexpected end of pattern
633
700
  class PrematureEndError < ScannerError
634
701
  def initialize(where = '')
635
- super "Premature end of pattern: #{where}"
702
+ super "Premature end of pattern at #{where}"
703
+ end
704
+ end
705
+
706
+ # Invalid sequence format. Used for escape sequences, mainly.
707
+ class InvalidSequenceError < ValidationError
708
+ def initialize(what = 'sequence', where = '')
709
+ super "Invalid #{what} at #{where}"
710
+ end
711
+ end
712
+
713
+ # Invalid group. Used for named groups.
714
+ class InvalidGroupError < ValidationError
715
+ def initialize(what, reason)
716
+ super "Invalid #{what}, #{reason}."
717
+ end
718
+ end
719
+
720
+ # Invalid back reference. Used for named and numbered refs/calls.
721
+ class InvalidBackrefError < ValidationError
722
+ def initialize(what, reason)
723
+ super "Invalid back reference #{what}, #{reason}"
636
724
  end
637
725
  end
638
726
 
639
- class UnknownUnicodePropertyError < ScannerError
727
+ # The property name was not recognized by the scanner.
728
+ class UnknownUnicodePropertyError < ValidationError
640
729
  def initialize(name)
641
730
  super "Unknown unicode character property name #{name}"
642
731
  end
643
732
  end
644
733
 
645
-
646
734
  # Scans the given regular expression text, or Regexp object and collects the
647
735
  # emitted tokens into an array that gets returned at the end. If a block is
648
736
  # given, it gets called for each emitted token.
@@ -665,42 +753,107 @@ module Regexp::Scanner
665
753
  %% write init;
666
754
  %% write exec;
667
755
 
756
+ if cs == re_scanner_error
757
+ text = ts ? copy(data, ts-1..-1) : data.pack('c*')
758
+ raise ScannerError.new("Scan error at '#{text}'")
759
+ end
760
+
668
761
  raise PrematureEndError.new("(missing group closing paranthesis) "+
669
762
  "[#{in_group}:#{group_depth}]") if in_group
670
763
  raise PrematureEndError.new("(missing set closing bracket) "+
671
764
  "[#{in_set}:#{set_depth}]") if in_set
672
765
 
673
766
  # when the entire expression is a literal run
674
- self.emit_literal if @literal
767
+ emit_literal if @literal
675
768
 
676
769
  @tokens
677
770
  end
678
771
 
679
- # appends one or more characters to the literal buffer, to be emitted later
680
- # by a call to emit_literal. contents a mix of ASCII and UTF-8
772
+ private
773
+
774
+ # Ragel's regex-based scan of the group options introduced a lot of
775
+ # ambiguity, so we just ask it to find the beginning of what looks
776
+ # like an options run and handle the rest in here.
777
+ def self.scan_options(p, data, ts, te)
778
+ text = text(data, ts, te).first
779
+
780
+ options_char, options_length = true, 0
781
+
782
+ # Copy while we have option characters. The maximum is 7, as in (?mix-mix;
783
+ # that combination doesn't make sense, but it is possible.
784
+ while options_char and options_length < 7
785
+ if data[te + options_length]
786
+ c = data[te + options_length].chr
787
+
788
+ if c =~ /[-mix]/
789
+ text << c ; p += 1 ; options_length += 1
790
+ else
791
+ options_char = false
792
+ end
793
+ else
794
+ raise PrematureEndError.new("expression options `#{text}'")
795
+ end
796
+ end
797
+
798
+ if data[te + options_length]
799
+ c = data[te + options_length].chr
800
+
801
+ if c == ':'
802
+ # Include the ':' in the options text
803
+ text << c ; p += 1 ; options_length += 1
804
+ emit(:group, :options, text, ts, te + options_length)
805
+
806
+ elsif c == ')'
807
+ # Don't include the closing ')', let group_close handle it.
808
+ emit(:group, :options, text, ts, te + options_length)
809
+
810
+ else
811
+ # Plain Regexp reports this as 'undefined group option'
812
+ raise ScannerError.new(
813
+ "Unexpected `#{c}' in options sequence, ':' or ')' expected")
814
+ end
815
+ else
816
+ raise PrematureEndError.new("expression options `#{text}'")
817
+ end
818
+
819
+ p # return the new value of the data pointer
820
+ end
821
+
822
+ # Copy the given range of data as text
823
+ def self.copy(data, range)
824
+ data[range].pack('c*')
825
+ end
826
+
827
+ # Copy from ts to te from data as text, returning an array with the text
828
+ # and the offsets used to copy it.
829
+ def self.text(data, ts, te, soff = 0)
830
+ [copy(data, ts-soff..te-1), ts-soff, te]
831
+ end
832
+
833
+ # Appends one or more characters to the literal buffer, to be emitted later
834
+ # by a call to emit_literal. Contents can be a mix of ASCII and UTF-8.
681
835
  def self.append_literal(data, ts, te)
682
836
  @literal ||= []
683
- @literal << [data[ts..te-1].pack('c*'), ts, te]
837
+ @literal << text(data, ts, te)
684
838
  end
685
839
 
686
- # emits the collected literal run collected by one or more calls to the
687
- # append_literal method
840
+ # Emits the literal run collected by calls to the append_literal method,
841
+ # using the total start (ts) and end (te) offsets of the run.
688
842
  def self.emit_literal
689
843
  ts, te = @literal.first[1], @literal.last[2]
690
844
  text = @literal.map {|t| t[0]}.join
691
845
 
692
846
  text.force_encoding('utf-8') if text.respond_to?(:force_encoding)
693
847
 
694
- self.emit(:literal, :literal, text, ts, te)
695
848
  @literal = nil
849
+ emit(:literal, :literal, text, ts, te)
696
850
  end
697
851
 
852
+ # Emits an array with the details of the scanned pattern
698
853
  def self.emit(type, token, text, ts, te)
699
- #puts " > emit: #{type}:#{token} '#{text}' [#{ts}..#{te}]"
854
+ #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
700
855
 
701
- if @literal and type != :literal
702
- self.emit_literal
703
- end
856
+ emit_literal if @literal
704
857
 
705
858
  if @block
706
859
  @block.call type, token, text, ts, te
@@ -709,4 +862,37 @@ module Regexp::Scanner
709
862
  @tokens << [type, token, text, ts, te]
710
863
  end
711
864
 
865
+ # Centralizes and unifies the handling of validation related
866
+ # errors.
867
+ def self.validation_error(type, what, reason)
868
+ case type
869
+ when :group
870
+ error = InvalidGroupError.new(what, reason)
871
+ when :backref
872
+ error = InvalidBackrefError.new(what, reason)
873
+ when :sequence
874
+ error = InvalidSequenceError.new(what, reason)
875
+ else
876
+ error = ValidationError.new('expression')
877
+ end
878
+
879
+ # TODO: configuration option to treat scanner level validation
880
+ # errors as warnings or ignore them
881
+ if false # @@config.validation_warn
882
+ $stderr.puts error.to_s # unless @@config.validation_ignore
883
+ else
884
+ raise error # unless @@config.validation_ignore
885
+ end
886
+ end
887
+
888
+ # Used for references with an empty name or number
889
+ def self.empty_backref_error(type, what)
890
+ validation_error(:backref, what, 'ref ID is empty')
891
+ end
892
+
893
+ # Used for named expressions with an empty name
894
+ def self.empty_name_error(type, what)
895
+ validation_error(type, what, 'name is empty')
896
+ end
897
+
712
898
  end # module Regexp::Scanner