regexp_parser 2.7.0 → 2.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/LICENSE +1 -1
  4. data/lib/regexp_parser/expression/base.rb +0 -7
  5. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  6. data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
  7. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  8. data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
  9. data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
  10. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  11. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  13. data/lib/regexp_parser/expression/classes/keep.rb +1 -1
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  18. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  19. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  20. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  21. data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
  22. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  23. data/lib/regexp_parser/expression/sequence.rb +5 -9
  24. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  25. data/lib/regexp_parser/expression/shared.rb +37 -24
  26. data/lib/regexp_parser/expression/subexpression.rb +20 -18
  27. data/lib/regexp_parser/expression.rb +34 -31
  28. data/lib/regexp_parser/lexer.rb +15 -7
  29. data/lib/regexp_parser/parser.rb +91 -91
  30. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  31. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  32. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  33. data/lib/regexp_parser/scanner/properties/long.csv +29 -0
  34. data/lib/regexp_parser/scanner/properties/short.csv +3 -0
  35. data/lib/regexp_parser/scanner/property.rl +1 -1
  36. data/lib/regexp_parser/scanner/scanner.rl +44 -130
  37. data/lib/regexp_parser/scanner.rb +1096 -1297
  38. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  39. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  40. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  41. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  42. data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
  43. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  44. data/lib/regexp_parser/syntax/token.rb +13 -13
  45. data/lib/regexp_parser/syntax/versions.rb +1 -1
  46. data/lib/regexp_parser/syntax.rb +1 -1
  47. data/lib/regexp_parser/version.rb +1 -1
  48. data/lib/regexp_parser.rb +6 -6
  49. data/regexp_parser.gemspec +5 -5
  50. metadata +14 -8
  51. data/CHANGELOG.md +0 -632
  52. data/README.md +0 -503
@@ -0,0 +1,63 @@
1
+ class Regexp::Scanner
2
+ # Base for all scanner validation errors
3
+ class ValidationError < ScannerError
4
+ # Centralizes and unifies the handling of validation related errors.
5
+ def self.for(type, problem, reason = nil)
6
+ types.fetch(type).new(problem, reason)
7
+ end
8
+
9
+ def self.types
10
+ @types ||= {
11
+ backref: InvalidBackrefError,
12
+ group: InvalidGroupError,
13
+ group_option: InvalidGroupOption,
14
+ posix_class: UnknownPosixClassError,
15
+ property: UnknownUnicodePropertyError,
16
+ sequence: InvalidSequenceError,
17
+ }
18
+ end
19
+ end
20
+
21
+ # Invalid sequence format. Used for escape sequences, mainly.
22
+ class InvalidSequenceError < ValidationError
23
+ def initialize(what = 'sequence', where = '')
24
+ super "Invalid #{what} at #{where}"
25
+ end
26
+ end
27
+
28
+ # Invalid group. Used for named groups.
29
+ class InvalidGroupError < ValidationError
30
+ def initialize(what, reason)
31
+ super "Invalid #{what}, #{reason}."
32
+ end
33
+ end
34
+
35
+ # Invalid groupOption. Used for inline options.
36
+ # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
37
+ class InvalidGroupOption < ValidationError
38
+ def initialize(option, text)
39
+ super "Invalid group option #{option} in #{text}"
40
+ end
41
+ end
42
+
43
+ # Invalid back reference. Used for name a number refs/calls.
44
+ class InvalidBackrefError < ValidationError
45
+ def initialize(what, reason)
46
+ super "Invalid back reference #{what}, #{reason}"
47
+ end
48
+ end
49
+
50
+ # The property name was not recognized by the scanner.
51
+ class UnknownUnicodePropertyError < ValidationError
52
+ def initialize(name, _)
53
+ super "Unknown unicode character property name #{name}"
54
+ end
55
+ end
56
+
57
+ # The POSIX class name was not recognized by the scanner.
58
+ class UnknownPosixClassError < ValidationError
59
+ def initialize(text, _)
60
+ super "Unknown POSIX class #{text}"
61
+ end
62
+ end
63
+ end
@@ -7,6 +7,8 @@ age=12.0,age=12.0
7
7
  age=12.1,age=12.1
8
8
  age=13.0,age=13.0
9
9
  age=14.0,age=14.0
10
+ age=15.0,age=15.0
11
+ age=15.1,age=15.1
10
12
  age=2.0,age=2.0
11
13
  age=2.1,age=2.1
12
14
  age=3.0,age=3.0
@@ -97,6 +99,7 @@ emojimodifierbase,emoji_modifier_base
97
99
  emojipresentation,emoji_presentation
98
100
  enclosingmark,enclosing_mark
99
101
  ethiopic,ethiopic
102
+ extendedpictographic,extended_pictographic
100
103
  extender,extender
101
104
  finalpunctuation,final_punctuation
102
105
  format,format
@@ -106,6 +109,19 @@ gothic,gothic
106
109
  grantha,grantha
107
110
  graph,graph
108
111
  graphemebase,grapheme_base
112
+ graphemeclusterbreak=control,grapheme_cluster_break=control
113
+ graphemeclusterbreak=cr,grapheme_cluster_break=cr
114
+ graphemeclusterbreak=extend,grapheme_cluster_break=extend
115
+ graphemeclusterbreak=l,grapheme_cluster_break=l
116
+ graphemeclusterbreak=lf,grapheme_cluster_break=lf
117
+ graphemeclusterbreak=lv,grapheme_cluster_break=lv
118
+ graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
119
+ graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
120
+ graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
121
+ graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
122
+ graphemeclusterbreak=t,grapheme_cluster_break=t
123
+ graphemeclusterbreak=v,grapheme_cluster_break=v
124
+ graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
109
125
  graphemeextend,grapheme_extend
110
126
  graphemelink,grapheme_link
111
127
  greek,greek
@@ -121,11 +137,14 @@ hebrew,hebrew
121
137
  hexdigit,hex_digit
122
138
  hiragana,hiragana
123
139
  hyphen,hyphen
140
+ idcompatmathcontinue,id_compat_math_continue
141
+ idcompatmathstart,id_compat_math_start
124
142
  idcontinue,id_continue
125
143
  ideographic,ideographic
126
144
  idsbinaryoperator,ids_binary_operator
127
145
  idstart,id_start
128
146
  idstrinaryoperator,ids_trinary_operator
147
+ idsunaryoperator,ids_unary_operator
129
148
  imperialaramaic,imperial_aramaic
130
149
  inadlam,in_adlam
131
150
  inaegeannumbers,in_aegean_numbers
@@ -139,6 +158,7 @@ inancientsymbols,in_ancient_symbols
139
158
  inarabic,in_arabic
140
159
  inarabicextendeda,in_arabic_extended_a
141
160
  inarabicextendedb,in_arabic_extended_b
161
+ inarabicextendedc,in_arabic_extended_c
142
162
  inarabicmathematicalalphabeticsymbols,in_arabic_mathematical_alphabetic_symbols
143
163
  inarabicpresentationformsa,in_arabic_presentation_forms_a
144
164
  inarabicpresentationformsb,in_arabic_presentation_forms_b
@@ -186,6 +206,8 @@ incjkunifiedideographsextensiond,in_cjk_unified_ideographs_extension_d
186
206
  incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
187
207
  incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
188
208
  incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
209
+ incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
210
+ incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
189
211
  incombiningdiacriticalmarks,in_combining_diacritical_marks
190
212
  incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
191
213
  incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
@@ -205,10 +227,12 @@ incyrillic,in_cyrillic
205
227
  incyrillicextendeda,in_cyrillic_extended_a
206
228
  incyrillicextendedb,in_cyrillic_extended_b
207
229
  incyrillicextendedc,in_cyrillic_extended_c
230
+ incyrillicextendedd,in_cyrillic_extended_d
208
231
  incyrillicsupplement,in_cyrillic_supplement
209
232
  indeseret,in_deseret
210
233
  indevanagari,in_devanagari
211
234
  indevanagariextended,in_devanagari_extended
235
+ indevanagariextendeda,in_devanagari_extended_a
212
236
  indingbats,in_dingbats
213
237
  indivesakuru,in_dives_akuru
214
238
  indogra,in_dogra
@@ -268,6 +292,7 @@ inipaextensions,in_ipa_extensions
268
292
  initialpunctuation,initial_punctuation
269
293
  injavanese,in_javanese
270
294
  inkaithi,in_kaithi
295
+ inkaktoviknumerals,in_kaktovik_numerals
271
296
  inkanaextendeda,in_kana_extended_a
272
297
  inkanaextendedb,in_kana_extended_b
273
298
  inkanasupplement,in_kana_supplement
@@ -276,6 +301,7 @@ inkangxiradicals,in_kangxi_radicals
276
301
  inkannada,in_kannada
277
302
  inkatakana,in_katakana
278
303
  inkatakanaphoneticextensions,in_katakana_phonetic_extensions
304
+ inkawi,in_kawi
279
305
  inkayahli,in_kayah_li
280
306
  inkharoshthi,in_kharoshthi
281
307
  inkhitansmallscript,in_khitan_small_script
@@ -339,6 +365,7 @@ inmyanmar,in_myanmar
339
365
  inmyanmarextendeda,in_myanmar_extended_a
340
366
  inmyanmarextendedb,in_myanmar_extended_b
341
367
  innabataean,in_nabataean
368
+ innagmundari,in_nag_mundari
342
369
  innandinagari,in_nandinagari
343
370
  innewa,in_newa
344
371
  innewtailue,in_new_tai_lue
@@ -457,6 +484,7 @@ joincontrol,join_control
457
484
  kaithi,kaithi
458
485
  kannada,kannada
459
486
  katakana,katakana
487
+ kawi,kawi
460
488
  kayahli,kayah_li
461
489
  kharoshthi,kharoshthi
462
490
  khitansmallscript,khitan_small_script
@@ -503,6 +531,7 @@ mro,mro
503
531
  multani,multani
504
532
  myanmar,myanmar
505
533
  nabataean,nabataean
534
+ nagmundari,nag_mundari
506
535
  nandinagari,nandinagari
507
536
  newa,newa
508
537
  newline,newline
@@ -57,6 +57,7 @@ emod,emoji_modifier
57
57
  epres,emoji_presentation
58
58
  ethi,ethiopic
59
59
  ext,extender
60
+ extpict,extended_pictographic
60
61
  geor,georgian
61
62
  glag,glagolitic
62
63
  gong,gunjala_gondi
@@ -85,6 +86,7 @@ ideo,ideographic
85
86
  ids,id_start
86
87
  idsb,ids_binary_operator
87
88
  idst,ids_trinary_operator
89
+ idsu,ids_unary_operator
88
90
  ital,old_italic
89
91
  java,javanese
90
92
  joinc,join_control
@@ -133,6 +135,7 @@ mtei,meetei_mayek
133
135
  mult,multani
134
136
  mymr,myanmar
135
137
  n,number
138
+ nagm,nag_mundari
136
139
  nand,nandinagari
137
140
  narb,old_north_arabian
138
141
  nbat,nabataean
@@ -20,7 +20,7 @@
20
20
  name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
21
21
 
22
22
  token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
- validation_error(:property, name) unless token
23
+ raise ValidationError.for(:property, name) unless token
24
24
 
25
25
  self.emit(type, token.to_sym, text)
26
26
 
@@ -30,11 +30,6 @@
30
30
 
31
31
  class_posix = ('[:' . '^'? . [^\[\]]* . ':]');
32
32
 
33
-
34
- # these are not supported in ruby at the moment
35
- collating_sequence = '[.' . (alpha | [\-])+ . '.]';
36
- character_equivalent = '[=' . alpha . '=]';
37
-
38
33
  line_anchor = beginning_of_line | end_of_line;
39
34
  anchor_char = [AbBzZG];
40
35
 
@@ -83,10 +78,9 @@
83
78
  # try to treat every other group head as options group, like Ruby
84
79
  group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
85
80
 
86
- group_ref = [gk];
87
81
  group_name_id_ab = ([^!0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
88
82
  group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
89
- group_number = '-'? . [1-9] . [0-9]*;
83
+ group_number = '-'? . [0-9]+;
90
84
  group_level = [+\-] . [0-9]+;
91
85
 
92
86
  group_name = ('<' . group_name_id_ab? . '>') |
@@ -95,15 +89,11 @@
95
89
 
96
90
  group_named = ('?' . group_name );
97
91
 
98
- group_name_backref = 'k' . (('<' . group_name_id_ab? . group_level? '>') |
99
- ("'" . group_name_id_sq? . group_level? "'"));
100
- group_name_call = 'g' . (('<' . group_name_id_ab? . group_level? '>') |
101
- ("'" . group_name_id_sq? . group_level? "'"));
92
+ group_ref_body = (('<' . (group_name_id_ab? | group_number) . group_level? '>') |
93
+ ("'" . (group_name_id_sq? | group_number) . group_level? "'"));
102
94
 
103
- group_number_backref = 'k' . (('<' . group_number . group_level? '>') |
104
- ("'" . group_number . group_level? "'"));
105
- group_number_call = 'g' . (('<' . ((group_number . group_level?) | '0') '>') |
106
- ("'" . ((group_number . group_level?) | '0') "'"));
95
+ group_ref = 'k' . group_ref_body;
96
+ group_call = 'g' . group_ref_body;
107
97
 
108
98
  group_type = group_atomic | group_passive | group_absence | group_named;
109
99
 
@@ -134,13 +124,13 @@
134
124
  # EOF error, used where it can be detected
135
125
  action premature_end_error {
136
126
  text = copy(data, ts ? ts-1 : 0, -1)
137
- raise PrematureEndError.new( text )
127
+ raise PrematureEndError.new(text)
138
128
  }
139
129
 
140
130
  # Invalid sequence error, used from sequences, like escapes and sets
141
131
  action invalid_sequence_error {
142
132
  text = copy(data, ts ? ts-1 : 0, -1)
143
- validation_error(:sequence, 'sequence', text)
133
+ raise ValidationError.for(:sequence, 'sequence', text)
144
134
  }
145
135
 
146
136
  # group (nesting) and set open/close actions
@@ -221,20 +211,12 @@
221
211
  end
222
212
 
223
213
  unless self.class.posix_classes.include?(class_name)
224
- validation_error(:posix_class, text)
214
+ raise ValidationError.for(:posix_class, text)
225
215
  end
226
216
 
227
217
  emit(type, class_name.to_sym, text)
228
218
  };
229
219
 
230
- # These are not supported in ruby at the moment. Enable them if they are.
231
- # collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
232
- # emit(:set, :collation, copy(data, ts, te))
233
- # };
234
- # character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
235
- # emit(:set, :equivalent, copy(data, ts, te))
236
- # };
237
-
238
220
  meta_char > (set_meta, 1) {
239
221
  emit(:literal, :literal, copy(data, ts, te))
240
222
  };
@@ -285,6 +267,13 @@
285
267
  fret;
286
268
  };
287
269
 
270
+ [8-9] . [0-9] { # special case, emits two tokens
271
+ text = copy(data, ts-1, te)
272
+ emit(:escape, :literal, text[0, 2])
273
+ emit(:literal, :literal, text[2])
274
+ fret;
275
+ };
276
+
288
277
  meta_char {
289
278
  case text = copy(data, ts-1, te)
290
279
  when '\.'; emit(:escape, :dot, text)
@@ -375,6 +364,7 @@
375
364
  conditional_expression := |*
376
365
  group_lookup . ')' {
377
366
  text = copy(data, ts, te-1)
367
+ text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
378
368
  emit(:conditional, :condition, text)
379
369
  emit(:conditional, :condition_close, ')')
380
370
  };
@@ -457,10 +447,9 @@
457
447
 
458
448
  # (?#...) comments: parsed as a single expression, without introducing a
459
449
  # new nesting level. Comments may not include parentheses, escaped or not.
460
- # special case for close, action performed on all transitions to get the
461
- # correct closing count.
450
+ # special case for close to get the correct closing count.
462
451
  # ------------------------------------------------------------------------
463
- group_open . group_comment $group_closed {
452
+ (group_open . group_comment) @group_closed {
464
453
  emit(:group, :comment, copy(data, ts, te))
465
454
  };
466
455
 
@@ -475,10 +464,10 @@
475
464
  #
476
465
  # (?imxdau-imx:subexp) option on/off for subexp
477
466
  # ------------------------------------------------------------------------
478
- group_open . group_options >group_opened {
467
+ (group_open . group_options) >group_opened {
479
468
  text = copy(data, ts, te)
480
469
  if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
481
- validation_error(:group_option, $1 || "-#{$2}", text)
470
+ raise ValidationError.for(:group_option, $1 || "-#{$2}", text)
482
471
  end
483
472
  emit_options(text)
484
473
  };
@@ -489,7 +478,7 @@
489
478
  # (?<=subexp) look-behind
490
479
  # (?<!subexp) negative look-behind
491
480
  # ------------------------------------------------------------------------
492
- group_open . assertion_type >group_opened {
481
+ (group_open . assertion_type) >group_opened {
493
482
  case text = copy(data, ts, te)
494
483
  when '(?='; emit(:assertion, :lookahead, text)
495
484
  when '(?!'; emit(:assertion, :nlookahead, text)
@@ -506,14 +495,14 @@
506
495
  # (?'name'subexp) named group (single quoted version)
507
496
  # (subexp) captured group
508
497
  # ------------------------------------------------------------------------
509
- group_open . group_type >group_opened {
498
+ (group_open . group_type) >group_opened {
510
499
  case text = copy(data, ts, te)
511
500
  when '(?:'; emit(:group, :passive, text)
512
501
  when '(?>'; emit(:group, :atomic, text)
513
502
  when '(?~'; emit(:group, :absence, text)
514
503
 
515
504
  when /^\(\?(?:<>|'')/
516
- validation_error(:group, 'named group', 'name is empty')
505
+ raise ValidationError.for(:group, 'named group', 'name is empty')
517
506
 
518
507
  when /^\(\?<[^>]+>/
519
508
  emit(:group, :named_ab, text)
@@ -533,7 +522,7 @@
533
522
  if conditional_stack.last == group_depth + 1
534
523
  conditional_stack.pop
535
524
  emit(:conditional, :close, ')')
536
- else
525
+ elsif group_depth >= 0
537
526
  if spacing_stack.length > 1 &&
538
527
  spacing_stack.last[:depth] == group_depth + 1
539
528
  spacing_stack.pop
@@ -541,41 +530,43 @@
541
530
  end
542
531
 
543
532
  emit(:group, :close, ')')
533
+ else
534
+ raise ValidationError.for(:group, 'group', 'unmatched close parenthesis')
544
535
  end
545
536
  };
546
537
 
547
538
 
548
539
  # Group backreference, named and numbered
549
540
  # ------------------------------------------------------------------------
550
- backslash . (group_name_backref | group_number_backref) > (backslashed, 4) {
541
+ backslash . (group_ref) > (backslashed, 4) {
551
542
  case text = copy(data, ts, te)
552
- when /^\\k(<>|'')/
553
- validation_error(:backref, 'backreference', 'ref ID is empty')
554
- when /^\\k(.)[^\p{digit}\-][^+\-]*\D$/
543
+ when /^\\k(.)[^0-9\-][^+\-]*['>]$/
555
544
  emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
556
- when /^\\k(.)\d+\D$/
545
+ when /^\\k(.)0*[1-9]\d*['>]$/
557
546
  emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
558
- when /^\\k(.)-\d+\D$/
547
+ when /^\\k(.)-0*[1-9]\d*['>]$/
559
548
  emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
560
- when /^\\k(.)[^\p{digit}\-].*[+\-]\d+\D$/
549
+ when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
561
550
  emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
562
- when /^\\k(.)-?\d+[+\-]\d+\D$/
551
+ when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
563
552
  emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
553
+ else
554
+ raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
564
555
  end
565
556
  };
566
557
 
567
558
  # Group call, named and numbered
568
559
  # ------------------------------------------------------------------------
569
- backslash . (group_name_call | group_number_call) > (backslashed, 4) {
560
+ backslash . (group_call) > (backslashed, 4) {
570
561
  case text = copy(data, ts, te)
571
- when /^\\g(<>|'')/
572
- validation_error(:backref, 'subexpression call', 'ref ID is empty')
573
- when /^\\g(.)[^\p{digit}+\->][^+\-]*/
562
+ when /^\\g(.)[^0-9+\-].*['>]$/
574
563
  emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
575
- when /^\\g(.)\d+\D$/
564
+ when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
576
565
  emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
577
- when /^\\g(.)[+-]\d+/
566
+ when /^\\g(.)[+-]0*[1-9]\d*/
578
567
  emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
568
+ else
569
+ raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
579
570
  end
580
571
  };
581
572
 
@@ -649,72 +640,11 @@
649
640
  *|;
650
641
  }%%
651
642
 
652
- # THIS IS A GENERATED FILE, DO NOT EDIT DIRECTLY
653
- # This file was generated from lib/regexp_parser/scanner/scanner.rl
654
-
655
- require 'regexp_parser/error'
643
+ require_relative 'scanner/errors/scanner_error'
644
+ require_relative 'scanner/errors/premature_end_error'
645
+ require_relative 'scanner/errors/validation_error'
656
646
 
657
647
  class Regexp::Scanner
658
- # General scanner error (catch all)
659
- class ScannerError < Regexp::Parser::Error; end
660
-
661
- # Base for all scanner validation errors
662
- class ValidationError < Regexp::Parser::Error
663
- def initialize(reason)
664
- super reason
665
- end
666
- end
667
-
668
- # Unexpected end of pattern
669
- class PrematureEndError < ScannerError
670
- def initialize(where = '')
671
- super "Premature end of pattern at #{where}"
672
- end
673
- end
674
-
675
- # Invalid sequence format. Used for escape sequences, mainly.
676
- class InvalidSequenceError < ValidationError
677
- def initialize(what = 'sequence', where = '')
678
- super "Invalid #{what} at #{where}"
679
- end
680
- end
681
-
682
- # Invalid group. Used for named groups.
683
- class InvalidGroupError < ValidationError
684
- def initialize(what, reason)
685
- super "Invalid #{what}, #{reason}."
686
- end
687
- end
688
-
689
- # Invalid groupOption. Used for inline options.
690
- # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
691
- class InvalidGroupOption < ValidationError
692
- def initialize(option, text)
693
- super "Invalid group option #{option} in #{text}"
694
- end
695
- end
696
-
697
- # Invalid back reference. Used for name a number refs/calls.
698
- class InvalidBackrefError < ValidationError
699
- def initialize(what, reason)
700
- super "Invalid back reference #{what}, #{reason}"
701
- end
702
- end
703
-
704
- # The property name was not recognized by the scanner.
705
- class UnknownUnicodePropertyError < ValidationError
706
- def initialize(name)
707
- super "Unknown unicode character property name #{name}"
708
- end
709
- end
710
-
711
- # The POSIX class name was not recognized by the scanner.
712
- class UnknownPosixClassError < ValidationError
713
- def initialize(text)
714
- super "Unknown POSIX class #{text}"
715
- end
716
- end
717
-
718
648
  # Scans the given regular expression text, or Regexp object and collects the
719
649
  # emitted token into an array that gets returned at the end. If a block is
720
650
  # given, it gets called for each emitted token.
@@ -891,24 +821,8 @@ class Regexp::Scanner
891
821
 
892
822
  def emit_meta_control_sequence(data, ts, te, token)
893
823
  if data.last < 0x00 || data.last > 0x7F
894
- validation_error(:sequence, 'escape', token.to_s)
824
+ raise ValidationError.for(:sequence, 'escape', token.to_s)
895
825
  end
896
826
  emit(:escape, token, copy(data, ts-1, te))
897
827
  end
898
-
899
- # Centralizes and unifies the handling of validation related
900
- # errors.
901
- def validation_error(type, what, reason = nil)
902
- error =
903
- case type
904
- when :backref then InvalidBackrefError.new(what, reason)
905
- when :group then InvalidGroupError.new(what, reason)
906
- when :group_option then InvalidGroupOption.new(what, reason)
907
- when :posix_class then UnknownPosixClassError.new(what)
908
- when :property then UnknownUnicodePropertyError.new(what)
909
- when :sequence then InvalidSequenceError.new(what, reason)
910
- end
911
-
912
- raise error # unless @@config.validation_ignore
913
- end
914
828
  end # module Regexp::Scanner