regexp_parser 2.7.0 → 2.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/LICENSE +1 -1
  4. data/lib/regexp_parser/expression/base.rb +0 -7
  5. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  6. data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
  7. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  8. data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
  9. data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
  10. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  11. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  13. data/lib/regexp_parser/expression/classes/keep.rb +1 -1
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  18. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  19. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  20. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  21. data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
  22. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  23. data/lib/regexp_parser/expression/sequence.rb +5 -9
  24. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  25. data/lib/regexp_parser/expression/shared.rb +37 -24
  26. data/lib/regexp_parser/expression/subexpression.rb +20 -18
  27. data/lib/regexp_parser/expression.rb +34 -31
  28. data/lib/regexp_parser/lexer.rb +15 -7
  29. data/lib/regexp_parser/parser.rb +91 -91
  30. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  31. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  32. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  33. data/lib/regexp_parser/scanner/properties/long.csv +29 -0
  34. data/lib/regexp_parser/scanner/properties/short.csv +3 -0
  35. data/lib/regexp_parser/scanner/property.rl +1 -1
  36. data/lib/regexp_parser/scanner/scanner.rl +44 -130
  37. data/lib/regexp_parser/scanner.rb +1096 -1297
  38. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  39. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  40. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  41. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  42. data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
  43. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  44. data/lib/regexp_parser/syntax/token.rb +13 -13
  45. data/lib/regexp_parser/syntax/versions.rb +1 -1
  46. data/lib/regexp_parser/syntax.rb +1 -1
  47. data/lib/regexp_parser/version.rb +1 -1
  48. data/lib/regexp_parser.rb +6 -6
  49. data/regexp_parser.gemspec +5 -5
  50. metadata +14 -8
  51. data/CHANGELOG.md +0 -632
  52. data/README.md +0 -503
@@ -0,0 +1,63 @@
1
+ class Regexp::Scanner
2
+ # Base for all scanner validation errors
3
+ class ValidationError < ScannerError
4
+ # Centralizes and unifies the handling of validation related errors.
5
+ def self.for(type, problem, reason = nil)
6
+ types.fetch(type).new(problem, reason)
7
+ end
8
+
9
+ def self.types
10
+ @types ||= {
11
+ backref: InvalidBackrefError,
12
+ group: InvalidGroupError,
13
+ group_option: InvalidGroupOption,
14
+ posix_class: UnknownPosixClassError,
15
+ property: UnknownUnicodePropertyError,
16
+ sequence: InvalidSequenceError,
17
+ }
18
+ end
19
+ end
20
+
21
+ # Invalid sequence format. Used for escape sequences, mainly.
22
+ class InvalidSequenceError < ValidationError
23
+ def initialize(what = 'sequence', where = '')
24
+ super "Invalid #{what} at #{where}"
25
+ end
26
+ end
27
+
28
+ # Invalid group. Used for named groups.
29
+ class InvalidGroupError < ValidationError
30
+ def initialize(what, reason)
31
+ super "Invalid #{what}, #{reason}."
32
+ end
33
+ end
34
+
35
+ # Invalid group option. Used for inline options.
36
+ # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
37
+ class InvalidGroupOption < ValidationError
38
+ def initialize(option, text)
39
+ super "Invalid group option #{option} in #{text}"
40
+ end
41
+ end
42
+
43
+ # Invalid back reference. Used for named and numbered refs/calls.
44
+ class InvalidBackrefError < ValidationError
45
+ def initialize(what, reason)
46
+ super "Invalid back reference #{what}, #{reason}"
47
+ end
48
+ end
49
+
50
+ # The property name was not recognized by the scanner.
51
+ class UnknownUnicodePropertyError < ValidationError
52
+ def initialize(name, _)
53
+ super "Unknown unicode character property name #{name}"
54
+ end
55
+ end
56
+
57
+ # The POSIX class name was not recognized by the scanner.
58
+ class UnknownPosixClassError < ValidationError
59
+ def initialize(text, _)
60
+ super "Unknown POSIX class #{text}"
61
+ end
62
+ end
63
+ end
@@ -7,6 +7,8 @@ age=12.0,age=12.0
7
7
  age=12.1,age=12.1
8
8
  age=13.0,age=13.0
9
9
  age=14.0,age=14.0
10
+ age=15.0,age=15.0
11
+ age=15.1,age=15.1
10
12
  age=2.0,age=2.0
11
13
  age=2.1,age=2.1
12
14
  age=3.0,age=3.0
@@ -97,6 +99,7 @@ emojimodifierbase,emoji_modifier_base
97
99
  emojipresentation,emoji_presentation
98
100
  enclosingmark,enclosing_mark
99
101
  ethiopic,ethiopic
102
+ extendedpictographic,extended_pictographic
100
103
  extender,extender
101
104
  finalpunctuation,final_punctuation
102
105
  format,format
@@ -106,6 +109,19 @@ gothic,gothic
106
109
  grantha,grantha
107
110
  graph,graph
108
111
  graphemebase,grapheme_base
112
+ graphemeclusterbreak=control,grapheme_cluster_break=control
113
+ graphemeclusterbreak=cr,grapheme_cluster_break=cr
114
+ graphemeclusterbreak=extend,grapheme_cluster_break=extend
115
+ graphemeclusterbreak=l,grapheme_cluster_break=l
116
+ graphemeclusterbreak=lf,grapheme_cluster_break=lf
117
+ graphemeclusterbreak=lv,grapheme_cluster_break=lv
118
+ graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
119
+ graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
120
+ graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
121
+ graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
122
+ graphemeclusterbreak=t,grapheme_cluster_break=t
123
+ graphemeclusterbreak=v,grapheme_cluster_break=v
124
+ graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
109
125
  graphemeextend,grapheme_extend
110
126
  graphemelink,grapheme_link
111
127
  greek,greek
@@ -121,11 +137,14 @@ hebrew,hebrew
121
137
  hexdigit,hex_digit
122
138
  hiragana,hiragana
123
139
  hyphen,hyphen
140
+ idcompatmathcontinue,id_compat_math_continue
141
+ idcompatmathstart,id_compat_math_start
124
142
  idcontinue,id_continue
125
143
  ideographic,ideographic
126
144
  idsbinaryoperator,ids_binary_operator
127
145
  idstart,id_start
128
146
  idstrinaryoperator,ids_trinary_operator
147
+ idsunaryoperator,ids_unary_operator
129
148
  imperialaramaic,imperial_aramaic
130
149
  inadlam,in_adlam
131
150
  inaegeannumbers,in_aegean_numbers
@@ -139,6 +158,7 @@ inancientsymbols,in_ancient_symbols
139
158
  inarabic,in_arabic
140
159
  inarabicextendeda,in_arabic_extended_a
141
160
  inarabicextendedb,in_arabic_extended_b
161
+ inarabicextendedc,in_arabic_extended_c
142
162
  inarabicmathematicalalphabeticsymbols,in_arabic_mathematical_alphabetic_symbols
143
163
  inarabicpresentationformsa,in_arabic_presentation_forms_a
144
164
  inarabicpresentationformsb,in_arabic_presentation_forms_b
@@ -186,6 +206,8 @@ incjkunifiedideographsextensiond,in_cjk_unified_ideographs_extension_d
186
206
  incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
187
207
  incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
188
208
  incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
209
+ incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
210
+ incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
189
211
  incombiningdiacriticalmarks,in_combining_diacritical_marks
190
212
  incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
191
213
  incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
@@ -205,10 +227,12 @@ incyrillic,in_cyrillic
205
227
  incyrillicextendeda,in_cyrillic_extended_a
206
228
  incyrillicextendedb,in_cyrillic_extended_b
207
229
  incyrillicextendedc,in_cyrillic_extended_c
230
+ incyrillicextendedd,in_cyrillic_extended_d
208
231
  incyrillicsupplement,in_cyrillic_supplement
209
232
  indeseret,in_deseret
210
233
  indevanagari,in_devanagari
211
234
  indevanagariextended,in_devanagari_extended
235
+ indevanagariextendeda,in_devanagari_extended_a
212
236
  indingbats,in_dingbats
213
237
  indivesakuru,in_dives_akuru
214
238
  indogra,in_dogra
@@ -268,6 +292,7 @@ inipaextensions,in_ipa_extensions
268
292
  initialpunctuation,initial_punctuation
269
293
  injavanese,in_javanese
270
294
  inkaithi,in_kaithi
295
+ inkaktoviknumerals,in_kaktovik_numerals
271
296
  inkanaextendeda,in_kana_extended_a
272
297
  inkanaextendedb,in_kana_extended_b
273
298
  inkanasupplement,in_kana_supplement
@@ -276,6 +301,7 @@ inkangxiradicals,in_kangxi_radicals
276
301
  inkannada,in_kannada
277
302
  inkatakana,in_katakana
278
303
  inkatakanaphoneticextensions,in_katakana_phonetic_extensions
304
+ inkawi,in_kawi
279
305
  inkayahli,in_kayah_li
280
306
  inkharoshthi,in_kharoshthi
281
307
  inkhitansmallscript,in_khitan_small_script
@@ -339,6 +365,7 @@ inmyanmar,in_myanmar
339
365
  inmyanmarextendeda,in_myanmar_extended_a
340
366
  inmyanmarextendedb,in_myanmar_extended_b
341
367
  innabataean,in_nabataean
368
+ innagmundari,in_nag_mundari
342
369
  innandinagari,in_nandinagari
343
370
  innewa,in_newa
344
371
  innewtailue,in_new_tai_lue
@@ -457,6 +484,7 @@ joincontrol,join_control
457
484
  kaithi,kaithi
458
485
  kannada,kannada
459
486
  katakana,katakana
487
+ kawi,kawi
460
488
  kayahli,kayah_li
461
489
  kharoshthi,kharoshthi
462
490
  khitansmallscript,khitan_small_script
@@ -503,6 +531,7 @@ mro,mro
503
531
  multani,multani
504
532
  myanmar,myanmar
505
533
  nabataean,nabataean
534
+ nagmundari,nag_mundari
506
535
  nandinagari,nandinagari
507
536
  newa,newa
508
537
  newline,newline
@@ -57,6 +57,7 @@ emod,emoji_modifier
57
57
  epres,emoji_presentation
58
58
  ethi,ethiopic
59
59
  ext,extender
60
+ extpict,extended_pictographic
60
61
  geor,georgian
61
62
  glag,glagolitic
62
63
  gong,gunjala_gondi
@@ -85,6 +86,7 @@ ideo,ideographic
85
86
  ids,id_start
86
87
  idsb,ids_binary_operator
87
88
  idst,ids_trinary_operator
89
+ idsu,ids_unary_operator
88
90
  ital,old_italic
89
91
  java,javanese
90
92
  joinc,join_control
@@ -133,6 +135,7 @@ mtei,meetei_mayek
133
135
  mult,multani
134
136
  mymr,myanmar
135
137
  n,number
138
+ nagm,nag_mundari
136
139
  nand,nandinagari
137
140
  narb,old_north_arabian
138
141
  nbat,nabataean
@@ -20,7 +20,7 @@
20
20
  name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
21
21
 
22
22
  token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
- validation_error(:property, name) unless token
23
+ raise ValidationError.for(:property, name) unless token
24
24
 
25
25
  self.emit(type, token.to_sym, text)
26
26
 
@@ -30,11 +30,6 @@
30
30
 
31
31
  class_posix = ('[:' . '^'? . [^\[\]]* . ':]');
32
32
 
33
-
34
- # these are not supported in ruby at the moment
35
- collating_sequence = '[.' . (alpha | [\-])+ . '.]';
36
- character_equivalent = '[=' . alpha . '=]';
37
-
38
33
  line_anchor = beginning_of_line | end_of_line;
39
34
  anchor_char = [AbBzZG];
40
35
 
@@ -83,10 +78,9 @@
83
78
  # try to treat every other group head as options group, like Ruby
84
79
  group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
85
80
 
86
- group_ref = [gk];
87
81
  group_name_id_ab = ([^!0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
88
82
  group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
89
- group_number = '-'? . [1-9] . [0-9]*;
83
+ group_number = '-'? . [0-9]+;
90
84
  group_level = [+\-] . [0-9]+;
91
85
 
92
86
  group_name = ('<' . group_name_id_ab? . '>') |
@@ -95,15 +89,11 @@
95
89
 
96
90
  group_named = ('?' . group_name );
97
91
 
98
- group_name_backref = 'k' . (('<' . group_name_id_ab? . group_level? '>') |
99
- ("'" . group_name_id_sq? . group_level? "'"));
100
- group_name_call = 'g' . (('<' . group_name_id_ab? . group_level? '>') |
101
- ("'" . group_name_id_sq? . group_level? "'"));
92
+ group_ref_body = (('<' . (group_name_id_ab? | group_number) . group_level? '>') |
93
+ ("'" . (group_name_id_sq? | group_number) . group_level? "'"));
102
94
 
103
- group_number_backref = 'k' . (('<' . group_number . group_level? '>') |
104
- ("'" . group_number . group_level? "'"));
105
- group_number_call = 'g' . (('<' . ((group_number . group_level?) | '0') '>') |
106
- ("'" . ((group_number . group_level?) | '0') "'"));
95
+ group_ref = 'k' . group_ref_body;
96
+ group_call = 'g' . group_ref_body;
107
97
 
108
98
  group_type = group_atomic | group_passive | group_absence | group_named;
109
99
 
@@ -134,13 +124,13 @@
134
124
  # EOF error, used where it can be detected
135
125
  action premature_end_error {
136
126
  text = copy(data, ts ? ts-1 : 0, -1)
137
- raise PrematureEndError.new( text )
127
+ raise PrematureEndError.new(text)
138
128
  }
139
129
 
140
130
  # Invalid sequence error, used from sequences, like escapes and sets
141
131
  action invalid_sequence_error {
142
132
  text = copy(data, ts ? ts-1 : 0, -1)
143
- validation_error(:sequence, 'sequence', text)
133
+ raise ValidationError.for(:sequence, 'sequence', text)
144
134
  }
145
135
 
146
136
  # group (nesting) and set open/close actions
@@ -221,20 +211,12 @@
221
211
  end
222
212
 
223
213
  unless self.class.posix_classes.include?(class_name)
224
- validation_error(:posix_class, text)
214
+ raise ValidationError.for(:posix_class, text)
225
215
  end
226
216
 
227
217
  emit(type, class_name.to_sym, text)
228
218
  };
229
219
 
230
- # These are not supported in ruby at the moment. Enable them if they are.
231
- # collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
232
- # emit(:set, :collation, copy(data, ts, te))
233
- # };
234
- # character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
235
- # emit(:set, :equivalent, copy(data, ts, te))
236
- # };
237
-
238
220
  meta_char > (set_meta, 1) {
239
221
  emit(:literal, :literal, copy(data, ts, te))
240
222
  };
@@ -285,6 +267,13 @@
285
267
  fret;
286
268
  };
287
269
 
270
+ [8-9] . [0-9] { # special case, emits two tokens
271
+ text = copy(data, ts-1, te)
272
+ emit(:escape, :literal, text[0, 2])
273
+ emit(:literal, :literal, text[2])
274
+ fret;
275
+ };
276
+
288
277
  meta_char {
289
278
  case text = copy(data, ts-1, te)
290
279
  when '\.'; emit(:escape, :dot, text)
@@ -375,6 +364,7 @@
375
364
  conditional_expression := |*
376
365
  group_lookup . ')' {
377
366
  text = copy(data, ts, te-1)
367
+ text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
378
368
  emit(:conditional, :condition, text)
379
369
  emit(:conditional, :condition_close, ')')
380
370
  };
@@ -457,10 +447,9 @@
457
447
 
458
448
  # (?#...) comments: parsed as a single expression, without introducing a
459
449
  # new nesting level. Comments may not include parentheses, escaped or not.
460
- # special case for close, action performed on all transitions to get the
461
- # correct closing count.
450
+ # special case for close to get the correct closing count.
462
451
  # ------------------------------------------------------------------------
463
- group_open . group_comment $group_closed {
452
+ (group_open . group_comment) @group_closed {
464
453
  emit(:group, :comment, copy(data, ts, te))
465
454
  };
466
455
 
@@ -475,10 +464,10 @@
475
464
  #
476
465
  # (?imxdau-imx:subexp) option on/off for subexp
477
466
  # ------------------------------------------------------------------------
478
- group_open . group_options >group_opened {
467
+ (group_open . group_options) >group_opened {
479
468
  text = copy(data, ts, te)
480
469
  if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
481
- validation_error(:group_option, $1 || "-#{$2}", text)
470
+ raise ValidationError.for(:group_option, $1 || "-#{$2}", text)
482
471
  end
483
472
  emit_options(text)
484
473
  };
@@ -489,7 +478,7 @@
489
478
  # (?<=subexp) look-behind
490
479
  # (?<!subexp) negative look-behind
491
480
  # ------------------------------------------------------------------------
492
- group_open . assertion_type >group_opened {
481
+ (group_open . assertion_type) >group_opened {
493
482
  case text = copy(data, ts, te)
494
483
  when '(?='; emit(:assertion, :lookahead, text)
495
484
  when '(?!'; emit(:assertion, :nlookahead, text)
@@ -506,14 +495,14 @@
506
495
  # (?'name'subexp) named group (single quoted version)
507
496
  # (subexp) captured group
508
497
  # ------------------------------------------------------------------------
509
- group_open . group_type >group_opened {
498
+ (group_open . group_type) >group_opened {
510
499
  case text = copy(data, ts, te)
511
500
  when '(?:'; emit(:group, :passive, text)
512
501
  when '(?>'; emit(:group, :atomic, text)
513
502
  when '(?~'; emit(:group, :absence, text)
514
503
 
515
504
  when /^\(\?(?:<>|'')/
516
- validation_error(:group, 'named group', 'name is empty')
505
+ raise ValidationError.for(:group, 'named group', 'name is empty')
517
506
 
518
507
  when /^\(\?<[^>]+>/
519
508
  emit(:group, :named_ab, text)
@@ -533,7 +522,7 @@
533
522
  if conditional_stack.last == group_depth + 1
534
523
  conditional_stack.pop
535
524
  emit(:conditional, :close, ')')
536
- else
525
+ elsif group_depth >= 0
537
526
  if spacing_stack.length > 1 &&
538
527
  spacing_stack.last[:depth] == group_depth + 1
539
528
  spacing_stack.pop
@@ -541,41 +530,43 @@
541
530
  end
542
531
 
543
532
  emit(:group, :close, ')')
533
+ else
534
+ raise ValidationError.for(:group, 'group', 'unmatched close parenthesis')
544
535
  end
545
536
  };
546
537
 
547
538
 
548
539
  # Group backreference, named and numbered
549
540
  # ------------------------------------------------------------------------
550
- backslash . (group_name_backref | group_number_backref) > (backslashed, 4) {
541
+ backslash . (group_ref) > (backslashed, 4) {
551
542
  case text = copy(data, ts, te)
552
- when /^\\k(<>|'')/
553
- validation_error(:backref, 'backreference', 'ref ID is empty')
554
- when /^\\k(.)[^\p{digit}\-][^+\-]*\D$/
543
+ when /^\\k(.)[^0-9\-][^+\-]*['>]$/
555
544
  emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
556
- when /^\\k(.)\d+\D$/
545
+ when /^\\k(.)0*[1-9]\d*['>]$/
557
546
  emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
558
- when /^\\k(.)-\d+\D$/
547
+ when /^\\k(.)-0*[1-9]\d*['>]$/
559
548
  emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
560
- when /^\\k(.)[^\p{digit}\-].*[+\-]\d+\D$/
549
+ when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
561
550
  emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
562
- when /^\\k(.)-?\d+[+\-]\d+\D$/
551
+ when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
563
552
  emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
553
+ else
554
+ raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
564
555
  end
565
556
  };
566
557
 
567
558
  # Group call, named and numbered
568
559
  # ------------------------------------------------------------------------
569
- backslash . (group_name_call | group_number_call) > (backslashed, 4) {
560
+ backslash . (group_call) > (backslashed, 4) {
570
561
  case text = copy(data, ts, te)
571
- when /^\\g(<>|'')/
572
- validation_error(:backref, 'subexpression call', 'ref ID is empty')
573
- when /^\\g(.)[^\p{digit}+\->][^+\-]*/
562
+ when /^\\g(.)[^0-9+\-].*['>]$/
574
563
  emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
575
- when /^\\g(.)\d+\D$/
564
+ when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
576
565
  emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
577
- when /^\\g(.)[+-]\d+/
566
+ when /^\\g(.)[+-]0*[1-9]\d*/
578
567
  emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
568
+ else
569
+ raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
579
570
  end
580
571
  };
581
572
 
@@ -649,72 +640,11 @@
649
640
  *|;
650
641
  }%%
651
642
 
652
- # THIS IS A GENERATED FILE, DO NOT EDIT DIRECTLY
653
- # This file was generated from lib/regexp_parser/scanner/scanner.rl
654
-
655
- require 'regexp_parser/error'
643
+ require_relative 'scanner/errors/scanner_error'
644
+ require_relative 'scanner/errors/premature_end_error'
645
+ require_relative 'scanner/errors/validation_error'
656
646
 
657
647
  class Regexp::Scanner
658
- # General scanner error (catch all)
659
- class ScannerError < Regexp::Parser::Error; end
660
-
661
- # Base for all scanner validation errors
662
- class ValidationError < Regexp::Parser::Error
663
- def initialize(reason)
664
- super reason
665
- end
666
- end
667
-
668
- # Unexpected end of pattern
669
- class PrematureEndError < ScannerError
670
- def initialize(where = '')
671
- super "Premature end of pattern at #{where}"
672
- end
673
- end
674
-
675
- # Invalid sequence format. Used for escape sequences, mainly.
676
- class InvalidSequenceError < ValidationError
677
- def initialize(what = 'sequence', where = '')
678
- super "Invalid #{what} at #{where}"
679
- end
680
- end
681
-
682
- # Invalid group. Used for named groups.
683
- class InvalidGroupError < ValidationError
684
- def initialize(what, reason)
685
- super "Invalid #{what}, #{reason}."
686
- end
687
- end
688
-
689
- # Invalid group option. Used for inline options.
690
- # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
691
- class InvalidGroupOption < ValidationError
692
- def initialize(option, text)
693
- super "Invalid group option #{option} in #{text}"
694
- end
695
- end
696
-
697
- # Invalid back reference. Used for named and numbered refs/calls.
698
- class InvalidBackrefError < ValidationError
699
- def initialize(what, reason)
700
- super "Invalid back reference #{what}, #{reason}"
701
- end
702
- end
703
-
704
- # The property name was not recognized by the scanner.
705
- class UnknownUnicodePropertyError < ValidationError
706
- def initialize(name)
707
- super "Unknown unicode character property name #{name}"
708
- end
709
- end
710
-
711
- # The POSIX class name was not recognized by the scanner.
712
- class UnknownPosixClassError < ValidationError
713
- def initialize(text)
714
- super "Unknown POSIX class #{text}"
715
- end
716
- end
717
-
718
648
  # Scans the given regular expression text, or Regexp object and collects the
719
649
  # emitted token into an array that gets returned at the end. If a block is
720
650
  # given, it gets called for each emitted token.
@@ -891,24 +821,8 @@ class Regexp::Scanner
891
821
 
892
822
  def emit_meta_control_sequence(data, ts, te, token)
893
823
  if data.last < 0x00 || data.last > 0x7F
894
- validation_error(:sequence, 'escape', token.to_s)
824
+ raise ValidationError.for(:sequence, 'escape', token.to_s)
895
825
  end
896
826
  emit(:escape, token, copy(data, ts-1, te))
897
827
  end
898
-
899
- # Centralizes and unifies the handling of validation related
900
- # errors.
901
- def validation_error(type, what, reason = nil)
902
- error =
903
- case type
904
- when :backref then InvalidBackrefError.new(what, reason)
905
- when :group then InvalidGroupError.new(what, reason)
906
- when :group_option then InvalidGroupOption.new(what, reason)
907
- when :posix_class then UnknownPosixClassError.new(what)
908
- when :property then UnknownUnicodePropertyError.new(what)
909
- when :sequence then InvalidSequenceError.new(what, reason)
910
- end
911
-
912
- raise error # unless @@config.validation_ignore
913
- end
914
828
  end # module Regexp::Scanner