regexp_parser 2.6.2 → 2.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +67 -0
  3. data/Gemfile +2 -2
  4. data/README.md +32 -29
  5. data/lib/regexp_parser/expression/base.rb +0 -7
  6. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  7. data/lib/regexp_parser/expression/classes/backreference.rb +4 -2
  8. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  9. data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
  10. data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
  11. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  13. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  18. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  19. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  20. data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
  21. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  22. data/lib/regexp_parser/expression/sequence.rb +5 -10
  23. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  24. data/lib/regexp_parser/expression/shared.rb +37 -20
  25. data/lib/regexp_parser/expression/subexpression.rb +20 -15
  26. data/lib/regexp_parser/expression.rb +2 -0
  27. data/lib/regexp_parser/lexer.rb +76 -36
  28. data/lib/regexp_parser/parser.rb +97 -97
  29. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  30. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  31. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  32. data/lib/regexp_parser/scanner/mapping.rb +89 -0
  33. data/lib/regexp_parser/scanner/property.rl +2 -2
  34. data/lib/regexp_parser/scanner/scanner.rl +90 -169
  35. data/lib/regexp_parser/scanner.rb +1157 -1330
  36. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  37. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  38. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  39. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  40. data/lib/regexp_parser/syntax/token/unicode_property.rb +3 -0
  41. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  42. data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
  43. data/lib/regexp_parser/syntax/versions.rb +2 -0
  44. data/lib/regexp_parser/version.rb +1 -1
  45. metadata +10 -3
@@ -30,11 +30,6 @@
30
30
 
31
31
  class_posix = ('[:' . '^'? . [^\[\]]* . ':]');
32
32
 
33
-
34
- # these are not supported in ruby at the moment
35
- collating_sequence = '[.' . (alpha | [\-])+ . '.]';
36
- character_equivalent = '[=' . alpha . '=]';
37
-
38
33
  line_anchor = beginning_of_line | end_of_line;
39
34
  anchor_char = [AbBzZG];
40
35
 
@@ -59,9 +54,6 @@
59
54
  one_or_more = '+' | '+?' | '++';
60
55
 
61
56
  quantifier_greedy = '?' | '*' | '+';
62
- quantifier_reluctant = '??' | '*?' | '+?';
63
- quantifier_possessive = '?+' | '*+' | '++';
64
- quantifier_mode = '?' | '+';
65
57
 
66
58
  quantity_exact = (digit+);
67
59
  quantity_minimum = (digit+) . ',';
@@ -70,9 +62,6 @@
70
62
  quantifier_interval = range_open . ( quantity_exact | quantity_minimum |
71
63
  quantity_maximum | quantity_range ) . range_close;
72
64
 
73
- quantifiers = quantifier_greedy | quantifier_reluctant |
74
- quantifier_possessive | quantifier_interval;
75
-
76
65
  conditional = '(?(';
77
66
 
78
67
  group_comment = '?#' . [^)]* . group_close;
@@ -89,10 +78,9 @@
89
78
  # try to treat every other group head as options group, like Ruby
90
79
  group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
91
80
 
92
- group_ref = [gk];
93
81
  group_name_id_ab = ([^!0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
94
82
  group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
95
- group_number = '-'? . [1-9] . [0-9]*;
83
+ group_number = '-'? . [0-9]+;
96
84
  group_level = [+\-] . [0-9]+;
97
85
 
98
86
  group_name = ('<' . group_name_id_ab? . '>') |
@@ -101,15 +89,11 @@
101
89
 
102
90
  group_named = ('?' . group_name );
103
91
 
104
- group_name_backref = 'k' . (('<' . group_name_id_ab? . group_level? '>') |
105
- ("'" . group_name_id_sq? . group_level? "'"));
106
- group_name_call = 'g' . (('<' . group_name_id_ab? . group_level? '>') |
107
- ("'" . group_name_id_sq? . group_level? "'"));
92
+ group_ref_body = (('<' . (group_name_id_ab? | group_number) . group_level? '>') |
93
+ ("'" . (group_name_id_sq? | group_number) . group_level? "'"));
108
94
 
109
- group_number_backref = 'k' . (('<' . group_number . group_level? '>') |
110
- ("'" . group_number . group_level? "'"));
111
- group_number_call = 'g' . (('<' . ((group_number . group_level?) | '0') '>') |
112
- ("'" . ((group_number . group_level?) | '0') "'"));
95
+ group_ref = 'k' . group_ref_body;
96
+ group_call = 'g' . group_ref_body;
113
97
 
114
98
  group_type = group_atomic | group_passive | group_absence | group_named;
115
99
 
@@ -132,20 +116,21 @@
132
116
  keep_mark | sequence_char;
133
117
 
134
118
  # escapes that also work within a character set
135
- set_escape = backslash | brackets | escaped_ascii | property_char |
119
+ set_escape = backslash | brackets | escaped_ascii |
120
+ octal_sequence | property_char |
136
121
  sequence_char | single_codepoint_char_type;
137
122
 
138
123
 
139
124
  # EOF error, used where it can be detected
140
125
  action premature_end_error {
141
126
  text = copy(data, ts ? ts-1 : 0, -1)
142
- raise PrematureEndError.new( text )
127
+ raise PrematureEndError.new(text)
143
128
  }
144
129
 
145
130
  # Invalid sequence error, used from sequences, like escapes and sets
146
131
  action invalid_sequence_error {
147
132
  text = copy(data, ts ? ts-1 : 0, -1)
148
- validation_error(:sequence, 'sequence', text)
133
+ raise ValidationError.for(:sequence, 'sequence', text)
149
134
  }
150
135
 
151
136
  # group (nesting) and set open/close actions
@@ -168,8 +153,8 @@
168
153
  };
169
154
 
170
155
  '-]' @set_closed { # special case, emits two tokens
171
- emit(:literal, :literal, copy(data, ts, te-1))
172
- emit(:set, :close, copy(data, ts+1, te))
156
+ emit(:literal, :literal, '-')
157
+ emit(:set, :close, ']')
173
158
  if in_set?
174
159
  fret;
175
160
  else
@@ -183,28 +168,27 @@
183
168
  };
184
169
 
185
170
  '^' {
186
- text = copy(data, ts, te)
187
- if tokens.last[1] == :open
188
- emit(:set, :negate, text)
171
+ if prev_token[1] == :open
172
+ emit(:set, :negate, '^')
189
173
  else
190
- emit(:literal, :literal, text)
174
+ emit(:literal, :literal, '^')
191
175
  end
192
176
  };
193
177
 
194
178
  '-' {
195
- text = copy(data, ts, te)
196
- # ranges cant start with a subset or intersection/negation/range operator
197
- if tokens.last[0] == :set
198
- emit(:literal, :literal, text)
179
+ # ranges cant start with the opening bracket, a subset, or
180
+ # intersection/negation/range operators
181
+ if prev_token[0] == :set
182
+ emit(:literal, :literal, '-')
199
183
  else
200
- emit(:set, :range, text)
184
+ emit(:set, :range, '-')
201
185
  end
202
186
  };
203
187
 
204
188
  # Unlike ranges, intersections can start or end at set boundaries, whereupon
205
189
  # they match nothing: r = /[a&&]/; [r =~ ?a, r =~ ?&] # => [nil, nil]
206
190
  '&&' {
207
- emit(:set, :intersection, copy(data, ts, te))
191
+ emit(:set, :intersection, '&&')
208
192
  };
209
193
 
210
194
  backslash {
@@ -212,7 +196,7 @@
212
196
  };
213
197
 
214
198
  set_open >(open_bracket, 1) >set_opened {
215
- emit(:set, :open, copy(data, ts, te))
199
+ emit(:set, :open, '[')
216
200
  fcall character_set;
217
201
  };
218
202
 
@@ -227,20 +211,12 @@
227
211
  end
228
212
 
229
213
  unless self.class.posix_classes.include?(class_name)
230
- validation_error(:posix_class, text)
214
+ raise ValidationError.for(:posix_class, text)
231
215
  end
232
216
 
233
217
  emit(type, class_name.to_sym, text)
234
218
  };
235
219
 
236
- # These are not supported in ruby at the moment. Enable them if they are.
237
- # collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
238
- # emit(:set, :collation, copy(data, ts, te))
239
- # };
240
- # character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
241
- # emit(:set, :equivalent, copy(data, ts, te))
242
- # };
243
-
244
220
  meta_char > (set_meta, 1) {
245
221
  emit(:literal, :literal, copy(data, ts, te))
246
222
  };
@@ -254,12 +230,22 @@
254
230
  # set escapes scanner
255
231
  # --------------------------------------------------------------------------
256
232
  set_escape_sequence := |*
233
+ # Special case: in sets, octal sequences have higher priority than backrefs
234
+ octal_sequence {
235
+ emit(:escape, :octal, copy(data, ts-1, te))
236
+ fret;
237
+ };
238
+
239
+ # Scan all other escapes that work in sets with the generic escape scanner
257
240
  set_escape > (escaped_set_alpha, 2) {
258
241
  fhold;
259
242
  fnext character_set;
260
243
  fcall escape_sequence;
261
244
  };
262
245
 
246
+ # Treat all remaining escapes - those not supported in sets - as literal.
247
+ # (This currently includes \^, \-, \&, \:, although these could potentially
248
+ # be meta chars when not escaped, depending on their position in the set.)
263
249
  any > (escaped_set_alpha, 1) {
264
250
  emit(:escape, :literal, copy(data, ts-1, te))
265
251
  fret;
@@ -453,10 +439,9 @@
453
439
 
454
440
  # (?#...) comments: parsed as a single expression, without introducing a
455
441
  # new nesting level. Comments may not include parentheses, escaped or not.
456
- # special case for close, action performed on all transitions to get the
457
- # correct closing count.
442
+ # special case for close to get the correct closing count.
458
443
  # ------------------------------------------------------------------------
459
- group_open . group_comment $group_closed {
444
+ (group_open . group_comment) @group_closed {
460
445
  emit(:group, :comment, copy(data, ts, te))
461
446
  };
462
447
 
@@ -471,10 +456,10 @@
471
456
  #
472
457
  # (?imxdau-imx:subexp) option on/off for subexp
473
458
  # ------------------------------------------------------------------------
474
- group_open . group_options >group_opened {
459
+ (group_open . group_options) >group_opened {
475
460
  text = copy(data, ts, te)
476
461
  if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
477
- validation_error(:group_option, $1 || "-#{$2}", text)
462
+ raise ValidationError.for(:group_option, $1 || "-#{$2}", text)
478
463
  end
479
464
  emit_options(text)
480
465
  };
@@ -485,7 +470,7 @@
485
470
  # (?<=subexp) look-behind
486
471
  # (?<!subexp) negative look-behind
487
472
  # ------------------------------------------------------------------------
488
- group_open . assertion_type >group_opened {
473
+ (group_open . assertion_type) >group_opened {
489
474
  case text = copy(data, ts, te)
490
475
  when '(?='; emit(:assertion, :lookahead, text)
491
476
  when '(?!'; emit(:assertion, :nlookahead, text)
@@ -502,14 +487,14 @@
502
487
  # (?'name'subexp) named group (single quoted version)
503
488
  # (subexp) captured group
504
489
  # ------------------------------------------------------------------------
505
- group_open . group_type >group_opened {
490
+ (group_open . group_type) >group_opened {
506
491
  case text = copy(data, ts, te)
507
492
  when '(?:'; emit(:group, :passive, text)
508
493
  when '(?>'; emit(:group, :atomic, text)
509
494
  when '(?~'; emit(:group, :absence, text)
510
495
 
511
496
  when /^\(\?(?:<>|'')/
512
- validation_error(:group, 'named group', 'name is empty')
497
+ raise ValidationError.for(:group, 'named group', 'name is empty')
513
498
 
514
499
  when /^\(\?<[^>]+>/
515
500
  emit(:group, :named_ab, text)
@@ -528,50 +513,52 @@
528
513
  group_close @group_closed {
529
514
  if conditional_stack.last == group_depth + 1
530
515
  conditional_stack.pop
531
- emit(:conditional, :close, copy(data, ts, te))
532
- else
516
+ emit(:conditional, :close, ')')
517
+ elsif group_depth >= 0
533
518
  if spacing_stack.length > 1 &&
534
519
  spacing_stack.last[:depth] == group_depth + 1
535
520
  spacing_stack.pop
536
521
  self.free_spacing = spacing_stack.last[:free_spacing]
537
522
  end
538
523
 
539
- emit(:group, :close, copy(data, ts, te))
524
+ emit(:group, :close, ')')
525
+ else
526
+ raise ValidationError.for(:group, 'group', 'unmatched close parenthesis')
540
527
  end
541
528
  };
542
529
 
543
530
 
544
531
  # Group backreference, named and numbered
545
532
  # ------------------------------------------------------------------------
546
- backslash . (group_name_backref | group_number_backref) > (backslashed, 4) {
533
+ backslash . (group_ref) > (backslashed, 4) {
547
534
  case text = copy(data, ts, te)
548
- when /^\\k(<>|'')/
549
- validation_error(:backref, 'backreference', 'ref ID is empty')
550
- when /^\\k(.)[^\p{digit}\-][^+\-]*\D$/
535
+ when /^\\k(.)[^0-9\-][^+\-]*['>]$/
551
536
  emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
552
- when /^\\k(.)\d+\D$/
537
+ when /^\\k(.)[1-9]\d*['>]$/
553
538
  emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
554
- when /^\\k(.)-\d+\D$/
539
+ when /^\\k(.)-[1-9]\d*['>]$/
555
540
  emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
556
- when /^\\k(.)[^\p{digit}\-].*[+\-]\d+\D$/
541
+ when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
557
542
  emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
558
- when /^\\k(.)-?\d+[+\-]\d+\D$/
543
+ when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
559
544
  emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
545
+ else
546
+ raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
560
547
  end
561
548
  };
562
549
 
563
550
  # Group call, named and numbered
564
551
  # ------------------------------------------------------------------------
565
- backslash . (group_name_call | group_number_call) > (backslashed, 4) {
552
+ backslash . (group_call) > (backslashed, 4) {
566
553
  case text = copy(data, ts, te)
567
- when /^\\g(<>|'')/
568
- validation_error(:backref, 'subexpression call', 'ref ID is empty')
569
- when /^\\g(.)[^\p{digit}+\->][^+\-]*/
554
+ when /^\\g(.)[^0-9+\-].*['>]$/
570
555
  emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
571
- when /^\\g(.)\d+\D$/
556
+ when /^\\g(.)\d+['>]$/
572
557
  emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
573
558
  when /^\\g(.)[+-]\d+/
574
559
  emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
560
+ else
561
+ raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
575
562
  end
576
563
  };
577
564
 
@@ -645,95 +632,35 @@
645
632
  *|;
646
633
  }%%
647
634
 
648
- # THIS IS A GENERATED FILE, DO NOT EDIT DIRECTLY
649
- # This file was generated from lib/regexp_parser/scanner/scanner.rl
650
-
651
- require 'regexp_parser/error'
635
+ require 'regexp_parser/scanner/errors/scanner_error'
636
+ require 'regexp_parser/scanner/errors/premature_end_error'
637
+ require 'regexp_parser/scanner/errors/validation_error'
652
638
 
653
639
  class Regexp::Scanner
654
- # General scanner error (catch all)
655
- class ScannerError < Regexp::Parser::Error; end
656
-
657
- # Base for all scanner validation errors
658
- class ValidationError < Regexp::Parser::Error
659
- def initialize(reason)
660
- super reason
661
- end
662
- end
663
-
664
- # Unexpected end of pattern
665
- class PrematureEndError < ScannerError
666
- def initialize(where = '')
667
- super "Premature end of pattern at #{where}"
668
- end
669
- end
670
-
671
- # Invalid sequence format. Used for escape sequences, mainly.
672
- class InvalidSequenceError < ValidationError
673
- def initialize(what = 'sequence', where = '')
674
- super "Invalid #{what} at #{where}"
675
- end
676
- end
677
-
678
- # Invalid group. Used for named groups.
679
- class InvalidGroupError < ValidationError
680
- def initialize(what, reason)
681
- super "Invalid #{what}, #{reason}."
682
- end
683
- end
684
-
685
- # Invalid groupOption. Used for inline options.
686
- # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
687
- class InvalidGroupOption < ValidationError
688
- def initialize(option, text)
689
- super "Invalid group option #{option} in #{text}"
690
- end
691
- end
692
-
693
- # Invalid back reference. Used for name a number refs/calls.
694
- class InvalidBackrefError < ValidationError
695
- def initialize(what, reason)
696
- super "Invalid back reference #{what}, #{reason}"
697
- end
698
- end
699
-
700
- # The property name was not recognized by the scanner.
701
- class UnknownUnicodePropertyError < ValidationError
702
- def initialize(name)
703
- super "Unknown unicode character property name #{name}"
704
- end
705
- end
706
-
707
- # The POSIX class name was not recognized by the scanner.
708
- class UnknownPosixClassError < ValidationError
709
- def initialize(text)
710
- super "Unknown POSIX class #{text}"
711
- end
712
- end
713
-
714
640
  # Scans the given regular expression text, or Regexp object and collects the
715
641
  # emitted token into an array that gets returned at the end. If a block is
716
642
  # given, it gets called for each emitted token.
717
643
  #
718
644
  # This method may raise errors if a syntax error is encountered.
719
645
  # --------------------------------------------------------------------------
720
- def self.scan(input_object, options: nil, &block)
721
- new.scan(input_object, options: options, &block)
646
+ def self.scan(input_object, options: nil, collect_tokens: true, &block)
647
+ new.scan(input_object, options: options, collect_tokens: collect_tokens, &block)
722
648
  end
723
649
 
724
- def scan(input_object, options: nil, &block)
725
- self.literal = nil
650
+ def scan(input_object, options: nil, collect_tokens: true, &block)
651
+ self.collect_tokens = collect_tokens
652
+ self.literal_run = nil
726
653
  stack = []
727
654
 
728
655
  input = input_object.is_a?(Regexp) ? input_object.source : input_object
729
656
  self.free_spacing = free_spacing?(input_object, options)
730
657
  self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
731
658
 
732
- data = input.unpack("c*") if input.is_a?(String)
659
+ data = input.unpack("c*")
733
660
  eof = data.length
734
661
 
735
662
  self.tokens = []
736
- self.block = block_given? ? block : nil
663
+ self.block = block
737
664
 
738
665
  self.set_depth = 0
739
666
  self.group_depth = 0
@@ -758,7 +685,7 @@ class Regexp::Scanner
758
685
  "[#{set_depth}]") if in_set?
759
686
 
760
687
  # when the entire expression is a literal run
761
- emit_literal if literal
688
+ emit_literal if literal_run
762
689
 
763
690
  tokens
764
691
  end
@@ -785,26 +712,37 @@ class Regexp::Scanner
785
712
  def emit(type, token, text)
786
713
  #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
787
714
 
788
- emit_literal if literal
715
+ emit_literal if literal_run
789
716
 
790
717
  # Ragel runs with byte-based indices (ts, te). These are of little value to
791
718
  # end-users, so we keep track of char-based indices and emit those instead.
792
719
  ts_char_pos = char_pos
793
720
  te_char_pos = char_pos + text.length
794
721
 
795
- if block
796
- block.call type, token, text, ts_char_pos, te_char_pos
797
- end
722
+ tok = [type, token, text, ts_char_pos, te_char_pos]
798
723
 
799
- tokens << [type, token, text, ts_char_pos, te_char_pos]
724
+ self.prev_token = tok
800
725
 
801
726
  self.char_pos = te_char_pos
727
+
728
+ if block
729
+ block.call type, token, text, ts_char_pos, te_char_pos
730
+ # TODO: in v3.0.0, remove `collect_tokens:` kwarg and only collect if no block given
731
+ tokens << tok if collect_tokens
732
+ elsif collect_tokens
733
+ tokens << tok
734
+ end
802
735
  end
803
736
 
737
+ attr_accessor :literal_run # only public for #||= to work on ruby <= 2.5
738
+
804
739
  private
805
740
 
806
- attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
807
- :group_depth, :set_depth, :conditional_stack, :char_pos
741
+ attr_accessor :block,
742
+ :collect_tokens, :tokens, :prev_token,
743
+ :free_spacing, :spacing_stack,
744
+ :group_depth, :set_depth, :conditional_stack,
745
+ :char_pos
808
746
 
809
747
  def free_spacing?(input_object, options)
810
748
  if options && !input_object.is_a?(String)
@@ -834,14 +772,13 @@ class Regexp::Scanner
834
772
  # Appends one or more characters to the literal buffer, to be emitted later
835
773
  # by a call to emit_literal.
836
774
  def append_literal(data, ts, te)
837
- self.literal = literal || []
838
- literal << copy(data, ts, te)
775
+ (self.literal_run ||= []) << copy(data, ts, te)
839
776
  end
840
777
 
841
778
  # Emits the literal run collected by calls to the append_literal method.
842
779
  def emit_literal
843
- text = literal.join
844
- self.literal = nil
780
+ text = literal_run.join
781
+ self.literal_run = nil
845
782
  emit(:literal, :literal, text)
846
783
  end
847
784
 
@@ -876,24 +813,8 @@ class Regexp::Scanner
876
813
 
877
814
  def emit_meta_control_sequence(data, ts, te, token)
878
815
  if data.last < 0x00 || data.last > 0x7F
879
- validation_error(:sequence, 'escape', token.to_s)
816
+ raise ValidationError.for(:sequence, 'escape', token.to_s)
880
817
  end
881
818
  emit(:escape, token, copy(data, ts-1, te))
882
819
  end
883
-
884
- # Centralizes and unifies the handling of validation related
885
- # errors.
886
- def validation_error(type, what, reason = nil)
887
- error =
888
- case type
889
- when :backref then InvalidBackrefError.new(what, reason)
890
- when :group then InvalidGroupError.new(what, reason)
891
- when :group_option then InvalidGroupOption.new(what, reason)
892
- when :posix_class then UnknownPosixClassError.new(what)
893
- when :property then UnknownUnicodePropertyError.new(what)
894
- when :sequence then InvalidSequenceError.new(what, reason)
895
- end
896
-
897
- raise error # unless @@config.validation_ignore
898
- end
899
820
  end # module Regexp::Scanner