regexp_parser 1.5.0 → 1.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/lib/regexp_parser/expression.rb +6 -43
- data/lib/regexp_parser/expression/classes/conditional.rb +3 -2
- data/lib/regexp_parser/expression/classes/escape.rb +0 -4
- data/lib/regexp_parser/expression/methods/match.rb +13 -0
- data/lib/regexp_parser/expression/methods/options.rb +35 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
- data/lib/regexp_parser/expression/methods/tests.rb +6 -15
- data/lib/regexp_parser/expression/sequence.rb +3 -2
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
- data/lib/regexp_parser/lexer.rb +0 -21
- data/lib/regexp_parser/parser.rb +22 -21
- data/lib/regexp_parser/scanner.rb +1159 -1329
- data/lib/regexp_parser/scanner/char_type.rl +0 -3
- data/lib/regexp_parser/scanner/scanner.rl +82 -190
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/expression/base_spec.rb +14 -0
- data/spec/expression/methods/match_length_spec.rb +13 -0
- data/spec/expression/methods/match_spec.rb +25 -0
- data/spec/expression/methods/tests_spec.rb +2 -0
- data/spec/expression/options_spec.rb +128 -0
- data/spec/expression/root_spec.rb +9 -0
- data/spec/expression/sequence_spec.rb +9 -0
- data/spec/lexer/conditionals_spec.rb +49 -119
- data/spec/lexer/escapes_spec.rb +8 -32
- data/spec/lexer/keep_spec.rb +5 -17
- data/spec/lexer/literals_spec.rb +73 -110
- data/spec/lexer/nesting_spec.rb +86 -117
- data/spec/lexer/refcalls_spec.rb +51 -50
- data/spec/parser/all_spec.rb +13 -1
- data/spec/parser/anchors_spec.rb +9 -23
- data/spec/parser/conditionals_spec.rb +9 -9
- data/spec/parser/errors_spec.rb +22 -43
- data/spec/parser/escapes_spec.rb +33 -44
- data/spec/parser/groups_spec.rb +98 -257
- data/spec/parser/keep_spec.rb +2 -15
- data/spec/parser/posix_classes_spec.rb +5 -24
- data/spec/parser/properties_spec.rb +42 -54
- data/spec/parser/quantifiers_spec.rb +41 -283
- data/spec/parser/refcalls_spec.rb +60 -185
- data/spec/parser/set/intersections_spec.rb +17 -17
- data/spec/parser/set/ranges_spec.rb +17 -17
- data/spec/parser/sets_spec.rb +5 -5
- data/spec/parser/types_spec.rb +11 -36
- data/spec/scanner/anchors_spec.rb +13 -28
- data/spec/scanner/conditionals_spec.rb +121 -173
- data/spec/scanner/errors_spec.rb +65 -87
- data/spec/scanner/escapes_spec.rb +49 -50
- data/spec/scanner/free_space_spec.rb +102 -165
- data/spec/scanner/groups_spec.rb +45 -64
- data/spec/scanner/keep_spec.rb +5 -28
- data/spec/scanner/literals_spec.rb +45 -81
- data/spec/scanner/meta_spec.rb +13 -33
- data/spec/scanner/properties_spec.rb +43 -286
- data/spec/scanner/quantifiers_spec.rb +13 -28
- data/spec/scanner/refcalls_spec.rb +32 -48
- data/spec/scanner/sets_spec.rb +88 -102
- data/spec/scanner/types_spec.rb +10 -25
- data/spec/spec_helper.rb +1 -0
- data/spec/support/shared_examples.rb +77 -0
- data/spec/syntax/syntax_spec.rb +4 -0
- data/spec/syntax/versions/1.8.6_spec.rb +12 -33
- data/spec/syntax/versions/1.9.1_spec.rb +5 -18
- data/spec/syntax/versions/1.9.3_spec.rb +4 -17
- data/spec/syntax/versions/2.0.0_spec.rb +8 -23
- data/spec/syntax/versions/2.2.0_spec.rb +4 -17
- data/spec/syntax/versions/aliases_spec.rb +25 -109
- metadata +14 -6
- data/spec/scanner/scripts_spec.rb +0 -49
- data/spec/scanner/unicode_blocks_spec.rb +0 -28
@@ -21,9 +21,6 @@
|
|
21
21
|
when '\W'; emit(:type, :nonword, text, ts - 1, te)
|
22
22
|
when '\R'; emit(:type, :linebreak, text, ts - 1, te)
|
23
23
|
when '\X'; emit(:type, :xgrapheme, text, ts - 1, te)
|
24
|
-
else
|
25
|
-
raise ScannerError.new(
|
26
|
-
"Unexpected character in type at #{text} (char #{ts})")
|
27
24
|
end
|
28
25
|
fret;
|
29
26
|
};
|
@@ -49,9 +49,9 @@
|
|
49
49
|
codepoint_list = 'u{' . xdigit{1,6} . (space . xdigit{1,6})* . '}';
|
50
50
|
codepoint_sequence = codepoint_single | codepoint_list;
|
51
51
|
|
52
|
-
control_sequence = ('c' | 'C-') . (backslash . 'M-')
|
52
|
+
control_sequence = ('c' | 'C-') . (backslash . 'M-')? . backslash? . any;
|
53
53
|
|
54
|
-
meta_sequence = 'M-' . (backslash .
|
54
|
+
meta_sequence = 'M-' . (backslash . ('c' | 'C-'))? . backslash? . any;
|
55
55
|
|
56
56
|
zero_or_one = '?' | '??' | '?+';
|
57
57
|
zero_or_more = '*' | '*?' | '*+';
|
@@ -82,7 +82,8 @@
|
|
82
82
|
assertion_lookbehind = '?<=';
|
83
83
|
assertion_nlookbehind = '?<!';
|
84
84
|
|
85
|
-
|
85
|
+
# try to treat every other group head as options group, like Ruby
|
86
|
+
group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
|
86
87
|
|
87
88
|
group_ref = [gk];
|
88
89
|
group_name_char = (alnum | '_');
|
@@ -135,41 +136,35 @@
|
|
135
136
|
# Invalid sequence error, used from sequences, like escapes and sets
|
136
137
|
action invalid_sequence_error {
|
137
138
|
text = ts ? copy(data, ts-1..-1) : data.pack('c*')
|
138
|
-
|
139
|
+
validation_error(:sequence, 'sequence', text)
|
139
140
|
}
|
140
141
|
|
141
142
|
# group (nesting) and set open/close actions
|
142
|
-
action group_opened { self.group_depth = group_depth + 1
|
143
|
-
action group_closed { self.group_depth = group_depth - 1
|
143
|
+
action group_opened { self.group_depth = group_depth + 1 }
|
144
|
+
action group_closed { self.group_depth = group_depth - 1 }
|
145
|
+
action set_opened { self.set_depth = set_depth + 1 }
|
146
|
+
action set_closed { self.set_depth = set_depth - 1 }
|
144
147
|
|
145
148
|
# Character set scanner, continues consuming characters until it meets the
|
146
149
|
# closing bracket of the set.
|
147
150
|
# --------------------------------------------------------------------------
|
148
151
|
character_set := |*
|
149
|
-
set_close > (set_meta, 2) {
|
150
|
-
set_depth -= 1
|
151
|
-
in_set = set_depth > 0 ? true : false
|
152
|
-
|
152
|
+
set_close > (set_meta, 2) @set_closed {
|
153
153
|
emit(:set, :close, *text(data, ts, te))
|
154
|
-
|
155
|
-
if set_depth == 0
|
156
|
-
fgoto main;
|
157
|
-
else
|
154
|
+
if in_set?
|
158
155
|
fret;
|
156
|
+
else
|
157
|
+
fgoto main;
|
159
158
|
end
|
160
159
|
};
|
161
160
|
|
162
|
-
'-]' { # special case, emits two tokens
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
emit(:literal, :literal, copy(data, ts..te-2), ts, te)
|
167
|
-
emit(:set, :close, copy(data, ts+1..te-1), ts, te)
|
168
|
-
|
169
|
-
if set_depth == 0
|
170
|
-
fgoto main;
|
171
|
-
else
|
161
|
+
'-]' @set_closed { # special case, emits two tokens
|
162
|
+
emit(:literal, :literal, copy(data, ts..te-2), ts, te - 1)
|
163
|
+
emit(:set, :close, copy(data, ts+1..te-1), ts + 1, te)
|
164
|
+
if in_set?
|
172
165
|
fret;
|
166
|
+
else
|
167
|
+
fgoto main;
|
173
168
|
end
|
174
169
|
};
|
175
170
|
|
@@ -207,14 +202,12 @@
|
|
207
202
|
fcall set_escape_sequence;
|
208
203
|
};
|
209
204
|
|
210
|
-
set_open >(open_bracket, 1) {
|
211
|
-
set_depth += 1
|
212
|
-
|
205
|
+
set_open >(open_bracket, 1) >set_opened {
|
213
206
|
emit(:set, :open, *text(data, ts, te))
|
214
207
|
fcall character_set;
|
215
208
|
};
|
216
209
|
|
217
|
-
class_posix >(open_bracket, 1) @eof(premature_end_error)
|
210
|
+
class_posix >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
218
211
|
text = text(data, ts, te).first
|
219
212
|
|
220
213
|
type = :posixclass
|
@@ -227,11 +220,11 @@
|
|
227
220
|
emit(type, class_name.to_sym, text, ts, te)
|
228
221
|
};
|
229
222
|
|
230
|
-
collating_sequence >(open_bracket, 1) @eof(premature_end_error)
|
223
|
+
collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
231
224
|
emit(:set, :collation, *text(data, ts, te))
|
232
225
|
};
|
233
226
|
|
234
|
-
character_equivalent >(open_bracket, 1) @eof(premature_end_error)
|
227
|
+
character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
235
228
|
emit(:set, :equivalent, *text(data, ts, te))
|
236
229
|
};
|
237
230
|
|
@@ -337,44 +330,24 @@
|
|
337
330
|
};
|
338
331
|
|
339
332
|
control_sequence >(escaped_alpha, 4) $eof(premature_end_error) {
|
340
|
-
|
341
|
-
c = data[te].chr
|
342
|
-
if c =~ /[\x00-\x7F]/
|
343
|
-
emit(:escape, :control, copy(data, ts-1..te), ts-1, te+1)
|
344
|
-
p += 1
|
345
|
-
else
|
346
|
-
raise InvalidSequenceError.new("control sequence")
|
347
|
-
end
|
348
|
-
else
|
349
|
-
raise PrematureEndError.new("control sequence")
|
350
|
-
end
|
333
|
+
emit_meta_control_sequence(data, ts, te, :control)
|
351
334
|
fret;
|
352
335
|
};
|
353
336
|
|
354
337
|
meta_sequence >(backslashed, 3) $eof(premature_end_error) {
|
355
|
-
|
356
|
-
c = data[te].chr
|
357
|
-
if c =~ /[\x00-\x7F]/
|
358
|
-
emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1)
|
359
|
-
p += 1
|
360
|
-
else
|
361
|
-
raise InvalidSequenceError.new("meta sequence")
|
362
|
-
end
|
363
|
-
else
|
364
|
-
raise PrematureEndError.new("meta sequence")
|
365
|
-
end
|
338
|
+
emit_meta_control_sequence(data, ts, te, :meta_sequence)
|
366
339
|
fret;
|
367
340
|
};
|
368
341
|
|
369
342
|
char_type_char > (escaped_alpha, 2) {
|
370
343
|
fhold;
|
371
|
-
fnext *(in_set ? fentry(character_set) : fentry(main));
|
344
|
+
fnext *(in_set? ? fentry(character_set) : fentry(main));
|
372
345
|
fcall char_type;
|
373
346
|
};
|
374
347
|
|
375
348
|
property_char > (escaped_alpha, 2) {
|
376
349
|
fhold;
|
377
|
-
fnext *(in_set ? fentry(character_set) : fentry(main));
|
350
|
+
fnext *(in_set? ? fentry(character_set) : fentry(main));
|
378
351
|
fcall unicode_property;
|
379
352
|
};
|
380
353
|
|
@@ -412,8 +385,7 @@
|
|
412
385
|
};
|
413
386
|
|
414
387
|
alternation {
|
415
|
-
if
|
416
|
-
conditional_stack.last[1] == group_depth
|
388
|
+
if conditional_stack.last == group_depth
|
417
389
|
emit(:conditional, :separator, *text(data, ts, te))
|
418
390
|
else
|
419
391
|
emit(:meta, :alternation, *text(data, ts, te))
|
@@ -442,18 +414,12 @@
|
|
442
414
|
when '\\b'; emit(:anchor, :word_boundary, text, ts, te)
|
443
415
|
when '\\B'; emit(:anchor, :nonword_boundary, text, ts, te)
|
444
416
|
when '\\G'; emit(:anchor, :match_start, text, ts, te)
|
445
|
-
else
|
446
|
-
raise ScannerError.new(
|
447
|
-
"Unexpected character in anchor at #{text} (char #{ts})")
|
448
417
|
end
|
449
418
|
};
|
450
419
|
|
451
420
|
# Character sets
|
452
421
|
# ------------------------------------------------------------------------
|
453
|
-
set_open {
|
454
|
-
set_depth += 1
|
455
|
-
in_set = true
|
456
|
-
|
422
|
+
set_open >set_opened {
|
457
423
|
emit(:set, :open, *text(data, ts, te))
|
458
424
|
fcall character_set;
|
459
425
|
};
|
@@ -465,9 +431,7 @@
|
|
465
431
|
conditional {
|
466
432
|
text = text(data, ts, te).first
|
467
433
|
|
468
|
-
|
469
|
-
conditional_depth += 1
|
470
|
-
conditional_stack << [conditional_depth, group_depth]
|
434
|
+
conditional_stack << group_depth
|
471
435
|
|
472
436
|
emit(:conditional, :open, text[0..-2], ts, te-1)
|
473
437
|
emit(:conditional, :condition_open, '(', te-1, te)
|
@@ -496,7 +460,11 @@
|
|
496
460
|
# (?imxdau-imx:subexp) option on/off for subexp
|
497
461
|
# ------------------------------------------------------------------------
|
498
462
|
group_open . group_options >group_opened {
|
499
|
-
|
463
|
+
text = text(data, ts, te).first
|
464
|
+
if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
|
465
|
+
raise InvalidGroupOption.new($1 || "-#{$2}", text)
|
466
|
+
end
|
467
|
+
emit_options(text, ts, te)
|
500
468
|
};
|
501
469
|
|
502
470
|
# Assertions
|
@@ -528,19 +496,15 @@
|
|
528
496
|
when '(?>'; emit(:group, :atomic, text, ts, te)
|
529
497
|
when '(?~'; emit(:group, :absence, text, ts, te)
|
530
498
|
|
531
|
-
when /^\(
|
532
|
-
|
499
|
+
when /^\(\?(?:<>|'')/
|
500
|
+
validation_error(:group, 'named group', 'name is empty')
|
533
501
|
|
502
|
+
when /^\(\?<\w*>/
|
534
503
|
emit(:group, :named_ab, text, ts, te)
|
535
504
|
|
536
|
-
when /^\(\?'
|
537
|
-
empty_name_error(:group, 'named group (sq)') if $1.empty?
|
538
|
-
|
505
|
+
when /^\(\?'\w*'/
|
539
506
|
emit(:group, :named_sq, text, ts, te)
|
540
507
|
|
541
|
-
else
|
542
|
-
raise ScannerError.new(
|
543
|
-
"Unknown subexpression group format '#{text}'")
|
544
508
|
end
|
545
509
|
};
|
546
510
|
|
@@ -550,20 +514,13 @@
|
|
550
514
|
};
|
551
515
|
|
552
516
|
group_close @group_closed {
|
553
|
-
if
|
554
|
-
conditional_stack.last[1] == (group_depth + 1)
|
555
|
-
|
556
|
-
emit(:conditional, :close, *text(data, ts, te))
|
517
|
+
if conditional_stack.last == group_depth + 1
|
557
518
|
conditional_stack.pop
|
558
|
-
|
559
|
-
if conditional_stack.length == 0
|
560
|
-
in_conditional = false
|
561
|
-
end
|
519
|
+
emit(:conditional, :close, *text(data, ts, te))
|
562
520
|
else
|
563
|
-
if spacing_stack.length > 1
|
564
|
-
|
521
|
+
if spacing_stack.length > 1 &&
|
522
|
+
spacing_stack.last[:depth] == group_depth + 1
|
565
523
|
spacing_stack.pop
|
566
|
-
|
567
524
|
self.free_spacing = spacing_stack.last[:free_spacing]
|
568
525
|
end
|
569
526
|
|
@@ -576,11 +533,8 @@
|
|
576
533
|
# ------------------------------------------------------------------------
|
577
534
|
backslash . (group_name_ref | group_number_ref) > (backslashed, 4) {
|
578
535
|
case text = text(data, ts, te).first
|
579
|
-
when /^\\([gk])
|
580
|
-
|
581
|
-
|
582
|
-
when /^\\([gk])''/ # single quotes
|
583
|
-
empty_backref_error("ref/call (sq)")
|
536
|
+
when /^\\([gk])(<>|'')/ # angle brackets
|
537
|
+
validation_error(:backref, 'ref/call', 'ref ID is empty')
|
584
538
|
|
585
539
|
when /^\\([gk])<[^\d+-]\w*>/ # angle-brackets
|
586
540
|
if $1 == 'k'
|
@@ -636,9 +590,6 @@
|
|
636
590
|
when /^\\([gk])'[+\-]?\d+[+\-]\d+'/ # single-quotes
|
637
591
|
emit(:backref, :number_recursion_ref_sq, text, ts, te)
|
638
592
|
|
639
|
-
else
|
640
|
-
raise ScannerError.new(
|
641
|
-
"Unknown backreference format '#{text}'")
|
642
593
|
end
|
643
594
|
};
|
644
595
|
|
@@ -786,7 +737,7 @@ class Regexp::Scanner
|
|
786
737
|
input = input_object
|
787
738
|
self.free_spacing = false
|
788
739
|
end
|
789
|
-
|
740
|
+
self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
|
790
741
|
|
791
742
|
data = input.unpack("c*") if input.is_a?(String)
|
792
743
|
eof = data.length
|
@@ -794,15 +745,9 @@ class Regexp::Scanner
|
|
794
745
|
self.tokens = []
|
795
746
|
self.block = block_given? ? block : nil
|
796
747
|
|
797
|
-
self.
|
748
|
+
self.set_depth = 0
|
798
749
|
self.group_depth = 0
|
799
|
-
self.
|
800
|
-
|
801
|
-
in_set = false
|
802
|
-
set_depth = 0
|
803
|
-
in_conditional = false
|
804
|
-
conditional_depth = 0
|
805
|
-
conditional_stack = []
|
750
|
+
self.conditional_stack = []
|
806
751
|
|
807
752
|
%% write data;
|
808
753
|
%% write init;
|
@@ -817,9 +762,9 @@ class Regexp::Scanner
|
|
817
762
|
end
|
818
763
|
|
819
764
|
raise PrematureEndError.new("(missing group closing paranthesis) "+
|
820
|
-
"[#{
|
765
|
+
"[#{group_depth}]") if in_group?
|
821
766
|
raise PrematureEndError.new("(missing set closing bracket) "+
|
822
|
-
"[#{
|
767
|
+
"[#{set_depth}]") if in_set?
|
823
768
|
|
824
769
|
# when the entire expression is a literal run
|
825
770
|
emit_literal if literal
|
@@ -854,62 +799,15 @@ class Regexp::Scanner
|
|
854
799
|
|
855
800
|
private
|
856
801
|
|
857
|
-
attr_accessor :tokens, :literal, :block,
|
858
|
-
:
|
859
|
-
:free_spacing, :spacing_stack
|
860
|
-
|
861
|
-
# Ragel's regex-based scan of the group options introduced a lot of
|
862
|
-
# ambiguity, so we just ask it to find the beginning of what looks
|
863
|
-
# like an options run and handle the rest in here.
|
864
|
-
def scan_options(p, data, ts, te)
|
865
|
-
text = text(data, ts, te).first
|
866
|
-
|
867
|
-
options_char, options_length = true, 0
|
868
|
-
|
869
|
-
# Copy while we have option characters. There is no maximum length,
|
870
|
-
# as ruby allows things like '(?xxxxxxxxx-xxxxxxxxxxxxx:abc)'.
|
871
|
-
negative_options = false
|
872
|
-
while options_char
|
873
|
-
if data[te + options_length]
|
874
|
-
c = data[te + options_length].chr
|
875
|
-
|
876
|
-
if c =~ /[-mixdau]/
|
877
|
-
negative_options = true if c == '-'
|
802
|
+
attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
|
803
|
+
:group_depth, :set_depth, :conditional_stack
|
878
804
|
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
text << c ; p += 1 ; options_length += 1
|
883
|
-
else
|
884
|
-
options_char = false
|
885
|
-
end
|
886
|
-
else
|
887
|
-
raise PrematureEndError.new("expression options `#{text}'")
|
888
|
-
end
|
889
|
-
end
|
890
|
-
|
891
|
-
if data[te + options_length]
|
892
|
-
c = data[te + options_length].chr
|
893
|
-
|
894
|
-
if c == ':'
|
895
|
-
# Include the ':' in the options text
|
896
|
-
text << c ; p += 1 ; options_length += 1
|
897
|
-
emit_options(text, ts, te + options_length)
|
898
|
-
|
899
|
-
elsif c == ')'
|
900
|
-
# Don't include the closing ')', let group_close handle it.
|
901
|
-
emit_options(text, ts, te + options_length)
|
902
|
-
|
903
|
-
else
|
904
|
-
# Plain Regexp reports this as 'undefined group option'
|
905
|
-
raise ScannerError.new(
|
906
|
-
"Unexpected `#{c}' in options sequence, ':' or ')' expected")
|
907
|
-
end
|
908
|
-
else
|
909
|
-
raise PrematureEndError.new("expression options `#{text}'")
|
910
|
-
end
|
805
|
+
def in_group?
|
806
|
+
group_depth > 0
|
807
|
+
end
|
911
808
|
|
912
|
-
|
809
|
+
def in_set?
|
810
|
+
set_depth > 0
|
913
811
|
end
|
914
812
|
|
915
813
|
# Copy from ts to te from data as text
|
@@ -945,32 +843,39 @@ class Regexp::Scanner
|
|
945
843
|
def emit_options(text, ts, te)
|
946
844
|
token = nil
|
947
845
|
|
948
|
-
|
949
|
-
|
846
|
+
# Ruby allows things like '(?-xxxx)' or '(?xx-xx--xx-:abc)'.
|
847
|
+
text =~ /\(\?([mixdau]*)(-(?:[mix]*))*(:)?/
|
848
|
+
positive, negative, group_local = $1, $2, $3
|
950
849
|
|
951
|
-
|
952
|
-
|
953
|
-
|
850
|
+
if positive.include?('x')
|
851
|
+
self.free_spacing = true
|
852
|
+
end
|
954
853
|
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
854
|
+
# If the x appears in both, treat it like ruby does, the second cancels
|
855
|
+
# the first.
|
856
|
+
if negative && negative.include?('x')
|
857
|
+
self.free_spacing = false
|
858
|
+
end
|
960
859
|
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
end
|
860
|
+
if group_local
|
861
|
+
spacing_stack << {:free_spacing => free_spacing, :depth => group_depth}
|
862
|
+
token = :options
|
863
|
+
else
|
864
|
+
# switch for parent group level
|
865
|
+
spacing_stack.last[:free_spacing] = free_spacing
|
866
|
+
token = :options_switch
|
969
867
|
end
|
970
868
|
|
971
869
|
emit(:group, token, text, ts, te)
|
972
870
|
end
|
973
871
|
|
872
|
+
def emit_meta_control_sequence(data, ts, te, token)
|
873
|
+
if data.last < 0x00 || data.last > 0x7F
|
874
|
+
validation_error(:sequence, 'escape', token.to_s)
|
875
|
+
end
|
876
|
+
emit(:escape, token, *text(data, ts, te, 1))
|
877
|
+
end
|
878
|
+
|
974
879
|
# Centralizes and unifies the handling of validation related
|
975
880
|
# errors.
|
976
881
|
def validation_error(type, what, reason)
|
@@ -981,21 +886,8 @@ class Regexp::Scanner
|
|
981
886
|
error = InvalidBackrefError.new(what, reason)
|
982
887
|
when :sequence
|
983
888
|
error = InvalidSequenceError.new(what, reason)
|
984
|
-
else
|
985
|
-
error = ValidationError.new('expression')
|
986
889
|
end
|
987
890
|
|
988
891
|
raise error # unless @@config.validation_ignore
|
989
892
|
end
|
990
|
-
|
991
|
-
# Used for references with an empty name or number
|
992
|
-
def empty_backref_error(type, what)
|
993
|
-
validation_error(:backref, what, 'ref ID is empty')
|
994
|
-
end
|
995
|
-
|
996
|
-
# Used for named expressions with an empty name
|
997
|
-
def empty_name_error(type, what)
|
998
|
-
validation_error(type, what, 'name is empty')
|
999
|
-
end
|
1000
|
-
|
1001
893
|
end # module Regexp::Scanner
|