regexp_parser 2.7.0 → 2.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +62 -3
- data/Gemfile +3 -3
- data/LICENSE +1 -1
- data/README.md +33 -30
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -9
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -24
- data/lib/regexp_parser/expression/subexpression.rb +20 -18
- data/lib/regexp_parser/expression.rb +2 -0
- data/lib/regexp_parser/lexer.rb +15 -7
- data/lib/regexp_parser/parser.rb +85 -86
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +11 -0
- data/lib/regexp_parser/scanner/properties/short.csv +2 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +35 -129
- data/lib/regexp_parser/scanner.rb +1084 -1303
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +17 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/version.rb +1 -1
- metadata +9 -3
@@ -0,0 +1,63 @@
|
|
1
|
+
class Regexp::Scanner
|
2
|
+
# Base for all scanner validation errors
|
3
|
+
class ValidationError < ScannerError
|
4
|
+
# Centralizes and unifies the handling of validation related errors.
|
5
|
+
def self.for(type, problem, reason = nil)
|
6
|
+
types.fetch(type).new(problem, reason)
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.types
|
10
|
+
@types ||= {
|
11
|
+
backref: InvalidBackrefError,
|
12
|
+
group: InvalidGroupError,
|
13
|
+
group_option: InvalidGroupOption,
|
14
|
+
posix_class: UnknownPosixClassError,
|
15
|
+
property: UnknownUnicodePropertyError,
|
16
|
+
sequence: InvalidSequenceError,
|
17
|
+
}
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# Invalid sequence format. Used for escape sequences, mainly.
|
22
|
+
class InvalidSequenceError < ValidationError
|
23
|
+
def initialize(what = 'sequence', where = '')
|
24
|
+
super "Invalid #{what} at #{where}"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Invalid group. Used for named groups.
|
29
|
+
class InvalidGroupError < ValidationError
|
30
|
+
def initialize(what, reason)
|
31
|
+
super "Invalid #{what}, #{reason}."
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# Invalid group option. Used for inline options.
|
36
|
+
# TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
|
37
|
+
class InvalidGroupOption < ValidationError
|
38
|
+
def initialize(option, text)
|
39
|
+
super "Invalid group option #{option} in #{text}"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Invalid back reference. Used for named and numbered refs/calls.
|
44
|
+
class InvalidBackrefError < ValidationError
|
45
|
+
def initialize(what, reason)
|
46
|
+
super "Invalid back reference #{what}, #{reason}"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
# The property name was not recognized by the scanner.
|
51
|
+
class UnknownUnicodePropertyError < ValidationError
|
52
|
+
def initialize(name, _)
|
53
|
+
super "Unknown unicode character property name #{name}"
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
# The POSIX class name was not recognized by the scanner.
|
58
|
+
class UnknownPosixClassError < ValidationError
|
59
|
+
def initialize(text, _)
|
60
|
+
super "Unknown POSIX class #{text}"
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -7,6 +7,7 @@ age=12.0,age=12.0
|
|
7
7
|
age=12.1,age=12.1
|
8
8
|
age=13.0,age=13.0
|
9
9
|
age=14.0,age=14.0
|
10
|
+
age=15.0,age=15.0
|
10
11
|
age=2.0,age=2.0
|
11
12
|
age=2.1,age=2.1
|
12
13
|
age=3.0,age=3.0
|
@@ -97,6 +98,7 @@ emojimodifierbase,emoji_modifier_base
|
|
97
98
|
emojipresentation,emoji_presentation
|
98
99
|
enclosingmark,enclosing_mark
|
99
100
|
ethiopic,ethiopic
|
101
|
+
extendedpictographic,extended_pictographic
|
100
102
|
extender,extender
|
101
103
|
finalpunctuation,final_punctuation
|
102
104
|
format,format
|
@@ -139,6 +141,7 @@ inancientsymbols,in_ancient_symbols
|
|
139
141
|
inarabic,in_arabic
|
140
142
|
inarabicextendeda,in_arabic_extended_a
|
141
143
|
inarabicextendedb,in_arabic_extended_b
|
144
|
+
inarabicextendedc,in_arabic_extended_c
|
142
145
|
inarabicmathematicalalphabeticsymbols,in_arabic_mathematical_alphabetic_symbols
|
143
146
|
inarabicpresentationformsa,in_arabic_presentation_forms_a
|
144
147
|
inarabicpresentationformsb,in_arabic_presentation_forms_b
|
@@ -186,6 +189,7 @@ incjkunifiedideographsextensiond,in_cjk_unified_ideographs_extension_d
|
|
186
189
|
incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
|
187
190
|
incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
|
188
191
|
incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
|
192
|
+
incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
|
189
193
|
incombiningdiacriticalmarks,in_combining_diacritical_marks
|
190
194
|
incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
|
191
195
|
incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
|
@@ -205,10 +209,12 @@ incyrillic,in_cyrillic
|
|
205
209
|
incyrillicextendeda,in_cyrillic_extended_a
|
206
210
|
incyrillicextendedb,in_cyrillic_extended_b
|
207
211
|
incyrillicextendedc,in_cyrillic_extended_c
|
212
|
+
incyrillicextendedd,in_cyrillic_extended_d
|
208
213
|
incyrillicsupplement,in_cyrillic_supplement
|
209
214
|
indeseret,in_deseret
|
210
215
|
indevanagari,in_devanagari
|
211
216
|
indevanagariextended,in_devanagari_extended
|
217
|
+
indevanagariextendeda,in_devanagari_extended_a
|
212
218
|
indingbats,in_dingbats
|
213
219
|
indivesakuru,in_dives_akuru
|
214
220
|
indogra,in_dogra
|
@@ -268,6 +274,7 @@ inipaextensions,in_ipa_extensions
|
|
268
274
|
initialpunctuation,initial_punctuation
|
269
275
|
injavanese,in_javanese
|
270
276
|
inkaithi,in_kaithi
|
277
|
+
inkaktoviknumerals,in_kaktovik_numerals
|
271
278
|
inkanaextendeda,in_kana_extended_a
|
272
279
|
inkanaextendedb,in_kana_extended_b
|
273
280
|
inkanasupplement,in_kana_supplement
|
@@ -276,6 +283,7 @@ inkangxiradicals,in_kangxi_radicals
|
|
276
283
|
inkannada,in_kannada
|
277
284
|
inkatakana,in_katakana
|
278
285
|
inkatakanaphoneticextensions,in_katakana_phonetic_extensions
|
286
|
+
inkawi,in_kawi
|
279
287
|
inkayahli,in_kayah_li
|
280
288
|
inkharoshthi,in_kharoshthi
|
281
289
|
inkhitansmallscript,in_khitan_small_script
|
@@ -339,6 +347,7 @@ inmyanmar,in_myanmar
|
|
339
347
|
inmyanmarextendeda,in_myanmar_extended_a
|
340
348
|
inmyanmarextendedb,in_myanmar_extended_b
|
341
349
|
innabataean,in_nabataean
|
350
|
+
innagmundari,in_nag_mundari
|
342
351
|
innandinagari,in_nandinagari
|
343
352
|
innewa,in_newa
|
344
353
|
innewtailue,in_new_tai_lue
|
@@ -457,6 +466,7 @@ joincontrol,join_control
|
|
457
466
|
kaithi,kaithi
|
458
467
|
kannada,kannada
|
459
468
|
katakana,katakana
|
469
|
+
kawi,kawi
|
460
470
|
kayahli,kayah_li
|
461
471
|
kharoshthi,kharoshthi
|
462
472
|
khitansmallscript,khitan_small_script
|
@@ -503,6 +513,7 @@ mro,mro
|
|
503
513
|
multani,multani
|
504
514
|
myanmar,myanmar
|
505
515
|
nabataean,nabataean
|
516
|
+
nagmundari,nag_mundari
|
506
517
|
nandinagari,nandinagari
|
507
518
|
newa,newa
|
508
519
|
newline,newline
|
@@ -57,6 +57,7 @@ emod,emoji_modifier
|
|
57
57
|
epres,emoji_presentation
|
58
58
|
ethi,ethiopic
|
59
59
|
ext,extender
|
60
|
+
extpict,extended_pictographic
|
60
61
|
geor,georgian
|
61
62
|
glag,glagolitic
|
62
63
|
gong,gunjala_gondi
|
@@ -133,6 +134,7 @@ mtei,meetei_mayek
|
|
133
134
|
mult,multani
|
134
135
|
mymr,myanmar
|
135
136
|
n,number
|
137
|
+
nagm,nag_mundari
|
136
138
|
nand,nandinagari
|
137
139
|
narb,old_north_arabian
|
138
140
|
nbat,nabataean
|
@@ -20,7 +20,7 @@
|
|
20
20
|
name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
|
21
21
|
|
22
22
|
token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
|
23
|
-
|
23
|
+
raise ValidationError.for(:property, name) unless token
|
24
24
|
|
25
25
|
self.emit(type, token.to_sym, text)
|
26
26
|
|
@@ -30,11 +30,6 @@
|
|
30
30
|
|
31
31
|
class_posix = ('[:' . '^'? . [^\[\]]* . ':]');
|
32
32
|
|
33
|
-
|
34
|
-
# these are not supported in ruby at the moment
|
35
|
-
collating_sequence = '[.' . (alpha | [\-])+ . '.]';
|
36
|
-
character_equivalent = '[=' . alpha . '=]';
|
37
|
-
|
38
33
|
line_anchor = beginning_of_line | end_of_line;
|
39
34
|
anchor_char = [AbBzZG];
|
40
35
|
|
@@ -83,10 +78,9 @@
|
|
83
78
|
# try to treat every other group head as options group, like Ruby
|
84
79
|
group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
|
85
80
|
|
86
|
-
group_ref = [gk];
|
87
81
|
group_name_id_ab = ([^!0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
|
88
82
|
group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
|
89
|
-
group_number = '-'? . [
|
83
|
+
group_number = '-'? . [0-9]+;
|
90
84
|
group_level = [+\-] . [0-9]+;
|
91
85
|
|
92
86
|
group_name = ('<' . group_name_id_ab? . '>') |
|
@@ -95,15 +89,11 @@
|
|
95
89
|
|
96
90
|
group_named = ('?' . group_name );
|
97
91
|
|
98
|
-
|
99
|
-
|
100
|
-
group_name_call = 'g' . (('<' . group_name_id_ab? . group_level? '>') |
|
101
|
-
("'" . group_name_id_sq? . group_level? "'"));
|
92
|
+
group_ref_body = (('<' . (group_name_id_ab? | group_number) . group_level? '>') |
|
93
|
+
("'" . (group_name_id_sq? | group_number) . group_level? "'"));
|
102
94
|
|
103
|
-
|
104
|
-
|
105
|
-
group_number_call = 'g' . (('<' . ((group_number . group_level?) | '0') '>') |
|
106
|
-
("'" . ((group_number . group_level?) | '0') "'"));
|
95
|
+
group_ref = 'k' . group_ref_body;
|
96
|
+
group_call = 'g' . group_ref_body;
|
107
97
|
|
108
98
|
group_type = group_atomic | group_passive | group_absence | group_named;
|
109
99
|
|
@@ -134,13 +124,13 @@
|
|
134
124
|
# EOF error, used where it can be detected
|
135
125
|
action premature_end_error {
|
136
126
|
text = copy(data, ts ? ts-1 : 0, -1)
|
137
|
-
raise PrematureEndError.new(
|
127
|
+
raise PrematureEndError.new(text)
|
138
128
|
}
|
139
129
|
|
140
130
|
# Invalid sequence error, used from sequences, like escapes and sets
|
141
131
|
action invalid_sequence_error {
|
142
132
|
text = copy(data, ts ? ts-1 : 0, -1)
|
143
|
-
|
133
|
+
raise ValidationError.for(:sequence, 'sequence', text)
|
144
134
|
}
|
145
135
|
|
146
136
|
# group (nesting) and set open/close actions
|
@@ -221,20 +211,12 @@
|
|
221
211
|
end
|
222
212
|
|
223
213
|
unless self.class.posix_classes.include?(class_name)
|
224
|
-
|
214
|
+
raise ValidationError.for(:posix_class, text)
|
225
215
|
end
|
226
216
|
|
227
217
|
emit(type, class_name.to_sym, text)
|
228
218
|
};
|
229
219
|
|
230
|
-
# These are not supported in ruby at the moment. Enable them if they are.
|
231
|
-
# collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
232
|
-
# emit(:set, :collation, copy(data, ts, te))
|
233
|
-
# };
|
234
|
-
# character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
235
|
-
# emit(:set, :equivalent, copy(data, ts, te))
|
236
|
-
# };
|
237
|
-
|
238
220
|
meta_char > (set_meta, 1) {
|
239
221
|
emit(:literal, :literal, copy(data, ts, te))
|
240
222
|
};
|
@@ -457,10 +439,9 @@
|
|
457
439
|
|
458
440
|
# (?#...) comments: parsed as a single expression, without introducing a
|
459
441
|
# new nesting level. Comments may not include parentheses, escaped or not.
|
460
|
-
# special case for close
|
461
|
-
# correct closing count.
|
442
|
+
# special case for close to get the correct closing count.
|
462
443
|
# ------------------------------------------------------------------------
|
463
|
-
group_open . group_comment
|
444
|
+
(group_open . group_comment) @group_closed {
|
464
445
|
emit(:group, :comment, copy(data, ts, te))
|
465
446
|
};
|
466
447
|
|
@@ -475,10 +456,10 @@
|
|
475
456
|
#
|
476
457
|
# (?imxdau-imx:subexp) option on/off for subexp
|
477
458
|
# ------------------------------------------------------------------------
|
478
|
-
group_open . group_options >group_opened {
|
459
|
+
(group_open . group_options) >group_opened {
|
479
460
|
text = copy(data, ts, te)
|
480
461
|
if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
|
481
|
-
|
462
|
+
raise ValidationError.for(:group_option, $1 || "-#{$2}", text)
|
482
463
|
end
|
483
464
|
emit_options(text)
|
484
465
|
};
|
@@ -489,7 +470,7 @@
|
|
489
470
|
# (?<=subexp) look-behind
|
490
471
|
# (?<!subexp) negative look-behind
|
491
472
|
# ------------------------------------------------------------------------
|
492
|
-
group_open . assertion_type >group_opened {
|
473
|
+
(group_open . assertion_type) >group_opened {
|
493
474
|
case text = copy(data, ts, te)
|
494
475
|
when '(?='; emit(:assertion, :lookahead, text)
|
495
476
|
when '(?!'; emit(:assertion, :nlookahead, text)
|
@@ -506,14 +487,14 @@
|
|
506
487
|
# (?'name'subexp) named group (single quoted version)
|
507
488
|
# (subexp) captured group
|
508
489
|
# ------------------------------------------------------------------------
|
509
|
-
group_open . group_type >group_opened {
|
490
|
+
(group_open . group_type) >group_opened {
|
510
491
|
case text = copy(data, ts, te)
|
511
492
|
when '(?:'; emit(:group, :passive, text)
|
512
493
|
when '(?>'; emit(:group, :atomic, text)
|
513
494
|
when '(?~'; emit(:group, :absence, text)
|
514
495
|
|
515
496
|
when /^\(\?(?:<>|'')/
|
516
|
-
|
497
|
+
raise ValidationError.for(:group, 'named group', 'name is empty')
|
517
498
|
|
518
499
|
when /^\(\?<[^>]+>/
|
519
500
|
emit(:group, :named_ab, text)
|
@@ -533,7 +514,7 @@
|
|
533
514
|
if conditional_stack.last == group_depth + 1
|
534
515
|
conditional_stack.pop
|
535
516
|
emit(:conditional, :close, ')')
|
536
|
-
|
517
|
+
elsif group_depth >= 0
|
537
518
|
if spacing_stack.length > 1 &&
|
538
519
|
spacing_stack.last[:depth] == group_depth + 1
|
539
520
|
spacing_stack.pop
|
@@ -541,41 +522,43 @@
|
|
541
522
|
end
|
542
523
|
|
543
524
|
emit(:group, :close, ')')
|
525
|
+
else
|
526
|
+
raise ValidationError.for(:group, 'group', 'unmatched close parenthesis')
|
544
527
|
end
|
545
528
|
};
|
546
529
|
|
547
530
|
|
548
531
|
# Group backreference, named and numbered
|
549
532
|
# ------------------------------------------------------------------------
|
550
|
-
backslash . (
|
533
|
+
backslash . (group_ref) > (backslashed, 4) {
|
551
534
|
case text = copy(data, ts, te)
|
552
|
-
when /^\\k(
|
553
|
-
validation_error(:backref, 'backreference', 'ref ID is empty')
|
554
|
-
when /^\\k(.)[^\p{digit}\-][^+\-]*\D$/
|
535
|
+
when /^\\k(.)[^0-9\-][^+\-]*['>]$/
|
555
536
|
emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
|
556
|
-
when /^\\k(.)\d
|
537
|
+
when /^\\k(.)[1-9]\d*['>]$/
|
557
538
|
emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
|
558
|
-
when /^\\k(.)
|
539
|
+
when /^\\k(.)-[1-9]\d*['>]$/
|
559
540
|
emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
|
560
|
-
when /^\\k(.)[
|
541
|
+
when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
|
561
542
|
emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
|
562
|
-
when /^\\k(.)
|
543
|
+
when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
|
563
544
|
emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
|
545
|
+
else
|
546
|
+
raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
|
564
547
|
end
|
565
548
|
};
|
566
549
|
|
567
550
|
# Group call, named and numbered
|
568
551
|
# ------------------------------------------------------------------------
|
569
|
-
backslash . (
|
552
|
+
backslash . (group_call) > (backslashed, 4) {
|
570
553
|
case text = copy(data, ts, te)
|
571
|
-
when /^\\g(
|
572
|
-
validation_error(:backref, 'subexpression call', 'ref ID is empty')
|
573
|
-
when /^\\g(.)[^\p{digit}+\->][^+\-]*/
|
554
|
+
when /^\\g(.)[^0-9+\-].*['>]$/
|
574
555
|
emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
|
575
|
-
when /^\\g(.)\d
|
556
|
+
when /^\\g(.)\d+['>]$/
|
576
557
|
emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
|
577
558
|
when /^\\g(.)[+-]\d+/
|
578
559
|
emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
|
560
|
+
else
|
561
|
+
raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
|
579
562
|
end
|
580
563
|
};
|
581
564
|
|
@@ -649,72 +632,11 @@
|
|
649
632
|
*|;
|
650
633
|
}%%
|
651
634
|
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
require 'regexp_parser/error'
|
635
|
+
require 'regexp_parser/scanner/errors/scanner_error'
|
636
|
+
require 'regexp_parser/scanner/errors/premature_end_error'
|
637
|
+
require 'regexp_parser/scanner/errors/validation_error'
|
656
638
|
|
657
639
|
class Regexp::Scanner
|
658
|
-
# General scanner error (catch all)
|
659
|
-
class ScannerError < Regexp::Parser::Error; end
|
660
|
-
|
661
|
-
# Base for all scanner validation errors
|
662
|
-
class ValidationError < Regexp::Parser::Error
|
663
|
-
def initialize(reason)
|
664
|
-
super reason
|
665
|
-
end
|
666
|
-
end
|
667
|
-
|
668
|
-
# Unexpected end of pattern
|
669
|
-
class PrematureEndError < ScannerError
|
670
|
-
def initialize(where = '')
|
671
|
-
super "Premature end of pattern at #{where}"
|
672
|
-
end
|
673
|
-
end
|
674
|
-
|
675
|
-
# Invalid sequence format. Used for escape sequences, mainly.
|
676
|
-
class InvalidSequenceError < ValidationError
|
677
|
-
def initialize(what = 'sequence', where = '')
|
678
|
-
super "Invalid #{what} at #{where}"
|
679
|
-
end
|
680
|
-
end
|
681
|
-
|
682
|
-
# Invalid group. Used for named groups.
|
683
|
-
class InvalidGroupError < ValidationError
|
684
|
-
def initialize(what, reason)
|
685
|
-
super "Invalid #{what}, #{reason}."
|
686
|
-
end
|
687
|
-
end
|
688
|
-
|
689
|
-
# Invalid groupOption. Used for inline options.
|
690
|
-
# TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
|
691
|
-
class InvalidGroupOption < ValidationError
|
692
|
-
def initialize(option, text)
|
693
|
-
super "Invalid group option #{option} in #{text}"
|
694
|
-
end
|
695
|
-
end
|
696
|
-
|
697
|
-
# Invalid back reference. Used for name a number refs/calls.
|
698
|
-
class InvalidBackrefError < ValidationError
|
699
|
-
def initialize(what, reason)
|
700
|
-
super "Invalid back reference #{what}, #{reason}"
|
701
|
-
end
|
702
|
-
end
|
703
|
-
|
704
|
-
# The property name was not recognized by the scanner.
|
705
|
-
class UnknownUnicodePropertyError < ValidationError
|
706
|
-
def initialize(name)
|
707
|
-
super "Unknown unicode character property name #{name}"
|
708
|
-
end
|
709
|
-
end
|
710
|
-
|
711
|
-
# The POSIX class name was not recognized by the scanner.
|
712
|
-
class UnknownPosixClassError < ValidationError
|
713
|
-
def initialize(text)
|
714
|
-
super "Unknown POSIX class #{text}"
|
715
|
-
end
|
716
|
-
end
|
717
|
-
|
718
640
|
# Scans the given regular expression text, or Regexp object and collects the
|
719
641
|
# emitted token into an array that gets returned at the end. If a block is
|
720
642
|
# given, it gets called for each emitted token.
|
@@ -891,24 +813,8 @@ class Regexp::Scanner
|
|
891
813
|
|
892
814
|
def emit_meta_control_sequence(data, ts, te, token)
|
893
815
|
if data.last < 0x00 || data.last > 0x7F
|
894
|
-
|
816
|
+
raise ValidationError.for(:sequence, 'escape', token.to_s)
|
895
817
|
end
|
896
818
|
emit(:escape, token, copy(data, ts-1, te))
|
897
819
|
end
|
898
|
-
|
899
|
-
# Centralizes and unifies the handling of validation related
|
900
|
-
# errors.
|
901
|
-
def validation_error(type, what, reason = nil)
|
902
|
-
error =
|
903
|
-
case type
|
904
|
-
when :backref then InvalidBackrefError.new(what, reason)
|
905
|
-
when :group then InvalidGroupError.new(what, reason)
|
906
|
-
when :group_option then InvalidGroupOption.new(what, reason)
|
907
|
-
when :posix_class then UnknownPosixClassError.new(what)
|
908
|
-
when :property then UnknownUnicodePropertyError.new(what)
|
909
|
-
when :sequence then InvalidSequenceError.new(what, reason)
|
910
|
-
end
|
911
|
-
|
912
|
-
raise error # unless @@config.validation_ignore
|
913
|
-
end
|
914
820
|
end # module Regexp::Scanner
|