regexp_parser 2.7.0 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +62 -3
- data/Gemfile +3 -3
- data/LICENSE +1 -1
- data/README.md +33 -30
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -9
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -24
- data/lib/regexp_parser/expression/subexpression.rb +20 -18
- data/lib/regexp_parser/expression.rb +2 -0
- data/lib/regexp_parser/lexer.rb +15 -7
- data/lib/regexp_parser/parser.rb +85 -86
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +11 -0
- data/lib/regexp_parser/scanner/properties/short.csv +2 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +35 -129
- data/lib/regexp_parser/scanner.rb +1084 -1303
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +17 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/version.rb +1 -1
- metadata +9 -3
@@ -0,0 +1,63 @@
|
|
1
|
+
class Regexp::Scanner
|
2
|
+
# Base for all scanner validation errors
|
3
|
+
class ValidationError < ScannerError
|
4
|
+
# Centralizes and unifies the handling of validation related errors.
|
5
|
+
def self.for(type, problem, reason = nil)
|
6
|
+
types.fetch(type).new(problem, reason)
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.types
|
10
|
+
@types ||= {
|
11
|
+
backref: InvalidBackrefError,
|
12
|
+
group: InvalidGroupError,
|
13
|
+
group_option: InvalidGroupOption,
|
14
|
+
posix_class: UnknownPosixClassError,
|
15
|
+
property: UnknownUnicodePropertyError,
|
16
|
+
sequence: InvalidSequenceError,
|
17
|
+
}
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# Invalid sequence format. Used for escape sequences, mainly.
|
22
|
+
class InvalidSequenceError < ValidationError
|
23
|
+
def initialize(what = 'sequence', where = '')
|
24
|
+
super "Invalid #{what} at #{where}"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Invalid group. Used for named groups.
|
29
|
+
class InvalidGroupError < ValidationError
|
30
|
+
def initialize(what, reason)
|
31
|
+
super "Invalid #{what}, #{reason}."
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# Invalid groupOption. Used for inline options.
|
36
|
+
# TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
|
37
|
+
class InvalidGroupOption < ValidationError
|
38
|
+
def initialize(option, text)
|
39
|
+
super "Invalid group option #{option} in #{text}"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Invalid back reference. Used for name and number refs/calls.
|
44
|
+
class InvalidBackrefError < ValidationError
|
45
|
+
def initialize(what, reason)
|
46
|
+
super "Invalid back reference #{what}, #{reason}"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
# The property name was not recognized by the scanner.
|
51
|
+
class UnknownUnicodePropertyError < ValidationError
|
52
|
+
def initialize(name, _)
|
53
|
+
super "Unknown unicode character property name #{name}"
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
# The POSIX class name was not recognized by the scanner.
|
58
|
+
class UnknownPosixClassError < ValidationError
|
59
|
+
def initialize(text, _)
|
60
|
+
super "Unknown POSIX class #{text}"
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -7,6 +7,7 @@ age=12.0,age=12.0
|
|
7
7
|
age=12.1,age=12.1
|
8
8
|
age=13.0,age=13.0
|
9
9
|
age=14.0,age=14.0
|
10
|
+
age=15.0,age=15.0
|
10
11
|
age=2.0,age=2.0
|
11
12
|
age=2.1,age=2.1
|
12
13
|
age=3.0,age=3.0
|
@@ -97,6 +98,7 @@ emojimodifierbase,emoji_modifier_base
|
|
97
98
|
emojipresentation,emoji_presentation
|
98
99
|
enclosingmark,enclosing_mark
|
99
100
|
ethiopic,ethiopic
|
101
|
+
extendedpictographic,extended_pictographic
|
100
102
|
extender,extender
|
101
103
|
finalpunctuation,final_punctuation
|
102
104
|
format,format
|
@@ -139,6 +141,7 @@ inancientsymbols,in_ancient_symbols
|
|
139
141
|
inarabic,in_arabic
|
140
142
|
inarabicextendeda,in_arabic_extended_a
|
141
143
|
inarabicextendedb,in_arabic_extended_b
|
144
|
+
inarabicextendedc,in_arabic_extended_c
|
142
145
|
inarabicmathematicalalphabeticsymbols,in_arabic_mathematical_alphabetic_symbols
|
143
146
|
inarabicpresentationformsa,in_arabic_presentation_forms_a
|
144
147
|
inarabicpresentationformsb,in_arabic_presentation_forms_b
|
@@ -186,6 +189,7 @@ incjkunifiedideographsextensiond,in_cjk_unified_ideographs_extension_d
|
|
186
189
|
incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
|
187
190
|
incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
|
188
191
|
incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
|
192
|
+
incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
|
189
193
|
incombiningdiacriticalmarks,in_combining_diacritical_marks
|
190
194
|
incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
|
191
195
|
incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
|
@@ -205,10 +209,12 @@ incyrillic,in_cyrillic
|
|
205
209
|
incyrillicextendeda,in_cyrillic_extended_a
|
206
210
|
incyrillicextendedb,in_cyrillic_extended_b
|
207
211
|
incyrillicextendedc,in_cyrillic_extended_c
|
212
|
+
incyrillicextendedd,in_cyrillic_extended_d
|
208
213
|
incyrillicsupplement,in_cyrillic_supplement
|
209
214
|
indeseret,in_deseret
|
210
215
|
indevanagari,in_devanagari
|
211
216
|
indevanagariextended,in_devanagari_extended
|
217
|
+
indevanagariextendeda,in_devanagari_extended_a
|
212
218
|
indingbats,in_dingbats
|
213
219
|
indivesakuru,in_dives_akuru
|
214
220
|
indogra,in_dogra
|
@@ -268,6 +274,7 @@ inipaextensions,in_ipa_extensions
|
|
268
274
|
initialpunctuation,initial_punctuation
|
269
275
|
injavanese,in_javanese
|
270
276
|
inkaithi,in_kaithi
|
277
|
+
inkaktoviknumerals,in_kaktovik_numerals
|
271
278
|
inkanaextendeda,in_kana_extended_a
|
272
279
|
inkanaextendedb,in_kana_extended_b
|
273
280
|
inkanasupplement,in_kana_supplement
|
@@ -276,6 +283,7 @@ inkangxiradicals,in_kangxi_radicals
|
|
276
283
|
inkannada,in_kannada
|
277
284
|
inkatakana,in_katakana
|
278
285
|
inkatakanaphoneticextensions,in_katakana_phonetic_extensions
|
286
|
+
inkawi,in_kawi
|
279
287
|
inkayahli,in_kayah_li
|
280
288
|
inkharoshthi,in_kharoshthi
|
281
289
|
inkhitansmallscript,in_khitan_small_script
|
@@ -339,6 +347,7 @@ inmyanmar,in_myanmar
|
|
339
347
|
inmyanmarextendeda,in_myanmar_extended_a
|
340
348
|
inmyanmarextendedb,in_myanmar_extended_b
|
341
349
|
innabataean,in_nabataean
|
350
|
+
innagmundari,in_nag_mundari
|
342
351
|
innandinagari,in_nandinagari
|
343
352
|
innewa,in_newa
|
344
353
|
innewtailue,in_new_tai_lue
|
@@ -457,6 +466,7 @@ joincontrol,join_control
|
|
457
466
|
kaithi,kaithi
|
458
467
|
kannada,kannada
|
459
468
|
katakana,katakana
|
469
|
+
kawi,kawi
|
460
470
|
kayahli,kayah_li
|
461
471
|
kharoshthi,kharoshthi
|
462
472
|
khitansmallscript,khitan_small_script
|
@@ -503,6 +513,7 @@ mro,mro
|
|
503
513
|
multani,multani
|
504
514
|
myanmar,myanmar
|
505
515
|
nabataean,nabataean
|
516
|
+
nagmundari,nag_mundari
|
506
517
|
nandinagari,nandinagari
|
507
518
|
newa,newa
|
508
519
|
newline,newline
|
@@ -57,6 +57,7 @@ emod,emoji_modifier
|
|
57
57
|
epres,emoji_presentation
|
58
58
|
ethi,ethiopic
|
59
59
|
ext,extender
|
60
|
+
extpict,extended_pictographic
|
60
61
|
geor,georgian
|
61
62
|
glag,glagolitic
|
62
63
|
gong,gunjala_gondi
|
@@ -133,6 +134,7 @@ mtei,meetei_mayek
|
|
133
134
|
mult,multani
|
134
135
|
mymr,myanmar
|
135
136
|
n,number
|
137
|
+
nagm,nag_mundari
|
136
138
|
nand,nandinagari
|
137
139
|
narb,old_north_arabian
|
138
140
|
nbat,nabataean
|
@@ -20,7 +20,7 @@
|
|
20
20
|
name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
|
21
21
|
|
22
22
|
token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
|
23
|
-
|
23
|
+
raise ValidationError.for(:property, name) unless token
|
24
24
|
|
25
25
|
self.emit(type, token.to_sym, text)
|
26
26
|
|
@@ -30,11 +30,6 @@
|
|
30
30
|
|
31
31
|
class_posix = ('[:' . '^'? . [^\[\]]* . ':]');
|
32
32
|
|
33
|
-
|
34
|
-
# these are not supported in ruby at the moment
|
35
|
-
collating_sequence = '[.' . (alpha | [\-])+ . '.]';
|
36
|
-
character_equivalent = '[=' . alpha . '=]';
|
37
|
-
|
38
33
|
line_anchor = beginning_of_line | end_of_line;
|
39
34
|
anchor_char = [AbBzZG];
|
40
35
|
|
@@ -83,10 +78,9 @@
|
|
83
78
|
# try to treat every other group head as options group, like Ruby
|
84
79
|
group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
|
85
80
|
|
86
|
-
group_ref = [gk];
|
87
81
|
group_name_id_ab = ([^!0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
|
88
82
|
group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
|
89
|
-
group_number = '-'? . [
|
83
|
+
group_number = '-'? . [0-9]+;
|
90
84
|
group_level = [+\-] . [0-9]+;
|
91
85
|
|
92
86
|
group_name = ('<' . group_name_id_ab? . '>') |
|
@@ -95,15 +89,11 @@
|
|
95
89
|
|
96
90
|
group_named = ('?' . group_name );
|
97
91
|
|
98
|
-
|
99
|
-
|
100
|
-
group_name_call = 'g' . (('<' . group_name_id_ab? . group_level? '>') |
|
101
|
-
("'" . group_name_id_sq? . group_level? "'"));
|
92
|
+
group_ref_body = (('<' . (group_name_id_ab? | group_number) . group_level? '>') |
|
93
|
+
("'" . (group_name_id_sq? | group_number) . group_level? "'"));
|
102
94
|
|
103
|
-
|
104
|
-
|
105
|
-
group_number_call = 'g' . (('<' . ((group_number . group_level?) | '0') '>') |
|
106
|
-
("'" . ((group_number . group_level?) | '0') "'"));
|
95
|
+
group_ref = 'k' . group_ref_body;
|
96
|
+
group_call = 'g' . group_ref_body;
|
107
97
|
|
108
98
|
group_type = group_atomic | group_passive | group_absence | group_named;
|
109
99
|
|
@@ -134,13 +124,13 @@
|
|
134
124
|
# EOF error, used where it can be detected
|
135
125
|
action premature_end_error {
|
136
126
|
text = copy(data, ts ? ts-1 : 0, -1)
|
137
|
-
raise PrematureEndError.new(
|
127
|
+
raise PrematureEndError.new(text)
|
138
128
|
}
|
139
129
|
|
140
130
|
# Invalid sequence error, used from sequences, like escapes and sets
|
141
131
|
action invalid_sequence_error {
|
142
132
|
text = copy(data, ts ? ts-1 : 0, -1)
|
143
|
-
|
133
|
+
raise ValidationError.for(:sequence, 'sequence', text)
|
144
134
|
}
|
145
135
|
|
146
136
|
# group (nesting) and set open/close actions
|
@@ -221,20 +211,12 @@
|
|
221
211
|
end
|
222
212
|
|
223
213
|
unless self.class.posix_classes.include?(class_name)
|
224
|
-
|
214
|
+
raise ValidationError.for(:posix_class, text)
|
225
215
|
end
|
226
216
|
|
227
217
|
emit(type, class_name.to_sym, text)
|
228
218
|
};
|
229
219
|
|
230
|
-
# These are not supported in ruby at the moment. Enable them if they are.
|
231
|
-
# collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
232
|
-
# emit(:set, :collation, copy(data, ts, te))
|
233
|
-
# };
|
234
|
-
# character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
235
|
-
# emit(:set, :equivalent, copy(data, ts, te))
|
236
|
-
# };
|
237
|
-
|
238
220
|
meta_char > (set_meta, 1) {
|
239
221
|
emit(:literal, :literal, copy(data, ts, te))
|
240
222
|
};
|
@@ -457,10 +439,9 @@
|
|
457
439
|
|
458
440
|
# (?#...) comments: parsed as a single expression, without introducing a
|
459
441
|
# new nesting level. Comments may not include parentheses, escaped or not.
|
460
|
-
# special case for close
|
461
|
-
# correct closing count.
|
442
|
+
# special case for close to get the correct closing count.
|
462
443
|
# ------------------------------------------------------------------------
|
463
|
-
group_open . group_comment
|
444
|
+
(group_open . group_comment) @group_closed {
|
464
445
|
emit(:group, :comment, copy(data, ts, te))
|
465
446
|
};
|
466
447
|
|
@@ -475,10 +456,10 @@
|
|
475
456
|
#
|
476
457
|
# (?imxdau-imx:subexp) option on/off for subexp
|
477
458
|
# ------------------------------------------------------------------------
|
478
|
-
group_open . group_options >group_opened {
|
459
|
+
(group_open . group_options) >group_opened {
|
479
460
|
text = copy(data, ts, te)
|
480
461
|
if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
|
481
|
-
|
462
|
+
raise ValidationError.for(:group_option, $1 || "-#{$2}", text)
|
482
463
|
end
|
483
464
|
emit_options(text)
|
484
465
|
};
|
@@ -489,7 +470,7 @@
|
|
489
470
|
# (?<=subexp) look-behind
|
490
471
|
# (?<!subexp) negative look-behind
|
491
472
|
# ------------------------------------------------------------------------
|
492
|
-
group_open . assertion_type >group_opened {
|
473
|
+
(group_open . assertion_type) >group_opened {
|
493
474
|
case text = copy(data, ts, te)
|
494
475
|
when '(?='; emit(:assertion, :lookahead, text)
|
495
476
|
when '(?!'; emit(:assertion, :nlookahead, text)
|
@@ -506,14 +487,14 @@
|
|
506
487
|
# (?'name'subexp) named group (single quoted version)
|
507
488
|
# (subexp) captured group
|
508
489
|
# ------------------------------------------------------------------------
|
509
|
-
group_open . group_type >group_opened {
|
490
|
+
(group_open . group_type) >group_opened {
|
510
491
|
case text = copy(data, ts, te)
|
511
492
|
when '(?:'; emit(:group, :passive, text)
|
512
493
|
when '(?>'; emit(:group, :atomic, text)
|
513
494
|
when '(?~'; emit(:group, :absence, text)
|
514
495
|
|
515
496
|
when /^\(\?(?:<>|'')/
|
516
|
-
|
497
|
+
raise ValidationError.for(:group, 'named group', 'name is empty')
|
517
498
|
|
518
499
|
when /^\(\?<[^>]+>/
|
519
500
|
emit(:group, :named_ab, text)
|
@@ -533,7 +514,7 @@
|
|
533
514
|
if conditional_stack.last == group_depth + 1
|
534
515
|
conditional_stack.pop
|
535
516
|
emit(:conditional, :close, ')')
|
536
|
-
|
517
|
+
elsif group_depth >= 0
|
537
518
|
if spacing_stack.length > 1 &&
|
538
519
|
spacing_stack.last[:depth] == group_depth + 1
|
539
520
|
spacing_stack.pop
|
@@ -541,41 +522,43 @@
|
|
541
522
|
end
|
542
523
|
|
543
524
|
emit(:group, :close, ')')
|
525
|
+
else
|
526
|
+
raise ValidationError.for(:group, 'group', 'unmatched close parenthesis')
|
544
527
|
end
|
545
528
|
};
|
546
529
|
|
547
530
|
|
548
531
|
# Group backreference, named and numbered
|
549
532
|
# ------------------------------------------------------------------------
|
550
|
-
backslash . (
|
533
|
+
backslash . (group_ref) > (backslashed, 4) {
|
551
534
|
case text = copy(data, ts, te)
|
552
|
-
when /^\\k(
|
553
|
-
validation_error(:backref, 'backreference', 'ref ID is empty')
|
554
|
-
when /^\\k(.)[^\p{digit}\-][^+\-]*\D$/
|
535
|
+
when /^\\k(.)[^0-9\-][^+\-]*['>]$/
|
555
536
|
emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
|
556
|
-
when /^\\k(.)\d
|
537
|
+
when /^\\k(.)[1-9]\d*['>]$/
|
557
538
|
emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
|
558
|
-
when /^\\k(.)
|
539
|
+
when /^\\k(.)-[1-9]\d*['>]$/
|
559
540
|
emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
|
560
|
-
when /^\\k(.)[
|
541
|
+
when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
|
561
542
|
emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
|
562
|
-
when /^\\k(.)
|
543
|
+
when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
|
563
544
|
emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
|
545
|
+
else
|
546
|
+
raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
|
564
547
|
end
|
565
548
|
};
|
566
549
|
|
567
550
|
# Group call, named and numbered
|
568
551
|
# ------------------------------------------------------------------------
|
569
|
-
backslash . (
|
552
|
+
backslash . (group_call) > (backslashed, 4) {
|
570
553
|
case text = copy(data, ts, te)
|
571
|
-
when /^\\g(
|
572
|
-
validation_error(:backref, 'subexpression call', 'ref ID is empty')
|
573
|
-
when /^\\g(.)[^\p{digit}+\->][^+\-]*/
|
554
|
+
when /^\\g(.)[^0-9+\-].*['>]$/
|
574
555
|
emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
|
575
|
-
when /^\\g(.)\d
|
556
|
+
when /^\\g(.)\d+['>]$/
|
576
557
|
emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
|
577
558
|
when /^\\g(.)[+-]\d+/
|
578
559
|
emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
|
560
|
+
else
|
561
|
+
raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
|
579
562
|
end
|
580
563
|
};
|
581
564
|
|
@@ -649,72 +632,11 @@
|
|
649
632
|
*|;
|
650
633
|
}%%
|
651
634
|
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
require 'regexp_parser/error'
|
635
|
+
require 'regexp_parser/scanner/errors/scanner_error'
|
636
|
+
require 'regexp_parser/scanner/errors/premature_end_error'
|
637
|
+
require 'regexp_parser/scanner/errors/validation_error'
|
656
638
|
|
657
639
|
class Regexp::Scanner
|
658
|
-
# General scanner error (catch all)
|
659
|
-
class ScannerError < Regexp::Parser::Error; end
|
660
|
-
|
661
|
-
# Base for all scanner validation errors
|
662
|
-
class ValidationError < Regexp::Parser::Error
|
663
|
-
def initialize(reason)
|
664
|
-
super reason
|
665
|
-
end
|
666
|
-
end
|
667
|
-
|
668
|
-
# Unexpected end of pattern
|
669
|
-
class PrematureEndError < ScannerError
|
670
|
-
def initialize(where = '')
|
671
|
-
super "Premature end of pattern at #{where}"
|
672
|
-
end
|
673
|
-
end
|
674
|
-
|
675
|
-
# Invalid sequence format. Used for escape sequences, mainly.
|
676
|
-
class InvalidSequenceError < ValidationError
|
677
|
-
def initialize(what = 'sequence', where = '')
|
678
|
-
super "Invalid #{what} at #{where}"
|
679
|
-
end
|
680
|
-
end
|
681
|
-
|
682
|
-
# Invalid group. Used for named groups.
|
683
|
-
class InvalidGroupError < ValidationError
|
684
|
-
def initialize(what, reason)
|
685
|
-
super "Invalid #{what}, #{reason}."
|
686
|
-
end
|
687
|
-
end
|
688
|
-
|
689
|
-
# Invalid groupOption. Used for inline options.
|
690
|
-
# TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
|
691
|
-
class InvalidGroupOption < ValidationError
|
692
|
-
def initialize(option, text)
|
693
|
-
super "Invalid group option #{option} in #{text}"
|
694
|
-
end
|
695
|
-
end
|
696
|
-
|
697
|
-
# Invalid back reference. Used for name and number refs/calls.
|
698
|
-
class InvalidBackrefError < ValidationError
|
699
|
-
def initialize(what, reason)
|
700
|
-
super "Invalid back reference #{what}, #{reason}"
|
701
|
-
end
|
702
|
-
end
|
703
|
-
|
704
|
-
# The property name was not recognized by the scanner.
|
705
|
-
class UnknownUnicodePropertyError < ValidationError
|
706
|
-
def initialize(name)
|
707
|
-
super "Unknown unicode character property name #{name}"
|
708
|
-
end
|
709
|
-
end
|
710
|
-
|
711
|
-
# The POSIX class name was not recognized by the scanner.
|
712
|
-
class UnknownPosixClassError < ValidationError
|
713
|
-
def initialize(text)
|
714
|
-
super "Unknown POSIX class #{text}"
|
715
|
-
end
|
716
|
-
end
|
717
|
-
|
718
640
|
# Scans the given regular expression text, or Regexp object and collects the
|
719
641
|
# emitted token into an array that gets returned at the end. If a block is
|
720
642
|
# given, it gets called for each emitted token.
|
@@ -891,24 +813,8 @@ class Regexp::Scanner
|
|
891
813
|
|
892
814
|
def emit_meta_control_sequence(data, ts, te, token)
|
893
815
|
if data.last < 0x00 || data.last > 0x7F
|
894
|
-
|
816
|
+
raise ValidationError.for(:sequence, 'escape', token.to_s)
|
895
817
|
end
|
896
818
|
emit(:escape, token, copy(data, ts-1, te))
|
897
819
|
end
|
898
|
-
|
899
|
-
# Centralizes and unifies the handling of validation related
|
900
|
-
# errors.
|
901
|
-
def validation_error(type, what, reason = nil)
|
902
|
-
error =
|
903
|
-
case type
|
904
|
-
when :backref then InvalidBackrefError.new(what, reason)
|
905
|
-
when :group then InvalidGroupError.new(what, reason)
|
906
|
-
when :group_option then InvalidGroupOption.new(what, reason)
|
907
|
-
when :posix_class then UnknownPosixClassError.new(what)
|
908
|
-
when :property then UnknownUnicodePropertyError.new(what)
|
909
|
-
when :sequence then InvalidSequenceError.new(what, reason)
|
910
|
-
end
|
911
|
-
|
912
|
-
raise error # unless @@config.validation_ignore
|
913
|
-
end
|
914
820
|
end # module Regexp::Scanner
|