regexp_parser 1.3.0 → 1.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -1
- data/Gemfile +3 -3
- data/README.md +12 -19
- data/Rakefile +3 -4
- data/lib/regexp_parser/expression.rb +28 -53
- data/lib/regexp_parser/expression/classes/backref.rb +18 -10
- data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
- data/lib/regexp_parser/expression/classes/escape.rb +0 -4
- data/lib/regexp_parser/expression/classes/group.rb +4 -2
- data/lib/regexp_parser/expression/classes/keep.rb +1 -3
- data/lib/regexp_parser/expression/methods/match.rb +13 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
- data/lib/regexp_parser/expression/methods/options.rb +35 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
- data/lib/regexp_parser/expression/methods/tests.rb +6 -15
- data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -6
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
- data/lib/regexp_parser/expression/subexpression.rb +3 -5
- data/lib/regexp_parser/lexer.rb +30 -44
- data/lib/regexp_parser/parser.rb +47 -24
- data/lib/regexp_parser/scanner.rb +1228 -1367
- data/lib/regexp_parser/scanner/char_type.rl +0 -3
- data/lib/regexp_parser/scanner/properties/long.yml +34 -1
- data/lib/regexp_parser/scanner/properties/short.yml +12 -0
- data/lib/regexp_parser/scanner/scanner.rl +101 -194
- data/lib/regexp_parser/syntax/tokens.rb +2 -10
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +3 -3
- data/spec/expression/base_spec.rb +94 -0
- data/spec/expression/clone_spec.rb +120 -0
- data/spec/expression/conditional_spec.rb +89 -0
- data/spec/expression/free_space_spec.rb +27 -0
- data/spec/expression/methods/match_length_spec.rb +161 -0
- data/spec/expression/methods/match_spec.rb +25 -0
- data/spec/expression/methods/strfregexp_spec.rb +224 -0
- data/spec/expression/methods/tests_spec.rb +99 -0
- data/spec/expression/methods/traverse_spec.rb +161 -0
- data/spec/expression/options_spec.rb +128 -0
- data/spec/expression/root_spec.rb +9 -0
- data/spec/expression/sequence_spec.rb +9 -0
- data/spec/expression/subexpression_spec.rb +50 -0
- data/spec/expression/to_h_spec.rb +26 -0
- data/spec/expression/to_s_spec.rb +100 -0
- data/spec/lexer/all_spec.rb +22 -0
- data/spec/lexer/conditionals_spec.rb +53 -0
- data/spec/lexer/delimiters_spec.rb +68 -0
- data/spec/lexer/escapes_spec.rb +14 -0
- data/spec/lexer/keep_spec.rb +10 -0
- data/spec/lexer/literals_spec.rb +89 -0
- data/spec/lexer/nesting_spec.rb +99 -0
- data/spec/lexer/refcalls_spec.rb +55 -0
- data/spec/parser/all_spec.rb +43 -0
- data/spec/parser/alternation_spec.rb +88 -0
- data/spec/parser/anchors_spec.rb +17 -0
- data/spec/parser/conditionals_spec.rb +179 -0
- data/spec/parser/errors_spec.rb +30 -0
- data/spec/parser/escapes_spec.rb +121 -0
- data/spec/parser/free_space_spec.rb +130 -0
- data/spec/parser/groups_spec.rb +108 -0
- data/spec/parser/keep_spec.rb +6 -0
- data/spec/parser/posix_classes_spec.rb +8 -0
- data/spec/parser/properties_spec.rb +115 -0
- data/spec/parser/quantifiers_spec.rb +52 -0
- data/spec/parser/refcalls_spec.rb +112 -0
- data/spec/parser/set/intersections_spec.rb +127 -0
- data/spec/parser/set/ranges_spec.rb +111 -0
- data/spec/parser/sets_spec.rb +178 -0
- data/spec/parser/types_spec.rb +18 -0
- data/spec/scanner/all_spec.rb +18 -0
- data/spec/scanner/anchors_spec.rb +21 -0
- data/spec/scanner/conditionals_spec.rb +128 -0
- data/spec/scanner/delimiters_spec.rb +52 -0
- data/spec/scanner/errors_spec.rb +67 -0
- data/spec/scanner/escapes_spec.rb +53 -0
- data/spec/scanner/free_space_spec.rb +133 -0
- data/spec/scanner/groups_spec.rb +52 -0
- data/spec/scanner/keep_spec.rb +10 -0
- data/spec/scanner/literals_spec.rb +49 -0
- data/spec/scanner/meta_spec.rb +18 -0
- data/spec/scanner/properties_spec.rb +64 -0
- data/spec/scanner/quantifiers_spec.rb +20 -0
- data/spec/scanner/refcalls_spec.rb +36 -0
- data/spec/scanner/sets_spec.rb +102 -0
- data/spec/scanner/types_spec.rb +14 -0
- data/spec/spec_helper.rb +15 -0
- data/{test → spec}/support/runner.rb +9 -8
- data/spec/support/shared_examples.rb +77 -0
- data/{test → spec}/support/warning_extractor.rb +5 -7
- data/spec/syntax/syntax_spec.rb +48 -0
- data/spec/syntax/syntax_token_map_spec.rb +23 -0
- data/spec/syntax/versions/1.8.6_spec.rb +17 -0
- data/spec/syntax/versions/1.9.1_spec.rb +10 -0
- data/spec/syntax/versions/1.9.3_spec.rb +9 -0
- data/spec/syntax/versions/2.0.0_spec.rb +13 -0
- data/spec/syntax/versions/2.2.0_spec.rb +9 -0
- data/spec/syntax/versions/aliases_spec.rb +37 -0
- data/spec/token/token_spec.rb +85 -0
- metadata +151 -146
- data/test/expression/test_all.rb +0 -12
- data/test/expression/test_base.rb +0 -90
- data/test/expression/test_clone.rb +0 -89
- data/test/expression/test_conditionals.rb +0 -113
- data/test/expression/test_free_space.rb +0 -35
- data/test/expression/test_set.rb +0 -84
- data/test/expression/test_strfregexp.rb +0 -230
- data/test/expression/test_subexpression.rb +0 -58
- data/test/expression/test_tests.rb +0 -99
- data/test/expression/test_to_h.rb +0 -59
- data/test/expression/test_to_s.rb +0 -104
- data/test/expression/test_traverse.rb +0 -161
- data/test/helpers.rb +0 -10
- data/test/lexer/test_all.rb +0 -41
- data/test/lexer/test_conditionals.rb +0 -127
- data/test/lexer/test_keep.rb +0 -24
- data/test/lexer/test_literals.rb +0 -130
- data/test/lexer/test_nesting.rb +0 -132
- data/test/lexer/test_refcalls.rb +0 -56
- data/test/parser/set/test_intersections.rb +0 -127
- data/test/parser/set/test_ranges.rb +0 -111
- data/test/parser/test_all.rb +0 -64
- data/test/parser/test_alternation.rb +0 -92
- data/test/parser/test_anchors.rb +0 -34
- data/test/parser/test_conditionals.rb +0 -187
- data/test/parser/test_errors.rb +0 -63
- data/test/parser/test_escapes.rb +0 -134
- data/test/parser/test_free_space.rb +0 -139
- data/test/parser/test_groups.rb +0 -289
- data/test/parser/test_keep.rb +0 -21
- data/test/parser/test_posix_classes.rb +0 -27
- data/test/parser/test_properties.rb +0 -133
- data/test/parser/test_quantifiers.rb +0 -301
- data/test/parser/test_refcalls.rb +0 -186
- data/test/parser/test_sets.rb +0 -179
- data/test/parser/test_types.rb +0 -50
- data/test/scanner/test_all.rb +0 -38
- data/test/scanner/test_anchors.rb +0 -38
- data/test/scanner/test_conditionals.rb +0 -184
- data/test/scanner/test_errors.rb +0 -91
- data/test/scanner/test_escapes.rb +0 -56
- data/test/scanner/test_free_space.rb +0 -200
- data/test/scanner/test_groups.rb +0 -79
- data/test/scanner/test_keep.rb +0 -35
- data/test/scanner/test_literals.rb +0 -89
- data/test/scanner/test_meta.rb +0 -40
- data/test/scanner/test_properties.rb +0 -312
- data/test/scanner/test_quantifiers.rb +0 -37
- data/test/scanner/test_refcalls.rb +0 -52
- data/test/scanner/test_scripts.rb +0 -53
- data/test/scanner/test_sets.rb +0 -119
- data/test/scanner/test_types.rb +0 -35
- data/test/scanner/test_unicode_blocks.rb +0 -30
- data/test/support/disable_autotest.rb +0 -8
- data/test/syntax/test_all.rb +0 -6
- data/test/syntax/test_syntax.rb +0 -61
- data/test/syntax/test_syntax_token_map.rb +0 -25
- data/test/syntax/versions/test_1.8.rb +0 -55
- data/test/syntax/versions/test_1.9.1.rb +0 -36
- data/test/syntax/versions/test_1.9.3.rb +0 -32
- data/test/syntax/versions/test_2.0.0.rb +0 -37
- data/test/syntax/versions/test_2.2.0.rb +0 -32
- data/test/syntax/versions/test_aliases.rb +0 -129
- data/test/syntax/versions/test_all.rb +0 -5
- data/test/test_all.rb +0 -5
- data/test/token/test_all.rb +0 -2
- data/test/token/test_token.rb +0 -107
@@ -21,9 +21,6 @@
|
|
21
21
|
when '\W'; emit(:type, :nonword, text, ts - 1, te)
|
22
22
|
when '\R'; emit(:type, :linebreak, text, ts - 1, te)
|
23
23
|
when '\X'; emit(:type, :xgrapheme, text, ts - 1, te)
|
24
|
-
else
|
25
|
-
raise ScannerError.new(
|
26
|
-
"Unexpected character in type at #{text} (char #{ts})")
|
27
24
|
end
|
28
25
|
fret;
|
29
26
|
};
|
@@ -5,6 +5,9 @@
|
|
5
5
|
adlam: adlam
|
6
6
|
age=1.1: age=1.1
|
7
7
|
age=10.0: age=10.0
|
8
|
+
age=11.0: age=11.0
|
9
|
+
age=12.0: age=12.0
|
10
|
+
age=12.1: age=12.1
|
8
11
|
age=2.0: age=2.0
|
9
12
|
age=2.1: age=2.1
|
10
13
|
age=3.0: age=3.0
|
@@ -63,7 +66,6 @@ changeswhenuppercased: changes_when_uppercased
|
|
63
66
|
cherokee: cherokee
|
64
67
|
closepunctuation: close_punctuation
|
65
68
|
cntrl: cntrl
|
66
|
-
combiningmark: combining_mark
|
67
69
|
common: common
|
68
70
|
connectorpunctuation: connector_punctuation
|
69
71
|
control: control
|
@@ -81,9 +83,11 @@ deseret: deseret
|
|
81
83
|
devanagari: devanagari
|
82
84
|
diacritic: diacritic
|
83
85
|
digit: digit
|
86
|
+
dogra: dogra
|
84
87
|
duployan: duployan
|
85
88
|
egyptianhieroglyphs: egyptian_hieroglyphs
|
86
89
|
elbasan: elbasan
|
90
|
+
elymaic: elymaic
|
87
91
|
emoji: emoji
|
88
92
|
emojicomponent: emoji_component
|
89
93
|
emojimodifier: emoji_modifier
|
@@ -104,9 +108,11 @@ graphemeextend: grapheme_extend
|
|
104
108
|
graphemelink: grapheme_link
|
105
109
|
greek: greek
|
106
110
|
gujarati: gujarati
|
111
|
+
gunjalagondi: gunjala_gondi
|
107
112
|
gurmukhi: gurmukhi
|
108
113
|
han: han
|
109
114
|
hangul: hangul
|
115
|
+
hanifirohingya: hanifi_rohingya
|
110
116
|
hanunoo: hanunoo
|
111
117
|
hatran: hatran
|
112
118
|
hebrew: hebrew
|
@@ -160,6 +166,7 @@ inchakma: in_chakma
|
|
160
166
|
incham: in_cham
|
161
167
|
incherokee: in_cherokee
|
162
168
|
incherokeesupplement: in_cherokee_supplement
|
169
|
+
inchesssymbols: in_chess_symbols
|
163
170
|
incjkcompatibility: in_cjk_compatibility
|
164
171
|
incjkcompatibilityforms: in_cjk_compatibility_forms
|
165
172
|
incjkcompatibilityideographs: in_cjk_compatibility_ideographs
|
@@ -197,11 +204,14 @@ indeseret: in_deseret
|
|
197
204
|
indevanagari: in_devanagari
|
198
205
|
indevanagariextended: in_devanagari_extended
|
199
206
|
indingbats: in_dingbats
|
207
|
+
indogra: in_dogra
|
200
208
|
indominotiles: in_domino_tiles
|
201
209
|
induployan: in_duployan
|
202
210
|
inearlydynasticcuneiform: in_early_dynastic_cuneiform
|
211
|
+
inegyptianhieroglyphformatcontrols: in_egyptian_hieroglyph_format_controls
|
203
212
|
inegyptianhieroglyphs: in_egyptian_hieroglyphs
|
204
213
|
inelbasan: in_elbasan
|
214
|
+
inelymaic: in_elymaic
|
205
215
|
inemoticons: in_emoticons
|
206
216
|
inenclosedalphanumerics: in_enclosed_alphanumerics
|
207
217
|
inenclosedalphanumericsupplement: in_enclosed_alphanumeric_supplement
|
@@ -215,6 +225,7 @@ ingeneralpunctuation: in_general_punctuation
|
|
215
225
|
ingeometricshapes: in_geometric_shapes
|
216
226
|
ingeometricshapesextended: in_geometric_shapes_extended
|
217
227
|
ingeorgian: in_georgian
|
228
|
+
ingeorgianextended: in_georgian_extended
|
218
229
|
ingeorgiansupplement: in_georgian_supplement
|
219
230
|
inglagolitic: in_glagolitic
|
220
231
|
inglagoliticsupplement: in_glagolitic_supplement
|
@@ -223,6 +234,7 @@ ingrantha: in_grantha
|
|
223
234
|
ingreekandcoptic: in_greek_and_coptic
|
224
235
|
ingreekextended: in_greek_extended
|
225
236
|
ingujarati: in_gujarati
|
237
|
+
ingunjalagondi: in_gunjala_gondi
|
226
238
|
ingurmukhi: in_gurmukhi
|
227
239
|
inhalfwidthandfullwidthforms: in_halfwidth_and_fullwidth_forms
|
228
240
|
inhangulcompatibilityjamo: in_hangul_compatibility_jamo
|
@@ -230,6 +242,7 @@ inhanguljamo: in_hangul_jamo
|
|
230
242
|
inhanguljamoextendeda: in_hangul_jamo_extended_a
|
231
243
|
inhanguljamoextendedb: in_hangul_jamo_extended_b
|
232
244
|
inhangulsyllables: in_hangul_syllables
|
245
|
+
inhanifirohingya: in_hanifi_rohingya
|
233
246
|
inhanunoo: in_hanunoo
|
234
247
|
inhatran: in_hatran
|
235
248
|
inhebrew: in_hebrew
|
@@ -240,6 +253,7 @@ inhiragana: in_hiragana
|
|
240
253
|
inideographicdescriptioncharacters: in_ideographic_description_characters
|
241
254
|
inideographicsymbolsandpunctuation: in_ideographic_symbols_and_punctuation
|
242
255
|
inimperialaramaic: in_imperial_aramaic
|
256
|
+
inindicsiyaqnumbers: in_indic_siyaq_numbers
|
243
257
|
ininscriptionalpahlavi: in_inscriptional_pahlavi
|
244
258
|
ininscriptionalparthian: in_inscriptional_parthian
|
245
259
|
inipaextensions: in_ipa_extensions
|
@@ -279,6 +293,7 @@ inlycian: in_lycian
|
|
279
293
|
inlydian: in_lydian
|
280
294
|
inmahajani: in_mahajani
|
281
295
|
inmahjongtiles: in_mahjong_tiles
|
296
|
+
inmakasar: in_makasar
|
282
297
|
inmalayalam: in_malayalam
|
283
298
|
inmandaic: in_mandaic
|
284
299
|
inmanichaean: in_manichaean
|
@@ -286,6 +301,8 @@ inmarchen: in_marchen
|
|
286
301
|
inmasaramgondi: in_masaram_gondi
|
287
302
|
inmathematicalalphanumericsymbols: in_mathematical_alphanumeric_symbols
|
288
303
|
inmathematicaloperators: in_mathematical_operators
|
304
|
+
inmayannumerals: in_mayan_numerals
|
305
|
+
inmedefaidrin: in_medefaidrin
|
289
306
|
inmeeteimayek: in_meetei_mayek
|
290
307
|
inmeeteimayekextensions: in_meetei_mayek_extensions
|
291
308
|
inmendekikakui: in_mende_kikakui
|
@@ -309,12 +326,14 @@ inmyanmar: in_myanmar
|
|
309
326
|
inmyanmarextendeda: in_myanmar_extended_a
|
310
327
|
inmyanmarextendedb: in_myanmar_extended_b
|
311
328
|
innabataean: in_nabataean
|
329
|
+
innandinagari: in_nandinagari
|
312
330
|
innewa: in_newa
|
313
331
|
innewtailue: in_new_tai_lue
|
314
332
|
innko: in_nko
|
315
333
|
innoblock: in_no_block
|
316
334
|
innumberforms: in_number_forms
|
317
335
|
innushu: in_nushu
|
336
|
+
innyiakengpuachuehmong: in_nyiakeng_puachue_hmong
|
318
337
|
inogham: in_ogham
|
319
338
|
inolchiki: in_ol_chiki
|
320
339
|
inoldhungarian: in_old_hungarian
|
@@ -322,6 +341,7 @@ inolditalic: in_old_italic
|
|
322
341
|
inoldnortharabian: in_old_north_arabian
|
323
342
|
inoldpermic: in_old_permic
|
324
343
|
inoldpersian: in_old_persian
|
344
|
+
inoldsogdian: in_old_sogdian
|
325
345
|
inoldsoutharabian: in_old_south_arabian
|
326
346
|
inoldturkic: in_old_turkic
|
327
347
|
inopticalcharacterrecognition: in_optical_character_recognition
|
@@ -329,6 +349,7 @@ inoriya: in_oriya
|
|
329
349
|
inornamentaldingbats: in_ornamental_dingbats
|
330
350
|
inosage: in_osage
|
331
351
|
inosmanya: in_osmanya
|
352
|
+
inottomansiyaqnumbers: in_ottoman_siyaq_numbers
|
332
353
|
inpahawhhmong: in_pahawh_hmong
|
333
354
|
inpalmyrene: in_palmyrene
|
334
355
|
inpaucinhau: in_pau_cin_hau
|
@@ -354,6 +375,8 @@ insiddham: in_siddham
|
|
354
375
|
insinhala: in_sinhala
|
355
376
|
insinhalaarchaicnumbers: in_sinhala_archaic_numbers
|
356
377
|
insmallformvariants: in_small_form_variants
|
378
|
+
insmallkanaextension: in_small_kana_extension
|
379
|
+
insogdian: in_sogdian
|
357
380
|
insorasompeng: in_sora_sompeng
|
358
381
|
insoyombo: in_soyombo
|
359
382
|
inspacingmodifierletters: in_spacing_modifier_letters
|
@@ -371,6 +394,7 @@ insupplementaryprivateuseareaa: in_supplementary_private_use_area_a
|
|
371
394
|
insupplementaryprivateuseareab: in_supplementary_private_use_area_b
|
372
395
|
insuttonsignwriting: in_sutton_signwriting
|
373
396
|
insylotinagri: in_syloti_nagri
|
397
|
+
insymbolsandpictographsextendeda: in_symbols_and_pictographs_extended_a
|
374
398
|
insyriac: in_syriac
|
375
399
|
insyriacsupplement: in_syriac_supplement
|
376
400
|
intagalog: in_tagalog
|
@@ -382,6 +406,7 @@ intaiviet: in_tai_viet
|
|
382
406
|
intaixuanjingsymbols: in_tai_xuan_jing_symbols
|
383
407
|
intakri: in_takri
|
384
408
|
intamil: in_tamil
|
409
|
+
intamilsupplement: in_tamil_supplement
|
385
410
|
intangut: in_tangut
|
386
411
|
intangutcomponents: in_tangut_components
|
387
412
|
intelugu: in_telugu
|
@@ -399,6 +424,7 @@ invariationselectors: in_variation_selectors
|
|
399
424
|
invariationselectorssupplement: in_variation_selectors_supplement
|
400
425
|
invedicextensions: in_vedic_extensions
|
401
426
|
inverticalforms: in_vertical_forms
|
427
|
+
inwancho: in_wancho
|
402
428
|
inwarangciti: in_warang_citi
|
403
429
|
inyijinghexagramsymbols: in_yijing_hexagram_symbols
|
404
430
|
inyiradicals: in_yi_radicals
|
@@ -431,6 +457,7 @@ lowercaseletter: lowercase_letter
|
|
431
457
|
lycian: lycian
|
432
458
|
lydian: lydian
|
433
459
|
mahajani: mahajani
|
460
|
+
makasar: makasar
|
434
461
|
malayalam: malayalam
|
435
462
|
mandaic: mandaic
|
436
463
|
manichaean: manichaean
|
@@ -439,6 +466,7 @@ mark: mark
|
|
439
466
|
masaramgondi: masaram_gondi
|
440
467
|
math: math
|
441
468
|
mathsymbol: math_symbol
|
469
|
+
medefaidrin: medefaidrin
|
442
470
|
meeteimayek: meetei_mayek
|
443
471
|
mendekikakui: mende_kikakui
|
444
472
|
meroiticcursive: meroitic_cursive
|
@@ -452,6 +480,7 @@ mro: mro
|
|
452
480
|
multani: multani
|
453
481
|
myanmar: myanmar
|
454
482
|
nabataean: nabataean
|
483
|
+
nandinagari: nandinagari
|
455
484
|
newa: newa
|
456
485
|
newline: newline
|
457
486
|
newtailue: new_tai_lue
|
@@ -460,6 +489,7 @@ noncharactercodepoint: noncharacter_code_point
|
|
460
489
|
nonspacingmark: nonspacing_mark
|
461
490
|
number: number
|
462
491
|
nushu: nushu
|
492
|
+
nyiakengpuachuehmong: nyiakeng_puachue_hmong
|
463
493
|
ogham: ogham
|
464
494
|
olchiki: ol_chiki
|
465
495
|
oldhungarian: old_hungarian
|
@@ -467,6 +497,7 @@ olditalic: old_italic
|
|
467
497
|
oldnortharabian: old_north_arabian
|
468
498
|
oldpermic: old_permic
|
469
499
|
oldpersian: old_persian
|
500
|
+
oldsogdian: old_sogdian
|
470
501
|
oldsoutharabian: old_south_arabian
|
471
502
|
oldturkic: old_turkic
|
472
503
|
openpunctuation: open_punctuation
|
@@ -515,6 +546,7 @@ siddham: siddham
|
|
515
546
|
signwriting: signwriting
|
516
547
|
sinhala: sinhala
|
517
548
|
softdotted: soft_dotted
|
549
|
+
sogdian: sogdian
|
518
550
|
sorasompeng: sora_sompeng
|
519
551
|
soyombo: soyombo
|
520
552
|
space: space
|
@@ -550,6 +582,7 @@ uppercase: uppercase
|
|
550
582
|
uppercaseletter: uppercase_letter
|
551
583
|
vai: vai
|
552
584
|
variationselector: variation_selector
|
585
|
+
wancho: wancho
|
553
586
|
warangciti: warang_citi
|
554
587
|
whitespace: white_space
|
555
588
|
word: word
|
@@ -31,6 +31,7 @@ cher: cherokee
|
|
31
31
|
ci: case_ignorable
|
32
32
|
cn: unassigned
|
33
33
|
co: private_use
|
34
|
+
combiningmark: mark
|
34
35
|
copt: coptic
|
35
36
|
cprt: cypriot
|
36
37
|
cs: surrogate
|
@@ -44,14 +45,17 @@ dep: deprecated
|
|
44
45
|
deva: devanagari
|
45
46
|
di: default_ignorable_code_point
|
46
47
|
dia: diacritic
|
48
|
+
dogr: dogra
|
47
49
|
dsrt: deseret
|
48
50
|
dupl: duployan
|
49
51
|
egyp: egyptian_hieroglyphs
|
50
52
|
elba: elbasan
|
53
|
+
elym: elymaic
|
51
54
|
ethi: ethiopic
|
52
55
|
ext: extender
|
53
56
|
geor: georgian
|
54
57
|
glag: glagolitic
|
58
|
+
gong: gunjala_gondi
|
55
59
|
gonm: masaram_gondi
|
56
60
|
goth: gothic
|
57
61
|
gran: grantha
|
@@ -70,6 +74,7 @@ hex: hex_digit
|
|
70
74
|
hira: hiragana
|
71
75
|
hluw: anatolian_hieroglyphs
|
72
76
|
hmng: pahawh_hmong
|
77
|
+
hmnp: nyiakeng_puachue_hmong
|
73
78
|
hung: old_hungarian
|
74
79
|
idc: id_continue
|
75
80
|
ideo: ideographic
|
@@ -105,11 +110,13 @@ lyci: lycian
|
|
105
110
|
lydi: lydian
|
106
111
|
m: mark
|
107
112
|
mahj: mahajani
|
113
|
+
maka: makasar
|
108
114
|
mand: mandaic
|
109
115
|
mani: manichaean
|
110
116
|
marc: marchen
|
111
117
|
mc: spacing_mark
|
112
118
|
me: enclosing_mark
|
119
|
+
medf: medefaidrin
|
113
120
|
mend: mende_kikakui
|
114
121
|
merc: meroitic_cursive
|
115
122
|
mero: meroitic_hieroglyphs
|
@@ -121,6 +128,7 @@ mtei: meetei_mayek
|
|
121
128
|
mult: multani
|
122
129
|
mymr: myanmar
|
123
130
|
n: number
|
131
|
+
nand: nandinagari
|
124
132
|
narb: old_north_arabian
|
125
133
|
nbat: nabataean
|
126
134
|
nchar: noncharacter_code_point
|
@@ -168,6 +176,7 @@ qaai: inherited
|
|
168
176
|
qmark: quotation_mark
|
169
177
|
ri: regional_indicator
|
170
178
|
rjng: rejang
|
179
|
+
rohg: hanifi_rohingya
|
171
180
|
runr: runic
|
172
181
|
s: symbol
|
173
182
|
samr: samaritan
|
@@ -184,6 +193,8 @@ sinh: sinhala
|
|
184
193
|
sk: modifier_symbol
|
185
194
|
sm: math_symbol
|
186
195
|
so: other_symbol
|
196
|
+
sogd: sogdian
|
197
|
+
sogo: old_sogdian
|
187
198
|
sora: sora_sompeng
|
188
199
|
soyo: soyombo
|
189
200
|
sterm: sentence_terminal
|
@@ -209,6 +220,7 @@ uideo: unified_ideograph
|
|
209
220
|
vaii: vai
|
210
221
|
vs: variation_selector
|
211
222
|
wara: warang_citi
|
223
|
+
wcho: wancho
|
212
224
|
wspace: white_space
|
213
225
|
xidc: xid_continue
|
214
226
|
xids: xid_start
|
@@ -49,9 +49,9 @@
|
|
49
49
|
codepoint_list = 'u{' . xdigit{1,6} . (space . xdigit{1,6})* . '}';
|
50
50
|
codepoint_sequence = codepoint_single | codepoint_list;
|
51
51
|
|
52
|
-
control_sequence = ('c' | 'C-') . (backslash . 'M-')
|
52
|
+
control_sequence = ('c' | 'C-') . (backslash . 'M-')? . backslash? . any;
|
53
53
|
|
54
|
-
meta_sequence = 'M-' . (backslash .
|
54
|
+
meta_sequence = 'M-' . (backslash . ('c' | 'C-'))? . backslash? . any;
|
55
55
|
|
56
56
|
zero_or_one = '?' | '??' | '?+';
|
57
57
|
zero_or_more = '*' | '*?' | '*+';
|
@@ -62,13 +62,17 @@
|
|
62
62
|
quantifier_possessive = '?+' | '*+' | '++';
|
63
63
|
quantifier_mode = '?' | '+';
|
64
64
|
|
65
|
-
|
66
|
-
|
65
|
+
quantity_exact = (digit+);
|
66
|
+
quantity_minimum = (digit+) . ',';
|
67
|
+
quantity_maximum = ',' . (digit+);
|
68
|
+
quantity_range = (digit+) . ',' . (digit+);
|
69
|
+
quantifier_interval = range_open . ( quantity_exact | quantity_minimum |
|
70
|
+
quantity_maximum | quantity_range ) . range_close .
|
71
|
+
quantifier_mode?;
|
67
72
|
|
68
73
|
quantifiers = quantifier_greedy | quantifier_reluctant |
|
69
74
|
quantifier_possessive | quantifier_interval;
|
70
75
|
|
71
|
-
|
72
76
|
conditional = '(?(';
|
73
77
|
|
74
78
|
group_comment = '?#' . [^)]* . group_close;
|
@@ -82,7 +86,8 @@
|
|
82
86
|
assertion_lookbehind = '?<=';
|
83
87
|
assertion_nlookbehind = '?<!';
|
84
88
|
|
85
|
-
|
89
|
+
# try to treat every other group head as options group, like Ruby
|
90
|
+
group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
|
86
91
|
|
87
92
|
group_ref = [gk];
|
88
93
|
group_name_char = (alnum | '_');
|
@@ -113,6 +118,8 @@
|
|
113
118
|
curlies | parantheses | brackets |
|
114
119
|
line_anchor | quantifier_greedy;
|
115
120
|
|
121
|
+
literal_delimiters = ']' | '}';
|
122
|
+
|
116
123
|
ascii_print = ((0x20..0x7e) - meta_char);
|
117
124
|
ascii_nonprint = (0x01..0x1f | 0x7f);
|
118
125
|
|
@@ -135,41 +142,35 @@
|
|
135
142
|
# Invalid sequence error, used from sequences, like escapes and sets
|
136
143
|
action invalid_sequence_error {
|
137
144
|
text = ts ? copy(data, ts-1..-1) : data.pack('c*')
|
138
|
-
|
145
|
+
validation_error(:sequence, 'sequence', text)
|
139
146
|
}
|
140
147
|
|
141
148
|
# group (nesting) and set open/close actions
|
142
|
-
action group_opened { self.group_depth = group_depth + 1
|
143
|
-
action group_closed { self.group_depth = group_depth - 1
|
149
|
+
action group_opened { self.group_depth = group_depth + 1 }
|
150
|
+
action group_closed { self.group_depth = group_depth - 1 }
|
151
|
+
action set_opened { self.set_depth = set_depth + 1 }
|
152
|
+
action set_closed { self.set_depth = set_depth - 1 }
|
144
153
|
|
145
154
|
# Character set scanner, continues consuming characters until it meets the
|
146
155
|
# closing bracket of the set.
|
147
156
|
# --------------------------------------------------------------------------
|
148
157
|
character_set := |*
|
149
|
-
set_close > (set_meta, 2) {
|
150
|
-
set_depth -= 1
|
151
|
-
in_set = set_depth > 0 ? true : false
|
152
|
-
|
158
|
+
set_close > (set_meta, 2) @set_closed {
|
153
159
|
emit(:set, :close, *text(data, ts, te))
|
154
|
-
|
155
|
-
if set_depth == 0
|
156
|
-
fgoto main;
|
157
|
-
else
|
160
|
+
if in_set?
|
158
161
|
fret;
|
162
|
+
else
|
163
|
+
fgoto main;
|
159
164
|
end
|
160
165
|
};
|
161
166
|
|
162
|
-
'-]' { # special case, emits two tokens
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
emit(:literal, :literal, copy(data, ts..te-2), ts, te)
|
167
|
-
emit(:set, :close, copy(data, ts+1..te-1), ts, te)
|
168
|
-
|
169
|
-
if set_depth == 0
|
170
|
-
fgoto main;
|
171
|
-
else
|
167
|
+
'-]' @set_closed { # special case, emits two tokens
|
168
|
+
emit(:literal, :literal, copy(data, ts..te-2), ts, te - 1)
|
169
|
+
emit(:set, :close, copy(data, ts+1..te-1), ts + 1, te)
|
170
|
+
if in_set?
|
172
171
|
fret;
|
172
|
+
else
|
173
|
+
fgoto main;
|
173
174
|
end
|
174
175
|
};
|
175
176
|
|
@@ -207,14 +208,12 @@
|
|
207
208
|
fcall set_escape_sequence;
|
208
209
|
};
|
209
210
|
|
210
|
-
set_open >(open_bracket, 1) {
|
211
|
-
set_depth += 1
|
212
|
-
|
211
|
+
set_open >(open_bracket, 1) >set_opened {
|
213
212
|
emit(:set, :open, *text(data, ts, te))
|
214
213
|
fcall character_set;
|
215
214
|
};
|
216
215
|
|
217
|
-
class_posix >(open_bracket, 1) @eof(premature_end_error)
|
216
|
+
class_posix >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
218
217
|
text = text(data, ts, te).first
|
219
218
|
|
220
219
|
type = :posixclass
|
@@ -227,11 +226,11 @@
|
|
227
226
|
emit(type, class_name.to_sym, text, ts, te)
|
228
227
|
};
|
229
228
|
|
230
|
-
collating_sequence >(open_bracket, 1) @eof(premature_end_error)
|
229
|
+
collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
231
230
|
emit(:set, :collation, *text(data, ts, te))
|
232
231
|
};
|
233
232
|
|
234
|
-
character_equivalent >(open_bracket, 1) @eof(premature_end_error)
|
233
|
+
character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
235
234
|
emit(:set, :equivalent, *text(data, ts, te))
|
236
235
|
};
|
237
236
|
|
@@ -337,44 +336,24 @@
|
|
337
336
|
};
|
338
337
|
|
339
338
|
control_sequence >(escaped_alpha, 4) $eof(premature_end_error) {
|
340
|
-
|
341
|
-
c = data[te].chr
|
342
|
-
if c =~ /[\x00-\x7F]/
|
343
|
-
emit(:escape, :control, copy(data, ts-1..te), ts-1, te+1)
|
344
|
-
p += 1
|
345
|
-
else
|
346
|
-
raise InvalidSequenceError.new("control sequence")
|
347
|
-
end
|
348
|
-
else
|
349
|
-
raise PrematureEndError.new("control sequence")
|
350
|
-
end
|
339
|
+
emit_meta_control_sequence(data, ts, te, :control)
|
351
340
|
fret;
|
352
341
|
};
|
353
342
|
|
354
343
|
meta_sequence >(backslashed, 3) $eof(premature_end_error) {
|
355
|
-
|
356
|
-
c = data[te].chr
|
357
|
-
if c =~ /[\x00-\x7F]/
|
358
|
-
emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1)
|
359
|
-
p += 1
|
360
|
-
else
|
361
|
-
raise InvalidSequenceError.new("meta sequence")
|
362
|
-
end
|
363
|
-
else
|
364
|
-
raise PrematureEndError.new("meta sequence")
|
365
|
-
end
|
344
|
+
emit_meta_control_sequence(data, ts, te, :meta_sequence)
|
366
345
|
fret;
|
367
346
|
};
|
368
347
|
|
369
348
|
char_type_char > (escaped_alpha, 2) {
|
370
349
|
fhold;
|
371
|
-
fnext *(in_set ? fentry(character_set) : fentry(main));
|
350
|
+
fnext *(in_set? ? fentry(character_set) : fentry(main));
|
372
351
|
fcall char_type;
|
373
352
|
};
|
374
353
|
|
375
354
|
property_char > (escaped_alpha, 2) {
|
376
355
|
fhold;
|
377
|
-
fnext *(in_set ? fentry(character_set) : fentry(main));
|
356
|
+
fnext *(in_set? ? fentry(character_set) : fentry(main));
|
378
357
|
fcall unicode_property;
|
379
358
|
};
|
380
359
|
|
@@ -412,8 +391,7 @@
|
|
412
391
|
};
|
413
392
|
|
414
393
|
alternation {
|
415
|
-
if
|
416
|
-
conditional_stack.last[1] == group_depth
|
394
|
+
if conditional_stack.last == group_depth
|
417
395
|
emit(:conditional, :separator, *text(data, ts, te))
|
418
396
|
else
|
419
397
|
emit(:meta, :alternation, *text(data, ts, te))
|
@@ -442,18 +420,16 @@
|
|
442
420
|
when '\\b'; emit(:anchor, :word_boundary, text, ts, te)
|
443
421
|
when '\\B'; emit(:anchor, :nonword_boundary, text, ts, te)
|
444
422
|
when '\\G'; emit(:anchor, :match_start, text, ts, te)
|
445
|
-
else
|
446
|
-
raise ScannerError.new(
|
447
|
-
"Unexpected character in anchor at #{text} (char #{ts})")
|
448
423
|
end
|
449
424
|
};
|
450
425
|
|
426
|
+
literal_delimiters {
|
427
|
+
append_literal(data, ts, te)
|
428
|
+
};
|
429
|
+
|
451
430
|
# Character sets
|
452
431
|
# ------------------------------------------------------------------------
|
453
|
-
set_open {
|
454
|
-
set_depth += 1
|
455
|
-
in_set = true
|
456
|
-
|
432
|
+
set_open >set_opened {
|
457
433
|
emit(:set, :open, *text(data, ts, te))
|
458
434
|
fcall character_set;
|
459
435
|
};
|
@@ -465,9 +441,7 @@
|
|
465
441
|
conditional {
|
466
442
|
text = text(data, ts, te).first
|
467
443
|
|
468
|
-
|
469
|
-
conditional_depth += 1
|
470
|
-
conditional_stack << [conditional_depth, group_depth]
|
444
|
+
conditional_stack << group_depth
|
471
445
|
|
472
446
|
emit(:conditional, :open, text[0..-2], ts, te-1)
|
473
447
|
emit(:conditional, :condition_open, '(', te-1, te)
|
@@ -496,7 +470,11 @@
|
|
496
470
|
# (?imxdau-imx:subexp) option on/off for subexp
|
497
471
|
# ------------------------------------------------------------------------
|
498
472
|
group_open . group_options >group_opened {
|
499
|
-
|
473
|
+
text = text(data, ts, te).first
|
474
|
+
if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
|
475
|
+
raise InvalidGroupOption.new($1 || "-#{$2}", text)
|
476
|
+
end
|
477
|
+
emit_options(text, ts, te)
|
500
478
|
};
|
501
479
|
|
502
480
|
# Assertions
|
@@ -528,19 +506,15 @@
|
|
528
506
|
when '(?>'; emit(:group, :atomic, text, ts, te)
|
529
507
|
when '(?~'; emit(:group, :absence, text, ts, te)
|
530
508
|
|
531
|
-
when /^\(
|
532
|
-
|
509
|
+
when /^\(\?(?:<>|'')/
|
510
|
+
validation_error(:group, 'named group', 'name is empty')
|
533
511
|
|
512
|
+
when /^\(\?<\w*>/
|
534
513
|
emit(:group, :named_ab, text, ts, te)
|
535
514
|
|
536
|
-
when /^\(\?'
|
537
|
-
empty_name_error(:group, 'named group (sq)') if $1.empty?
|
538
|
-
|
515
|
+
when /^\(\?'\w*'/
|
539
516
|
emit(:group, :named_sq, text, ts, te)
|
540
517
|
|
541
|
-
else
|
542
|
-
raise ScannerError.new(
|
543
|
-
"Unknown subexpression group format '#{text}'")
|
544
518
|
end
|
545
519
|
};
|
546
520
|
|
@@ -550,20 +524,13 @@
|
|
550
524
|
};
|
551
525
|
|
552
526
|
group_close @group_closed {
|
553
|
-
if
|
554
|
-
conditional_stack.last[1] == (group_depth + 1)
|
555
|
-
|
556
|
-
emit(:conditional, :close, *text(data, ts, te))
|
527
|
+
if conditional_stack.last == group_depth + 1
|
557
528
|
conditional_stack.pop
|
558
|
-
|
559
|
-
if conditional_stack.length == 0
|
560
|
-
in_conditional = false
|
561
|
-
end
|
529
|
+
emit(:conditional, :close, *text(data, ts, te))
|
562
530
|
else
|
563
|
-
if spacing_stack.length > 1
|
564
|
-
|
531
|
+
if spacing_stack.length > 1 &&
|
532
|
+
spacing_stack.last[:depth] == group_depth + 1
|
565
533
|
spacing_stack.pop
|
566
|
-
|
567
534
|
self.free_spacing = spacing_stack.last[:free_spacing]
|
568
535
|
end
|
569
536
|
|
@@ -576,11 +543,8 @@
|
|
576
543
|
# ------------------------------------------------------------------------
|
577
544
|
backslash . (group_name_ref | group_number_ref) > (backslashed, 4) {
|
578
545
|
case text = text(data, ts, te).first
|
579
|
-
when /^\\([gk])
|
580
|
-
|
581
|
-
|
582
|
-
when /^\\([gk])''/ # single quotes
|
583
|
-
empty_backref_error("ref/call (sq)")
|
546
|
+
when /^\\([gk])(<>|'')/ # angle brackets
|
547
|
+
validation_error(:backref, 'ref/call', 'ref ID is empty')
|
584
548
|
|
585
549
|
when /^\\([gk])<[^\d+-]\w*>/ # angle-brackets
|
586
550
|
if $1 == 'k'
|
@@ -636,9 +600,6 @@
|
|
636
600
|
when /^\\([gk])'[+\-]?\d+[+\-]\d+'/ # single-quotes
|
637
601
|
emit(:backref, :number_recursion_ref_sq, text, ts, te)
|
638
602
|
|
639
|
-
else
|
640
|
-
raise ScannerError.new(
|
641
|
-
"Unknown backreference format '#{text}'")
|
642
603
|
end
|
643
604
|
};
|
644
605
|
|
@@ -669,10 +630,15 @@
|
|
669
630
|
end
|
670
631
|
};
|
671
632
|
|
672
|
-
quantifier_interval
|
633
|
+
quantifier_interval {
|
673
634
|
emit(:quantifier, :interval, *text(data, ts, te))
|
674
635
|
};
|
675
636
|
|
637
|
+
# Catch unmatched curly braces as literals
|
638
|
+
range_open {
|
639
|
+
append_literal(data, ts, te)
|
640
|
+
};
|
641
|
+
|
676
642
|
# Escaped sequences
|
677
643
|
# ------------------------------------------------------------------------
|
678
644
|
backslash > (backslashed, 1) {
|
@@ -786,7 +752,7 @@ class Regexp::Scanner
|
|
786
752
|
input = input_object
|
787
753
|
self.free_spacing = false
|
788
754
|
end
|
789
|
-
|
755
|
+
self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
|
790
756
|
|
791
757
|
data = input.unpack("c*") if input.is_a?(String)
|
792
758
|
eof = data.length
|
@@ -794,15 +760,9 @@ class Regexp::Scanner
|
|
794
760
|
self.tokens = []
|
795
761
|
self.block = block_given? ? block : nil
|
796
762
|
|
797
|
-
self.
|
763
|
+
self.set_depth = 0
|
798
764
|
self.group_depth = 0
|
799
|
-
self.
|
800
|
-
|
801
|
-
in_set = false
|
802
|
-
set_depth = 0
|
803
|
-
in_conditional = false
|
804
|
-
conditional_depth = 0
|
805
|
-
conditional_stack = []
|
765
|
+
self.conditional_stack = []
|
806
766
|
|
807
767
|
%% write data;
|
808
768
|
%% write init;
|
@@ -817,9 +777,9 @@ class Regexp::Scanner
|
|
817
777
|
end
|
818
778
|
|
819
779
|
raise PrematureEndError.new("(missing group closing paranthesis) "+
|
820
|
-
"[#{
|
780
|
+
"[#{group_depth}]") if in_group?
|
821
781
|
raise PrematureEndError.new("(missing set closing bracket) "+
|
822
|
-
"[#{
|
782
|
+
"[#{set_depth}]") if in_set?
|
823
783
|
|
824
784
|
# when the entire expression is a literal run
|
825
785
|
emit_literal if literal
|
@@ -854,62 +814,15 @@ class Regexp::Scanner
|
|
854
814
|
|
855
815
|
private
|
856
816
|
|
857
|
-
attr_accessor :tokens, :literal, :block,
|
858
|
-
:
|
859
|
-
:free_spacing, :spacing_stack
|
860
|
-
|
861
|
-
# Ragel's regex-based scan of the group options introduced a lot of
|
862
|
-
# ambiguity, so we just ask it to find the beginning of what looks
|
863
|
-
# like an options run and handle the rest in here.
|
864
|
-
def scan_options(p, data, ts, te)
|
865
|
-
text = text(data, ts, te).first
|
817
|
+
attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
|
818
|
+
:group_depth, :set_depth, :conditional_stack
|
866
819
|
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
# as ruby allows things like '(?xxxxxxxxx-xxxxxxxxxxxxx:abc)'.
|
871
|
-
negative_options = false
|
872
|
-
while options_char
|
873
|
-
if data[te + options_length]
|
874
|
-
c = data[te + options_length].chr
|
875
|
-
|
876
|
-
if c =~ /[-mixdau]/
|
877
|
-
negative_options = true if c == '-'
|
878
|
-
|
879
|
-
raise InvalidGroupOption.new(c, text) if negative_options and
|
880
|
-
c =~ /[dau]/
|
881
|
-
|
882
|
-
text << c ; p += 1 ; options_length += 1
|
883
|
-
else
|
884
|
-
options_char = false
|
885
|
-
end
|
886
|
-
else
|
887
|
-
raise PrematureEndError.new("expression options `#{text}'")
|
888
|
-
end
|
889
|
-
end
|
890
|
-
|
891
|
-
if data[te + options_length]
|
892
|
-
c = data[te + options_length].chr
|
893
|
-
|
894
|
-
if c == ':'
|
895
|
-
# Include the ':' in the options text
|
896
|
-
text << c ; p += 1 ; options_length += 1
|
897
|
-
emit_options(text, ts, te + options_length)
|
898
|
-
|
899
|
-
elsif c == ')'
|
900
|
-
# Don't include the closing ')', let group_close handle it.
|
901
|
-
emit_options(text, ts, te + options_length)
|
902
|
-
|
903
|
-
else
|
904
|
-
# Plain Regexp reports this as 'undefined group option'
|
905
|
-
raise ScannerError.new(
|
906
|
-
"Unexpected `#{c}' in options sequence, ':' or ')' expected")
|
907
|
-
end
|
908
|
-
else
|
909
|
-
raise PrematureEndError.new("expression options `#{text}'")
|
910
|
-
end
|
820
|
+
def in_group?
|
821
|
+
group_depth > 0
|
822
|
+
end
|
911
823
|
|
912
|
-
|
824
|
+
def in_set?
|
825
|
+
set_depth > 0
|
913
826
|
end
|
914
827
|
|
915
828
|
# Copy from ts to te from data as text
|
@@ -945,32 +858,39 @@ class Regexp::Scanner
|
|
945
858
|
def emit_options(text, ts, te)
|
946
859
|
token = nil
|
947
860
|
|
948
|
-
|
949
|
-
|
861
|
+
# Ruby allows things like '(?-xxxx)' or '(?xx-xx--xx-:abc)'.
|
862
|
+
text =~ /\(\?([mixdau]*)(-(?:[mix]*))*(:)?/
|
863
|
+
positive, negative, group_local = $1, $2, $3
|
950
864
|
|
951
|
-
|
952
|
-
|
953
|
-
|
865
|
+
if positive.include?('x')
|
866
|
+
self.free_spacing = true
|
867
|
+
end
|
954
868
|
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
869
|
+
# If the x appears in both, treat it like ruby does, the second cancels
|
870
|
+
# the first.
|
871
|
+
if negative && negative.include?('x')
|
872
|
+
self.free_spacing = false
|
873
|
+
end
|
960
874
|
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
end
|
875
|
+
if group_local
|
876
|
+
spacing_stack << {:free_spacing => free_spacing, :depth => group_depth}
|
877
|
+
token = :options
|
878
|
+
else
|
879
|
+
# switch for parent group level
|
880
|
+
spacing_stack.last[:free_spacing] = free_spacing
|
881
|
+
token = :options_switch
|
969
882
|
end
|
970
883
|
|
971
884
|
emit(:group, token, text, ts, te)
|
972
885
|
end
|
973
886
|
|
887
|
+
def emit_meta_control_sequence(data, ts, te, token)
|
888
|
+
if data.last < 0x00 || data.last > 0x7F
|
889
|
+
validation_error(:sequence, 'escape', token.to_s)
|
890
|
+
end
|
891
|
+
emit(:escape, token, *text(data, ts, te, 1))
|
892
|
+
end
|
893
|
+
|
974
894
|
# Centralizes and unifies the handling of validation related
|
975
895
|
# errors.
|
976
896
|
def validation_error(type, what, reason)
|
@@ -981,21 +901,8 @@ class Regexp::Scanner
|
|
981
901
|
error = InvalidBackrefError.new(what, reason)
|
982
902
|
when :sequence
|
983
903
|
error = InvalidSequenceError.new(what, reason)
|
984
|
-
else
|
985
|
-
error = ValidationError.new('expression')
|
986
904
|
end
|
987
905
|
|
988
906
|
raise error # unless @@config.validation_ignore
|
989
907
|
end
|
990
|
-
|
991
|
-
# Used for references with an empty name or number
|
992
|
-
def empty_backref_error(type, what)
|
993
|
-
validation_error(:backref, what, 'ref ID is empty')
|
994
|
-
end
|
995
|
-
|
996
|
-
# Used for named expressions with an empty name
|
997
|
-
def empty_name_error(type, what)
|
998
|
-
validation_error(type, what, 'name is empty')
|
999
|
-
end
|
1000
|
-
|
1001
908
|
end # module Regexp::Scanner
|