regexp_parser 1.3.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -1
- data/Gemfile +3 -3
- data/README.md +12 -19
- data/Rakefile +3 -4
- data/lib/regexp_parser/expression.rb +28 -53
- data/lib/regexp_parser/expression/classes/backref.rb +18 -10
- data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
- data/lib/regexp_parser/expression/classes/escape.rb +0 -4
- data/lib/regexp_parser/expression/classes/group.rb +4 -2
- data/lib/regexp_parser/expression/classes/keep.rb +1 -3
- data/lib/regexp_parser/expression/methods/match.rb +13 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
- data/lib/regexp_parser/expression/methods/options.rb +35 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
- data/lib/regexp_parser/expression/methods/tests.rb +6 -15
- data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -6
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
- data/lib/regexp_parser/expression/subexpression.rb +3 -5
- data/lib/regexp_parser/lexer.rb +30 -44
- data/lib/regexp_parser/parser.rb +47 -24
- data/lib/regexp_parser/scanner.rb +1228 -1367
- data/lib/regexp_parser/scanner/char_type.rl +0 -3
- data/lib/regexp_parser/scanner/properties/long.yml +34 -1
- data/lib/regexp_parser/scanner/properties/short.yml +12 -0
- data/lib/regexp_parser/scanner/scanner.rl +101 -194
- data/lib/regexp_parser/syntax/tokens.rb +2 -10
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +3 -3
- data/spec/expression/base_spec.rb +94 -0
- data/spec/expression/clone_spec.rb +120 -0
- data/spec/expression/conditional_spec.rb +89 -0
- data/spec/expression/free_space_spec.rb +27 -0
- data/spec/expression/methods/match_length_spec.rb +161 -0
- data/spec/expression/methods/match_spec.rb +25 -0
- data/spec/expression/methods/strfregexp_spec.rb +224 -0
- data/spec/expression/methods/tests_spec.rb +99 -0
- data/spec/expression/methods/traverse_spec.rb +161 -0
- data/spec/expression/options_spec.rb +128 -0
- data/spec/expression/root_spec.rb +9 -0
- data/spec/expression/sequence_spec.rb +9 -0
- data/spec/expression/subexpression_spec.rb +50 -0
- data/spec/expression/to_h_spec.rb +26 -0
- data/spec/expression/to_s_spec.rb +100 -0
- data/spec/lexer/all_spec.rb +22 -0
- data/spec/lexer/conditionals_spec.rb +53 -0
- data/spec/lexer/delimiters_spec.rb +68 -0
- data/spec/lexer/escapes_spec.rb +14 -0
- data/spec/lexer/keep_spec.rb +10 -0
- data/spec/lexer/literals_spec.rb +89 -0
- data/spec/lexer/nesting_spec.rb +99 -0
- data/spec/lexer/refcalls_spec.rb +55 -0
- data/spec/parser/all_spec.rb +43 -0
- data/spec/parser/alternation_spec.rb +88 -0
- data/spec/parser/anchors_spec.rb +17 -0
- data/spec/parser/conditionals_spec.rb +179 -0
- data/spec/parser/errors_spec.rb +30 -0
- data/spec/parser/escapes_spec.rb +121 -0
- data/spec/parser/free_space_spec.rb +130 -0
- data/spec/parser/groups_spec.rb +108 -0
- data/spec/parser/keep_spec.rb +6 -0
- data/spec/parser/posix_classes_spec.rb +8 -0
- data/spec/parser/properties_spec.rb +115 -0
- data/spec/parser/quantifiers_spec.rb +52 -0
- data/spec/parser/refcalls_spec.rb +112 -0
- data/spec/parser/set/intersections_spec.rb +127 -0
- data/spec/parser/set/ranges_spec.rb +111 -0
- data/spec/parser/sets_spec.rb +178 -0
- data/spec/parser/types_spec.rb +18 -0
- data/spec/scanner/all_spec.rb +18 -0
- data/spec/scanner/anchors_spec.rb +21 -0
- data/spec/scanner/conditionals_spec.rb +128 -0
- data/spec/scanner/delimiters_spec.rb +52 -0
- data/spec/scanner/errors_spec.rb +67 -0
- data/spec/scanner/escapes_spec.rb +53 -0
- data/spec/scanner/free_space_spec.rb +133 -0
- data/spec/scanner/groups_spec.rb +52 -0
- data/spec/scanner/keep_spec.rb +10 -0
- data/spec/scanner/literals_spec.rb +49 -0
- data/spec/scanner/meta_spec.rb +18 -0
- data/spec/scanner/properties_spec.rb +64 -0
- data/spec/scanner/quantifiers_spec.rb +20 -0
- data/spec/scanner/refcalls_spec.rb +36 -0
- data/spec/scanner/sets_spec.rb +102 -0
- data/spec/scanner/types_spec.rb +14 -0
- data/spec/spec_helper.rb +15 -0
- data/{test → spec}/support/runner.rb +9 -8
- data/spec/support/shared_examples.rb +77 -0
- data/{test → spec}/support/warning_extractor.rb +5 -7
- data/spec/syntax/syntax_spec.rb +48 -0
- data/spec/syntax/syntax_token_map_spec.rb +23 -0
- data/spec/syntax/versions/1.8.6_spec.rb +17 -0
- data/spec/syntax/versions/1.9.1_spec.rb +10 -0
- data/spec/syntax/versions/1.9.3_spec.rb +9 -0
- data/spec/syntax/versions/2.0.0_spec.rb +13 -0
- data/spec/syntax/versions/2.2.0_spec.rb +9 -0
- data/spec/syntax/versions/aliases_spec.rb +37 -0
- data/spec/token/token_spec.rb +85 -0
- metadata +151 -146
- data/test/expression/test_all.rb +0 -12
- data/test/expression/test_base.rb +0 -90
- data/test/expression/test_clone.rb +0 -89
- data/test/expression/test_conditionals.rb +0 -113
- data/test/expression/test_free_space.rb +0 -35
- data/test/expression/test_set.rb +0 -84
- data/test/expression/test_strfregexp.rb +0 -230
- data/test/expression/test_subexpression.rb +0 -58
- data/test/expression/test_tests.rb +0 -99
- data/test/expression/test_to_h.rb +0 -59
- data/test/expression/test_to_s.rb +0 -104
- data/test/expression/test_traverse.rb +0 -161
- data/test/helpers.rb +0 -10
- data/test/lexer/test_all.rb +0 -41
- data/test/lexer/test_conditionals.rb +0 -127
- data/test/lexer/test_keep.rb +0 -24
- data/test/lexer/test_literals.rb +0 -130
- data/test/lexer/test_nesting.rb +0 -132
- data/test/lexer/test_refcalls.rb +0 -56
- data/test/parser/set/test_intersections.rb +0 -127
- data/test/parser/set/test_ranges.rb +0 -111
- data/test/parser/test_all.rb +0 -64
- data/test/parser/test_alternation.rb +0 -92
- data/test/parser/test_anchors.rb +0 -34
- data/test/parser/test_conditionals.rb +0 -187
- data/test/parser/test_errors.rb +0 -63
- data/test/parser/test_escapes.rb +0 -134
- data/test/parser/test_free_space.rb +0 -139
- data/test/parser/test_groups.rb +0 -289
- data/test/parser/test_keep.rb +0 -21
- data/test/parser/test_posix_classes.rb +0 -27
- data/test/parser/test_properties.rb +0 -133
- data/test/parser/test_quantifiers.rb +0 -301
- data/test/parser/test_refcalls.rb +0 -186
- data/test/parser/test_sets.rb +0 -179
- data/test/parser/test_types.rb +0 -50
- data/test/scanner/test_all.rb +0 -38
- data/test/scanner/test_anchors.rb +0 -38
- data/test/scanner/test_conditionals.rb +0 -184
- data/test/scanner/test_errors.rb +0 -91
- data/test/scanner/test_escapes.rb +0 -56
- data/test/scanner/test_free_space.rb +0 -200
- data/test/scanner/test_groups.rb +0 -79
- data/test/scanner/test_keep.rb +0 -35
- data/test/scanner/test_literals.rb +0 -89
- data/test/scanner/test_meta.rb +0 -40
- data/test/scanner/test_properties.rb +0 -312
- data/test/scanner/test_quantifiers.rb +0 -37
- data/test/scanner/test_refcalls.rb +0 -52
- data/test/scanner/test_scripts.rb +0 -53
- data/test/scanner/test_sets.rb +0 -119
- data/test/scanner/test_types.rb +0 -35
- data/test/scanner/test_unicode_blocks.rb +0 -30
- data/test/support/disable_autotest.rb +0 -8
- data/test/syntax/test_all.rb +0 -6
- data/test/syntax/test_syntax.rb +0 -61
- data/test/syntax/test_syntax_token_map.rb +0 -25
- data/test/syntax/versions/test_1.8.rb +0 -55
- data/test/syntax/versions/test_1.9.1.rb +0 -36
- data/test/syntax/versions/test_1.9.3.rb +0 -32
- data/test/syntax/versions/test_2.0.0.rb +0 -37
- data/test/syntax/versions/test_2.2.0.rb +0 -32
- data/test/syntax/versions/test_aliases.rb +0 -129
- data/test/syntax/versions/test_all.rb +0 -5
- data/test/test_all.rb +0 -5
- data/test/token/test_all.rb +0 -2
- data/test/token/test_token.rb +0 -107
@@ -21,9 +21,6 @@
|
|
21
21
|
when '\W'; emit(:type, :nonword, text, ts - 1, te)
|
22
22
|
when '\R'; emit(:type, :linebreak, text, ts - 1, te)
|
23
23
|
when '\X'; emit(:type, :xgrapheme, text, ts - 1, te)
|
24
|
-
else
|
25
|
-
raise ScannerError.new(
|
26
|
-
"Unexpected character in type at #{text} (char #{ts})")
|
27
24
|
end
|
28
25
|
fret;
|
29
26
|
};
|
@@ -5,6 +5,9 @@
|
|
5
5
|
adlam: adlam
|
6
6
|
age=1.1: age=1.1
|
7
7
|
age=10.0: age=10.0
|
8
|
+
age=11.0: age=11.0
|
9
|
+
age=12.0: age=12.0
|
10
|
+
age=12.1: age=12.1
|
8
11
|
age=2.0: age=2.0
|
9
12
|
age=2.1: age=2.1
|
10
13
|
age=3.0: age=3.0
|
@@ -63,7 +66,6 @@ changeswhenuppercased: changes_when_uppercased
|
|
63
66
|
cherokee: cherokee
|
64
67
|
closepunctuation: close_punctuation
|
65
68
|
cntrl: cntrl
|
66
|
-
combiningmark: combining_mark
|
67
69
|
common: common
|
68
70
|
connectorpunctuation: connector_punctuation
|
69
71
|
control: control
|
@@ -81,9 +83,11 @@ deseret: deseret
|
|
81
83
|
devanagari: devanagari
|
82
84
|
diacritic: diacritic
|
83
85
|
digit: digit
|
86
|
+
dogra: dogra
|
84
87
|
duployan: duployan
|
85
88
|
egyptianhieroglyphs: egyptian_hieroglyphs
|
86
89
|
elbasan: elbasan
|
90
|
+
elymaic: elymaic
|
87
91
|
emoji: emoji
|
88
92
|
emojicomponent: emoji_component
|
89
93
|
emojimodifier: emoji_modifier
|
@@ -104,9 +108,11 @@ graphemeextend: grapheme_extend
|
|
104
108
|
graphemelink: grapheme_link
|
105
109
|
greek: greek
|
106
110
|
gujarati: gujarati
|
111
|
+
gunjalagondi: gunjala_gondi
|
107
112
|
gurmukhi: gurmukhi
|
108
113
|
han: han
|
109
114
|
hangul: hangul
|
115
|
+
hanifirohingya: hanifi_rohingya
|
110
116
|
hanunoo: hanunoo
|
111
117
|
hatran: hatran
|
112
118
|
hebrew: hebrew
|
@@ -160,6 +166,7 @@ inchakma: in_chakma
|
|
160
166
|
incham: in_cham
|
161
167
|
incherokee: in_cherokee
|
162
168
|
incherokeesupplement: in_cherokee_supplement
|
169
|
+
inchesssymbols: in_chess_symbols
|
163
170
|
incjkcompatibility: in_cjk_compatibility
|
164
171
|
incjkcompatibilityforms: in_cjk_compatibility_forms
|
165
172
|
incjkcompatibilityideographs: in_cjk_compatibility_ideographs
|
@@ -197,11 +204,14 @@ indeseret: in_deseret
|
|
197
204
|
indevanagari: in_devanagari
|
198
205
|
indevanagariextended: in_devanagari_extended
|
199
206
|
indingbats: in_dingbats
|
207
|
+
indogra: in_dogra
|
200
208
|
indominotiles: in_domino_tiles
|
201
209
|
induployan: in_duployan
|
202
210
|
inearlydynasticcuneiform: in_early_dynastic_cuneiform
|
211
|
+
inegyptianhieroglyphformatcontrols: in_egyptian_hieroglyph_format_controls
|
203
212
|
inegyptianhieroglyphs: in_egyptian_hieroglyphs
|
204
213
|
inelbasan: in_elbasan
|
214
|
+
inelymaic: in_elymaic
|
205
215
|
inemoticons: in_emoticons
|
206
216
|
inenclosedalphanumerics: in_enclosed_alphanumerics
|
207
217
|
inenclosedalphanumericsupplement: in_enclosed_alphanumeric_supplement
|
@@ -215,6 +225,7 @@ ingeneralpunctuation: in_general_punctuation
|
|
215
225
|
ingeometricshapes: in_geometric_shapes
|
216
226
|
ingeometricshapesextended: in_geometric_shapes_extended
|
217
227
|
ingeorgian: in_georgian
|
228
|
+
ingeorgianextended: in_georgian_extended
|
218
229
|
ingeorgiansupplement: in_georgian_supplement
|
219
230
|
inglagolitic: in_glagolitic
|
220
231
|
inglagoliticsupplement: in_glagolitic_supplement
|
@@ -223,6 +234,7 @@ ingrantha: in_grantha
|
|
223
234
|
ingreekandcoptic: in_greek_and_coptic
|
224
235
|
ingreekextended: in_greek_extended
|
225
236
|
ingujarati: in_gujarati
|
237
|
+
ingunjalagondi: in_gunjala_gondi
|
226
238
|
ingurmukhi: in_gurmukhi
|
227
239
|
inhalfwidthandfullwidthforms: in_halfwidth_and_fullwidth_forms
|
228
240
|
inhangulcompatibilityjamo: in_hangul_compatibility_jamo
|
@@ -230,6 +242,7 @@ inhanguljamo: in_hangul_jamo
|
|
230
242
|
inhanguljamoextendeda: in_hangul_jamo_extended_a
|
231
243
|
inhanguljamoextendedb: in_hangul_jamo_extended_b
|
232
244
|
inhangulsyllables: in_hangul_syllables
|
245
|
+
inhanifirohingya: in_hanifi_rohingya
|
233
246
|
inhanunoo: in_hanunoo
|
234
247
|
inhatran: in_hatran
|
235
248
|
inhebrew: in_hebrew
|
@@ -240,6 +253,7 @@ inhiragana: in_hiragana
|
|
240
253
|
inideographicdescriptioncharacters: in_ideographic_description_characters
|
241
254
|
inideographicsymbolsandpunctuation: in_ideographic_symbols_and_punctuation
|
242
255
|
inimperialaramaic: in_imperial_aramaic
|
256
|
+
inindicsiyaqnumbers: in_indic_siyaq_numbers
|
243
257
|
ininscriptionalpahlavi: in_inscriptional_pahlavi
|
244
258
|
ininscriptionalparthian: in_inscriptional_parthian
|
245
259
|
inipaextensions: in_ipa_extensions
|
@@ -279,6 +293,7 @@ inlycian: in_lycian
|
|
279
293
|
inlydian: in_lydian
|
280
294
|
inmahajani: in_mahajani
|
281
295
|
inmahjongtiles: in_mahjong_tiles
|
296
|
+
inmakasar: in_makasar
|
282
297
|
inmalayalam: in_malayalam
|
283
298
|
inmandaic: in_mandaic
|
284
299
|
inmanichaean: in_manichaean
|
@@ -286,6 +301,8 @@ inmarchen: in_marchen
|
|
286
301
|
inmasaramgondi: in_masaram_gondi
|
287
302
|
inmathematicalalphanumericsymbols: in_mathematical_alphanumeric_symbols
|
288
303
|
inmathematicaloperators: in_mathematical_operators
|
304
|
+
inmayannumerals: in_mayan_numerals
|
305
|
+
inmedefaidrin: in_medefaidrin
|
289
306
|
inmeeteimayek: in_meetei_mayek
|
290
307
|
inmeeteimayekextensions: in_meetei_mayek_extensions
|
291
308
|
inmendekikakui: in_mende_kikakui
|
@@ -309,12 +326,14 @@ inmyanmar: in_myanmar
|
|
309
326
|
inmyanmarextendeda: in_myanmar_extended_a
|
310
327
|
inmyanmarextendedb: in_myanmar_extended_b
|
311
328
|
innabataean: in_nabataean
|
329
|
+
innandinagari: in_nandinagari
|
312
330
|
innewa: in_newa
|
313
331
|
innewtailue: in_new_tai_lue
|
314
332
|
innko: in_nko
|
315
333
|
innoblock: in_no_block
|
316
334
|
innumberforms: in_number_forms
|
317
335
|
innushu: in_nushu
|
336
|
+
innyiakengpuachuehmong: in_nyiakeng_puachue_hmong
|
318
337
|
inogham: in_ogham
|
319
338
|
inolchiki: in_ol_chiki
|
320
339
|
inoldhungarian: in_old_hungarian
|
@@ -322,6 +341,7 @@ inolditalic: in_old_italic
|
|
322
341
|
inoldnortharabian: in_old_north_arabian
|
323
342
|
inoldpermic: in_old_permic
|
324
343
|
inoldpersian: in_old_persian
|
344
|
+
inoldsogdian: in_old_sogdian
|
325
345
|
inoldsoutharabian: in_old_south_arabian
|
326
346
|
inoldturkic: in_old_turkic
|
327
347
|
inopticalcharacterrecognition: in_optical_character_recognition
|
@@ -329,6 +349,7 @@ inoriya: in_oriya
|
|
329
349
|
inornamentaldingbats: in_ornamental_dingbats
|
330
350
|
inosage: in_osage
|
331
351
|
inosmanya: in_osmanya
|
352
|
+
inottomansiyaqnumbers: in_ottoman_siyaq_numbers
|
332
353
|
inpahawhhmong: in_pahawh_hmong
|
333
354
|
inpalmyrene: in_palmyrene
|
334
355
|
inpaucinhau: in_pau_cin_hau
|
@@ -354,6 +375,8 @@ insiddham: in_siddham
|
|
354
375
|
insinhala: in_sinhala
|
355
376
|
insinhalaarchaicnumbers: in_sinhala_archaic_numbers
|
356
377
|
insmallformvariants: in_small_form_variants
|
378
|
+
insmallkanaextension: in_small_kana_extension
|
379
|
+
insogdian: in_sogdian
|
357
380
|
insorasompeng: in_sora_sompeng
|
358
381
|
insoyombo: in_soyombo
|
359
382
|
inspacingmodifierletters: in_spacing_modifier_letters
|
@@ -371,6 +394,7 @@ insupplementaryprivateuseareaa: in_supplementary_private_use_area_a
|
|
371
394
|
insupplementaryprivateuseareab: in_supplementary_private_use_area_b
|
372
395
|
insuttonsignwriting: in_sutton_signwriting
|
373
396
|
insylotinagri: in_syloti_nagri
|
397
|
+
insymbolsandpictographsextendeda: in_symbols_and_pictographs_extended_a
|
374
398
|
insyriac: in_syriac
|
375
399
|
insyriacsupplement: in_syriac_supplement
|
376
400
|
intagalog: in_tagalog
|
@@ -382,6 +406,7 @@ intaiviet: in_tai_viet
|
|
382
406
|
intaixuanjingsymbols: in_tai_xuan_jing_symbols
|
383
407
|
intakri: in_takri
|
384
408
|
intamil: in_tamil
|
409
|
+
intamilsupplement: in_tamil_supplement
|
385
410
|
intangut: in_tangut
|
386
411
|
intangutcomponents: in_tangut_components
|
387
412
|
intelugu: in_telugu
|
@@ -399,6 +424,7 @@ invariationselectors: in_variation_selectors
|
|
399
424
|
invariationselectorssupplement: in_variation_selectors_supplement
|
400
425
|
invedicextensions: in_vedic_extensions
|
401
426
|
inverticalforms: in_vertical_forms
|
427
|
+
inwancho: in_wancho
|
402
428
|
inwarangciti: in_warang_citi
|
403
429
|
inyijinghexagramsymbols: in_yijing_hexagram_symbols
|
404
430
|
inyiradicals: in_yi_radicals
|
@@ -431,6 +457,7 @@ lowercaseletter: lowercase_letter
|
|
431
457
|
lycian: lycian
|
432
458
|
lydian: lydian
|
433
459
|
mahajani: mahajani
|
460
|
+
makasar: makasar
|
434
461
|
malayalam: malayalam
|
435
462
|
mandaic: mandaic
|
436
463
|
manichaean: manichaean
|
@@ -439,6 +466,7 @@ mark: mark
|
|
439
466
|
masaramgondi: masaram_gondi
|
440
467
|
math: math
|
441
468
|
mathsymbol: math_symbol
|
469
|
+
medefaidrin: medefaidrin
|
442
470
|
meeteimayek: meetei_mayek
|
443
471
|
mendekikakui: mende_kikakui
|
444
472
|
meroiticcursive: meroitic_cursive
|
@@ -452,6 +480,7 @@ mro: mro
|
|
452
480
|
multani: multani
|
453
481
|
myanmar: myanmar
|
454
482
|
nabataean: nabataean
|
483
|
+
nandinagari: nandinagari
|
455
484
|
newa: newa
|
456
485
|
newline: newline
|
457
486
|
newtailue: new_tai_lue
|
@@ -460,6 +489,7 @@ noncharactercodepoint: noncharacter_code_point
|
|
460
489
|
nonspacingmark: nonspacing_mark
|
461
490
|
number: number
|
462
491
|
nushu: nushu
|
492
|
+
nyiakengpuachuehmong: nyiakeng_puachue_hmong
|
463
493
|
ogham: ogham
|
464
494
|
olchiki: ol_chiki
|
465
495
|
oldhungarian: old_hungarian
|
@@ -467,6 +497,7 @@ olditalic: old_italic
|
|
467
497
|
oldnortharabian: old_north_arabian
|
468
498
|
oldpermic: old_permic
|
469
499
|
oldpersian: old_persian
|
500
|
+
oldsogdian: old_sogdian
|
470
501
|
oldsoutharabian: old_south_arabian
|
471
502
|
oldturkic: old_turkic
|
472
503
|
openpunctuation: open_punctuation
|
@@ -515,6 +546,7 @@ siddham: siddham
|
|
515
546
|
signwriting: signwriting
|
516
547
|
sinhala: sinhala
|
517
548
|
softdotted: soft_dotted
|
549
|
+
sogdian: sogdian
|
518
550
|
sorasompeng: sora_sompeng
|
519
551
|
soyombo: soyombo
|
520
552
|
space: space
|
@@ -550,6 +582,7 @@ uppercase: uppercase
|
|
550
582
|
uppercaseletter: uppercase_letter
|
551
583
|
vai: vai
|
552
584
|
variationselector: variation_selector
|
585
|
+
wancho: wancho
|
553
586
|
warangciti: warang_citi
|
554
587
|
whitespace: white_space
|
555
588
|
word: word
|
@@ -31,6 +31,7 @@ cher: cherokee
|
|
31
31
|
ci: case_ignorable
|
32
32
|
cn: unassigned
|
33
33
|
co: private_use
|
34
|
+
combiningmark: mark
|
34
35
|
copt: coptic
|
35
36
|
cprt: cypriot
|
36
37
|
cs: surrogate
|
@@ -44,14 +45,17 @@ dep: deprecated
|
|
44
45
|
deva: devanagari
|
45
46
|
di: default_ignorable_code_point
|
46
47
|
dia: diacritic
|
48
|
+
dogr: dogra
|
47
49
|
dsrt: deseret
|
48
50
|
dupl: duployan
|
49
51
|
egyp: egyptian_hieroglyphs
|
50
52
|
elba: elbasan
|
53
|
+
elym: elymaic
|
51
54
|
ethi: ethiopic
|
52
55
|
ext: extender
|
53
56
|
geor: georgian
|
54
57
|
glag: glagolitic
|
58
|
+
gong: gunjala_gondi
|
55
59
|
gonm: masaram_gondi
|
56
60
|
goth: gothic
|
57
61
|
gran: grantha
|
@@ -70,6 +74,7 @@ hex: hex_digit
|
|
70
74
|
hira: hiragana
|
71
75
|
hluw: anatolian_hieroglyphs
|
72
76
|
hmng: pahawh_hmong
|
77
|
+
hmnp: nyiakeng_puachue_hmong
|
73
78
|
hung: old_hungarian
|
74
79
|
idc: id_continue
|
75
80
|
ideo: ideographic
|
@@ -105,11 +110,13 @@ lyci: lycian
|
|
105
110
|
lydi: lydian
|
106
111
|
m: mark
|
107
112
|
mahj: mahajani
|
113
|
+
maka: makasar
|
108
114
|
mand: mandaic
|
109
115
|
mani: manichaean
|
110
116
|
marc: marchen
|
111
117
|
mc: spacing_mark
|
112
118
|
me: enclosing_mark
|
119
|
+
medf: medefaidrin
|
113
120
|
mend: mende_kikakui
|
114
121
|
merc: meroitic_cursive
|
115
122
|
mero: meroitic_hieroglyphs
|
@@ -121,6 +128,7 @@ mtei: meetei_mayek
|
|
121
128
|
mult: multani
|
122
129
|
mymr: myanmar
|
123
130
|
n: number
|
131
|
+
nand: nandinagari
|
124
132
|
narb: old_north_arabian
|
125
133
|
nbat: nabataean
|
126
134
|
nchar: noncharacter_code_point
|
@@ -168,6 +176,7 @@ qaai: inherited
|
|
168
176
|
qmark: quotation_mark
|
169
177
|
ri: regional_indicator
|
170
178
|
rjng: rejang
|
179
|
+
rohg: hanifi_rohingya
|
171
180
|
runr: runic
|
172
181
|
s: symbol
|
173
182
|
samr: samaritan
|
@@ -184,6 +193,8 @@ sinh: sinhala
|
|
184
193
|
sk: modifier_symbol
|
185
194
|
sm: math_symbol
|
186
195
|
so: other_symbol
|
196
|
+
sogd: sogdian
|
197
|
+
sogo: old_sogdian
|
187
198
|
sora: sora_sompeng
|
188
199
|
soyo: soyombo
|
189
200
|
sterm: sentence_terminal
|
@@ -209,6 +220,7 @@ uideo: unified_ideograph
|
|
209
220
|
vaii: vai
|
210
221
|
vs: variation_selector
|
211
222
|
wara: warang_citi
|
223
|
+
wcho: wancho
|
212
224
|
wspace: white_space
|
213
225
|
xidc: xid_continue
|
214
226
|
xids: xid_start
|
@@ -49,9 +49,9 @@
|
|
49
49
|
codepoint_list = 'u{' . xdigit{1,6} . (space . xdigit{1,6})* . '}';
|
50
50
|
codepoint_sequence = codepoint_single | codepoint_list;
|
51
51
|
|
52
|
-
control_sequence = ('c' | 'C-') . (backslash . 'M-')
|
52
|
+
control_sequence = ('c' | 'C-') . (backslash . 'M-')? . backslash? . any;
|
53
53
|
|
54
|
-
meta_sequence = 'M-' . (backslash .
|
54
|
+
meta_sequence = 'M-' . (backslash . ('c' | 'C-'))? . backslash? . any;
|
55
55
|
|
56
56
|
zero_or_one = '?' | '??' | '?+';
|
57
57
|
zero_or_more = '*' | '*?' | '*+';
|
@@ -62,13 +62,17 @@
|
|
62
62
|
quantifier_possessive = '?+' | '*+' | '++';
|
63
63
|
quantifier_mode = '?' | '+';
|
64
64
|
|
65
|
-
|
66
|
-
|
65
|
+
quantity_exact = (digit+);
|
66
|
+
quantity_minimum = (digit+) . ',';
|
67
|
+
quantity_maximum = ',' . (digit+);
|
68
|
+
quantity_range = (digit+) . ',' . (digit+);
|
69
|
+
quantifier_interval = range_open . ( quantity_exact | quantity_minimum |
|
70
|
+
quantity_maximum | quantity_range ) . range_close .
|
71
|
+
quantifier_mode?;
|
67
72
|
|
68
73
|
quantifiers = quantifier_greedy | quantifier_reluctant |
|
69
74
|
quantifier_possessive | quantifier_interval;
|
70
75
|
|
71
|
-
|
72
76
|
conditional = '(?(';
|
73
77
|
|
74
78
|
group_comment = '?#' . [^)]* . group_close;
|
@@ -82,7 +86,8 @@
|
|
82
86
|
assertion_lookbehind = '?<=';
|
83
87
|
assertion_nlookbehind = '?<!';
|
84
88
|
|
85
|
-
|
89
|
+
# try to treat every other group head as options group, like Ruby
|
90
|
+
group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
|
86
91
|
|
87
92
|
group_ref = [gk];
|
88
93
|
group_name_char = (alnum | '_');
|
@@ -113,6 +118,8 @@
|
|
113
118
|
curlies | parantheses | brackets |
|
114
119
|
line_anchor | quantifier_greedy;
|
115
120
|
|
121
|
+
literal_delimiters = ']' | '}';
|
122
|
+
|
116
123
|
ascii_print = ((0x20..0x7e) - meta_char);
|
117
124
|
ascii_nonprint = (0x01..0x1f | 0x7f);
|
118
125
|
|
@@ -135,41 +142,35 @@
|
|
135
142
|
# Invalid sequence error, used from sequences, like escapes and sets
|
136
143
|
action invalid_sequence_error {
|
137
144
|
text = ts ? copy(data, ts-1..-1) : data.pack('c*')
|
138
|
-
|
145
|
+
validation_error(:sequence, 'sequence', text)
|
139
146
|
}
|
140
147
|
|
141
148
|
# group (nesting) and set open/close actions
|
142
|
-
action group_opened { self.group_depth = group_depth + 1
|
143
|
-
action group_closed { self.group_depth = group_depth - 1
|
149
|
+
action group_opened { self.group_depth = group_depth + 1 }
|
150
|
+
action group_closed { self.group_depth = group_depth - 1 }
|
151
|
+
action set_opened { self.set_depth = set_depth + 1 }
|
152
|
+
action set_closed { self.set_depth = set_depth - 1 }
|
144
153
|
|
145
154
|
# Character set scanner, continues consuming characters until it meets the
|
146
155
|
# closing bracket of the set.
|
147
156
|
# --------------------------------------------------------------------------
|
148
157
|
character_set := |*
|
149
|
-
set_close > (set_meta, 2) {
|
150
|
-
set_depth -= 1
|
151
|
-
in_set = set_depth > 0 ? true : false
|
152
|
-
|
158
|
+
set_close > (set_meta, 2) @set_closed {
|
153
159
|
emit(:set, :close, *text(data, ts, te))
|
154
|
-
|
155
|
-
if set_depth == 0
|
156
|
-
fgoto main;
|
157
|
-
else
|
160
|
+
if in_set?
|
158
161
|
fret;
|
162
|
+
else
|
163
|
+
fgoto main;
|
159
164
|
end
|
160
165
|
};
|
161
166
|
|
162
|
-
'-]' { # special case, emits two tokens
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
emit(:literal, :literal, copy(data, ts..te-2), ts, te)
|
167
|
-
emit(:set, :close, copy(data, ts+1..te-1), ts, te)
|
168
|
-
|
169
|
-
if set_depth == 0
|
170
|
-
fgoto main;
|
171
|
-
else
|
167
|
+
'-]' @set_closed { # special case, emits two tokens
|
168
|
+
emit(:literal, :literal, copy(data, ts..te-2), ts, te - 1)
|
169
|
+
emit(:set, :close, copy(data, ts+1..te-1), ts + 1, te)
|
170
|
+
if in_set?
|
172
171
|
fret;
|
172
|
+
else
|
173
|
+
fgoto main;
|
173
174
|
end
|
174
175
|
};
|
175
176
|
|
@@ -207,14 +208,12 @@
|
|
207
208
|
fcall set_escape_sequence;
|
208
209
|
};
|
209
210
|
|
210
|
-
set_open >(open_bracket, 1) {
|
211
|
-
set_depth += 1
|
212
|
-
|
211
|
+
set_open >(open_bracket, 1) >set_opened {
|
213
212
|
emit(:set, :open, *text(data, ts, te))
|
214
213
|
fcall character_set;
|
215
214
|
};
|
216
215
|
|
217
|
-
class_posix >(open_bracket, 1) @eof(premature_end_error)
|
216
|
+
class_posix >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
218
217
|
text = text(data, ts, te).first
|
219
218
|
|
220
219
|
type = :posixclass
|
@@ -227,11 +226,11 @@
|
|
227
226
|
emit(type, class_name.to_sym, text, ts, te)
|
228
227
|
};
|
229
228
|
|
230
|
-
collating_sequence >(open_bracket, 1) @eof(premature_end_error)
|
229
|
+
collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
231
230
|
emit(:set, :collation, *text(data, ts, te))
|
232
231
|
};
|
233
232
|
|
234
|
-
character_equivalent >(open_bracket, 1) @eof(premature_end_error)
|
233
|
+
character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
235
234
|
emit(:set, :equivalent, *text(data, ts, te))
|
236
235
|
};
|
237
236
|
|
@@ -337,44 +336,24 @@
|
|
337
336
|
};
|
338
337
|
|
339
338
|
control_sequence >(escaped_alpha, 4) $eof(premature_end_error) {
|
340
|
-
|
341
|
-
c = data[te].chr
|
342
|
-
if c =~ /[\x00-\x7F]/
|
343
|
-
emit(:escape, :control, copy(data, ts-1..te), ts-1, te+1)
|
344
|
-
p += 1
|
345
|
-
else
|
346
|
-
raise InvalidSequenceError.new("control sequence")
|
347
|
-
end
|
348
|
-
else
|
349
|
-
raise PrematureEndError.new("control sequence")
|
350
|
-
end
|
339
|
+
emit_meta_control_sequence(data, ts, te, :control)
|
351
340
|
fret;
|
352
341
|
};
|
353
342
|
|
354
343
|
meta_sequence >(backslashed, 3) $eof(premature_end_error) {
|
355
|
-
|
356
|
-
c = data[te].chr
|
357
|
-
if c =~ /[\x00-\x7F]/
|
358
|
-
emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1)
|
359
|
-
p += 1
|
360
|
-
else
|
361
|
-
raise InvalidSequenceError.new("meta sequence")
|
362
|
-
end
|
363
|
-
else
|
364
|
-
raise PrematureEndError.new("meta sequence")
|
365
|
-
end
|
344
|
+
emit_meta_control_sequence(data, ts, te, :meta_sequence)
|
366
345
|
fret;
|
367
346
|
};
|
368
347
|
|
369
348
|
char_type_char > (escaped_alpha, 2) {
|
370
349
|
fhold;
|
371
|
-
fnext *(in_set ? fentry(character_set) : fentry(main));
|
350
|
+
fnext *(in_set? ? fentry(character_set) : fentry(main));
|
372
351
|
fcall char_type;
|
373
352
|
};
|
374
353
|
|
375
354
|
property_char > (escaped_alpha, 2) {
|
376
355
|
fhold;
|
377
|
-
fnext *(in_set ? fentry(character_set) : fentry(main));
|
356
|
+
fnext *(in_set? ? fentry(character_set) : fentry(main));
|
378
357
|
fcall unicode_property;
|
379
358
|
};
|
380
359
|
|
@@ -412,8 +391,7 @@
|
|
412
391
|
};
|
413
392
|
|
414
393
|
alternation {
|
415
|
-
if
|
416
|
-
conditional_stack.last[1] == group_depth
|
394
|
+
if conditional_stack.last == group_depth
|
417
395
|
emit(:conditional, :separator, *text(data, ts, te))
|
418
396
|
else
|
419
397
|
emit(:meta, :alternation, *text(data, ts, te))
|
@@ -442,18 +420,16 @@
|
|
442
420
|
when '\\b'; emit(:anchor, :word_boundary, text, ts, te)
|
443
421
|
when '\\B'; emit(:anchor, :nonword_boundary, text, ts, te)
|
444
422
|
when '\\G'; emit(:anchor, :match_start, text, ts, te)
|
445
|
-
else
|
446
|
-
raise ScannerError.new(
|
447
|
-
"Unexpected character in anchor at #{text} (char #{ts})")
|
448
423
|
end
|
449
424
|
};
|
450
425
|
|
426
|
+
literal_delimiters {
|
427
|
+
append_literal(data, ts, te)
|
428
|
+
};
|
429
|
+
|
451
430
|
# Character sets
|
452
431
|
# ------------------------------------------------------------------------
|
453
|
-
set_open {
|
454
|
-
set_depth += 1
|
455
|
-
in_set = true
|
456
|
-
|
432
|
+
set_open >set_opened {
|
457
433
|
emit(:set, :open, *text(data, ts, te))
|
458
434
|
fcall character_set;
|
459
435
|
};
|
@@ -465,9 +441,7 @@
|
|
465
441
|
conditional {
|
466
442
|
text = text(data, ts, te).first
|
467
443
|
|
468
|
-
|
469
|
-
conditional_depth += 1
|
470
|
-
conditional_stack << [conditional_depth, group_depth]
|
444
|
+
conditional_stack << group_depth
|
471
445
|
|
472
446
|
emit(:conditional, :open, text[0..-2], ts, te-1)
|
473
447
|
emit(:conditional, :condition_open, '(', te-1, te)
|
@@ -496,7 +470,11 @@
|
|
496
470
|
# (?imxdau-imx:subexp) option on/off for subexp
|
497
471
|
# ------------------------------------------------------------------------
|
498
472
|
group_open . group_options >group_opened {
|
499
|
-
|
473
|
+
text = text(data, ts, te).first
|
474
|
+
if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
|
475
|
+
raise InvalidGroupOption.new($1 || "-#{$2}", text)
|
476
|
+
end
|
477
|
+
emit_options(text, ts, te)
|
500
478
|
};
|
501
479
|
|
502
480
|
# Assertions
|
@@ -528,19 +506,15 @@
|
|
528
506
|
when '(?>'; emit(:group, :atomic, text, ts, te)
|
529
507
|
when '(?~'; emit(:group, :absence, text, ts, te)
|
530
508
|
|
531
|
-
when /^\(
|
532
|
-
|
509
|
+
when /^\(\?(?:<>|'')/
|
510
|
+
validation_error(:group, 'named group', 'name is empty')
|
533
511
|
|
512
|
+
when /^\(\?<\w*>/
|
534
513
|
emit(:group, :named_ab, text, ts, te)
|
535
514
|
|
536
|
-
when /^\(\?'
|
537
|
-
empty_name_error(:group, 'named group (sq)') if $1.empty?
|
538
|
-
|
515
|
+
when /^\(\?'\w*'/
|
539
516
|
emit(:group, :named_sq, text, ts, te)
|
540
517
|
|
541
|
-
else
|
542
|
-
raise ScannerError.new(
|
543
|
-
"Unknown subexpression group format '#{text}'")
|
544
518
|
end
|
545
519
|
};
|
546
520
|
|
@@ -550,20 +524,13 @@
|
|
550
524
|
};
|
551
525
|
|
552
526
|
group_close @group_closed {
|
553
|
-
if
|
554
|
-
conditional_stack.last[1] == (group_depth + 1)
|
555
|
-
|
556
|
-
emit(:conditional, :close, *text(data, ts, te))
|
527
|
+
if conditional_stack.last == group_depth + 1
|
557
528
|
conditional_stack.pop
|
558
|
-
|
559
|
-
if conditional_stack.length == 0
|
560
|
-
in_conditional = false
|
561
|
-
end
|
529
|
+
emit(:conditional, :close, *text(data, ts, te))
|
562
530
|
else
|
563
|
-
if spacing_stack.length > 1
|
564
|
-
|
531
|
+
if spacing_stack.length > 1 &&
|
532
|
+
spacing_stack.last[:depth] == group_depth + 1
|
565
533
|
spacing_stack.pop
|
566
|
-
|
567
534
|
self.free_spacing = spacing_stack.last[:free_spacing]
|
568
535
|
end
|
569
536
|
|
@@ -576,11 +543,8 @@
|
|
576
543
|
# ------------------------------------------------------------------------
|
577
544
|
backslash . (group_name_ref | group_number_ref) > (backslashed, 4) {
|
578
545
|
case text = text(data, ts, te).first
|
579
|
-
when /^\\([gk])
|
580
|
-
|
581
|
-
|
582
|
-
when /^\\([gk])''/ # single quotes
|
583
|
-
empty_backref_error("ref/call (sq)")
|
546
|
+
when /^\\([gk])(<>|'')/ # angle brackets
|
547
|
+
validation_error(:backref, 'ref/call', 'ref ID is empty')
|
584
548
|
|
585
549
|
when /^\\([gk])<[^\d+-]\w*>/ # angle-brackets
|
586
550
|
if $1 == 'k'
|
@@ -636,9 +600,6 @@
|
|
636
600
|
when /^\\([gk])'[+\-]?\d+[+\-]\d+'/ # single-quotes
|
637
601
|
emit(:backref, :number_recursion_ref_sq, text, ts, te)
|
638
602
|
|
639
|
-
else
|
640
|
-
raise ScannerError.new(
|
641
|
-
"Unknown backreference format '#{text}'")
|
642
603
|
end
|
643
604
|
};
|
644
605
|
|
@@ -669,10 +630,15 @@
|
|
669
630
|
end
|
670
631
|
};
|
671
632
|
|
672
|
-
quantifier_interval
|
633
|
+
quantifier_interval {
|
673
634
|
emit(:quantifier, :interval, *text(data, ts, te))
|
674
635
|
};
|
675
636
|
|
637
|
+
# Catch unmatched curly braces as literals
|
638
|
+
range_open {
|
639
|
+
append_literal(data, ts, te)
|
640
|
+
};
|
641
|
+
|
676
642
|
# Escaped sequences
|
677
643
|
# ------------------------------------------------------------------------
|
678
644
|
backslash > (backslashed, 1) {
|
@@ -786,7 +752,7 @@ class Regexp::Scanner
|
|
786
752
|
input = input_object
|
787
753
|
self.free_spacing = false
|
788
754
|
end
|
789
|
-
|
755
|
+
self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
|
790
756
|
|
791
757
|
data = input.unpack("c*") if input.is_a?(String)
|
792
758
|
eof = data.length
|
@@ -794,15 +760,9 @@ class Regexp::Scanner
|
|
794
760
|
self.tokens = []
|
795
761
|
self.block = block_given? ? block : nil
|
796
762
|
|
797
|
-
self.
|
763
|
+
self.set_depth = 0
|
798
764
|
self.group_depth = 0
|
799
|
-
self.
|
800
|
-
|
801
|
-
in_set = false
|
802
|
-
set_depth = 0
|
803
|
-
in_conditional = false
|
804
|
-
conditional_depth = 0
|
805
|
-
conditional_stack = []
|
765
|
+
self.conditional_stack = []
|
806
766
|
|
807
767
|
%% write data;
|
808
768
|
%% write init;
|
@@ -817,9 +777,9 @@ class Regexp::Scanner
|
|
817
777
|
end
|
818
778
|
|
819
779
|
raise PrematureEndError.new("(missing group closing paranthesis) "+
|
820
|
-
"[#{
|
780
|
+
"[#{group_depth}]") if in_group?
|
821
781
|
raise PrematureEndError.new("(missing set closing bracket) "+
|
822
|
-
"[#{
|
782
|
+
"[#{set_depth}]") if in_set?
|
823
783
|
|
824
784
|
# when the entire expression is a literal run
|
825
785
|
emit_literal if literal
|
@@ -854,62 +814,15 @@ class Regexp::Scanner
|
|
854
814
|
|
855
815
|
private
|
856
816
|
|
857
|
-
attr_accessor :tokens, :literal, :block,
|
858
|
-
:
|
859
|
-
:free_spacing, :spacing_stack
|
860
|
-
|
861
|
-
# Ragel's regex-based scan of the group options introduced a lot of
|
862
|
-
# ambiguity, so we just ask it to find the beginning of what looks
|
863
|
-
# like an options run and handle the rest in here.
|
864
|
-
def scan_options(p, data, ts, te)
|
865
|
-
text = text(data, ts, te).first
|
817
|
+
attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
|
818
|
+
:group_depth, :set_depth, :conditional_stack
|
866
819
|
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
# as ruby allows things like '(?xxxxxxxxx-xxxxxxxxxxxxx:abc)'.
|
871
|
-
negative_options = false
|
872
|
-
while options_char
|
873
|
-
if data[te + options_length]
|
874
|
-
c = data[te + options_length].chr
|
875
|
-
|
876
|
-
if c =~ /[-mixdau]/
|
877
|
-
negative_options = true if c == '-'
|
878
|
-
|
879
|
-
raise InvalidGroupOption.new(c, text) if negative_options and
|
880
|
-
c =~ /[dau]/
|
881
|
-
|
882
|
-
text << c ; p += 1 ; options_length += 1
|
883
|
-
else
|
884
|
-
options_char = false
|
885
|
-
end
|
886
|
-
else
|
887
|
-
raise PrematureEndError.new("expression options `#{text}'")
|
888
|
-
end
|
889
|
-
end
|
890
|
-
|
891
|
-
if data[te + options_length]
|
892
|
-
c = data[te + options_length].chr
|
893
|
-
|
894
|
-
if c == ':'
|
895
|
-
# Include the ':' in the options text
|
896
|
-
text << c ; p += 1 ; options_length += 1
|
897
|
-
emit_options(text, ts, te + options_length)
|
898
|
-
|
899
|
-
elsif c == ')'
|
900
|
-
# Don't include the closing ')', let group_close handle it.
|
901
|
-
emit_options(text, ts, te + options_length)
|
902
|
-
|
903
|
-
else
|
904
|
-
# Plain Regexp reports this as 'undefined group option'
|
905
|
-
raise ScannerError.new(
|
906
|
-
"Unexpected `#{c}' in options sequence, ':' or ')' expected")
|
907
|
-
end
|
908
|
-
else
|
909
|
-
raise PrematureEndError.new("expression options `#{text}'")
|
910
|
-
end
|
820
|
+
def in_group?
|
821
|
+
group_depth > 0
|
822
|
+
end
|
911
823
|
|
912
|
-
|
824
|
+
def in_set?
|
825
|
+
set_depth > 0
|
913
826
|
end
|
914
827
|
|
915
828
|
# Copy from ts to te from data as text
|
@@ -945,32 +858,39 @@ class Regexp::Scanner
|
|
945
858
|
def emit_options(text, ts, te)
|
946
859
|
token = nil
|
947
860
|
|
948
|
-
|
949
|
-
|
861
|
+
# Ruby allows things like '(?-xxxx)' or '(?xx-xx--xx-:abc)'.
|
862
|
+
text =~ /\(\?([mixdau]*)(-(?:[mix]*))*(:)?/
|
863
|
+
positive, negative, group_local = $1, $2, $3
|
950
864
|
|
951
|
-
|
952
|
-
|
953
|
-
|
865
|
+
if positive.include?('x')
|
866
|
+
self.free_spacing = true
|
867
|
+
end
|
954
868
|
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
869
|
+
# If the x appears in both, treat it like ruby does, the second cancels
|
870
|
+
# the first.
|
871
|
+
if negative && negative.include?('x')
|
872
|
+
self.free_spacing = false
|
873
|
+
end
|
960
874
|
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
end
|
875
|
+
if group_local
|
876
|
+
spacing_stack << {:free_spacing => free_spacing, :depth => group_depth}
|
877
|
+
token = :options
|
878
|
+
else
|
879
|
+
# switch for parent group level
|
880
|
+
spacing_stack.last[:free_spacing] = free_spacing
|
881
|
+
token = :options_switch
|
969
882
|
end
|
970
883
|
|
971
884
|
emit(:group, token, text, ts, te)
|
972
885
|
end
|
973
886
|
|
887
|
+
def emit_meta_control_sequence(data, ts, te, token)
|
888
|
+
if data.last < 0x00 || data.last > 0x7F
|
889
|
+
validation_error(:sequence, 'escape', token.to_s)
|
890
|
+
end
|
891
|
+
emit(:escape, token, *text(data, ts, te, 1))
|
892
|
+
end
|
893
|
+
|
974
894
|
# Centralizes and unifies the handling of validation related
|
975
895
|
# errors.
|
976
896
|
def validation_error(type, what, reason)
|
@@ -981,21 +901,8 @@ class Regexp::Scanner
|
|
981
901
|
error = InvalidBackrefError.new(what, reason)
|
982
902
|
when :sequence
|
983
903
|
error = InvalidSequenceError.new(what, reason)
|
984
|
-
else
|
985
|
-
error = ValidationError.new('expression')
|
986
904
|
end
|
987
905
|
|
988
906
|
raise error # unless @@config.validation_ignore
|
989
907
|
end
|
990
|
-
|
991
|
-
# Used for references with an empty name or number
|
992
|
-
def empty_backref_error(type, what)
|
993
|
-
validation_error(:backref, what, 'ref ID is empty')
|
994
|
-
end
|
995
|
-
|
996
|
-
# Used for named expressions with an empty name
|
997
|
-
def empty_name_error(type, what)
|
998
|
-
validation_error(type, what, 'name is empty')
|
999
|
-
end
|
1000
|
-
|
1001
908
|
end # module Regexp::Scanner
|