regexp_parser 2.11.0 → 2.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +3 -3
- data/lib/regexp_parser/expression/shared.rb +4 -3
- data/lib/regexp_parser/scanner/properties/long.csv +19 -0
- data/lib/regexp_parser/scanner/properties/short.csv +8 -0
- data/lib/regexp_parser/scanner/scanner.rl +6 -5
- data/lib/regexp_parser/scanner.rb +7 -6
- data/lib/regexp_parser/syntax/token/unicode_property.rb +13 -0
- data/lib/regexp_parser/syntax/versions/3.5.0.rb +4 -0
- data/lib/regexp_parser/version.rb +1 -1
- metadata +4 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d7598b7311a82778cbcb493188dad178ce93c8478e420cd9e2382732ee90d4e1
|
4
|
+
data.tar.gz: 60a8399981030bdef025cf9657e043a5ccac93adeee62a589a8adb41ec460664
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7ac06fda5f76d4497b8f01d1e724917d009f7c9ea10befcf03a801af8e769b52619433a22cc997cf584b03e1ca9e6ced257f5fc07e327c966f5c25714d2d0b4
|
7
|
+
data.tar.gz: 3d3f89a383bb63208a41801ea059bfc407ff2e88d657d23b0f13740d418335ad47c9f5174bc1d5b7f06841d7a461828c57efa1f97f8bc1b9b42e255959bd18cf
|
data/Rakefile
CHANGED
@@ -14,10 +14,10 @@ RSpec::Core::RakeTask.new(:spec)
|
|
14
14
|
task :default => [:'test:full']
|
15
15
|
|
16
16
|
namespace :test do
|
17
|
-
task full: [:
|
17
|
+
task full: [:ragel, :spec]
|
18
18
|
end
|
19
19
|
|
20
20
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
21
21
|
# latest scanner code is generated and included in the build.
|
22
|
-
desc "Runs ragel
|
23
|
-
task :
|
22
|
+
desc "Runs ragel before building the gem"
|
23
|
+
task build: :ragel
|
@@ -70,11 +70,12 @@ module Regexp::Expression
|
|
70
70
|
# lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
|
71
71
|
#
|
72
72
|
def to_s(format = :full)
|
73
|
-
base =
|
73
|
+
base = ''.dup
|
74
|
+
parts.each do |part|
|
74
75
|
if part.instance_of?(String)
|
75
|
-
|
76
|
+
base << part
|
76
77
|
elsif !part.custom_to_s_handling
|
77
|
-
|
78
|
+
base << part.to_s(:original)
|
78
79
|
end
|
79
80
|
end
|
80
81
|
"#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
|
@@ -9,6 +9,7 @@ age=13.0,age=13.0
|
|
9
9
|
age=14.0,age=14.0
|
10
10
|
age=15.0,age=15.0
|
11
11
|
age=15.1,age=15.1
|
12
|
+
age=16.0,age=16.0
|
12
13
|
age=2.0,age=2.0
|
13
14
|
age=2.1,age=2.1
|
14
15
|
age=3.0,age=3.0
|
@@ -103,6 +104,7 @@ extendedpictographic,extended_pictographic
|
|
103
104
|
extender,extender
|
104
105
|
finalpunctuation,final_punctuation
|
105
106
|
format,format
|
107
|
+
garay,garay
|
106
108
|
georgian,georgian
|
107
109
|
glagolitic,glagolitic
|
108
110
|
gothic,gothic
|
@@ -128,6 +130,7 @@ greek,greek
|
|
128
130
|
gujarati,gujarati
|
129
131
|
gunjalagondi,gunjala_gondi
|
130
132
|
gurmukhi,gurmukhi
|
133
|
+
gurungkhema,gurung_khema
|
131
134
|
han,han
|
132
135
|
hangul,hangul
|
133
136
|
hanifirohingya,hanifi_rohingya
|
@@ -241,6 +244,7 @@ induployan,in_duployan
|
|
241
244
|
inearlydynasticcuneiform,in_early_dynastic_cuneiform
|
242
245
|
inegyptianhieroglyphformatcontrols,in_egyptian_hieroglyph_format_controls
|
243
246
|
inegyptianhieroglyphs,in_egyptian_hieroglyphs
|
247
|
+
inegyptianhieroglyphsextendeda,in_egyptian_hieroglyphs_extended_a
|
244
248
|
inelbasan,in_elbasan
|
245
249
|
inelymaic,in_elymaic
|
246
250
|
inemoticons,in_emoticons
|
@@ -253,6 +257,7 @@ inethiopicextended,in_ethiopic_extended
|
|
253
257
|
inethiopicextendeda,in_ethiopic_extended_a
|
254
258
|
inethiopicextendedb,in_ethiopic_extended_b
|
255
259
|
inethiopicsupplement,in_ethiopic_supplement
|
260
|
+
ingaray,in_garay
|
256
261
|
ingeneralpunctuation,in_general_punctuation
|
257
262
|
ingeometricshapes,in_geometric_shapes
|
258
263
|
ingeometricshapesextended,in_geometric_shapes_extended
|
@@ -268,6 +273,7 @@ ingreekextended,in_greek_extended
|
|
268
273
|
ingujarati,in_gujarati
|
269
274
|
ingunjalagondi,in_gunjala_gondi
|
270
275
|
ingurmukhi,in_gurmukhi
|
276
|
+
ingurungkhema,in_gurung_khema
|
271
277
|
inhalfwidthandfullwidthforms,in_halfwidth_and_fullwidth_forms
|
272
278
|
inhangulcompatibilityjamo,in_hangul_compatibility_jamo
|
273
279
|
inhanguljamo,in_hangul_jamo
|
@@ -309,6 +315,7 @@ inkhmer,in_khmer
|
|
309
315
|
inkhmersymbols,in_khmer_symbols
|
310
316
|
inkhojki,in_khojki
|
311
317
|
inkhudawadi,in_khudawadi
|
318
|
+
inkiratrai,in_kirat_rai
|
312
319
|
inlao,in_lao
|
313
320
|
inlatin1supplement,in_latin_1_supplement
|
314
321
|
inlatinextendeda,in_latin_extended_a
|
@@ -364,6 +371,7 @@ inmusicalsymbols,in_musical_symbols
|
|
364
371
|
inmyanmar,in_myanmar
|
365
372
|
inmyanmarextendeda,in_myanmar_extended_a
|
366
373
|
inmyanmarextendedb,in_myanmar_extended_b
|
374
|
+
inmyanmarextendedc,in_myanmar_extended_c
|
367
375
|
innabataean,in_nabataean
|
368
376
|
innagmundari,in_nag_mundari
|
369
377
|
innandinagari,in_nandinagari
|
@@ -385,6 +393,7 @@ inoldsogdian,in_old_sogdian
|
|
385
393
|
inoldsoutharabian,in_old_south_arabian
|
386
394
|
inoldturkic,in_old_turkic
|
387
395
|
inolduyghur,in_old_uyghur
|
396
|
+
inolonal,in_ol_onal
|
388
397
|
inopticalcharacterrecognition,in_optical_character_recognition
|
389
398
|
inoriya,in_oriya
|
390
399
|
inornamentaldingbats,in_ornamental_dingbats
|
@@ -424,6 +433,7 @@ inspacingmodifierletters,in_spacing_modifier_letters
|
|
424
433
|
inspecials,in_specials
|
425
434
|
insundanese,in_sundanese
|
426
435
|
insundanesesupplement,in_sundanese_supplement
|
436
|
+
insunuwar,in_sunuwar
|
427
437
|
insuperscriptsandsubscripts,in_superscripts_and_subscripts
|
428
438
|
insupplementalarrowsa,in_supplemental_arrows_a
|
429
439
|
insupplementalarrowsb,in_supplemental_arrows_b
|
@@ -437,6 +447,7 @@ insuttonsignwriting,in_sutton_signwriting
|
|
437
447
|
insylotinagri,in_syloti_nagri
|
438
448
|
insymbolsandpictographsextendeda,in_symbols_and_pictographs_extended_a
|
439
449
|
insymbolsforlegacycomputing,in_symbols_for_legacy_computing
|
450
|
+
insymbolsforlegacycomputingsupplement,in_symbols_for_legacy_computing_supplement
|
440
451
|
insyriac,in_syriac
|
441
452
|
insyriacsupplement,in_syriac_supplement
|
442
453
|
intagalog,in_tagalog
|
@@ -459,8 +470,10 @@ inthai,in_thai
|
|
459
470
|
intibetan,in_tibetan
|
460
471
|
intifinagh,in_tifinagh
|
461
472
|
intirhuta,in_tirhuta
|
473
|
+
intodhri,in_todhri
|
462
474
|
intoto,in_toto
|
463
475
|
intransportandmapsymbols,in_transport_and_map_symbols
|
476
|
+
intulutigalari,in_tulu_tigalari
|
464
477
|
inugaritic,in_ugaritic
|
465
478
|
inunifiedcanadianaboriginalsyllabics,in_unified_canadian_aboriginal_syllabics
|
466
479
|
inunifiedcanadianaboriginalsyllabicsextended,in_unified_canadian_aboriginal_syllabics_extended
|
@@ -491,6 +504,7 @@ khitansmallscript,khitan_small_script
|
|
491
504
|
khmer,khmer
|
492
505
|
khojki,khojki
|
493
506
|
khudawadi,khudawadi
|
507
|
+
kiratrai,kirat_rai
|
494
508
|
lao,lao
|
495
509
|
latin,latin
|
496
510
|
lepcha,lepcha
|
@@ -524,6 +538,7 @@ meroiticcursive,meroitic_cursive
|
|
524
538
|
meroitichieroglyphs,meroitic_hieroglyphs
|
525
539
|
miao,miao
|
526
540
|
modi,modi
|
541
|
+
modifiercombiningmark,modifier_combining_mark
|
527
542
|
modifierletter,modifier_letter
|
528
543
|
modifiersymbol,modifier_symbol
|
529
544
|
mongolian,mongolian
|
@@ -553,6 +568,7 @@ oldsogdian,old_sogdian
|
|
553
568
|
oldsoutharabian,old_south_arabian
|
554
569
|
oldturkic,old_turkic
|
555
570
|
olduyghur,old_uyghur
|
571
|
+
olonal,ol_onal
|
556
572
|
openpunctuation,open_punctuation
|
557
573
|
oriya,oriya
|
558
574
|
osage,osage
|
@@ -606,6 +622,7 @@ space,space
|
|
606
622
|
spaceseparator,space_separator
|
607
623
|
spacingmark,spacing_mark
|
608
624
|
sundanese,sundanese
|
625
|
+
sunuwar,sunuwar
|
609
626
|
surrogate,surrogate
|
610
627
|
sylotinagri,syloti_nagri
|
611
628
|
symbol,symbol
|
@@ -627,7 +644,9 @@ tibetan,tibetan
|
|
627
644
|
tifinagh,tifinagh
|
628
645
|
tirhuta,tirhuta
|
629
646
|
titlecaseletter,titlecase_letter
|
647
|
+
todhri,todhri
|
630
648
|
toto,toto
|
649
|
+
tulutigalari,tulu_tigalari
|
631
650
|
ugaritic,ugaritic
|
632
651
|
unassigned,unassigned
|
633
652
|
unifiedideograph,unified_ideograph
|
@@ -58,6 +58,7 @@ epres,emoji_presentation
|
|
58
58
|
ethi,ethiopic
|
59
59
|
ext,extender
|
60
60
|
extpict,extended_pictographic
|
61
|
+
gara,garay
|
61
62
|
geor,georgian
|
62
63
|
glag,glagolitic
|
63
64
|
gong,gunjala_gondi
|
@@ -69,6 +70,7 @@ grek,greek
|
|
69
70
|
grext,grapheme_extend
|
70
71
|
grlink,grapheme_link
|
71
72
|
gujr,gujarati
|
73
|
+
gukh,gurung_khema
|
72
74
|
guru,gurmukhi
|
73
75
|
hang,hangul
|
74
76
|
hani,han
|
@@ -97,6 +99,7 @@ khmr,khmer
|
|
97
99
|
khoj,khojki
|
98
100
|
kits,khitan_small_script
|
99
101
|
knda,kannada
|
102
|
+
krai,kirat_rai
|
100
103
|
kthi,kaithi
|
101
104
|
l,letter
|
102
105
|
lana,tai_tham
|
@@ -122,6 +125,7 @@ mand,mandaic
|
|
122
125
|
mani,manichaean
|
123
126
|
marc,marchen
|
124
127
|
mc,spacing_mark
|
128
|
+
mcm,modifier_combining_mark
|
125
129
|
me,enclosing_mark
|
126
130
|
medf,medefaidrin
|
127
131
|
mend,mende_kikakui
|
@@ -154,6 +158,7 @@ oids,other_id_start
|
|
154
158
|
olck,ol_chiki
|
155
159
|
olower,other_lowercase
|
156
160
|
omath,other_math
|
161
|
+
onao,ol_onal
|
157
162
|
orkh,old_turkic
|
158
163
|
orya,oriya
|
159
164
|
osge,osage
|
@@ -208,6 +213,7 @@ sora,sora_sompeng
|
|
208
213
|
soyo,soyombo
|
209
214
|
sterm,sentence_terminal
|
210
215
|
sund,sundanese
|
216
|
+
sunu,sunuwar
|
211
217
|
sylo,syloti_nagri
|
212
218
|
syrc,syriac
|
213
219
|
tagb,tagbanwa
|
@@ -225,6 +231,8 @@ thaa,thaana
|
|
225
231
|
tibt,tibetan
|
226
232
|
tirh,tirhuta
|
227
233
|
tnsa,tangsa
|
234
|
+
todr,todhri
|
235
|
+
tutg,tulu_tigalari
|
228
236
|
ugar,ugaritic
|
229
237
|
uideo,unified_ideograph
|
230
238
|
vaii,vai
|
@@ -263,7 +263,7 @@
|
|
263
263
|
# If not enough groups have been opened, there is a fallback to either an
|
264
264
|
# octal or literal interpretation for 2+ digit numerical escapes.
|
265
265
|
digits = text[1..-1]
|
266
|
-
if digits.size == 1 || digits.to_i <=
|
266
|
+
if digits.size == 1 || digits.to_i <= capturing_group_count
|
267
267
|
emit(:backref, :number, text)
|
268
268
|
elsif digits =~ /\A[0-7]{2,}\z/
|
269
269
|
emit(:escape, :octal, text)
|
@@ -537,7 +537,7 @@
|
|
537
537
|
};
|
538
538
|
|
539
539
|
group_open @group_opened {
|
540
|
-
self.capturing_group_count
|
540
|
+
self.capturing_group_count = capturing_group_count + 1
|
541
541
|
text = copy(data, ts, te)
|
542
542
|
emit(:group, :capture, text)
|
543
543
|
};
|
@@ -737,9 +737,11 @@ class Regexp::Scanner
|
|
737
737
|
File.read("#{__dir__}/scanner/properties/#{name}.csv").scan(/(.+),(.+)/).to_h
|
738
738
|
end
|
739
739
|
|
740
|
+
# Use each_with_object for required_ruby_version >= 2.2, or #to_h for >= 2.6
|
740
741
|
POSIX_CLASSES =
|
741
742
|
%w[alnum alpha ascii blank cntrl digit graph
|
742
|
-
lower print punct space upper word xdigit]
|
743
|
+
lower print punct space upper word xdigit]
|
744
|
+
.inject({}) { |o, e| o.merge(e => true) }.freeze
|
743
745
|
|
744
746
|
# Emits an array with the details of the scanned pattern
|
745
747
|
def emit(type, token, text)
|
@@ -767,7 +769,7 @@ class Regexp::Scanner
|
|
767
769
|
end
|
768
770
|
end
|
769
771
|
|
770
|
-
attr_accessor :literal_run # only public for #||= to work on ruby <= 2.5
|
772
|
+
attr_accessor :capturing_group_count, :literal_run # only public for #||= to work on ruby <= 2.5
|
771
773
|
|
772
774
|
private
|
773
775
|
|
@@ -776,7 +778,6 @@ class Regexp::Scanner
|
|
776
778
|
:free_spacing, :spacing_stack,
|
777
779
|
:regexp_encoding,
|
778
780
|
:group_depth, :set_depth, :conditional_stack,
|
779
|
-
:capturing_group_count,
|
780
781
|
:char_pos
|
781
782
|
|
782
783
|
def free_spacing?(input_object, options)
|
@@ -3,7 +3,7 @@
|
|
3
3
|
# THIS IS A GENERATED FILE, DO NOT EDIT DIRECTLY
|
4
4
|
#
|
5
5
|
# This file was generated from scanner.rl
|
6
|
-
# by running
|
6
|
+
# by running `$ bundle exec rake ragel`
|
7
7
|
|
8
8
|
|
9
9
|
require_relative 'scanner/errors/scanner_error'
|
@@ -1464,7 +1464,7 @@ p = p - 1; begin
|
|
1464
1464
|
# If not enough groups have been opened, there is a fallback to either an
|
1465
1465
|
# octal or literal interpretation for 2+ digit numerical escapes.
|
1466
1466
|
digits = text[1..-1]
|
1467
|
-
if digits.size == 1 || digits.to_i <=
|
1467
|
+
if digits.size == 1 || digits.to_i <= capturing_group_count
|
1468
1468
|
emit(:backref, :number, text)
|
1469
1469
|
elsif digits =~ /\A[0-7]{2,}\z/
|
1470
1470
|
emit(:escape, :octal, text)
|
@@ -1888,7 +1888,7 @@ p = p - 1; begin
|
|
1888
1888
|
begin
|
1889
1889
|
te = p
|
1890
1890
|
p = p - 1; begin
|
1891
|
-
self.capturing_group_count
|
1891
|
+
self.capturing_group_count = capturing_group_count + 1
|
1892
1892
|
text = copy(data, ts, te)
|
1893
1893
|
emit(:group, :capture, text)
|
1894
1894
|
end
|
@@ -2382,9 +2382,11 @@ end
|
|
2382
2382
|
File.read("#{__dir__}/scanner/properties/#{name}.csv").scan(/(.+),(.+)/).to_h
|
2383
2383
|
end
|
2384
2384
|
|
2385
|
+
# Use each_with_object for required_ruby_version >= 2.2, or #to_h for >= 2.6
|
2385
2386
|
POSIX_CLASSES =
|
2386
2387
|
%w[alnum alpha ascii blank cntrl digit graph
|
2387
|
-
lower print punct space upper word xdigit]
|
2388
|
+
lower print punct space upper word xdigit]
|
2389
|
+
.inject({}) { |o, e| o.merge(e => true) }.freeze
|
2388
2390
|
|
2389
2391
|
# Emits an array with the details of the scanned pattern
|
2390
2392
|
def emit(type, token, text)
|
@@ -2412,7 +2414,7 @@ end
|
|
2412
2414
|
end
|
2413
2415
|
end
|
2414
2416
|
|
2415
|
-
attr_accessor :literal_run # only public for #||= to work on ruby <= 2.5
|
2417
|
+
attr_accessor :capturing_group_count, :literal_run # only public for #||= to work on ruby <= 2.5
|
2416
2418
|
|
2417
2419
|
private
|
2418
2420
|
|
@@ -2421,7 +2423,6 @@ end
|
|
2421
2423
|
:free_spacing, :spacing_stack,
|
2422
2424
|
:regexp_encoding,
|
2423
2425
|
:group_depth, :set_depth, :conditional_stack,
|
2424
|
-
:capturing_group_count,
|
2425
2426
|
:char_pos
|
2426
2427
|
|
2427
2428
|
def free_spacing?(input_object, options)
|
@@ -61,6 +61,8 @@ module Regexp::Syntax
|
|
61
61
|
|
62
62
|
Age_V3_2_0 = %i[age=14.0 age=15.0]
|
63
63
|
|
64
|
+
Age_V3_5_0 = %i[age=15.1]
|
65
|
+
|
64
66
|
Age = all[:Age_V]
|
65
67
|
|
66
68
|
Derived_V1_9_0 = %i[
|
@@ -130,6 +132,12 @@ module Regexp::Syntax
|
|
130
132
|
regional_indicator
|
131
133
|
]
|
132
134
|
|
135
|
+
Derived_V3_5_0 = %i[
|
136
|
+
id_compat_math_continue
|
137
|
+
id_compat_math_start
|
138
|
+
ids_unary_operator
|
139
|
+
]
|
140
|
+
|
133
141
|
Derived = all[:Derived_V]
|
134
142
|
|
135
143
|
Script_V1_9_0 = %i[
|
@@ -689,6 +697,10 @@ module Regexp::Syntax
|
|
689
697
|
in_znamenny_musical_notation
|
690
698
|
]
|
691
699
|
|
700
|
+
UnicodeBlock_V3_5_0 = %i[
|
701
|
+
in_cjk_unified_ideographs_extension_i
|
702
|
+
]
|
703
|
+
|
692
704
|
UnicodeBlock = all[:UnicodeBlock_V]
|
693
705
|
|
694
706
|
Emoji_V2_5_0 = %i[
|
@@ -735,6 +747,7 @@ module Regexp::Syntax
|
|
735
747
|
V2_6_3 = all[:V2_6_3]
|
736
748
|
V3_1_0 = all[:V3_1_0]
|
737
749
|
V3_2_0 = all[:V3_2_0]
|
750
|
+
V3_5_0 = all[:V3_5_0]
|
738
751
|
|
739
752
|
All = all[/^V\d+_\d+_\d+$/]
|
740
753
|
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.11.
|
4
|
+
version: 2.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
- Janosch Müller
|
9
|
-
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
15
14
|
email:
|
@@ -106,6 +105,7 @@ files:
|
|
106
105
|
- lib/regexp_parser/syntax/versions/2.6.3.rb
|
107
106
|
- lib/regexp_parser/syntax/versions/3.1.0.rb
|
108
107
|
- lib/regexp_parser/syntax/versions/3.2.0.rb
|
108
|
+
- lib/regexp_parser/syntax/versions/3.5.0.rb
|
109
109
|
- lib/regexp_parser/token.rb
|
110
110
|
- lib/regexp_parser/version.rb
|
111
111
|
- regexp_parser.gemspec
|
@@ -119,7 +119,6 @@ metadata:
|
|
119
119
|
source_code_uri: https://github.com/ammar/regexp_parser
|
120
120
|
wiki_uri: https://github.com/ammar/regexp_parser/wiki
|
121
121
|
rubygems_mfa_required: 'true'
|
122
|
-
post_install_message:
|
123
122
|
rdoc_options: []
|
124
123
|
require_paths:
|
125
124
|
- lib
|
@@ -134,8 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
134
133
|
- !ruby/object:Gem::Version
|
135
134
|
version: '0'
|
136
135
|
requirements: []
|
137
|
-
rubygems_version: 3.
|
138
|
-
signing_key:
|
136
|
+
rubygems_version: 3.6.7
|
139
137
|
specification_version: 4
|
140
138
|
summary: Scanner, lexer, parser for ruby's regular expressions
|
141
139
|
test_files: []
|