regexp_parser 2.11.2 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/lib/regexp_parser/scanner/properties/long.csv +13 -0
- data/lib/regexp_parser/scanner/properties/short.csv +4 -0
- data/lib/regexp_parser/scanner/scanner.rl +1 -1
- data/lib/regexp_parser/scanner.rb +733 -641
- data/lib/regexp_parser/syntax/token/unicode_property.rb +71 -26
- data/lib/regexp_parser/syntax/versions/4.0.0.rb +4 -0
- data/lib/regexp_parser/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: aa5734a20a0705226c021d9a0efef48a6ca24b5b18f15f93b34c12ffe5021d1e
|
|
4
|
+
data.tar.gz: fa77ac98b3bd17d6bdba43d387ebbbd84a6a0a5fa86123c7eee68646e90ce37c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4bb62063aaa64e3828c5abf551fbf5ce83f726b5ebeb56af3e9210fd7e8c110c7f8f9ba1fcafd0dd0f66b8e8f36f04c1d72e5a739c4ace8fa19978dfc1a794b1
|
|
7
|
+
data.tar.gz: a82384912534ec3ca98d7665434c655468b15e4d75d6c1f13405f0b777d818746c6686d36049a158f0df9b829c129026487e5c3e6cf8a8f7c7ca65155a937db3
|
data/Gemfile
CHANGED
|
data/lib/regexp_parser/scanner/properties/long.csv
CHANGED
|
@@ -10,6 +10,7 @@ age=14.0,age=14.0
|
|
|
10
10
|
age=15.0,age=15.0
|
|
11
11
|
age=15.1,age=15.1
|
|
12
12
|
age=16.0,age=16.0
|
|
13
|
+
age=17.0,age=17.0
|
|
13
14
|
age=2.0,age=2.0
|
|
14
15
|
age=2.1,age=2.1
|
|
15
16
|
age=3.0,age=3.0
|
|
@@ -44,6 +45,7 @@ bamum,bamum
|
|
|
44
45
|
bassavah,bassa_vah
|
|
45
46
|
batak,batak
|
|
46
47
|
bengali,bengali
|
|
48
|
+
beriaerfe,beria_erfe
|
|
47
49
|
bhaiksuki,bhaiksuki
|
|
48
50
|
bidicontrol,bidi_control
|
|
49
51
|
blank,blank
|
|
@@ -176,6 +178,7 @@ inbasiclatin,in_basic_latin
|
|
|
176
178
|
inbassavah,in_bassa_vah
|
|
177
179
|
inbatak,in_batak
|
|
178
180
|
inbengali,in_bengali
|
|
181
|
+
inberiaerfe,in_beria_erfe
|
|
179
182
|
inbhaiksuki,in_bhaiksuki
|
|
180
183
|
inblockelements,in_block_elements
|
|
181
184
|
inbopomofo,in_bopomofo
|
|
@@ -211,6 +214,7 @@ incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
|
|
|
211
214
|
incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
|
|
212
215
|
incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
|
|
213
216
|
incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
|
|
217
|
+
incjkunifiedideographsextensionj,in_cjk_unified_ideographs_extension_j
|
|
214
218
|
incombiningdiacriticalmarks,in_combining_diacritical_marks
|
|
215
219
|
incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
|
|
216
220
|
incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
|
|
@@ -360,6 +364,7 @@ inmiscellaneousmathematicalsymbolsb,in_miscellaneous_mathematical_symbols_b
|
|
|
360
364
|
inmiscellaneoussymbols,in_miscellaneous_symbols
|
|
361
365
|
inmiscellaneoussymbolsandarrows,in_miscellaneous_symbols_and_arrows
|
|
362
366
|
inmiscellaneoussymbolsandpictographs,in_miscellaneous_symbols_and_pictographs
|
|
367
|
+
inmiscellaneoussymbolssupplement,in_miscellaneous_symbols_supplement
|
|
363
368
|
inmiscellaneoustechnical,in_miscellaneous_technical
|
|
364
369
|
inmodi,in_modi
|
|
365
370
|
inmodifiertoneletters,in_modifier_tone_letters
|
|
@@ -419,9 +424,11 @@ insaurashtra,in_saurashtra
|
|
|
419
424
|
inscriptionalpahlavi,inscriptional_pahlavi
|
|
420
425
|
inscriptionalparthian,inscriptional_parthian
|
|
421
426
|
insharada,in_sharada
|
|
427
|
+
insharadasupplement,in_sharada_supplement
|
|
422
428
|
inshavian,in_shavian
|
|
423
429
|
inshorthandformatcontrols,in_shorthand_format_controls
|
|
424
430
|
insiddham,in_siddham
|
|
431
|
+
insidetic,in_sidetic
|
|
425
432
|
insinhala,in_sinhala
|
|
426
433
|
insinhalaarchaicnumbers,in_sinhala_archaic_numbers
|
|
427
434
|
insmallformvariants,in_small_form_variants
|
|
@@ -457,12 +464,14 @@ intaile,in_tai_le
|
|
|
457
464
|
intaitham,in_tai_tham
|
|
458
465
|
intaiviet,in_tai_viet
|
|
459
466
|
intaixuanjingsymbols,in_tai_xuan_jing_symbols
|
|
467
|
+
intaiyo,in_tai_yo
|
|
460
468
|
intakri,in_takri
|
|
461
469
|
intamil,in_tamil
|
|
462
470
|
intamilsupplement,in_tamil_supplement
|
|
463
471
|
intangsa,in_tangsa
|
|
464
472
|
intangut,in_tangut
|
|
465
473
|
intangutcomponents,in_tangut_components
|
|
474
|
+
intangutcomponentssupplement,in_tangut_components_supplement
|
|
466
475
|
intangutsupplement,in_tangut_supplement
|
|
467
476
|
intelugu,in_telugu
|
|
468
477
|
inthaana,in_thaana
|
|
@@ -471,6 +480,7 @@ intibetan,in_tibetan
|
|
|
471
480
|
intifinagh,in_tifinagh
|
|
472
481
|
intirhuta,in_tirhuta
|
|
473
482
|
intodhri,in_todhri
|
|
483
|
+
intolongsiki,in_tolong_siki
|
|
474
484
|
intoto,in_toto
|
|
475
485
|
intransportandmapsymbols,in_transport_and_map_symbols
|
|
476
486
|
intulutigalari,in_tulu_tigalari
|
|
@@ -612,6 +622,7 @@ separator,separator
|
|
|
612
622
|
sharada,sharada
|
|
613
623
|
shavian,shavian
|
|
614
624
|
siddham,siddham
|
|
625
|
+
sidetic,sidetic
|
|
615
626
|
signwriting,signwriting
|
|
616
627
|
sinhala,sinhala
|
|
617
628
|
softdotted,soft_dotted
|
|
@@ -632,6 +643,7 @@ tagbanwa,tagbanwa
|
|
|
632
643
|
taile,tai_le
|
|
633
644
|
taitham,tai_tham
|
|
634
645
|
taiviet,tai_viet
|
|
646
|
+
taiyo,tai_yo
|
|
635
647
|
takri,takri
|
|
636
648
|
tamil,tamil
|
|
637
649
|
tangsa,tangsa
|
|
@@ -645,6 +657,7 @@ tifinagh,tifinagh
|
|
|
645
657
|
tirhuta,tirhuta
|
|
646
658
|
titlecaseletter,titlecase_letter
|
|
647
659
|
todhri,todhri
|
|
660
|
+
tolongsiki,tolong_siki
|
|
648
661
|
toto,toto
|
|
649
662
|
tulutigalari,tulu_tigalari
|
|
650
663
|
ugaritic,ugaritic
|
|
data/lib/regexp_parser/scanner/properties/short.csv
CHANGED
|
@@ -11,6 +11,7 @@ bamu,bamum
|
|
|
11
11
|
bass,bassa_vah
|
|
12
12
|
batk,batak
|
|
13
13
|
beng,bengali
|
|
14
|
+
berf,beria_erfe
|
|
14
15
|
bhks,bhaiksuki
|
|
15
16
|
bidic,bidi_control
|
|
16
17
|
bopo,bopomofo
|
|
@@ -202,6 +203,7 @@ sgnw,signwriting
|
|
|
202
203
|
shaw,shavian
|
|
203
204
|
shrd,sharada
|
|
204
205
|
sidd,siddham
|
|
206
|
+
sidt,sidetic
|
|
205
207
|
sind,khudawadi
|
|
206
208
|
sinh,sinhala
|
|
207
209
|
sk,modifier_symbol
|
|
@@ -223,6 +225,7 @@ talu,new_tai_lue
|
|
|
223
225
|
taml,tamil
|
|
224
226
|
tang,tangut
|
|
225
227
|
tavt,tai_viet
|
|
228
|
+
tayo,tai_yo
|
|
226
229
|
telu,telugu
|
|
227
230
|
term,terminal_punctuation
|
|
228
231
|
tfng,tifinagh
|
|
@@ -232,6 +235,7 @@ tibt,tibetan
|
|
|
232
235
|
tirh,tirhuta
|
|
233
236
|
tnsa,tangsa
|
|
234
237
|
todr,todhri
|
|
238
|
+
tols,tolong_siki
|
|
235
239
|
tutg,tulu_tigalari
|
|
236
240
|
ugar,ugaritic
|
|
237
241
|
uideo,unified_ideograph
|
|
data/lib/regexp_parser/scanner/scanner.rl
CHANGED
|
@@ -247,7 +247,7 @@
|
|
|
247
247
|
# Treat all remaining escapes - those not supported in sets - as literal.
|
|
248
248
|
# (This currently includes \^, \-, \&, \:, although these could potentially
|
|
249
249
|
# be meta chars when not escaped, depending on their position in the set.)
|
|
250
|
-
any > (escaped_set_alpha, 1) {
|
|
250
|
+
(any | utf8_multibyte) > (escaped_set_alpha, 1) {
|
|
251
251
|
emit(:escape, :literal, copy(data, ts-1, te))
|
|
252
252
|
fret;
|
|
253
253
|
};
|