regexp_parser 2.8.0 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +1 -1
- data/lib/regexp_parser/scanner/properties/long.csv +11 -0
- data/lib/regexp_parser/scanner/properties/short.csv +2 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +14 -1
- data/lib/regexp_parser/version.rb +1 -1
- metadata +3 -4
- data/lib/regexp_parser/scanner/mapping.rb +0 -89
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1426faee272654c45e3da8e262e94cfdbcf134dbad7804aed8cd945334c07be
|
4
|
+
data.tar.gz: 37eec721839fe2ebfc25c9d614756289b59ee766f5e7e60ecf4839b554bbb93e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: abed9d7f387634b5e16eb19cbfd5d9aab03288dd4d284b1c52688f958714479783275c5418ee623607ced96b301124ab82dff546e7e4146c7c5ec7feae3e089d
|
7
|
+
data.tar.gz: 62c0757df1c73df52fcf71ef8de666ab9a51a4a8145e71321424ab0ff8408cb2b707cf154dae64ebbcc5a9c8a12ee377a3eadab7549432a9d0e6ee0e65afddd1
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [2.8.1] - 2023-06-10 - [Janosch Müller](mailto:janosch84@gmail.com)
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
|
14
|
+
- support for extpict unicode property, added in Ruby 2.6
|
15
|
+
- support for 10 unicode script/block properties added in Ruby 3.2
|
16
|
+
|
10
17
|
## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
11
18
|
|
12
19
|
### Added
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
data/lib/regexp_parser/scanner/properties/long.csv
CHANGED
@@ -7,6 +7,7 @@ age=12.0,age=12.0
|
|
7
7
|
age=12.1,age=12.1
|
8
8
|
age=13.0,age=13.0
|
9
9
|
age=14.0,age=14.0
|
10
|
+
age=15.0,age=15.0
|
10
11
|
age=2.0,age=2.0
|
11
12
|
age=2.1,age=2.1
|
12
13
|
age=3.0,age=3.0
|
@@ -97,6 +98,7 @@ emojimodifierbase,emoji_modifier_base
|
|
97
98
|
emojipresentation,emoji_presentation
|
98
99
|
enclosingmark,enclosing_mark
|
99
100
|
ethiopic,ethiopic
|
101
|
+
extendedpictographic,extended_pictographic
|
100
102
|
extender,extender
|
101
103
|
finalpunctuation,final_punctuation
|
102
104
|
format,format
|
@@ -139,6 +141,7 @@ inancientsymbols,in_ancient_symbols
|
|
139
141
|
inarabic,in_arabic
|
140
142
|
inarabicextendeda,in_arabic_extended_a
|
141
143
|
inarabicextendedb,in_arabic_extended_b
|
144
|
+
inarabicextendedc,in_arabic_extended_c
|
142
145
|
inarabicmathematicalalphabeticsymbols,in_arabic_mathematical_alphabetic_symbols
|
143
146
|
inarabicpresentationformsa,in_arabic_presentation_forms_a
|
144
147
|
inarabicpresentationformsb,in_arabic_presentation_forms_b
|
@@ -186,6 +189,7 @@ incjkunifiedideographsextensiond,in_cjk_unified_ideographs_extension_d
|
|
186
189
|
incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
|
187
190
|
incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
|
188
191
|
incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
|
192
|
+
incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
|
189
193
|
incombiningdiacriticalmarks,in_combining_diacritical_marks
|
190
194
|
incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
|
191
195
|
incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
|
@@ -205,10 +209,12 @@ incyrillic,in_cyrillic
|
|
205
209
|
incyrillicextendeda,in_cyrillic_extended_a
|
206
210
|
incyrillicextendedb,in_cyrillic_extended_b
|
207
211
|
incyrillicextendedc,in_cyrillic_extended_c
|
212
|
+
incyrillicextendedd,in_cyrillic_extended_d
|
208
213
|
incyrillicsupplement,in_cyrillic_supplement
|
209
214
|
indeseret,in_deseret
|
210
215
|
indevanagari,in_devanagari
|
211
216
|
indevanagariextended,in_devanagari_extended
|
217
|
+
indevanagariextendeda,in_devanagari_extended_a
|
212
218
|
indingbats,in_dingbats
|
213
219
|
indivesakuru,in_dives_akuru
|
214
220
|
indogra,in_dogra
|
@@ -268,6 +274,7 @@ inipaextensions,in_ipa_extensions
|
|
268
274
|
initialpunctuation,initial_punctuation
|
269
275
|
injavanese,in_javanese
|
270
276
|
inkaithi,in_kaithi
|
277
|
+
inkaktoviknumerals,in_kaktovik_numerals
|
271
278
|
inkanaextendeda,in_kana_extended_a
|
272
279
|
inkanaextendedb,in_kana_extended_b
|
273
280
|
inkanasupplement,in_kana_supplement
|
@@ -276,6 +283,7 @@ inkangxiradicals,in_kangxi_radicals
|
|
276
283
|
inkannada,in_kannada
|
277
284
|
inkatakana,in_katakana
|
278
285
|
inkatakanaphoneticextensions,in_katakana_phonetic_extensions
|
286
|
+
inkawi,in_kawi
|
279
287
|
inkayahli,in_kayah_li
|
280
288
|
inkharoshthi,in_kharoshthi
|
281
289
|
inkhitansmallscript,in_khitan_small_script
|
@@ -339,6 +347,7 @@ inmyanmar,in_myanmar
|
|
339
347
|
inmyanmarextendeda,in_myanmar_extended_a
|
340
348
|
inmyanmarextendedb,in_myanmar_extended_b
|
341
349
|
innabataean,in_nabataean
|
350
|
+
innagmundari,in_nag_mundari
|
342
351
|
innandinagari,in_nandinagari
|
343
352
|
innewa,in_newa
|
344
353
|
innewtailue,in_new_tai_lue
|
@@ -457,6 +466,7 @@ joincontrol,join_control
|
|
457
466
|
kaithi,kaithi
|
458
467
|
kannada,kannada
|
459
468
|
katakana,katakana
|
469
|
+
kawi,kawi
|
460
470
|
kayahli,kayah_li
|
461
471
|
kharoshthi,kharoshthi
|
462
472
|
khitansmallscript,khitan_small_script
|
@@ -503,6 +513,7 @@ mro,mro
|
|
503
513
|
multani,multani
|
504
514
|
myanmar,myanmar
|
505
515
|
nabataean,nabataean
|
516
|
+
nagmundari,nag_mundari
|
506
517
|
nandinagari,nandinagari
|
507
518
|
newa,newa
|
508
519
|
newline,newline
|
data/lib/regexp_parser/scanner/properties/short.csv
CHANGED
@@ -57,6 +57,7 @@ emod,emoji_modifier
|
|
57
57
|
epres,emoji_presentation
|
58
58
|
ethi,ethiopic
|
59
59
|
ext,extender
|
60
|
+
extpict,extended_pictographic
|
60
61
|
geor,georgian
|
61
62
|
glag,glagolitic
|
62
63
|
gong,gunjala_gondi
|
@@ -133,6 +134,7 @@ mtei,meetei_mayek
|
|
133
134
|
mult,multani
|
134
135
|
mymr,myanmar
|
135
136
|
n,number
|
137
|
+
nagm,nag_mundari
|
136
138
|
nand,nandinagari
|
137
139
|
narb,old_north_arabian
|
138
140
|
nbat,nabataean
|
data/lib/regexp_parser/syntax/token/unicode_property.rb
CHANGED
@@ -59,7 +59,7 @@ module Regexp::Syntax
|
|
59
59
|
|
60
60
|
Age_V3_1_0 = %i[age=13.0]
|
61
61
|
|
62
|
-
Age_V3_2_0 = %i[age=14.0]
|
62
|
+
Age_V3_2_0 = %i[age=14.0 age=15.0]
|
63
63
|
|
64
64
|
Age = all[:Age_V]
|
65
65
|
|
@@ -321,6 +321,8 @@ module Regexp::Syntax
|
|
321
321
|
|
322
322
|
Script_V3_2_0 = %i[
|
323
323
|
cypro_minoan
|
324
|
+
kawi
|
325
|
+
nag_mundari
|
324
326
|
old_uyghur
|
325
327
|
tangsa
|
326
328
|
toto
|
@@ -667,11 +669,18 @@ module Regexp::Syntax
|
|
667
669
|
|
668
670
|
UnicodeBlock_V3_2_0 = %i[
|
669
671
|
in_arabic_extended_b
|
672
|
+
in_arabic_extended_c
|
673
|
+
in_cjk_unified_ideographs_extension_h
|
670
674
|
in_cypro_minoan
|
675
|
+
in_cyrillic_extended_d
|
676
|
+
in_devanagari_extended_a
|
671
677
|
in_ethiopic_extended_b
|
678
|
+
in_kaktovik_numerals
|
672
679
|
in_kana_extended_b
|
680
|
+
in_kawi
|
673
681
|
in_latin_extended_f
|
674
682
|
in_latin_extended_g
|
683
|
+
in_nag_mundari
|
675
684
|
in_old_uyghur
|
676
685
|
in_tangsa
|
677
686
|
in_toto
|
@@ -690,6 +699,10 @@ module Regexp::Syntax
|
|
690
699
|
emoji_presentation
|
691
700
|
]
|
692
701
|
|
702
|
+
Emoji_V2_6_0 = %i[
|
703
|
+
extended_pictographic
|
704
|
+
]
|
705
|
+
|
693
706
|
Emoji = all[:Emoji_V]
|
694
707
|
|
695
708
|
V1_9_0 = Category::All + POSIX + all[:V1_9_0]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.8.0
|
4
|
+
version: 2.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -64,7 +64,6 @@ files:
|
|
64
64
|
- lib/regexp_parser/scanner/errors/premature_end_error.rb
|
65
65
|
- lib/regexp_parser/scanner/errors/scanner_error.rb
|
66
66
|
- lib/regexp_parser/scanner/errors/validation_error.rb
|
67
|
-
- lib/regexp_parser/scanner/mapping.rb
|
68
67
|
- lib/regexp_parser/scanner/properties/long.csv
|
69
68
|
- lib/regexp_parser/scanner/properties/short.csv
|
70
69
|
- lib/regexp_parser/scanner/property.rl
|
@@ -132,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
132
131
|
- !ruby/object:Gem::Version
|
133
132
|
version: '0'
|
134
133
|
requirements: []
|
135
|
-
rubygems_version: 3.4.
|
134
|
+
rubygems_version: 3.4.10
|
136
135
|
signing_key:
|
137
136
|
specification_version: 4
|
138
137
|
summary: Scanner, lexer, parser for ruby's regular expressions
|
data/lib/regexp_parser/scanner/mapping.rb
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
# mapping for simple cases with a 1:1 relation between text and token
|
2
|
-
class Regexp::Scanner
|
3
|
-
MAPPING = {
|
4
|
-
anchor: {
|
5
|
-
'\A' => :bos,
|
6
|
-
'\B' => :nonword_boundary,
|
7
|
-
'\G' => :match_start,
|
8
|
-
'\Z' => :eos_ob_eol,
|
9
|
-
'\b' => :word_boundary,
|
10
|
-
'\z' => :eos,
|
11
|
-
},
|
12
|
-
assertion: {
|
13
|
-
'(?=' => :lookahead,
|
14
|
-
'(?!' => :nlookahead,
|
15
|
-
'(?<=' => :lookbehind,
|
16
|
-
'(?<!' => :nlookbehind,
|
17
|
-
},
|
18
|
-
conditional: {
|
19
|
-
'(?' => :open,
|
20
|
-
},
|
21
|
-
escape: {
|
22
|
-
'\.' => :dot,
|
23
|
-
'\|' => :alternation,
|
24
|
-
'\^' => :bol,
|
25
|
-
'\$' => :eol,
|
26
|
-
'\?' => :zero_or_one,
|
27
|
-
'\*' => :zero_or_more,
|
28
|
-
'\+' => :one_or_more,
|
29
|
-
'\(' => :group_open,
|
30
|
-
'\)' => :group_close,
|
31
|
-
'\{' => :interval_open,
|
32
|
-
'\}' => :interval_close,
|
33
|
-
'\[' => :set_open,
|
34
|
-
'\]' => :set_close,
|
35
|
-
'\\\\' => :backslash,
|
36
|
-
'\a' => :bell,
|
37
|
-
'\b' => :backspace,
|
38
|
-
'\e' => :escape,
|
39
|
-
'\f' => :form_feed,
|
40
|
-
'\n' => :newline,
|
41
|
-
'\r' => :carriage,
|
42
|
-
'\t' => :tab,
|
43
|
-
'\v' => :vertical_tab,
|
44
|
-
},
|
45
|
-
group: {
|
46
|
-
'(?:' => :passive,
|
47
|
-
'(?>' => :atomic,
|
48
|
-
'(?~' => :absence,
|
49
|
-
},
|
50
|
-
meta: {
|
51
|
-
'|' => :alternation,
|
52
|
-
'.' => :dot,
|
53
|
-
},
|
54
|
-
quantifier: {
|
55
|
-
'?' => :zero_or_one,
|
56
|
-
'??' => :zero_or_one_reluctant,
|
57
|
-
'?+' => :zero_or_one_possessive,
|
58
|
-
'*' => :zero_or_more,
|
59
|
-
'*?' => :zero_or_more_reluctant,
|
60
|
-
'*+' => :zero_or_more_possessive,
|
61
|
-
'+' => :one_or_more,
|
62
|
-
'+?' => :one_or_more_reluctant,
|
63
|
-
'++' => :one_or_more_possessive,
|
64
|
-
},
|
65
|
-
set: {
|
66
|
-
'[' => :character,
|
67
|
-
'-' => :range,
|
68
|
-
'&&' => :intersection,
|
69
|
-
},
|
70
|
-
type: {
|
71
|
-
'\d' => :digit,
|
72
|
-
'\D' => :nondigit,
|
73
|
-
'\h' => :hex,
|
74
|
-
'\H' => :nonhex,
|
75
|
-
'\s' => :space,
|
76
|
-
'\S' => :nonspace,
|
77
|
-
'\w' => :word,
|
78
|
-
'\W' => :nonword,
|
79
|
-
'\R' => :linebreak,
|
80
|
-
'\X' => :xgrapheme,
|
81
|
-
}
|
82
|
-
}
|
83
|
-
ANCHOR_MAPPING = MAPPING[:anchor]
|
84
|
-
ASSERTION_MAPPING = MAPPING[:assertion]
|
85
|
-
ESCAPE_MAPPING = MAPPING[:escape]
|
86
|
-
GROUP_MAPPING = MAPPING[:group]
|
87
|
-
QUANTIFIER_MAPPING = MAPPING[:quantifier]
|
88
|
-
TYPE_MAPPING = MAPPING[:type]
|
89
|
-
end
|