regexp_parser 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -1
- data/lib/regexp_parser/scanner/properties/long.yml +19 -0
- data/lib/regexp_parser/scanner/properties/short.yml +7 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +42 -21
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +1 -1
- data/test/parser/test_properties.rb +2 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ff2b9541be8d00d5a0f8a355ebb9ab6bc5bc2ac50ffa14df13144bf2d239b42
|
4
|
+
data.tar.gz: b5d4c720eaa3606a7973b110251a5fb1fe87e11714fed5a195908678098a4cbe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8759d373fdea7bbd455a5e2ff96ce1a64cb81f35c325fad49a886a99388897486c1904a847a072b14b245e6da0dded81c3ef031e74944b2fe5d8c67a4cffaab
|
7
|
+
data.tar.gz: 6e39afe8a277eced992c0508a99d022dba939401925e6e3e793cab364d0b3b2143cfade3a433b9d0445c49004dad781dc04f7a99c69229c471dff7095823d065
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
### [1.4.0] - 2019-04-02 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Added
|
6
|
+
|
7
|
+
- Added support for 19 new unicode properties introduced in Ruby 2.6.0
|
8
|
+
|
3
9
|
### [1.3.0] - 2018-11-14 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
10
|
|
5
11
|
### Added
|
data/README.md
CHANGED
@@ -357,7 +357,7 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
357
|
|   _**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
358
358
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
359
359
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
360
|
-
| **Unicode Properties** | _<sub>([Unicode 10.0.0](http://www.unicode.org/versions/Unicode10.0.0/))</sub>_ | ⋱ |
|
360
|
+
| **Unicode Properties** | _<sub>([Unicode 11.0.0](http://www.unicode.org/versions/Unicode11.0.0/))</sub>_ | ⋱ |
|
361
361
|
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
362
362
|
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
363
363
|
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
data/lib/regexp_parser/scanner/properties/long.yml
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
adlam: adlam
|
6
6
|
age=1.1: age=1.1
|
7
7
|
age=10.0: age=10.0
|
8
|
+
age=11.0: age=11.0
|
8
9
|
age=2.0: age=2.0
|
9
10
|
age=2.1: age=2.1
|
10
11
|
age=3.0: age=3.0
|
@@ -81,6 +82,7 @@ deseret: deseret
|
|
81
82
|
devanagari: devanagari
|
82
83
|
diacritic: diacritic
|
83
84
|
digit: digit
|
85
|
+
dogra: dogra
|
84
86
|
duployan: duployan
|
85
87
|
egyptianhieroglyphs: egyptian_hieroglyphs
|
86
88
|
elbasan: elbasan
|
@@ -104,9 +106,11 @@ graphemeextend: grapheme_extend
|
|
104
106
|
graphemelink: grapheme_link
|
105
107
|
greek: greek
|
106
108
|
gujarati: gujarati
|
109
|
+
gunjalagondi: gunjala_gondi
|
107
110
|
gurmukhi: gurmukhi
|
108
111
|
han: han
|
109
112
|
hangul: hangul
|
113
|
+
hanifirohingya: hanifi_rohingya
|
110
114
|
hanunoo: hanunoo
|
111
115
|
hatran: hatran
|
112
116
|
hebrew: hebrew
|
@@ -160,6 +164,7 @@ inchakma: in_chakma
|
|
160
164
|
incham: in_cham
|
161
165
|
incherokee: in_cherokee
|
162
166
|
incherokeesupplement: in_cherokee_supplement
|
167
|
+
inchesssymbols: in_chess_symbols
|
163
168
|
incjkcompatibility: in_cjk_compatibility
|
164
169
|
incjkcompatibilityforms: in_cjk_compatibility_forms
|
165
170
|
incjkcompatibilityideographs: in_cjk_compatibility_ideographs
|
@@ -197,6 +202,7 @@ indeseret: in_deseret
|
|
197
202
|
indevanagari: in_devanagari
|
198
203
|
indevanagariextended: in_devanagari_extended
|
199
204
|
indingbats: in_dingbats
|
205
|
+
indogra: in_dogra
|
200
206
|
indominotiles: in_domino_tiles
|
201
207
|
induployan: in_duployan
|
202
208
|
inearlydynasticcuneiform: in_early_dynastic_cuneiform
|
@@ -215,6 +221,7 @@ ingeneralpunctuation: in_general_punctuation
|
|
215
221
|
ingeometricshapes: in_geometric_shapes
|
216
222
|
ingeometricshapesextended: in_geometric_shapes_extended
|
217
223
|
ingeorgian: in_georgian
|
224
|
+
ingeorgianextended: in_georgian_extended
|
218
225
|
ingeorgiansupplement: in_georgian_supplement
|
219
226
|
inglagolitic: in_glagolitic
|
220
227
|
inglagoliticsupplement: in_glagolitic_supplement
|
@@ -223,6 +230,7 @@ ingrantha: in_grantha
|
|
223
230
|
ingreekandcoptic: in_greek_and_coptic
|
224
231
|
ingreekextended: in_greek_extended
|
225
232
|
ingujarati: in_gujarati
|
233
|
+
ingunjalagondi: in_gunjala_gondi
|
226
234
|
ingurmukhi: in_gurmukhi
|
227
235
|
inhalfwidthandfullwidthforms: in_halfwidth_and_fullwidth_forms
|
228
236
|
inhangulcompatibilityjamo: in_hangul_compatibility_jamo
|
@@ -230,6 +238,7 @@ inhanguljamo: in_hangul_jamo
|
|
230
238
|
inhanguljamoextendeda: in_hangul_jamo_extended_a
|
231
239
|
inhanguljamoextendedb: in_hangul_jamo_extended_b
|
232
240
|
inhangulsyllables: in_hangul_syllables
|
241
|
+
inhanifirohingya: in_hanifi_rohingya
|
233
242
|
inhanunoo: in_hanunoo
|
234
243
|
inhatran: in_hatran
|
235
244
|
inhebrew: in_hebrew
|
@@ -240,6 +249,7 @@ inhiragana: in_hiragana
|
|
240
249
|
inideographicdescriptioncharacters: in_ideographic_description_characters
|
241
250
|
inideographicsymbolsandpunctuation: in_ideographic_symbols_and_punctuation
|
242
251
|
inimperialaramaic: in_imperial_aramaic
|
252
|
+
inindicsiyaqnumbers: in_indic_siyaq_numbers
|
243
253
|
ininscriptionalpahlavi: in_inscriptional_pahlavi
|
244
254
|
ininscriptionalparthian: in_inscriptional_parthian
|
245
255
|
inipaextensions: in_ipa_extensions
|
@@ -279,6 +289,7 @@ inlycian: in_lycian
|
|
279
289
|
inlydian: in_lydian
|
280
290
|
inmahajani: in_mahajani
|
281
291
|
inmahjongtiles: in_mahjong_tiles
|
292
|
+
inmakasar: in_makasar
|
282
293
|
inmalayalam: in_malayalam
|
283
294
|
inmandaic: in_mandaic
|
284
295
|
inmanichaean: in_manichaean
|
@@ -286,6 +297,8 @@ inmarchen: in_marchen
|
|
286
297
|
inmasaramgondi: in_masaram_gondi
|
287
298
|
inmathematicalalphanumericsymbols: in_mathematical_alphanumeric_symbols
|
288
299
|
inmathematicaloperators: in_mathematical_operators
|
300
|
+
inmayannumerals: in_mayan_numerals
|
301
|
+
inmedefaidrin: in_medefaidrin
|
289
302
|
inmeeteimayek: in_meetei_mayek
|
290
303
|
inmeeteimayekextensions: in_meetei_mayek_extensions
|
291
304
|
inmendekikakui: in_mende_kikakui
|
@@ -322,6 +335,7 @@ inolditalic: in_old_italic
|
|
322
335
|
inoldnortharabian: in_old_north_arabian
|
323
336
|
inoldpermic: in_old_permic
|
324
337
|
inoldpersian: in_old_persian
|
338
|
+
inoldsogdian: in_old_sogdian
|
325
339
|
inoldsoutharabian: in_old_south_arabian
|
326
340
|
inoldturkic: in_old_turkic
|
327
341
|
inopticalcharacterrecognition: in_optical_character_recognition
|
@@ -354,6 +368,7 @@ insiddham: in_siddham
|
|
354
368
|
insinhala: in_sinhala
|
355
369
|
insinhalaarchaicnumbers: in_sinhala_archaic_numbers
|
356
370
|
insmallformvariants: in_small_form_variants
|
371
|
+
insogdian: in_sogdian
|
357
372
|
insorasompeng: in_sora_sompeng
|
358
373
|
insoyombo: in_soyombo
|
359
374
|
inspacingmodifierletters: in_spacing_modifier_letters
|
@@ -431,6 +446,7 @@ lowercaseletter: lowercase_letter
|
|
431
446
|
lycian: lycian
|
432
447
|
lydian: lydian
|
433
448
|
mahajani: mahajani
|
449
|
+
makasar: makasar
|
434
450
|
malayalam: malayalam
|
435
451
|
mandaic: mandaic
|
436
452
|
manichaean: manichaean
|
@@ -439,6 +455,7 @@ mark: mark
|
|
439
455
|
masaramgondi: masaram_gondi
|
440
456
|
math: math
|
441
457
|
mathsymbol: math_symbol
|
458
|
+
medefaidrin: medefaidrin
|
442
459
|
meeteimayek: meetei_mayek
|
443
460
|
mendekikakui: mende_kikakui
|
444
461
|
meroiticcursive: meroitic_cursive
|
@@ -467,6 +484,7 @@ olditalic: old_italic
|
|
467
484
|
oldnortharabian: old_north_arabian
|
468
485
|
oldpermic: old_permic
|
469
486
|
oldpersian: old_persian
|
487
|
+
oldsogdian: old_sogdian
|
470
488
|
oldsoutharabian: old_south_arabian
|
471
489
|
oldturkic: old_turkic
|
472
490
|
openpunctuation: open_punctuation
|
@@ -515,6 +533,7 @@ siddham: siddham
|
|
515
533
|
signwriting: signwriting
|
516
534
|
sinhala: sinhala
|
517
535
|
softdotted: soft_dotted
|
536
|
+
sogdian: sogdian
|
518
537
|
sorasompeng: sora_sompeng
|
519
538
|
soyombo: soyombo
|
520
539
|
space: space
|
data/lib/regexp_parser/scanner/properties/short.yml
CHANGED
@@ -44,6 +44,7 @@ dep: deprecated
|
|
44
44
|
deva: devanagari
|
45
45
|
di: default_ignorable_code_point
|
46
46
|
dia: diacritic
|
47
|
+
dogr: dogra
|
47
48
|
dsrt: deseret
|
48
49
|
dupl: duployan
|
49
50
|
egyp: egyptian_hieroglyphs
|
@@ -52,6 +53,7 @@ ethi: ethiopic
|
|
52
53
|
ext: extender
|
53
54
|
geor: georgian
|
54
55
|
glag: glagolitic
|
56
|
+
gong: gunjala_gondi
|
55
57
|
gonm: masaram_gondi
|
56
58
|
goth: gothic
|
57
59
|
gran: grantha
|
@@ -105,11 +107,13 @@ lyci: lycian
|
|
105
107
|
lydi: lydian
|
106
108
|
m: mark
|
107
109
|
mahj: mahajani
|
110
|
+
maka: makasar
|
108
111
|
mand: mandaic
|
109
112
|
mani: manichaean
|
110
113
|
marc: marchen
|
111
114
|
mc: spacing_mark
|
112
115
|
me: enclosing_mark
|
116
|
+
medf: medefaidrin
|
113
117
|
mend: mende_kikakui
|
114
118
|
merc: meroitic_cursive
|
115
119
|
mero: meroitic_hieroglyphs
|
@@ -168,6 +172,7 @@ qaai: inherited
|
|
168
172
|
qmark: quotation_mark
|
169
173
|
ri: regional_indicator
|
170
174
|
rjng: rejang
|
175
|
+
rohg: hanifi_rohingya
|
171
176
|
runr: runic
|
172
177
|
s: symbol
|
173
178
|
samr: samaritan
|
@@ -184,6 +189,8 @@ sinh: sinhala
|
|
184
189
|
sk: modifier_symbol
|
185
190
|
sm: math_symbol
|
186
191
|
so: other_symbol
|
192
|
+
sogd: sogdian
|
193
|
+
sogo: old_sogdian
|
187
194
|
sora: sora_sompeng
|
188
195
|
soyo: soyombo
|
189
196
|
sterm: sentence_terminal
|
data/lib/regexp_parser/syntax/tokens/unicode_property.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module Regexp::Syntax
|
2
2
|
module Token
|
3
3
|
module UnicodeProperty
|
4
|
+
all = proc { |name| constants.grep(/#{name}/).flat_map(&method(:const_get)) }
|
5
|
+
|
4
6
|
CharType_V1_9_0 = [:alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph,
|
5
7
|
:lower, :print, :punct, :space, :upper, :word, :xdigit]
|
6
8
|
|
@@ -49,7 +51,9 @@ module Regexp::Syntax
|
|
49
51
|
|
50
52
|
Age_V2_5_0 = [:'age=10.0']
|
51
53
|
|
52
|
-
|
54
|
+
Age_V2_6_0 = [:'age=11.0']
|
55
|
+
|
56
|
+
Age = all[:Age_V]
|
53
57
|
|
54
58
|
Derived_V1_9_0 = [
|
55
59
|
:ascii_hex_digit,
|
@@ -118,7 +122,7 @@ module Regexp::Syntax
|
|
118
122
|
:regional_indicator
|
119
123
|
]
|
120
124
|
|
121
|
-
Derived =
|
125
|
+
Derived = all[:Derived_V]
|
122
126
|
|
123
127
|
Script_V1_9_0 = [
|
124
128
|
:arabic,
|
@@ -283,8 +287,17 @@ module Regexp::Syntax
|
|
283
287
|
:zanabazar_square,
|
284
288
|
]
|
285
289
|
|
286
|
-
|
287
|
-
|
290
|
+
Script_V2_6_0 = [
|
291
|
+
:dogra,
|
292
|
+
:gunjala_gondi,
|
293
|
+
:hanifi_rohingya,
|
294
|
+
:makasar,
|
295
|
+
:medefaidrin,
|
296
|
+
:old_sogdian,
|
297
|
+
:sogdian,
|
298
|
+
]
|
299
|
+
|
300
|
+
Script = all[:Script_V]
|
288
301
|
|
289
302
|
UnicodeBlock_V1_9_0 = [
|
290
303
|
:in_alphabetic_presentation_forms,
|
@@ -585,8 +598,21 @@ module Regexp::Syntax
|
|
585
598
|
:in_zanabazar_square,
|
586
599
|
]
|
587
600
|
|
588
|
-
|
589
|
-
|
601
|
+
UnicodeBlock_V2_6_0 = [
|
602
|
+
:in_chess_symbols,
|
603
|
+
:in_dogra,
|
604
|
+
:in_georgian_extended,
|
605
|
+
:in_gunjala_gondi,
|
606
|
+
:in_hanifi_rohingya,
|
607
|
+
:in_indic_siyaq_numbers,
|
608
|
+
:in_makasar,
|
609
|
+
:in_mayan_numerals,
|
610
|
+
:in_medefaidrin,
|
611
|
+
:in_old_sogdian,
|
612
|
+
:in_sogdian,
|
613
|
+
]
|
614
|
+
|
615
|
+
UnicodeBlock = all[:UnicodeBlock_V]
|
590
616
|
|
591
617
|
Emoji_V2_5_0 = [
|
592
618
|
:emoji,
|
@@ -596,23 +622,18 @@ module Regexp::Syntax
|
|
596
622
|
:emoji_presentation,
|
597
623
|
]
|
598
624
|
|
599
|
-
Emoji =
|
600
|
-
|
601
|
-
V1_9_0 = Category::All + POSIX + CharType_V1_9_0 + Derived_V1_9_0 + Script_V1_9_0 + UnicodeBlock_V1_9_0
|
602
|
-
|
603
|
-
V1_9_3 = Age_V1_9_3 + Script_V1_9_3
|
604
|
-
|
605
|
-
V2_0_0 = Age_V2_0_0 + Derived_V2_0_0 + Script_V2_0_0 + UnicodeBlock_V2_0_0
|
606
|
-
|
607
|
-
V2_2_0 = Age_V2_2_0 + Script_V2_2_0 + UnicodeBlock_V2_2_0
|
608
|
-
|
609
|
-
V2_3_0 = Age_V2_3_0 + Script_V2_3_0 + UnicodeBlock_V2_3_0
|
610
|
-
|
611
|
-
V2_4_0 = Age_V2_4_0 + Derived_V2_4_0 + Script_V2_4_0 + UnicodeBlock_V2_4_0
|
625
|
+
Emoji = all[:Emoji_V]
|
612
626
|
|
613
|
-
|
627
|
+
V1_9_0 = Category::All + POSIX + all[:V1_9_0]
|
628
|
+
V1_9_3 = all[:V1_9_3]
|
629
|
+
V2_0_0 = all[:V2_0_0]
|
630
|
+
V2_2_0 = all[:V2_2_0]
|
631
|
+
V2_3_0 = all[:V2_3_0]
|
632
|
+
V2_4_0 = all[:V2_4_0]
|
633
|
+
V2_5_0 = all[:V2_5_0]
|
634
|
+
V2_6_0 = all[:V2_6_0]
|
614
635
|
|
615
|
-
All =
|
636
|
+
All = all[/^V\d+_\d+_\d+$/]
|
616
637
|
|
617
638
|
Type = :property
|
618
639
|
NonType = :nonproperty
|
data/regexp_parser.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |gem|
|
|
8
8
|
|
9
9
|
gem.summary = "Scanner, lexer, parser for ruby's regular expressions"
|
10
10
|
gem.description = 'A library for tokenizing, lexing, and parsing Ruby regular expressions.'
|
11
|
-
gem.homepage = '
|
11
|
+
gem.homepage = 'https://github.com/ammar/regexp_parser'
|
12
12
|
|
13
13
|
if gem.respond_to?(:metadata)
|
14
14
|
gem.metadata = { 'issue_tracker' => 'https://github.com/ammar/regexp_parser/issues' }
|
data/test/parser/test_properties.rb
CHANGED
@@ -17,6 +17,7 @@ class ParserProperties < Test::Unit::TestCase
|
|
17
17
|
'xidc',
|
18
18
|
'XID_Continue',
|
19
19
|
'Emoji',
|
20
|
+
'InChessSymbols',
|
20
21
|
]
|
21
22
|
|
22
23
|
modes.each do |mode|
|
@@ -24,7 +25,7 @@ class ParserProperties < Test::Unit::TestCase
|
|
24
25
|
|
25
26
|
example_props.each do |property|
|
26
27
|
define_method "test_parse_#{token_type}_#{property}" do
|
27
|
-
t = RP.parse "ab\\#{mode}{#{property}}", '
|
28
|
+
t = RP.parse "ab\\#{mode}{#{property}}", '*'
|
28
29
|
|
29
30
|
assert t.expressions.last.is_a?(UnicodeProperty::Base),
|
30
31
|
"Expected property, but got #{t.expressions.last.class.name}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-14 00:00:00.000000000 Z
|
11
|
+
date: 2019-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -83,6 +83,7 @@ files:
|
|
83
83
|
- lib/regexp_parser/syntax/versions/2.4.0.rb
|
84
84
|
- lib/regexp_parser/syntax/versions/2.4.1.rb
|
85
85
|
- lib/regexp_parser/syntax/versions/2.5.0.rb
|
86
|
+
- lib/regexp_parser/syntax/versions/2.6.0.rb
|
86
87
|
- lib/regexp_parser/token.rb
|
87
88
|
- lib/regexp_parser/version.rb
|
88
89
|
- regexp_parser.gemspec
|
@@ -155,7 +156,7 @@ files:
|
|
155
156
|
- test/test_all.rb
|
156
157
|
- test/token/test_all.rb
|
157
158
|
- test/token/test_token.rb
|
158
|
-
homepage:
|
159
|
+
homepage: https://github.com/ammar/regexp_parser
|
159
160
|
licenses:
|
160
161
|
- MIT
|
161
162
|
metadata:
|
@@ -177,8 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
178
|
- !ruby/object:Gem::Version
|
178
179
|
version: '0'
|
179
180
|
requirements: []
|
180
|
-
|
181
|
-
rubygems_version: 2.7.6
|
181
|
+
rubygems_version: 3.0.3
|
182
182
|
signing_key:
|
183
183
|
specification_version: 4
|
184
184
|
summary: Scanner, lexer, parser for ruby's regular expressions
|