regexp_property_values 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +29 -0
- data/.rspec +3 -0
- data/.travis.yml +9 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +35 -0
- data/LICENSE.txt +21 -0
- data/README.md +30 -0
- data/Rakefile +6 -0
- data/bin/console +16 -0
- data/bin/setup +8 -0
- data/lib/UnicodeProps.txt +802 -0
- data/lib/regexp_property_values.rb +77 -0
- data/lib/regexp_property_values/version.rb +3 -0
- data/regexp_property_values.gemspec +27 -0
- metadata +110 -0
data/.gitignore
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
*.gem
|
2
|
+
*.iml
|
3
|
+
*.stTheme.cache
|
4
|
+
*.sublime-project
|
5
|
+
*.sublime-workspace
|
6
|
+
*.swp
|
7
|
+
*.tmlanguage.cache
|
8
|
+
*.tmPreferences.cache
|
9
|
+
*~
|
10
|
+
.byebug_history
|
11
|
+
.DS_Store
|
12
|
+
.idea/
|
13
|
+
.ruby-gemset
|
14
|
+
.ruby-version
|
15
|
+
.tags
|
16
|
+
.tags1
|
17
|
+
bbin/
|
18
|
+
binstubs/*
|
19
|
+
bundler_stubs/*/.yardoc
|
20
|
+
/.bundle/
|
21
|
+
/_yardoc/
|
22
|
+
/coverage/
|
23
|
+
/doc/
|
24
|
+
/pkg/
|
25
|
+
/spec/reports/
|
26
|
+
/tmp/
|
27
|
+
|
28
|
+
# rspec failure tracking
|
29
|
+
.rspec_status
|
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
regexp_property_values (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.3)
|
10
|
+
rake (10.5.0)
|
11
|
+
rspec (3.7.0)
|
12
|
+
rspec-core (~> 3.7.0)
|
13
|
+
rspec-expectations (~> 3.7.0)
|
14
|
+
rspec-mocks (~> 3.7.0)
|
15
|
+
rspec-core (3.7.1)
|
16
|
+
rspec-support (~> 3.7.0)
|
17
|
+
rspec-expectations (3.7.0)
|
18
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
19
|
+
rspec-support (~> 3.7.0)
|
20
|
+
rspec-mocks (3.7.0)
|
21
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
22
|
+
rspec-support (~> 3.7.0)
|
23
|
+
rspec-support (3.7.1)
|
24
|
+
|
25
|
+
PLATFORMS
|
26
|
+
ruby
|
27
|
+
|
28
|
+
DEPENDENCIES
|
29
|
+
bundler (~> 1.16)
|
30
|
+
rake (~> 10.0)
|
31
|
+
regexp_property_values!
|
32
|
+
rspec (~> 3.0)
|
33
|
+
|
34
|
+
BUNDLED WITH
|
35
|
+
1.16.1
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Janosch Müller
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# RegexpPropertyValues
|
2
|
+
|
3
|
+
This microlibrary lets you see which property values are supported by the regular expression engine of the Ruby version you are running.
|
4
|
+
|
5
|
+
That is, it determines all supported values for `\p{value}` expressions.
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
require 'regexp_property_values'
|
11
|
+
|
12
|
+
PV = RegexpPropertyValues
|
13
|
+
|
14
|
+
PV.all # => ["Alpha", "Blank", "Cntrl", ...]
|
15
|
+
PV.all.sort # => ["AHex", "ASCII", "Adlam", "Adlm", "Age=1.1", ...]
|
16
|
+
|
17
|
+
PV.by_category # => {"POSIX brackets" => ["Alpha", "Blank", ...], ...}
|
18
|
+
PV.by_category.keys # => ["POSIX brackets", "Special", "Scripts", ...]
|
19
|
+
|
20
|
+
PV.short_and_long_names # => [["M", "Grek", ...], ["Mark", "Greek", ...]]
|
21
|
+
|
22
|
+
# this one takes a second
|
23
|
+
PV.matched_characters('AHex') # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...]
|
24
|
+
|
25
|
+
# this one takes a minute
|
26
|
+
PV.alias_hash # => {"M" => "Mark", "Grek" => "Greek", ...}
|
27
|
+
|
28
|
+
# download the latest list of possible properties
|
29
|
+
PV.update
|
30
|
+
```
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Interactive console for experimenting with the gem: sets up the bundle,
# loads the library, defines a short alias for it and then starts IRB.
require 'bundler/setup'
require 'regexp_property_values'

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier. A different console may be used instead of IRB, e.g.
# Pry (remember to add pry to the Gemfile first):
#
#   require 'pry'
#   Pry.start

# Shorthand for the library's main module.
PV = RegexpPropertyValues

require 'irb'
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/UnicodeProps.txt
ADDED
@@ -0,0 +1,802 @@
|
|
1
|
+
Onigmo (Oniguruma-mod) Unicode Properties Version 6.2.0 2017/07/17
|
2
|
+
|
3
|
+
* POSIX brackets
|
4
|
+
Alpha
|
5
|
+
Blank
|
6
|
+
Cntrl
|
7
|
+
Digit
|
8
|
+
Graph
|
9
|
+
Lower
|
10
|
+
Print
|
11
|
+
Punct
|
12
|
+
Space
|
13
|
+
Upper
|
14
|
+
XDigit
|
15
|
+
Word
|
16
|
+
Alnum
|
17
|
+
ASCII
|
18
|
+
XPosixPunct
|
19
|
+
|
20
|
+
* Special
|
21
|
+
Any
|
22
|
+
Assigned
|
23
|
+
|
24
|
+
* Major and General Categories
|
25
|
+
C
|
26
|
+
Cc
|
27
|
+
Cf
|
28
|
+
Cn
|
29
|
+
Co
|
30
|
+
Cs
|
31
|
+
L
|
32
|
+
LC
|
33
|
+
Ll
|
34
|
+
Lm
|
35
|
+
Lo
|
36
|
+
Lt
|
37
|
+
Lu
|
38
|
+
M
|
39
|
+
Mc
|
40
|
+
Me
|
41
|
+
Mn
|
42
|
+
N
|
43
|
+
Nd
|
44
|
+
Nl
|
45
|
+
No
|
46
|
+
P
|
47
|
+
Pc
|
48
|
+
Pd
|
49
|
+
Pe
|
50
|
+
Pf
|
51
|
+
Pi
|
52
|
+
Po
|
53
|
+
Ps
|
54
|
+
S
|
55
|
+
Sc
|
56
|
+
Sk
|
57
|
+
Sm
|
58
|
+
So
|
59
|
+
Z
|
60
|
+
Zl
|
61
|
+
Zp
|
62
|
+
Zs
|
63
|
+
|
64
|
+
* Scripts
|
65
|
+
Adlam
|
66
|
+
Ahom
|
67
|
+
Anatolian_Hieroglyphs
|
68
|
+
Arabic
|
69
|
+
Armenian
|
70
|
+
Avestan
|
71
|
+
Balinese
|
72
|
+
Bamum
|
73
|
+
Bassa_Vah
|
74
|
+
Batak
|
75
|
+
Bengali
|
76
|
+
Bhaiksuki
|
77
|
+
Bopomofo
|
78
|
+
Brahmi
|
79
|
+
Braille
|
80
|
+
Buginese
|
81
|
+
Buhid
|
82
|
+
Canadian_Aboriginal
|
83
|
+
Carian
|
84
|
+
Caucasian_Albanian
|
85
|
+
Chakma
|
86
|
+
Cham
|
87
|
+
Cherokee
|
88
|
+
Common
|
89
|
+
Coptic
|
90
|
+
Cuneiform
|
91
|
+
Cypriot
|
92
|
+
Cyrillic
|
93
|
+
Deseret
|
94
|
+
Devanagari
|
95
|
+
Duployan
|
96
|
+
Egyptian_Hieroglyphs
|
97
|
+
Elbasan
|
98
|
+
Ethiopic
|
99
|
+
Georgian
|
100
|
+
Glagolitic
|
101
|
+
Gothic
|
102
|
+
Grantha
|
103
|
+
Greek
|
104
|
+
Gujarati
|
105
|
+
Gurmukhi
|
106
|
+
Han
|
107
|
+
Hangul
|
108
|
+
Hanunoo
|
109
|
+
Hatran
|
110
|
+
Hebrew
|
111
|
+
Hiragana
|
112
|
+
Imperial_Aramaic
|
113
|
+
Inherited
|
114
|
+
Inscriptional_Pahlavi
|
115
|
+
Inscriptional_Parthian
|
116
|
+
Javanese
|
117
|
+
Kaithi
|
118
|
+
Kannada
|
119
|
+
Katakana
|
120
|
+
Kayah_Li
|
121
|
+
Kharoshthi
|
122
|
+
Khmer
|
123
|
+
Khojki
|
124
|
+
Khudawadi
|
125
|
+
Lao
|
126
|
+
Latin
|
127
|
+
Lepcha
|
128
|
+
Limbu
|
129
|
+
Linear_A
|
130
|
+
Linear_B
|
131
|
+
Lisu
|
132
|
+
Lycian
|
133
|
+
Lydian
|
134
|
+
Mahajani
|
135
|
+
Malayalam
|
136
|
+
Mandaic
|
137
|
+
Manichaean
|
138
|
+
Marchen
|
139
|
+
Masaram_Gondi
|
140
|
+
Meetei_Mayek
|
141
|
+
Mende_Kikakui
|
142
|
+
Meroitic_Cursive
|
143
|
+
Meroitic_Hieroglyphs
|
144
|
+
Miao
|
145
|
+
Modi
|
146
|
+
Mongolian
|
147
|
+
Mro
|
148
|
+
Multani
|
149
|
+
Myanmar
|
150
|
+
Nabataean
|
151
|
+
New_Tai_Lue
|
152
|
+
Newa
|
153
|
+
Nko
|
154
|
+
Nushu
|
155
|
+
Ogham
|
156
|
+
Ol_Chiki
|
157
|
+
Old_Hungarian
|
158
|
+
Old_Italic
|
159
|
+
Old_North_Arabian
|
160
|
+
Old_Permic
|
161
|
+
Old_Persian
|
162
|
+
Old_South_Arabian
|
163
|
+
Old_Turkic
|
164
|
+
Oriya
|
165
|
+
Osage
|
166
|
+
Osmanya
|
167
|
+
Pahawh_Hmong
|
168
|
+
Palmyrene
|
169
|
+
Pau_Cin_Hau
|
170
|
+
Phags_Pa
|
171
|
+
Phoenician
|
172
|
+
Psalter_Pahlavi
|
173
|
+
Rejang
|
174
|
+
Runic
|
175
|
+
Samaritan
|
176
|
+
Saurashtra
|
177
|
+
Sharada
|
178
|
+
Shavian
|
179
|
+
Siddham
|
180
|
+
SignWriting
|
181
|
+
Sinhala
|
182
|
+
Sora_Sompeng
|
183
|
+
Soyombo
|
184
|
+
Sundanese
|
185
|
+
Syloti_Nagri
|
186
|
+
Syriac
|
187
|
+
Tagalog
|
188
|
+
Tagbanwa
|
189
|
+
Tai_Le
|
190
|
+
Tai_Tham
|
191
|
+
Tai_Viet
|
192
|
+
Takri
|
193
|
+
Tamil
|
194
|
+
Tangut
|
195
|
+
Telugu
|
196
|
+
Thaana
|
197
|
+
Thai
|
198
|
+
Tibetan
|
199
|
+
Tifinagh
|
200
|
+
Tirhuta
|
201
|
+
Ugaritic
|
202
|
+
Unknown
|
203
|
+
Vai
|
204
|
+
Warang_Citi
|
205
|
+
Yi
|
206
|
+
Zanabazar_Square
|
207
|
+
|
208
|
+
* DerivedCoreProperties
|
209
|
+
Alphabetic
|
210
|
+
Case_Ignorable
|
211
|
+
Cased
|
212
|
+
Changes_When_Casefolded
|
213
|
+
Changes_When_Casemapped
|
214
|
+
Changes_When_Lowercased
|
215
|
+
Changes_When_Titlecased
|
216
|
+
Changes_When_Uppercased
|
217
|
+
Default_Ignorable_Code_Point
|
218
|
+
Grapheme_Base
|
219
|
+
Grapheme_Extend
|
220
|
+
Grapheme_Link
|
221
|
+
ID_Continue
|
222
|
+
ID_Start
|
223
|
+
Lowercase
|
224
|
+
Math
|
225
|
+
Uppercase
|
226
|
+
XID_Continue
|
227
|
+
XID_Start
|
228
|
+
|
229
|
+
* PropList
|
230
|
+
ASCII_Hex_Digit
|
231
|
+
Bidi_Control
|
232
|
+
Dash
|
233
|
+
Deprecated
|
234
|
+
Diacritic
|
235
|
+
Extender
|
236
|
+
Hex_Digit
|
237
|
+
Hyphen
|
238
|
+
IDS_Binary_Operator
|
239
|
+
IDS_Trinary_Operator
|
240
|
+
Ideographic
|
241
|
+
Join_Control
|
242
|
+
Logical_Order_Exception
|
243
|
+
Noncharacter_Code_Point
|
244
|
+
Other_Alphabetic
|
245
|
+
Other_Default_Ignorable_Code_Point
|
246
|
+
Other_Grapheme_Extend
|
247
|
+
Other_ID_Continue
|
248
|
+
Other_ID_Start
|
249
|
+
Other_Lowercase
|
250
|
+
Other_Math
|
251
|
+
Other_Uppercase
|
252
|
+
Pattern_Syntax
|
253
|
+
Pattern_White_Space
|
254
|
+
Prepended_Concatenation_Mark
|
255
|
+
Quotation_Mark
|
256
|
+
Radical
|
257
|
+
Regional_Indicator
|
258
|
+
Sentence_Terminal
|
259
|
+
Soft_Dotted
|
260
|
+
Terminal_Punctuation
|
261
|
+
Unified_Ideograph
|
262
|
+
Variation_Selector
|
263
|
+
White_Space
|
264
|
+
|
265
|
+
* Emoji
|
266
|
+
Emoji
|
267
|
+
Emoji_Component
|
268
|
+
Emoji_Modifier
|
269
|
+
Emoji_Modifier_Base
|
270
|
+
Emoji_Presentation
|
271
|
+
|
272
|
+
* PropertyAliases
|
273
|
+
AHex
|
274
|
+
Bidi_C
|
275
|
+
CI
|
276
|
+
CWCF
|
277
|
+
CWCM
|
278
|
+
CWL
|
279
|
+
CWT
|
280
|
+
CWU
|
281
|
+
DI
|
282
|
+
Dep
|
283
|
+
Dia
|
284
|
+
Ext
|
285
|
+
Gr_Base
|
286
|
+
Gr_Ext
|
287
|
+
Gr_Link
|
288
|
+
Hex
|
289
|
+
IDC
|
290
|
+
IDS
|
291
|
+
IDSB
|
292
|
+
IDST
|
293
|
+
Ideo
|
294
|
+
Join_C
|
295
|
+
LOE
|
296
|
+
NChar
|
297
|
+
OAlpha
|
298
|
+
ODI
|
299
|
+
OGr_Ext
|
300
|
+
OIDC
|
301
|
+
OIDS
|
302
|
+
OLower
|
303
|
+
OMath
|
304
|
+
OUpper
|
305
|
+
PCM
|
306
|
+
Pat_Syn
|
307
|
+
Pat_WS
|
308
|
+
QMark
|
309
|
+
RI
|
310
|
+
SD
|
311
|
+
STerm
|
312
|
+
Term
|
313
|
+
UIdeo
|
314
|
+
VS
|
315
|
+
WSpace
|
316
|
+
XIDC
|
317
|
+
XIDS
|
318
|
+
|
319
|
+
* PropertyValueAliases (General_Category)
|
320
|
+
Other
|
321
|
+
Control
|
322
|
+
Format
|
323
|
+
Unassigned
|
324
|
+
Private_Use
|
325
|
+
Surrogate
|
326
|
+
Letter
|
327
|
+
Cased_Letter
|
328
|
+
Lowercase_Letter
|
329
|
+
Modifier_Letter
|
330
|
+
Other_Letter
|
331
|
+
Titlecase_Letter
|
332
|
+
Uppercase_Letter
|
333
|
+
Mark
|
334
|
+
Combining_Mark
|
335
|
+
Spacing_Mark
|
336
|
+
Enclosing_Mark
|
337
|
+
Nonspacing_Mark
|
338
|
+
Number
|
339
|
+
Decimal_Number
|
340
|
+
Letter_Number
|
341
|
+
Other_Number
|
342
|
+
Punctuation
|
343
|
+
Connector_Punctuation
|
344
|
+
Dash_Punctuation
|
345
|
+
Close_Punctuation
|
346
|
+
Final_Punctuation
|
347
|
+
Initial_Punctuation
|
348
|
+
Other_Punctuation
|
349
|
+
Open_Punctuation
|
350
|
+
Symbol
|
351
|
+
Currency_Symbol
|
352
|
+
Modifier_Symbol
|
353
|
+
Math_Symbol
|
354
|
+
Other_Symbol
|
355
|
+
Separator
|
356
|
+
Line_Separator
|
357
|
+
Paragraph_Separator
|
358
|
+
Space_Separator
|
359
|
+
|
360
|
+
* PropertyValueAliases (Script)
|
361
|
+
Adlm
|
362
|
+
Aghb
|
363
|
+
Arab
|
364
|
+
Armi
|
365
|
+
Armn
|
366
|
+
Avst
|
367
|
+
Bali
|
368
|
+
Bamu
|
369
|
+
Bass
|
370
|
+
Batk
|
371
|
+
Beng
|
372
|
+
Bhks
|
373
|
+
Bopo
|
374
|
+
Brah
|
375
|
+
Brai
|
376
|
+
Bugi
|
377
|
+
Buhd
|
378
|
+
Cakm
|
379
|
+
Cans
|
380
|
+
Cari
|
381
|
+
Cher
|
382
|
+
Copt
|
383
|
+
Qaac
|
384
|
+
Cprt
|
385
|
+
Cyrl
|
386
|
+
Deva
|
387
|
+
Dsrt
|
388
|
+
Dupl
|
389
|
+
Egyp
|
390
|
+
Elba
|
391
|
+
Ethi
|
392
|
+
Geor
|
393
|
+
Glag
|
394
|
+
Gonm
|
395
|
+
Goth
|
396
|
+
Gran
|
397
|
+
Grek
|
398
|
+
Gujr
|
399
|
+
Guru
|
400
|
+
Hang
|
401
|
+
Hani
|
402
|
+
Hano
|
403
|
+
Hatr
|
404
|
+
Hebr
|
405
|
+
Hira
|
406
|
+
Hluw
|
407
|
+
Hmng
|
408
|
+
Hung
|
409
|
+
Ital
|
410
|
+
Java
|
411
|
+
Kali
|
412
|
+
Kana
|
413
|
+
Khar
|
414
|
+
Khmr
|
415
|
+
Khoj
|
416
|
+
Knda
|
417
|
+
Kthi
|
418
|
+
Lana
|
419
|
+
Laoo
|
420
|
+
Latn
|
421
|
+
Lepc
|
422
|
+
Limb
|
423
|
+
Lina
|
424
|
+
Linb
|
425
|
+
Lyci
|
426
|
+
Lydi
|
427
|
+
Mahj
|
428
|
+
Mand
|
429
|
+
Mani
|
430
|
+
Marc
|
431
|
+
Mend
|
432
|
+
Merc
|
433
|
+
Mero
|
434
|
+
Mlym
|
435
|
+
Mong
|
436
|
+
Mroo
|
437
|
+
Mtei
|
438
|
+
Mult
|
439
|
+
Mymr
|
440
|
+
Narb
|
441
|
+
Nbat
|
442
|
+
Nkoo
|
443
|
+
Nshu
|
444
|
+
Ogam
|
445
|
+
Olck
|
446
|
+
Orkh
|
447
|
+
Orya
|
448
|
+
Osge
|
449
|
+
Osma
|
450
|
+
Palm
|
451
|
+
Pauc
|
452
|
+
Perm
|
453
|
+
Phag
|
454
|
+
Phli
|
455
|
+
Phlp
|
456
|
+
Phnx
|
457
|
+
Plrd
|
458
|
+
Prti
|
459
|
+
Rjng
|
460
|
+
Runr
|
461
|
+
Samr
|
462
|
+
Sarb
|
463
|
+
Saur
|
464
|
+
Sgnw
|
465
|
+
Shaw
|
466
|
+
Shrd
|
467
|
+
Sidd
|
468
|
+
Sind
|
469
|
+
Sinh
|
470
|
+
Sora
|
471
|
+
Soyo
|
472
|
+
Sund
|
473
|
+
Sylo
|
474
|
+
Syrc
|
475
|
+
Tagb
|
476
|
+
Takr
|
477
|
+
Tale
|
478
|
+
Talu
|
479
|
+
Taml
|
480
|
+
Tang
|
481
|
+
Tavt
|
482
|
+
Telu
|
483
|
+
Tfng
|
484
|
+
Tglg
|
485
|
+
Thaa
|
486
|
+
Tibt
|
487
|
+
Tirh
|
488
|
+
Ugar
|
489
|
+
Vaii
|
490
|
+
Wara
|
491
|
+
Xpeo
|
492
|
+
Xsux
|
493
|
+
Yiii
|
494
|
+
Zanb
|
495
|
+
Zinh
|
496
|
+
Qaai
|
497
|
+
Zyyy
|
498
|
+
Zzzz
|
499
|
+
|
500
|
+
* DerivedAges
|
501
|
+
Age=1.1
|
502
|
+
Age=10.0
|
503
|
+
Age=2.0
|
504
|
+
Age=2.1
|
505
|
+
Age=3.0
|
506
|
+
Age=3.1
|
507
|
+
Age=3.2
|
508
|
+
Age=4.0
|
509
|
+
Age=4.1
|
510
|
+
Age=5.0
|
511
|
+
Age=5.1
|
512
|
+
Age=5.2
|
513
|
+
Age=6.0
|
514
|
+
Age=6.1
|
515
|
+
Age=6.2
|
516
|
+
Age=6.3
|
517
|
+
Age=7.0
|
518
|
+
Age=8.0
|
519
|
+
Age=9.0
|
520
|
+
|
521
|
+
* Blocks
|
522
|
+
In_Basic_Latin
|
523
|
+
In_Latin_1_Supplement
|
524
|
+
In_Latin_Extended_A
|
525
|
+
In_Latin_Extended_B
|
526
|
+
In_IPA_Extensions
|
527
|
+
In_Spacing_Modifier_Letters
|
528
|
+
In_Combining_Diacritical_Marks
|
529
|
+
In_Greek_and_Coptic
|
530
|
+
In_Cyrillic
|
531
|
+
In_Cyrillic_Supplement
|
532
|
+
In_Armenian
|
533
|
+
In_Hebrew
|
534
|
+
In_Arabic
|
535
|
+
In_Syriac
|
536
|
+
In_Arabic_Supplement
|
537
|
+
In_Thaana
|
538
|
+
In_NKo
|
539
|
+
In_Samaritan
|
540
|
+
In_Mandaic
|
541
|
+
In_Syriac_Supplement
|
542
|
+
In_Arabic_Extended_A
|
543
|
+
In_Devanagari
|
544
|
+
In_Bengali
|
545
|
+
In_Gurmukhi
|
546
|
+
In_Gujarati
|
547
|
+
In_Oriya
|
548
|
+
In_Tamil
|
549
|
+
In_Telugu
|
550
|
+
In_Kannada
|
551
|
+
In_Malayalam
|
552
|
+
In_Sinhala
|
553
|
+
In_Thai
|
554
|
+
In_Lao
|
555
|
+
In_Tibetan
|
556
|
+
In_Myanmar
|
557
|
+
In_Georgian
|
558
|
+
In_Hangul_Jamo
|
559
|
+
In_Ethiopic
|
560
|
+
In_Ethiopic_Supplement
|
561
|
+
In_Cherokee
|
562
|
+
In_Unified_Canadian_Aboriginal_Syllabics
|
563
|
+
In_Ogham
|
564
|
+
In_Runic
|
565
|
+
In_Tagalog
|
566
|
+
In_Hanunoo
|
567
|
+
In_Buhid
|
568
|
+
In_Tagbanwa
|
569
|
+
In_Khmer
|
570
|
+
In_Mongolian
|
571
|
+
In_Unified_Canadian_Aboriginal_Syllabics_Extended
|
572
|
+
In_Limbu
|
573
|
+
In_Tai_Le
|
574
|
+
In_New_Tai_Lue
|
575
|
+
In_Khmer_Symbols
|
576
|
+
In_Buginese
|
577
|
+
In_Tai_Tham
|
578
|
+
In_Combining_Diacritical_Marks_Extended
|
579
|
+
In_Balinese
|
580
|
+
In_Sundanese
|
581
|
+
In_Batak
|
582
|
+
In_Lepcha
|
583
|
+
In_Ol_Chiki
|
584
|
+
In_Cyrillic_Extended_C
|
585
|
+
In_Sundanese_Supplement
|
586
|
+
In_Vedic_Extensions
|
587
|
+
In_Phonetic_Extensions
|
588
|
+
In_Phonetic_Extensions_Supplement
|
589
|
+
In_Combining_Diacritical_Marks_Supplement
|
590
|
+
In_Latin_Extended_Additional
|
591
|
+
In_Greek_Extended
|
592
|
+
In_General_Punctuation
|
593
|
+
In_Superscripts_and_Subscripts
|
594
|
+
In_Currency_Symbols
|
595
|
+
In_Combining_Diacritical_Marks_for_Symbols
|
596
|
+
In_Letterlike_Symbols
|
597
|
+
In_Number_Forms
|
598
|
+
In_Arrows
|
599
|
+
In_Mathematical_Operators
|
600
|
+
In_Miscellaneous_Technical
|
601
|
+
In_Control_Pictures
|
602
|
+
In_Optical_Character_Recognition
|
603
|
+
In_Enclosed_Alphanumerics
|
604
|
+
In_Box_Drawing
|
605
|
+
In_Block_Elements
|
606
|
+
In_Geometric_Shapes
|
607
|
+
In_Miscellaneous_Symbols
|
608
|
+
In_Dingbats
|
609
|
+
In_Miscellaneous_Mathematical_Symbols_A
|
610
|
+
In_Supplemental_Arrows_A
|
611
|
+
In_Braille_Patterns
|
612
|
+
In_Supplemental_Arrows_B
|
613
|
+
In_Miscellaneous_Mathematical_Symbols_B
|
614
|
+
In_Supplemental_Mathematical_Operators
|
615
|
+
In_Miscellaneous_Symbols_and_Arrows
|
616
|
+
In_Glagolitic
|
617
|
+
In_Latin_Extended_C
|
618
|
+
In_Coptic
|
619
|
+
In_Georgian_Supplement
|
620
|
+
In_Tifinagh
|
621
|
+
In_Ethiopic_Extended
|
622
|
+
In_Cyrillic_Extended_A
|
623
|
+
In_Supplemental_Punctuation
|
624
|
+
In_CJK_Radicals_Supplement
|
625
|
+
In_Kangxi_Radicals
|
626
|
+
In_Ideographic_Description_Characters
|
627
|
+
In_CJK_Symbols_and_Punctuation
|
628
|
+
In_Hiragana
|
629
|
+
In_Katakana
|
630
|
+
In_Bopomofo
|
631
|
+
In_Hangul_Compatibility_Jamo
|
632
|
+
In_Kanbun
|
633
|
+
In_Bopomofo_Extended
|
634
|
+
In_CJK_Strokes
|
635
|
+
In_Katakana_Phonetic_Extensions
|
636
|
+
In_Enclosed_CJK_Letters_and_Months
|
637
|
+
In_CJK_Compatibility
|
638
|
+
In_CJK_Unified_Ideographs_Extension_A
|
639
|
+
In_Yijing_Hexagram_Symbols
|
640
|
+
In_CJK_Unified_Ideographs
|
641
|
+
In_Yi_Syllables
|
642
|
+
In_Yi_Radicals
|
643
|
+
In_Lisu
|
644
|
+
In_Vai
|
645
|
+
In_Cyrillic_Extended_B
|
646
|
+
In_Bamum
|
647
|
+
In_Modifier_Tone_Letters
|
648
|
+
In_Latin_Extended_D
|
649
|
+
In_Syloti_Nagri
|
650
|
+
In_Common_Indic_Number_Forms
|
651
|
+
In_Phags_pa
|
652
|
+
In_Saurashtra
|
653
|
+
In_Devanagari_Extended
|
654
|
+
In_Kayah_Li
|
655
|
+
In_Rejang
|
656
|
+
In_Hangul_Jamo_Extended_A
|
657
|
+
In_Javanese
|
658
|
+
In_Myanmar_Extended_B
|
659
|
+
In_Cham
|
660
|
+
In_Myanmar_Extended_A
|
661
|
+
In_Tai_Viet
|
662
|
+
In_Meetei_Mayek_Extensions
|
663
|
+
In_Ethiopic_Extended_A
|
664
|
+
In_Latin_Extended_E
|
665
|
+
In_Cherokee_Supplement
|
666
|
+
In_Meetei_Mayek
|
667
|
+
In_Hangul_Syllables
|
668
|
+
In_Hangul_Jamo_Extended_B
|
669
|
+
In_High_Surrogates
|
670
|
+
In_High_Private_Use_Surrogates
|
671
|
+
In_Low_Surrogates
|
672
|
+
In_Private_Use_Area
|
673
|
+
In_CJK_Compatibility_Ideographs
|
674
|
+
In_Alphabetic_Presentation_Forms
|
675
|
+
In_Arabic_Presentation_Forms_A
|
676
|
+
In_Variation_Selectors
|
677
|
+
In_Vertical_Forms
|
678
|
+
In_Combining_Half_Marks
|
679
|
+
In_CJK_Compatibility_Forms
|
680
|
+
In_Small_Form_Variants
|
681
|
+
In_Arabic_Presentation_Forms_B
|
682
|
+
In_Halfwidth_and_Fullwidth_Forms
|
683
|
+
In_Specials
|
684
|
+
In_Linear_B_Syllabary
|
685
|
+
In_Linear_B_Ideograms
|
686
|
+
In_Aegean_Numbers
|
687
|
+
In_Ancient_Greek_Numbers
|
688
|
+
In_Ancient_Symbols
|
689
|
+
In_Phaistos_Disc
|
690
|
+
In_Lycian
|
691
|
+
In_Carian
|
692
|
+
In_Coptic_Epact_Numbers
|
693
|
+
In_Old_Italic
|
694
|
+
In_Gothic
|
695
|
+
In_Old_Permic
|
696
|
+
In_Ugaritic
|
697
|
+
In_Old_Persian
|
698
|
+
In_Deseret
|
699
|
+
In_Shavian
|
700
|
+
In_Osmanya
|
701
|
+
In_Osage
|
702
|
+
In_Elbasan
|
703
|
+
In_Caucasian_Albanian
|
704
|
+
In_Linear_A
|
705
|
+
In_Cypriot_Syllabary
|
706
|
+
In_Imperial_Aramaic
|
707
|
+
In_Palmyrene
|
708
|
+
In_Nabataean
|
709
|
+
In_Hatran
|
710
|
+
In_Phoenician
|
711
|
+
In_Lydian
|
712
|
+
In_Meroitic_Hieroglyphs
|
713
|
+
In_Meroitic_Cursive
|
714
|
+
In_Kharoshthi
|
715
|
+
In_Old_South_Arabian
|
716
|
+
In_Old_North_Arabian
|
717
|
+
In_Manichaean
|
718
|
+
In_Avestan
|
719
|
+
In_Inscriptional_Parthian
|
720
|
+
In_Inscriptional_Pahlavi
|
721
|
+
In_Psalter_Pahlavi
|
722
|
+
In_Old_Turkic
|
723
|
+
In_Old_Hungarian
|
724
|
+
In_Rumi_Numeral_Symbols
|
725
|
+
In_Brahmi
|
726
|
+
In_Kaithi
|
727
|
+
In_Sora_Sompeng
|
728
|
+
In_Chakma
|
729
|
+
In_Mahajani
|
730
|
+
In_Sharada
|
731
|
+
In_Sinhala_Archaic_Numbers
|
732
|
+
In_Khojki
|
733
|
+
In_Multani
|
734
|
+
In_Khudawadi
|
735
|
+
In_Grantha
|
736
|
+
In_Newa
|
737
|
+
In_Tirhuta
|
738
|
+
In_Siddham
|
739
|
+
In_Modi
|
740
|
+
In_Mongolian_Supplement
|
741
|
+
In_Takri
|
742
|
+
In_Ahom
|
743
|
+
In_Warang_Citi
|
744
|
+
In_Zanabazar_Square
|
745
|
+
In_Soyombo
|
746
|
+
In_Pau_Cin_Hau
|
747
|
+
In_Bhaiksuki
|
748
|
+
In_Marchen
|
749
|
+
In_Masaram_Gondi
|
750
|
+
In_Cuneiform
|
751
|
+
In_Cuneiform_Numbers_and_Punctuation
|
752
|
+
In_Early_Dynastic_Cuneiform
|
753
|
+
In_Egyptian_Hieroglyphs
|
754
|
+
In_Anatolian_Hieroglyphs
|
755
|
+
In_Bamum_Supplement
|
756
|
+
In_Mro
|
757
|
+
In_Bassa_Vah
|
758
|
+
In_Pahawh_Hmong
|
759
|
+
In_Miao
|
760
|
+
In_Ideographic_Symbols_and_Punctuation
|
761
|
+
In_Tangut
|
762
|
+
In_Tangut_Components
|
763
|
+
In_Kana_Supplement
|
764
|
+
In_Kana_Extended_A
|
765
|
+
In_Nushu
|
766
|
+
In_Duployan
|
767
|
+
In_Shorthand_Format_Controls
|
768
|
+
In_Byzantine_Musical_Symbols
|
769
|
+
In_Musical_Symbols
|
770
|
+
In_Ancient_Greek_Musical_Notation
|
771
|
+
In_Tai_Xuan_Jing_Symbols
|
772
|
+
In_Counting_Rod_Numerals
|
773
|
+
In_Mathematical_Alphanumeric_Symbols
|
774
|
+
In_Sutton_SignWriting
|
775
|
+
In_Glagolitic_Supplement
|
776
|
+
In_Mende_Kikakui
|
777
|
+
In_Adlam
|
778
|
+
In_Arabic_Mathematical_Alphabetic_Symbols
|
779
|
+
In_Mahjong_Tiles
|
780
|
+
In_Domino_Tiles
|
781
|
+
In_Playing_Cards
|
782
|
+
In_Enclosed_Alphanumeric_Supplement
|
783
|
+
In_Enclosed_Ideographic_Supplement
|
784
|
+
In_Miscellaneous_Symbols_and_Pictographs
|
785
|
+
In_Emoticons
|
786
|
+
In_Ornamental_Dingbats
|
787
|
+
In_Transport_and_Map_Symbols
|
788
|
+
In_Alchemical_Symbols
|
789
|
+
In_Geometric_Shapes_Extended
|
790
|
+
In_Supplemental_Arrows_C
|
791
|
+
In_Supplemental_Symbols_and_Pictographs
|
792
|
+
In_CJK_Unified_Ideographs_Extension_B
|
793
|
+
In_CJK_Unified_Ideographs_Extension_C
|
794
|
+
In_CJK_Unified_Ideographs_Extension_D
|
795
|
+
In_CJK_Unified_Ideographs_Extension_E
|
796
|
+
In_CJK_Unified_Ideographs_Extension_F
|
797
|
+
In_CJK_Compatibility_Ideographs_Supplement
|
798
|
+
In_Tags
|
799
|
+
In_Variation_Selectors_Supplement
|
800
|
+
In_Supplementary_Private_Use_Area_A
|
801
|
+
In_Supplementary_Private_Use_Area_B
|
802
|
+
In_No_Block
|
data/lib/regexp_property_values.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require "regexp_property_values/version"
|
2
|
+
|
3
|
+
# Lists the `\p{...}` property values accepted by the regular expression
# engine of the running Ruby, based on a bundled copy of Onigmo's
# UnicodeProps.txt (see LIST_URL).
module RegexpPropertyValues
  module_function

  LIST_URL = 'https://raw.githubusercontent.com/k-takata/Onigmo/master/doc/UnicodeProps.txt'

  # Downloads the current property list from LIST_URL and overwrites the
  # bundled copy at +file_path+. Progress is printed to stdout.
  def update
    puts "Downloading #{LIST_URL}"
    require 'open-uri'
    # Kernel#open no longer opens URLs as of Ruby 3.0; URI.open is the
    # replacement, but it is only public on Ruby >= 2.5, so fall back to
    # Kernel#open on older versions.
    opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
    # open-uri fetches the whole response before yielding, so the local file
    # is only opened (and thereby truncated) once the download has succeeded.
    # The block form also closes the downloaded stream afterwards.
    opener.call(LIST_URL) do |remote|
      File.open(file_path, 'w') { |local| IO.copy_stream(remote, local) }
    end
    puts 'Done!'
  end

  # Absolute path of the bundled UnicodeProps.txt, located next to this file.
  def file_path
    File.expand_path('../UnicodeProps.txt', __FILE__)
  end

  # Returns a flat Array of all supported property names, in list order.
  def all
    by_category.values.flatten
  end

  # Parses the property list into a Hash of category name => Array of
  # property names.
  #
  # Lines starting with "* " open a category; lines indented by exactly four
  # spaces name a property of the most recently opened category. Properties
  # that the running Ruby cannot compile in a `\p{...}` expression are
  # omitted, as are property lines that appear before any category header.
  def by_category
    # Kept in a local (not module state) so repeated or concurrent calls
    # cannot see a category left over from a previous parse.
    current_category = nil

    result = File.foreach(file_path).each_with_object({}) do |line, hash|
      if /^\* (?<category>\S.+)/ =~ line
        current_category = category
        hash[current_category] ||= []
      elsif /^ {4}(?<property>\S.*)/ =~ line
        # only include props that are supported by the host ruby version
        begin
          /\p{#{property}}/u
        rescue RegexpError, SyntaxError
          next
        end
        # A property listed before the first category header has no bucket.
        next unless current_category

        hash[current_category] << property
      end
    end

    add_oniguruma_properties(result)
    result
  end

  # Returns a Hash of short name => long name for properties that match
  # exactly the same set of characters. Slow: it depends on
  # +by_matched_characters+.
  #
  # Raises RuntimeError if a group of equivalent properties contains no long
  # name.
  def alias_hash
    short_names, long_names = short_and_long_names
    return {} if short_names.empty?

    by_matched_characters.each_value.each_with_object({}) do |props, hash|
      next if props.size < 2

      long_name = (props & long_names).first || raise("no long name for #{props}")
      (props & short_names).each { |short_name| hash[short_name] = long_name }
    end
  end

  # Returns a two-element Array: [short_names, long_names]. Properties from
  # the abbreviation categories listed below count as short names; all other
  # properties count as long names.
  def short_and_long_names
    short_name_categories = ['Major and General Categories',
                             'PropertyAliases',
                             'PropertyValueAliases (Script)']
    short, long = by_category.partition do |cat_name, _props|
      short_name_categories.include?(cat_name)
    end
    [short.flat_map(&:last), long.flat_map(&:last)]
  end

  # Groups all supported properties by the Array of characters they match.
  # Prints a notice to stdout because every property is tested against
  # every codepoint.
  def by_matched_characters
    puts 'Establishing property characters, this may take a bit ...'
    all.group_by { |prop| matched_characters(prop) }
  end

  # Returns the Array of single-character Strings matched by `\p{prop}`.
  #
  # The candidate characters (all codepoints from 0 to 1_114_111 except the
  # range 55_296..57_343, which is skipped) are built once and memoized.
  def matched_characters(prop)
    @characters ||= ((0..55_295).to_a + (57_344..1_114_111).to_a)
                    .map { |cp_number| [cp_number].pack('U') }
    prop_regex = /\p{#{prop}}/u
    @characters.select { |char| prop_regex.match(char) }
  end

  # On Rubies older than 2.0.0, adds 'Newline' to the 'Special' category;
  # does nothing on newer versions.
  # NOTE(review): presumably because those Rubies ship Oniguruma, which
  # supports this property, rather than Onigmo - confirm.
  def add_oniguruma_properties(props_by_category)
    return if Gem::Version.new(RUBY_VERSION.dup) >= Gem::Version.new('2.0.0')

    # Create the bucket if the parsed list had no 'Special' category.
    (props_by_category['Special'] ||= []) << 'Newline'
  end
end
|
data/lib/regexp_property_values/version.rb
ADDED
data/regexp_property_values.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Gem specification for regexp_property_values.

# Put lib/ on the load path so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'regexp_property_values/version'

Gem::Specification.new do |spec|
  spec.name    = 'regexp_property_values'
  spec.version = RegexpPropertyValues::VERSION
  spec.authors = ['Janosch Müller']
  spec.email   = ['janosch84@gmail.com']

  spec.summary     = "Lists property values supported by Ruby's regex engine"
  spec.description = [
    'This microlibrary lets you see which property values are',
    'supported by the regular expression engine of the Ruby',
    'version you are running. That is, it determines all',
    'supported values for `\p{value}` expressions.'
  ].join(' ')
  spec.homepage = 'https://github.com/janosch-x/regexp_property_values'
  spec.license  = 'MIT'

  # Package every file tracked by git except tests, specs and features.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.require_paths = ['lib']

  # Development-only dependencies, declared in the original order.
  [
    ['bundler', '~> 1.16'],
    ['rake',    '~> 10.0'],
    ['rspec',   '~> 3.0']
  ].each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
|
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: regexp_property_values
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Janosch Müller
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2018-05-11 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.16'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.16'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '10.0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '10.0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '3.0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
description: This microlibrary lets you see which property values are supported by
|
63
|
+
the regular expression engine of the Ruby version you are running. That is, it determines
|
64
|
+
all supported values for `\p{value}` expressions.
|
65
|
+
email:
|
66
|
+
- janosch84@gmail.com
|
67
|
+
executables: []
|
68
|
+
extensions: []
|
69
|
+
extra_rdoc_files: []
|
70
|
+
files:
|
71
|
+
- .gitignore
|
72
|
+
- .rspec
|
73
|
+
- .travis.yml
|
74
|
+
- Gemfile
|
75
|
+
- Gemfile.lock
|
76
|
+
- LICENSE.txt
|
77
|
+
- README.md
|
78
|
+
- Rakefile
|
79
|
+
- bin/console
|
80
|
+
- bin/setup
|
81
|
+
- lib/UnicodeProps.txt
|
82
|
+
- lib/regexp_property_values.rb
|
83
|
+
- lib/regexp_property_values/version.rb
|
84
|
+
- regexp_property_values.gemspec
|
85
|
+
homepage: https://github.com/janosch-x/regexp_property_values
|
86
|
+
licenses:
|
87
|
+
- MIT
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
none: false
|
94
|
+
requirements:
|
95
|
+
- - ! '>='
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
requirements: []
|
105
|
+
rubyforge_project:
|
106
|
+
rubygems_version: 1.8.23.2
|
107
|
+
signing_key:
|
108
|
+
specification_version: 3
|
109
|
+
summary: Lists property values supported by Ruby's regex engine
|
110
|
+
test_files: []
|