regexp_property_values 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,29 @@
1
+ *.gem
2
+ *.iml
3
+ *.stTheme.cache
4
+ *.sublime-project
5
+ *.sublime-workspace
6
+ *.swp
7
+ *.tmlanguage.cache
8
+ *.tmPreferences.cache
9
+ *~
10
+ .byebug_history
11
+ .DS_Store
12
+ .idea/
13
+ .ruby-gemset
14
+ .ruby-version
15
+ .tags
16
+ .tags1
17
+ bbin/
18
+ binstubs/*
19
+ bundler_stubs/*/.yardoc
20
+ /.bundle/
21
+ /_yardoc/
22
+ /coverage/
23
+ /doc/
24
+ /pkg/
25
+ /spec/reports/
26
+ /tmp/
27
+
28
+ # rspec failure tracking
29
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - 2.2.0
7
+ - 2.5.0
8
+ - 2.6.0
9
+ before_install: gem install bundler -v 1.16.1
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in regexp_property_values.gemspec
6
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,35 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ regexp_property_values (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.3)
10
+ rake (10.5.0)
11
+ rspec (3.7.0)
12
+ rspec-core (~> 3.7.0)
13
+ rspec-expectations (~> 3.7.0)
14
+ rspec-mocks (~> 3.7.0)
15
+ rspec-core (3.7.1)
16
+ rspec-support (~> 3.7.0)
17
+ rspec-expectations (3.7.0)
18
+ diff-lcs (>= 1.2.0, < 2.0)
19
+ rspec-support (~> 3.7.0)
20
+ rspec-mocks (3.7.0)
21
+ diff-lcs (>= 1.2.0, < 2.0)
22
+ rspec-support (~> 3.7.0)
23
+ rspec-support (3.7.1)
24
+
25
+ PLATFORMS
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ bundler (~> 1.16)
30
+ rake (~> 10.0)
31
+ regexp_property_values!
32
+ rspec (~> 3.0)
33
+
34
+ BUNDLED WITH
35
+ 1.16.1
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Janosch Müller
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # RegexpPropertyValues
2
+
3
+ This microlibrary lets you see which property values are supported by the regular expression engine of the Ruby version you are running.
4
+
5
+ That is, it determines all supported values for `\p{value}` expressions.
6
+
7
+ ## Usage
8
+
9
+ ```ruby
10
+ require 'regexp_property_values'
11
+
12
+ PV = RegexpPropertyValues
13
+
14
+ PV.all # => ["Alpha", "Blank", "Cntrl", ...]
15
+ PV.all.sort # => ["AHex", "ASCII", "Adlam", "Adlm", "Age=1.1", ...]
16
+
17
+ PV.by_category # => {"POSIX brackets" => ["Alpha", "Blank", ...], ...}
18
+ PV.by_category.keys # => ["POSIX brackets", "Special", "Scripts", ...]
19
+
20
+ PV.short_and_long_names # => [["M", "Grek", ...], ["Mark", "Greek", ...]]
21
+
22
+ # this one takes a second
23
+ PV.matched_characters('AHex') # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...]
24
+
25
+ # this one takes a minute
26
+ PV.alias_hash # => {"M" => "Mark", "Grek" => "Greek", ...}
27
+
28
+ # download the latest list of possible properties
29
+ PV.update
30
+ ```
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "regexp_property_values"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ PV = RegexpPropertyValues
14
+
15
+ require "irb"
16
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,802 @@
1
+ Onigmo (Oniguruma-mod) Unicode Properties Version 6.2.0 2017/07/17
2
+
3
+ * POSIX brackets
4
+ Alpha
5
+ Blank
6
+ Cntrl
7
+ Digit
8
+ Graph
9
+ Lower
10
+ Print
11
+ Punct
12
+ Space
13
+ Upper
14
+ XDigit
15
+ Word
16
+ Alnum
17
+ ASCII
18
+ XPosixPunct
19
+
20
+ * Special
21
+ Any
22
+ Assigned
23
+
24
+ * Major and General Categories
25
+ C
26
+ Cc
27
+ Cf
28
+ Cn
29
+ Co
30
+ Cs
31
+ L
32
+ LC
33
+ Ll
34
+ Lm
35
+ Lo
36
+ Lt
37
+ Lu
38
+ M
39
+ Mc
40
+ Me
41
+ Mn
42
+ N
43
+ Nd
44
+ Nl
45
+ No
46
+ P
47
+ Pc
48
+ Pd
49
+ Pe
50
+ Pf
51
+ Pi
52
+ Po
53
+ Ps
54
+ S
55
+ Sc
56
+ Sk
57
+ Sm
58
+ So
59
+ Z
60
+ Zl
61
+ Zp
62
+ Zs
63
+
64
+ * Scripts
65
+ Adlam
66
+ Ahom
67
+ Anatolian_Hieroglyphs
68
+ Arabic
69
+ Armenian
70
+ Avestan
71
+ Balinese
72
+ Bamum
73
+ Bassa_Vah
74
+ Batak
75
+ Bengali
76
+ Bhaiksuki
77
+ Bopomofo
78
+ Brahmi
79
+ Braille
80
+ Buginese
81
+ Buhid
82
+ Canadian_Aboriginal
83
+ Carian
84
+ Caucasian_Albanian
85
+ Chakma
86
+ Cham
87
+ Cherokee
88
+ Common
89
+ Coptic
90
+ Cuneiform
91
+ Cypriot
92
+ Cyrillic
93
+ Deseret
94
+ Devanagari
95
+ Duployan
96
+ Egyptian_Hieroglyphs
97
+ Elbasan
98
+ Ethiopic
99
+ Georgian
100
+ Glagolitic
101
+ Gothic
102
+ Grantha
103
+ Greek
104
+ Gujarati
105
+ Gurmukhi
106
+ Han
107
+ Hangul
108
+ Hanunoo
109
+ Hatran
110
+ Hebrew
111
+ Hiragana
112
+ Imperial_Aramaic
113
+ Inherited
114
+ Inscriptional_Pahlavi
115
+ Inscriptional_Parthian
116
+ Javanese
117
+ Kaithi
118
+ Kannada
119
+ Katakana
120
+ Kayah_Li
121
+ Kharoshthi
122
+ Khmer
123
+ Khojki
124
+ Khudawadi
125
+ Lao
126
+ Latin
127
+ Lepcha
128
+ Limbu
129
+ Linear_A
130
+ Linear_B
131
+ Lisu
132
+ Lycian
133
+ Lydian
134
+ Mahajani
135
+ Malayalam
136
+ Mandaic
137
+ Manichaean
138
+ Marchen
139
+ Masaram_Gondi
140
+ Meetei_Mayek
141
+ Mende_Kikakui
142
+ Meroitic_Cursive
143
+ Meroitic_Hieroglyphs
144
+ Miao
145
+ Modi
146
+ Mongolian
147
+ Mro
148
+ Multani
149
+ Myanmar
150
+ Nabataean
151
+ New_Tai_Lue
152
+ Newa
153
+ Nko
154
+ Nushu
155
+ Ogham
156
+ Ol_Chiki
157
+ Old_Hungarian
158
+ Old_Italic
159
+ Old_North_Arabian
160
+ Old_Permic
161
+ Old_Persian
162
+ Old_South_Arabian
163
+ Old_Turkic
164
+ Oriya
165
+ Osage
166
+ Osmanya
167
+ Pahawh_Hmong
168
+ Palmyrene
169
+ Pau_Cin_Hau
170
+ Phags_Pa
171
+ Phoenician
172
+ Psalter_Pahlavi
173
+ Rejang
174
+ Runic
175
+ Samaritan
176
+ Saurashtra
177
+ Sharada
178
+ Shavian
179
+ Siddham
180
+ SignWriting
181
+ Sinhala
182
+ Sora_Sompeng
183
+ Soyombo
184
+ Sundanese
185
+ Syloti_Nagri
186
+ Syriac
187
+ Tagalog
188
+ Tagbanwa
189
+ Tai_Le
190
+ Tai_Tham
191
+ Tai_Viet
192
+ Takri
193
+ Tamil
194
+ Tangut
195
+ Telugu
196
+ Thaana
197
+ Thai
198
+ Tibetan
199
+ Tifinagh
200
+ Tirhuta
201
+ Ugaritic
202
+ Unknown
203
+ Vai
204
+ Warang_Citi
205
+ Yi
206
+ Zanabazar_Square
207
+
208
+ * DerivedCoreProperties
209
+ Alphabetic
210
+ Case_Ignorable
211
+ Cased
212
+ Changes_When_Casefolded
213
+ Changes_When_Casemapped
214
+ Changes_When_Lowercased
215
+ Changes_When_Titlecased
216
+ Changes_When_Uppercased
217
+ Default_Ignorable_Code_Point
218
+ Grapheme_Base
219
+ Grapheme_Extend
220
+ Grapheme_Link
221
+ ID_Continue
222
+ ID_Start
223
+ Lowercase
224
+ Math
225
+ Uppercase
226
+ XID_Continue
227
+ XID_Start
228
+
229
+ * PropList
230
+ ASCII_Hex_Digit
231
+ Bidi_Control
232
+ Dash
233
+ Deprecated
234
+ Diacritic
235
+ Extender
236
+ Hex_Digit
237
+ Hyphen
238
+ IDS_Binary_Operator
239
+ IDS_Trinary_Operator
240
+ Ideographic
241
+ Join_Control
242
+ Logical_Order_Exception
243
+ Noncharacter_Code_Point
244
+ Other_Alphabetic
245
+ Other_Default_Ignorable_Code_Point
246
+ Other_Grapheme_Extend
247
+ Other_ID_Continue
248
+ Other_ID_Start
249
+ Other_Lowercase
250
+ Other_Math
251
+ Other_Uppercase
252
+ Pattern_Syntax
253
+ Pattern_White_Space
254
+ Prepended_Concatenation_Mark
255
+ Quotation_Mark
256
+ Radical
257
+ Regional_Indicator
258
+ Sentence_Terminal
259
+ Soft_Dotted
260
+ Terminal_Punctuation
261
+ Unified_Ideograph
262
+ Variation_Selector
263
+ White_Space
264
+
265
+ * Emoji
266
+ Emoji
267
+ Emoji_Component
268
+ Emoji_Modifier
269
+ Emoji_Modifier_Base
270
+ Emoji_Presentation
271
+
272
+ * PropertyAliases
273
+ AHex
274
+ Bidi_C
275
+ CI
276
+ CWCF
277
+ CWCM
278
+ CWL
279
+ CWT
280
+ CWU
281
+ DI
282
+ Dep
283
+ Dia
284
+ Ext
285
+ Gr_Base
286
+ Gr_Ext
287
+ Gr_Link
288
+ Hex
289
+ IDC
290
+ IDS
291
+ IDSB
292
+ IDST
293
+ Ideo
294
+ Join_C
295
+ LOE
296
+ NChar
297
+ OAlpha
298
+ ODI
299
+ OGr_Ext
300
+ OIDC
301
+ OIDS
302
+ OLower
303
+ OMath
304
+ OUpper
305
+ PCM
306
+ Pat_Syn
307
+ Pat_WS
308
+ QMark
309
+ RI
310
+ SD
311
+ STerm
312
+ Term
313
+ UIdeo
314
+ VS
315
+ WSpace
316
+ XIDC
317
+ XIDS
318
+
319
+ * PropertyValueAliases (General_Category)
320
+ Other
321
+ Control
322
+ Format
323
+ Unassigned
324
+ Private_Use
325
+ Surrogate
326
+ Letter
327
+ Cased_Letter
328
+ Lowercase_Letter
329
+ Modifier_Letter
330
+ Other_Letter
331
+ Titlecase_Letter
332
+ Uppercase_Letter
333
+ Mark
334
+ Combining_Mark
335
+ Spacing_Mark
336
+ Enclosing_Mark
337
+ Nonspacing_Mark
338
+ Number
339
+ Decimal_Number
340
+ Letter_Number
341
+ Other_Number
342
+ Punctuation
343
+ Connector_Punctuation
344
+ Dash_Punctuation
345
+ Close_Punctuation
346
+ Final_Punctuation
347
+ Initial_Punctuation
348
+ Other_Punctuation
349
+ Open_Punctuation
350
+ Symbol
351
+ Currency_Symbol
352
+ Modifier_Symbol
353
+ Math_Symbol
354
+ Other_Symbol
355
+ Separator
356
+ Line_Separator
357
+ Paragraph_Separator
358
+ Space_Separator
359
+
360
+ * PropertyValueAliases (Script)
361
+ Adlm
362
+ Aghb
363
+ Arab
364
+ Armi
365
+ Armn
366
+ Avst
367
+ Bali
368
+ Bamu
369
+ Bass
370
+ Batk
371
+ Beng
372
+ Bhks
373
+ Bopo
374
+ Brah
375
+ Brai
376
+ Bugi
377
+ Buhd
378
+ Cakm
379
+ Cans
380
+ Cari
381
+ Cher
382
+ Copt
383
+ Qaac
384
+ Cprt
385
+ Cyrl
386
+ Deva
387
+ Dsrt
388
+ Dupl
389
+ Egyp
390
+ Elba
391
+ Ethi
392
+ Geor
393
+ Glag
394
+ Gonm
395
+ Goth
396
+ Gran
397
+ Grek
398
+ Gujr
399
+ Guru
400
+ Hang
401
+ Hani
402
+ Hano
403
+ Hatr
404
+ Hebr
405
+ Hira
406
+ Hluw
407
+ Hmng
408
+ Hung
409
+ Ital
410
+ Java
411
+ Kali
412
+ Kana
413
+ Khar
414
+ Khmr
415
+ Khoj
416
+ Knda
417
+ Kthi
418
+ Lana
419
+ Laoo
420
+ Latn
421
+ Lepc
422
+ Limb
423
+ Lina
424
+ Linb
425
+ Lyci
426
+ Lydi
427
+ Mahj
428
+ Mand
429
+ Mani
430
+ Marc
431
+ Mend
432
+ Merc
433
+ Mero
434
+ Mlym
435
+ Mong
436
+ Mroo
437
+ Mtei
438
+ Mult
439
+ Mymr
440
+ Narb
441
+ Nbat
442
+ Nkoo
443
+ Nshu
444
+ Ogam
445
+ Olck
446
+ Orkh
447
+ Orya
448
+ Osge
449
+ Osma
450
+ Palm
451
+ Pauc
452
+ Perm
453
+ Phag
454
+ Phli
455
+ Phlp
456
+ Phnx
457
+ Plrd
458
+ Prti
459
+ Rjng
460
+ Runr
461
+ Samr
462
+ Sarb
463
+ Saur
464
+ Sgnw
465
+ Shaw
466
+ Shrd
467
+ Sidd
468
+ Sind
469
+ Sinh
470
+ Sora
471
+ Soyo
472
+ Sund
473
+ Sylo
474
+ Syrc
475
+ Tagb
476
+ Takr
477
+ Tale
478
+ Talu
479
+ Taml
480
+ Tang
481
+ Tavt
482
+ Telu
483
+ Tfng
484
+ Tglg
485
+ Thaa
486
+ Tibt
487
+ Tirh
488
+ Ugar
489
+ Vaii
490
+ Wara
491
+ Xpeo
492
+ Xsux
493
+ Yiii
494
+ Zanb
495
+ Zinh
496
+ Qaai
497
+ Zyyy
498
+ Zzzz
499
+
500
+ * DerivedAges
501
+ Age=1.1
502
+ Age=10.0
503
+ Age=2.0
504
+ Age=2.1
505
+ Age=3.0
506
+ Age=3.1
507
+ Age=3.2
508
+ Age=4.0
509
+ Age=4.1
510
+ Age=5.0
511
+ Age=5.1
512
+ Age=5.2
513
+ Age=6.0
514
+ Age=6.1
515
+ Age=6.2
516
+ Age=6.3
517
+ Age=7.0
518
+ Age=8.0
519
+ Age=9.0
520
+
521
+ * Blocks
522
+ In_Basic_Latin
523
+ In_Latin_1_Supplement
524
+ In_Latin_Extended_A
525
+ In_Latin_Extended_B
526
+ In_IPA_Extensions
527
+ In_Spacing_Modifier_Letters
528
+ In_Combining_Diacritical_Marks
529
+ In_Greek_and_Coptic
530
+ In_Cyrillic
531
+ In_Cyrillic_Supplement
532
+ In_Armenian
533
+ In_Hebrew
534
+ In_Arabic
535
+ In_Syriac
536
+ In_Arabic_Supplement
537
+ In_Thaana
538
+ In_NKo
539
+ In_Samaritan
540
+ In_Mandaic
541
+ In_Syriac_Supplement
542
+ In_Arabic_Extended_A
543
+ In_Devanagari
544
+ In_Bengali
545
+ In_Gurmukhi
546
+ In_Gujarati
547
+ In_Oriya
548
+ In_Tamil
549
+ In_Telugu
550
+ In_Kannada
551
+ In_Malayalam
552
+ In_Sinhala
553
+ In_Thai
554
+ In_Lao
555
+ In_Tibetan
556
+ In_Myanmar
557
+ In_Georgian
558
+ In_Hangul_Jamo
559
+ In_Ethiopic
560
+ In_Ethiopic_Supplement
561
+ In_Cherokee
562
+ In_Unified_Canadian_Aboriginal_Syllabics
563
+ In_Ogham
564
+ In_Runic
565
+ In_Tagalog
566
+ In_Hanunoo
567
+ In_Buhid
568
+ In_Tagbanwa
569
+ In_Khmer
570
+ In_Mongolian
571
+ In_Unified_Canadian_Aboriginal_Syllabics_Extended
572
+ In_Limbu
573
+ In_Tai_Le
574
+ In_New_Tai_Lue
575
+ In_Khmer_Symbols
576
+ In_Buginese
577
+ In_Tai_Tham
578
+ In_Combining_Diacritical_Marks_Extended
579
+ In_Balinese
580
+ In_Sundanese
581
+ In_Batak
582
+ In_Lepcha
583
+ In_Ol_Chiki
584
+ In_Cyrillic_Extended_C
585
+ In_Sundanese_Supplement
586
+ In_Vedic_Extensions
587
+ In_Phonetic_Extensions
588
+ In_Phonetic_Extensions_Supplement
589
+ In_Combining_Diacritical_Marks_Supplement
590
+ In_Latin_Extended_Additional
591
+ In_Greek_Extended
592
+ In_General_Punctuation
593
+ In_Superscripts_and_Subscripts
594
+ In_Currency_Symbols
595
+ In_Combining_Diacritical_Marks_for_Symbols
596
+ In_Letterlike_Symbols
597
+ In_Number_Forms
598
+ In_Arrows
599
+ In_Mathematical_Operators
600
+ In_Miscellaneous_Technical
601
+ In_Control_Pictures
602
+ In_Optical_Character_Recognition
603
+ In_Enclosed_Alphanumerics
604
+ In_Box_Drawing
605
+ In_Block_Elements
606
+ In_Geometric_Shapes
607
+ In_Miscellaneous_Symbols
608
+ In_Dingbats
609
+ In_Miscellaneous_Mathematical_Symbols_A
610
+ In_Supplemental_Arrows_A
611
+ In_Braille_Patterns
612
+ In_Supplemental_Arrows_B
613
+ In_Miscellaneous_Mathematical_Symbols_B
614
+ In_Supplemental_Mathematical_Operators
615
+ In_Miscellaneous_Symbols_and_Arrows
616
+ In_Glagolitic
617
+ In_Latin_Extended_C
618
+ In_Coptic
619
+ In_Georgian_Supplement
620
+ In_Tifinagh
621
+ In_Ethiopic_Extended
622
+ In_Cyrillic_Extended_A
623
+ In_Supplemental_Punctuation
624
+ In_CJK_Radicals_Supplement
625
+ In_Kangxi_Radicals
626
+ In_Ideographic_Description_Characters
627
+ In_CJK_Symbols_and_Punctuation
628
+ In_Hiragana
629
+ In_Katakana
630
+ In_Bopomofo
631
+ In_Hangul_Compatibility_Jamo
632
+ In_Kanbun
633
+ In_Bopomofo_Extended
634
+ In_CJK_Strokes
635
+ In_Katakana_Phonetic_Extensions
636
+ In_Enclosed_CJK_Letters_and_Months
637
+ In_CJK_Compatibility
638
+ In_CJK_Unified_Ideographs_Extension_A
639
+ In_Yijing_Hexagram_Symbols
640
+ In_CJK_Unified_Ideographs
641
+ In_Yi_Syllables
642
+ In_Yi_Radicals
643
+ In_Lisu
644
+ In_Vai
645
+ In_Cyrillic_Extended_B
646
+ In_Bamum
647
+ In_Modifier_Tone_Letters
648
+ In_Latin_Extended_D
649
+ In_Syloti_Nagri
650
+ In_Common_Indic_Number_Forms
651
+ In_Phags_pa
652
+ In_Saurashtra
653
+ In_Devanagari_Extended
654
+ In_Kayah_Li
655
+ In_Rejang
656
+ In_Hangul_Jamo_Extended_A
657
+ In_Javanese
658
+ In_Myanmar_Extended_B
659
+ In_Cham
660
+ In_Myanmar_Extended_A
661
+ In_Tai_Viet
662
+ In_Meetei_Mayek_Extensions
663
+ In_Ethiopic_Extended_A
664
+ In_Latin_Extended_E
665
+ In_Cherokee_Supplement
666
+ In_Meetei_Mayek
667
+ In_Hangul_Syllables
668
+ In_Hangul_Jamo_Extended_B
669
+ In_High_Surrogates
670
+ In_High_Private_Use_Surrogates
671
+ In_Low_Surrogates
672
+ In_Private_Use_Area
673
+ In_CJK_Compatibility_Ideographs
674
+ In_Alphabetic_Presentation_Forms
675
+ In_Arabic_Presentation_Forms_A
676
+ In_Variation_Selectors
677
+ In_Vertical_Forms
678
+ In_Combining_Half_Marks
679
+ In_CJK_Compatibility_Forms
680
+ In_Small_Form_Variants
681
+ In_Arabic_Presentation_Forms_B
682
+ In_Halfwidth_and_Fullwidth_Forms
683
+ In_Specials
684
+ In_Linear_B_Syllabary
685
+ In_Linear_B_Ideograms
686
+ In_Aegean_Numbers
687
+ In_Ancient_Greek_Numbers
688
+ In_Ancient_Symbols
689
+ In_Phaistos_Disc
690
+ In_Lycian
691
+ In_Carian
692
+ In_Coptic_Epact_Numbers
693
+ In_Old_Italic
694
+ In_Gothic
695
+ In_Old_Permic
696
+ In_Ugaritic
697
+ In_Old_Persian
698
+ In_Deseret
699
+ In_Shavian
700
+ In_Osmanya
701
+ In_Osage
702
+ In_Elbasan
703
+ In_Caucasian_Albanian
704
+ In_Linear_A
705
+ In_Cypriot_Syllabary
706
+ In_Imperial_Aramaic
707
+ In_Palmyrene
708
+ In_Nabataean
709
+ In_Hatran
710
+ In_Phoenician
711
+ In_Lydian
712
+ In_Meroitic_Hieroglyphs
713
+ In_Meroitic_Cursive
714
+ In_Kharoshthi
715
+ In_Old_South_Arabian
716
+ In_Old_North_Arabian
717
+ In_Manichaean
718
+ In_Avestan
719
+ In_Inscriptional_Parthian
720
+ In_Inscriptional_Pahlavi
721
+ In_Psalter_Pahlavi
722
+ In_Old_Turkic
723
+ In_Old_Hungarian
724
+ In_Rumi_Numeral_Symbols
725
+ In_Brahmi
726
+ In_Kaithi
727
+ In_Sora_Sompeng
728
+ In_Chakma
729
+ In_Mahajani
730
+ In_Sharada
731
+ In_Sinhala_Archaic_Numbers
732
+ In_Khojki
733
+ In_Multani
734
+ In_Khudawadi
735
+ In_Grantha
736
+ In_Newa
737
+ In_Tirhuta
738
+ In_Siddham
739
+ In_Modi
740
+ In_Mongolian_Supplement
741
+ In_Takri
742
+ In_Ahom
743
+ In_Warang_Citi
744
+ In_Zanabazar_Square
745
+ In_Soyombo
746
+ In_Pau_Cin_Hau
747
+ In_Bhaiksuki
748
+ In_Marchen
749
+ In_Masaram_Gondi
750
+ In_Cuneiform
751
+ In_Cuneiform_Numbers_and_Punctuation
752
+ In_Early_Dynastic_Cuneiform
753
+ In_Egyptian_Hieroglyphs
754
+ In_Anatolian_Hieroglyphs
755
+ In_Bamum_Supplement
756
+ In_Mro
757
+ In_Bassa_Vah
758
+ In_Pahawh_Hmong
759
+ In_Miao
760
+ In_Ideographic_Symbols_and_Punctuation
761
+ In_Tangut
762
+ In_Tangut_Components
763
+ In_Kana_Supplement
764
+ In_Kana_Extended_A
765
+ In_Nushu
766
+ In_Duployan
767
+ In_Shorthand_Format_Controls
768
+ In_Byzantine_Musical_Symbols
769
+ In_Musical_Symbols
770
+ In_Ancient_Greek_Musical_Notation
771
+ In_Tai_Xuan_Jing_Symbols
772
+ In_Counting_Rod_Numerals
773
+ In_Mathematical_Alphanumeric_Symbols
774
+ In_Sutton_SignWriting
775
+ In_Glagolitic_Supplement
776
+ In_Mende_Kikakui
777
+ In_Adlam
778
+ In_Arabic_Mathematical_Alphabetic_Symbols
779
+ In_Mahjong_Tiles
780
+ In_Domino_Tiles
781
+ In_Playing_Cards
782
+ In_Enclosed_Alphanumeric_Supplement
783
+ In_Enclosed_Ideographic_Supplement
784
+ In_Miscellaneous_Symbols_and_Pictographs
785
+ In_Emoticons
786
+ In_Ornamental_Dingbats
787
+ In_Transport_and_Map_Symbols
788
+ In_Alchemical_Symbols
789
+ In_Geometric_Shapes_Extended
790
+ In_Supplemental_Arrows_C
791
+ In_Supplemental_Symbols_and_Pictographs
792
+ In_CJK_Unified_Ideographs_Extension_B
793
+ In_CJK_Unified_Ideographs_Extension_C
794
+ In_CJK_Unified_Ideographs_Extension_D
795
+ In_CJK_Unified_Ideographs_Extension_E
796
+ In_CJK_Unified_Ideographs_Extension_F
797
+ In_CJK_Compatibility_Ideographs_Supplement
798
+ In_Tags
799
+ In_Variation_Selectors_Supplement
800
+ In_Supplementary_Private_Use_Area_A
801
+ In_Supplementary_Private_Use_Area_B
802
+ In_No_Block
@@ -0,0 +1,77 @@
1
+ require "regexp_property_values/version"
2
+
3
+ module RegexpPropertyValues
4
+ module_function
5
+
6
+ LIST_URL = 'https://raw.githubusercontent.com/k-takata/Onigmo/master/doc/UnicodeProps.txt'
7
+
8
+ def update
9
+ puts "Downloading #{LIST_URL}"
10
+ require 'open-uri'
11
+ File.open(file_path, 'w') { |f| IO.copy_stream(open(LIST_URL), f) }
12
+ puts 'Done!'
13
+ end
14
+
15
+ def file_path
16
+ File.expand_path('../UnicodeProps.txt', __FILE__)
17
+ end
18
+
19
+ def all
20
+ by_category.values.flatten
21
+ end
22
+
23
+ def by_category
24
+ result = File.foreach(file_path).inject({}) do |hash, line|
25
+ if /^\* (?<category>\S.+)/ =~ line
26
+ @current_category = category
27
+ hash[@current_category] ||= []
28
+ elsif /^ {4}(?<property>\S.*)/ =~ line
29
+ # only include props that are supported by the host ruby version
30
+ begin /\p{#{property}}/u; rescue RegexpError, SyntaxError; next hash end
31
+ hash[@current_category] << property
32
+ end
33
+ hash
34
+ end
35
+ add_oniguruma_properties(result)
36
+ result
37
+ end
38
+
39
+ def alias_hash
40
+ short_names, long_names = short_and_long_names
41
+ return {} if short_names.empty?
42
+
43
+ by_matched_characters.each_value.inject({}) do |hash, props|
44
+ next hash if props.count < 2
45
+ long_name = (props & long_names)[0] || fail("no long name for #{props}")
46
+ (props & short_names).each { |short_name| hash[short_name] = long_name }
47
+ hash
48
+ end
49
+ end
50
+
51
+ def short_and_long_names
52
+ short_name_categories = ['Major and General Categories',
53
+ 'PropertyAliases',
54
+ 'PropertyValueAliases (Script)']
55
+ by_category.inject([[], []]) do |(short, long), (cat_name, props)|
56
+ (short_name_categories.include?(cat_name) ? short : long).concat(props)
57
+ [short, long]
58
+ end
59
+ end
60
+
61
+ def by_matched_characters
62
+ puts 'Establishing property characters, this may take a bit ...'
63
+ all.group_by { |prop| matched_characters(prop) }
64
+ end
65
+
66
+ def matched_characters(prop)
67
+ @characters ||= ((0..55_295).to_a + (57_344..1_114_111).to_a)
68
+ .map { |cp_number| [cp_number].pack('U') }
69
+ prop_regex = /\p{#{prop}}/u
70
+ @characters.select { |char| prop_regex.match(char) }
71
+ end
72
+
73
+ def add_oniguruma_properties(props_by_category)
74
+ return if Gem::Version.new(RUBY_VERSION.dup) >= Gem::Version.new('2.0.0')
75
+ props_by_category['Special'] << 'Newline'
76
+ end
77
+ end
@@ -0,0 +1,3 @@
1
+ module RegexpPropertyValues
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,27 @@
1
+ lib = File.expand_path("../lib", __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require 'regexp_property_values/version'
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = 'regexp_property_values'
7
+ s.version = RegexpPropertyValues::VERSION
8
+ s.authors = ['Janosch Müller']
9
+ s.email = ['janosch84@gmail.com']
10
+
11
+ s.summary = "Lists property values supported by Ruby's regex engine"
12
+ s.description = 'This microlibrary lets you see which property values are '\
13
+ 'supported by the regular expression engine of the Ruby '\
14
+ 'version you are running. That is, it determines all '\
15
+ 'supported values for `\p{value}` expressions.'
16
+ s.homepage = 'https://github.com/janosch-x/regexp_property_values'
17
+ s.license = 'MIT'
18
+
19
+ s.files = `git ls-files -z`.split("\x0").reject do |f|
20
+ f.match(%r{^(test|spec|features)/})
21
+ end
22
+ s.require_paths = ['lib']
23
+
24
+ s.add_development_dependency 'bundler', '~> 1.16'
25
+ s.add_development_dependency 'rake', '~> 10.0'
26
+ s.add_development_dependency 'rspec', '~> 3.0'
27
+ end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: regexp_property_values
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Janosch Müller
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2018-05-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.16'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.16'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '10.0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '10.0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '3.0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ description: This microlibrary lets you see which property values are supported by
63
+ the regular expression engine of the Ruby version you are running. That is, it determines
64
+ all supported values for `\p{value}` expressions.
65
+ email:
66
+ - janosch84@gmail.com
67
+ executables: []
68
+ extensions: []
69
+ extra_rdoc_files: []
70
+ files:
71
+ - .gitignore
72
+ - .rspec
73
+ - .travis.yml
74
+ - Gemfile
75
+ - Gemfile.lock
76
+ - LICENSE.txt
77
+ - README.md
78
+ - Rakefile
79
+ - bin/console
80
+ - bin/setup
81
+ - lib/UnicodeProps.txt
82
+ - lib/regexp_property_values.rb
83
+ - lib/regexp_property_values/version.rb
84
+ - regexp_property_values.gemspec
85
+ homepage: https://github.com/janosch-x/regexp_property_values
86
+ licenses:
87
+ - MIT
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 1.8.23.2
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: Lists property values supported by Ruby's regex engine
110
+ test_files: []