regexp_property_values 0.3.2-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 741f08a7bd8b4b757cbc30ba3c95f8f4cf43c9d9
4
+ data.tar.gz: ade50a04acc72fd6a7aceeaf7ca74cf6c51706ab
5
+ SHA512:
6
+ metadata.gz: 490a5dd24b4a96333623308dd72e66af078501f953d54de2f8ab8ebb92cd2889f826723e947a28bc2d523d61bc1edf455f6bffda0f34958ab8090ec4fcb6c40a
7
+ data.tar.gz: 2c9f70cba57585f5b7ea68fd0009157c953ef96a134473d945644a7f9c4aeba541d74cc549531bec8d4d7c7b1055cc4cf9e3a2655100ec5d71adb997754a06ee
data/.gitignore ADDED
@@ -0,0 +1,31 @@
1
+ *.bundle
2
+ *.gem
3
+ *.iml
4
+ *.stTheme.cache
5
+ *.sublime-project
6
+ *.sublime-workspace
7
+ *.swp
8
+ *.tmlanguage.cache
9
+ *.tmPreferences.cache
10
+ *~
11
+ .byebug_history
12
+ .DS_Store
13
+ .idea/
14
+ .ruby-gemset
15
+ .ruby-version
16
+ .tags
17
+ .tags1
18
+ bbin/
19
+ binstubs/*
20
+ bundler_stubs/*/.yardoc
21
+ Gemfile.lock
22
+ /.bundle/
23
+ /_yardoc/
24
+ /coverage/
25
+ /doc/
26
+ /pkg/
27
+ /spec/reports/
28
+ /tmp/
29
+
30
+ # rspec failure tracking
31
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.0
5
+ - 2.4
6
+ - 2.5
7
+ - 2.6
8
+ before_install:
9
+ - gem update --system
10
+ - gem install bundler
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in regexp_property_values.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Jannosch Müller
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # RegexpPropertyValues
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/regexp_property_values.svg)](http://badge.fury.io/rb/regexp_property_values)
4
+ [![Build Status](https://travis-ci.org/janosch-x/regexp_property_values.svg?branch=master)](https://travis-ci.org/janosch-x/regexp_property_values)
5
+
6
+ This small library lets you see which property values are supported by the regular expression engine of the Ruby version you are running and directly reads out their codepoint ranges from there.
7
+
8
+ That is, it determines all supported values for `\p{value}` expressions and what they match.
9
+
10
+ ## Usage
11
+
12
+ ##### Browse all property values (supported by any Ruby, ever)
13
+
14
+ ```ruby
15
+ require 'regexp_property_values'
16
+
17
+ PV = RegexpPropertyValues
18
+
19
+ PV.all # => ["Alpha", "Blank", "Cntrl", ...]
20
+ PV.by_category # => {"POSIX brackets" => ["Alpha", ...], "Special" => ...}
21
+ PV.short_and_long_names # => [["M", "Grek", ...], ["Mark", "Greek", ...]]
22
+ ```
23
+
24
+ ##### Browse property values supported by the Ruby you are running
25
+
26
+ ```ruby
27
+ PV.all_for_current_ruby # => ["Alpha", "Blank", "Cntrl", ...]
28
+
29
+ PV.by_category.map { |k, v| [k, v.select(&:supported_by_current_ruby?)] }
30
+
31
+ # etc.
32
+ ```
33
+
34
+ ##### Inspect properties
35
+
36
+ ```ruby
37
+ PV['alpha'].supported_by_current_ruby? # => true
38
+ PV['foobar'].supported_by_current_ruby? # => false
39
+
40
+ PV['AHex'].matched_characters # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...]
41
+ PV['AHex'].matched_codepoints # => [48, 49, 50, ...]
42
+ PV['AHex'].matched_ranges # => [48..57, 65..70, 97..102]
43
+ ```
44
+
45
+ ##### Utility methods
46
+
47
+ ```ruby
48
+ # This one takes a few seconds (or minutes, without the C extension)
49
+ PV.alias_hash # => {"M" => "Mark", "Grek" => "Greek", ...}
50
+
51
+ # download the latest list of possible properties
52
+ PV.update
53
+ ```
data/Rakefile ADDED
@@ -0,0 +1,32 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rubygems/package_task'
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task :default => :spec
8
+
9
+ require 'rake/extensiontask'
10
+
11
+ Rake::ExtensionTask.new('regexp_property_values') do |ext|
12
+ ext.lib_dir = 'lib/regexp_property_values'
13
+ end
14
+
15
+ namespace :java do
16
+ java_gemspec = eval File.read('./regexp_property_values.gemspec')
17
+ java_gemspec.platform = 'java'
18
+ java_gemspec.extensions = []
19
+
20
+ Gem::PackageTask.new(java_gemspec) do |pkg|
21
+ pkg.need_zip = true
22
+ pkg.need_tar = true
23
+ pkg.package_dir = 'pkg'
24
+ end
25
+ end
26
+
27
+ task package: 'java:gem'
28
+
29
+ if RUBY_PLATFORM !~ /java/i
30
+ # recompile before running specs
31
+ task(:spec).enhance([:compile])
32
+ end
data/bin/console ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "regexp_property_values"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ PV = RegexpPropertyValues
14
+
15
+ require "irb"
16
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,5 @@
1
+ require 'mkmf'
2
+
3
+ name = 'regexp_property_values'
4
+
5
+ create_makefile("#{name}/#{name}")
@@ -0,0 +1,56 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+ #include "ruby/oniguruma.h" // still in recent rubies f. backwards compatibility
4
+
5
+ static int prop_name_to_ctype(char* name, rb_encoding *enc) {
6
+ UChar *uname;
7
+ int ctype;
8
+
9
+ uname = (UChar*)name;
10
+ ctype = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, uname, uname + strlen(name));
11
+ if (ctype < 0) rb_raise(rb_eArgError, "Unknown property name `%s`", name);
12
+
13
+ return ctype;
14
+ }
15
+
16
+ VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges) {
17
+ unsigned int range_count, i;
18
+ VALUE result, sub_range;
19
+
20
+ range_count = onig_ranges[0];
21
+ result = rb_ary_new2(range_count); // rb_ary_new_capa not avail. in Ruby 2.0
22
+
23
+ for (i = 0; i < range_count; i++) {
24
+ sub_range = rb_range_new(INT2FIX(onig_ranges[(i * 2) + 1]),
25
+ INT2FIX(onig_ranges[(i * 2) + 2]),
26
+ 0);
27
+ rb_ary_store(result, i, sub_range);
28
+ }
29
+
30
+ return result;
31
+ }
32
+
33
+ VALUE rb_prop_ranges(char* name, rb_encoding *enc) {
34
+ int ctype;
35
+ const OnigCodePoint *onig_ranges;
36
+ OnigCodePoint sb_out;
37
+
38
+ ctype = prop_name_to_ctype(name, enc);
39
+ ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &onig_ranges);
40
+ return onig_ranges_to_rb(onig_ranges);
41
+ }
42
+
43
+ VALUE method_matched_ranges(VALUE self, VALUE arg) {
44
+ char *prop_name;
45
+ rb_encoding *enc;
46
+
47
+ prop_name = StringValueCStr(arg);
48
+ enc = rb_enc_get(arg);
49
+ return rb_prop_ranges(prop_name, enc);
50
+ }
51
+
52
+ void Init_regexp_property_values() {
53
+ VALUE module;
54
+ module = rb_define_module("OnigRegexpPropertyHelper");
55
+ rb_define_singleton_method(module, "matched_ranges", method_matched_ranges, 1);
56
+ }
@@ -0,0 +1,802 @@
1
+ Onigmo (Oniguruma-mod) Unicode Properties Version 6.2.0 2017/07/17
2
+
3
+ * POSIX brackets
4
+ Alpha
5
+ Blank
6
+ Cntrl
7
+ Digit
8
+ Graph
9
+ Lower
10
+ Print
11
+ Punct
12
+ Space
13
+ Upper
14
+ XDigit
15
+ Word
16
+ Alnum
17
+ ASCII
18
+ XPosixPunct
19
+
20
+ * Special
21
+ Any
22
+ Assigned
23
+
24
+ * Major and General Categories
25
+ C
26
+ Cc
27
+ Cf
28
+ Cn
29
+ Co
30
+ Cs
31
+ L
32
+ LC
33
+ Ll
34
+ Lm
35
+ Lo
36
+ Lt
37
+ Lu
38
+ M
39
+ Mc
40
+ Me
41
+ Mn
42
+ N
43
+ Nd
44
+ Nl
45
+ No
46
+ P
47
+ Pc
48
+ Pd
49
+ Pe
50
+ Pf
51
+ Pi
52
+ Po
53
+ Ps
54
+ S
55
+ Sc
56
+ Sk
57
+ Sm
58
+ So
59
+ Z
60
+ Zl
61
+ Zp
62
+ Zs
63
+
64
+ * Scripts
65
+ Adlam
66
+ Ahom
67
+ Anatolian_Hieroglyphs
68
+ Arabic
69
+ Armenian
70
+ Avestan
71
+ Balinese
72
+ Bamum
73
+ Bassa_Vah
74
+ Batak
75
+ Bengali
76
+ Bhaiksuki
77
+ Bopomofo
78
+ Brahmi
79
+ Braille
80
+ Buginese
81
+ Buhid
82
+ Canadian_Aboriginal
83
+ Carian
84
+ Caucasian_Albanian
85
+ Chakma
86
+ Cham
87
+ Cherokee
88
+ Common
89
+ Coptic
90
+ Cuneiform
91
+ Cypriot
92
+ Cyrillic
93
+ Deseret
94
+ Devanagari
95
+ Duployan
96
+ Egyptian_Hieroglyphs
97
+ Elbasan
98
+ Ethiopic
99
+ Georgian
100
+ Glagolitic
101
+ Gothic
102
+ Grantha
103
+ Greek
104
+ Gujarati
105
+ Gurmukhi
106
+ Han
107
+ Hangul
108
+ Hanunoo
109
+ Hatran
110
+ Hebrew
111
+ Hiragana
112
+ Imperial_Aramaic
113
+ Inherited
114
+ Inscriptional_Pahlavi
115
+ Inscriptional_Parthian
116
+ Javanese
117
+ Kaithi
118
+ Kannada
119
+ Katakana
120
+ Kayah_Li
121
+ Kharoshthi
122
+ Khmer
123
+ Khojki
124
+ Khudawadi
125
+ Lao
126
+ Latin
127
+ Lepcha
128
+ Limbu
129
+ Linear_A
130
+ Linear_B
131
+ Lisu
132
+ Lycian
133
+ Lydian
134
+ Mahajani
135
+ Malayalam
136
+ Mandaic
137
+ Manichaean
138
+ Marchen
139
+ Masaram_Gondi
140
+ Meetei_Mayek
141
+ Mende_Kikakui
142
+ Meroitic_Cursive
143
+ Meroitic_Hieroglyphs
144
+ Miao
145
+ Modi
146
+ Mongolian
147
+ Mro
148
+ Multani
149
+ Myanmar
150
+ Nabataean
151
+ New_Tai_Lue
152
+ Newa
153
+ Nko
154
+ Nushu
155
+ Ogham
156
+ Ol_Chiki
157
+ Old_Hungarian
158
+ Old_Italic
159
+ Old_North_Arabian
160
+ Old_Permic
161
+ Old_Persian
162
+ Old_South_Arabian
163
+ Old_Turkic
164
+ Oriya
165
+ Osage
166
+ Osmanya
167
+ Pahawh_Hmong
168
+ Palmyrene
169
+ Pau_Cin_Hau
170
+ Phags_Pa
171
+ Phoenician
172
+ Psalter_Pahlavi
173
+ Rejang
174
+ Runic
175
+ Samaritan
176
+ Saurashtra
177
+ Sharada
178
+ Shavian
179
+ Siddham
180
+ SignWriting
181
+ Sinhala
182
+ Sora_Sompeng
183
+ Soyombo
184
+ Sundanese
185
+ Syloti_Nagri
186
+ Syriac
187
+ Tagalog
188
+ Tagbanwa
189
+ Tai_Le
190
+ Tai_Tham
191
+ Tai_Viet
192
+ Takri
193
+ Tamil
194
+ Tangut
195
+ Telugu
196
+ Thaana
197
+ Thai
198
+ Tibetan
199
+ Tifinagh
200
+ Tirhuta
201
+ Ugaritic
202
+ Unknown
203
+ Vai
204
+ Warang_Citi
205
+ Yi
206
+ Zanabazar_Square
207
+
208
+ * DerivedCoreProperties
209
+ Alphabetic
210
+ Case_Ignorable
211
+ Cased
212
+ Changes_When_Casefolded
213
+ Changes_When_Casemapped
214
+ Changes_When_Lowercased
215
+ Changes_When_Titlecased
216
+ Changes_When_Uppercased
217
+ Default_Ignorable_Code_Point
218
+ Grapheme_Base
219
+ Grapheme_Extend
220
+ Grapheme_Link
221
+ ID_Continue
222
+ ID_Start
223
+ Lowercase
224
+ Math
225
+ Uppercase
226
+ XID_Continue
227
+ XID_Start
228
+
229
+ * PropList
230
+ ASCII_Hex_Digit
231
+ Bidi_Control
232
+ Dash
233
+ Deprecated
234
+ Diacritic
235
+ Extender
236
+ Hex_Digit
237
+ Hyphen
238
+ IDS_Binary_Operator
239
+ IDS_Trinary_Operator
240
+ Ideographic
241
+ Join_Control
242
+ Logical_Order_Exception
243
+ Noncharacter_Code_Point
244
+ Other_Alphabetic
245
+ Other_Default_Ignorable_Code_Point
246
+ Other_Grapheme_Extend
247
+ Other_ID_Continue
248
+ Other_ID_Start
249
+ Other_Lowercase
250
+ Other_Math
251
+ Other_Uppercase
252
+ Pattern_Syntax
253
+ Pattern_White_Space
254
+ Prepended_Concatenation_Mark
255
+ Quotation_Mark
256
+ Radical
257
+ Regional_Indicator
258
+ Sentence_Terminal
259
+ Soft_Dotted
260
+ Terminal_Punctuation
261
+ Unified_Ideograph
262
+ Variation_Selector
263
+ White_Space
264
+
265
+ * Emoji
266
+ Emoji
267
+ Emoji_Component
268
+ Emoji_Modifier
269
+ Emoji_Modifier_Base
270
+ Emoji_Presentation
271
+
272
+ * PropertyAliases
273
+ AHex
274
+ Bidi_C
275
+ CI
276
+ CWCF
277
+ CWCM
278
+ CWL
279
+ CWT
280
+ CWU
281
+ DI
282
+ Dep
283
+ Dia
284
+ Ext
285
+ Gr_Base
286
+ Gr_Ext
287
+ Gr_Link
288
+ Hex
289
+ IDC
290
+ IDS
291
+ IDSB
292
+ IDST
293
+ Ideo
294
+ Join_C
295
+ LOE
296
+ NChar
297
+ OAlpha
298
+ ODI
299
+ OGr_Ext
300
+ OIDC
301
+ OIDS
302
+ OLower
303
+ OMath
304
+ OUpper
305
+ PCM
306
+ Pat_Syn
307
+ Pat_WS
308
+ QMark
309
+ RI
310
+ SD
311
+ STerm
312
+ Term
313
+ UIdeo
314
+ VS
315
+ WSpace
316
+ XIDC
317
+ XIDS
318
+
319
+ * PropertyValueAliases (General_Category)
320
+ Other
321
+ Control
322
+ Format
323
+ Unassigned
324
+ Private_Use
325
+ Surrogate
326
+ Letter
327
+ Cased_Letter
328
+ Lowercase_Letter
329
+ Modifier_Letter
330
+ Other_Letter
331
+ Titlecase_Letter
332
+ Uppercase_Letter
333
+ Mark
334
+ Combining_Mark
335
+ Spacing_Mark
336
+ Enclosing_Mark
337
+ Nonspacing_Mark
338
+ Number
339
+ Decimal_Number
340
+ Letter_Number
341
+ Other_Number
342
+ Punctuation
343
+ Connector_Punctuation
344
+ Dash_Punctuation
345
+ Close_Punctuation
346
+ Final_Punctuation
347
+ Initial_Punctuation
348
+ Other_Punctuation
349
+ Open_Punctuation
350
+ Symbol
351
+ Currency_Symbol
352
+ Modifier_Symbol
353
+ Math_Symbol
354
+ Other_Symbol
355
+ Separator
356
+ Line_Separator
357
+ Paragraph_Separator
358
+ Space_Separator
359
+
360
+ * PropertyValueAliases (Script)
361
+ Adlm
362
+ Aghb
363
+ Arab
364
+ Armi
365
+ Armn
366
+ Avst
367
+ Bali
368
+ Bamu
369
+ Bass
370
+ Batk
371
+ Beng
372
+ Bhks
373
+ Bopo
374
+ Brah
375
+ Brai
376
+ Bugi
377
+ Buhd
378
+ Cakm
379
+ Cans
380
+ Cari
381
+ Cher
382
+ Copt
383
+ Qaac
384
+ Cprt
385
+ Cyrl
386
+ Deva
387
+ Dsrt
388
+ Dupl
389
+ Egyp
390
+ Elba
391
+ Ethi
392
+ Geor
393
+ Glag
394
+ Gonm
395
+ Goth
396
+ Gran
397
+ Grek
398
+ Gujr
399
+ Guru
400
+ Hang
401
+ Hani
402
+ Hano
403
+ Hatr
404
+ Hebr
405
+ Hira
406
+ Hluw
407
+ Hmng
408
+ Hung
409
+ Ital
410
+ Java
411
+ Kali
412
+ Kana
413
+ Khar
414
+ Khmr
415
+ Khoj
416
+ Knda
417
+ Kthi
418
+ Lana
419
+ Laoo
420
+ Latn
421
+ Lepc
422
+ Limb
423
+ Lina
424
+ Linb
425
+ Lyci
426
+ Lydi
427
+ Mahj
428
+ Mand
429
+ Mani
430
+ Marc
431
+ Mend
432
+ Merc
433
+ Mero
434
+ Mlym
435
+ Mong
436
+ Mroo
437
+ Mtei
438
+ Mult
439
+ Mymr
440
+ Narb
441
+ Nbat
442
+ Nkoo
443
+ Nshu
444
+ Ogam
445
+ Olck
446
+ Orkh
447
+ Orya
448
+ Osge
449
+ Osma
450
+ Palm
451
+ Pauc
452
+ Perm
453
+ Phag
454
+ Phli
455
+ Phlp
456
+ Phnx
457
+ Plrd
458
+ Prti
459
+ Rjng
460
+ Runr
461
+ Samr
462
+ Sarb
463
+ Saur
464
+ Sgnw
465
+ Shaw
466
+ Shrd
467
+ Sidd
468
+ Sind
469
+ Sinh
470
+ Sora
471
+ Soyo
472
+ Sund
473
+ Sylo
474
+ Syrc
475
+ Tagb
476
+ Takr
477
+ Tale
478
+ Talu
479
+ Taml
480
+ Tang
481
+ Tavt
482
+ Telu
483
+ Tfng
484
+ Tglg
485
+ Thaa
486
+ Tibt
487
+ Tirh
488
+ Ugar
489
+ Vaii
490
+ Wara
491
+ Xpeo
492
+ Xsux
493
+ Yiii
494
+ Zanb
495
+ Zinh
496
+ Qaai
497
+ Zyyy
498
+ Zzzz
499
+
500
+ * DerivedAges
501
+ Age=1.1
502
+ Age=10.0
503
+ Age=2.0
504
+ Age=2.1
505
+ Age=3.0
506
+ Age=3.1
507
+ Age=3.2
508
+ Age=4.0
509
+ Age=4.1
510
+ Age=5.0
511
+ Age=5.1
512
+ Age=5.2
513
+ Age=6.0
514
+ Age=6.1
515
+ Age=6.2
516
+ Age=6.3
517
+ Age=7.0
518
+ Age=8.0
519
+ Age=9.0
520
+
521
+ * Blocks
522
+ In_Basic_Latin
523
+ In_Latin_1_Supplement
524
+ In_Latin_Extended_A
525
+ In_Latin_Extended_B
526
+ In_IPA_Extensions
527
+ In_Spacing_Modifier_Letters
528
+ In_Combining_Diacritical_Marks
529
+ In_Greek_and_Coptic
530
+ In_Cyrillic
531
+ In_Cyrillic_Supplement
532
+ In_Armenian
533
+ In_Hebrew
534
+ In_Arabic
535
+ In_Syriac
536
+ In_Arabic_Supplement
537
+ In_Thaana
538
+ In_NKo
539
+ In_Samaritan
540
+ In_Mandaic
541
+ In_Syriac_Supplement
542
+ In_Arabic_Extended_A
543
+ In_Devanagari
544
+ In_Bengali
545
+ In_Gurmukhi
546
+ In_Gujarati
547
+ In_Oriya
548
+ In_Tamil
549
+ In_Telugu
550
+ In_Kannada
551
+ In_Malayalam
552
+ In_Sinhala
553
+ In_Thai
554
+ In_Lao
555
+ In_Tibetan
556
+ In_Myanmar
557
+ In_Georgian
558
+ In_Hangul_Jamo
559
+ In_Ethiopic
560
+ In_Ethiopic_Supplement
561
+ In_Cherokee
562
+ In_Unified_Canadian_Aboriginal_Syllabics
563
+ In_Ogham
564
+ In_Runic
565
+ In_Tagalog
566
+ In_Hanunoo
567
+ In_Buhid
568
+ In_Tagbanwa
569
+ In_Khmer
570
+ In_Mongolian
571
+ In_Unified_Canadian_Aboriginal_Syllabics_Extended
572
+ In_Limbu
573
+ In_Tai_Le
574
+ In_New_Tai_Lue
575
+ In_Khmer_Symbols
576
+ In_Buginese
577
+ In_Tai_Tham
578
+ In_Combining_Diacritical_Marks_Extended
579
+ In_Balinese
580
+ In_Sundanese
581
+ In_Batak
582
+ In_Lepcha
583
+ In_Ol_Chiki
584
+ In_Cyrillic_Extended_C
585
+ In_Sundanese_Supplement
586
+ In_Vedic_Extensions
587
+ In_Phonetic_Extensions
588
+ In_Phonetic_Extensions_Supplement
589
+ In_Combining_Diacritical_Marks_Supplement
590
+ In_Latin_Extended_Additional
591
+ In_Greek_Extended
592
+ In_General_Punctuation
593
+ In_Superscripts_and_Subscripts
594
+ In_Currency_Symbols
595
+ In_Combining_Diacritical_Marks_for_Symbols
596
+ In_Letterlike_Symbols
597
+ In_Number_Forms
598
+ In_Arrows
599
+ In_Mathematical_Operators
600
+ In_Miscellaneous_Technical
601
+ In_Control_Pictures
602
+ In_Optical_Character_Recognition
603
+ In_Enclosed_Alphanumerics
604
+ In_Box_Drawing
605
+ In_Block_Elements
606
+ In_Geometric_Shapes
607
+ In_Miscellaneous_Symbols
608
+ In_Dingbats
609
+ In_Miscellaneous_Mathematical_Symbols_A
610
+ In_Supplemental_Arrows_A
611
+ In_Braille_Patterns
612
+ In_Supplemental_Arrows_B
613
+ In_Miscellaneous_Mathematical_Symbols_B
614
+ In_Supplemental_Mathematical_Operators
615
+ In_Miscellaneous_Symbols_and_Arrows
616
+ In_Glagolitic
617
+ In_Latin_Extended_C
618
+ In_Coptic
619
+ In_Georgian_Supplement
620
+ In_Tifinagh
621
+ In_Ethiopic_Extended
622
+ In_Cyrillic_Extended_A
623
+ In_Supplemental_Punctuation
624
+ In_CJK_Radicals_Supplement
625
+ In_Kangxi_Radicals
626
+ In_Ideographic_Description_Characters
627
+ In_CJK_Symbols_and_Punctuation
628
+ In_Hiragana
629
+ In_Katakana
630
+ In_Bopomofo
631
+ In_Hangul_Compatibility_Jamo
632
+ In_Kanbun
633
+ In_Bopomofo_Extended
634
+ In_CJK_Strokes
635
+ In_Katakana_Phonetic_Extensions
636
+ In_Enclosed_CJK_Letters_and_Months
637
+ In_CJK_Compatibility
638
+ In_CJK_Unified_Ideographs_Extension_A
639
+ In_Yijing_Hexagram_Symbols
640
+ In_CJK_Unified_Ideographs
641
+ In_Yi_Syllables
642
+ In_Yi_Radicals
643
+ In_Lisu
644
+ In_Vai
645
+ In_Cyrillic_Extended_B
646
+ In_Bamum
647
+ In_Modifier_Tone_Letters
648
+ In_Latin_Extended_D
649
+ In_Syloti_Nagri
650
+ In_Common_Indic_Number_Forms
651
+ In_Phags_pa
652
+ In_Saurashtra
653
+ In_Devanagari_Extended
654
+ In_Kayah_Li
655
+ In_Rejang
656
+ In_Hangul_Jamo_Extended_A
657
+ In_Javanese
658
+ In_Myanmar_Extended_B
659
+ In_Cham
660
+ In_Myanmar_Extended_A
661
+ In_Tai_Viet
662
+ In_Meetei_Mayek_Extensions
663
+ In_Ethiopic_Extended_A
664
+ In_Latin_Extended_E
665
+ In_Cherokee_Supplement
666
+ In_Meetei_Mayek
667
+ In_Hangul_Syllables
668
+ In_Hangul_Jamo_Extended_B
669
+ In_High_Surrogates
670
+ In_High_Private_Use_Surrogates
671
+ In_Low_Surrogates
672
+ In_Private_Use_Area
673
+ In_CJK_Compatibility_Ideographs
674
+ In_Alphabetic_Presentation_Forms
675
+ In_Arabic_Presentation_Forms_A
676
+ In_Variation_Selectors
677
+ In_Vertical_Forms
678
+ In_Combining_Half_Marks
679
+ In_CJK_Compatibility_Forms
680
+ In_Small_Form_Variants
681
+ In_Arabic_Presentation_Forms_B
682
+ In_Halfwidth_and_Fullwidth_Forms
683
+ In_Specials
684
+ In_Linear_B_Syllabary
685
+ In_Linear_B_Ideograms
686
+ In_Aegean_Numbers
687
+ In_Ancient_Greek_Numbers
688
+ In_Ancient_Symbols
689
+ In_Phaistos_Disc
690
+ In_Lycian
691
+ In_Carian
692
+ In_Coptic_Epact_Numbers
693
+ In_Old_Italic
694
+ In_Gothic
695
+ In_Old_Permic
696
+ In_Ugaritic
697
+ In_Old_Persian
698
+ In_Deseret
699
+ In_Shavian
700
+ In_Osmanya
701
+ In_Osage
702
+ In_Elbasan
703
+ In_Caucasian_Albanian
704
+ In_Linear_A
705
+ In_Cypriot_Syllabary
706
+ In_Imperial_Aramaic
707
+ In_Palmyrene
708
+ In_Nabataean
709
+ In_Hatran
710
+ In_Phoenician
711
+ In_Lydian
712
+ In_Meroitic_Hieroglyphs
713
+ In_Meroitic_Cursive
714
+ In_Kharoshthi
715
+ In_Old_South_Arabian
716
+ In_Old_North_Arabian
717
+ In_Manichaean
718
+ In_Avestan
719
+ In_Inscriptional_Parthian
720
+ In_Inscriptional_Pahlavi
721
+ In_Psalter_Pahlavi
722
+ In_Old_Turkic
723
+ In_Old_Hungarian
724
+ In_Rumi_Numeral_Symbols
725
+ In_Brahmi
726
+ In_Kaithi
727
+ In_Sora_Sompeng
728
+ In_Chakma
729
+ In_Mahajani
730
+ In_Sharada
731
+ In_Sinhala_Archaic_Numbers
732
+ In_Khojki
733
+ In_Multani
734
+ In_Khudawadi
735
+ In_Grantha
736
+ In_Newa
737
+ In_Tirhuta
738
+ In_Siddham
739
+ In_Modi
740
+ In_Mongolian_Supplement
741
+ In_Takri
742
+ In_Ahom
743
+ In_Warang_Citi
744
+ In_Zanabazar_Square
745
+ In_Soyombo
746
+ In_Pau_Cin_Hau
747
+ In_Bhaiksuki
748
+ In_Marchen
749
+ In_Masaram_Gondi
750
+ In_Cuneiform
751
+ In_Cuneiform_Numbers_and_Punctuation
752
+ In_Early_Dynastic_Cuneiform
753
+ In_Egyptian_Hieroglyphs
754
+ In_Anatolian_Hieroglyphs
755
+ In_Bamum_Supplement
756
+ In_Mro
757
+ In_Bassa_Vah
758
+ In_Pahawh_Hmong
759
+ In_Miao
760
+ In_Ideographic_Symbols_and_Punctuation
761
+ In_Tangut
762
+ In_Tangut_Components
763
+ In_Kana_Supplement
764
+ In_Kana_Extended_A
765
+ In_Nushu
766
+ In_Duployan
767
+ In_Shorthand_Format_Controls
768
+ In_Byzantine_Musical_Symbols
769
+ In_Musical_Symbols
770
+ In_Ancient_Greek_Musical_Notation
771
+ In_Tai_Xuan_Jing_Symbols
772
+ In_Counting_Rod_Numerals
773
+ In_Mathematical_Alphanumeric_Symbols
774
+ In_Sutton_SignWriting
775
+ In_Glagolitic_Supplement
776
+ In_Mende_Kikakui
777
+ In_Adlam
778
+ In_Arabic_Mathematical_Alphabetic_Symbols
779
+ In_Mahjong_Tiles
780
+ In_Domino_Tiles
781
+ In_Playing_Cards
782
+ In_Enclosed_Alphanumeric_Supplement
783
+ In_Enclosed_Ideographic_Supplement
784
+ In_Miscellaneous_Symbols_and_Pictographs
785
+ In_Emoticons
786
+ In_Ornamental_Dingbats
787
+ In_Transport_and_Map_Symbols
788
+ In_Alchemical_Symbols
789
+ In_Geometric_Shapes_Extended
790
+ In_Supplemental_Arrows_C
791
+ In_Supplemental_Symbols_and_Pictographs
792
+ In_CJK_Unified_Ideographs_Extension_B
793
+ In_CJK_Unified_Ideographs_Extension_C
794
+ In_CJK_Unified_Ideographs_Extension_D
795
+ In_CJK_Unified_Ideographs_Extension_E
796
+ In_CJK_Unified_Ideographs_Extension_F
797
+ In_CJK_Compatibility_Ideographs_Supplement
798
+ In_Tags
799
+ In_Variation_Selectors_Supplement
800
+ In_Supplementary_Private_Use_Area_A
801
+ In_Supplementary_Private_Use_Area_B
802
+ In_No_Block
@@ -0,0 +1,79 @@
1
+ begin
2
+ require 'regexp_property_values/regexp_property_values'
3
+ rescue LoadError
4
+ warn 'regexp_property_values could not load C extension, using slower Ruby'
5
+ end
6
+ require 'regexp_property_values/extension'
7
+ require 'regexp_property_values/version'
8
+
9
# Lists the regexp property values found in the bundled UnicodeProps.txt and
# wraps each of them with Extension so it can be inspected.
module RegexpPropertyValues
  module_function

  LIST_URL = 'https://raw.githubusercontent.com/k-takata/Onigmo/master/doc/UnicodeProps.txt'

  # Downloads the list at LIST_URL and writes it to file_path.
  #
  # The remote stream is opened before the local file, so a failed download
  # no longer leaves a truncated list behind.
  def update
    puts "Downloading #{LIST_URL}"
    require 'open-uri'
    # Kernel#open stopped accepting URLs in Ruby 3.0; URI.open is used where
    # open-uri provides it, with Kernel#open as the fallback for old rubies.
    opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
    opener.call(LIST_URL) do |remote|
      File.open(file_path, 'w') { |f| IO.copy_stream(remote, f) }
    end
    puts 'Done!'
  end

  # @return [String] absolute path of the property list next to this file
  def file_path
    File.expand_path('../UnicodeProps.txt', __FILE__)
  end

  # @return [Array<String>] every listed property value, in file order
  def all
    by_category.values.flatten
  end

  # @return [Array<String>] the values for which supported_by_current_ruby?
  #   is true
  def all_for_current_ruby
    all.select(&:supported_by_current_ruby?)
  end

  # Parses the list file. Lines starting with "* " open a category, lines
  # indented by four spaces add a value to the category opened last.
  #
  # @return [Hash{String => Array<String>}] values by category name, each
  #   value extended with Extension
  def by_category
    # Parser state is a local, so nothing leaks between calls.
    current_category = nil
    result = File.foreach(file_path).each_with_object({}) do |line, hash|
      if /^\* (?<category>\S.+)/ =~ line
        current_category = category
        hash[current_category] ||= []
      elsif /^ {4}(?<value_name>\S.*)/ =~ line
        hash[current_category] << value_name.extend(Extension)
      end
    end
    add_oniguruma_properties(result)
    result
  end

  # Appends the 'Newline' value, which is not part of the list file, to the
  # 'Special' category (creating that category when the list has none).
  def add_oniguruma_properties(props_by_category)
    (props_by_category['Special'] ||= []) << 'Newline'.dup.extend(Extension)
  end

  # Maps short property names to the long name that matches exactly the same
  # codepoints. Slow, because it establishes the codepoints of every
  # supported property.
  #
  # @return [Hash{String => String}]
  # @raise [RuntimeError] if a group of equivalent properties has no long name
  def alias_hash
    short_names, long_names = short_and_long_names
    return {} if short_names.empty?

    long_names -= Array(by_category['POSIX brackets'])
    by_matched_codepoints.each_value.each_with_object({}) do |props, hash|
      next if props.size < 2
      long_name = (props & long_names).first || fail("no long name for #{props}")
      (props & short_names).each { |short_name| hash[short_name] = long_name }
    end
  end

  # @return [Array(Array<String>, Array<String>)] values from the short-name
  #   categories first, all remaining values second
  def short_and_long_names
    short_name_categories = ['Major and General Categories',
                             'PropertyAliases',
                             'PropertyValueAliases (Script)']
    by_category.each_with_object([[], []]) do |(cat_name, props), (short, long)|
      (short_name_categories.include?(cat_name) ? short : long).concat(props)
    end
  end

  # @return [Hash{Array<Integer> => Array<String>}] supported values grouped
  #   by the codepoints they match
  def by_matched_codepoints
    puts 'Establishing property codepoints, this may take a bit ...'
    all_for_current_ruby.group_by(&:matched_codepoints)
  end

  # Wraps a property name with Extension.
  #
  # An unfrozen String is extended in place and returned; a frozen one cannot
  # be extended, so a copy is extended and returned instead.
  #
  # @param prop [String] property name, e.g. 'alpha'
  # @return [String] the name, extended with Extension
  def [](prop)
    (prop.frozen? ? prop.dup : prop).extend(Extension)
  end
end
@@ -0,0 +1,55 @@
1
module RegexpPropertyValues
  # Mixin for Strings holding a property name (the part between the braces
  # of a \p{...} expression). Adds methods to check whether the running Ruby
  # knows the property and to find out what it matches.
  module Extension
    # @return [Boolean] whether a regexp can be built for this property name
    def supported_by_current_ruby?
      !!regexp
    rescue ArgumentError
      false
    end

    # @return [Regexp] memoized /\p{name}/u for this property name
    # @raise [ArgumentError] if the regexp cannot be compiled
    def regexp
      @regexp ||= /\p{#{self}}/u
    rescue RegexpError, SyntaxError
      raise ArgumentError, "Unknown property name #{self}"
    end

    if const_defined?(:OnigRegexpPropertyHelper)
      # C extension loaded

      # @return [Array<Integer>] every codepoint in matched_ranges
      def matched_codepoints
        matched_ranges.flat_map(&:to_a)
      end

      # @return [Array<Range>] ranges as reported by OnigRegexpPropertyHelper
      def matched_ranges
        OnigRegexpPropertyHelper.matched_ranges(self)
      end

      # @return [Array<String>] one UTF-8 String per matched codepoint
      def matched_characters
        matched_codepoints.map { |cp| cp.chr('utf-8') }
      end
    else
      # Ruby fallback - much slower than the C extension, since every
      # codepoint has to be tried against the regexp.

      # One String holding every codepoint from 0 to 0x10FFFF except the
      # surrogates (0xD800..0xDFFF), in ascending order. Built once.
      def self.all_characters
        @all_characters ||=
          ((0..0xD7FF).to_a + (0xE000..0x10FFFF).to_a).pack('U*').freeze
      end

      # @return [Array<Integer>] matched codepoints in ascending order
      def matched_codepoints
        matched_characters.map(&:ord)
      end

      # Folds the ascending codepoints into runs of consecutive values.
      # Uses plain Arrays: SortedSet is no longer part of the set stdlib in
      # Ruby 3, and this keeps the result in ascending order.
      #
      # @return [Array<Range>] e.g. [48..57, 65..70, 97..102] for 'AHex'
      def matched_ranges
        matched_codepoints.each_with_object([]) do |codepoint, ranges|
          previous = ranges.last
          if previous && previous.last + 1 == codepoint
            ranges[-1] = previous.first..codepoint
          else
            ranges << (codepoint..codepoint)
          end
        end
      end

      # A single scan over all characters replaces one regexp match per
      # character; a \p{...} expression matches exactly one character, so
      # each scan hit is one matched character.
      #
      # @return [Array<String>] matched characters in ascending order
      def matched_characters
        Extension.all_characters.scan(regexp)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module RegexpPropertyValues
  # Version of this gem. Frozen so the constant cannot be mutated in place.
  VERSION = '0.3.2'.freeze
end
@@ -0,0 +1,31 @@
1
+ lib = File.expand_path("../lib", __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require 'regexp_property_values/version'
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = 'regexp_property_values'
7
+ s.version = RegexpPropertyValues::VERSION
8
+ s.authors = ['Janosch Müller']
9
+ s.email = ['janosch84@gmail.com']
10
+
11
+ s.summary = "Inspect property values supported by Ruby's regex engine"
12
+ s.description = 'This small library lets you see which property values '\
13
+ 'are supported by the regular expression engine of the '\
14
+ 'Ruby version you are running, and what they match.'
15
+ s.homepage = 'https://github.com/janosch-x/regexp_property_values'
16
+ s.license = 'MIT'
17
+
18
+ s.files = `git ls-files -z`.split("\x0").reject do |f|
19
+ f.match(%r{^(test|spec|features)/})
20
+ end
21
+ s.require_paths = ['lib']
22
+
23
+ s.extensions = %w[ext/regexp_property_values/extconf.rb]
24
+
25
+ s.required_ruby_version = '>= 2.0.0'
26
+
27
+ s.add_development_dependency 'bundler', '~> 1.16'
28
+ s.add_development_dependency 'rake', '~> 10.0'
29
+ s.add_development_dependency 'rake-compiler', '~> 1.0'
30
+ s.add_development_dependency 'rspec', '~> 3.0'
31
+ end
metadata ADDED
@@ -0,0 +1,118 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: regexp_property_values
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.2
5
+ platform: java
6
+ authors:
7
+ - Janosch Müller
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-07-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '1.16'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.16'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '10.0'
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.0'
47
+ name: rake-compiler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '3.0'
61
+ name: rspec
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ description: This small library lets you see which property values are supported by
70
+ the regular expression engine of the Ruby version you are running, and what they
71
+ match.
72
+ email:
73
+ - janosch84@gmail.com
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - ".rspec"
80
+ - ".travis.yml"
81
+ - Gemfile
82
+ - LICENSE.txt
83
+ - README.md
84
+ - Rakefile
85
+ - bin/console
86
+ - bin/setup
87
+ - ext/regexp_property_values/extconf.rb
88
+ - ext/regexp_property_values/regexp_property_values.c
89
+ - lib/UnicodeProps.txt
90
+ - lib/regexp_property_values.rb
91
+ - lib/regexp_property_values/extension.rb
92
+ - lib/regexp_property_values/version.rb
93
+ - regexp_property_values.gemspec
94
+ homepage: https://github.com/janosch-x/regexp_property_values
95
+ licenses:
96
+ - MIT
97
+ metadata: {}
98
+ post_install_message:
99
+ rdoc_options: []
100
+ require_paths:
101
+ - lib
102
+ required_ruby_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: 2.0.0
107
+ required_rubygems_version: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ requirements: []
113
+ rubyforge_project:
114
+ rubygems_version: 2.6.14
115
+ signing_key:
116
+ specification_version: 4
117
+ summary: Inspect property values supported by Ruby's regex engine
118
+ test_files: []