regexp_property_values 0.3.5-java → 1.0.0-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,133 @@
1
module RegexpPropertyValues
  # Regenerates the property value and alias lists of this library from the
  # Unicode data files that match the Unicode / Emoji versions reported by the
  # running Ruby (see RbConfig). The files are fetched with `wget` into a
  # temporary directory, parsed, and the results are written to
  # RegexpPropertyValues::VALUES_PATH and RegexpPropertyValues::ALIASES_PATH
  # (both defined outside of this module).
  module Updater
    module_function

    require 'fileutils'
    require 'set'

    # Root of the public Unicode data directory. File.join is used to build
    # URLs from it, so the trailing slash does not lead to "//" in the path.
    BASE_URL = 'http://www.unicode.org/Public/'.freeze

    # Files fetched from "<BASE_URL>/<unicode version>/ucd/".
    UCD_FILES = %w[
      Blocks.txt
      DerivedAge.txt
      DerivedCoreProperties.txt
      PropertyAliases.txt
      PropertyValueAliases.txt
      PropList.txt
      Scripts.txt
    ].freeze

    # Files fetched from "<BASE_URL>/emoji/<emoji version>/".
    EMOJI_FILES = %w[
      emoji-data.txt
    ].freeze

    # Scratch directory for the downloaded files; wiped before and after use.
    TMP_DIR = File.join(__dir__, 'tmp_ucd').freeze

    # Entry point. Downloads the data files (after asking for confirmation on
    # stdin), rewrites the values and aliases files and prints a summary.
    #
    # When the user declines the download nothing is written: the temporary
    # directory is empty at that point, so parsing could only fail.
    # The temporary directory is removed in all cases, including errors.
    #
    # @return [nil]
    def call
      prepare_tmp_dir
      return unless download_ucd_files

      write_values
      write_aliases
      print_stats
    ensure
      remove_tmp_dir
    end

    # Creates an empty TMP_DIR, discarding leftovers of earlier runs.
    def prepare_tmp_dir
      FileUtils.rm_rf(TMP_DIR)
      FileUtils.mkdir(TMP_DIR)
    end

    # Asks for confirmation and then downloads all UCD_FILES and EMOJI_FILES
    # into TMP_DIR.
    #
    # @return [Boolean] true if the files were downloaded, false if the user
    #   did not answer with something starting with "y" (or stdin is at EOF)
    # @raise [KeyError] if the running Ruby does not report its Unicode or
    #   Emoji version via RbConfig
    # @raise [RuntimeError] if a download fails
    def download_ucd_files
      unicode_version = RbConfig::CONFIG.fetch('UNICODE_VERSION')
      emoji_version   = RbConfig::CONFIG.fetch('UNICODE_EMOJI_VERSION')
      puts 'This will load ucd and emoji data for the CURRENT RUBY '\
           "(#{unicode_version} / #{emoji_version}). Run this on the "\
           'latest Ruby version you want to support. Continue? [y/n]'
      unless $stdin.gets.to_s.match?(/^y/i)
        puts 'download skipped.'
        return false
      end

      Dir.chdir(TMP_DIR) do
        UCD_FILES.each do |file|
          download(File.join(BASE_URL, unicode_version, 'ucd', file))
        end
        EMOJI_FILES.each do |file|
          download(File.join(BASE_URL, 'emoji', emoji_version, file))
        end
      end
      true
    end

    # Fetches +url+ into the current working directory using wget.
    # The multi-argument form of `system` bypasses the shell, so the URL is
    # never subject to shell interpretation.
    #
    # @raise [RuntimeError] if wget is missing or exits unsuccessfully
    def download(url)
      system('wget', url) || raise("Failed to download #{url}")
    end

    # Collects all known property values into @values and writes them, sorted
    # and newline-separated, to RegexpPropertyValues::VALUES_PATH.
    def write_values
      @values = Set.new

      # posix properties
      @values.merge(%w[
        Alpha Blank Cntrl Digit Graph Lower Print
        Punct Space Upper XDigit Word Alnum ASCII
        XPosixPunct
      ])

      # special properties
      @values.merge(%w[Any Assigned In_No_Block Unknown])

      # legacy properties
      @values.merge(%w[Newline])

      # matches data lines such as "0041..005A ; Latin # ..." and captures
      # the name between the semicolon and the comment marker
      regexp = /^[0-9a-fA-F]+(?:\.\.[0-9a-fA-F]+)? *; (?<prop_name>\w+) +# /
      %w[
        DerivedCoreProperties.txt
        PropList.txt
        Scripts.txt
        emoji-data.txt
      ].each { |file| scan(file, regexp) { |caps| @values << caps[:prop_name] } }

      # general categories: the third field of "gc ; <alias> ; <name>" lines
      scan('PropertyValueAliases.txt', /^gc ; \w+ *; (?<prop_name>\w+)/) do |caps|
        @values << caps[:prop_name]
      end

      # blocks are stored with an "In_" prefix and non-word chars as "_"
      scan('Blocks.txt', /^[\dA-F.]+ *; (?<block_name>[-\w ]+)/) do |caps|
        @values << 'In_' + caps[:block_name].gsub(/\W/, '_')
      end

      scan('DerivedAge.txt', /^[\dA-F.]+ *; (?<age_num>[\d.]+)/) do |caps|
        @values << 'Age=' + caps[:age_num]
      end

      File.write(RegexpPropertyValues::VALUES_PATH, @values.sort.join("\n"))
    end

    # Collects [alias, name] pairs for all names present in @values whose
    # alias is not itself a value, and writes them as sorted "alias;name"
    # lines to RegexpPropertyValues::ALIASES_PATH.
    # Relies on @values, i.e. must run after #write_values.
    def write_aliases
      @aliases = Set.new

      scan('PropertyAliases.txt', /^(?<alias>\w+) *; (?<name>\w+)/) do |caps|
        if in_values?(caps[:name]) && !in_values?(caps[:alias])
          @aliases << [caps[:alias], caps[:name]]
        end
      end

      # general category (gc) and script (sc) lines may carry a second alias
      scan('PropertyValueAliases.txt',
           /^[gs]c ; (?<alias1>\w+) *; (?<name>\w+)(?: *; (?<alias2>\w+))?/) do |caps|
        next unless in_values?(caps[:name])

        [caps[:alias1], caps[:alias2]].each do |alt_name|
          @aliases << [alt_name, caps[:name]] if alt_name && !in_values?(alt_name)
        end
      end

      File.write(RegexpPropertyValues::ALIASES_PATH,
                 @aliases.sort.map { |pair| pair.join(';') }.join("\n"))
    end

    # @return [Boolean] whether +string+ equals any collected value,
    #   ignoring case
    def in_values?(string)
      @values.any? { |value| value.casecmp?(string) }
    end

    # Yields the MatchData of every occurrence of +pattern+ in +file+,
    # which is looked up inside TMP_DIR.
    def scan(file, pattern)
      path = File.join(TMP_DIR, file)
      File.read(path).scan(pattern) { yield(Regexp.last_match) }
    end

    # Deletes TMP_DIR and everything in it; a no-op if it does not exist.
    def remove_tmp_dir
      FileUtils.rm_rf(TMP_DIR)
    end

    # Prints how many values and aliases were collected.
    def print_stats
      print "\nFetched #{@values.size} values and #{@aliases.size} aliases.\n\n"
    end
  end
end
@@ -0,0 +1,14 @@
1
module RegexpPropertyValues
  # A single property value. Its behavior is assembled from SharedMethods
  # plus exactly one of two implementation modules, chosen once at load time:
  # ExtAdapter if the constant OnigRegexpPropertyHelper is defined at that
  # point, RubyFallback otherwise.
  class Value
    require_relative 'value/shared_methods'
    include SharedMethods

    helper_available = const_defined?(:OnigRegexpPropertyHelper)

    # Only the file of the selected implementation is loaded, and only the
    # constant of the selected branch is ever referenced.
    require_relative(helper_available ? 'value/ext_adapter' : 'value/ruby_fallback')
    include(helper_available ? ExtAdapter : RubyFallback)
  end
end
@@ -0,0 +1,19 @@
1
module RegexpPropertyValues
  class Value
    # Implementation of the matched_* methods that delegates the range lookup
    # to OnigRegexpPropertyHelper (defined outside of this file).
    # Expects the including object to provide #name and
    # #raise_unsupported_or_unknown_error.
    module ExtAdapter
      # @return [Array<String>] one UTF-8 string per matched codepoint
      def matched_characters
        matched_codepoints.map do |codepoint|
          codepoint.chr('utf-8')
        end
      end

      # @return [Array<Integer>] all codepoints covered by #matched_ranges
      def matched_codepoints
        matched_ranges.flat_map { |range| range.to_a }
      end

      # @return the ranges reported by the helper for #name
      #   (presumably an Array of Ranges of codepoints -- confirm against
      #   OnigRegexpPropertyHelper)
      # An ArgumentError from the helper is translated via
      # #raise_unsupported_or_unknown_error.
      def matched_ranges
        begin
          OnigRegexpPropertyHelper.matched_ranges(name)
        rescue ArgumentError
          raise_unsupported_or_unknown_error
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module RegexpPropertyValues
  class Value
    # Pure-Ruby implementation of the matched_* methods: finds the matching
    # codepoints by scanning a string of all non-surrogate codepoints with
    # the value's regexp. Expects the including object to provide #regexp.
    module RubyFallback
      # Every codepoint except the surrogates 0xD800..0xDFFF, which cannot
      # be represented as UTF-8 characters.
      SCANNABLE_CODEPOINT_RANGES = [0..0xD7FF, 0xE000..0x10FFFF].freeze

      # The string that is scanned by #matched_codepoints. It is built on
      # first use and then reused (kept in a module-level instance variable
      # rather than a class variable), and frozen because it is shared.
      #
      # @return [String] frozen UTF-8 string of all scannable codepoints in
      #   ascending order
      def self.test_string
        @test_string ||= SCANNABLE_CODEPOINT_RANGES.flat_map do |range|
          range.map { |cp| cp.chr('utf-8') }
        end.join.freeze
      end

      # @return [Array<String>] one UTF-8 string per matched codepoint
      def matched_characters
        matched_codepoints.map { |cp| cp.chr('utf-8') }
      end

      # @return [Array<Integer>] matched codepoints in ascending order
      def matched_codepoints
        # turns out scanning one big string is the least slow way to do this
        RubyFallback.test_string.scan(regexp).flat_map(&:codepoints)
      end

      # @return the result of RangeCompressor.compress for the matched
      #   codepoints
      def matched_ranges
        # loaded lazily so the dependency is only needed when this is called
        require 'range_compressor'
        RangeCompressor.compress(matched_codepoints)
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
module RegexpPropertyValues
  class Value
    # Behavior shared by all Value implementations: identity, equality,
    # name resolution and error reporting.
    module SharedMethods
      # @return the name this value was created with
      attr_reader :name

      # @param name the property name, alias or other spelling of it;
      #   converted with #to_s where a String is needed
      def initialize(name)
        @name = name
      end

      # @return [Boolean] true if a regexp using this property can be built
      #   in the running Ruby, false if building it raises any StandardError
      def supported_by_current_ruby?
        regexp
        true
      rescue StandardError
        false
      end

      # Two values are equal if their normalized identifiers are equal.
      # Objects that do not respond to #identifier (nil, Strings, ...) are
      # never equal to a value; this matters for Hash lookups, because #hash
      # is the same as that of the identifier String.
      #
      # @return [Boolean]
      def ==(other)
        other.respond_to?(:identifier) && identifier == other.identifier
      end
      alias eql? ==

      # @return [Integer] hash of the identifier, consistent with #eql?
      def hash
        @hash ||= identifier.hash
      end

      # @return [String] the name, downcased and stripped of all characters
      #   other than 0-9, a-z, "=" and "."
      def identifier
        @identifier ||= name.to_s.downcase.gsub(/[^0-9a-z=.]/, '')
      end
      alias to_s identifier

      # @return the name of the matching known value
      # @raise [Error] if no known value or alias matches
      def full_name
        original = find_original
        original ? original.name : raise_unknown_error
      end

      # @return the result of CharacterSet.from_ranges for #matched_ranges
      def character_set
        # loaded lazily so the dependency is only needed when this is called
        require 'character_set'
        CharacterSet.from_ranges(*matched_ranges)
      end

      private

      # @return [Regexp] a regexp matching characters with this property
      # @raise [Error] if the running Ruby cannot compile such a regexp
      def regexp
        @regexp ||= /\p{#{identifier}}/u
      rescue RegexpError, SyntaxError
        raise_unsupported_or_unknown_error
      end

      # Looks this value up among RegexpPropertyValues.all first and in
      # RegexpPropertyValues.alias_hash second (both defined elsewhere).
      #
      # @return the matching entry, or a falsy value if there is none
      def find_original
        RegexpPropertyValues.all.find { |orig| orig.eql?(self) } ||
          RegexpPropertyValues.alias_hash[self]
      end

      # Raises the error that fits: "unsupported" for names that can be
      # found via #find_original, "unknown" for all others.
      def raise_unsupported_or_unknown_error
        find_original ? raise_unsupported_error : raise_unknown_error
      end

      def raise_unsupported_error
        raise Error, "Property name `#{name}` is known, but not in this Ruby"
      end

      def raise_unknown_error
        raise Error, "Property name `#{name}` is not known in any Ruby"
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module RegexpPropertyValues
2
- VERSION = '0.3.5'
2
+ VERSION = '1.0.0'
3
3
  end
data/lib/values ADDED
@@ -0,0 +1,590 @@
1
+ ASCII
2
+ ASCII_Hex_Digit
3
+ Adlam
4
+ Age=1.1
5
+ Age=10.0
6
+ Age=11.0
7
+ Age=12.0
8
+ Age=12.1
9
+ Age=2.0
10
+ Age=2.1
11
+ Age=3.0
12
+ Age=3.1
13
+ Age=3.2
14
+ Age=4.0
15
+ Age=4.1
16
+ Age=5.0
17
+ Age=5.1
18
+ Age=5.2
19
+ Age=6.0
20
+ Age=6.1
21
+ Age=6.2
22
+ Age=6.3
23
+ Age=7.0
24
+ Age=8.0
25
+ Age=9.0
26
+ Ahom
27
+ Alnum
28
+ Alpha
29
+ Alphabetic
30
+ Anatolian_Hieroglyphs
31
+ Any
32
+ Arabic
33
+ Armenian
34
+ Assigned
35
+ Avestan
36
+ Balinese
37
+ Bamum
38
+ Bassa_Vah
39
+ Batak
40
+ Bengali
41
+ Bhaiksuki
42
+ Bidi_Control
43
+ Blank
44
+ Bopomofo
45
+ Brahmi
46
+ Braille
47
+ Buginese
48
+ Buhid
49
+ Canadian_Aboriginal
50
+ Carian
51
+ Case_Ignorable
52
+ Cased
53
+ Cased_Letter
54
+ Caucasian_Albanian
55
+ Chakma
56
+ Cham
57
+ Changes_When_Casefolded
58
+ Changes_When_Casemapped
59
+ Changes_When_Lowercased
60
+ Changes_When_Titlecased
61
+ Changes_When_Uppercased
62
+ Cherokee
63
+ Close_Punctuation
64
+ Cntrl
65
+ Common
66
+ Connector_Punctuation
67
+ Control
68
+ Coptic
69
+ Cuneiform
70
+ Currency_Symbol
71
+ Cypriot
72
+ Cyrillic
73
+ Dash
74
+ Dash_Punctuation
75
+ Decimal_Number
76
+ Default_Ignorable_Code_Point
77
+ Deprecated
78
+ Deseret
79
+ Devanagari
80
+ Diacritic
81
+ Digit
82
+ Dogra
83
+ Duployan
84
+ Egyptian_Hieroglyphs
85
+ Elbasan
86
+ Elymaic
87
+ Emoji
88
+ Emoji_Component
89
+ Emoji_Modifier
90
+ Emoji_Modifier_Base
91
+ Emoji_Presentation
92
+ Enclosing_Mark
93
+ Ethiopic
94
+ Extender
95
+ Final_Punctuation
96
+ Format
97
+ Georgian
98
+ Glagolitic
99
+ Gothic
100
+ Grantha
101
+ Graph
102
+ Grapheme_Base
103
+ Grapheme_Extend
104
+ Grapheme_Link
105
+ Greek
106
+ Gujarati
107
+ Gunjala_Gondi
108
+ Gurmukhi
109
+ Han
110
+ Hangul
111
+ Hanifi_Rohingya
112
+ Hanunoo
113
+ Hatran
114
+ Hebrew
115
+ Hex_Digit
116
+ Hiragana
117
+ Hyphen
118
+ IDS_Binary_Operator
119
+ IDS_Trinary_Operator
120
+ ID_Continue
121
+ ID_Start
122
+ Ideographic
123
+ Imperial_Aramaic
124
+ In_Adlam
125
+ In_Aegean_Numbers
126
+ In_Ahom
127
+ In_Alchemical_Symbols
128
+ In_Alphabetic_Presentation_Forms
129
+ In_Anatolian_Hieroglyphs
130
+ In_Ancient_Greek_Musical_Notation
131
+ In_Ancient_Greek_Numbers
132
+ In_Ancient_Symbols
133
+ In_Arabic
134
+ In_Arabic_Extended_A
135
+ In_Arabic_Mathematical_Alphabetic_Symbols
136
+ In_Arabic_Presentation_Forms_A
137
+ In_Arabic_Presentation_Forms_B
138
+ In_Arabic_Supplement
139
+ In_Armenian
140
+ In_Arrows
141
+ In_Avestan
142
+ In_Balinese
143
+ In_Bamum
144
+ In_Bamum_Supplement
145
+ In_Basic_Latin
146
+ In_Bassa_Vah
147
+ In_Batak
148
+ In_Bengali
149
+ In_Bhaiksuki
150
+ In_Block_Elements
151
+ In_Bopomofo
152
+ In_Bopomofo_Extended
153
+ In_Box_Drawing
154
+ In_Brahmi
155
+ In_Braille_Patterns
156
+ In_Buginese
157
+ In_Buhid
158
+ In_Byzantine_Musical_Symbols
159
+ In_CJK_Compatibility
160
+ In_CJK_Compatibility_Forms
161
+ In_CJK_Compatibility_Ideographs
162
+ In_CJK_Compatibility_Ideographs_Supplement
163
+ In_CJK_Radicals_Supplement
164
+ In_CJK_Strokes
165
+ In_CJK_Symbols_and_Punctuation
166
+ In_CJK_Unified_Ideographs
167
+ In_CJK_Unified_Ideographs_Extension_A
168
+ In_CJK_Unified_Ideographs_Extension_B
169
+ In_CJK_Unified_Ideographs_Extension_C
170
+ In_CJK_Unified_Ideographs_Extension_D
171
+ In_CJK_Unified_Ideographs_Extension_E
172
+ In_CJK_Unified_Ideographs_Extension_F
173
+ In_Carian
174
+ In_Caucasian_Albanian
175
+ In_Chakma
176
+ In_Cham
177
+ In_Cherokee
178
+ In_Cherokee_Supplement
179
+ In_Chess_Symbols
180
+ In_Combining_Diacritical_Marks
181
+ In_Combining_Diacritical_Marks_Extended
182
+ In_Combining_Diacritical_Marks_Supplement
183
+ In_Combining_Diacritical_Marks_for_Symbols
184
+ In_Combining_Half_Marks
185
+ In_Common_Indic_Number_Forms
186
+ In_Control_Pictures
187
+ In_Coptic
188
+ In_Coptic_Epact_Numbers
189
+ In_Counting_Rod_Numerals
190
+ In_Cuneiform
191
+ In_Cuneiform_Numbers_and_Punctuation
192
+ In_Currency_Symbols
193
+ In_Cypriot_Syllabary
194
+ In_Cyrillic
195
+ In_Cyrillic_Extended_A
196
+ In_Cyrillic_Extended_B
197
+ In_Cyrillic_Extended_C
198
+ In_Cyrillic_Supplement
199
+ In_Deseret
200
+ In_Devanagari
201
+ In_Devanagari_Extended
202
+ In_Dingbats
203
+ In_Dogra
204
+ In_Domino_Tiles
205
+ In_Duployan
206
+ In_Early_Dynastic_Cuneiform
207
+ In_Egyptian_Hieroglyph_Format_Controls
208
+ In_Egyptian_Hieroglyphs
209
+ In_Elbasan
210
+ In_Elymaic
211
+ In_Emoticons
212
+ In_Enclosed_Alphanumeric_Supplement
213
+ In_Enclosed_Alphanumerics
214
+ In_Enclosed_CJK_Letters_and_Months
215
+ In_Enclosed_Ideographic_Supplement
216
+ In_Ethiopic
217
+ In_Ethiopic_Extended
218
+ In_Ethiopic_Extended_A
219
+ In_Ethiopic_Supplement
220
+ In_General_Punctuation
221
+ In_Geometric_Shapes
222
+ In_Geometric_Shapes_Extended
223
+ In_Georgian
224
+ In_Georgian_Extended
225
+ In_Georgian_Supplement
226
+ In_Glagolitic
227
+ In_Glagolitic_Supplement
228
+ In_Gothic
229
+ In_Grantha
230
+ In_Greek_Extended
231
+ In_Greek_and_Coptic
232
+ In_Gujarati
233
+ In_Gunjala_Gondi
234
+ In_Gurmukhi
235
+ In_Halfwidth_and_Fullwidth_Forms
236
+ In_Hangul_Compatibility_Jamo
237
+ In_Hangul_Jamo
238
+ In_Hangul_Jamo_Extended_A
239
+ In_Hangul_Jamo_Extended_B
240
+ In_Hangul_Syllables
241
+ In_Hanifi_Rohingya
242
+ In_Hanunoo
243
+ In_Hatran
244
+ In_Hebrew
245
+ In_High_Private_Use_Surrogates
246
+ In_High_Surrogates
247
+ In_Hiragana
248
+ In_IPA_Extensions
249
+ In_Ideographic_Description_Characters
250
+ In_Ideographic_Symbols_and_Punctuation
251
+ In_Imperial_Aramaic
252
+ In_Indic_Siyaq_Numbers
253
+ In_Inscriptional_Pahlavi
254
+ In_Inscriptional_Parthian
255
+ In_Javanese
256
+ In_Kaithi
257
+ In_Kana_Extended_A
258
+ In_Kana_Supplement
259
+ In_Kanbun
260
+ In_Kangxi_Radicals
261
+ In_Kannada
262
+ In_Katakana
263
+ In_Katakana_Phonetic_Extensions
264
+ In_Kayah_Li
265
+ In_Kharoshthi
266
+ In_Khmer
267
+ In_Khmer_Symbols
268
+ In_Khojki
269
+ In_Khudawadi
270
+ In_Lao
271
+ In_Latin_1_Supplement
272
+ In_Latin_Extended_A
273
+ In_Latin_Extended_Additional
274
+ In_Latin_Extended_B
275
+ In_Latin_Extended_C
276
+ In_Latin_Extended_D
277
+ In_Latin_Extended_E
278
+ In_Lepcha
279
+ In_Letterlike_Symbols
280
+ In_Limbu
281
+ In_Linear_A
282
+ In_Linear_B_Ideograms
283
+ In_Linear_B_Syllabary
284
+ In_Lisu
285
+ In_Low_Surrogates
286
+ In_Lycian
287
+ In_Lydian
288
+ In_Mahajani
289
+ In_Mahjong_Tiles
290
+ In_Makasar
291
+ In_Malayalam
292
+ In_Mandaic
293
+ In_Manichaean
294
+ In_Marchen
295
+ In_Masaram_Gondi
296
+ In_Mathematical_Alphanumeric_Symbols
297
+ In_Mathematical_Operators
298
+ In_Mayan_Numerals
299
+ In_Medefaidrin
300
+ In_Meetei_Mayek
301
+ In_Meetei_Mayek_Extensions
302
+ In_Mende_Kikakui
303
+ In_Meroitic_Cursive
304
+ In_Meroitic_Hieroglyphs
305
+ In_Miao
306
+ In_Miscellaneous_Mathematical_Symbols_A
307
+ In_Miscellaneous_Mathematical_Symbols_B
308
+ In_Miscellaneous_Symbols
309
+ In_Miscellaneous_Symbols_and_Arrows
310
+ In_Miscellaneous_Symbols_and_Pictographs
311
+ In_Miscellaneous_Technical
312
+ In_Modi
313
+ In_Modifier_Tone_Letters
314
+ In_Mongolian
315
+ In_Mongolian_Supplement
316
+ In_Mro
317
+ In_Multani
318
+ In_Musical_Symbols
319
+ In_Myanmar
320
+ In_Myanmar_Extended_A
321
+ In_Myanmar_Extended_B
322
+ In_NKo
323
+ In_Nabataean
324
+ In_Nandinagari
325
+ In_New_Tai_Lue
326
+ In_Newa
327
+ In_No_Block
328
+ In_Number_Forms
329
+ In_Nushu
330
+ In_Nyiakeng_Puachue_Hmong
331
+ In_Ogham
332
+ In_Ol_Chiki
333
+ In_Old_Hungarian
334
+ In_Old_Italic
335
+ In_Old_North_Arabian
336
+ In_Old_Permic
337
+ In_Old_Persian
338
+ In_Old_Sogdian
339
+ In_Old_South_Arabian
340
+ In_Old_Turkic
341
+ In_Optical_Character_Recognition
342
+ In_Oriya
343
+ In_Ornamental_Dingbats
344
+ In_Osage
345
+ In_Osmanya
346
+ In_Ottoman_Siyaq_Numbers
347
+ In_Pahawh_Hmong
348
+ In_Palmyrene
349
+ In_Pau_Cin_Hau
350
+ In_Phags_pa
351
+ In_Phaistos_Disc
352
+ In_Phoenician
353
+ In_Phonetic_Extensions
354
+ In_Phonetic_Extensions_Supplement
355
+ In_Playing_Cards
356
+ In_Private_Use_Area
357
+ In_Psalter_Pahlavi
358
+ In_Rejang
359
+ In_Rumi_Numeral_Symbols
360
+ In_Runic
361
+ In_Samaritan
362
+ In_Saurashtra
363
+ In_Sharada
364
+ In_Shavian
365
+ In_Shorthand_Format_Controls
366
+ In_Siddham
367
+ In_Sinhala
368
+ In_Sinhala_Archaic_Numbers
369
+ In_Small_Form_Variants
370
+ In_Small_Kana_Extension
371
+ In_Sogdian
372
+ In_Sora_Sompeng
373
+ In_Soyombo
374
+ In_Spacing_Modifier_Letters
375
+ In_Specials
376
+ In_Sundanese
377
+ In_Sundanese_Supplement
378
+ In_Superscripts_and_Subscripts
379
+ In_Supplemental_Arrows_A
380
+ In_Supplemental_Arrows_B
381
+ In_Supplemental_Arrows_C
382
+ In_Supplemental_Mathematical_Operators
383
+ In_Supplemental_Punctuation
384
+ In_Supplemental_Symbols_and_Pictographs
385
+ In_Supplementary_Private_Use_Area_A
386
+ In_Supplementary_Private_Use_Area_B
387
+ In_Sutton_SignWriting
388
+ In_Syloti_Nagri
389
+ In_Symbols_and_Pictographs_Extended_A
390
+ In_Syriac
391
+ In_Syriac_Supplement
392
+ In_Tagalog
393
+ In_Tagbanwa
394
+ In_Tags
395
+ In_Tai_Le
396
+ In_Tai_Tham
397
+ In_Tai_Viet
398
+ In_Tai_Xuan_Jing_Symbols
399
+ In_Takri
400
+ In_Tamil
401
+ In_Tamil_Supplement
402
+ In_Tangut
403
+ In_Tangut_Components
404
+ In_Telugu
405
+ In_Thaana
406
+ In_Thai
407
+ In_Tibetan
408
+ In_Tifinagh
409
+ In_Tirhuta
410
+ In_Transport_and_Map_Symbols
411
+ In_Ugaritic
412
+ In_Unified_Canadian_Aboriginal_Syllabics
413
+ In_Unified_Canadian_Aboriginal_Syllabics_Extended
414
+ In_Vai
415
+ In_Variation_Selectors
416
+ In_Variation_Selectors_Supplement
417
+ In_Vedic_Extensions
418
+ In_Vertical_Forms
419
+ In_Wancho
420
+ In_Warang_Citi
421
+ In_Yi_Radicals
422
+ In_Yi_Syllables
423
+ In_Yijing_Hexagram_Symbols
424
+ In_Zanabazar_Square
425
+ Inherited
426
+ Initial_Punctuation
427
+ Inscriptional_Pahlavi
428
+ Inscriptional_Parthian
429
+ Javanese
430
+ Join_Control
431
+ Kaithi
432
+ Kannada
433
+ Katakana
434
+ Kayah_Li
435
+ Kharoshthi
436
+ Khmer
437
+ Khojki
438
+ Khudawadi
439
+ Lao
440
+ Latin
441
+ Lepcha
442
+ Letter
443
+ Letter_Number
444
+ Limbu
445
+ Line_Separator
446
+ Linear_A
447
+ Linear_B
448
+ Lisu
449
+ Logical_Order_Exception
450
+ Lower
451
+ Lowercase
452
+ Lowercase_Letter
453
+ Lycian
454
+ Lydian
455
+ Mahajani
456
+ Makasar
457
+ Malayalam
458
+ Mandaic
459
+ Manichaean
460
+ Marchen
461
+ Mark
462
+ Masaram_Gondi
463
+ Math
464
+ Math_Symbol
465
+ Medefaidrin
466
+ Meetei_Mayek
467
+ Mende_Kikakui
468
+ Meroitic_Cursive
469
+ Meroitic_Hieroglyphs
470
+ Miao
471
+ Modi
472
+ Modifier_Letter
473
+ Modifier_Symbol
474
+ Mongolian
475
+ Mro
476
+ Multani
477
+ Myanmar
478
+ Nabataean
479
+ Nandinagari
480
+ New_Tai_Lue
481
+ Newa
482
+ Newline
483
+ Nko
484
+ Noncharacter_Code_Point
485
+ Nonspacing_Mark
486
+ Number
487
+ Nushu
488
+ Nyiakeng_Puachue_Hmong
489
+ Ogham
490
+ Ol_Chiki
491
+ Old_Hungarian
492
+ Old_Italic
493
+ Old_North_Arabian
494
+ Old_Permic
495
+ Old_Persian
496
+ Old_Sogdian
497
+ Old_South_Arabian
498
+ Old_Turkic
499
+ Open_Punctuation
500
+ Oriya
501
+ Osage
502
+ Osmanya
503
+ Other
504
+ Other_Alphabetic
505
+ Other_Default_Ignorable_Code_Point
506
+ Other_Grapheme_Extend
507
+ Other_ID_Continue
508
+ Other_ID_Start
509
+ Other_Letter
510
+ Other_Lowercase
511
+ Other_Math
512
+ Other_Number
513
+ Other_Punctuation
514
+ Other_Symbol
515
+ Other_Uppercase
516
+ Pahawh_Hmong
517
+ Palmyrene
518
+ Paragraph_Separator
519
+ Pattern_Syntax
520
+ Pattern_White_Space
521
+ Pau_Cin_Hau
522
+ Phags_Pa
523
+ Phoenician
524
+ Prepended_Concatenation_Mark
525
+ Print
526
+ Private_Use
527
+ Psalter_Pahlavi
528
+ Punct
529
+ Punctuation
530
+ Quotation_Mark
531
+ Radical
532
+ Regional_Indicator
533
+ Rejang
534
+ Runic
535
+ Samaritan
536
+ Saurashtra
537
+ Sentence_Terminal
538
+ Separator
539
+ Sharada
540
+ Shavian
541
+ Siddham
542
+ SignWriting
543
+ Sinhala
544
+ Soft_Dotted
545
+ Sogdian
546
+ Sora_Sompeng
547
+ Soyombo
548
+ Space
549
+ Space_Separator
550
+ Spacing_Mark
551
+ Sundanese
552
+ Surrogate
553
+ Syloti_Nagri
554
+ Symbol
555
+ Syriac
556
+ Tagalog
557
+ Tagbanwa
558
+ Tai_Le
559
+ Tai_Tham
560
+ Tai_Viet
561
+ Takri
562
+ Tamil
563
+ Tangut
564
+ Telugu
565
+ Terminal_Punctuation
566
+ Thaana
567
+ Thai
568
+ Tibetan
569
+ Tifinagh
570
+ Tirhuta
571
+ Titlecase_Letter
572
+ Ugaritic
573
+ Unassigned
574
+ Unified_Ideograph
575
+ Unknown
576
+ Upper
577
+ Uppercase
578
+ Uppercase_Letter
579
+ Vai
580
+ Variation_Selector
581
+ Wancho
582
+ Warang_Citi
583
+ White_Space
584
+ Word
585
+ XDigit
586
+ XID_Continue
587
+ XID_Start
588
+ XPosixPunct
589
+ Yi
590
+ Zanabazar_Square