regexp_parser 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -0,0 +1,31 @@
1
+ %%{
2
+ machine re_char_type;
3
+
4
+ single_codepoint_char_type = [dDhHsSwW];
5
+ multi_codepoint_char_type = [RX];
6
+
7
+ char_type_char = single_codepoint_char_type | multi_codepoint_char_type;
8
+
9
+ # Char types scanner
10
+ # --------------------------------------------------------------------------
11
+ char_type := |*
12
+ char_type_char {
13
+ case text = text(data, ts, te, 1).first
14
+ when '\d'; emit(:type, :digit, text, ts - 1, te)
15
+ when '\D'; emit(:type, :nondigit, text, ts - 1, te)
16
+ when '\h'; emit(:type, :hex, text, ts - 1, te)
17
+ when '\H'; emit(:type, :nonhex, text, ts - 1, te)
18
+ when '\s'; emit(:type, :space, text, ts - 1, te)
19
+ when '\S'; emit(:type, :nonspace, text, ts - 1, te)
20
+ when '\w'; emit(:type, :word, text, ts - 1, te)
21
+ when '\W'; emit(:type, :nonword, text, ts - 1, te)
22
+ when '\R'; emit(:type, :linebreak, text, ts - 1, te)
23
+ when '\X'; emit(:type, :xgrapheme, text, ts - 1, te)
24
+ else
25
+ raise ScannerError.new(
26
+ "Unexpected character in type at #{text} (char #{ts})")
27
+ end
28
+ fret;
29
+ };
30
+ *|;
31
+ }%%
@@ -0,0 +1,561 @@
1
+ #
2
+ # THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT
3
+ #
4
+ ---
5
+ adlam: adlam
6
+ age=1.1: age=1.1
7
+ age=10.0: age=10.0
8
+ age=2.0: age=2.0
9
+ age=2.1: age=2.1
10
+ age=3.0: age=3.0
11
+ age=3.1: age=3.1
12
+ age=3.2: age=3.2
13
+ age=4.0: age=4.0
14
+ age=4.1: age=4.1
15
+ age=5.0: age=5.0
16
+ age=5.1: age=5.1
17
+ age=5.2: age=5.2
18
+ age=6.0: age=6.0
19
+ age=6.1: age=6.1
20
+ age=6.2: age=6.2
21
+ age=6.3: age=6.3
22
+ age=7.0: age=7.0
23
+ age=8.0: age=8.0
24
+ age=9.0: age=9.0
25
+ ahom: ahom
26
+ alnum: alnum
27
+ alpha: alpha
28
+ alphabetic: alphabetic
29
+ anatolianhieroglyphs: anatolian_hieroglyphs
30
+ any: any
31
+ arabic: arabic
32
+ armenian: armenian
33
+ ascii: ascii
34
+ asciihexdigit: ascii_hex_digit
35
+ assigned: assigned
36
+ avestan: avestan
37
+ balinese: balinese
38
+ bamum: bamum
39
+ bassavah: bassa_vah
40
+ batak: batak
41
+ bengali: bengali
42
+ bhaiksuki: bhaiksuki
43
+ bidicontrol: bidi_control
44
+ blank: blank
45
+ bopomofo: bopomofo
46
+ brahmi: brahmi
47
+ braille: braille
48
+ buginese: buginese
49
+ buhid: buhid
50
+ canadianaboriginal: canadian_aboriginal
51
+ carian: carian
52
+ cased: cased
53
+ casedletter: cased_letter
54
+ caseignorable: case_ignorable
55
+ caucasianalbanian: caucasian_albanian
56
+ chakma: chakma
57
+ cham: cham
58
+ changeswhencasefolded: changes_when_casefolded
59
+ changeswhencasemapped: changes_when_casemapped
60
+ changeswhenlowercased: changes_when_lowercased
61
+ changeswhentitlecased: changes_when_titlecased
62
+ changeswhenuppercased: changes_when_uppercased
63
+ cherokee: cherokee
64
+ closepunctuation: close_punctuation
65
+ cntrl: cntrl
66
+ combiningmark: combining_mark
67
+ common: common
68
+ connectorpunctuation: connector_punctuation
69
+ control: control
70
+ coptic: coptic
71
+ cuneiform: cuneiform
72
+ currencysymbol: currency_symbol
73
+ cypriot: cypriot
74
+ cyrillic: cyrillic
75
+ dash: dash
76
+ dashpunctuation: dash_punctuation
77
+ decimalnumber: decimal_number
78
+ defaultignorablecodepoint: default_ignorable_code_point
79
+ deprecated: deprecated
80
+ deseret: deseret
81
+ devanagari: devanagari
82
+ diacritic: diacritic
83
+ digit: digit
84
+ duployan: duployan
85
+ egyptianhieroglyphs: egyptian_hieroglyphs
86
+ elbasan: elbasan
87
+ emoji: emoji
88
+ emojicomponent: emoji_component
89
+ emojimodifier: emoji_modifier
90
+ emojimodifierbase: emoji_modifier_base
91
+ emojipresentation: emoji_presentation
92
+ enclosingmark: enclosing_mark
93
+ ethiopic: ethiopic
94
+ extender: extender
95
+ finalpunctuation: final_punctuation
96
+ format: format
97
+ georgian: georgian
98
+ glagolitic: glagolitic
99
+ gothic: gothic
100
+ grantha: grantha
101
+ graph: graph
102
+ graphemebase: grapheme_base
103
+ graphemeextend: grapheme_extend
104
+ graphemelink: grapheme_link
105
+ greek: greek
106
+ gujarati: gujarati
107
+ gurmukhi: gurmukhi
108
+ han: han
109
+ hangul: hangul
110
+ hanunoo: hanunoo
111
+ hatran: hatran
112
+ hebrew: hebrew
113
+ hexdigit: hex_digit
114
+ hiragana: hiragana
115
+ hyphen: hyphen
116
+ idcontinue: id_continue
117
+ ideographic: ideographic
118
+ idsbinaryoperator: ids_binary_operator
119
+ idstart: id_start
120
+ idstrinaryoperator: ids_trinary_operator
121
+ imperialaramaic: imperial_aramaic
122
+ inadlam: in_adlam
123
+ inaegeannumbers: in_aegean_numbers
124
+ inahom: in_ahom
125
+ inalchemicalsymbols: in_alchemical_symbols
126
+ inalphabeticpresentationforms: in_alphabetic_presentation_forms
127
+ inanatolianhieroglyphs: in_anatolian_hieroglyphs
128
+ inancientgreekmusicalnotation: in_ancient_greek_musical_notation
129
+ inancientgreeknumbers: in_ancient_greek_numbers
130
+ inancientsymbols: in_ancient_symbols
131
+ inarabic: in_arabic
132
+ inarabicextendeda: in_arabic_extended_a
133
+ inarabicmathematicalalphabeticsymbols: in_arabic_mathematical_alphabetic_symbols
134
+ inarabicpresentationformsa: in_arabic_presentation_forms_a
135
+ inarabicpresentationformsb: in_arabic_presentation_forms_b
136
+ inarabicsupplement: in_arabic_supplement
137
+ inarmenian: in_armenian
138
+ inarrows: in_arrows
139
+ inavestan: in_avestan
140
+ inbalinese: in_balinese
141
+ inbamum: in_bamum
142
+ inbamumsupplement: in_bamum_supplement
143
+ inbasiclatin: in_basic_latin
144
+ inbassavah: in_bassa_vah
145
+ inbatak: in_batak
146
+ inbengali: in_bengali
147
+ inbhaiksuki: in_bhaiksuki
148
+ inblockelements: in_block_elements
149
+ inbopomofo: in_bopomofo
150
+ inbopomofoextended: in_bopomofo_extended
151
+ inboxdrawing: in_box_drawing
152
+ inbrahmi: in_brahmi
153
+ inbraillepatterns: in_braille_patterns
154
+ inbuginese: in_buginese
155
+ inbuhid: in_buhid
156
+ inbyzantinemusicalsymbols: in_byzantine_musical_symbols
157
+ incarian: in_carian
158
+ incaucasianalbanian: in_caucasian_albanian
159
+ inchakma: in_chakma
160
+ incham: in_cham
161
+ incherokee: in_cherokee
162
+ incherokeesupplement: in_cherokee_supplement
163
+ incjkcompatibility: in_cjk_compatibility
164
+ incjkcompatibilityforms: in_cjk_compatibility_forms
165
+ incjkcompatibilityideographs: in_cjk_compatibility_ideographs
166
+ incjkcompatibilityideographssupplement: in_cjk_compatibility_ideographs_supplement
167
+ incjkradicalssupplement: in_cjk_radicals_supplement
168
+ incjkstrokes: in_cjk_strokes
169
+ incjksymbolsandpunctuation: in_cjk_symbols_and_punctuation
170
+ incjkunifiedideographs: in_cjk_unified_ideographs
171
+ incjkunifiedideographsextensiona: in_cjk_unified_ideographs_extension_a
172
+ incjkunifiedideographsextensionb: in_cjk_unified_ideographs_extension_b
173
+ incjkunifiedideographsextensionc: in_cjk_unified_ideographs_extension_c
174
+ incjkunifiedideographsextensiond: in_cjk_unified_ideographs_extension_d
175
+ incjkunifiedideographsextensione: in_cjk_unified_ideographs_extension_e
176
+ incjkunifiedideographsextensionf: in_cjk_unified_ideographs_extension_f
177
+ incombiningdiacriticalmarks: in_combining_diacritical_marks
178
+ incombiningdiacriticalmarksextended: in_combining_diacritical_marks_extended
179
+ incombiningdiacriticalmarksforsymbols: in_combining_diacritical_marks_for_symbols
180
+ incombiningdiacriticalmarkssupplement: in_combining_diacritical_marks_supplement
181
+ incombininghalfmarks: in_combining_half_marks
182
+ incommonindicnumberforms: in_common_indic_number_forms
183
+ incontrolpictures: in_control_pictures
184
+ incoptic: in_coptic
185
+ incopticepactnumbers: in_coptic_epact_numbers
186
+ incountingrodnumerals: in_counting_rod_numerals
187
+ incuneiform: in_cuneiform
188
+ incuneiformnumbersandpunctuation: in_cuneiform_numbers_and_punctuation
189
+ incurrencysymbols: in_currency_symbols
190
+ incypriotsyllabary: in_cypriot_syllabary
191
+ incyrillic: in_cyrillic
192
+ incyrillicextendeda: in_cyrillic_extended_a
193
+ incyrillicextendedb: in_cyrillic_extended_b
194
+ incyrillicextendedc: in_cyrillic_extended_c
195
+ incyrillicsupplement: in_cyrillic_supplement
196
+ indeseret: in_deseret
197
+ indevanagari: in_devanagari
198
+ indevanagariextended: in_devanagari_extended
199
+ indingbats: in_dingbats
200
+ indominotiles: in_domino_tiles
201
+ induployan: in_duployan
202
+ inearlydynasticcuneiform: in_early_dynastic_cuneiform
203
+ inegyptianhieroglyphs: in_egyptian_hieroglyphs
204
+ inelbasan: in_elbasan
205
+ inemoticons: in_emoticons
206
+ inenclosedalphanumerics: in_enclosed_alphanumerics
207
+ inenclosedalphanumericsupplement: in_enclosed_alphanumeric_supplement
208
+ inenclosedcjklettersandmonths: in_enclosed_cjk_letters_and_months
209
+ inenclosedideographicsupplement: in_enclosed_ideographic_supplement
210
+ inethiopic: in_ethiopic
211
+ inethiopicextended: in_ethiopic_extended
212
+ inethiopicextendeda: in_ethiopic_extended_a
213
+ inethiopicsupplement: in_ethiopic_supplement
214
+ ingeneralpunctuation: in_general_punctuation
215
+ ingeometricshapes: in_geometric_shapes
216
+ ingeometricshapesextended: in_geometric_shapes_extended
217
+ ingeorgian: in_georgian
218
+ ingeorgiansupplement: in_georgian_supplement
219
+ inglagolitic: in_glagolitic
220
+ inglagoliticsupplement: in_glagolitic_supplement
221
+ ingothic: in_gothic
222
+ ingrantha: in_grantha
223
+ ingreekandcoptic: in_greek_and_coptic
224
+ ingreekextended: in_greek_extended
225
+ ingujarati: in_gujarati
226
+ ingurmukhi: in_gurmukhi
227
+ inhalfwidthandfullwidthforms: in_halfwidth_and_fullwidth_forms
228
+ inhangulcompatibilityjamo: in_hangul_compatibility_jamo
229
+ inhanguljamo: in_hangul_jamo
230
+ inhanguljamoextendeda: in_hangul_jamo_extended_a
231
+ inhanguljamoextendedb: in_hangul_jamo_extended_b
232
+ inhangulsyllables: in_hangul_syllables
233
+ inhanunoo: in_hanunoo
234
+ inhatran: in_hatran
235
+ inhebrew: in_hebrew
236
+ inherited: inherited
237
+ inhighprivateusesurrogates: in_high_private_use_surrogates
238
+ inhighsurrogates: in_high_surrogates
239
+ inhiragana: in_hiragana
240
+ inideographicdescriptioncharacters: in_ideographic_description_characters
241
+ inideographicsymbolsandpunctuation: in_ideographic_symbols_and_punctuation
242
+ inimperialaramaic: in_imperial_aramaic
243
+ ininscriptionalpahlavi: in_inscriptional_pahlavi
244
+ ininscriptionalparthian: in_inscriptional_parthian
245
+ inipaextensions: in_ipa_extensions
246
+ initialpunctuation: initial_punctuation
247
+ injavanese: in_javanese
248
+ inkaithi: in_kaithi
249
+ inkanaextendeda: in_kana_extended_a
250
+ inkanasupplement: in_kana_supplement
251
+ inkanbun: in_kanbun
252
+ inkangxiradicals: in_kangxi_radicals
253
+ inkannada: in_kannada
254
+ inkatakana: in_katakana
255
+ inkatakanaphoneticextensions: in_katakana_phonetic_extensions
256
+ inkayahli: in_kayah_li
257
+ inkharoshthi: in_kharoshthi
258
+ inkhmer: in_khmer
259
+ inkhmersymbols: in_khmer_symbols
260
+ inkhojki: in_khojki
261
+ inkhudawadi: in_khudawadi
262
+ inlao: in_lao
263
+ inlatin1supplement: in_latin_1_supplement
264
+ inlatinextendeda: in_latin_extended_a
265
+ inlatinextendedadditional: in_latin_extended_additional
266
+ inlatinextendedb: in_latin_extended_b
267
+ inlatinextendedc: in_latin_extended_c
268
+ inlatinextendedd: in_latin_extended_d
269
+ inlatinextendede: in_latin_extended_e
270
+ inlepcha: in_lepcha
271
+ inletterlikesymbols: in_letterlike_symbols
272
+ inlimbu: in_limbu
273
+ inlineara: in_linear_a
274
+ inlinearbideograms: in_linear_b_ideograms
275
+ inlinearbsyllabary: in_linear_b_syllabary
276
+ inlisu: in_lisu
277
+ inlowsurrogates: in_low_surrogates
278
+ inlycian: in_lycian
279
+ inlydian: in_lydian
280
+ inmahajani: in_mahajani
281
+ inmahjongtiles: in_mahjong_tiles
282
+ inmalayalam: in_malayalam
283
+ inmandaic: in_mandaic
284
+ inmanichaean: in_manichaean
285
+ inmarchen: in_marchen
286
+ inmasaramgondi: in_masaram_gondi
287
+ inmathematicalalphanumericsymbols: in_mathematical_alphanumeric_symbols
288
+ inmathematicaloperators: in_mathematical_operators
289
+ inmeeteimayek: in_meetei_mayek
290
+ inmeeteimayekextensions: in_meetei_mayek_extensions
291
+ inmendekikakui: in_mende_kikakui
292
+ inmeroiticcursive: in_meroitic_cursive
293
+ inmeroitichieroglyphs: in_meroitic_hieroglyphs
294
+ inmiao: in_miao
295
+ inmiscellaneousmathematicalsymbolsa: in_miscellaneous_mathematical_symbols_a
296
+ inmiscellaneousmathematicalsymbolsb: in_miscellaneous_mathematical_symbols_b
297
+ inmiscellaneoussymbols: in_miscellaneous_symbols
298
+ inmiscellaneoussymbolsandarrows: in_miscellaneous_symbols_and_arrows
299
+ inmiscellaneoussymbolsandpictographs: in_miscellaneous_symbols_and_pictographs
300
+ inmiscellaneoustechnical: in_miscellaneous_technical
301
+ inmodi: in_modi
302
+ inmodifiertoneletters: in_modifier_tone_letters
303
+ inmongolian: in_mongolian
304
+ inmongoliansupplement: in_mongolian_supplement
305
+ inmro: in_mro
306
+ inmultani: in_multani
307
+ inmusicalsymbols: in_musical_symbols
308
+ inmyanmar: in_myanmar
309
+ inmyanmarextendeda: in_myanmar_extended_a
310
+ inmyanmarextendedb: in_myanmar_extended_b
311
+ innabataean: in_nabataean
312
+ innewa: in_newa
313
+ innewtailue: in_new_tai_lue
314
+ innko: in_nko
315
+ innoblock: in_no_block
316
+ innumberforms: in_number_forms
317
+ innushu: in_nushu
318
+ inogham: in_ogham
319
+ inolchiki: in_ol_chiki
320
+ inoldhungarian: in_old_hungarian
321
+ inolditalic: in_old_italic
322
+ inoldnortharabian: in_old_north_arabian
323
+ inoldpermic: in_old_permic
324
+ inoldpersian: in_old_persian
325
+ inoldsoutharabian: in_old_south_arabian
326
+ inoldturkic: in_old_turkic
327
+ inopticalcharacterrecognition: in_optical_character_recognition
328
+ inoriya: in_oriya
329
+ inornamentaldingbats: in_ornamental_dingbats
330
+ inosage: in_osage
331
+ inosmanya: in_osmanya
332
+ inpahawhhmong: in_pahawh_hmong
333
+ inpalmyrene: in_palmyrene
334
+ inpaucinhau: in_pau_cin_hau
335
+ inphagspa: in_phags_pa
336
+ inphaistosdisc: in_phaistos_disc
337
+ inphoenician: in_phoenician
338
+ inphoneticextensions: in_phonetic_extensions
339
+ inphoneticextensionssupplement: in_phonetic_extensions_supplement
340
+ inplayingcards: in_playing_cards
341
+ inprivateusearea: in_private_use_area
342
+ inpsalterpahlavi: in_psalter_pahlavi
343
+ inrejang: in_rejang
344
+ inruminumeralsymbols: in_rumi_numeral_symbols
345
+ inrunic: in_runic
346
+ insamaritan: in_samaritan
347
+ insaurashtra: in_saurashtra
348
+ inscriptionalpahlavi: inscriptional_pahlavi
349
+ inscriptionalparthian: inscriptional_parthian
350
+ insharada: in_sharada
351
+ inshavian: in_shavian
352
+ inshorthandformatcontrols: in_shorthand_format_controls
353
+ insiddham: in_siddham
354
+ insinhala: in_sinhala
355
+ insinhalaarchaicnumbers: in_sinhala_archaic_numbers
356
+ insmallformvariants: in_small_form_variants
357
+ insorasompeng: in_sora_sompeng
358
+ insoyombo: in_soyombo
359
+ inspacingmodifierletters: in_spacing_modifier_letters
360
+ inspecials: in_specials
361
+ insundanese: in_sundanese
362
+ insundanesesupplement: in_sundanese_supplement
363
+ insuperscriptsandsubscripts: in_superscripts_and_subscripts
364
+ insupplementalarrowsa: in_supplemental_arrows_a
365
+ insupplementalarrowsb: in_supplemental_arrows_b
366
+ insupplementalarrowsc: in_supplemental_arrows_c
367
+ insupplementalmathematicaloperators: in_supplemental_mathematical_operators
368
+ insupplementalpunctuation: in_supplemental_punctuation
369
+ insupplementalsymbolsandpictographs: in_supplemental_symbols_and_pictographs
370
+ insupplementaryprivateuseareaa: in_supplementary_private_use_area_a
371
+ insupplementaryprivateuseareab: in_supplementary_private_use_area_b
372
+ insuttonsignwriting: in_sutton_signwriting
373
+ insylotinagri: in_syloti_nagri
374
+ insyriac: in_syriac
375
+ insyriacsupplement: in_syriac_supplement
376
+ intagalog: in_tagalog
377
+ intagbanwa: in_tagbanwa
378
+ intags: in_tags
379
+ intaile: in_tai_le
380
+ intaitham: in_tai_tham
381
+ intaiviet: in_tai_viet
382
+ intaixuanjingsymbols: in_tai_xuan_jing_symbols
383
+ intakri: in_takri
384
+ intamil: in_tamil
385
+ intangut: in_tangut
386
+ intangutcomponents: in_tangut_components
387
+ intelugu: in_telugu
388
+ inthaana: in_thaana
389
+ inthai: in_thai
390
+ intibetan: in_tibetan
391
+ intifinagh: in_tifinagh
392
+ intirhuta: in_tirhuta
393
+ intransportandmapsymbols: in_transport_and_map_symbols
394
+ inugaritic: in_ugaritic
395
+ inunifiedcanadianaboriginalsyllabics: in_unified_canadian_aboriginal_syllabics
396
+ inunifiedcanadianaboriginalsyllabicsextended: in_unified_canadian_aboriginal_syllabics_extended
397
+ invai: in_vai
398
+ invariationselectors: in_variation_selectors
399
+ invariationselectorssupplement: in_variation_selectors_supplement
400
+ invedicextensions: in_vedic_extensions
401
+ inverticalforms: in_vertical_forms
402
+ inwarangciti: in_warang_citi
403
+ inyijinghexagramsymbols: in_yijing_hexagram_symbols
404
+ inyiradicals: in_yi_radicals
405
+ inyisyllables: in_yi_syllables
406
+ inzanabazarsquare: in_zanabazar_square
407
+ javanese: javanese
408
+ joincontrol: join_control
409
+ kaithi: kaithi
410
+ kannada: kannada
411
+ katakana: katakana
412
+ kayahli: kayah_li
413
+ kharoshthi: kharoshthi
414
+ khmer: khmer
415
+ khojki: khojki
416
+ khudawadi: khudawadi
417
+ lao: lao
418
+ latin: latin
419
+ lepcha: lepcha
420
+ letter: letter
421
+ letternumber: letter_number
422
+ limbu: limbu
423
+ lineara: linear_a
424
+ linearb: linear_b
425
+ lineseparator: line_separator
426
+ lisu: lisu
427
+ logicalorderexception: logical_order_exception
428
+ lower: lower
429
+ lowercase: lowercase
430
+ lowercaseletter: lowercase_letter
431
+ lycian: lycian
432
+ lydian: lydian
433
+ mahajani: mahajani
434
+ malayalam: malayalam
435
+ mandaic: mandaic
436
+ manichaean: manichaean
437
+ marchen: marchen
438
+ mark: mark
439
+ masaramgondi: masaram_gondi
440
+ math: math
441
+ mathsymbol: math_symbol
442
+ meeteimayek: meetei_mayek
443
+ mendekikakui: mende_kikakui
444
+ meroiticcursive: meroitic_cursive
445
+ meroitichieroglyphs: meroitic_hieroglyphs
446
+ miao: miao
447
+ modi: modi
448
+ modifierletter: modifier_letter
449
+ modifiersymbol: modifier_symbol
450
+ mongolian: mongolian
451
+ mro: mro
452
+ multani: multani
453
+ myanmar: myanmar
454
+ nabataean: nabataean
455
+ newa: newa
456
+ newline: newline
457
+ newtailue: new_tai_lue
458
+ nko: nko
459
+ noncharactercodepoint: noncharacter_code_point
460
+ nonspacingmark: nonspacing_mark
461
+ number: number
462
+ nushu: nushu
463
+ ogham: ogham
464
+ olchiki: ol_chiki
465
+ oldhungarian: old_hungarian
466
+ olditalic: old_italic
467
+ oldnortharabian: old_north_arabian
468
+ oldpermic: old_permic
469
+ oldpersian: old_persian
470
+ oldsoutharabian: old_south_arabian
471
+ oldturkic: old_turkic
472
+ openpunctuation: open_punctuation
473
+ oriya: oriya
474
+ osage: osage
475
+ osmanya: osmanya
476
+ other: other
477
+ otheralphabetic: other_alphabetic
478
+ otherdefaultignorablecodepoint: other_default_ignorable_code_point
479
+ othergraphemeextend: other_grapheme_extend
480
+ otheridcontinue: other_id_continue
481
+ otheridstart: other_id_start
482
+ otherletter: other_letter
483
+ otherlowercase: other_lowercase
484
+ othermath: other_math
485
+ othernumber: other_number
486
+ otherpunctuation: other_punctuation
487
+ othersymbol: other_symbol
488
+ otheruppercase: other_uppercase
489
+ pahawhhmong: pahawh_hmong
490
+ palmyrene: palmyrene
491
+ paragraphseparator: paragraph_separator
492
+ patternsyntax: pattern_syntax
493
+ patternwhitespace: pattern_white_space
494
+ paucinhau: pau_cin_hau
495
+ phagspa: phags_pa
496
+ phoenician: phoenician
497
+ prependedconcatenationmark: prepended_concatenation_mark
498
+ print: print
499
+ privateuse: private_use
500
+ psalterpahlavi: psalter_pahlavi
501
+ punct: punct
502
+ punctuation: punctuation
503
+ quotationmark: quotation_mark
504
+ radical: radical
505
+ regionalindicator: regional_indicator
506
+ rejang: rejang
507
+ runic: runic
508
+ samaritan: samaritan
509
+ saurashtra: saurashtra
510
+ sentenceterminal: sentence_terminal
511
+ separator: separator
512
+ sharada: sharada
513
+ shavian: shavian
514
+ siddham: siddham
515
+ signwriting: signwriting
516
+ sinhala: sinhala
517
+ softdotted: soft_dotted
518
+ sorasompeng: sora_sompeng
519
+ soyombo: soyombo
520
+ space: space
521
+ spaceseparator: space_separator
522
+ spacingmark: spacing_mark
523
+ sundanese: sundanese
524
+ surrogate: surrogate
525
+ sylotinagri: syloti_nagri
526
+ symbol: symbol
527
+ syriac: syriac
528
+ tagalog: tagalog
529
+ tagbanwa: tagbanwa
530
+ taile: tai_le
531
+ taitham: tai_tham
532
+ taiviet: tai_viet
533
+ takri: takri
534
+ tamil: tamil
535
+ tangut: tangut
536
+ telugu: telugu
537
+ terminalpunctuation: terminal_punctuation
538
+ thaana: thaana
539
+ thai: thai
540
+ tibetan: tibetan
541
+ tifinagh: tifinagh
542
+ tirhuta: tirhuta
543
+ titlecaseletter: titlecase_letter
544
+ ugaritic: ugaritic
545
+ unassigned: unassigned
546
+ unifiedideograph: unified_ideograph
547
+ unknown: unknown
548
+ upper: upper
549
+ uppercase: uppercase
550
+ uppercaseletter: uppercase_letter
551
+ vai: vai
552
+ variationselector: variation_selector
553
+ warangciti: warang_citi
554
+ whitespace: white_space
555
+ word: word
556
+ xdigit: xdigit
557
+ xidcontinue: xid_continue
558
+ xidstart: xid_start
559
+ xposixpunct: xposixpunct
560
+ yi: yi
561
+ zanabazarsquare: zanabazar_square