regexp_parser 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -0,0 +1,31 @@
1
+ %%{
2
+ machine re_char_type;
3
+
4
+ single_codepoint_char_type = [dDhHsSwW];
5
+ multi_codepoint_char_type = [RX];
6
+
7
+ char_type_char = single_codepoint_char_type | multi_codepoint_char_type;
8
+
9
+ # Char types scanner
10
+ # --------------------------------------------------------------------------
11
+ char_type := |*
12
+ char_type_char {
13
+ case text = text(data, ts, te, 1).first
14
+ when '\d'; emit(:type, :digit, text, ts - 1, te)
15
+ when '\D'; emit(:type, :nondigit, text, ts - 1, te)
16
+ when '\h'; emit(:type, :hex, text, ts - 1, te)
17
+ when '\H'; emit(:type, :nonhex, text, ts - 1, te)
18
+ when '\s'; emit(:type, :space, text, ts - 1, te)
19
+ when '\S'; emit(:type, :nonspace, text, ts - 1, te)
20
+ when '\w'; emit(:type, :word, text, ts - 1, te)
21
+ when '\W'; emit(:type, :nonword, text, ts - 1, te)
22
+ when '\R'; emit(:type, :linebreak, text, ts - 1, te)
23
+ when '\X'; emit(:type, :xgrapheme, text, ts - 1, te)
24
+ else
25
+ raise ScannerError.new(
26
+ "Unexpected character in type at #{text} (char #{ts})")
27
+ end
28
+ fret;
29
+ };
30
+ *|;
31
+ }%%
@@ -0,0 +1,561 @@
1
+ #
2
+ # THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT
3
+ #
4
+ ---
5
+ adlam: adlam
6
+ age=1.1: age=1.1
7
+ age=10.0: age=10.0
8
+ age=2.0: age=2.0
9
+ age=2.1: age=2.1
10
+ age=3.0: age=3.0
11
+ age=3.1: age=3.1
12
+ age=3.2: age=3.2
13
+ age=4.0: age=4.0
14
+ age=4.1: age=4.1
15
+ age=5.0: age=5.0
16
+ age=5.1: age=5.1
17
+ age=5.2: age=5.2
18
+ age=6.0: age=6.0
19
+ age=6.1: age=6.1
20
+ age=6.2: age=6.2
21
+ age=6.3: age=6.3
22
+ age=7.0: age=7.0
23
+ age=8.0: age=8.0
24
+ age=9.0: age=9.0
25
+ ahom: ahom
26
+ alnum: alnum
27
+ alpha: alpha
28
+ alphabetic: alphabetic
29
+ anatolianhieroglyphs: anatolian_hieroglyphs
30
+ any: any
31
+ arabic: arabic
32
+ armenian: armenian
33
+ ascii: ascii
34
+ asciihexdigit: ascii_hex_digit
35
+ assigned: assigned
36
+ avestan: avestan
37
+ balinese: balinese
38
+ bamum: bamum
39
+ bassavah: bassa_vah
40
+ batak: batak
41
+ bengali: bengali
42
+ bhaiksuki: bhaiksuki
43
+ bidicontrol: bidi_control
44
+ blank: blank
45
+ bopomofo: bopomofo
46
+ brahmi: brahmi
47
+ braille: braille
48
+ buginese: buginese
49
+ buhid: buhid
50
+ canadianaboriginal: canadian_aboriginal
51
+ carian: carian
52
+ cased: cased
53
+ casedletter: cased_letter
54
+ caseignorable: case_ignorable
55
+ caucasianalbanian: caucasian_albanian
56
+ chakma: chakma
57
+ cham: cham
58
+ changeswhencasefolded: changes_when_casefolded
59
+ changeswhencasemapped: changes_when_casemapped
60
+ changeswhenlowercased: changes_when_lowercased
61
+ changeswhentitlecased: changes_when_titlecased
62
+ changeswhenuppercased: changes_when_uppercased
63
+ cherokee: cherokee
64
+ closepunctuation: close_punctuation
65
+ cntrl: cntrl
66
+ combiningmark: combining_mark
67
+ common: common
68
+ connectorpunctuation: connector_punctuation
69
+ control: control
70
+ coptic: coptic
71
+ cuneiform: cuneiform
72
+ currencysymbol: currency_symbol
73
+ cypriot: cypriot
74
+ cyrillic: cyrillic
75
+ dash: dash
76
+ dashpunctuation: dash_punctuation
77
+ decimalnumber: decimal_number
78
+ defaultignorablecodepoint: default_ignorable_code_point
79
+ deprecated: deprecated
80
+ deseret: deseret
81
+ devanagari: devanagari
82
+ diacritic: diacritic
83
+ digit: digit
84
+ duployan: duployan
85
+ egyptianhieroglyphs: egyptian_hieroglyphs
86
+ elbasan: elbasan
87
+ emoji: emoji
88
+ emojicomponent: emoji_component
89
+ emojimodifier: emoji_modifier
90
+ emojimodifierbase: emoji_modifier_base
91
+ emojipresentation: emoji_presentation
92
+ enclosingmark: enclosing_mark
93
+ ethiopic: ethiopic
94
+ extender: extender
95
+ finalpunctuation: final_punctuation
96
+ format: format
97
+ georgian: georgian
98
+ glagolitic: glagolitic
99
+ gothic: gothic
100
+ grantha: grantha
101
+ graph: graph
102
+ graphemebase: grapheme_base
103
+ graphemeextend: grapheme_extend
104
+ graphemelink: grapheme_link
105
+ greek: greek
106
+ gujarati: gujarati
107
+ gurmukhi: gurmukhi
108
+ han: han
109
+ hangul: hangul
110
+ hanunoo: hanunoo
111
+ hatran: hatran
112
+ hebrew: hebrew
113
+ hexdigit: hex_digit
114
+ hiragana: hiragana
115
+ hyphen: hyphen
116
+ idcontinue: id_continue
117
+ ideographic: ideographic
118
+ idsbinaryoperator: ids_binary_operator
119
+ idstart: id_start
120
+ idstrinaryoperator: ids_trinary_operator
121
+ imperialaramaic: imperial_aramaic
122
+ inadlam: in_adlam
123
+ inaegeannumbers: in_aegean_numbers
124
+ inahom: in_ahom
125
+ inalchemicalsymbols: in_alchemical_symbols
126
+ inalphabeticpresentationforms: in_alphabetic_presentation_forms
127
+ inanatolianhieroglyphs: in_anatolian_hieroglyphs
128
+ inancientgreekmusicalnotation: in_ancient_greek_musical_notation
129
+ inancientgreeknumbers: in_ancient_greek_numbers
130
+ inancientsymbols: in_ancient_symbols
131
+ inarabic: in_arabic
132
+ inarabicextendeda: in_arabic_extended_a
133
+ inarabicmathematicalalphabeticsymbols: in_arabic_mathematical_alphabetic_symbols
134
+ inarabicpresentationformsa: in_arabic_presentation_forms_a
135
+ inarabicpresentationformsb: in_arabic_presentation_forms_b
136
+ inarabicsupplement: in_arabic_supplement
137
+ inarmenian: in_armenian
138
+ inarrows: in_arrows
139
+ inavestan: in_avestan
140
+ inbalinese: in_balinese
141
+ inbamum: in_bamum
142
+ inbamumsupplement: in_bamum_supplement
143
+ inbasiclatin: in_basic_latin
144
+ inbassavah: in_bassa_vah
145
+ inbatak: in_batak
146
+ inbengali: in_bengali
147
+ inbhaiksuki: in_bhaiksuki
148
+ inblockelements: in_block_elements
149
+ inbopomofo: in_bopomofo
150
+ inbopomofoextended: in_bopomofo_extended
151
+ inboxdrawing: in_box_drawing
152
+ inbrahmi: in_brahmi
153
+ inbraillepatterns: in_braille_patterns
154
+ inbuginese: in_buginese
155
+ inbuhid: in_buhid
156
+ inbyzantinemusicalsymbols: in_byzantine_musical_symbols
157
+ incarian: in_carian
158
+ incaucasianalbanian: in_caucasian_albanian
159
+ inchakma: in_chakma
160
+ incham: in_cham
161
+ incherokee: in_cherokee
162
+ incherokeesupplement: in_cherokee_supplement
163
+ incjkcompatibility: in_cjk_compatibility
164
+ incjkcompatibilityforms: in_cjk_compatibility_forms
165
+ incjkcompatibilityideographs: in_cjk_compatibility_ideographs
166
+ incjkcompatibilityideographssupplement: in_cjk_compatibility_ideographs_supplement
167
+ incjkradicalssupplement: in_cjk_radicals_supplement
168
+ incjkstrokes: in_cjk_strokes
169
+ incjksymbolsandpunctuation: in_cjk_symbols_and_punctuation
170
+ incjkunifiedideographs: in_cjk_unified_ideographs
171
+ incjkunifiedideographsextensiona: in_cjk_unified_ideographs_extension_a
172
+ incjkunifiedideographsextensionb: in_cjk_unified_ideographs_extension_b
173
+ incjkunifiedideographsextensionc: in_cjk_unified_ideographs_extension_c
174
+ incjkunifiedideographsextensiond: in_cjk_unified_ideographs_extension_d
175
+ incjkunifiedideographsextensione: in_cjk_unified_ideographs_extension_e
176
+ incjkunifiedideographsextensionf: in_cjk_unified_ideographs_extension_f
177
+ incombiningdiacriticalmarks: in_combining_diacritical_marks
178
+ incombiningdiacriticalmarksextended: in_combining_diacritical_marks_extended
179
+ incombiningdiacriticalmarksforsymbols: in_combining_diacritical_marks_for_symbols
180
+ incombiningdiacriticalmarkssupplement: in_combining_diacritical_marks_supplement
181
+ incombininghalfmarks: in_combining_half_marks
182
+ incommonindicnumberforms: in_common_indic_number_forms
183
+ incontrolpictures: in_control_pictures
184
+ incoptic: in_coptic
185
+ incopticepactnumbers: in_coptic_epact_numbers
186
+ incountingrodnumerals: in_counting_rod_numerals
187
+ incuneiform: in_cuneiform
188
+ incuneiformnumbersandpunctuation: in_cuneiform_numbers_and_punctuation
189
+ incurrencysymbols: in_currency_symbols
190
+ incypriotsyllabary: in_cypriot_syllabary
191
+ incyrillic: in_cyrillic
192
+ incyrillicextendeda: in_cyrillic_extended_a
193
+ incyrillicextendedb: in_cyrillic_extended_b
194
+ incyrillicextendedc: in_cyrillic_extended_c
195
+ incyrillicsupplement: in_cyrillic_supplement
196
+ indeseret: in_deseret
197
+ indevanagari: in_devanagari
198
+ indevanagariextended: in_devanagari_extended
199
+ indingbats: in_dingbats
200
+ indominotiles: in_domino_tiles
201
+ induployan: in_duployan
202
+ inearlydynasticcuneiform: in_early_dynastic_cuneiform
203
+ inegyptianhieroglyphs: in_egyptian_hieroglyphs
204
+ inelbasan: in_elbasan
205
+ inemoticons: in_emoticons
206
+ inenclosedalphanumerics: in_enclosed_alphanumerics
207
+ inenclosedalphanumericsupplement: in_enclosed_alphanumeric_supplement
208
+ inenclosedcjklettersandmonths: in_enclosed_cjk_letters_and_months
209
+ inenclosedideographicsupplement: in_enclosed_ideographic_supplement
210
+ inethiopic: in_ethiopic
211
+ inethiopicextended: in_ethiopic_extended
212
+ inethiopicextendeda: in_ethiopic_extended_a
213
+ inethiopicsupplement: in_ethiopic_supplement
214
+ ingeneralpunctuation: in_general_punctuation
215
+ ingeometricshapes: in_geometric_shapes
216
+ ingeometricshapesextended: in_geometric_shapes_extended
217
+ ingeorgian: in_georgian
218
+ ingeorgiansupplement: in_georgian_supplement
219
+ inglagolitic: in_glagolitic
220
+ inglagoliticsupplement: in_glagolitic_supplement
221
+ ingothic: in_gothic
222
+ ingrantha: in_grantha
223
+ ingreekandcoptic: in_greek_and_coptic
224
+ ingreekextended: in_greek_extended
225
+ ingujarati: in_gujarati
226
+ ingurmukhi: in_gurmukhi
227
+ inhalfwidthandfullwidthforms: in_halfwidth_and_fullwidth_forms
228
+ inhangulcompatibilityjamo: in_hangul_compatibility_jamo
229
+ inhanguljamo: in_hangul_jamo
230
+ inhanguljamoextendeda: in_hangul_jamo_extended_a
231
+ inhanguljamoextendedb: in_hangul_jamo_extended_b
232
+ inhangulsyllables: in_hangul_syllables
233
+ inhanunoo: in_hanunoo
234
+ inhatran: in_hatran
235
+ inhebrew: in_hebrew
236
+ inherited: inherited
237
+ inhighprivateusesurrogates: in_high_private_use_surrogates
238
+ inhighsurrogates: in_high_surrogates
239
+ inhiragana: in_hiragana
240
+ inideographicdescriptioncharacters: in_ideographic_description_characters
241
+ inideographicsymbolsandpunctuation: in_ideographic_symbols_and_punctuation
242
+ inimperialaramaic: in_imperial_aramaic
243
+ ininscriptionalpahlavi: in_inscriptional_pahlavi
244
+ ininscriptionalparthian: in_inscriptional_parthian
245
+ inipaextensions: in_ipa_extensions
246
+ initialpunctuation: initial_punctuation
247
+ injavanese: in_javanese
248
+ inkaithi: in_kaithi
249
+ inkanaextendeda: in_kana_extended_a
250
+ inkanasupplement: in_kana_supplement
251
+ inkanbun: in_kanbun
252
+ inkangxiradicals: in_kangxi_radicals
253
+ inkannada: in_kannada
254
+ inkatakana: in_katakana
255
+ inkatakanaphoneticextensions: in_katakana_phonetic_extensions
256
+ inkayahli: in_kayah_li
257
+ inkharoshthi: in_kharoshthi
258
+ inkhmer: in_khmer
259
+ inkhmersymbols: in_khmer_symbols
260
+ inkhojki: in_khojki
261
+ inkhudawadi: in_khudawadi
262
+ inlao: in_lao
263
+ inlatin1supplement: in_latin_1_supplement
264
+ inlatinextendeda: in_latin_extended_a
265
+ inlatinextendedadditional: in_latin_extended_additional
266
+ inlatinextendedb: in_latin_extended_b
267
+ inlatinextendedc: in_latin_extended_c
268
+ inlatinextendedd: in_latin_extended_d
269
+ inlatinextendede: in_latin_extended_e
270
+ inlepcha: in_lepcha
271
+ inletterlikesymbols: in_letterlike_symbols
272
+ inlimbu: in_limbu
273
+ inlineara: in_linear_a
274
+ inlinearbideograms: in_linear_b_ideograms
275
+ inlinearbsyllabary: in_linear_b_syllabary
276
+ inlisu: in_lisu
277
+ inlowsurrogates: in_low_surrogates
278
+ inlycian: in_lycian
279
+ inlydian: in_lydian
280
+ inmahajani: in_mahajani
281
+ inmahjongtiles: in_mahjong_tiles
282
+ inmalayalam: in_malayalam
283
+ inmandaic: in_mandaic
284
+ inmanichaean: in_manichaean
285
+ inmarchen: in_marchen
286
+ inmasaramgondi: in_masaram_gondi
287
+ inmathematicalalphanumericsymbols: in_mathematical_alphanumeric_symbols
288
+ inmathematicaloperators: in_mathematical_operators
289
+ inmeeteimayek: in_meetei_mayek
290
+ inmeeteimayekextensions: in_meetei_mayek_extensions
291
+ inmendekikakui: in_mende_kikakui
292
+ inmeroiticcursive: in_meroitic_cursive
293
+ inmeroitichieroglyphs: in_meroitic_hieroglyphs
294
+ inmiao: in_miao
295
+ inmiscellaneousmathematicalsymbolsa: in_miscellaneous_mathematical_symbols_a
296
+ inmiscellaneousmathematicalsymbolsb: in_miscellaneous_mathematical_symbols_b
297
+ inmiscellaneoussymbols: in_miscellaneous_symbols
298
+ inmiscellaneoussymbolsandarrows: in_miscellaneous_symbols_and_arrows
299
+ inmiscellaneoussymbolsandpictographs: in_miscellaneous_symbols_and_pictographs
300
+ inmiscellaneoustechnical: in_miscellaneous_technical
301
+ inmodi: in_modi
302
+ inmodifiertoneletters: in_modifier_tone_letters
303
+ inmongolian: in_mongolian
304
+ inmongoliansupplement: in_mongolian_supplement
305
+ inmro: in_mro
306
+ inmultani: in_multani
307
+ inmusicalsymbols: in_musical_symbols
308
+ inmyanmar: in_myanmar
309
+ inmyanmarextendeda: in_myanmar_extended_a
310
+ inmyanmarextendedb: in_myanmar_extended_b
311
+ innabataean: in_nabataean
312
+ innewa: in_newa
313
+ innewtailue: in_new_tai_lue
314
+ innko: in_nko
315
+ innoblock: in_no_block
316
+ innumberforms: in_number_forms
317
+ innushu: in_nushu
318
+ inogham: in_ogham
319
+ inolchiki: in_ol_chiki
320
+ inoldhungarian: in_old_hungarian
321
+ inolditalic: in_old_italic
322
+ inoldnortharabian: in_old_north_arabian
323
+ inoldpermic: in_old_permic
324
+ inoldpersian: in_old_persian
325
+ inoldsoutharabian: in_old_south_arabian
326
+ inoldturkic: in_old_turkic
327
+ inopticalcharacterrecognition: in_optical_character_recognition
328
+ inoriya: in_oriya
329
+ inornamentaldingbats: in_ornamental_dingbats
330
+ inosage: in_osage
331
+ inosmanya: in_osmanya
332
+ inpahawhhmong: in_pahawh_hmong
333
+ inpalmyrene: in_palmyrene
334
+ inpaucinhau: in_pau_cin_hau
335
+ inphagspa: in_phags_pa
336
+ inphaistosdisc: in_phaistos_disc
337
+ inphoenician: in_phoenician
338
+ inphoneticextensions: in_phonetic_extensions
339
+ inphoneticextensionssupplement: in_phonetic_extensions_supplement
340
+ inplayingcards: in_playing_cards
341
+ inprivateusearea: in_private_use_area
342
+ inpsalterpahlavi: in_psalter_pahlavi
343
+ inrejang: in_rejang
344
+ inruminumeralsymbols: in_rumi_numeral_symbols
345
+ inrunic: in_runic
346
+ insamaritan: in_samaritan
347
+ insaurashtra: in_saurashtra
348
+ inscriptionalpahlavi: inscriptional_pahlavi
349
+ inscriptionalparthian: inscriptional_parthian
350
+ insharada: in_sharada
351
+ inshavian: in_shavian
352
+ inshorthandformatcontrols: in_shorthand_format_controls
353
+ insiddham: in_siddham
354
+ insinhala: in_sinhala
355
+ insinhalaarchaicnumbers: in_sinhala_archaic_numbers
356
+ insmallformvariants: in_small_form_variants
357
+ insorasompeng: in_sora_sompeng
358
+ insoyombo: in_soyombo
359
+ inspacingmodifierletters: in_spacing_modifier_letters
360
+ inspecials: in_specials
361
+ insundanese: in_sundanese
362
+ insundanesesupplement: in_sundanese_supplement
363
+ insuperscriptsandsubscripts: in_superscripts_and_subscripts
364
+ insupplementalarrowsa: in_supplemental_arrows_a
365
+ insupplementalarrowsb: in_supplemental_arrows_b
366
+ insupplementalarrowsc: in_supplemental_arrows_c
367
+ insupplementalmathematicaloperators: in_supplemental_mathematical_operators
368
+ insupplementalpunctuation: in_supplemental_punctuation
369
+ insupplementalsymbolsandpictographs: in_supplemental_symbols_and_pictographs
370
+ insupplementaryprivateuseareaa: in_supplementary_private_use_area_a
371
+ insupplementaryprivateuseareab: in_supplementary_private_use_area_b
372
+ insuttonsignwriting: in_sutton_signwriting
373
+ insylotinagri: in_syloti_nagri
374
+ insyriac: in_syriac
375
+ insyriacsupplement: in_syriac_supplement
376
+ intagalog: in_tagalog
377
+ intagbanwa: in_tagbanwa
378
+ intags: in_tags
379
+ intaile: in_tai_le
380
+ intaitham: in_tai_tham
381
+ intaiviet: in_tai_viet
382
+ intaixuanjingsymbols: in_tai_xuan_jing_symbols
383
+ intakri: in_takri
384
+ intamil: in_tamil
385
+ intangut: in_tangut
386
+ intangutcomponents: in_tangut_components
387
+ intelugu: in_telugu
388
+ inthaana: in_thaana
389
+ inthai: in_thai
390
+ intibetan: in_tibetan
391
+ intifinagh: in_tifinagh
392
+ intirhuta: in_tirhuta
393
+ intransportandmapsymbols: in_transport_and_map_symbols
394
+ inugaritic: in_ugaritic
395
+ inunifiedcanadianaboriginalsyllabics: in_unified_canadian_aboriginal_syllabics
396
+ inunifiedcanadianaboriginalsyllabicsextended: in_unified_canadian_aboriginal_syllabics_extended
397
+ invai: in_vai
398
+ invariationselectors: in_variation_selectors
399
+ invariationselectorssupplement: in_variation_selectors_supplement
400
+ invedicextensions: in_vedic_extensions
401
+ inverticalforms: in_vertical_forms
402
+ inwarangciti: in_warang_citi
403
+ inyijinghexagramsymbols: in_yijing_hexagram_symbols
404
+ inyiradicals: in_yi_radicals
405
+ inyisyllables: in_yi_syllables
406
+ inzanabazarsquare: in_zanabazar_square
407
+ javanese: javanese
408
+ joincontrol: join_control
409
+ kaithi: kaithi
410
+ kannada: kannada
411
+ katakana: katakana
412
+ kayahli: kayah_li
413
+ kharoshthi: kharoshthi
414
+ khmer: khmer
415
+ khojki: khojki
416
+ khudawadi: khudawadi
417
+ lao: lao
418
+ latin: latin
419
+ lepcha: lepcha
420
+ letter: letter
421
+ letternumber: letter_number
422
+ limbu: limbu
423
+ lineara: linear_a
424
+ linearb: linear_b
425
+ lineseparator: line_separator
426
+ lisu: lisu
427
+ logicalorderexception: logical_order_exception
428
+ lower: lower
429
+ lowercase: lowercase
430
+ lowercaseletter: lowercase_letter
431
+ lycian: lycian
432
+ lydian: lydian
433
+ mahajani: mahajani
434
+ malayalam: malayalam
435
+ mandaic: mandaic
436
+ manichaean: manichaean
437
+ marchen: marchen
438
+ mark: mark
439
+ masaramgondi: masaram_gondi
440
+ math: math
441
+ mathsymbol: math_symbol
442
+ meeteimayek: meetei_mayek
443
+ mendekikakui: mende_kikakui
444
+ meroiticcursive: meroitic_cursive
445
+ meroitichieroglyphs: meroitic_hieroglyphs
446
+ miao: miao
447
+ modi: modi
448
+ modifierletter: modifier_letter
449
+ modifiersymbol: modifier_symbol
450
+ mongolian: mongolian
451
+ mro: mro
452
+ multani: multani
453
+ myanmar: myanmar
454
+ nabataean: nabataean
455
+ newa: newa
456
+ newline: newline
457
+ newtailue: new_tai_lue
458
+ nko: nko
459
+ noncharactercodepoint: noncharacter_code_point
460
+ nonspacingmark: nonspacing_mark
461
+ number: number
462
+ nushu: nushu
463
+ ogham: ogham
464
+ olchiki: ol_chiki
465
+ oldhungarian: old_hungarian
466
+ olditalic: old_italic
467
+ oldnortharabian: old_north_arabian
468
+ oldpermic: old_permic
469
+ oldpersian: old_persian
470
+ oldsoutharabian: old_south_arabian
471
+ oldturkic: old_turkic
472
+ openpunctuation: open_punctuation
473
+ oriya: oriya
474
+ osage: osage
475
+ osmanya: osmanya
476
+ other: other
477
+ otheralphabetic: other_alphabetic
478
+ otherdefaultignorablecodepoint: other_default_ignorable_code_point
479
+ othergraphemeextend: other_grapheme_extend
480
+ otheridcontinue: other_id_continue
481
+ otheridstart: other_id_start
482
+ otherletter: other_letter
483
+ otherlowercase: other_lowercase
484
+ othermath: other_math
485
+ othernumber: other_number
486
+ otherpunctuation: other_punctuation
487
+ othersymbol: other_symbol
488
+ otheruppercase: other_uppercase
489
+ pahawhhmong: pahawh_hmong
490
+ palmyrene: palmyrene
491
+ paragraphseparator: paragraph_separator
492
+ patternsyntax: pattern_syntax
493
+ patternwhitespace: pattern_white_space
494
+ paucinhau: pau_cin_hau
495
+ phagspa: phags_pa
496
+ phoenician: phoenician
497
+ prependedconcatenationmark: prepended_concatenation_mark
498
+ print: print
499
+ privateuse: private_use
500
+ psalterpahlavi: psalter_pahlavi
501
+ punct: punct
502
+ punctuation: punctuation
503
+ quotationmark: quotation_mark
504
+ radical: radical
505
+ regionalindicator: regional_indicator
506
+ rejang: rejang
507
+ runic: runic
508
+ samaritan: samaritan
509
+ saurashtra: saurashtra
510
+ sentenceterminal: sentence_terminal
511
+ separator: separator
512
+ sharada: sharada
513
+ shavian: shavian
514
+ siddham: siddham
515
+ signwriting: signwriting
516
+ sinhala: sinhala
517
+ softdotted: soft_dotted
518
+ sorasompeng: sora_sompeng
519
+ soyombo: soyombo
520
+ space: space
521
+ spaceseparator: space_separator
522
+ spacingmark: spacing_mark
523
+ sundanese: sundanese
524
+ surrogate: surrogate
525
+ sylotinagri: syloti_nagri
526
+ symbol: symbol
527
+ syriac: syriac
528
+ tagalog: tagalog
529
+ tagbanwa: tagbanwa
530
+ taile: tai_le
531
+ taitham: tai_tham
532
+ taiviet: tai_viet
533
+ takri: takri
534
+ tamil: tamil
535
+ tangut: tangut
536
+ telugu: telugu
537
+ terminalpunctuation: terminal_punctuation
538
+ thaana: thaana
539
+ thai: thai
540
+ tibetan: tibetan
541
+ tifinagh: tifinagh
542
+ tirhuta: tirhuta
543
+ titlecaseletter: titlecase_letter
544
+ ugaritic: ugaritic
545
+ unassigned: unassigned
546
+ unifiedideograph: unified_ideograph
547
+ unknown: unknown
548
+ upper: upper
549
+ uppercase: uppercase
550
+ uppercaseletter: uppercase_letter
551
+ vai: vai
552
+ variationselector: variation_selector
553
+ warangciti: warang_citi
554
+ whitespace: white_space
555
+ word: word
556
+ xdigit: xdigit
557
+ xidcontinue: xid_continue
558
+ xidstart: xid_start
559
+ xposixpunct: xposixpunct
560
+ yi: yi
561
+ zanabazarsquare: zanabazar_square