regexp_parser 0.5.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
@@ -0,0 +1,31 @@
|
|
1
|
+
%%{
|
2
|
+
machine re_char_type;
|
3
|
+
|
4
|
+
single_codepoint_char_type = [dDhHsSwW];
|
5
|
+
multi_codepoint_char_type = [RX];
|
6
|
+
|
7
|
+
char_type_char = single_codepoint_char_type | multi_codepoint_char_type;
|
8
|
+
|
9
|
+
# Char types scanner
|
10
|
+
# --------------------------------------------------------------------------
|
11
|
+
char_type := |*
|
12
|
+
char_type_char {
|
13
|
+
case text = text(data, ts, te, 1).first
|
14
|
+
when '\d'; emit(:type, :digit, text, ts - 1, te)
|
15
|
+
when '\D'; emit(:type, :nondigit, text, ts - 1, te)
|
16
|
+
when '\h'; emit(:type, :hex, text, ts - 1, te)
|
17
|
+
when '\H'; emit(:type, :nonhex, text, ts - 1, te)
|
18
|
+
when '\s'; emit(:type, :space, text, ts - 1, te)
|
19
|
+
when '\S'; emit(:type, :nonspace, text, ts - 1, te)
|
20
|
+
when '\w'; emit(:type, :word, text, ts - 1, te)
|
21
|
+
when '\W'; emit(:type, :nonword, text, ts - 1, te)
|
22
|
+
when '\R'; emit(:type, :linebreak, text, ts - 1, te)
|
23
|
+
when '\X'; emit(:type, :xgrapheme, text, ts - 1, te)
|
24
|
+
else
|
25
|
+
raise ScannerError.new(
|
26
|
+
"Unexpected character in type at #{text} (char #{ts})")
|
27
|
+
end
|
28
|
+
fret;
|
29
|
+
};
|
30
|
+
*|;
|
31
|
+
}%%
|
@@ -0,0 +1,561 @@
|
|
1
|
+
#
|
2
|
+
# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT
|
3
|
+
#
|
4
|
+
---
|
5
|
+
adlam: adlam
|
6
|
+
age=1.1: age=1.1
|
7
|
+
age=10.0: age=10.0
|
8
|
+
age=2.0: age=2.0
|
9
|
+
age=2.1: age=2.1
|
10
|
+
age=3.0: age=3.0
|
11
|
+
age=3.1: age=3.1
|
12
|
+
age=3.2: age=3.2
|
13
|
+
age=4.0: age=4.0
|
14
|
+
age=4.1: age=4.1
|
15
|
+
age=5.0: age=5.0
|
16
|
+
age=5.1: age=5.1
|
17
|
+
age=5.2: age=5.2
|
18
|
+
age=6.0: age=6.0
|
19
|
+
age=6.1: age=6.1
|
20
|
+
age=6.2: age=6.2
|
21
|
+
age=6.3: age=6.3
|
22
|
+
age=7.0: age=7.0
|
23
|
+
age=8.0: age=8.0
|
24
|
+
age=9.0: age=9.0
|
25
|
+
ahom: ahom
|
26
|
+
alnum: alnum
|
27
|
+
alpha: alpha
|
28
|
+
alphabetic: alphabetic
|
29
|
+
anatolianhieroglyphs: anatolian_hieroglyphs
|
30
|
+
any: any
|
31
|
+
arabic: arabic
|
32
|
+
armenian: armenian
|
33
|
+
ascii: ascii
|
34
|
+
asciihexdigit: ascii_hex_digit
|
35
|
+
assigned: assigned
|
36
|
+
avestan: avestan
|
37
|
+
balinese: balinese
|
38
|
+
bamum: bamum
|
39
|
+
bassavah: bassa_vah
|
40
|
+
batak: batak
|
41
|
+
bengali: bengali
|
42
|
+
bhaiksuki: bhaiksuki
|
43
|
+
bidicontrol: bidi_control
|
44
|
+
blank: blank
|
45
|
+
bopomofo: bopomofo
|
46
|
+
brahmi: brahmi
|
47
|
+
braille: braille
|
48
|
+
buginese: buginese
|
49
|
+
buhid: buhid
|
50
|
+
canadianaboriginal: canadian_aboriginal
|
51
|
+
carian: carian
|
52
|
+
cased: cased
|
53
|
+
casedletter: cased_letter
|
54
|
+
caseignorable: case_ignorable
|
55
|
+
caucasianalbanian: caucasian_albanian
|
56
|
+
chakma: chakma
|
57
|
+
cham: cham
|
58
|
+
changeswhencasefolded: changes_when_casefolded
|
59
|
+
changeswhencasemapped: changes_when_casemapped
|
60
|
+
changeswhenlowercased: changes_when_lowercased
|
61
|
+
changeswhentitlecased: changes_when_titlecased
|
62
|
+
changeswhenuppercased: changes_when_uppercased
|
63
|
+
cherokee: cherokee
|
64
|
+
closepunctuation: close_punctuation
|
65
|
+
cntrl: cntrl
|
66
|
+
combiningmark: combining_mark
|
67
|
+
common: common
|
68
|
+
connectorpunctuation: connector_punctuation
|
69
|
+
control: control
|
70
|
+
coptic: coptic
|
71
|
+
cuneiform: cuneiform
|
72
|
+
currencysymbol: currency_symbol
|
73
|
+
cypriot: cypriot
|
74
|
+
cyrillic: cyrillic
|
75
|
+
dash: dash
|
76
|
+
dashpunctuation: dash_punctuation
|
77
|
+
decimalnumber: decimal_number
|
78
|
+
defaultignorablecodepoint: default_ignorable_code_point
|
79
|
+
deprecated: deprecated
|
80
|
+
deseret: deseret
|
81
|
+
devanagari: devanagari
|
82
|
+
diacritic: diacritic
|
83
|
+
digit: digit
|
84
|
+
duployan: duployan
|
85
|
+
egyptianhieroglyphs: egyptian_hieroglyphs
|
86
|
+
elbasan: elbasan
|
87
|
+
emoji: emoji
|
88
|
+
emojicomponent: emoji_component
|
89
|
+
emojimodifier: emoji_modifier
|
90
|
+
emojimodifierbase: emoji_modifier_base
|
91
|
+
emojipresentation: emoji_presentation
|
92
|
+
enclosingmark: enclosing_mark
|
93
|
+
ethiopic: ethiopic
|
94
|
+
extender: extender
|
95
|
+
finalpunctuation: final_punctuation
|
96
|
+
format: format
|
97
|
+
georgian: georgian
|
98
|
+
glagolitic: glagolitic
|
99
|
+
gothic: gothic
|
100
|
+
grantha: grantha
|
101
|
+
graph: graph
|
102
|
+
graphemebase: grapheme_base
|
103
|
+
graphemeextend: grapheme_extend
|
104
|
+
graphemelink: grapheme_link
|
105
|
+
greek: greek
|
106
|
+
gujarati: gujarati
|
107
|
+
gurmukhi: gurmukhi
|
108
|
+
han: han
|
109
|
+
hangul: hangul
|
110
|
+
hanunoo: hanunoo
|
111
|
+
hatran: hatran
|
112
|
+
hebrew: hebrew
|
113
|
+
hexdigit: hex_digit
|
114
|
+
hiragana: hiragana
|
115
|
+
hyphen: hyphen
|
116
|
+
idcontinue: id_continue
|
117
|
+
ideographic: ideographic
|
118
|
+
idsbinaryoperator: ids_binary_operator
|
119
|
+
idstart: id_start
|
120
|
+
idstrinaryoperator: ids_trinary_operator
|
121
|
+
imperialaramaic: imperial_aramaic
|
122
|
+
inadlam: in_adlam
|
123
|
+
inaegeannumbers: in_aegean_numbers
|
124
|
+
inahom: in_ahom
|
125
|
+
inalchemicalsymbols: in_alchemical_symbols
|
126
|
+
inalphabeticpresentationforms: in_alphabetic_presentation_forms
|
127
|
+
inanatolianhieroglyphs: in_anatolian_hieroglyphs
|
128
|
+
inancientgreekmusicalnotation: in_ancient_greek_musical_notation
|
129
|
+
inancientgreeknumbers: in_ancient_greek_numbers
|
130
|
+
inancientsymbols: in_ancient_symbols
|
131
|
+
inarabic: in_arabic
|
132
|
+
inarabicextendeda: in_arabic_extended_a
|
133
|
+
inarabicmathematicalalphabeticsymbols: in_arabic_mathematical_alphabetic_symbols
|
134
|
+
inarabicpresentationformsa: in_arabic_presentation_forms_a
|
135
|
+
inarabicpresentationformsb: in_arabic_presentation_forms_b
|
136
|
+
inarabicsupplement: in_arabic_supplement
|
137
|
+
inarmenian: in_armenian
|
138
|
+
inarrows: in_arrows
|
139
|
+
inavestan: in_avestan
|
140
|
+
inbalinese: in_balinese
|
141
|
+
inbamum: in_bamum
|
142
|
+
inbamumsupplement: in_bamum_supplement
|
143
|
+
inbasiclatin: in_basic_latin
|
144
|
+
inbassavah: in_bassa_vah
|
145
|
+
inbatak: in_batak
|
146
|
+
inbengali: in_bengali
|
147
|
+
inbhaiksuki: in_bhaiksuki
|
148
|
+
inblockelements: in_block_elements
|
149
|
+
inbopomofo: in_bopomofo
|
150
|
+
inbopomofoextended: in_bopomofo_extended
|
151
|
+
inboxdrawing: in_box_drawing
|
152
|
+
inbrahmi: in_brahmi
|
153
|
+
inbraillepatterns: in_braille_patterns
|
154
|
+
inbuginese: in_buginese
|
155
|
+
inbuhid: in_buhid
|
156
|
+
inbyzantinemusicalsymbols: in_byzantine_musical_symbols
|
157
|
+
incarian: in_carian
|
158
|
+
incaucasianalbanian: in_caucasian_albanian
|
159
|
+
inchakma: in_chakma
|
160
|
+
incham: in_cham
|
161
|
+
incherokee: in_cherokee
|
162
|
+
incherokeesupplement: in_cherokee_supplement
|
163
|
+
incjkcompatibility: in_cjk_compatibility
|
164
|
+
incjkcompatibilityforms: in_cjk_compatibility_forms
|
165
|
+
incjkcompatibilityideographs: in_cjk_compatibility_ideographs
|
166
|
+
incjkcompatibilityideographssupplement: in_cjk_compatibility_ideographs_supplement
|
167
|
+
incjkradicalssupplement: in_cjk_radicals_supplement
|
168
|
+
incjkstrokes: in_cjk_strokes
|
169
|
+
incjksymbolsandpunctuation: in_cjk_symbols_and_punctuation
|
170
|
+
incjkunifiedideographs: in_cjk_unified_ideographs
|
171
|
+
incjkunifiedideographsextensiona: in_cjk_unified_ideographs_extension_a
|
172
|
+
incjkunifiedideographsextensionb: in_cjk_unified_ideographs_extension_b
|
173
|
+
incjkunifiedideographsextensionc: in_cjk_unified_ideographs_extension_c
|
174
|
+
incjkunifiedideographsextensiond: in_cjk_unified_ideographs_extension_d
|
175
|
+
incjkunifiedideographsextensione: in_cjk_unified_ideographs_extension_e
|
176
|
+
incjkunifiedideographsextensionf: in_cjk_unified_ideographs_extension_f
|
177
|
+
incombiningdiacriticalmarks: in_combining_diacritical_marks
|
178
|
+
incombiningdiacriticalmarksextended: in_combining_diacritical_marks_extended
|
179
|
+
incombiningdiacriticalmarksforsymbols: in_combining_diacritical_marks_for_symbols
|
180
|
+
incombiningdiacriticalmarkssupplement: in_combining_diacritical_marks_supplement
|
181
|
+
incombininghalfmarks: in_combining_half_marks
|
182
|
+
incommonindicnumberforms: in_common_indic_number_forms
|
183
|
+
incontrolpictures: in_control_pictures
|
184
|
+
incoptic: in_coptic
|
185
|
+
incopticepactnumbers: in_coptic_epact_numbers
|
186
|
+
incountingrodnumerals: in_counting_rod_numerals
|
187
|
+
incuneiform: in_cuneiform
|
188
|
+
incuneiformnumbersandpunctuation: in_cuneiform_numbers_and_punctuation
|
189
|
+
incurrencysymbols: in_currency_symbols
|
190
|
+
incypriotsyllabary: in_cypriot_syllabary
|
191
|
+
incyrillic: in_cyrillic
|
192
|
+
incyrillicextendeda: in_cyrillic_extended_a
|
193
|
+
incyrillicextendedb: in_cyrillic_extended_b
|
194
|
+
incyrillicextendedc: in_cyrillic_extended_c
|
195
|
+
incyrillicsupplement: in_cyrillic_supplement
|
196
|
+
indeseret: in_deseret
|
197
|
+
indevanagari: in_devanagari
|
198
|
+
indevanagariextended: in_devanagari_extended
|
199
|
+
indingbats: in_dingbats
|
200
|
+
indominotiles: in_domino_tiles
|
201
|
+
induployan: in_duployan
|
202
|
+
inearlydynasticcuneiform: in_early_dynastic_cuneiform
|
203
|
+
inegyptianhieroglyphs: in_egyptian_hieroglyphs
|
204
|
+
inelbasan: in_elbasan
|
205
|
+
inemoticons: in_emoticons
|
206
|
+
inenclosedalphanumerics: in_enclosed_alphanumerics
|
207
|
+
inenclosedalphanumericsupplement: in_enclosed_alphanumeric_supplement
|
208
|
+
inenclosedcjklettersandmonths: in_enclosed_cjk_letters_and_months
|
209
|
+
inenclosedideographicsupplement: in_enclosed_ideographic_supplement
|
210
|
+
inethiopic: in_ethiopic
|
211
|
+
inethiopicextended: in_ethiopic_extended
|
212
|
+
inethiopicextendeda: in_ethiopic_extended_a
|
213
|
+
inethiopicsupplement: in_ethiopic_supplement
|
214
|
+
ingeneralpunctuation: in_general_punctuation
|
215
|
+
ingeometricshapes: in_geometric_shapes
|
216
|
+
ingeometricshapesextended: in_geometric_shapes_extended
|
217
|
+
ingeorgian: in_georgian
|
218
|
+
ingeorgiansupplement: in_georgian_supplement
|
219
|
+
inglagolitic: in_glagolitic
|
220
|
+
inglagoliticsupplement: in_glagolitic_supplement
|
221
|
+
ingothic: in_gothic
|
222
|
+
ingrantha: in_grantha
|
223
|
+
ingreekandcoptic: in_greek_and_coptic
|
224
|
+
ingreekextended: in_greek_extended
|
225
|
+
ingujarati: in_gujarati
|
226
|
+
ingurmukhi: in_gurmukhi
|
227
|
+
inhalfwidthandfullwidthforms: in_halfwidth_and_fullwidth_forms
|
228
|
+
inhangulcompatibilityjamo: in_hangul_compatibility_jamo
|
229
|
+
inhanguljamo: in_hangul_jamo
|
230
|
+
inhanguljamoextendeda: in_hangul_jamo_extended_a
|
231
|
+
inhanguljamoextendedb: in_hangul_jamo_extended_b
|
232
|
+
inhangulsyllables: in_hangul_syllables
|
233
|
+
inhanunoo: in_hanunoo
|
234
|
+
inhatran: in_hatran
|
235
|
+
inhebrew: in_hebrew
|
236
|
+
inherited: inherited
|
237
|
+
inhighprivateusesurrogates: in_high_private_use_surrogates
|
238
|
+
inhighsurrogates: in_high_surrogates
|
239
|
+
inhiragana: in_hiragana
|
240
|
+
inideographicdescriptioncharacters: in_ideographic_description_characters
|
241
|
+
inideographicsymbolsandpunctuation: in_ideographic_symbols_and_punctuation
|
242
|
+
inimperialaramaic: in_imperial_aramaic
|
243
|
+
ininscriptionalpahlavi: in_inscriptional_pahlavi
|
244
|
+
ininscriptionalparthian: in_inscriptional_parthian
|
245
|
+
inipaextensions: in_ipa_extensions
|
246
|
+
initialpunctuation: initial_punctuation
|
247
|
+
injavanese: in_javanese
|
248
|
+
inkaithi: in_kaithi
|
249
|
+
inkanaextendeda: in_kana_extended_a
|
250
|
+
inkanasupplement: in_kana_supplement
|
251
|
+
inkanbun: in_kanbun
|
252
|
+
inkangxiradicals: in_kangxi_radicals
|
253
|
+
inkannada: in_kannada
|
254
|
+
inkatakana: in_katakana
|
255
|
+
inkatakanaphoneticextensions: in_katakana_phonetic_extensions
|
256
|
+
inkayahli: in_kayah_li
|
257
|
+
inkharoshthi: in_kharoshthi
|
258
|
+
inkhmer: in_khmer
|
259
|
+
inkhmersymbols: in_khmer_symbols
|
260
|
+
inkhojki: in_khojki
|
261
|
+
inkhudawadi: in_khudawadi
|
262
|
+
inlao: in_lao
|
263
|
+
inlatin1supplement: in_latin_1_supplement
|
264
|
+
inlatinextendeda: in_latin_extended_a
|
265
|
+
inlatinextendedadditional: in_latin_extended_additional
|
266
|
+
inlatinextendedb: in_latin_extended_b
|
267
|
+
inlatinextendedc: in_latin_extended_c
|
268
|
+
inlatinextendedd: in_latin_extended_d
|
269
|
+
inlatinextendede: in_latin_extended_e
|
270
|
+
inlepcha: in_lepcha
|
271
|
+
inletterlikesymbols: in_letterlike_symbols
|
272
|
+
inlimbu: in_limbu
|
273
|
+
inlineara: in_linear_a
|
274
|
+
inlinearbideograms: in_linear_b_ideograms
|
275
|
+
inlinearbsyllabary: in_linear_b_syllabary
|
276
|
+
inlisu: in_lisu
|
277
|
+
inlowsurrogates: in_low_surrogates
|
278
|
+
inlycian: in_lycian
|
279
|
+
inlydian: in_lydian
|
280
|
+
inmahajani: in_mahajani
|
281
|
+
inmahjongtiles: in_mahjong_tiles
|
282
|
+
inmalayalam: in_malayalam
|
283
|
+
inmandaic: in_mandaic
|
284
|
+
inmanichaean: in_manichaean
|
285
|
+
inmarchen: in_marchen
|
286
|
+
inmasaramgondi: in_masaram_gondi
|
287
|
+
inmathematicalalphanumericsymbols: in_mathematical_alphanumeric_symbols
|
288
|
+
inmathematicaloperators: in_mathematical_operators
|
289
|
+
inmeeteimayek: in_meetei_mayek
|
290
|
+
inmeeteimayekextensions: in_meetei_mayek_extensions
|
291
|
+
inmendekikakui: in_mende_kikakui
|
292
|
+
inmeroiticcursive: in_meroitic_cursive
|
293
|
+
inmeroitichieroglyphs: in_meroitic_hieroglyphs
|
294
|
+
inmiao: in_miao
|
295
|
+
inmiscellaneousmathematicalsymbolsa: in_miscellaneous_mathematical_symbols_a
|
296
|
+
inmiscellaneousmathematicalsymbolsb: in_miscellaneous_mathematical_symbols_b
|
297
|
+
inmiscellaneoussymbols: in_miscellaneous_symbols
|
298
|
+
inmiscellaneoussymbolsandarrows: in_miscellaneous_symbols_and_arrows
|
299
|
+
inmiscellaneoussymbolsandpictographs: in_miscellaneous_symbols_and_pictographs
|
300
|
+
inmiscellaneoustechnical: in_miscellaneous_technical
|
301
|
+
inmodi: in_modi
|
302
|
+
inmodifiertoneletters: in_modifier_tone_letters
|
303
|
+
inmongolian: in_mongolian
|
304
|
+
inmongoliansupplement: in_mongolian_supplement
|
305
|
+
inmro: in_mro
|
306
|
+
inmultani: in_multani
|
307
|
+
inmusicalsymbols: in_musical_symbols
|
308
|
+
inmyanmar: in_myanmar
|
309
|
+
inmyanmarextendeda: in_myanmar_extended_a
|
310
|
+
inmyanmarextendedb: in_myanmar_extended_b
|
311
|
+
innabataean: in_nabataean
|
312
|
+
innewa: in_newa
|
313
|
+
innewtailue: in_new_tai_lue
|
314
|
+
innko: in_nko
|
315
|
+
innoblock: in_no_block
|
316
|
+
innumberforms: in_number_forms
|
317
|
+
innushu: in_nushu
|
318
|
+
inogham: in_ogham
|
319
|
+
inolchiki: in_ol_chiki
|
320
|
+
inoldhungarian: in_old_hungarian
|
321
|
+
inolditalic: in_old_italic
|
322
|
+
inoldnortharabian: in_old_north_arabian
|
323
|
+
inoldpermic: in_old_permic
|
324
|
+
inoldpersian: in_old_persian
|
325
|
+
inoldsoutharabian: in_old_south_arabian
|
326
|
+
inoldturkic: in_old_turkic
|
327
|
+
inopticalcharacterrecognition: in_optical_character_recognition
|
328
|
+
inoriya: in_oriya
|
329
|
+
inornamentaldingbats: in_ornamental_dingbats
|
330
|
+
inosage: in_osage
|
331
|
+
inosmanya: in_osmanya
|
332
|
+
inpahawhhmong: in_pahawh_hmong
|
333
|
+
inpalmyrene: in_palmyrene
|
334
|
+
inpaucinhau: in_pau_cin_hau
|
335
|
+
inphagspa: in_phags_pa
|
336
|
+
inphaistosdisc: in_phaistos_disc
|
337
|
+
inphoenician: in_phoenician
|
338
|
+
inphoneticextensions: in_phonetic_extensions
|
339
|
+
inphoneticextensionssupplement: in_phonetic_extensions_supplement
|
340
|
+
inplayingcards: in_playing_cards
|
341
|
+
inprivateusearea: in_private_use_area
|
342
|
+
inpsalterpahlavi: in_psalter_pahlavi
|
343
|
+
inrejang: in_rejang
|
344
|
+
inruminumeralsymbols: in_rumi_numeral_symbols
|
345
|
+
inrunic: in_runic
|
346
|
+
insamaritan: in_samaritan
|
347
|
+
insaurashtra: in_saurashtra
|
348
|
+
inscriptionalpahlavi: inscriptional_pahlavi
|
349
|
+
inscriptionalparthian: inscriptional_parthian
|
350
|
+
insharada: in_sharada
|
351
|
+
inshavian: in_shavian
|
352
|
+
inshorthandformatcontrols: in_shorthand_format_controls
|
353
|
+
insiddham: in_siddham
|
354
|
+
insinhala: in_sinhala
|
355
|
+
insinhalaarchaicnumbers: in_sinhala_archaic_numbers
|
356
|
+
insmallformvariants: in_small_form_variants
|
357
|
+
insorasompeng: in_sora_sompeng
|
358
|
+
insoyombo: in_soyombo
|
359
|
+
inspacingmodifierletters: in_spacing_modifier_letters
|
360
|
+
inspecials: in_specials
|
361
|
+
insundanese: in_sundanese
|
362
|
+
insundanesesupplement: in_sundanese_supplement
|
363
|
+
insuperscriptsandsubscripts: in_superscripts_and_subscripts
|
364
|
+
insupplementalarrowsa: in_supplemental_arrows_a
|
365
|
+
insupplementalarrowsb: in_supplemental_arrows_b
|
366
|
+
insupplementalarrowsc: in_supplemental_arrows_c
|
367
|
+
insupplementalmathematicaloperators: in_supplemental_mathematical_operators
|
368
|
+
insupplementalpunctuation: in_supplemental_punctuation
|
369
|
+
insupplementalsymbolsandpictographs: in_supplemental_symbols_and_pictographs
|
370
|
+
insupplementaryprivateuseareaa: in_supplementary_private_use_area_a
|
371
|
+
insupplementaryprivateuseareab: in_supplementary_private_use_area_b
|
372
|
+
insuttonsignwriting: in_sutton_signwriting
|
373
|
+
insylotinagri: in_syloti_nagri
|
374
|
+
insyriac: in_syriac
|
375
|
+
insyriacsupplement: in_syriac_supplement
|
376
|
+
intagalog: in_tagalog
|
377
|
+
intagbanwa: in_tagbanwa
|
378
|
+
intags: in_tags
|
379
|
+
intaile: in_tai_le
|
380
|
+
intaitham: in_tai_tham
|
381
|
+
intaiviet: in_tai_viet
|
382
|
+
intaixuanjingsymbols: in_tai_xuan_jing_symbols
|
383
|
+
intakri: in_takri
|
384
|
+
intamil: in_tamil
|
385
|
+
intangut: in_tangut
|
386
|
+
intangutcomponents: in_tangut_components
|
387
|
+
intelugu: in_telugu
|
388
|
+
inthaana: in_thaana
|
389
|
+
inthai: in_thai
|
390
|
+
intibetan: in_tibetan
|
391
|
+
intifinagh: in_tifinagh
|
392
|
+
intirhuta: in_tirhuta
|
393
|
+
intransportandmapsymbols: in_transport_and_map_symbols
|
394
|
+
inugaritic: in_ugaritic
|
395
|
+
inunifiedcanadianaboriginalsyllabics: in_unified_canadian_aboriginal_syllabics
|
396
|
+
inunifiedcanadianaboriginalsyllabicsextended: in_unified_canadian_aboriginal_syllabics_extended
|
397
|
+
invai: in_vai
|
398
|
+
invariationselectors: in_variation_selectors
|
399
|
+
invariationselectorssupplement: in_variation_selectors_supplement
|
400
|
+
invedicextensions: in_vedic_extensions
|
401
|
+
inverticalforms: in_vertical_forms
|
402
|
+
inwarangciti: in_warang_citi
|
403
|
+
inyijinghexagramsymbols: in_yijing_hexagram_symbols
|
404
|
+
inyiradicals: in_yi_radicals
|
405
|
+
inyisyllables: in_yi_syllables
|
406
|
+
inzanabazarsquare: in_zanabazar_square
|
407
|
+
javanese: javanese
|
408
|
+
joincontrol: join_control
|
409
|
+
kaithi: kaithi
|
410
|
+
kannada: kannada
|
411
|
+
katakana: katakana
|
412
|
+
kayahli: kayah_li
|
413
|
+
kharoshthi: kharoshthi
|
414
|
+
khmer: khmer
|
415
|
+
khojki: khojki
|
416
|
+
khudawadi: khudawadi
|
417
|
+
lao: lao
|
418
|
+
latin: latin
|
419
|
+
lepcha: lepcha
|
420
|
+
letter: letter
|
421
|
+
letternumber: letter_number
|
422
|
+
limbu: limbu
|
423
|
+
lineara: linear_a
|
424
|
+
linearb: linear_b
|
425
|
+
lineseparator: line_separator
|
426
|
+
lisu: lisu
|
427
|
+
logicalorderexception: logical_order_exception
|
428
|
+
lower: lower
|
429
|
+
lowercase: lowercase
|
430
|
+
lowercaseletter: lowercase_letter
|
431
|
+
lycian: lycian
|
432
|
+
lydian: lydian
|
433
|
+
mahajani: mahajani
|
434
|
+
malayalam: malayalam
|
435
|
+
mandaic: mandaic
|
436
|
+
manichaean: manichaean
|
437
|
+
marchen: marchen
|
438
|
+
mark: mark
|
439
|
+
masaramgondi: masaram_gondi
|
440
|
+
math: math
|
441
|
+
mathsymbol: math_symbol
|
442
|
+
meeteimayek: meetei_mayek
|
443
|
+
mendekikakui: mende_kikakui
|
444
|
+
meroiticcursive: meroitic_cursive
|
445
|
+
meroitichieroglyphs: meroitic_hieroglyphs
|
446
|
+
miao: miao
|
447
|
+
modi: modi
|
448
|
+
modifierletter: modifier_letter
|
449
|
+
modifiersymbol: modifier_symbol
|
450
|
+
mongolian: mongolian
|
451
|
+
mro: mro
|
452
|
+
multani: multani
|
453
|
+
myanmar: myanmar
|
454
|
+
nabataean: nabataean
|
455
|
+
newa: newa
|
456
|
+
newline: newline
|
457
|
+
newtailue: new_tai_lue
|
458
|
+
nko: nko
|
459
|
+
noncharactercodepoint: noncharacter_code_point
|
460
|
+
nonspacingmark: nonspacing_mark
|
461
|
+
number: number
|
462
|
+
nushu: nushu
|
463
|
+
ogham: ogham
|
464
|
+
olchiki: ol_chiki
|
465
|
+
oldhungarian: old_hungarian
|
466
|
+
olditalic: old_italic
|
467
|
+
oldnortharabian: old_north_arabian
|
468
|
+
oldpermic: old_permic
|
469
|
+
oldpersian: old_persian
|
470
|
+
oldsoutharabian: old_south_arabian
|
471
|
+
oldturkic: old_turkic
|
472
|
+
openpunctuation: open_punctuation
|
473
|
+
oriya: oriya
|
474
|
+
osage: osage
|
475
|
+
osmanya: osmanya
|
476
|
+
other: other
|
477
|
+
otheralphabetic: other_alphabetic
|
478
|
+
otherdefaultignorablecodepoint: other_default_ignorable_code_point
|
479
|
+
othergraphemeextend: other_grapheme_extend
|
480
|
+
otheridcontinue: other_id_continue
|
481
|
+
otheridstart: other_id_start
|
482
|
+
otherletter: other_letter
|
483
|
+
otherlowercase: other_lowercase
|
484
|
+
othermath: other_math
|
485
|
+
othernumber: other_number
|
486
|
+
otherpunctuation: other_punctuation
|
487
|
+
othersymbol: other_symbol
|
488
|
+
otheruppercase: other_uppercase
|
489
|
+
pahawhhmong: pahawh_hmong
|
490
|
+
palmyrene: palmyrene
|
491
|
+
paragraphseparator: paragraph_separator
|
492
|
+
patternsyntax: pattern_syntax
|
493
|
+
patternwhitespace: pattern_white_space
|
494
|
+
paucinhau: pau_cin_hau
|
495
|
+
phagspa: phags_pa
|
496
|
+
phoenician: phoenician
|
497
|
+
prependedconcatenationmark: prepended_concatenation_mark
|
498
|
+
print: print
|
499
|
+
privateuse: private_use
|
500
|
+
psalterpahlavi: psalter_pahlavi
|
501
|
+
punct: punct
|
502
|
+
punctuation: punctuation
|
503
|
+
quotationmark: quotation_mark
|
504
|
+
radical: radical
|
505
|
+
regionalindicator: regional_indicator
|
506
|
+
rejang: rejang
|
507
|
+
runic: runic
|
508
|
+
samaritan: samaritan
|
509
|
+
saurashtra: saurashtra
|
510
|
+
sentenceterminal: sentence_terminal
|
511
|
+
separator: separator
|
512
|
+
sharada: sharada
|
513
|
+
shavian: shavian
|
514
|
+
siddham: siddham
|
515
|
+
signwriting: signwriting
|
516
|
+
sinhala: sinhala
|
517
|
+
softdotted: soft_dotted
|
518
|
+
sorasompeng: sora_sompeng
|
519
|
+
soyombo: soyombo
|
520
|
+
space: space
|
521
|
+
spaceseparator: space_separator
|
522
|
+
spacingmark: spacing_mark
|
523
|
+
sundanese: sundanese
|
524
|
+
surrogate: surrogate
|
525
|
+
sylotinagri: syloti_nagri
|
526
|
+
symbol: symbol
|
527
|
+
syriac: syriac
|
528
|
+
tagalog: tagalog
|
529
|
+
tagbanwa: tagbanwa
|
530
|
+
taile: tai_le
|
531
|
+
taitham: tai_tham
|
532
|
+
taiviet: tai_viet
|
533
|
+
takri: takri
|
534
|
+
tamil: tamil
|
535
|
+
tangut: tangut
|
536
|
+
telugu: telugu
|
537
|
+
terminalpunctuation: terminal_punctuation
|
538
|
+
thaana: thaana
|
539
|
+
thai: thai
|
540
|
+
tibetan: tibetan
|
541
|
+
tifinagh: tifinagh
|
542
|
+
tirhuta: tirhuta
|
543
|
+
titlecaseletter: titlecase_letter
|
544
|
+
ugaritic: ugaritic
|
545
|
+
unassigned: unassigned
|
546
|
+
unifiedideograph: unified_ideograph
|
547
|
+
unknown: unknown
|
548
|
+
upper: upper
|
549
|
+
uppercase: uppercase
|
550
|
+
uppercaseletter: uppercase_letter
|
551
|
+
vai: vai
|
552
|
+
variationselector: variation_selector
|
553
|
+
warangciti: warang_citi
|
554
|
+
whitespace: white_space
|
555
|
+
word: word
|
556
|
+
xdigit: xdigit
|
557
|
+
xidcontinue: xid_continue
|
558
|
+
xidstart: xid_start
|
559
|
+
xposixpunct: xposixpunct
|
560
|
+
yi: yi
|
561
|
+
zanabazarsquare: zanabazar_square
|