regexp_parser 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -0,0 +1,225 @@
1
+ #
2
+ # THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT
3
+ #
4
+ ---
5
+ adlm: adlam
6
+ aghb: caucasian_albanian
7
+ ahex: ascii_hex_digit
8
+ arab: arabic
9
+ armi: imperial_aramaic
10
+ armn: armenian
11
+ avst: avestan
12
+ bali: balinese
13
+ bamu: bamum
14
+ bass: bassa_vah
15
+ batk: batak
16
+ beng: bengali
17
+ bhks: bhaiksuki
18
+ bidic: bidi_control
19
+ bopo: bopomofo
20
+ brah: brahmi
21
+ brai: braille
22
+ bugi: buginese
23
+ buhd: buhid
24
+ c: other
25
+ cakm: chakma
26
+ cans: canadian_aboriginal
27
+ cari: carian
28
+ cc: control
29
+ cf: format
30
+ cher: cherokee
31
+ ci: case_ignorable
32
+ cn: unassigned
33
+ co: private_use
34
+ copt: coptic
35
+ cprt: cypriot
36
+ cs: surrogate
37
+ cwcf: changes_when_casefolded
38
+ cwcm: changes_when_casemapped
39
+ cwl: changes_when_lowercased
40
+ cwt: changes_when_titlecased
41
+ cwu: changes_when_uppercased
42
+ cyrl: cyrillic
43
+ dep: deprecated
44
+ deva: devanagari
45
+ di: default_ignorable_code_point
46
+ dia: diacritic
47
+ dsrt: deseret
48
+ dupl: duployan
49
+ egyp: egyptian_hieroglyphs
50
+ elba: elbasan
51
+ ethi: ethiopic
52
+ ext: extender
53
+ geor: georgian
54
+ glag: glagolitic
55
+ gonm: masaram_gondi
56
+ goth: gothic
57
+ gran: grantha
58
+ grbase: grapheme_base
59
+ grek: greek
60
+ grext: grapheme_extend
61
+ grlink: grapheme_link
62
+ gujr: gujarati
63
+ guru: gurmukhi
64
+ hang: hangul
65
+ hani: han
66
+ hano: hanunoo
67
+ hatr: hatran
68
+ hebr: hebrew
69
+ hex: hex_digit
70
+ hira: hiragana
71
+ hluw: anatolian_hieroglyphs
72
+ hmng: pahawh_hmong
73
+ hung: old_hungarian
74
+ idc: id_continue
75
+ ideo: ideographic
76
+ ids: id_start
77
+ idsb: ids_binary_operator
78
+ idst: ids_trinary_operator
79
+ ital: old_italic
80
+ java: javanese
81
+ joinc: join_control
82
+ kali: kayah_li
83
+ kana: katakana
84
+ khar: kharoshthi
85
+ khmr: khmer
86
+ khoj: khojki
87
+ knda: kannada
88
+ kthi: kaithi
89
+ l: letter
90
+ lana: tai_tham
91
+ laoo: lao
92
+ latn: latin
93
+ lc: cased_letter
94
+ lepc: lepcha
95
+ limb: limbu
96
+ lina: linear_a
97
+ linb: linear_b
98
+ ll: lowercase_letter
99
+ lm: modifier_letter
100
+ lo: other_letter
101
+ loe: logical_order_exception
102
+ lt: titlecase_letter
103
+ lu: uppercase_letter
104
+ lyci: lycian
105
+ lydi: lydian
106
+ m: mark
107
+ mahj: mahajani
108
+ mand: mandaic
109
+ mani: manichaean
110
+ marc: marchen
111
+ mc: spacing_mark
112
+ me: enclosing_mark
113
+ mend: mende_kikakui
114
+ merc: meroitic_cursive
115
+ mero: meroitic_hieroglyphs
116
+ mlym: malayalam
117
+ mn: nonspacing_mark
118
+ mong: mongolian
119
+ mroo: mro
120
+ mtei: meetei_mayek
121
+ mult: multani
122
+ mymr: myanmar
123
+ n: number
124
+ narb: old_north_arabian
125
+ nbat: nabataean
126
+ nchar: noncharacter_code_point
127
+ nd: decimal_number
128
+ nkoo: nko
129
+ nl: letter_number
130
+ 'no': other_number
131
+ nshu: nushu
132
+ oalpha: other_alphabetic
133
+ odi: other_default_ignorable_code_point
134
+ ogam: ogham
135
+ ogrext: other_grapheme_extend
136
+ oidc: other_id_continue
137
+ oids: other_id_start
138
+ olck: ol_chiki
139
+ olower: other_lowercase
140
+ omath: other_math
141
+ orkh: old_turkic
142
+ orya: oriya
143
+ osge: osage
144
+ osma: osmanya
145
+ oupper: other_uppercase
146
+ p: punctuation
147
+ palm: palmyrene
148
+ patsyn: pattern_syntax
149
+ patws: pattern_white_space
150
+ pauc: pau_cin_hau
151
+ pc: connector_punctuation
152
+ pcm: prepended_concatenation_mark
153
+ pd: dash_punctuation
154
+ pe: close_punctuation
155
+ perm: old_permic
156
+ pf: final_punctuation
157
+ phag: phags_pa
158
+ phli: inscriptional_pahlavi
159
+ phlp: psalter_pahlavi
160
+ phnx: phoenician
161
+ pi: initial_punctuation
162
+ plrd: miao
163
+ po: other_punctuation
164
+ prti: inscriptional_parthian
165
+ ps: open_punctuation
166
+ qaac: coptic
167
+ qaai: inherited
168
+ qmark: quotation_mark
169
+ ri: regional_indicator
170
+ rjng: rejang
171
+ runr: runic
172
+ s: symbol
173
+ samr: samaritan
174
+ sarb: old_south_arabian
175
+ saur: saurashtra
176
+ sc: currency_symbol
177
+ sd: soft_dotted
178
+ sgnw: signwriting
179
+ shaw: shavian
180
+ shrd: sharada
181
+ sidd: siddham
182
+ sind: khudawadi
183
+ sinh: sinhala
184
+ sk: modifier_symbol
185
+ sm: math_symbol
186
+ so: other_symbol
187
+ sora: sora_sompeng
188
+ soyo: soyombo
189
+ sterm: sentence_terminal
190
+ sund: sundanese
191
+ sylo: syloti_nagri
192
+ syrc: syriac
193
+ tagb: tagbanwa
194
+ takr: takri
195
+ tale: tai_le
196
+ talu: new_tai_lue
197
+ taml: tamil
198
+ tang: tangut
199
+ tavt: tai_viet
200
+ telu: telugu
201
+ term: terminal_punctuation
202
+ tfng: tifinagh
203
+ tglg: tagalog
204
+ thaa: thaana
205
+ tibt: tibetan
206
+ tirh: tirhuta
207
+ ugar: ugaritic
208
+ uideo: unified_ideograph
209
+ vaii: vai
210
+ vs: variation_selector
211
+ wara: warang_citi
212
+ wspace: white_space
213
+ xidc: xid_continue
214
+ xids: xid_start
215
+ xpeo: old_persian
216
+ xsux: cuneiform
217
+ yiii: yi
218
+ z: separator
219
+ zanb: zanabazar_square
220
+ zinh: inherited
221
+ zl: line_separator
222
+ zp: paragraph_separator
223
+ zs: space_separator
224
+ zyyy: common
225
+ zzzz: unknown
@@ -1,55 +1,9 @@
1
1
  %%{
2
2
  machine re_property;
3
3
 
4
- property_char = [pP];
4
+ property_char = [pP];
5
5
 
6
- # Property names are being treated as case-insensitive, but it is not clear
7
- # yet if this applies to all flavors and in all encodings. A bug has just
8
- # been filed against ruby regarding this issue, see:
9
- # http://redmine.ruby-lang.org/issues/show/4014
10
- property_name_unicode = 'alnum'i | 'alpha'i | 'any'i | 'ascii'i | 'blank'i |
11
- 'cntrl'i | 'digit'i | 'graph'i | 'lower'i | 'print'i |
12
- 'punct'i | 'space'i | 'upper'i | 'word'i | 'xdigit'i;
13
-
14
- property_name_posix = 'any'i | 'assigned'i | 'newline'i;
15
-
16
- property_name = property_name_unicode | property_name_posix;
17
-
18
- category_letter = [Ll] . [ultmo]?;
19
- category_mark = [Mm] . [nce]?;
20
- category_number = [Nn] . [dlo]?;
21
- category_punctuation = [Pp] . [cdseifo]?;
22
- category_symbol = [Ss] . [mcko]?;
23
- category_separator = [Zz] . [slp]?;
24
- category_codepoint = [Cc] . [cfson]?;
25
-
26
- general_category = category_letter | category_mark |
27
- category_number | category_punctuation |
28
- category_symbol | category_separator |
29
- category_codepoint;
30
-
31
- property_derived = 'math'i | 'alphabetic'i |
32
- 'lowercase'i | 'uppercase'i |
33
- 'id_start'i | 'id_continue'i |
34
- 'xid_start'i | 'xid_continue'i |
35
- 'grapheme_base'i | 'grapheme_extend'i |
36
- 'default_ignorable_code_point'i;
37
-
38
- property_age = 'age=1.1'i | 'age=2.0'i | 'age=2.1'i |
39
- 'age=3.0'i | 'age=3.1'i | 'age=3.2'i |
40
- 'age=4.0'i | 'age=4.1'i | 'age=5.0'i |
41
- 'age=5.1'i | 'age=5.2'i | 'age=6.0'i |
42
- 'age=6.1'i | 'age=6.2'i | 'age=6.3'i |
43
- 'age=7.0'i | 'age=8.0'i | 'age=9.0'i |
44
- 'age=10.0'i;
45
-
46
- property_script = (alnum | space | '_' | '-')+; # everything else
47
-
48
- property_sequence = property_char . '{' . '^'? (
49
- property_name | general_category |
50
- property_age | property_derived |
51
- property_script
52
- ) . '}';
6
+ property_sequence = property_char . '{' . '^'? (alnum|space|[_\-\.=])+ '}';
53
7
 
54
8
  action premature_property_end {
55
9
  raise PrematureEndError.new('unicode property')
@@ -61,767 +15,14 @@
61
15
 
62
16
  property_sequence < eof(premature_property_end) {
63
17
  text = text(data, ts, te, 1).first
64
- if in_set
65
- type = :set
66
- else
67
- type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
68
- end
69
-
70
- name = data[ts+2..te-2].pack('c*').gsub(/[\s_]/,'').downcase
71
- if name[0].chr == '^'
72
- name = name[1..-1]
73
- end
74
-
75
- case name
76
- # Named
77
- when 'alnum'
78
- self.emit(type, :alnum, text, ts-1, te)
79
- when 'alpha'
80
- self.emit(type, :alpha, text, ts-1, te)
81
- when 'ascii'
82
- self.emit(type, :ascii, text, ts-1, te)
83
- when 'blank'
84
- self.emit(type, :blank, text, ts-1, te)
85
- when 'cntrl'
86
- self.emit(type, :cntrl, text, ts-1, te)
87
- when 'digit'
88
- self.emit(type, :digit, text, ts-1, te)
89
- when 'graph'
90
- self.emit(type, :graph, text, ts-1, te)
91
- when 'lower'
92
- self.emit(type, :lower, text, ts-1, te)
93
- when 'print'
94
- self.emit(type, :print, text, ts-1, te)
95
- when 'punct'
96
- self.emit(type, :punct, text, ts-1, te)
97
- when 'space'
98
- self.emit(type, :space, text, ts-1, te)
99
- when 'upper'
100
- self.emit(type, :upper, text, ts-1, te)
101
- when 'word'
102
- self.emit(type, :word, text, ts-1, te)
103
- when 'xdigit'
104
- self.emit(type, :xdigit, text, ts-1, te)
105
- when 'xposixpunct'
106
- self.emit(type, :xposixpunct, text, ts-1, te)
107
-
108
- # Only in Oniguruma (old Rubies)
109
- when 'newline'
110
- self.emit(type, :newline, text, ts-1, te)
111
-
112
- when 'any'
113
- self.emit(type, :any, text, ts-1, te)
114
- when 'assigned'
115
- self.emit(type, :assigned, text, ts-1, te)
116
-
117
- # Letters
118
- when 'l', 'letter'
119
- self.emit(type, :letter_any, text, ts-1, te)
120
- when 'lu', 'uppercaseletter'
121
- self.emit(type, :letter_uppercase, text, ts-1, te)
122
- when 'll', 'lowercaseletter'
123
- self.emit(type, :letter_lowercase, text, ts-1, te)
124
- when 'lt', 'titlecaseletter'
125
- self.emit(type, :letter_titlecase, text, ts-1, te)
126
- when 'lm', 'modifierletter'
127
- self.emit(type, :letter_modifier, text, ts-1, te)
128
- when 'lo', 'otherletter'
129
- self.emit(type, :letter_other, text, ts-1, te)
130
-
131
- # Marks
132
- when 'm', 'mark'
133
- self.emit(type, :mark_any, text, ts-1, te)
134
- when 'mn', 'nonspacingmark'
135
- self.emit(type, :mark_nonspacing, text, ts-1, te)
136
- when 'mc', 'spacingmark'
137
- self.emit(type, :mark_spacing, text, ts-1, te)
138
- when 'me', 'enclosingmark'
139
- self.emit(type, :mark_enclosing, text, ts-1, te)
140
-
141
- # Numbers
142
- when 'n', 'number'
143
- self.emit(type, :number_any, text, ts-1, te)
144
- when 'nd', 'decimalnumber'
145
- self.emit(type, :number_decimal, text, ts-1, te)
146
- when 'nl', 'letternumber'
147
- self.emit(type, :number_letter, text, ts-1, te)
148
- when 'no', 'othernumber'
149
- self.emit(type, :number_other, text, ts-1, te)
150
-
151
- # Punctuation
152
- when 'p', 'punctuation'
153
- self.emit(type, :punct_any, text, ts-1, te)
154
- when 'pc', 'connectorpunctuation'
155
- self.emit(type, :punct_connector, text, ts-1, te)
156
- when 'pd', 'dashpunctuation'
157
- self.emit(type, :punct_dash, text, ts-1, te)
158
- when 'ps', 'openpunctuation'
159
- self.emit(type, :punct_open, text, ts-1, te)
160
- when 'pe', 'closepunctuation'
161
- self.emit(type, :punct_close, text, ts-1, te)
162
- when 'pi', 'initialpunctuation'
163
- self.emit(type, :punct_initial, text, ts-1, te)
164
- when 'pf', 'finalpunctuation'
165
- self.emit(type, :punct_final, text, ts-1, te)
166
- when 'po', 'otherpunctuation'
167
- self.emit(type, :punct_other, text, ts-1, te)
168
-
169
- # Symbols
170
- when 's', 'symbol'
171
- self.emit(type, :symbol_any, text, ts-1, te)
172
- when 'sm', 'mathsymbol'
173
- self.emit(type, :symbol_math, text, ts-1, te)
174
- when 'sc', 'currencysymbol'
175
- self.emit(type, :symbol_currency, text, ts-1, te)
176
- when 'sk', 'modifiersymbol'
177
- self.emit(type, :symbol_modifier, text, ts-1, te)
178
- when 'so', 'othersymbol'
179
- self.emit(type, :symbol_other, text, ts-1, te)
180
-
181
- # Separators
182
- when 'z', 'separator'
183
- self.emit(type, :separator_any, text, ts-1, te)
184
- when 'zs', 'spaceseparator'
185
- self.emit(type, :separator_space, text, ts-1, te)
186
- when 'zl', 'lineseparator'
187
- self.emit(type, :separator_line, text, ts-1, te)
188
- when 'zp', 'paragraphseparator'
189
- self.emit(type, :separator_para, text, ts-1, te)
190
-
191
- # Codepoints
192
- when 'c', 'other'
193
- self.emit(type, :other, text, ts-1, te)
194
- when 'cc', 'control'
195
- self.emit(type, :control, text, ts-1, te)
196
- when 'cf', 'format'
197
- self.emit(type, :format, text, ts-1, te)
198
- when 'cs', 'surrogate'
199
- self.emit(type, :surrogate, text, ts-1, te)
200
- when 'co', 'privateuse'
201
- self.emit(type, :private_use, text, ts-1, te)
202
- when 'cn', 'unassigned'
203
- self.emit(type, :unassigned, text, ts-1, te)
204
-
205
- # Age
206
- when 'age=1.1'
207
- self.emit(type, :age_1_1, text, ts-1, te)
208
- when 'age=2.0'
209
- self.emit(type, :age_2_0, text, ts-1, te)
210
- when 'age=2.1'
211
- self.emit(type, :age_2_1, text, ts-1, te)
212
- when 'age=3.0'
213
- self.emit(type, :age_3_0, text, ts-1, te)
214
- when 'age=3.1'
215
- self.emit(type, :age_3_1, text, ts-1, te)
216
- when 'age=3.2'
217
- self.emit(type, :age_3_2, text, ts-1, te)
218
- when 'age=4.0'
219
- self.emit(type, :age_4_0, text, ts-1, te)
220
- when 'age=4.1'
221
- self.emit(type, :age_4_1, text, ts-1, te)
222
- when 'age=5.0'
223
- self.emit(type, :age_5_0, text, ts-1, te)
224
- when 'age=5.1'
225
- self.emit(type, :age_5_1, text, ts-1, te)
226
- when 'age=5.2'
227
- self.emit(type, :age_5_2, text, ts-1, te)
228
- when 'age=6.0'
229
- self.emit(type, :age_6_0, text, ts-1, te)
230
- when 'age=6.1'
231
- self.emit(type, :age_6_1, text, ts-1, te)
232
- when 'age=6.2'
233
- self.emit(type, :age_6_2, text, ts-1, te)
234
- when 'age=6.3'
235
- self.emit(type, :age_6_3, text, ts-1, te)
236
- when 'age=7.0'
237
- self.emit(type, :age_7_0, text, ts-1, te)
238
- when 'age=8.0'
239
- self.emit(type, :age_8_0, text, ts-1, te)
240
- when 'age=9.0'
241
- self.emit(type, :age_9_0, text, ts-1, te)
242
- when 'age=10.0'
243
- self.emit(type, :age_10_0, text, ts-1, te)
244
-
245
- # Derived Properties
246
- when 'ahex', 'asciihexdigit'
247
- self.emit(type, :ascii_hex, text, ts-1, te)
248
- when 'alphabetic'
249
- self.emit(type, :alphabetic, text, ts-1, te)
250
- when 'cased'
251
- self.emit(type, :cased, text, ts-1, te)
252
- when 'cwcf', 'changeswhencasefolded'
253
- self.emit(type, :changes_when_casefolded, text, ts-1, te)
254
- when 'cwcm', 'changeswhencasemapped'
255
- self.emit(type, :changes_when_casemapped, text, ts-1, te)
256
- when 'cwl', 'changeswhenlowercased'
257
- self.emit(type, :changes_when_lowercased, text, ts-1, te)
258
- when 'cwt', 'changeswhentitlecased'
259
- self.emit(type, :changes_when_titlecased, text, ts-1, te)
260
- when 'cwu', 'changeswhenuppercased'
261
- self.emit(type, :changes_when_uppercased, text, ts-1, te)
262
- when 'ci', 'caseignorable'
263
- self.emit(type, :case_ignorable, text, ts-1, te)
264
- when 'bidic', 'bidicontrol'
265
- self.emit(type, :bidi_control, text, ts-1, te)
266
- when 'dash'
267
- self.emit(type, :dash, text, ts-1, te)
268
- when 'dep', 'deprecated'
269
- self.emit(type, :deprecated, text, ts-1, te)
270
- when 'di', 'defaultignorablecodepoint'
271
- self.emit(type, :default_ignorable_cp, text, ts-1, te)
272
- when 'dia', 'diacritic'
273
- self.emit(type, :diacritic, text, ts-1, te)
274
- when 'ext', 'extender'
275
- self.emit(type, :extender, text, ts-1, te)
276
- when 'grbase', 'graphemebase'
277
- self.emit(type, :grapheme_base, text, ts-1, te)
278
- when 'grext', 'graphemeextend'
279
- self.emit(type, :grapheme_extend, text, ts-1, te)
280
- when 'grlink', 'graphemelink' # NOTE: deprecated as of Unicode 5.0
281
- self.emit(type, :grapheme_link, text, ts-1, te)
282
- when 'hex', 'hexdigit'
283
- self.emit(type, :hex_digit, text, ts-1, te)
284
- when 'hyphen' # NOTE: deprecated as of Unicode 6.0
285
- self.emit(type, :hyphen, text, ts-1, te)
286
- when 'idc', 'idcontinue'
287
- self.emit(type, :id_continue, text, ts-1, te)
288
- when 'ideo', 'ideographic'
289
- self.emit(type, :ideographic, text, ts-1, te)
290
- when 'ids', 'idstart'
291
- self.emit(type, :id_start, text, ts-1, te)
292
- when 'idsb', 'idsbinaryoperator'
293
- self.emit(type, :ids_binary_op, text, ts-1, te)
294
- when 'idst', 'idstrinaryoperator'
295
- self.emit(type, :ids_trinary_op, text, ts-1, te)
296
- when 'joinc', 'joincontrol'
297
- self.emit(type, :join_control, text, ts-1, te)
298
- when 'loe', 'logicalorderexception'
299
- self.emit(type, :logical_order_exception, text, ts-1, te)
300
- when 'lowercase'
301
- self.emit(type, :lowercase, text, ts-1, te)
302
- when 'math'
303
- self.emit(type, :math, text, ts-1, te)
304
- when 'nchar', 'noncharactercodepoint'
305
- self.emit(type, :non_character_cp, text, ts-1, te)
306
- when 'oalpha', 'otheralphabetic'
307
- self.emit(type, :other_alphabetic, text, ts-1, te)
308
- when 'odi', 'otherdefaultignorablecodepoint'
309
- self.emit(type, :other_default_ignorable_cp, text, ts-1, te)
310
- when 'ogrext', 'othergraphemeextend'
311
- self.emit(type, :other_grapheme_extended, text, ts-1, te)
312
- when 'oidc', 'otheridcontinue'
313
- self.emit(type, :other_id_continue, text, ts-1, te)
314
- when 'oids', 'otheridstart'
315
- self.emit(type, :other_id_start, text, ts-1, te)
316
- when 'olower', 'otherlowercase'
317
- self.emit(type, :other_lowercase, text, ts-1, te)
318
- when 'omath', 'othermath'
319
- self.emit(type, :other_math, text, ts-1, te)
320
- when 'oupper', 'otheruppercase'
321
- self.emit(type, :other_uppercase, text, ts-1, te)
322
- when 'patsyn', 'patternsyntax'
323
- self.emit(type, :pattern_syntax, text, ts-1, te)
324
- when 'patws', 'patternwhitespace'
325
- self.emit(type, :pattern_whitespace, text, ts-1, te)
326
- when 'qmark', 'quotationmark'
327
- self.emit(type, :quotation_mark, text, ts-1, te)
328
- when 'radical'
329
- self.emit(type, :radical, text, ts-1, te)
330
- when 'ri', 'regionalindicator'
331
- self.emit(type, :regional_indicator, text, ts-1, te)
332
- when 'sd', 'softdotted'
333
- self.emit(type, :soft_dotted, text, ts-1, te)
334
- when 'sterm'
335
- self.emit(type, :sentence_terminal, text, ts-1, te)
336
- when 'term', 'terminalpunctuation'
337
- self.emit(type, :terminal_punctuation, text, ts-1, te)
338
- when 'uideo', 'unifiedideograph'
339
- self.emit(type, :unified_ideograph, text, ts-1, te)
340
- when 'uppercase'
341
- self.emit(type, :uppercase, text, ts-1, te)
342
- when 'vs', 'variationselector'
343
- self.emit(type, :variation_selector, text, ts-1, te)
344
- when 'wspace', 'whitespace'
345
- self.emit(type, :whitespace, text, ts-1, te)
346
- when 'xids', 'xidstart'
347
- self.emit(type, :xid_start, text, ts-1, te)
348
- when 'xidc', 'xidcontinue'
349
- self.emit(type, :xid_continue, text, ts-1, te)
350
-
351
- # Emoji
352
- when 'emoji'
353
- self.emit(type, :emoji_any, text, ts-1, te)
354
- when 'emojicomponent'
355
- self.emit(type, :emoji_component, text, ts-1, te)
356
- when 'emojimodifier'
357
- self.emit(type, :emoji_modifier, text, ts-1, te)
358
- when 'emojimodifierbase'
359
- self.emit(type, :emoji_modifier_base, text, ts-1, te)
360
- when 'emojipresentation'
361
- self.emit(type, :emoji_presentation, text, ts-1, te)
362
-
363
- # Scripts
364
- when 'aghb', 'caucasianalbanian'
365
- self.emit(type, :script_caucasian_albanian, text, ts-1, te)
366
- when 'arab', 'arabic'
367
- self.emit(type, :script_arabic, text, ts-1, te)
368
- when 'armi', 'imperialaramaic'
369
- self.emit(type, :script_imperial_aramaic, text, ts-1, te)
370
- when 'armn', 'armenian'
371
- self.emit(type, :script_armenian, text, ts-1, te)
372
- when 'avst', 'avestan'
373
- self.emit(type, :script_avestan, text, ts-1, te)
374
- when 'bali', 'balinese'
375
- self.emit(type, :script_balinese, text, ts-1, te)
376
- when 'bamu', 'bamum'
377
- self.emit(type, :script_bamum, text, ts-1, te)
378
- when 'bass', 'bassavah'
379
- self.emit(type, :script_bassa_vah, text, ts-1, te)
380
- when 'batk', 'batak'
381
- self.emit(type, :script_batak, text, ts-1, te)
382
- when 'beng', 'bengali'
383
- self.emit(type, :script_bengali, text, ts-1, te)
384
- when 'bopo', 'bopomofo'
385
- self.emit(type, :script_bopomofo, text, ts-1, te)
386
- when 'brah', 'brahmi'
387
- self.emit(type, :script_brahmi, text, ts-1, te)
388
- when 'brai', 'braille'
389
- self.emit(type, :script_braille, text, ts-1, te)
390
- when 'bugi', 'buginese'
391
- self.emit(type, :script_buginese, text, ts-1, te)
392
- when 'buhd', 'buhid'
393
- self.emit(type, :script_buhid, text, ts-1, te)
394
- when 'cans', 'canadianaboriginal'
395
- self.emit(type, :script_canadian_aboriginal, text, ts-1, te)
396
- when 'cari', 'carian'
397
- self.emit(type, :script_carian, text, ts-1, te)
398
- when 'cham'
399
- self.emit(type, :script_cham, text, ts-1, te)
400
- when 'cher', 'cherokee'
401
- self.emit(type, :script_cherokee, text, ts-1, te)
402
- when 'copt', 'coptic', 'qaac'
403
- self.emit(type, :script_coptic, text, ts-1, te)
404
- when 'cprt', 'cypriot'
405
- self.emit(type, :script_cypriot, text, ts-1, te)
406
- when 'cyrl', 'cyrillic'
407
- self.emit(type, :script_cyrillic, text, ts-1, te)
408
- when 'deva', 'devanagari'
409
- self.emit(type, :script_devanagari, text, ts-1, te)
410
- when 'dsrt', 'deseret'
411
- self.emit(type, :script_deseret, text, ts-1, te)
412
- when 'dupl', 'duployan'
413
- self.emit(type, :script_duployan, text, ts-1, te)
414
- when 'egyp', 'egyptianhieroglyphs'
415
- self.emit(type, :script_egyptian_hieroglyphs, text, ts-1, te)
416
- when 'elba', 'elbasan'
417
- self.emit(type, :script_elbasan, text, ts-1, te)
418
- when 'ethi', 'ethiopic'
419
- self.emit(type, :script_ethiopic, text, ts-1, te)
420
- when 'geor', 'georgian'
421
- self.emit(type, :script_georgian, text, ts-1, te)
422
- when 'glag', 'glagolitic'
423
- self.emit(type, :script_glagolitic, text, ts-1, te)
424
- when 'goth', 'gothic'
425
- self.emit(type, :script_gothic, text, ts-1, te)
426
- when 'gran', 'grantha'
427
- self.emit(type, :script_grantha, text, ts-1, te)
428
- when 'grek', 'greek'
429
- self.emit(type, :script_greek, text, ts-1, te)
430
- when 'gujr', 'gujarati'
431
- self.emit(type, :script_gujarati, text, ts-1, te)
432
- when 'guru', 'gurmukhi'
433
- self.emit(type, :script_gurmukhi, text, ts-1, te)
434
- when 'hang', 'hangul'
435
- self.emit(type, :script_hangul, text, ts-1, te)
436
- when 'hani', 'han'
437
- self.emit(type, :script_han, text, ts-1, te)
438
- when 'hano', 'hanunoo'
439
- self.emit(type, :script_hanunoo, text, ts-1, te)
440
- when 'hebr', 'hebrew'
441
- self.emit(type, :script_hebrew, text, ts-1, te)
442
- when 'hira', 'hiragana'
443
- self.emit(type, :script_hiragana, text, ts-1, te)
444
- when 'hmng', 'pahawhhmong'
445
- self.emit(type, :script_pahawh_hmong, text, ts-1, te)
446
- when 'hrkt', 'katakanaorhiragana'
447
- self.emit(type, :script_katakana_or_hiragana, text, ts-1, te)
448
- when 'ital', 'olditalic'
449
- self.emit(type, :script_old_italic, text, ts-1, te)
450
- when 'java', 'javanese'
451
- self.emit(type, :script_javanese, text, ts-1, te)
452
- when 'kali', 'kayahli'
453
- self.emit(type, :script_kayah_li, text, ts-1, te)
454
- when 'kana', 'katakana'
455
- self.emit(type, :script_katakana, text, ts-1, te)
456
- when 'khar', 'kharoshthi'
457
- self.emit(type, :script_kharoshthi, text, ts-1, te)
458
- when 'khmr', 'khmer'
459
- self.emit(type, :script_khmer, text, ts-1, te)
460
- when 'khoj', 'khojki'
461
- self.emit(type, :script_khojki, text, ts-1, te)
462
- when 'knda', 'kannada'
463
- self.emit(type, :script_kannada, text, ts-1, te)
464
- when 'kthi', 'kaithi'
465
- self.emit(type, :script_kaithi, text, ts-1, te)
466
- when 'lana', 'taitham'
467
- self.emit(type, :script_tai_tham, text, ts-1, te)
468
- when 'laoo', 'lao'
469
- self.emit(type, :script_lao, text, ts-1, te)
470
- when 'latn', 'latin'
471
- self.emit(type, :script_latin, text, ts-1, te)
472
- when 'lepc', 'lepcha'
473
- self.emit(type, :script_lepcha, text, ts-1, te)
474
- when 'limb', 'limbu'
475
- self.emit(type, :script_limbu, text, ts-1, te)
476
- when 'lina', 'lineara'
477
- self.emit(type, :script_linear_a, text, ts-1, te)
478
- when 'linb', 'linearb'
479
- self.emit(type, :script_linear_b, text, ts-1, te)
480
- when 'lisu'
481
- self.emit(type, :script_lisu, text, ts-1, te)
482
- when 'lyci', 'lycian'
483
- self.emit(type, :script_lycian, text, ts-1, te)
484
- when 'lydi', 'lydian'
485
- self.emit(type, :script_lydian, text, ts-1, te)
486
- when 'mlym', 'malayalam'
487
- self.emit(type, :script_malayalam, text, ts-1, te)
488
- when 'mahj', 'mahajani'
489
- self.emit(type, :script_mahajani, text, ts-1, te)
490
- when 'mand', 'mandaic'
491
- self.emit(type, :script_mandaic, text, ts-1, te)
492
- when 'mani', 'manichaean'
493
- self.emit(type, :script_manichaean, text, ts-1, te)
494
- when 'mend', 'mendekikakui'
495
- self.emit(type, :script_mende_kikakui, text, ts-1, te)
496
- when 'modi'
497
- self.emit(type, :script_modi, text, ts-1, te)
498
- when 'mong', 'mongolian'
499
- self.emit(type, :script_mongolian, text, ts-1, te)
500
- when 'mroo', 'mro'
501
- self.emit(type, :script_mro, text, ts-1, te)
502
- when 'mtei', 'meeteimayek'
503
- self.emit(type, :script_meetei_mayek, text, ts-1, te)
504
- when 'mymr', 'myanmar'
505
- self.emit(type, :script_myanmar, text, ts-1, te)
506
- when 'narb', 'oldnortharabian'
507
- self.emit(type, :script_old_north_arabian, text, ts-1, te)
508
- when 'nbat', 'nabataean'
509
- self.emit(type, :script_nabataean, text, ts-1, te)
510
- when 'nkoo', 'nko'
511
- self.emit(type, :script_nko, text, ts-1, te)
512
- when 'ogam', 'ogham'
513
- self.emit(type, :script_ogham, text, ts-1, te)
514
- when 'olck', 'olchiki'
515
- self.emit(type, :script_ol_chiki, text, ts-1, te)
516
- when 'orkh', 'oldturkic'
517
- self.emit(type, :script_old_turkic, text, ts-1, te)
518
- when 'orya', 'oriya'
519
- self.emit(type, :script_oriya, text, ts-1, te)
520
- when 'osma', 'osmanya'
521
- self.emit(type, :script_osmanya, text, ts-1, te)
522
- when 'palm', 'palmyrene'
523
- self.emit(type, :script_palmyrene, text, ts-1, te)
524
- when 'pauc', 'paucinhau'
525
- self.emit(type, :script_pau_cin_hau, text, ts-1, te)
526
- when 'perm', 'oldpermic'
527
- self.emit(type, :script_old_permic, text, ts-1, te)
528
- when 'phag', 'phagspa'
529
- self.emit(type, :script_phags_pa, text, ts-1, te)
530
- when 'phli', 'inscriptionalpahlavi'
531
- self.emit(type, :script_inscriptional_pahlavi, text, ts-1, te)
532
- when 'phlp', 'psalterpahlavi'
533
- self.emit(type, :script_psalter_pahlavi, text, ts-1, te)
534
- when 'phnx', 'phoenician'
535
- self.emit(type, :script_phoenician, text, ts-1, te)
536
- when 'prti', 'inscriptionalparthian'
537
- self.emit(type, :script_inscriptional_parthian, text, ts-1, te)
538
- when 'rjng', 'rejang'
539
- self.emit(type, :script_rejang, text, ts-1, te)
540
- when 'runr', 'runic'
541
- self.emit(type, :script_runic, text, ts-1, te)
542
- when 'samr', 'samaritan'
543
- self.emit(type, :script_samaritan, text, ts-1, te)
544
- when 'sarb', 'oldsoutharabian'
545
- self.emit(type, :script_old_south_arabian, text, ts-1, te)
546
- when 'saur', 'saurashtra'
547
- self.emit(type, :script_saurashtra, text, ts-1, te)
548
- when 'shaw', 'shavian'
549
- self.emit(type, :script_shavian, text, ts-1, te)
550
- when 'sidd', 'siddham'
551
- self.emit(type, :script_siddham, text, ts-1, te)
552
- when 'sind', 'khudawadi'
553
- self.emit(type, :script_khudawadi, text, ts-1, te)
554
- when 'sinh', 'sinhala'
555
- self.emit(type, :script_sinhala, text, ts-1, te)
556
- when 'sund', 'sundanese'
557
- self.emit(type, :script_sundanese, text, ts-1, te)
558
- when 'sylo', 'sylotinagri'
559
- self.emit(type, :script_syloti_nagri, text, ts-1, te)
560
- when 'syrc', 'syriac'
561
- self.emit(type, :script_syriac, text, ts-1, te)
562
- when 'tagb', 'tagbanwa'
563
- self.emit(type, :script_tagbanwa, text, ts-1, te)
564
- when 'tale', 'taile'
565
- self.emit(type, :script_tai_le, text, ts-1, te)
566
- when 'talu', 'newtailue'
567
- self.emit(type, :script_new_tai_lue, text, ts-1, te)
568
- when 'taml', 'tamil'
569
- self.emit(type, :script_tamil, text, ts-1, te)
570
- when 'tavt', 'taiviet'
571
- self.emit(type, :script_tai_viet, text, ts-1, te)
572
- when 'telu', 'telugu'
573
- self.emit(type, :script_telugu, text, ts-1, te)
574
- when 'tfng', 'tifinagh'
575
- self.emit(type, :script_tifinagh, text, ts-1, te)
576
- when 'tglg', 'tagalog'
577
- self.emit(type, :script_tagalog, text, ts-1, te)
578
- when 'thaa', 'thaana'
579
- self.emit(type, :script_thaana, text, ts-1, te)
580
- when 'thai'
581
- self.emit(type, :script_thai, text, ts-1, te)
582
- when 'tibt', 'tibetan'
583
- self.emit(type, :script_tibetan, text, ts-1, te)
584
- when 'tirh', 'tirhuta'
585
- self.emit(type, :script_tirhuta, text, ts-1, te)
586
- when 'ugar', 'ugaritic'
587
- self.emit(type, :script_ugaritic, text, ts-1, te)
588
- when 'vaii', 'vai'
589
- self.emit(type, :script_vai, text, ts-1, te)
590
- when 'wara', 'warangciti'
591
- self.emit(type, :script_warang_citi, text, ts-1, te)
592
- when 'xpeo', 'oldpersian'
593
- self.emit(type, :script_old_persian, text, ts-1, te)
594
- when 'xsux', 'cuneiform'
595
- self.emit(type, :script_cuneiform, text, ts-1, te)
596
- when 'yiii', 'yi'
597
- self.emit(type, :script_yi, text, ts-1, te)
598
- when 'zinh', 'inherited', 'qaai'
599
- self.emit(type, :script_inherited, text, ts-1, te)
600
- when 'zyyy', 'common'
601
- self.emit(type, :script_common, text, ts-1, te)
602
- when 'zzzz', 'unknown'
603
- self.emit(type, :script_unknown, text, ts-1, te)
18
+ type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
604
19
 
605
- # Unicode blocks
606
- when 'inalphabeticpresentationforms'
607
- self.emit(type, :block_inalphabetic_presentation_forms, text, ts-1, te)
608
- when 'inarabicpresentationforms-a'
609
- self.emit(type, :block_inarabic_presentation_forms_a, text, ts-1, te)
610
- when 'inarabicpresentationforms-b'
611
- self.emit(type, :block_inarabic_presentation_forms_b, text, ts-1, te)
612
- when 'inarabic'
613
- self.emit(type, :block_inarabic, text, ts-1, te)
614
- when 'inarmenian'
615
- self.emit(type, :block_inarmenian, text, ts-1, te)
616
- when 'inarrows'
617
- self.emit(type, :block_inarrows, text, ts-1, te)
618
- when 'inbasiclatin'
619
- self.emit(type, :block_inbasic_latin, text, ts-1, te)
620
- when 'inbengali'
621
- self.emit(type, :block_inbengali, text, ts-1, te)
622
- when 'inblockelements'
623
- self.emit(type, :block_inblock_elements, text, ts-1, te)
624
- when 'inbopomofoextended'
625
- self.emit(type, :block_inbopomofo_extended, text, ts-1, te)
626
- when 'inbopomofo'
627
- self.emit(type, :block_inbopomofo, text, ts-1, te)
628
- when 'inboxdrawing'
629
- self.emit(type, :block_inbox_drawing, text, ts-1, te)
630
- when 'inbraillepatterns'
631
- self.emit(type, :block_inbraille_patterns, text, ts-1, te)
632
- when 'inbuhid'
633
- self.emit(type, :block_inbuhid, text, ts-1, te)
634
- when 'incjkcompatibilityforms'
635
- self.emit(type, :block_incjk_compatibility_forms, text, ts-1, te)
636
- when 'incjkcompatibilityideographs'
637
- self.emit(type, :block_incjk_compatibility_ideographs, text, ts-1, te)
638
- when 'incjkcompatibility'
639
- self.emit(type, :block_incjk_compatibility, text, ts-1, te)
640
- when 'incjkradicalssupplement'
641
- self.emit(type, :block_incjk_radicals_supplement, text, ts-1, te)
642
- when 'incjksymbolsandpunctuation'
643
- self.emit(type, :block_incjk_symbols_and_punctuation, text, ts-1, te)
644
- when 'incjkunifiedideographsextensiona'
645
- self.emit(type, :block_incjk_unified_ideographs_extension_a, text, ts-1, te)
646
- when 'incjkunifiedideographs'
647
- self.emit(type, :block_incjk_unified_ideographs, text, ts-1, te)
648
- when 'incherokee'
649
- self.emit(type, :block_incherokee, text, ts-1, te)
650
- when 'incombiningdiacriticalmarksforsymbols'
651
- self.emit(type, :block_incombining_diacritical_marks_for_symbols, text, ts-1, te)
652
- when 'incombiningdiacriticalmarks'
653
- self.emit(type, :block_incombining_diacritical_marks, text, ts-1, te)
654
- when 'incombininghalfmarks'
655
- self.emit(type, :block_incombining_half_marks, text, ts-1, te)
656
- when 'incontrolpictures'
657
- self.emit(type, :block_incontrol_pictures, text, ts-1, te)
658
- when 'incurrencysymbols'
659
- self.emit(type, :block_incurrency_symbols, text, ts-1, te)
660
- when 'incyrillicsupplement'
661
- self.emit(type, :block_incyrillic_supplement, text, ts-1, te)
662
- when 'incyrillic'
663
- self.emit(type, :block_incyrillic, text, ts-1, te)
664
- when 'indevanagari'
665
- self.emit(type, :block_indevanagari, text, ts-1, te)
666
- when 'indingbats'
667
- self.emit(type, :block_indingbats, text, ts-1, te)
668
- when 'inenclosedalphanumerics'
669
- self.emit(type, :block_inenclosed_alphanumerics, text, ts-1, te)
670
- when 'inenclosedcjklettersandmonths'
671
- self.emit(type, :block_inenclosed_cjk_letters_and_months, text, ts-1, te)
672
- when 'inethiopic'
673
- self.emit(type, :block_inethiopic, text, ts-1, te)
674
- when 'ingeneralpunctuation'
675
- self.emit(type, :block_ingeneral_punctuation, text, ts-1, te)
676
- when 'ingeometricshapes'
677
- self.emit(type, :block_ingeometric_shapes, text, ts-1, te)
678
- when 'ingeorgian'
679
- self.emit(type, :block_ingeorgian, text, ts-1, te)
680
- when 'ingreekextended'
681
- self.emit(type, :block_ingreek_extended, text, ts-1, te)
682
- when 'ingreekandcoptic'
683
- self.emit(type, :block_ingreek_and_coptic, text, ts-1, te)
684
- when 'ingujarati'
685
- self.emit(type, :block_ingujarati, text, ts-1, te)
686
- when 'ingurmukhi'
687
- self.emit(type, :block_ingurmukhi, text, ts-1, te)
688
- when 'inhalfwidthandfullwidthforms'
689
- self.emit(type, :block_inhalfwidth_and_fullwidth_forms, text, ts-1, te)
690
- when 'inhangulcompatibilityjamo'
691
- self.emit(type, :block_inhangul_compatibility_jamo, text, ts-1, te)
692
- when 'inhanguljamo'
693
- self.emit(type, :block_inhangul_jamo, text, ts-1, te)
694
- when 'inhangulsyllables'
695
- self.emit(type, :block_inhangul_syllables, text, ts-1, te)
696
- when 'inhanunoo'
697
- self.emit(type, :block_inhanunoo, text, ts-1, te)
698
- when 'inhebrew'
699
- self.emit(type, :block_inhebrew, text, ts-1, te)
700
- when 'inhighprivateusesurrogates'
701
- self.emit(type, :block_inhigh_private_use_surrogates, text, ts-1, te)
702
- when 'inhighsurrogates'
703
- self.emit(type, :block_inhigh_surrogates, text, ts-1, te)
704
- when 'inhiragana'
705
- self.emit(type, :block_inhiragana, text, ts-1, te)
706
- when 'inipaextensions'
707
- self.emit(type, :block_inipa_extensions, text, ts-1, te)
708
- when 'inideographicdescriptioncharacters'
709
- self.emit(type, :block_inideographic_description_characters, text, ts-1, te)
710
- when 'inkanbun'
711
- self.emit(type, :block_inkanbun, text, ts-1, te)
712
- when 'inkangxiradicals'
713
- self.emit(type, :block_inkangxi_radicals, text, ts-1, te)
714
- when 'inkannada'
715
- self.emit(type, :block_inkannada, text, ts-1, te)
716
- when 'inkatakanaphoneticextensions'
717
- self.emit(type, :block_inkatakana_phonetic_extensions, text, ts-1, te)
718
- when 'inkatakana'
719
- self.emit(type, :block_inkatakana, text, ts-1, te)
720
- when 'inkhmersymbols'
721
- self.emit(type, :block_inkhmer_symbols, text, ts-1, te)
722
- when 'inkhmer'
723
- self.emit(type, :block_inkhmer, text, ts-1, te)
724
- when 'inlao'
725
- self.emit(type, :block_inlao, text, ts-1, te)
726
- when 'inlatin-1supplement'
727
- self.emit(type, :block_inlatin_1_supplement, text, ts-1, te)
728
- when 'inlatinextended-a'
729
- self.emit(type, :block_inlatin_extended_a, text, ts-1, te)
730
- when 'inlatinextended-b'
731
- self.emit(type, :block_inlatin_extended_b, text, ts-1, te)
732
- when 'inlatinextendedadditional'
733
- self.emit(type, :block_inlatin_extended_additional, text, ts-1, te)
734
- when 'inletterlikesymbols'
735
- self.emit(type, :block_inletterlike_symbols, text, ts-1, te)
736
- when 'inlimbu'
737
- self.emit(type, :block_inlimbu, text, ts-1, te)
738
- when 'inlowsurrogates'
739
- self.emit(type, :block_inlow_surrogates, text, ts-1, te)
740
- when 'inmalayalam'
741
- self.emit(type, :block_inmalayalam, text, ts-1, te)
742
- when 'inmathematicaloperators'
743
- self.emit(type, :block_inmathematical_operators, text, ts-1, te)
744
- when 'inmiscellaneousmathematicalsymbols-a'
745
- self.emit(type, :block_inmiscellaneous_mathematical_symbols_a, text, ts-1, te)
746
- when 'inmiscellaneousmathematicalsymbols-b'
747
- self.emit(type, :block_inmiscellaneous_mathematical_symbols_b, text, ts-1, te)
748
- when 'inmiscellaneoussymbolsandarrows'
749
- self.emit(type, :block_inmiscellaneous_symbols_and_arrows, text, ts-1, te)
750
- when 'inmiscellaneoussymbols'
751
- self.emit(type, :block_inmiscellaneous_symbols, text, ts-1, te)
752
- when 'inmiscellaneoustechnical'
753
- self.emit(type, :block_inmiscellaneous_technical, text, ts-1, te)
754
- when 'inmongolian'
755
- self.emit(type, :block_inmongolian, text, ts-1, te)
756
- when 'inmyanmar'
757
- self.emit(type, :block_inmyanmar, text, ts-1, te)
758
- when 'innumberforms'
759
- self.emit(type, :block_innumber_forms, text, ts-1, te)
760
- when 'inogham'
761
- self.emit(type, :block_inogham, text, ts-1, te)
762
- when 'inopticalcharacterrecognition'
763
- self.emit(type, :block_inoptical_character_recognition, text, ts-1, te)
764
- when 'inoriya'
765
- self.emit(type, :block_inoriya, text, ts-1, te)
766
- when 'inphoneticextensions'
767
- self.emit(type, :block_inphonetic_extensions, text, ts-1, te)
768
- when 'inprivateusearea'
769
- self.emit(type, :block_inprivate_use_area, text, ts-1, te)
770
- when 'inrunic'
771
- self.emit(type, :block_inrunic, text, ts-1, te)
772
- when 'insinhala'
773
- self.emit(type, :block_insinhala, text, ts-1, te)
774
- when 'insmallformvariants'
775
- self.emit(type, :block_insmall_form_variants, text, ts-1, te)
776
- when 'inspacingmodifierletters'
777
- self.emit(type, :block_inspacing_modifier_letters, text, ts-1, te)
778
- when 'inspecials'
779
- self.emit(type, :block_inspecials, text, ts-1, te)
780
- when 'insuperscriptsandsubscripts'
781
- self.emit(type, :block_insuperscripts_and_subscripts, text, ts-1, te)
782
- when 'insupplementalarrows-a'
783
- self.emit(type, :block_insupplemental_arrows_a, text, ts-1, te)
784
- when 'insupplementalarrows-b'
785
- self.emit(type, :block_insupplemental_arrows_b, text, ts-1, te)
786
- when 'insupplementalmathematicaloperators'
787
- self.emit(type, :block_insupplemental_mathematical_operators, text, ts-1, te)
788
- when 'insyriac'
789
- self.emit(type, :block_insyriac, text, ts-1, te)
790
- when 'intagalog'
791
- self.emit(type, :block_intagalog, text, ts-1, te)
792
- when 'intagbanwa'
793
- self.emit(type, :block_intagbanwa, text, ts-1, te)
794
- when 'intaile'
795
- self.emit(type, :block_intai_le, text, ts-1, te)
796
- when 'intamil'
797
- self.emit(type, :block_intamil, text, ts-1, te)
798
- when 'intelugu'
799
- self.emit(type, :block_intelugu, text, ts-1, te)
800
- when 'inthaana'
801
- self.emit(type, :block_inthaana, text, ts-1, te)
802
- when 'inthai'
803
- self.emit(type, :block_inthai, text, ts-1, te)
804
- when 'intibetan'
805
- self.emit(type, :block_intibetan, text, ts-1, te)
806
- when 'inunifiedcanadianaboriginalsyllabics'
807
- self.emit(type, :block_inunified_canadian_aboriginal_syllabics, text, ts-1, te)
808
- when 'invariationselectors'
809
- self.emit(type, :block_invariation_selectors, text, ts-1, te)
810
- when 'inyiradicals'
811
- self.emit(type, :block_inyi_radicals, text, ts-1, te)
812
- when 'inyisyllables'
813
- self.emit(type, :block_inyi_syllables, text, ts-1, te)
814
- when 'inyijinghexagramsymbols'
815
- self.emit(type, :block_inyijing_hexagram_symbols, text, ts-1, te)
20
+ name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
816
21
 
817
- else
818
- # Should this really be an error? Or would emitting
819
- # an :unknown for the property be better?
820
- #
821
- # self.emit(type, :unknown, text, ts-1, te)
22
+ token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
+ raise UnknownUnicodePropertyError.new(name) unless token
822
24
 
823
- raise UnknownUnicodePropertyError.new(name)
824
- end
25
+ self.emit(type, token.to_sym, text, ts-1, te)
825
26
 
826
27
  fret;
827
28
  };