regexp_parser 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -0,0 +1,225 @@
1
+ #
2
+ # THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT
3
+ #
4
+ ---
5
+ adlm: adlam
6
+ aghb: caucasian_albanian
7
+ ahex: ascii_hex_digit
8
+ arab: arabic
9
+ armi: imperial_aramaic
10
+ armn: armenian
11
+ avst: avestan
12
+ bali: balinese
13
+ bamu: bamum
14
+ bass: bassa_vah
15
+ batk: batak
16
+ beng: bengali
17
+ bhks: bhaiksuki
18
+ bidic: bidi_control
19
+ bopo: bopomofo
20
+ brah: brahmi
21
+ brai: braille
22
+ bugi: buginese
23
+ buhd: buhid
24
+ c: other
25
+ cakm: chakma
26
+ cans: canadian_aboriginal
27
+ cari: carian
28
+ cc: control
29
+ cf: format
30
+ cher: cherokee
31
+ ci: case_ignorable
32
+ cn: unassigned
33
+ co: private_use
34
+ copt: coptic
35
+ cprt: cypriot
36
+ cs: surrogate
37
+ cwcf: changes_when_casefolded
38
+ cwcm: changes_when_casemapped
39
+ cwl: changes_when_lowercased
40
+ cwt: changes_when_titlecased
41
+ cwu: changes_when_uppercased
42
+ cyrl: cyrillic
43
+ dep: deprecated
44
+ deva: devanagari
45
+ di: default_ignorable_code_point
46
+ dia: diacritic
47
+ dsrt: deseret
48
+ dupl: duployan
49
+ egyp: egyptian_hieroglyphs
50
+ elba: elbasan
51
+ ethi: ethiopic
52
+ ext: extender
53
+ geor: georgian
54
+ glag: glagolitic
55
+ gonm: masaram_gondi
56
+ goth: gothic
57
+ gran: grantha
58
+ grbase: grapheme_base
59
+ grek: greek
60
+ grext: grapheme_extend
61
+ grlink: grapheme_link
62
+ gujr: gujarati
63
+ guru: gurmukhi
64
+ hang: hangul
65
+ hani: han
66
+ hano: hanunoo
67
+ hatr: hatran
68
+ hebr: hebrew
69
+ hex: hex_digit
70
+ hira: hiragana
71
+ hluw: anatolian_hieroglyphs
72
+ hmng: pahawh_hmong
73
+ hung: old_hungarian
74
+ idc: id_continue
75
+ ideo: ideographic
76
+ ids: id_start
77
+ idsb: ids_binary_operator
78
+ idst: ids_trinary_operator
79
+ ital: old_italic
80
+ java: javanese
81
+ joinc: join_control
82
+ kali: kayah_li
83
+ kana: katakana
84
+ khar: kharoshthi
85
+ khmr: khmer
86
+ khoj: khojki
87
+ knda: kannada
88
+ kthi: kaithi
89
+ l: letter
90
+ lana: tai_tham
91
+ laoo: lao
92
+ latn: latin
93
+ lc: cased_letter
94
+ lepc: lepcha
95
+ limb: limbu
96
+ lina: linear_a
97
+ linb: linear_b
98
+ ll: lowercase_letter
99
+ lm: modifier_letter
100
+ lo: other_letter
101
+ loe: logical_order_exception
102
+ lt: titlecase_letter
103
+ lu: uppercase_letter
104
+ lyci: lycian
105
+ lydi: lydian
106
+ m: mark
107
+ mahj: mahajani
108
+ mand: mandaic
109
+ mani: manichaean
110
+ marc: marchen
111
+ mc: spacing_mark
112
+ me: enclosing_mark
113
+ mend: mende_kikakui
114
+ merc: meroitic_cursive
115
+ mero: meroitic_hieroglyphs
116
+ mlym: malayalam
117
+ mn: nonspacing_mark
118
+ mong: mongolian
119
+ mroo: mro
120
+ mtei: meetei_mayek
121
+ mult: multani
122
+ mymr: myanmar
123
+ n: number
124
+ narb: old_north_arabian
125
+ nbat: nabataean
126
+ nchar: noncharacter_code_point
127
+ nd: decimal_number
128
+ nkoo: nko
129
+ nl: letter_number
130
+ 'no': other_number
131
+ nshu: nushu
132
+ oalpha: other_alphabetic
133
+ odi: other_default_ignorable_code_point
134
+ ogam: ogham
135
+ ogrext: other_grapheme_extend
136
+ oidc: other_id_continue
137
+ oids: other_id_start
138
+ olck: ol_chiki
139
+ olower: other_lowercase
140
+ omath: other_math
141
+ orkh: old_turkic
142
+ orya: oriya
143
+ osge: osage
144
+ osma: osmanya
145
+ oupper: other_uppercase
146
+ p: punctuation
147
+ palm: palmyrene
148
+ patsyn: pattern_syntax
149
+ patws: pattern_white_space
150
+ pauc: pau_cin_hau
151
+ pc: connector_punctuation
152
+ pcm: prepended_concatenation_mark
153
+ pd: dash_punctuation
154
+ pe: close_punctuation
155
+ perm: old_permic
156
+ pf: final_punctuation
157
+ phag: phags_pa
158
+ phli: inscriptional_pahlavi
159
+ phlp: psalter_pahlavi
160
+ phnx: phoenician
161
+ pi: initial_punctuation
162
+ plrd: miao
163
+ po: other_punctuation
164
+ prti: inscriptional_parthian
165
+ ps: open_punctuation
166
+ qaac: coptic
167
+ qaai: inherited
168
+ qmark: quotation_mark
169
+ ri: regional_indicator
170
+ rjng: rejang
171
+ runr: runic
172
+ s: symbol
173
+ samr: samaritan
174
+ sarb: old_south_arabian
175
+ saur: saurashtra
176
+ sc: currency_symbol
177
+ sd: soft_dotted
178
+ sgnw: signwriting
179
+ shaw: shavian
180
+ shrd: sharada
181
+ sidd: siddham
182
+ sind: khudawadi
183
+ sinh: sinhala
184
+ sk: modifier_symbol
185
+ sm: math_symbol
186
+ so: other_symbol
187
+ sora: sora_sompeng
188
+ soyo: soyombo
189
+ sterm: sentence_terminal
190
+ sund: sundanese
191
+ sylo: syloti_nagri
192
+ syrc: syriac
193
+ tagb: tagbanwa
194
+ takr: takri
195
+ tale: tai_le
196
+ talu: new_tai_lue
197
+ taml: tamil
198
+ tang: tangut
199
+ tavt: tai_viet
200
+ telu: telugu
201
+ term: terminal_punctuation
202
+ tfng: tifinagh
203
+ tglg: tagalog
204
+ thaa: thaana
205
+ tibt: tibetan
206
+ tirh: tirhuta
207
+ ugar: ugaritic
208
+ uideo: unified_ideograph
209
+ vaii: vai
210
+ vs: variation_selector
211
+ wara: warang_citi
212
+ wspace: white_space
213
+ xidc: xid_continue
214
+ xids: xid_start
215
+ xpeo: old_persian
216
+ xsux: cuneiform
217
+ yiii: yi
218
+ z: separator
219
+ zanb: zanabazar_square
220
+ zinh: inherited
221
+ zl: line_separator
222
+ zp: paragraph_separator
223
+ zs: space_separator
224
+ zyyy: common
225
+ zzzz: unknown
@@ -1,55 +1,9 @@
1
1
  %%{
2
2
  machine re_property;
3
3
 
4
- property_char = [pP];
4
+ property_char = [pP];
5
5
 
6
- # Property names are being treated as case-insensitive, but it is not clear
7
- # yet if this applies to all flavors and in all encodings. A bug has just
8
- # been filed against ruby regarding this issue, see:
9
- # http://redmine.ruby-lang.org/issues/show/4014
10
- property_name_unicode = 'alnum'i | 'alpha'i | 'any'i | 'ascii'i | 'blank'i |
11
- 'cntrl'i | 'digit'i | 'graph'i | 'lower'i | 'print'i |
12
- 'punct'i | 'space'i | 'upper'i | 'word'i | 'xdigit'i;
13
-
14
- property_name_posix = 'any'i | 'assigned'i | 'newline'i;
15
-
16
- property_name = property_name_unicode | property_name_posix;
17
-
18
- category_letter = [Ll] . [ultmo]?;
19
- category_mark = [Mm] . [nce]?;
20
- category_number = [Nn] . [dlo]?;
21
- category_punctuation = [Pp] . [cdseifo]?;
22
- category_symbol = [Ss] . [mcko]?;
23
- category_separator = [Zz] . [slp]?;
24
- category_codepoint = [Cc] . [cfson]?;
25
-
26
- general_category = category_letter | category_mark |
27
- category_number | category_punctuation |
28
- category_symbol | category_separator |
29
- category_codepoint;
30
-
31
- property_derived = 'math'i | 'alphabetic'i |
32
- 'lowercase'i | 'uppercase'i |
33
- 'id_start'i | 'id_continue'i |
34
- 'xid_start'i | 'xid_continue'i |
35
- 'grapheme_base'i | 'grapheme_extend'i |
36
- 'default_ignorable_code_point'i;
37
-
38
- property_age = 'age=1.1'i | 'age=2.0'i | 'age=2.1'i |
39
- 'age=3.0'i | 'age=3.1'i | 'age=3.2'i |
40
- 'age=4.0'i | 'age=4.1'i | 'age=5.0'i |
41
- 'age=5.1'i | 'age=5.2'i | 'age=6.0'i |
42
- 'age=6.1'i | 'age=6.2'i | 'age=6.3'i |
43
- 'age=7.0'i | 'age=8.0'i | 'age=9.0'i |
44
- 'age=10.0'i;
45
-
46
- property_script = (alnum | space | '_' | '-')+; # everything else
47
-
48
- property_sequence = property_char . '{' . '^'? (
49
- property_name | general_category |
50
- property_age | property_derived |
51
- property_script
52
- ) . '}';
6
+ property_sequence = property_char . '{' . '^'? (alnum|space|[_\-\.=])+ '}';
53
7
 
54
8
  action premature_property_end {
55
9
  raise PrematureEndError.new('unicode property')
@@ -61,767 +15,14 @@
61
15
 
62
16
  property_sequence < eof(premature_property_end) {
63
17
  text = text(data, ts, te, 1).first
64
- if in_set
65
- type = :set
66
- else
67
- type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
68
- end
69
-
70
- name = data[ts+2..te-2].pack('c*').gsub(/[\s_]/,'').downcase
71
- if name[0].chr == '^'
72
- name = name[1..-1]
73
- end
74
-
75
- case name
76
- # Named
77
- when 'alnum'
78
- self.emit(type, :alnum, text, ts-1, te)
79
- when 'alpha'
80
- self.emit(type, :alpha, text, ts-1, te)
81
- when 'ascii'
82
- self.emit(type, :ascii, text, ts-1, te)
83
- when 'blank'
84
- self.emit(type, :blank, text, ts-1, te)
85
- when 'cntrl'
86
- self.emit(type, :cntrl, text, ts-1, te)
87
- when 'digit'
88
- self.emit(type, :digit, text, ts-1, te)
89
- when 'graph'
90
- self.emit(type, :graph, text, ts-1, te)
91
- when 'lower'
92
- self.emit(type, :lower, text, ts-1, te)
93
- when 'print'
94
- self.emit(type, :print, text, ts-1, te)
95
- when 'punct'
96
- self.emit(type, :punct, text, ts-1, te)
97
- when 'space'
98
- self.emit(type, :space, text, ts-1, te)
99
- when 'upper'
100
- self.emit(type, :upper, text, ts-1, te)
101
- when 'word'
102
- self.emit(type, :word, text, ts-1, te)
103
- when 'xdigit'
104
- self.emit(type, :xdigit, text, ts-1, te)
105
- when 'xposixpunct'
106
- self.emit(type, :xposixpunct, text, ts-1, te)
107
-
108
- # Only in Oniguruma (old Rubies)
109
- when 'newline'
110
- self.emit(type, :newline, text, ts-1, te)
111
-
112
- when 'any'
113
- self.emit(type, :any, text, ts-1, te)
114
- when 'assigned'
115
- self.emit(type, :assigned, text, ts-1, te)
116
-
117
- # Letters
118
- when 'l', 'letter'
119
- self.emit(type, :letter_any, text, ts-1, te)
120
- when 'lu', 'uppercaseletter'
121
- self.emit(type, :letter_uppercase, text, ts-1, te)
122
- when 'll', 'lowercaseletter'
123
- self.emit(type, :letter_lowercase, text, ts-1, te)
124
- when 'lt', 'titlecaseletter'
125
- self.emit(type, :letter_titlecase, text, ts-1, te)
126
- when 'lm', 'modifierletter'
127
- self.emit(type, :letter_modifier, text, ts-1, te)
128
- when 'lo', 'otherletter'
129
- self.emit(type, :letter_other, text, ts-1, te)
130
-
131
- # Marks
132
- when 'm', 'mark'
133
- self.emit(type, :mark_any, text, ts-1, te)
134
- when 'mn', 'nonspacingmark'
135
- self.emit(type, :mark_nonspacing, text, ts-1, te)
136
- when 'mc', 'spacingmark'
137
- self.emit(type, :mark_spacing, text, ts-1, te)
138
- when 'me', 'enclosingmark'
139
- self.emit(type, :mark_enclosing, text, ts-1, te)
140
-
141
- # Numbers
142
- when 'n', 'number'
143
- self.emit(type, :number_any, text, ts-1, te)
144
- when 'nd', 'decimalnumber'
145
- self.emit(type, :number_decimal, text, ts-1, te)
146
- when 'nl', 'letternumber'
147
- self.emit(type, :number_letter, text, ts-1, te)
148
- when 'no', 'othernumber'
149
- self.emit(type, :number_other, text, ts-1, te)
150
-
151
- # Punctuation
152
- when 'p', 'punctuation'
153
- self.emit(type, :punct_any, text, ts-1, te)
154
- when 'pc', 'connectorpunctuation'
155
- self.emit(type, :punct_connector, text, ts-1, te)
156
- when 'pd', 'dashpunctuation'
157
- self.emit(type, :punct_dash, text, ts-1, te)
158
- when 'ps', 'openpunctuation'
159
- self.emit(type, :punct_open, text, ts-1, te)
160
- when 'pe', 'closepunctuation'
161
- self.emit(type, :punct_close, text, ts-1, te)
162
- when 'pi', 'initialpunctuation'
163
- self.emit(type, :punct_initial, text, ts-1, te)
164
- when 'pf', 'finalpunctuation'
165
- self.emit(type, :punct_final, text, ts-1, te)
166
- when 'po', 'otherpunctuation'
167
- self.emit(type, :punct_other, text, ts-1, te)
168
-
169
- # Symbols
170
- when 's', 'symbol'
171
- self.emit(type, :symbol_any, text, ts-1, te)
172
- when 'sm', 'mathsymbol'
173
- self.emit(type, :symbol_math, text, ts-1, te)
174
- when 'sc', 'currencysymbol'
175
- self.emit(type, :symbol_currency, text, ts-1, te)
176
- when 'sk', 'modifiersymbol'
177
- self.emit(type, :symbol_modifier, text, ts-1, te)
178
- when 'so', 'othersymbol'
179
- self.emit(type, :symbol_other, text, ts-1, te)
180
-
181
- # Separators
182
- when 'z', 'separator'
183
- self.emit(type, :separator_any, text, ts-1, te)
184
- when 'zs', 'spaceseparator'
185
- self.emit(type, :separator_space, text, ts-1, te)
186
- when 'zl', 'lineseparator'
187
- self.emit(type, :separator_line, text, ts-1, te)
188
- when 'zp', 'paragraphseparator'
189
- self.emit(type, :separator_para, text, ts-1, te)
190
-
191
- # Codepoints
192
- when 'c', 'other'
193
- self.emit(type, :other, text, ts-1, te)
194
- when 'cc', 'control'
195
- self.emit(type, :control, text, ts-1, te)
196
- when 'cf', 'format'
197
- self.emit(type, :format, text, ts-1, te)
198
- when 'cs', 'surrogate'
199
- self.emit(type, :surrogate, text, ts-1, te)
200
- when 'co', 'privateuse'
201
- self.emit(type, :private_use, text, ts-1, te)
202
- when 'cn', 'unassigned'
203
- self.emit(type, :unassigned, text, ts-1, te)
204
-
205
- # Age
206
- when 'age=1.1'
207
- self.emit(type, :age_1_1, text, ts-1, te)
208
- when 'age=2.0'
209
- self.emit(type, :age_2_0, text, ts-1, te)
210
- when 'age=2.1'
211
- self.emit(type, :age_2_1, text, ts-1, te)
212
- when 'age=3.0'
213
- self.emit(type, :age_3_0, text, ts-1, te)
214
- when 'age=3.1'
215
- self.emit(type, :age_3_1, text, ts-1, te)
216
- when 'age=3.2'
217
- self.emit(type, :age_3_2, text, ts-1, te)
218
- when 'age=4.0'
219
- self.emit(type, :age_4_0, text, ts-1, te)
220
- when 'age=4.1'
221
- self.emit(type, :age_4_1, text, ts-1, te)
222
- when 'age=5.0'
223
- self.emit(type, :age_5_0, text, ts-1, te)
224
- when 'age=5.1'
225
- self.emit(type, :age_5_1, text, ts-1, te)
226
- when 'age=5.2'
227
- self.emit(type, :age_5_2, text, ts-1, te)
228
- when 'age=6.0'
229
- self.emit(type, :age_6_0, text, ts-1, te)
230
- when 'age=6.1'
231
- self.emit(type, :age_6_1, text, ts-1, te)
232
- when 'age=6.2'
233
- self.emit(type, :age_6_2, text, ts-1, te)
234
- when 'age=6.3'
235
- self.emit(type, :age_6_3, text, ts-1, te)
236
- when 'age=7.0'
237
- self.emit(type, :age_7_0, text, ts-1, te)
238
- when 'age=8.0'
239
- self.emit(type, :age_8_0, text, ts-1, te)
240
- when 'age=9.0'
241
- self.emit(type, :age_9_0, text, ts-1, te)
242
- when 'age=10.0'
243
- self.emit(type, :age_10_0, text, ts-1, te)
244
-
245
- # Derived Properties
246
- when 'ahex', 'asciihexdigit'
247
- self.emit(type, :ascii_hex, text, ts-1, te)
248
- when 'alphabetic'
249
- self.emit(type, :alphabetic, text, ts-1, te)
250
- when 'cased'
251
- self.emit(type, :cased, text, ts-1, te)
252
- when 'cwcf', 'changeswhencasefolded'
253
- self.emit(type, :changes_when_casefolded, text, ts-1, te)
254
- when 'cwcm', 'changeswhencasemapped'
255
- self.emit(type, :changes_when_casemapped, text, ts-1, te)
256
- when 'cwl', 'changeswhenlowercased'
257
- self.emit(type, :changes_when_lowercased, text, ts-1, te)
258
- when 'cwt', 'changeswhentitlecased'
259
- self.emit(type, :changes_when_titlecased, text, ts-1, te)
260
- when 'cwu', 'changeswhenuppercased'
261
- self.emit(type, :changes_when_uppercased, text, ts-1, te)
262
- when 'ci', 'caseignorable'
263
- self.emit(type, :case_ignorable, text, ts-1, te)
264
- when 'bidic', 'bidicontrol'
265
- self.emit(type, :bidi_control, text, ts-1, te)
266
- when 'dash'
267
- self.emit(type, :dash, text, ts-1, te)
268
- when 'dep', 'deprecated'
269
- self.emit(type, :deprecated, text, ts-1, te)
270
- when 'di', 'defaultignorablecodepoint'
271
- self.emit(type, :default_ignorable_cp, text, ts-1, te)
272
- when 'dia', 'diacritic'
273
- self.emit(type, :diacritic, text, ts-1, te)
274
- when 'ext', 'extender'
275
- self.emit(type, :extender, text, ts-1, te)
276
- when 'grbase', 'graphemebase'
277
- self.emit(type, :grapheme_base, text, ts-1, te)
278
- when 'grext', 'graphemeextend'
279
- self.emit(type, :grapheme_extend, text, ts-1, te)
280
- when 'grlink', 'graphemelink' # NOTE: deprecated as of Unicode 5.0
281
- self.emit(type, :grapheme_link, text, ts-1, te)
282
- when 'hex', 'hexdigit'
283
- self.emit(type, :hex_digit, text, ts-1, te)
284
- when 'hyphen' # NOTE: deprecated as of Unicode 6.0
285
- self.emit(type, :hyphen, text, ts-1, te)
286
- when 'idc', 'idcontinue'
287
- self.emit(type, :id_continue, text, ts-1, te)
288
- when 'ideo', 'ideographic'
289
- self.emit(type, :ideographic, text, ts-1, te)
290
- when 'ids', 'idstart'
291
- self.emit(type, :id_start, text, ts-1, te)
292
- when 'idsb', 'idsbinaryoperator'
293
- self.emit(type, :ids_binary_op, text, ts-1, te)
294
- when 'idst', 'idstrinaryoperator'
295
- self.emit(type, :ids_trinary_op, text, ts-1, te)
296
- when 'joinc', 'joincontrol'
297
- self.emit(type, :join_control, text, ts-1, te)
298
- when 'loe', 'logicalorderexception'
299
- self.emit(type, :logical_order_exception, text, ts-1, te)
300
- when 'lowercase'
301
- self.emit(type, :lowercase, text, ts-1, te)
302
- when 'math'
303
- self.emit(type, :math, text, ts-1, te)
304
- when 'nchar', 'noncharactercodepoint'
305
- self.emit(type, :non_character_cp, text, ts-1, te)
306
- when 'oalpha', 'otheralphabetic'
307
- self.emit(type, :other_alphabetic, text, ts-1, te)
308
- when 'odi', 'otherdefaultignorablecodepoint'
309
- self.emit(type, :other_default_ignorable_cp, text, ts-1, te)
310
- when 'ogrext', 'othergraphemeextend'
311
- self.emit(type, :other_grapheme_extended, text, ts-1, te)
312
- when 'oidc', 'otheridcontinue'
313
- self.emit(type, :other_id_continue, text, ts-1, te)
314
- when 'oids', 'otheridstart'
315
- self.emit(type, :other_id_start, text, ts-1, te)
316
- when 'olower', 'otherlowercase'
317
- self.emit(type, :other_lowercase, text, ts-1, te)
318
- when 'omath', 'othermath'
319
- self.emit(type, :other_math, text, ts-1, te)
320
- when 'oupper', 'otheruppercase'
321
- self.emit(type, :other_uppercase, text, ts-1, te)
322
- when 'patsyn', 'patternsyntax'
323
- self.emit(type, :pattern_syntax, text, ts-1, te)
324
- when 'patws', 'patternwhitespace'
325
- self.emit(type, :pattern_whitespace, text, ts-1, te)
326
- when 'qmark', 'quotationmark'
327
- self.emit(type, :quotation_mark, text, ts-1, te)
328
- when 'radical'
329
- self.emit(type, :radical, text, ts-1, te)
330
- when 'ri', 'regionalindicator'
331
- self.emit(type, :regional_indicator, text, ts-1, te)
332
- when 'sd', 'softdotted'
333
- self.emit(type, :soft_dotted, text, ts-1, te)
334
- when 'sterm'
335
- self.emit(type, :sentence_terminal, text, ts-1, te)
336
- when 'term', 'terminalpunctuation'
337
- self.emit(type, :terminal_punctuation, text, ts-1, te)
338
- when 'uideo', 'unifiedideograph'
339
- self.emit(type, :unified_ideograph, text, ts-1, te)
340
- when 'uppercase'
341
- self.emit(type, :uppercase, text, ts-1, te)
342
- when 'vs', 'variationselector'
343
- self.emit(type, :variation_selector, text, ts-1, te)
344
- when 'wspace', 'whitespace'
345
- self.emit(type, :whitespace, text, ts-1, te)
346
- when 'xids', 'xidstart'
347
- self.emit(type, :xid_start, text, ts-1, te)
348
- when 'xidc', 'xidcontinue'
349
- self.emit(type, :xid_continue, text, ts-1, te)
350
-
351
- # Emoji
352
- when 'emoji'
353
- self.emit(type, :emoji_any, text, ts-1, te)
354
- when 'emojicomponent'
355
- self.emit(type, :emoji_component, text, ts-1, te)
356
- when 'emojimodifier'
357
- self.emit(type, :emoji_modifier, text, ts-1, te)
358
- when 'emojimodifierbase'
359
- self.emit(type, :emoji_modifier_base, text, ts-1, te)
360
- when 'emojipresentation'
361
- self.emit(type, :emoji_presentation, text, ts-1, te)
362
-
363
- # Scripts
364
- when 'aghb', 'caucasianalbanian'
365
- self.emit(type, :script_caucasian_albanian, text, ts-1, te)
366
- when 'arab', 'arabic'
367
- self.emit(type, :script_arabic, text, ts-1, te)
368
- when 'armi', 'imperialaramaic'
369
- self.emit(type, :script_imperial_aramaic, text, ts-1, te)
370
- when 'armn', 'armenian'
371
- self.emit(type, :script_armenian, text, ts-1, te)
372
- when 'avst', 'avestan'
373
- self.emit(type, :script_avestan, text, ts-1, te)
374
- when 'bali', 'balinese'
375
- self.emit(type, :script_balinese, text, ts-1, te)
376
- when 'bamu', 'bamum'
377
- self.emit(type, :script_bamum, text, ts-1, te)
378
- when 'bass', 'bassavah'
379
- self.emit(type, :script_bassa_vah, text, ts-1, te)
380
- when 'batk', 'batak'
381
- self.emit(type, :script_batak, text, ts-1, te)
382
- when 'beng', 'bengali'
383
- self.emit(type, :script_bengali, text, ts-1, te)
384
- when 'bopo', 'bopomofo'
385
- self.emit(type, :script_bopomofo, text, ts-1, te)
386
- when 'brah', 'brahmi'
387
- self.emit(type, :script_brahmi, text, ts-1, te)
388
- when 'brai', 'braille'
389
- self.emit(type, :script_braille, text, ts-1, te)
390
- when 'bugi', 'buginese'
391
- self.emit(type, :script_buginese, text, ts-1, te)
392
- when 'buhd', 'buhid'
393
- self.emit(type, :script_buhid, text, ts-1, te)
394
- when 'cans', 'canadianaboriginal'
395
- self.emit(type, :script_canadian_aboriginal, text, ts-1, te)
396
- when 'cari', 'carian'
397
- self.emit(type, :script_carian, text, ts-1, te)
398
- when 'cham'
399
- self.emit(type, :script_cham, text, ts-1, te)
400
- when 'cher', 'cherokee'
401
- self.emit(type, :script_cherokee, text, ts-1, te)
402
- when 'copt', 'coptic', 'qaac'
403
- self.emit(type, :script_coptic, text, ts-1, te)
404
- when 'cprt', 'cypriot'
405
- self.emit(type, :script_cypriot, text, ts-1, te)
406
- when 'cyrl', 'cyrillic'
407
- self.emit(type, :script_cyrillic, text, ts-1, te)
408
- when 'deva', 'devanagari'
409
- self.emit(type, :script_devanagari, text, ts-1, te)
410
- when 'dsrt', 'deseret'
411
- self.emit(type, :script_deseret, text, ts-1, te)
412
- when 'dupl', 'duployan'
413
- self.emit(type, :script_duployan, text, ts-1, te)
414
- when 'egyp', 'egyptianhieroglyphs'
415
- self.emit(type, :script_egyptian_hieroglyphs, text, ts-1, te)
416
- when 'elba', 'elbasan'
417
- self.emit(type, :script_elbasan, text, ts-1, te)
418
- when 'ethi', 'ethiopic'
419
- self.emit(type, :script_ethiopic, text, ts-1, te)
420
- when 'geor', 'georgian'
421
- self.emit(type, :script_georgian, text, ts-1, te)
422
- when 'glag', 'glagolitic'
423
- self.emit(type, :script_glagolitic, text, ts-1, te)
424
- when 'goth', 'gothic'
425
- self.emit(type, :script_gothic, text, ts-1, te)
426
- when 'gran', 'grantha'
427
- self.emit(type, :script_grantha, text, ts-1, te)
428
- when 'grek', 'greek'
429
- self.emit(type, :script_greek, text, ts-1, te)
430
- when 'gujr', 'gujarati'
431
- self.emit(type, :script_gujarati, text, ts-1, te)
432
- when 'guru', 'gurmukhi'
433
- self.emit(type, :script_gurmukhi, text, ts-1, te)
434
- when 'hang', 'hangul'
435
- self.emit(type, :script_hangul, text, ts-1, te)
436
- when 'hani', 'han'
437
- self.emit(type, :script_han, text, ts-1, te)
438
- when 'hano', 'hanunoo'
439
- self.emit(type, :script_hanunoo, text, ts-1, te)
440
- when 'hebr', 'hebrew'
441
- self.emit(type, :script_hebrew, text, ts-1, te)
442
- when 'hira', 'hiragana'
443
- self.emit(type, :script_hiragana, text, ts-1, te)
444
- when 'hmng', 'pahawhhmong'
445
- self.emit(type, :script_pahawh_hmong, text, ts-1, te)
446
- when 'hrkt', 'katakanaorhiragana'
447
- self.emit(type, :script_katakana_or_hiragana, text, ts-1, te)
448
- when 'ital', 'olditalic'
449
- self.emit(type, :script_old_italic, text, ts-1, te)
450
- when 'java', 'javanese'
451
- self.emit(type, :script_javanese, text, ts-1, te)
452
- when 'kali', 'kayahli'
453
- self.emit(type, :script_kayah_li, text, ts-1, te)
454
- when 'kana', 'katakana'
455
- self.emit(type, :script_katakana, text, ts-1, te)
456
- when 'khar', 'kharoshthi'
457
- self.emit(type, :script_kharoshthi, text, ts-1, te)
458
- when 'khmr', 'khmer'
459
- self.emit(type, :script_khmer, text, ts-1, te)
460
- when 'khoj', 'khojki'
461
- self.emit(type, :script_khojki, text, ts-1, te)
462
- when 'knda', 'kannada'
463
- self.emit(type, :script_kannada, text, ts-1, te)
464
- when 'kthi', 'kaithi'
465
- self.emit(type, :script_kaithi, text, ts-1, te)
466
- when 'lana', 'taitham'
467
- self.emit(type, :script_tai_tham, text, ts-1, te)
468
- when 'laoo', 'lao'
469
- self.emit(type, :script_lao, text, ts-1, te)
470
- when 'latn', 'latin'
471
- self.emit(type, :script_latin, text, ts-1, te)
472
- when 'lepc', 'lepcha'
473
- self.emit(type, :script_lepcha, text, ts-1, te)
474
- when 'limb', 'limbu'
475
- self.emit(type, :script_limbu, text, ts-1, te)
476
- when 'lina', 'lineara'
477
- self.emit(type, :script_linear_a, text, ts-1, te)
478
- when 'linb', 'linearb'
479
- self.emit(type, :script_linear_b, text, ts-1, te)
480
- when 'lisu'
481
- self.emit(type, :script_lisu, text, ts-1, te)
482
- when 'lyci', 'lycian'
483
- self.emit(type, :script_lycian, text, ts-1, te)
484
- when 'lydi', 'lydian'
485
- self.emit(type, :script_lydian, text, ts-1, te)
486
- when 'mlym', 'malayalam'
487
- self.emit(type, :script_malayalam, text, ts-1, te)
488
- when 'mahj', 'mahajani'
489
- self.emit(type, :script_mahajani, text, ts-1, te)
490
- when 'mand', 'mandaic'
491
- self.emit(type, :script_mandaic, text, ts-1, te)
492
- when 'mani', 'manichaean'
493
- self.emit(type, :script_manichaean, text, ts-1, te)
494
- when 'mend', 'mendekikakui'
495
- self.emit(type, :script_mende_kikakui, text, ts-1, te)
496
- when 'modi'
497
- self.emit(type, :script_modi, text, ts-1, te)
498
- when 'mong', 'mongolian'
499
- self.emit(type, :script_mongolian, text, ts-1, te)
500
- when 'mroo', 'mro'
501
- self.emit(type, :script_mro, text, ts-1, te)
502
- when 'mtei', 'meeteimayek'
503
- self.emit(type, :script_meetei_mayek, text, ts-1, te)
504
- when 'mymr', 'myanmar'
505
- self.emit(type, :script_myanmar, text, ts-1, te)
506
- when 'narb', 'oldnortharabian'
507
- self.emit(type, :script_old_north_arabian, text, ts-1, te)
508
- when 'nbat', 'nabataean'
509
- self.emit(type, :script_nabataean, text, ts-1, te)
510
- when 'nkoo', 'nko'
511
- self.emit(type, :script_nko, text, ts-1, te)
512
- when 'ogam', 'ogham'
513
- self.emit(type, :script_ogham, text, ts-1, te)
514
- when 'olck', 'olchiki'
515
- self.emit(type, :script_ol_chiki, text, ts-1, te)
516
- when 'orkh', 'oldturkic'
517
- self.emit(type, :script_old_turkic, text, ts-1, te)
518
- when 'orya', 'oriya'
519
- self.emit(type, :script_oriya, text, ts-1, te)
520
- when 'osma', 'osmanya'
521
- self.emit(type, :script_osmanya, text, ts-1, te)
522
- when 'palm', 'palmyrene'
523
- self.emit(type, :script_palmyrene, text, ts-1, te)
524
- when 'pauc', 'paucinhau'
525
- self.emit(type, :script_pau_cin_hau, text, ts-1, te)
526
- when 'perm', 'oldpermic'
527
- self.emit(type, :script_old_permic, text, ts-1, te)
528
- when 'phag', 'phagspa'
529
- self.emit(type, :script_phags_pa, text, ts-1, te)
530
- when 'phli', 'inscriptionalpahlavi'
531
- self.emit(type, :script_inscriptional_pahlavi, text, ts-1, te)
532
- when 'phlp', 'psalterpahlavi'
533
- self.emit(type, :script_psalter_pahlavi, text, ts-1, te)
534
- when 'phnx', 'phoenician'
535
- self.emit(type, :script_phoenician, text, ts-1, te)
536
- when 'prti', 'inscriptionalparthian'
537
- self.emit(type, :script_inscriptional_parthian, text, ts-1, te)
538
- when 'rjng', 'rejang'
539
- self.emit(type, :script_rejang, text, ts-1, te)
540
- when 'runr', 'runic'
541
- self.emit(type, :script_runic, text, ts-1, te)
542
- when 'samr', 'samaritan'
543
- self.emit(type, :script_samaritan, text, ts-1, te)
544
- when 'sarb', 'oldsoutharabian'
545
- self.emit(type, :script_old_south_arabian, text, ts-1, te)
546
- when 'saur', 'saurashtra'
547
- self.emit(type, :script_saurashtra, text, ts-1, te)
548
- when 'shaw', 'shavian'
549
- self.emit(type, :script_shavian, text, ts-1, te)
550
- when 'sidd', 'siddham'
551
- self.emit(type, :script_siddham, text, ts-1, te)
552
- when 'sind', 'khudawadi'
553
- self.emit(type, :script_khudawadi, text, ts-1, te)
554
- when 'sinh', 'sinhala'
555
- self.emit(type, :script_sinhala, text, ts-1, te)
556
- when 'sund', 'sundanese'
557
- self.emit(type, :script_sundanese, text, ts-1, te)
558
- when 'sylo', 'sylotinagri'
559
- self.emit(type, :script_syloti_nagri, text, ts-1, te)
560
- when 'syrc', 'syriac'
561
- self.emit(type, :script_syriac, text, ts-1, te)
562
- when 'tagb', 'tagbanwa'
563
- self.emit(type, :script_tagbanwa, text, ts-1, te)
564
- when 'tale', 'taile'
565
- self.emit(type, :script_tai_le, text, ts-1, te)
566
- when 'talu', 'newtailue'
567
- self.emit(type, :script_new_tai_lue, text, ts-1, te)
568
- when 'taml', 'tamil'
569
- self.emit(type, :script_tamil, text, ts-1, te)
570
- when 'tavt', 'taiviet'
571
- self.emit(type, :script_tai_viet, text, ts-1, te)
572
- when 'telu', 'telugu'
573
- self.emit(type, :script_telugu, text, ts-1, te)
574
- when 'tfng', 'tifinagh'
575
- self.emit(type, :script_tifinagh, text, ts-1, te)
576
- when 'tglg', 'tagalog'
577
- self.emit(type, :script_tagalog, text, ts-1, te)
578
- when 'thaa', 'thaana'
579
- self.emit(type, :script_thaana, text, ts-1, te)
580
- when 'thai'
581
- self.emit(type, :script_thai, text, ts-1, te)
582
- when 'tibt', 'tibetan'
583
- self.emit(type, :script_tibetan, text, ts-1, te)
584
- when 'tirh', 'tirhuta'
585
- self.emit(type, :script_tirhuta, text, ts-1, te)
586
- when 'ugar', 'ugaritic'
587
- self.emit(type, :script_ugaritic, text, ts-1, te)
588
- when 'vaii', 'vai'
589
- self.emit(type, :script_vai, text, ts-1, te)
590
- when 'wara', 'warangciti'
591
- self.emit(type, :script_warang_citi, text, ts-1, te)
592
- when 'xpeo', 'oldpersian'
593
- self.emit(type, :script_old_persian, text, ts-1, te)
594
- when 'xsux', 'cuneiform'
595
- self.emit(type, :script_cuneiform, text, ts-1, te)
596
- when 'yiii', 'yi'
597
- self.emit(type, :script_yi, text, ts-1, te)
598
- when 'zinh', 'inherited', 'qaai'
599
- self.emit(type, :script_inherited, text, ts-1, te)
600
- when 'zyyy', 'common'
601
- self.emit(type, :script_common, text, ts-1, te)
602
- when 'zzzz', 'unknown'
603
- self.emit(type, :script_unknown, text, ts-1, te)
18
+ type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
604
19
 
605
- # Unicode blocks
606
- when 'inalphabeticpresentationforms'
607
- self.emit(type, :block_inalphabetic_presentation_forms, text, ts-1, te)
608
- when 'inarabicpresentationforms-a'
609
- self.emit(type, :block_inarabic_presentation_forms_a, text, ts-1, te)
610
- when 'inarabicpresentationforms-b'
611
- self.emit(type, :block_inarabic_presentation_forms_b, text, ts-1, te)
612
- when 'inarabic'
613
- self.emit(type, :block_inarabic, text, ts-1, te)
614
- when 'inarmenian'
615
- self.emit(type, :block_inarmenian, text, ts-1, te)
616
- when 'inarrows'
617
- self.emit(type, :block_inarrows, text, ts-1, te)
618
- when 'inbasiclatin'
619
- self.emit(type, :block_inbasic_latin, text, ts-1, te)
620
- when 'inbengali'
621
- self.emit(type, :block_inbengali, text, ts-1, te)
622
- when 'inblockelements'
623
- self.emit(type, :block_inblock_elements, text, ts-1, te)
624
- when 'inbopomofoextended'
625
- self.emit(type, :block_inbopomofo_extended, text, ts-1, te)
626
- when 'inbopomofo'
627
- self.emit(type, :block_inbopomofo, text, ts-1, te)
628
- when 'inboxdrawing'
629
- self.emit(type, :block_inbox_drawing, text, ts-1, te)
630
- when 'inbraillepatterns'
631
- self.emit(type, :block_inbraille_patterns, text, ts-1, te)
632
- when 'inbuhid'
633
- self.emit(type, :block_inbuhid, text, ts-1, te)
634
- when 'incjkcompatibilityforms'
635
- self.emit(type, :block_incjk_compatibility_forms, text, ts-1, te)
636
- when 'incjkcompatibilityideographs'
637
- self.emit(type, :block_incjk_compatibility_ideographs, text, ts-1, te)
638
- when 'incjkcompatibility'
639
- self.emit(type, :block_incjk_compatibility, text, ts-1, te)
640
- when 'incjkradicalssupplement'
641
- self.emit(type, :block_incjk_radicals_supplement, text, ts-1, te)
642
- when 'incjksymbolsandpunctuation'
643
- self.emit(type, :block_incjk_symbols_and_punctuation, text, ts-1, te)
644
- when 'incjkunifiedideographsextensiona'
645
- self.emit(type, :block_incjk_unified_ideographs_extension_a, text, ts-1, te)
646
- when 'incjkunifiedideographs'
647
- self.emit(type, :block_incjk_unified_ideographs, text, ts-1, te)
648
- when 'incherokee'
649
- self.emit(type, :block_incherokee, text, ts-1, te)
650
- when 'incombiningdiacriticalmarksforsymbols'
651
- self.emit(type, :block_incombining_diacritical_marks_for_symbols, text, ts-1, te)
652
- when 'incombiningdiacriticalmarks'
653
- self.emit(type, :block_incombining_diacritical_marks, text, ts-1, te)
654
- when 'incombininghalfmarks'
655
- self.emit(type, :block_incombining_half_marks, text, ts-1, te)
656
- when 'incontrolpictures'
657
- self.emit(type, :block_incontrol_pictures, text, ts-1, te)
658
- when 'incurrencysymbols'
659
- self.emit(type, :block_incurrency_symbols, text, ts-1, te)
660
- when 'incyrillicsupplement'
661
- self.emit(type, :block_incyrillic_supplement, text, ts-1, te)
662
- when 'incyrillic'
663
- self.emit(type, :block_incyrillic, text, ts-1, te)
664
- when 'indevanagari'
665
- self.emit(type, :block_indevanagari, text, ts-1, te)
666
- when 'indingbats'
667
- self.emit(type, :block_indingbats, text, ts-1, te)
668
- when 'inenclosedalphanumerics'
669
- self.emit(type, :block_inenclosed_alphanumerics, text, ts-1, te)
670
- when 'inenclosedcjklettersandmonths'
671
- self.emit(type, :block_inenclosed_cjk_letters_and_months, text, ts-1, te)
672
- when 'inethiopic'
673
- self.emit(type, :block_inethiopic, text, ts-1, te)
674
- when 'ingeneralpunctuation'
675
- self.emit(type, :block_ingeneral_punctuation, text, ts-1, te)
676
- when 'ingeometricshapes'
677
- self.emit(type, :block_ingeometric_shapes, text, ts-1, te)
678
- when 'ingeorgian'
679
- self.emit(type, :block_ingeorgian, text, ts-1, te)
680
- when 'ingreekextended'
681
- self.emit(type, :block_ingreek_extended, text, ts-1, te)
682
- when 'ingreekandcoptic'
683
- self.emit(type, :block_ingreek_and_coptic, text, ts-1, te)
684
- when 'ingujarati'
685
- self.emit(type, :block_ingujarati, text, ts-1, te)
686
- when 'ingurmukhi'
687
- self.emit(type, :block_ingurmukhi, text, ts-1, te)
688
- when 'inhalfwidthandfullwidthforms'
689
- self.emit(type, :block_inhalfwidth_and_fullwidth_forms, text, ts-1, te)
690
- when 'inhangulcompatibilityjamo'
691
- self.emit(type, :block_inhangul_compatibility_jamo, text, ts-1, te)
692
- when 'inhanguljamo'
693
- self.emit(type, :block_inhangul_jamo, text, ts-1, te)
694
- when 'inhangulsyllables'
695
- self.emit(type, :block_inhangul_syllables, text, ts-1, te)
696
- when 'inhanunoo'
697
- self.emit(type, :block_inhanunoo, text, ts-1, te)
698
- when 'inhebrew'
699
- self.emit(type, :block_inhebrew, text, ts-1, te)
700
- when 'inhighprivateusesurrogates'
701
- self.emit(type, :block_inhigh_private_use_surrogates, text, ts-1, te)
702
- when 'inhighsurrogates'
703
- self.emit(type, :block_inhigh_surrogates, text, ts-1, te)
704
- when 'inhiragana'
705
- self.emit(type, :block_inhiragana, text, ts-1, te)
706
- when 'inipaextensions'
707
- self.emit(type, :block_inipa_extensions, text, ts-1, te)
708
- when 'inideographicdescriptioncharacters'
709
- self.emit(type, :block_inideographic_description_characters, text, ts-1, te)
710
- when 'inkanbun'
711
- self.emit(type, :block_inkanbun, text, ts-1, te)
712
- when 'inkangxiradicals'
713
- self.emit(type, :block_inkangxi_radicals, text, ts-1, te)
714
- when 'inkannada'
715
- self.emit(type, :block_inkannada, text, ts-1, te)
716
- when 'inkatakanaphoneticextensions'
717
- self.emit(type, :block_inkatakana_phonetic_extensions, text, ts-1, te)
718
- when 'inkatakana'
719
- self.emit(type, :block_inkatakana, text, ts-1, te)
720
- when 'inkhmersymbols'
721
- self.emit(type, :block_inkhmer_symbols, text, ts-1, te)
722
- when 'inkhmer'
723
- self.emit(type, :block_inkhmer, text, ts-1, te)
724
- when 'inlao'
725
- self.emit(type, :block_inlao, text, ts-1, te)
726
- when 'inlatin-1supplement'
727
- self.emit(type, :block_inlatin_1_supplement, text, ts-1, te)
728
- when 'inlatinextended-a'
729
- self.emit(type, :block_inlatin_extended_a, text, ts-1, te)
730
- when 'inlatinextended-b'
731
- self.emit(type, :block_inlatin_extended_b, text, ts-1, te)
732
- when 'inlatinextendedadditional'
733
- self.emit(type, :block_inlatin_extended_additional, text, ts-1, te)
734
- when 'inletterlikesymbols'
735
- self.emit(type, :block_inletterlike_symbols, text, ts-1, te)
736
- when 'inlimbu'
737
- self.emit(type, :block_inlimbu, text, ts-1, te)
738
- when 'inlowsurrogates'
739
- self.emit(type, :block_inlow_surrogates, text, ts-1, te)
740
- when 'inmalayalam'
741
- self.emit(type, :block_inmalayalam, text, ts-1, te)
742
- when 'inmathematicaloperators'
743
- self.emit(type, :block_inmathematical_operators, text, ts-1, te)
744
- when 'inmiscellaneousmathematicalsymbols-a'
745
- self.emit(type, :block_inmiscellaneous_mathematical_symbols_a, text, ts-1, te)
746
- when 'inmiscellaneousmathematicalsymbols-b'
747
- self.emit(type, :block_inmiscellaneous_mathematical_symbols_b, text, ts-1, te)
748
- when 'inmiscellaneoussymbolsandarrows'
749
- self.emit(type, :block_inmiscellaneous_symbols_and_arrows, text, ts-1, te)
750
- when 'inmiscellaneoussymbols'
751
- self.emit(type, :block_inmiscellaneous_symbols, text, ts-1, te)
752
- when 'inmiscellaneoustechnical'
753
- self.emit(type, :block_inmiscellaneous_technical, text, ts-1, te)
754
- when 'inmongolian'
755
- self.emit(type, :block_inmongolian, text, ts-1, te)
756
- when 'inmyanmar'
757
- self.emit(type, :block_inmyanmar, text, ts-1, te)
758
- when 'innumberforms'
759
- self.emit(type, :block_innumber_forms, text, ts-1, te)
760
- when 'inogham'
761
- self.emit(type, :block_inogham, text, ts-1, te)
762
- when 'inopticalcharacterrecognition'
763
- self.emit(type, :block_inoptical_character_recognition, text, ts-1, te)
764
- when 'inoriya'
765
- self.emit(type, :block_inoriya, text, ts-1, te)
766
- when 'inphoneticextensions'
767
- self.emit(type, :block_inphonetic_extensions, text, ts-1, te)
768
- when 'inprivateusearea'
769
- self.emit(type, :block_inprivate_use_area, text, ts-1, te)
770
- when 'inrunic'
771
- self.emit(type, :block_inrunic, text, ts-1, te)
772
- when 'insinhala'
773
- self.emit(type, :block_insinhala, text, ts-1, te)
774
- when 'insmallformvariants'
775
- self.emit(type, :block_insmall_form_variants, text, ts-1, te)
776
- when 'inspacingmodifierletters'
777
- self.emit(type, :block_inspacing_modifier_letters, text, ts-1, te)
778
- when 'inspecials'
779
- self.emit(type, :block_inspecials, text, ts-1, te)
780
- when 'insuperscriptsandsubscripts'
781
- self.emit(type, :block_insuperscripts_and_subscripts, text, ts-1, te)
782
- when 'insupplementalarrows-a'
783
- self.emit(type, :block_insupplemental_arrows_a, text, ts-1, te)
784
- when 'insupplementalarrows-b'
785
- self.emit(type, :block_insupplemental_arrows_b, text, ts-1, te)
786
- when 'insupplementalmathematicaloperators'
787
- self.emit(type, :block_insupplemental_mathematical_operators, text, ts-1, te)
788
- when 'insyriac'
789
- self.emit(type, :block_insyriac, text, ts-1, te)
790
- when 'intagalog'
791
- self.emit(type, :block_intagalog, text, ts-1, te)
792
- when 'intagbanwa'
793
- self.emit(type, :block_intagbanwa, text, ts-1, te)
794
- when 'intaile'
795
- self.emit(type, :block_intai_le, text, ts-1, te)
796
- when 'intamil'
797
- self.emit(type, :block_intamil, text, ts-1, te)
798
- when 'intelugu'
799
- self.emit(type, :block_intelugu, text, ts-1, te)
800
- when 'inthaana'
801
- self.emit(type, :block_inthaana, text, ts-1, te)
802
- when 'inthai'
803
- self.emit(type, :block_inthai, text, ts-1, te)
804
- when 'intibetan'
805
- self.emit(type, :block_intibetan, text, ts-1, te)
806
- when 'inunifiedcanadianaboriginalsyllabics'
807
- self.emit(type, :block_inunified_canadian_aboriginal_syllabics, text, ts-1, te)
808
- when 'invariationselectors'
809
- self.emit(type, :block_invariation_selectors, text, ts-1, te)
810
- when 'inyiradicals'
811
- self.emit(type, :block_inyi_radicals, text, ts-1, te)
812
- when 'inyisyllables'
813
- self.emit(type, :block_inyi_syllables, text, ts-1, te)
814
- when 'inyijinghexagramsymbols'
815
- self.emit(type, :block_inyijing_hexagram_symbols, text, ts-1, te)
20
+ name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
816
21
 
817
- else
818
- # Should this really be an error? Or would emitting
819
- # an :unknown for the property be better?
820
- #
821
- # self.emit(type, :unknown, text, ts-1, te)
22
+ token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
+ raise UnknownUnicodePropertyError.new(name) unless token
822
24
 
823
- raise UnknownUnicodePropertyError.new(name)
824
- end
25
+ self.emit(type, token.to_sym, text, ts-1, te)
825
26
 
826
27
  fret;
827
28
  };