regexp_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +307 -0
- data/Rakefile +91 -0
- data/lib/regexp_parser/ctype.rb +48 -0
- data/lib/regexp_parser/expression/property.rb +108 -0
- data/lib/regexp_parser/expression/set.rb +59 -0
- data/lib/regexp_parser/expression.rb +287 -0
- data/lib/regexp_parser/lexer.rb +105 -0
- data/lib/regexp_parser/parser.rb +417 -0
- data/lib/regexp_parser/scanner/property.rl +534 -0
- data/lib/regexp_parser/scanner/scanner.rl +712 -0
- data/lib/regexp_parser/scanner.rb +3325 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
- data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +332 -0
- data/lib/regexp_parser/syntax.rb +172 -0
- data/lib/regexp_parser.rb +45 -0
- data/test/helpers.rb +8 -0
- data/test/lexer/test_all.rb +26 -0
- data/test/lexer/test_literals.rb +120 -0
- data/test/lexer/test_nesting.rb +107 -0
- data/test/lexer/test_refcalls.rb +45 -0
- data/test/parser/test_all.rb +44 -0
- data/test/parser/test_alternation.rb +46 -0
- data/test/parser/test_anchors.rb +35 -0
- data/test/parser/test_errors.rb +59 -0
- data/test/parser/test_escapes.rb +48 -0
- data/test/parser/test_expression.rb +51 -0
- data/test/parser/test_groups.rb +69 -0
- data/test/parser/test_properties.rb +346 -0
- data/test/parser/test_quantifiers.rb +236 -0
- data/test/parser/test_refcalls.rb +101 -0
- data/test/parser/test_sets.rb +99 -0
- data/test/scanner/test_all.rb +30 -0
- data/test/scanner/test_anchors.rb +35 -0
- data/test/scanner/test_errors.rb +36 -0
- data/test/scanner/test_escapes.rb +49 -0
- data/test/scanner/test_groups.rb +41 -0
- data/test/scanner/test_literals.rb +85 -0
- data/test/scanner/test_meta.rb +36 -0
- data/test/scanner/test_properties.rb +315 -0
- data/test/scanner/test_quantifiers.rb +38 -0
- data/test/scanner/test_refcalls.rb +45 -0
- data/test/scanner/test_scripts.rb +314 -0
- data/test/scanner/test_sets.rb +80 -0
- data/test/scanner/test_types.rb +30 -0
- data/test/syntax/ruby/test_1.8.rb +57 -0
- data/test/syntax/ruby/test_1.9.1.rb +39 -0
- data/test/syntax/ruby/test_1.9.3.rb +38 -0
- data/test/syntax/ruby/test_all.rb +12 -0
- data/test/syntax/test_all.rb +19 -0
- data/test/test_all.rb +4 -0
- metadata +160 -0
@@ -0,0 +1,534 @@
|
|
1
|
+
%%{
  # Ragel machine that recognizes unicode property escapes and emits one
  # token per sequence. The token type is :property, :nonproperty or :set and
  # the token name is a symbol identifying the property, category, age or
  # script that was named between the braces.
  machine re_property;

  # Both spellings of the escape letter are accepted; the scanner action
  # tells them apart (lowercase is positive, uppercase is negated).
  property_char         = [pP];

  # Property names are being treated as case-insensitive, but it is not clear
  # yet if this applies to all flavors and in all encodings. A bug has just
  # been filed against ruby regarding this issue, see:
  #   http://redmine.ruby-lang.org/issues/show/4014
  property_name_unicode = 'alnum'i | 'alpha'i | 'any'i   | 'ascii'i | 'blank'i |
                          'cntrl'i | 'digit'i | 'graph'i | 'lower'i | 'print'i |
                          'punct'i | 'space'i | 'upper'i | 'word'i  | 'xdigit'i;

  # TODO: are these case-insensitive?
  property_name_posix   = 'any'i | 'assigned'i | 'newline'i;

  property_name         = property_name_unicode | property_name_posix;

  # General categories: a major class letter optionally followed by a
  # subclass letter, e.g. L, Lu, Nd. Only the first letter accepts both cases.
  category_letter       = [Ll] . [ultmo]?;
  category_mark         = [Mm] . [nce]?;
  category_number       = [Nn] . [dlo]?;
  category_punctuation  = [Pp] . [cdseifo]?;
  category_symbol       = [Ss] . [mcko]?;
  category_separator    = [Zz] . [slp]?;
  category_codepoint    = [Cc] . [cfson]?;

  general_category      = category_letter | category_mark |
                          category_number | category_punctuation |
                          category_symbol | category_separator |
                          category_codepoint;

  property_derived      = 'math'i          | 'alphabetic'i |
                          'lowercase'i     | 'uppercase'i |
                          'id_start'i      | 'id_continue'i |
                          'xid_start'i     | 'xid_continue'i |
                          'grapheme_base'i | 'grapheme_extend'i |
                          'default_ignorable_code_point'i;

  property_age          = 'age=1.1'i | 'age=2.0'i | 'age=2.1'i |
                          'age=3.0'i | 'age=3.1'i | 'age=3.2'i |
                          'age=4.0'i | 'age=4.1'i | 'age=5.0'i |
                          'age=5.1'i | 'age=5.2'i | 'age=6.0'i;

  # Catch-all: any run of letters, whitespace and underscores. Whether the
  # name is actually known is decided by the scanner action below, which
  # raises for anything it cannot map to a token.
  property_script       = (alpha | space | '_')+; # everything else

  # The escape letter, an opening brace, an optional negating caret, one of
  # the name forms above and a closing brace.
  property_sequence     = property_char . '{' . '^'? (
                            property_name | general_category |
                            property_age  | property_derived |
                            property_script
                          ) . '}';

  # Raised when the input ends in the middle of a property sequence.
  action premature_property_end {
    raise PrematureEndError.new('unicode property')
  }

  # Unicode properties scanner
  # --------------------------------------------------------------------------
  unicode_property := |*

    property_sequence < eof(premature_property_end) {
      # The token text starts one position before ts.
      # NOTE(review): presumably that position holds the backslash consumed
      # by the calling scanner - confirm in scanner.rl.
      text = data[ts-1..te-1].pack('c*')

      # Normalize the name: drop whitespace and underscores, ignore case.
      name = data[ts+2..te-2].pack('c*').gsub(/[\s_]/,'').downcase

      # Inside a set the type starts out as :set, otherwise the case of the
      # escape letter selects between :property and :nonproperty.
      type = if in_set
               :set
             else
               text[1,1] == 'p' ? :property : :nonproperty
             end

      # A leading caret negates the property. Combined with the uppercase
      # escape letter this is a double negation, so the type flips back to
      # :property instead of staying :nonproperty. The two-argument slice
      # returns an empty string for an empty name, where indexing and calling
      # chr on the result failed with NoMethodError on ruby 1.9.
      # NOTE(review): within a set a caret still yields :nonproperty, as it
      # did before - confirm whether set members should keep the :set type.
      if name[0,1] == '^'
        name = name[1..-1]
        type = (type == :nonproperty) ? :property : :nonproperty
      end

      token = case name
        # Named
        when 'alnum'    then :alnum
        when 'alpha'    then :alpha
        when 'ascii'    then :ascii
        when 'blank'    then :blank
        when 'cntrl'    then :cntrl
        when 'digit'    then :digit
        when 'graph'    then :graph
        when 'lower'    then :lower
        when 'print'    then :print
        when 'punct'    then :punct
        when 'space'    then :space
        when 'upper'    then :upper
        when 'xdigit'   then :xdigit

        when 'any'      then :any
        when 'assigned' then :assigned
        when 'newline'  then :newline
        when 'word'     then :word

        # Letters
        when 'l',  'letter'          then :letter_any
        when 'lu', 'uppercaseletter' then :letter_uppercase
        when 'll', 'lowercaseletter' then :letter_lowercase
        when 'lt', 'titlecaseletter' then :letter_titlecase
        when 'lm', 'modifierletter'  then :letter_modifier
        when 'lo', 'otherletter'     then :letter_other

        # Marks
        when 'm',  'mark'           then :mark_any
        when 'mn', 'nonspacingmark' then :mark_nonspacing
        when 'mc', 'spacingmark'    then :mark_spacing
        when 'me', 'enclosingmark'  then :mark_enclosing

        # Numbers
        when 'n',  'number'        then :number_any
        when 'nd', 'decimalnumber' then :number_decimal
        when 'nl', 'letternumber'  then :number_letter
        when 'no', 'othernumber'   then :number_other

        # Punctuation
        when 'p',  'punctuation'          then :punct_any
        when 'pc', 'connectorpunctuation' then :punct_connector
        when 'pd', 'dashpunctuation'      then :punct_dash
        when 'ps', 'openpunctuation'      then :punct_open
        when 'pe', 'closepunctuation'     then :punct_close
        when 'pi', 'initialpunctuation'   then :punct_initial
        when 'pf', 'finalpunctuation'     then :punct_final
        when 'po', 'otherpunctuation'     then :punct_other

        # Symbols
        when 's',  'symbol'         then :symbol_any
        when 'sm', 'mathsymbol'     then :symbol_math
        when 'sc', 'currencysymbol' then :symbol_currency
        when 'sk', 'modifiersymbol' then :symbol_modifier
        when 'so', 'othersymbol'    then :symbol_other

        # Separators
        when 'z',  'separator'          then :separator_any
        when 'zs', 'spaceseparator'     then :separator_space
        when 'zl', 'lineseparator'      then :separator_line
        when 'zp', 'paragraphseparator' then :separator_para

        # Codepoints
        when 'c',  'other'      then :other
        when 'cc', 'control'    then :control
        when 'cf', 'format'     then :format
        when 'cs', 'surrogate'  then :surrogate
        when 'co', 'privateuse' then :private_use
        when 'cn', 'unassigned' then :unassigned

        # Age
        when 'age=1.1' then :age_1_1
        when 'age=2.0' then :age_2_0
        when 'age=2.1' then :age_2_1
        when 'age=3.0' then :age_3_0
        when 'age=3.1' then :age_3_1
        when 'age=3.2' then :age_3_2
        when 'age=4.0' then :age_4_0
        when 'age=4.1' then :age_4_1
        when 'age=5.0' then :age_5_0
        when 'age=5.1' then :age_5_1
        when 'age=5.2' then :age_5_2
        when 'age=6.0' then :age_6_0

        # Derived Properties
        when 'ahex',   'asciihexdigit'                  then :ascii_hex
        when 'alphabetic'                               then :alphabetic
        when 'cased'                                    then :cased
        when 'cwcf',   'changeswhencasefolded'          then :changes_when_casefolded
        when 'cwcm',   'changeswhencasemapped'          then :changes_when_casemapped
        when 'cwl',    'changeswhenlowercased'          then :changes_when_lowercased
        when 'cwt',    'changeswhentitlecased'          then :changes_when_titlecased
        when 'cwu',    'changeswhenuppercased'          then :changes_when_uppercased
        when 'ci',     'caseignorable'                  then :case_ignorable
        when 'bidic',  'bidicontrol'                    then :bidi_control
        when 'dash'                                     then :dash
        when 'dep',    'deprecated'                     then :deprecated
        when 'di',     'defaultignorablecodepoint'      then :default_ignorable_cp
        when 'dia',    'diacritic'                      then :diacritic
        when 'ext',    'extender'                       then :extender
        when 'grbase', 'graphemebase'                   then :grapheme_base
        when 'grext',  'graphemeextend'                 then :grapheme_extend
        when 'grlink', 'graphemelink'                   then :grapheme_link # NOTE: deprecated as of Unicode 5.0
        when 'hex',    'hexdigit'                       then :hex_digit
        when 'hyphen'                                   then :hyphen # NOTE: deprecated as of Unicode 6.0
        when 'idc',    'idcontinue'                     then :id_continue
        when 'ideo',   'ideographic'                    then :ideographic
        when 'ids',    'idstart'                        then :id_start
        when 'idsb',   'idsbinaryoperator'              then :ids_binary_op
        when 'idst',   'idstrinaryoperator'             then :ids_trinary_op
        when 'joinc',  'joincontrol'                    then :join_control
        when 'loe',    'logicalorderexception'          then :logical_order_exception
        when 'lowercase'                                then :lowercase
        when 'math'                                     then :math
        when 'nchar',  'noncharactercodepoint'          then :non_character_cp
        when 'oalpha', 'otheralphabetic'                then :other_alphabetic
        when 'odi',    'otherdefaultignorablecodepoint' then :other_default_ignorable_cp
        when 'ogrext', 'othergraphemeextend'            then :other_grapheme_extended
        when 'oidc',   'otheridcontinue'                then :other_id_continue
        when 'oids',   'otheridstart'                   then :other_id_start
        when 'olower', 'otherlowercase'                 then :other_lowercase
        when 'omath',  'othermath'                      then :other_math
        when 'oupper', 'otheruppercase'                 then :other_uppercase
        when 'patsyn', 'patternsyntax'                  then :pattern_syntax
        when 'patws',  'patternwhitespace'              then :pattern_whitespace
        when 'qmark',  'quotationmark'                  then :quotation_mark
        when 'radical'                                  then :radical
        when 'sd',     'softdotted'                     then :soft_dotted
        when 'sterm'                                    then :sentence_terminal
        when 'term',   'terminalpunctuation'            then :terminal_punctuation
        when 'uideo',  'unifiedideograph'               then :unified_ideograph
        when 'uppercase'                                then :uppercase
        when 'vs',     'variationselector'              then :variation_selector
        when 'wspace', 'whitespace'                     then :whitespace
        when 'xids',   'xidstart'                       then :xid_start
        when 'xidc',   'xidcontinue'                    then :xid_continue

        # Scripts
        when 'arab', 'arabic'                then :script_arabic
        when 'armi', 'imperialaramaic'       then :script_imperial_aramaic
        when 'armn', 'armenian'              then :script_armenian
        when 'avst', 'avestan'               then :script_avestan
        when 'bali', 'balinese'              then :script_balinese
        when 'bamu', 'bamum'                 then :script_bamum
        when 'batk', 'batak'                 then :script_batak
        when 'beng', 'bengali'               then :script_bengali
        when 'bopo', 'bopomofo'              then :script_bopomofo
        when 'brah', 'brahmi'                then :script_brahmi
        when 'brai', 'braille'               then :script_braille
        when 'bugi', 'buginese'              then :script_buginese
        when 'buhd', 'buhid'                 then :script_buhid
        when 'cans', 'canadianaboriginal'    then :script_canadian_aboriginal
        when 'cari', 'carian'                then :script_carian
        when 'cham'                          then :script_cham
        when 'cher', 'cherokee'              then :script_cherokee
        when 'copt', 'coptic', 'qaac'        then :script_coptic
        when 'cprt', 'cypriot'               then :script_cypriot
        when 'cyrl', 'cyrillic'              then :script_cyrillic
        when 'deva', 'devanagari'            then :script_devanagari
        when 'dsrt', 'deseret'               then :script_deseret
        when 'egyp', 'egyptianhieroglyphs'   then :script_egyptian_hieroglyphs
        when 'ethi', 'ethiopic'              then :script_ethiopic
        when 'geor', 'georgian'              then :script_georgian
        when 'glag', 'glagolitic'            then :script_glagolitic
        when 'goth', 'gothic'                then :script_gothic
        when 'grek', 'greek'                 then :script_greek
        when 'gujr', 'gujarati'              then :script_gujarati
        when 'guru', 'gurmukhi'              then :script_gurmukhi
        when 'hang', 'hangul'                then :script_hangul
        when 'hani', 'han'                   then :script_han
        when 'hano', 'hanunoo'               then :script_hanunoo
        when 'hebr', 'hebrew'                then :script_hebrew
        when 'hira', 'hiragana'              then :script_hiragana
        when 'hrkt', 'katakanaorhiragana'    then :script_katakana_or_hiragana
        when 'ital', 'olditalic'             then :script_old_italic
        when 'java', 'javanese'              then :script_javanese
        when 'kali', 'kayahli'               then :script_kayah_li
        when 'kana', 'katakana'              then :script_katakana
        when 'khar', 'kharoshthi'            then :script_kharoshthi
        when 'khmr', 'khmer'                 then :script_khmer
        when 'knda', 'kannada'               then :script_kannada
        when 'kthi', 'kaithi'                then :script_kaithi
        when 'lana', 'taitham'               then :script_tai_tham
        when 'laoo', 'lao'                   then :script_lao
        when 'latn', 'latin'                 then :script_latin
        when 'lepc', 'lepcha'                then :script_lepcha
        when 'limb', 'limbu'                 then :script_limbu
        when 'linb', 'linearb'               then :script_linear_b
        when 'lisu'                          then :script_lisu
        when 'lyci', 'lycian'                then :script_lycian
        when 'lydi', 'lydian'                then :script_lydian
        when 'mlym', 'malayalam'             then :script_malayalam
        when 'mand', 'mandaic'               then :script_mandaic
        when 'mong', 'mongolian'             then :script_mongolian
        when 'mtei', 'meeteimayek'           then :script_meetei_mayek
        when 'mymr', 'myanmar'               then :script_myanmar
        when 'nkoo', 'nko'                   then :script_nko
        when 'ogam', 'ogham'                 then :script_ogham
        when 'olck', 'olchiki'               then :script_ol_chiki
        when 'orkh', 'oldturkic'             then :script_old_turkic
        when 'orya', 'oriya'                 then :script_oriya
        when 'osma', 'osmanya'               then :script_osmanya
        when 'phag', 'phagspa'               then :script_phags_pa
        when 'phli', 'inscriptionalpahlavi'  then :script_inscriptional_pahlavi
        when 'phnx', 'phoenician'            then :script_phoenician
        when 'prti', 'inscriptionalparthian' then :script_inscriptional_parthian
        when 'rjng', 'rejang'                then :script_rejang
        when 'runr', 'runic'                 then :script_runic
        when 'samr', 'samaritan'             then :script_samaritan
        when 'sarb', 'oldsoutharabian'       then :script_old_south_arabian
        when 'saur', 'saurashtra'            then :script_saurashtra
        when 'shaw', 'shavian'               then :script_shavian
        when 'sinh', 'sinhala'               then :script_sinhala
        when 'sund', 'sundanese'             then :script_sundanese
        when 'sylo', 'sylotinagri'           then :script_syloti_nagri
        when 'syrc', 'syriac'                then :script_syriac
        when 'tagb', 'tagbanwa'              then :script_tagbanwa
        when 'tale', 'taile'                 then :script_tai_le
        when 'talu', 'newtailue'             then :script_new_tai_lue
        when 'taml', 'tamil'                 then :script_tamil
        when 'tavt', 'taiviet'               then :script_tai_viet
        when 'telu', 'telugu'                then :script_telugu
        when 'tfng', 'tifinagh'              then :script_tifinagh
        when 'tglg', 'tagalog'               then :script_tagalog
        when 'thaa', 'thaana'                then :script_thaana
        when 'thai'                          then :script_thai
        when 'tibt', 'tibetan'               then :script_tibetan
        when 'ugar', 'ugaritic'              then :script_ugaritic
        when 'vaii', 'vai'                   then :script_vai
        when 'xpeo', 'oldpersian'            then :script_old_persian
        when 'xsux', 'cuneiform'             then :script_cuneiform
        when 'yiii', 'yi'                    then :script_yi
        when 'zinh', 'inherited', 'qaai'     then :script_inherited
        when 'zyyy', 'common'                then :script_common
        when 'zzzz', 'unknown'               then :script_unknown

        else
          # Matched the catch-all script pattern but is not a known name.
          raise UnknownUnicodePropertyError.new(name)
        end

      self.emit(type, token, text, ts-1, te)
      fret;
    };
  *|;
}%%
|