unicode_utils 1.1.2 → 1.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES.txt +14 -0
- data/README.txt +1 -2
- data/cdata/east_asian_width_property_per_cp +1 -0
- data/cdata/east_asian_width_property_ranges +1 -0
- data/cdata/general_category_aliases +38 -0
- data/cdata/general_category_per_cp +1 -0
- data/cdata/general_category_ranges +1 -0
- data/cdata/prop_set_default_ignorable +1 -0
- data/lib/unicode_utils.rb +9 -0
- data/lib/unicode_utils/char_display_width.rb +31 -0
- data/lib/unicode_utils/char_type.rb +32 -0
- data/lib/unicode_utils/debug.rb +121 -0
- data/lib/unicode_utils/default_ignorable_char_q.rb +22 -0
- data/lib/unicode_utils/display_width.rb +52 -0
- data/lib/unicode_utils/east_asian_width.rb +27 -0
- data/lib/unicode_utils/gc.rb +42 -0
- data/lib/unicode_utils/general_category.rb +27 -0
- data/lib/unicode_utils/graphic_char_q.rb +31 -0
- data/lib/unicode_utils/read_cdata.rb +95 -0
- data/lib/unicode_utils/version.rb +1 -1
- data/test/test_unicode_utils.rb +213 -0
- metadata +18 -3
@@ -0,0 +1,31 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require "unicode_utils/gc"
|
4
|
+
|
5
|
+
module UnicodeUtils
|
6
|
+
|
7
|
+
GENERAL_CATEGORY_IS_GRAPHIC_MAP = {
|
8
|
+
Lu: true, Ll: true, Lt: true, Lm: true, Lo: true,
|
9
|
+
Mn: true, Mc: true, Me: true,
|
10
|
+
Nd: true, Nl: true, No: true,
|
11
|
+
Pc: true, Pd: true, Ps: true, Pe: true, Pi: true, Pf: true, Po: true,
|
12
|
+
Sm: true, Sc: true, Sk: true, So: true,
|
13
|
+
Zs: true, Zl: false, Zp: false,
|
14
|
+
Cc: false, Cf: false, Cs: false, Co: false, Cn: false
|
15
|
+
} # :nodoc:
|
16
|
+
|
17
|
+
# Returns true if the given char is a graphic char, false otherwise.
|
18
|
+
# See table 2-3 in section 2.4 of Unicode 6.0.0.
|
19
|
+
#
|
20
|
+
# Examples:
|
21
|
+
#
|
22
|
+
# require "unicode_utils/graphic_char_q"
|
23
|
+
# UnicodeUtils.graphic_char?("a") # => true
|
24
|
+
# UnicodeUtils.graphic_char?("\n") # => false
|
25
|
+
# UnicodeUtils.graphic_char?(0x0) # => false
|
26
|
+
def graphic_char?(char)
|
27
|
+
GENERAL_CATEGORY_IS_GRAPHIC_MAP[UnicodeUtils.gc(char)]
|
28
|
+
end
|
29
|
+
module_function :graphic_char?
|
30
|
+
|
31
|
+
end
|
@@ -8,6 +8,14 @@ module UnicodeUtils
|
|
8
8
|
|
9
9
|
module Impl # :nodoc:
|
10
10
|
|
11
|
+
EAST_ASIAN_WIDTH_SYMBOL_MAP = {
|
12
|
+
1 => :Ambiguous,
|
13
|
+
2 => :Halfwidth,
|
14
|
+
3 => :Wide,
|
15
|
+
4 => :Fullwidth,
|
16
|
+
5 => :Narrow
|
17
|
+
}.freeze
|
18
|
+
|
11
19
|
def self.open_cdata_file(filename, &block)
|
12
20
|
File.open(File.join(CDATA_DIR, filename), "r:US-ASCII:-", &block)
|
13
21
|
end
|
@@ -113,6 +121,93 @@ module UnicodeUtils
|
|
113
121
|
}
|
114
122
|
end
|
115
123
|
|
124
|
+
# Returns a list (array) of pairs (two element Arrays) of Range
|
125
|
+
# (codepoints) and associated integer value.
|
126
|
+
def self.read_range_to_hexdigit_list(filename)
|
127
|
+
Array.new.tap { |list|
|
128
|
+
open_cdata_file(filename) do |input|
|
129
|
+
cp_buffer = "x" * 6
|
130
|
+
cp_buffer.force_encoding(Encoding::US_ASCII)
|
131
|
+
val_buffer = "x"
|
132
|
+
val_buffer.force_encoding(Encoding::US_ASCII)
|
133
|
+
while input.read(6, cp_buffer)
|
134
|
+
list << [
|
135
|
+
Range.new(cp_buffer.to_i(16), input.read(6, cp_buffer).to_i(16)),
|
136
|
+
input.read(1, val_buffer).to_i(16)
|
137
|
+
]
|
138
|
+
end
|
139
|
+
end
|
140
|
+
}
|
141
|
+
end
|
142
|
+
|
143
|
+
def self.read_east_asian_width_per_cp(filename)
|
144
|
+
# like read_hexdigit_map, but with translation to symbol values
|
145
|
+
Hash.new(:Neutral).tap { |map|
|
146
|
+
open_cdata_file(filename) do |input|
|
147
|
+
buffer = "x" * 6
|
148
|
+
buffer.force_encoding(Encoding::US_ASCII)
|
149
|
+
val_buffer = "x"
|
150
|
+
val_buffer.force_encoding(Encoding::US_ASCII)
|
151
|
+
while input.read(6, buffer)
|
152
|
+
map[buffer.to_i(16)] =
|
153
|
+
EAST_ASIAN_WIDTH_SYMBOL_MAP[input.read(1, val_buffer).to_i(16)]
|
154
|
+
end
|
155
|
+
end
|
156
|
+
}
|
157
|
+
end
|
158
|
+
|
159
|
+
def self.read_east_asian_width_ranges(filename)
|
160
|
+
read_range_to_hexdigit_list(filename).tap { |list|
|
161
|
+
list.each { |pair|
|
162
|
+
pair[1] = EAST_ASIAN_WIDTH_SYMBOL_MAP[pair[1]]
|
163
|
+
}
|
164
|
+
}
|
165
|
+
end
|
166
|
+
|
167
|
+
def self.read_general_category_per_cp(filename)
|
168
|
+
Hash.new.tap { |map|
|
169
|
+
open_cdata_file(filename) do |input|
|
170
|
+
cp_buffer = "x" * 6
|
171
|
+
cp_buffer.force_encoding(Encoding::US_ASCII)
|
172
|
+
cat_buffer = "x" * 2
|
173
|
+
cat_buffer.force_encoding(Encoding::US_ASCII)
|
174
|
+
while input.read(6, cp_buffer)
|
175
|
+
map[cp_buffer.to_i(16)] = input.read(2, cat_buffer).to_sym
|
176
|
+
end
|
177
|
+
end
|
178
|
+
}
|
179
|
+
end
|
180
|
+
|
181
|
+
def self.read_general_category_ranges(filename)
|
182
|
+
Array.new.tap { |list|
|
183
|
+
open_cdata_file(filename) do |input|
|
184
|
+
cp_buffer = "x" * 6
|
185
|
+
cp_buffer.force_encoding(Encoding::US_ASCII)
|
186
|
+
cat_buffer = "x" * 2
|
187
|
+
cat_buffer.force_encoding(Encoding::US_ASCII)
|
188
|
+
while input.read(6, cp_buffer)
|
189
|
+
list << [
|
190
|
+
Range.new(cp_buffer.to_i(16), input.read(6, cp_buffer).to_i(16)),
|
191
|
+
input.read(2, cat_buffer).to_sym
|
192
|
+
]
|
193
|
+
end
|
194
|
+
end
|
195
|
+
}
|
196
|
+
end
|
197
|
+
|
198
|
+
def self.read_symbol_map(filename)
|
199
|
+
Hash.new.tap { |map|
|
200
|
+
open_cdata_file(filename) do |input|
|
201
|
+
input.each_line { |line|
|
202
|
+
parts = line.split(";")
|
203
|
+
parts[0].strip!
|
204
|
+
parts[1].strip!
|
205
|
+
map[parts[0].to_sym] = parts[1].to_sym
|
206
|
+
}
|
207
|
+
end
|
208
|
+
}
|
209
|
+
end
|
210
|
+
|
116
211
|
end
|
117
212
|
|
118
213
|
end
|
data/test/test_unicode_utils.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require "test/unit"
|
4
|
+
require "stringio"
|
4
5
|
|
5
6
|
require "unicode_utils"
|
6
7
|
|
@@ -223,4 +224,216 @@ class TestUnicodeUtils < Test::Unit::TestCase
|
|
223
224
|
UnicodeUtils.titlecase("i can has 1kg CHEESBURGER", :tr)
|
224
225
|
end
|
225
226
|
|
227
|
+
def test_east_asian_width
|
228
|
+
assert_equal :Neutral, UnicodeUtils.east_asian_width("\u{0}")
|
229
|
+
assert_equal :Neutral, UnicodeUtils.east_asian_width("\u{10FFFF}")
|
230
|
+
assert_equal :Neutral, UnicodeUtils.east_asian_width("\u{C5}")
|
231
|
+
assert_equal :Ambiguous, UnicodeUtils.east_asian_width(0xA1)
|
232
|
+
assert_equal :Ambiguous, UnicodeUtils.east_asian_width(0xE000)
|
233
|
+
assert_equal :Ambiguous, UnicodeUtils.east_asian_width(0xF8FF)
|
234
|
+
assert_equal :Ambiguous, UnicodeUtils.east_asian_width(0xFE05)
|
235
|
+
assert_equal :Ambiguous, UnicodeUtils.east_asian_width("ß")
|
236
|
+
assert_equal :Halfwidth, UnicodeUtils.east_asian_width(0xFFA5)
|
237
|
+
assert_equal :Halfwidth, UnicodeUtils.east_asian_width(0xFFEE)
|
238
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0xB116)
|
239
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x11A5)
|
240
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x1100)
|
241
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x2E94)
|
242
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x3400)
|
243
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x4E05)
|
244
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x4FCD)
|
245
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0xFA2E)
|
246
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x3FFFD)
|
247
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x4DB6)
|
248
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x4DBF)
|
249
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x9FCC)
|
250
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x9FFF)
|
251
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0xFA2E)
|
252
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0xFA2F)
|
253
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0xFA6E)
|
254
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0xFA6F)
|
255
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0xFADA)
|
256
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0xFAFF)
|
257
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x2A6D7)
|
258
|
+
assert_equal :Wide, UnicodeUtils.east_asian_width(0x2A6FF)
|
259
|
+
assert_equal :Fullwidth, UnicodeUtils.east_asian_width(0x3000)
|
260
|
+
assert_equal :Fullwidth, UnicodeUtils.east_asian_width(0xFF0D)
|
261
|
+
assert_equal :Fullwidth, UnicodeUtils.east_asian_width(0xFFE6)
|
262
|
+
assert_equal :Narrow, UnicodeUtils.east_asian_width(0x20)
|
263
|
+
assert_equal :Narrow, UnicodeUtils.east_asian_width(0x2C)
|
264
|
+
assert_equal :Narrow, UnicodeUtils.east_asian_width(0x7E)
|
265
|
+
assert_equal :Narrow, UnicodeUtils.east_asian_width(0xA6)
|
266
|
+
assert_equal :Narrow, UnicodeUtils.east_asian_width(0x2986)
|
267
|
+
end
|
268
|
+
|
269
|
+
def test_display_width
|
270
|
+
assert_equal 0, UnicodeUtils.display_width("")
|
271
|
+
assert_equal 18, UnicodeUtils.display_width("別れる時に発する語")
|
272
|
+
assert_equal 18, UnicodeUtils.display_width("123456789aBcDeFgHi")
|
273
|
+
assert_equal 6, UnicodeUtils.display_width("Straße")
|
274
|
+
assert_equal 1, UnicodeUtils.display_width("a\u{308}")
|
275
|
+
assert_equal 5, UnicodeUtils.display_width("Now!")
|
276
|
+
assert_equal 2, UnicodeUtils.display_width("a̦b") # General Category Mn
|
277
|
+
assert_equal 4, UnicodeUtils.display_width("a\u{93b}bc") # General Category Mc
|
278
|
+
assert_equal 3, UnicodeUtils.display_width("ab\u{20dd}c") # General Category Me
|
279
|
+
assert_equal 4, UnicodeUtils.display_width("ab\u{a8}c") # General Category Sk
|
280
|
+
assert_equal 4, UnicodeUtils.display_width("ab\u{2000}c") # General Category Zs
|
281
|
+
assert_equal 3, UnicodeUtils.display_width("a b") # Zs
|
282
|
+
assert_equal 3, UnicodeUtils.display_width("a\u{1680}b") # Zs
|
283
|
+
end
|
284
|
+
|
285
|
+
def test_char_display_width
|
286
|
+
assert_equal 2, UnicodeUtils.char_display_width("別")
|
287
|
+
assert_equal 0, UnicodeUtils.char_display_width(0x308)
|
288
|
+
assert_equal 1, UnicodeUtils.char_display_width("a")
|
289
|
+
end
|
290
|
+
|
291
|
+
def test_default_ignorable_char?
|
292
|
+
assert_equal true, UnicodeUtils.default_ignorable_char?(0xad)
|
293
|
+
assert_equal true, UnicodeUtils.default_ignorable_char?(0x34f)
|
294
|
+
assert_equal true, UnicodeUtils.default_ignorable_char?(0x115f)
|
295
|
+
assert_equal true, UnicodeUtils.default_ignorable_char?(0x1160)
|
296
|
+
assert_equal true, UnicodeUtils.default_ignorable_char?(0x2065)
|
297
|
+
assert_equal true, UnicodeUtils.default_ignorable_char?(0x2069)
|
298
|
+
assert_equal true, UnicodeUtils.default_ignorable_char?(0xe008a)
|
299
|
+
assert_equal true, UnicodeUtils.default_ignorable_char?(0xffa0)
|
300
|
+
assert_equal true, UnicodeUtils.default_ignorable_char?(0xe0fff)
|
301
|
+
assert_equal false, UnicodeUtils.default_ignorable_char?("a")
|
302
|
+
assert_equal false, UnicodeUtils.default_ignorable_char?("語")
|
303
|
+
assert_equal false, UnicodeUtils.default_ignorable_char?(" ")
|
304
|
+
assert_equal false, UnicodeUtils.default_ignorable_char?("\u{308}")
|
305
|
+
end
|
306
|
+
|
307
|
+
def test_gc
|
308
|
+
assert_equal :Lu, UnicodeUtils.gc("A")
|
309
|
+
assert_equal :Ll, UnicodeUtils.gc("a")
|
310
|
+
assert_equal :Lt, UnicodeUtils.gc(0x1cb)
|
311
|
+
assert_equal :Lm, UnicodeUtils.gc(0x2b5)
|
312
|
+
assert_equal :Lo, UnicodeUtils.gc(0x10923)
|
313
|
+
|
314
|
+
assert_equal :Mn, UnicodeUtils.gc(0x5a0)
|
315
|
+
assert_equal :Mc, UnicodeUtils.gc(0x93f)
|
316
|
+
assert_equal :Me, UnicodeUtils.gc(0x20dd)
|
317
|
+
|
318
|
+
assert_equal :Nd, UnicodeUtils.gc(0xa901)
|
319
|
+
assert_equal :Nl, UnicodeUtils.gc(0x10144)
|
320
|
+
assert_equal :No, UnicodeUtils.gc(0x10917)
|
321
|
+
|
322
|
+
assert_equal :Pc, UnicodeUtils.gc(0x5f)
|
323
|
+
assert_equal :Pd, UnicodeUtils.gc(0x2011)
|
324
|
+
assert_equal :Ps, UnicodeUtils.gc(0x2329)
|
325
|
+
assert_equal :Pe, UnicodeUtils.gc(0xfe38)
|
326
|
+
assert_equal :Pi, UnicodeUtils.gc(0x201c)
|
327
|
+
assert_equal :Pf, UnicodeUtils.gc(0x201d)
|
328
|
+
assert_equal :Po, UnicodeUtils.gc(0x2e10)
|
329
|
+
|
330
|
+
assert_equal :Sm, UnicodeUtils.gc(0xff0b)
|
331
|
+
assert_equal :Sc, UnicodeUtils.gc(0xa3)
|
332
|
+
assert_equal :Sk, UnicodeUtils.gc(0x2c2)
|
333
|
+
assert_equal :So, UnicodeUtils.gc(0x60f)
|
334
|
+
|
335
|
+
assert_equal :Zs, UnicodeUtils.gc(0x2001)
|
336
|
+
assert_equal :Zl, UnicodeUtils.gc(0x2028)
|
337
|
+
assert_equal :Zp, UnicodeUtils.gc(0x2029)
|
338
|
+
|
339
|
+
assert_equal :Cc, UnicodeUtils.gc(0x0)
|
340
|
+
assert_equal :Cf, UnicodeUtils.gc(0x70f)
|
341
|
+
assert_equal :Cs, UnicodeUtils.gc(0xdb82)
|
342
|
+
assert_equal :Co, UnicodeUtils.gc(0xf1020)
|
343
|
+
assert_equal :Cn, UnicodeUtils.gc(0x10ffff)
|
344
|
+
### 30 general categories ###
|
345
|
+
|
346
|
+
assert_equal :Lo, UnicodeUtils.gc(0x3400)
|
347
|
+
assert_equal :Lo, UnicodeUtils.gc(0x4000)
|
348
|
+
assert_equal :Lo, UnicodeUtils.gc(0x4db5)
|
349
|
+
|
350
|
+
assert_equal :Lo, UnicodeUtils.gc(0x4e00)
|
351
|
+
assert_equal :Lo, UnicodeUtils.gc(0x9fcb)
|
352
|
+
assert_equal :Lo, UnicodeUtils.gc(0x7111)
|
353
|
+
|
354
|
+
assert_equal :Lo, UnicodeUtils.gc(0xac00)
|
355
|
+
assert_equal :Lo, UnicodeUtils.gc(0xd7a3)
|
356
|
+
assert_equal :Lo, UnicodeUtils.gc(0xb70f)
|
357
|
+
|
358
|
+
assert_equal :Cs, UnicodeUtils.gc(0xd800)
|
359
|
+
assert_equal :Cs, UnicodeUtils.gc(0xdb7f)
|
360
|
+
assert_equal :Cs, UnicodeUtils.gc(0xda00)
|
361
|
+
|
362
|
+
assert_equal :Cs, UnicodeUtils.gc(0xdb80)
|
363
|
+
assert_equal :Cs, UnicodeUtils.gc(0xdbff)
|
364
|
+
assert_equal :Cs, UnicodeUtils.gc(0xdb90)
|
365
|
+
|
366
|
+
assert_equal :Cs, UnicodeUtils.gc(0xdc00)
|
367
|
+
assert_equal :Cs, UnicodeUtils.gc(0xdfff)
|
368
|
+
assert_equal :Cs, UnicodeUtils.gc(0xdc01)
|
369
|
+
|
370
|
+
assert_equal :Co, UnicodeUtils.gc(0xe000)
|
371
|
+
assert_equal :Co, UnicodeUtils.gc(0xf8ff)
|
372
|
+
assert_equal :Co, UnicodeUtils.gc(0xf8fe)
|
373
|
+
|
374
|
+
assert_equal :Lo, UnicodeUtils.gc(0x20000)
|
375
|
+
assert_equal :Lo, UnicodeUtils.gc(0x2a6d6)
|
376
|
+
assert_equal :Lo, UnicodeUtils.gc(0x2b000)
|
377
|
+
|
378
|
+
assert_equal :Lo, UnicodeUtils.gc(0x2a700)
|
379
|
+
assert_equal :Lo, UnicodeUtils.gc(0x2b734)
|
380
|
+
assert_equal :Lo, UnicodeUtils.gc(0x2b800)
|
381
|
+
|
382
|
+
assert_equal :Lo, UnicodeUtils.gc(0x2b740)
|
383
|
+
assert_equal :Lo, UnicodeUtils.gc(0x2b81d)
|
384
|
+
assert_equal :Lo, UnicodeUtils.gc(0x2b810)
|
385
|
+
|
386
|
+
assert_equal :Co, UnicodeUtils.gc(0xf0000)
|
387
|
+
assert_equal :Co, UnicodeUtils.gc(0xffffd)
|
388
|
+
assert_equal :Co, UnicodeUtils.gc(0xffafd)
|
389
|
+
|
390
|
+
assert_equal :Co, UnicodeUtils.gc(0x100000)
|
391
|
+
assert_equal :Co, UnicodeUtils.gc(0x10fffd)
|
392
|
+
assert_equal :Co, UnicodeUtils.gc(0x100ffd)
|
393
|
+
|
394
|
+
assert_equal nil, UnicodeUtils.gc(-1)
|
395
|
+
assert_equal nil, UnicodeUtils.gc(0x110000)
|
396
|
+
end
|
397
|
+
|
398
|
+
def test_general_category
|
399
|
+
assert_equal :Uppercase_Letter, UnicodeUtils.general_category("B")
|
400
|
+
assert_equal :Lowercase_Letter, UnicodeUtils.general_category("b")
|
401
|
+
assert_equal :Control, UnicodeUtils.general_category(0x0)
|
402
|
+
assert_equal nil, UnicodeUtils.general_category(-1)
|
403
|
+
end
|
404
|
+
|
405
|
+
def test_char_type
|
406
|
+
assert_equal :Letter, UnicodeUtils.char_type("Ä")
|
407
|
+
assert_equal :Letter, UnicodeUtils.char_type("ä")
|
408
|
+
assert_equal :Other, UnicodeUtils.char_type(0x0)
|
409
|
+
assert_equal :Number, UnicodeUtils.char_type("1")
|
410
|
+
assert_equal nil, UnicodeUtils.char_type(-1)
|
411
|
+
end
|
412
|
+
|
413
|
+
def test_graphic_char?
|
414
|
+
assert_equal true, UnicodeUtils.graphic_char?("a")
|
415
|
+
assert_equal true, UnicodeUtils.graphic_char?(0x308)
|
416
|
+
assert_equal false, UnicodeUtils.graphic_char?("\n")
|
417
|
+
assert_equal false, UnicodeUtils.graphic_char?(0x0)
|
418
|
+
end
|
419
|
+
|
420
|
+
def test_debug
|
421
|
+
io = StringIO.new
|
422
|
+
UnicodeUtils.debug("", io: io)
|
423
|
+
assert_equal <<-'EOF', io.string
|
424
|
+
Char | Ordinal | Name | General Category | UTF-8
|
425
|
+
------+---------+------+------------------+-------
|
426
|
+
EOF
|
427
|
+
io = StringIO.new
|
428
|
+
UnicodeUtils.debug("一 \u{100000}\n", io: io)
|
429
|
+
assert_equal <<-'EOF', io.string
|
430
|
+
Char | Ordinal | Name | General Category | UTF-8
|
431
|
+
------+---------+----------------------------+------------------+-------------
|
432
|
+
"一" | 4E00 | CJK UNIFIED IDEOGRAPH-4E00 | Other_Letter | E4 B8 80
|
433
|
+
" " | 20 | SPACE | Space_Separator | 20
|
434
|
+
N/A | 100000 | N/A | Private_Use | F4 80 80 80
|
435
|
+
"\n" | A | <control> | Control | 0A
|
436
|
+
EOF
|
437
|
+
end
|
438
|
+
|
226
439
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
- 1
|
8
7
|
- 2
|
9
|
-
version: 1.1.2
|
8
|
+
- 2
|
9
|
+
version: 1.2.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Stefan Lang
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-11-
|
17
|
+
date: 2011-11-27 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -33,10 +33,14 @@ files:
|
|
33
33
|
- lib/unicode_utils/conditional_casing.rb
|
34
34
|
- lib/unicode_utils/version.rb
|
35
35
|
- lib/unicode_utils/grep.rb
|
36
|
+
- lib/unicode_utils/gc.rb
|
36
37
|
- lib/unicode_utils/nfkc.rb
|
38
|
+
- lib/unicode_utils/char_display_width.rb
|
37
39
|
- lib/unicode_utils/nfkd.rb
|
38
40
|
- lib/unicode_utils/downcase.rb
|
39
41
|
- lib/unicode_utils/case_ignorable_char_q.rb
|
42
|
+
- lib/unicode_utils/east_asian_width.rb
|
43
|
+
- lib/unicode_utils/general_category.rb
|
40
44
|
- lib/unicode_utils/uppercase_char_q.rb
|
41
45
|
- lib/unicode_utils/upcase.rb
|
42
46
|
- lib/unicode_utils/u.rb
|
@@ -45,6 +49,7 @@ files:
|
|
45
49
|
- lib/unicode_utils/lowercase_char_q.rb
|
46
50
|
- lib/unicode_utils/read_cdata.rb
|
47
51
|
- lib/unicode_utils/compatibility_decomposition.rb
|
52
|
+
- lib/unicode_utils/display_width.rb
|
48
53
|
- lib/unicode_utils/each_word.rb
|
49
54
|
- lib/unicode_utils/combining_class.rb
|
50
55
|
- lib/unicode_utils/char_name.rb
|
@@ -54,6 +59,8 @@ files:
|
|
54
59
|
- lib/unicode_utils/cased_char_q.rb
|
55
60
|
- lib/unicode_utils/simple_casefold.rb
|
56
61
|
- lib/unicode_utils/titlecase_char_q.rb
|
62
|
+
- lib/unicode_utils/char_type.rb
|
63
|
+
- lib/unicode_utils/default_ignorable_char_q.rb
|
57
64
|
- lib/unicode_utils/canonical_equivalents_q.rb
|
58
65
|
- lib/unicode_utils/nfd.rb
|
59
66
|
- lib/unicode_utils/jamo_short_name.rb
|
@@ -61,18 +68,23 @@ files:
|
|
61
68
|
- lib/unicode_utils/each_grapheme.rb
|
62
69
|
- lib/unicode_utils/simple_upcase.rb
|
63
70
|
- lib/unicode_utils/simple_downcase.rb
|
71
|
+
- lib/unicode_utils/debug.rb
|
72
|
+
- lib/unicode_utils/graphic_char_q.rb
|
64
73
|
- lib/unicode_utils/codepoint.rb
|
65
74
|
- cdata/prop_set_lowercase
|
66
75
|
- cdata/casefold_c_map
|
67
76
|
- cdata/combining_class_map
|
68
77
|
- cdata/grapheme_break_property
|
69
78
|
- cdata/casefold_s_map
|
79
|
+
- cdata/prop_set_default_ignorable
|
80
|
+
- cdata/east_asian_width_property_ranges
|
70
81
|
- cdata/soft_dotted_set
|
71
82
|
- cdata/cond_lc_map
|
72
83
|
- cdata/simple_tc_map
|
73
84
|
- cdata/prop_set_uppercase
|
74
85
|
- cdata/jamo_short_names
|
75
86
|
- cdata/composition_exclusion_set
|
87
|
+
- cdata/general_category_aliases
|
76
88
|
- cdata/canonical_decomposition_map
|
77
89
|
- cdata/cat_set_titlecase
|
78
90
|
- cdata/casefold_f_map
|
@@ -85,6 +97,9 @@ files:
|
|
85
97
|
- cdata/simple_uc_map
|
86
98
|
- cdata/simple_lc_map
|
87
99
|
- cdata/special_lc_map
|
100
|
+
- cdata/general_category_per_cp
|
101
|
+
- cdata/general_category_ranges
|
102
|
+
- cdata/east_asian_width_property_per_cp
|
88
103
|
- cdata/cond_uc_map
|
89
104
|
- cdata/compatibility_decomposition_map
|
90
105
|
- test/test_unicode_utils.rb
|