unicode_utils 1.1.2 → 1.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/gc"
4
+
5
module UnicodeUtils

  # Lookup table from two-letter general category symbol (the values
  # produced by UnicodeUtils.gc) to a boolean telling whether
  # characters of that category are graphic characters. Frozen, since
  # it is shared, constant data.
  GENERAL_CATEGORY_IS_GRAPHIC_MAP = {
    Lu: true, Ll: true, Lt: true, Lm: true, Lo: true,
    Mn: true, Mc: true, Me: true,
    Nd: true, Nl: true, No: true,
    Pc: true, Pd: true, Ps: true, Pe: true, Pi: true, Pf: true, Po: true,
    Sm: true, Sc: true, Sk: true, So: true,
    Zs: true, Zl: false, Zp: false,
    Cc: false, Cf: false, Cs: false, Co: false, Cn: false
  }.freeze # :nodoc:

  # Returns true if the given char is a graphic char, false otherwise.
  # See table 2-3 in section 2.4 of Unicode 6.0.0.
  #
  # +char+ is passed unchanged to UnicodeUtils.gc. If the category
  # returned by gc is not a key of the lookup table (for instance
  # +nil+), the result is +false+.
  #
  # Examples:
  #
  #   require "unicode_utils/graphic_char_q"
  #   UnicodeUtils.graphic_char?("a") # => true
  #   UnicodeUtils.graphic_char?("\n") # => false
  #   UnicodeUtils.graphic_char?(0x0) # => false
  def graphic_char?(char)
    # fetch with an explicit default keeps the documented boolean
    # contract: a plain [] lookup would return nil for unknown keys.
    GENERAL_CATEGORY_IS_GRAPHIC_MAP.fetch(UnicodeUtils.gc(char), false)
  end
  module_function :graphic_char?

end
@@ -8,6 +8,14 @@ module UnicodeUtils
8
8
 
9
9
  module Impl # :nodoc:
10
10
 
11
# Maps the integer codes 1 through 5 to the East Asian Width symbols
# :Ambiguous, :Halfwidth, :Wide, :Fullwidth and :Narrow (in that
# order). Any other code is not a key of this table.
EAST_ASIAN_WIDTH_SYMBOL_MAP = Hash[
  [:Ambiguous, :Halfwidth, :Wide, :Fullwidth, :Narrow].
    each_with_index.map { |width, index| [index + 1, width] }
].freeze
18
+
11
19
  def self.open_cdata_file(filename, &block)
12
20
  File.open(File.join(CDATA_DIR, filename), "r:US-ASCII:-", &block)
13
21
  end
@@ -113,6 +121,93 @@ module UnicodeUtils
113
121
  }
114
122
  end
115
123
 
124
# Reads the cdata file +filename+ and returns an Array of pairs
# (two element Arrays): a Range of codepoints and the integer value
# associated with that range.
#
# The file is consumed as consecutive records of 6 characters (range
# start), 6 characters (range end) and 1 character (value), each
# parsed as a hexadecimal number.
def self.read_range_to_hexdigit_list(filename)
  pairs = []
  open_cdata_file(filename) do |input|
    # Buffers are reused for every read to avoid allocating a new
    # string per record.
    cp_chunk = ("x" * 6).force_encoding(Encoding::US_ASCII)
    value_chunk = "x".force_encoding(Encoding::US_ASCII)
    while input.read(6, cp_chunk)
      # Convert the start before the next read overwrites cp_chunk.
      first_cp = cp_chunk.to_i(16)
      last_cp = input.read(6, cp_chunk).to_i(16)
      value = input.read(1, value_chunk).to_i(16)
      pairs << [Range.new(first_cp, last_cp), value]
    end
  end
  pairs
end
142
+
143
# Reads the cdata file +filename+ and returns a Hash from codepoint
# (Integer) to East Asian Width symbol. Looking up a codepoint that
# is not stored in the hash yields :Neutral, the hash default.
def self.read_east_asian_width_per_cp(filename)
  # Works like read_hexdigit_map, except that each stored value is
  # translated to a symbol through EAST_ASIAN_WIDTH_SYMBOL_MAP.
  widths = Hash.new(:Neutral)
  open_cdata_file(filename) do |input|
    cp_chunk = ("x" * 6).force_encoding(Encoding::US_ASCII)
    code_chunk = "x".force_encoding(Encoding::US_ASCII)
    # Each record: 6 hex characters of codepoint, 1 hex character of code.
    while input.read(6, cp_chunk)
      codepoint = cp_chunk.to_i(16)
      code = input.read(1, code_chunk).to_i(16)
      widths[codepoint] = EAST_ASIAN_WIDTH_SYMBOL_MAP[code]
    end
  end
  widths
end
158
+
159
# Returns the list produced by read_range_to_hexdigit_list for
# +filename+, with the second element of every pair replaced in
# place by its EAST_ASIAN_WIDTH_SYMBOL_MAP entry.
def self.read_east_asian_width_ranges(filename)
  # Array#each returns its receiver, so the updated list is the result.
  read_range_to_hexdigit_list(filename).each do |entry|
    entry[1] = EAST_ASIAN_WIDTH_SYMBOL_MAP[entry[1]]
  end
end
166
+
167
# Reads the cdata file +filename+ and returns a Hash from codepoint
# (Integer) to a Symbol built from the two characters stored after
# the codepoint.
def self.read_general_category_per_cp(filename)
  categories = {}
  open_cdata_file(filename) do |input|
    cp_chunk = ("x" * 6).force_encoding(Encoding::US_ASCII)
    category_chunk = ("x" * 2).force_encoding(Encoding::US_ASCII)
    # Each record: 6 hex characters of codepoint, 2 characters of category.
    while input.read(6, cp_chunk)
      codepoint = cp_chunk.to_i(16)
      categories[codepoint] = input.read(2, category_chunk).to_sym
    end
  end
  categories
end
180
+
181
# Reads the cdata file +filename+ and returns an Array of pairs
# (two element Arrays): a Range of codepoints and a Symbol built from
# the two characters stored after the range.
def self.read_general_category_ranges(filename)
  pairs = []
  open_cdata_file(filename) do |input|
    cp_chunk = ("x" * 6).force_encoding(Encoding::US_ASCII)
    category_chunk = ("x" * 2).force_encoding(Encoding::US_ASCII)
    # Each record: 6 hex characters range start, 6 hex characters
    # range end, 2 characters of category.
    while input.read(6, cp_chunk)
      # Convert the start before the next read overwrites cp_chunk.
      first_cp = cp_chunk.to_i(16)
      last_cp = input.read(6, cp_chunk).to_i(16)
      category = input.read(2, category_chunk).to_sym
      pairs << [Range.new(first_cp, last_cp), category]
    end
  end
  pairs
end
197
+
198
# Reads the cdata file +filename+ line by line and returns a Hash
# from Symbol to Symbol. Every line is split at ";"; the first field
# becomes the key and the second field the value, both with
# surrounding whitespace removed. Fields after the second are ignored.
#
# Blank (empty or whitespace-only) lines are skipped instead of
# raising NoMethodError on the missing second field.
def self.read_symbol_map(filename)
  symbols = {}
  open_cdata_file(filename) do |input|
    input.each_line do |line|
      # A blank line carries no key/value pair.
      next if line.strip.empty?
      key, value = line.split(";")
      symbols[key.strip.to_sym] = value.strip.to_sym
    end
  end
  symbols
end
210
+
116
211
  end
117
212
 
118
213
  end
@@ -11,6 +11,6 @@ module UnicodeUtils
11
11
  #
12
12
  # A release always has an even PATCHLEVEL. PATCHLEVEL is uneven
13
13
  # during development.
14
- VERSION = "1.1.2"
14
+ VERSION = "1.2.2"
15
15
 
16
16
  end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require "test/unit"
4
+ require "stringio"
4
5
 
5
6
  require "unicode_utils"
6
7
 
@@ -223,4 +224,216 @@ class TestUnicodeUtils < Test::Unit::TestCase
223
224
  UnicodeUtils.titlecase("i can has 1kg CHEESBURGER", :tr)
224
225
  end
225
226
 
227
# Checks UnicodeUtils.east_asian_width against sample characters
# (Strings) and codepoints (Integers) for each expected property value.
def test_east_asian_width
  # Pairs of expected symbol and samples, asserted in the listed order.
  samples_by_width = [
    [:Neutral, ["\u{0}", "\u{10FFFF}", "\u{C5}"]],
    [:Ambiguous, [0xA1, 0xE000, 0xF8FF, 0xFE05, "ß"]],
    [:Halfwidth, [0xFFA5, 0xFFEE]],
    [:Wide, [
      0xB116, 0x11A5, 0x1100, 0x2E94, 0x3400, 0x4E05, 0x4FCD,
      0xFA2E, 0x3FFFD, 0x4DB6, 0x4DBF, 0x9FCC, 0x9FFF, 0xFA2E,
      0xFA2F, 0xFA6E, 0xFA6F, 0xFADA, 0xFAFF, 0x2A6D7, 0x2A6FF
    ]],
    [:Fullwidth, [0x3000, 0xFF0D, 0xFFE6]],
    [:Narrow, [0x20, 0x2C, 0x7E, 0xA6, 0x2986]]
  ]
  samples_by_width.each do |width, samples|
    samples.each do |sample|
      assert_equal width, UnicodeUtils.east_asian_width(sample)
    end
  end
end
268
+
269
# Checks UnicodeUtils.display_width for empty, wide, narrow and
# combining-character input.
def test_display_width
  assert_equal 0, UnicodeUtils.display_width("")
  assert_equal 18, UnicodeUtils.display_width("別れる時に発する語")
  assert_equal 18, UnicodeUtils.display_width("123456789aBcDeFgHi")
  assert_equal 6, UnicodeUtils.display_width("Straße")
  assert_equal 1, UnicodeUtils.display_width("a\u{308}")
  # Three narrow letters plus U+FF01 FULLWIDTH EXCLAMATION MARK, which
  # takes two columns. Written as an escape so that the fullwidth
  # character cannot be silently normalized to ASCII "!" (which would
  # make the string only 4 columns wide).
  assert_equal 5, UnicodeUtils.display_width("Now\u{FF01}")
  assert_equal 2, UnicodeUtils.display_width("a̦b") # General Category Mn
  assert_equal 4, UnicodeUtils.display_width("a\u{93b}bc") # General Category Mc
  assert_equal 3, UnicodeUtils.display_width("ab\u{20dd}c") # General Category Me
  assert_equal 4, UnicodeUtils.display_width("ab\u{a8}c") # General Category Sk
  assert_equal 4, UnicodeUtils.display_width("ab\u{2000}c") # General Category Zs
  assert_equal 3, UnicodeUtils.display_width("a b") # Zs
  assert_equal 3, UnicodeUtils.display_width("a\u{1680}b") # Zs
end
284
+
285
# Checks UnicodeUtils.char_display_width for a wide character, a
# combining mark given as codepoint, and a narrow letter.
def test_char_display_width
  # Sample => expected number of columns, asserted in insertion order.
  { "別" => 2, 0x308 => 0, "a" => 1 }.each do |sample, columns|
    assert_equal columns, UnicodeUtils.char_display_width(sample)
  end
end
290
+
291
# Checks UnicodeUtils.default_ignorable_char? for codepoints expected
# to be default ignorable and for characters expected not to be.
def test_default_ignorable_char?
  ignorable = [0xad, 0x34f, 0x115f, 0x1160, 0x2065, 0x2069, 0xe008a, 0xffa0, 0xe0fff]
  not_ignorable = ["a", "語", " ", "\u{308}"]
  ignorable.each do |sample|
    assert_equal true, UnicodeUtils.default_ignorable_char?(sample)
  end
  not_ignorable.each do |sample|
    assert_equal false, UnicodeUtils.default_ignorable_char?(sample)
  end
end
306
+
307
# Checks UnicodeUtils.gc: one sample per general category, samples
# from several larger codepoint blocks, and arguments for which gc is
# expected to return nil.
def test_gc
  # Expected category => sample (String or codepoint).
  ### 30 general categories ###
  sample_per_category = {
    Lu: "A", Ll: "a", Lt: 0x1cb, Lm: 0x2b5, Lo: 0x10923,
    Mn: 0x5a0, Mc: 0x93f, Me: 0x20dd,
    Nd: 0xa901, Nl: 0x10144, No: 0x10917,
    Pc: 0x5f, Pd: 0x2011, Ps: 0x2329, Pe: 0xfe38, Pi: 0x201c, Pf: 0x201d, Po: 0x2e10,
    Sm: 0xff0b, Sc: 0xa3, Sk: 0x2c2, So: 0x60f,
    Zs: 0x2001, Zl: 0x2028, Zp: 0x2029,
    Cc: 0x0, Cf: 0x70f, Cs: 0xdb82, Co: 0xf1020, Cn: 0x10ffff
  }
  sample_per_category.each do |category, sample|
    assert_equal category, UnicodeUtils.gc(sample)
  end

  # Groups of three codepoints sharing one expected category
  # (ideographs, Hangul syllables, surrogates, private use).
  grouped_samples = [
    [:Lo, [0x3400, 0x4000, 0x4db5]],
    [:Lo, [0x4e00, 0x9fcb, 0x7111]],
    [:Lo, [0xac00, 0xd7a3, 0xb70f]],
    [:Cs, [0xd800, 0xdb7f, 0xda00]],
    [:Cs, [0xdb80, 0xdbff, 0xdb90]],
    [:Cs, [0xdc00, 0xdfff, 0xdc01]],
    [:Co, [0xe000, 0xf8ff, 0xf8fe]],
    [:Lo, [0x20000, 0x2a6d6, 0x2b000]],
    [:Lo, [0x2a700, 0x2b734, 0x2b800]],
    [:Lo, [0x2b740, 0x2b81d, 0x2b810]],
    [:Co, [0xf0000, 0xffffd, 0xffafd]],
    [:Co, [0x100000, 0x10fffd, 0x100ffd]]
  ]
  grouped_samples.each do |category, codepoints|
    codepoints.each do |codepoint|
      assert_equal category, UnicodeUtils.gc(codepoint)
    end
  end

  # assert_nil instead of assert_equal(nil, ...): the latter is
  # deprecated in Minitest and both frameworks provide assert_nil.
  assert_nil UnicodeUtils.gc(-1)
  assert_nil UnicodeUtils.gc(0x110000)
end
397
+
398
# Checks that UnicodeUtils.general_category returns the long category
# name as a Symbol, and nil for -1.
def test_general_category
  assert_equal :Uppercase_Letter, UnicodeUtils.general_category("B")
  assert_equal :Lowercase_Letter, UnicodeUtils.general_category("b")
  assert_equal :Control, UnicodeUtils.general_category(0x0)
  # assert_nil instead of assert_equal(nil, ...), which is deprecated
  # in Minitest.
  assert_nil UnicodeUtils.general_category(-1)
end
404
+
405
# Checks that UnicodeUtils.char_type returns the major character
# class as a Symbol, and nil for -1.
def test_char_type
  assert_equal :Letter, UnicodeUtils.char_type("Ä")
  assert_equal :Letter, UnicodeUtils.char_type("ä")
  assert_equal :Other, UnicodeUtils.char_type(0x0)
  assert_equal :Number, UnicodeUtils.char_type("1")
  # assert_nil instead of assert_equal(nil, ...), which is deprecated
  # in Minitest.
  assert_nil UnicodeUtils.char_type(-1)
end
412
+
413
# Checks UnicodeUtils.graphic_char? for a letter and a combining mark
# (expected true) and for two control characters (expected false).
def test_graphic_char?
  # Pairs of sample and expected result, asserted in the listed order.
  [["a", true], [0x308, true], ["\n", false], [0x0, false]].each do |sample, expected|
    assert_equal expected, UnicodeUtils.graphic_char?(sample)
  end
end
419
+
420
# Checks UnicodeUtils.debug by letting it write to a StringIO (passed
# via the io: option) and comparing the captured text with an expected
# table: first for the empty string (header and separator only), then
# for a string of four characters (one table row per character).
# NOTE(review): the heredocs are compared verbatim with io.string, so
# the whitespace inside them is significant. The column padding shown
# here does not line up with the separator rows, presumably because
# runs of spaces were collapsed in this rendering; confirm against the
# real test file before relying on these literals.
+ def test_debug
421
+ io = StringIO.new
422
+ UnicodeUtils.debug("", io: io)
423
+ assert_equal <<-'EOF', io.string
424
+ Char | Ordinal | Name | General Category | UTF-8
425
+ ------+---------+------+------------------+-------
426
+ EOF
427
+ io = StringIO.new
428
+ UnicodeUtils.debug("一 \u{100000}\n", io: io)
429
+ assert_equal <<-'EOF', io.string
430
+ Char | Ordinal | Name | General Category | UTF-8
431
+ ------+---------+----------------------------+------------------+-------------
432
+ "一" | 4E00 | CJK UNIFIED IDEOGRAPH-4E00 | Other_Letter | E4 B8 80
433
+ " " | 20 | SPACE | Space_Separator | 20
434
+ N/A | 100000 | N/A | Private_Use | F4 80 80 80
435
+ "\n" | A | <control> | Control | 0A
436
+ EOF
437
+ end
438
+
226
439
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 1
8
7
  - 2
9
- version: 1.1.2
8
+ - 2
9
+ version: 1.2.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Stefan Lang
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-11-18 00:00:00 +01:00
17
+ date: 2011-11-27 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -33,10 +33,14 @@ files:
33
33
  - lib/unicode_utils/conditional_casing.rb
34
34
  - lib/unicode_utils/version.rb
35
35
  - lib/unicode_utils/grep.rb
36
+ - lib/unicode_utils/gc.rb
36
37
  - lib/unicode_utils/nfkc.rb
38
+ - lib/unicode_utils/char_display_width.rb
37
39
  - lib/unicode_utils/nfkd.rb
38
40
  - lib/unicode_utils/downcase.rb
39
41
  - lib/unicode_utils/case_ignorable_char_q.rb
42
+ - lib/unicode_utils/east_asian_width.rb
43
+ - lib/unicode_utils/general_category.rb
40
44
  - lib/unicode_utils/uppercase_char_q.rb
41
45
  - lib/unicode_utils/upcase.rb
42
46
  - lib/unicode_utils/u.rb
@@ -45,6 +49,7 @@ files:
45
49
  - lib/unicode_utils/lowercase_char_q.rb
46
50
  - lib/unicode_utils/read_cdata.rb
47
51
  - lib/unicode_utils/compatibility_decomposition.rb
52
+ - lib/unicode_utils/display_width.rb
48
53
  - lib/unicode_utils/each_word.rb
49
54
  - lib/unicode_utils/combining_class.rb
50
55
  - lib/unicode_utils/char_name.rb
@@ -54,6 +59,8 @@ files:
54
59
  - lib/unicode_utils/cased_char_q.rb
55
60
  - lib/unicode_utils/simple_casefold.rb
56
61
  - lib/unicode_utils/titlecase_char_q.rb
62
+ - lib/unicode_utils/char_type.rb
63
+ - lib/unicode_utils/default_ignorable_char_q.rb
57
64
  - lib/unicode_utils/canonical_equivalents_q.rb
58
65
  - lib/unicode_utils/nfd.rb
59
66
  - lib/unicode_utils/jamo_short_name.rb
@@ -61,18 +68,23 @@ files:
61
68
  - lib/unicode_utils/each_grapheme.rb
62
69
  - lib/unicode_utils/simple_upcase.rb
63
70
  - lib/unicode_utils/simple_downcase.rb
71
+ - lib/unicode_utils/debug.rb
72
+ - lib/unicode_utils/graphic_char_q.rb
64
73
  - lib/unicode_utils/codepoint.rb
65
74
  - cdata/prop_set_lowercase
66
75
  - cdata/casefold_c_map
67
76
  - cdata/combining_class_map
68
77
  - cdata/grapheme_break_property
69
78
  - cdata/casefold_s_map
79
+ - cdata/prop_set_default_ignorable
80
+ - cdata/east_asian_width_property_ranges
70
81
  - cdata/soft_dotted_set
71
82
  - cdata/cond_lc_map
72
83
  - cdata/simple_tc_map
73
84
  - cdata/prop_set_uppercase
74
85
  - cdata/jamo_short_names
75
86
  - cdata/composition_exclusion_set
87
+ - cdata/general_category_aliases
76
88
  - cdata/canonical_decomposition_map
77
89
  - cdata/cat_set_titlecase
78
90
  - cdata/casefold_f_map
@@ -85,6 +97,9 @@ files:
85
97
  - cdata/simple_uc_map
86
98
  - cdata/simple_lc_map
87
99
  - cdata/special_lc_map
100
+ - cdata/general_category_per_cp
101
+ - cdata/general_category_ranges
102
+ - cdata/east_asian_width_property_per_cp
88
103
  - cdata/cond_uc_map
89
104
  - cdata/compatibility_decomposition_map
90
105
  - test/test_unicode_utils.rb