unicode-display_width 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemspec +2 -2
- data/README.rdoc +16 -6
- data/data/EastAsianWidth.index +0 -0
- data/lib/unicode/display_width.rb +37 -43
- metadata +5 -8
data/.gemspec
CHANGED
@@ -7,8 +7,8 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.version = Unicode::DisplayWidth::VERSION
|
8
8
|
s.authors = ["Jan Lelis"]
|
9
9
|
s.email = "mail@janlelis.de"
|
10
|
-
s.homepage = "http://github.com/janlelis/unicode-
|
11
|
-
s.summary = "Support for east_asian_width
|
10
|
+
s.homepage = "http://github.com/janlelis/unicode-display_width"
|
11
|
+
s.summary = "Support for east_asian_width string widths."
|
12
12
|
s.description = "This gem adds String#display_size to get the display size of a string using EastAsianWidth.txt."
|
13
13
|
s.required_rubygems_version = ">= 1.3.6"
|
14
14
|
s.files = Dir.glob(%w[{lib,test}/**/*.rb bin/* [A-Z]*.{txt,rdoc} data/* ext/**/*.{rb,c} **/deps.rip]) + %w{Rakefile .gemspec}
|
data/README.rdoc
CHANGED
@@ -1,26 +1,36 @@
|
|
1
1
|
== Description
|
2
|
-
|
2
|
+
|
3
|
+
An early draft of a way to determine the size of the characters using <tt>EastAsianWidth.txt</tt>, based on the very early draft of a {Ruby interface to UnicodeData.txt}[https://github.com/runpaint/unicode-data] by runpaint.
|
3
4
|
|
4
5
|
== Install
|
6
|
+
|
5
7
|
Install the gem with
|
6
8
|
|
7
9
|
gem install unicode-display_width
|
8
10
|
|
9
11
|
== Usage
|
12
|
+
|
10
13
|
require 'unicode/display_width'
|
11
14
|
|
12
15
|
The easy way is to use the String#display_width method:
|
13
16
|
"⚀".display_width #=> 1
|
17
|
+
'一'.display_width #=> 2
|
14
18
|
|
15
|
-
To obtain more detailed data, you can use the
|
16
|
-
Unicode::DiplayWidth.codepoint( c )
|
19
|
+
To obtain more detailed data, you can use the following syntax:
|
20
|
+
Unicode::DisplayWidth.codepoint( c )
|
17
21
|
|
18
22
|
== Bugs / TODO
|
19
|
-
|
23
|
+
|
24
|
+
Since it has not been tested extensively, it probably does not work correctly for all characters. For example, there is a class of Unicode characters that is not strictly defined as either narrow or full-width ("ambiguous"). This gem currently gives them a size of 1, which is not always correct.
|
25
|
+
|
26
|
+
Furthermore, the index data structure can be improved.
|
20
27
|
|
21
28
|
== Copyright
|
29
|
+
|
22
30
|
Copyright (c) 2011 Jan Lelis, http://rbjl.net, released under the MIT license.
|
23
|
-
|
24
|
-
|
31
|
+
|
32
|
+
Contains code by runpaint: Copyright (c) 2009 Run Paint Run Run
|
33
|
+
|
34
|
+
Contains EastAsianWidth.txt: Copyright (c) 1991-2010 Unicode, Inc.
|
25
35
|
|
26
36
|
J-_-L
|
data/data/EastAsianWidth.index
CHANGED
Binary file
|
data/lib/unicode/display_width.rb
CHANGED
@@ -1,68 +1,62 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
module Unicode; end
|
3
|
-
|
4
3
|
module Unicode::DisplayWidth
|
5
|
-
VERSION = '0.1.0'
|
4
|
+
VERSION = '0.1.1'
|
5
|
+
end
|
6
6
|
|
7
|
+
class << Unicode::DisplayWidth
|
7
8
|
DATA_DIR = File.join(File.dirname(__FILE__), '../../data/')
|
8
|
-
|
9
|
-
INDEX_FILE = DATA_DIR + 'EastAsianWidth.index'
|
9
|
+
TABLE_FILE = DATA_DIR + 'EastAsianWidth.index'
|
10
10
|
DATA_FILE = DATA_DIR + 'EastAsianWidth.txt'
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
def self.offsets
|
17
|
-
@@offsets ||= Marshal.load File.respond_to?(:binread) ? File.binread(INDEX_FILE) : File.read(INDEX_FILE)
|
12
|
+
# only needed for building the index
|
13
|
+
def data
|
14
|
+
@data ||= File.open DATA_FILE
|
18
15
|
end
|
19
16
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
17
|
+
def table
|
18
|
+
if @table
|
19
|
+
@table
|
20
|
+
else
|
21
|
+
build_table unless File.file?(TABLE_FILE)
|
22
|
+
@table = Marshal.load File.respond_to?(:binread) ? File.binread(TABLE_FILE) : File.read(TABLE_FILE)
|
25
23
|
end
|
26
|
-
|
27
|
-
def self.from_line(line)
|
28
|
-
line =~ /(.*);(.*) # (.*)$/
|
29
|
-
raise 'BUG' unless line
|
30
|
-
new $1,$2,$3
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def self.line(n)
|
35
|
-
data.rewind
|
36
|
-
offset = offsets[n] or raise ArgumentError
|
37
|
-
data.seek offset
|
38
|
-
data.readline.chomp
|
39
|
-
end
|
40
|
-
|
41
|
-
def self.codepoint(n)
|
42
|
-
Codepoint.from_line line(n)
|
43
24
|
end
|
44
25
|
|
45
|
-
def
|
46
|
-
|
26
|
+
def codepoint(n)
|
27
|
+
n = n.to_s.unpack('U')[0] unless n.is_a? Integer
|
28
|
+
table[n] or raise ArgumentError
|
47
29
|
end
|
30
|
+
alias width codepoint
|
31
|
+
alias of codepoint
|
48
32
|
|
49
|
-
def
|
33
|
+
def build_table
|
50
34
|
data.rewind
|
51
|
-
|
52
|
-
dir = File.dirname
|
35
|
+
table = {}
|
36
|
+
dir = File.dirname TABLE_FILE
|
53
37
|
Dir.mkdir(dir) unless Dir.exists?(dir)
|
54
|
-
data.lines.
|
55
|
-
|
56
|
-
|
57
|
-
|
38
|
+
data.lines.each{ |line|
|
39
|
+
line =~ /^(.*);(.*) # .*$/
|
40
|
+
if $1 && $2
|
41
|
+
cps, width = $1, $2
|
42
|
+
if cps['..']
|
43
|
+
range = Range.new *cps.split('..').map{ |cp| cp.to_i(16) }
|
44
|
+
range.each{ |cp| table[ cp ] = width.to_sym }
|
45
|
+
else
|
46
|
+
table[ cps.to_i(16) ] = width.to_sym
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
}
|
51
|
+
File.open(TABLE_FILE, 'wb') { |f| Marshal.dump(table, f) }
|
58
52
|
end
|
59
53
|
end
|
60
54
|
|
61
55
|
class String
|
62
|
-
def display_width(ambiguous=1)
|
56
|
+
def display_width(ambiguous = 1)
|
63
57
|
#codepoints.inject(0){ |a,c|
|
64
58
|
unpack('U*').inject(0){ |a,c|
|
65
|
-
width = case Unicode::DisplayWidth.codepoint(c).
|
59
|
+
width = case Unicode::DisplayWidth.codepoint(c).to_s
|
66
60
|
when *%w[F W]
|
67
61
|
2
|
68
62
|
when *%w[N Na H]
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-display_width
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 27
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 0
|
8
7
|
- 1
|
9
|
-
-
|
10
|
-
version: 0.1.0
|
8
|
+
- 1
|
9
|
+
version: 0.1.1
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Jan Lelis
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-05 00:00:00 +01:00
|
19
18
|
default_executable:
|
20
19
|
dependencies: []
|
21
20
|
|
@@ -38,7 +37,7 @@ files:
|
|
38
37
|
- Rakefile
|
39
38
|
- .gemspec
|
40
39
|
has_rdoc: true
|
41
|
-
homepage: http://github.com/janlelis/unicode-
|
40
|
+
homepage: http://github.com/janlelis/unicode-display_width
|
42
41
|
licenses:
|
43
42
|
- MIT
|
44
43
|
post_install_message:
|
@@ -51,7 +50,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
51
50
|
requirements:
|
52
51
|
- - ">="
|
53
52
|
- !ruby/object:Gem::Version
|
54
|
-
hash: 3
|
55
53
|
segments:
|
56
54
|
- 0
|
57
55
|
version: "0"
|
@@ -60,7 +58,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
60
58
|
requirements:
|
61
59
|
- - ">="
|
62
60
|
- !ruby/object:Gem::Version
|
63
|
-
hash: 23
|
64
61
|
segments:
|
65
62
|
- 1
|
66
63
|
- 3
|
@@ -72,6 +69,6 @@ rubyforge_project:
|
|
72
69
|
rubygems_version: 1.3.7
|
73
70
|
signing_key:
|
74
71
|
specification_version: 3
|
75
|
-
summary: Support for east_asian_width
|
72
|
+
summary: Support for east_asian_width string widths.
|
76
73
|
test_files: []
|
77
74
|
|