unicode-display_width 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemspec +2 -2
- data/README.rdoc +16 -6
- data/data/EastAsianWidth.index +0 -0
- data/lib/unicode/display_width.rb +37 -43
- metadata +5 -8
data/.gemspec
CHANGED
@@ -7,8 +7,8 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.version = Unicode::DisplayWidth::VERSION
|
8
8
|
s.authors = ["Jan Lelis"]
|
9
9
|
s.email = "mail@janlelis.de"
|
10
|
-
s.homepage = "http://github.com/janlelis/unicode-
|
11
|
-
s.summary = "Support for east_asian_width
|
10
|
+
s.homepage = "http://github.com/janlelis/unicode-display_width"
|
11
|
+
s.summary = "Support for east_asian_width string widths."
|
12
12
|
s.description = "This gem adds String#display_size to get the display size of a string using EastAsianWidth.txt."
|
13
13
|
s.required_rubygems_version = ">= 1.3.6"
|
14
14
|
s.files = Dir.glob(%w[{lib,test}/**/*.rb bin/* [A-Z]*.{txt,rdoc} data/* ext/**/*.{rb,c} **/deps.rip]) + %w{Rakefile .gemspec}
|
data/README.rdoc
CHANGED
@@ -1,26 +1,36 @@
|
|
1
1
|
== Description
|
2
|
-
|
2
|
+
|
3
|
+
An early draft of a way to determine the size of the characters using <tt>EastAsianWidth.txt</tt>, based on the very early draft of a {Ruby interface to UnicodeData.txt}[https://github.com/runpaint/unicode-data] by runpaint.
|
3
4
|
|
4
5
|
== Install
|
6
|
+
|
5
7
|
Install the gem with
|
6
8
|
|
7
9
|
gem install unicode-display_width
|
8
10
|
|
9
11
|
== Usage
|
12
|
+
|
10
13
|
require 'unicode/display_width'
|
11
14
|
|
12
15
|
The easy way is to use the String#display_width method:
|
13
16
|
"⚀".display_width #=> 1
|
17
|
+
'一'.display_width #=> 2
|
14
18
|
|
15
|
-
To obtain more detailed data, you can use the
|
16
|
-
Unicode::DiplayWidth.codepoint( c )
|
19
|
+
To obtain more detailed data, you can use the following syntax:
|
20
|
+
Unicode::DisplayWidth.codepoint( c )
|
17
21
|
|
18
22
|
== Bugs / TODO
|
19
|
-
|
23
|
+
|
24
|
+
Since it is not tested extensively, it probably does not work correctly for all characters. For example, there is a class of Unicode characters that is not exactly defined to be narrow or full-width ("ambiguous"). This gem currently gives them a size of 1, which is not always correct.
|
25
|
+
|
26
|
+
Furthermore, the index data structure can be improved.
|
20
27
|
|
21
28
|
== Copyright
|
29
|
+
|
22
30
|
Copyright (c) 2011 Jan Lelis, http://rbjl.net, released under the MIT license.
|
23
|
-
|
24
|
-
|
31
|
+
|
32
|
+
Contains code by runpaint: Copyright (c) 2009 Run Paint Run Run
|
33
|
+
|
34
|
+
Contains EastAsianWidth.txt: Copyright (c) 1991-2010 Unicode, Inc.
|
25
35
|
|
26
36
|
J-_-L
|
data/data/EastAsianWidth.index
CHANGED
Binary file
|
@@ -1,68 +1,62 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
module Unicode; end
|
3
|
-
|
4
3
|
module Unicode::DisplayWidth
|
5
|
-
VERSION = '0.1.
|
4
|
+
VERSION = '0.1.1'
|
5
|
+
end
|
6
6
|
|
7
|
+
class << Unicode::DisplayWidth
|
7
8
|
DATA_DIR = File.join(File.dirname(__FILE__), '../../data/')
|
8
|
-
|
9
|
-
INDEX_FILE = DATA_DIR + 'EastAsianWidth.index'
|
9
|
+
TABLE_FILE = DATA_DIR + 'EastAsianWidth.index'
|
10
10
|
DATA_FILE = DATA_DIR + 'EastAsianWidth.txt'
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
def self.offsets
|
17
|
-
@@offsets ||= Marshal.load File.respond_to?(:binread) ? File.binread(INDEX_FILE) : File.read(INDEX_FILE)
|
12
|
+
# only needed for building the index
|
13
|
+
def data
|
14
|
+
@data ||= File.open DATA_FILE
|
18
15
|
end
|
19
16
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
17
|
+
def table
|
18
|
+
if @table
|
19
|
+
@table
|
20
|
+
else
|
21
|
+
build_table unless File.file?(TABLE_FILE)
|
22
|
+
@table = Marshal.load File.respond_to?(:binread) ? File.binread(TABLE_FILE) : File.read(TABLE_FILE)
|
25
23
|
end
|
26
|
-
|
27
|
-
def self.from_line(line)
|
28
|
-
line =~ /(.*);(.*) # (.*)$/
|
29
|
-
raise 'BUG' unless line
|
30
|
-
new $1,$2,$3
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def self.line(n)
|
35
|
-
data.rewind
|
36
|
-
offset = offsets[n] or raise ArgumentError
|
37
|
-
data.seek offset
|
38
|
-
data.readline.chomp
|
39
|
-
end
|
40
|
-
|
41
|
-
def self.codepoint(n)
|
42
|
-
Codepoint.from_line line(n)
|
43
24
|
end
|
44
25
|
|
45
|
-
def
|
46
|
-
|
26
|
+
def codepoint(n)
|
27
|
+
n = n.to_s.unpack('U')[0] unless n.is_a? Integer
|
28
|
+
table[n] or raise ArgumentError
|
47
29
|
end
|
30
|
+
alias width codepoint
|
31
|
+
alias of codepoint
|
48
32
|
|
49
|
-
def
|
33
|
+
def build_table
|
50
34
|
data.rewind
|
51
|
-
|
52
|
-
dir = File.dirname
|
35
|
+
table = {}
|
36
|
+
dir = File.dirname TABLE_FILE
|
53
37
|
Dir.mkdir(dir) unless Dir.exists?(dir)
|
54
|
-
data.lines.
|
55
|
-
|
56
|
-
|
57
|
-
|
38
|
+
data.lines.each{ |line|
|
39
|
+
line =~ /^(.*);(.*) # .*$/
|
40
|
+
if $1 && $2
|
41
|
+
cps, width = $1, $2
|
42
|
+
if cps['..']
|
43
|
+
range = Range.new *cps.split('..').map{ |cp| cp.to_i(16) }
|
44
|
+
range.each{ |cp| table[ cp ] = width.to_sym }
|
45
|
+
else
|
46
|
+
table[ cps.to_i(16) ] = width.to_sym
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
}
|
51
|
+
File.open(TABLE_FILE, 'wb') { |f| Marshal.dump(table, f) }
|
58
52
|
end
|
59
53
|
end
|
60
54
|
|
61
55
|
class String
|
62
|
-
def display_width(ambiguous=1)
|
56
|
+
def display_width(ambiguous = 1)
|
63
57
|
#codepoints.inject(0){ |a,c|
|
64
58
|
unpack('U*').inject(0){ |a,c|
|
65
|
-
width = case Unicode::DisplayWidth.codepoint(c).
|
59
|
+
width = case Unicode::DisplayWidth.codepoint(c).to_s
|
66
60
|
when *%w[F W]
|
67
61
|
2
|
68
62
|
when *%w[N Na H]
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-display_width
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 27
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 0
|
8
7
|
- 1
|
9
|
-
-
|
10
|
-
version: 0.1.
|
8
|
+
- 1
|
9
|
+
version: 0.1.1
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Jan Lelis
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-05 00:00:00 +01:00
|
19
18
|
default_executable:
|
20
19
|
dependencies: []
|
21
20
|
|
@@ -38,7 +37,7 @@ files:
|
|
38
37
|
- Rakefile
|
39
38
|
- .gemspec
|
40
39
|
has_rdoc: true
|
41
|
-
homepage: http://github.com/janlelis/unicode-
|
40
|
+
homepage: http://github.com/janlelis/unicode-display_width
|
42
41
|
licenses:
|
43
42
|
- MIT
|
44
43
|
post_install_message:
|
@@ -51,7 +50,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
51
50
|
requirements:
|
52
51
|
- - ">="
|
53
52
|
- !ruby/object:Gem::Version
|
54
|
-
hash: 3
|
55
53
|
segments:
|
56
54
|
- 0
|
57
55
|
version: "0"
|
@@ -60,7 +58,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
60
58
|
requirements:
|
61
59
|
- - ">="
|
62
60
|
- !ruby/object:Gem::Version
|
63
|
-
hash: 23
|
64
61
|
segments:
|
65
62
|
- 1
|
66
63
|
- 3
|
@@ -72,6 +69,6 @@ rubyforge_project:
|
|
72
69
|
rubygems_version: 1.3.7
|
73
70
|
signing_key:
|
74
71
|
specification_version: 3
|
75
|
-
summary: Support for east_asian_width
|
72
|
+
summary: Support for east_asian_width string widths.
|
76
73
|
test_files: []
|
77
74
|
|