unicode-display_width 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gemspec CHANGED
@@ -7,8 +7,8 @@ Gem::Specification.new do |s|
7
7
  s.version = Unicode::DisplayWidth::VERSION
8
8
  s.authors = ["Jan Lelis"]
9
9
  s.email = "mail@janlelis.de"
10
- s.homepage = "http://github.com/janlelis/unicode-display_size"
11
- s.summary = "Support for east_asian_width String sizes."
10
+ s.homepage = "http://github.com/janlelis/unicode-display_width"
11
+ s.summary = "Support for east_asian_width string widths."
12
12
  s.description = "This gem adds String#display_size to get the display size of a string using EastAsianWidth.txt."
13
13
  s.required_rubygems_version = ">= 1.3.6"
14
14
  s.files = Dir.glob(%w[{lib,test}/**/*.rb bin/* [A-Z]*.{txt,rdoc} data/* ext/**/*.{rb,c} **/deps.rip]) + %w{Rakefile .gemspec}
@@ -1,26 +1,36 @@
1
1
  == Description
2
- A early draft of a way to determine the size of the characters using EastAsianWidth.txt, based on the very early draft of a [Ruby interface to UnicodeData.txt]{https://github.com/runpaint/unicode-data} by runpaint.
2
+
3
+ An early draft of a way to determine the size of the characters using <tt>EastAsianWidth.txt</tt>, based on the very early draft of a {Ruby interface to UnicodeData.txt}[https://github.com/runpaint/unicode-data] by runpaint.
3
4
 
4
5
  == Install
6
+
5
7
  Install the gem with
6
8
 
7
9
  gem install unicode-display_width
8
10
 
9
11
  == Usage
12
+
10
13
  require 'unicode/display_width'
11
14
 
12
15
  The easy way is to use the String#display_width method:
13
16
  "⚀".display_width #=> 1
17
+ '一'.display_width #=> 2
14
18
 
15
- To obtain more detailed data, you can use the Codepoint struct (c is the codepoint in integer format):
16
- Unicode::DiplayWidth.codepoint( c ).width
19
+ To obtain more detailed data, you can use the following syntax:
20
+ Unicode::DisplayWidth.codepoint( c )
17
21
 
18
22
  == Bugs / TODO
19
- It does not work correctly for all characters ;). For example, there is a class of unicode characters, which is not exactly defined to be narrow or full-width ("ambiguous"). This gem currently gives them a size of 1, which is not always correct.
23
+
24
+ Since it has not been tested extensively, it probably does not work correctly for all characters. For example, there is a class of Unicode characters that is not exactly defined to be narrow or full-width ("ambiguous"). This gem currently gives them a size of 1, which is not always correct.
25
+
26
+ Furthermore, the index data structure can be improved.
20
27
 
21
28
  == Copyright
29
+
22
30
  Copyright (c) 2011 Jan Lelis, http://rbjl.net, released under the MIT license.
23
- Copyright (c) 2009 Run Paint Run Run
24
- EastAsianWidth.txt: Copyright (c) 1991-2010 Unicode, Inc.
31
+
32
+ Contains code by runpaint: Copyright (c) 2009 Run Paint Run Run
33
+
34
+ Contains EastAsianWidth.txt: Copyright (c) 1991-2010 Unicode, Inc.
25
35
 
26
36
  J-_-L
Binary file
@@ -1,68 +1,62 @@
1
1
  # encoding: utf-8
2
2
  module Unicode; end
3
-
4
3
  module Unicode::DisplayWidth
5
- VERSION = '0.1.0'
4
+ VERSION = '0.1.1'
5
+ end
6
6
 
7
+ class << Unicode::DisplayWidth
7
8
  DATA_DIR = File.join(File.dirname(__FILE__), '../../data/')
8
- FIELDS = [:codepoint, :width, :name]
9
- INDEX_FILE = DATA_DIR + 'EastAsianWidth.index'
9
+ TABLE_FILE = DATA_DIR + 'EastAsianWidth.index'
10
10
  DATA_FILE = DATA_DIR + 'EastAsianWidth.txt'
11
11
 
12
- def self.data
13
- @@data ||= File.open DATA_FILE
14
- end
15
-
16
- def self.offsets
17
- @@offsets ||= Marshal.load File.respond_to?(:binread) ? File.binread(INDEX_FILE) : File.read(INDEX_FILE)
12
+ # only needed for building the index
13
+ def data
14
+ @data ||= File.open DATA_FILE
18
15
  end
19
16
 
20
- class Codepoint < Struct.new(*FIELDS)
21
- def initialize(*args)
22
- super
23
- self.codepoint = self.codepoint.to_i(16) if self.codepoint && self.codepoint !~ /\.\./
24
- # TODO cleaner # FIXME ranges
17
+ def table
18
+ if @table
19
+ @table
20
+ else
21
+ build_table unless File.file?(TABLE_FILE)
22
+ @table = Marshal.load File.respond_to?(:binread) ? File.binread(TABLE_FILE) : File.read(TABLE_FILE)
25
23
  end
26
-
27
- def self.from_line(line)
28
- line =~ /(.*);(.*) # (.*)$/
29
- raise 'BUG' unless line
30
- new $1,$2,$3
31
- end
32
- end
33
-
34
- def self.line(n)
35
- data.rewind
36
- offset = offsets[n] or raise ArgumentError
37
- data.seek offset
38
- data.readline.chomp
39
- end
40
-
41
- def self.codepoint(n)
42
- Codepoint.from_line line(n)
43
24
  end
44
25
 
45
- def self.valid_index?
46
- !!offsets rescue false
26
+ def codepoint(n)
27
+ n = n.to_s.unpack('U')[0] unless n.is_a? Integer
28
+ table[n] or raise ArgumentError
47
29
  end
30
+ alias width codepoint
31
+ alias of codepoint
48
32
 
49
- def self.build_index
33
+ def build_table
50
34
  data.rewind
51
- offsets = {}
52
- dir = File.dirname INDEX_FILE
35
+ table = {}
36
+ dir = File.dirname TABLE_FILE
53
37
  Dir.mkdir(dir) unless Dir.exists?(dir)
54
- data.lines.map do |line|
55
- offsets[Codepoint.from_line(line).codepoint] = data.pos - line.size
56
- end
57
- File.open(INDEX_FILE, 'wb') { |f| Marshal.dump(offsets, f) }
38
+ data.lines.each{ |line|
39
+ line =~ /^(.*);(.*) # .*$/
40
+ if $1 && $2
41
+ cps, width = $1, $2
42
+ if cps['..']
43
+ range = Range.new *cps.split('..').map{ |cp| cp.to_i(16) }
44
+ range.each{ |cp| table[ cp ] = width.to_sym }
45
+ else
46
+ table[ cps.to_i(16) ] = width.to_sym
47
+ end
48
+ end
49
+
50
+ }
51
+ File.open(TABLE_FILE, 'wb') { |f| Marshal.dump(table, f) }
58
52
  end
59
53
  end
60
54
 
61
55
  class String
62
- def display_width(ambiguous=1)
56
+ def display_width(ambiguous = 1)
63
57
  #codepoints.inject(0){ |a,c|
64
58
  unpack('U*').inject(0){ |a,c|
65
- width = case Unicode::DisplayWidth.codepoint(c).width
59
+ width = case Unicode::DisplayWidth.codepoint(c).to_s
66
60
  when *%w[F W]
67
61
  2
68
62
  when *%w[N Na H]
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-display_width
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
5
4
  prerelease: false
6
5
  segments:
7
6
  - 0
8
7
  - 1
9
- - 0
10
- version: 0.1.0
8
+ - 1
9
+ version: 0.1.1
11
10
  platform: ruby
12
11
  authors:
13
12
  - Jan Lelis
@@ -15,7 +14,7 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2011-01-03 00:00:00 +01:00
17
+ date: 2011-01-05 00:00:00 +01:00
19
18
  default_executable:
20
19
  dependencies: []
21
20
 
@@ -38,7 +37,7 @@ files:
38
37
  - Rakefile
39
38
  - .gemspec
40
39
  has_rdoc: true
41
- homepage: http://github.com/janlelis/unicode-display_size
40
+ homepage: http://github.com/janlelis/unicode-display_width
42
41
  licenses:
43
42
  - MIT
44
43
  post_install_message:
@@ -51,7 +50,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
51
50
  requirements:
52
51
  - - ">="
53
52
  - !ruby/object:Gem::Version
54
- hash: 3
55
53
  segments:
56
54
  - 0
57
55
  version: "0"
@@ -60,7 +58,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
60
58
  requirements:
61
59
  - - ">="
62
60
  - !ruby/object:Gem::Version
63
- hash: 23
64
61
  segments:
65
62
  - 1
66
63
  - 3
@@ -72,6 +69,6 @@ rubyforge_project:
72
69
  rubygems_version: 1.3.7
73
70
  signing_key:
74
71
  specification_version: 3
75
- summary: Support for east_asian_width String sizes.
72
+ summary: Support for east_asian_width string widths.
76
73
  test_files: []
77
74