visual_width 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/Changes +4 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +42 -0
- data/Rakefile +14 -0
- data/lib/visual_width/data.rb +204 -0
- data/lib/visual_width/string_ext.rb +11 -0
- data/lib/visual_width/string_refine.rb +13 -0
- data/lib/visual_width/version.rb +3 -0
- data/lib/visual_width.rb +53 -0
- data/spec/visual_width/string_refine_spec.rb +19 -0
- data/spec/visual_width_spec.rb +90 -0
- data/tool/benchmark.rb +21 -0
- data/tool/east-asian-width.pl +113 -0
- data/visual_width.gemspec +23 -0
- metadata +92 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 05cf7a6f8d1cb732fa2f35fabbe3a495c727e4d3
|
4
|
+
data.tar.gz: 47a372ba9ede858b3cf1c1f697c6476027ecfa4f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3077e27f95bf06842b167e819396557fe3337c48bed7f520e1225da0f08918004120676ec78e7fa1188f1c610628c3dc557d67af14ddb86616bded777ce90cb6
|
7
|
+
data.tar.gz: 7150d1b9c1f9fe84109098848d98f1b0da45cf56a2125b011b974a4ff8270a8f251077b508633245d73a78e85dc1faeaba85d3307f906bb91078660698f428df
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Changes
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Fuji, Goro
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# VisualWidth [![Build Status](https://travis-ci.org/gfx/visual_width.rb.png?branch=master)](https://travis-ci.org/gfx/visual_width.rb)
|
2
|
+
|
3
|
+
This gem handles Unicode East Asian Width:
|
4
|
+
|
5
|
+
* http://www.unicode.org/reports/tr11/
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'visual_width'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install visual_width
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'visual_width'
|
25
|
+
|
26
|
+
p VisualWidth.count("こんにちは") # => 10
|
27
|
+
p VisualWidth.count("abcdefghij") # => 10
|
28
|
+
|
29
|
+
p VisualWidth.truncate("恋すてふ 我が名はまだき 立ちにけり 人知れずこそ 思ひそめしか", 20) # => "恋すてふ 我が名は..."
|
30
|
+
```
|
31
|
+
|
32
|
+
Each method accepts `east_asian: false` to indicate that the text is not in an East Asian context, in which case ambiguous characters are treated as half-width.
|
33
|
+
|
34
|
+
See [Ambiguous Characters](http://www.unicode.org/reports/tr11/#Ambiguous) in the report.
|
35
|
+
|
36
|
+
## Contributing
|
37
|
+
|
38
|
+
1. Fork it
|
39
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
40
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
41
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
42
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "bundler/gem_tasks"
require 'rspec/core/rake_task'

# `rake spec` runs the RSpec suite; a bare `rake` does the same.
RSpec::Core::RakeTask.new(:spec)
task default: :spec

# Regenerates the character tables by running the generator script and
# redirecting its standard output into lib/visual_width/data.rb.
task(:data) { sh "tool/east-asian-width.pl > lib/visual_width/data.rb" }

# Runs the benchmark script with lib/ on the load path.
task(:bench) { sh "ruby -Ilib tool/benchmark.rb" }
|
@@ -0,0 +1,204 @@
|
|
1
|
+
# THIS FILE IS AUTOMATICALLY GENERATED.
|
2
|
+
# DO NOT EDIT IT!
|
3
|
+
#
|
4
|
+
# Unicode version 6.3.0 of EastAsianWidth characters
|
5
|
+
#
|
6
|
+
# see http://www.unicode.org/reports/tr11/ for specification
|
7
|
+
# see http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt for data
|
8
|
+
|
9
|
+
module VisualWidth
|
10
|
+
Wide = '[' +
|
11
|
+
'\u{1100}-\u{115F}' +
|
12
|
+
'\u{2329}-\u{232A}' +
|
13
|
+
'\u{2E80}-\u{2FFF}' +
|
14
|
+
'\u{3001}-\u{303E}' +
|
15
|
+
'\u{3041}-\u{3247}' +
|
16
|
+
'\u{3250}-\u{4DBF}' +
|
17
|
+
'\u{4E00}-\u{A4CF}' +
|
18
|
+
'\u{A960}-\u{A97F}' +
|
19
|
+
'\u{AC00}-\u{D7AF}' +
|
20
|
+
'\u{F900}-\u{FAFF}' +
|
21
|
+
'\u{FE10}-\u{FE1F}' +
|
22
|
+
'\u{FE30}-\u{FE6F}' +
|
23
|
+
'\u{1B000}-\u{1CFFF}' +
|
24
|
+
'\u{1F200}-\u{1F2FF}' +
|
25
|
+
'\u{20000}-\u{E0000}' +
|
26
|
+
']'
|
27
|
+
|
28
|
+
Fullwide = '[' +
|
29
|
+
'\u{3000}' +
|
30
|
+
'\u{FF01}-\u{FF60}' +
|
31
|
+
'\u{FFE0}-\u{FFE7}' +
|
32
|
+
']'
|
33
|
+
|
34
|
+
Ambiguous = '[' +
|
35
|
+
'\u{00A1}' +
|
36
|
+
'\u{00A4}' +
|
37
|
+
'\u{00A7}-\u{00A8}' +
|
38
|
+
'\u{00AA}' +
|
39
|
+
'\u{00AD}-\u{00AE}' +
|
40
|
+
'\u{00B0}-\u{00B4}' +
|
41
|
+
'\u{00B6}-\u{00BA}' +
|
42
|
+
'\u{00BC}-\u{00BF}' +
|
43
|
+
'\u{00C6}' +
|
44
|
+
'\u{00D0}' +
|
45
|
+
'\u{00D7}-\u{00D8}' +
|
46
|
+
'\u{00DE}-\u{00E1}' +
|
47
|
+
'\u{00E6}' +
|
48
|
+
'\u{00E8}-\u{00EA}' +
|
49
|
+
'\u{00EC}-\u{00ED}' +
|
50
|
+
'\u{00F0}' +
|
51
|
+
'\u{00F2}-\u{00F3}' +
|
52
|
+
'\u{00F7}-\u{00FA}' +
|
53
|
+
'\u{00FC}' +
|
54
|
+
'\u{00FE}' +
|
55
|
+
'\u{0101}' +
|
56
|
+
'\u{0111}' +
|
57
|
+
'\u{0113}' +
|
58
|
+
'\u{011B}' +
|
59
|
+
'\u{0126}-\u{0127}' +
|
60
|
+
'\u{012B}' +
|
61
|
+
'\u{0131}-\u{0133}' +
|
62
|
+
'\u{0138}' +
|
63
|
+
'\u{013F}-\u{0142}' +
|
64
|
+
'\u{0144}' +
|
65
|
+
'\u{0148}-\u{014B}' +
|
66
|
+
'\u{014D}' +
|
67
|
+
'\u{0152}-\u{0153}' +
|
68
|
+
'\u{0166}-\u{0167}' +
|
69
|
+
'\u{016B}' +
|
70
|
+
'\u{01CE}' +
|
71
|
+
'\u{01D0}' +
|
72
|
+
'\u{01D2}' +
|
73
|
+
'\u{01D4}' +
|
74
|
+
'\u{01D6}' +
|
75
|
+
'\u{01D8}' +
|
76
|
+
'\u{01DA}' +
|
77
|
+
'\u{01DC}' +
|
78
|
+
'\u{0251}' +
|
79
|
+
'\u{0261}' +
|
80
|
+
'\u{02C4}' +
|
81
|
+
'\u{02C7}' +
|
82
|
+
'\u{02C9}-\u{02CB}' +
|
83
|
+
'\u{02CD}' +
|
84
|
+
'\u{02D0}' +
|
85
|
+
'\u{02D8}-\u{02DB}' +
|
86
|
+
'\u{02DD}' +
|
87
|
+
'\u{02DF}' +
|
88
|
+
'\u{0300}-\u{036F}' +
|
89
|
+
'\u{0391}-\u{03A9}' +
|
90
|
+
'\u{03B1}-\u{03C1}' +
|
91
|
+
'\u{03C3}-\u{03C9}' +
|
92
|
+
'\u{0401}' +
|
93
|
+
'\u{0410}-\u{044F}' +
|
94
|
+
'\u{0451}' +
|
95
|
+
'\u{2010}' +
|
96
|
+
'\u{2013}-\u{2016}' +
|
97
|
+
'\u{2018}-\u{2019}' +
|
98
|
+
'\u{201C}-\u{201D}' +
|
99
|
+
'\u{2020}-\u{2022}' +
|
100
|
+
'\u{2024}-\u{2027}' +
|
101
|
+
'\u{2030}' +
|
102
|
+
'\u{2032}-\u{2033}' +
|
103
|
+
'\u{2035}' +
|
104
|
+
'\u{203B}' +
|
105
|
+
'\u{203E}' +
|
106
|
+
'\u{2074}' +
|
107
|
+
'\u{207F}' +
|
108
|
+
'\u{2081}-\u{2084}' +
|
109
|
+
'\u{20AC}' +
|
110
|
+
'\u{2103}' +
|
111
|
+
'\u{2105}' +
|
112
|
+
'\u{2109}' +
|
113
|
+
'\u{2113}' +
|
114
|
+
'\u{2116}' +
|
115
|
+
'\u{2121}-\u{2122}' +
|
116
|
+
'\u{2126}' +
|
117
|
+
'\u{212B}' +
|
118
|
+
'\u{2153}-\u{2154}' +
|
119
|
+
'\u{215B}-\u{215E}' +
|
120
|
+
'\u{2160}-\u{216B}' +
|
121
|
+
'\u{2170}-\u{2179}' +
|
122
|
+
'\u{2189}-\u{2199}' +
|
123
|
+
'\u{21B8}-\u{21B9}' +
|
124
|
+
'\u{21D2}' +
|
125
|
+
'\u{21D4}' +
|
126
|
+
'\u{21E7}' +
|
127
|
+
'\u{2200}' +
|
128
|
+
'\u{2202}-\u{2203}' +
|
129
|
+
'\u{2207}-\u{2208}' +
|
130
|
+
'\u{220B}' +
|
131
|
+
'\u{220F}' +
|
132
|
+
'\u{2211}' +
|
133
|
+
'\u{2215}' +
|
134
|
+
'\u{221A}' +
|
135
|
+
'\u{221D}-\u{2220}' +
|
136
|
+
'\u{2223}' +
|
137
|
+
'\u{2225}' +
|
138
|
+
'\u{2227}-\u{222C}' +
|
139
|
+
'\u{222E}' +
|
140
|
+
'\u{2234}-\u{2237}' +
|
141
|
+
'\u{223C}-\u{223D}' +
|
142
|
+
'\u{2248}' +
|
143
|
+
'\u{224C}' +
|
144
|
+
'\u{2252}' +
|
145
|
+
'\u{2260}-\u{2261}' +
|
146
|
+
'\u{2264}-\u{2267}' +
|
147
|
+
'\u{226A}-\u{226B}' +
|
148
|
+
'\u{226E}-\u{226F}' +
|
149
|
+
'\u{2282}-\u{2283}' +
|
150
|
+
'\u{2286}-\u{2287}' +
|
151
|
+
'\u{2295}' +
|
152
|
+
'\u{2299}' +
|
153
|
+
'\u{22A5}' +
|
154
|
+
'\u{22BF}' +
|
155
|
+
'\u{2312}' +
|
156
|
+
'\u{2460}-\u{24E9}' +
|
157
|
+
'\u{24EB}-\u{254B}' +
|
158
|
+
'\u{2550}-\u{2573}' +
|
159
|
+
'\u{2580}-\u{258F}' +
|
160
|
+
'\u{2592}-\u{2595}' +
|
161
|
+
'\u{25A0}-\u{25A1}' +
|
162
|
+
'\u{25A3}-\u{25A9}' +
|
163
|
+
'\u{25B2}-\u{25B3}' +
|
164
|
+
'\u{25B6}-\u{25B7}' +
|
165
|
+
'\u{25BC}-\u{25BD}' +
|
166
|
+
'\u{25C0}-\u{25C1}' +
|
167
|
+
'\u{25C6}-\u{25C8}' +
|
168
|
+
'\u{25CB}' +
|
169
|
+
'\u{25CE}-\u{25D1}' +
|
170
|
+
'\u{25E2}-\u{25E5}' +
|
171
|
+
'\u{25EF}' +
|
172
|
+
'\u{2605}-\u{2606}' +
|
173
|
+
'\u{2609}' +
|
174
|
+
'\u{260E}-\u{260F}' +
|
175
|
+
'\u{2614}-\u{2615}' +
|
176
|
+
'\u{261C}' +
|
177
|
+
'\u{261E}' +
|
178
|
+
'\u{2640}' +
|
179
|
+
'\u{2642}' +
|
180
|
+
'\u{2660}-\u{2661}' +
|
181
|
+
'\u{2663}-\u{2665}' +
|
182
|
+
'\u{2667}-\u{266A}' +
|
183
|
+
'\u{266C}-\u{266D}' +
|
184
|
+
'\u{266F}' +
|
185
|
+
'\u{269E}-\u{269F}' +
|
186
|
+
'\u{26BE}-\u{26BF}' +
|
187
|
+
'\u{26C4}-\u{26CD}' +
|
188
|
+
'\u{26CF}-\u{26E1}' +
|
189
|
+
'\u{26E3}' +
|
190
|
+
'\u{26E8}-\u{2700}' +
|
191
|
+
'\u{273D}' +
|
192
|
+
'\u{2757}' +
|
193
|
+
'\u{2776}-\u{277F}' +
|
194
|
+
'\u{2B55}-\u{2BFF}' +
|
195
|
+
'\u{3248}-\u{324F}' +
|
196
|
+
'\u{E000}-\u{F8FF}' +
|
197
|
+
'\u{FE00}-\u{FE0F}' +
|
198
|
+
'\u{FFFD}-\u{FFFF}' +
|
199
|
+
'\u{1F100}-\u{1F12D}' +
|
200
|
+
'\u{1F130}-\u{1F169}' +
|
201
|
+
'\u{1F170}-\u{1F1E5}' +
|
202
|
+
']'
|
203
|
+
end
|
204
|
+
|
data/lib/visual_width.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require "visual_width/version"
|
2
|
+
require "visual_width/data"
|
3
|
+
|
4
|
+
# Measures and truncates strings by the number of display columns they
# occupy, where wide characters take two columns and everything else one.
# The character classes (Fullwide, Wide, Ambiguous) are regexp
# character-class source strings provided by visual_width/data.
module VisualWidth
  # Default for the +east_asian+ keyword: when true, Ambiguous characters
  # are counted as two columns.
  EAST_ASIAN = true

  # Runs of two-column characters, with (c1) and without (c0) Ambiguous.
  @@c1 = /( (?:#{Fullwide} | #{Wide} | #{Ambiguous})+ )/x
  @@c0 = /( (?:#{Fullwide} | #{Wide} )+ )/x

  # Exactly one character per match; group 1 is set only when the character
  # is two columns wide. The /m flag makes "." match "\n" as well, so the
  # number of matches always equals the number of characters scanned.
  @@t1 = /( (?:#{Fullwide} | #{Wide} | #{Ambiguous}) ) | ./mx
  @@t0 = /( (?:#{Fullwide} | #{Wide} ) ) | ./mx

  module_function

  # Returns the number of display columns +str+ occupies.
  #
  # str        - the String to measure.
  # east_asian - when true, Ambiguous characters count as two columns.
  def count(str, east_asian: EAST_ASIAN)
    rx = east_asian ? @@c1 : @@c0
    full_width = 0
    str.scan(rx) do |run,|
      full_width += run.length
    end
    # Every wide character contributes 2, every remaining character 1.
    (full_width * 2) + (str.length - full_width)
  end

  # Shortens +str+ so that the result, including +omission+, occupies at
  # most +max_length+ display columns.
  #
  # str        - the String to shorten.
  # max_length - maximum number of display columns for the result.
  # omission   - marker appended when +str+ is cut (default '...').
  # east_asian - when true, Ambiguous characters count as two columns.
  #
  # Returns +str+ itself when it already fits into +max_length+ columns;
  # otherwise a new String made of a prefix of +str+ followed by +omission+.
  # If +omission+ alone is wider than +max_length+, the result is just
  # +omission+.
  def truncate(str, max_length, omission: '...', east_asian: EAST_ASIAN)
    # Decide by visual width, not by character count: a short run of wide
    # characters can overflow while having few characters.
    return str if VisualWidth.count(str, east_asian: east_asian) <= max_length

    # Columns left for the kept prefix once the omission is accounted for.
    # The omission is measured visually too, since it may itself contain
    # wide or ambiguous characters (e.g. an ellipsis character).
    budget = max_length - VisualWidth.count(omission, east_asian: east_asian)

    rx = east_asian ? @@t1 : @@t0
    kept = 0   # number of leading characters of str to keep
    width = 0  # display columns consumed so far
    str.scan(rx) do |wide,|
      width += wide ? 2 : 1
      break if width > budget

      kept += 1
    end

    str.slice(0, kept) + omission
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'rspec'
|
2
|
+
|
3
|
+
require 'visual_width/string_refine'
|
4
|
+
|
5
|
+
using VisualWidth
|
6
|
+
|
7
|
+
# Exercises the String refinements activated by `using VisualWidth`.
describe VisualWidth do
  context "String#width" do
    it "returns the visual width of the string" do
      expect("こんにちは".width).to eql(10)
    end
  end

  # This context was previously mislabelled "String#width".
  context "String#truncate" do
    it "truncates the string by visual width" do
      expect("こんにちは".truncate(5)).to eql('こ...')
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'rspec'
|
2
|
+
|
3
|
+
require 'visual_width'
|
4
|
+
require 'visual_width/string_ext'
|
5
|
+
|
6
|
+
# Specs for VisualWidth.count and VisualWidth.truncate, in both the default
# East Asian context and with east_asian: false.
describe VisualWidth do
  context ".count" do
    it "counts Half characters" do
      expect(VisualWidth.count("foo")).to eql(3)
    end
    it "counts Wide characters" do
      expect(VisualWidth.count("こんにちは")).to eql(10)
    end

    it "counts Halfwidth Katakana characters" do
      expect(VisualWidth.count("ｺﾝﾆﾁﾊ")).to eql(5)
    end

    it "counts Ambiguous characters" do
      expect(VisualWidth.count("αβ")).to eql(4)
    end
  end

  context ".count with east_asian: false" do
    it "counts Half characters" do
      expect(VisualWidth.count("foo", east_asian: false)).to eql(3)
    end
    it "counts Wide characters" do
      expect(VisualWidth.count("こんにちは", east_asian: false)).to eql(10)
    end

    it "counts Halfwidth Katakana characters" do
      expect(VisualWidth.count("ｺﾝﾆﾁﾊ", east_asian: false)).to eql(5)
    end

    it "counts Ambiguous characters" do
      expect(VisualWidth.count("αβ", east_asian: false)).to eql(2)
    end
  end

  context ".truncate" do
    it "does nothing if str is short enough" do
      expect(VisualWidth.truncate("foo", 20)).to eql("foo")
    end

    it "truncates str, adding '...'" do
      str = "The quick brown fox jumps over the lazy dog."
      s = VisualWidth.truncate(str, 15)
      expect(s.visual_width).to be <= 15
      expect(s).to eql('The quick br...')
    end

    it "truncates str, adding '(snip)'" do
      str = "The quick brown fox jumps over the lazy dog."
      s = VisualWidth.truncate(str, 15, omission: '(snip)')
      expect(s.visual_width).to be <= 15
      expect(s).to eql('The quick(snip)')
    end

    it "truncates Wide characters" do
      str = "くにざかいのながいトンネルを抜けるとゆきぐにであった"
      s = VisualWidth.truncate(str, 20)
      expect(s.visual_width).to be <= 20
      expect(s).to eql('くにざかいのなが...')
    end

    it "truncates mixed character types" do
      str = "くにざかいの ながい トンネルを 抜けると ゆきぐにであった"
      s = VisualWidth.truncate(str, 20)
      expect(s.visual_width).to be <= 20
      expect(s).to eql('くにざかいの なが...')
    end

    it "truncates Ambiguous characters" do
      str = "αβγδεζηθικλμνξοπρστυφχψω"
      s = VisualWidth.truncate(str, 10)
      expect(s.visual_width).to be <= 10
      expect(s).to eql('αβγ...')
    end
  end

  context ".truncate with east_asian: false" do
    it "truncates str with Ambiguous characters" do
      str = "αβγδεζηθικλμνξοπρστυφχψω"
      s = VisualWidth.truncate(str, 10, east_asian: false)
      expect(s.visual_width(east_asian: false)).to be <= 10
      expect(s).to eql('αβγδεζη...')
    end
  end
end
|
data/tool/benchmark.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Benchmarks VisualWidth.count against String#display_width (from the
# unicode/display_width library) on inputs of increasing length.

require 'benchmark'
require 'visual_width'
require 'unicode/display_width'

sample = "foo あいうえお bar αβγδε baz"

# Sanity check that both libraries agree before timing them. This used to
# be a bare comparison whose result was discarded; it now reports a
# mismatch but still runs the benchmark.
# NOTE(review): the argument 2 presumably selects width 2 for ambiguous
# characters - confirm against the unicode-display_width version in use.
unless VisualWidth.count(sample) == sample.display_width(2)
  warn "warning: VisualWidth.count and String#display_width disagree on the sample string"
end

[sample, sample * 5, sample * 10].each do |text|
  puts "\n", "for length #{text.length}:"
  Benchmark.bmbm do |x|
    x.report("visual_width") do
      1000.times { VisualWidth.count(text) }
    end
    x.report("display_width") do
      1000.times { text.display_width(2) }
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
#!/usr/bin/env perl
# vim: set expandtab shiftwidth=2 tabstop=2:
#
# Downloads EastAsianWidth.txt, groups code points into contiguous ranges
# per width trait, and prints a Ruby source file (the template after
# __DATA__) to standard output.
use 5.14.0;
use strict;
use warnings;
use LWP::Simple qw(get);
use Text::Xslate;
use Log::Minimal qw(infof);

my $SPEC = "http://www.unicode.org/reports/tr11/";
my $DATA = "http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt";

# Template parameters: the two URIs plus one list of [first, last] code
# point ranges per trait name found in the data file.
my %params = (
  spec_uri => $SPEC,
  data_uri => $DATA,

  F  => [],
  W  => [],
  H  => [],
  N  => [],
  Na => [],
  A  => [],
);

infof 'get data from %s', $DATA;
# LWP::Simple::get returns undef on failure; stop instead of emitting an
# empty data file.
my $data = get($DATA) // die "failed to download $DATA\n";

my($version) = $data =~ /EastAsianWidth-([\d\.]+)\.txt/;
$params{version} = $version;

infof 'parse character traits';
# $traits[$code_point] = trait name; unlisted code points stay undef.
my @traits;
for my $line(split /\n/, $data) {
  $line =~ s/\#.*//;
  if ($line =~ m{ (?<code_first> \w+) \.\.(?<code_last> \w+) ; (?<trait> \w+) }xms) {
    for my $code(hex($+{code_first}) .. hex($+{code_last})) {
      $traits[$code] = $+{trait};
    }
  }
  elsif ($line =~ m{ (?<code> \w+) ; (?<trait> \w+) }xms) {
    $traits[hex $+{code}] = $+{trait};
  }
}
infof 'make ranges from traits';

my $c     = 0;   # current code point (index into @traits)
my $trait = "";  # trait of the run currently being collected
my $first = -1;  # first code point of that run
foreach my $t(@traits) {
  # Unlisted code points neither start nor end a run, so a range may span
  # gaps between listed code points that share a trait.
  next unless $t;

  if ($t ne $trait) {
    if ($trait ne "" && $first != -1) {
      # Explicit dereference: push on a scalar reference was experimental
      # and is a fatal error since Perl 5.24.
      push @{ $params{$trait} }, [ $first, $c - 1 ];
    }

    $first = $c;
    $trait = $t;
  }
}
continue {
  ++$c;
}

# Flush the final run; without this the last range in the data file is
# silently dropped from the output.
if ($trait ne "" && $first != -1) {
  push @{ $params{$trait} }, [ $first, $c - 1 ];
}

infof 'render it';

my $xslate = Text::Xslate->new(
  type   => 'text',
  cache  => 0,
  module => ['Text::Xslate::Bridge::Star'],
);

print $xslate->render_string(do {
  local $/;
  <DATA>;
}, \%params);

__DATA__
# THIS FILE IS AUTOMATICALLY GENERATED.
# DO NOT EDIT IT!
#
# Unicode version <: $version :> of EastAsianWidth characters
#
# see <: $spec_uri :> for specification
# see <: $data_uri :> for data

module VisualWidth
<:-
macro range -> $r {
  if $r[0] == $r[1] {
    sprintf('\u{%04X}', $r[0]);
  }
  else {
    sprintf('\u{%04X}-\u{%04X}', $r[0], $r[1]);
  }
}
-:>
: macro make_conditions -> $t, $name {
  <: $name :> = '[' +
: for $t -> $range {
    '<: range($range) :>' +
: }
    ']'
: }

: make_conditions($W, "Wide");

: make_conditions($F, "Fullwide");

: make_conditions($A, "Ambiguous");
end
|
113
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'visual_width/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |gem|
  # Identity and authorship
  gem.name     = "visual_width"
  gem.version  = VisualWidth::VERSION
  gem.authors  = ["Fuji, Goro (gfx)"]
  gem.email    = ["gfuji@cpan.org"]
  gem.homepage = "https://github.com/gfx/visual_width.rb"
  gem.license  = "MIT"

  # Human-readable descriptions
  gem.summary     = "Ruby Implementation of East Asian Width"
  gem.description = "Deals with East Asian Width defined in Unicode"

  # Packaged files: everything tracked by git. Executables and test files
  # are picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
end
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: visual_width
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Fuji, Goro (gfx)
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: Deals with East Asian Width defined in Unicode
|
42
|
+
email:
|
43
|
+
- gfuji@cpan.org
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- .rspec
|
50
|
+
- Changes
|
51
|
+
- Gemfile
|
52
|
+
- LICENSE.txt
|
53
|
+
- README.md
|
54
|
+
- Rakefile
|
55
|
+
- lib/visual_width.rb
|
56
|
+
- lib/visual_width/data.rb
|
57
|
+
- lib/visual_width/string_ext.rb
|
58
|
+
- lib/visual_width/string_refine.rb
|
59
|
+
- lib/visual_width/version.rb
|
60
|
+
- spec/visual_width/string_refine_spec.rb
|
61
|
+
- spec/visual_width_spec.rb
|
62
|
+
- tool/benchmark.rb
|
63
|
+
- tool/east-asian-width.pl
|
64
|
+
- visual_width.gemspec
|
65
|
+
homepage: https://github.com/gfx/visual_width.rb
|
66
|
+
licenses:
|
67
|
+
- MIT
|
68
|
+
metadata: {}
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options: []
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project:
|
85
|
+
rubygems_version: 2.0.3
|
86
|
+
signing_key:
|
87
|
+
specification_version: 4
|
88
|
+
summary: Ruby Implementation of East Asian Width
|
89
|
+
test_files:
|
90
|
+
- spec/visual_width/string_refine_spec.rb
|
91
|
+
- spec/visual_width_spec.rb
|
92
|
+
has_rdoc:
|