unicode-display_width 1.1.3 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.txt +7 -0
- data/README.md +18 -0
- data/Rakefile +1 -0
- data/data/display_width.marshal.gz +0 -0
- data/lib/unicode/display_width.rb +20 -1
- data/lib/unicode/display_width/constants.rb +1 -1
- data/lib/unicode/display_width/string_ext.rb +2 -2
- data/spec/display_width_spec.rb +23 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc469a91d5eda226bed21bce0669da6fa9e5d064
|
4
|
+
data.tar.gz: 5b376806fdd80debc0e1baa8694f0ef1eb697ca2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63022c8bd0776c973347dcb3ccbf0a3c79610134f20ea82fd0832b2143517e492bd56f11564a5639c8a88136bf402abde808ae90b0a79a4ea2ff14020f36d496
|
7
|
+
data.tar.gz: 6765c915c17a1cd7a13c9fad7a4fbc6621a99e8c119b148f4a10fd33b4c3b81cf45a2eed2a54fafc6f36c6d268650e68d67740d04190281568b5f87ebb57e9f1
|
data/CHANGELOG.txt
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## 1.2.0
|
4
|
+
|
5
|
+
- Add zero-width codepoint ranges: U+2060..U+206F, U+FFF0..U+FFF8, U+E0000..U+E0FFF
|
6
|
+
- Add full-width codepoint ranges: U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2FFFD, U+30000..U+3FFFD
|
7
|
+
- Experimental emoji support using the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem
|
8
|
+
- Fix minor bug in index compression scheme
|
9
|
+
|
3
10
|
## 1.1.3
|
4
11
|
|
5
12
|
- Fix that non-UTF-8 encodings do not throw errors, patch by @windwiny
|
data/README.md
CHANGED
@@ -22,7 +22,9 @@ X | (user defined) | Overwrites any other values
|
|
22
22
|
3 | `"\u{2E3B}"` | THREE-EM DASH
|
23
23
|
0 | General Categories: Mn, Me, Cf (non-arabic) | Excludes ARABIC format characters
|
24
24
|
0 | `"\u{1160}".."\u{11FF}"` | HANGUL JUNGSEONG
|
25
|
+
0 | `"\u{2060}".."\u{206F}"`, `"\u{FFF0}".."\u{FFF8}"`, `"\u{E0000}".."\u{E0FFF}"` | Ignorable ranges
|
25
26
|
2 | East Asian Width: F, W | Full-width characters
|
27
|
+
2 | `"\u{3400}"`..`"\u{4DBF}"`, `"\u{4E00}"`..`"\u{9FFF}"`, `"\u{F900}"`..`"\u{FAFF}"`, `"\u{20000}"`..`"\u{2FFFD}"`, `"\u{30000}"`..`"\u{3FFFD}"` | Full-width ranges
|
26
28
|
1 or 2 | East Asian Width: A | Ambiguous characters, user defined, default: 1
|
27
29
|
1 | All other codepoints | -
|
28
30
|
|
@@ -62,6 +64,22 @@ You can overwrite how to handle specific code points by passing a hash (or even
|
|
62
64
|
Unicode::DisplayWidth.of("a\tb", 1, 0x09 => 10) # => 12
|
63
65
|
```
|
64
66
|
|
67
|
+
### Emoji Support
|
68
|
+
|
69
|
+
Experimental emoji support is included. It will adjust the string's size for modifier and zero-width joiner sequences. You will need to add the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem to your Gemfile:
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
gem 'unicode-display_width'
|
73
|
+
gem 'unicode-emoji'
|
74
|
+
```
|
75
|
+
|
76
|
+
You can then activate the emoji string width adjustments by passing `emoji: true` as the fourth parameter:
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
Unicode::DisplayWidth.of "🤾🏽‍♀️" # => 5
|
80
|
+
Unicode::DisplayWidth.of "🤾🏽‍♀️", 1, {}, emoji: true # => 2
|
81
|
+
```
|
82
|
+
|
65
83
|
### Usage with String Extension
|
66
84
|
|
67
85
|
Activated by default. Will be deactivated in version 2.0:
|
data/Rakefile
CHANGED
Binary file
|
@@ -5,7 +5,7 @@ module Unicode
|
|
5
5
|
module DisplayWidth
|
6
6
|
DEPTHS = [0x10000, 0x1000, 0x100, 0x10].freeze
|
7
7
|
|
8
|
-
def self.of(string, ambiguous = 1, overwrite = {})
|
8
|
+
def self.of(string, ambiguous = 1, overwrite = {}, options = {})
|
9
9
|
res = string.codepoints.inject(0){ |total_width, codepoint|
|
10
10
|
index_or_value = INDEX
|
11
11
|
codepoint_depth_offset = codepoint
|
@@ -19,8 +19,27 @@ module Unicode
|
|
19
19
|
total_width + (overwrite[codepoint] || width || 1)
|
20
20
|
}
|
21
21
|
|
22
|
+
res -= emoji_extra_width_of(string) if options[:emoji]
|
22
23
|
res < 0 ? 0 : res
|
23
24
|
end
|
25
|
+
|
26
|
+
def self.emoji_extra_width_of(string, ambiguous = 1, overwrite = {}, _ = {})
|
27
|
+
require "unicode/emoji"
|
28
|
+
|
29
|
+
extra_width = 0
|
30
|
+
modifier_regex = /[#{ Unicode::Emoji::EMOJI_MODIFIERS.pack("U*") }]/
|
31
|
+
zwj_regex = /(?<=#{ [Unicode::Emoji::ZWJ].pack("U") })./
|
32
|
+
|
33
|
+
string.scan(Unicode::Emoji::REGEX){ |emoji|
|
34
|
+
extra_width += 2 * emoji.match(modifier_regex).size
|
35
|
+
|
36
|
+
emoji.scan(zwj_regex){ |zwj_succ|
|
37
|
+
extra_width += self.of(zwj_succ, ambiguous, overwrite)
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
extra_width
|
42
|
+
end
|
24
43
|
end
|
25
44
|
end
|
26
45
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Unicode
|
2
2
|
module DisplayWidth
|
3
|
-
VERSION = '1.1.3'
|
3
|
+
VERSION = '1.2.0'
|
4
4
|
UNICODE_VERSION = "9.0.0".freeze
|
5
5
|
DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + '/../../../data/').freeze
|
6
6
|
INDEX_FILENAME = (DATA_DIRECTORY + '/display_width.marshal.gz').freeze
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require_relative '../display_width' unless defined? Unicode::DisplayWidth
|
2
2
|
|
3
3
|
class String
|
4
|
-
def display_width(ambiguous = 1, overwrite = {})
|
5
|
-
Unicode::DisplayWidth.of(self, ambiguous, overwrite)
|
4
|
+
def display_width(ambiguous = 1, overwrite = {}, options = {})
|
5
|
+
Unicode::DisplayWidth.of(self, ambiguous, overwrite, options)
|
6
6
|
end
|
7
7
|
|
8
8
|
def display_size(*args)
|
data/spec/display_width_spec.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'unicode/display_width'
|
1
|
+
require_relative '../lib/unicode/display_width'
|
4
2
|
|
5
3
|
describe 'Unicode::DisplayWidth.of' do
|
6
4
|
describe '[east asian width]' do
|
@@ -12,6 +10,10 @@ describe 'Unicode::DisplayWidth.of' do
|
|
12
10
|
expect( '一'.display_width ).to eq 2
|
13
11
|
end
|
14
12
|
|
13
|
+
it 'returns 2 for W (which are currently unassigned)' do
|
14
|
+
expect( "\u{3FFFD}".display_width ).to eq 2
|
15
|
+
end
|
16
|
+
|
15
17
|
it 'returns 1 for N' do
|
16
18
|
expect( 'À'.display_width ).to eq 1
|
17
19
|
end
|
@@ -53,6 +55,18 @@ describe 'Unicode::DisplayWidth.of' do
|
|
53
55
|
it 'returns 0 for HANGUL JUNGSEONG chars' do
|
54
56
|
expect( 'ᅠ'.display_width ).to eq 0
|
55
57
|
end
|
58
|
+
|
59
|
+
it 'returns 0 for U+2060..U+206F' do
|
60
|
+
expect( "\u{2060}".display_width ).to eq 0
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'returns 0 for U+FFF0..U+FFF8' do
|
64
|
+
expect( "\u{FFF0}".display_width ).to eq 0
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'returns 0 for U+E0000..U+E0FFF' do
|
68
|
+
expect( "\u{E0000}".display_width ).to eq 0
|
69
|
+
end
|
56
70
|
end
|
57
71
|
|
58
72
|
describe '[special characters]' do
|
@@ -129,4 +143,10 @@ describe 'Unicode::DisplayWidth.of' do
|
|
129
143
|
expect( 'À'.encode("UTF-16LE").display_width ).to eq 1
|
130
144
|
end
|
131
145
|
end
|
146
|
+
|
147
|
+
describe '[emoji]' do
|
148
|
+
it 'does not count modifiers and zjw sequences for valid emoji' do
|
149
|
+
expect( "🤾🏽‍♀️".display_width(1, {}, emoji: true) ).to eq 2
|
150
|
+
end
|
151
|
+
end
|
132
152
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-display_width
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-04-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|