unicode-display_width 1.1.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.txt +7 -0
- data/README.md +18 -0
- data/Rakefile +1 -0
- data/data/display_width.marshal.gz +0 -0
- data/lib/unicode/display_width.rb +20 -1
- data/lib/unicode/display_width/constants.rb +1 -1
- data/lib/unicode/display_width/string_ext.rb +2 -2
- data/spec/display_width_spec.rb +23 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc469a91d5eda226bed21bce0669da6fa9e5d064
|
4
|
+
data.tar.gz: 5b376806fdd80debc0e1baa8694f0ef1eb697ca2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63022c8bd0776c973347dcb3ccbf0a3c79610134f20ea82fd0832b2143517e492bd56f11564a5639c8a88136bf402abde808ae90b0a79a4ea2ff14020f36d496
|
7
|
+
data.tar.gz: 6765c915c17a1cd7a13c9fad7a4fbc6621a99e8c119b148f4a10fd33b4c3b81cf45a2eed2a54fafc6f36c6d268650e68d67740d04190281568b5f87ebb57e9f1
|
data/CHANGELOG.txt
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## 1.2.0
|
4
|
+
|
5
|
+
- Add zero-width codepoint ranges: U+2060..U+206F, U+FFF0..U+FFF8, U+E0000..U+E0FFF
|
6
|
+
- Add full-width codepoint ranges: U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2FFFD, U+30000..U+3FFFD
|
7
|
+
- Experimental emoji support using the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem
|
8
|
+
- Fix minor bug in index compression scheme
|
9
|
+
|
3
10
|
## 1.1.3
|
4
11
|
|
5
12
|
- Fix that non-UTF-8 encodings do not throw errors, patch by @windwiny
|
data/README.md
CHANGED
@@ -22,7 +22,9 @@ X | (user defined) | Overwrites any other values
|
|
22
22
|
3 | `"\u{2E3B}"` | THREE-EM DASH
|
23
23
|
0 | General Categories: Mn, Me, Cf (non-arabic) | Excludes ARABIC format characters
|
24
24
|
0 | `"\u{1160}".."\u{11FF}"` | HANGUL JUNGSEONG
|
25
|
+
0 | `"\u{2060}".."\u{206F}"`, `"\u{FFF0}".."\u{FFF8}"`, `"\u{E0000}".."\u{E0FFF}"` | Ignorable ranges
|
25
26
|
2 | East Asian Width: F, W | Full-width characters
|
27
|
+
2 | `"\u{3400}"`..`"\u{4DBF}"`, `"\u{4E00}"`..`"\u{9FFF}"`, `"\u{F900}"`..`"\u{FAFF}"`, `"\u{20000}"`..`"\u{2FFFD}"`, `"\u{30000}"`..`"\u{3FFFD}"` | Full-width ranges
|
26
28
|
1 or 2 | East Asian Width: A | Ambiguous characters, user defined, default: 1
|
27
29
|
1 | All other codepoints | -
|
28
30
|
|
@@ -62,6 +64,22 @@ You can overwrite how to handle specific code points by passing a hash (or even
|
|
62
64
|
Unicode::DisplayWidth.of("a\tb", 1, 0x09 => 10) # => 12
|
63
65
|
```
|
64
66
|
|
67
|
+
### Emoji Support
|
68
|
+
|
69
|
+
Experimental emoji support is included. It will adjust the string's size for modifier and zero-width joiner sequences. You will need to add the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem to your Gemfile:
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
gem 'unicode-display_width'
|
73
|
+
gem 'unicode-emoji'
|
74
|
+
```
|
75
|
+
|
76
|
+
You can then activate the emoji string width adjustments by passing `emoji: true` as the fourth parameter:
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
Unicode::DisplayWidth.of "🤾🏽‍♀️" # => 5
|
80
|
+
Unicode::DisplayWidth.of "🤾🏽‍♀️", 1, {}, emoji: true # => 2
|
81
|
+
```
|
82
|
+
|
65
83
|
### Usage with String Extension
|
66
84
|
|
67
85
|
Activated by default. Will be deactivated in version 2.0:
|
data/Rakefile
CHANGED
Binary file
|
@@ -5,7 +5,7 @@ module Unicode
|
|
5
5
|
module DisplayWidth
|
6
6
|
DEPTHS = [0x10000, 0x1000, 0x100, 0x10].freeze
|
7
7
|
|
8
|
-
def self.of(string, ambiguous = 1, overwrite = {})
|
8
|
+
def self.of(string, ambiguous = 1, overwrite = {}, options = {})
|
9
9
|
res = string.codepoints.inject(0){ |total_width, codepoint|
|
10
10
|
index_or_value = INDEX
|
11
11
|
codepoint_depth_offset = codepoint
|
@@ -19,8 +19,27 @@ module Unicode
|
|
19
19
|
total_width + (overwrite[codepoint] || width || 1)
|
20
20
|
}
|
21
21
|
|
22
|
+
res -= emoji_extra_width_of(string) if options[:emoji]
|
22
23
|
res < 0 ? 0 : res
|
23
24
|
end
|
25
|
+
|
26
|
+
def self.emoji_extra_width_of(string, ambiguous = 1, overwrite = {}, _ = {})
|
27
|
+
require "unicode/emoji"
|
28
|
+
|
29
|
+
extra_width = 0
|
30
|
+
modifier_regex = /[#{ Unicode::Emoji::EMOJI_MODIFIERS.pack("U*") }]/
|
31
|
+
zwj_regex = /(?<=#{ [Unicode::Emoji::ZWJ].pack("U") })./
|
32
|
+
|
33
|
+
string.scan(Unicode::Emoji::REGEX){ |emoji|
|
34
|
+
extra_width += 2 * emoji.match(modifier_regex).size
|
35
|
+
|
36
|
+
emoji.scan(zwj_regex){ |zwj_succ|
|
37
|
+
extra_width += self.of(zwj_succ, ambiguous, overwrite)
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
extra_width
|
42
|
+
end
|
24
43
|
end
|
25
44
|
end
|
26
45
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Unicode
|
2
2
|
module DisplayWidth
|
3
|
-
VERSION = '1.1.3'
|
3
|
+
VERSION = '1.2.0'
|
4
4
|
UNICODE_VERSION = "9.0.0".freeze
|
5
5
|
DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + '/../../../data/').freeze
|
6
6
|
INDEX_FILENAME = (DATA_DIRECTORY + '/display_width.marshal.gz').freeze
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require_relative '../display_width' unless defined? Unicode::DisplayWidth
|
2
2
|
|
3
3
|
class String
|
4
|
-
def display_width(ambiguous = 1, overwrite = {})
|
5
|
-
Unicode::DisplayWidth.of(self, ambiguous, overwrite)
|
4
|
+
def display_width(ambiguous = 1, overwrite = {}, options = {})
|
5
|
+
Unicode::DisplayWidth.of(self, ambiguous, overwrite, options)
|
6
6
|
end
|
7
7
|
|
8
8
|
def display_size(*args)
|
data/spec/display_width_spec.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'unicode/display_width'
|
1
|
+
require_relative '../lib/unicode/display_width'
|
4
2
|
|
5
3
|
describe 'Unicode::DisplayWidth.of' do
|
6
4
|
describe '[east asian width]' do
|
@@ -12,6 +10,10 @@ describe 'Unicode::DisplayWidth.of' do
|
|
12
10
|
expect( '一'.display_width ).to eq 2
|
13
11
|
end
|
14
12
|
|
13
|
+
it 'returns 2 for W (which are currently unassigned)' do
|
14
|
+
expect( "\u{3FFFD}".display_width ).to eq 2
|
15
|
+
end
|
16
|
+
|
15
17
|
it 'returns 1 for N' do
|
16
18
|
expect( 'À'.display_width ).to eq 1
|
17
19
|
end
|
@@ -53,6 +55,18 @@ describe 'Unicode::DisplayWidth.of' do
|
|
53
55
|
it 'returns 0 for HANGUL JUNGSEONG chars' do
|
54
56
|
expect( 'ᅠ'.display_width ).to eq 0
|
55
57
|
end
|
58
|
+
|
59
|
+
it 'returns 0 for U+2060..U+206F' do
|
60
|
+
expect( "\u{2060}".display_width ).to eq 0
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'returns 0 for U+FFF0..U+FFF8' do
|
64
|
+
expect( "\u{FFF0}".display_width ).to eq 0
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'returns 0 for U+E0000..U+E0FFF' do
|
68
|
+
expect( "\u{E0000}".display_width ).to eq 0
|
69
|
+
end
|
56
70
|
end
|
57
71
|
|
58
72
|
describe '[special characters]' do
|
@@ -129,4 +143,10 @@ describe 'Unicode::DisplayWidth.of' do
|
|
129
143
|
expect( 'À'.encode("UTF-16LE").display_width ).to eq 1
|
130
144
|
end
|
131
145
|
end
|
146
|
+
|
147
|
+
describe '[emoji]' do
|
148
|
+
it 'does not count modifiers and zjw sequences for valid emoji' do
|
149
|
+
expect( "🤾🏽‍♀️".display_width(1, {}, emoji: true) ).to eq 2
|
150
|
+
end
|
151
|
+
end
|
132
152
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-display_width
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-04-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|