unicode-display_width 2.4.0 → 2.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/MIT-LICENSE.txt +1 -1
- data/README.md +10 -1
- data/lib/unicode/display_width/constants.rb +1 -1
- data/lib/unicode/display_width/index.rb +20 -0
- data/lib/unicode/display_width.rb +64 -16
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0bcd900f031999ffa43dd6ef091b07b45d425b1ab04e6559f8c8e2c54e08710
|
4
|
+
data.tar.gz: ec3daad5e92107072f8f590d5f2217fd1213d7b25d6491bb3b20ee103f7a2087
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da2098a3b5d56518129b453aa88e113583403fefb37bb23eeb3f2ff3213426ea6a7076ab0a2f4c778f91a6cfbe3f98f59ea8dc6a4e37ff4bb6a1499536a5a4b9
|
7
|
+
data.tar.gz: 8559483daad47ca76757cf8701f09060bf66f99017a32d62b9c6f739794e1fcc15f2a4b9aae754252d7c8176571f18b590555ed3aa83748836e8e6a681bc7e10
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,19 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## 2.4.2
|
4
|
+
|
5
|
+
More performance improvements:
|
6
|
+
|
7
|
+
- Optimize lookup of first 4096 codepoints
|
8
|
+
- Avoid overwrite lookup if no overwrites are set
|
9
|
+
|
10
|
+
## 2.4.1
|
11
|
+
|
12
|
+
- Improve general performance!
|
13
|
+
- Further improve performance for ASCII strings
|
14
|
+
|
15
|
+
*You should really upgrade - it's much faster now!*
|
16
|
+
|
3
17
|
## 2.4.0
|
4
18
|
- Improve performance for ASCII-only strings, by @fatkodima
|
5
19
|
- Require Ruby 2.4
|
data/MIT-LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -10,6 +10,12 @@ Old Rubies which might still work: **2.6**, **2.5**, **2.4**
|
|
10
10
|
|
11
11
|
For even older Rubies, use version 2.3.0 of this gem: **2.3**, **2.2**, **2.1**, **2.0**, **1.9**
|
12
12
|
|
13
|
+
## Version 2.4.2 — Performance Updates
|
14
|
+
|
15
|
+
**If you use this gem, you should really upgrade to 2.4.2. It's often 100x faster, sometimes even 1000x and more!**
|
16
|
+
|
17
|
+
This is possible because the gem now detects if you use very basic (and common) characters, like ASCII characters. Furthermore, the character width lookup code has been optimized, so even when full-width characters are involved, the gem is much faster now.
|
18
|
+
|
13
19
|
## Version 2.0 — Breaking Changes
|
14
20
|
|
15
21
|
Some features of this library were marked deprecated for a long time and have been removed with Version 2.0:
|
@@ -91,6 +97,9 @@ You can overwrite how to handle specific code points by passing a hash (or even
|
|
91
97
|
Unicode::DisplayWidth.of("a\tb", 1, "\t".ord => 10)) # => tab counted as 10, so result is 12
|
92
98
|
```
|
93
99
|
|
100
|
+
Please note that using overwrites disables some performance optimizations of this gem.
|
101
|
+
|
102
|
+
|
94
103
|
#### Emoji Support
|
95
104
|
|
96
105
|
Emoji width support is included, but it must be activated manually. It will adjust the string's size for modifier and zero-width joiner sequences. You also need to add the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem to your Gemfile:
|
@@ -156,7 +165,7 @@ See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related
|
|
156
165
|
|
157
166
|
## Copyright & Info
|
158
167
|
|
159
|
-
- Copyright (c) 2011, 2015-
|
168
|
+
- Copyright (c) 2011, 2015-2023 Jan Lelis, https://janlelis.com, released under the MIT
|
160
169
|
license
|
161
170
|
- Early versions based on runpaint's unicode-data interface: Copyright (c) 2009 Run Paint Run Run
|
162
171
|
- Unicode data: https://www.unicode.org/copyright.html#Exhibit1
|
@@ -10,5 +10,25 @@ module Unicode
|
|
10
10
|
serialized_data.force_encoding Encoding::BINARY
|
11
11
|
INDEX = Marshal.load(serialized_data)
|
12
12
|
end
|
13
|
+
|
14
|
+
def self.decompress_index(index, level)
|
15
|
+
index.flat_map{ |value|
|
16
|
+
if level > 0
|
17
|
+
if value.instance_of?(Array)
|
18
|
+
value[15] ||= nil
|
19
|
+
decompress_index(value, level - 1)
|
20
|
+
else
|
21
|
+
decompress_index([value] * 16, level - 1)
|
22
|
+
end
|
23
|
+
else
|
24
|
+
if value.instance_of?(Array)
|
25
|
+
value[15] ||= nil
|
26
|
+
value
|
27
|
+
else
|
28
|
+
[value] * 16
|
29
|
+
end
|
30
|
+
end
|
31
|
+
}
|
32
|
+
end
|
13
33
|
end
|
14
34
|
end
|
@@ -5,27 +5,74 @@ require_relative "display_width/index"
|
|
5
5
|
|
6
6
|
module Unicode
|
7
7
|
class DisplayWidth
|
8
|
-
|
8
|
+
INITIAL_DEPTH = 0x10000
|
9
9
|
ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n\v\f\r\x0E\x0F]/
|
10
|
+
FIRST_4096 = decompress_index(INDEX[0][0], 1)
|
10
11
|
|
11
12
|
def self.of(string, ambiguous = 1, overwrite = {}, options = {})
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
if overwrite.empty?
|
14
|
+
# Optimization for ASCII-only strings without certain control symbols
|
15
|
+
if string.ascii_only?
|
16
|
+
if string.match?(ASCII_NON_ZERO_REGEX)
|
17
|
+
res = string.gsub(ASCII_NON_ZERO_REGEX, "").size - string.count("\b")
|
18
|
+
res < 0 ? 0 : res
|
19
|
+
else
|
20
|
+
string.size
|
21
|
+
end
|
22
|
+
else
|
23
|
+
width_no_overwrite(string, ambiguous, options)
|
24
|
+
end
|
25
|
+
else
|
26
|
+
width_all_features(string, ambiguous, overwrite, options)
|
15
27
|
end
|
28
|
+
end
|
16
29
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
30
|
+
def self.width_no_overwrite(string, ambiguous, options = {})
|
31
|
+
# Sum of all chars widths
|
32
|
+
res = string.codepoints.sum{ |codepoint|
|
33
|
+
if codepoint > 15 && codepoint < 161 # very common
|
34
|
+
next 1
|
35
|
+
elsif codepoint < 0x1001
|
36
|
+
width = FIRST_4096[codepoint]
|
37
|
+
else
|
38
|
+
width = INDEX
|
39
|
+
depth = INITIAL_DEPTH
|
40
|
+
while (width = width[codepoint / depth]).instance_of? Array
|
41
|
+
codepoint %= depth
|
42
|
+
depth /= 16
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
width == :A ? ambiguous : (width || 1)
|
47
|
+
}
|
48
|
+
|
49
|
+
# Subtract emoji error
|
50
|
+
res -= emoji_extra_width_of(string, ambiguous) if options[:emoji]
|
51
|
+
|
52
|
+
# Return result + prevent negative lengths
|
53
|
+
res < 0 ? 0 : res
|
54
|
+
end
|
55
|
+
|
56
|
+
# Same as .width_no_overwrite - but with applying overwrites for each char
|
57
|
+
def self.width_all_features(string, ambiguous, overwrite, options)
|
58
|
+
# Sum of all chars widths
|
59
|
+
res = string.codepoints.sum{ |codepoint|
|
60
|
+
next overwrite[codepoint] if overwrite[codepoint]
|
61
|
+
|
62
|
+
if codepoint > 15 && codepoint < 161 # very common
|
63
|
+
next 1
|
64
|
+
elsif codepoint < 0x1001
|
65
|
+
width = FIRST_4096[codepoint]
|
66
|
+
else
|
67
|
+
width = INDEX
|
68
|
+
depth = INITIAL_DEPTH
|
69
|
+
while (width = width[codepoint / depth]).instance_of? Array
|
70
|
+
codepoint %= depth
|
71
|
+
depth /= 16
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
width == :A ? ambiguous : (width || 1)
|
29
76
|
}
|
30
77
|
|
31
78
|
# Subtract emoji error
|
@@ -35,6 +82,7 @@ module Unicode
|
|
35
82
|
res < 0 ? 0 : res
|
36
83
|
end
|
37
84
|
|
85
|
+
|
38
86
|
def self.emoji_extra_width_of(string, ambiguous = 1, overwrite = {}, _ = {})
|
39
87
|
require "unicode/emoji"
|
40
88
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-display_width
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.
|
4
|
+
version: 2.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -81,7 +81,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
requirements: []
|
84
|
-
rubygems_version: 3.
|
84
|
+
rubygems_version: 3.4.1
|
85
85
|
signing_key:
|
86
86
|
specification_version: 4
|
87
87
|
summary: Determines the monospace display width of a string in Ruby.
|