unicode-display_width 2.4.1 → 2.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/MIT-LICENSE.txt +1 -1
- data/README.md +7 -1
- data/lib/unicode/display_width/constants.rb +1 -1
- data/lib/unicode/display_width/index.rb +20 -0
- data/lib/unicode/display_width.rb +56 -14
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0bcd900f031999ffa43dd6ef091b07b45d425b1ab04e6559f8c8e2c54e08710
|
4
|
+
data.tar.gz: ec3daad5e92107072f8f590d5f2217fd1213d7b25d6491bb3b20ee103f7a2087
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da2098a3b5d56518129b453aa88e113583403fefb37bb23eeb3f2ff3213426ea6a7076ab0a2f4c778f91a6cfbe3f98f59ea8dc6a4e37ff4bb6a1499536a5a4b9
|
7
|
+
data.tar.gz: 8559483daad47ca76757cf8701f09060bf66f99017a32d62b9c6f739794e1fcc15f2a4b9aae754252d7c8176571f18b590555ed3aa83748836e8e6a681bc7e10
|
data/CHANGELOG.md
CHANGED
data/MIT-LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -10,6 +10,12 @@ Old Rubies which might still work: **2.6**, **2.5**, **2.4**
|
|
10
10
|
|
11
11
|
For even older Rubies, use version 2.3.0 of this gem: **2.3**, **2.2**, **2.1**, **2.0**, **1.9**
|
12
12
|
|
13
|
+
## Version 2.4.2 — Performance Updates
|
14
|
+
|
15
|
+
**If you use this gem, you should really upgrade to 2.4.2. It's often 100x faster, sometimes even 1000x and more!**
|
16
|
+
|
17
|
+
This is possible because the gem now detects if you use very basic (and common) characters, like ASCII characters. Furthermore, the character width lookup code has been optimized, so even when full-width characters are involved, the gem is much faster now.
|
18
|
+
|
13
19
|
## Version 2.0 — Breaking Changes
|
14
20
|
|
15
21
|
Some features of this library were marked deprecated for a long time and have been removed with Version 2.0:
|
@@ -159,7 +165,7 @@ See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related
|
|
159
165
|
|
160
166
|
## Copyright & Info
|
161
167
|
|
162
|
-
- Copyright (c) 2011, 2015-
|
168
|
+
- Copyright (c) 2011, 2015-2023 Jan Lelis, https://janlelis.com, released under the MIT
|
163
169
|
license
|
164
170
|
- Early versions based on runpaint's unicode-data interface: Copyright (c) 2009 Run Paint Run Run
|
165
171
|
- Unicode data: https://www.unicode.org/copyright.html#Exhibit1
|
@@ -10,5 +10,25 @@ module Unicode
|
|
10
10
|
serialized_data.force_encoding Encoding::BINARY
|
11
11
|
INDEX = Marshal.load(serialized_data)
|
12
12
|
end
|
13
|
+
|
14
|
+
def self.decompress_index(index, level)
|
15
|
+
index.flat_map{ |value|
|
16
|
+
if level > 0
|
17
|
+
if value.instance_of?(Array)
|
18
|
+
value[15] ||= nil
|
19
|
+
decompress_index(value, level - 1)
|
20
|
+
else
|
21
|
+
decompress_index([value] * 16, level - 1)
|
22
|
+
end
|
23
|
+
else
|
24
|
+
if value.instance_of?(Array)
|
25
|
+
value[15] ||= nil
|
26
|
+
value
|
27
|
+
else
|
28
|
+
[value] * 16
|
29
|
+
end
|
30
|
+
end
|
31
|
+
}
|
32
|
+
end
|
13
33
|
end
|
14
34
|
end
|
@@ -7,31 +7,72 @@ module Unicode
|
|
7
7
|
class DisplayWidth
|
8
8
|
INITIAL_DEPTH = 0x10000
|
9
9
|
ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n\v\f\r\x0E\x0F]/
|
10
|
+
FIRST_4096 = decompress_index(INDEX[0][0], 1)
|
10
11
|
|
11
12
|
def self.of(string, ambiguous = 1, overwrite = {}, options = {})
|
12
|
-
|
13
|
-
|
14
|
-
if string.
|
15
|
-
|
16
|
-
|
13
|
+
if overwrite.empty?
|
14
|
+
# Optimization for ASCII-only strings without certain control symbols
|
15
|
+
if string.ascii_only?
|
16
|
+
if string.match?(ASCII_NON_ZERO_REGEX)
|
17
|
+
res = string.gsub(ASCII_NON_ZERO_REGEX, "").size - string.count("\b")
|
18
|
+
res < 0 ? 0 : res
|
19
|
+
else
|
20
|
+
string.size
|
21
|
+
end
|
17
22
|
else
|
18
|
-
|
23
|
+
width_no_overwrite(string, ambiguous, options)
|
19
24
|
end
|
25
|
+
else
|
26
|
+
width_all_features(string, ambiguous, overwrite, options)
|
20
27
|
end
|
28
|
+
end
|
21
29
|
|
30
|
+
def self.width_no_overwrite(string, ambiguous, options = {})
|
31
|
+
# Sum of all chars widths
|
32
|
+
res = string.codepoints.sum{ |codepoint|
|
33
|
+
if codepoint > 15 && codepoint < 161 # very common
|
34
|
+
next 1
|
35
|
+
elsif codepoint < 0x1001
|
36
|
+
width = FIRST_4096[codepoint]
|
37
|
+
else
|
38
|
+
width = INDEX
|
39
|
+
depth = INITIAL_DEPTH
|
40
|
+
while (width = width[codepoint / depth]).instance_of? Array
|
41
|
+
codepoint %= depth
|
42
|
+
depth /= 16
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
width == :A ? ambiguous : (width || 1)
|
47
|
+
}
|
48
|
+
|
49
|
+
# Subtract emoji error
|
50
|
+
res -= emoji_extra_width_of(string, ambiguous) if options[:emoji]
|
51
|
+
|
52
|
+
# Return result + prevent negative lengths
|
53
|
+
res < 0 ? 0 : res
|
54
|
+
end
|
55
|
+
|
56
|
+
# Same as .width_no_overwrite - but with applying overwrites for each char
|
57
|
+
def self.width_all_features(string, ambiguous, overwrite, options)
|
22
58
|
# Sum of all chars widths
|
23
59
|
res = string.codepoints.sum{ |codepoint|
|
24
60
|
next overwrite[codepoint] if overwrite[codepoint]
|
25
|
-
next 1 if codepoint > 15 && codepoint < 161 # very common
|
26
61
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
62
|
+
if codepoint > 15 && codepoint < 161 # very common
|
63
|
+
next 1
|
64
|
+
elsif codepoint < 0x1001
|
65
|
+
width = FIRST_4096[codepoint]
|
66
|
+
else
|
67
|
+
width = INDEX
|
68
|
+
depth = INITIAL_DEPTH
|
69
|
+
while (width = width[codepoint / depth]).instance_of? Array
|
70
|
+
codepoint %= depth
|
71
|
+
depth /= 16
|
72
|
+
end
|
32
73
|
end
|
33
|
-
|
34
|
-
width || 1
|
74
|
+
|
75
|
+
width == :A ? ambiguous : (width || 1)
|
35
76
|
}
|
36
77
|
|
37
78
|
# Subtract emoji error
|
@@ -41,6 +82,7 @@ module Unicode
|
|
41
82
|
res < 0 ? 0 : res
|
42
83
|
end
|
43
84
|
|
85
|
+
|
44
86
|
def self.emoji_extra_width_of(string, ambiguous = 1, overwrite = {}, _ = {})
|
45
87
|
require "unicode/emoji"
|
46
88
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-display_width
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.1
|
4
|
+
version: 2.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|