unicode-display_width 3.1.1 → 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/data/display_width.marshal.gz +0 -0
- data/lib/unicode/display_width/constants.rb +1 -1
- data/lib/unicode/display_width.rb +87 -107
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a85ca57ca5e291c17993e526d222dda44b884286484b3831bb8173ce92aafb1a
|
4
|
+
data.tar.gz: d1036dfc6464459de04a713e273d09dea767a3b9a9629d9e491052c2ffe97c23
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d669e8a2866b56a78bafb3fff6d2d6430fab6bb1ca2633aeaac68e0634ca14374ac0b325bc7159ef90afe0bdffd9c154700cae1fc3183b1d74281ff4b5024e1b
|
7
|
+
data.tar.gz: 5f319484d27dad70b3851398e11cd3cb93b5c4f41a6c3a76c958d505d8357f9e303b661fd7a0339262d1458b82cb8619e6682ee2dbf8c583d33fbde4fd1a8680
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## 3.1.2
|
4
|
+
|
5
|
+
- Performance improvements
|
6
|
+
|
3
7
|
## 3.1.1
|
4
8
|
|
5
9
|
- Performance improvements
|
@@ -11,7 +15,7 @@
|
|
11
15
|
- Emoji modes: Differentiate between well-formed Emoji (`:possible`) and any
|
12
16
|
ZWJ/modifier sequence (`:all`). The latter is more common and more efficient
|
13
17
|
to implement.
|
14
|
-
- Unify
|
18
|
+
- Unify `:rgi_{fqe,mqe,uqe}` options to just `:rgi` to keep things simpler (corresponds to
|
15
19
|
the former `:rgi_uqe` option). Most terminals that want to support the RGI set
|
16
20
|
will probably want to catch Emoji sequences with missing VS16s.
|
17
21
|
- Add new `:all_no_vs16` and `:rgi_at` modes to be able to support some terminals
|
Binary file
|
@@ -10,8 +10,8 @@ module Unicode
|
|
10
10
|
class DisplayWidth
|
11
11
|
DEFAULT_AMBIGUOUS = 1
|
12
12
|
INITIAL_DEPTH = 0x10000
|
13
|
-
ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n
|
14
|
-
ASCII_NON_ZERO_STRING = "\0\x05\a\b\n
|
13
|
+
ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n-\x0F]/
|
14
|
+
ASCII_NON_ZERO_STRING = "\0\x05\a\b\n-\x0F"
|
15
15
|
ASCII_BACKSPACE = "\b"
|
16
16
|
AMBIGUOUS_MAP = {
|
17
17
|
1 => :WIDTH_ONE,
|
@@ -21,6 +21,10 @@ module Unicode
|
|
21
21
|
WIDTH_ONE: 768,
|
22
22
|
WIDTH_TWO: 161,
|
23
23
|
}
|
24
|
+
NOT_COMMON_NARROW_REGEX = {
|
25
|
+
WIDTH_ONE: /[^\u{10}-\u{2FF}]/m,
|
26
|
+
WIDTH_TWO: /[^\u{10}-\u{A1}]/m,
|
27
|
+
}
|
24
28
|
FIRST_4096 = {
|
25
29
|
WIDTH_ONE: decompress_index(INDEX[:WIDTH_ONE][0][0], 1),
|
26
30
|
WIDTH_TWO: decompress_index(INDEX[:WIDTH_TWO][0][0], 1),
|
@@ -30,7 +34,6 @@ module Unicode
|
|
30
34
|
rgi_at: :REGEX_INCLUDE_MQE_UQE,
|
31
35
|
possible: :REGEX_WELL_FORMED,
|
32
36
|
}
|
33
|
-
REGEX_EMOJI_NOT_POSSIBLE = /\A[#*0-9]\z/
|
34
37
|
REGEX_EMOJI_VS16 = Regexp.union(
|
35
38
|
Regexp.compile(
|
36
39
|
Unicode::Emoji::REGEX_TEXT_PRESENTATION.source +
|
@@ -44,120 +47,48 @@ module Unicode
|
|
44
47
|
|
45
48
|
# Returns monospace display width of string
|
46
49
|
def self.of(string, ambiguous = nil, overwrite = nil, old_options = {}, **options)
|
47
|
-
unless
|
48
|
-
|
49
|
-
options.merge! old_options
|
50
|
-
end
|
50
|
+
string = string.encode(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
|
51
|
+
options = normalize_options(string, ambiguous, overwrite, old_options, **options)
|
51
52
|
|
52
|
-
|
53
|
-
options[:ambiguous] ||= DEFAULT_AMBIGUOUS
|
53
|
+
width = 0
|
54
54
|
|
55
|
-
|
56
|
-
|
55
|
+
unless options[:overwrite].empty?
|
56
|
+
width, string = width_custom(string, options[:overwrite])
|
57
57
|
end
|
58
58
|
|
59
|
-
if
|
60
|
-
|
61
|
-
options[:overwrite] = overwrite
|
59
|
+
if string.ascii_only?
|
60
|
+
return width + width_ascii(string)
|
62
61
|
end
|
63
|
-
options[:overwrite] ||= {}
|
64
|
-
|
65
|
-
if [nil, true, :auto].include?(options[:emoji])
|
66
|
-
options[:emoji] = EmojiSupport.recommended
|
67
|
-
end
|
68
|
-
|
69
|
-
# # #
|
70
62
|
|
71
|
-
|
72
|
-
return width_frame(string, options) do |string, index_full, index_low, first_ambiguous|
|
73
|
-
width_all_features(string, index_full, index_low, first_ambiguous, options[:overwrite])
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
if !string.ascii_only?
|
78
|
-
return width_frame(string, options) do |string, index_full, index_low, first_ambiguous|
|
79
|
-
width_no_overwrite(string, index_full, index_low, first_ambiguous)
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
width_ascii(string)
|
84
|
-
end
|
63
|
+
ambiguous_index_name = AMBIGUOUS_MAP[options[:ambiguous]]
|
85
64
|
|
86
|
-
|
87
|
-
|
88
|
-
if string.match?(ASCII_NON_ZERO_REGEX)
|
89
|
-
res = string.delete(ASCII_NON_ZERO_STRING).size - string.count(ASCII_BACKSPACE)
|
90
|
-
return res < 0 ? 0 : res
|
65
|
+
unless string.match?(NOT_COMMON_NARROW_REGEX[ambiguous_index_name])
|
66
|
+
return width + string.size
|
91
67
|
end
|
92
68
|
|
93
|
-
# Pure ASCII
|
94
|
-
string.size
|
95
|
-
end
|
96
|
-
|
97
|
-
def self.width_frame(string, options)
|
98
69
|
# Retrieve Emoji width
|
99
|
-
if options[:emoji]
|
100
|
-
|
101
|
-
else
|
102
|
-
res, string = emoji_width(
|
70
|
+
if options[:emoji] != :none
|
71
|
+
e_width, string = emoji_width(
|
103
72
|
string,
|
104
73
|
options[:emoji],
|
105
74
|
options[:ambiguous],
|
106
75
|
)
|
107
|
-
|
108
|
-
|
109
|
-
# Prepare indexes
|
110
|
-
ambiguous_index_name = AMBIGUOUS_MAP[options[:ambiguous]]
|
111
|
-
|
112
|
-
# Get general width
|
113
|
-
res += yield(string, INDEX[ambiguous_index_name], FIRST_4096[ambiguous_index_name], FIRST_AMBIGUOUS[ambiguous_index_name])
|
114
|
-
|
115
|
-
# Return result + prevent negative lengths
|
116
|
-
res < 0 ? 0 : res
|
117
|
-
end
|
76
|
+
width += e_width
|
118
77
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
# Make sure we have UTF-8
|
123
|
-
string = string.encode(Encoding::UTF_8) unless string.encoding.name == "utf-8"
|
124
|
-
|
125
|
-
string.scan(/.{,80}/m){ |batch|
|
126
|
-
if batch.ascii_only?
|
127
|
-
res += batch.size
|
128
|
-
else
|
129
|
-
batch.each_codepoint{ |codepoint|
|
130
|
-
if codepoint > 15 && codepoint < first_ambiguous
|
131
|
-
res += 1
|
132
|
-
elsif codepoint < 0x1001
|
133
|
-
res += index_low[codepoint] || 1
|
134
|
-
else
|
135
|
-
d = INITIAL_DEPTH
|
136
|
-
w = index_full[codepoint / d]
|
137
|
-
while w.instance_of? Array
|
138
|
-
w = w[(codepoint %= d) / (d /= 16)]
|
139
|
-
end
|
140
|
-
|
141
|
-
res += w || 1
|
142
|
-
end
|
143
|
-
}
|
78
|
+
unless string.match?(NOT_COMMON_NARROW_REGEX[ambiguous_index_name])
|
79
|
+
return width + string.size
|
144
80
|
end
|
145
|
-
|
146
|
-
|
147
|
-
res
|
148
|
-
end
|
81
|
+
end
|
149
82
|
|
150
|
-
|
151
|
-
|
152
|
-
|
83
|
+
index_full = INDEX[ambiguous_index_name]
|
84
|
+
index_low = FIRST_4096[ambiguous_index_name]
|
85
|
+
first_ambiguous = FIRST_AMBIGUOUS[ambiguous_index_name]
|
153
86
|
|
154
87
|
string.each_codepoint{ |codepoint|
|
155
|
-
if
|
156
|
-
|
157
|
-
elsif codepoint > 15 && codepoint < first_ambiguous
|
158
|
-
res += 1
|
88
|
+
if codepoint > 15 && codepoint < first_ambiguous
|
89
|
+
width += 1
|
159
90
|
elsif codepoint < 0x1001
|
160
|
-
|
91
|
+
width += index_low[codepoint] || 1
|
161
92
|
else
|
162
93
|
d = INITIAL_DEPTH
|
163
94
|
w = index_full[codepoint / d]
|
@@ -165,19 +96,44 @@ module Unicode
|
|
165
96
|
w = w[(codepoint %= d) / (d /= 16)]
|
166
97
|
end
|
167
98
|
|
168
|
-
|
99
|
+
width += w || 1
|
169
100
|
end
|
170
101
|
}
|
171
102
|
|
172
|
-
|
103
|
+
# Return result + prevent negative lengths
|
104
|
+
width < 0 ? 0 : width
|
173
105
|
end
|
174
106
|
|
107
|
+
# Returns width of custom overwrites and remaining string
|
108
|
+
def self.width_custom(string, overwrite)
|
109
|
+
width = 0
|
110
|
+
|
111
|
+
string = string.each_codepoint.select{ |codepoint|
|
112
|
+
if overwrite[codepoint]
|
113
|
+
width += overwrite[codepoint]
|
114
|
+
nil
|
115
|
+
else
|
116
|
+
codepoint
|
117
|
+
end
|
118
|
+
}.pack("U*")
|
175
119
|
|
120
|
+
[width, string]
|
121
|
+
end
|
122
|
+
|
123
|
+
# Returns width for ASCII-only strings. Will consider zero-width control symbols.
|
124
|
+
def self.width_ascii(string)
|
125
|
+
if string.match?(ASCII_NON_ZERO_REGEX)
|
126
|
+
res = string.delete(ASCII_NON_ZERO_STRING).bytesize - string.count(ASCII_BACKSPACE)
|
127
|
+
return res < 0 ? 0 : res
|
128
|
+
end
|
129
|
+
|
130
|
+
string.bytesize
|
131
|
+
end
|
132
|
+
|
133
|
+
# Returns width of all considered Emoji and remaining string
|
176
134
|
def self.emoji_width(string, mode = :all, ambiguous = DEFAULT_AMBIGUOUS)
|
177
135
|
res = 0
|
178
136
|
|
179
|
-
string = string.encode(Encoding::UTF_8) unless string.encoding.name == "utf-8"
|
180
|
-
|
181
137
|
if emoji_set_regex = EMOJI_SEQUENCES_REGEX_MAPPING[mode]
|
182
138
|
emoji_width_via_possible(
|
183
139
|
string,
|
@@ -209,13 +165,9 @@ module Unicode
|
|
209
165
|
res = 0
|
210
166
|
|
211
167
|
# For each string possibly an emoji
|
212
|
-
no_emoji_string = string.gsub(
|
213
|
-
# Skip notorious false positives
|
214
|
-
if REGEX_EMOJI_NOT_POSSIBLE.match?(emoji_candidate)
|
215
|
-
emoji_candidate
|
216
|
-
|
168
|
+
no_emoji_string = string.gsub(REGEX_EMOJI_ALL_SEQUENCES_AND_VS16){ |emoji_candidate|
|
217
169
|
# Check if we have a combined Emoji with width 2 (or EAW an Apple Terminal)
|
218
|
-
|
170
|
+
if emoji_candidate == emoji_candidate[emoji_set_regex]
|
219
171
|
if strict_eaw
|
220
172
|
res += self.of(emoji_candidate[0], ambiguous, emoji: false)
|
221
173
|
else
|
@@ -237,6 +189,34 @@ module Unicode
|
|
237
189
|
[res, no_emoji_string]
|
238
190
|
end
|
239
191
|
|
192
|
+
def self.normalize_options(string, ambiguous = nil, overwrite = nil, old_options = {}, **options)
|
193
|
+
unless old_options.empty?
|
194
|
+
warn "Unicode::DisplayWidth: Please migrate to keyword arguments - #{old_options.inspect}"
|
195
|
+
options.merge! old_options
|
196
|
+
end
|
197
|
+
|
198
|
+
options[:ambiguous] = ambiguous if ambiguous
|
199
|
+
options[:ambiguous] ||= DEFAULT_AMBIGUOUS
|
200
|
+
|
201
|
+
if options[:ambiguous] != 1 && options[:ambiguous] != 2
|
202
|
+
raise ArgumentError, "Unicode::DisplayWidth: Ambiguous width must be 1 or 2"
|
203
|
+
end
|
204
|
+
|
205
|
+
if overwrite && !overwrite.empty?
|
206
|
+
warn "Unicode::DisplayWidth: Please migrate to keyword arguments - overwrite: #{overwrite.inspect}"
|
207
|
+
options[:overwrite] = overwrite
|
208
|
+
end
|
209
|
+
options[:overwrite] ||= {}
|
210
|
+
|
211
|
+
if [nil, true, :auto].include?(options[:emoji])
|
212
|
+
options[:emoji] = EmojiSupport.recommended
|
213
|
+
elsif options[:emoji] == false
|
214
|
+
options[:emoji] = :none
|
215
|
+
end
|
216
|
+
|
217
|
+
options
|
218
|
+
end
|
219
|
+
|
240
220
|
def initialize(ambiguous: DEFAULT_AMBIGUOUS, overwrite: {}, emoji: true)
|
241
221
|
@ambiguous = ambiguous
|
242
222
|
@overwrite = overwrite
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-display_width
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.1
|
4
|
+
version: 3.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-11-
|
11
|
+
date: 2024-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode-emoji
|