unicode-display_width 3.1.1 → 3.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1886b39340f01645fd4e64d032ab1611e471e60c4dbd46e3d8867125ef45232d
4
- data.tar.gz: dc452df48efa0f7f9cd862b0390f611df83e4dcd82c640f4faa151b05022596f
3
+ metadata.gz: a85ca57ca5e291c17993e526d222dda44b884286484b3831bb8173ce92aafb1a
4
+ data.tar.gz: d1036dfc6464459de04a713e273d09dea767a3b9a9629d9e491052c2ffe97c23
5
5
  SHA512:
6
- metadata.gz: 85dfef303836ba1c13271144ad24f89dbc40591d9056ee187c9ee5b7b6ff1f19d8d9ebd4e21108f78a61d2e3d3c6ce44f560005e3216cf3f8fa595466c50dfc7
7
- data.tar.gz: eae08ff81ed83a3965820aaf09ce10fd028428adf6653807d3fb87b0c96b9ae7be757d38ffa90a8a27b39a8454e4b2694109a8aaddcc75d21d0e449ce8f7f628
6
+ metadata.gz: d669e8a2866b56a78bafb3fff6d2d6430fab6bb1ca2633aeaac68e0634ca14374ac0b325bc7159ef90afe0bdffd9c154700cae1fc3183b1d74281ff4b5024e1b
7
+ data.tar.gz: 5f319484d27dad70b3851398e11cd3cb93b5c4f41a6c3a76c958d505d8357f9e303b661fd7a0339262d1458b82cb8619e6682ee2dbf8c583d33fbde4fd1a8680
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 3.1.2
4
+
5
+ - Performance improvements
6
+
3
7
  ## 3.1.1
4
8
 
5
9
  - Performance improvements
@@ -11,7 +15,7 @@
11
15
  - Emoji modes: Differentiate between well-formed Emoji (`:possible`) and any
12
16
  ZWJ/modifier sequence (`:all`). The latter is more common and more efficient
13
17
  to implement.
14
- - Unify `rgi_{fqe,mqe,uqe}` options to just `:rgi` to keep things simpler (corresponds to
18
+ - Unify `:rgi_{fqe,mqe,uqe}` options to just `:rgi` to keep things simpler (corresponds to
15
19
  the former `:rgi_uqe` option). Most terminals that want to support the RGI set
16
20
  will probably want to catch Emoji sequences with missing VS16s.
17
21
  - Add new `:all_no_vs16` and `:rgi_at` modes to be able to support some terminals
Binary file
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Unicode
4
4
  class DisplayWidth
5
- VERSION = "3.1.1"
5
+ VERSION = "3.1.2"
6
6
  UNICODE_VERSION = "16.0.0"
7
7
  DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../../data/")
8
8
  INDEX_FILENAME = DATA_DIRECTORY + "/display_width.marshal.gz"
@@ -10,8 +10,8 @@ module Unicode
10
10
  class DisplayWidth
11
11
  DEFAULT_AMBIGUOUS = 1
12
12
  INITIAL_DEPTH = 0x10000
13
- ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n\v\f\r\x0E\x0F]/
14
- ASCII_NON_ZERO_STRING = "\0\x05\a\b\n\v\f\r\x0E\x0F"
13
+ ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n-\x0F]/
14
+ ASCII_NON_ZERO_STRING = "\0\x05\a\b\n-\x0F"
15
15
  ASCII_BACKSPACE = "\b"
16
16
  AMBIGUOUS_MAP = {
17
17
  1 => :WIDTH_ONE,
@@ -21,6 +21,10 @@ module Unicode
21
21
  WIDTH_ONE: 768,
22
22
  WIDTH_TWO: 161,
23
23
  }
24
+ NOT_COMMON_NARROW_REGEX = {
25
+ WIDTH_ONE: /[^\u{10}-\u{2FF}]/m,
26
+ WIDTH_TWO: /[^\u{10}-\u{A1}]/m,
27
+ }
24
28
  FIRST_4096 = {
25
29
  WIDTH_ONE: decompress_index(INDEX[:WIDTH_ONE][0][0], 1),
26
30
  WIDTH_TWO: decompress_index(INDEX[:WIDTH_TWO][0][0], 1),
@@ -30,7 +34,6 @@ module Unicode
30
34
  rgi_at: :REGEX_INCLUDE_MQE_UQE,
31
35
  possible: :REGEX_WELL_FORMED,
32
36
  }
33
- REGEX_EMOJI_NOT_POSSIBLE = /\A[#*0-9]\z/
34
37
  REGEX_EMOJI_VS16 = Regexp.union(
35
38
  Regexp.compile(
36
39
  Unicode::Emoji::REGEX_TEXT_PRESENTATION.source +
@@ -44,120 +47,48 @@ module Unicode
44
47
 
45
48
  # Returns monospace display width of string
46
49
  def self.of(string, ambiguous = nil, overwrite = nil, old_options = {}, **options)
47
- unless old_options.empty?
48
- warn "Unicode::DisplayWidth: Please migrate to keyword arguments - #{old_options.inspect}"
49
- options.merge! old_options
50
- end
50
+ string = string.encode(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
51
+ options = normalize_options(string, ambiguous, overwrite, old_options, **options)
51
52
 
52
- options[:ambiguous] = ambiguous if ambiguous
53
- options[:ambiguous] ||= DEFAULT_AMBIGUOUS
53
+ width = 0
54
54
 
55
- if options[:ambiguous] != 1 && options[:ambiguous] != 2
56
- raise ArgumentError, "Unicode::DisplayWidth: Ambiguous width must be 1 or 2"
55
+ unless options[:overwrite].empty?
56
+ width, string = width_custom(string, options[:overwrite])
57
57
  end
58
58
 
59
- if overwrite && !overwrite.empty?
60
- warn "Unicode::DisplayWidth: Please migrate to keyword arguments - overwrite: #{overwrite.inspect}"
61
- options[:overwrite] = overwrite
59
+ if string.ascii_only?
60
+ return width + width_ascii(string)
62
61
  end
63
- options[:overwrite] ||= {}
64
-
65
- if [nil, true, :auto].include?(options[:emoji])
66
- options[:emoji] = EmojiSupport.recommended
67
- end
68
-
69
- # # #
70
62
 
71
- if !options[:overwrite].empty?
72
- return width_frame(string, options) do |string, index_full, index_low, first_ambiguous|
73
- width_all_features(string, index_full, index_low, first_ambiguous, options[:overwrite])
74
- end
75
- end
76
-
77
- if !string.ascii_only?
78
- return width_frame(string, options) do |string, index_full, index_low, first_ambiguous|
79
- width_no_overwrite(string, index_full, index_low, first_ambiguous)
80
- end
81
- end
82
-
83
- width_ascii(string)
84
- end
63
+ ambiguous_index_name = AMBIGUOUS_MAP[options[:ambiguous]]
85
64
 
86
- def self.width_ascii(string)
87
- # Optimization for ASCII-only strings without certain control symbols
88
- if string.match?(ASCII_NON_ZERO_REGEX)
89
- res = string.delete(ASCII_NON_ZERO_STRING).size - string.count(ASCII_BACKSPACE)
90
- return res < 0 ? 0 : res
65
+ unless string.match?(NOT_COMMON_NARROW_REGEX[ambiguous_index_name])
66
+ return width + string.size
91
67
  end
92
68
 
93
- # Pure ASCII
94
- string.size
95
- end
96
-
97
- def self.width_frame(string, options)
98
69
  # Retrieve Emoji width
99
- if options[:emoji] == false || options[:emoji] == :none
100
- res = 0
101
- else
102
- res, string = emoji_width(
70
+ if options[:emoji] != :none
71
+ e_width, string = emoji_width(
103
72
  string,
104
73
  options[:emoji],
105
74
  options[:ambiguous],
106
75
  )
107
- end
108
-
109
- # Prepare indexes
110
- ambiguous_index_name = AMBIGUOUS_MAP[options[:ambiguous]]
111
-
112
- # Get general width
113
- res += yield(string, INDEX[ambiguous_index_name], FIRST_4096[ambiguous_index_name], FIRST_AMBIGUOUS[ambiguous_index_name])
114
-
115
- # Return result + prevent negative lengths
116
- res < 0 ? 0 : res
117
- end
76
+ width += e_width
118
77
 
119
- def self.width_no_overwrite(string, index_full, index_low, first_ambiguous, _ = {})
120
- res = 0
121
-
122
- # Make sure we have UTF-8
123
- string = string.encode(Encoding::UTF_8) unless string.encoding.name == "utf-8"
124
-
125
- string.scan(/.{,80}/m){ |batch|
126
- if batch.ascii_only?
127
- res += batch.size
128
- else
129
- batch.each_codepoint{ |codepoint|
130
- if codepoint > 15 && codepoint < first_ambiguous
131
- res += 1
132
- elsif codepoint < 0x1001
133
- res += index_low[codepoint] || 1
134
- else
135
- d = INITIAL_DEPTH
136
- w = index_full[codepoint / d]
137
- while w.instance_of? Array
138
- w = w[(codepoint %= d) / (d /= 16)]
139
- end
140
-
141
- res += w || 1
142
- end
143
- }
78
+ unless string.match?(NOT_COMMON_NARROW_REGEX[ambiguous_index_name])
79
+ return width + string.size
144
80
  end
145
- }
146
-
147
- res
148
- end
81
+ end
149
82
 
150
- # Same as .width_no_overwrite - but with applying overwrites for each char
151
- def self.width_all_features(string, index_full, index_low, first_ambiguous, overwrite)
152
- res = 0
83
+ index_full = INDEX[ambiguous_index_name]
84
+ index_low = FIRST_4096[ambiguous_index_name]
85
+ first_ambiguous = FIRST_AMBIGUOUS[ambiguous_index_name]
153
86
 
154
87
  string.each_codepoint{ |codepoint|
155
- if overwrite[codepoint]
156
- res += overwrite[codepoint]
157
- elsif codepoint > 15 && codepoint < first_ambiguous
158
- res += 1
88
+ if codepoint > 15 && codepoint < first_ambiguous
89
+ width += 1
159
90
  elsif codepoint < 0x1001
160
- res += index_low[codepoint] || 1
91
+ width += index_low[codepoint] || 1
161
92
  else
162
93
  d = INITIAL_DEPTH
163
94
  w = index_full[codepoint / d]
@@ -165,19 +96,44 @@ module Unicode
165
96
  w = w[(codepoint %= d) / (d /= 16)]
166
97
  end
167
98
 
168
- res += w || 1
99
+ width += w || 1
169
100
  end
170
101
  }
171
102
 
172
- res
103
+ # Return result + prevent negative lengths
104
+ width < 0 ? 0 : width
173
105
  end
174
106
 
107
+ # Returns width of custom overwrites and remaining string
108
+ def self.width_custom(string, overwrite)
109
+ width = 0
110
+
111
+ string = string.each_codepoint.select{ |codepoint|
112
+ if overwrite[codepoint]
113
+ width += overwrite[codepoint]
114
+ nil
115
+ else
116
+ codepoint
117
+ end
118
+ }.pack("U*")
175
119
 
120
+ [width, string]
121
+ end
122
+
123
+ # Returns width for ASCII-only strings. Will consider zero-width control symbols.
124
+ def self.width_ascii(string)
125
+ if string.match?(ASCII_NON_ZERO_REGEX)
126
+ res = string.delete(ASCII_NON_ZERO_STRING).bytesize - string.count(ASCII_BACKSPACE)
127
+ return res < 0 ? 0 : res
128
+ end
129
+
130
+ string.bytesize
131
+ end
132
+
133
+ # Returns width of all considered Emoji and remaining string
176
134
  def self.emoji_width(string, mode = :all, ambiguous = DEFAULT_AMBIGUOUS)
177
135
  res = 0
178
136
 
179
- string = string.encode(Encoding::UTF_8) unless string.encoding.name == "utf-8"
180
-
181
137
  if emoji_set_regex = EMOJI_SEQUENCES_REGEX_MAPPING[mode]
182
138
  emoji_width_via_possible(
183
139
  string,
@@ -209,13 +165,9 @@ module Unicode
209
165
  res = 0
210
166
 
211
167
  # For each string possibly an emoji
212
- no_emoji_string = string.gsub(Unicode::Emoji::REGEX_POSSIBLE){ |emoji_candidate|
213
- # Skip notorious false positives
214
- if REGEX_EMOJI_NOT_POSSIBLE.match?(emoji_candidate)
215
- emoji_candidate
216
-
168
+ no_emoji_string = string.gsub(REGEX_EMOJI_ALL_SEQUENCES_AND_VS16){ |emoji_candidate|
217
169
  # Check if we have a combined Emoji with width 2 (or EAW an Apple Terminal)
218
- elsif emoji_candidate == emoji_candidate[emoji_set_regex]
170
+ if emoji_candidate == emoji_candidate[emoji_set_regex]
219
171
  if strict_eaw
220
172
  res += self.of(emoji_candidate[0], ambiguous, emoji: false)
221
173
  else
@@ -237,6 +189,34 @@ module Unicode
237
189
  [res, no_emoji_string]
238
190
  end
239
191
 
192
+ def self.normalize_options(string, ambiguous = nil, overwrite = nil, old_options = {}, **options)
193
+ unless old_options.empty?
194
+ warn "Unicode::DisplayWidth: Please migrate to keyword arguments - #{old_options.inspect}"
195
+ options.merge! old_options
196
+ end
197
+
198
+ options[:ambiguous] = ambiguous if ambiguous
199
+ options[:ambiguous] ||= DEFAULT_AMBIGUOUS
200
+
201
+ if options[:ambiguous] != 1 && options[:ambiguous] != 2
202
+ raise ArgumentError, "Unicode::DisplayWidth: Ambiguous width must be 1 or 2"
203
+ end
204
+
205
+ if overwrite && !overwrite.empty?
206
+ warn "Unicode::DisplayWidth: Please migrate to keyword arguments - overwrite: #{overwrite.inspect}"
207
+ options[:overwrite] = overwrite
208
+ end
209
+ options[:overwrite] ||= {}
210
+
211
+ if [nil, true, :auto].include?(options[:emoji])
212
+ options[:emoji] = EmojiSupport.recommended
213
+ elsif options[:emoji] == false
214
+ options[:emoji] = :none
215
+ end
216
+
217
+ options
218
+ end
219
+
240
220
  def initialize(ambiguous: DEFAULT_AMBIGUOUS, overwrite: {}, emoji: true)
241
221
  @ambiguous = ambiguous
242
222
  @overwrite = overwrite
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-display_width
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.1
4
+ version: 3.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-11-19 00:00:00.000000000 Z
11
+ date: 2024-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-emoji