unicode-display_width 3.1.1 → 3.1.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1886b39340f01645fd4e64d032ab1611e471e60c4dbd46e3d8867125ef45232d
4
- data.tar.gz: dc452df48efa0f7f9cd862b0390f611df83e4dcd82c640f4faa151b05022596f
3
+ metadata.gz: a85ca57ca5e291c17993e526d222dda44b884286484b3831bb8173ce92aafb1a
4
+ data.tar.gz: d1036dfc6464459de04a713e273d09dea767a3b9a9629d9e491052c2ffe97c23
5
5
  SHA512:
6
- metadata.gz: 85dfef303836ba1c13271144ad24f89dbc40591d9056ee187c9ee5b7b6ff1f19d8d9ebd4e21108f78a61d2e3d3c6ce44f560005e3216cf3f8fa595466c50dfc7
7
- data.tar.gz: eae08ff81ed83a3965820aaf09ce10fd028428adf6653807d3fb87b0c96b9ae7be757d38ffa90a8a27b39a8454e4b2694109a8aaddcc75d21d0e449ce8f7f628
6
+ metadata.gz: d669e8a2866b56a78bafb3fff6d2d6430fab6bb1ca2633aeaac68e0634ca14374ac0b325bc7159ef90afe0bdffd9c154700cae1fc3183b1d74281ff4b5024e1b
7
+ data.tar.gz: 5f319484d27dad70b3851398e11cd3cb93b5c4f41a6c3a76c958d505d8357f9e303b661fd7a0339262d1458b82cb8619e6682ee2dbf8c583d33fbde4fd1a8680
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 3.1.2
4
+
5
+ - Performance improvements
6
+
3
7
  ## 3.1.1
4
8
 
5
9
  - Performance improvements
@@ -11,7 +15,7 @@
11
15
  - Emoji modes: Differentiate between well-formed Emoji (`:possible`) and any
12
16
  ZWJ/modifier sequence (`:all`). The latter is more common and more efficient
13
17
  to implement.
14
- - Unify `rgi_{fqe,mqe,uqe}` options to just `:rgi` to keep things simpler (corresponds to
18
+ - Unify `:rgi_{fqe,mqe,uqe}` options to just `:rgi` to keep things simpler (corresponds to
15
19
  the former `:rgi_uqe` option). Most terminals that want to support the RGI set
16
20
  will probably want to catch Emoji sequences with missing VS16s.
17
21
  - Add new `:all_no_vs16` and `:rgi_at` modes to be able to support some terminals
Binary file
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Unicode
4
4
  class DisplayWidth
5
- VERSION = "3.1.1"
5
+ VERSION = "3.1.2"
6
6
  UNICODE_VERSION = "16.0.0"
7
7
  DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../../data/")
8
8
  INDEX_FILENAME = DATA_DIRECTORY + "/display_width.marshal.gz"
@@ -10,8 +10,8 @@ module Unicode
10
10
  class DisplayWidth
11
11
  DEFAULT_AMBIGUOUS = 1
12
12
  INITIAL_DEPTH = 0x10000
13
- ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n\v\f\r\x0E\x0F]/
14
- ASCII_NON_ZERO_STRING = "\0\x05\a\b\n\v\f\r\x0E\x0F"
13
+ ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n-\x0F]/
14
+ ASCII_NON_ZERO_STRING = "\0\x05\a\b\n-\x0F"
15
15
  ASCII_BACKSPACE = "\b"
16
16
  AMBIGUOUS_MAP = {
17
17
  1 => :WIDTH_ONE,
@@ -21,6 +21,10 @@ module Unicode
21
21
  WIDTH_ONE: 768,
22
22
  WIDTH_TWO: 161,
23
23
  }
24
+ NOT_COMMON_NARROW_REGEX = {
25
+ WIDTH_ONE: /[^\u{10}-\u{2FF}]/m,
26
+ WIDTH_TWO: /[^\u{10}-\u{A1}]/m,
27
+ }
24
28
  FIRST_4096 = {
25
29
  WIDTH_ONE: decompress_index(INDEX[:WIDTH_ONE][0][0], 1),
26
30
  WIDTH_TWO: decompress_index(INDEX[:WIDTH_TWO][0][0], 1),
@@ -30,7 +34,6 @@ module Unicode
30
34
  rgi_at: :REGEX_INCLUDE_MQE_UQE,
31
35
  possible: :REGEX_WELL_FORMED,
32
36
  }
33
- REGEX_EMOJI_NOT_POSSIBLE = /\A[#*0-9]\z/
34
37
  REGEX_EMOJI_VS16 = Regexp.union(
35
38
  Regexp.compile(
36
39
  Unicode::Emoji::REGEX_TEXT_PRESENTATION.source +
@@ -44,120 +47,48 @@ module Unicode
44
47
 
45
48
  # Returns monospace display width of string
46
49
  def self.of(string, ambiguous = nil, overwrite = nil, old_options = {}, **options)
47
- unless old_options.empty?
48
- warn "Unicode::DisplayWidth: Please migrate to keyword arguments - #{old_options.inspect}"
49
- options.merge! old_options
50
- end
50
+ string = string.encode(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
51
+ options = normalize_options(string, ambiguous, overwrite, old_options, **options)
51
52
 
52
- options[:ambiguous] = ambiguous if ambiguous
53
- options[:ambiguous] ||= DEFAULT_AMBIGUOUS
53
+ width = 0
54
54
 
55
- if options[:ambiguous] != 1 && options[:ambiguous] != 2
56
- raise ArgumentError, "Unicode::DisplayWidth: Ambiguous width must be 1 or 2"
55
+ unless options[:overwrite].empty?
56
+ width, string = width_custom(string, options[:overwrite])
57
57
  end
58
58
 
59
- if overwrite && !overwrite.empty?
60
- warn "Unicode::DisplayWidth: Please migrate to keyword arguments - overwrite: #{overwrite.inspect}"
61
- options[:overwrite] = overwrite
59
+ if string.ascii_only?
60
+ return width + width_ascii(string)
62
61
  end
63
- options[:overwrite] ||= {}
64
-
65
- if [nil, true, :auto].include?(options[:emoji])
66
- options[:emoji] = EmojiSupport.recommended
67
- end
68
-
69
- # # #
70
62
 
71
- if !options[:overwrite].empty?
72
- return width_frame(string, options) do |string, index_full, index_low, first_ambiguous|
73
- width_all_features(string, index_full, index_low, first_ambiguous, options[:overwrite])
74
- end
75
- end
76
-
77
- if !string.ascii_only?
78
- return width_frame(string, options) do |string, index_full, index_low, first_ambiguous|
79
- width_no_overwrite(string, index_full, index_low, first_ambiguous)
80
- end
81
- end
82
-
83
- width_ascii(string)
84
- end
63
+ ambiguous_index_name = AMBIGUOUS_MAP[options[:ambiguous]]
85
64
 
86
- def self.width_ascii(string)
87
- # Optimization for ASCII-only strings without certain control symbols
88
- if string.match?(ASCII_NON_ZERO_REGEX)
89
- res = string.delete(ASCII_NON_ZERO_STRING).size - string.count(ASCII_BACKSPACE)
90
- return res < 0 ? 0 : res
65
+ unless string.match?(NOT_COMMON_NARROW_REGEX[ambiguous_index_name])
66
+ return width + string.size
91
67
  end
92
68
 
93
- # Pure ASCII
94
- string.size
95
- end
96
-
97
- def self.width_frame(string, options)
98
69
  # Retrieve Emoji width
99
- if options[:emoji] == false || options[:emoji] == :none
100
- res = 0
101
- else
102
- res, string = emoji_width(
70
+ if options[:emoji] != :none
71
+ e_width, string = emoji_width(
103
72
  string,
104
73
  options[:emoji],
105
74
  options[:ambiguous],
106
75
  )
107
- end
108
-
109
- # Prepare indexes
110
- ambiguous_index_name = AMBIGUOUS_MAP[options[:ambiguous]]
111
-
112
- # Get general width
113
- res += yield(string, INDEX[ambiguous_index_name], FIRST_4096[ambiguous_index_name], FIRST_AMBIGUOUS[ambiguous_index_name])
114
-
115
- # Return result + prevent negative lengths
116
- res < 0 ? 0 : res
117
- end
76
+ width += e_width
118
77
 
119
- def self.width_no_overwrite(string, index_full, index_low, first_ambiguous, _ = {})
120
- res = 0
121
-
122
- # Make sure we have UTF-8
123
- string = string.encode(Encoding::UTF_8) unless string.encoding.name == "utf-8"
124
-
125
- string.scan(/.{,80}/m){ |batch|
126
- if batch.ascii_only?
127
- res += batch.size
128
- else
129
- batch.each_codepoint{ |codepoint|
130
- if codepoint > 15 && codepoint < first_ambiguous
131
- res += 1
132
- elsif codepoint < 0x1001
133
- res += index_low[codepoint] || 1
134
- else
135
- d = INITIAL_DEPTH
136
- w = index_full[codepoint / d]
137
- while w.instance_of? Array
138
- w = w[(codepoint %= d) / (d /= 16)]
139
- end
140
-
141
- res += w || 1
142
- end
143
- }
78
+ unless string.match?(NOT_COMMON_NARROW_REGEX[ambiguous_index_name])
79
+ return width + string.size
144
80
  end
145
- }
146
-
147
- res
148
- end
81
+ end
149
82
 
150
- # Same as .width_no_overwrite - but with applying overwrites for each char
151
- def self.width_all_features(string, index_full, index_low, first_ambiguous, overwrite)
152
- res = 0
83
+ index_full = INDEX[ambiguous_index_name]
84
+ index_low = FIRST_4096[ambiguous_index_name]
85
+ first_ambiguous = FIRST_AMBIGUOUS[ambiguous_index_name]
153
86
 
154
87
  string.each_codepoint{ |codepoint|
155
- if overwrite[codepoint]
156
- res += overwrite[codepoint]
157
- elsif codepoint > 15 && codepoint < first_ambiguous
158
- res += 1
88
+ if codepoint > 15 && codepoint < first_ambiguous
89
+ width += 1
159
90
  elsif codepoint < 0x1001
160
- res += index_low[codepoint] || 1
91
+ width += index_low[codepoint] || 1
161
92
  else
162
93
  d = INITIAL_DEPTH
163
94
  w = index_full[codepoint / d]
@@ -165,19 +96,44 @@ module Unicode
165
96
  w = w[(codepoint %= d) / (d /= 16)]
166
97
  end
167
98
 
168
- res += w || 1
99
+ width += w || 1
169
100
  end
170
101
  }
171
102
 
172
- res
103
+ # Return result + prevent negative lengths
104
+ width < 0 ? 0 : width
173
105
  end
174
106
 
107
+ # Returns width of custom overwrites and remaining string
108
+ def self.width_custom(string, overwrite)
109
+ width = 0
110
+
111
+ string = string.each_codepoint.select{ |codepoint|
112
+ if overwrite[codepoint]
113
+ width += overwrite[codepoint]
114
+ nil
115
+ else
116
+ codepoint
117
+ end
118
+ }.pack("U*")
175
119
 
120
+ [width, string]
121
+ end
122
+
123
+ # Returns width for ASCII-only strings. Will consider zero-width control symbols.
124
+ def self.width_ascii(string)
125
+ if string.match?(ASCII_NON_ZERO_REGEX)
126
+ res = string.delete(ASCII_NON_ZERO_STRING).bytesize - string.count(ASCII_BACKSPACE)
127
+ return res < 0 ? 0 : res
128
+ end
129
+
130
+ string.bytesize
131
+ end
132
+
133
+ # Returns width of all considered Emoji and remaining string
176
134
  def self.emoji_width(string, mode = :all, ambiguous = DEFAULT_AMBIGUOUS)
177
135
  res = 0
178
136
 
179
- string = string.encode(Encoding::UTF_8) unless string.encoding.name == "utf-8"
180
-
181
137
  if emoji_set_regex = EMOJI_SEQUENCES_REGEX_MAPPING[mode]
182
138
  emoji_width_via_possible(
183
139
  string,
@@ -209,13 +165,9 @@ module Unicode
209
165
  res = 0
210
166
 
211
167
  # For each string possibly an emoji
212
- no_emoji_string = string.gsub(Unicode::Emoji::REGEX_POSSIBLE){ |emoji_candidate|
213
- # Skip notorious false positives
214
- if REGEX_EMOJI_NOT_POSSIBLE.match?(emoji_candidate)
215
- emoji_candidate
216
-
168
+ no_emoji_string = string.gsub(REGEX_EMOJI_ALL_SEQUENCES_AND_VS16){ |emoji_candidate|
217
169
  # Check if we have a combined Emoji with width 2 (or EAW an Apple Terminal)
218
- elsif emoji_candidate == emoji_candidate[emoji_set_regex]
170
+ if emoji_candidate == emoji_candidate[emoji_set_regex]
219
171
  if strict_eaw
220
172
  res += self.of(emoji_candidate[0], ambiguous, emoji: false)
221
173
  else
@@ -237,6 +189,34 @@ module Unicode
237
189
  [res, no_emoji_string]
238
190
  end
239
191
 
192
+ def self.normalize_options(string, ambiguous = nil, overwrite = nil, old_options = {}, **options)
193
+ unless old_options.empty?
194
+ warn "Unicode::DisplayWidth: Please migrate to keyword arguments - #{old_options.inspect}"
195
+ options.merge! old_options
196
+ end
197
+
198
+ options[:ambiguous] = ambiguous if ambiguous
199
+ options[:ambiguous] ||= DEFAULT_AMBIGUOUS
200
+
201
+ if options[:ambiguous] != 1 && options[:ambiguous] != 2
202
+ raise ArgumentError, "Unicode::DisplayWidth: Ambiguous width must be 1 or 2"
203
+ end
204
+
205
+ if overwrite && !overwrite.empty?
206
+ warn "Unicode::DisplayWidth: Please migrate to keyword arguments - overwrite: #{overwrite.inspect}"
207
+ options[:overwrite] = overwrite
208
+ end
209
+ options[:overwrite] ||= {}
210
+
211
+ if [nil, true, :auto].include?(options[:emoji])
212
+ options[:emoji] = EmojiSupport.recommended
213
+ elsif options[:emoji] == false
214
+ options[:emoji] = :none
215
+ end
216
+
217
+ options
218
+ end
219
+
240
220
  def initialize(ambiguous: DEFAULT_AMBIGUOUS, overwrite: {}, emoji: true)
241
221
  @ambiguous = ambiguous
242
222
  @overwrite = overwrite
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-display_width
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.1
4
+ version: 3.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-11-19 00:00:00.000000000 Z
11
+ date: 2024-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-emoji