unicode-display_width 2.4.1 → 2.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 26234cf1d4a965966cfab38151afe61c8f0a4c2d4f31fc006a319997af7bf076
4
- data.tar.gz: eb5c24564cc94da5378b1d5b9065f90a70fcbcff50f914cc858634825694a544
3
+ metadata.gz: e0bcd900f031999ffa43dd6ef091b07b45d425b1ab04e6559f8c8e2c54e08710
4
+ data.tar.gz: ec3daad5e92107072f8f590d5f2217fd1213d7b25d6491bb3b20ee103f7a2087
5
5
  SHA512:
6
- metadata.gz: 8687c39d5c62292c8d124ca9a3889c30f5f7d8e64b7e5ff79fc3b58137708bfeb844b36d32a5f73920f5e63a3cb60fc4a3f465f8505ccb8134e30d2445fc1af3
7
- data.tar.gz: 37b073e36650f21bca8aa40cf9809a882fcd84642824ca8380542c80f3cb7a5f78c43eabe9abd6f09a73f6ee2cb5c49f60aa8b1ec72bcc8100cd411cdda15a5a
6
+ metadata.gz: da2098a3b5d56518129b453aa88e113583403fefb37bb23eeb3f2ff3213426ea6a7076ab0a2f4c778f91a6cfbe3f98f59ea8dc6a4e37ff4bb6a1499536a5a4b9
7
+ data.tar.gz: 8559483daad47ca76757cf8701f09060bf66f99017a32d62b9c6f739794e1fcc15f2a4b9aae754252d7c8176571f18b590555ed3aa83748836e8e6a681bc7e10
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 2.4.2
4
+
5
+ More performance improvements:
6
+
7
+ - Optimize lookup of first 4096 codepoints
8
+ - Avoid overwrite lookup if no overwrites are set
9
+
3
10
  ## 2.4.1
4
11
 
5
12
  - Improve general performance!
data/MIT-LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT LICENSE
2
2
 
3
- Copyright (c) 2011, 2015-2022 Jan Lelis
3
+ Copyright (c) 2011, 2015-2023 Jan Lelis
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining
6
6
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -10,6 +10,12 @@ Old Rubies which might still work: **2.6**, **2.5**, **2.4**
10
10
 
11
11
  For even older Rubies, use version 2.3.0 of this gem: **2.3**, **2.2**, **2.1**, **2.0**, **1.9**
12
12
 
13
+ ## Version 2.4.2 — Performance Updates
14
+
15
+ **If you use this gem, you should really upgrade to 2.4.2. It's often 100x faster, sometimes even 1000x and more!**
16
+
17
+ This is possible because the gem now detects if you use very basic (and common) characters, like ASCII characters. Furthermore, the character width lookup code has been optimized, so even when full-width characters are involved, the gem is much faster now.
18
+
13
19
  ## Version 2.0 — Breaking Changes
14
20
 
15
21
  Some features of this library were marked deprecated for a long time and have been removed with Version 2.0:
@@ -159,7 +165,7 @@ See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related
159
165
 
160
166
  ## Copyright & Info
161
167
 
162
- - Copyright (c) 2011, 2015-2022 Jan Lelis, https://janlelis.com, released under the MIT
168
+ - Copyright (c) 2011, 2015-2023 Jan Lelis, https://janlelis.com, released under the MIT
163
169
  license
164
170
  - Early versions based on runpaint's unicode-data interface: Copyright (c) 2009 Run Paint Run Run
165
171
  - Unicode data: https://www.unicode.org/copyright.html#Exhibit1
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Unicode
4
4
  class DisplayWidth
5
- VERSION = "2.4.1"
5
+ VERSION = "2.4.2"
6
6
  UNICODE_VERSION = "15.0.0"
7
7
  DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../../data/")
8
8
  INDEX_FILENAME = DATA_DIRECTORY + "/display_width.marshal.gz"
@@ -10,5 +10,25 @@ module Unicode
10
10
  serialized_data.force_encoding Encoding::BINARY
11
11
  INDEX = Marshal.load(serialized_data)
12
12
  end
13
+
14
+ def self.decompress_index(index, level)
15
+ index.flat_map{ |value|
16
+ if level > 0
17
+ if value.instance_of?(Array)
18
+ value[15] ||= nil
19
+ decompress_index(value, level - 1)
20
+ else
21
+ decompress_index([value] * 16, level - 1)
22
+ end
23
+ else
24
+ if value.instance_of?(Array)
25
+ value[15] ||= nil
26
+ value
27
+ else
28
+ [value] * 16
29
+ end
30
+ end
31
+ }
32
+ end
13
33
  end
14
34
  end
@@ -7,31 +7,72 @@ module Unicode
7
7
  class DisplayWidth
8
8
  INITIAL_DEPTH = 0x10000
9
9
  ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n\v\f\r\x0E\x0F]/
10
+ FIRST_4096 = decompress_index(INDEX[0][0], 1)
10
11
 
11
12
  def self.of(string, ambiguous = 1, overwrite = {}, options = {})
12
- # Optimization for ASCII-only strings without certain control symbols
13
- if overwrite.empty? && string.ascii_only?
14
- if string.match?(ASCII_NON_ZERO_REGEX)
15
- res = string.gsub(ASCII_NON_ZERO_REGEX, "").size - string.count("\b")
16
- return res < 0 ? 0 : res
13
+ if overwrite.empty?
14
+ # Optimization for ASCII-only strings without certain control symbols
15
+ if string.ascii_only?
16
+ if string.match?(ASCII_NON_ZERO_REGEX)
17
+ res = string.gsub(ASCII_NON_ZERO_REGEX, "").size - string.count("\b")
18
+ res < 0 ? 0 : res
19
+ else
20
+ string.size
21
+ end
17
22
  else
18
- return string.size
23
+ width_no_overwrite(string, ambiguous, options)
19
24
  end
25
+ else
26
+ width_all_features(string, ambiguous, overwrite, options)
20
27
  end
28
+ end
21
29
 
30
+ def self.width_no_overwrite(string, ambiguous, options = {})
31
+ # Sum of all chars widths
32
+ res = string.codepoints.sum{ |codepoint|
33
+ if codepoint > 15 && codepoint < 161 # very common
34
+ next 1
35
+ elsif codepoint < 0x1001
36
+ width = FIRST_4096[codepoint]
37
+ else
38
+ width = INDEX
39
+ depth = INITIAL_DEPTH
40
+ while (width = width[codepoint / depth]).instance_of? Array
41
+ codepoint %= depth
42
+ depth /= 16
43
+ end
44
+ end
45
+
46
+ width == :A ? ambiguous : (width || 1)
47
+ }
48
+
49
+ # Subtract emoji error
50
+ res -= emoji_extra_width_of(string, ambiguous) if options[:emoji]
51
+
52
+ # Return result + prevent negative lengths
53
+ res < 0 ? 0 : res
54
+ end
55
+
56
+ # Same as .width_no_overwrite - but with applying overwrites for each char
57
+ def self.width_all_features(string, ambiguous, overwrite, options)
22
58
  # Sum of all chars widths
23
59
  res = string.codepoints.sum{ |codepoint|
24
60
  next overwrite[codepoint] if overwrite[codepoint]
25
- next 1 if codepoint > 15 && codepoint < 161 # very common
26
61
 
27
- width = INDEX
28
- depth = INITIAL_DEPTH
29
- while (width = width[codepoint / depth]).is_a? Array
30
- codepoint %= depth
31
- depth /= 16
62
+ if codepoint > 15 && codepoint < 161 # very common
63
+ next 1
64
+ elsif codepoint < 0x1001
65
+ width = FIRST_4096[codepoint]
66
+ else
67
+ width = INDEX
68
+ depth = INITIAL_DEPTH
69
+ while (width = width[codepoint / depth]).instance_of? Array
70
+ codepoint %= depth
71
+ depth /= 16
72
+ end
32
73
  end
33
- width = ambiguous if width == :A
34
- width || 1
74
+
75
+ width == :A ? ambiguous : (width || 1)
35
76
  }
36
77
 
37
78
  # Subtract emoji error
@@ -41,6 +82,7 @@ module Unicode
41
82
  res < 0 ? 0 : res
42
83
  end
43
84
 
85
+
44
86
  def self.emoji_extra_width_of(string, ambiguous = 1, overwrite = {}, _ = {})
45
87
  require "unicode/emoji"
46
88
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-display_width
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.1
4
+ version: 2.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-03 00:00:00.000000000 Z
11
+ date: 2023-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec