unicode-display_width 2.4.0 → 2.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e64e9230d9a0df4076a18bb935b6f1aeac16e079544aa65097b1ddb967b4960c
4
- data.tar.gz: 17d5cf547c89859318bc578e3e15a63ce0ab94d8801f2928097bb29d27cb6da0
3
+ metadata.gz: e0bcd900f031999ffa43dd6ef091b07b45d425b1ab04e6559f8c8e2c54e08710
4
+ data.tar.gz: ec3daad5e92107072f8f590d5f2217fd1213d7b25d6491bb3b20ee103f7a2087
5
5
  SHA512:
6
- metadata.gz: 1ea3ee4075d687ac1708a095ffe82e833389db1ec613ef43c9f8aa3aeeb5983d585cf6ad22e7d00d1020378344d9e9450096d6b7a04f27358d5418466b7fdf41
7
- data.tar.gz: 00a75b4f46daa89186bde97d5e2da86d197e5ef8704876c16085dd9a503620dc2aaf6e6e9030b9a438417f54d48070d0df06ca2b3e1530bc6d2d1be761f21054
6
+ metadata.gz: da2098a3b5d56518129b453aa88e113583403fefb37bb23eeb3f2ff3213426ea6a7076ab0a2f4c778f91a6cfbe3f98f59ea8dc6a4e37ff4bb6a1499536a5a4b9
7
+ data.tar.gz: 8559483daad47ca76757cf8701f09060bf66f99017a32d62b9c6f739794e1fcc15f2a4b9aae754252d7c8176571f18b590555ed3aa83748836e8e6a681bc7e10
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 2.4.2
4
+
5
+ More performance improvements:
6
+
7
+ - Optimize lookup of first 4096 codepoints
8
+ - Avoid overwrite lookup if no overwrites are set
9
+
10
+ ## 2.4.1
11
+
12
+ - Improve general performance!
13
+ - Further improve performance for ASCII strings
14
+
15
+ *You should really upgrade - it's much faster now!*
16
+
3
17
  ## 2.4.0
4
18
  - Improve performance for ASCII-only strings, by @fatkodima
5
19
  - Require Ruby 2.4
data/MIT-LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT LICENSE
2
2
 
3
- Copyright (c) 2011, 2015-2022 Jan Lelis
3
+ Copyright (c) 2011, 2015-2023 Jan Lelis
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining
6
6
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -10,6 +10,12 @@ Old Rubies which might still work: **2.6**, **2.5**, **2.4**
10
10
 
11
11
  For even older Rubies, use version 2.3.0 of this gem: **2.3**, **2.2**, **2.1**, **2.0**, **1.9**
12
12
 
13
+ ## Version 2.4.2 — Performance Updates
14
+
15
+ **If you use this gem, you should really upgrade to 2.4.2. It's often 100x faster, sometimes even 1000x and more!**
16
+
17
+ This is possible because the gem now detects if you use very basic (and common) characters, like ASCII characters. Furthermore, the character width lookup code has been optimized, so even when full-width characters are involved, the gem is much faster now.
18
+
13
19
  ## Version 2.0 — Breaking Changes
14
20
 
15
21
  Some features of this library were marked deprecated for a long time and have been removed with Version 2.0:
@@ -91,6 +97,9 @@ You can overwrite how to handle specific code points by passing a hash (or even
91
97
  Unicode::DisplayWidth.of("a\tb", 1, "\t".ord => 10)) # => tab counted as 10, so result is 12
92
98
  ```
93
99
 
100
+ Please note that using overwrites disables some performance optimizations of this gem.
101
+
102
+
94
103
  #### Emoji Support
95
104
 
96
105
  Emoji width support is included, but it must be activated manually. It will adjust the string's size for modifier and zero-width joiner sequences. You also need to add the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem to your Gemfile:
@@ -156,7 +165,7 @@ See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related
156
165
 
157
166
  ## Copyright & Info
158
167
 
159
- - Copyright (c) 2011, 2015-2022 Jan Lelis, https://janlelis.com, released under the MIT
168
+ - Copyright (c) 2011, 2015-2023 Jan Lelis, https://janlelis.com, released under the MIT
160
169
  license
161
170
  - Early versions based on runpaint's unicode-data interface: Copyright (c) 2009 Run Paint Run Run
162
171
  - Unicode data: https://www.unicode.org/copyright.html#Exhibit1
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Unicode
4
4
  class DisplayWidth
5
- VERSION = "2.4.0"
5
+ VERSION = "2.4.2"
6
6
  UNICODE_VERSION = "15.0.0"
7
7
  DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../../data/")
8
8
  INDEX_FILENAME = DATA_DIRECTORY + "/display_width.marshal.gz"
@@ -10,5 +10,25 @@ module Unicode
10
10
  serialized_data.force_encoding Encoding::BINARY
11
11
  INDEX = Marshal.load(serialized_data)
12
12
  end
13
+
14
+ def self.decompress_index(index, level)
15
+ index.flat_map{ |value|
16
+ if level > 0
17
+ if value.instance_of?(Array)
18
+ value[15] ||= nil
19
+ decompress_index(value, level - 1)
20
+ else
21
+ decompress_index([value] * 16, level - 1)
22
+ end
23
+ else
24
+ if value.instance_of?(Array)
25
+ value[15] ||= nil
26
+ value
27
+ else
28
+ [value] * 16
29
+ end
30
+ end
31
+ }
32
+ end
13
33
  end
14
34
  end
@@ -5,27 +5,74 @@ require_relative "display_width/index"
5
5
 
6
6
  module Unicode
7
7
  class DisplayWidth
8
- DEPTHS = [0x10000, 0x1000, 0x100, 0x10].freeze
8
+ INITIAL_DEPTH = 0x10000
9
9
  ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n\v\f\r\x0E\x0F]/
10
+ FIRST_4096 = decompress_index(INDEX[0][0], 1)
10
11
 
11
12
  def self.of(string, ambiguous = 1, overwrite = {}, options = {})
12
- # Optimization for ASCII-only strings without control symbols.
13
- if overwrite.empty? && string.ascii_only? && !string.match?(ASCII_NON_ZERO_REGEX)
14
- return string.size
13
+ if overwrite.empty?
14
+ # Optimization for ASCII-only strings without certain control symbols
15
+ if string.ascii_only?
16
+ if string.match?(ASCII_NON_ZERO_REGEX)
17
+ res = string.gsub(ASCII_NON_ZERO_REGEX, "").size - string.count("\b")
18
+ res < 0 ? 0 : res
19
+ else
20
+ string.size
21
+ end
22
+ else
23
+ width_no_overwrite(string, ambiguous, options)
24
+ end
25
+ else
26
+ width_all_features(string, ambiguous, overwrite, options)
15
27
  end
28
+ end
16
29
 
17
- # Add width of each char
18
- res = string.codepoints.inject(0){ |total_width, codepoint|
19
- index_or_value = INDEX
20
- codepoint_depth_offset = codepoint
21
- DEPTHS.each{ |depth|
22
- index_or_value = index_or_value[codepoint_depth_offset / depth]
23
- codepoint_depth_offset = codepoint_depth_offset % depth
24
- break unless index_or_value.is_a? Array
25
- }
26
- width = index_or_value.is_a?(Array) ? index_or_value[codepoint_depth_offset] : index_or_value
27
- width = ambiguous if width == :A
28
- total_width + (overwrite[codepoint] || width || 1)
30
+ def self.width_no_overwrite(string, ambiguous, options = {})
31
+ # Sum of all chars widths
32
+ res = string.codepoints.sum{ |codepoint|
33
+ if codepoint > 15 && codepoint < 161 # very common
34
+ next 1
35
+ elsif codepoint < 0x1001
36
+ width = FIRST_4096[codepoint]
37
+ else
38
+ width = INDEX
39
+ depth = INITIAL_DEPTH
40
+ while (width = width[codepoint / depth]).instance_of? Array
41
+ codepoint %= depth
42
+ depth /= 16
43
+ end
44
+ end
45
+
46
+ width == :A ? ambiguous : (width || 1)
47
+ }
48
+
49
+ # Subtract emoji error
50
+ res -= emoji_extra_width_of(string, ambiguous) if options[:emoji]
51
+
52
+ # Return result + prevent negative lengths
53
+ res < 0 ? 0 : res
54
+ end
55
+
56
+ # Same as .width_no_overwrite - but with applying overwrites for each char
57
+ def self.width_all_features(string, ambiguous, overwrite, options)
58
+ # Sum of all chars widths
59
+ res = string.codepoints.sum{ |codepoint|
60
+ next overwrite[codepoint] if overwrite[codepoint]
61
+
62
+ if codepoint > 15 && codepoint < 161 # very common
63
+ next 1
64
+ elsif codepoint < 0x1001
65
+ width = FIRST_4096[codepoint]
66
+ else
67
+ width = INDEX
68
+ depth = INITIAL_DEPTH
69
+ while (width = width[codepoint / depth]).instance_of? Array
70
+ codepoint %= depth
71
+ depth /= 16
72
+ end
73
+ end
74
+
75
+ width == :A ? ambiguous : (width || 1)
29
76
  }
30
77
 
31
78
  # Subtract emoji error
@@ -35,6 +82,7 @@ module Unicode
35
82
  res < 0 ? 0 : res
36
83
  end
37
84
 
85
+
38
86
  def self.emoji_extra_width_of(string, ambiguous = 1, overwrite = {}, _ = {})
39
87
  require "unicode/emoji"
40
88
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-display_width
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-03 00:00:00.000000000 Z
11
+ date: 2023-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -81,7 +81,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  requirements: []
84
- rubygems_version: 3.3.7
84
+ rubygems_version: 3.4.1
85
85
  signing_key:
86
86
  specification_version: 4
87
87
  summary: Determines the monospace display width of a string in Ruby.