unicode-display_width 1.1.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 139f5127ef080149e50f2156bda03d18748cc384
4
- data.tar.gz: 483e9a3386cb9b2c7f67568fb9c9b3570a8f5556
3
+ metadata.gz: dc469a91d5eda226bed21bce0669da6fa9e5d064
4
+ data.tar.gz: 5b376806fdd80debc0e1baa8694f0ef1eb697ca2
5
5
  SHA512:
6
- metadata.gz: ccdd24ab36d9077cdd808ade6e44cc67fcf5c1180eab122a80ecef5aae1adf2439996701dff84d7224fb4f48d5e0ef9a8de70a97a0b7c9395afd1f5cea756ae3
7
- data.tar.gz: 4431ed8b567006bda9418af105d666b2050845cb486b75e465990633d0ca395574fa7f2d66e7f0bdc69875bfc35f156a5f622a1e4d82ffa1149b260d46cf7044
6
+ metadata.gz: 63022c8bd0776c973347dcb3ccbf0a3c79610134f20ea82fd0832b2143517e492bd56f11564a5639c8a88136bf402abde808ae90b0a79a4ea2ff14020f36d496
7
+ data.tar.gz: 6765c915c17a1cd7a13c9fad7a4fbc6621a99e8c119b148f4a10fd33b4c3b81cf45a2eed2a54fafc6f36c6d268650e68d67740d04190281568b5f87ebb57e9f1
@@ -1,5 +1,12 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 1.2.0
4
+
5
+ - Add zero-width codepoint ranges: U+2060..U+206F, U+FFF0..U+FFF8, U+E0000..U+E0FFF
6
+ - Add full-width codepoint ranges: U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2FFFD, U+30000..U+3FFFD
7
+ - Experimental emoji support using the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem
8
+ - Fix minor bug in index compression scheme
9
+
3
10
  ## 1.1.3
4
11
 
5
12
  - Fix that non-UTF-8 encodings do not throw errors, patch by @windwiny
data/README.md CHANGED
@@ -22,7 +22,9 @@ X | (user defined) | Overwrites any other values
22
22
  3 | `"\u{2E3B}"` | THREE-EM DASH
23
23
  0 | General Categories: Mn, Me, Cf (non-arabic) | Excludes ARABIC format characters
24
24
  0 | `"\u{1160}".."\u{11FF}"` | HANGUL JUNGSEONG
25
+ 0 | `"\u{2060}".."\u{206F}"`, `"\u{FFF0}".."\u{FFF8}"`, `"\u{E0000}".."\u{E0FFF}"` | Ignorable ranges
25
26
  2 | East Asian Width: F, W | Full-width characters
27
+ 2 | `"\u{3400}"`..`"\u{4DBF}"`, `"\u{4E00}"`..`"\u{9FFF}"`, `"\u{F900}"`..`"\u{FAFF}"`, `"\u{20000}"`..`"\u{2FFFD}"`, `"\u{30000}"`..`"\u{3FFFD}"` | Full-width ranges
26
28
  1 or 2 | East Asian Width: A | Ambiguous characters, user defined, default: 1
27
29
  1 | All other codepoints | -
28
30
 
@@ -62,6 +64,22 @@ You can overwrite how to handle specific code points by passing a hash (or even
62
64
  Unicode::DisplayWidth.of("a\tb", 1, 0x09 => 10) # => 12
63
65
  ```
64
66
 
67
+ ### Emoji Support
68
+
69
+ Experimental emoji support is included. It will adjust the string's size for modifier and zero-width joiner sequences. You will need to add the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem to your Gemfile:
70
+
71
+ ```ruby
72
+ gem 'unicode-display_width'
73
+ gem 'unicode-emoji'
74
+ ```
75
+
76
+ You can then activate the emoji string width adjustments by passing `emoji: true` as the fourth parameter:
77
+
78
+ ```ruby
79
+ Unicode::DisplayWidth.of "🤾🏽‍♀️" # => 5
80
+ Unicode::DisplayWidth.of "🤾🏽‍♀️", 1, {}, emoji: true # => 2
81
+ ```
82
+
65
83
  ### Usage with String Extension
66
84
 
67
85
  Activated by default. Will be deactivated in version 2.0:
data/Rakefile CHANGED
@@ -37,6 +37,7 @@ desc "#{gemspec.name} | Test"
37
37
  task :test do
38
38
  sh "rspec spec"
39
39
  end
40
+ task :spec => :test
40
41
  task :default => :test
41
42
 
42
43
  # # #
@@ -5,7 +5,7 @@ module Unicode
5
5
  module DisplayWidth
6
6
  DEPTHS = [0x10000, 0x1000, 0x100, 0x10].freeze
7
7
 
8
- def self.of(string, ambiguous = 1, overwrite = {})
8
+ def self.of(string, ambiguous = 1, overwrite = {}, options = {})
9
9
  res = string.codepoints.inject(0){ |total_width, codepoint|
10
10
  index_or_value = INDEX
11
11
  codepoint_depth_offset = codepoint
@@ -19,8 +19,27 @@ module Unicode
19
19
  total_width + (overwrite[codepoint] || width || 1)
20
20
  }
21
21
 
22
+ res -= emoji_extra_width_of(string) if options[:emoji]
22
23
  res < 0 ? 0 : res
23
24
  end
25
+
26
+ def self.emoji_extra_width_of(string, ambiguous = 1, overwrite = {}, _ = {})
27
+ require "unicode/emoji"
28
+
29
+ extra_width = 0
30
+ modifier_regex = /[#{ Unicode::Emoji::EMOJI_MODIFIERS.pack("U*") }]/
31
+ zwj_regex = /(?<=#{ [Unicode::Emoji::ZWJ].pack("U") })./
32
+
33
+ string.scan(Unicode::Emoji::REGEX){ |emoji|
34
+ extra_width += 2 * emoji.match(modifier_regex).size
35
+
36
+ emoji.scan(zwj_regex){ |zwj_succ|
37
+ extra_width += self.of(zwj_succ, ambiguous, overwrite)
38
+ }
39
+ }
40
+
41
+ extra_width
42
+ end
24
43
  end
25
44
  end
26
45
 
@@ -1,6 +1,6 @@
1
1
  module Unicode
2
2
  module DisplayWidth
3
- VERSION = '1.1.3'
3
+ VERSION = '1.2.0'
4
4
  UNICODE_VERSION = "9.0.0".freeze
5
5
  DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + '/../../../data/').freeze
6
6
  INDEX_FILENAME = (DATA_DIRECTORY + '/display_width.marshal.gz').freeze
@@ -1,8 +1,8 @@
1
1
  require_relative '../display_width' unless defined? Unicode::DisplayWidth
2
2
 
3
3
  class String
4
- def display_width(ambiguous = 1, overwrite = {})
5
- Unicode::DisplayWidth.of(self, ambiguous, overwrite)
4
+ def display_width(ambiguous = 1, overwrite = {}, options = {})
5
+ Unicode::DisplayWidth.of(self, ambiguous, overwrite, options)
6
6
  end
7
7
 
8
8
  def display_size(*args)
@@ -1,6 +1,4 @@
1
- # coding: utf-8
2
-
3
- require 'unicode/display_width'
1
+ require_relative '../lib/unicode/display_width'
4
2
 
5
3
  describe 'Unicode::DisplayWidth.of' do
6
4
  describe '[east asian width]' do
@@ -12,6 +10,10 @@ describe 'Unicode::DisplayWidth.of' do
12
10
  expect( '一'.display_width ).to eq 2
13
11
  end
14
12
 
13
+ it 'returns 2 for W (which are currently unassigned)' do
14
+ expect( "\u{3FFFD}".display_width ).to eq 2
15
+ end
16
+
15
17
  it 'returns 1 for N' do
16
18
  expect( 'À'.display_width ).to eq 1
17
19
  end
@@ -53,6 +55,18 @@ describe 'Unicode::DisplayWidth.of' do
53
55
  it 'returns 0 for HANGUL JUNGSEONG chars' do
54
56
  expect( 'ᅠ'.display_width ).to eq 0
55
57
  end
58
+
59
+ it 'returns 0 for U+2060..U+206F' do
60
+ expect( "\u{2060}".display_width ).to eq 0
61
+ end
62
+
63
+ it 'returns 0 for U+FFF0..U+FFF8' do
64
+ expect( "\u{FFF0}".display_width ).to eq 0
65
+ end
66
+
67
+ it 'returns 0 for U+E0000..U+E0FFF' do
68
+ expect( "\u{E0000}".display_width ).to eq 0
69
+ end
56
70
  end
57
71
 
58
72
  describe '[special characters]' do
@@ -129,4 +143,10 @@ describe 'Unicode::DisplayWidth.of' do
129
143
  expect( 'À'.encode("UTF-16LE").display_width ).to eq 1
130
144
  end
131
145
  end
146
+
147
+ describe '[emoji]' do
148
+ it 'does not count modifiers and zjw sequences for valid emoji' do
149
+ expect( "🤾🏽‍♀️".display_width(1, {}, emoji: true) ).to eq 2
150
+ end
151
+ end
132
152
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-display_width
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-13 00:00:00.000000000 Z
11
+ date: 2017-04-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec