unicode-sequence_name 1.14.1 → 1.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4503c1bf10b43d732a5ee88c9a9059837f2190910f10a0c33fdfda6dd516b68f
4
- data.tar.gz: 6d821ef47679dc9be1aef97a9a489051127b54f160e2fd7609a9cded3ee5decf
3
+ metadata.gz: 9027ebad3f5176ac6e6e554460da72a6fc2edcefa99e73027df557693f197d3a
4
+ data.tar.gz: 34ea77e121ff2ed7c88e94da43bfe78ad313b52fba516255459de01ba7027d5e
5
5
  SHA512:
6
- metadata.gz: e5e8ff63ca2e1e97a604fdf4bc4f8067afb8eba9d7e163a6ddd0eb6e13b6b565a19cc67b2f43b70566d3e5903dc429a6b8fbfb2379a938d201bd1482dccb41c9
7
- data.tar.gz: b8728edb403073493e7d135016981ffdd2d046f1369a696fb23e0e4ec36075035aee38a19f4e03545b8d9174490e7e45235283ab4491af5fe654e8c035c4fb67
6
+ metadata.gz: 537a1b631841da656b8bd8a1445791383271f6f7a2fff62c9d413e0db1780d8af1d67f98c7793ec797999985ed6d2b694741c0e101ec14899a37f5e010864fe9
7
+ data.tar.gz: f4f0943434c71e329bfa4b7c9551804ab46dc1547a629bde186c95c0aeb2087661232bc74d067dac50a03ce6d12411d1753b582c664dd76d3b547fc89d1f12d5
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  ## CHANGELOG
2
2
 
3
+ ### 1.15.1
4
+
5
+ - Optimize index size by substituting common words
6
+
7
+ ### 1.15.0
8
+
9
+ - Include Emoji sequences which are not fully qualified (VS16 is missing) in index
10
+ - You can use the newly introduced method `Unicode::SequenceName.fully_qualified`
11
+ if you want to exclude non-fully qualified sequences
12
+
3
13
  ### 1.14.1
4
14
 
5
15
  - Fix bug that some singleton Emoji would be included in index
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- unicode-sequence_name (1.14.1)
4
+ unicode-sequence_name (1.15.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -10,7 +10,7 @@ IVD version: **2022-09-13** (September 2022)
10
10
 
11
11
  Supported Rubies: **3.3**, **3.2**, **3.1**, **3.0**
12
12
 
13
- Old Rubies which might still work: **2.7**, **2.6**, **2.5**, **2.4**, **2.3**, **2.X**
13
+ Old Rubies which might still work: **2.X**
14
14
 
15
15
  ## Usage
16
16
 
@@ -32,10 +32,10 @@ Unicode::SequenceName.of "🙂‍↔️" # => "HEAD SHAKING HORIZONTALLY"
32
32
  Unicode::SequenceName.of "‘︁" # => "LEFT SINGLE QUOTATION MARK (right-justified fullwidth form)"
33
33
  ```
34
34
 
35
- Names for singular codepoints are not included, you can use [unicode-name](https://github.com/janlelis/unicode-name) for that purpose. This is how you could use both libraries together to get the most relevant name of a character:
35
+ Names for singular codepoints are not included, but you can use [unicode-name](https://github.com/janlelis/unicode-name) for that purpose. This is how you could use both libraries together to get the most relevant name of a character:
36
36
 
37
37
  ```ruby
38
- name = Unicode::Name.sequence_name(char) || Unicode::Name.readable(char)
38
+ name = Unicode::SequenceName.of(char) || Unicode::Name.readable(char)
39
39
  ```
40
40
 
41
41
  ## Also See
Binary file
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Unicode
4
4
  module SequenceName
5
- VERSION = "1.14.1"
5
+ VERSION = "1.15.1"
6
6
  UNICODE_VERSION = "16.0.0"
7
7
  EMOJI_VERSION = "16.0"
8
8
  IVD_VERSION = "2022-09-13"
@@ -5,14 +5,24 @@ module Unicode
5
5
  def self.sequence_name(string)
6
6
  codepoints = get_codepoint_values(string)
7
7
  require_relative "sequence_name/index" unless defined? ::Unicode::SequenceName::INDEX
8
- if res = INDEX[:SEQUENCES][codepoints]
9
- res
8
+ if res = INDEX[:SEQUENCES][codepoints] || INDEX[:SEQUENCES_NOT_QUALIFIED][codepoints]
9
+ insert_words(res)
10
10
  else
11
11
  nil
12
12
  end
13
13
  end
14
14
  class << self; alias of sequence_name; end
15
15
 
16
+ def self.fully_qualified(string)
17
+ codepoints = get_codepoint_values(string)
18
+ require_relative "sequence_name/index" unless defined? ::Unicode::SequenceName::INDEX
19
+ if res = INDEX[:SEQUENCES][codepoints]
20
+ insert_words(res)
21
+ else
22
+ nil
23
+ end
24
+ end
25
+
16
26
  def self.get_codepoint_values(string)
17
27
  if string.valid_encoding?
18
28
  return string.codepoints
@@ -26,8 +36,20 @@ module Unicode
26
36
  raise(ArgumentError, "Unicode::SequenceName.of must be given a valid string")
27
37
  end
28
38
 
39
+ def self.insert_words(raw_name)
40
+ raw_name.chars.map{ |char|
41
+ codepoint = char.ord
42
+ if codepoint < INDEX[:REPLACE_BASE]
43
+ char
44
+ else
45
+ "#{INDEX[:COMMON_WORDS][codepoint - INDEX[:REPLACE_BASE]]} "
46
+ end
47
+ }.join.chomp
48
+ end
49
+
29
50
  class << self
30
51
  private :get_codepoint_values
52
+ private :insert_words
31
53
  end
32
54
  end
33
55
  end
@@ -3,7 +3,7 @@ require "minitest/autorun"
3
3
 
4
4
  describe Unicode::SequenceName do
5
5
  describe ".sequence_name (alias .of)" do
6
- it "will return name for that sequence name" do
6
+ it "will return name for that sequence" do
7
7
  assert_equal "DOUBLE EXCLAMATION MARK (text style)", Unicode::SequenceName.of("‼︎")
8
8
  assert_equal "CJK COMPATIBILITY IDEOGRAPH-2F81F", Unicode::SequenceName.of("㓟︀")
9
9
  assert_equal "MYANMAR LETTER NGA (dotted form)", Unicode::SequenceName.of("င︀")
@@ -19,6 +19,13 @@ describe Unicode::SequenceName do
19
19
  assert_equal "LEFT SINGLE QUOTATION MARK (right-justified fullwidth form)", Unicode::SequenceName.of("‘︁") # Unicode 16.0
20
20
  end
21
21
 
22
+ it "will return name for that sequence (not fully qualified: VS16 missing)" do
23
+ assert_equal "COUPLE WITH HEART: WOMAN, WOMAN, DARK SKIN TONE, MEDIUM SKIN TONE", Unicode::SequenceName.of("👩🏿‍❤‍👩🏽")
24
+ assert_equal "MAN JUDGE", Unicode::SequenceName.of("👨‍⚖")
25
+ assert_equal "WOMAN BOUNCING BALL", Unicode::SequenceName.of("⛹‍♀️") # First VS16 missing
26
+ assert_equal "WOMAN BOUNCING BALL", Unicode::SequenceName.of("⛹️‍♀") # Second VS16 missing
27
+ end
28
+
22
29
  it "will return nil for characters without name" do
23
30
  assert_nil Unicode::SequenceName.of("\u{10c50}")
24
31
  assert_nil Unicode::SequenceName.of("bla")
@@ -28,5 +35,14 @@ describe Unicode::SequenceName do
28
35
  assert_nil Unicode::SequenceName.of("⏳")
29
36
  end
30
37
  end
38
+
39
+ describe ".fully_qualified" do
40
+ it "will *not* return name for that sequence (not fully qualified: VS16 missing)" do
41
+ assert_nil Unicode::SequenceName.fully_qualified("👩🏿‍❤‍👩🏽")
42
+ assert_nil Unicode::SequenceName.fully_qualified("👨‍⚖")
43
+ assert_nil Unicode::SequenceName.fully_qualified("⛹‍♀️") # First VS16 missing
44
+ assert_nil Unicode::SequenceName.fully_qualified("⛹️‍♀") # Second VS16 missing
45
+ end
46
+ end
31
47
  end
32
48
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-sequence_name
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.14.1
4
+ version: 1.15.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-04 00:00:00.000000000 Z
11
+ date: 2024-10-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: "[Unicode 16.0.0][Emoji 16.0] Returns the name of a Unicode code point
14
14
  sequence, if one exists"
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  requirements: []
56
- rubygems_version: 3.5.20
56
+ rubygems_version: 3.5.21
57
57
  signing_key:
58
58
  specification_version: 4
59
59
  summary: Returns the name of a Unicode codepoint sequence, if one exists