unicode-sequence_name 1.14.1 → 1.15.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4503c1bf10b43d732a5ee88c9a9059837f2190910f10a0c33fdfda6dd516b68f
4
- data.tar.gz: 6d821ef47679dc9be1aef97a9a489051127b54f160e2fd7609a9cded3ee5decf
3
+ metadata.gz: 9027ebad3f5176ac6e6e554460da72a6fc2edcefa99e73027df557693f197d3a
4
+ data.tar.gz: 34ea77e121ff2ed7c88e94da43bfe78ad313b52fba516255459de01ba7027d5e
5
5
  SHA512:
6
- metadata.gz: e5e8ff63ca2e1e97a604fdf4bc4f8067afb8eba9d7e163a6ddd0eb6e13b6b565a19cc67b2f43b70566d3e5903dc429a6b8fbfb2379a938d201bd1482dccb41c9
7
- data.tar.gz: b8728edb403073493e7d135016981ffdd2d046f1369a696fb23e0e4ec36075035aee38a19f4e03545b8d9174490e7e45235283ab4491af5fe654e8c035c4fb67
6
+ metadata.gz: 537a1b631841da656b8bd8a1445791383271f6f7a2fff62c9d413e0db1780d8af1d67f98c7793ec797999985ed6d2b694741c0e101ec14899a37f5e010864fe9
7
+ data.tar.gz: f4f0943434c71e329bfa4b7c9551804ab46dc1547a629bde186c95c0aeb2087661232bc74d067dac50a03ce6d12411d1753b582c664dd76d3b547fc89d1f12d5
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  ## CHANGELOG
2
2
 
3
+ ### 1.15.1
4
+
5
+ - Optimize index size by substituting common words
6
+
7
+ ### 1.15.0
8
+
9
+ - Include Emoji sequences which are not fully qualified (VS16 is missing) in index
10
+ - You can use the newly introduced method `Unicode::SequenceName.fully_qualified`
11
+ if you want to exclude non-fully qualified sequences
12
+
3
13
  ### 1.14.1
4
14
 
5
15
  - Fix bug that some singleton Emoji would be included in index
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- unicode-sequence_name (1.14.1)
4
+ unicode-sequence_name (1.15.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -10,7 +10,7 @@ IVD version: **2022-09-13** (September 2022)
10
10
 
11
11
  Supported Rubies: **3.3**, **3.2**, **3.1**, **3.0**
12
12
 
13
- Old Rubies which might still work: **2.7**, **2.6**, **2.5**, **2.4**, **2.3**, **2.X**
13
+ Old Rubies which might still work: **2.X**
14
14
 
15
15
  ## Usage
16
16
 
@@ -32,10 +32,10 @@ Unicode::SequenceName.of "🙂‍↔️" # => "HEAD SHAKING HORIZONTALLY"
32
32
  Unicode::SequenceName.of "‘︁" # => "LEFT SINGLE QUOTATION MARK (right-justified fullwidth form)"
33
33
  ```
34
34
 
35
- Names for singular codepoints are not included, you can use [unicode-name](https://github.com/janlelis/unicode-name) for that purpose. This is how you could use both libraries together to get the most relevant name of a character:
35
+ Names for singular codepoints are not included, but you can use [unicode-name](https://github.com/janlelis/unicode-name) for that purpose. This is how you could use both libraries together to get the most relevant name of a character:
36
36
 
37
37
  ```ruby
38
- name = Unicode::Name.sequence_name(char) || Unicode::Name.readable(char)
38
+ name = Unicode::SequenceName.of(char) || Unicode::Name.readable(char)
39
39
  ```
40
40
 
41
41
  ## Also See
Binary file
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Unicode
4
4
  module SequenceName
5
- VERSION = "1.14.1"
5
+ VERSION = "1.15.1"
6
6
  UNICODE_VERSION = "16.0.0"
7
7
  EMOJI_VERSION = "16.0"
8
8
  IVD_VERSION = "2022-09-13"
@@ -5,14 +5,24 @@ module Unicode
5
5
  def self.sequence_name(string)
6
6
  codepoints = get_codepoint_values(string)
7
7
  require_relative "sequence_name/index" unless defined? ::Unicode::SequenceName::INDEX
8
- if res = INDEX[:SEQUENCES][codepoints]
9
- res
8
+ if res = INDEX[:SEQUENCES][codepoints] || INDEX[:SEQUENCES_NOT_QUALIFIED][codepoints]
9
+ insert_words(res)
10
10
  else
11
11
  nil
12
12
  end
13
13
  end
14
14
  class << self; alias of sequence_name; end
15
15
 
16
+ def self.fully_qualified(string)
17
+ codepoints = get_codepoint_values(string)
18
+ require_relative "sequence_name/index" unless defined? ::Unicode::SequenceName::INDEX
19
+ if res = INDEX[:SEQUENCES][codepoints]
20
+ insert_words(res)
21
+ else
22
+ nil
23
+ end
24
+ end
25
+
16
26
  def self.get_codepoint_values(string)
17
27
  if string.valid_encoding?
18
28
  return string.codepoints
@@ -26,8 +36,20 @@ module Unicode
26
36
  raise(ArgumentError, "Unicode::SequenceName.of must be given a valid string")
27
37
  end
28
38
 
39
+ def self.insert_words(raw_name)
40
+ raw_name.chars.map{ |char|
41
+ codepoint = char.ord
42
+ if codepoint < INDEX[:REPLACE_BASE]
43
+ char
44
+ else
45
+ "#{INDEX[:COMMON_WORDS][codepoint - INDEX[:REPLACE_BASE]]} "
46
+ end
47
+ }.join.chomp
48
+ end
49
+
29
50
  class << self
30
51
  private :get_codepoint_values
52
+ private :insert_words
31
53
  end
32
54
  end
33
55
  end
@@ -3,7 +3,7 @@ require "minitest/autorun"
3
3
 
4
4
  describe Unicode::SequenceName do
5
5
  describe ".sequence_name (alias .of)" do
6
- it "will return name for that sequence name" do
6
+ it "will return name for that sequence" do
7
7
  assert_equal "DOUBLE EXCLAMATION MARK (text style)", Unicode::SequenceName.of("‼︎")
8
8
  assert_equal "CJK COMPATIBILITY IDEOGRAPH-2F81F", Unicode::SequenceName.of("㓟︀")
9
9
  assert_equal "MYANMAR LETTER NGA (dotted form)", Unicode::SequenceName.of("င︀")
@@ -19,6 +19,13 @@ describe Unicode::SequenceName do
19
19
  assert_equal "LEFT SINGLE QUOTATION MARK (right-justified fullwidth form)", Unicode::SequenceName.of("‘︁") # Unicode 16.0
20
20
  end
21
21
 
22
+ it "will return name for that sequence (not fully qualified: VS16 missing)" do
23
+ assert_equal "COUPLE WITH HEART: WOMAN, WOMAN, DARK SKIN TONE, MEDIUM SKIN TONE", Unicode::SequenceName.of("๐Ÿ‘ฉ๐Ÿฟโ€โคโ€๐Ÿ‘ฉ๐Ÿฝ")
24
+ assert_equal "MAN JUDGE", Unicode::SequenceName.of("๐Ÿ‘จโ€โš–")
25
+ assert_equal "WOMAN BOUNCING BALL", Unicode::SequenceName.of("โ›นโ€โ™€๏ธ") # First VS16 missing
26
+ assert_equal "WOMAN BOUNCING BALL", Unicode::SequenceName.of("โ›น๏ธโ€โ™€") # Second VS16 missing
27
+ end
28
+
22
29
  it "will return nil for characters without name" do
23
30
  assert_nil Unicode::SequenceName.of("\u{10c50}")
24
31
  assert_nil Unicode::SequenceName.of("bla")
@@ -28,5 +35,14 @@ describe Unicode::SequenceName do
28
35
  assert_nil Unicode::SequenceName.of("โณ")
29
36
  end
30
37
  end
38
+
39
+ describe ".fully_qualified" do
40
+ it "will *not* return name for that sequence (not fully qualified: VS16 missing)" do
41
+ assert_nil Unicode::SequenceName.fully_qualified("๐Ÿ‘ฉ๐Ÿฟโ€โคโ€๐Ÿ‘ฉ๐Ÿฝ")
42
+ assert_nil Unicode::SequenceName.fully_qualified("๐Ÿ‘จโ€โš–")
43
+ assert_nil Unicode::SequenceName.fully_qualified("โ›นโ€โ™€๏ธ") # First VS16 missing
44
+ assert_nil Unicode::SequenceName.fully_qualified("โ›น๏ธโ€โ™€") # Second VS16 missing
45
+ end
46
+ end
31
47
  end
32
48
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-sequence_name
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.14.1
4
+ version: 1.15.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-04 00:00:00.000000000 Z
11
+ date: 2024-10-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: "[Unicode 16.0.0][Emoji 16.0] Returns the name of a Unicode code point
14
14
  sequence, if one exists"
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  requirements: []
56
- rubygems_version: 3.5.20
56
+ rubygems_version: 3.5.21
57
57
  signing_key:
58
58
  specification_version: 4
59
59
  summary: Returns the name of a Unicode codepoint sequence, if one exists