unicode-name 1.13.1 → 1.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ad37a86c74a53f8ed0fd5b7d99958e63f06537392c1156810405d8cf83e98082
4
- data.tar.gz: ec21c556d90d33b7dbef6ac61cfca62805505f4a3afba068a5efc8f3d967b686
3
+ metadata.gz: ca6d8f90ce7c5fa9c9da362be1d90b10260da01d8ac97e2412e0699fa69ca40a
4
+ data.tar.gz: a3a2a417c76906c32fe429ce51e16c543696687bb0078340aecb293a65595800
5
5
  SHA512:
6
- metadata.gz: 5a86c60936a59105991a4e769ecfa2ea6cff752ab85a4ad87ca800d2455d13cf2b276cfa20a4d281bc4b91eb4f9c7155293b4da8a454d544a992045b66b9006e
7
- data.tar.gz: 8e2ad3afb63a2dda11dee3b2a5b3e7ea24e275e3744fbfe14615b8cca698ae14b2d9784cf6180e4d4136eb058fe1ae4b1a1fddd1ff34fc220ac059a0639586fb
6
+ metadata.gz: 9ad0910912fcf5e226e00955c72cd3325796acc49137ce2e9c141fdcaa5518585fb38795de6587cd792d22b428110d08592a212220dc09a91f3e92016140a86a
7
+ data.tar.gz: 5b8de2a4c57c893d6e18ef4ce5b876a032f0cc0f3726504753a0dfbf9b7b4e4bf18d2b6b7aadf1a976231079d285872a6203e352d66fe26154883c31237d9aca
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## CHANGELOG
2
2
 
3
+ ### 1.13.2
4
+
5
+ - Optimize index size by removing ranges that have codepoints embedded
6
+ - Optimize index size by substituting common words
7
+ - Fix missing Tangut ideographs
8
+
3
9
  ### 1.13.1
4
10
 
5
11
  Bugfix release:
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- unicode-name (1.13.1)
4
+ unicode-name (1.13.2)
5
5
  unicode-types (~> 1.10)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -42,7 +42,11 @@ Unicode::Name.readable("\0") # => "NULL"
42
42
  Unicode::Name.readable("\u{FFFFD}") # => "<private-use-FFFFD>"
43
43
  ```
44
44
 
45
- See [unicode-sequence_names](https://github.com/janlelis/unicode-sequence_name) for character names of more complex codepoint sequences.
45
+ See [unicode-sequence_name](https://github.com/janlelis/unicode-sequence_name) for character names of more complex codepoint sequences. This is how you could use both libraries together to get the most relevant name of a character:
46
+
47
+ ```ruby
48
+ name = Unicode::SequenceName.of(char) || Unicode::Name.readable(char)
49
+ ```
46
50
 
47
51
  See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related micro libraries.
48
52
 
data/data/name.marshal.gz CHANGED
Binary file
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Unicode
4
4
  module Name
5
- VERSION = "1.13.1"
5
+ VERSION = "1.13.2"
6
6
  UNICODE_VERSION = "16.0.0"
7
7
  DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../../data/").freeze
8
8
  INDEX_FILENAME = (DATA_DIRECTORY + "/name.marshal.gz").freeze
data/lib/unicode/name.rb CHANGED
@@ -11,11 +11,18 @@ module Unicode
11
11
  def self.unicode_name(char)
12
12
  codepoint = char.unpack("U")[0]
13
13
  require_relative "name/index" unless defined? ::Unicode::Name::INDEX
14
+
14
15
  if res = INDEX[:NAMES][codepoint]
15
- res
16
- elsif INDEX[:CJK].any?{ |cjk_range| codepoint >= cjk_range[0] && codepoint <= cjk_range[1] }
17
- "CJK UNIFIED IDEOGRAPH-%.4X" % codepoint
18
- elsif codepoint >= HANGUL_START && codepoint <= HANGUL_END
16
+ return insert_words(res)
17
+ end
18
+
19
+ INDEX[:CP_RANGES].each{|prefix, range|
20
+ if range.any?{ |range| codepoint >= range[0] && codepoint <= range[1] }
21
+ return "%s%.4X" %[prefix, codepoint]
22
+ end
23
+ }
24
+
25
+ if codepoint >= HANGUL_START && codepoint <= HANGUL_END
19
26
  "HANGUL SYLLABLE %s" % hangul_decomposition(codepoint)
20
27
  else
21
28
  nil
@@ -82,6 +89,17 @@ module Unicode
82
89
  initial = base / HANGUL_MEDIAL_MAX
83
90
  "#{INDEX[:JAMO][:INITIAL][initial]}#{INDEX[:JAMO][:MEDIAL][medial]}#{INDEX[:JAMO][:FINAL][final]}"
84
91
  end
92
+
93
# Expands a compressed name from the index into its full form.
#
# Every character whose codepoint is below INDEX[:REPLACE_BASE] is kept
# verbatim. Any other character is a placeholder: it is replaced by the
# entry of INDEX[:COMMON_WORDS] found at (codepoint - INDEX[:REPLACE_BASE]),
# followed by a single separator space.
#
# @param raw_name [String] compressed name (e.g. a value of INDEX[:NAMES])
# @return [String] the name with all placeholders expanded
def self.insert_words(raw_name)
  # Look the tables up once instead of once per character.
  replace_base = INDEX[:REPLACE_BASE]
  common_words = INDEX[:COMMON_WORDS]

  expanded = raw_name.each_char.map do |char|
    offset = char.ord - replace_base
    offset < 0 ? char : "#{common_words[offset]} "
  end.join

  # A placeholder in last position leaves a dangling separator space.
  # Argument-less `chomp` only strips a trailing line terminator, so the
  # space has to be named explicitly for it to be removed.
  expanded.chomp(" ")
end
85
103
  end
86
104
  end
87
105
 
@@ -9,11 +9,11 @@ describe Unicode::Name do
9
9
  assert_equal "REPLACEMENT CHARACTER", Unicode::Name.of("�")
10
10
  end
11
11
 
12
- it "works for CJK Ideographs" do
12
+ it "works for CJK unified ideographs" do
13
13
  assert_equal "CJK UNIFIED IDEOGRAPH-4E01", Unicode::Name.of("丁")
14
14
  end
15
15
 
16
- it "works for Hangul Syllables" do
16
+ it "works for Hangul syllables" do
17
17
  assert_equal "HANGUL SYLLABLE HAN", Unicode::Name.of("한")
18
18
  assert_equal "HANGUL SYLLABLE GAG", Unicode::Name.of("각")
19
19
  assert_equal "HANGUL SYLLABLE GAE", Unicode::Name.of("개")
@@ -21,6 +21,14 @@ describe Unicode::Name do
21
21
  assert_equal "HANGUL SYLLABLE DWALB", Unicode::Name.of("돫")
22
22
  end
23
23
 
24
+ it "works with some ranges that have the codepoint embedded" do
25
+ assert_equal "EGYPTIAN HIEROGLYPH-143F5", Unicode::Name.of("𔏵")
26
+ assert_equal "KHITAN SMALL SCRIPT CHARACTER-18C12", Unicode::Name.of("𘰒")
27
+ assert_equal "TANGUT IDEOGRAPH-18D00", Unicode::Name.of("𘴀")
28
+ assert_equal "NUSHU CHARACTER-1B171", Unicode::Name.of("𛅱")
29
+ assert_equal "CJK COMPATIBILITY IDEOGRAPH-2F9B1", Unicode::Name.of("𧃒")
30
+ end
31
+
24
32
  it "will return nil for characters without name" do
25
33
  assert_nil Unicode::Name.of("\u{10c50}")
26
34
  assert_nil Unicode::Name.of("\0")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-name
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.13.1
4
+ version: 1.13.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-03 00:00:00.000000000 Z
11
+ date: 2024-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-types
@@ -68,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
68
68
  - !ruby/object:Gem::Version
69
69
  version: '0'
70
70
  requirements: []
71
- rubygems_version: 3.5.20
71
+ rubygems_version: 3.5.21
72
72
  signing_key:
73
73
  specification_version: 4
74
74
  summary: Returns name/aliases/label of a Unicode codepoint