unicode-name 1.13.1 → 1.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +1 -1
- data/README.md +5 -1
- data/data/name.marshal.gz +0 -0
- data/lib/unicode/name/constants.rb +1 -1
- data/lib/unicode/name.rb +22 -4
- data/spec/unicode_name_spec.rb +10 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca6d8f90ce7c5fa9c9da362be1d90b10260da01d8ac97e2412e0699fa69ca40a
|
4
|
+
data.tar.gz: a3a2a417c76906c32fe429ce51e16c543696687bb0078340aecb293a65595800
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ad0910912fcf5e226e00955c72cd3325796acc49137ce2e9c141fdcaa5518585fb38795de6587cd792d22b428110d08592a212220dc09a91f3e92016140a86a
|
7
|
+
data.tar.gz: 5b8de2a4c57c893d6e18ef4ce5b876a032f0cc0f3726504753a0dfbf9b7b4e4bf18d2b6b7aadf1a976231079d285872a6203e352d66fe26154883c31237d9aca
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -42,7 +42,11 @@ Unicode::Name.readable("\0") # => "NULL"
|
|
42
42
|
Unicode::Name.readable("\u{FFFFD}") # => "<private-use-FFFFD>"
|
43
43
|
```
|
44
44
|
|
45
|
-
See [unicode-sequence_names](https://github.com/janlelis/unicode-sequence_name) for character names of more complex codepoint sequences.
|
45
|
+
See [unicode-sequence_names](https://github.com/janlelis/unicode-sequence_name) for character names of more complex codepoint sequences. This is how you could use both libraries together to get the most relevant name of a character:
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
name = Unicode::SequenceName.of(char) || Unicode::Name.readable(char)
|
49
|
+
```
|
46
50
|
|
47
51
|
See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related micro libraries.
|
48
52
|
|
data/data/name.marshal.gz
CHANGED
Binary file
|
data/lib/unicode/name.rb
CHANGED
@@ -11,11 +11,18 @@ module Unicode
|
|
11
11
|
def self.unicode_name(char)
|
12
12
|
codepoint = char.unpack("U")[0]
|
13
13
|
require_relative "name/index" unless defined? ::Unicode::Name::INDEX
|
14
|
+
|
14
15
|
if res = INDEX[:NAMES][codepoint]
|
15
|
-
res
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
return insert_words(res)
|
17
|
+
end
|
18
|
+
|
19
|
+
INDEX[:CP_RANGES].each{|prefix, range|
|
20
|
+
if range.any?{ |range| codepoint >= range[0] && codepoint <= range[1] }
|
21
|
+
return "%s%.4X" %[prefix, codepoint]
|
22
|
+
end
|
23
|
+
}
|
24
|
+
|
25
|
+
if codepoint >= HANGUL_START && codepoint <= HANGUL_END
|
19
26
|
"HANGUL SYLLABLE %s" % hangul_decomposition(codepoint)
|
20
27
|
else
|
21
28
|
nil
|
@@ -82,6 +89,17 @@ module Unicode
|
|
82
89
|
initial = base / HANGUL_MEDIAL_MAX
|
83
90
|
"#{INDEX[:JAMO][:INITIAL][initial]}#{INDEX[:JAMO][:MEDIAL][medial]}#{INDEX[:JAMO][:FINAL][final]}"
|
84
91
|
end
|
92
|
+
|
93
|
+
def self.insert_words(raw_name)
|
94
|
+
raw_name.chars.map{ |char|
|
95
|
+
codepoint = char.ord
|
96
|
+
if codepoint < INDEX[:REPLACE_BASE]
|
97
|
+
char
|
98
|
+
else
|
99
|
+
"#{INDEX[:COMMON_WORDS][codepoint - INDEX[:REPLACE_BASE]]} "
|
100
|
+
end
|
101
|
+
}.join.chomp
|
102
|
+
end
|
85
103
|
end
|
86
104
|
end
|
87
105
|
|
data/spec/unicode_name_spec.rb
CHANGED
@@ -9,11 +9,11 @@ describe Unicode::Name do
|
|
9
9
|
assert_equal "REPLACEMENT CHARACTER", Unicode::Name.of("�")
|
10
10
|
end
|
11
11
|
|
12
|
-
it "works for CJK
|
12
|
+
it "works for CJK unified ideographs" do
|
13
13
|
assert_equal "CJK UNIFIED IDEOGRAPH-4E01", Unicode::Name.of("丁")
|
14
14
|
end
|
15
15
|
|
16
|
-
it "works for Hangul
|
16
|
+
it "works for Hangul syllables" do
|
17
17
|
assert_equal "HANGUL SYLLABLE HAN", Unicode::Name.of("한")
|
18
18
|
assert_equal "HANGUL SYLLABLE GAG", Unicode::Name.of("각")
|
19
19
|
assert_equal "HANGUL SYLLABLE GAE", Unicode::Name.of("개")
|
@@ -21,6 +21,14 @@ describe Unicode::Name do
|
|
21
21
|
assert_equal "HANGUL SYLLABLE DWALB", Unicode::Name.of("돫")
|
22
22
|
end
|
23
23
|
|
24
|
+
it "works with some ranges that have the codepoint embedded" do
|
25
|
+
assert_equal "EGYPTIAN HIEROGLYPH-143F5", Unicode::Name.of("\u{143F5}")
|
26
|
+
assert_equal "KHITAN SMALL SCRIPT CHARACTER-18C12", Unicode::Name.of("𘰒")
|
27
|
+
assert_equal "TANGUT IDEOGRAPH-18D00", Unicode::Name.of("𘴀")
|
28
|
+
assert_equal "NUSHU CHARACTER-1B171", Unicode::Name.of("𛅱")
|
29
|
+
assert_equal "CJK COMPATIBILITY IDEOGRAPH-2F9B1", Unicode::Name.of("𧃒")
|
30
|
+
end
|
31
|
+
|
24
32
|
it "will return nil for characters without name" do
|
25
33
|
assert_nil Unicode::Name.of("\u{10c50}")
|
26
34
|
assert_nil Unicode::Name.of("\0")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-name
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.13.1
|
4
|
+
version: 1.13.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode-types
|
@@ -68,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '0'
|
70
70
|
requirements: []
|
71
|
-
rubygems_version: 3.5.
|
71
|
+
rubygems_version: 3.5.21
|
72
72
|
signing_key:
|
73
73
|
specification_version: 4
|
74
74
|
summary: Returns name/aliases/label of a Unicode codepoint
|