unicode-name 1.13.1 → 1.13.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +1 -1
- data/README.md +5 -1
- data/data/name.marshal.gz +0 -0
- data/lib/unicode/name/constants.rb +1 -1
- data/lib/unicode/name.rb +22 -4
- data/spec/unicode_name_spec.rb +10 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca6d8f90ce7c5fa9c9da362be1d90b10260da01d8ac97e2412e0699fa69ca40a
|
4
|
+
data.tar.gz: a3a2a417c76906c32fe429ce51e16c543696687bb0078340aecb293a65595800
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ad0910912fcf5e226e00955c72cd3325796acc49137ce2e9c141fdcaa5518585fb38795de6587cd792d22b428110d08592a212220dc09a91f3e92016140a86a
|
7
|
+
data.tar.gz: 5b8de2a4c57c893d6e18ef4ce5b876a032f0cc0f3726504753a0dfbf9b7b4e4bf18d2b6b7aadf1a976231079d285872a6203e352d66fe26154883c31237d9aca
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -42,7 +42,11 @@ Unicode::Name.readable("\0") # => "NULL"
|
|
42
42
|
Unicode::Name.readable("\u{FFFFD}") # => "<private-use-FFFFD>"
|
43
43
|
```
|
44
44
|
|
45
|
-
See [unicode-sequence_name](https://github.com/janlelis/unicode-sequence_name) for character names of more complex codepoint sequences.
|
45
|
+
See [unicode-sequence_name](https://github.com/janlelis/unicode-sequence_name) for character names of more complex codepoint sequences. This is how you could use both libraries together to get the most relevant name of a character:
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
name = Unicode::SequenceName.of(char) || Unicode::Name.readable(char)
|
49
|
+
```
|
46
50
|
|
47
51
|
See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related micro libraries.
|
48
52
|
|
data/data/name.marshal.gz
CHANGED
Binary file
|
data/lib/unicode/name.rb
CHANGED
@@ -11,11 +11,18 @@ module Unicode
|
|
11
11
|
def self.unicode_name(char)
|
12
12
|
codepoint = char.unpack("U")[0]
|
13
13
|
require_relative "name/index" unless defined? ::Unicode::Name::INDEX
|
14
|
+
|
14
15
|
if res = INDEX[:NAMES][codepoint]
|
15
|
-
res
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
return insert_words(res)
|
17
|
+
end
|
18
|
+
|
19
|
+
INDEX[:CP_RANGES].each{|prefix, range|
|
20
|
+
if range.any?{ |range| codepoint >= range[0] && codepoint <= range[1] }
|
21
|
+
return "%s%.4X" %[prefix, codepoint]
|
22
|
+
end
|
23
|
+
}
|
24
|
+
|
25
|
+
if codepoint >= HANGUL_START && codepoint <= HANGUL_END
|
19
26
|
"HANGUL SYLLABLE %s" % hangul_decomposition(codepoint)
|
20
27
|
else
|
21
28
|
nil
|
@@ -82,6 +89,17 @@ module Unicode
|
|
82
89
|
initial = base / HANGUL_MEDIAL_MAX
|
83
90
|
"#{INDEX[:JAMO][:INITIAL][initial]}#{INDEX[:JAMO][:MEDIAL][medial]}#{INDEX[:JAMO][:FINAL][final]}"
|
84
91
|
end
|
92
|
+
|
93
|
+
def self.insert_words(raw_name)
|
94
|
+
raw_name.chars.map{ |char|
|
95
|
+
codepoint = char.ord
|
96
|
+
if codepoint < INDEX[:REPLACE_BASE]
|
97
|
+
char
|
98
|
+
else
|
99
|
+
"#{INDEX[:COMMON_WORDS][codepoint - INDEX[:REPLACE_BASE]]} "
|
100
|
+
end
|
101
|
+
}.join.chomp
|
102
|
+
end
|
85
103
|
end
|
86
104
|
end
|
87
105
|
|
data/spec/unicode_name_spec.rb
CHANGED
@@ -9,11 +9,11 @@ describe Unicode::Name do
|
|
9
9
|
assert_equal "REPLACEMENT CHARACTER", Unicode::Name.of("�")
|
10
10
|
end
|
11
11
|
|
12
|
-
it "works for CJK
|
12
|
+
it "works for CJK unified ideographs" do
|
13
13
|
assert_equal "CJK UNIFIED IDEOGRAPH-4E01", Unicode::Name.of("丁")
|
14
14
|
end
|
15
15
|
|
16
|
-
it "works for Hangul
|
16
|
+
it "works for Hangul syllables" do
|
17
17
|
assert_equal "HANGUL SYLLABLE HAN", Unicode::Name.of("한")
|
18
18
|
assert_equal "HANGUL SYLLABLE GAG", Unicode::Name.of("각")
|
19
19
|
assert_equal "HANGUL SYLLABLE GAE", Unicode::Name.of("개")
|
@@ -21,6 +21,14 @@ describe Unicode::Name do
|
|
21
21
|
assert_equal "HANGUL SYLLABLE DWALB", Unicode::Name.of("돫")
|
22
22
|
end
|
23
23
|
|
24
|
+
it "works with some ranges that have the codepoint embedded" do
|
25
|
+
assert_equal "EGYPTIAN HIEROGLYPH-143F5", Unicode::Name.of("\u{143F5}")
|
26
|
+
assert_equal "KHITAN SMALL SCRIPT CHARACTER-18C12", Unicode::Name.of("𘰒")
|
27
|
+
assert_equal "TANGUT IDEOGRAPH-18D00", Unicode::Name.of("𘴀")
|
28
|
+
assert_equal "NUSHU CHARACTER-1B171", Unicode::Name.of("𛅱")
|
29
|
+
assert_equal "CJK COMPATIBILITY IDEOGRAPH-2F9B1", Unicode::Name.of("𧃒")
|
30
|
+
end
|
31
|
+
|
24
32
|
it "will return nil for characters without name" do
|
25
33
|
assert_nil Unicode::Name.of("\u{10c50}")
|
26
34
|
assert_nil Unicode::Name.of("\0")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-name
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.13.1
|
4
|
+
version: 1.13.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode-types
|
@@ -68,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '0'
|
70
70
|
requirements: []
|
71
|
-
rubygems_version: 3.5.
|
71
|
+
rubygems_version: 3.5.21
|
72
72
|
signing_key:
|
73
73
|
specification_version: 4
|
74
74
|
summary: Returns name/aliases/label of a Unicode codepoint
|