unicode-name 1.13.0 → 1.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rake_tasks +3 -0
- data/CHANGELOG.md +29 -17
- data/Gemfile.lock +11 -11
- data/README.md +9 -3
- data/data/name.marshal.gz +0 -0
- data/lib/unicode/name/constants.rb +1 -1
- data/lib/unicode/name.rb +24 -6
- data/spec/unicode_name_spec.rb +14 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca6d8f90ce7c5fa9c9da362be1d90b10260da01d8ac97e2412e0699fa69ca40a
|
4
|
+
data.tar.gz: a3a2a417c76906c32fe429ce51e16c543696687bb0078340aecb293a65595800
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ad0910912fcf5e226e00955c72cd3325796acc49137ce2e9c141fdcaa5518585fb38795de6587cd792d22b428110d08592a212220dc09a91f3e92016140a86a
|
7
|
+
data.tar.gz: 5b8de2a4c57c893d6e18ef4ce5b876a032f0cc0f3726504753a0dfbf9b7b4e4bf18d2b6b7aadf1a976231079d285872a6203e352d66fe26154883c31237d9aca
|
data/.rake_tasks
ADDED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
### 1.13.2
|
4
|
+
|
5
|
+
- Optimize index size by removing ranges that have codepoints embedded
|
6
|
+
- Optimize index size by substituting common words
|
7
|
+
- Fix missing Tangut ideographs
|
8
|
+
|
9
|
+
### 1.13.1
|
10
|
+
|
11
|
+
Bugfix release:
|
12
|
+
|
13
|
+
- Fix medial vowels not generated correctly for Hangul syllables #1
|
14
|
+
- Unicode::Name.readable now also applies correction if one exists
|
15
|
+
|
3
16
|
### 1.13.0
|
4
17
|
|
5
18
|
- Unicode 16.0
|
@@ -22,59 +35,58 @@
|
|
22
35
|
|
23
36
|
### 1.8.0
|
24
37
|
|
25
|
-
|
38
|
+
- Unicode 12.1
|
26
39
|
|
27
40
|
### 1.7.1
|
28
41
|
|
29
|
-
|
42
|
+
- Push unicode-types dependency to 1.4.0
|
30
43
|
|
31
44
|
### 1.7.0
|
32
45
|
|
33
|
-
|
46
|
+
- Unicode 12
|
34
47
|
|
35
48
|
### 1.6.0
|
36
49
|
|
37
|
-
|
38
|
-
|
50
|
+
- Unicode 11
|
51
|
+
- Do not depend on rubygems (only use zlib stdlib for unzipping)
|
39
52
|
|
40
53
|
### 1.5.2
|
41
54
|
|
42
|
-
|
55
|
+
- Explicitly load rubygems/util, fixes regression in 1.5.1
|
43
56
|
|
44
57
|
### 1.5.1
|
45
58
|
|
46
|
-
|
59
|
+
- Use `Gem::Util` for `gunzip`, removes deprecation warning
|
47
60
|
|
48
61
|
### 1.5.0
|
49
62
|
|
50
|
-
|
63
|
+
- Unicode 10
|
51
64
|
|
52
65
|
### 1.4.2
|
53
66
|
|
54
|
-
|
67
|
+
- Fix that Unicode::Name.correct would not fail if codepoint has aliases but no correction
|
55
68
|
|
56
69
|
### 1.4.1
|
57
70
|
|
58
|
-
|
59
|
-
|
71
|
+
- Be compatible with Ruby 2.4's surrogate literals
|
72
|
+
- Bump unicode-types dependency
|
60
73
|
|
61
74
|
### 1.4.0
|
62
75
|
|
63
|
-
|
76
|
+
- Support Hangul syllables
|
64
77
|
|
65
78
|
### 1.3.0
|
66
79
|
|
67
|
-
|
80
|
+
- Support Unicode 9.0
|
68
81
|
|
69
82
|
### 1.2.0
|
70
83
|
|
71
|
-
|
84
|
+
- Support CJK Ideographs
|
72
85
|
|
73
86
|
### 1.1.0
|
74
87
|
|
75
|
-
|
88
|
+
- Support codepoint labels
|
76
89
|
|
77
90
|
### 1.0.0
|
78
91
|
|
79
|
-
|
80
|
-
|
92
|
+
- Initial release
|
data/Gemfile.lock
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
unicode-name (1.13.0)
|
4
|
+
unicode-name (1.13.2)
|
5
5
|
unicode-types (~> 1.10)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
io-console (0.
|
11
|
-
irb (1.
|
12
|
-
rdoc
|
13
|
-
reline (>= 0.
|
14
|
-
minitest (5.
|
15
|
-
psych (5.1.
|
10
|
+
io-console (0.7.2)
|
11
|
+
irb (1.14.1)
|
12
|
+
rdoc (>= 4.0.0)
|
13
|
+
reline (>= 0.4.2)
|
14
|
+
minitest (5.25.1)
|
15
|
+
psych (5.1.2)
|
16
16
|
stringio
|
17
|
-
rake (13.
|
18
|
-
rdoc (6.
|
17
|
+
rake (13.2.1)
|
18
|
+
rdoc (6.7.0)
|
19
19
|
psych (>= 4.0.0)
|
20
|
-
reline (0.
|
20
|
+
reline (0.5.10)
|
21
21
|
io-console (~> 0.5)
|
22
|
-
stringio (3.
|
22
|
+
stringio (3.1.1)
|
23
23
|
unicode-types (1.10.0)
|
24
24
|
|
25
25
|
PLATFORMS
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Unicode::Name [![[version]](https://badge.fury.io/rb/unicode-name.svg)](https://badge.fury.io/rb/unicode-name)
|
1
|
+
# Unicode::Name [![[version]](https://badge.fury.io/rb/unicode-name.svg)](https://badge.fury.io/rb/unicode-name) [![[ci]](https://github.com/janlelis/unicode-name/workflows/Test/badge.svg)](https://github.com/janlelis/unicode-name/actions?query=workflow%3ATest)
|
2
2
|
|
3
3
|
Return Unicode codepoint names, aliases, and labels.
|
4
4
|
|
@@ -6,7 +6,7 @@ Unicode version: **16.0.0** (September 2024)
|
|
6
6
|
|
7
7
|
Supported Rubies: **3.3**, **3.2**, **3.1**, **3.0**
|
8
8
|
|
9
|
-
Old Rubies that might still work: **2.
|
9
|
+
Old Rubies that might still work: **2.X**
|
10
10
|
|
11
11
|
## Usage
|
12
12
|
|
@@ -42,10 +42,16 @@ Unicode::Name.readable("\0") # => "NULL"
|
|
42
42
|
Unicode::Name.readable("\u{FFFFD}") # => "<private-use-FFFFD>"
|
43
43
|
```
|
44
44
|
|
45
|
-
See [unicode-sequence_names](https://github.com/janlelis/unicode-sequence_name) for character names of more complex codepoint sequences.
|
45
|
+
See [unicode-sequence_names](https://github.com/janlelis/unicode-sequence_name) for character names of more complex codepoint sequences. This is how you could use both libraries together to get the most relevant name of a character:
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
name = Unicode::SequenceName.of(char) || Unicode::Name.readable(char)
|
49
|
+
```
|
46
50
|
|
47
51
|
See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related micro libraries.
|
48
52
|
|
53
|
+
See [unicode-name.js](https://github.com/janlelis/unicode-name.js) for a JavaScript implementation of this gem.
|
54
|
+
|
49
55
|
## MIT License
|
50
56
|
|
51
57
|
- Copyright (C) 2016-2024 Jan Lelis <https://janlelis.com>. Released under the MIT license.
|
data/data/name.marshal.gz
CHANGED
Binary file
|
data/lib/unicode/name.rb
CHANGED
@@ -11,11 +11,18 @@ module Unicode
|
|
11
11
|
def self.unicode_name(char)
|
12
12
|
codepoint = char.unpack("U")[0]
|
13
13
|
require_relative "name/index" unless defined? ::Unicode::Name::INDEX
|
14
|
+
|
14
15
|
if res = INDEX[:NAMES][codepoint]
|
15
|
-
res
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
return insert_words(res)
|
17
|
+
end
|
18
|
+
|
19
|
+
INDEX[:CP_RANGES].each{|prefix, range|
|
20
|
+
if range.any?{ |range| codepoint >= range[0] && codepoint <= range[1] }
|
21
|
+
return "%s%.4X" %[prefix, codepoint]
|
22
|
+
end
|
23
|
+
}
|
24
|
+
|
25
|
+
if codepoint >= HANGUL_START && codepoint <= HANGUL_END
|
19
26
|
"HANGUL SYLLABLE %s" % hangul_decomposition(codepoint)
|
20
27
|
else
|
21
28
|
nil
|
@@ -63,7 +70,7 @@ module Unicode
|
|
63
70
|
end
|
64
71
|
|
65
72
|
def self.readable(char)
|
66
|
-
|
73
|
+
correct(char) ||
|
67
74
|
( as = aliases(char) ) &&
|
68
75
|
( as[:control] && as[:control][0] ||
|
69
76
|
as[:figment] && as[:figment][0] ||
|
@@ -78,10 +85,21 @@ module Unicode
|
|
78
85
|
def self.hangul_decomposition(codepoint)
|
79
86
|
base = codepoint - HANGUL_START
|
80
87
|
final = base % HANGUL_FINAL_MAX
|
81
|
-
medial = (base
|
88
|
+
medial = (base % HANGUL_MEDIAL_MAX) / HANGUL_FINAL_MAX
|
82
89
|
initial = base / HANGUL_MEDIAL_MAX
|
83
90
|
"#{INDEX[:JAMO][:INITIAL][initial]}#{INDEX[:JAMO][:MEDIAL][medial]}#{INDEX[:JAMO][:FINAL][final]}"
|
84
91
|
end
|
92
|
+
|
93
|
+
def self.insert_words(raw_name)
|
94
|
+
raw_name.chars.map{ |char|
|
95
|
+
codepoint = char.ord
|
96
|
+
if codepoint < INDEX[:REPLACE_BASE]
|
97
|
+
char
|
98
|
+
else
|
99
|
+
"#{INDEX[:COMMON_WORDS][codepoint - INDEX[:REPLACE_BASE]]} "
|
100
|
+
end
|
101
|
+
}.join.chomp
|
102
|
+
end
|
85
103
|
end
|
86
104
|
end
|
87
105
|
|
data/spec/unicode_name_spec.rb
CHANGED
@@ -9,13 +9,24 @@ describe Unicode::Name do
|
|
9
9
|
assert_equal "REPLACEMENT CHARACTER", Unicode::Name.of("�")
|
10
10
|
end
|
11
11
|
|
12
|
-
it "works for CJK
|
12
|
+
it "works for CJK unified ideographs" do
|
13
13
|
assert_equal "CJK UNIFIED IDEOGRAPH-4E01", Unicode::Name.of("丁")
|
14
14
|
end
|
15
15
|
|
16
|
-
it "works for Hangul
|
16
|
+
it "works for Hangul syllables" do
|
17
17
|
assert_equal "HANGUL SYLLABLE HAN", Unicode::Name.of("한")
|
18
18
|
assert_equal "HANGUL SYLLABLE GAG", Unicode::Name.of("각")
|
19
|
+
assert_equal "HANGUL SYLLABLE GAE", Unicode::Name.of("개")
|
20
|
+
assert_equal "HANGUL SYLLABLE GAENG", Unicode::Name.of("갱")
|
21
|
+
assert_equal "HANGUL SYLLABLE DWALB", Unicode::Name.of("돫")
|
22
|
+
end
|
23
|
+
|
24
|
+
it "works with some ranges that have the codepoint embedded" do
|
25
|
+
assert_equal "EGYPTIAN HIEROGLYPH-143F5", Unicode::Name.of("\u{143F5}")
|
26
|
+
assert_equal "KHITAN SMALL SCRIPT CHARACTER-18C12", Unicode::Name.of("𘰒")
|
27
|
+
assert_equal "TANGUT IDEOGRAPH-18D00", Unicode::Name.of("𘴀")
|
28
|
+
assert_equal "NUSHU CHARACTER-1B171", Unicode::Name.of("𛅱")
|
29
|
+
assert_equal "CJK COMPATIBILITY IDEOGRAPH-2F9B1", Unicode::Name.of("𧃒")
|
19
30
|
end
|
20
31
|
|
21
32
|
it "will return nil for characters without name" do
|
@@ -89,6 +100,7 @@ describe Unicode::Name do
|
|
89
100
|
describe ".readable" do
|
90
101
|
it "will return best readable representation of a codepoint" do
|
91
102
|
assert_equal "LATIN CAPITAL LETTER A", Unicode::Name.readable("A")
|
103
|
+
assert_equal "LATIN CAPITAL LETTER GHA", Unicode::Name.readable("Ƣ")
|
92
104
|
assert_equal "NULL", Unicode::Name.readable("\0")
|
93
105
|
assert_equal "<noncharacter-FFFFF>", Unicode::Name.readable("\u{FFFFF}")
|
94
106
|
assert_equal "<reserved-10C50>", Unicode::Name.readable("\u{10C50}")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-name
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.13.0
|
4
|
+
version: 1.13.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-09
|
11
|
+
date: 2024-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode-types
|
@@ -33,6 +33,7 @@ extensions: []
|
|
33
33
|
extra_rdoc_files: []
|
34
34
|
files:
|
35
35
|
- ".gitignore"
|
36
|
+
- ".rake_tasks"
|
36
37
|
- CHANGELOG.md
|
37
38
|
- CODE_OF_CONDUCT.md
|
38
39
|
- Gemfile
|
@@ -67,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
67
68
|
- !ruby/object:Gem::Version
|
68
69
|
version: '0'
|
69
70
|
requirements: []
|
70
|
-
rubygems_version: 3.5.
|
71
|
+
rubygems_version: 3.5.21
|
71
72
|
signing_key:
|
72
73
|
specification_version: 4
|
73
74
|
summary: Returns name/aliases/label of a Unicode codepoint
|