unicode-sequence_name 1.14.1 → 1.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/data/sequence_name.marshal.gz +0 -0
- data/lib/unicode/sequence_name/constants.rb +1 -1
- data/lib/unicode/sequence_name.rb +24 -2
- data/spec/unicode_sequence_name_spec.rb +17 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9027ebad3f5176ac6e6e554460da72a6fc2edcefa99e73027df557693f197d3a
|
4
|
+
data.tar.gz: 34ea77e121ff2ed7c88e94da43bfe78ad313b52fba516255459de01ba7027d5e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 537a1b631841da656b8bd8a1445791383271f6f7a2fff62c9d413e0db1780d8af1d67f98c7793ec797999985ed6d2b694741c0e101ec14899a37f5e010864fe9
|
7
|
+
data.tar.gz: f4f0943434c71e329bfa4b7c9551804ab46dc1547a629bde186c95c0aeb2087661232bc74d067dac50a03ce6d12411d1753b582c664dd76d3b547fc89d1f12d5
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
### 1.15.1
|
4
|
+
|
5
|
+
- Optimize index size by substituting common words
|
6
|
+
|
7
|
+
### 1.15.0
|
8
|
+
|
9
|
+
- Include Emoji sequences which are not fully qualified (VS16 is missing) in index
|
10
|
+
- You can use the newly introduced method `Unicode::SequenceName.fully_qualified`
|
11
|
+
if you want to exclude non-fully qualified sequences
|
12
|
+
|
3
13
|
### 1.14.1
|
4
14
|
|
5
15
|
- Fix bug that some singleton Emoji would be included in index
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -10,7 +10,7 @@ IVD version: **2022-09-13** (September 2022)
|
|
10
10
|
|
11
11
|
Supported Rubies: **3.3**, **3.2**, **3.1**, **3.0**
|
12
12
|
|
13
|
-
Old Rubies which might still work: **2.
|
13
|
+
Old Rubies which might still work: **2.X**
|
14
14
|
|
15
15
|
## Usage
|
16
16
|
|
@@ -32,10 +32,10 @@ Unicode::SequenceName.of "๐โโ๏ธ" # => "HEAD SHAKING HORIZONTALLY"
|
|
32
32
|
Unicode::SequenceName.of "โ๏ธ" # => "LEFT SINGLE QUOTATION MARK (right-justified fullwidth form)"
|
33
33
|
```
|
34
34
|
|
35
|
-
Names for singular codepoints are not included, you can use [unicode-name](https://github.com/janlelis/unicode-name) for that purpose. This is how you could use both libraries together to get the most relevant name of a character:
|
35
|
+
Names for singular codepoints are not included, but you can use [unicode-name](https://github.com/janlelis/unicode-name) for that purpose. This is how you could use both libraries together to get the most relevant name of a character:
|
36
36
|
|
37
37
|
```ruby
|
38
|
-
name = Unicode::
|
38
|
+
name = Unicode::SequenceName.of(char) || Unicode::Name.readable(char)
|
39
39
|
```
|
40
40
|
|
41
41
|
## Also See
|
Binary file
|
@@ -5,14 +5,24 @@ module Unicode
|
|
5
5
|
def self.sequence_name(string)
|
6
6
|
codepoints = get_codepoint_values(string)
|
7
7
|
require_relative "sequence_name/index" unless defined? ::Unicode::SequenceName::INDEX
|
8
|
-
if res = INDEX[:SEQUENCES][codepoints]
|
9
|
-
res
|
8
|
+
if res = INDEX[:SEQUENCES][codepoints] || INDEX[:SEQUENCES_NOT_QUALIFIED][codepoints]
|
9
|
+
insert_words(res)
|
10
10
|
else
|
11
11
|
nil
|
12
12
|
end
|
13
13
|
end
|
14
14
|
class << self; alias of sequence_name; end
|
15
15
|
|
16
|
+
def self.fully_qualified(string)
|
17
|
+
codepoints = get_codepoint_values(string)
|
18
|
+
require_relative "sequence_name/index" unless defined? ::Unicode::SequenceName::INDEX
|
19
|
+
if res = INDEX[:SEQUENCES][codepoints]
|
20
|
+
insert_words(res)
|
21
|
+
else
|
22
|
+
nil
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
16
26
|
def self.get_codepoint_values(string)
|
17
27
|
if string.valid_encoding?
|
18
28
|
return string.codepoints
|
@@ -26,8 +36,20 @@ module Unicode
|
|
26
36
|
raise(ArgumentError, "Unicode::SequenceName.of must be given a valid string")
|
27
37
|
end
|
28
38
|
|
39
|
+
def self.insert_words(raw_name)
|
40
|
+
raw_name.chars.map{ |char|
|
41
|
+
codepoint = char.ord
|
42
|
+
if codepoint < INDEX[:REPLACE_BASE]
|
43
|
+
char
|
44
|
+
else
|
45
|
+
"#{INDEX[:COMMON_WORDS][codepoint - INDEX[:REPLACE_BASE]]} "
|
46
|
+
end
|
47
|
+
}.join.chomp
|
48
|
+
end
|
49
|
+
|
29
50
|
class << self
|
30
51
|
private :get_codepoint_values
|
52
|
+
private :insert_words
|
31
53
|
end
|
32
54
|
end
|
33
55
|
end
|
@@ -3,7 +3,7 @@ require "minitest/autorun"
|
|
3
3
|
|
4
4
|
describe Unicode::SequenceName do
|
5
5
|
describe ".sequence_name (alias .of)" do
|
6
|
-
it "will return name for that sequence
|
6
|
+
it "will return name for that sequence" do
|
7
7
|
assert_equal "DOUBLE EXCLAMATION MARK (text style)", Unicode::SequenceName.of("โผ๏ธ")
|
8
8
|
assert_equal "CJK COMPATIBILITY IDEOGRAPH-2F81F", Unicode::SequenceName.of("ใ๏ธ")
|
9
9
|
assert_equal "MYANMAR LETTER NGA (dotted form)", Unicode::SequenceName.of("แ๏ธ")
|
@@ -19,6 +19,13 @@ describe Unicode::SequenceName do
|
|
19
19
|
assert_equal "LEFT SINGLE QUOTATION MARK (right-justified fullwidth form)", Unicode::SequenceName.of("โ๏ธ") # Unicode 16.0
|
20
20
|
end
|
21
21
|
|
22
|
+
it "will return name for that sequence (not fully qualified: VS16 missing)" do
|
23
|
+
assert_equal "COUPLE WITH HEART: WOMAN, WOMAN, DARK SKIN TONE, MEDIUM SKIN TONE", Unicode::SequenceName.of("๐ฉ๐ฟโโคโ๐ฉ๐ฝ")
|
24
|
+
assert_equal "MAN JUDGE", Unicode::SequenceName.of("๐จโโ")
|
25
|
+
assert_equal "WOMAN BOUNCING BALL", Unicode::SequenceName.of("โนโโ๏ธ") # First VS16 missing
|
26
|
+
assert_equal "WOMAN BOUNCING BALL", Unicode::SequenceName.of("โน๏ธโโ") # Second VS16 missing
|
27
|
+
end
|
28
|
+
|
22
29
|
it "will return nil for characters without name" do
|
23
30
|
assert_nil Unicode::SequenceName.of("\u{10c50}")
|
24
31
|
assert_nil Unicode::SequenceName.of("bla")
|
@@ -28,5 +35,14 @@ describe Unicode::SequenceName do
|
|
28
35
|
assert_nil Unicode::SequenceName.of("โณ")
|
29
36
|
end
|
30
37
|
end
|
38
|
+
|
39
|
+
describe ".fully_qualified" do
|
40
|
+
it "will *not* return name for that sequence (not fully qualified: VS16 missing)" do
|
41
|
+
assert_nil Unicode::SequenceName.fully_qualified("๐ฉ๐ฟโโคโ๐ฉ๐ฝ")
|
42
|
+
assert_nil Unicode::SequenceName.fully_qualified("๐จโโ")
|
43
|
+
assert_nil Unicode::SequenceName.fully_qualified("โนโโ๏ธ") # First VS16 missing
|
44
|
+
assert_nil Unicode::SequenceName.fully_qualified("โน๏ธโโ") # Second VS16 missing
|
45
|
+
end
|
46
|
+
end
|
31
47
|
end
|
32
48
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-sequence_name
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.15.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "[Unicode 16.0.0][Emoji 16.0] Returns the name of a Unicode code point
|
14
14
|
sequence, if one exists"
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
requirements: []
|
56
|
-
rubygems_version: 3.5.
|
56
|
+
rubygems_version: 3.5.21
|
57
57
|
signing_key:
|
58
58
|
specification_version: 4
|
59
59
|
summary: Returns the name of a Unicode codepoint sequence, if one exists
|