unicode-sequence_name 1.14.1 → 1.15.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/data/sequence_name.marshal.gz +0 -0
- data/lib/unicode/sequence_name/constants.rb +1 -1
- data/lib/unicode/sequence_name.rb +24 -2
- data/spec/unicode_sequence_name_spec.rb +17 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9027ebad3f5176ac6e6e554460da72a6fc2edcefa99e73027df557693f197d3a
|
4
|
+
data.tar.gz: 34ea77e121ff2ed7c88e94da43bfe78ad313b52fba516255459de01ba7027d5e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 537a1b631841da656b8bd8a1445791383271f6f7a2fff62c9d413e0db1780d8af1d67f98c7793ec797999985ed6d2b694741c0e101ec14899a37f5e010864fe9
|
7
|
+
data.tar.gz: f4f0943434c71e329bfa4b7c9551804ab46dc1547a629bde186c95c0aeb2087661232bc74d067dac50a03ce6d12411d1753b582c664dd76d3b547fc89d1f12d5
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
### 1.15.1
|
4
|
+
|
5
|
+
- Optimize index size by substituting common words
|
6
|
+
|
7
|
+
### 1.15.0
|
8
|
+
|
9
|
+
- Include Emoji sequences which are not fully qualified (VS16 is missing) in index
|
10
|
+
- You can use the newly introduced method `Unicode::SequenceName.fully_qualified`
|
11
|
+
if you want to exclude non-fully qualified sequences
|
12
|
+
|
3
13
|
### 1.14.1
|
4
14
|
|
5
15
|
- Fix bug that some singleton Emoji would be included in index
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -10,7 +10,7 @@ IVD version: **2022-09-13** (September 2022)
|
|
10
10
|
|
11
11
|
Supported Rubies: **3.3**, **3.2**, **3.1**, **3.0**
|
12
12
|
|
13
|
-
Old Rubies which might still work: **2.
|
13
|
+
Old Rubies which might still work: **2.X**
|
14
14
|
|
15
15
|
## Usage
|
16
16
|
|
@@ -32,10 +32,10 @@ Unicode::SequenceName.of "🙂‍↔️" # => "HEAD SHAKING HORIZONTALLY"
|
|
32
32
|
Unicode::SequenceName.of "โ๏ธ" # => "LEFT SINGLE QUOTATION MARK (right-justified fullwidth form)"
|
33
33
|
```
|
34
34
|
|
35
|
-
Names for singular codepoints are not included, you can use [unicode-name](https://github.com/janlelis/unicode-name) for that purpose. This is how you could use both libraries together to get the most relevant name of a character:
|
35
|
+
Names for singular codepoints are not included, but you can use [unicode-name](https://github.com/janlelis/unicode-name) for that purpose. This is how you could use both libraries together to get the most relevant name of a character:
|
36
36
|
|
37
37
|
```ruby
|
38
|
-
name = Unicode::
|
38
|
+
name = Unicode::SequenceName.of(char) || Unicode::Name.readable(char)
|
39
39
|
```
|
40
40
|
|
41
41
|
## Also See
|
Binary file
|
@@ -5,14 +5,24 @@ module Unicode
|
|
5
5
|
def self.sequence_name(string)
|
6
6
|
codepoints = get_codepoint_values(string)
|
7
7
|
require_relative "sequence_name/index" unless defined? ::Unicode::SequenceName::INDEX
|
8
|
-
if res = INDEX[:SEQUENCES][codepoints]
|
9
|
-
res
|
8
|
+
if res = INDEX[:SEQUENCES][codepoints] || INDEX[:SEQUENCES_NOT_QUALIFIED][codepoints]
|
9
|
+
insert_words(res)
|
10
10
|
else
|
11
11
|
nil
|
12
12
|
end
|
13
13
|
end
|
14
14
|
class << self; alias of sequence_name; end
|
15
15
|
|
16
|
+
def self.fully_qualified(string)
|
17
|
+
codepoints = get_codepoint_values(string)
|
18
|
+
require_relative "sequence_name/index" unless defined? ::Unicode::SequenceName::INDEX
|
19
|
+
if res = INDEX[:SEQUENCES][codepoints]
|
20
|
+
insert_words(res)
|
21
|
+
else
|
22
|
+
nil
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
16
26
|
def self.get_codepoint_values(string)
|
17
27
|
if string.valid_encoding?
|
18
28
|
return string.codepoints
|
@@ -26,8 +36,20 @@ module Unicode
|
|
26
36
|
raise(ArgumentError, "Unicode::SequenceName.of must be given a valid string")
|
27
37
|
end
|
28
38
|
|
39
|
+
def self.insert_words(raw_name)
|
40
|
+
raw_name.chars.map{ |char|
|
41
|
+
codepoint = char.ord
|
42
|
+
if codepoint < INDEX[:REPLACE_BASE]
|
43
|
+
char
|
44
|
+
else
|
45
|
+
"#{INDEX[:COMMON_WORDS][codepoint - INDEX[:REPLACE_BASE]]} "
|
46
|
+
end
|
47
|
+
}.join.chomp
|
48
|
+
end
|
49
|
+
|
29
50
|
class << self
|
30
51
|
private :get_codepoint_values
|
52
|
+
private :insert_words
|
31
53
|
end
|
32
54
|
end
|
33
55
|
end
|
@@ -3,7 +3,7 @@ require "minitest/autorun"
|
|
3
3
|
|
4
4
|
describe Unicode::SequenceName do
|
5
5
|
describe ".sequence_name (alias .of)" do
|
6
|
-
it "will return name for that sequence
|
6
|
+
it "will return name for that sequence" do
|
7
7
|
assert_equal "DOUBLE EXCLAMATION MARK (text style)", Unicode::SequenceName.of("โผ๏ธ")
|
8
8
|
assert_equal "CJK COMPATIBILITY IDEOGRAPH-2F81F", Unicode::SequenceName.of("ใ๏ธ")
|
9
9
|
assert_equal "MYANMAR LETTER NGA (dotted form)", Unicode::SequenceName.of("แ๏ธ")
|
@@ -19,6 +19,13 @@ describe Unicode::SequenceName do
|
|
19
19
|
assert_equal "LEFT SINGLE QUOTATION MARK (right-justified fullwidth form)", Unicode::SequenceName.of("โ๏ธ") # Unicode 16.0
|
20
20
|
end
|
21
21
|
|
22
|
+
it "will return name for that sequence (not fully qualified: VS16 missing)" do
|
23
|
+
assert_equal "COUPLE WITH HEART: WOMAN, WOMAN, DARK SKIN TONE, MEDIUM SKIN TONE", Unicode::SequenceName.of("๐ฉ๐ฟโโคโ๐ฉ๐ฝ")
|
24
|
+
assert_equal "MAN JUDGE", Unicode::SequenceName.of("๐จโโ")
|
25
|
+
assert_equal "WOMAN BOUNCING BALL", Unicode::SequenceName.of("โนโโ๏ธ") # First VS16 missing
|
26
|
+
assert_equal "WOMAN BOUNCING BALL", Unicode::SequenceName.of("โน๏ธโโ") # Second VS16 missing
|
27
|
+
end
|
28
|
+
|
22
29
|
it "will return nil for characters without name" do
|
23
30
|
assert_nil Unicode::SequenceName.of("\u{10c50}")
|
24
31
|
assert_nil Unicode::SequenceName.of("bla")
|
@@ -28,5 +35,14 @@ describe Unicode::SequenceName do
|
|
28
35
|
assert_nil Unicode::SequenceName.of("โณ")
|
29
36
|
end
|
30
37
|
end
|
38
|
+
|
39
|
+
describe ".fully_qualified" do
|
40
|
+
it "will *not* return name for that sequence (not fully qualified: VS16 missing)" do
|
41
|
+
assert_nil Unicode::SequenceName.fully_qualified("๐ฉ๐ฟโโคโ๐ฉ๐ฝ")
|
42
|
+
assert_nil Unicode::SequenceName.fully_qualified("๐จโโ")
|
43
|
+
assert_nil Unicode::SequenceName.fully_qualified("โนโโ๏ธ") # First VS16 missing
|
44
|
+
assert_nil Unicode::SequenceName.fully_qualified("โน๏ธโโ") # Second VS16 missing
|
45
|
+
end
|
46
|
+
end
|
31
47
|
end
|
32
48
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-sequence_name
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.15.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "[Unicode 16.0.0][Emoji 16.0] Returns the name of a Unicode code point
|
14
14
|
sequence, if one exists"
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
requirements: []
|
56
|
-
rubygems_version: 3.5.
|
56
|
+
rubygems_version: 3.5.21
|
57
57
|
signing_key:
|
58
58
|
specification_version: 4
|
59
59
|
summary: Returns the name of a Unicode codepoint sequence, if one exists
|