unicode-emoji 3.7.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rake_tasks +1 -0
- data/CHANGELOG.md +24 -1
- data/Gemfile.lock +3 -3
- data/README.md +111 -58
- data/Rakefile +6 -2
- data/data/emoji.marshal.gz +0 -0
- data/data/generate_constants.rb +123 -43
- data/lib/unicode/emoji/constants.rb +22 -2
- data/lib/unicode/emoji/generated/regex.rb +1 -1
- data/lib/unicode/emoji/generated/regex_basic.rb +1 -1
- data/lib/unicode/emoji/generated/regex_emoji_keycap.rb +8 -0
- data/lib/unicode/emoji/generated/regex_include_mqe.rb +8 -0
- data/lib/unicode/emoji/generated/regex_include_mqe_uqe.rb +8 -0
- data/lib/unicode/emoji/generated/regex_include_text.rb +1 -1
- data/lib/unicode/emoji/generated/regex_prop_component.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_emoji.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_modifier.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_modifier_base.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_presentation.rb +8 -0
- data/lib/unicode/emoji/generated/regex_text.rb +1 -1
- data/lib/unicode/emoji/generated/regex_valid.rb +1 -1
- data/lib/unicode/emoji/generated/regex_valid_include_text.rb +1 -1
- data/lib/unicode/emoji/generated/regex_well_formed.rb +1 -1
- data/lib/unicode/emoji/generated/regex_well_formed_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_basic.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_emoji_keycap.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_include_mqe.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_prop_component.rb +8 -0
- data/lib/unicode/emoji/generated_native/{regex_any.rb → regex_prop_emoji.rb} +1 -1
- data/lib/unicode/emoji/generated_native/regex_prop_modifier.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_prop_modifier_base.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_prop_presentation.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_valid.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_valid_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_well_formed.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_well_formed_include_text.rb +1 -1
- data/lib/unicode/emoji/lazy_constants.rb +36 -0
- data/lib/unicode/emoji/list.rb +3 -0
- data/lib/unicode/emoji.rb +39 -6
- data/spec/data/.keep +0 -0
- data/spec/data/emoji-test.txt +5331 -0
- data/spec/emoji_test_txt_spec.rb +181 -0
- data/spec/unicode_emoji_spec.rb +127 -14
- metadata +24 -4
- data/lib/unicode/emoji/generated/regex_any.rb +0 -8
@@ -0,0 +1,181 @@
|
|
1
|
+
require_relative "../lib/unicode/emoji"
|
2
|
+
require "minitest/autorun"
|
3
|
+
require "open-uri"
|
4
|
+
|
5
|
+
def iterate_emoji
|
6
|
+
EMOJI_TEST_FILE.scan(/^(?:# (?<sub>sub)?group: (?<group_name>.*)$)|(?:(?<codepoints>.+?)\s*; (?<qual_status>.+?)-?qualified )/) do
|
7
|
+
if $~[:codepoints]
|
8
|
+
yield $~[:codepoints].split.map{|e| e.to_i(16)}.pack("U*"), $~[:qual_status]
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "emoji-test.txt" do
|
14
|
+
EMOJI_TEST_FILE = begin
|
15
|
+
emoji_test_path = File.join(__dir__, "data/emoji-test.txt")
|
16
|
+
if File.exist? emoji_test_path
|
17
|
+
file = File.read(emoji_test_path)
|
18
|
+
else
|
19
|
+
puts "Downloading emoji-test.txt from the consortium"
|
20
|
+
URI.open "https://www.unicode.org/Public/emoji/#{Unicode::Emoji::EMOJI_VERSION}/emoji-test.txt" do |f|
|
21
|
+
file = f.read
|
22
|
+
File.write(File.join(__dir__, "data/emoji-test.txt"), @file)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
file
|
27
|
+
end
|
28
|
+
|
29
|
+
# qual_status:
|
30
|
+
# - fully - fully-qualified emoji sequences
|
31
|
+
# - minimally - minimally-qualified emoji sequences (some VS16 missing, but not first one)
|
32
|
+
# - un - unqualified emoji sequences (some VS16 missing)
|
33
|
+
|
34
|
+
describe "REGEX" do
|
35
|
+
describe "detects fully-qualified emoji" do
|
36
|
+
iterate_emoji do |emoji, qual_status|
|
37
|
+
it(emoji) do
|
38
|
+
if qual_status == "fully"
|
39
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX]
|
40
|
+
else
|
41
|
+
refute_equal emoji, emoji[Unicode::Emoji::REGEX]
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe "REGEX_INCLUDE_TEXT" do
|
49
|
+
describe "detects fully-qualified emoji and (unqualified) singleton text emoji" do
|
50
|
+
iterate_emoji do |emoji, qual_status|
|
51
|
+
it(emoji) do
|
52
|
+
if qual_status == "fully" || qual_status == "un" && emoji.size <= 2
|
53
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_TEXT]
|
54
|
+
else
|
55
|
+
refute_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_TEXT]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
describe "REGEX_INCLUDE_MQE" do
|
63
|
+
describe "detects fully-qualified emoji and minimally-qualified emoji" do
|
64
|
+
iterate_emoji do |emoji, qual_status|
|
65
|
+
it(emoji) do
|
66
|
+
if qual_status == "fully" || qual_status == "minimally"
|
67
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE]
|
68
|
+
else
|
69
|
+
refute_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE]
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
describe "REGEX_INCLUDE_MQE_UQE" do
|
77
|
+
describe "detects all emoji" do
|
78
|
+
iterate_emoji do |emoji, qual_status|
|
79
|
+
it(emoji) do
|
80
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE_UQE]
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
describe "REGEX_VALID" do
|
87
|
+
describe "detects fully-qualified, minimally-qualified emoji, and unqualified emoji with ZWJ" do
|
88
|
+
iterate_emoji do |emoji, qual_status|
|
89
|
+
it(emoji) do
|
90
|
+
if qual_status == "fully" || qual_status == "minimally" || qual_status == "un" && emoji.size >= 3
|
91
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_VALID]
|
92
|
+
else
|
93
|
+
refute_equal emoji, emoji[Unicode::Emoji::REGEX_VALID]
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
describe "REGEX_VALID_INCLUDE_TEXT" do
|
101
|
+
describe "detects all emoji" do
|
102
|
+
iterate_emoji do |emoji, qual_status|
|
103
|
+
it(emoji) do
|
104
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT]
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
describe "REGEX_WELL_FORMED" do
|
111
|
+
describe "detects fully-qualified, minimally-qualified emoji, and unqualified emoji with ZWJ" do
|
112
|
+
iterate_emoji do |emoji, qual_status|
|
113
|
+
it(emoji) do
|
114
|
+
if qual_status == "fully" || qual_status == "minimally" || qual_status == "un" && emoji.size >= 3
|
115
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED]
|
116
|
+
else
|
117
|
+
refute_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED]
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
describe "REGEX_WELL_FORMED_INCLUDE_TEXT" do
|
125
|
+
describe "detects all emoji" do
|
126
|
+
iterate_emoji do |emoji, qual_status|
|
127
|
+
it(emoji) do
|
128
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT]
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
describe "REGEX_POSSIBLE" do
|
135
|
+
describe "detects all emoji, except unqualified keycap sequences" do
|
136
|
+
# fixing test not regex, since implementation of this regex should match the one in the standard
|
137
|
+
unqualified_keycaps = Unicode::Emoji::EMOJI_KEYCAPS.map{|keycap|
|
138
|
+
[keycap, Unicode::Emoji::EMOJI_KEYCAP_SUFFIX].pack("U*")
|
139
|
+
}
|
140
|
+
|
141
|
+
iterate_emoji do |emoji, qual_status|
|
142
|
+
it(emoji) do
|
143
|
+
if !unqualified_keycaps.include?(emoji)
|
144
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_POSSIBLE]
|
145
|
+
else
|
146
|
+
refute_equal emoji, emoji[Unicode::Emoji::REGEX_POSSIBLE]
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
describe "REGEX_TEXT" do
|
154
|
+
describe "detects (unqualified) singleton text emoji" do
|
155
|
+
iterate_emoji do |emoji, qual_status|
|
156
|
+
it(emoji) do
|
157
|
+
# if qual_status == "un" && emoji =~ /^.[\u{FE0E 20E3}]?$/
|
158
|
+
if qual_status == "un" && emoji.size <= 2
|
159
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_TEXT]
|
160
|
+
else
|
161
|
+
refute_equal emoji, emoji[Unicode::Emoji::REGEX_TEXT]
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
describe "REGEX_BASIC" do
|
169
|
+
describe "detects (fully-qualified) singleton emoji" do
|
170
|
+
iterate_emoji do |emoji, qual_status|
|
171
|
+
it(emoji) do
|
172
|
+
if qual_status == "fully" && emoji =~ /^.\u{FE0F}?$/
|
173
|
+
assert_equal emoji, emoji[Unicode::Emoji::REGEX_BASIC]
|
174
|
+
else
|
175
|
+
refute_equal emoji, emoji[Unicode::Emoji::REGEX_BASIC]
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
data/spec/unicode_emoji_spec.rb
CHANGED
@@ -3,12 +3,12 @@ require "minitest/autorun"
|
|
3
3
|
|
4
4
|
describe Unicode::Emoji do
|
5
5
|
describe ".properties" do
|
6
|
-
it "returns an Array
|
6
|
+
it "returns an Array of Emoji properties for given codepoint" do
|
7
7
|
assert_equal ["Emoji", "Emoji_Presentation", "Extended_Pictographic"], Unicode::Emoji.properties("๐ด")
|
8
8
|
assert_equal ["Emoji", "Extended_Pictographic"], Unicode::Emoji.properties("โ ")
|
9
9
|
end
|
10
10
|
|
11
|
-
it "returns nil
|
11
|
+
it "returns nil if codepoint has no Emoji prop" do
|
12
12
|
assert_nil Unicode::Emoji.properties("A")
|
13
13
|
end
|
14
14
|
end
|
@@ -39,8 +39,19 @@ describe Unicode::Emoji do
|
|
39
39
|
assert_equal "โถ\u{FE0F}", $&
|
40
40
|
end
|
41
41
|
|
42
|
-
it "
|
42
|
+
it "matches singleton skin tone modifiers and hair components" do
|
43
43
|
"๐ป light skin tone" =~ Unicode::Emoji::REGEX
|
44
|
+
assert_equal "๐ป", $&
|
45
|
+
|
46
|
+
"๐ฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX
|
47
|
+
assert_equal "๐ฆฐ", $&
|
48
|
+
end
|
49
|
+
|
50
|
+
it "does not match singleton components that are not skin tone modifiers or hair components" do
|
51
|
+
"1 digit one" =~ Unicode::Emoji::REGEX
|
52
|
+
assert_nil $&
|
53
|
+
|
54
|
+
"๐ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX
|
44
55
|
assert_nil $&
|
45
56
|
end
|
46
57
|
|
@@ -92,6 +103,16 @@ describe Unicode::Emoji do
|
|
92
103
|
assert_equal "๐คพ๐ฝโโ๏ธ", $&
|
93
104
|
end
|
94
105
|
|
106
|
+
it "does not match MQE zwj sequences" do
|
107
|
+
"๐คพ๐ฝโโ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX
|
108
|
+
refute_equal "๐คพ๐ฝโโ", $&
|
109
|
+
end
|
110
|
+
|
111
|
+
it "does not match UQE emoji" do
|
112
|
+
"๐โโ๏ธ man golfing, missing VS16" =~ Unicode::Emoji::REGEX
|
113
|
+
refute_equal "๐โโ๏ธ", $&
|
114
|
+
end
|
115
|
+
|
95
116
|
it "does not match valid zwj sequences that are not recommended" do
|
96
117
|
"๐ค โ๐คข vomiting cowboy" =~ Unicode::Emoji::REGEX
|
97
118
|
assert_equal "๐ค ", $&
|
@@ -132,8 +153,6 @@ describe Unicode::Emoji do
|
|
132
153
|
assert_equal "๐ชพ", $&
|
133
154
|
end
|
134
155
|
|
135
|
-
|
136
|
-
|
137
156
|
# See gh#12 and https://github.com/matt17r/nw5k/commit/05a34d3c9211a23e5ae6853bb19fd2f224779ef4#diff-afb6f8bc3bae71b75743e00882a060863e2430cbe858ec9014e5956504dfc61cR2
|
138
157
|
it "matches family emoji correctly" do
|
139
158
|
["๐จโ๐ฉโ๐งโ๐ฆ", "๐จโ๐ฉโ๐ฆโ๐ฆ", "๐จโ๐ฉโ๐งโ๐ง", "๐จโ๐จโ๐งโ๐ฆ", "๐จโ๐จโ๐ฆโ๐ฆ", "๐จโ๐จโ๐งโ๐ง", "๐ฉโ๐ฉโ๐งโ๐ฆ", "๐ฉโ๐ฉโ๐ฆโ๐ฆ", "๐ฉโ๐ฉโ๐งโ๐ง", "๐จโ๐ฆโ๐ฆ", "๐จโ๐งโ๐ฆ", "๐จโ๐งโ๐ง", "๐ฉโ๐ฆโ๐ฆ", "๐ฉโ๐งโ๐ฆ", "๐ฉโ๐งโ๐ง"].each { |family|
|
@@ -142,6 +161,30 @@ describe Unicode::Emoji do
|
|
142
161
|
end
|
143
162
|
end
|
144
163
|
|
164
|
+
describe "REGEX_INCLUDE_MQE" do
|
165
|
+
it "matches MQE emoji" do
|
166
|
+
"๐คพ๐ฝโโ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE
|
167
|
+
assert_equal "๐คพ๐ฝโโ", $&
|
168
|
+
end
|
169
|
+
|
170
|
+
it "does not match UQE emoji" do
|
171
|
+
"๐โโ๏ธ man golfing, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE
|
172
|
+
refute_equal "๐โโ๏ธ", $&
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
describe "REGEX_INCLUDE_MQE_UQE" do
|
177
|
+
it "matches MQE emoji" do
|
178
|
+
"๐คพ๐ฝโโ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE_UQE
|
179
|
+
assert_equal "๐คพ๐ฝโโ", $&
|
180
|
+
end
|
181
|
+
|
182
|
+
it "matches UQE emoji" do
|
183
|
+
"๐โโ๏ธ man golfing, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE_UQE
|
184
|
+
assert_equal "๐โโ๏ธ", $&
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
145
188
|
describe "REGEX_VALID" do
|
146
189
|
it "matches most singleton emoji codepoints" do
|
147
190
|
"๐ด sleeping face" =~ Unicode::Emoji::REGEX_VALID
|
@@ -168,8 +211,19 @@ describe Unicode::Emoji do
|
|
168
211
|
assert_equal "โถ\u{FE0F}", $&
|
169
212
|
end
|
170
213
|
|
171
|
-
it "
|
214
|
+
it "matches singleton skin tone modifiers and hair components" do
|
172
215
|
"๐ป light skin tone" =~ Unicode::Emoji::REGEX_VALID
|
216
|
+
assert_equal "๐ป", $&
|
217
|
+
|
218
|
+
"๐ฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_VALID
|
219
|
+
assert_equal "๐ฆฐ", $&
|
220
|
+
end
|
221
|
+
|
222
|
+
it "does not match singleton components that are not skin tone modifiers or hair components" do
|
223
|
+
"1 digit one" =~ Unicode::Emoji::REGEX_VALID
|
224
|
+
assert_nil $&
|
225
|
+
|
226
|
+
"๐ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_VALID
|
173
227
|
assert_nil $&
|
174
228
|
end
|
175
229
|
|
@@ -263,8 +317,19 @@ describe Unicode::Emoji do
|
|
263
317
|
assert_equal "โถ\u{FE0F}", $&
|
264
318
|
end
|
265
319
|
|
266
|
-
it "
|
320
|
+
it "matches singleton skin tone modifiers and hair components" do
|
267
321
|
"๐ป light skin tone" =~ Unicode::Emoji::REGEX_WELL_FORMED
|
322
|
+
assert_equal "๐ป", $&
|
323
|
+
|
324
|
+
"๐ฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_WELL_FORMED
|
325
|
+
assert_equal "๐ฆฐ", $&
|
326
|
+
end
|
327
|
+
|
328
|
+
it "does not match singleton components that are not skin tone modifiers or hair components" do
|
329
|
+
"1 digit one" =~ Unicode::Emoji::REGEX_WELL_FORMED
|
330
|
+
assert_nil $&
|
331
|
+
|
332
|
+
"๐ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_WELL_FORMED
|
268
333
|
assert_nil $&
|
269
334
|
end
|
270
335
|
|
@@ -363,9 +428,20 @@ describe Unicode::Emoji do
|
|
363
428
|
assert_equal "โถ\u{FE0F}", $&
|
364
429
|
end
|
365
430
|
|
366
|
-
it "matches singleton
|
431
|
+
it "matches singleton skin tone modifiers and hair components" do
|
367
432
|
"๐ป light skin tone" =~ Unicode::Emoji::REGEX_POSSIBLE
|
368
433
|
assert_equal "๐ป", $&
|
434
|
+
|
435
|
+
"๐ฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_POSSIBLE
|
436
|
+
assert_equal "๐ฆฐ", $&
|
437
|
+
end
|
438
|
+
|
439
|
+
it "matches singleton components that are not skin tone modifiers or hair components" do
|
440
|
+
"1 digit one" =~ Unicode::Emoji::REGEX_POSSIBLE
|
441
|
+
assert_equal "1", $&
|
442
|
+
|
443
|
+
"๐ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_POSSIBLE
|
444
|
+
assert_equal "๐ฆ", $&
|
369
445
|
end
|
370
446
|
|
371
447
|
it "matches modified emoji if modifier base emoji is used" do
|
@@ -454,17 +530,28 @@ describe Unicode::Emoji do
|
|
454
530
|
end
|
455
531
|
|
456
532
|
it "does not match textual singleton emoji" do
|
457
|
-
"โถ play button" =~ Unicode::Emoji::
|
533
|
+
"โถ play button" =~ Unicode::Emoji::REGEX_BASIC
|
458
534
|
assert_nil $&
|
459
535
|
end
|
460
536
|
|
461
537
|
it "matches textual singleton emoji in combination with emoji variation selector" do
|
462
|
-
"โถ\u{FE0F} play button" =~ Unicode::Emoji::
|
538
|
+
"โถ\u{FE0F} play button" =~ Unicode::Emoji::REGEX_BASIC
|
463
539
|
assert_equal "โถ\u{FE0F}", $&
|
464
540
|
end
|
465
541
|
|
466
|
-
it "
|
542
|
+
it "matches singleton skin tone modifiers and hair components" do
|
467
543
|
"๐ป light skin tone" =~ Unicode::Emoji::REGEX_BASIC
|
544
|
+
assert_equal "๐ป", $&
|
545
|
+
|
546
|
+
"๐ฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_BASIC
|
547
|
+
assert_equal "๐ฆฐ", $&
|
548
|
+
end
|
549
|
+
|
550
|
+
it "does not match singleton components that are not skin tone modifiers or hair components" do
|
551
|
+
"1 digit one" =~ Unicode::Emoji::REGEX_BASIC
|
552
|
+
assert_nil $&
|
553
|
+
|
554
|
+
"๐ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_BASIC
|
468
555
|
assert_nil $&
|
469
556
|
end
|
470
557
|
|
@@ -525,9 +612,20 @@ describe Unicode::Emoji do
|
|
525
612
|
assert_nil $&
|
526
613
|
end
|
527
614
|
|
528
|
-
it "does not match singleton
|
615
|
+
it "does not match singleton skin tone modifiers and hair components" do
|
529
616
|
"๐ป light skin tone" =~ Unicode::Emoji::REGEX_TEXT
|
530
617
|
assert_nil $&
|
618
|
+
|
619
|
+
"๐ฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_TEXT
|
620
|
+
assert_nil $&
|
621
|
+
end
|
622
|
+
|
623
|
+
it "does not match singleton components that are not skin tone modifiers or hair components" do
|
624
|
+
"1 digit one" =~ Unicode::Emoji::REGEX_TEXT
|
625
|
+
assert_nil $&
|
626
|
+
|
627
|
+
"๐ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_TEXT
|
628
|
+
assert_nil $&
|
531
629
|
end
|
532
630
|
|
533
631
|
it "does not match modified emoji" do
|
@@ -564,13 +662,28 @@ describe Unicode::Emoji do
|
|
564
662
|
end
|
565
663
|
end
|
566
664
|
|
567
|
-
describe "
|
665
|
+
describe "REGEX_PROP_EMOJI" do
|
568
666
|
it "returns any emoji-related codepoint (but no variation selectors or tags)" do
|
569
|
-
matches = "1 string ๐ด\u{FE0F} sleeping face with ๐ต and modifier ๐พ, also ๐ด๓ ง๓ ข๓ ณ๓ ฃ๓ ด๓ ฟ Scotland".scan(Unicode::Emoji::
|
667
|
+
matches = "1 string ๐ด\u{FE0F} sleeping face with ๐ต and modifier ๐พ, also ๐ด๓ ง๓ ข๓ ณ๓ ฃ๓ ด๓ ฟ Scotland".scan(Unicode::Emoji::REGEX_PROP_EMOJI)
|
570
668
|
assert_equal ["1", "๐ด", "๐ต", "๐พ", "๐ด"], matches
|
571
669
|
end
|
572
670
|
end
|
573
671
|
|
672
|
+
describe "REGEX_EMOJI_KEYCAP" do
|
673
|
+
it "matches emoji keycap sequences" do
|
674
|
+
"2๏ธโฃ keycap: 2" =~ Unicode::Emoji::REGEX_EMOJI_KEYCAP
|
675
|
+
assert_equal "2๏ธโฃ", $&
|
676
|
+
end
|
677
|
+
|
678
|
+
it "does not match non-emoji keycap sequences" do
|
679
|
+
"8โฃ text keycap: 8" =~ Unicode::Emoji::REGEX_EMOJI_KEYCAP
|
680
|
+
assert_nil $&
|
681
|
+
|
682
|
+
"#โฃ text keycap: #" =~ Unicode::Emoji::REGEX_EMOJI_KEYCAP
|
683
|
+
assert_nil $&
|
684
|
+
end
|
685
|
+
end
|
686
|
+
|
574
687
|
describe "REGEX_PICTO" do
|
575
688
|
it "matches codepoints with Extended_Pictograph property (almost all emoji are, but also others)" do
|
576
689
|
matches = "U+1F32D ๐ญ HOT DOG, U+203C โผ DOUBLE EXCLAMATION MARK, U+26E8 โจ BLACK CROSS ON SHIELD".scan(Unicode::Emoji::REGEX_PICTO)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-emoji
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode-version
|
@@ -47,24 +47,38 @@ files:
|
|
47
47
|
- lib/unicode/emoji.rb
|
48
48
|
- lib/unicode/emoji/constants.rb
|
49
49
|
- lib/unicode/emoji/generated/regex.rb
|
50
|
-
- lib/unicode/emoji/generated/regex_any.rb
|
51
50
|
- lib/unicode/emoji/generated/regex_basic.rb
|
51
|
+
- lib/unicode/emoji/generated/regex_emoji_keycap.rb
|
52
|
+
- lib/unicode/emoji/generated/regex_include_mqe.rb
|
53
|
+
- lib/unicode/emoji/generated/regex_include_mqe_uqe.rb
|
52
54
|
- lib/unicode/emoji/generated/regex_include_text.rb
|
53
55
|
- lib/unicode/emoji/generated/regex_picto.rb
|
54
56
|
- lib/unicode/emoji/generated/regex_picto_no_emoji.rb
|
55
57
|
- lib/unicode/emoji/generated/regex_possible.rb
|
58
|
+
- lib/unicode/emoji/generated/regex_prop_component.rb
|
59
|
+
- lib/unicode/emoji/generated/regex_prop_emoji.rb
|
60
|
+
- lib/unicode/emoji/generated/regex_prop_modifier.rb
|
61
|
+
- lib/unicode/emoji/generated/regex_prop_modifier_base.rb
|
62
|
+
- lib/unicode/emoji/generated/regex_prop_presentation.rb
|
56
63
|
- lib/unicode/emoji/generated/regex_text.rb
|
57
64
|
- lib/unicode/emoji/generated/regex_valid.rb
|
58
65
|
- lib/unicode/emoji/generated/regex_valid_include_text.rb
|
59
66
|
- lib/unicode/emoji/generated/regex_well_formed.rb
|
60
67
|
- lib/unicode/emoji/generated/regex_well_formed_include_text.rb
|
61
68
|
- lib/unicode/emoji/generated_native/regex.rb
|
62
|
-
- lib/unicode/emoji/generated_native/regex_any.rb
|
63
69
|
- lib/unicode/emoji/generated_native/regex_basic.rb
|
70
|
+
- lib/unicode/emoji/generated_native/regex_emoji_keycap.rb
|
71
|
+
- lib/unicode/emoji/generated_native/regex_include_mqe.rb
|
72
|
+
- lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb
|
64
73
|
- lib/unicode/emoji/generated_native/regex_include_text.rb
|
65
74
|
- lib/unicode/emoji/generated_native/regex_picto.rb
|
66
75
|
- lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb
|
67
76
|
- lib/unicode/emoji/generated_native/regex_possible.rb
|
77
|
+
- lib/unicode/emoji/generated_native/regex_prop_component.rb
|
78
|
+
- lib/unicode/emoji/generated_native/regex_prop_emoji.rb
|
79
|
+
- lib/unicode/emoji/generated_native/regex_prop_modifier.rb
|
80
|
+
- lib/unicode/emoji/generated_native/regex_prop_modifier_base.rb
|
81
|
+
- lib/unicode/emoji/generated_native/regex_prop_presentation.rb
|
68
82
|
- lib/unicode/emoji/generated_native/regex_text.rb
|
69
83
|
- lib/unicode/emoji/generated_native/regex_valid.rb
|
70
84
|
- lib/unicode/emoji/generated_native/regex_valid_include_text.rb
|
@@ -73,6 +87,9 @@ files:
|
|
73
87
|
- lib/unicode/emoji/index.rb
|
74
88
|
- lib/unicode/emoji/lazy_constants.rb
|
75
89
|
- lib/unicode/emoji/list.rb
|
90
|
+
- spec/data/.keep
|
91
|
+
- spec/data/emoji-test.txt
|
92
|
+
- spec/emoji_test_txt_spec.rb
|
76
93
|
- spec/unicode_emoji_spec.rb
|
77
94
|
- unicode-emoji.gemspec
|
78
95
|
homepage: https://github.com/janlelis/unicode-emoji
|
@@ -103,4 +120,7 @@ signing_key:
|
|
103
120
|
specification_version: 4
|
104
121
|
summary: Emoji data and regex
|
105
122
|
test_files:
|
123
|
+
- spec/data/.keep
|
124
|
+
- spec/data/emoji-test.txt
|
125
|
+
- spec/emoji_test_txt_spec.rb
|
106
126
|
- spec/unicode_emoji_spec.rb
|
@@ -1,8 +0,0 @@
|
|
1
|
-
# This file was generated by a script, please do not edit it by hand.
|
2
|
-
# See `$ rake generate_constants` and data/generate_constants.rb for more info.
|
3
|
-
|
4
|
-
module Unicode
|
5
|
-
module Emoji
|
6
|
-
REGEX_ANY = /[\#\*0-9ยฉยฎโผโโขโนโ-โโฉโชโโโจโโฉ-โณโธ-โบโโชโซโถโโป-โพโ-โโโโโโโโ โขโฃโฆโชโฎโฏโธ-โบโโโ-โโโ โฃโฅโฆโจโปโพโฟโ-โโโโโ โกโงโชโซโฐโฑโฝโพโโ
โโโโโโโฉโชโฐ-โตโท-โบโฝโโ
โ-โโโโโโโกโจโณโดโโโโโ-โโโฃโคโ-โโกโฐโฟโคดโคตโฌ
-โฌโฌโฌโญโญใฐใฝใใ๐๐๐
ฐ๐
ฑ๐
พ๐
ฟ๐๐-๐๐ฆ-๐ฟ๐๐๐๐ฏ๐ฒ-๐บ๐๐๐-๐ก๐ค-๐๐๐๐-๐๐-๐ฐ๐ณ-๐ต๐ท-๐ฝ๐ฟ-๐ฝ๐-๐๐-๐ง๐ฏ๐ฐ๐ณ-๐บ๐๐-๐๐๐๐๐ค๐ฅ๐จ๐ฑ๐ฒ๐ผ๐-๐๐-๐๐-๐๐ก๐ฃ๐จ๐ฏ๐ณ๐บ-๐๐-๐
๐-๐๐-๐๐-๐ฅ๐ฉ๐ซ๐ฌ๐ฐ๐ณ-๐ผ๐ -๐ซ๐ฐ๐ค-๐คบ๐คผ-๐ฅ
๐ฅ-๐งฟ๐ฉฐ-๐ฉผ๐ช-๐ช๐ช-๐ซ๐ซ-๐ซ๐ซ-๐ซฉ๐ซฐ-๐ซธ]/
|
7
|
-
end
|
8
|
-
end
|