unicode-emoji 3.7.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rake_tasks +1 -0
  4. data/CHANGELOG.md +24 -1
  5. data/Gemfile.lock +3 -3
  6. data/README.md +111 -58
  7. data/Rakefile +6 -2
  8. data/data/emoji.marshal.gz +0 -0
  9. data/data/generate_constants.rb +123 -43
  10. data/lib/unicode/emoji/constants.rb +22 -2
  11. data/lib/unicode/emoji/generated/regex.rb +1 -1
  12. data/lib/unicode/emoji/generated/regex_basic.rb +1 -1
  13. data/lib/unicode/emoji/generated/regex_emoji_keycap.rb +8 -0
  14. data/lib/unicode/emoji/generated/regex_include_mqe.rb +8 -0
  15. data/lib/unicode/emoji/generated/regex_include_mqe_uqe.rb +8 -0
  16. data/lib/unicode/emoji/generated/regex_include_text.rb +1 -1
  17. data/lib/unicode/emoji/generated/regex_prop_component.rb +8 -0
  18. data/lib/unicode/emoji/generated/regex_prop_emoji.rb +8 -0
  19. data/lib/unicode/emoji/generated/regex_prop_modifier.rb +8 -0
  20. data/lib/unicode/emoji/generated/regex_prop_modifier_base.rb +8 -0
  21. data/lib/unicode/emoji/generated/regex_prop_presentation.rb +8 -0
  22. data/lib/unicode/emoji/generated/regex_text.rb +1 -1
  23. data/lib/unicode/emoji/generated/regex_valid.rb +1 -1
  24. data/lib/unicode/emoji/generated/regex_valid_include_text.rb +1 -1
  25. data/lib/unicode/emoji/generated/regex_well_formed.rb +1 -1
  26. data/lib/unicode/emoji/generated/regex_well_formed_include_text.rb +1 -1
  27. data/lib/unicode/emoji/generated_native/regex.rb +1 -1
  28. data/lib/unicode/emoji/generated_native/regex_basic.rb +1 -1
  29. data/lib/unicode/emoji/generated_native/regex_emoji_keycap.rb +8 -0
  30. data/lib/unicode/emoji/generated_native/regex_include_mqe.rb +8 -0
  31. data/lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb +8 -0
  32. data/lib/unicode/emoji/generated_native/regex_include_text.rb +1 -1
  33. data/lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb +1 -1
  34. data/lib/unicode/emoji/generated_native/regex_prop_component.rb +8 -0
  35. data/lib/unicode/emoji/generated_native/{regex_any.rb → regex_prop_emoji.rb} +1 -1
  36. data/lib/unicode/emoji/generated_native/regex_prop_modifier.rb +8 -0
  37. data/lib/unicode/emoji/generated_native/regex_prop_modifier_base.rb +8 -0
  38. data/lib/unicode/emoji/generated_native/regex_prop_presentation.rb +8 -0
  39. data/lib/unicode/emoji/generated_native/regex_text.rb +1 -1
  40. data/lib/unicode/emoji/generated_native/regex_valid.rb +1 -1
  41. data/lib/unicode/emoji/generated_native/regex_valid_include_text.rb +1 -1
  42. data/lib/unicode/emoji/generated_native/regex_well_formed.rb +1 -1
  43. data/lib/unicode/emoji/generated_native/regex_well_formed_include_text.rb +1 -1
  44. data/lib/unicode/emoji/lazy_constants.rb +36 -0
  45. data/lib/unicode/emoji/list.rb +3 -0
  46. data/lib/unicode/emoji.rb +39 -6
  47. data/spec/data/.keep +0 -0
  48. data/spec/data/emoji-test.txt +5331 -0
  49. data/spec/emoji_test_txt_spec.rb +181 -0
  50. data/spec/unicode_emoji_spec.rb +127 -14
  51. metadata +24 -4
  52. data/lib/unicode/emoji/generated/regex_any.rb +0 -8
@@ -0,0 +1,181 @@
1
+ require_relative "../lib/unicode/emoji"
2
+ require "minitest/autorun"
3
+ require "open-uri"
4
+
5
+ def iterate_emoji
6
+ EMOJI_TEST_FILE.scan(/^(?:# (?<sub>sub)?group: (?<group_name>.*)$)|(?:(?<codepoints>.+?)\s*; (?<qual_status>.+?)-?qualified )/) do
7
+ if $~[:codepoints]
8
+ yield $~[:codepoints].split.map{|e| e.to_i(16)}.pack("U*"), $~[:qual_status]
9
+ end
10
+ end
11
+ end
12
+
13
+ describe "emoji-test.txt" do
14
+ EMOJI_TEST_FILE = begin
15
+ emoji_test_path = File.join(__dir__, "data/emoji-test.txt")
16
+ if File.exist? emoji_test_path
17
+ file = File.read(emoji_test_path)
18
+ else
19
+ puts "Downloading emoji-test.txt from the consortium"
20
+ URI.open "https://www.unicode.org/Public/emoji/#{Unicode::Emoji::EMOJI_VERSION}/emoji-test.txt" do |f|
21
+ file = f.read
22
+ File.write(File.join(__dir__, "data/emoji-test.txt"), @file)
23
+ end
24
+ end
25
+
26
+ file
27
+ end
28
+
29
+ # qual_status:
30
+ # - fully - fully-qualified emoji sequences
31
+ # - minimally - minimally-qualified emoji sequences (some VS16 missing, but not first one)
32
+ # - un - unqualified emoji sequences (some VS16 missing)
33
+
34
+ describe "REGEX" do
35
+ describe "detects fully-qualified emoji" do
36
+ iterate_emoji do |emoji, qual_status|
37
+ it(emoji) do
38
+ if qual_status == "fully"
39
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX]
40
+ else
41
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX]
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+
48
+ describe "REGEX_INCLUDE_TEXT" do
49
+ describe "detects fully-qualified emoji and (unqualified) singleton text emoji" do
50
+ iterate_emoji do |emoji, qual_status|
51
+ it(emoji) do
52
+ if qual_status == "fully" || qual_status == "un" && emoji.size <= 2
53
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_TEXT]
54
+ else
55
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_TEXT]
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+
62
+ describe "REGEX_INCLUDE_MQE" do
63
+ describe "detects fully-qualified emoji and minimally-qualified emoji" do
64
+ iterate_emoji do |emoji, qual_status|
65
+ it(emoji) do
66
+ if qual_status == "fully" || qual_status == "minimally"
67
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE]
68
+ else
69
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE]
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
75
+
76
+ describe "REGEX_INCLUDE_MQE_UQE" do
77
+ describe "detects all emoji" do
78
+ iterate_emoji do |emoji, qual_status|
79
+ it(emoji) do
80
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE_UQE]
81
+ end
82
+ end
83
+ end
84
+ end
85
+
86
+ describe "REGEX_VALID" do
87
+ describe "detects fully-qualified, minimally-qualified emoji, and unqualified emoji with ZWJ" do
88
+ iterate_emoji do |emoji, qual_status|
89
+ it(emoji) do
90
+ if qual_status == "fully" || qual_status == "minimally" || qual_status == "un" && emoji.size >= 3
91
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_VALID]
92
+ else
93
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_VALID]
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
99
+
100
+ describe "REGEX_VALID_INCLUDE_TEXT" do
101
+ describe "detects all emoji" do
102
+ iterate_emoji do |emoji, qual_status|
103
+ it(emoji) do
104
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT]
105
+ end
106
+ end
107
+ end
108
+ end
109
+
110
+ describe "REGEX_WELL_FORMED" do
111
+ describe "detects fully-qualified, minimally-qualified emoji, and unqualified emoji with ZWJ" do
112
+ iterate_emoji do |emoji, qual_status|
113
+ it(emoji) do
114
+ if qual_status == "fully" || qual_status == "minimally" || qual_status == "un" && emoji.size >= 3
115
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED]
116
+ else
117
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED]
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
123
+
124
+ describe "REGEX_WELL_FORMED_INCLUDE_TEXT" do
125
+ describe "detects all emoji" do
126
+ iterate_emoji do |emoji, qual_status|
127
+ it(emoji) do
128
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT]
129
+ end
130
+ end
131
+ end
132
+ end
133
+
134
+ describe "REGEX_POSSIBLE" do
135
+ describe "detects all emoji, except unqualified keycap sequences" do
136
+ # fixing test not regex, since implementation of this regex should match the one in the standard
137
+ unqualified_keycaps = Unicode::Emoji::EMOJI_KEYCAPS.map{|keycap|
138
+ [keycap, Unicode::Emoji::EMOJI_KEYCAP_SUFFIX].pack("U*")
139
+ }
140
+
141
+ iterate_emoji do |emoji, qual_status|
142
+ it(emoji) do
143
+ if !unqualified_keycaps.include?(emoji)
144
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_POSSIBLE]
145
+ else
146
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_POSSIBLE]
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
152
+
153
+ describe "REGEX_TEXT" do
154
+ describe "detects (unqualified) singleton text emoji" do
155
+ iterate_emoji do |emoji, qual_status|
156
+ it(emoji) do
157
+ # if qual_status == "un" && emoji =~ /^.[\u{FE0E 20E3}]?$/
158
+ if qual_status == "un" && emoji.size <= 2
159
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_TEXT]
160
+ else
161
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_TEXT]
162
+ end
163
+ end
164
+ end
165
+ end
166
+ end
167
+
168
+ describe "REGEX_BASIC" do
169
+ describe "detects (fully-qualified) singleton emoji" do
170
+ iterate_emoji do |emoji, qual_status|
171
+ it(emoji) do
172
+ if qual_status == "fully" && emoji =~ /^.\u{FE0F}?$/
173
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_BASIC]
174
+ else
175
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_BASIC]
176
+ end
177
+ end
178
+ end
179
+ end
180
+ end
181
+ end
@@ -3,12 +3,12 @@ require "minitest/autorun"
3
3
 
4
4
  describe Unicode::Emoji do
5
5
  describe ".properties" do
6
- it "returns an Array for Emoji properties if has codepoints" do
6
+ it "returns an Array of Emoji properties for given codepoint" do
7
7
  assert_equal ["Emoji", "Emoji_Presentation", "Extended_Pictographic"], Unicode::Emoji.properties("😴")
8
8
  assert_equal ["Emoji", "Extended_Pictographic"], Unicode::Emoji.properties("♠")
9
9
  end
10
10
 
11
- it "returns nil for Emoji properties if has no codepoints" do
11
+ it "returns nil if codepoint has no Emoji prop" do
12
12
  assert_nil Unicode::Emoji.properties("A")
13
13
  end
14
14
  end
@@ -39,8 +39,19 @@ describe Unicode::Emoji do
39
39
  assert_equal "โ–ถ\u{FE0F}", $&
40
40
  end
41
41
 
42
- it "does not match singleton 'component' emoji codepoints" do
42
+ it "matches singleton skin tone modifiers and hair components" do
43
43
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX
44
+ assert_equal "๐Ÿป", $&
45
+
46
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX
47
+ assert_equal "๐Ÿฆฐ", $&
48
+ end
49
+
50
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
51
+ "1 digit one" =~ Unicode::Emoji::REGEX
52
+ assert_nil $&
53
+
54
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX
44
55
  assert_nil $&
45
56
  end
46
57
 
@@ -92,6 +103,16 @@ describe Unicode::Emoji do
92
103
  assert_equal "๐Ÿคพ๐Ÿฝโ€โ™€๏ธ", $&
93
104
  end
94
105
 
106
+ it "does not match MQE zwj sequences" do
107
+ "๐Ÿคพ๐Ÿฝโ€โ™€ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX
108
+ refute_equal "๐Ÿคพ๐Ÿฝโ€โ™€", $&
109
+ end
110
+
111
+ it "does not match UQE emoji" do
112
+ "๐ŸŒโ€โ™‚๏ธ man golfing, missing VS16" =~ Unicode::Emoji::REGEX
113
+ refute_equal "๐ŸŒโ€โ™‚๏ธ", $&
114
+ end
115
+
95
116
  it "does not match valid zwj sequences that are not recommended" do
96
117
  "๐Ÿค โ€๐Ÿคข vomiting cowboy" =~ Unicode::Emoji::REGEX
97
118
  assert_equal "๐Ÿค ", $&
@@ -132,8 +153,6 @@ describe Unicode::Emoji do
132
153
  assert_equal "๐Ÿชพ", $&
133
154
  end
134
155
 
135
-
136
-
137
156
  # See gh#12 and https://github.com/matt17r/nw5k/commit/05a34d3c9211a23e5ae6853bb19fd2f224779ef4#diff-afb6f8bc3bae71b75743e00882a060863e2430cbe858ec9014e5956504dfc61cR2
138
157
  it "matches family emoji correctly" do
139
158
  ["๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง", "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ง", "๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง", "๐Ÿ‘จโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ง", "๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง"].each { |family|
@@ -142,6 +161,30 @@ describe Unicode::Emoji do
142
161
  end
143
162
  end
144
163
 
164
+ describe "REGEX_INCLUDE_MQE" do
165
+ it "matches MQE emoji" do
166
+ "๐Ÿคพ๐Ÿฝโ€โ™€ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE
167
+ assert_equal "๐Ÿคพ๐Ÿฝโ€โ™€", $&
168
+ end
169
+
170
+ it "does not match UQE emoji" do
171
+ "๐ŸŒโ€โ™‚๏ธ man golfing, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE
172
+ refute_equal "๐ŸŒโ€โ™‚๏ธ", $&
173
+ end
174
+ end
175
+
176
+ describe "REGEX_INCLUDE_MQE_UQE" do
177
+ it "matches MQE emoji" do
178
+ "๐Ÿคพ๐Ÿฝโ€โ™€ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE_UQE
179
+ assert_equal "๐Ÿคพ๐Ÿฝโ€โ™€", $&
180
+ end
181
+
182
+ it "matches UQE emoji" do
183
+ "๐ŸŒโ€โ™‚๏ธ man golfing, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE_UQE
184
+ assert_equal "๐ŸŒโ€โ™‚๏ธ", $&
185
+ end
186
+ end
187
+
145
188
  describe "REGEX_VALID" do
146
189
  it "matches most singleton emoji codepoints" do
147
190
  "๐Ÿ˜ด sleeping face" =~ Unicode::Emoji::REGEX_VALID
@@ -168,8 +211,19 @@ describe Unicode::Emoji do
168
211
  assert_equal "โ–ถ\u{FE0F}", $&
169
212
  end
170
213
 
171
- it "does not match singleton 'component' emoji codepoints" do
214
+ it "matches singleton skin tone modifiers and hair components" do
172
215
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_VALID
216
+ assert_equal "๐Ÿป", $&
217
+
218
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_VALID
219
+ assert_equal "๐Ÿฆฐ", $&
220
+ end
221
+
222
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
223
+ "1 digit one" =~ Unicode::Emoji::REGEX_VALID
224
+ assert_nil $&
225
+
226
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_VALID
173
227
  assert_nil $&
174
228
  end
175
229
 
@@ -263,8 +317,19 @@ describe Unicode::Emoji do
263
317
  assert_equal "โ–ถ\u{FE0F}", $&
264
318
  end
265
319
 
266
- it "does not match singleton 'component' emoji codepoints" do
320
+ it "matches singleton skin tone modifiers and hair components" do
267
321
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_WELL_FORMED
322
+ assert_equal "๐Ÿป", $&
323
+
324
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_WELL_FORMED
325
+ assert_equal "๐Ÿฆฐ", $&
326
+ end
327
+
328
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
329
+ "1 digit one" =~ Unicode::Emoji::REGEX_WELL_FORMED
330
+ assert_nil $&
331
+
332
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_WELL_FORMED
268
333
  assert_nil $&
269
334
  end
270
335
 
@@ -363,9 +428,20 @@ describe Unicode::Emoji do
363
428
  assert_equal "โ–ถ\u{FE0F}", $&
364
429
  end
365
430
 
366
- it "matches singleton 'component' emoji codepoints" do
431
+ it "matches singleton skin tone modifiers and hair components" do
367
432
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_POSSIBLE
368
433
  assert_equal "๐Ÿป", $&
434
+
435
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_POSSIBLE
436
+ assert_equal "๐Ÿฆฐ", $&
437
+ end
438
+
439
+ it "matches singleton components that are not skin tone modifiers or hair components" do
440
+ "1 digit one" =~ Unicode::Emoji::REGEX_POSSIBLE
441
+ assert_equal "1", $&
442
+
443
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_POSSIBLE
444
+ assert_equal "๐Ÿ‡ฆ", $&
369
445
  end
370
446
 
371
447
  it "matches modified emoji if modifier base emoji is used" do
@@ -454,17 +530,28 @@ describe Unicode::Emoji do
454
530
  end
455
531
 
456
532
  it "does not match textual singleton emoji" do
457
- "โ–ถ play button" =~ Unicode::Emoji::REGEX
533
+ "โ–ถ play button" =~ Unicode::Emoji::REGEX_BASIC
458
534
  assert_nil $&
459
535
  end
460
536
 
461
537
  it "matches textual singleton emoji in combination with emoji variation selector" do
462
- "โ–ถ\u{FE0F} play button" =~ Unicode::Emoji::REGEX
538
+ "โ–ถ\u{FE0F} play button" =~ Unicode::Emoji::REGEX_BASIC
463
539
  assert_equal "โ–ถ\u{FE0F}", $&
464
540
  end
465
541
 
466
- it "does not match singleton 'component' emoji codepoints" do
542
+ it "matches singleton skin tone modifiers and hair components" do
467
543
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_BASIC
544
+ assert_equal "๐Ÿป", $&
545
+
546
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_BASIC
547
+ assert_equal "๐Ÿฆฐ", $&
548
+ end
549
+
550
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
551
+ "1 digit one" =~ Unicode::Emoji::REGEX_BASIC
552
+ assert_nil $&
553
+
554
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_BASIC
468
555
  assert_nil $&
469
556
  end
470
557
 
@@ -525,9 +612,20 @@ describe Unicode::Emoji do
525
612
  assert_nil $&
526
613
  end
527
614
 
528
- it "does not match singleton 'component' emoji codepoints" do
615
+ it "does not match singleton skin tone modifiers and hair components" do
529
616
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_TEXT
530
617
  assert_nil $&
618
+
619
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_TEXT
620
+ assert_nil $&
621
+ end
622
+
623
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
624
+ "1 digit one" =~ Unicode::Emoji::REGEX_TEXT
625
+ assert_nil $&
626
+
627
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_TEXT
628
+ assert_nil $&
531
629
  end
532
630
 
533
631
  it "does not match modified emoji" do
@@ -564,13 +662,28 @@ describe Unicode::Emoji do
564
662
  end
565
663
  end
566
664
 
567
- describe "REGEX_ANY" do
665
+ describe "REGEX_PROP_EMOJI" do
568
666
  it "returns any emoji-related codepoint (but no variation selectors or tags)" do
569
- matches = "1 string ๐Ÿ˜ด\u{FE0F} sleeping face with ๐Ÿ‡ต and modifier ๐Ÿพ, also ๐Ÿด๓ ง๓ ข๓ ณ๓ ฃ๓ ด๓ ฟ Scotland".scan(Unicode::Emoji::REGEX_ANY)
667
+ matches = "1 string ๐Ÿ˜ด\u{FE0F} sleeping face with ๐Ÿ‡ต and modifier ๐Ÿพ, also ๐Ÿด๓ ง๓ ข๓ ณ๓ ฃ๓ ด๓ ฟ Scotland".scan(Unicode::Emoji::REGEX_PROP_EMOJI)
570
668
  assert_equal ["1", "๐Ÿ˜ด", "๐Ÿ‡ต", "๐Ÿพ", "๐Ÿด"], matches
571
669
  end
572
670
  end
573
671
 
672
+ describe "REGEX_EMOJI_KEYCAP" do
673
+ it "matches emoji keycap sequences" do
674
+ "2๏ธโƒฃ keycap: 2" =~ Unicode::Emoji::REGEX_EMOJI_KEYCAP
675
+ assert_equal "2๏ธโƒฃ", $&
676
+ end
677
+
678
+ it "does not match non-emoji keycap sequences" do
679
+ "8โƒฃ text keycap: 8" =~ Unicode::Emoji::REGEX_EMOJI_KEYCAP
680
+ assert_nil $&
681
+
682
+ "#โƒฃ text keycap: #" =~ Unicode::Emoji::REGEX_EMOJI_KEYCAP
683
+ assert_nil $&
684
+ end
685
+ end
686
+
574
687
  describe "REGEX_PICTO" do
575
688
  it "matches codepoints with Extended_Pictograph property (almost all emoji are, but also others)" do
576
689
  matches = "U+1F32D ๐ŸŒญ HOT DOG, U+203C โ€ผ DOUBLE EXCLAMATION MARK, U+26E8 โ›จ BLACK CROSS ON SHIELD".scan(Unicode::Emoji::REGEX_PICTO)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-emoji
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.0
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-18 00:00:00.000000000 Z
11
+ date: 2024-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-version
@@ -47,24 +47,38 @@ files:
47
47
  - lib/unicode/emoji.rb
48
48
  - lib/unicode/emoji/constants.rb
49
49
  - lib/unicode/emoji/generated/regex.rb
50
- - lib/unicode/emoji/generated/regex_any.rb
51
50
  - lib/unicode/emoji/generated/regex_basic.rb
51
+ - lib/unicode/emoji/generated/regex_emoji_keycap.rb
52
+ - lib/unicode/emoji/generated/regex_include_mqe.rb
53
+ - lib/unicode/emoji/generated/regex_include_mqe_uqe.rb
52
54
  - lib/unicode/emoji/generated/regex_include_text.rb
53
55
  - lib/unicode/emoji/generated/regex_picto.rb
54
56
  - lib/unicode/emoji/generated/regex_picto_no_emoji.rb
55
57
  - lib/unicode/emoji/generated/regex_possible.rb
58
+ - lib/unicode/emoji/generated/regex_prop_component.rb
59
+ - lib/unicode/emoji/generated/regex_prop_emoji.rb
60
+ - lib/unicode/emoji/generated/regex_prop_modifier.rb
61
+ - lib/unicode/emoji/generated/regex_prop_modifier_base.rb
62
+ - lib/unicode/emoji/generated/regex_prop_presentation.rb
56
63
  - lib/unicode/emoji/generated/regex_text.rb
57
64
  - lib/unicode/emoji/generated/regex_valid.rb
58
65
  - lib/unicode/emoji/generated/regex_valid_include_text.rb
59
66
  - lib/unicode/emoji/generated/regex_well_formed.rb
60
67
  - lib/unicode/emoji/generated/regex_well_formed_include_text.rb
61
68
  - lib/unicode/emoji/generated_native/regex.rb
62
- - lib/unicode/emoji/generated_native/regex_any.rb
63
69
  - lib/unicode/emoji/generated_native/regex_basic.rb
70
+ - lib/unicode/emoji/generated_native/regex_emoji_keycap.rb
71
+ - lib/unicode/emoji/generated_native/regex_include_mqe.rb
72
+ - lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb
64
73
  - lib/unicode/emoji/generated_native/regex_include_text.rb
65
74
  - lib/unicode/emoji/generated_native/regex_picto.rb
66
75
  - lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb
67
76
  - lib/unicode/emoji/generated_native/regex_possible.rb
77
+ - lib/unicode/emoji/generated_native/regex_prop_component.rb
78
+ - lib/unicode/emoji/generated_native/regex_prop_emoji.rb
79
+ - lib/unicode/emoji/generated_native/regex_prop_modifier.rb
80
+ - lib/unicode/emoji/generated_native/regex_prop_modifier_base.rb
81
+ - lib/unicode/emoji/generated_native/regex_prop_presentation.rb
68
82
  - lib/unicode/emoji/generated_native/regex_text.rb
69
83
  - lib/unicode/emoji/generated_native/regex_valid.rb
70
84
  - lib/unicode/emoji/generated_native/regex_valid_include_text.rb
@@ -73,6 +87,9 @@ files:
73
87
  - lib/unicode/emoji/index.rb
74
88
  - lib/unicode/emoji/lazy_constants.rb
75
89
  - lib/unicode/emoji/list.rb
90
+ - spec/data/.keep
91
+ - spec/data/emoji-test.txt
92
+ - spec/emoji_test_txt_spec.rb
76
93
  - spec/unicode_emoji_spec.rb
77
94
  - unicode-emoji.gemspec
78
95
  homepage: https://github.com/janlelis/unicode-emoji
@@ -103,4 +120,7 @@ signing_key:
103
120
  specification_version: 4
104
121
  summary: Emoji data and regex
105
122
  test_files:
123
+ - spec/data/.keep
124
+ - spec/data/emoji-test.txt
125
+ - spec/emoji_test_txt_spec.rb
106
126
  - spec/unicode_emoji_spec.rb
@@ -1,8 +0,0 @@
1
- # This file was generated by a script, please do not edit it by hand.
2
- # See `$ rake generate_constants` and data/generate_constants.rb for more info.
3
-
4
- module Unicode
5
- module Emoji
6
- REGEX_ANY = /[\#\*0-9ยฉยฎโ€ผโ‰โ„ขโ„นโ†”-โ†™โ†ฉโ†ชโŒšโŒ›โŒจโโฉ-โณโธ-โบโ“‚โ–ชโ–ซโ–ถโ—€โ—ป-โ—พโ˜€-โ˜„โ˜Žโ˜‘โ˜”โ˜•โ˜˜โ˜โ˜ โ˜ขโ˜ฃโ˜ฆโ˜ชโ˜ฎโ˜ฏโ˜ธ-โ˜บโ™€โ™‚โ™ˆ-โ™“โ™Ÿโ™ โ™ฃโ™ฅโ™ฆโ™จโ™ปโ™พโ™ฟโš’-โš—โš™โš›โšœโš โšกโšงโšชโšซโšฐโšฑโšฝโšพโ›„โ›…โ›ˆโ›Žโ›โ›‘โ›“โ›”โ›ฉโ›ชโ›ฐ-โ›ตโ›ท-โ›บโ›ฝโœ‚โœ…โœˆ-โœโœโœ’โœ”โœ–โœโœกโœจโœณโœดโ„โ‡โŒโŽโ“-โ•โ—โฃโคโž•-โž—โžกโžฐโžฟโคดโคตโฌ…-โฌ‡โฌ›โฌœโญโญ•ใ€ฐใ€ฝใŠ—ใŠ™๐Ÿ€„๐Ÿƒ๐Ÿ…ฐ๐Ÿ…ฑ๐Ÿ…พ๐Ÿ…ฟ๐Ÿ†Ž๐Ÿ†‘-๐Ÿ†š๐Ÿ‡ฆ-๐Ÿ‡ฟ๐Ÿˆ๐Ÿˆ‚๐Ÿˆš๐Ÿˆฏ๐Ÿˆฒ-๐Ÿˆบ๐Ÿ‰๐Ÿ‰‘๐ŸŒ€-๐ŸŒก๐ŸŒค-๐ŸŽ“๐ŸŽ–๐ŸŽ—๐ŸŽ™-๐ŸŽ›๐ŸŽž-๐Ÿฐ๐Ÿณ-๐Ÿต๐Ÿท-๐Ÿ“ฝ๐Ÿ“ฟ-๐Ÿ”ฝ๐Ÿ•‰-๐Ÿ•Ž๐Ÿ•-๐Ÿ•ง๐Ÿ•ฏ๐Ÿ•ฐ๐Ÿ•ณ-๐Ÿ•บ๐Ÿ–‡๐Ÿ–Š-๐Ÿ–๐Ÿ–๐Ÿ–•๐Ÿ––๐Ÿ–ค๐Ÿ–ฅ๐Ÿ–จ๐Ÿ–ฑ๐Ÿ–ฒ๐Ÿ–ผ๐Ÿ—‚-๐Ÿ—„๐Ÿ—‘-๐Ÿ—“๐Ÿ—œ-๐Ÿ—ž๐Ÿ—ก๐Ÿ—ฃ๐Ÿ—จ๐Ÿ—ฏ๐Ÿ—ณ๐Ÿ—บ-๐Ÿ™๐Ÿš€-๐Ÿ›…๐Ÿ›‹-๐Ÿ›’๐Ÿ›•-๐Ÿ›—๐Ÿ›œ-๐Ÿ›ฅ๐Ÿ›ฉ๐Ÿ›ซ๐Ÿ›ฌ๐Ÿ›ฐ๐Ÿ›ณ-๐Ÿ›ผ๐ŸŸ -๐ŸŸซ๐ŸŸฐ๐ŸคŒ-๐Ÿคบ๐Ÿคผ-๐Ÿฅ…๐Ÿฅ‡-๐Ÿงฟ๐Ÿฉฐ-๐Ÿฉผ๐Ÿช€-๐Ÿช‰๐Ÿช-๐Ÿซ†๐ŸซŽ-๐Ÿซœ๐ŸซŸ-๐Ÿซฉ๐Ÿซฐ-๐Ÿซธ]/
7
- end
8
- end