unicode-emoji 3.7.0 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rake_tasks +1 -0
  4. data/CHANGELOG.md +24 -1
  5. data/Gemfile.lock +3 -3
  6. data/README.md +111 -58
  7. data/Rakefile +6 -2
  8. data/data/emoji.marshal.gz +0 -0
  9. data/data/generate_constants.rb +123 -43
  10. data/lib/unicode/emoji/constants.rb +22 -2
  11. data/lib/unicode/emoji/generated/regex.rb +1 -1
  12. data/lib/unicode/emoji/generated/regex_basic.rb +1 -1
  13. data/lib/unicode/emoji/generated/regex_emoji_keycap.rb +8 -0
  14. data/lib/unicode/emoji/generated/regex_include_mqe.rb +8 -0
  15. data/lib/unicode/emoji/generated/regex_include_mqe_uqe.rb +8 -0
  16. data/lib/unicode/emoji/generated/regex_include_text.rb +1 -1
  17. data/lib/unicode/emoji/generated/regex_prop_component.rb +8 -0
  18. data/lib/unicode/emoji/generated/regex_prop_emoji.rb +8 -0
  19. data/lib/unicode/emoji/generated/regex_prop_modifier.rb +8 -0
  20. data/lib/unicode/emoji/generated/regex_prop_modifier_base.rb +8 -0
  21. data/lib/unicode/emoji/generated/regex_prop_presentation.rb +8 -0
  22. data/lib/unicode/emoji/generated/regex_text.rb +1 -1
  23. data/lib/unicode/emoji/generated/regex_valid.rb +1 -1
  24. data/lib/unicode/emoji/generated/regex_valid_include_text.rb +1 -1
  25. data/lib/unicode/emoji/generated/regex_well_formed.rb +1 -1
  26. data/lib/unicode/emoji/generated/regex_well_formed_include_text.rb +1 -1
  27. data/lib/unicode/emoji/generated_native/regex.rb +1 -1
  28. data/lib/unicode/emoji/generated_native/regex_basic.rb +1 -1
  29. data/lib/unicode/emoji/generated_native/regex_emoji_keycap.rb +8 -0
  30. data/lib/unicode/emoji/generated_native/regex_include_mqe.rb +8 -0
  31. data/lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb +8 -0
  32. data/lib/unicode/emoji/generated_native/regex_include_text.rb +1 -1
  33. data/lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb +1 -1
  34. data/lib/unicode/emoji/generated_native/regex_prop_component.rb +8 -0
  35. data/lib/unicode/emoji/generated_native/{regex_any.rb → regex_prop_emoji.rb} +1 -1
  36. data/lib/unicode/emoji/generated_native/regex_prop_modifier.rb +8 -0
  37. data/lib/unicode/emoji/generated_native/regex_prop_modifier_base.rb +8 -0
  38. data/lib/unicode/emoji/generated_native/regex_prop_presentation.rb +8 -0
  39. data/lib/unicode/emoji/generated_native/regex_text.rb +1 -1
  40. data/lib/unicode/emoji/generated_native/regex_valid.rb +1 -1
  41. data/lib/unicode/emoji/generated_native/regex_valid_include_text.rb +1 -1
  42. data/lib/unicode/emoji/generated_native/regex_well_formed.rb +1 -1
  43. data/lib/unicode/emoji/generated_native/regex_well_formed_include_text.rb +1 -1
  44. data/lib/unicode/emoji/lazy_constants.rb +36 -0
  45. data/lib/unicode/emoji/list.rb +3 -0
  46. data/lib/unicode/emoji.rb +39 -6
  47. data/spec/data/.keep +0 -0
  48. data/spec/data/emoji-test.txt +5331 -0
  49. data/spec/emoji_test_txt_spec.rb +181 -0
  50. data/spec/unicode_emoji_spec.rb +127 -14
  51. metadata +24 -4
  52. data/lib/unicode/emoji/generated/regex_any.rb +0 -8
@@ -0,0 +1,181 @@
1
+ require_relative "../lib/unicode/emoji"
2
+ require "minitest/autorun"
3
+ require "open-uri"
4
+
5
+ def iterate_emoji
6
+ EMOJI_TEST_FILE.scan(/^(?:# (?<sub>sub)?group: (?<group_name>.*)$)|(?:(?<codepoints>.+?)\s*; (?<qual_status>.+?)-?qualified )/) do
7
+ if $~[:codepoints]
8
+ yield $~[:codepoints].split.map{|e| e.to_i(16)}.pack("U*"), $~[:qual_status]
9
+ end
10
+ end
11
+ end
12
+
13
+ describe "emoji-test.txt" do
14
+ EMOJI_TEST_FILE = begin
15
+ emoji_test_path = File.join(__dir__, "data/emoji-test.txt")
16
+ if File.exist? emoji_test_path
17
+ file = File.read(emoji_test_path)
18
+ else
19
+ puts "Downloading emoji-test.txt from the consortium"
20
+ URI.open "https://www.unicode.org/Public/emoji/#{Unicode::Emoji::EMOJI_VERSION}/emoji-test.txt" do |f|
21
+ file = f.read
22
+ File.write(File.join(__dir__, "data/emoji-test.txt"), @file)
23
+ end
24
+ end
25
+
26
+ file
27
+ end
28
+
29
+ # qual_status:
30
+ # - fully - fully-qualified emoji sequences
31
+ # - minimally - minimally-qualified emoji sequences (some VS16 missing, but not first one)
32
+ # - un - unqualified emoji sequences (some VS16 missing)
33
+
34
+ describe "REGEX" do
35
+ describe "detects fully-qualified emoji" do
36
+ iterate_emoji do |emoji, qual_status|
37
+ it(emoji) do
38
+ if qual_status == "fully"
39
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX]
40
+ else
41
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX]
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+
48
+ describe "REGEX_INCLUDE_TEXT" do
49
+ describe "detects fully-qualified emoji and (unqualified) singleton text emoji" do
50
+ iterate_emoji do |emoji, qual_status|
51
+ it(emoji) do
52
+ if qual_status == "fully" || qual_status == "un" && emoji.size <= 2
53
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_TEXT]
54
+ else
55
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_TEXT]
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+
62
+ describe "REGEX_INCLUDE_MQE" do
63
+ describe "detects fully-qualified emoji and minimally-qualified emoji" do
64
+ iterate_emoji do |emoji, qual_status|
65
+ it(emoji) do
66
+ if qual_status == "fully" || qual_status == "minimally"
67
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE]
68
+ else
69
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE]
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
75
+
76
+ describe "REGEX_INCLUDE_MQE_UQE" do
77
+ describe "detects all emoji" do
78
+ iterate_emoji do |emoji, qual_status|
79
+ it(emoji) do
80
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE_UQE]
81
+ end
82
+ end
83
+ end
84
+ end
85
+
86
+ describe "REGEX_VALID" do
87
+ describe "detects fully-qualified, minimally-qualified emoji, and unqualified emoji with ZWJ" do
88
+ iterate_emoji do |emoji, qual_status|
89
+ it(emoji) do
90
+ if qual_status == "fully" || qual_status == "minimally" || qual_status == "un" && emoji.size >= 3
91
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_VALID]
92
+ else
93
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_VALID]
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
99
+
100
+ describe "REGEX_VALID_INCLUDE_TEXT" do
101
+ describe "detects all emoji" do
102
+ iterate_emoji do |emoji, qual_status|
103
+ it(emoji) do
104
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT]
105
+ end
106
+ end
107
+ end
108
+ end
109
+
110
+ describe "REGEX_WELL_FORMED" do
111
+ describe "detects fully-qualified, minimally-qualified emoji, and unqualified emoji with ZWJ" do
112
+ iterate_emoji do |emoji, qual_status|
113
+ it(emoji) do
114
+ if qual_status == "fully" || qual_status == "minimally" || qual_status == "un" && emoji.size >= 3
115
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED]
116
+ else
117
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED]
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
123
+
124
+ describe "REGEX_WELL_FORMED_INCLUDE_TEXT" do
125
+ describe "detects all emoji" do
126
+ iterate_emoji do |emoji, qual_status|
127
+ it(emoji) do
128
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT]
129
+ end
130
+ end
131
+ end
132
+ end
133
+
134
+ describe "REGEX_POSSIBLE" do
135
+ describe "detects all emoji, except unqualified keycap sequences" do
136
+ # fixing test not regex, since implementation of this regex should match the one in the standard
137
+ unqualified_keycaps = Unicode::Emoji::EMOJI_KEYCAPS.map{|keycap|
138
+ [keycap, Unicode::Emoji::EMOJI_KEYCAP_SUFFIX].pack("U*")
139
+ }
140
+
141
+ iterate_emoji do |emoji, qual_status|
142
+ it(emoji) do
143
+ if !unqualified_keycaps.include?(emoji)
144
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_POSSIBLE]
145
+ else
146
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_POSSIBLE]
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
152
+
153
+ describe "REGEX_TEXT" do
154
+ describe "detects (unqualified) singleton text emoji" do
155
+ iterate_emoji do |emoji, qual_status|
156
+ it(emoji) do
157
+ # if qual_status == "un" && emoji =~ /^.[\u{FE0E 20E3}]?$/
158
+ if qual_status == "un" && emoji.size <= 2
159
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_TEXT]
160
+ else
161
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_TEXT]
162
+ end
163
+ end
164
+ end
165
+ end
166
+ end
167
+
168
+ describe "REGEX_BASIC" do
169
+ describe "detects (fully-qualified) singleton emoji" do
170
+ iterate_emoji do |emoji, qual_status|
171
+ it(emoji) do
172
+ if qual_status == "fully" && emoji =~ /^.\u{FE0F}?$/
173
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_BASIC]
174
+ else
175
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_BASIC]
176
+ end
177
+ end
178
+ end
179
+ end
180
+ end
181
+ end
@@ -3,12 +3,12 @@ require "minitest/autorun"
3
3
 
4
4
  describe Unicode::Emoji do
5
5
  describe ".properties" do
6
- it "returns an Array for Emoji properties if has codepoints" do
6
+ it "returns an Array of Emoji properties for given codepoint" do
7
7
  assert_equal ["Emoji", "Emoji_Presentation", "Extended_Pictographic"], Unicode::Emoji.properties("😴")
8
8
  assert_equal ["Emoji", "Extended_Pictographic"], Unicode::Emoji.properties("♠")
9
9
  end
10
10
 
11
- it "returns nil for Emoji properties if has no codepoints" do
11
+ it "returns nil if codepoint has no Emoji prop" do
12
12
  assert_nil Unicode::Emoji.properties("A")
13
13
  end
14
14
  end
@@ -39,8 +39,19 @@ describe Unicode::Emoji do
39
39
  assert_equal "▶\u{FE0F}", $&
40
40
  end
41
41
 
42
- it "does not match singleton 'component' emoji codepoints" do
42
+ it "matches singleton skin tone modifiers and hair components" do
43
43
  "🏻 light skin tone" =~ Unicode::Emoji::REGEX
44
+ assert_equal "🏻", $&
45
+
46
+ "🦰 emoji component red hair" =~ Unicode::Emoji::REGEX
47
+ assert_equal "🦰", $&
48
+ end
49
+
50
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
51
+ "1 digit one" =~ Unicode::Emoji::REGEX
52
+ assert_nil $&
53
+
54
+ "🇦 regional indicator symbol letter a" =~ Unicode::Emoji::REGEX
44
55
  assert_nil $&
45
56
  end
46
57
 
@@ -92,6 +103,16 @@ describe Unicode::Emoji do
92
103
  assert_equal "๐Ÿคพ๐Ÿฝโ€โ™€๏ธ", $&
93
104
  end
94
105
 
106
+ it "does not match MQE zwj sequences" do
107
+ "๐Ÿคพ๐Ÿฝโ€โ™€ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX
108
+ refute_equal "๐Ÿคพ๐Ÿฝโ€โ™€", $&
109
+ end
110
+
111
+ it "does not match UQE emoji" do
112
+ "๐ŸŒโ€โ™‚๏ธ man golfing, missing VS16" =~ Unicode::Emoji::REGEX
113
+ refute_equal "๐ŸŒโ€โ™‚๏ธ", $&
114
+ end
115
+
95
116
  it "does not match valid zwj sequences that are not recommended" do
96
117
  "๐Ÿค โ€๐Ÿคข vomiting cowboy" =~ Unicode::Emoji::REGEX
97
118
  assert_equal "๐Ÿค ", $&
@@ -132,8 +153,6 @@ describe Unicode::Emoji do
132
153
  assert_equal "๐Ÿชพ", $&
133
154
  end
134
155
 
135
-
136
-
137
156
  # See gh#12 and https://github.com/matt17r/nw5k/commit/05a34d3c9211a23e5ae6853bb19fd2f224779ef4#diff-afb6f8bc3bae71b75743e00882a060863e2430cbe858ec9014e5956504dfc61cR2
138
157
  it "matches family emoji correctly" do
139
158
  ["๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง", "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ง", "๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง", "๐Ÿ‘จโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ง", "๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ", "๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง"].each { |family|
@@ -142,6 +161,30 @@ describe Unicode::Emoji do
142
161
  end
143
162
  end
144
163
 
164
+ describe "REGEX_INCLUDE_MQE" do
165
+ it "matches MQE emoji" do
166
+ "๐Ÿคพ๐Ÿฝโ€โ™€ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE
167
+ assert_equal "๐Ÿคพ๐Ÿฝโ€โ™€", $&
168
+ end
169
+
170
+ it "does not match UQE emoji" do
171
+ "๐ŸŒโ€โ™‚๏ธ man golfing, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE
172
+ refute_equal "๐ŸŒโ€โ™‚๏ธ", $&
173
+ end
174
+ end
175
+
176
+ describe "REGEX_INCLUDE_MQE_UQE" do
177
+ it "matches MQE emoji" do
178
+ "๐Ÿคพ๐Ÿฝโ€โ™€ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE_UQE
179
+ assert_equal "๐Ÿคพ๐Ÿฝโ€โ™€", $&
180
+ end
181
+
182
+ it "matches UQE emoji" do
183
+ "๐ŸŒโ€โ™‚๏ธ man golfing, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE_UQE
184
+ assert_equal "๐ŸŒโ€โ™‚๏ธ", $&
185
+ end
186
+ end
187
+
145
188
  describe "REGEX_VALID" do
146
189
  it "matches most singleton emoji codepoints" do
147
190
  "๐Ÿ˜ด sleeping face" =~ Unicode::Emoji::REGEX_VALID
@@ -168,8 +211,19 @@ describe Unicode::Emoji do
168
211
  assert_equal "โ–ถ\u{FE0F}", $&
169
212
  end
170
213
 
171
- it "does not match singleton 'component' emoji codepoints" do
214
+ it "matches singleton skin tone modifiers and hair components" do
172
215
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_VALID
216
+ assert_equal "๐Ÿป", $&
217
+
218
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_VALID
219
+ assert_equal "๐Ÿฆฐ", $&
220
+ end
221
+
222
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
223
+ "1 digit one" =~ Unicode::Emoji::REGEX_VALID
224
+ assert_nil $&
225
+
226
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_VALID
173
227
  assert_nil $&
174
228
  end
175
229
 
@@ -263,8 +317,19 @@ describe Unicode::Emoji do
263
317
  assert_equal "โ–ถ\u{FE0F}", $&
264
318
  end
265
319
 
266
- it "does not match singleton 'component' emoji codepoints" do
320
+ it "matches singleton skin tone modifiers and hair components" do
267
321
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_WELL_FORMED
322
+ assert_equal "๐Ÿป", $&
323
+
324
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_WELL_FORMED
325
+ assert_equal "๐Ÿฆฐ", $&
326
+ end
327
+
328
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
329
+ "1 digit one" =~ Unicode::Emoji::REGEX_WELL_FORMED
330
+ assert_nil $&
331
+
332
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_WELL_FORMED
268
333
  assert_nil $&
269
334
  end
270
335
 
@@ -363,9 +428,20 @@ describe Unicode::Emoji do
363
428
  assert_equal "โ–ถ\u{FE0F}", $&
364
429
  end
365
430
 
366
- it "matches singleton 'component' emoji codepoints" do
431
+ it "matches singleton skin tone modifiers and hair components" do
367
432
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_POSSIBLE
368
433
  assert_equal "๐Ÿป", $&
434
+
435
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_POSSIBLE
436
+ assert_equal "๐Ÿฆฐ", $&
437
+ end
438
+
439
+ it "matches singleton components that are not skin tone modifiers or hair components" do
440
+ "1 digit one" =~ Unicode::Emoji::REGEX_POSSIBLE
441
+ assert_equal "1", $&
442
+
443
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_POSSIBLE
444
+ assert_equal "๐Ÿ‡ฆ", $&
369
445
  end
370
446
 
371
447
  it "matches modified emoji if modifier base emoji is used" do
@@ -454,17 +530,28 @@ describe Unicode::Emoji do
454
530
  end
455
531
 
456
532
  it "does not match textual singleton emoji" do
457
- "โ–ถ play button" =~ Unicode::Emoji::REGEX
533
+ "โ–ถ play button" =~ Unicode::Emoji::REGEX_BASIC
458
534
  assert_nil $&
459
535
  end
460
536
 
461
537
  it "matches textual singleton emoji in combination with emoji variation selector" do
462
- "โ–ถ\u{FE0F} play button" =~ Unicode::Emoji::REGEX
538
+ "โ–ถ\u{FE0F} play button" =~ Unicode::Emoji::REGEX_BASIC
463
539
  assert_equal "โ–ถ\u{FE0F}", $&
464
540
  end
465
541
 
466
- it "does not match singleton 'component' emoji codepoints" do
542
+ it "matches singleton skin tone modifiers and hair components" do
467
543
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_BASIC
544
+ assert_equal "๐Ÿป", $&
545
+
546
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_BASIC
547
+ assert_equal "๐Ÿฆฐ", $&
548
+ end
549
+
550
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
551
+ "1 digit one" =~ Unicode::Emoji::REGEX_BASIC
552
+ assert_nil $&
553
+
554
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_BASIC
468
555
  assert_nil $&
469
556
  end
470
557
 
@@ -525,9 +612,20 @@ describe Unicode::Emoji do
525
612
  assert_nil $&
526
613
  end
527
614
 
528
- it "does not match singleton 'component' emoji codepoints" do
615
+ it "does not match singleton skin tone modifiers and hair components" do
529
616
  "๐Ÿป light skin tone" =~ Unicode::Emoji::REGEX_TEXT
530
617
  assert_nil $&
618
+
619
+ "๐Ÿฆฐ emoji component red hair" =~ Unicode::Emoji::REGEX_TEXT
620
+ assert_nil $&
621
+ end
622
+
623
+ it "does not match singleton components that are not skin tone modifiers or hair components" do
624
+ "1 digit one" =~ Unicode::Emoji::REGEX_TEXT
625
+ assert_nil $&
626
+
627
+ "๐Ÿ‡ฆ regional indicator symbol letter a" =~ Unicode::Emoji::REGEX_TEXT
628
+ assert_nil $&
531
629
  end
532
630
 
533
631
  it "does not match modified emoji" do
@@ -564,13 +662,28 @@ describe Unicode::Emoji do
564
662
  end
565
663
  end
566
664
 
567
- describe "REGEX_ANY" do
665
+ describe "REGEX_PROP_EMOJI" do
568
666
  it "returns any emoji-related codepoint (but no variation selectors or tags)" do
569
- matches = "1 string ๐Ÿ˜ด\u{FE0F} sleeping face with ๐Ÿ‡ต and modifier ๐Ÿพ, also ๐Ÿด๓ ง๓ ข๓ ณ๓ ฃ๓ ด๓ ฟ Scotland".scan(Unicode::Emoji::REGEX_ANY)
667
+ matches = "1 string ๐Ÿ˜ด\u{FE0F} sleeping face with ๐Ÿ‡ต and modifier ๐Ÿพ, also ๐Ÿด๓ ง๓ ข๓ ณ๓ ฃ๓ ด๓ ฟ Scotland".scan(Unicode::Emoji::REGEX_PROP_EMOJI)
570
668
  assert_equal ["1", "๐Ÿ˜ด", "๐Ÿ‡ต", "๐Ÿพ", "๐Ÿด"], matches
571
669
  end
572
670
  end
573
671
 
672
+ describe "REGEX_EMOJI_KEYCAP" do
673
+ it "matches emoji keycap sequences" do
674
+ "2๏ธโƒฃ keycap: 2" =~ Unicode::Emoji::REGEX_EMOJI_KEYCAP
675
+ assert_equal "2๏ธโƒฃ", $&
676
+ end
677
+
678
+ it "does not match non-emoji keycap sequences" do
679
+ "8โƒฃ text keycap: 8" =~ Unicode::Emoji::REGEX_EMOJI_KEYCAP
680
+ assert_nil $&
681
+
682
+ "#โƒฃ text keycap: #" =~ Unicode::Emoji::REGEX_EMOJI_KEYCAP
683
+ assert_nil $&
684
+ end
685
+ end
686
+
574
687
  describe "REGEX_PICTO" do
575
688
  it "matches codepoints with Extended_Pictograph property (almost all emoji are, but also others)" do
576
689
  matches = "U+1F32D ๐ŸŒญ HOT DOG, U+203C โ€ผ DOUBLE EXCLAMATION MARK, U+26E8 โ›จ BLACK CROSS ON SHIELD".scan(Unicode::Emoji::REGEX_PICTO)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-emoji
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.0
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-18 00:00:00.000000000 Z
11
+ date: 2024-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-version
@@ -47,24 +47,38 @@ files:
47
47
  - lib/unicode/emoji.rb
48
48
  - lib/unicode/emoji/constants.rb
49
49
  - lib/unicode/emoji/generated/regex.rb
50
- - lib/unicode/emoji/generated/regex_any.rb
51
50
  - lib/unicode/emoji/generated/regex_basic.rb
51
+ - lib/unicode/emoji/generated/regex_emoji_keycap.rb
52
+ - lib/unicode/emoji/generated/regex_include_mqe.rb
53
+ - lib/unicode/emoji/generated/regex_include_mqe_uqe.rb
52
54
  - lib/unicode/emoji/generated/regex_include_text.rb
53
55
  - lib/unicode/emoji/generated/regex_picto.rb
54
56
  - lib/unicode/emoji/generated/regex_picto_no_emoji.rb
55
57
  - lib/unicode/emoji/generated/regex_possible.rb
58
+ - lib/unicode/emoji/generated/regex_prop_component.rb
59
+ - lib/unicode/emoji/generated/regex_prop_emoji.rb
60
+ - lib/unicode/emoji/generated/regex_prop_modifier.rb
61
+ - lib/unicode/emoji/generated/regex_prop_modifier_base.rb
62
+ - lib/unicode/emoji/generated/regex_prop_presentation.rb
56
63
  - lib/unicode/emoji/generated/regex_text.rb
57
64
  - lib/unicode/emoji/generated/regex_valid.rb
58
65
  - lib/unicode/emoji/generated/regex_valid_include_text.rb
59
66
  - lib/unicode/emoji/generated/regex_well_formed.rb
60
67
  - lib/unicode/emoji/generated/regex_well_formed_include_text.rb
61
68
  - lib/unicode/emoji/generated_native/regex.rb
62
- - lib/unicode/emoji/generated_native/regex_any.rb
63
69
  - lib/unicode/emoji/generated_native/regex_basic.rb
70
+ - lib/unicode/emoji/generated_native/regex_emoji_keycap.rb
71
+ - lib/unicode/emoji/generated_native/regex_include_mqe.rb
72
+ - lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb
64
73
  - lib/unicode/emoji/generated_native/regex_include_text.rb
65
74
  - lib/unicode/emoji/generated_native/regex_picto.rb
66
75
  - lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb
67
76
  - lib/unicode/emoji/generated_native/regex_possible.rb
77
+ - lib/unicode/emoji/generated_native/regex_prop_component.rb
78
+ - lib/unicode/emoji/generated_native/regex_prop_emoji.rb
79
+ - lib/unicode/emoji/generated_native/regex_prop_modifier.rb
80
+ - lib/unicode/emoji/generated_native/regex_prop_modifier_base.rb
81
+ - lib/unicode/emoji/generated_native/regex_prop_presentation.rb
68
82
  - lib/unicode/emoji/generated_native/regex_text.rb
69
83
  - lib/unicode/emoji/generated_native/regex_valid.rb
70
84
  - lib/unicode/emoji/generated_native/regex_valid_include_text.rb
@@ -73,6 +87,9 @@ files:
73
87
  - lib/unicode/emoji/index.rb
74
88
  - lib/unicode/emoji/lazy_constants.rb
75
89
  - lib/unicode/emoji/list.rb
90
+ - spec/data/.keep
91
+ - spec/data/emoji-test.txt
92
+ - spec/emoji_test_txt_spec.rb
76
93
  - spec/unicode_emoji_spec.rb
77
94
  - unicode-emoji.gemspec
78
95
  homepage: https://github.com/janlelis/unicode-emoji
@@ -103,4 +120,7 @@ signing_key:
103
120
  specification_version: 4
104
121
  summary: Emoji data and regex
105
122
  test_files:
123
+ - spec/data/.keep
124
+ - spec/data/emoji-test.txt
125
+ - spec/emoji_test_txt_spec.rb
106
126
  - spec/unicode_emoji_spec.rb
@@ -1,8 +0,0 @@
1
- # This file was generated by a script, please do not edit it by hand.
2
- # See `$ rake generate_constants` and data/generate_constants.rb for more info.
3
-
4
- module Unicode
5
- module Emoji
6
- REGEX_ANY = /[\#\*0-9ยฉยฎโ€ผโ‰โ„ขโ„นโ†”-โ†™โ†ฉโ†ชโŒšโŒ›โŒจโโฉ-โณโธ-โบโ“‚โ–ชโ–ซโ–ถโ—€โ—ป-โ—พโ˜€-โ˜„โ˜Žโ˜‘โ˜”โ˜•โ˜˜โ˜โ˜ โ˜ขโ˜ฃโ˜ฆโ˜ชโ˜ฎโ˜ฏโ˜ธ-โ˜บโ™€โ™‚โ™ˆ-โ™“โ™Ÿโ™ โ™ฃโ™ฅโ™ฆโ™จโ™ปโ™พโ™ฟโš’-โš—โš™โš›โšœโš โšกโšงโšชโšซโšฐโšฑโšฝโšพโ›„โ›…โ›ˆโ›Žโ›โ›‘โ›“โ›”โ›ฉโ›ชโ›ฐ-โ›ตโ›ท-โ›บโ›ฝโœ‚โœ…โœˆ-โœโœโœ’โœ”โœ–โœโœกโœจโœณโœดโ„โ‡โŒโŽโ“-โ•โ—โฃโคโž•-โž—โžกโžฐโžฟโคดโคตโฌ…-โฌ‡โฌ›โฌœโญโญ•ใ€ฐใ€ฝใŠ—ใŠ™๐Ÿ€„๐Ÿƒ๐Ÿ…ฐ๐Ÿ…ฑ๐Ÿ…พ๐Ÿ…ฟ๐Ÿ†Ž๐Ÿ†‘-๐Ÿ†š๐Ÿ‡ฆ-๐Ÿ‡ฟ๐Ÿˆ๐Ÿˆ‚๐Ÿˆš๐Ÿˆฏ๐Ÿˆฒ-๐Ÿˆบ๐Ÿ‰๐Ÿ‰‘๐ŸŒ€-๐ŸŒก๐ŸŒค-๐ŸŽ“๐ŸŽ–๐ŸŽ—๐ŸŽ™-๐ŸŽ›๐ŸŽž-๐Ÿฐ๐Ÿณ-๐Ÿต๐Ÿท-๐Ÿ“ฝ๐Ÿ“ฟ-๐Ÿ”ฝ๐Ÿ•‰-๐Ÿ•Ž๐Ÿ•-๐Ÿ•ง๐Ÿ•ฏ๐Ÿ•ฐ๐Ÿ•ณ-๐Ÿ•บ๐Ÿ–‡๐Ÿ–Š-๐Ÿ–๐Ÿ–๐Ÿ–•๐Ÿ––๐Ÿ–ค๐Ÿ–ฅ๐Ÿ–จ๐Ÿ–ฑ๐Ÿ–ฒ๐Ÿ–ผ๐Ÿ—‚-๐Ÿ—„๐Ÿ—‘-๐Ÿ—“๐Ÿ—œ-๐Ÿ—ž๐Ÿ—ก๐Ÿ—ฃ๐Ÿ—จ๐Ÿ—ฏ๐Ÿ—ณ๐Ÿ—บ-๐Ÿ™๐Ÿš€-๐Ÿ›…๐Ÿ›‹-๐Ÿ›’๐Ÿ›•-๐Ÿ›—๐Ÿ›œ-๐Ÿ›ฅ๐Ÿ›ฉ๐Ÿ›ซ๐Ÿ›ฌ๐Ÿ›ฐ๐Ÿ›ณ-๐Ÿ›ผ๐ŸŸ -๐ŸŸซ๐ŸŸฐ๐ŸคŒ-๐Ÿคบ๐Ÿคผ-๐Ÿฅ…๐Ÿฅ‡-๐Ÿงฟ๐Ÿฉฐ-๐Ÿฉผ๐Ÿช€-๐Ÿช‰๐Ÿช-๐Ÿซ†๐ŸซŽ-๐Ÿซœ๐ŸซŸ-๐Ÿซฉ๐Ÿซฐ-๐Ÿซธ]/
7
- end
8
- end