unicode-emoji 3.7.0 → 3.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGELOG.md +11 -1
  4. data/README.md +98 -55
  5. data/Rakefile +6 -2
  6. data/data/emoji.marshal.gz +0 -0
  7. data/data/generate_constants.rb +97 -40
  8. data/lib/unicode/emoji/constants.rb +17 -1
  9. data/lib/unicode/emoji/generated/regex.rb +1 -1
  10. data/lib/unicode/emoji/generated/regex_include_mqe.rb +8 -0
  11. data/lib/unicode/emoji/generated/regex_include_mqe_uqe.rb +8 -0
  12. data/lib/unicode/emoji/generated/regex_include_text.rb +1 -1
  13. data/lib/unicode/emoji/generated/regex_text.rb +1 -1
  14. data/lib/unicode/emoji/generated/regex_valid.rb +1 -1
  15. data/lib/unicode/emoji/generated/regex_valid_include_text.rb +1 -1
  16. data/lib/unicode/emoji/generated/regex_well_formed.rb +1 -1
  17. data/lib/unicode/emoji/generated/regex_well_formed_include_text.rb +1 -1
  18. data/lib/unicode/emoji/generated_native/regex.rb +1 -1
  19. data/lib/unicode/emoji/generated_native/regex_basic.rb +1 -1
  20. data/lib/unicode/emoji/generated_native/regex_include_mqe.rb +8 -0
  21. data/lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb +8 -0
  22. data/lib/unicode/emoji/generated_native/regex_include_text.rb +1 -1
  23. data/lib/unicode/emoji/generated_native/regex_text.rb +1 -1
  24. data/lib/unicode/emoji/generated_native/regex_valid.rb +1 -1
  25. data/lib/unicode/emoji/generated_native/regex_valid_include_text.rb +1 -1
  26. data/lib/unicode/emoji/generated_native/regex_well_formed.rb +1 -1
  27. data/lib/unicode/emoji/generated_native/regex_well_formed_include_text.rb +1 -1
  28. data/lib/unicode/emoji/lazy_constants.rb +36 -0
  29. data/lib/unicode/emoji/list.rb +3 -0
  30. data/lib/unicode/emoji.rb +33 -6
  31. data/spec/data/.keep +0 -0
  32. data/spec/data/emoji-test.txt +5331 -0
  33. data/spec/emoji_test_txt_spec.rb +181 -0
  34. data/spec/unicode_emoji_spec.rb +36 -4
  35. metadata +12 -2
@@ -0,0 +1,181 @@
1
+ require_relative "../lib/unicode/emoji"
2
+ require "minitest/autorun"
3
+ require "open-uri"
4
+
5
+ def iterate_emoji # Scans EMOJI_TEST_FILE and yields (emoji string, qualification status) for each data line
6
+ EMOJI_TEST_FILE.scan(/^(?:# (?<sub>sub)?group: (?<group_name>.*)$)|(?:(?<codepoints>.+?)\s*; (?<qual_status>.+?)-?qualified )/) do
7
+ if $~[:codepoints] # group/subgroup header lines also match the pattern but have no codepoints; skip them
8
+ yield $~[:codepoints].split.map{|e| e.to_i(16)}.pack("U*"), $~[:qual_status] # hex codepoints -> string; status is the text before "qualified" (e.g. "fully", "minimally", "un")
9
+ end
10
+ end
11
+ end
12
+
13
+ describe "emoji-test.txt" do
14
+ EMOJI_TEST_FILE = begin
15
+ emoji_test_path = File.join(__dir__, "data/emoji-test.txt")
16
+ if File.exist? emoji_test_path
17
+ file = File.read(emoji_test_path)
18
+ else
19
+ puts "Downloading emoji-test.txt from the consortium"
20
+ URI.open "https://www.unicode.org/Public/emoji/#{Unicode::Emoji::EMOJI_VERSION}/emoji-test.txt" do |f|
21
+ file = f.read
22
+ File.write(File.join(__dir__, "data/emoji-test.txt"), @file)
23
+ end
24
+ end
25
+
26
+ file
27
+ end
28
+
29
+ # qual_status:
30
+ # - fully - fully-qualified emoji sequences
31
+ # - minimally - minimally-qualified emoji sequences (some VS16 missing, but not first one)
32
+ # - un - unqualified emoji sequences (some VS16 missing)
33
+
34
+ describe "REGEX" do
35
+ describe "detects fully-qualified emoji" do
36
+ iterate_emoji do |emoji, qual_status|
37
+ it(emoji) do
38
+ if qual_status == "fully"
39
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX]
40
+ else
41
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX]
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+
48
+ describe "REGEX_INCLUDE_TEXT" do
49
+ describe "detects fully-qualified emoji and (unqualified) singleton text emoji" do
50
+ iterate_emoji do |emoji, qual_status|
51
+ it(emoji) do
52
+ if qual_status == "fully" || qual_status == "un" && emoji.size <= 2
53
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_TEXT]
54
+ else
55
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_TEXT]
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+
62
+ describe "REGEX_INCLUDE_MQE" do
63
+ describe "detects fully-qualified emoji and minimally-qualified emoji" do
64
+ iterate_emoji do |emoji, qual_status|
65
+ it(emoji) do
66
+ if qual_status == "fully" || qual_status == "minimally"
67
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE]
68
+ else
69
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE]
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
75
+
76
+ describe "REGEX_INCLUDE_MQE_UQE" do
77
+ describe "detects all emoji" do
78
+ iterate_emoji do |emoji, qual_status|
79
+ it(emoji) do
80
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_INCLUDE_MQE_UQE]
81
+ end
82
+ end
83
+ end
84
+ end
85
+
86
+ describe "REGEX_VALID" do
87
+ describe "detects fully-qualified, minimally-qualified emoji, and unqualified emoji with ZWJ" do
88
+ iterate_emoji do |emoji, qual_status|
89
+ it(emoji) do
90
+ if qual_status == "fully" || qual_status == "minimally" || qual_status == "un" && emoji.size >= 3
91
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_VALID]
92
+ else
93
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_VALID]
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
99
+
100
+ describe "REGEX_VALID_INCLUDE_TEXT" do
101
+ describe "detects all emoji" do
102
+ iterate_emoji do |emoji, qual_status|
103
+ it(emoji) do
104
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT]
105
+ end
106
+ end
107
+ end
108
+ end
109
+
110
+ describe "REGEX_WELL_FORMED" do
111
+ describe "detects fully-qualified, minimally-qualified emoji, and unqualified emoji with ZWJ" do
112
+ iterate_emoji do |emoji, qual_status|
113
+ it(emoji) do
114
+ if qual_status == "fully" || qual_status == "minimally" || qual_status == "un" && emoji.size >= 3
115
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED]
116
+ else
117
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED]
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
123
+
124
+ describe "REGEX_WELL_FORMED_INCLUDE_TEXT" do
125
+ describe "detects all emoji" do
126
+ iterate_emoji do |emoji, qual_status|
127
+ it(emoji) do
128
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT]
129
+ end
130
+ end
131
+ end
132
+ end
133
+
134
+ describe "REGEX_POSSIBLE" do
135
+ describe "detects all emoji, except unqualified keycap sequences" do
136
+ # fixing test not regex, since implementation of this regex should match the one in the standard
137
+ unqualified_keycaps = Unicode::Emoji::EMOJI_KEYCAPS.map{|keycap|
138
+ [keycap, Unicode::Emoji::EMOJI_KEYCAP_SUFFIX].pack("U*")
139
+ }
140
+
141
+ iterate_emoji do |emoji, qual_status|
142
+ it(emoji) do
143
+ if !unqualified_keycaps.include?(emoji)
144
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_POSSIBLE]
145
+ else
146
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_POSSIBLE]
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
152
+
153
+ describe "REGEX_TEXT" do
154
+ describe "detects (unqualified) singleton text emoji" do
155
+ iterate_emoji do |emoji, qual_status|
156
+ it(emoji) do
157
+ # if qual_status == "un" && emoji =~ /^.[\u{FE0E 20E3}]?$/
158
+ if qual_status == "un" && emoji.size <= 2
159
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_TEXT]
160
+ else
161
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_TEXT]
162
+ end
163
+ end
164
+ end
165
+ end
166
+ end
167
+
168
+ describe "REGEX_BASIC" do
169
+ describe "detects (fully-qualified) singleton emoji" do
170
+ iterate_emoji do |emoji, qual_status|
171
+ it(emoji) do
172
+ if qual_status == "fully" && emoji =~ /^.\u{FE0F}?$/
173
+ assert_equal emoji, emoji[Unicode::Emoji::REGEX_BASIC]
174
+ else
175
+ refute_equal emoji, emoji[Unicode::Emoji::REGEX_BASIC]
176
+ end
177
+ end
178
+ end
179
+ end
180
+ end
181
+ end
@@ -3,12 +3,12 @@ require "minitest/autorun"
3
3
 
4
4
  describe Unicode::Emoji do
5
5
  describe ".properties" do
6
- it "returns an Array for Emoji properties if has codepoints" do
6
+ it "returns an Array of Emoji properties for given codepoint" do
7
7
  assert_equal ["Emoji", "Emoji_Presentation", "Extended_Pictographic"], Unicode::Emoji.properties("😴")
8
8
  assert_equal ["Emoji", "Extended_Pictographic"], Unicode::Emoji.properties("β™ ")
9
9
  end
10
10
 
11
- it "returns nil for Emoji properties if has no codepoints" do
11
+ it "returns nil if codepoint has no Emoji prop" do
12
12
  assert_nil Unicode::Emoji.properties("A")
13
13
  end
14
14
  end
@@ -92,6 +92,16 @@ describe Unicode::Emoji do
92
92
  assert_equal "πŸ€ΎπŸ½β€β™€οΈ", $&
93
93
  end
94
94
 
95
+ it "does not match MQE zwj sequences" do
96
+ "πŸ€ΎπŸ½β€β™€ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX
97
+ refute_equal `πŸ€ΎπŸ½β€β™€`, $&
98
+ end
99
+
100
+ it "does not match UQE emoji" do
101
+ "πŸŒβ€β™‚οΈ man golfing, missing VS16" =~ Unicode::Emoji::REGEX
102
+ refute_equal `πŸŒβ€β™‚οΈ`, $&
103
+ end
104
+
95
105
  it "does not match valid zwj sequences that are not recommended" do
96
106
  "πŸ€ β€πŸ€’ vomiting cowboy" =~ Unicode::Emoji::REGEX
97
107
  assert_equal "🀠", $&
@@ -132,8 +142,6 @@ describe Unicode::Emoji do
132
142
  assert_equal "πŸͺΎ", $&
133
143
  end
134
144
 
135
-
136
-
137
145
  # See gh#12 and https://github.com/matt17r/nw5k/commit/05a34d3c9211a23e5ae6853bb19fd2f224779ef4#diff-afb6f8bc3bae71b75743e00882a060863e2430cbe858ec9014e5956504dfc61cR2
138
146
  it "matches family emoji correctly" do
139
147
  ["πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦", "πŸ‘¨β€πŸ‘©β€πŸ‘¦β€πŸ‘¦", "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘§", "πŸ‘¨β€πŸ‘¨β€πŸ‘§β€πŸ‘¦", "πŸ‘¨β€πŸ‘¨β€πŸ‘¦β€πŸ‘¦", "πŸ‘¨β€πŸ‘¨β€πŸ‘§β€πŸ‘§", "πŸ‘©β€πŸ‘©β€πŸ‘§β€πŸ‘¦", "πŸ‘©β€πŸ‘©β€πŸ‘¦β€πŸ‘¦", "πŸ‘©β€πŸ‘©β€πŸ‘§β€πŸ‘§", "πŸ‘¨β€πŸ‘¦β€πŸ‘¦", "πŸ‘¨β€πŸ‘§β€πŸ‘¦", "πŸ‘¨β€πŸ‘§β€πŸ‘§", "πŸ‘©β€πŸ‘¦β€πŸ‘¦", "πŸ‘©β€πŸ‘§β€πŸ‘¦", "πŸ‘©β€πŸ‘§β€πŸ‘§"].each { |family|
@@ -142,6 +150,30 @@ describe Unicode::Emoji do
142
150
  end
143
151
  end
144
152
 
153
+ describe "REGEX_INCLUDE_MQE" do
154
+ it "matches MQE emoji" do
155
+ "πŸ€ΎπŸ½β€β™€ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE
156
+ assert_equal `πŸ€ΎπŸ½β€β™€`, $&
157
+ end
158
+
159
+ it "does not match UQE emoji" do
160
+ "πŸŒβ€β™‚οΈ man golfing, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE
161
+ refute_equal `πŸŒβ€β™‚οΈ`, $&
162
+ end
163
+ end
164
+
165
+ describe "REGEX_INCLUDE_MQE_UQE" do
166
+ it "matches MQE emoji" do
167
+ "πŸ€ΎπŸ½β€β™€ woman playing handball: medium skin tone, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE_UQE
168
+ assert_equal `πŸ€ΎπŸ½β€β™€`, $&
169
+ end
170
+
171
+ it "matches UQE emoji" do
172
+ "πŸŒβ€β™‚οΈ man golfing, missing VS16" =~ Unicode::Emoji::REGEX_INCLUDE_MQE_UQE
173
+ assert_equal `πŸŒβ€β™‚οΈ`, $&
174
+ end
175
+ end
176
+
145
177
  describe "REGEX_VALID" do
146
178
  it "matches most singleton emoji codepoints" do
147
179
  "😴 sleeping face" =~ Unicode::Emoji::REGEX_VALID
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-emoji
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.0
4
+ version: 3.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-18 00:00:00.000000000 Z
11
+ date: 2024-10-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-version
@@ -49,6 +49,8 @@ files:
49
49
  - lib/unicode/emoji/generated/regex.rb
50
50
  - lib/unicode/emoji/generated/regex_any.rb
51
51
  - lib/unicode/emoji/generated/regex_basic.rb
52
+ - lib/unicode/emoji/generated/regex_include_mqe.rb
53
+ - lib/unicode/emoji/generated/regex_include_mqe_uqe.rb
52
54
  - lib/unicode/emoji/generated/regex_include_text.rb
53
55
  - lib/unicode/emoji/generated/regex_picto.rb
54
56
  - lib/unicode/emoji/generated/regex_picto_no_emoji.rb
@@ -61,6 +63,8 @@ files:
61
63
  - lib/unicode/emoji/generated_native/regex.rb
62
64
  - lib/unicode/emoji/generated_native/regex_any.rb
63
65
  - lib/unicode/emoji/generated_native/regex_basic.rb
66
+ - lib/unicode/emoji/generated_native/regex_include_mqe.rb
67
+ - lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb
64
68
  - lib/unicode/emoji/generated_native/regex_include_text.rb
65
69
  - lib/unicode/emoji/generated_native/regex_picto.rb
66
70
  - lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb
@@ -73,6 +77,9 @@ files:
73
77
  - lib/unicode/emoji/index.rb
74
78
  - lib/unicode/emoji/lazy_constants.rb
75
79
  - lib/unicode/emoji/list.rb
80
+ - spec/data/.keep
81
+ - spec/data/emoji-test.txt
82
+ - spec/emoji_test_txt_spec.rb
76
83
  - spec/unicode_emoji_spec.rb
77
84
  - unicode-emoji.gemspec
78
85
  homepage: https://github.com/janlelis/unicode-emoji
@@ -103,4 +110,7 @@ signing_key:
103
110
  specification_version: 4
104
111
  summary: Emoji data and regex
105
112
  test_files:
113
+ - spec/data/.keep
114
+ - spec/data/emoji-test.txt
115
+ - spec/emoji_test_txt_spec.rb
106
116
  - spec/unicode_emoji_spec.rb