unicode-emoji 2.9.0 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -1
  3. data/Gemfile.lock +4 -4
  4. data/MIT-LICENSE.txt +1 -1
  5. data/README.md +4 -4
  6. data/Rakefile +5 -0
  7. data/data/emoji.marshal.gz +0 -0
  8. data/data/generate_constants.rb +240 -0
  9. data/lib/unicode/emoji/constants.rb +21 -4
  10. data/lib/unicode/emoji/generated/regex.rb +8 -0
  11. data/lib/unicode/emoji/generated/regex_any.rb +8 -0
  12. data/lib/unicode/emoji/generated/regex_basic.rb +8 -0
  13. data/lib/unicode/emoji/generated/regex_include_text.rb +8 -0
  14. data/lib/unicode/emoji/generated/regex_picto.rb +8 -0
  15. data/lib/unicode/emoji/generated/regex_picto_no_emoji.rb +8 -0
  16. data/lib/unicode/emoji/generated/regex_text.rb +8 -0
  17. data/lib/unicode/emoji/generated/regex_valid.rb +8 -0
  18. data/lib/unicode/emoji/generated/regex_valid_include_text.rb +8 -0
  19. data/lib/unicode/emoji/generated/regex_well_formed.rb +8 -0
  20. data/lib/unicode/emoji/generated/regex_well_formed_include_text.rb +8 -0
  21. data/lib/unicode/emoji/generated_native/regex.rb +8 -0
  22. data/lib/unicode/emoji/generated_native/regex_any.rb +8 -0
  23. data/lib/unicode/emoji/generated_native/regex_basic.rb +8 -0
  24. data/lib/unicode/emoji/generated_native/regex_include_text.rb +8 -0
  25. data/lib/unicode/emoji/generated_native/regex_picto.rb +8 -0
  26. data/lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb +8 -0
  27. data/lib/unicode/emoji/generated_native/regex_text.rb +8 -0
  28. data/lib/unicode/emoji/generated_native/regex_valid.rb +8 -0
  29. data/lib/unicode/emoji/generated_native/regex_valid_include_text.rb +8 -0
  30. data/lib/unicode/emoji/generated_native/regex_well_formed.rb +8 -0
  31. data/lib/unicode/emoji/generated_native/regex_well_formed_include_text.rb +8 -0
  32. data/lib/unicode/emoji/lazy_constants.rb +25 -0
  33. data/lib/unicode/emoji.rb +17 -199
  34. data/spec/unicode_emoji_spec.rb +8 -1
  35. data/unicode-emoji.gemspec +1 -0
  36. metadata +29 -4
data/lib/unicode/emoji.rb CHANGED
@@ -3,212 +3,30 @@
3
3
  require "unicode/version"
4
4
 
5
5
  require_relative "emoji/constants"
6
- require_relative "emoji/index"
7
6
 
8
7
  module Unicode
9
8
  module Emoji
10
- PROPERTY_NAMES = {
11
- E: "Emoji",
12
- B: "Emoji_Modifier_Base",
13
- M: "Emoji_Modifier",
14
- C: "Emoji_Component",
15
- P: "Emoji_Presentation",
16
- X: "Extended_Pictographic",
17
- }
18
-
19
- EMOJI_VARIATION_SELECTOR = 0xFE0F
20
- TEXT_VARIATION_SELECTOR = 0xFE0E
21
- EMOJI_TAG_BASE_FLAG = 0x1F3F4
22
- CANCEL_TAG = 0xE007F
23
- TAGS = [*0xE0020..0xE007E]
24
- EMOJI_KEYCAP_SUFFIX = 0x20E3
25
- ZWJ = 0x200D
26
- REGIONAL_INDICATORS = [*0x1F1E6..0x1F1FF]
27
-
28
- EMOJI_CHAR = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:E) }.keys.freeze
29
- EMOJI_PRESENTATION = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:P) }.keys.freeze
30
- TEXT_PRESENTATION = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:E) && !props.include?(:P) }.keys.freeze
31
- EMOJI_COMPONENT = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:C) }.keys.freeze
32
- EMOJI_MODIFIER_BASES = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:B) }.keys.freeze
33
- EMOJI_MODIFIERS = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:M) }.keys.freeze
34
-
35
- EXTENDED_PICTOGRAPHIC = INDEX[:PROPERTIES].select{ |ord, props| props.include?(:X) }.keys.freeze
36
- EXTENDED_PICTOGRAPHIC_NO_EMOJI= INDEX[:PROPERTIES].select{ |ord, props| props.include?(:X) && !props.include?(:E) }.keys.freeze
37
- EMOJI_KEYCAPS = INDEX[:KEYCAPS].freeze
38
- VALID_REGION_FLAGS = INDEX[:FLAGS].freeze
39
- VALID_SUBDIVISIONS = INDEX[:SD].freeze
40
- RECOMMENDED_SUBDIVISION_FLAGS = INDEX[:TAGS].freeze
41
- RECOMMENDED_ZWJ_SEQUENCES = INDEX[:ZWJ].freeze
42
-
43
- LIST = INDEX[:LIST].freeze.each_value(&:freeze)
44
- LIST_REMOVED_KEYS = [
45
- "Smileys & People",
46
- ]
47
-
48
- pack = ->(ord){ Regexp.escape(Array(ord).pack("U*")) }
49
- join = -> (*strings){ "(?:" + strings.join("|") + ")" }
50
- pack_and_join = ->(ords){ join[*ords.map{ |ord| pack[ord] }] }
51
-
52
- if EMOJI_VERSION == Unicode::Version.emoji_version
53
- emoji_character = "\\p{Emoji}"
54
- emoji_modifier = "\\p{Emoji Modifier}"
55
- emoji_modifier_base = "\\p{Emoji Modifier Base}"
56
- emoji_component = "\\p{Emoji Component}"
57
- emoji_presentation = "\\p{Emoji Presentation}"
58
- picto = "\\p{Extended Pictographic}"
59
- picto_no_emoji = "\\p{Extended Pictographic}(?<!\\p{Emoji})"
60
- else
61
- emoji_character = pack_and_join[EMOJI_CHAR]
62
- emoji_modifier = pack_and_join[EMOJI_MODIFIERS]
63
- emoji_modifier_base = pack_and_join[EMOJI_MODIFIER_BASES]
64
- emoji_component = pack_and_join[EMOJI_COMPONENT]
65
- emoji_presentation = pack_and_join[EMOJI_PRESENTATION]
66
- picto = pack_and_join[EXTENDED_PICTOGRAPHIC]
67
- picto_no_emoji = pack_and_join[EXTENDED_PICTOGRAPHIC_NO_EMOJI]
9
+ autoload :INDEX, File.expand_path('emoji/index', __dir__)
10
+
11
+ %w[
12
+ EMOJI_CHAR EMOJI_CHAR EMOJI_PRESENTATION TEXT_PRESENTATION EMOJI_COMPONENT EMOJI_MODIFIER_BASES
13
+ EMOJI_MODIFIERS EXTENDED_PICTOGRAPHIC EXTENDED_PICTOGRAPHIC_NO_EMOJI EMOJI_KEYCAPS VALID_REGION_FLAGS
14
+ VALID_SUBDIVISIONS RECOMMENDED_SUBDIVISION_FLAGS RECOMMENDED_ZWJ_SEQUENCES LIST LIST_REMOVED_KEYS
15
+ ].each do |const_name|
16
+ autoload const_name, File.expand_path('emoji/lazy_constants', __dir__)
68
17
  end
69
18
 
70
- emoji_presentation_sequence = \
71
- join[
72
- pack_and_join[TEXT_PRESENTATION] + pack[EMOJI_VARIATION_SELECTOR],
73
- emoji_presentation + "(?!" + pack[TEXT_VARIATION_SELECTOR] + ")" + pack[EMOJI_VARIATION_SELECTOR] + "?",
74
- ]
75
-
76
- non_component_emoji_presentation_sequence = \
77
- "(?!" + emoji_component + ")" + emoji_presentation_sequence
78
-
79
- text_presentation_sequence = \
80
- join[
81
- pack_and_join[TEXT_PRESENTATION]+ "(?!" + join[emoji_modifier, pack[EMOJI_VARIATION_SELECTOR]] + ")" + pack[TEXT_VARIATION_SELECTOR] + "?",
82
- emoji_presentation + pack[TEXT_VARIATION_SELECTOR]
83
- ]
84
-
85
- emoji_modifier_sequence = \
86
- emoji_modifier_base + emoji_modifier
87
-
88
- emoji_keycap_sequence = \
89
- pack_and_join[EMOJI_KEYCAPS] + pack[[EMOJI_VARIATION_SELECTOR, EMOJI_KEYCAP_SUFFIX]]
90
-
91
- emoji_valid_flag_sequence = \
92
- pack_and_join[VALID_REGION_FLAGS]
93
-
94
- emoji_well_formed_flag_sequence = \
95
- "(?:" +
96
- pack_and_join[REGIONAL_INDICATORS] +
97
- pack_and_join[REGIONAL_INDICATORS] +
98
- ")"
99
-
100
- emoji_valid_core_sequence = \
101
- join[
102
- # emoji_character,
103
- emoji_keycap_sequence,
104
- emoji_modifier_sequence,
105
- non_component_emoji_presentation_sequence,
106
- emoji_valid_flag_sequence,
107
- ]
108
-
109
- emoji_well_formed_core_sequence = \
110
- join[
111
- # emoji_character,
112
- emoji_keycap_sequence,
113
- emoji_modifier_sequence,
114
- non_component_emoji_presentation_sequence,
115
- emoji_well_formed_flag_sequence,
116
- ]
117
-
118
- emoji_rgi_tag_sequence = \
119
- pack_and_join[RECOMMENDED_SUBDIVISION_FLAGS]
120
-
121
- emoji_valid_tag_sequence = \
122
- "(?:" +
123
- pack[EMOJI_TAG_BASE_FLAG] +
124
- "(?:" + VALID_SUBDIVISIONS.map{ |sd| Regexp.escape(sd.tr("\u{20}-\u{7E}", "\u{E0020}-\u{E007E}"))}.join("|") + ")" +
125
- pack[CANCEL_TAG] +
126
- ")"
127
-
128
- emoji_well_formed_tag_sequence = \
129
- "(?:" +
130
- join[
131
- non_component_emoji_presentation_sequence,
132
- emoji_modifier_sequence,
133
- ] +
134
- pack_and_join[TAGS] + "+" +
135
- pack[CANCEL_TAG] +
136
- ")"
137
-
138
- emoji_rgi_zwj_sequence = \
139
- pack_and_join[RECOMMENDED_ZWJ_SEQUENCES]
140
-
141
- emoji_valid_zwj_element = \
142
- join[
143
- emoji_modifier_sequence,
144
- emoji_presentation_sequence,
145
- emoji_character,
146
- ]
147
-
148
- emoji_valid_zwj_sequence = \
149
- "(?:" +
150
- "(?:" + emoji_valid_zwj_element + pack[ZWJ] + ")+" + emoji_valid_zwj_element +
151
- ")"
152
-
153
- emoji_rgi_sequence = \
154
- join[
155
- emoji_rgi_zwj_sequence,
156
- emoji_rgi_tag_sequence,
157
- emoji_valid_core_sequence,
158
- ]
159
-
160
- emoji_valid_sequence = \
161
- join[
162
- emoji_valid_zwj_sequence,
163
- emoji_valid_tag_sequence,
164
- emoji_valid_core_sequence,
165
- ]
166
-
167
- emoji_well_formed_sequence = \
168
- join[
169
- emoji_valid_zwj_sequence,
170
- emoji_well_formed_tag_sequence,
171
- emoji_well_formed_core_sequence,
172
- ]
173
-
174
- # Matches basic singleton emoji and all kind of sequences, but restrict zwj and tag sequences to known sequences (rgi)
175
- REGEX = Regexp.compile(emoji_rgi_sequence)
176
-
177
- # Matches basic singleton emoji and all kind of valid sequences
178
- REGEX_VALID = Regexp.compile(emoji_valid_sequence)
179
-
180
- # Matches basic singleton emoji and all kind of sequences
181
- REGEX_WELL_FORMED = Regexp.compile(emoji_well_formed_sequence)
182
-
183
- # Matches only basic single, non-textual emoji
184
- # Ignores "components" like modifiers or simple digits
185
- REGEX_BASIC = Regexp.compile(
186
- "(?!" + emoji_component + ")" + emoji_presentation_sequence
19
+ generated_constants_dirpath = File.expand_path(
20
+ EMOJI_VERSION == Unicode::Version.emoji_version ? "emoji/generated_native/" : "emoji/generated/",
21
+ __dir__
187
22
  )
188
23
 
189
- # Matches only basic single, textual emoji
190
- # Ignores "components" like modifiers or simple digits
191
- REGEX_TEXT = Regexp.compile(
192
- "(?!" + emoji_component + ")" + text_presentation_sequence
193
- )
194
-
195
- # Matches any emoji-related codepoint - Use with caution (returns partil matches)
196
- REGEX_ANY = Regexp.compile(
197
- emoji_character
198
- )
199
-
200
- # Combined REGEXes which also match for TEXTUAL emoji
201
- REGEX_INCLUDE_TEXT = Regexp.union(REGEX, REGEX_TEXT)
202
- REGEX_VALID_INCLUDE_TEXT = Regexp.union(REGEX_VALID, REGEX_TEXT)
203
- REGEX_WELL_FORMED_INCLUDE_TEXT = Regexp.union(REGEX_WELL_FORMED, REGEX_TEXT)
204
-
205
- REGEX_PICTO = Regexp.compile(
206
- picto
207
- )
208
-
209
- REGEX_PICTO_NO_EMOJI = Regexp.compile(
210
- picto_no_emoji
211
- )
24
+ %w[
25
+ REGEX REGEX_VALID REGEX_WELL_FORMED REGEX_BASIC REGEX_TEXT REGEX_ANY REGEX_INCLUDE_TEXT
26
+ REGEX_VALID_INCLUDE_TEXT REGEX_WELL_FORMED_INCLUDE_TEXT REGEX_PICTO REGEX_PICTO_NO_EMOJI
27
+ ].each do |const_name|
28
+ autoload const_name, File.join(generated_constants_dirpath, const_name.downcase)
29
+ end
212
30
 
213
31
  def self.properties(char)
214
32
  ord = get_codepoint_value(char)
@@ -108,6 +108,13 @@ describe Unicode::Emoji do
108
108
  "🪺 nest with eggs" =~ Unicode::Emoji::REGEX
109
109
  assert_equal "🪺", $&
110
110
  end
111
+
112
+ # See gh#12 and https://github.com/matt17r/nw5k/commit/05a34d3c9211a23e5ae6853bb19fd2f224779ef4#diff-afb6f8bc3bae71b75743e00882a060863e2430cbe858ec9014e5956504dfc61cR2
113
+ it "matches family emoji correctly" do
114
+ ["👨‍👩‍👧‍👦", "👨‍👩‍👦‍👦", "👨‍👩‍👧‍👧", "👨‍👨‍👧‍👦", "👨‍👨‍👦‍👦", "👨‍👨‍👧‍👧", "👩‍👩‍👧‍👦", "👩‍👩‍👦‍👦", "👩‍👩‍👧‍👧", "👨‍👦‍👦", "👨‍👧‍👦", "👨‍👧‍👧", "👩‍👦‍👦", "👩‍👧‍👦", "👩‍👧‍👧"].each { |family|
115
+ assert_equal family, family[Unicode::Emoji::REGEX]
116
+ }
117
+ end
111
118
  end
112
119
 
113
120
  describe "REGEX_VALID" do
@@ -407,7 +414,7 @@ describe Unicode::Emoji do
407
414
  end
408
415
  end
409
416
 
410
- describe "REGEX_PICTO" do
417
+ describe "REGEX_PICTO_NO_EMOJI" do
411
418
  it "matches codepoints with Extended_Pictograph property, but no Emoji property" do
412
419
  matches = "U+1F32D 🌭 HOT DOG, U+203C ‼ DOUBLE EXCLAMATION MARK, U+26E8 ⛨ BLACK CROSS ON SHIELD".scan(Unicode::Emoji::REGEX_PICTO_NO_EMOJI)
413
420
  assert_equal ["⛨"], matches
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
+ gem.metadata = { "rubygems_mfa_required" => "true" }
19
20
 
20
21
  gem.required_ruby_version = ">= 2.3", "< 4.0"
21
22
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-emoji
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.9.0
4
+ version: 3.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-09-15 00:00:00.000000000 Z
11
+ date: 2022-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-version
@@ -42,15 +42,40 @@ files:
42
42
  - README.md
43
43
  - Rakefile
44
44
  - data/emoji.marshal.gz
45
+ - data/generate_constants.rb
45
46
  - lib/unicode/emoji.rb
46
47
  - lib/unicode/emoji/constants.rb
48
+ - lib/unicode/emoji/generated/regex.rb
49
+ - lib/unicode/emoji/generated/regex_any.rb
50
+ - lib/unicode/emoji/generated/regex_basic.rb
51
+ - lib/unicode/emoji/generated/regex_include_text.rb
52
+ - lib/unicode/emoji/generated/regex_picto.rb
53
+ - lib/unicode/emoji/generated/regex_picto_no_emoji.rb
54
+ - lib/unicode/emoji/generated/regex_text.rb
55
+ - lib/unicode/emoji/generated/regex_valid.rb
56
+ - lib/unicode/emoji/generated/regex_valid_include_text.rb
57
+ - lib/unicode/emoji/generated/regex_well_formed.rb
58
+ - lib/unicode/emoji/generated/regex_well_formed_include_text.rb
59
+ - lib/unicode/emoji/generated_native/regex.rb
60
+ - lib/unicode/emoji/generated_native/regex_any.rb
61
+ - lib/unicode/emoji/generated_native/regex_basic.rb
62
+ - lib/unicode/emoji/generated_native/regex_include_text.rb
63
+ - lib/unicode/emoji/generated_native/regex_picto.rb
64
+ - lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb
65
+ - lib/unicode/emoji/generated_native/regex_text.rb
66
+ - lib/unicode/emoji/generated_native/regex_valid.rb
67
+ - lib/unicode/emoji/generated_native/regex_valid_include_text.rb
68
+ - lib/unicode/emoji/generated_native/regex_well_formed.rb
69
+ - lib/unicode/emoji/generated_native/regex_well_formed_include_text.rb
47
70
  - lib/unicode/emoji/index.rb
71
+ - lib/unicode/emoji/lazy_constants.rb
48
72
  - spec/unicode_emoji_spec.rb
49
73
  - unicode-emoji.gemspec
50
74
  homepage: https://github.com/janlelis/unicode-emoji
51
75
  licenses:
52
76
  - MIT
53
- metadata: {}
77
+ metadata:
78
+ rubygems_mfa_required: 'true'
54
79
  post_install_message:
55
80
  rdoc_options: []
56
81
  require_paths:
@@ -69,7 +94,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
69
94
  - !ruby/object:Gem::Version
70
95
  version: '0'
71
96
  requirements: []
72
- rubygems_version: 3.2.3
97
+ rubygems_version: 3.3.7
73
98
  signing_key:
74
99
  specification_version: 4
75
100
  summary: Retrieve Emoji data about Unicode codepoints.