emoji_data 0.1.0 → 0.2.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.editorconfig +18 -0
- data/.gitattributes +2 -0
- data/.travis.yml +5 -0
- data/.yardopts +2 -0
- data/CHANGELOG.md +29 -4
- data/{LICENSE.txt → LICENSE} +0 -0
- data/README.md +58 -30
- data/emoji_data.gemspec +8 -6
- data/lib/emoji_data.rb +138 -28
- data/lib/emoji_data/emoji_char.rb +72 -16
- data/lib/emoji_data/version.rb +2 -1
- data/scripts/benchmark.rb +70 -0
- data/spec/emoji_char_spec.rb +15 -9
- data/spec/emoji_data_spec.rb +40 -9
- metadata +43 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7385039bbd2cb55d93480a7d389c88fc8f47bbfa
|
4
|
+
data.tar.gz: 481273df89feb32b0c6d7178711bb6485e110538
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6be855ddc07996303eef279b6c840a91a27da97774635e71c500f184b14a0fe4e30977dc0cadb1b48cb7a9f4ff465bae6613fd7b93d1101ee785245bb69097ea
|
7
|
+
data.tar.gz: 9a5c308587f581f2a500ac7686664ebe9ab86c103022c372fccf5d3c5a51b359992b9fec612ba6323e805d633d84470ce825ac1f7a945da3b94348a8ad4f087b
|
data/.editorconfig
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# EditorConfig helps developers define and maintain consistent
|
2
|
+
# coding styles between different editors and IDEs
|
3
|
+
# editorconfig.org
|
4
|
+
|
5
|
+
root = true
|
6
|
+
|
7
|
+
[*]
|
8
|
+
indent_style = space
|
9
|
+
indent_size = 2
|
10
|
+
|
11
|
+
end_of_line = lf
|
12
|
+
charset = utf-8
|
13
|
+
trim_trailing_whitespace = true
|
14
|
+
insert_final_newline = true
|
15
|
+
|
16
|
+
[*.md]
|
17
|
+
trim_trailing_whitespace = false
|
18
|
+
|
data/.gitattributes
ADDED
data/.travis.yml
CHANGED
data/.yardopts
ADDED
data/CHANGELOG.md
CHANGED
@@ -1,11 +1,33 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.2.0 (TBD)
|
4
|
+
|
5
|
+
* Rename a number of methods to be clearer and more consistent with what they
|
6
|
+
actually do:
|
7
|
+
- `EmojiChar.char()` → `EmojiChar.render()`
|
8
|
+
- `EmojiData.find_by_unified()` → `EmojiData.from_unified()`
|
9
|
+
- `EmojiData.find_by_str()` → `EmojiData.scan()`
|
10
|
+
|
11
|
+
Don't worry, the old names are still aliased in so you don't have to change
|
12
|
+
anything in your existing code. This change is to make things clearer for
|
13
|
+
people new to the library.
|
14
|
+
|
15
|
+
* Add new `.from_short_name()` library method for fast keyword lookups.
|
16
|
+
* DEVELOPERS: Internal code cleanup and better comments.
|
17
|
+
* DEVELOPERS: Add benchmark suite for comparing method implementation time
|
18
|
+
across versions of this library.
|
19
|
+
|
3
20
|
## 0.1.0 (3 May 2014)
|
4
21
|
|
5
22
|
* Add support for Unicode variant encodings, used by MacOSX 10.9 / iOS 7.
|
6
23
|
- For more info: http://www.unicode.org/L2/L2011/11438-emoji-var.pdf
|
7
|
-
- By default, `EmojiChar.to_s()` and `.char()` will now use the variant
|
8
|
-
|
24
|
+
- By default, `EmojiChar.to_s()` and `.char()` will now use the variant
|
25
|
+
encoding.
|
26
|
+
* With adding support for variants, the speed of `find_by_str` regressed by
|
27
|
+
approximately 20% (because there are more codepoints to match against). To
|
28
|
+
counter this, we switched to a Regex-based scan that improves performance of
|
29
|
+
the method by over 250x(!). A complete sorted search against 1000 strings
|
30
|
+
now takes ~2ms where before it would take around a half second.
|
9
31
|
* Import latest version of iamcal/emoji-data.
|
10
32
|
* 100% test coverage. :sunglasses:
|
11
33
|
|
@@ -13,9 +35,12 @@
|
|
13
35
|
|
14
36
|
* On initialization, create hashmaps to cache lookups for `.find_by_unified()`.
|
15
37
|
|
16
|
-
In a quick benchmark in MRI 2.1.1, this reduces the time needed for one
|
38
|
+
In a quick benchmark in MRI 2.1.1, this reduces the time needed for one
|
39
|
+
million lookups from `13.5s` to `0.3s`!
|
17
40
|
|
18
|
-
This is only for lookup by unified ID for now, since the other `find_by_*()`
|
41
|
+
This is only for lookup by unified ID for now, since the other `find_by_*()`
|
42
|
+
methods are actually searches that can return multiple values. I'll look at
|
43
|
+
nested hashmaps for those if there is a pressing performance need later.
|
19
44
|
|
20
45
|
## 0.0.2 (3 December 2013)
|
21
46
|
|
data/{LICENSE.txt → LICENSE}
RENAMED
File without changes
|
data/README.md
CHANGED
@@ -3,21 +3,29 @@
|
|
3
3
|
[![Gem Version](http://img.shields.io/gem/v/emoji_data.svg?style=flat)](https://rubygems.org/gems/emoji_data)
|
4
4
|
[![Build Status](http://img.shields.io/travis/mroth/emoji_data.rb.svg?style=flat)](https://travis-ci.org/mroth/emoji_data.rb)
|
5
5
|
[![Dependency Status](http://img.shields.io/gemnasium/mroth/emoji_data.rb.svg?style=flat)](https://gemnasium.com/mroth/emoji_data.rb)
|
6
|
-
[![CodeClimate Status](http://img.shields.io/codeclimate/github/mroth/emoji_data.rb.svg?style=flat)](https://codeclimate.com/github/mroth/emoji_data.rb)
|
7
6
|
[![Coverage Status](http://img.shields.io/coveralls/mroth/emoji_data.rb.svg?style=flat)](https://coveralls.io/r/mroth/emoji_data.rb)
|
8
7
|
|
8
|
+
Ruby library providing low level operations for dealing with Emoji
|
9
|
+
glyphs in the Unicode standard. :cool:
|
9
10
|
|
10
|
-
|
11
|
+
EmojiData is like a swiss-army knife for dealing with Emoji encoding issues. If
|
12
|
+
all you need to do is translate `:poop:` into :poop:, then there are plenty of
|
13
|
+
other libs out there that will probably do what you want. But once you are
|
14
|
+
dealing with Emoji as a fundamental part of your application, and you start to
|
15
|
+
realize the nightmare of [doublebyte encoding][doublebyte] or
|
16
|
+
[variants][variant], then this library may be your new best friend.
|
17
|
+
:raised_hands:
|
11
18
|
|
12
|
-
|
19
|
+
EmojiData is used in production by [Emojitracker.com][emojitracker] to parse
|
20
|
+
well over 100M+ emoji glyphs daily. :dizzy:
|
13
21
|
|
14
|
-
|
15
|
-
|
16
|
-
|
22
|
+
[doublebyte]: http://www.quora.com/Why-does-using-emoji-reduce-my-SMS-character-limit-to-70
|
23
|
+
[variant]: http://www.unicode.org/L2/L2011/11438-emoji-var.pdf
|
24
|
+
[emojitracker]: http://www.emojitracker.com
|
17
25
|
|
18
26
|
## Installation
|
19
27
|
|
20
|
-
Add this line to your application's Gemfile
|
28
|
+
Add this line to your application's `Gemfile`:
|
21
29
|
|
22
30
|
gem 'emoji_data'
|
23
31
|
|
@@ -29,42 +37,62 @@ Or install it yourself as:
|
|
29
37
|
|
30
38
|
$ gem install emoji_data
|
31
39
|
|
32
|
-
Currently requires `RUBY_VERSION >= 1.9.
|
33
|
-
|
34
|
-
## Library Usage
|
40
|
+
Currently requires `RUBY_VERSION >= 1.9.3`.
|
35
41
|
|
36
|
-
|
42
|
+
## Usage
|
37
43
|
|
38
|
-
###
|
44
|
+
### Documentation
|
45
|
+
Full API documentation is available via YARD or here:
|
46
|
+
http://rubydoc.info/github/mroth/emoji_data.rb/master/frames
|
39
47
|
|
40
|
-
|
48
|
+
### Examples
|
49
|
+
Here are some examples of the type of stuff you can do:
|
41
50
|
|
42
|
-
|
51
|
+
```irb
|
52
|
+
>> require 'emoji_data'
|
53
|
+
=> true
|
43
54
|
|
44
|
-
|
55
|
+
>> EmojiData.from_unified('1f680')
|
56
|
+
=> #<EmojiData::EmojiChar:0x007f8fdba33b40 @variations=[], @name="ROCKET",
|
57
|
+
@unified="1F680", @docomo=nil, @au="E5C8", @softbank="E10D", @google="FE7ED",
|
58
|
+
@image="1f680.png", @sheet_x=25, @sheet_y=4, @short_name="rocket",
|
59
|
+
@short_names=["rocket"], @text=nil, @apple_img=true, @hangouts_img=true,
|
60
|
+
@twitter_img=true>
|
45
61
|
|
46
|
-
|
47
|
-
|
62
|
+
>> EmojiData.all.count
|
63
|
+
=> 845
|
48
64
|
|
49
|
-
|
65
|
+
>> EmojiData.all_with_variants.count
|
66
|
+
=> 107
|
50
67
|
|
51
|
-
|
52
|
-
|
68
|
+
>> EmojiData.find_by_short_name("moon").count
|
69
|
+
=> 13
|
53
70
|
|
54
|
-
|
71
|
+
>> EmojiData.all.select(&:doublebyte?).map(&:short_name)
|
72
|
+
=> ["hash", "zero", "one", "two", "three", "four", "five", "six", "seven",
|
73
|
+
"eight", "nine", "cn", "de", "es", "fr", "gb", "it", "jp", "kr", "ru", "us"]
|
55
74
|
|
56
|
-
|
57
|
-
|
75
|
+
>> EmojiData.find_by_name("tree").map { |c| [c.unified, c.name, c.render] }
|
76
|
+
=> [["1F332", "EVERGREEN TREE", "🌲"], ["1F333", "DECIDUOUS TREE", "🌳"],
|
77
|
+
["1F334", "PALM TREE", "🌴"], ["1F384", "CHRISTMAS TREE", "🎄"], ["1F38B",
|
78
|
+
"TANABATA TREE", "🎋"]]
|
58
79
|
|
59
|
-
|
80
|
+
>> EmojiData.scan("I ♥ when marketers talk about the ☁. #blessed").each do |ec|
|
81
|
+
?> puts "Found some #{ec.short_name}!"
|
82
|
+
>> end
|
83
|
+
Found some hearts!
|
84
|
+
Found some cloud!
|
85
|
+
=> [...]
|
86
|
+
```
|
60
87
|
|
61
|
-
|
62
|
-
>> EmojiData.all.select(&:doublebyte?).map(&:short_name)
|
63
|
-
=> ["hash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "cn", "de", "es", "fr", "gb", "it", "jp", "kr", "ru", "us"]
|
88
|
+
## Contributing
|
64
89
|
|
90
|
+
Please be sure to run `rake spec` and help keep test coverage at :100:.
|
65
91
|
|
66
|
-
|
92
|
+
There is a full benchmark suite available via `scripts/benchmark.rb`. Please
|
93
|
+
run before and after your changes to ensure you have not caused a performance
|
94
|
+
regression.
|
67
95
|
|
68
|
-
|
96
|
+
## License
|
69
97
|
|
70
|
-
|
98
|
+
[The MIT License (MIT)](LICENSE)
|
data/emoji_data.gemspec
CHANGED
@@ -18,11 +18,13 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_development_dependency
|
22
|
-
spec.add_development_dependency
|
23
|
-
spec.add_development_dependency
|
24
|
-
spec.add_development_dependency 'simplecov',
|
25
|
-
spec.add_development_dependency 'coveralls',
|
21
|
+
spec.add_development_dependency 'bundler', '~> 1.3'
|
22
|
+
spec.add_development_dependency 'rake'
|
23
|
+
spec.add_development_dependency 'rspec', '~> 2.14.1'
|
24
|
+
spec.add_development_dependency 'simplecov', '~> 0.7.1'
|
25
|
+
spec.add_development_dependency 'coveralls', '~> 0.7.0'
|
26
|
+
spec.add_development_dependency 'benchmark-ips', '~> 2.0.0'
|
27
|
+
spec.add_development_dependency 'yard', '~> 0.8.7.4'
|
26
28
|
|
27
|
-
spec.required_ruby_version = '>= 1.9.
|
29
|
+
spec.required_ruby_version = '>= 1.9.3'
|
28
30
|
end
|
data/lib/emoji_data.rb
CHANGED
@@ -3,82 +3,192 @@ require 'emoji_data/emoji_char'
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
module EmojiData
|
6
|
+
|
7
|
+
# specify some location paths
|
6
8
|
GEM_ROOT = File.join(File.dirname(__FILE__), '..')
|
7
|
-
|
8
|
-
EMOJI_MAP = JSON.parse( RAW_JSON )
|
9
|
-
EMOJI_CHARS = EMOJI_MAP.map { |em| EmojiChar.new(em) }
|
9
|
+
VENDOR_DATA = 'vendor/emoji-data/emoji.json'
|
10
10
|
|
11
|
-
#
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
# precomputed list of all possible emoji characters
|
12
|
+
EMOJI_CHARS = begin
|
13
|
+
raw_json = IO.read(File.join(GEM_ROOT, VENDOR_DATA))
|
14
|
+
vendordata = JSON.parse( raw_json )
|
15
|
+
vendordata.map { |em| EmojiChar.new(em) }
|
16
|
+
end
|
17
|
+
|
18
|
+
# precomputed hashmap for fast precached lookups in .from_unified
|
19
|
+
EMOJICHAR_UNIFIED_MAP = {}
|
20
|
+
EMOJI_CHARS.each do |ec|
|
21
|
+
EMOJICHAR_UNIFIED_MAP[ec.unified] = ec
|
22
|
+
ec.variations.each { |variant| EMOJICHAR_UNIFIED_MAP[variant] = ec }
|
23
|
+
end
|
24
|
+
|
25
|
+
# precomputed hashmap for fast precached lookups in .from_short_name
|
26
|
+
EMOJICHAR_KEYWORD_MAP = {}
|
27
|
+
EMOJI_CHARS.each do |ec|
|
28
|
+
ec.short_names.each { |keyword| EMOJICHAR_KEYWORD_MAP[keyword] = ec }
|
20
29
|
end
|
21
30
|
|
31
|
+
# our constants are only for usage internally
|
32
|
+
private_constant :GEM_ROOT, :VENDOR_DATA
|
33
|
+
private_constant :EMOJI_CHARS, :EMOJICHAR_UNIFIED_MAP, :EMOJICHAR_KEYWORD_MAP
|
34
|
+
|
35
|
+
|
36
|
+
# Returns a list of all known Emoji characters as `EmojiChar` objects.
|
37
|
+
#
|
38
|
+
# @return [Array<EmojiChar>] a list of all known `EmojiChar`.
|
22
39
|
def self.all
|
23
40
|
EMOJI_CHARS
|
24
41
|
end
|
25
42
|
|
43
|
+
# Returns a list of all `EmojiChar` that are represented with doublebyte
|
44
|
+
# encoding.
|
45
|
+
#
|
46
|
+
# @return [Array<EmojiChar>] a list of all doublebyte `EmojiChar`.
|
26
47
|
def self.all_doublebyte
|
27
48
|
EMOJI_CHARS.select(&:doublebyte?)
|
28
49
|
end
|
29
50
|
|
51
|
+
# Returns a list of all `EmojiChar` that have at least one variant encoding.
|
52
|
+
#
|
53
|
+
# @return [Array<EmojiChar>] a list of all `EmojiChar` with variant encoding.
|
30
54
|
def self.all_with_variants
|
31
55
|
EMOJI_CHARS.select(&:variant?)
|
32
56
|
end
|
33
57
|
|
34
|
-
|
35
|
-
|
58
|
+
# Returns a list of all known Emoji characters rendered as UTF-8 strings.
|
59
|
+
#
|
60
|
+
# By default, each glyph is rendered once, without the variant encoding selector.
|
61
|
+
# However, if you pass an option hash with `include_variants: true` then all
|
62
|
+
# possible renderings of a single glyph will be included, meaning that:
|
63
|
+
#
|
64
|
+
# 1. You will have "duplicate" emojis in your list.
|
65
|
+
# 2. This list is now suitable for exhaustively matching against in a search.
|
66
|
+
#
|
67
|
+
# @option opts [Boolean] :include_variants whether or not to include all
|
68
|
+
# possible encoding variants in the list
|
69
|
+
#
|
70
|
+
# @return [Array<String>] all Emoji characters rendered as UTF-8 strings
|
71
|
+
def self.chars(opts={})
|
72
|
+
options = {include_variants: false}.merge(opts)
|
36
73
|
|
37
|
-
normals = EMOJI_CHARS.map { |c| c.
|
38
|
-
extras = self.all_with_variants.map { |c| c.char({variant_encoding: true}) }
|
74
|
+
normals = EMOJI_CHARS.map { |c| c.render({variant_encoding: false}) }
|
39
75
|
|
40
76
|
if options[:include_variants]
|
77
|
+
extras = self.all_with_variants.map { |c| c.render({variant_encoding: true}) }
|
41
78
|
return normals + extras
|
42
79
|
end
|
43
80
|
normals
|
44
81
|
end
|
45
82
|
|
46
|
-
|
47
|
-
|
83
|
+
# Returns a list of all known codepoints representing Emoji characters.
|
84
|
+
#
|
85
|
+
# @option (see .chars)
|
86
|
+
# @return [Array<String>] all codepoints represented as unified ID strings
|
87
|
+
def self.codepoints(opts={})
|
88
|
+
options = {include_variants: false}.merge(opts)
|
89
|
+
|
90
|
+
normals = EMOJI_CHARS.map(&:unified)
|
48
91
|
|
49
92
|
if options[:include_variants]
|
50
|
-
|
93
|
+
extras = self.all_with_variants.map {|c| c.variant}
|
94
|
+
return normals + extras
|
51
95
|
end
|
52
|
-
|
96
|
+
normals
|
53
97
|
end
|
54
98
|
|
99
|
+
# Convert a native UTF-8 string glyph to its unified codepoint ID.
|
100
|
+
#
|
101
|
+
# This is a conversion operation, not a match, so it may produce unexpected
|
102
|
+
# results with different types of values.
|
103
|
+
#
|
104
|
+
# @param char [String] a single rendered emoji glyph encoded as a UTF-8 string
|
105
|
+
# @return [String] the unified ID
|
106
|
+
#
|
107
|
+
# @example
|
108
|
+
# >> EmojiData.char_to_unified("👾")
|
109
|
+
# => "1F47E"
|
55
110
|
def self.char_to_unified(char)
|
56
|
-
char.codepoints.to_a.map {|i| i.to_s(16).rjust(4,'0')}.join('-').upcase
|
111
|
+
char.codepoints.to_a.map { |i| i.to_s(16).rjust(4,'0')}.join('-').upcase
|
57
112
|
end
|
58
113
|
|
59
|
-
|
60
|
-
|
114
|
+
# Convert a unified codepoint ID directly to its UTF-8 string representation.
|
115
|
+
#
|
116
|
+
# @param uid [String] the unified codepoint ID for an emoji
|
117
|
+
# @return [String] UTF-8 string rendering of the emoji character
|
118
|
+
#
|
119
|
+
# @example
|
120
|
+
# >> EmojiData.unified_to_char("1F47E")
|
120
|
+
# => "👾"
|
122
|
+
def self.unified_to_char(uid)
|
123
|
+
EmojiChar::unified_to_char(uid)
|
61
124
|
end
|
62
125
|
|
63
|
-
|
64
|
-
|
126
|
+
# Finds a specific `EmojiChar` based on its unified codepoint ID.
|
127
|
+
#
|
128
|
+
# @param uid [String] the unified codepoint ID for an emoji
|
129
|
+
# @return [EmojiChar]
|
130
|
+
def self.from_unified(uid)
|
131
|
+
EMOJICHAR_UNIFIED_MAP[uid.upcase]
|
65
132
|
end
|
66
133
|
|
67
|
-
|
68
|
-
|
134
|
+
# precompile regex pattern for fast matches in `.scan`
|
135
|
+
# needs to be defined after self.chars so not at top of file for now...
|
136
|
+
FBS_REGEXP = Regexp.new(
|
137
|
+
"(?:#{EmojiData.chars({include_variants: true}).join("|")})"
|
138
|
+
)
|
139
|
+
private_constant :FBS_REGEXP
|
140
|
+
|
141
|
+
# Scans a string for all encoded emoji characters contained within.
|
142
|
+
#
|
143
|
+
# @param str [String] the target string to search
|
144
|
+
# @return [Array<EmojiChar>] all emoji characters contained within the target
|
145
|
+
# string, in the order they appeared.
|
146
|
+
#
|
147
|
+
# @example
|
148
|
+
# >> EmojiData.scan("flying on my 🚀 to visit the 👾 people.")
|
149
|
+
# => [#<EmojiData::EmojiChar... @name="ROCKET", @unified="1F680", ...>,
|
150
|
+
# #<EmojiData::EmojiChar... @name="ALIEN MONSTER", @unified="1F47E", ...>]
|
151
|
+
def self.scan(str)
|
69
152
|
matches = str.scan(FBS_REGEXP)
|
70
|
-
matches.map { |m| EmojiData.
|
153
|
+
matches.map { |m| EmojiData.from_unified(EmojiData.char_to_unified(m)) }
|
71
154
|
end
|
72
155
|
|
156
|
+
# Finds any `EmojiChar` that contains given string in its official name.
|
157
|
+
#
|
158
|
+
# @param name [String]
|
159
|
+
# @return [Array<EmojiChar>]
|
73
160
|
def self.find_by_name(name)
|
74
161
|
self.find_by_value(:name, name.upcase)
|
75
162
|
end
|
76
163
|
|
164
|
+
# Find all `EmojiChar` that match string in any of their associated short
|
165
|
+
# name keywords.
|
166
|
+
#
|
167
|
+
# @param short_name [String]
|
168
|
+
# @return [Array<EmojiChar>]
|
77
169
|
def self.find_by_short_name(short_name)
|
78
170
|
self.find_by_value(:short_name, short_name.downcase)
|
79
171
|
end
|
80
172
|
|
173
|
+
# Finds a specific `EmojiChar` based on one of its short name keywords.
|
174
|
+
#
|
175
|
+
# Must be exact match.
|
176
|
+
#
|
177
|
+
# @param short_name [String]
|
178
|
+
# @return [EmojiChar]
|
179
|
+
def self.from_short_name(short_name)
|
180
|
+
EMOJICHAR_KEYWORD_MAP[short_name.downcase]
|
181
|
+
end
|
182
|
+
|
183
|
+
# alias old method names for legacy apps
|
184
|
+
class << self
|
185
|
+
alias_method :find_by_unified, :from_unified
|
186
|
+
alias_method :find_by_str, :scan
|
187
|
+
end
|
188
|
+
|
189
|
+
|
81
190
|
protected
|
191
|
+
|
82
192
|
def self.find_by_value(field,value)
|
83
193
|
self.all.select { |char| char.send(field).include? value }
|
84
194
|
end
|
@@ -1,13 +1,42 @@
|
|
1
1
|
module EmojiData
|
2
2
|
|
3
|
+
# EmojiChar represents a single Emoji character and its associated metadata.
|
4
|
+
#
|
5
|
+
# @!attribute name
|
6
|
+
# @return [String] The standardized name used in the Unicode specification
|
7
|
+
# to represent this emoji character.
|
8
|
+
#
|
9
|
+
# @!attribute unified
|
10
|
+
# @return [String] The primary unified codepoint ID for the emoji character.
|
11
|
+
#
|
12
|
+
# @!attribute variations
|
13
|
+
# @return [Array<String>] A list of all variant codepoints that may also
|
14
|
+
# represent this emoji.
|
15
|
+
#
|
16
|
+
# @!attribute short_name
|
17
|
+
# @return [String] The canonical "short name" or keyword used in many
|
18
|
+
# systems to refer to this emoji. Often surrounded by `:colons:` in
|
19
|
+
# systems like GitHub & Campfire.
|
20
|
+
#
|
21
|
+
# @!attribute short_names
|
22
|
+
# @return [Array<String>] A full list of possible keywords for the emoji.
|
23
|
+
#
|
24
|
+
# @!attribute text
|
25
|
+
# @return [String] An alternate textual representation of the emoji, for
|
26
|
+
# example a smiley face emoji may be represented with an ASCII alternative.
|
27
|
+
# Most emoji do not have a text alternative. This is typically used when
|
28
|
+
# building an automatic translation from typed emoticons.
|
29
|
+
#
|
3
30
|
class EmojiChar
|
31
|
+
|
4
32
|
def initialize(emoji_hash)
|
5
33
|
# work around inconsistency in emoji.json for now by just setting a blank
|
6
34
|
# array for instance value, and let it get overridden in main
|
7
35
|
# deserialization loop if variable is present.
|
8
36
|
@variations = []
|
9
37
|
|
10
|
-
#
|
38
|
+
# trick for declaring instance variables while iterating over a hash
|
39
|
+
# http://stackoverflow.com/questions/1615190/
|
11
40
|
emoji_hash.each do |k,v|
|
12
41
|
instance_variable_set("@#{k}",v)
|
13
42
|
eigenclass = class<<self; self; end
|
@@ -15,51 +44,78 @@ module EmojiData
|
|
15
44
|
end
|
16
45
|
end
|
17
46
|
|
18
|
-
#
|
47
|
+
# Renders an `EmojiChar` to its string glyph representation, suitable for
|
48
|
+
# printing to screen.
|
49
|
+
#
|
50
|
+
# @option opts [Boolean] :variant_encoding specify whether the variant
|
51
|
+
# encoding selector should be used to hint to rendering devices that
|
52
|
+
# "graphic" representation should be used. By default, we use this for all
|
53
|
+
# Emoji characters that contain a possible variant.
|
19
54
|
#
|
20
|
-
#
|
21
|
-
def
|
22
|
-
options = {variant_encoding: true}.merge(
|
55
|
+
# @return [String] the emoji character rendered to a UTF-8 string
|
56
|
+
def render(opts = {})
|
57
|
+
options = {variant_encoding: true}.merge(opts)
|
23
58
|
#decide whether to use the normal unified ID or the variant for encoding to str
|
24
59
|
target = (self.variant? && options[:variant_encoding]) ? self.variant : @unified
|
25
60
|
EmojiChar::unified_to_char(target)
|
26
61
|
end
|
27
62
|
|
28
|
-
|
63
|
+
alias_method :to_s, :render
|
64
|
+
alias_method :char, :render
|
65
|
+
|
66
|
+
# Returns a list of all possible UTF-8 string renderings of an `EmojiChar`.
|
29
67
|
#
|
30
|
-
#
|
68
|
+
# E.g., normal, with variant selectors, etc. This is useful if you want to
|
69
|
+
# have all possible values to match against when searching for the emoji in
|
70
|
+
# a string representation.
|
71
|
+
#
|
72
|
+
# @return [Array<String>] all possible UTF-8 string renderings
|
31
73
|
def chars
|
32
|
-
results = [self.
|
74
|
+
results = [self.render({variant_encoding: false})]
|
33
75
|
@variations.each do |variation|
|
34
76
|
results << EmojiChar::unified_to_char(variation)
|
35
77
|
end
|
36
78
|
@chars ||= results
|
37
79
|
end
|
38
80
|
|
39
|
-
#
|
81
|
+
# Is the `EmojiChar` represented by a doublebyte codepoint in Unicode?
|
82
|
+
#
|
83
|
+
# @return [Boolean]
|
40
84
|
def doublebyte?
|
41
|
-
@unified.
|
85
|
+
@unified.include? "-"
|
42
86
|
end
|
43
87
|
|
44
|
-
#
|
88
|
+
# Does the `EmojiChar` have an alternate Unicode variant encoding?
|
89
|
+
#
|
90
|
+
# @return [Boolean]
|
45
91
|
def variant?
|
46
92
|
@variations.length > 0
|
47
93
|
end
|
48
94
|
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
95
|
+
# Returns the most likely variant-encoding codepoint ID for an `EmojiChar`.
|
96
|
+
#
|
97
|
+
# For now we only know of one possible variant encoding for certain
|
98
|
+
# characters, but there could be others in the future.
|
99
|
+
#
|
100
|
+
# This is typically used to force Emoji rendering for characters that could
|
101
|
+
# be represented in standard font glyphs on certain operating systems.
|
102
|
+
#
|
103
|
+
# The resulting encoded string will be two codepoints, or three codepoints
|
104
|
+
# for doublebyte Emoji characters.
|
105
|
+
#
|
106
|
+
# @return [String, nil]
|
107
|
+
# The most likely variant-encoding codepoint ID.
|
108
|
+
# If there is no variant-encoding for a character, returns nil.
|
52
109
|
def variant
|
53
110
|
@variations.first
|
54
111
|
end
|
55
112
|
|
56
|
-
alias_method :to_s, :char
|
57
113
|
|
58
114
|
protected
|
115
|
+
|
59
116
|
def self.unified_to_char(cps)
|
60
117
|
cps.split('-').map { |i| i.hex }.pack("U*")
|
61
118
|
end
|
62
119
|
|
63
120
|
end
|
64
|
-
|
65
121
|
end
|
data/lib/emoji_data/version.rb
CHANGED
@@ -0,0 +1,70 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require './lib/emoji_data'
|
4
|
+
require 'benchmark/ips'
|
5
|
+
|
6
|
+
suites = []
|
7
|
+
|
8
|
+
s0 = "I liek to eat cake oh so very much cake eating is nice!! #cake #food"
|
9
|
+
s1 = "🚀"
|
10
|
+
s2 = "flying on my 🚀 to visit the 👾 people."
|
11
|
+
s3 = "first a \u{0023}\u{FE0F}\u{20E3} then a 🚀"
|
12
|
+
|
13
|
+
suites << Benchmark.ips do |x|
|
14
|
+
x.config(:time => 1, :warmup => 0)
|
15
|
+
x.report("EmojiData.scan(s0)") { EmojiData.scan(s0) }
|
16
|
+
x.report("EmojiData.scan(s1)") { EmojiData.scan(s1) }
|
17
|
+
x.report("EmojiData.scan(s2)") { EmojiData.scan(s2) }
|
18
|
+
x.report("EmojiData.scan(s3)") { EmojiData.scan(s3) }
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
suites << Benchmark.ips do |x|
|
23
|
+
x.config(:time => 1, :warmup => 0)
|
24
|
+
x.report("EmojiData.all") { EmojiData.all() }
|
25
|
+
x.report("EmojiData.all_doublebyte") { EmojiData.all_doublebyte() }
|
26
|
+
x.report("EmojiData.all_with_variants") { EmojiData.all_with_variants() }
|
27
|
+
x.report("EmojiData.from_unified") { EmojiData.from_unified("1F680") }
|
28
|
+
x.report("EmojiData.chars") { EmojiData.chars() }
|
29
|
+
x.report("EmojiData.codepoints") { EmojiData.codepoints() }
|
30
|
+
x.report("EmojiData.find_by_name - many") { EmojiData.find_by_name("tree") }
|
31
|
+
x.report("EmojiData.find_by_name - none") { EmojiData.find_by_name("zzzz") }
|
32
|
+
x.report("EmojiData.find_by_short_name - many") { EmojiData.find_by_short_name("MOON") }
|
33
|
+
x.report("EmojiData.find_by_short_name - none") { EmojiData.find_by_short_name("zzzz") }
|
34
|
+
x.report("EmojiData.char_to_unified - single") { EmojiData.char_to_unified("🚀") }
|
35
|
+
x.report("EmojiData.char_to_unified - double") { EmojiData.char_to_unified("\u{2601}\u{FE0F}") }
|
36
|
+
x.report("EmojiData.unified_to_char - single") { EmojiData.unified_to_char("1F47E") }
|
37
|
+
x.report("EmojiData.unified_to_char - double") { EmojiData.unified_to_char("2764-fe0f") }
|
38
|
+
x.report("EmojiData.unified_to_char - triple") { EmojiData.unified_to_char("0030-FE0F-20E3") }
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
invader = EmojiData::EmojiChar.new({unified: '1F47E'})
|
43
|
+
usflag = EmojiData::EmojiChar.new({unified: '1F1FA-1F1F8'})
|
44
|
+
hourglass = EmojiData::EmojiChar.new({unified: '231B', variations: ['231B-FE0F']})
|
45
|
+
cloud = EmojiData::EmojiChar.new({unified: '2601', variations: ['2601-FE0F']})
|
46
|
+
|
47
|
+
suites << Benchmark.ips do |x|
|
48
|
+
x.config(:time => 1, :warmup => 0)
|
49
|
+
x.report("EmojiChar.render - single") { invader.render() }
|
50
|
+
x.report("EmojiChar.render - double") { usflag.render() }
|
51
|
+
x.report("EmojiChar.render - variant") { cloud.render({variant_encoding: true}) }
|
52
|
+
x.report("EmojiChar.chars") { cloud.chars() }
|
53
|
+
x.report("EmojiChar.doublebyte?") { invader.doublebyte?() }
|
54
|
+
x.report("EmojiChar.variant?") { invader.variant?() }
|
55
|
+
x.report("EmojiChar.variant") { invader.variant() }
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
def micros(hz)
|
60
|
+
1_000_000 / hz
|
61
|
+
end
|
62
|
+
|
63
|
+
suites.each do |report|
|
64
|
+
results = report.entries.sort { |a,b| b.ips <=> a.ips }
|
65
|
+
|
66
|
+
print "\n"
|
67
|
+
results.each do |r|
|
68
|
+
printf "%-45s %10u %.2f µs/op\n", r.label, r.iterations, micros(r.ips)
|
69
|
+
end
|
70
|
+
end
|
data/spec/emoji_char_spec.rb
CHANGED
@@ -38,22 +38,28 @@ describe EmojiChar do
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
describe "#
|
41
|
+
describe "#render" do
|
42
42
|
it "should render as happy shiny unicode" do
|
43
|
-
@invader.
|
43
|
+
@invader.render.should eq("👾")
|
44
44
|
end
|
45
45
|
it "should render as happy shiny unicode for doublebyte chars too" do
|
46
|
-
@usflag.
|
46
|
+
@usflag.render.should eq("🇺🇸")
|
47
47
|
end
|
48
48
|
it "should have a flag to output forced emoji variant char encoding if requested" do
|
49
|
-
@cloud.
|
50
|
-
@cloud.
|
51
|
-
@invader.
|
52
|
-
@invader.
|
49
|
+
@cloud.render( {variant_encoding: false}).should eq("\u{2601}")
|
50
|
+
@cloud.render( {variant_encoding: true}).should eq("\u{2601}\u{FE0F}")
|
51
|
+
@invader.render( {variant_encoding: false}).should eq("\u{1F47E}")
|
52
|
+
@invader.render( {variant_encoding: true}).should eq("\u{1F47E}")
|
53
53
|
end
|
54
54
|
it "should default to variant encoding for chars with a variant present" do
|
55
|
-
@cloud.
|
56
|
-
@hourglass.
|
55
|
+
@cloud.render.should eq("\u{2601}\u{FE0F}")
|
56
|
+
@hourglass.render.should eq("\u{231B}\u{FE0F}")
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe "#char - DEPRECATED" do
|
61
|
+
it "should maintain compatibility with old method name for .render" do
|
62
|
+
@cloud.char.should eq(@cloud.render)
|
57
63
|
end
|
58
64
|
end
|
59
65
|
|
data/spec/emoji_data_spec.rb
CHANGED
@@ -56,12 +56,12 @@ describe EmojiData do
|
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
|
-
describe ".
|
59
|
+
describe ".scan" do
|
60
60
|
before(:all) do
|
61
|
-
@exact_results = EmojiData.
|
62
|
-
@multi_results = EmojiData.
|
63
|
-
@variant_results = EmojiData.
|
64
|
-
@variant_multi = EmojiData.
|
61
|
+
@exact_results = EmojiData.scan("🚀")
|
62
|
+
@multi_results = EmojiData.scan("flying on my 🚀 to visit the 👾 people.")
|
63
|
+
@variant_results = EmojiData.scan("\u{0023}\u{FE0F}\u{20E3}")
|
64
|
+
@variant_multi = EmojiData.scan("first a \u{0023}\u{FE0F}\u{20E3} then a 🚀")
|
65
65
|
end
|
66
66
|
it "should find the proper EmojiChar object from a single string char" do
|
67
67
|
@exact_results.should be_kind_of(Array)
|
@@ -89,22 +89,34 @@ describe EmojiData do
|
|
89
89
|
end
|
90
90
|
end
|
91
91
|
|
92
|
-
describe ".
|
92
|
+
describe ".find_by_str - DEPRECATED" do
|
93
|
+
it "should maintain compatibility with old method name for .scan" do
|
94
|
+
EmojiData.find_by_str("\u{0023}\u{FE0F}\u{20E3}").should eq(EmojiData.scan("\u{0023}\u{FE0F}\u{20E3}"))
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
describe ".from_unified" do
|
93
99
|
it "should find the proper EmojiChar object" do
|
94
|
-
results = EmojiData.
|
100
|
+
results = EmojiData.from_unified('1f680')
|
95
101
|
results.should be_kind_of(EmojiChar)
|
96
102
|
results.name.should eq('ROCKET')
|
97
103
|
end
|
98
104
|
it "should normalise capitalization for hex values" do
|
99
|
-
EmojiData.
|
105
|
+
EmojiData.from_unified('1f680').should_not be_nil
|
100
106
|
end
|
101
107
|
it "should find via variant encoding ID format as well" do
|
102
|
-
results = EmojiData.
|
108
|
+
results = EmojiData.from_unified('2764-fe0f')
|
103
109
|
results.should_not be_nil
|
104
110
|
results.name.should eq('HEAVY BLACK HEART')
|
105
111
|
end
|
106
112
|
end
|
107
113
|
|
114
|
+
describe ".find_by_unified - DEPRECATED" do
|
115
|
+
it "should maintain compatibility with old method name for .from_unified" do
|
116
|
+
EmojiData.find_by_unified('1f680').should eq(EmojiData.from_unified('1f680'))
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
108
120
|
describe ".find_by_name" do
|
109
121
|
it "returns an array of results, upcasing input if needed" do
|
110
122
|
EmojiData.find_by_name('tree').should be_kind_of(Array)
|
@@ -129,6 +141,25 @@ describe EmojiData do
|
|
129
141
|
end
|
130
142
|
end
|
131
143
|
|
144
|
+
describe ".from_short_name" do
|
145
|
+
it "returns exact matches on a short name" do
|
146
|
+
results = EmojiData.from_short_name('scream')
|
147
|
+
results.should be_kind_of(EmojiChar)
|
148
|
+
results.name.should eq('FACE SCREAMING IN FEAR')
|
149
|
+
end
|
150
|
+
it "handles lowercasing input if required" do
|
151
|
+
EmojiData.from_short_name('SCREAM').should eq( EmojiData.from_short_name('scream') )
|
152
|
+
end
|
153
|
+
it "works on secondary keywords" do
|
154
|
+
primary = EmojiData.from_short_name('hankey')
|
155
|
+
EmojiData.from_short_name('poop').should eq(primary)
|
156
|
+
EmojiData.from_short_name('shit').should eq(primary)
|
157
|
+
end
|
158
|
+
it "returns nil if nothing matches" do
|
159
|
+
EmojiData.from_short_name('taco').should be_nil
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
132
163
|
describe ".char_to_unified" do
|
133
164
|
it "converts normal emoji to unified codepoint" do
|
134
165
|
EmojiData.char_to_unified("👾").should eq('1F47E')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: emoji_data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0.rc1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew Rothenberg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05
|
11
|
+
date: 2014-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -42,16 +42,16 @@ dependencies:
|
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 2.14.1
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 2.14.1
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: simplecov
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +80,34 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 0.7.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: benchmark-ips
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 2.0.0
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 2.0.0
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: yard
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 0.8.7.4
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.8.7.4
|
83
111
|
description: Provides classes and helpers for dealing with emoji character data as
|
84
112
|
unicode. Wraps a library of all known emoji characters and provides convenience
|
85
113
|
methods.
|
@@ -90,17 +118,21 @@ extensions: []
|
|
90
118
|
extra_rdoc_files: []
|
91
119
|
files:
|
92
120
|
- ".coveralls.yml"
|
121
|
+
- ".editorconfig"
|
122
|
+
- ".gitattributes"
|
93
123
|
- ".gitignore"
|
94
124
|
- ".travis.yml"
|
125
|
+
- ".yardopts"
|
95
126
|
- CHANGELOG.md
|
96
127
|
- Gemfile
|
97
|
-
- LICENSE
|
128
|
+
- LICENSE
|
98
129
|
- README.md
|
99
130
|
- Rakefile
|
100
131
|
- emoji_data.gemspec
|
101
132
|
- lib/emoji_data.rb
|
102
133
|
- lib/emoji_data/emoji_char.rb
|
103
134
|
- lib/emoji_data/version.rb
|
135
|
+
- scripts/benchmark.rb
|
104
136
|
- spec/emoji_char_spec.rb
|
105
137
|
- spec/emoji_data_spec.rb
|
106
138
|
- spec/spec_helper.rb
|
@@ -118,12 +150,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
118
150
|
requirements:
|
119
151
|
- - ">="
|
120
152
|
- !ruby/object:Gem::Version
|
121
|
-
version: 1.9.
|
153
|
+
version: 1.9.3
|
122
154
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
155
|
requirements:
|
124
|
-
- - "
|
156
|
+
- - ">"
|
125
157
|
- !ruby/object:Gem::Version
|
126
|
-
version:
|
158
|
+
version: 1.3.1
|
127
159
|
requirements: []
|
128
160
|
rubyforge_project:
|
129
161
|
rubygems_version: 2.2.2
|
@@ -134,3 +166,4 @@ test_files:
|
|
134
166
|
- spec/emoji_char_spec.rb
|
135
167
|
- spec/emoji_data_spec.rb
|
136
168
|
- spec/spec_helper.rb
|
169
|
+
has_rdoc:
|