unicode-emoji 3.8.0 → 4.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rake_tasks +1 -0
- data/CHANGELOG.md +59 -33
- data/Gemfile.lock +2 -4
- data/README.md +32 -21
- data/data/generate_constants.rb +32 -8
- data/lib/unicode/emoji/constants.rb +6 -2
- data/lib/unicode/emoji/generated/regex.rb +1 -1
- data/lib/unicode/emoji/generated/regex_basic.rb +1 -1
- data/lib/unicode/emoji/generated/regex_emoji_keycap.rb +8 -0
- data/lib/unicode/emoji/generated/regex_include_mqe.rb +1 -1
- data/lib/unicode/emoji/generated/regex_include_mqe_uqe.rb +1 -1
- data/lib/unicode/emoji/generated/regex_include_text.rb +1 -1
- data/lib/unicode/emoji/generated/regex_prop_component.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_emoji.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_modifier.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_modifier_base.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_presentation.rb +8 -0
- data/lib/unicode/emoji/generated/regex_text_presentation.rb +8 -0
- data/lib/unicode/emoji/generated/regex_valid.rb +1 -1
- data/lib/unicode/emoji/generated/regex_valid_include_text.rb +1 -1
- data/lib/unicode/emoji/generated/regex_well_formed.rb +1 -1
- data/lib/unicode/emoji/generated/regex_well_formed_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_basic.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_emoji_keycap.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_include_mqe.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_prop_component.rb +8 -0
- data/lib/unicode/emoji/generated_native/{regex_any.rb → regex_prop_emoji.rb} +1 -1
- data/lib/unicode/emoji/generated_native/regex_prop_modifier.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_prop_modifier_base.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_prop_presentation.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_text_presentation.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_valid.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_valid_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_well_formed.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_well_formed_include_text.rb +1 -1
- data/lib/unicode/emoji.rb +10 -5
- data/spec/unicode_emoji_spec.rb +97 -16
- data/unicode-emoji.gemspec +1 -3
- metadata +17 -21
- data/lib/unicode/emoji/generated/regex_any.rb +0 -8
- data/spec/data/emoji-test.txt +0 -5331
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b08d6adaddfcbca3e754c9a52a0c0d5c772da86ca708affc9799ad113c5a005
|
4
|
+
data.tar.gz: e9f3817a215ef38b7933d69b4f0563d848a03f6a6b8728ecd06a74417fb5f8a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cedad0ceb5f1039be614bbca170cccc3f29e8f05bd7fc74714ed586ddf20edb5da25a6c8fd840acfb7dfbeb918ec6e2218cf427e5e510eb2406235253e32ad74
|
7
|
+
data.tar.gz: 4680b526737abd7491351ff87c5d323f3a6acf5996dffa4c2f4737e10bc8083da16928dbab34362b6014ae5a17863266eb456ab2d382f25350339eaf71a175bf
|
data/.rake_tasks
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,32 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
-
|
3
|
+
## 4.0.4
|
4
|
+
|
5
|
+
- Add `REGEX_TEXT_PRESENTATION` to be able to match raw default-text Emoji codepoints
|
6
|
+
|
7
|
+
## 4.0.3
|
8
|
+
|
9
|
+
- Remove emoji-test.txt from Rubygems package
|
10
|
+
|
11
|
+
## 4.0.2
|
12
|
+
|
13
|
+
- Directly use `RbConfig::CONFIG["UNICODE_EMOJI_VERSION"]` to detect Ruby's Emoji version,
|
14
|
+
drop unicode-version dependency
|
15
|
+
|
16
|
+
## 4.0.0
|
17
|
+
|
18
|
+
- **Breaking change:** Regexes now include single skin tone modifiers (`🏻`) and hair components (`🦰`).
|
19
|
+
They were previously considered to be invalid partial Emoji, however since they are supposed to be
|
20
|
+
displayed as Emoji in isolation, they are now part of the regexes (see *ED-20* in UTS51).
|
21
|
+
- **Breaking change:** Drop `REGEX_ANY` in favor of `REGEX_PROP_EMOJI`
|
22
|
+
- Expose regexes for Emoji props (`REGEX_PROP_*`). The advantage over using the native regex properties
|
23
|
+
directly is that you will be able to use the Emoji support level of this gem instead of Ruby's.
|
24
|
+
For example, as of releasing this, the current Emoji version is 16.0, while Ruby is at 15.0.
|
25
|
+
Also see README for a table listing the regexes that match Emoji properties.
|
26
|
+
- Add `REGEX_EMOJI_KEYCAP` for matching specifically Emoji keycaps
|
27
|
+
- Use character class instead of lookbehind for native text emoji and non-emoji pictographic regexes
|
28
|
+
|
29
|
+
## 3.8.0
|
4
30
|
|
5
31
|
- Add new RGI-based regexes `REGEX_INCLUDE_MQE` and `REGEX_INCLUDE_MQE_UQE` which allow you to match
|
6
32
|
minimally-qualified and unqualified RGI sequences (Emoji that lack some VS16)
|
@@ -10,7 +36,7 @@
|
|
10
36
|
- Update CLDR to v46 (valid subdivisions)
|
11
37
|
- Further improvements (see commit log)
|
12
38
|
|
13
|
-
|
39
|
+
## 3.7.0
|
14
40
|
|
15
41
|
- Bump required Ruby slightly to 2.5
|
16
42
|
- Introduce new `REGEX_POSSIBLE` which contains the regex described in
|
@@ -23,46 +49,46 @@
|
|
23
49
|
- Separately autoload emoji list, so it can be loaded when other indexes
|
24
50
|
are not needed
|
25
51
|
|
26
|
-
|
52
|
+
## 3.6.0
|
27
53
|
|
28
54
|
- `Unicode::Emoji::REGEX_TEXT` now matches non-emoji keycaps like "3⃣" (U+0033 U+20E3)
|
29
55
|
- Minor refactorings
|
30
56
|
|
31
|
-
|
57
|
+
## 3.5.0
|
32
58
|
|
33
59
|
- Emoji 16.0
|
34
60
|
|
35
|
-
|
61
|
+
## 3.4.0
|
36
62
|
|
37
63
|
- Emoji 15.1
|
38
64
|
|
39
|
-
|
65
|
+
## 3.3.2
|
40
66
|
|
41
67
|
- Update valid subdivisions to CLDR 43 (no changes)
|
42
68
|
-> there won't be any new RGI subdivision flags in Emoji
|
43
69
|
|
44
|
-
|
70
|
+
## 3.3.1
|
45
71
|
|
46
72
|
- Update valid subdivisions to CLDR 42 (no changes)
|
47
73
|
|
48
|
-
|
74
|
+
## 3.3.0
|
49
75
|
|
50
76
|
- Emoji 15.0
|
51
77
|
|
52
|
-
|
78
|
+
## 3.2.0
|
53
79
|
|
54
80
|
- Update valid subdivisions to CLDR 41
|
55
81
|
|
56
|
-
|
82
|
+
## 3.1.1
|
57
83
|
|
58
84
|
- Fix `REGEX` to be able to match complete family emoji, instead of
|
59
85
|
sub-matching partial families, thanks @matt17r
|
60
86
|
|
61
|
-
|
87
|
+
## 3.1.0
|
62
88
|
|
63
89
|
- Update valid subdivisions to CLDR 40
|
64
90
|
|
65
|
-
|
91
|
+
## 3.0.0
|
66
92
|
|
67
93
|
- Vastly improve memory usage, patch by @radarek
|
68
94
|
- Emoji regexes are now pre-generated and bundled with the release
|
@@ -70,54 +96,54 @@
|
|
70
96
|
- Most constants (e.g. regexes) now get autoloaded
|
71
97
|
- See https://github.com/janlelis/unicode-emoji/pull/9 for more details
|
72
98
|
|
73
|
-
|
99
|
+
## 2.9.0
|
74
100
|
|
75
101
|
- Emoji 14.0
|
76
102
|
|
77
|
-
|
103
|
+
## 2.8.0
|
78
104
|
|
79
105
|
- Update valid subdivisions to CLDR 39
|
80
106
|
|
81
|
-
|
107
|
+
## 2.7.1
|
82
108
|
|
83
109
|
- Update valid subdivisions to CLDR 38.1
|
84
110
|
|
85
|
-
|
111
|
+
## 2.7.0
|
86
112
|
|
87
113
|
- Update valid subdivisions to CLDR 38
|
88
114
|
- Loosen Ruby dependency to allow Ruby 3.0
|
89
115
|
|
90
|
-
|
116
|
+
## 2.6.0
|
91
117
|
|
92
118
|
- Emoji 13.1
|
93
119
|
|
94
|
-
|
120
|
+
## 2.5.0
|
95
121
|
|
96
122
|
- Use native Emoji regex properties when current Ruby's Emoji support is the same as our current Emoji version
|
97
123
|
- Update valid subdivisions to CLDR 37
|
98
124
|
|
99
|
-
|
125
|
+
## 2.4.0
|
100
126
|
|
101
127
|
- Emoji 13.0
|
102
128
|
|
103
|
-
|
129
|
+
## 2.3.1
|
104
130
|
|
105
131
|
- Fix index to actually include Emoji 12.1
|
106
132
|
|
107
|
-
|
133
|
+
## 2.3.0
|
108
134
|
|
109
135
|
- Emoji 12.1
|
110
136
|
|
111
|
-
|
137
|
+
## 2.2.0
|
112
138
|
|
113
139
|
- Update subdivisions to CLDR 36
|
114
140
|
|
115
|
-
|
141
|
+
## 2.1.0
|
116
142
|
|
117
143
|
- Add `REGEX_PICTO` which matches codepoints with the **Extended_Pictographic** property
|
118
144
|
- Add `REGEX_PICTO_NO_EMOJI` which matches codepoints with the **Extended_Pictographic** property, but no **Emoji** property
|
119
145
|
|
120
|
-
|
146
|
+
## 2.0.0
|
121
147
|
|
122
148
|
- Emoji 12.0 data (including valid subdivisions)
|
123
149
|
- Introduce new `REGEX_WELL_FORMED` to be able to match for invalid tag and region sequences
|
@@ -126,40 +152,40 @@
|
|
126
152
|
- Issue warning when using `#list` method to retrieve outdated category
|
127
153
|
- Change matching for ZWJ sequences: Do not limit sequence to a maximum of 3 ZWJs
|
128
154
|
|
129
|
-
|
155
|
+
## 1.1.0
|
130
156
|
|
131
157
|
- Emoji 11.0
|
132
158
|
- Do not depend on rubygems (only use zlib stdlib for unzipping)
|
133
159
|
|
134
|
-
|
160
|
+
## 1.0.3
|
135
161
|
|
136
162
|
- Explicitly load rubygems/util, fixes regression in 1.2.1
|
137
163
|
|
138
|
-
|
164
|
+
## 1.0.2
|
139
165
|
|
140
166
|
- Use `Gem::Util` for `gunzip`, removes deprecation warning
|
141
167
|
|
142
|
-
|
168
|
+
## 1.0.1
|
143
169
|
|
144
170
|
- Actually set required Ruby version to 2.3 in gemspec
|
145
171
|
|
146
|
-
|
172
|
+
## 1.0.0
|
147
173
|
|
148
174
|
- Drop support for Ruby below 2.3, use 0.9 if you need to
|
149
175
|
- Internal refactorings, no API change
|
150
176
|
|
151
|
-
|
177
|
+
## 0.9.3
|
152
178
|
|
153
179
|
- Implement native Emoji regex matchers, but do not activate or document, yet
|
154
180
|
|
155
|
-
|
181
|
+
## 0.9.2
|
156
182
|
|
157
183
|
- REGEX_TEXT: Do not match if the text emoji is followed by an emoji modifier
|
158
184
|
|
159
|
-
|
185
|
+
## 0.9.1
|
160
186
|
|
161
187
|
- Include a categorized list of recommended Emoji
|
162
188
|
|
163
|
-
|
189
|
+
## 0.9.0
|
164
190
|
|
165
191
|
- Initial release (Emoji version 5.0)
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
unicode-emoji (
|
5
|
-
unicode-version (~> 1.0)
|
4
|
+
unicode-emoji (4.0.4)
|
6
5
|
|
7
6
|
GEM
|
8
7
|
remote: https://rubygems.org/
|
@@ -20,7 +19,6 @@ GEM
|
|
20
19
|
reline (0.3.8)
|
21
20
|
io-console (~> 0.5)
|
22
21
|
stringio (3.0.8)
|
23
|
-
unicode-version (1.3.0)
|
24
22
|
|
25
23
|
PLATFORMS
|
26
24
|
ruby
|
@@ -32,4 +30,4 @@ DEPENDENCIES
|
|
32
30
|
unicode-emoji!
|
33
31
|
|
34
32
|
BUNDLED WITH
|
35
|
-
2.
|
33
|
+
2.5.21
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# Unicode::Emoji [![[version]](https://badge.fury.io/rb/unicode-emoji.svg)](https://badge.fury.io/rb/unicode-emoji) [![[ci]](https://github.com/janlelis/unicode-emoji/workflows/Test/badge.svg)](https://github.com/janlelis/unicode-emoji/actions?query=workflow%3ATest)
|
2
2
|
|
3
|
-
Provides regular expressions to
|
3
|
+
Provides various sophisticated regular expressions to work with Emoji in strings,
|
4
|
+
incorporating the latest Unicode / Emoji standards.
|
4
5
|
|
5
6
|
Additional features:
|
6
7
|
|
@@ -26,16 +27,17 @@ require "unicode/emoji"
|
|
26
27
|
|
27
28
|
string = "String which contains all types of Emoji sequences:
|
28
29
|
|
29
|
-
-
|
30
|
-
- Textual
|
30
|
+
- Basic Emoji: 😴
|
31
|
+
- Textual Emoji with Emoji variation (VS16): ▶️
|
31
32
|
- Emoji with skin tone modifier: 🛌🏽
|
32
33
|
- Region flag: 🇵🇹
|
33
34
|
- Sub-Region flag: 🏴
|
34
35
|
- Keycap sequence: 2️⃣
|
36
|
+
- Skin tone modifier: 🏻
|
35
37
|
- Sequence using ZWJ (zero width joiner): 🤾🏽♀️
|
36
38
|
"
|
37
39
|
|
38
|
-
string.scan(Unicode::Emoji::REGEX) # => ["😴", "▶️", "🛌🏽", "🇵🇹", "🏴", "2️⃣", "🤾🏽♀️"]
|
40
|
+
string.scan(Unicode::Emoji::REGEX) # => ["😴", "▶️", "🛌🏽", "🇵🇹", "🏴", "2️⃣", "🏻", "🤾🏽♀️"]
|
39
41
|
```
|
40
42
|
|
41
43
|
Depending on your exact use case, you can choose between multiple levels of Emoji detection:
|
@@ -44,10 +46,10 @@ Depending on your exact usecase, you can choose between multiple levels of Emoji
|
|
44
46
|
|
45
47
|
Regex | Description | Example Matches | Example Non-Matches
|
46
48
|
------------------------------|-------------|-----------------|--------------------
|
47
|
-
`Unicode::Emoji::REGEX` | **Use this one if unsure!** Matches (non-textual)
|
48
|
-
`Unicode::Emoji::REGEX_VALID` | Matches (non-textual)
|
49
|
-
`Unicode::Emoji::REGEX_WELL_FORMED` | Matches (non-textual)
|
50
|
-
`Unicode::Emoji::REGEX_POSSIBLE` | Matches all singleton Emoji,
|
49
|
+
`Unicode::Emoji::REGEX` | **Use this one if unsure!** Matches (non-textual) Basic Emoji and all kinds of *recommended* Emoji sequences (RGI/FQE) | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🏻` | `🤾🏽♀`, `🏌♂️`, `😴︎`, `▶`, `🇵🇵`, `🏴`, `🤠🤢`, `1`, `1⃣`
|
50
|
+
`Unicode::Emoji::REGEX_VALID` | Matches (non-textual) Basic Emoji and all kinds of *valid* Emoji sequences | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀` ,`🏌♂️`, `🤠🤢`, `🏻` | `😴︎`, `▶`, `🇵🇵`, `1`, `1⃣`
|
51
|
+
`Unicode::Emoji::REGEX_WELL_FORMED` | Matches (non-textual) Basic Emoji and all kinds of *well-formed* Emoji sequences | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`,`🏌♂️` , `🤠🤢`, `🇵🇵`, `🏻` | `😴︎`, `▶`, `1`, `1⃣`
|
52
|
+
`Unicode::Emoji::REGEX_POSSIBLE` | Matches all singleton Emoji, all kinds of Emoji sequences, and even non-Emoji singleton components like digits. Only exception: Unqualified keycap sequences are not matched | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `🇵🇵`, `😴︎`, `▶`, `🏻`, `1` | `1⃣`
|
51
53
|
|
52
54
|
#### Include Text Emoji
|
53
55
|
|
@@ -55,16 +57,16 @@ By default, textual Emoji (emoji characters with text variation selector or thos
|
|
55
57
|
|
56
58
|
Regex | Description | Example Matches | Example Non-Matches
|
57
59
|
------------------------------|-------------|-----------------|--------------------
|
58
|
-
`Unicode::Emoji::REGEX_INCLUDE_TEXT` | `REGEX` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `😴︎`, `▶`, `1⃣`
|
59
|
-
`Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT` | `REGEX_VALID` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `😴︎`, `▶`, `1⃣` |
|
60
|
-
`Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT` | `REGEX_WELL_FORMED` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `🇵🇵`, `😴︎`, `▶`, `1⃣` |
|
60
|
+
`Unicode::Emoji::REGEX_INCLUDE_TEXT` | `REGEX` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `😴︎`, `▶`, `1⃣` , `🏻`| `🤾🏽♀`, `🏌♂️`, `🇵🇵`, `🏴`, `🤠🤢`, `1`
|
61
|
+
`Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT` | `REGEX_VALID` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `😴︎`, `▶`, `1⃣` , `🏻` | `🇵🇵`, `1`
|
62
|
+
`Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT` | `REGEX_WELL_FORMED` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `🇵🇵`, `😴︎`, `▶`, `1⃣` , `🏻` | `1`
|
61
63
|
|
62
64
|
#### Minimally-qualified and Unqualified Sequences
|
63
65
|
|
64
66
|
Regex | Description | Example Matches | Example Non-Matches
|
65
67
|
------------------------------|-------------|-----------------|--------------------
|
66
|
-
`Unicode::Emoji::REGEX_INCLUDE_MQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors, where the first partial Emoji has all required Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`,
|
67
|
-
`Unicode::Emoji::REGEX_INCLUDE_MQE_UQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`,
|
68
|
+
`Unicode::Emoji::REGEX_INCLUDE_MQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors, where the first partial Emoji has all required Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏻` | `🏌♂️`, `😴︎`, `▶`, `🇵🇵`, `🏴`, `🤠🤢`, `1`, `1⃣`
|
69
|
+
`Unicode::Emoji::REGEX_INCLUDE_MQE_UQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🏻` | `😴︎`, `▶`, `🇵🇵`, `🏴`, `🤠🤢`, `1`, `1⃣`
|
68
70
|
|
69
71
|
[List of MQE and UQE Emoji sequences](https://character.construction/unqualified-emoji)
|
70
72
|
|
@@ -74,10 +76,10 @@ Matches only simple one-codepoint (+ optional variation selector) Emoji:
|
|
74
76
|
|
75
77
|
Regex | Description | Example Matches | Example Non-Matches
|
76
78
|
------------------------------|-------------|-----------------|--------------------
|
77
|
-
`Unicode::Emoji::REGEX_BASIC` | Matches (non-textual)
|
78
|
-
`Unicode::Emoji::REGEX_TEXT` | Matches only textual singleton Emoji
|
79
|
+
`Unicode::Emoji::REGEX_BASIC` | Matches (non-textual) Basic Emoji, but no sequences at all | `😴`, `▶️`, `🏻` | `😴︎`, `▶`, `🛌🏽`, `🇵🇹`, `🇵🇵`,`2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `1`
|
80
|
+
`Unicode::Emoji::REGEX_TEXT` | Matches only textual singleton Emoji | `😴︎`, `▶` | `😴`, `▶️`, `🏻`, `🛌🏽`, `🇵🇹`, `🇵🇵`,`2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `1`
|
79
81
|
|
80
|
-
Here is a list of all Emoji that can be matched using the two regexes: [character.construction/emoji-vs-text](https://character.construction/emoji-vs-text)
|
82
|
+
Here is a list of all Emoji that can be matched using the two regexes: [character.construction/emoji-vs-text](https://character.construction/emoji-vs-text). The `REGEX_BASIC` regex also matches [visual Emoji components](https://character.construction/emoji-components) (skin tone modifiers and hair components).
|
81
83
|
|
82
84
|
While `REGEX_BASIC` is part of the above regexes, `REGEX_TEXT` is only included in the `*_INCLUDE_TEXT` or `*_UQE` variants.
|
83
85
|
|
@@ -140,7 +142,20 @@ Please see [the standard](https://www.unicode.org/reports/tr51/#Emoji_Sets) for
|
|
140
142
|
|
141
143
|
More info about valid vs. recommended Emoji can also be found in this [blog article on Emojipedia](https://blog.emojipedia.org/unicode-behind-the-curtain/).
|
142
144
|
|
143
|
-
###
|
145
|
+
### Emoji Property Regexes
|
146
|
+
|
147
|
+
Ruby includes native regex Emoji properties, as listed in the following table. You can also opt in to use the `*_PROP_*` regexes to get the Emoji support level of this gem (instead of Ruby's).
|
148
|
+
|
149
|
+
Gem Regex (`Unicode::Emoji`'s Emoji support level) | Native Regex (Ruby's Emoji support level)
|
150
|
+
---------------------------------------------------|------------------------------------------
|
151
|
+
`Unicode::Emoji::REGEX_PROP_EMOJI` | `/\p{Emoji}/`
|
152
|
+
`Unicode::Emoji::REGEX_PROP_MODIFIER` | `/\p{EMod}/`
|
153
|
+
`Unicode::Emoji::REGEX_PROP_MODIFIER_BASE` | `/\p{EBase}/`
|
154
|
+
`Unicode::Emoji::REGEX_PROP_COMPONENT` | `/\p{EComp}/`
|
155
|
+
`Unicode::Emoji::REGEX_PROP_PRESENTATION` | `/\p{EPres}/`
|
156
|
+
`Unicode::Emoji::REGEX_TEXT_PRESENTATION` | `/[\p{Emoji}&&\P{EPres}]/`
|
157
|
+
|
158
|
+
#### Extended Pictographic Regex
|
144
159
|
|
145
160
|
`Unicode::Emoji::REGEX_PICTO` matches single codepoints with the **Extended_Pictographic** property. For example, it will match `✀` BLACK SAFETY SCISSORS.
|
146
161
|
|
@@ -148,10 +163,6 @@ More info about valid vs. recommended Emoji can also be found in this [blog arti
|
|
148
163
|
|
149
164
|
See [character.construction/picto](https://character.construction/picto) for a list of all non-Emoji pictographic characters.
|
150
165
|
|
151
|
-
### Partial Regexes
|
152
|
-
|
153
|
-
`Unicode::Emoji::REGEX_ANY`, same as `\p{Emoji}`. Deprecated: Will be removed or renamed in the future.
|
154
|
-
|
155
166
|
## Usage – List
|
156
167
|
|
157
168
|
Use `Unicode::Emoji::LIST` or the **list** method to get an ordered and categorized list of Emoji:
|
data/data/generate_constants.rb
CHANGED
@@ -69,6 +69,8 @@ def pack_and_join(ords)
|
|
69
69
|
end
|
70
70
|
|
71
71
|
def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_component:, emoji_presentation:, text_presentation:, picto:, picto_no_emoji:)
|
72
|
+
visual_component = pack_and_join(VISUAL_COMPONENT)
|
73
|
+
|
72
74
|
emoji_presentation_sequence = \
|
73
75
|
join(
|
74
76
|
text_presentation + pack(EMOJI_VARIATION_SELECTOR),
|
@@ -78,6 +80,12 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
78
80
|
non_component_emoji_presentation_sequence = \
|
79
81
|
"(?!" + emoji_component + ")" + emoji_presentation_sequence
|
80
82
|
|
83
|
+
basic_emoji = \
|
84
|
+
join(
|
85
|
+
non_component_emoji_presentation_sequence,
|
86
|
+
visual_component,
|
87
|
+
)
|
88
|
+
|
81
89
|
text_keycap_sequence = \
|
82
90
|
pack_and_join(EMOJI_KEYCAPS) + pack(EMOJI_KEYCAP_SUFFIX)
|
83
91
|
|
@@ -169,6 +177,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
169
177
|
emoji_rgi_tag_sequence,
|
170
178
|
emoji_valid_flag_sequence,
|
171
179
|
emoji_core_sequence,
|
180
|
+
visual_component,
|
172
181
|
)
|
173
182
|
|
174
183
|
emoji_rgi_sequence_include_text = \
|
@@ -177,6 +186,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
177
186
|
emoji_rgi_tag_sequence,
|
178
187
|
emoji_valid_flag_sequence,
|
179
188
|
emoji_core_sequence,
|
189
|
+
visual_component,
|
180
190
|
text_emoji,
|
181
191
|
)
|
182
192
|
|
@@ -186,6 +196,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
186
196
|
emoji_rgi_tag_sequence,
|
187
197
|
emoji_valid_flag_sequence,
|
188
198
|
emoji_core_sequence,
|
199
|
+
visual_component,
|
189
200
|
)
|
190
201
|
|
191
202
|
emoji_rgi_include_mqe_uqe_sequence = \
|
@@ -195,6 +206,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
195
206
|
emoji_rgi_tag_sequence,
|
196
207
|
emoji_valid_flag_sequence,
|
197
208
|
emoji_core_sequence,
|
209
|
+
visual_component,
|
198
210
|
)
|
199
211
|
|
200
212
|
emoji_valid_sequence = \
|
@@ -203,6 +215,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
203
215
|
emoji_valid_tag_sequence,
|
204
216
|
emoji_valid_flag_sequence,
|
205
217
|
emoji_core_sequence,
|
218
|
+
visual_component,
|
206
219
|
)
|
207
220
|
|
208
221
|
emoji_valid_sequence_include_text = \
|
@@ -211,6 +224,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
211
224
|
emoji_valid_tag_sequence,
|
212
225
|
emoji_valid_flag_sequence,
|
213
226
|
emoji_core_sequence,
|
227
|
+
visual_component,
|
214
228
|
text_emoji,
|
215
229
|
)
|
216
230
|
|
@@ -220,6 +234,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
220
234
|
emoji_well_formed_tag_sequence,
|
221
235
|
emoji_well_formed_flag_sequence,
|
222
236
|
emoji_core_sequence,
|
237
|
+
visual_component,
|
223
238
|
)
|
224
239
|
|
225
240
|
emoji_well_formed_sequence_include_text = \
|
@@ -228,6 +243,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
228
243
|
emoji_well_formed_tag_sequence,
|
229
244
|
emoji_well_formed_flag_sequence,
|
230
245
|
emoji_core_sequence,
|
246
|
+
visual_component,
|
231
247
|
text_emoji,
|
232
248
|
)
|
233
249
|
|
@@ -279,19 +295,27 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
279
295
|
# See https://www.unicode.org/reports/tr51/#EBNF_and_Regex
|
280
296
|
regexes[:REGEX_POSSIBLE] = Regexp.compile(emoji_possible)
|
281
297
|
|
282
|
-
# Matches only basic single, non-textual emoji, ignores
|
283
|
-
regexes[:REGEX_BASIC] = Regexp.compile(
|
298
|
+
# Matches only basic single, non-textual emoji, ignores some components like simple digits
|
299
|
+
regexes[:REGEX_BASIC] = Regexp.compile(basic_emoji)
|
284
300
|
|
285
|
-
# Matches only basic single, textual emoji, ignores
|
301
|
+
# Matches only basic single, textual emoji, ignores components like modifiers or simple digits
|
286
302
|
regexes[:REGEX_TEXT] = Regexp.compile(text_emoji)
|
303
|
+
regexes[:REGEX_TEXT_PRESENTATION] = Regexp.compile(text_presentation)
|
287
304
|
|
288
|
-
#
|
289
|
-
regexes[:
|
305
|
+
# Export regexes for Emoji properties so they can be used with newer Unicode than Ruby's
|
306
|
+
regexes[:REGEX_PROP_EMOJI] = Regexp.compile(emoji_character)
|
307
|
+
regexes[:REGEX_PROP_MODIFIER] = Regexp.compile(emoji_modifier)
|
308
|
+
regexes[:REGEX_PROP_MODIFIER_BASE] = Regexp.compile(emoji_modifier_base)
|
309
|
+
regexes[:REGEX_PROP_COMPONENT] = Regexp.compile(emoji_component)
|
310
|
+
regexes[:REGEX_PROP_PRESENTATION] = Regexp.compile(emoji_presentation)
|
290
311
|
|
312
|
+
# Same goes for ExtendedPictographic
|
291
313
|
regexes[:REGEX_PICTO] = Regexp.compile(picto)
|
292
|
-
|
293
314
|
regexes[:REGEX_PICTO_NO_EMOJI] = Regexp.compile(picto_no_emoji)
|
294
315
|
|
316
|
+
# Emoji keycaps
|
317
|
+
regexes[:REGEX_EMOJI_KEYCAP] = Regexp.compile(emoji_keycap_sequence)
|
318
|
+
|
295
319
|
regexes
|
296
320
|
end
|
297
321
|
|
@@ -313,8 +337,8 @@ native_regexes = compile(
|
|
313
337
|
emoji_modifier_base: "\\p{EBase}",
|
314
338
|
emoji_component: "\\p{EComp}",
|
315
339
|
emoji_presentation: "\\p{EPres}",
|
316
|
-
text_presentation: "\\p{Emoji}
|
340
|
+
text_presentation: "[\\p{Emoji}&&\\P{EPres}]",
|
317
341
|
picto: "\\p{ExtPict}",
|
318
|
-
picto_no_emoji: "\\p{ExtPict}
|
342
|
+
picto_no_emoji: "[\\p{ExtPict}&&\\P{Emoji}]"
|
319
343
|
)
|
320
344
|
write_regexes(native_regexes, File.expand_path("../lib/unicode/emoji/generated_native", __dir__))
|
@@ -2,9 +2,9 @@
|
|
2
2
|
|
3
3
|
module Unicode
|
4
4
|
module Emoji
|
5
|
-
VERSION = "
|
5
|
+
VERSION = "4.0.4"
|
6
6
|
EMOJI_VERSION = "16.0"
|
7
|
-
CLDR_VERSION = "
|
7
|
+
CLDR_VERSION = "46"
|
8
8
|
DATA_DIRECTORY = File.expand_path('../../../data', __dir__).freeze
|
9
9
|
INDEX_FILENAME = (DATA_DIRECTORY + "/emoji.marshal.gz").freeze
|
10
10
|
|
@@ -41,5 +41,9 @@ module Unicode
|
|
41
41
|
|
42
42
|
# Two regional indicators make up a region
|
43
43
|
REGIONAL_INDICATORS = [*0x1F1E6..0x1F1FF].freeze
|
44
|
+
|
45
|
+
# The current list of Emoji components that should have a visual representation
|
46
|
+
# Currently skin tone modifiers + hair components
|
47
|
+
VISUAL_COMPONENT = [*0x1F3FB..0x1F3FF, *0x1F9B0..0x1F9B3].freeze
|
44
48
|
end
|
45
49
|
end
|