unicode-emoji 3.8.0 → 4.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rake_tasks +1 -0
- data/CHANGELOG.md +59 -33
- data/Gemfile.lock +2 -4
- data/README.md +32 -21
- data/data/generate_constants.rb +32 -8
- data/lib/unicode/emoji/constants.rb +6 -2
- data/lib/unicode/emoji/generated/regex.rb +1 -1
- data/lib/unicode/emoji/generated/regex_basic.rb +1 -1
- data/lib/unicode/emoji/generated/regex_emoji_keycap.rb +8 -0
- data/lib/unicode/emoji/generated/regex_include_mqe.rb +1 -1
- data/lib/unicode/emoji/generated/regex_include_mqe_uqe.rb +1 -1
- data/lib/unicode/emoji/generated/regex_include_text.rb +1 -1
- data/lib/unicode/emoji/generated/regex_prop_component.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_emoji.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_modifier.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_modifier_base.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_presentation.rb +8 -0
- data/lib/unicode/emoji/generated/regex_text_presentation.rb +8 -0
- data/lib/unicode/emoji/generated/regex_valid.rb +1 -1
- data/lib/unicode/emoji/generated/regex_valid_include_text.rb +1 -1
- data/lib/unicode/emoji/generated/regex_well_formed.rb +1 -1
- data/lib/unicode/emoji/generated/regex_well_formed_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_basic.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_emoji_keycap.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_include_mqe.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_prop_component.rb +8 -0
- data/lib/unicode/emoji/generated_native/{regex_any.rb → regex_prop_emoji.rb} +1 -1
- data/lib/unicode/emoji/generated_native/regex_prop_modifier.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_prop_modifier_base.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_prop_presentation.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_text_presentation.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_valid.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_valid_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_well_formed.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_well_formed_include_text.rb +1 -1
- data/lib/unicode/emoji.rb +10 -5
- data/spec/unicode_emoji_spec.rb +97 -16
- data/unicode-emoji.gemspec +1 -3
- metadata +17 -21
- data/lib/unicode/emoji/generated/regex_any.rb +0 -8
- data/spec/data/emoji-test.txt +0 -5331
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b08d6adaddfcbca3e754c9a52a0c0d5c772da86ca708affc9799ad113c5a005
|
4
|
+
data.tar.gz: e9f3817a215ef38b7933d69b4f0563d848a03f6a6b8728ecd06a74417fb5f8a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cedad0ceb5f1039be614bbca170cccc3f29e8f05bd7fc74714ed586ddf20edb5da25a6c8fd840acfb7dfbeb918ec6e2218cf427e5e510eb2406235253e32ad74
|
7
|
+
data.tar.gz: 4680b526737abd7491351ff87c5d323f3a6acf5996dffa4c2f4737e10bc8083da16928dbab34362b6014ae5a17863266eb456ab2d382f25350339eaf71a175bf
|
data/.rake_tasks
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,32 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
-
|
3
|
+
## 4.0.4
|
4
|
+
|
5
|
+
- Add `REGEX_TEXT_PRESENTATION` to be able to match for raw default-text Emoji codepoints
|
6
|
+
|
7
|
+
## 4.0.3
|
8
|
+
|
9
|
+
- Remove emoji-test.txt from Rubygems package
|
10
|
+
|
11
|
+
## 4.0.2
|
12
|
+
|
13
|
+
- Directly use `RbConfig::CONFIG["UNICODE_EMOJI_VERSION"]` to detect Ruby's Emoji version,
|
14
|
+
drop unicode-version dependency
|
15
|
+
|
16
|
+
## 4.0.0
|
17
|
+
|
18
|
+
- **Breaking change:** Regexes now include single skin tone modifiers (`🏻`) and hair components (`🦰`).
|
19
|
+
They were previously considered to be invalid partial Emoji, however since they are supposed to be
|
20
|
+
displayed as Emoji in isolation, they are now part of the regexes (see *ED-20* in UTS51).
|
21
|
+
- **Breaking change:** Drop `REGEX_ANY` in favor of `REGEX_PROP_EMOJI`
|
22
|
+
- Expose regexes for Emoji props (`REGEX_PROP_*`). The advantage over using the native regex properties
|
23
|
+
directly is that you will be able to use the Emoji support level of this gem instead of Ruby's.
|
24
|
+
For example, as of releasing this, the current Emoji version is 16.0, while Ruby is at 15.0.
|
25
|
+
Also see README for a table listing the regexes that match Emoji properties.
|
26
|
+
- Add `REGEX_EMOJI_KEYCAP` for matching specifically Emoji keycaps
|
27
|
+
- Use character class instead of lookbehind for native text emoji and non-emoji pictographic regexes
|
28
|
+
|
29
|
+
## 3.8.0
|
4
30
|
|
5
31
|
- Add new RGI-based regexes `REGEX_INCLUDE_MQE` and `REGEX_INCLUDE_MQE_UQE`, which allow you to match
|
6
32
|
for minimally-qualified and unqualified RGI sequences (Emoji that lack some VS16)
|
@@ -10,7 +36,7 @@
|
|
10
36
|
- Update CLDR to v46 (valid subdivisions)
|
11
37
|
- Further improvements (see commit log)
|
12
38
|
|
13
|
-
|
39
|
+
## 3.7.0
|
14
40
|
|
15
41
|
- Bump required Ruby slightly to 2.5
|
16
42
|
- Introduce new `REGEX_POSSIBLE` which contains the regex described in
|
@@ -23,46 +49,46 @@
|
|
23
49
|
- Separately autoload emoji list, so it can be loaded when other indexes
|
24
50
|
are not needed
|
25
51
|
|
26
|
-
|
52
|
+
## 3.6.0
|
27
53
|
|
28
54
|
- `Unicode::Emoji::REGEX_TEXT` now matches non-emoji keycaps like "3⃣" (U+0033 U+20E3)
|
29
55
|
- Minor refactorings
|
30
56
|
|
31
|
-
|
57
|
+
## 3.5.0
|
32
58
|
|
33
59
|
- Emoji 16.0
|
34
60
|
|
35
|
-
|
61
|
+
## 3.4.0
|
36
62
|
|
37
63
|
- Emoji 15.1
|
38
64
|
|
39
|
-
|
65
|
+
## 3.3.2
|
40
66
|
|
41
67
|
- Update valid subdivisions to CLDR 43 (no changes)
|
42
68
|
-> there won't be any new RGI subdivision flags in Emoji
|
43
69
|
|
44
|
-
|
70
|
+
## 3.3.1
|
45
71
|
|
46
72
|
- Update valid subdivisions to CLDR 42 (no changes)
|
47
73
|
|
48
|
-
|
74
|
+
## 3.3.0
|
49
75
|
|
50
76
|
- Emoji 15.0
|
51
77
|
|
52
|
-
|
78
|
+
## 3.2.0
|
53
79
|
|
54
80
|
- Update valid subdivisions to CLDR 41
|
55
81
|
|
56
|
-
|
82
|
+
## 3.1.1
|
57
83
|
|
58
84
|
- Fix `REGEX` to be able to match complete family emoji, instead of
|
59
85
|
sub-matching partial families, thanks @matt17r
|
60
86
|
|
61
|
-
|
87
|
+
## 3.1.0
|
62
88
|
|
63
89
|
- Update valid subdivisions to CLDR 40
|
64
90
|
|
65
|
-
|
91
|
+
## 3.0.0
|
66
92
|
|
67
93
|
- Vastly improve memory usage, patch by @radarek
|
68
94
|
- Emoji regexes are now pre-generated and bundled with the release
|
@@ -70,54 +96,54 @@
|
|
70
96
|
- Most constants (e.g. regexes) now get autoloaded
|
71
97
|
- See https://github.com/janlelis/unicode-emoji/pull/9 for more details
|
72
98
|
|
73
|
-
|
99
|
+
## 2.9.0
|
74
100
|
|
75
101
|
- Emoji 14.0
|
76
102
|
|
77
|
-
|
103
|
+
## 2.8.0
|
78
104
|
|
79
105
|
- Update valid subdivisions to CLDR 39
|
80
106
|
|
81
|
-
|
107
|
+
## 2.7.1
|
82
108
|
|
83
109
|
- Update valid subdivisions to CLDR 38.1
|
84
110
|
|
85
|
-
|
111
|
+
## 2.7.0
|
86
112
|
|
87
113
|
- Update valid subdivisions to CLDR 38
|
88
114
|
- Loosen Ruby dependency to allow Ruby 3.0
|
89
115
|
|
90
|
-
|
116
|
+
## 2.6.0
|
91
117
|
|
92
118
|
- Emoji 13.1
|
93
119
|
|
94
|
-
|
120
|
+
## 2.5.0
|
95
121
|
|
96
122
|
- Use native Emoji regex properties when current Ruby's Emoji support is the same as our current Emoji version
|
97
123
|
- Update valid subdivisions to CLDR 37
|
98
124
|
|
99
|
-
|
125
|
+
## 2.4.0
|
100
126
|
|
101
127
|
- Emoji 13.0
|
102
128
|
|
103
|
-
|
129
|
+
## 2.3.1
|
104
130
|
|
105
131
|
- Fix index to actually include Emoji 12.1
|
106
132
|
|
107
|
-
|
133
|
+
## 2.3.0
|
108
134
|
|
109
135
|
- Emoji 12.1
|
110
136
|
|
111
|
-
|
137
|
+
## 2.2.0
|
112
138
|
|
113
139
|
- Update subdivisions to CLDR 36
|
114
140
|
|
115
|
-
|
141
|
+
## 2.1.0
|
116
142
|
|
117
143
|
- Add `REGEX_PICTO` which matches codepoints with the **Extended_Pictographic** property
|
118
144
|
- Add `REGEX_PICTO_NO_EMOJI` which matches codepoints with the **Extended_Pictographic** property, but no **Emoji** property
|
119
145
|
|
120
|
-
|
146
|
+
## 2.0.0
|
121
147
|
|
122
148
|
- Emoji 12.0 data (including valid subdivisions)
|
123
149
|
- Introduce new `REGEX_WELL_FORMED` to be able to match for invalid tag and region sequences
|
@@ -126,40 +152,40 @@
|
|
126
152
|
- Issue warning when using `#list` method to retrieve outdated category
|
127
153
|
- Change matching for ZWJ sequences: Do not limit sequence to a maximum of 3 ZWJs
|
128
154
|
|
129
|
-
|
155
|
+
## 1.1.0
|
130
156
|
|
131
157
|
- Emoji 11.0
|
132
158
|
- Do not depend on rubygems (only use zlib stdlib for unzipping)
|
133
159
|
|
134
|
-
|
160
|
+
## 1.0.3
|
135
161
|
|
136
162
|
- Explicitly load rubygems/util, fixes regression in 1.0.2
|
137
163
|
|
138
|
-
|
164
|
+
## 1.0.2
|
139
165
|
|
140
166
|
- Use `Gem::Util` for `gunzip`, removes deprecation warning
|
141
167
|
|
142
|
-
|
168
|
+
## 1.0.1
|
143
169
|
|
144
170
|
- Actually set required Ruby version to 2.3 in gemspec
|
145
171
|
|
146
|
-
|
172
|
+
## 1.0.0
|
147
173
|
|
148
174
|
- Drop support for Ruby below 2.3, use 0.9 if you need to
|
149
175
|
- Internal refactorings, no API change
|
150
176
|
|
151
|
-
|
177
|
+
## 0.9.3
|
152
178
|
|
153
179
|
- Implement native Emoji regex matchers, but do not activate or document, yet
|
154
180
|
|
155
|
-
|
181
|
+
## 0.9.2
|
156
182
|
|
157
183
|
- REGEX_TEXT: Do not match if the text emoji is followed by an emoji modifier
|
158
184
|
|
159
|
-
|
185
|
+
## 0.9.1
|
160
186
|
|
161
187
|
- Include a categorized list of recommended Emoji
|
162
188
|
|
163
|
-
|
189
|
+
## 0.9.0
|
164
190
|
|
165
191
|
- Initial release (Emoji version 5.0)
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
unicode-emoji (
|
5
|
-
unicode-version (~> 1.0)
|
4
|
+
unicode-emoji (4.0.4)
|
6
5
|
|
7
6
|
GEM
|
8
7
|
remote: https://rubygems.org/
|
@@ -20,7 +19,6 @@ GEM
|
|
20
19
|
reline (0.3.8)
|
21
20
|
io-console (~> 0.5)
|
22
21
|
stringio (3.0.8)
|
23
|
-
unicode-version (1.3.0)
|
24
22
|
|
25
23
|
PLATFORMS
|
26
24
|
ruby
|
@@ -32,4 +30,4 @@ DEPENDENCIES
|
|
32
30
|
unicode-emoji!
|
33
31
|
|
34
32
|
BUNDLED WITH
|
35
|
-
2.
|
33
|
+
2.5.21
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# Unicode::Emoji [![[version]](https://badge.fury.io/rb/unicode-emoji.svg)](https://badge.fury.io/rb/unicode-emoji) [![[ci]](https://github.com/janlelis/unicode-emoji/workflows/Test/badge.svg)](https://github.com/janlelis/unicode-emoji/actions?query=workflow%3ATest)
|
2
2
|
|
3
|
-
Provides regular expressions to
|
3
|
+
Provides various sophisticated regular expressions to work with Emoji in strings,
|
4
|
+
incorporating the latest Unicode / Emoji standards.
|
4
5
|
|
5
6
|
Additional features:
|
6
7
|
|
@@ -26,16 +27,17 @@ require "unicode/emoji"
|
|
26
27
|
|
27
28
|
string = "String which contains all types of Emoji sequences:
|
28
29
|
|
29
|
-
-
|
30
|
-
- Textual
|
30
|
+
- Basic Emoji: 😴
|
31
|
+
- Textual Emoji with Emoji variation (VS16): ▶️
|
31
32
|
- Emoji with skin tone modifier: 🛌🏽
|
32
33
|
- Region flag: 🇵🇹
|
33
34
|
- Sub-Region flag: 🏴
|
34
35
|
- Keycap sequence: 2️⃣
|
36
|
+
- Skin tone modifier: 🏻
|
35
37
|
- Sequence using ZWJ (zero width joiner): 🤾🏽♀️
|
36
38
|
"
|
37
39
|
|
38
|
-
string.scan(Unicode::Emoji::REGEX) # => ["😴", "▶️", "🛌🏽", "🇵🇹", "🏴", "2️⃣", "🤾🏽♀️"]
|
40
|
+
string.scan(Unicode::Emoji::REGEX) # => ["😴", "▶️", "🛌🏽", "🇵🇹", "🏴", "2️⃣", "🏻", "🤾🏽♀️"]
|
39
41
|
```
|
40
42
|
|
41
43
|
Depending on your exact usecase, you can choose between multiple levels of Emoji detection:
|
@@ -44,10 +46,10 @@ Depending on your exact usecase, you can choose between multiple levels of Emoji
|
|
44
46
|
|
45
47
|
Regex | Description | Example Matches | Example Non-Matches
|
46
48
|
------------------------------|-------------|-----------------|--------------------
|
47
|
-
`Unicode::Emoji::REGEX` | **Use this one if unsure!** Matches (non-textual)
|
48
|
-
`Unicode::Emoji::REGEX_VALID` | Matches (non-textual)
|
49
|
-
`Unicode::Emoji::REGEX_WELL_FORMED` | Matches (non-textual)
|
50
|
-
`Unicode::Emoji::REGEX_POSSIBLE` | Matches all singleton Emoji,
|
49
|
+
`Unicode::Emoji::REGEX` | **Use this one if unsure!** Matches (non-textual) Basic Emoji and all kinds of *recommended* Emoji sequences (RGI/FQE) | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🏻` | `🤾🏽♀`, `🏌♂️`, `😴︎`, `▶`, `🇵🇵`, `🏴`, `🤠🤢`, `1`, `1⃣`
|
50
|
+
`Unicode::Emoji::REGEX_VALID` | Matches (non-textual) Basic Emoji and all kinds of *valid* Emoji sequences | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀` ,`🏌♂️`, `🤠🤢`, `🏻` | `😴︎`, `▶`, `🇵🇵`, `1`, `1⃣`
|
51
|
+
`Unicode::Emoji::REGEX_WELL_FORMED` | Matches (non-textual) Basic Emoji and all kinds of *well-formed* Emoji sequences | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`,`🏌♂️` , `🤠🤢`, `🇵🇵`, `🏻` | `😴︎`, `▶`, `1`, `1⃣`
|
52
|
+
`Unicode::Emoji::REGEX_POSSIBLE` | Matches all singleton Emoji, all kinds of Emoji sequences, and even non-Emoji singleton components like digits. Only exception: Unqualified keycap sequences are not matched | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `🇵🇵`, `😴︎`, `▶`, `🏻`, `1` | `1⃣`
|
51
53
|
|
52
54
|
#### Include Text Emoji
|
53
55
|
|
@@ -55,16 +57,16 @@ By default, textual Emoji (emoji characters with text variation selector or thos
|
|
55
57
|
|
56
58
|
Regex | Description | Example Matches | Example Non-Matches
|
57
59
|
------------------------------|-------------|-----------------|--------------------
|
58
|
-
`Unicode::Emoji::REGEX_INCLUDE_TEXT` | `REGEX` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `😴︎`, `▶`, `1⃣`
|
59
|
-
`Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT` | `REGEX_VALID` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `😴︎`, `▶`, `1⃣` |
|
60
|
-
`Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT` | `REGEX_WELL_FORMED` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `🇵🇵`, `😴︎`, `▶`, `1⃣` |
|
60
|
+
`Unicode::Emoji::REGEX_INCLUDE_TEXT` | `REGEX` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `😴︎`, `▶`, `1⃣` , `🏻`| `🤾🏽♀`, `🏌♂️`, `🇵🇵`, `🏴`, `🤠🤢`, `1`
|
61
|
+
`Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT` | `REGEX_VALID` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `😴︎`, `▶`, `1⃣` , `🏻` | `🇵🇵`, `1`
|
62
|
+
`Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT` | `REGEX_WELL_FORMED` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `🇵🇵`, `😴︎`, `▶`, `1⃣` , `🏻` | `1`
|
61
63
|
|
62
64
|
#### Minimally-qualified and Unqualified Sequences
|
63
65
|
|
64
66
|
Regex | Description | Example Matches | Example Non-Matches
|
65
67
|
------------------------------|-------------|-----------------|--------------------
|
66
|
-
`Unicode::Emoji::REGEX_INCLUDE_MQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors, where the first partial Emoji has all required Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`,
|
67
|
-
`Unicode::Emoji::REGEX_INCLUDE_MQE_UQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`,
|
68
|
+
`Unicode::Emoji::REGEX_INCLUDE_MQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors, where the first partial Emoji has all required Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏻` | `🏌♂️`, `😴︎`, `▶`, `🇵🇵`, `🏴`, `🤠🤢`, `1`, `1⃣`
|
69
|
+
`Unicode::Emoji::REGEX_INCLUDE_MQE_UQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🏻` | `😴︎`, `▶`, `🇵🇵`, `🏴`, `🤠🤢`, `1`, `1⃣`
|
68
70
|
|
69
71
|
[List of MQE and UQE Emoji sequences](https://character.construction/unqualified-emoji)
|
70
72
|
|
@@ -74,10 +76,10 @@ Matches only simple one-codepoint (+ optional variation selector) Emoji:
|
|
74
76
|
|
75
77
|
Regex | Description | Example Matches | Example Non-Matches
|
76
78
|
------------------------------|-------------|-----------------|--------------------
|
77
|
-
`Unicode::Emoji::REGEX_BASIC` | Matches (non-textual)
|
78
|
-
`Unicode::Emoji::REGEX_TEXT` | Matches only textual singleton Emoji
|
79
|
+
`Unicode::Emoji::REGEX_BASIC` | Matches (non-textual) Basic Emoji, but no sequences at all | `😴`, `▶️`, `🏻` | `😴︎`, `▶`, `🛌🏽`, `🇵🇹`, `🇵🇵`,`2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `1`
|
80
|
+
`Unicode::Emoji::REGEX_TEXT` | Matches only textual singleton Emoji | `😴︎`, `▶` | `😴`, `▶️`, `🏻`, `🛌🏽`, `🇵🇹`, `🇵🇵`,`2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `1`
|
79
81
|
|
80
|
-
Here is a list of all Emoji that can be matched using the two regexes: [character.construction/emoji-vs-text](https://character.construction/emoji-vs-text)
|
82
|
+
Here is a list of all Emoji that can be matched using the two regexes: [character.construction/emoji-vs-text](https://character.construction/emoji-vs-text). The `REGEX_BASIC` regex also matches [visual Emoji components](https://character.construction/emoji-components) (skin tone modifiers and hair components).
|
81
83
|
|
82
84
|
While `REGEX_BASIC` is part of the above regexes, `REGEX_TEXT` is only included in the `*_INCLUDE_TEXT` or `*_UQE` variants.
|
83
85
|
|
@@ -140,7 +142,20 @@ Please see [the standard](https://www.unicode.org/reports/tr51/#Emoji_Sets) for
|
|
140
142
|
|
141
143
|
More info about valid vs. recommended Emoji can also be found in this [blog article on Emojipedia](https://blog.emojipedia.org/unicode-behind-the-curtain/).
|
142
144
|
|
143
|
-
###
|
145
|
+
### Emoji Property Regexes
|
146
|
+
|
147
|
+
Ruby includes native regex Emoji properties, as listed in the following table. You can also opt in to use the `*_PROP_*` regexes to get the Emoji support level of this gem (instead of Ruby's).
|
148
|
+
|
149
|
+
Gem Regex (`Unicode::Emoji`'s Emoji support level) | Native Regex (Ruby's Emoji support level)
|
150
|
+
---------------------------------------------------|------------------------------------------
|
151
|
+
`Unicode::Emoji::REGEX_PROP_EMOJI` | `/\p{Emoji}/`
|
152
|
+
`Unicode::Emoji::REGEX_PROP_MODIFIER` | `/\p{EMod}/`
|
153
|
+
`Unicode::Emoji::REGEX_PROP_MODIFIER_BASE` | `/\p{EBase}/`
|
154
|
+
`Unicode::Emoji::REGEX_PROP_COMPONENT` | `/\p{EComp}/`
|
155
|
+
`Unicode::Emoji::REGEX_PROP_PRESENTATION` | `/\p{EPres}/`
|
156
|
+
`Unicode::Emoji::REGEX_TEXT_PRESENTATION` | `/[\p{Emoji}&&\P{EPres}]/`
|
157
|
+
|
158
|
+
#### Extended Pictographic Regex
|
144
159
|
|
145
160
|
`Unicode::Emoji::REGEX_PICTO` matches single codepoints with the **Extended_Pictographic** property. For example, it will match `✀` BLACK SAFETY SCISSORS.
|
146
161
|
|
@@ -148,10 +163,6 @@ More info about valid vs. recommended Emoji can also be found in this [blog arti
|
|
148
163
|
|
149
164
|
See [character.construction/picto](https://character.construction/picto) for a list of all non-Emoji pictographic characters.
|
150
165
|
|
151
|
-
### Partial Regexes
|
152
|
-
|
153
|
-
`Unicode::Emoji::REGEX_ANY`, same as `\p{Emoji}`. Deprecated: Will be removed or renamed in the future.
|
154
|
-
|
155
166
|
## Usage – List
|
156
167
|
|
157
168
|
Use `Unicode::Emoji::LIST` or the **list** method to get an ordered and categorized list of Emoji:
|
data/data/generate_constants.rb
CHANGED
@@ -69,6 +69,8 @@ def pack_and_join(ords)
|
|
69
69
|
end
|
70
70
|
|
71
71
|
def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_component:, emoji_presentation:, text_presentation:, picto:, picto_no_emoji:)
|
72
|
+
visual_component = pack_and_join(VISUAL_COMPONENT)
|
73
|
+
|
72
74
|
emoji_presentation_sequence = \
|
73
75
|
join(
|
74
76
|
text_presentation + pack(EMOJI_VARIATION_SELECTOR),
|
@@ -78,6 +80,12 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
78
80
|
non_component_emoji_presentation_sequence = \
|
79
81
|
"(?!" + emoji_component + ")" + emoji_presentation_sequence
|
80
82
|
|
83
|
+
basic_emoji = \
|
84
|
+
join(
|
85
|
+
non_component_emoji_presentation_sequence,
|
86
|
+
visual_component,
|
87
|
+
)
|
88
|
+
|
81
89
|
text_keycap_sequence = \
|
82
90
|
pack_and_join(EMOJI_KEYCAPS) + pack(EMOJI_KEYCAP_SUFFIX)
|
83
91
|
|
@@ -169,6 +177,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
169
177
|
emoji_rgi_tag_sequence,
|
170
178
|
emoji_valid_flag_sequence,
|
171
179
|
emoji_core_sequence,
|
180
|
+
visual_component,
|
172
181
|
)
|
173
182
|
|
174
183
|
emoji_rgi_sequence_include_text = \
|
@@ -177,6 +186,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
177
186
|
emoji_rgi_tag_sequence,
|
178
187
|
emoji_valid_flag_sequence,
|
179
188
|
emoji_core_sequence,
|
189
|
+
visual_component,
|
180
190
|
text_emoji,
|
181
191
|
)
|
182
192
|
|
@@ -186,6 +196,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
186
196
|
emoji_rgi_tag_sequence,
|
187
197
|
emoji_valid_flag_sequence,
|
188
198
|
emoji_core_sequence,
|
199
|
+
visual_component,
|
189
200
|
)
|
190
201
|
|
191
202
|
emoji_rgi_include_mqe_uqe_sequence = \
|
@@ -195,6 +206,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
195
206
|
emoji_rgi_tag_sequence,
|
196
207
|
emoji_valid_flag_sequence,
|
197
208
|
emoji_core_sequence,
|
209
|
+
visual_component,
|
198
210
|
)
|
199
211
|
|
200
212
|
emoji_valid_sequence = \
|
@@ -203,6 +215,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
203
215
|
emoji_valid_tag_sequence,
|
204
216
|
emoji_valid_flag_sequence,
|
205
217
|
emoji_core_sequence,
|
218
|
+
visual_component,
|
206
219
|
)
|
207
220
|
|
208
221
|
emoji_valid_sequence_include_text = \
|
@@ -211,6 +224,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
211
224
|
emoji_valid_tag_sequence,
|
212
225
|
emoji_valid_flag_sequence,
|
213
226
|
emoji_core_sequence,
|
227
|
+
visual_component,
|
214
228
|
text_emoji,
|
215
229
|
)
|
216
230
|
|
@@ -220,6 +234,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
220
234
|
emoji_well_formed_tag_sequence,
|
221
235
|
emoji_well_formed_flag_sequence,
|
222
236
|
emoji_core_sequence,
|
237
|
+
visual_component,
|
223
238
|
)
|
224
239
|
|
225
240
|
emoji_well_formed_sequence_include_text = \
|
@@ -228,6 +243,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
228
243
|
emoji_well_formed_tag_sequence,
|
229
244
|
emoji_well_formed_flag_sequence,
|
230
245
|
emoji_core_sequence,
|
246
|
+
visual_component,
|
231
247
|
text_emoji,
|
232
248
|
)
|
233
249
|
|
@@ -279,19 +295,27 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
279
295
|
# See https://www.unicode.org/reports/tr51/#EBNF_and_Regex
|
280
296
|
regexes[:REGEX_POSSIBLE] = Regexp.compile(emoji_possible)
|
281
297
|
|
282
|
-
# Matches only basic single, non-textual emoji, ignores
|
283
|
-
regexes[:REGEX_BASIC] = Regexp.compile(
|
298
|
+
# Matches only basic single, non-textual emoji, ignores some components like simple digits
|
299
|
+
regexes[:REGEX_BASIC] = Regexp.compile(basic_emoji)
|
284
300
|
|
285
|
-
# Matches only basic single, textual emoji, ignores
|
301
|
+
# Matches only basic single, textual emoji, ignores components like modifiers or simple digits
|
286
302
|
regexes[:REGEX_TEXT] = Regexp.compile(text_emoji)
|
303
|
+
regexes[:REGEX_TEXT_PRESENTATION] = Regexp.compile(text_presentation)
|
287
304
|
|
288
|
-
#
|
289
|
-
regexes[:
|
305
|
+
# Export regexes for Emoji properties so they can be used with newer Unicode than Ruby's
|
306
|
+
regexes[:REGEX_PROP_EMOJI] = Regexp.compile(emoji_character)
|
307
|
+
regexes[:REGEX_PROP_MODIFIER] = Regexp.compile(emoji_modifier)
|
308
|
+
regexes[:REGEX_PROP_MODIFIER_BASE] = Regexp.compile(emoji_modifier_base)
|
309
|
+
regexes[:REGEX_PROP_COMPONENT] = Regexp.compile(emoji_component)
|
310
|
+
regexes[:REGEX_PROP_PRESENTATION] = Regexp.compile(emoji_presentation)
|
290
311
|
|
312
|
+
# Same goes for ExtendedPictographic
|
291
313
|
regexes[:REGEX_PICTO] = Regexp.compile(picto)
|
292
|
-
|
293
314
|
regexes[:REGEX_PICTO_NO_EMOJI] = Regexp.compile(picto_no_emoji)
|
294
315
|
|
316
|
+
# Emoji keycaps
|
317
|
+
regexes[:REGEX_EMOJI_KEYCAP] = Regexp.compile(emoji_keycap_sequence)
|
318
|
+
|
295
319
|
regexes
|
296
320
|
end
|
297
321
|
|
@@ -313,8 +337,8 @@ native_regexes = compile(
|
|
313
337
|
emoji_modifier_base: "\\p{EBase}",
|
314
338
|
emoji_component: "\\p{EComp}",
|
315
339
|
emoji_presentation: "\\p{EPres}",
|
316
|
-
text_presentation: "\\p{Emoji}
|
340
|
+
text_presentation: "[\\p{Emoji}&&\\P{EPres}]",
|
317
341
|
picto: "\\p{ExtPict}",
|
318
|
-
picto_no_emoji: "\\p{ExtPict}
|
342
|
+
picto_no_emoji: "[\\p{ExtPict}&&\\P{Emoji}]"
|
319
343
|
)
|
320
344
|
write_regexes(native_regexes, File.expand_path("../lib/unicode/emoji/generated_native", __dir__))
|
@@ -2,9 +2,9 @@
|
|
2
2
|
|
3
3
|
module Unicode
|
4
4
|
module Emoji
|
5
|
-
VERSION = "
|
5
|
+
VERSION = "4.0.4"
|
6
6
|
EMOJI_VERSION = "16.0"
|
7
|
-
CLDR_VERSION = "
|
7
|
+
CLDR_VERSION = "46"
|
8
8
|
DATA_DIRECTORY = File.expand_path('../../../data', __dir__).freeze
|
9
9
|
INDEX_FILENAME = (DATA_DIRECTORY + "/emoji.marshal.gz").freeze
|
10
10
|
|
@@ -41,5 +41,9 @@ module Unicode
|
|
41
41
|
|
42
42
|
# Two regional indicators make up a region
|
43
43
|
REGIONAL_INDICATORS = [*0x1F1E6..0x1F1FF].freeze
|
44
|
+
|
45
|
+
# The current list of Emoji components that should have a visual representation
|
46
|
+
# Currently skin tone modifiers + hair components
|
47
|
+
VISUAL_COMPONENT = [*0x1F3FB..0x1F3FF, *0x1F9B0..0x1F9B3].freeze
|
44
48
|
end
|
45
49
|
end
|