unicode-emoji 3.8.0 → 4.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rake_tasks +1 -0
- data/CHANGELOG.md +55 -33
- data/Gemfile.lock +2 -4
- data/README.md +31 -21
- data/data/generate_constants.rb +31 -8
- data/lib/unicode/emoji/constants.rb +6 -2
- data/lib/unicode/emoji/generated/regex.rb +1 -1
- data/lib/unicode/emoji/generated/regex_basic.rb +1 -1
- data/lib/unicode/emoji/generated/regex_emoji_keycap.rb +8 -0
- data/lib/unicode/emoji/generated/regex_include_mqe.rb +1 -1
- data/lib/unicode/emoji/generated/regex_include_mqe_uqe.rb +1 -1
- data/lib/unicode/emoji/generated/regex_include_text.rb +1 -1
- data/lib/unicode/emoji/generated/regex_prop_component.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_emoji.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_modifier.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_modifier_base.rb +8 -0
- data/lib/unicode/emoji/generated/regex_prop_presentation.rb +8 -0
- data/lib/unicode/emoji/generated/regex_valid.rb +1 -1
- data/lib/unicode/emoji/generated/regex_valid_include_text.rb +1 -1
- data/lib/unicode/emoji/generated/regex_well_formed.rb +1 -1
- data/lib/unicode/emoji/generated/regex_well_formed_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_basic.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_emoji_keycap.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_include_mqe.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_include_mqe_uqe.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_picto_no_emoji.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_prop_component.rb +8 -0
- data/lib/unicode/emoji/generated_native/{regex_any.rb → regex_prop_emoji.rb} +1 -1
- data/lib/unicode/emoji/generated_native/regex_prop_modifier.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_prop_modifier_base.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_prop_presentation.rb +8 -0
- data/lib/unicode/emoji/generated_native/regex_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_valid.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_valid_include_text.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_well_formed.rb +1 -1
- data/lib/unicode/emoji/generated_native/regex_well_formed_include_text.rb +1 -1
- data/lib/unicode/emoji.rb +9 -5
- data/spec/unicode_emoji_spec.rb +97 -16
- data/unicode-emoji.gemspec +1 -3
- metadata +15 -21
- data/lib/unicode/emoji/generated/regex_any.rb +0 -8
- data/spec/data/emoji-test.txt +0 -5331
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6380c3c0ad44728037efba8733373e0c2b923b959239544cb0efda1ac595815d
|
|
4
|
+
data.tar.gz: 9c45c4d5ba6134933472459e8223657f23aa779fb0e2d0bc68822da3a8b726e3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d5ec122b5aad377bf3e4f17b11d9428a4eaaeaaa3245b2302cfed5befdb40a53e7b549c08e215c9eb43c445f5dd1cb69ac000343ecc7fc88473cb3a1475aa6e3
|
|
7
|
+
data.tar.gz: 36427662e0a3d911884d45d7d7d0ab3313449598deda482a22dd64c99c3720ef12a16aecb7f3f446913e591cea9517b7a312bcc8731a74f06ff99498a973d070
|
data/.rake_tasks
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,28 @@
|
|
|
1
1
|
# CHANGELOG
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
## 4.0.3
|
|
4
|
+
|
|
5
|
+
- Remove emoji-test.txt from Rubygems package
|
|
6
|
+
|
|
7
|
+
## 4.0.2
|
|
8
|
+
|
|
9
|
+
- Directly use `RbConfig::CONFIG["UNICODE_EMOJI_VERSION"]` to detect Ruby's Emoji version,
|
|
10
|
+
drop unicode-version dependency
|
|
11
|
+
|
|
12
|
+
## 4.0.0
|
|
13
|
+
|
|
14
|
+
- **Breaking change:** Regexes now include single skin tone modifiers (`🏻`) and hair components (`🦰`).
|
|
15
|
+
They were previously considered to be invalid partial Emoji, however since they are supposed to be
|
|
16
|
+
displayed as Emoji in isolation, they are now part of the regexes (see *ED-20* in UTS51).
|
|
17
|
+
- **Breaking change:** Drop `REGEX_ANY` in favor of `REGEX_PROP_EMOJI`
|
|
18
|
+
- Expose regexes for Emoji props (`REGEX_PROP_*`). The advantage over using the native regex properties
|
|
19
|
+
directly is that you will be able to use the Emoji support level of this gem instead of Ruby's.
|
|
20
|
+
For example, as of releasing this, the current Emoji version is 16.0, while Ruby is at 15.0.
|
|
21
|
+
Also see README for a table listing the regexes that match Emoji properties.
|
|
22
|
+
- Add `REGEX_EMOJI_KEYCAP` for matching specifically Emoji keycaps
|
|
23
|
+
- Use character class instead of lookbehind for native text emoji and non-emoji pictographic regexes
|
|
24
|
+
|
|
25
|
+
## 3.8.0
|
|
4
26
|
|
|
5
27
|
- Add new RGI-based regexes `REGEX_INCLUDE_MQE` and `REGEX_INCLUDE_MQE_UQE` which allow you to match
|
|
6
28
|
for minimally-qualified and unqualified RGI sequences (Emoji that lack some VS16)
|
|
@@ -10,7 +32,7 @@
|
|
|
10
32
|
- Update CLDR to v46 (valid subdivisions)
|
|
11
33
|
- Further improvements (see commit log)
|
|
12
34
|
|
|
13
|
-
|
|
35
|
+
## 3.7.0
|
|
14
36
|
|
|
15
37
|
- Bump required Ruby slightly to 2.5
|
|
16
38
|
- Introduce new `REGEX_POSSIBLE` which contains the regex described in
|
|
@@ -23,46 +45,46 @@
|
|
|
23
45
|
- Separately autoload emoji list, so it can be loaded when other indexes
|
|
24
46
|
are not needed
|
|
25
47
|
|
|
26
|
-
|
|
48
|
+
## 3.6.0
|
|
27
49
|
|
|
28
50
|
- `Unicode::Emoji::REGEX_TEXT` now matches non-emoji keycaps like "3⃣" (U+0033 U+20E3)
|
|
29
51
|
- Minor refactorings
|
|
30
52
|
|
|
31
|
-
|
|
53
|
+
## 3.5.0
|
|
32
54
|
|
|
33
55
|
- Emoji 16.0
|
|
34
56
|
|
|
35
|
-
|
|
57
|
+
## 3.4.0
|
|
36
58
|
|
|
37
59
|
- Emoji 15.1
|
|
38
60
|
|
|
39
|
-
|
|
61
|
+
## 3.3.2
|
|
40
62
|
|
|
41
63
|
- Update valid subdivisions to CLDR 43 (no changes)
|
|
42
64
|
-> there won't be any new RGI subdivision flags in Emoji
|
|
43
65
|
|
|
44
|
-
|
|
66
|
+
## 3.3.1
|
|
45
67
|
|
|
46
68
|
- Update valid subdivisions to CLDR 42 (no changes)
|
|
47
69
|
|
|
48
|
-
|
|
70
|
+
## 3.3.0
|
|
49
71
|
|
|
50
72
|
- Emoji 15.0
|
|
51
73
|
|
|
52
|
-
|
|
74
|
+
## 3.2.0
|
|
53
75
|
|
|
54
76
|
- Update valid subdivisions to CLDR 41
|
|
55
77
|
|
|
56
|
-
|
|
78
|
+
## 3.1.1
|
|
57
79
|
|
|
58
80
|
- Fix `REGEX` to be able to match complete family emoji, instead of
|
|
59
81
|
sub-matching partial families, thanks @matt17r
|
|
60
82
|
|
|
61
|
-
|
|
83
|
+
## 3.1.0
|
|
62
84
|
|
|
63
85
|
- Update valid subdivisions to CLDR 40
|
|
64
86
|
|
|
65
|
-
|
|
87
|
+
## 3.0.0
|
|
66
88
|
|
|
67
89
|
- Vastly improve memory usage, patch by @radarek
|
|
68
90
|
- Emoji regexes are now pre-generated and bundled with the release
|
|
@@ -70,54 +92,54 @@
|
|
|
70
92
|
- Most constants (e.g. regexes) now get autoloaded
|
|
71
93
|
- See https://github.com/janlelis/unicode-emoji/pull/9 for more details
|
|
72
94
|
|
|
73
|
-
|
|
95
|
+
## 2.9.0
|
|
74
96
|
|
|
75
97
|
- Emoji 14.0
|
|
76
98
|
|
|
77
|
-
|
|
99
|
+
## 2.8.0
|
|
78
100
|
|
|
79
101
|
- Update valid subdivisions to CLDR 39
|
|
80
102
|
|
|
81
|
-
|
|
103
|
+
## 2.7.1
|
|
82
104
|
|
|
83
105
|
- Update valid subdivisions to CLDR 38.1
|
|
84
106
|
|
|
85
|
-
|
|
107
|
+
## 2.7.0
|
|
86
108
|
|
|
87
109
|
- Update valid subdivisions to CLDR 38
|
|
88
110
|
- Loosen Ruby dependency to allow Ruby 3.0
|
|
89
111
|
|
|
90
|
-
|
|
112
|
+
## 2.6.0
|
|
91
113
|
|
|
92
114
|
- Emoji 13.1
|
|
93
115
|
|
|
94
|
-
|
|
116
|
+
## 2.5.0
|
|
95
117
|
|
|
96
118
|
- Use native Emoji regex properties when current Ruby's Emoji support is the same as our current Emoji version
|
|
97
119
|
- Update valid subdivisions to CLDR 37
|
|
98
120
|
|
|
99
|
-
|
|
121
|
+
## 2.4.0
|
|
100
122
|
|
|
101
123
|
- Emoji 13.0
|
|
102
124
|
|
|
103
|
-
|
|
125
|
+
## 2.3.1
|
|
104
126
|
|
|
105
127
|
- Fix index to actually include Emoji 12.1
|
|
106
128
|
|
|
107
|
-
|
|
129
|
+
## 2.3.0
|
|
108
130
|
|
|
109
131
|
- Emoji 12.1
|
|
110
132
|
|
|
111
|
-
|
|
133
|
+
## 2.2.0
|
|
112
134
|
|
|
113
135
|
- Update subdivisions to CLDR 36
|
|
114
136
|
|
|
115
|
-
|
|
137
|
+
## 2.1.0
|
|
116
138
|
|
|
117
139
|
- Add `REGEX_PICTO` which matches codepoints with the **Extended_Pictographic** property
|
|
118
140
|
- Add `REGEX_PICTO_NO_EMOJI` which matches codepoints with the **Extended_Pictographic** property, but no **Emoji** property
|
|
119
141
|
|
|
120
|
-
|
|
142
|
+
## 2.0.0
|
|
121
143
|
|
|
122
144
|
- Emoji 12.0 data (including valid subdivisions)
|
|
123
145
|
- Introduce new `REGEX_WELL_FORMED` to be able to match for invalid tag and region sequences
|
|
@@ -126,40 +148,40 @@
|
|
|
126
148
|
- Issue warning when using `#list` method to retrieve outdated category
|
|
127
149
|
- Change matching for ZWJ sequences: Do not limit sequence to a maximum of 3 ZWJs
|
|
128
150
|
|
|
129
|
-
|
|
151
|
+
## 1.1.0
|
|
130
152
|
|
|
131
153
|
- Emoji 11.0
|
|
132
154
|
- Do not depend on rubygems (only use zlib stdlib for unzipping)
|
|
133
155
|
|
|
134
|
-
|
|
156
|
+
## 1.0.3
|
|
135
157
|
|
|
136
158
|
- Explicitly load rubygems/util, fixes regression in 1.2.1
|
|
137
159
|
|
|
138
|
-
|
|
160
|
+
## 1.0.2
|
|
139
161
|
|
|
140
162
|
- Use `Gem::Util` for `gunzip`, removes deprecation warning
|
|
141
163
|
|
|
142
|
-
|
|
164
|
+
## 1.0.1
|
|
143
165
|
|
|
144
166
|
- Actually set required Ruby version to 2.3 in gemspec
|
|
145
167
|
|
|
146
|
-
|
|
168
|
+
## 1.0.0
|
|
147
169
|
|
|
148
170
|
- Drop support for Ruby below 2.3, use 0.9 if you need to
|
|
149
171
|
- Internal refactorings, no API change
|
|
150
172
|
|
|
151
|
-
|
|
173
|
+
## 0.9.3
|
|
152
174
|
|
|
153
175
|
- Implement native Emoji regex matchers, but do not activate or document, yet
|
|
154
176
|
|
|
155
|
-
|
|
177
|
+
## 0.9.2
|
|
156
178
|
|
|
157
179
|
- REGEX_TEXT: Do not match if the text emoji is followed by an emoji modifier
|
|
158
180
|
|
|
159
|
-
|
|
181
|
+
## 0.9.1
|
|
160
182
|
|
|
161
183
|
- Include a categorized list of recommended Emoji
|
|
162
184
|
|
|
163
|
-
|
|
185
|
+
## 0.9.0
|
|
164
186
|
|
|
165
187
|
- Initial release (Emoji version 5.0)
|
data/Gemfile.lock
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
unicode-emoji (
|
|
5
|
-
unicode-version (~> 1.0)
|
|
4
|
+
unicode-emoji (4.0.3)
|
|
6
5
|
|
|
7
6
|
GEM
|
|
8
7
|
remote: https://rubygems.org/
|
|
@@ -20,7 +19,6 @@ GEM
|
|
|
20
19
|
reline (0.3.8)
|
|
21
20
|
io-console (~> 0.5)
|
|
22
21
|
stringio (3.0.8)
|
|
23
|
-
unicode-version (1.3.0)
|
|
24
22
|
|
|
25
23
|
PLATFORMS
|
|
26
24
|
ruby
|
|
@@ -32,4 +30,4 @@ DEPENDENCIES
|
|
|
32
30
|
unicode-emoji!
|
|
33
31
|
|
|
34
32
|
BUNDLED WITH
|
|
35
|
-
2.
|
|
33
|
+
2.5.21
|
data/README.md
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# Unicode::Emoji [![[version]](https://badge.fury.io/rb/unicode-emoji.svg)](https://badge.fury.io/rb/unicode-emoji) [![[ci]](https://github.com/janlelis/unicode-emoji/workflows/Test/badge.svg)](https://github.com/janlelis/unicode-emoji/actions?query=workflow%3ATest)
|
|
2
2
|
|
|
3
|
-
Provides regular expressions to
|
|
3
|
+
Provides various sophisticated regular expressions to work with Emoji in strings,
|
|
4
|
+
incorporating the latest Unicode / Emoji standards.
|
|
4
5
|
|
|
5
6
|
Additional features:
|
|
6
7
|
|
|
@@ -26,16 +27,17 @@ require "unicode/emoji"
|
|
|
26
27
|
|
|
27
28
|
string = "String which contains all types of Emoji sequences:
|
|
28
29
|
|
|
29
|
-
-
|
|
30
|
-
- Textual
|
|
30
|
+
- Basic Emoji: 😴
|
|
31
|
+
- Textual Emoji with Emoji variation (VS16): ▶️
|
|
31
32
|
- Emoji with skin tone modifier: 🛌🏽
|
|
32
33
|
- Region flag: 🇵🇹
|
|
33
34
|
- Sub-Region flag: 🏴
|
|
34
35
|
- Keycap sequence: 2️⃣
|
|
36
|
+
- Skin tone modifier: 🏻
|
|
35
37
|
- Sequence using ZWJ (zero width joiner): 🤾🏽♀️
|
|
36
38
|
"
|
|
37
39
|
|
|
38
|
-
string.scan(Unicode::Emoji::REGEX) # => ["😴", "▶️", "🛌🏽", "🇵🇹", "🏴", "2️⃣", "🤾🏽♀️"]
|
|
40
|
+
string.scan(Unicode::Emoji::REGEX) # => ["😴", "▶️", "🛌🏽", "🇵🇹", "🏴", "2️⃣", "🏻", "🤾🏽♀️"]
|
|
39
41
|
```
|
|
40
42
|
|
|
41
43
|
Depending on your exact use case, you can choose between multiple levels of Emoji detection:
|
|
@@ -44,10 +46,10 @@ Depending on your exact usecase, you can choose between multiple levels of Emoji
|
|
|
44
46
|
|
|
45
47
|
Regex | Description | Example Matches | Example Non-Matches
|
|
46
48
|
------------------------------|-------------|-----------------|--------------------
|
|
47
|
-
`Unicode::Emoji::REGEX` | **Use this one if unsure!** Matches (non-textual)
|
|
48
|
-
`Unicode::Emoji::REGEX_VALID` | Matches (non-textual)
|
|
49
|
-
`Unicode::Emoji::REGEX_WELL_FORMED` | Matches (non-textual)
|
|
50
|
-
`Unicode::Emoji::REGEX_POSSIBLE` | Matches all singleton Emoji,
|
|
49
|
+
`Unicode::Emoji::REGEX` | **Use this one if unsure!** Matches (non-textual) Basic Emoji and all kinds of *recommended* Emoji sequences (RGI/FQE) | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🏻` | `🤾🏽♀`, `🏌♂️`, `😴︎`, `▶`, `🇵🇵`, `🏴`, `🤠🤢`, `1`, `1⃣`
|
|
50
|
+
`Unicode::Emoji::REGEX_VALID` | Matches (non-textual) Basic Emoji and all kinds of *valid* Emoji sequences | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀` ,`🏌♂️`, `🤠🤢`, `🏻` | `😴︎`, `▶`, `🇵🇵`, `1`, `1⃣`
|
|
51
|
+
`Unicode::Emoji::REGEX_WELL_FORMED` | Matches (non-textual) Basic Emoji and all kinds of *well-formed* Emoji sequences | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`,`🏌♂️` , `🤠🤢`, `🇵🇵`, `🏻` | `😴︎`, `▶`, `1`, `1⃣`
|
|
52
|
+
`Unicode::Emoji::REGEX_POSSIBLE` | Matches all singleton Emoji, all kinds of Emoji sequences, and even non-Emoji singleton components like digits. Only exception: Unqualified keycap sequences are not matched | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `🇵🇵`, `😴︎`, `▶`, `🏻`, `1` | `1⃣`
|
|
51
53
|
|
|
52
54
|
#### Include Text Emoji
|
|
53
55
|
|
|
@@ -55,16 +57,16 @@ By default, textual Emoji (emoji characters with text variation selector or thos
|
|
|
55
57
|
|
|
56
58
|
Regex | Description | Example Matches | Example Non-Matches
|
|
57
59
|
------------------------------|-------------|-----------------|--------------------
|
|
58
|
-
`Unicode::Emoji::REGEX_INCLUDE_TEXT` | `REGEX` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `😴︎`, `▶`, `1⃣`
|
|
59
|
-
`Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT` | `REGEX_VALID` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `😴︎`, `▶`, `1⃣` |
|
|
60
|
-
`Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT` | `REGEX_WELL_FORMED` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `🇵🇵`, `😴︎`, `▶`, `1⃣` |
|
|
60
|
+
`Unicode::Emoji::REGEX_INCLUDE_TEXT` | `REGEX` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `😴︎`, `▶`, `1⃣` , `🏻`| `🤾🏽♀`, `🏌♂️`, `🇵🇵`, `🏴`, `🤠🤢`, `1`
|
|
61
|
+
`Unicode::Emoji::REGEX_VALID_INCLUDE_TEXT` | `REGEX_VALID` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `😴︎`, `▶`, `1⃣` , `🏻` | `🇵🇵`, `1`
|
|
62
|
+
`Unicode::Emoji::REGEX_WELL_FORMED_INCLUDE_TEXT` | `REGEX_WELL_FORMED` + `REGEX_TEXT` | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `🇵🇵`, `😴︎`, `▶`, `1⃣` , `🏻` | `1`
|
|
61
63
|
|
|
62
64
|
#### Minimally-qualified and Unqualified Sequences
|
|
63
65
|
|
|
64
66
|
Regex | Description | Example Matches | Example Non-Matches
|
|
65
67
|
------------------------------|-------------|-----------------|--------------------
|
|
66
|
-
`Unicode::Emoji::REGEX_INCLUDE_MQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors, where the first partial Emoji has all required Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`,
|
|
67
|
-
`Unicode::Emoji::REGEX_INCLUDE_MQE_UQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`,
|
|
68
|
+
`Unicode::Emoji::REGEX_INCLUDE_MQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors, where the first partial Emoji has all required Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏻` | `🏌♂️`, `😴︎`, `▶`, `🇵🇵`, `🏴`, `🤠🤢`, `1`, `1⃣`
|
|
69
|
+
`Unicode::Emoji::REGEX_INCLUDE_MQE_UQE` | Like `REGEX`, but additionally includes Emoji with missing Emoji Presentation Variation Selectors | `😴`, `▶️`, `🛌🏽`, `🇵🇹`, `2️⃣`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🏻` | `😴︎`, `▶`, `🇵🇵`, `🏴`, `🤠🤢`, `1`, `1⃣`
|
|
68
70
|
|
|
69
71
|
[List of MQE and UQE Emoji sequences](https://character.construction/unqualified-emoji)
|
|
70
72
|
|
|
@@ -74,10 +76,10 @@ Matches only simple one-codepoint (+ optional variation selector) Emoji:
|
|
|
74
76
|
|
|
75
77
|
Regex | Description | Example Matches | Example Non-Matches
|
|
76
78
|
------------------------------|-------------|-----------------|--------------------
|
|
77
|
-
`Unicode::Emoji::REGEX_BASIC` | Matches (non-textual)
|
|
78
|
-
`Unicode::Emoji::REGEX_TEXT` | Matches only textual singleton Emoji
|
|
79
|
+
`Unicode::Emoji::REGEX_BASIC` | Matches (non-textual) Basic Emoji, but no sequences at all | `😴`, `▶️`, `🏻` | `😴︎`, `▶`, `🛌🏽`, `🇵🇹`, `🇵🇵`,`2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `1`
|
|
80
|
+
`Unicode::Emoji::REGEX_TEXT` | Matches only textual singleton Emoji | `😴︎`, `▶` | `😴`, `▶️`, `🏻`, `🛌🏽`, `🇵🇹`, `🇵🇵`,`2️⃣`, `🏴`, `🏴`, `🤾🏽♀️`, `🤾🏽♀`, `🏌♂️`, `🤠🤢`, `1`
|
|
79
81
|
|
|
80
|
-
Here is a list of all Emoji that can be matched using the two regexes: [character.construction/emoji-vs-text](https://character.construction/emoji-vs-text)
|
|
82
|
+
Here is a list of all Emoji that can be matched using the two regexes: [character.construction/emoji-vs-text](https://character.construction/emoji-vs-text). The `REGEX_BASIC` regex also matches [visual Emoji components](https://character.construction/emoji-components) (skin tone modifiers and hair components).
|
|
81
83
|
|
|
82
84
|
While `REGEX_BASIC` is part of the above regexes, `REGEX_TEXT` is only included in the `*_INCLUDE_TEXT` or `*_UQE` variants.
|
|
83
85
|
|
|
@@ -140,7 +142,19 @@ Please see [the standard](https://www.unicode.org/reports/tr51/#Emoji_Sets) for
|
|
|
140
142
|
|
|
141
143
|
More info about valid vs. recommended Emoji can also be found in this [blog article on Emojipedia](https://blog.emojipedia.org/unicode-behind-the-curtain/).
|
|
142
144
|
|
|
143
|
-
###
|
|
145
|
+
### Emoji Property Regexes
|
|
146
|
+
|
|
147
|
+
Ruby includes native regex Emoji properties, as listed in the following table. You can also opt-in to use the `*_PROP_*` regexes to get the Emoji support level of this gem (instead of Ruby's).
|
|
148
|
+
|
|
149
|
+
Gem Regex (`Unicode::Emoji`'s Emoji support level) | Native Regex (Ruby's Emoji support level)
|
|
150
|
+
---------------------------------------------------|------------------------------------------
|
|
151
|
+
`Unicode::Emoji::REGEX_PROP_EMOJI` | `/\p{Emoji}/`
|
|
152
|
+
`Unicode::Emoji::REGEX_PROP_MODIFIER` | `/\p{EMod}/`
|
|
153
|
+
`Unicode::Emoji::REGEX_PROP_MODIFIER_BASE` | `/\p{EBase}/`
|
|
154
|
+
`Unicode::Emoji::REGEX_PROP_COMPONENT` | `/\p{EComp}/`
|
|
155
|
+
`Unicode::Emoji::REGEX_PROP_PRESENTATION` | `/\p{EPres}/`
|
|
156
|
+
|
|
157
|
+
#### Extended Pictographic Regex
|
|
144
158
|
|
|
145
159
|
`Unicode::Emoji::REGEX_PICTO` matches single codepoints with the **Extended_Pictographic** property. For example, it will match `✀` BLACK SAFETY SCISSORS.
|
|
146
160
|
|
|
@@ -148,10 +162,6 @@ More info about valid vs. recommended Emoji can also be found in this [blog arti
|
|
|
148
162
|
|
|
149
163
|
See [character.construction/picto](https://character.construction/picto) for a list of all non-Emoji pictographic characters.
|
|
150
164
|
|
|
151
|
-
### Partial Regexes
|
|
152
|
-
|
|
153
|
-
`Unicode::Emoji::REGEX_ANY`, same as `\p{Emoji}`. Deprecated: Will be removed or renamed in the future.
|
|
154
|
-
|
|
155
165
|
## Usage – List
|
|
156
166
|
|
|
157
167
|
Use `Unicode::Emoji::LIST` or the **list** method to get an ordered and categorized list of Emoji:
|
data/data/generate_constants.rb
CHANGED
|
@@ -69,6 +69,8 @@ def pack_and_join(ords)
|
|
|
69
69
|
end
|
|
70
70
|
|
|
71
71
|
def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_component:, emoji_presentation:, text_presentation:, picto:, picto_no_emoji:)
|
|
72
|
+
visual_component = pack_and_join(VISUAL_COMPONENT)
|
|
73
|
+
|
|
72
74
|
emoji_presentation_sequence = \
|
|
73
75
|
join(
|
|
74
76
|
text_presentation + pack(EMOJI_VARIATION_SELECTOR),
|
|
@@ -78,6 +80,12 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
78
80
|
non_component_emoji_presentation_sequence = \
|
|
79
81
|
"(?!" + emoji_component + ")" + emoji_presentation_sequence
|
|
80
82
|
|
|
83
|
+
basic_emoji = \
|
|
84
|
+
join(
|
|
85
|
+
non_component_emoji_presentation_sequence,
|
|
86
|
+
visual_component,
|
|
87
|
+
)
|
|
88
|
+
|
|
81
89
|
text_keycap_sequence = \
|
|
82
90
|
pack_and_join(EMOJI_KEYCAPS) + pack(EMOJI_KEYCAP_SUFFIX)
|
|
83
91
|
|
|
@@ -169,6 +177,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
169
177
|
emoji_rgi_tag_sequence,
|
|
170
178
|
emoji_valid_flag_sequence,
|
|
171
179
|
emoji_core_sequence,
|
|
180
|
+
visual_component,
|
|
172
181
|
)
|
|
173
182
|
|
|
174
183
|
emoji_rgi_sequence_include_text = \
|
|
@@ -177,6 +186,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
177
186
|
emoji_rgi_tag_sequence,
|
|
178
187
|
emoji_valid_flag_sequence,
|
|
179
188
|
emoji_core_sequence,
|
|
189
|
+
visual_component,
|
|
180
190
|
text_emoji,
|
|
181
191
|
)
|
|
182
192
|
|
|
@@ -186,6 +196,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
186
196
|
emoji_rgi_tag_sequence,
|
|
187
197
|
emoji_valid_flag_sequence,
|
|
188
198
|
emoji_core_sequence,
|
|
199
|
+
visual_component,
|
|
189
200
|
)
|
|
190
201
|
|
|
191
202
|
emoji_rgi_include_mqe_uqe_sequence = \
|
|
@@ -195,6 +206,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
195
206
|
emoji_rgi_tag_sequence,
|
|
196
207
|
emoji_valid_flag_sequence,
|
|
197
208
|
emoji_core_sequence,
|
|
209
|
+
visual_component,
|
|
198
210
|
)
|
|
199
211
|
|
|
200
212
|
emoji_valid_sequence = \
|
|
@@ -203,6 +215,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
203
215
|
emoji_valid_tag_sequence,
|
|
204
216
|
emoji_valid_flag_sequence,
|
|
205
217
|
emoji_core_sequence,
|
|
218
|
+
visual_component,
|
|
206
219
|
)
|
|
207
220
|
|
|
208
221
|
emoji_valid_sequence_include_text = \
|
|
@@ -211,6 +224,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
211
224
|
emoji_valid_tag_sequence,
|
|
212
225
|
emoji_valid_flag_sequence,
|
|
213
226
|
emoji_core_sequence,
|
|
227
|
+
visual_component,
|
|
214
228
|
text_emoji,
|
|
215
229
|
)
|
|
216
230
|
|
|
@@ -220,6 +234,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
220
234
|
emoji_well_formed_tag_sequence,
|
|
221
235
|
emoji_well_formed_flag_sequence,
|
|
222
236
|
emoji_core_sequence,
|
|
237
|
+
visual_component,
|
|
223
238
|
)
|
|
224
239
|
|
|
225
240
|
emoji_well_formed_sequence_include_text = \
|
|
@@ -228,6 +243,7 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
228
243
|
emoji_well_formed_tag_sequence,
|
|
229
244
|
emoji_well_formed_flag_sequence,
|
|
230
245
|
emoji_core_sequence,
|
|
246
|
+
visual_component,
|
|
231
247
|
text_emoji,
|
|
232
248
|
)
|
|
233
249
|
|
|
@@ -279,19 +295,26 @@ def compile(emoji_character:, emoji_modifier:, emoji_modifier_base:, emoji_compo
|
|
|
279
295
|
# See https://www.unicode.org/reports/tr51/#EBNF_and_Regex
|
|
280
296
|
regexes[:REGEX_POSSIBLE] = Regexp.compile(emoji_possible)
|
|
281
297
|
|
|
282
|
-
# Matches only basic single, non-textual emoji, ignores
|
|
283
|
-
regexes[:REGEX_BASIC] = Regexp.compile(
|
|
298
|
+
# Matches only basic single, non-textual emoji, ignores some components like simple digits
|
|
299
|
+
regexes[:REGEX_BASIC] = Regexp.compile(basic_emoji)
|
|
284
300
|
|
|
285
|
-
# Matches only basic single, textual emoji, ignores
|
|
301
|
+
# Matches only basic single, textual emoji, ignores components like modifiers or simple digits
|
|
286
302
|
regexes[:REGEX_TEXT] = Regexp.compile(text_emoji)
|
|
287
303
|
|
|
288
|
-
#
|
|
289
|
-
regexes[:
|
|
304
|
+
# Export regexes for Emoji properties so they can be used with newer Unicode than Ruby's
|
|
305
|
+
regexes[:REGEX_PROP_EMOJI] = Regexp.compile(emoji_character)
|
|
306
|
+
regexes[:REGEX_PROP_MODIFIER] = Regexp.compile(emoji_modifier)
|
|
307
|
+
regexes[:REGEX_PROP_MODIFIER_BASE] = Regexp.compile(emoji_modifier_base)
|
|
308
|
+
regexes[:REGEX_PROP_COMPONENT] = Regexp.compile(emoji_component)
|
|
309
|
+
regexes[:REGEX_PROP_PRESENTATION] = Regexp.compile(emoji_presentation)
|
|
290
310
|
|
|
311
|
+
# Same goes for ExtendedPictographic
|
|
291
312
|
regexes[:REGEX_PICTO] = Regexp.compile(picto)
|
|
292
|
-
|
|
293
313
|
regexes[:REGEX_PICTO_NO_EMOJI] = Regexp.compile(picto_no_emoji)
|
|
294
314
|
|
|
315
|
+
# Emoji keycaps
|
|
316
|
+
regexes[:REGEX_EMOJI_KEYCAP] = Regexp.compile(emoji_keycap_sequence)
|
|
317
|
+
|
|
295
318
|
regexes
|
|
296
319
|
end
|
|
297
320
|
|
|
@@ -313,8 +336,8 @@ native_regexes = compile(
|
|
|
313
336
|
emoji_modifier_base: "\\p{EBase}",
|
|
314
337
|
emoji_component: "\\p{EComp}",
|
|
315
338
|
emoji_presentation: "\\p{EPres}",
|
|
316
|
-
text_presentation: "\\p{Emoji}
|
|
339
|
+
text_presentation: "[\\p{Emoji}&&\\P{EPres}]",
|
|
317
340
|
picto: "\\p{ExtPict}",
|
|
318
|
-
picto_no_emoji: "\\p{ExtPict}
|
|
341
|
+
picto_no_emoji: "[\\p{ExtPict}&&\\P{Emoji}]"
|
|
319
342
|
)
|
|
320
343
|
write_regexes(native_regexes, File.expand_path("../lib/unicode/emoji/generated_native", __dir__))
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
module Unicode
|
|
4
4
|
module Emoji
|
|
5
|
-
VERSION = "
|
|
5
|
+
VERSION = "4.0.3"
|
|
6
6
|
EMOJI_VERSION = "16.0"
|
|
7
|
-
CLDR_VERSION = "
|
|
7
|
+
CLDR_VERSION = "46"
|
|
8
8
|
DATA_DIRECTORY = File.expand_path('../../../data', __dir__).freeze
|
|
9
9
|
INDEX_FILENAME = (DATA_DIRECTORY + "/emoji.marshal.gz").freeze
|
|
10
10
|
|
|
@@ -41,5 +41,9 @@ module Unicode
|
|
|
41
41
|
|
|
42
42
|
# Two regional indicators make up a region
|
|
43
43
|
REGIONAL_INDICATORS = [*0x1F1E6..0x1F1FF].freeze
|
|
44
|
+
|
|
45
|
+
# The current list of Emoji components that should have a visual representation
|
|
46
|
+
# Currently skin tone modifiers + hair components
|
|
47
|
+
VISUAL_COMPONENT = [*0x1F3FB..0x1F3FF, *0x1F9B0..0x1F9B3].freeze
|
|
44
48
|
end
|
|
45
49
|
end
|