quickjs 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/quickjsrb/quickjs/cutils.h +56 -0
- data/ext/quickjsrb/quickjs/libregexp-opcode.h +11 -1
- data/ext/quickjsrb/quickjs/libregexp.c +883 -132
- data/ext/quickjsrb/quickjs/libregexp.h +1 -0
- data/ext/quickjsrb/quickjs/libunicode-table.h +420 -1
- data/ext/quickjsrb/quickjs/libunicode.c +224 -11
- data/ext/quickjsrb/quickjs/libunicode.h +9 -5
- data/ext/quickjsrb/quickjs/qjs.c +1 -1
- data/ext/quickjsrb/quickjs/qjsc.c +81 -26
- data/ext/quickjsrb/quickjs/quickjs-atom.h +7 -0
- data/ext/quickjsrb/quickjs/quickjs-libc.c +254 -65
- data/ext/quickjsrb/quickjs/quickjs-libc.h +7 -1
- data/ext/quickjsrb/quickjs/quickjs-opcode.h +2 -2
- data/ext/quickjsrb/quickjs/quickjs.c +2021 -686
- data/ext/quickjsrb/quickjs/quickjs.h +52 -8
- data/ext/quickjsrb/quickjs/run-test262.c +109 -32
- data/ext/quickjsrb/quickjs/unicode_gen.c +541 -5
- data/ext/quickjsrb/quickjs/unicode_gen_def.h +15 -0
- data/ext/quickjsrb/quickjsrb.c +1 -1
- data/lib/quickjs/version.rb +1 -1
- metadata +2 -2
@@ -156,6 +156,153 @@ char *get_line(char *buf, int buf_size, FILE *f)
|
|
156
156
|
return buf;
|
157
157
|
}
|
158
158
|
|
159
|
+
/* One code point string stored in a REStringList. The code points are
   stored inline after the header (flexible array member). */
typedef struct REString {
    struct REString *next; /* next string in the same hash bucket */
    uint32_t hash;         /* full 32 bit hash of the string */
    uint32_t len;          /* number of code points in buf[] */
    uint32_t flags;        /* 0 on creation; bit 0 is set once the string
                              has been examined by the ZWJ table builder */
    uint32_t buf[];        /* 'len' code points */
} REString;
|
166
|
+
|
167
|
+
typedef struct {
|
168
|
+
uint32_t n_strings;
|
169
|
+
uint32_t hash_size;
|
170
|
+
int hash_bits;
|
171
|
+
REString **hash_table;
|
172
|
+
} REStringList;
|
173
|
+
|
174
|
+
/* Return the 32 bit hash of the 'len' code points of 'buf': a
   polynomial accumulation (seed 1, multiplier 263) followed by a final
   multiplication by 0x61C88647. All the arithmetic wraps modulo 2^32. */
static uint32_t re_string_hash(int len, const uint32_t *buf)
{
    uint32_t acc = 1;
    int pos = 0;

    while (pos < len) {
        acc = acc * 263 + buf[pos];
        pos++;
    }
    return acc * 0x61C88647;
}
|
183
|
+
|
184
|
+
/* Put 's' in the empty state: no bucket array and no string. The
   bucket array is allocated by the first insertion. */
static void re_string_list_init(REStringList *s)
{
    s->hash_table = NULL;
    s->hash_bits = 0;
    s->hash_size = 0;
    s->n_strings = 0;
}
|
191
|
+
|
192
|
+
/* Free every string of 's' and its bucket array, then put 's' back in
   the empty state (same values as re_string_list_init()) so that the
   list can be reused, or freed a second time, without touching
   dangling pointers. */
static __maybe_unused void re_string_list_free(REStringList *s)
{
    REString *p, *p_next;
    uint32_t i; /* same type as s->hash_size */

    for(i = 0; i < s->hash_size; i++) {
        for(p = s->hash_table[i]; p != NULL; p = p_next) {
            /* read the link before the node is released */
            p_next = p->next;
            free(p);
        }
    }
    free(s->hash_table);
    s->hash_table = NULL;
    s->hash_size = 0;
    s->hash_bits = 0;
    s->n_strings = 0;
}
|
204
|
+
|
205
|
+
/* Print the code point 'c' on stdout in a readable form:
   - quote and backslash (and also '-' and ']' when 'is_range' is
     true) are printed with a leading backslash;
   - the other characters from ' ' to 126 are printed unchanged;
   - everything else is printed as \u{xxxx} (at least 4 hex digits). */
static void lre_print_char(int c, BOOL is_range)
{
    BOOL needs_escape;

    needs_escape = (c == '\'' || c == '\\');
    if (!needs_escape && is_range)
        needs_escape = (c == '-' || c == ']');

    if (needs_escape) {
        printf("\\%c", c);
        return;
    }
    if (c < ' ' || c > 126) {
        printf("\\u{%04x}", c);
        return;
    }
    printf("%c", c);
}
|
216
|
+
|
217
|
+
static __maybe_unused void re_string_list_dump(const char *str, const REStringList *s)
|
218
|
+
{
|
219
|
+
REString *p;
|
220
|
+
int i, j, k;
|
221
|
+
|
222
|
+
printf("%s:\n", str);
|
223
|
+
|
224
|
+
j = 0;
|
225
|
+
for(i = 0; i < s->hash_size; i++) {
|
226
|
+
for(p = s->hash_table[i]; p != NULL; p = p->next) {
|
227
|
+
printf(" %d/%d: '", j, s->n_strings);
|
228
|
+
for(k = 0; k < p->len; k++) {
|
229
|
+
lre_print_char(p->buf[k], FALSE);
|
230
|
+
}
|
231
|
+
printf("'\n");
|
232
|
+
j++;
|
233
|
+
}
|
234
|
+
}
|
235
|
+
}
|
236
|
+
|
237
|
+
/* Look up the string of 'len' code points 'buf', whose hash 'h0' was
   computed with re_string_hash(), in 's'.

   Return the existing entry when the string is present. Otherwise
   return NULL if 'add_flag' is false, or insert a copy of the string
   (with flags = 0) and return the new entry if 'add_flag' is true.
   NULL is also returned when a memory allocation fails; the list is
   left in a consistent state in that case. */
static REString *re_string_find2(REStringList *s, int len, const uint32_t *buf,
                                 uint32_t h0, BOOL add_flag)
{
    uint32_t h = 0; /* avoid warning */
    REString *p;

    /* Test the bucket array rather than the string count: the array
       may exist while the list is still empty (an entry allocation
       failed right after the first resize) and the bucket index must
       be valid for the insertion below in that case too. */
    if (s->hash_size != 0) {
        /* the bucket index is the top 'hash_bits' bits of the hash */
        h = h0 >> (32 - s->hash_bits);
        for(p = s->hash_table[h]; p != NULL; p = p->next) {
            if (p->hash == h0 && p->len == len &&
                !memcmp(p->buf, buf, len * sizeof(buf[0]))) {
                return p;
            }
        }
    }
    /* not found */
    if (!add_flag)
        return NULL;
    /* increase the size of the hash table if needed */
    if (unlikely((s->n_strings + 1) > s->hash_size)) {
        REString **new_hash_table, *p_next;
        int new_hash_bits;
        uint32_t new_hash_size, i;

        /* the table doubles at each resize and has at least 16 buckets */
        new_hash_bits = max_int(s->hash_bits + 1, 4);
        new_hash_size = (uint32_t)1 << new_hash_bits;
        /* calloc() returns zeroed buckets and checks the size product */
        new_hash_table = calloc(new_hash_size, sizeof(new_hash_table[0]));
        if (!new_hash_table)
            return NULL;
        /* move every existing string to its bucket in the new table */
        for(i = 0; i < s->hash_size; i++) {
            for(p = s->hash_table[i]; p != NULL; p = p_next) {
                p_next = p->next;
                h = p->hash >> (32 - new_hash_bits);
                p->next = new_hash_table[h];
                new_hash_table[h] = p;
            }
        }
        free(s->hash_table);
        s->hash_bits = new_hash_bits;
        s->hash_size = new_hash_size;
        s->hash_table = new_hash_table;
        /* the bucket of the new string changed with the table size */
        h = h0 >> (32 - s->hash_bits);
    }

    p = malloc(sizeof(REString) + len * sizeof(buf[0]));
    if (!p)
        return NULL;
    /* insert at the head of the bucket */
    p->next = s->hash_table[h];
    s->hash_table[h] = p;
    s->n_strings++;
    p->hash = h0;
    p->len = len;
    p->flags = 0;
    memcpy(p->buf, buf, sizeof(buf[0]) * len);
    return p;
}
|
292
|
+
|
293
|
+
static REString *re_string_find(REStringList *s, int len, const uint32_t *buf,
|
294
|
+
BOOL add_flag)
|
295
|
+
{
|
296
|
+
uint32_t h0;
|
297
|
+
h0 = re_string_hash(len, buf);
|
298
|
+
return re_string_find2(s, len, buf, h0, add_flag);
|
299
|
+
}
|
300
|
+
|
301
|
+
/* Add the string of 'len' code points 'buf' to 's' if it is not
   already present. With add_flag = TRUE the lookup only returns NULL
   when a memory allocation failed; silently dropping the string would
   produce an incomplete table, so the program is stopped instead. */
static void re_string_add(REStringList *s, int len, const uint32_t *buf)
{
    if (!re_string_find(s, len, buf, TRUE)) {
        fprintf(stderr, "re_string_add: out of memory\n");
        exit(1);
    }
}
|
305
|
+
|
159
306
|
#define UNICODE_GENERAL_CATEGORY
|
160
307
|
|
161
308
|
typedef enum {
|
@@ -225,6 +372,23 @@ static const char *unicode_prop_short_name[] = {
|
|
225
372
|
|
226
373
|
#undef UNICODE_PROP_LIST
|
227
374
|
|
375
|
+
/* Select the sequence property section of unicode_gen_def.h for the
   two expansions below. */
#define UNICODE_SEQUENCE_PROP_LIST

/* One SEQUENCE_PROP_xxx constant per DEF(xxx) entry, in file order;
   SEQUENCE_PROP_COUNT is the number of entries. */
typedef enum {
#define DEF(id) SEQUENCE_PROP_ ## id,
#include "unicode_gen_def.h"
#undef DEF
    SEQUENCE_PROP_COUNT,
} UnicodeSequencePropEnum1;

/* Names of the sequence properties, indexed by SEQUENCE_PROP_xxx */
static const char *unicode_sequence_prop_name[] = {
#define DEF(id) #id,
#include "unicode_gen_def.h"
#undef DEF
};

#undef UNICODE_SEQUENCE_PROP_LIST
|
391
|
+
|
228
392
|
typedef struct {
|
229
393
|
/* case conv */
|
230
394
|
uint8_t u_len;
|
@@ -247,7 +411,15 @@ typedef struct {
|
|
247
411
|
int *decomp_data;
|
248
412
|
} CCInfo;
|
249
413
|
|
414
|
+
/* Growable array of integers. NOTE(review): no user of this type is
   visible in this part of the file; the field meanings below are
   inferred from their names and must be confirmed. */
typedef struct {
    int count; /* presumably the number of used entries of tab[] */
    int size;  /* presumably the allocated number of entries of tab[] */
    int *tab;
} UnicodeSequenceProperties;
|
419
|
+
|
250
420
|
CCInfo *unicode_db;
|
421
|
+
REStringList rgi_emoji_zwj_sequence;
|
422
|
+
DynBuf rgi_emoji_tag_sequence;
|
251
423
|
|
252
424
|
int find_name(const char **tab, int tab_len, const char *name)
|
253
425
|
{
|
@@ -751,6 +923,147 @@ void parse_prop_list(const char *filename)
|
|
751
923
|
fclose(f);
|
752
924
|
}
|
753
925
|
|
926
|
+
#define SEQ_MAX_LEN 16
|
927
|
+
|
928
|
+
static BOOL is_emoji_modifier(uint32_t c)
|
929
|
+
{
|
930
|
+
return (c >= 0x1f3fb && c <= 0x1f3ff);
|
931
|
+
}
|
932
|
+
|
933
|
+
static void add_sequence_prop(int idx, int seq_len, int *seq)
|
934
|
+
{
|
935
|
+
int i;
|
936
|
+
|
937
|
+
assert(idx < SEQUENCE_PROP_COUNT);
|
938
|
+
switch(idx) {
|
939
|
+
case SEQUENCE_PROP_Basic_Emoji:
|
940
|
+
/* convert to 2 properties lists */
|
941
|
+
if (seq_len == 1) {
|
942
|
+
set_prop(seq[0], PROP_Basic_Emoji1, 1);
|
943
|
+
} else if (seq_len == 2 && seq[1] == 0xfe0f) {
|
944
|
+
set_prop(seq[0], PROP_Basic_Emoji2, 1);
|
945
|
+
} else {
|
946
|
+
abort();
|
947
|
+
}
|
948
|
+
break;
|
949
|
+
case SEQUENCE_PROP_RGI_Emoji_Modifier_Sequence:
|
950
|
+
assert(seq_len == 2);
|
951
|
+
assert(is_emoji_modifier(seq[1]));
|
952
|
+
assert(get_prop(seq[0], PROP_Emoji_Modifier_Base));
|
953
|
+
set_prop(seq[0], PROP_RGI_Emoji_Modifier_Sequence, 1);
|
954
|
+
break;
|
955
|
+
case SEQUENCE_PROP_RGI_Emoji_Flag_Sequence:
|
956
|
+
{
|
957
|
+
int code;
|
958
|
+
assert(seq_len == 2);
|
959
|
+
assert(seq[0] >= 0x1F1E6 && seq[0] <= 0x1F1FF);
|
960
|
+
assert(seq[1] >= 0x1F1E6 && seq[1] <= 0x1F1FF);
|
961
|
+
code = (seq[0] - 0x1F1E6) * 26 + (seq[1] - 0x1F1E6);
|
962
|
+
/* XXX: would be more compact with a simple bitmap -> 676 bits */
|
963
|
+
set_prop(code, PROP_RGI_Emoji_Flag_Sequence, 1);
|
964
|
+
}
|
965
|
+
break;
|
966
|
+
case SEQUENCE_PROP_RGI_Emoji_ZWJ_Sequence:
|
967
|
+
re_string_add(&rgi_emoji_zwj_sequence, seq_len, (uint32_t *)seq);
|
968
|
+
break;
|
969
|
+
case SEQUENCE_PROP_RGI_Emoji_Tag_Sequence:
|
970
|
+
{
|
971
|
+
assert(seq_len >= 3);
|
972
|
+
assert(seq[0] == 0x1F3F4);
|
973
|
+
assert(seq[seq_len - 1] == 0xE007F);
|
974
|
+
for(i = 1; i < seq_len - 1; i++) {
|
975
|
+
assert(seq[i] >= 0xe0001 && seq[i] <= 0xe007e);
|
976
|
+
dbuf_putc(&rgi_emoji_tag_sequence, seq[i] - 0xe0000);
|
977
|
+
}
|
978
|
+
dbuf_putc(&rgi_emoji_tag_sequence, 0);
|
979
|
+
}
|
980
|
+
break;
|
981
|
+
case SEQUENCE_PROP_Emoji_Keycap_Sequence:
|
982
|
+
assert(seq_len == 3);
|
983
|
+
assert(seq[1] == 0xfe0f);
|
984
|
+
assert(seq[2] == 0x20e3);
|
985
|
+
set_prop(seq[0], PROP_Emoji_Keycap_Sequence, 1);
|
986
|
+
break;
|
987
|
+
default:
|
988
|
+
assert(0);
|
989
|
+
}
|
990
|
+
}
|
991
|
+
|
992
|
+
void parse_sequence_prop_list(const char *filename)
|
993
|
+
{
|
994
|
+
FILE *f;
|
995
|
+
char line[4096], *p, buf[256], *q, *p_start;
|
996
|
+
uint32_t c0, c1, c;
|
997
|
+
int idx, seq_len;
|
998
|
+
int seq[SEQ_MAX_LEN];
|
999
|
+
|
1000
|
+
f = fopen(filename, "rb");
|
1001
|
+
if (!f) {
|
1002
|
+
perror(filename);
|
1003
|
+
exit(1);
|
1004
|
+
}
|
1005
|
+
|
1006
|
+
for(;;) {
|
1007
|
+
if (!get_line(line, sizeof(line), f))
|
1008
|
+
break;
|
1009
|
+
p = line;
|
1010
|
+
while (isspace(*p))
|
1011
|
+
p++;
|
1012
|
+
if (*p == '#' || *p == '@' || *p == '\0')
|
1013
|
+
continue;
|
1014
|
+
p_start = p;
|
1015
|
+
|
1016
|
+
/* find the sequence property name */
|
1017
|
+
p = strchr(p, ';');
|
1018
|
+
if (!p)
|
1019
|
+
continue;
|
1020
|
+
p++;
|
1021
|
+
p += strspn(p, " \t");
|
1022
|
+
q = buf;
|
1023
|
+
while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t' && *p != ';') {
|
1024
|
+
if ((q - buf) < sizeof(buf) - 1)
|
1025
|
+
*q++ = *p;
|
1026
|
+
p++;
|
1027
|
+
}
|
1028
|
+
*q = '\0';
|
1029
|
+
idx = find_name(unicode_sequence_prop_name,
|
1030
|
+
countof(unicode_sequence_prop_name), buf);
|
1031
|
+
if (idx < 0) {
|
1032
|
+
fprintf(stderr, "Property not found: %s\n", buf);
|
1033
|
+
exit(1);
|
1034
|
+
}
|
1035
|
+
|
1036
|
+
p = p_start;
|
1037
|
+
c0 = strtoul(p, (char **)&p, 16);
|
1038
|
+
assert(c0 <= CHARCODE_MAX);
|
1039
|
+
|
1040
|
+
if (*p == '.' && p[1] == '.') {
|
1041
|
+
p += 2;
|
1042
|
+
c1 = strtoul(p, (char **)&p, 16);
|
1043
|
+
assert(c1 <= CHARCODE_MAX);
|
1044
|
+
for(c = c0; c <= c1; c++) {
|
1045
|
+
seq[0] = c;
|
1046
|
+
add_sequence_prop(idx, 1, seq);
|
1047
|
+
}
|
1048
|
+
} else {
|
1049
|
+
seq_len = 0;
|
1050
|
+
seq[seq_len++] = c0;
|
1051
|
+
for(;;) {
|
1052
|
+
while (isspace(*p))
|
1053
|
+
p++;
|
1054
|
+
if (*p == ';' || *p == '\0')
|
1055
|
+
break;
|
1056
|
+
c0 = strtoul(p, (char **)&p, 16);
|
1057
|
+
assert(c0 <= CHARCODE_MAX);
|
1058
|
+
assert(seq_len < countof(seq));
|
1059
|
+
seq[seq_len++] = c0;
|
1060
|
+
}
|
1061
|
+
add_sequence_prop(idx, seq_len, seq);
|
1062
|
+
}
|
1063
|
+
}
|
1064
|
+
fclose(f);
|
1065
|
+
}
|
1066
|
+
|
754
1067
|
void parse_scripts(const char *filename)
|
755
1068
|
{
|
756
1069
|
FILE *f;
|
@@ -1654,7 +1967,7 @@ void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len,
|
|
1654
1967
|
maxw = 0;
|
1655
1968
|
for(i = 0; i < len; i++) {
|
1656
1969
|
w = strlen(tab_name[i]);
|
1657
|
-
if (tab_short_name[i][0] != '\0') {
|
1970
|
+
if (tab_short_name && tab_short_name[i][0] != '\0') {
|
1658
1971
|
w += 1 + strlen(tab_short_name[i]);
|
1659
1972
|
}
|
1660
1973
|
if (maxw < w)
|
@@ -1666,7 +1979,7 @@ void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len,
|
|
1666
1979
|
for(i = 0; i < len; i++) {
|
1667
1980
|
fprintf(f, " \"");
|
1668
1981
|
w = fprintf(f, "%s", tab_name[i]);
|
1669
|
-
if (tab_short_name[i][0] != '\0') {
|
1982
|
+
if (tab_short_name && tab_short_name[i][0] != '\0') {
|
1670
1983
|
w += fprintf(f, ",%s", tab_short_name[i]);
|
1671
1984
|
}
|
1672
1985
|
fprintf(f, "\"%*s\"\\0\"\n", 1 + maxw - w, "");
|
@@ -1774,10 +2087,9 @@ void build_script_table(FILE *f)
|
|
1774
2087
|
fprintf(f, " UNICODE_SCRIPT_COUNT,\n");
|
1775
2088
|
fprintf(f, "} UnicodeScriptEnum;\n\n");
|
1776
2089
|
|
1777
|
-
i = 1;
|
1778
2090
|
dump_name_table(f, "unicode_script_name_table",
|
1779
|
-
unicode_script_name
|
1780
|
-
unicode_script_short_name
|
2091
|
+
unicode_script_name, SCRIPT_COUNT,
|
2092
|
+
unicode_script_short_name);
|
1781
2093
|
|
1782
2094
|
dbuf_init(dbuf);
|
1783
2095
|
#ifdef DUMP_TABLE_SIZE
|
@@ -1930,6 +2242,218 @@ void build_prop_list_table(FILE *f)
|
|
1930
2242
|
fprintf(f, "};\n\n");
|
1931
2243
|
}
|
1932
2244
|
|
2245
|
+
static BOOL is_emoji_hair_color(uint32_t c)
|
2246
|
+
{
|
2247
|
+
return (c >= 0x1F9B0 && c <= 0x1F9B3);
|
2248
|
+
}
|
2249
|
+
|
2250
|
+
#define EMOJI_MOD_NONE 0
|
2251
|
+
#define EMOJI_MOD_TYPE1 1
|
2252
|
+
#define EMOJI_MOD_TYPE2 2
|
2253
|
+
#define EMOJI_MOD_TYPE2D 3
|
2254
|
+
|
2255
|
+
/* Check that all the variants of the ZWJ string 'buf' ('len' code
   points) are present in 'sl'.

   The variants are obtained by substituting the emoji modifiers at the
   positions 'mod_pos' and, if 'hc_pos' >= 0, the hair color at the
   position 'hc_pos':
   - EMOJI_MOD_NONE: no modifier substitution;
   - EMOJI_MOD_TYPE1: the 5 modifiers at mod_pos[0];
   - EMOJI_MOD_TYPE2: the 25 pairs of modifiers at mod_pos[0] and
     mod_pos[1];
   - EMOJI_MOD_TYPE2D: the 20 pairs of different modifiers;
   each of them combined with the 4 hair colors when 'hc_pos' >= 0.

   Return FALSE as soon as a variant is missing, TRUE if all of them
   are present. If 'mark_flag' is true, bit 0 of the flags of each
   variant found is set. 'buf' is modified: it holds the last variant
   tried when the function returns. */
static BOOL mark_zwj_string(REStringList *sl, uint32_t *buf, int len, int mod_type, int *mod_pos,
                            int hc_pos, BOOL mark_flag)
{
    REString *entry;
    int variant, n_variants, first, second, n_hair, hair;

#if 0
    if (mark_flag)
        printf("mod_type=%d\n", mod_type);
#endif

    /* number of modifier combinations to try */
    if (mod_type == EMOJI_MOD_NONE)
        n_variants = 1;
    else if (mod_type == EMOJI_MOD_TYPE1)
        n_variants = 5;
    else if (mod_type == EMOJI_MOD_TYPE2)
        n_variants = 25;
    else if (mod_type == EMOJI_MOD_TYPE2D)
        n_variants = 20;
    else
        assert(0);

    n_hair = (hc_pos >= 0) ? 4 : 1;

    /* check that all the related strings are present */
    for(hair = 0; hair < n_hair; hair++) {
        for(variant = 0; variant < n_variants; variant++) {
            if (mod_type == EMOJI_MOD_TYPE1) {
                buf[mod_pos[0]] = 0x1f3fb + variant;
            } else if (mod_type == EMOJI_MOD_TYPE2 ||
                       mod_type == EMOJI_MOD_TYPE2D) {
                first = variant / 5;
                second = variant % 5;
                /* avoid identical values */
                if (mod_type == EMOJI_MOD_TYPE2D && first >= second)
                    first++;
                buf[mod_pos[0]] = 0x1f3fb + first;
                buf[mod_pos[1]] = 0x1f3fb + second;
            }

            if (hc_pos >= 0)
                buf[hc_pos] = 0x1F9B0 + hair;

            entry = re_string_find(sl, len, buf, FALSE);
            if (!entry)
                return FALSE;
            if (mark_flag)
                entry->flags |= 1;
        }
    }
    return TRUE;
}
|
2321
|
+
|
2322
|
+
/* Append the compact encoding of the ZWJ sequence 'buf' ('len' code
   points) to 'dbuf'.

   The sequence is split in elements made of one code point optionally
   followed by an emoji modifier, then optionally by U+FE0F, then by
   U+200D unless it is the last element. Each element is encoded as a
   16 bit value:
   - bits 0-12: the code point, 0x2000..0x2fff being mapped to
     0x0000..0x0fff and 0x1f000..0x1ffff to 0x1000..0x1fff;
   - bits 13-14: 'mod_type' if a modifier follows the code point (the
     value of the modifier itself is not stored);
   - bit 15: set if U+FE0F is present.
   The output is one byte with the number of elements followed by each
   16 bit value, low byte first. (assumes dbuf_putc() keeps the low 8
   bits of its argument -- TODO confirm)

   'mod_pos' and 'hc_pos' are not used by this function. The program is
   stopped if a code point is outside the two encodable ranges. */
static void zwj_encode_string(DynBuf *dbuf, const uint32_t *buf, int len, int mod_type, int *mod_pos,
                              int hc_pos)
{
    int i, j;
    int c, code;
    uint32_t buf1[SEQ_MAX_LEN];

    (void)mod_pos;
    (void)hc_pos;

    /* at most one element is produced per input code point */
    assert(len <= SEQ_MAX_LEN);

    j = 0;
    for(i = 0; i < len;) {
        c = buf[i++];
        if (c >= 0x2000 && c <= 0x2fff) {
            code = c - 0x2000;
        } else if (c >= 0x1f000 && c <= 0x1ffff) {
            code = c - 0x1f000 + 0x1000;
        } else {
            assert(0);
            /* never continue with an undefined 'code', even when the
               asserts are compiled out */
            abort();
        }
        if (i < len && is_emoji_modifier(buf[i])) {
            /* modifier */
            code |= (mod_type << 13);
            i++;
        }
        if (i < len && buf[i] == 0xfe0f) {
            /* presentation selector present */
            code |= 0x8000;
            i++;
        }
        if (i < len) {
            /* zero width join */
            assert(buf[i] == 0x200d);
            i++;
        }
        buf1[j++] = code;
    }
    dbuf_putc(dbuf, j);
    for(i = 0; i < j; i++) {
        dbuf_putc(dbuf, buf1[i]);
        dbuf_putc(dbuf, buf1[i] >> 8);
    }
}
|
2362
|
+
|
2363
|
+
/* Encode the ZWJ sequences of 'sl' and write them to 'f' as the byte
   table "unicode_rgi_emoji_zwj_sequence".

   To reduce the table size, the strings which only differ by their
   emoji modifiers (at most 2 per string) or by their hair color are
   encoded once when all their variants are present in 'sl': the
   variants are then flagged with mark_zwj_string() and skipped. A
   string with two modifiers whose variants are missing even when
   restricted to different modifiers is reported with dump_str() and
   encoded without modifier type.
   NOTE(review): when the variants of a string with zero or one
   modifier are missing, the string is still encoded with its modifier
   type and nothing is flagged -- confirm that this is intended. */
static void build_rgi_emoji_zwj_sequence(FILE *f, REStringList *sl)
{
    int mod_pos[2], mod_count, hair_color_pos, pos, bucket;
    REString *p;
    uint32_t buf[SEQ_MAX_LEN];
    DynBuf dbuf;

#if 0
    {
        for(bucket = 0; bucket < sl->hash_size; bucket++) {
            for(p = sl->hash_table[bucket]; p != NULL; p = p->next) {
                for(pos = 0; pos < p->len; pos++)
                    printf(" %04x", p->buf[pos]);
                printf("\n");
            }
        }
        exit(0);
    }
#endif
    // printf("rgi_emoji_zwj_sequence: n=%d\n", sl->n_strings);

    dbuf_init(&dbuf);

    /* avoid duplicating strings with emoji modifiers or hair colors */
    for(bucket = 0; bucket < sl->hash_size; bucket++) {
        for(p = sl->hash_table[bucket]; p != NULL; p = p->next) {
            int mod_type;
            BOOL all_present;

            if (p->flags) /* already examined */
                continue;

            /* copy the string and locate its modifiers and hair color */
            mod_count = 0;
            hair_color_pos = -1;
            for(pos = 0; pos < p->len; pos++) {
                if (is_emoji_modifier(p->buf[pos])) {
                    assert(mod_count < 2);
                    mod_pos[mod_count++] = pos;
                } else if (is_emoji_hair_color(p->buf[pos])) {
                    hair_color_pos = pos;
                }
                buf[pos] = p->buf[pos];
            }

            if (mod_count == 0 && hair_color_pos < 0) {
                /* nothing to factorize */
                zwj_encode_string(&dbuf, buf, p->len, EMOJI_MOD_NONE, NULL, -1);
                continue;
            }

            if (mod_count == 0)
                mod_type = EMOJI_MOD_NONE;
            else if (mod_count == 1)
                mod_type = EMOJI_MOD_TYPE1;
            else
                mod_type = EMOJI_MOD_TYPE2;

            all_present = mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, FALSE);
            if (!all_present && mod_type == EMOJI_MOD_TYPE2) {
                /* retry with the pairs of different modifiers only */
                mod_type = EMOJI_MOD_TYPE2D;
                all_present = mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, FALSE);
                if (!all_present) {
                    dump_str("not_found", (int *)p->buf, p->len);
                    /* 'buf' holds the last variant tried by
                       mark_zwj_string() at this point */
                    zwj_encode_string(&dbuf, buf, p->len, EMOJI_MOD_NONE, NULL, -1);
                    continue;
                }
            }
            if (all_present)
                mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, TRUE);

            /* the first hair color stands for the 4 variants */
            if (hair_color_pos >= 0)
                buf[hair_color_pos] = 0x1f9b0;
            /* encode the string */
            zwj_encode_string(&dbuf, buf, p->len, mod_type, mod_pos, hair_color_pos);
        }
    }

    /* Encode */
    dump_byte_table(f, "unicode_rgi_emoji_zwj_sequence", dbuf.buf, dbuf.size);

    dbuf_free(&dbuf);
}
|
2439
|
+
|
2440
|
+
void build_sequence_prop_list_table(FILE *f)
|
2441
|
+
{
|
2442
|
+
int i;
|
2443
|
+
fprintf(f, "typedef enum {\n");
|
2444
|
+
for(i = 0; i < SEQUENCE_PROP_COUNT; i++)
|
2445
|
+
fprintf(f, " UNICODE_SEQUENCE_PROP_%s,\n", unicode_sequence_prop_name[i]);
|
2446
|
+
fprintf(f, " UNICODE_SEQUENCE_PROP_COUNT,\n");
|
2447
|
+
fprintf(f, "} UnicodeSequencePropertyEnum;\n\n");
|
2448
|
+
|
2449
|
+
dump_name_table(f, "unicode_sequence_prop_name_table",
|
2450
|
+
unicode_sequence_prop_name, SEQUENCE_PROP_COUNT, NULL);
|
2451
|
+
|
2452
|
+
dump_byte_table(f, "unicode_rgi_emoji_tag_sequence", rgi_emoji_tag_sequence.buf, rgi_emoji_tag_sequence.size);
|
2453
|
+
|
2454
|
+
build_rgi_emoji_zwj_sequence(f, &rgi_emoji_zwj_sequence);
|
2455
|
+
}
|
2456
|
+
|
1933
2457
|
#ifdef USE_TEST
|
1934
2458
|
int check_conv(uint32_t *res, uint32_t c, int conv_type)
|
1935
2459
|
{
|
@@ -3156,6 +3680,8 @@ int main(int argc, char *argv[])
|
|
3156
3680
|
outfilename = argv[arg++];
|
3157
3681
|
|
3158
3682
|
unicode_db = mallocz(sizeof(unicode_db[0]) * (CHARCODE_MAX + 1));
|
3683
|
+
re_string_list_init(&rgi_emoji_zwj_sequence);
|
3684
|
+
dbuf_init(&rgi_emoji_tag_sequence);
|
3159
3685
|
|
3160
3686
|
snprintf(filename, sizeof(filename), "%s/UnicodeData.txt", unicode_db_path);
|
3161
3687
|
|
@@ -3190,6 +3716,14 @@ int main(int argc, char *argv[])
|
|
3190
3716
|
unicode_db_path);
|
3191
3717
|
parse_prop_list(filename);
|
3192
3718
|
|
3719
|
+
snprintf(filename, sizeof(filename), "%s/emoji-sequences.txt",
|
3720
|
+
unicode_db_path);
|
3721
|
+
parse_sequence_prop_list(filename);
|
3722
|
+
|
3723
|
+
snprintf(filename, sizeof(filename), "%s/emoji-zwj-sequences.txt",
|
3724
|
+
unicode_db_path);
|
3725
|
+
parse_sequence_prop_list(filename);
|
3726
|
+
|
3193
3727
|
// dump_unicode_data(unicode_db);
|
3194
3728
|
build_conv_table(unicode_db);
|
3195
3729
|
|
@@ -3234,10 +3768,12 @@ int main(int argc, char *argv[])
|
|
3234
3768
|
build_script_table(fo);
|
3235
3769
|
build_script_ext_table(fo);
|
3236
3770
|
build_prop_list_table(fo);
|
3771
|
+
build_sequence_prop_list_table(fo);
|
3237
3772
|
fprintf(fo, "#endif /* CONFIG_ALL_UNICODE */\n");
|
3238
3773
|
fprintf(fo, "/* %u tables / %u bytes, %u index / %u bytes */\n",
|
3239
3774
|
total_tables, total_table_bytes, total_index, total_index_bytes);
|
3240
3775
|
fclose(fo);
|
3241
3776
|
}
|
3777
|
+
re_string_list_free(&rgi_emoji_zwj_sequence);
|
3242
3778
|
return 0;
|
3243
3779
|
}
|
@@ -234,6 +234,11 @@ DEF(XID_Continue1, "")
|
|
234
234
|
DEF(Changes_When_Titlecased1, "")
|
235
235
|
DEF(Changes_When_Casefolded1, "")
|
236
236
|
DEF(Changes_When_NFKC_Casefolded1, "")
|
237
|
+
DEF(Basic_Emoji1, "")
|
238
|
+
DEF(Basic_Emoji2, "")
|
239
|
+
DEF(RGI_Emoji_Modifier_Sequence, "")
|
240
|
+
DEF(RGI_Emoji_Flag_Sequence, "")
|
241
|
+
DEF(Emoji_Keycap_Sequence, "")
|
237
242
|
|
238
243
|
/* Prop list exported to JS */
|
239
244
|
DEF(ASCII_Hex_Digit, "AHex")
|
@@ -301,3 +306,13 @@ DEF(XID_Start, "XIDS")
|
|
301
306
|
DEF(Cased1, "")
|
302
307
|
|
303
308
|
#endif
|
309
|
+
|
310
|
+
#ifdef UNICODE_SEQUENCE_PROP_LIST
|
311
|
+
DEF(Basic_Emoji)
|
312
|
+
DEF(Emoji_Keycap_Sequence)
|
313
|
+
DEF(RGI_Emoji_Modifier_Sequence)
|
314
|
+
DEF(RGI_Emoji_Flag_Sequence)
|
315
|
+
DEF(RGI_Emoji_Tag_Sequence)
|
316
|
+
DEF(RGI_Emoji_ZWJ_Sequence)
|
317
|
+
DEF(RGI_Emoji)
|
318
|
+
#endif
|
data/ext/quickjsrb/quickjsrb.c
CHANGED
@@ -514,7 +514,7 @@ static VALUE vm_m_initialize(int argc, VALUE *argv, VALUE r_self)
|
|
514
514
|
JS_SetMemoryLimit(runtime, NUM2UINT(r_memory_limit));
|
515
515
|
JS_SetMaxStackSize(runtime, NUM2UINT(r_max_stack_size));
|
516
516
|
|
517
|
-
|
517
|
+
JS_SetModuleLoaderFunc2(runtime, NULL, js_module_loader, js_module_check_attributes, NULL);
|
518
518
|
js_std_init_handlers(runtime);
|
519
519
|
|
520
520
|
JSValue j_global = JS_GetGlobalObject(data->context);
|
data/lib/quickjs/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quickjs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.0
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hmsk
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-09-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|