quickjs 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -156,6 +156,153 @@ char *get_line(char *buf, int buf_size, FILE *f)
156
156
  return buf;
157
157
  }
158
158
 
159
/*
 * One stored string of 32-bit code units. Nodes are chained per hash
 * bucket through `next`; the contents follow the header in the same
 * allocation.
 */
typedef struct REString {
    struct REString *next; /* next node of the same bucket chain */
    uint32_t hash;         /* full 32-bit hash of buf[0..len-1] */
    uint32_t len;          /* number of elements in buf[] */
    uint32_t flags;        /* marker bits, 0 when the node is created */
    uint32_t buf[];        /* string contents (flexible array member) */
} REString;
166
+
167
+ typedef struct {
168
+ uint32_t n_strings;
169
+ uint32_t hash_size;
170
+ int hash_bits;
171
+ REString **hash_table;
172
+ } REStringList;
173
+
174
/*
 * Hash the `len` 32-bit code units of `buf`.
 *
 * A rolling hash (seed 1, multiplier 263) is accumulated over the
 * elements and the result is multiplied by 0x61C88647. All arithmetic
 * wraps modulo 2^32. A non-positive `len` hashes to 0x61C88647.
 */
static uint32_t re_string_hash(int len, const uint32_t *buf)
{
    uint32_t acc = 1;
    int pos = 0;

    while (pos < len) {
        acc = acc * 263 + buf[pos];
        pos++;
    }
    return acc * 0x61C88647;
}
183
+
184
+ static void re_string_list_init(REStringList *s)
185
+ {
186
+ s->n_strings = 0;
187
+ s->hash_size = 0;
188
+ s->hash_bits = 0;
189
+ s->hash_table = NULL;
190
+ }
191
+
192
+ static __maybe_unused void re_string_list_free(REStringList *s)
193
+ {
194
+ REString *p, *p_next;
195
+ int i;
196
+ for(i = 0; i < s->hash_size; i++) {
197
+ for(p = s->hash_table[i]; p != NULL; p = p_next) {
198
+ p_next = p->next;
199
+ free(p);
200
+ }
201
+ }
202
+ free(s->hash_table);
203
+ }
204
+
205
+ static void lre_print_char(int c, BOOL is_range)
206
+ {
207
+ if (c == '\'' || c == '\\' ||
208
+ (is_range && (c == '-' || c == ']'))) {
209
+ printf("\\%c", c);
210
+ } else if (c >= ' ' && c <= 126) {
211
+ printf("%c", c);
212
+ } else {
213
+ printf("\\u{%04x}", c);
214
+ }
215
+ }
216
+
217
+ static __maybe_unused void re_string_list_dump(const char *str, const REStringList *s)
218
+ {
219
+ REString *p;
220
+ int i, j, k;
221
+
222
+ printf("%s:\n", str);
223
+
224
+ j = 0;
225
+ for(i = 0; i < s->hash_size; i++) {
226
+ for(p = s->hash_table[i]; p != NULL; p = p->next) {
227
+ printf(" %d/%d: '", j, s->n_strings);
228
+ for(k = 0; k < p->len; k++) {
229
+ lre_print_char(p->buf[k], FALSE);
230
+ }
231
+ printf("'\n");
232
+ j++;
233
+ }
234
+ }
235
+ }
236
+
237
+ static REString *re_string_find2(REStringList *s, int len, const uint32_t *buf,
238
+ uint32_t h0, BOOL add_flag)
239
+ {
240
+ uint32_t h = 0; /* avoid warning */
241
+ REString *p;
242
+ if (s->n_strings != 0) {
243
+ h = h0 >> (32 - s->hash_bits);
244
+ for(p = s->hash_table[h]; p != NULL; p = p->next) {
245
+ if (p->hash == h0 && p->len == len &&
246
+ !memcmp(p->buf, buf, len * sizeof(buf[0]))) {
247
+ return p;
248
+ }
249
+ }
250
+ }
251
+ /* not found */
252
+ if (!add_flag)
253
+ return NULL;
254
+ /* increase the size of the hash table if needed */
255
+ if (unlikely((s->n_strings + 1) > s->hash_size)) {
256
+ REString **new_hash_table, *p_next;
257
+ int new_hash_bits, i;
258
+ uint32_t new_hash_size;
259
+ new_hash_bits = max_int(s->hash_bits + 1, 4);
260
+ new_hash_size = 1 << new_hash_bits;
261
+ new_hash_table = malloc(sizeof(new_hash_table[0]) * new_hash_size);
262
+ if (!new_hash_table)
263
+ return NULL;
264
+ memset(new_hash_table, 0, sizeof(new_hash_table[0]) * new_hash_size);
265
+ for(i = 0; i < s->hash_size; i++) {
266
+ for(p = s->hash_table[i]; p != NULL; p = p_next) {
267
+ p_next = p->next;
268
+ h = p->hash >> (32 - new_hash_bits);
269
+ p->next = new_hash_table[h];
270
+ new_hash_table[h] = p;
271
+ }
272
+ }
273
+ free(s->hash_table);
274
+ s->hash_bits = new_hash_bits;
275
+ s->hash_size = new_hash_size;
276
+ s->hash_table = new_hash_table;
277
+ h = h0 >> (32 - s->hash_bits);
278
+ }
279
+
280
+ p = malloc(sizeof(REString) + len * sizeof(buf[0]));
281
+ if (!p)
282
+ return NULL;
283
+ p->next = s->hash_table[h];
284
+ s->hash_table[h] = p;
285
+ s->n_strings++;
286
+ p->hash = h0;
287
+ p->len = len;
288
+ p->flags = 0;
289
+ memcpy(p->buf, buf, sizeof(buf[0]) * len);
290
+ return p;
291
+ }
292
+
293
+ static REString *re_string_find(REStringList *s, int len, const uint32_t *buf,
294
+ BOOL add_flag)
295
+ {
296
+ uint32_t h0;
297
+ h0 = re_string_hash(len, buf);
298
+ return re_string_find2(s, len, buf, h0, add_flag);
299
+ }
300
+
301
+ static void re_string_add(REStringList *s, int len, const uint32_t *buf)
302
+ {
303
+ re_string_find(s, len, buf, TRUE);
304
+ }
305
+
159
306
  #define UNICODE_GENERAL_CATEGORY
160
307
 
161
308
  typedef enum {
@@ -225,6 +372,23 @@ static const char *unicode_prop_short_name[] = {
225
372
 
226
373
  #undef UNICODE_PROP_LIST
227
374
 
375
+ #define UNICODE_SEQUENCE_PROP_LIST /* NOTE(review): presumably selects the sequence-property DEF() list inside unicode_gen_def.h - confirm against that header */
376
+
377
+ typedef enum { /* one SEQUENCE_PROP_<id> constant per DEF(id) entry expanded from the header */
378
+ #define DEF(id) SEQUENCE_PROP_ ## id,
379
+ #include "unicode_gen_def.h"
380
+ #undef DEF
381
+ SEQUENCE_PROP_COUNT, /* number of entries; must stay last */
382
+ } UnicodeSequencePropEnum1;
383
+
384
+ static const char *unicode_sequence_prop_name[] = { /* same DEF() entries stringified, so the array index matches the enum value */
385
+ #define DEF(id) #id,
386
+ #include "unicode_gen_def.h"
387
+ #undef DEF
388
+ };
389
+
390
+ #undef UNICODE_SEQUENCE_PROP_LIST
391
+
228
392
  typedef struct {
229
393
  /* case conv */
230
394
  uint8_t u_len;
@@ -247,7 +411,15 @@ typedef struct {
247
411
  int *decomp_data;
248
412
  } CCInfo;
249
413
 
414
/*
 * NOTE(review): no user of this type is visible in this part of the
 * file; the field descriptions are inferred from the names only and
 * should be confirmed.
 */
typedef struct {
    int count; /* presumably the number of entries of tab in use */
    int size;  /* presumably the allocated capacity of tab */
    int *tab;  /* entry storage */
} UnicodeSequenceProperties;
419
+
250
420
  CCInfo *unicode_db;
421
+ REStringList rgi_emoji_zwj_sequence; /* RGI_Emoji_ZWJ_Sequence code point strings collected by add_sequence_prop() */
422
+ DynBuf rgi_emoji_tag_sequence; /* RGI_Emoji_Tag_Sequence data from add_sequence_prop(): per sequence, the inner tag values minus 0xe0000 followed by a 0 */
251
423
 
252
424
  int find_name(const char **tab, int tab_len, const char *name)
253
425
  {
@@ -625,7 +797,7 @@ void parse_derived_core_properties(const char *filename)
625
797
  p++;
626
798
  p += strspn(p, " \t");
627
799
  q = buf;
628
- while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t') {
800
+ while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t' && *p != ';') {
629
801
  if ((q - buf) < sizeof(buf) - 1)
630
802
  *q++ = *p;
631
803
  p++;
@@ -751,6 +923,147 @@ void parse_prop_list(const char *filename)
751
923
  fclose(f);
752
924
  }
753
925
 
926
+ #define SEQ_MAX_LEN 16 /* capacity, in code points, of the buffers that hold one emoji sequence */
927
+
928
+ static BOOL is_emoji_modifier(uint32_t c)
929
+ {
930
+ return (c >= 0x1f3fb && c <= 0x1f3ff);
931
+ }
932
+
933
+ static void add_sequence_prop(int idx, int seq_len, int *seq)
934
+ {
935
+ int i;
936
+
937
+ assert(idx < SEQUENCE_PROP_COUNT);
938
+ switch(idx) {
939
+ case SEQUENCE_PROP_Basic_Emoji:
940
+ /* convert to 2 properties lists */
941
+ if (seq_len == 1) {
942
+ set_prop(seq[0], PROP_Basic_Emoji1, 1);
943
+ } else if (seq_len == 2 && seq[1] == 0xfe0f) {
944
+ set_prop(seq[0], PROP_Basic_Emoji2, 1);
945
+ } else {
946
+ abort();
947
+ }
948
+ break;
949
+ case SEQUENCE_PROP_RGI_Emoji_Modifier_Sequence:
950
+ assert(seq_len == 2);
951
+ assert(is_emoji_modifier(seq[1]));
952
+ assert(get_prop(seq[0], PROP_Emoji_Modifier_Base));
953
+ set_prop(seq[0], PROP_RGI_Emoji_Modifier_Sequence, 1);
954
+ break;
955
+ case SEQUENCE_PROP_RGI_Emoji_Flag_Sequence:
956
+ {
957
+ int code;
958
+ assert(seq_len == 2);
959
+ assert(seq[0] >= 0x1F1E6 && seq[0] <= 0x1F1FF);
960
+ assert(seq[1] >= 0x1F1E6 && seq[1] <= 0x1F1FF);
961
+ code = (seq[0] - 0x1F1E6) * 26 + (seq[1] - 0x1F1E6);
962
+ /* XXX: would be more compact with a simple bitmap -> 676 bits */
963
+ set_prop(code, PROP_RGI_Emoji_Flag_Sequence, 1);
964
+ }
965
+ break;
966
+ case SEQUENCE_PROP_RGI_Emoji_ZWJ_Sequence:
967
+ re_string_add(&rgi_emoji_zwj_sequence, seq_len, (uint32_t *)seq);
968
+ break;
969
+ case SEQUENCE_PROP_RGI_Emoji_Tag_Sequence:
970
+ {
971
+ assert(seq_len >= 3);
972
+ assert(seq[0] == 0x1F3F4);
973
+ assert(seq[seq_len - 1] == 0xE007F);
974
+ for(i = 1; i < seq_len - 1; i++) {
975
+ assert(seq[i] >= 0xe0001 && seq[i] <= 0xe007e);
976
+ dbuf_putc(&rgi_emoji_tag_sequence, seq[i] - 0xe0000);
977
+ }
978
+ dbuf_putc(&rgi_emoji_tag_sequence, 0);
979
+ }
980
+ break;
981
+ case SEQUENCE_PROP_Emoji_Keycap_Sequence:
982
+ assert(seq_len == 3);
983
+ assert(seq[1] == 0xfe0f);
984
+ assert(seq[2] == 0x20e3);
985
+ set_prop(seq[0], PROP_Emoji_Keycap_Sequence, 1);
986
+ break;
987
+ default:
988
+ assert(0);
989
+ }
990
+ }
991
+
992
+ void parse_sequence_prop_list(const char *filename)
993
+ {
994
+ FILE *f;
995
+ char line[4096], *p, buf[256], *q, *p_start;
996
+ uint32_t c0, c1, c;
997
+ int idx, seq_len;
998
+ int seq[SEQ_MAX_LEN];
999
+
1000
+ f = fopen(filename, "rb");
1001
+ if (!f) {
1002
+ perror(filename);
1003
+ exit(1);
1004
+ }
1005
+
1006
+ for(;;) {
1007
+ if (!get_line(line, sizeof(line), f))
1008
+ break;
1009
+ p = line;
1010
+ while (isspace(*p))
1011
+ p++;
1012
+ if (*p == '#' || *p == '@' || *p == '\0')
1013
+ continue;
1014
+ p_start = p;
1015
+
1016
+ /* find the sequence property name */
1017
+ p = strchr(p, ';');
1018
+ if (!p)
1019
+ continue;
1020
+ p++;
1021
+ p += strspn(p, " \t");
1022
+ q = buf;
1023
+ while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t' && *p != ';') {
1024
+ if ((q - buf) < sizeof(buf) - 1)
1025
+ *q++ = *p;
1026
+ p++;
1027
+ }
1028
+ *q = '\0';
1029
+ idx = find_name(unicode_sequence_prop_name,
1030
+ countof(unicode_sequence_prop_name), buf);
1031
+ if (idx < 0) {
1032
+ fprintf(stderr, "Property not found: %s\n", buf);
1033
+ exit(1);
1034
+ }
1035
+
1036
+ p = p_start;
1037
+ c0 = strtoul(p, (char **)&p, 16);
1038
+ assert(c0 <= CHARCODE_MAX);
1039
+
1040
+ if (*p == '.' && p[1] == '.') {
1041
+ p += 2;
1042
+ c1 = strtoul(p, (char **)&p, 16);
1043
+ assert(c1 <= CHARCODE_MAX);
1044
+ for(c = c0; c <= c1; c++) {
1045
+ seq[0] = c;
1046
+ add_sequence_prop(idx, 1, seq);
1047
+ }
1048
+ } else {
1049
+ seq_len = 0;
1050
+ seq[seq_len++] = c0;
1051
+ for(;;) {
1052
+ while (isspace(*p))
1053
+ p++;
1054
+ if (*p == ';' || *p == '\0')
1055
+ break;
1056
+ c0 = strtoul(p, (char **)&p, 16);
1057
+ assert(c0 <= CHARCODE_MAX);
1058
+ assert(seq_len < countof(seq));
1059
+ seq[seq_len++] = c0;
1060
+ }
1061
+ add_sequence_prop(idx, seq_len, seq);
1062
+ }
1063
+ }
1064
+ fclose(f);
1065
+ }
1066
+
754
1067
  void parse_scripts(const char *filename)
755
1068
  {
756
1069
  FILE *f;
@@ -1117,6 +1430,24 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
1117
1430
  te->ext_data[1] = ci->u_data[1];
1118
1431
  te->ext_data[2] = ci->u_data[2];
1119
1432
  te->ext_len = 3;
1433
+ } else if (ci->u_len == 2 && ci->l_len == 0 && ci->f_len == 1) {
1434
+ // U+FB05 LATIN SMALL LIGATURE LONG S T
1435
+ assert(code == 0xFB05);
1436
+ te->len = 1;
1437
+ te->type = RUN_TYPE_UF_EXT2;
1438
+ te->ext_data[0] = ci->u_data[0];
1439
+ te->ext_data[1] = ci->u_data[1];
1440
+ te->ext_len = 2;
1441
+ } else if (ci->u_len == 3 && ci->l_len == 0 && ci->f_len == 1) {
1442
+ // U+1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA or
1443
+ // U+1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1444
+ assert(code == 0x1FD3 || code == 0x1FE3);
1445
+ te->len = 1;
1446
+ te->type = RUN_TYPE_UF_EXT3;
1447
+ te->ext_data[0] = ci->u_data[0];
1448
+ te->ext_data[1] = ci->u_data[1];
1449
+ te->ext_data[2] = ci->u_data[2];
1450
+ te->ext_len = 3;
1120
1451
  } else {
1121
1452
  printf("unsupported encoding case:\n");
1122
1453
  dump_cc_info(ci, code);
@@ -1636,7 +1967,7 @@ void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len,
1636
1967
  maxw = 0;
1637
1968
  for(i = 0; i < len; i++) {
1638
1969
  w = strlen(tab_name[i]);
1639
- if (tab_short_name[i][0] != '\0') {
1970
+ if (tab_short_name && tab_short_name[i][0] != '\0') {
1640
1971
  w += 1 + strlen(tab_short_name[i]);
1641
1972
  }
1642
1973
  if (maxw < w)
@@ -1648,7 +1979,7 @@ void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len,
1648
1979
  for(i = 0; i < len; i++) {
1649
1980
  fprintf(f, " \"");
1650
1981
  w = fprintf(f, "%s", tab_name[i]);
1651
- if (tab_short_name[i][0] != '\0') {
1982
+ if (tab_short_name && tab_short_name[i][0] != '\0') {
1652
1983
  w += fprintf(f, ",%s", tab_short_name[i]);
1653
1984
  }
1654
1985
  fprintf(f, "\"%*s\"\\0\"\n", 1 + maxw - w, "");
@@ -1756,10 +2087,9 @@ void build_script_table(FILE *f)
1756
2087
  fprintf(f, " UNICODE_SCRIPT_COUNT,\n");
1757
2088
  fprintf(f, "} UnicodeScriptEnum;\n\n");
1758
2089
 
1759
- i = 1;
1760
2090
  dump_name_table(f, "unicode_script_name_table",
1761
- unicode_script_name + i, SCRIPT_COUNT - i,
1762
- unicode_script_short_name + i);
2091
+ unicode_script_name, SCRIPT_COUNT,
2092
+ unicode_script_short_name);
1763
2093
 
1764
2094
  dbuf_init(dbuf);
1765
2095
  #ifdef DUMP_TABLE_SIZE
@@ -1912,6 +2242,218 @@ void build_prop_list_table(FILE *f)
1912
2242
  fprintf(f, "};\n\n");
1913
2243
  }
1914
2244
 
2245
+ static BOOL is_emoji_hair_color(uint32_t c)
2246
+ {
2247
+ return (c >= 0x1F9B0 && c <= 0x1F9B3);
2248
+ }
2249
+
2250
+ #define EMOJI_MOD_NONE 0 /* no emoji modifier in the string: 1 variant */
2251
+ #define EMOJI_MOD_TYPE1 1 /* one modifier position: 5 variants */
2252
+ #define EMOJI_MOD_TYPE2 2 /* two modifier positions, every pair of values: 25 variants */
2253
+ #define EMOJI_MOD_TYPE2D 3 /* two modifier positions holding different values: 20 variants */
2254
+
2255
+ static BOOL mark_zwj_string(REStringList *sl, uint32_t *buf, int len, int mod_type, int *mod_pos,
2256
+ int hc_pos, BOOL mark_flag)
2257
+ {
2258
+ REString *p;
2259
+ int i, n_mod, i0, i1, hc_count, j;
2260
+
2261
+ #if 0
2262
+ if (mark_flag)
2263
+ printf("mod_type=%d\n", mod_type);
2264
+ #endif
2265
+
2266
+ switch(mod_type) {
2267
+ case EMOJI_MOD_NONE:
2268
+ n_mod = 1;
2269
+ break;
2270
+ case EMOJI_MOD_TYPE1:
2271
+ n_mod = 5;
2272
+ break;
2273
+ case EMOJI_MOD_TYPE2:
2274
+ n_mod = 25;
2275
+ break;
2276
+ case EMOJI_MOD_TYPE2D:
2277
+ n_mod = 20;
2278
+ break;
2279
+ default:
2280
+ assert(0);
2281
+ }
2282
+ if (hc_pos >= 0)
2283
+ hc_count = 4;
2284
+ else
2285
+ hc_count = 1;
2286
+ /* check that all the related strings are present */
2287
+ for(j = 0; j < hc_count; j++) {
2288
+ for(i = 0; i < n_mod; i++) {
2289
+ switch(mod_type) {
2290
+ case EMOJI_MOD_NONE:
2291
+ break;
2292
+ case EMOJI_MOD_TYPE1:
2293
+ buf[mod_pos[0]] = 0x1f3fb + i;
2294
+ break;
2295
+ case EMOJI_MOD_TYPE2:
2296
+ case EMOJI_MOD_TYPE2D:
2297
+ i0 = i / 5;
2298
+ i1 = i % 5;
2299
+ /* avoid identical values */
2300
+ if (mod_type == EMOJI_MOD_TYPE2D && i0 >= i1)
2301
+ i0++;
2302
+ buf[mod_pos[0]] = 0x1f3fb + i0;
2303
+ buf[mod_pos[1]] = 0x1f3fb + i1;
2304
+ break;
2305
+ default:
2306
+ assert(0);
2307
+ }
2308
+
2309
+ if (hc_pos >= 0)
2310
+ buf[hc_pos] = 0x1F9B0 + j;
2311
+
2312
+ p = re_string_find(sl, len, buf, FALSE);
2313
+ if (!p)
2314
+ return FALSE;
2315
+ if (mark_flag)
2316
+ p->flags |= 1;
2317
+ }
2318
+ }
2319
+ return TRUE;
2320
+ }
2321
+
2322
+ static void zwj_encode_string(DynBuf *dbuf, const uint32_t *buf, int len, int mod_type, int *mod_pos,
2323
+ int hc_pos)
2324
+ {
2325
+ int i, j;
2326
+ int c, code;
2327
+ uint32_t buf1[SEQ_MAX_LEN];
2328
+
2329
+ j = 0;
2330
+ for(i = 0; i < len;) {
2331
+ c = buf[i++];
2332
+ if (c >= 0x2000 && c <= 0x2fff) {
2333
+ code = c - 0x2000;
2334
+ } else if (c >= 0x1f000 && c <= 0x1ffff) {
2335
+ code = c - 0x1f000 + 0x1000;
2336
+ } else {
2337
+ assert(0);
2338
+ }
2339
+ if (i < len && is_emoji_modifier(buf[i])) {
2340
+ /* modifier */
2341
+ code |= (mod_type << 13);
2342
+ i++;
2343
+ }
2344
+ if (i < len && buf[i] == 0xfe0f) {
2345
+ /* presentation selector present */
2346
+ code |= 0x8000;
2347
+ i++;
2348
+ }
2349
+ if (i < len) {
2350
+ /* zero width join */
2351
+ assert(buf[i] == 0x200d);
2352
+ i++;
2353
+ }
2354
+ buf1[j++] = code;
2355
+ }
2356
+ dbuf_putc(dbuf, j);
2357
+ for(i = 0; i < j; i++) {
2358
+ dbuf_putc(dbuf, buf1[i]);
2359
+ dbuf_putc(dbuf, buf1[i] >> 8);
2360
+ }
2361
+ }
2362
+
2363
+ static void build_rgi_emoji_zwj_sequence(FILE *f, REStringList *sl)
2364
+ {
2365
+ int mod_pos[2], mod_count, hair_color_pos, j, h;
2366
+ REString *p;
2367
+ uint32_t buf[SEQ_MAX_LEN];
2368
+ DynBuf dbuf;
2369
+
2370
+ #if 0
2371
+ {
2372
+ for(h = 0; h < sl->hash_size; h++) {
2373
+ for(p = sl->hash_table[h]; p != NULL; p = p->next) {
2374
+ for(j = 0; j < p->len; j++)
2375
+ printf(" %04x", p->buf[j]);
2376
+ printf("\n");
2377
+ }
2378
+ }
2379
+ exit(0);
2380
+ }
2381
+ #endif
2382
+ // printf("rgi_emoji_zwj_sequence: n=%d\n", sl->n_strings);
2383
+
2384
+ dbuf_init(&dbuf);
2385
+
2386
+ /* avoid duplicating strings with emoji modifiers or hair colors */
2387
+ for(h = 0; h < sl->hash_size; h++) {
2388
+ for(p = sl->hash_table[h]; p != NULL; p = p->next) {
2389
+ if (p->flags) /* already examined */
2390
+ continue;
2391
+ mod_count = 0;
2392
+ hair_color_pos = -1;
2393
+ for(j = 0; j < p->len; j++) {
2394
+ if (is_emoji_modifier(p->buf[j])) {
2395
+ assert(mod_count < 2);
2396
+ mod_pos[mod_count++] = j;
2397
+ } else if (is_emoji_hair_color(p->buf[j])) {
2398
+ hair_color_pos = j;
2399
+ }
2400
+ buf[j] = p->buf[j];
2401
+ }
2402
+
2403
+ if (mod_count != 0 || hair_color_pos >= 0) {
2404
+ int mod_type;
2405
+ if (mod_count == 0)
2406
+ mod_type = EMOJI_MOD_NONE;
2407
+ else if (mod_count == 1)
2408
+ mod_type = EMOJI_MOD_TYPE1;
2409
+ else
2410
+ mod_type = EMOJI_MOD_TYPE2;
2411
+
2412
+ if (mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, FALSE)) {
2413
+ mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, TRUE);
2414
+ } else if (mod_type == EMOJI_MOD_TYPE2) {
2415
+ mod_type = EMOJI_MOD_TYPE2D;
2416
+ if (mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, FALSE)) {
2417
+ mark_zwj_string(sl, buf, p->len, mod_type, mod_pos, hair_color_pos, TRUE);
2418
+ } else {
2419
+ dump_str("not_found", (int *)p->buf, p->len);
2420
+ goto keep;
2421
+ }
2422
+ }
2423
+ if (hair_color_pos >= 0)
2424
+ buf[hair_color_pos] = 0x1f9b0;
2425
+ /* encode the string */
2426
+ zwj_encode_string(&dbuf, buf, p->len, mod_type, mod_pos, hair_color_pos);
2427
+ } else {
2428
+ keep:
2429
+ zwj_encode_string(&dbuf, buf, p->len, EMOJI_MOD_NONE, NULL, -1);
2430
+ }
2431
+ }
2432
+ }
2433
+
2434
+ /* Encode */
2435
+ dump_byte_table(f, "unicode_rgi_emoji_zwj_sequence", dbuf.buf, dbuf.size);
2436
+
2437
+ dbuf_free(&dbuf);
2438
+ }
2439
+
2440
+ void build_sequence_prop_list_table(FILE *f)
2441
+ {
2442
+ int i;
2443
+ fprintf(f, "typedef enum {\n");
2444
+ for(i = 0; i < SEQUENCE_PROP_COUNT; i++)
2445
+ fprintf(f, " UNICODE_SEQUENCE_PROP_%s,\n", unicode_sequence_prop_name[i]);
2446
+ fprintf(f, " UNICODE_SEQUENCE_PROP_COUNT,\n");
2447
+ fprintf(f, "} UnicodeSequencePropertyEnum;\n\n");
2448
+
2449
+ dump_name_table(f, "unicode_sequence_prop_name_table",
2450
+ unicode_sequence_prop_name, SEQUENCE_PROP_COUNT, NULL);
2451
+
2452
+ dump_byte_table(f, "unicode_rgi_emoji_tag_sequence", rgi_emoji_tag_sequence.buf, rgi_emoji_tag_sequence.size);
2453
+
2454
+ build_rgi_emoji_zwj_sequence(f, &rgi_emoji_zwj_sequence);
2455
+ }
2456
+
1915
2457
  #ifdef USE_TEST
1916
2458
  int check_conv(uint32_t *res, uint32_t c, int conv_type)
1917
2459
  {
@@ -3138,6 +3680,8 @@ int main(int argc, char *argv[])
3138
3680
  outfilename = argv[arg++];
3139
3681
 
3140
3682
  unicode_db = mallocz(sizeof(unicode_db[0]) * (CHARCODE_MAX + 1));
3683
+ re_string_list_init(&rgi_emoji_zwj_sequence);
3684
+ dbuf_init(&rgi_emoji_tag_sequence);
3141
3685
 
3142
3686
  snprintf(filename, sizeof(filename), "%s/UnicodeData.txt", unicode_db_path);
3143
3687
 
@@ -3172,6 +3716,14 @@ int main(int argc, char *argv[])
3172
3716
  unicode_db_path);
3173
3717
  parse_prop_list(filename);
3174
3718
 
3719
+ snprintf(filename, sizeof(filename), "%s/emoji-sequences.txt",
3720
+ unicode_db_path);
3721
+ parse_sequence_prop_list(filename);
3722
+
3723
+ snprintf(filename, sizeof(filename), "%s/emoji-zwj-sequences.txt",
3724
+ unicode_db_path);
3725
+ parse_sequence_prop_list(filename);
3726
+
3175
3727
  // dump_unicode_data(unicode_db);
3176
3728
  build_conv_table(unicode_db);
3177
3729
 
@@ -3216,10 +3768,12 @@ int main(int argc, char *argv[])
3216
3768
  build_script_table(fo);
3217
3769
  build_script_ext_table(fo);
3218
3770
  build_prop_list_table(fo);
3771
+ build_sequence_prop_list_table(fo);
3219
3772
  fprintf(fo, "#endif /* CONFIG_ALL_UNICODE */\n");
3220
3773
  fprintf(fo, "/* %u tables / %u bytes, %u index / %u bytes */\n",
3221
3774
  total_tables, total_table_bytes, total_index, total_index_bytes);
3222
3775
  fclose(fo);
3223
3776
  }
3777
+ re_string_list_free(&rgi_emoji_zwj_sequence);
3224
3778
  return 0;
3225
3779
  }