quickjs 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ /*
2
+ * Tiny float64 printing and parsing library
3
+ *
4
+ * Copyright (c) 2024 Fabrice Bellard
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to deal
8
+ * in the Software without restriction, including without limitation the rights
9
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ * copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ * THE SOFTWARE.
23
+ */
24
+
25
#ifndef JS_DTOA_H
#define JS_DTOA_H

/* size_t and the fixed-width integer types are used by the
   declarations below; include them so the header is self-contained */
#include <stddef.h>
#include <stdint.h>

//#define JS_DTOA_DUMP_STATS

/* maximum number of digits for fixed and frac formats */
#define JS_DTOA_MAX_DIGITS 101

/* --- 'flags' of js_dtoa() and js_dtoa_max_len() --- */

/* radix != 10 is only supported with flags = JS_DTOA_FORMAT_FREE */
/* use as many digits as necessary */
#define JS_DTOA_FORMAT_FREE  (0 << 0)
/* use n_digits significant digits (1 <= n_digits <= JS_DTOA_MAX_DIGITS) */
#define JS_DTOA_FORMAT_FIXED (1 << 0)
/* force fractional format: [-]dd.dd with n_digits fractional digits.
   0 <= n_digits <= JS_DTOA_MAX_DIGITS */
#define JS_DTOA_FORMAT_FRAC  (2 << 0)
/* bits of 'flags' holding the JS_DTOA_FORMAT_x value */
#define JS_DTOA_FORMAT_MASK  (3 << 0)

/* select exponential notation either in fixed or free format */
#define JS_DTOA_EXP_AUTO     (0 << 2)
#define JS_DTOA_EXP_ENABLED  (1 << 2)
#define JS_DTOA_EXP_DISABLED (2 << 2)
/* bits of 'flags' holding the JS_DTOA_EXP_x value */
#define JS_DTOA_EXP_MASK     (3 << 2)

#define JS_DTOA_MINUS_ZERO   (1 << 4) /* show the minus sign for -0 */

/* --- 'flags' of js_atod() --- */

/* only accepts integers (no dot, no exponent) */
#define JS_ATOD_INT_ONLY            (1 << 0)
/* accept 0o and 0b prefixes in addition to 0x prefix if radix = 0 */
#define JS_ATOD_ACCEPT_BIN_OCT      (1 << 1)
/* accept 0 prefix as octal if radix == 0 and properly formed (Annex B) */
#define JS_ATOD_ACCEPT_LEGACY_OCTAL (1 << 2)
/* accept _ between digits as a digit separator */
#define JS_ATOD_ACCEPT_UNDERSCORES  (1 << 3)

/* temporary memory passed to js_dtoa() through 'tmp_mem' */
typedef struct {
    uint64_t mem[37];
} JSDTOATempMem;

/* temporary memory passed to js_atod() through 'tmp_mem' */
typedef struct {
    uint64_t mem[27];
} JSATODTempMem;

/* return a maximum bound of the string length */
int js_dtoa_max_len(double d, int radix, int n_digits, int flags);
/* return the string length. 'flags' is a combination of the
   JS_DTOA_x values above.
   NOTE(review): 'buf' is presumably expected to hold at least
   js_dtoa_max_len() bytes for the same arguments -- confirm against
   the implementation. */
int js_dtoa(char *buf, double d, int radix, int n_digits, int flags,
            JSDTOATempMem *tmp_mem);
/* parse 'str' as a number. 'flags' is a combination of the
   JS_ATOD_x values above.
   NOTE(review): '*pnext' presumably receives the position following
   the parsed number -- confirm against the implementation. */
double js_atod(const char *str, const char **pnext, int radix, int flags,
               JSATODTempMem *tmp_mem);

#ifdef JS_DTOA_DUMP_STATS
void js_dtoa_dump_stats(void);
#endif

/* additional exported functions */
size_t u32toa(char *buf, uint32_t n);
size_t i32toa(char *buf, int32_t n);
size_t u64toa(char *buf, uint64_t n);
size_t i64toa(char *buf, int64_t n);
size_t u64toa_radix(char *buf, uint64_t n, unsigned int radix);
size_t i64toa_radix(char *buf, int64_t n, unsigned int radix);

#endif /* JS_DTOA_H */
@@ -54,6 +54,9 @@ typedef enum {
54
54
 
55
55
  #define CAPTURE_COUNT_MAX 255
56
56
  #define STACK_SIZE_MAX 255
57
+ /* must be large enough to have a negligible runtime cost and small
58
+ enough to call the interrupt callback often. */
59
+ #define INTERRUPT_COUNTER_INIT 10000
57
60
 
58
61
  /* unicode code points */
59
62
  #define CP_LS 0x2028
@@ -683,6 +686,10 @@ static int get_class_atom(REParseState *s, CharRange *cr,
683
686
  c = '\\';
684
687
  }
685
688
  break;
689
+ case '-':
690
+ if (!inclass && s->is_unicode)
691
+ goto invalid_escape;
692
+ break;
686
693
  #ifdef CONFIG_ALL_UNICODE
687
694
  case 'p':
688
695
  case 'P':
@@ -1931,6 +1938,7 @@ typedef struct {
1931
1938
  BOOL multi_line;
1932
1939
  BOOL ignore_case;
1933
1940
  BOOL is_unicode;
1941
+ int interrupt_counter;
1934
1942
  void *opaque; /* used for stack overflow check */
1935
1943
 
1936
1944
  size_t state_size;
@@ -1977,7 +1985,17 @@ static int push_state(REExecContext *s,
1977
1985
  return 0;
1978
1986
  }
1979
1987
 
1980
- /* return 1 if match, 0 if not match or -1 if error. */
1988
+ static int lre_poll_timeout(REExecContext *s)
1989
+ {
1990
+ if (unlikely(--s->interrupt_counter <= 0)) {
1991
+ s->interrupt_counter = INTERRUPT_COUNTER_INIT;
1992
+ if (lre_check_timeout(s->opaque))
1993
+ return LRE_RET_TIMEOUT;
1994
+ }
1995
+ return 0;
1996
+ }
1997
+
1998
+ /* return 1 if match, 0 if not match or < 0 if error. */
1981
1999
  static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
1982
2000
  StackInt *stack, int stack_len,
1983
2001
  const uint8_t *pc, const uint8_t *cptr,
@@ -2008,6 +2026,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2008
2026
  ret = 0;
2009
2027
  recurse:
2010
2028
  for(;;) {
2029
+ if (lre_poll_timeout(s))
2030
+ return LRE_RET_TIMEOUT;
2011
2031
  if (s->state_stack_len == 0)
2012
2032
  return ret;
2013
2033
  rs = (REExecState *)(s->state_stack +
@@ -2097,7 +2117,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2097
2117
  ret = push_state(s, capture, stack, stack_len,
2098
2118
  pc1, cptr, RE_EXEC_STATE_SPLIT, 0);
2099
2119
  if (ret < 0)
2100
- return -1;
2120
+ return LRE_RET_MEMORY_ERROR;
2101
2121
  break;
2102
2122
  }
2103
2123
  case REOP_lookahead:
@@ -2109,12 +2129,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2109
2129
  RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead,
2110
2130
  0);
2111
2131
  if (ret < 0)
2112
- return -1;
2132
+ return LRE_RET_MEMORY_ERROR;
2113
2133
  break;
2114
2134
 
2115
2135
  case REOP_goto:
2116
2136
  val = get_u32(pc);
2117
2137
  pc += 4 + (int)val;
2138
+ if (lre_poll_timeout(s))
2139
+ return LRE_RET_TIMEOUT;
2118
2140
  break;
2119
2141
  case REOP_line_start:
2120
2142
  if (cptr == s->cbuf)
@@ -2179,6 +2201,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2179
2201
  pc += 4;
2180
2202
  if (--stack[stack_len - 1] != 0) {
2181
2203
  pc += (int)val;
2204
+ if (lre_poll_timeout(s))
2205
+ return LRE_RET_TIMEOUT;
2182
2206
  }
2183
2207
  break;
2184
2208
  case REOP_push_char_pos:
@@ -2353,9 +2377,12 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2353
2377
 
2354
2378
  q = 0;
2355
2379
  for(;;) {
2380
+ if (lre_poll_timeout(s))
2381
+ return LRE_RET_TIMEOUT;
2356
2382
  res = lre_exec_backtrack(s, capture, stack, stack_len,
2357
2383
  pc1, cptr, TRUE);
2358
- if (res == -1)
2384
+ if (res == LRE_RET_MEMORY_ERROR ||
2385
+ res == LRE_RET_TIMEOUT)
2359
2386
  return res;
2360
2387
  if (!res)
2361
2388
  break;
@@ -2373,7 +2400,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2373
2400
  RE_EXEC_STATE_GREEDY_QUANT,
2374
2401
  q - quant_min);
2375
2402
  if (ret < 0)
2376
- return -1;
2403
+ return LRE_RET_MEMORY_ERROR;
2377
2404
  }
2378
2405
  }
2379
2406
  break;
@@ -2383,7 +2410,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2383
2410
  }
2384
2411
  }
2385
2412
 
2386
- /* Return 1 if match, 0 if not match or -1 if error. cindex is the
2413
+ /* Return 1 if match, 0 if not match or < 0 if error (see LRE_RET_x). cindex is the
2387
2414
  starting position of the match and must be such as 0 <= cindex <=
2388
2415
  clen. */
2389
2416
  int lre_exec(uint8_t **capture,
@@ -2405,6 +2432,7 @@ int lre_exec(uint8_t **capture,
2405
2432
  s->cbuf_type = cbuf_type;
2406
2433
  if (s->cbuf_type == 1 && s->is_unicode)
2407
2434
  s->cbuf_type = 2;
2435
+ s->interrupt_counter = INTERRUPT_COUNTER_INIT;
2408
2436
  s->opaque = opaque;
2409
2437
 
2410
2438
  s->state_size = sizeof(REExecState) +
@@ -36,6 +36,9 @@
36
36
  #define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */
37
37
  #define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
38
38
 
39
+ #define LRE_RET_MEMORY_ERROR (-1)
40
+ #define LRE_RET_TIMEOUT (-2)
41
+
39
42
  uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
40
43
  const char *buf, size_t buf_len, int re_flags,
41
44
  void *opaque);
@@ -50,6 +53,8 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16);
50
53
 
51
54
  /* must be provided by the user, return non zero if overflow */
52
55
  int lre_check_stack_overflow(void *opaque, size_t alloca_size);
56
+ /* must be provided by the user, return non zero if time out */
57
+ int lre_check_timeout(void *opaque);
53
58
  void *lre_realloc(void *opaque, void *ptr, size_t size);
54
59
 
55
60
  #endif /* LIBREGEXP_H */
@@ -537,6 +537,207 @@ int cr_invert(CharRange *cr)
537
537
  return 0;
538
538
  }
539
539
 
540
+ #define CASE_U (1 << 0)
541
+ #define CASE_L (1 << 1)
542
+ #define CASE_F (1 << 2)
543
+
544
+ /* use the case conversion table to generate range of characters.
545
+ CASE_U: set char if modified by uppercasing,
546
+ CASE_L: set char if modified by lowercasing,
547
+ CASE_F: set char if modified by case folding,
548
+ */
549
+ static int unicode_case1(CharRange *cr, int case_mask)
550
+ {
551
+ #define MR(x) (1 << RUN_TYPE_ ## x)
552
+ const uint32_t tab_run_mask[3] = {
553
+ MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
554
+ MR(UF_D1_EXT) | MR(U_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
555
+
556
+ MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2),
557
+
558
+ MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
559
+ };
560
+ #undef MR
561
+ uint32_t mask, v, code, type, len, i, idx;
562
+
563
+ if (case_mask == 0)
564
+ return 0;
565
+ mask = 0;
566
+ for(i = 0; i < 3; i++) {
567
+ if ((case_mask >> i) & 1)
568
+ mask |= tab_run_mask[i];
569
+ }
570
+ for(idx = 0; idx < countof(case_conv_table1); idx++) {
571
+ v = case_conv_table1[idx];
572
+ type = (v >> (32 - 17 - 7 - 4)) & 0xf;
573
+ code = v >> (32 - 17);
574
+ len = (v >> (32 - 17 - 7)) & 0x7f;
575
+ if ((mask >> type) & 1) {
576
+ // printf("%d: type=%d %04x %04x\n", idx, type, code, code + len - 1);
577
+ switch(type) {
578
+ case RUN_TYPE_UL:
579
+ if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
580
+ goto def_case;
581
+ code += ((case_mask & CASE_U) != 0);
582
+ for(i = 0; i < len; i += 2) {
583
+ if (cr_add_interval(cr, code + i, code + i + 1))
584
+ return -1;
585
+ }
586
+ break;
587
+ case RUN_TYPE_LSU:
588
+ if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
589
+ goto def_case;
590
+ if (!(case_mask & CASE_U)) {
591
+ if (cr_add_interval(cr, code, code + 1))
592
+ return -1;
593
+ }
594
+ if (cr_add_interval(cr, code + 1, code + 2))
595
+ return -1;
596
+ if (case_mask & CASE_U) {
597
+ if (cr_add_interval(cr, code + 2, code + 3))
598
+ return -1;
599
+ }
600
+ break;
601
+ default:
602
+ def_case:
603
+ if (cr_add_interval(cr, code, code + len))
604
+ return -1;
605
+ break;
606
+ }
607
+ }
608
+ }
609
+ return 0;
610
+ }
611
+
612
/* Three-way comparison of the uint32_t values pointed to by 'p1' and
   'p2': return -1, 0 or 1. 'arg' is not used. */
static int point_cmp(const void *p1, const void *p2, void *arg)
{
    const uint32_t lhs = *(const uint32_t *)p1;
    const uint32_t rhs = *(const uint32_t *)p2;

    if (lhs < rhs)
        return -1;
    return lhs > rhs;
}
618
+
619
+ static void cr_sort_and_remove_overlap(CharRange *cr)
620
+ {
621
+ uint32_t start, end, start1, end1, i, j;
622
+
623
+ /* the resulting ranges are not necessarily sorted and may overlap */
624
+ rqsort(cr->points, cr->len / 2, sizeof(cr->points[0]) * 2, point_cmp, NULL);
625
+ j = 0;
626
+ for(i = 0; i < cr->len; ) {
627
+ start = cr->points[i];
628
+ end = cr->points[i + 1];
629
+ i += 2;
630
+ while (i < cr->len) {
631
+ start1 = cr->points[i];
632
+ end1 = cr->points[i + 1];
633
+ if (start1 > end) {
634
+ /* |------|
635
+ * |-------| */
636
+ break;
637
+ } else if (end1 <= end) {
638
+ /* |------|
639
+ * |--| */
640
+ i += 2;
641
+ } else {
642
+ /* |------|
643
+ * |-------| */
644
+ end = end1;
645
+ i += 2;
646
+ }
647
+ }
648
+ cr->points[j] = start;
649
+ cr->points[j + 1] = end;
650
+ j += 2;
651
+ }
652
+ cr->len = j;
653
+ }
654
+
655
+ /* canonicalize a character set using the JS regex case folding rules
656
+ (see lre_canonicalize()) */
657
+ int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
658
+ {
659
+ CharRange cr_inter, cr_mask, cr_result, cr_sub;
660
+ uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;
661
+
662
+ cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
663
+ cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
664
+ cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
665
+ cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);
666
+
667
+ if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
668
+ goto fail;
669
+ if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
670
+ goto fail;
671
+
672
+ if (cr_invert(&cr_mask))
673
+ goto fail;
674
+ if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
675
+ goto fail;
676
+
677
+ /* cr_inter = cr & cr_mask */
678
+ /* cr_sub = cr & ~cr_mask */
679
+
680
+ /* use the case conversion table to compute the result */
681
+ d_start = -1;
682
+ d_end = -1;
683
+ idx = 0;
684
+ v = case_conv_table1[idx];
685
+ code = v >> (32 - 17);
686
+ len = (v >> (32 - 17 - 7)) & 0x7f;
687
+ for(i = 0; i < cr_inter.len; i += 2) {
688
+ start = cr_inter.points[i];
689
+ end = cr_inter.points[i + 1];
690
+
691
+ for(c = start; c < end; c++) {
692
+ for(;;) {
693
+ if (c >= code && c < code + len)
694
+ break;
695
+ idx++;
696
+ assert(idx < countof(case_conv_table1));
697
+ v = case_conv_table1[idx];
698
+ code = v >> (32 - 17);
699
+ len = (v >> (32 - 17 - 7)) & 0x7f;
700
+ }
701
+ d = lre_case_folding_entry(c, idx, v, is_unicode);
702
+ /* try to merge with the current interval */
703
+ if (d_start == -1) {
704
+ d_start = d;
705
+ d_end = d + 1;
706
+ } else if (d_end == d) {
707
+ d_end++;
708
+ } else {
709
+ cr_add_interval(&cr_result, d_start, d_end);
710
+ d_start = d;
711
+ d_end = d + 1;
712
+ }
713
+ }
714
+ }
715
+ if (d_start != -1) {
716
+ if (cr_add_interval(&cr_result, d_start, d_end))
717
+ goto fail;
718
+ }
719
+
720
+ /* the resulting ranges are not necessarily sorted and may overlap */
721
+ cr_sort_and_remove_overlap(&cr_result);
722
+
723
+ /* or with the character not affected by the case folding */
724
+ cr->len = 0;
725
+ if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
726
+ goto fail;
727
+
728
+ cr_free(&cr_inter);
729
+ cr_free(&cr_mask);
730
+ cr_free(&cr_result);
731
+ cr_free(&cr_sub);
732
+ return 0;
733
+ fail:
734
+ cr_free(&cr_inter);
735
+ cr_free(&cr_mask);
736
+ cr_free(&cr_result);
737
+ cr_free(&cr_sub);
738
+ return -1;
739
+ }
740
+
540
741
  #ifdef CONFIG_ALL_UNICODE
541
742
 
542
743
  BOOL lre_is_id_start(uint32_t c)
@@ -1296,207 +1497,6 @@ static int unicode_prop1(CharRange *cr, int prop_idx)
1296
1497
  return 0;
1297
1498
  }
1298
1499
 
1299
- #define CASE_U (1 << 0)
1300
- #define CASE_L (1 << 1)
1301
- #define CASE_F (1 << 2)
1302
-
1303
- /* use the case conversion table to generate range of characters.
1304
- CASE_U: set char if modified by uppercasing,
1305
- CASE_L: set char if modified by lowercasing,
1306
- CASE_F: set char if modified by case folding,
1307
- */
1308
- static int unicode_case1(CharRange *cr, int case_mask)
1309
- {
1310
- #define MR(x) (1 << RUN_TYPE_ ## x)
1311
- const uint32_t tab_run_mask[3] = {
1312
- MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
1313
- MR(UF_D1_EXT) | MR(U_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
1314
-
1315
- MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2),
1316
-
1317
- MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
1318
- };
1319
- #undef MR
1320
- uint32_t mask, v, code, type, len, i, idx;
1321
-
1322
- if (case_mask == 0)
1323
- return 0;
1324
- mask = 0;
1325
- for(i = 0; i < 3; i++) {
1326
- if ((case_mask >> i) & 1)
1327
- mask |= tab_run_mask[i];
1328
- }
1329
- for(idx = 0; idx < countof(case_conv_table1); idx++) {
1330
- v = case_conv_table1[idx];
1331
- type = (v >> (32 - 17 - 7 - 4)) & 0xf;
1332
- code = v >> (32 - 17);
1333
- len = (v >> (32 - 17 - 7)) & 0x7f;
1334
- if ((mask >> type) & 1) {
1335
- // printf("%d: type=%d %04x %04x\n", idx, type, code, code + len - 1);
1336
- switch(type) {
1337
- case RUN_TYPE_UL:
1338
- if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
1339
- goto def_case;
1340
- code += ((case_mask & CASE_U) != 0);
1341
- for(i = 0; i < len; i += 2) {
1342
- if (cr_add_interval(cr, code + i, code + i + 1))
1343
- return -1;
1344
- }
1345
- break;
1346
- case RUN_TYPE_LSU:
1347
- if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
1348
- goto def_case;
1349
- if (!(case_mask & CASE_U)) {
1350
- if (cr_add_interval(cr, code, code + 1))
1351
- return -1;
1352
- }
1353
- if (cr_add_interval(cr, code + 1, code + 2))
1354
- return -1;
1355
- if (case_mask & CASE_U) {
1356
- if (cr_add_interval(cr, code + 2, code + 3))
1357
- return -1;
1358
- }
1359
- break;
1360
- default:
1361
- def_case:
1362
- if (cr_add_interval(cr, code, code + len))
1363
- return -1;
1364
- break;
1365
- }
1366
- }
1367
- }
1368
- return 0;
1369
- }
1370
-
1371
- static int point_cmp(const void *p1, const void *p2, void *arg)
1372
- {
1373
- uint32_t v1 = *(uint32_t *)p1;
1374
- uint32_t v2 = *(uint32_t *)p2;
1375
- return (v1 > v2) - (v1 < v2);
1376
- }
1377
-
1378
- static void cr_sort_and_remove_overlap(CharRange *cr)
1379
- {
1380
- uint32_t start, end, start1, end1, i, j;
1381
-
1382
- /* the resulting ranges are not necessarily sorted and may overlap */
1383
- rqsort(cr->points, cr->len / 2, sizeof(cr->points[0]) * 2, point_cmp, NULL);
1384
- j = 0;
1385
- for(i = 0; i < cr->len; ) {
1386
- start = cr->points[i];
1387
- end = cr->points[i + 1];
1388
- i += 2;
1389
- while (i < cr->len) {
1390
- start1 = cr->points[i];
1391
- end1 = cr->points[i + 1];
1392
- if (start1 > end) {
1393
- /* |------|
1394
- * |-------| */
1395
- break;
1396
- } else if (end1 <= end) {
1397
- /* |------|
1398
- * |--| */
1399
- i += 2;
1400
- } else {
1401
- /* |------|
1402
- * |-------| */
1403
- end = end1;
1404
- i += 2;
1405
- }
1406
- }
1407
- cr->points[j] = start;
1408
- cr->points[j + 1] = end;
1409
- j += 2;
1410
- }
1411
- cr->len = j;
1412
- }
1413
-
1414
- /* canonicalize a character set using the JS regex case folding rules
1415
- (see lre_canonicalize()) */
1416
- int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
1417
- {
1418
- CharRange cr_inter, cr_mask, cr_result, cr_sub;
1419
- uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;
1420
-
1421
- cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
1422
- cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
1423
- cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
1424
- cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);
1425
-
1426
- if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
1427
- goto fail;
1428
- if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
1429
- goto fail;
1430
-
1431
- if (cr_invert(&cr_mask))
1432
- goto fail;
1433
- if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
1434
- goto fail;
1435
-
1436
- /* cr_inter = cr & cr_mask */
1437
- /* cr_sub = cr & ~cr_mask */
1438
-
1439
- /* use the case conversion table to compute the result */
1440
- d_start = -1;
1441
- d_end = -1;
1442
- idx = 0;
1443
- v = case_conv_table1[idx];
1444
- code = v >> (32 - 17);
1445
- len = (v >> (32 - 17 - 7)) & 0x7f;
1446
- for(i = 0; i < cr_inter.len; i += 2) {
1447
- start = cr_inter.points[i];
1448
- end = cr_inter.points[i + 1];
1449
-
1450
- for(c = start; c < end; c++) {
1451
- for(;;) {
1452
- if (c >= code && c < code + len)
1453
- break;
1454
- idx++;
1455
- assert(idx < countof(case_conv_table1));
1456
- v = case_conv_table1[idx];
1457
- code = v >> (32 - 17);
1458
- len = (v >> (32 - 17 - 7)) & 0x7f;
1459
- }
1460
- d = lre_case_folding_entry(c, idx, v, is_unicode);
1461
- /* try to merge with the current interval */
1462
- if (d_start == -1) {
1463
- d_start = d;
1464
- d_end = d + 1;
1465
- } else if (d_end == d) {
1466
- d_end++;
1467
- } else {
1468
- cr_add_interval(&cr_result, d_start, d_end);
1469
- d_start = d;
1470
- d_end = d + 1;
1471
- }
1472
- }
1473
- }
1474
- if (d_start != -1) {
1475
- if (cr_add_interval(&cr_result, d_start, d_end))
1476
- goto fail;
1477
- }
1478
-
1479
- /* the resulting ranges are not necessarily sorted and may overlap */
1480
- cr_sort_and_remove_overlap(&cr_result);
1481
-
1482
- /* or with the character not affected by the case folding */
1483
- cr->len = 0;
1484
- if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
1485
- goto fail;
1486
-
1487
- cr_free(&cr_inter);
1488
- cr_free(&cr_mask);
1489
- cr_free(&cr_result);
1490
- cr_free(&cr_sub);
1491
- return 0;
1492
- fail:
1493
- cr_free(&cr_inter);
1494
- cr_free(&cr_mask);
1495
- cr_free(&cr_result);
1496
- cr_free(&cr_sub);
1497
- return -1;
1498
- }
1499
-
1500
1500
  typedef enum {
1501
1501
  POP_GC,
1502
1502
  POP_PROP,