quickjs 0.19.0 → 0.20.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/quickjsrb/quickjs/cutils.c +31 -23
- data/ext/quickjsrb/quickjs/cutils.h +40 -6
- data/ext/quickjsrb/quickjs/libregexp-opcode.h +14 -8
- data/ext/quickjsrb/quickjs/libregexp.c +658 -490
- data/ext/quickjsrb/quickjs/libregexp.h +4 -0
- data/ext/quickjsrb/quickjs/libunicode-table.h +2273 -2216
- data/ext/quickjsrb/quickjs/libunicode.c +3 -2
- data/ext/quickjsrb/quickjs/libunicode.h +10 -0
- data/ext/quickjsrb/quickjs/qjs.c +24 -7
- data/ext/quickjsrb/quickjs/quickjs-atom.h +13 -0
- data/ext/quickjsrb/quickjs/quickjs-libc.c +90 -29
- data/ext/quickjsrb/quickjs/quickjs-opcode.h +5 -9
- data/ext/quickjsrb/quickjs/quickjs.c +7997 -2829
- data/ext/quickjsrb/quickjs/quickjs.h +35 -23
- data/ext/quickjsrb/quickjs/run-test262.c +377 -150
- data/ext/quickjsrb/quickjs/unicode_gen_def.h +5 -0
- data/lib/quickjs/version.rb +1 -1
- data/polyfills/package-lock.json +2 -2
- data/polyfills/package.json +1 -1
- metadata +1 -1
|
@@ -34,16 +34,20 @@
|
|
|
34
34
|
|
|
35
35
|
/*
|
|
36
36
|
TODO:
|
|
37
|
-
|
|
37
|
+
- remove REOP_char_i and REOP_range_i by precomputing the case folding.
|
|
38
|
+
- add specific opcodes for simple unicode property tests so that the
|
|
39
|
+
generated bytecode is smaller.
|
|
38
40
|
- Add a lock step execution mode (=linear time execution guaranteed)
|
|
39
41
|
when the regular expression is "simple" i.e. no backreference nor
|
|
40
42
|
complicated lookahead. The opcodes are designed for this execution
|
|
41
43
|
model.
|
|
42
44
|
*/
|
|
43
45
|
|
|
44
|
-
#if defined(TEST)
|
|
46
|
+
#if defined(TEST)
|
|
45
47
|
#define DUMP_REOP
|
|
46
48
|
#endif
|
|
49
|
+
//#define DUMP_REOP
|
|
50
|
+
//#define DUMP_EXEC
|
|
47
51
|
|
|
48
52
|
typedef enum {
|
|
49
53
|
#define DEF(id, size) REOP_ ## id,
|
|
@@ -53,7 +57,7 @@ typedef enum {
|
|
|
53
57
|
} REOPCodeEnum;
|
|
54
58
|
|
|
55
59
|
#define CAPTURE_COUNT_MAX 255
|
|
56
|
-
#define
|
|
60
|
+
#define REGISTER_COUNT_MAX 255
|
|
57
61
|
/* must be large enough to have a negligible runtime cost and small
|
|
58
62
|
enough to call the interrupt callback often. */
|
|
59
63
|
#define INTERRUPT_COUNTER_INIT 10000
|
|
@@ -75,6 +79,7 @@ typedef struct {
|
|
|
75
79
|
BOOL ignore_case;
|
|
76
80
|
BOOL multi_line;
|
|
77
81
|
BOOL dotall;
|
|
82
|
+
uint8_t group_name_scope;
|
|
78
83
|
int capture_count;
|
|
79
84
|
int total_capture_count; /* -1 = not computed yet */
|
|
80
85
|
int has_named_captures; /* -1 = don't know, 0 = no, 1 = yes */
|
|
@@ -103,10 +108,10 @@ static const REOpCode reopcode_info[REOP_COUNT] = {
|
|
|
103
108
|
#undef DEF
|
|
104
109
|
};
|
|
105
110
|
|
|
106
|
-
#define RE_HEADER_FLAGS
|
|
107
|
-
#define RE_HEADER_CAPTURE_COUNT
|
|
108
|
-
#define
|
|
109
|
-
#define RE_HEADER_BYTECODE_LEN
|
|
111
|
+
#define RE_HEADER_FLAGS 0
|
|
112
|
+
#define RE_HEADER_CAPTURE_COUNT 2
|
|
113
|
+
#define RE_HEADER_REGISTER_COUNT 3
|
|
114
|
+
#define RE_HEADER_BYTECODE_LEN 4
|
|
110
115
|
|
|
111
116
|
#define RE_HEADER_LEN 8
|
|
112
117
|
|
|
@@ -117,7 +122,7 @@ static inline int is_digit(int c) {
|
|
|
117
122
|
/* insert 'len' bytes at position 'pos'. Return < 0 if error. */
|
|
118
123
|
static int dbuf_insert(DynBuf *s, int pos, int len)
|
|
119
124
|
{
|
|
120
|
-
if (
|
|
125
|
+
if (dbuf_claim(s, len))
|
|
121
126
|
return -1;
|
|
122
127
|
memmove(s->buf + pos + len, s->buf + pos, s->size - pos);
|
|
123
128
|
s->size += len;
|
|
@@ -459,15 +464,15 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
|
|
459
464
|
int buf_len)
|
|
460
465
|
{
|
|
461
466
|
int pos, len, opcode, bc_len, re_flags, i;
|
|
462
|
-
uint32_t val;
|
|
467
|
+
uint32_t val, val2;
|
|
463
468
|
|
|
464
469
|
assert(buf_len >= RE_HEADER_LEN);
|
|
465
470
|
|
|
466
471
|
re_flags = lre_get_flags(buf);
|
|
467
472
|
bc_len = get_u32(buf + RE_HEADER_BYTECODE_LEN);
|
|
468
473
|
assert(bc_len + RE_HEADER_LEN <= buf_len);
|
|
469
|
-
printf("flags: 0x%x capture_count=%d
|
|
470
|
-
re_flags, buf[RE_HEADER_CAPTURE_COUNT], buf[
|
|
474
|
+
printf("flags: 0x%x capture_count=%d reg_count=%d\n",
|
|
475
|
+
re_flags, buf[RE_HEADER_CAPTURE_COUNT], buf[RE_HEADER_REGISTER_COUNT]);
|
|
471
476
|
if (re_flags & LRE_FLAG_NAMED_GROUPS) {
|
|
472
477
|
const char *p;
|
|
473
478
|
p = (char *)buf + RE_HEADER_LEN + bc_len;
|
|
@@ -476,7 +481,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
|
|
476
481
|
if (i != 1)
|
|
477
482
|
printf(",");
|
|
478
483
|
printf("<%s>", p);
|
|
479
|
-
p += strlen(p) +
|
|
484
|
+
p += strlen(p) + LRE_GROUP_NAME_TRAILER_LEN;
|
|
480
485
|
}
|
|
481
486
|
printf("\n");
|
|
482
487
|
assert(p == (char *)(buf + buf_len));
|
|
@@ -518,34 +523,62 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
|
|
518
523
|
case REOP_goto:
|
|
519
524
|
case REOP_split_goto_first:
|
|
520
525
|
case REOP_split_next_first:
|
|
521
|
-
case REOP_loop:
|
|
522
526
|
case REOP_lookahead:
|
|
523
527
|
case REOP_negative_lookahead:
|
|
524
528
|
val = get_u32(buf + pos + 1);
|
|
525
529
|
val += (pos + 5);
|
|
526
530
|
printf(" %u", val);
|
|
527
531
|
break;
|
|
528
|
-
case
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
532
|
+
case REOP_loop:
|
|
533
|
+
val2 = buf[pos + 1];
|
|
534
|
+
val = get_u32(buf + pos + 2);
|
|
535
|
+
val += (pos + 6);
|
|
536
|
+
printf(" r%u, %u", val2, val);
|
|
537
|
+
break;
|
|
538
|
+
case REOP_loop_split_goto_first:
|
|
539
|
+
case REOP_loop_split_next_first:
|
|
540
|
+
case REOP_loop_check_adv_split_goto_first:
|
|
541
|
+
case REOP_loop_check_adv_split_next_first:
|
|
542
|
+
{
|
|
543
|
+
uint32_t limit;
|
|
544
|
+
val2 = buf[pos + 1];
|
|
545
|
+
limit = get_u32(buf + pos + 2);
|
|
546
|
+
val = get_u32(buf + pos + 6);
|
|
547
|
+
val += (pos + 10);
|
|
548
|
+
printf(" r%u, %u, %u", val2, limit, val);
|
|
549
|
+
}
|
|
534
550
|
break;
|
|
535
551
|
case REOP_save_start:
|
|
536
552
|
case REOP_save_end:
|
|
553
|
+
printf(" %u", buf[pos + 1]);
|
|
554
|
+
break;
|
|
537
555
|
case REOP_back_reference:
|
|
538
556
|
case REOP_back_reference_i:
|
|
539
557
|
case REOP_backward_back_reference:
|
|
540
558
|
case REOP_backward_back_reference_i:
|
|
541
|
-
|
|
559
|
+
{
|
|
560
|
+
int n, i;
|
|
561
|
+
n = buf[pos + 1];
|
|
562
|
+
len += n;
|
|
563
|
+
for(i = 0; i < n; i++) {
|
|
564
|
+
if (i != 0)
|
|
565
|
+
printf(",");
|
|
566
|
+
printf(" %u", buf[pos + 2 + i]);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
542
569
|
break;
|
|
543
570
|
case REOP_save_reset:
|
|
544
571
|
printf(" %u %u", buf[pos + 1], buf[pos + 2]);
|
|
545
572
|
break;
|
|
546
|
-
case
|
|
547
|
-
val =
|
|
548
|
-
|
|
573
|
+
case REOP_set_i32:
|
|
574
|
+
val = buf[pos + 1];
|
|
575
|
+
val2 = get_u32(buf + pos + 2);
|
|
576
|
+
printf(" r%u, %d", val, val2);
|
|
577
|
+
break;
|
|
578
|
+
case REOP_set_char_pos:
|
|
579
|
+
case REOP_check_advance:
|
|
580
|
+
val = buf[pos + 1];
|
|
581
|
+
printf(" r%u", val);
|
|
549
582
|
break;
|
|
550
583
|
case REOP_range:
|
|
551
584
|
case REOP_range_i:
|
|
@@ -604,6 +637,27 @@ static int re_emit_goto(REParseState *s, int op, uint32_t val)
|
|
|
604
637
|
return pos;
|
|
605
638
|
}
|
|
606
639
|
|
|
640
|
+
static int re_emit_goto_u8(REParseState *s, int op, uint32_t arg, uint32_t val)
|
|
641
|
+
{
|
|
642
|
+
int pos;
|
|
643
|
+
dbuf_putc(&s->byte_code, op);
|
|
644
|
+
dbuf_putc(&s->byte_code, arg);
|
|
645
|
+
pos = s->byte_code.size;
|
|
646
|
+
dbuf_put_u32(&s->byte_code, val - (pos + 4));
|
|
647
|
+
return pos;
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
static int re_emit_goto_u8_u32(REParseState *s, int op, uint32_t arg0, uint32_t arg1, uint32_t val)
|
|
651
|
+
{
|
|
652
|
+
int pos;
|
|
653
|
+
dbuf_putc(&s->byte_code, op);
|
|
654
|
+
dbuf_putc(&s->byte_code, arg0);
|
|
655
|
+
dbuf_put_u32(&s->byte_code, arg1);
|
|
656
|
+
pos = s->byte_code.size;
|
|
657
|
+
dbuf_put_u32(&s->byte_code, val - (pos + 4));
|
|
658
|
+
return pos;
|
|
659
|
+
}
|
|
660
|
+
|
|
607
661
|
static void re_emit_op_u8(REParseState *s, int op, uint32_t val)
|
|
608
662
|
{
|
|
609
663
|
dbuf_putc(&s->byte_code, op);
|
|
@@ -705,9 +759,21 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
|
|
|
705
759
|
c = '\v';
|
|
706
760
|
break;
|
|
707
761
|
case 'x':
|
|
762
|
+
{
|
|
763
|
+
int h0, h1;
|
|
764
|
+
|
|
765
|
+
h0 = from_hex(*p++);
|
|
766
|
+
if (h0 < 0)
|
|
767
|
+
return -1;
|
|
768
|
+
h1 = from_hex(*p++);
|
|
769
|
+
if (h1 < 0)
|
|
770
|
+
return -1;
|
|
771
|
+
c = (h0 << 4) | h1;
|
|
772
|
+
}
|
|
773
|
+
break;
|
|
708
774
|
case 'u':
|
|
709
775
|
{
|
|
710
|
-
int h,
|
|
776
|
+
int h, i;
|
|
711
777
|
uint32_t c1;
|
|
712
778
|
|
|
713
779
|
if (*p == '{' && allow_utf16) {
|
|
@@ -725,14 +791,8 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
|
|
|
725
791
|
}
|
|
726
792
|
p++;
|
|
727
793
|
} else {
|
|
728
|
-
if (c == 'x') {
|
|
729
|
-
n = 2;
|
|
730
|
-
} else {
|
|
731
|
-
n = 4;
|
|
732
|
-
}
|
|
733
|
-
|
|
734
794
|
c = 0;
|
|
735
|
-
for(i = 0; i <
|
|
795
|
+
for(i = 0; i < 4; i++) {
|
|
736
796
|
h = from_hex(*p++);
|
|
737
797
|
if (h < 0) {
|
|
738
798
|
return -1;
|
|
@@ -1020,7 +1080,7 @@ static int get_class_atom(REParseState *s, REStringList *cr,
|
|
|
1020
1080
|
goto default_escape;
|
|
1021
1081
|
if (cr_init_char_range(s, cr, c))
|
|
1022
1082
|
return -1;
|
|
1023
|
-
c
|
|
1083
|
+
c += CLASS_RANGE_BASE;
|
|
1024
1084
|
break;
|
|
1025
1085
|
case 'c':
|
|
1026
1086
|
c = *p;
|
|
@@ -1491,17 +1551,18 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
|
|
|
1491
1551
|
return -1;
|
|
1492
1552
|
}
|
|
1493
1553
|
|
|
1494
|
-
/*
|
|
1495
|
-
|
|
1496
|
-
- false if the opcodes always advance the char pointer
|
|
1554
|
+
/* need_check_adv: false if the opcodes always advance the char pointer
|
|
1555
|
+
need_capture_init: true unless we are sure all the captures in the atom are set
|
|
1497
1556
|
*/
|
|
1498
|
-
static BOOL
|
|
1557
|
+
static BOOL re_need_check_adv_and_capture_init(BOOL *pneed_capture_init,
|
|
1558
|
+
const uint8_t *bc_buf, int bc_buf_len)
|
|
1499
1559
|
{
|
|
1500
1560
|
int pos, opcode, len;
|
|
1501
1561
|
uint32_t val;
|
|
1502
|
-
BOOL
|
|
1562
|
+
BOOL need_check_adv, need_capture_init;
|
|
1503
1563
|
|
|
1504
|
-
|
|
1564
|
+
need_check_adv = TRUE;
|
|
1565
|
+
need_capture_init = FALSE;
|
|
1505
1566
|
pos = 0;
|
|
1506
1567
|
while (pos < bc_buf_len) {
|
|
1507
1568
|
opcode = bc_buf[pos];
|
|
@@ -1511,28 +1572,30 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
|
|
1511
1572
|
case REOP_range_i:
|
|
1512
1573
|
val = get_u16(bc_buf + pos + 1);
|
|
1513
1574
|
len += val * 4;
|
|
1514
|
-
|
|
1575
|
+
need_check_adv = FALSE;
|
|
1576
|
+
break;
|
|
1515
1577
|
case REOP_range32:
|
|
1516
1578
|
case REOP_range32_i:
|
|
1517
1579
|
val = get_u16(bc_buf + pos + 1);
|
|
1518
1580
|
len += val * 8;
|
|
1519
|
-
|
|
1581
|
+
need_check_adv = FALSE;
|
|
1582
|
+
break;
|
|
1520
1583
|
case REOP_char:
|
|
1521
1584
|
case REOP_char_i:
|
|
1522
1585
|
case REOP_char32:
|
|
1523
1586
|
case REOP_char32_i:
|
|
1524
1587
|
case REOP_dot:
|
|
1525
1588
|
case REOP_any:
|
|
1526
|
-
|
|
1527
|
-
|
|
1589
|
+
case REOP_space:
|
|
1590
|
+
case REOP_not_space:
|
|
1591
|
+
need_check_adv = FALSE;
|
|
1528
1592
|
break;
|
|
1529
1593
|
case REOP_line_start:
|
|
1530
1594
|
case REOP_line_start_m:
|
|
1531
1595
|
case REOP_line_end:
|
|
1532
1596
|
case REOP_line_end_m:
|
|
1533
|
-
case
|
|
1534
|
-
case
|
|
1535
|
-
case REOP_drop:
|
|
1597
|
+
case REOP_set_i32:
|
|
1598
|
+
case REOP_set_char_pos:
|
|
1536
1599
|
case REOP_word_boundary:
|
|
1537
1600
|
case REOP_word_boundary_i:
|
|
1538
1601
|
case REOP_not_word_boundary:
|
|
@@ -1543,67 +1606,25 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
|
|
1543
1606
|
case REOP_save_start:
|
|
1544
1607
|
case REOP_save_end:
|
|
1545
1608
|
case REOP_save_reset:
|
|
1609
|
+
break;
|
|
1546
1610
|
case REOP_back_reference:
|
|
1547
1611
|
case REOP_back_reference_i:
|
|
1548
1612
|
case REOP_backward_back_reference:
|
|
1549
1613
|
case REOP_backward_back_reference_i:
|
|
1614
|
+
val = bc_buf[pos + 1];
|
|
1615
|
+
len += val;
|
|
1616
|
+
need_capture_init = TRUE;
|
|
1550
1617
|
break;
|
|
1551
1618
|
default:
|
|
1552
1619
|
/* safe behavior: we cannot predict the outcome */
|
|
1553
|
-
|
|
1620
|
+
need_capture_init = TRUE;
|
|
1621
|
+
goto done;
|
|
1554
1622
|
}
|
|
1555
1623
|
pos += len;
|
|
1556
1624
|
}
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
/* return -1 if a simple quantifier cannot be used. Otherwise return
|
|
1561
|
-
the number of characters in the atom. */
|
|
1562
|
-
static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
|
|
1563
|
-
{
|
|
1564
|
-
int pos, opcode, len, count;
|
|
1565
|
-
uint32_t val;
|
|
1566
|
-
|
|
1567
|
-
count = 0;
|
|
1568
|
-
pos = 0;
|
|
1569
|
-
while (pos < bc_buf_len) {
|
|
1570
|
-
opcode = bc_buf[pos];
|
|
1571
|
-
len = reopcode_info[opcode].size;
|
|
1572
|
-
switch(opcode) {
|
|
1573
|
-
case REOP_range:
|
|
1574
|
-
case REOP_range_i:
|
|
1575
|
-
val = get_u16(bc_buf + pos + 1);
|
|
1576
|
-
len += val * 4;
|
|
1577
|
-
goto simple_char;
|
|
1578
|
-
case REOP_range32:
|
|
1579
|
-
case REOP_range32_i:
|
|
1580
|
-
val = get_u16(bc_buf + pos + 1);
|
|
1581
|
-
len += val * 8;
|
|
1582
|
-
goto simple_char;
|
|
1583
|
-
case REOP_char:
|
|
1584
|
-
case REOP_char_i:
|
|
1585
|
-
case REOP_char32:
|
|
1586
|
-
case REOP_char32_i:
|
|
1587
|
-
case REOP_dot:
|
|
1588
|
-
case REOP_any:
|
|
1589
|
-
simple_char:
|
|
1590
|
-
count++;
|
|
1591
|
-
break;
|
|
1592
|
-
case REOP_line_start:
|
|
1593
|
-
case REOP_line_start_m:
|
|
1594
|
-
case REOP_line_end:
|
|
1595
|
-
case REOP_line_end_m:
|
|
1596
|
-
case REOP_word_boundary:
|
|
1597
|
-
case REOP_word_boundary_i:
|
|
1598
|
-
case REOP_not_word_boundary:
|
|
1599
|
-
case REOP_not_word_boundary_i:
|
|
1600
|
-
break;
|
|
1601
|
-
default:
|
|
1602
|
-
return -1;
|
|
1603
|
-
}
|
|
1604
|
-
pos += len;
|
|
1605
|
-
}
|
|
1606
|
-
return count;
|
|
1625
|
+
done:
|
|
1626
|
+
*pneed_capture_init = need_capture_init;
|
|
1627
|
+
return need_check_adv;
|
|
1607
1628
|
}
|
|
1608
1629
|
|
|
1609
1630
|
/* '*pp' is the first char after '<' */
|
|
@@ -1662,16 +1683,16 @@ static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp)
|
|
|
1662
1683
|
}
|
|
1663
1684
|
|
|
1664
1685
|
/* if capture_name = NULL: return the number of captures + 1.
|
|
1665
|
-
Otherwise, return the
|
|
1666
|
-
or -1 if none */
|
|
1686
|
+
Otherwise, return the number of matching capture groups */
|
|
1667
1687
|
static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
|
1668
|
-
const char *capture_name)
|
|
1688
|
+
const char *capture_name, BOOL emit_group_index)
|
|
1669
1689
|
{
|
|
1670
1690
|
const uint8_t *p;
|
|
1671
|
-
int capture_index;
|
|
1691
|
+
int capture_index, n;
|
|
1672
1692
|
char name[TMP_BUF_SIZE];
|
|
1673
1693
|
|
|
1674
1694
|
capture_index = 1;
|
|
1695
|
+
n = 0;
|
|
1675
1696
|
*phas_named_captures = 0;
|
|
1676
1697
|
for (p = s->buf_start; p < s->buf_end; p++) {
|
|
1677
1698
|
switch (*p) {
|
|
@@ -1683,8 +1704,11 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
|
|
1683
1704
|
if (capture_name) {
|
|
1684
1705
|
p += 3;
|
|
1685
1706
|
if (re_parse_group_name(name, sizeof(name), &p) == 0) {
|
|
1686
|
-
if (!strcmp(name, capture_name))
|
|
1687
|
-
|
|
1707
|
+
if (!strcmp(name, capture_name)) {
|
|
1708
|
+
if (emit_group_index)
|
|
1709
|
+
dbuf_putc(&s->byte_code, capture_index);
|
|
1710
|
+
n++;
|
|
1711
|
+
}
|
|
1688
1712
|
}
|
|
1689
1713
|
}
|
|
1690
1714
|
capture_index++;
|
|
@@ -1709,17 +1733,18 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
|
|
1709
1733
|
}
|
|
1710
1734
|
}
|
|
1711
1735
|
done:
|
|
1712
|
-
if (capture_name)
|
|
1713
|
-
return
|
|
1714
|
-
else
|
|
1736
|
+
if (capture_name) {
|
|
1737
|
+
return n;
|
|
1738
|
+
} else {
|
|
1715
1739
|
return capture_index;
|
|
1740
|
+
}
|
|
1716
1741
|
}
|
|
1717
1742
|
|
|
1718
1743
|
static int re_count_captures(REParseState *s)
|
|
1719
1744
|
{
|
|
1720
1745
|
if (s->total_capture_count < 0) {
|
|
1721
1746
|
s->total_capture_count = re_parse_captures(s, &s->has_named_captures,
|
|
1722
|
-
NULL);
|
|
1747
|
+
NULL, FALSE);
|
|
1723
1748
|
}
|
|
1724
1749
|
return s->total_capture_count;
|
|
1725
1750
|
}
|
|
@@ -1731,25 +1756,53 @@ static BOOL re_has_named_captures(REParseState *s)
|
|
|
1731
1756
|
return s->has_named_captures;
|
|
1732
1757
|
}
|
|
1733
1758
|
|
|
1734
|
-
static int find_group_name(REParseState *s, const char *name)
|
|
1759
|
+
static int find_group_name(REParseState *s, const char *name, BOOL emit_group_index)
|
|
1735
1760
|
{
|
|
1736
1761
|
const char *p, *buf_end;
|
|
1737
1762
|
size_t len, name_len;
|
|
1738
|
-
int capture_index;
|
|
1763
|
+
int capture_index, n;
|
|
1739
1764
|
|
|
1740
1765
|
p = (char *)s->group_names.buf;
|
|
1741
|
-
if (!p)
|
|
1766
|
+
if (!p)
|
|
1767
|
+
return 0;
|
|
1742
1768
|
buf_end = (char *)s->group_names.buf + s->group_names.size;
|
|
1743
1769
|
name_len = strlen(name);
|
|
1744
1770
|
capture_index = 1;
|
|
1771
|
+
n = 0;
|
|
1745
1772
|
while (p < buf_end) {
|
|
1746
1773
|
len = strlen(p);
|
|
1747
|
-
if (len == name_len && memcmp(name, p, name_len) == 0)
|
|
1748
|
-
|
|
1749
|
-
|
|
1774
|
+
if (len == name_len && memcmp(name, p, name_len) == 0) {
|
|
1775
|
+
if (emit_group_index)
|
|
1776
|
+
dbuf_putc(&s->byte_code, capture_index);
|
|
1777
|
+
n++;
|
|
1778
|
+
}
|
|
1779
|
+
p += len + LRE_GROUP_NAME_TRAILER_LEN;
|
|
1750
1780
|
capture_index++;
|
|
1751
1781
|
}
|
|
1752
|
-
return
|
|
1782
|
+
return n;
|
|
1783
|
+
}
|
|
1784
|
+
|
|
1785
|
+
static BOOL is_duplicate_group_name(REParseState *s, const char *name, int scope)
|
|
1786
|
+
{
|
|
1787
|
+
const char *p, *buf_end;
|
|
1788
|
+
size_t len, name_len;
|
|
1789
|
+
int scope1;
|
|
1790
|
+
|
|
1791
|
+
p = (char *)s->group_names.buf;
|
|
1792
|
+
if (!p)
|
|
1793
|
+
return 0;
|
|
1794
|
+
buf_end = (char *)s->group_names.buf + s->group_names.size;
|
|
1795
|
+
name_len = strlen(name);
|
|
1796
|
+
while (p < buf_end) {
|
|
1797
|
+
len = strlen(p);
|
|
1798
|
+
if (len == name_len && memcmp(name, p, name_len) == 0) {
|
|
1799
|
+
scope1 = (uint8_t)p[len + 1];
|
|
1800
|
+
if (scope == scope1)
|
|
1801
|
+
return TRUE;
|
|
1802
|
+
}
|
|
1803
|
+
p += len + LRE_GROUP_NAME_TRAILER_LEN;
|
|
1804
|
+
}
|
|
1805
|
+
return FALSE;
|
|
1753
1806
|
}
|
|
1754
1807
|
|
|
1755
1808
|
static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir);
|
|
@@ -1793,7 +1846,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
1793
1846
|
{
|
|
1794
1847
|
const uint8_t *p;
|
|
1795
1848
|
int c, last_atom_start, quant_min, quant_max, last_capture_count;
|
|
1796
|
-
BOOL greedy,
|
|
1849
|
+
BOOL greedy, is_neg, is_backward_lookahead;
|
|
1797
1850
|
REStringList cr_s, *cr = &cr_s;
|
|
1798
1851
|
|
|
1799
1852
|
last_atom_start = -1;
|
|
@@ -1921,7 +1974,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
1921
1974
|
p = s->buf_ptr;
|
|
1922
1975
|
if (re_parse_expect(s, &p, ')'))
|
|
1923
1976
|
return -1;
|
|
1924
|
-
re_emit_op(s,
|
|
1977
|
+
re_emit_op(s, REOP_lookahead_match + is_neg);
|
|
1925
1978
|
/* jump after the 'match' after the lookahead is successful */
|
|
1926
1979
|
if (dbuf_error(&s->byte_code))
|
|
1927
1980
|
return -1;
|
|
@@ -1932,12 +1985,16 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
1932
1985
|
&p)) {
|
|
1933
1986
|
return re_parse_error(s, "invalid group name");
|
|
1934
1987
|
}
|
|
1935
|
-
|
|
1988
|
+
/* poor man's method to test duplicate group
|
|
1989
|
+
names. */
|
|
1990
|
+
/* XXX: this method does not catch all the errors*/
|
|
1991
|
+
if (is_duplicate_group_name(s, s->u.tmp_buf, s->group_name_scope)) {
|
|
1936
1992
|
return re_parse_error(s, "duplicate group name");
|
|
1937
1993
|
}
|
|
1938
1994
|
/* group name with a trailing zero */
|
|
1939
1995
|
dbuf_put(&s->group_names, (uint8_t *)s->u.tmp_buf,
|
|
1940
1996
|
strlen(s->u.tmp_buf) + 1);
|
|
1997
|
+
dbuf_putc(&s->group_names, s->group_name_scope);
|
|
1941
1998
|
s->has_named_captures = 1;
|
|
1942
1999
|
goto parse_capture;
|
|
1943
2000
|
} else {
|
|
@@ -1948,6 +2005,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
1948
2005
|
p++;
|
|
1949
2006
|
/* capture without group name */
|
|
1950
2007
|
dbuf_putc(&s->group_names, 0);
|
|
2008
|
+
dbuf_putc(&s->group_names, 0);
|
|
1951
2009
|
parse_capture:
|
|
1952
2010
|
if (s->capture_count >= CAPTURE_COUNT_MAX)
|
|
1953
2011
|
return re_parse_error(s, "too many captures");
|
|
@@ -1974,17 +2032,18 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
1974
2032
|
case 'b':
|
|
1975
2033
|
case 'B':
|
|
1976
2034
|
if (p[1] != 'b') {
|
|
1977
|
-
re_emit_op(s, s->ignore_case ? REOP_not_word_boundary_i : REOP_not_word_boundary);
|
|
2035
|
+
re_emit_op(s, s->ignore_case && s->is_unicode ? REOP_not_word_boundary_i : REOP_not_word_boundary);
|
|
1978
2036
|
} else {
|
|
1979
|
-
re_emit_op(s, s->ignore_case ? REOP_word_boundary_i : REOP_word_boundary);
|
|
2037
|
+
re_emit_op(s, s->ignore_case && s->is_unicode ? REOP_word_boundary_i : REOP_word_boundary);
|
|
1980
2038
|
}
|
|
1981
2039
|
p += 2;
|
|
1982
2040
|
break;
|
|
1983
2041
|
case 'k':
|
|
1984
2042
|
{
|
|
1985
2043
|
const uint8_t *p1;
|
|
1986
|
-
int dummy_res;
|
|
1987
|
-
|
|
2044
|
+
int dummy_res, n;
|
|
2045
|
+
BOOL is_forward;
|
|
2046
|
+
|
|
1988
2047
|
p1 = p;
|
|
1989
2048
|
if (p1[2] != '<') {
|
|
1990
2049
|
/* annex B: we tolerate invalid group names in non
|
|
@@ -2003,21 +2062,33 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
2003
2062
|
else
|
|
2004
2063
|
goto parse_class_atom;
|
|
2005
2064
|
}
|
|
2006
|
-
|
|
2007
|
-
|
|
2065
|
+
is_forward = FALSE;
|
|
2066
|
+
n = find_group_name(s, s->u.tmp_buf, FALSE);
|
|
2067
|
+
if (n == 0) {
|
|
2008
2068
|
/* no capture name parsed before, try to look
|
|
2009
2069
|
after (inefficient, but hopefully not common) */
|
|
2010
|
-
|
|
2011
|
-
if (
|
|
2070
|
+
n = re_parse_captures(s, &dummy_res, s->u.tmp_buf, FALSE);
|
|
2071
|
+
if (n == 0) {
|
|
2012
2072
|
if (s->is_unicode || re_has_named_captures(s))
|
|
2013
2073
|
return re_parse_error(s, "group name not defined");
|
|
2014
2074
|
else
|
|
2015
2075
|
goto parse_class_atom;
|
|
2016
2076
|
}
|
|
2077
|
+
is_forward = TRUE;
|
|
2078
|
+
}
|
|
2079
|
+
last_atom_start = s->byte_code.size;
|
|
2080
|
+
last_capture_count = s->capture_count;
|
|
2081
|
+
|
|
2082
|
+
/* emit back references to all the captures indexes matching the group name */
|
|
2083
|
+
re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, n);
|
|
2084
|
+
if (is_forward) {
|
|
2085
|
+
re_parse_captures(s, &dummy_res, s->u.tmp_buf, TRUE);
|
|
2086
|
+
} else {
|
|
2087
|
+
find_group_name(s, s->u.tmp_buf, TRUE);
|
|
2017
2088
|
}
|
|
2018
2089
|
p = p1;
|
|
2019
2090
|
}
|
|
2020
|
-
|
|
2091
|
+
break;
|
|
2021
2092
|
case '0':
|
|
2022
2093
|
p += 2;
|
|
2023
2094
|
c = 0;
|
|
@@ -2063,11 +2134,11 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
2063
2134
|
}
|
|
2064
2135
|
return re_parse_error(s, "back reference out of range in regular expression");
|
|
2065
2136
|
}
|
|
2066
|
-
emit_back_reference:
|
|
2067
2137
|
last_atom_start = s->byte_code.size;
|
|
2068
2138
|
last_capture_count = s->capture_count;
|
|
2069
2139
|
|
|
2070
|
-
re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case,
|
|
2140
|
+
re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, 1);
|
|
2141
|
+
dbuf_putc(&s->byte_code, c);
|
|
2071
2142
|
}
|
|
2072
2143
|
break;
|
|
2073
2144
|
default:
|
|
@@ -2100,8 +2171,15 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
2100
2171
|
if (is_backward_dir)
|
|
2101
2172
|
re_emit_op(s, REOP_prev);
|
|
2102
2173
|
if (c >= CLASS_RANGE_BASE) {
|
|
2103
|
-
int ret;
|
|
2104
|
-
|
|
2174
|
+
int ret = 0;
|
|
2175
|
+
/* optimize the common 'space' tests */
|
|
2176
|
+
if (c == (CLASS_RANGE_BASE + CHAR_RANGE_s)) {
|
|
2177
|
+
re_emit_op(s, REOP_space);
|
|
2178
|
+
} else if (c == (CLASS_RANGE_BASE + CHAR_RANGE_S)) {
|
|
2179
|
+
re_emit_op(s, REOP_not_space);
|
|
2180
|
+
} else {
|
|
2181
|
+
ret = re_emit_string_list(s, cr);
|
|
2182
|
+
}
|
|
2105
2183
|
re_string_list_free(cr);
|
|
2106
2184
|
if (ret)
|
|
2107
2185
|
return -1;
|
|
@@ -2176,52 +2254,39 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
2176
2254
|
if (last_atom_start < 0) {
|
|
2177
2255
|
return re_parse_error(s, "nothing to repeat");
|
|
2178
2256
|
}
|
|
2179
|
-
|
|
2257
|
+
{
|
|
2258
|
+
BOOL need_capture_init, add_zero_advance_check;
|
|
2180
2259
|
int len, pos;
|
|
2181
|
-
|
|
2182
|
-
if
|
|
2183
|
-
|
|
2184
|
-
|
|
2260
|
+
|
|
2261
|
+
/* the spec tells that if there is no advance when
|
|
2262
|
+
running the atom after the first quant_min times,
|
|
2263
|
+
then there is no match. We remove this test when we
|
|
2264
|
+
are sure the atom always advances the position. */
|
|
2265
|
+
add_zero_advance_check =
|
|
2266
|
+
re_need_check_adv_and_capture_init(&need_capture_init,
|
|
2267
|
+
s->byte_code.buf + last_atom_start,
|
|
2268
|
+
s->byte_code.size - last_atom_start);
|
|
2269
|
+
|
|
2270
|
+
/* general case: need to reset the capture at each
|
|
2271
|
+
iteration. We don't do it if there are no captures
|
|
2272
|
+
in the atom or if we are sure all captures are
|
|
2273
|
+
initialized in the atom. If quant_min = 0, we still
|
|
2274
|
+
need to reset once the captures in case the atom
|
|
2275
|
+
does not match. */
|
|
2276
|
+
if (need_capture_init && last_capture_count != s->capture_count) {
|
|
2277
|
+
if (dbuf_insert(&s->byte_code, last_atom_start, 3))
|
|
2185
2278
|
goto out_of_memory;
|
|
2186
|
-
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
if (dbuf_insert(&s->byte_code, last_atom_start, 17))
|
|
2192
|
-
goto out_of_memory;
|
|
2193
|
-
pos = last_atom_start;
|
|
2194
|
-
s->byte_code.buf[pos++] = REOP_simple_greedy_quant;
|
|
2195
|
-
put_u32(&s->byte_code.buf[pos],
|
|
2196
|
-
s->byte_code.size - last_atom_start - 17);
|
|
2197
|
-
pos += 4;
|
|
2198
|
-
put_u32(&s->byte_code.buf[pos], quant_min);
|
|
2199
|
-
pos += 4;
|
|
2200
|
-
put_u32(&s->byte_code.buf[pos], quant_max);
|
|
2201
|
-
pos += 4;
|
|
2202
|
-
put_u32(&s->byte_code.buf[pos], len);
|
|
2203
|
-
pos += 4;
|
|
2204
|
-
goto done;
|
|
2205
|
-
}
|
|
2279
|
+
int pos = last_atom_start;
|
|
2280
|
+
s->byte_code.buf[pos++] = REOP_save_reset;
|
|
2281
|
+
s->byte_code.buf[pos++] = last_capture_count;
|
|
2282
|
+
s->byte_code.buf[pos++] = s->capture_count - 1;
|
|
2206
2283
|
}
|
|
2207
2284
|
|
|
2208
|
-
if (dbuf_error(&s->byte_code))
|
|
2209
|
-
goto out_of_memory;
|
|
2210
|
-
}
|
|
2211
|
-
/* the spec tells that if there is no advance when
|
|
2212
|
-
running the atom after the first quant_min times,
|
|
2213
|
-
then there is no match. We remove this test when we
|
|
2214
|
-
are sure the atom always advances the position. */
|
|
2215
|
-
add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start,
|
|
2216
|
-
s->byte_code.size - last_atom_start);
|
|
2217
|
-
|
|
2218
|
-
{
|
|
2219
|
-
int len, pos;
|
|
2220
2285
|
len = s->byte_code.size - last_atom_start;
|
|
2221
2286
|
if (quant_min == 0) {
|
|
2222
2287
|
/* need to reset the capture in case the atom is
|
|
2223
2288
|
not executed */
|
|
2224
|
-
if (last_capture_count != s->capture_count) {
|
|
2289
|
+
if (!need_capture_init && last_capture_count != s->capture_count) {
|
|
2225
2290
|
if (dbuf_insert(&s->byte_code, last_atom_start, 3))
|
|
2226
2291
|
goto out_of_memory;
|
|
2227
2292
|
s->byte_code.buf[last_atom_start++] = REOP_save_reset;
|
|
@@ -2232,76 +2297,63 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
2232
2297
|
s->byte_code.size = last_atom_start;
|
|
2233
2298
|
} else if (quant_max == 1 || quant_max == INT32_MAX) {
|
|
2234
2299
|
BOOL has_goto = (quant_max == INT32_MAX);
|
|
2235
|
-
if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check))
|
|
2300
|
+
if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check * 2))
|
|
2236
2301
|
goto out_of_memory;
|
|
2237
2302
|
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
|
|
2238
2303
|
greedy;
|
|
2239
2304
|
put_u32(s->byte_code.buf + last_atom_start + 1,
|
|
2240
|
-
len + 5 * has_goto + add_zero_advance_check * 2);
|
|
2305
|
+
len + 5 * has_goto + add_zero_advance_check * 2 * 2);
|
|
2241
2306
|
if (add_zero_advance_check) {
|
|
2242
|
-
s->byte_code.buf[last_atom_start + 1 + 4] =
|
|
2243
|
-
|
|
2307
|
+
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_set_char_pos;
|
|
2308
|
+
s->byte_code.buf[last_atom_start + 1 + 4 + 1] = 0;
|
|
2309
|
+
re_emit_op_u8(s, REOP_check_advance, 0);
|
|
2244
2310
|
}
|
|
2245
2311
|
if (has_goto)
|
|
2246
2312
|
re_emit_goto(s, REOP_goto, last_atom_start);
|
|
2247
2313
|
} else {
|
|
2248
|
-
if (dbuf_insert(&s->byte_code, last_atom_start,
|
|
2314
|
+
if (dbuf_insert(&s->byte_code, last_atom_start, 11 + add_zero_advance_check * 2))
|
|
2249
2315
|
goto out_of_memory;
|
|
2250
2316
|
pos = last_atom_start;
|
|
2251
|
-
s->byte_code.buf[pos++] = REOP_push_i32;
|
|
2252
|
-
put_u32(s->byte_code.buf + pos, quant_max);
|
|
2253
|
-
pos += 4;
|
|
2254
2317
|
s->byte_code.buf[pos++] = REOP_split_goto_first + greedy;
|
|
2255
|
-
put_u32(s->byte_code.buf + pos,
|
|
2318
|
+
put_u32(s->byte_code.buf + pos, 6 + add_zero_advance_check * 2 + len + 10);
|
|
2256
2319
|
pos += 4;
|
|
2320
|
+
|
|
2321
|
+
s->byte_code.buf[pos++] = REOP_set_i32;
|
|
2322
|
+
s->byte_code.buf[pos++] = 0;
|
|
2323
|
+
put_u32(s->byte_code.buf + pos, quant_max);
|
|
2324
|
+
pos += 4;
|
|
2325
|
+
last_atom_start = pos;
|
|
2257
2326
|
if (add_zero_advance_check) {
|
|
2258
|
-
s->byte_code.buf[pos++] =
|
|
2259
|
-
|
|
2327
|
+
s->byte_code.buf[pos++] = REOP_set_char_pos;
|
|
2328
|
+
s->byte_code.buf[pos++] = 0;
|
|
2260
2329
|
}
|
|
2261
|
-
|
|
2262
|
-
re_emit_op(s, REOP_drop);
|
|
2330
|
+
re_emit_goto_u8_u32(s, (add_zero_advance_check ? REOP_loop_check_adv_split_next_first : REOP_loop_split_next_first) - greedy, 0, quant_max, last_atom_start);
|
|
2263
2331
|
}
|
|
2264
2332
|
} else if (quant_min == 1 && quant_max == INT32_MAX &&
|
|
2265
2333
|
!add_zero_advance_check) {
|
|
2266
2334
|
re_emit_goto(s, REOP_split_next_first - greedy,
|
|
2267
2335
|
last_atom_start);
|
|
2268
2336
|
} else {
|
|
2269
|
-
if (quant_min ==
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
|
|
2273
|
-
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2337
|
+
if (quant_min == quant_max)
|
|
2338
|
+
add_zero_advance_check = FALSE;
|
|
2339
|
+
if (dbuf_insert(&s->byte_code, last_atom_start, 6 + add_zero_advance_check * 2))
|
|
2340
|
+
goto out_of_memory;
|
|
2341
|
+
/* Note: we assume the string length is < INT32_MAX */
|
|
2342
|
+
pos = last_atom_start;
|
|
2343
|
+
s->byte_code.buf[pos++] = REOP_set_i32;
|
|
2344
|
+
s->byte_code.buf[pos++] = 0;
|
|
2345
|
+
put_u32(s->byte_code.buf + pos, quant_max);
|
|
2346
|
+
pos += 4;
|
|
2347
|
+
last_atom_start = pos;
|
|
2348
|
+
if (add_zero_advance_check) {
|
|
2349
|
+
s->byte_code.buf[pos++] = REOP_set_char_pos;
|
|
2350
|
+
s->byte_code.buf[pos++] = 0;
|
|
2280
2351
|
}
|
|
2281
|
-
if (
|
|
2282
|
-
|
|
2283
|
-
|
|
2284
|
-
|
|
2285
|
-
|
|
2286
|
-
re_emit_op(s, REOP_push_char_pos);
|
|
2287
|
-
/* copy the atom */
|
|
2288
|
-
dbuf_put_self(&s->byte_code, last_atom_start, len);
|
|
2289
|
-
if (add_zero_advance_check)
|
|
2290
|
-
re_emit_op(s, REOP_check_advance);
|
|
2291
|
-
re_emit_goto(s, REOP_goto, pos);
|
|
2292
|
-
} else if (quant_max > quant_min) {
|
|
2293
|
-
re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min);
|
|
2294
|
-
pos = s->byte_code.size;
|
|
2295
|
-
re_emit_op_u32(s, REOP_split_goto_first + greedy,
|
|
2296
|
-
len + 5 + add_zero_advance_check * 2);
|
|
2297
|
-
if (add_zero_advance_check)
|
|
2298
|
-
re_emit_op(s, REOP_push_char_pos);
|
|
2299
|
-
/* copy the atom */
|
|
2300
|
-
dbuf_put_self(&s->byte_code, last_atom_start, len);
|
|
2301
|
-
if (add_zero_advance_check)
|
|
2302
|
-
re_emit_op(s, REOP_check_advance);
|
|
2303
|
-
re_emit_goto(s, REOP_loop, pos);
|
|
2304
|
-
re_emit_op(s, REOP_drop);
|
|
2352
|
+
if (quant_min == quant_max) {
|
|
2353
|
+
/* a simple loop is enough */
|
|
2354
|
+
re_emit_goto_u8(s, REOP_loop, 0, last_atom_start);
|
|
2355
|
+
} else {
|
|
2356
|
+
re_emit_goto_u8_u32(s, (add_zero_advance_check ? REOP_loop_check_adv_split_next_first : REOP_loop_split_next_first) - greedy, 0, quant_max - quant_min, last_atom_start);
|
|
2305
2357
|
}
|
|
2306
2358
|
}
|
|
2307
2359
|
last_atom_start = -1;
|
|
@@ -2311,7 +2363,6 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|
|
2311
2363
|
break;
|
|
2312
2364
|
}
|
|
2313
2365
|
}
|
|
2314
|
-
done:
|
|
2315
2366
|
s->buf_ptr = p;
|
|
2316
2367
|
return 0;
|
|
2317
2368
|
out_of_memory:
|
|
@@ -2340,7 +2391,7 @@ static int re_parse_alternative(REParseState *s, BOOL is_backward_dir)
|
|
|
2340
2391
|
speed is not really critical here) */
|
|
2341
2392
|
end = s->byte_code.size;
|
|
2342
2393
|
term_size = end - term_start;
|
|
2343
|
-
if (
|
|
2394
|
+
if (dbuf_claim(&s->byte_code, term_size))
|
|
2344
2395
|
return -1;
|
|
2345
2396
|
memmove(s->byte_code.buf + start + term_size,
|
|
2346
2397
|
s->byte_code.buf + start,
|
|
@@ -2376,6 +2427,8 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
|
|
|
2376
2427
|
|
|
2377
2428
|
pos = re_emit_op_u32(s, REOP_goto, 0);
|
|
2378
2429
|
|
|
2430
|
+
s->group_name_scope++;
|
|
2431
|
+
|
|
2379
2432
|
if (re_parse_alternative(s, is_backward_dir))
|
|
2380
2433
|
return -1;
|
|
2381
2434
|
|
|
@@ -2386,8 +2439,9 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
|
|
|
2386
2439
|
return 0;
|
|
2387
2440
|
}
|
|
2388
2441
|
|
|
2389
|
-
/* the control flow is recursive so
|
|
2390
|
-
|
|
2442
|
+
/* Allocate the registers as a stack. The control flow is recursive so
|
|
2443
|
+
the analysis can be linear. */
|
|
2444
|
+
static int compute_register_count(uint8_t *bc_buf, int bc_buf_len)
|
|
2391
2445
|
{
|
|
2392
2446
|
int stack_size, stack_size_max, pos, opcode, len;
|
|
2393
2447
|
uint32_t val;
|
|
@@ -2403,19 +2457,29 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
|
|
|
2403
2457
|
assert(opcode < REOP_COUNT);
|
|
2404
2458
|
assert((pos + len) <= bc_buf_len);
|
|
2405
2459
|
switch(opcode) {
|
|
2406
|
-
case
|
|
2407
|
-
case
|
|
2460
|
+
case REOP_set_i32:
|
|
2461
|
+
case REOP_set_char_pos:
|
|
2462
|
+
bc_buf[pos + 1] = stack_size;
|
|
2408
2463
|
stack_size++;
|
|
2409
2464
|
if (stack_size > stack_size_max) {
|
|
2410
|
-
if (stack_size >
|
|
2465
|
+
if (stack_size > REGISTER_COUNT_MAX)
|
|
2411
2466
|
return -1;
|
|
2412
2467
|
stack_size_max = stack_size;
|
|
2413
2468
|
}
|
|
2414
2469
|
break;
|
|
2415
|
-
case REOP_drop:
|
|
2416
2470
|
case REOP_check_advance:
|
|
2471
|
+
case REOP_loop:
|
|
2472
|
+
case REOP_loop_split_goto_first:
|
|
2473
|
+
case REOP_loop_split_next_first:
|
|
2417
2474
|
assert(stack_size > 0);
|
|
2418
2475
|
stack_size--;
|
|
2476
|
+
bc_buf[pos + 1] = stack_size;
|
|
2477
|
+
break;
|
|
2478
|
+
case REOP_loop_check_adv_split_goto_first:
|
|
2479
|
+
case REOP_loop_check_adv_split_next_first:
|
|
2480
|
+
assert(stack_size >= 2);
|
|
2481
|
+
stack_size -= 2;
|
|
2482
|
+
bc_buf[pos + 1] = stack_size;
|
|
2419
2483
|
break;
|
|
2420
2484
|
case REOP_range:
|
|
2421
2485
|
case REOP_range_i:
|
|
@@ -2427,6 +2491,13 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
|
|
|
2427
2491
|
val = get_u16(bc_buf + pos + 1);
|
|
2428
2492
|
len += val * 8;
|
|
2429
2493
|
break;
|
|
2494
|
+
case REOP_back_reference:
|
|
2495
|
+
case REOP_back_reference_i:
|
|
2496
|
+
case REOP_backward_back_reference:
|
|
2497
|
+
case REOP_backward_back_reference_i:
|
|
2498
|
+
val = bc_buf[pos + 1];
|
|
2499
|
+
len += val;
|
|
2500
|
+
break;
|
|
2430
2501
|
}
|
|
2431
2502
|
pos += len;
|
|
2432
2503
|
}
|
|
@@ -2453,7 +2524,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|
|
2453
2524
|
void *opaque)
|
|
2454
2525
|
{
|
|
2455
2526
|
REParseState s_s, *s = &s_s;
|
|
2456
|
-
int
|
|
2527
|
+
int register_count;
|
|
2457
2528
|
BOOL is_sticky;
|
|
2458
2529
|
|
|
2459
2530
|
memset(s, 0, sizeof(*s));
|
|
@@ -2514,19 +2585,19 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|
|
2514
2585
|
goto error;
|
|
2515
2586
|
}
|
|
2516
2587
|
|
|
2517
|
-
|
|
2518
|
-
if (
|
|
2588
|
+
register_count = compute_register_count(s->byte_code.buf, s->byte_code.size);
|
|
2589
|
+
if (register_count < 0) {
|
|
2519
2590
|
re_parse_error(s, "too many imbricated quantifiers");
|
|
2520
2591
|
goto error;
|
|
2521
2592
|
}
|
|
2522
2593
|
|
|
2523
2594
|
s->byte_code.buf[RE_HEADER_CAPTURE_COUNT] = s->capture_count;
|
|
2524
|
-
s->byte_code.buf[
|
|
2595
|
+
s->byte_code.buf[RE_HEADER_REGISTER_COUNT] = register_count;
|
|
2525
2596
|
put_u32(s->byte_code.buf + RE_HEADER_BYTECODE_LEN,
|
|
2526
2597
|
s->byte_code.size - RE_HEADER_LEN);
|
|
2527
2598
|
|
|
2528
2599
|
/* add the named groups if needed */
|
|
2529
|
-
if (s->group_names.size > (s->capture_count - 1)) {
|
|
2600
|
+
if (s->group_names.size > (s->capture_count - 1) * LRE_GROUP_NAME_TRAILER_LEN) {
|
|
2530
2601
|
dbuf_put(&s->byte_code, s->group_names.buf, s->group_names.size);
|
|
2531
2602
|
put_u16(s->byte_code.buf + RE_HEADER_FLAGS,
|
|
2532
2603
|
lre_get_flags(s->byte_code.buf) | LRE_FLAG_NAMED_GROUPS);
|
|
@@ -2547,14 +2618,6 @@ static BOOL is_line_terminator(uint32_t c)
|
|
|
2547
2618
|
return (c == '\n' || c == '\r' || c == CP_LS || c == CP_PS);
|
|
2548
2619
|
}
|
|
2549
2620
|
|
|
2550
|
-
static BOOL is_word_char(uint32_t c)
|
|
2551
|
-
{
|
|
2552
|
-
return ((c >= '0' && c <= '9') ||
|
|
2553
|
-
(c >= 'a' && c <= 'z') ||
|
|
2554
|
-
(c >= 'A' && c <= 'Z') ||
|
|
2555
|
-
(c == '_'));
|
|
2556
|
-
}
|
|
2557
|
-
|
|
2558
2621
|
#define GET_CHAR(c, cptr, cbuf_end, cbuf_type) \
|
|
2559
2622
|
do { \
|
|
2560
2623
|
if (cbuf_type == 0) { \
|
|
@@ -2638,23 +2701,26 @@ static BOOL is_word_char(uint32_t c)
|
|
|
2638
2701
|
} \
|
|
2639
2702
|
} while (0)
|
|
2640
2703
|
|
|
2641
|
-
typedef uintptr_t StackInt;
|
|
2642
|
-
|
|
2643
2704
|
typedef enum {
|
|
2644
2705
|
RE_EXEC_STATE_SPLIT,
|
|
2645
2706
|
RE_EXEC_STATE_LOOKAHEAD,
|
|
2646
2707
|
RE_EXEC_STATE_NEGATIVE_LOOKAHEAD,
|
|
2647
|
-
RE_EXEC_STATE_GREEDY_QUANT,
|
|
2648
2708
|
} REExecStateEnum;
|
|
2649
2709
|
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
|
|
2655
|
-
|
|
2656
|
-
|
|
2657
|
-
|
|
2710
|
+
#if INTPTR_MAX >= INT64_MAX
|
|
2711
|
+
#define BP_TYPE_BITS 3
|
|
2712
|
+
#else
|
|
2713
|
+
#define BP_TYPE_BITS 2
|
|
2714
|
+
#endif
|
|
2715
|
+
|
|
2716
|
+
typedef union {
|
|
2717
|
+
uint8_t *ptr;
|
|
2718
|
+
intptr_t val; /* for bp, the low BP_SHIFT bits store REExecStateEnum */
|
|
2719
|
+
struct {
|
|
2720
|
+
uintptr_t val : sizeof(uintptr_t) * 8 - BP_TYPE_BITS;
|
|
2721
|
+
uintptr_t type : BP_TYPE_BITS;
|
|
2722
|
+
} bp;
|
|
2723
|
+
} StackElem;
|
|
2658
2724
|
|
|
2659
2725
|
typedef struct {
|
|
2660
2726
|
const uint8_t *cbuf;
|
|
@@ -2662,55 +2728,15 @@ typedef struct {
|
|
|
2662
2728
|
/* 0 = 8 bit chars, 1 = 16 bit chars, 2 = 16 bit chars, UTF-16 */
|
|
2663
2729
|
int cbuf_type;
|
|
2664
2730
|
int capture_count;
|
|
2665
|
-
int stack_size_max;
|
|
2666
2731
|
BOOL is_unicode;
|
|
2667
2732
|
int interrupt_counter;
|
|
2668
2733
|
void *opaque; /* used for stack overflow check */
|
|
2669
2734
|
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
size_t state_stack_len;
|
|
2735
|
+
StackElem *stack_buf;
|
|
2736
|
+
size_t stack_size;
|
|
2737
|
+
StackElem static_stack_buf[32]; /* static stack to avoid allocation in most cases */
|
|
2674
2738
|
} REExecContext;
|
|
2675
2739
|
|
|
2676
|
-
static int push_state(REExecContext *s,
|
|
2677
|
-
uint8_t **capture,
|
|
2678
|
-
StackInt *stack, size_t stack_len,
|
|
2679
|
-
const uint8_t *pc, const uint8_t *cptr,
|
|
2680
|
-
REExecStateEnum type, size_t count)
|
|
2681
|
-
{
|
|
2682
|
-
REExecState *rs;
|
|
2683
|
-
uint8_t *new_stack;
|
|
2684
|
-
size_t new_size, i, n;
|
|
2685
|
-
StackInt *stack_buf;
|
|
2686
|
-
|
|
2687
|
-
if (unlikely((s->state_stack_len + 1) > s->state_stack_size)) {
|
|
2688
|
-
/* reallocate the stack */
|
|
2689
|
-
new_size = s->state_stack_size * 3 / 2;
|
|
2690
|
-
if (new_size < 8)
|
|
2691
|
-
new_size = 8;
|
|
2692
|
-
new_stack = lre_realloc(s->opaque, s->state_stack, new_size * s->state_size);
|
|
2693
|
-
if (!new_stack)
|
|
2694
|
-
return -1;
|
|
2695
|
-
s->state_stack_size = new_size;
|
|
2696
|
-
s->state_stack = new_stack;
|
|
2697
|
-
}
|
|
2698
|
-
rs = (REExecState *)(s->state_stack + s->state_stack_len * s->state_size);
|
|
2699
|
-
s->state_stack_len++;
|
|
2700
|
-
rs->type = type;
|
|
2701
|
-
rs->count = count;
|
|
2702
|
-
rs->stack_len = stack_len;
|
|
2703
|
-
rs->cptr = cptr;
|
|
2704
|
-
rs->pc = pc;
|
|
2705
|
-
n = 2 * s->capture_count;
|
|
2706
|
-
for(i = 0; i < n; i++)
|
|
2707
|
-
rs->buf[i] = capture[i];
|
|
2708
|
-
stack_buf = (StackInt *)(rs->buf + n);
|
|
2709
|
-
for(i = 0; i < stack_len; i++)
|
|
2710
|
-
stack_buf[i] = stack[i];
|
|
2711
|
-
return 0;
|
|
2712
|
-
}
|
|
2713
|
-
|
|
2714
2740
|
static int lre_poll_timeout(REExecContext *s)
|
|
2715
2741
|
{
|
|
2716
2742
|
if (unlikely(--s->interrupt_counter <= 0)) {
|
|
@@ -2721,95 +2747,182 @@ static int lre_poll_timeout(REExecContext *s)
|
|
|
2721
2747
|
return 0;
|
|
2722
2748
|
}
|
|
2723
2749
|
|
|
2750
|
+
static no_inline int stack_realloc(REExecContext *s, size_t n)
|
|
2751
|
+
{
|
|
2752
|
+
StackElem *new_stack;
|
|
2753
|
+
size_t new_size;
|
|
2754
|
+
new_size = s->stack_size * 3 / 2;
|
|
2755
|
+
if (new_size < n)
|
|
2756
|
+
new_size = n;
|
|
2757
|
+
if (s->stack_buf == s->static_stack_buf) {
|
|
2758
|
+
new_stack = lre_realloc(s->opaque, NULL, new_size * sizeof(StackElem));
|
|
2759
|
+
if (!new_stack)
|
|
2760
|
+
return -1;
|
|
2761
|
+
/* XXX: could use correct size */
|
|
2762
|
+
memcpy(new_stack, s->stack_buf, s->stack_size * sizeof(StackElem));
|
|
2763
|
+
} else {
|
|
2764
|
+
new_stack = lre_realloc(s->opaque, s->stack_buf, new_size * sizeof(StackElem));
|
|
2765
|
+
if (!new_stack)
|
|
2766
|
+
return -1;
|
|
2767
|
+
}
|
|
2768
|
+
s->stack_size = new_size;
|
|
2769
|
+
s->stack_buf = new_stack;
|
|
2770
|
+
return 0;
|
|
2771
|
+
}
|
|
2772
|
+
|
|
2724
2773
|
/* return 1 if match, 0 if not match or < 0 if error. */
|
|
2725
2774
|
static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|
2726
|
-
|
|
2727
|
-
const uint8_t *pc, const uint8_t *cptr,
|
|
2728
|
-
BOOL no_recurse)
|
|
2775
|
+
const uint8_t *pc, const uint8_t *cptr)
|
|
2729
2776
|
{
|
|
2730
|
-
int opcode
|
|
2777
|
+
int opcode;
|
|
2731
2778
|
int cbuf_type;
|
|
2732
|
-
uint32_t val, c;
|
|
2779
|
+
uint32_t val, c, idx;
|
|
2733
2780
|
const uint8_t *cbuf_end;
|
|
2734
|
-
|
|
2781
|
+
StackElem *sp, *bp, *stack_end;
|
|
2782
|
+
#ifdef DUMP_EXEC
|
|
2783
|
+
const uint8_t *pc_start = pc; /* TEST */
|
|
2784
|
+
#endif
|
|
2735
2785
|
cbuf_type = s->cbuf_type;
|
|
2736
2786
|
cbuf_end = s->cbuf_end;
|
|
2737
2787
|
|
|
2788
|
+
sp = s->stack_buf;
|
|
2789
|
+
bp = s->stack_buf;
|
|
2790
|
+
stack_end = s->stack_buf + s->stack_size;
|
|
2791
|
+
|
|
2792
|
+
#define CHECK_STACK_SPACE(n) \
|
|
2793
|
+
if (unlikely((stack_end - sp) < (n))) { \
|
|
2794
|
+
size_t saved_sp = sp - s->stack_buf; \
|
|
2795
|
+
size_t saved_bp = bp - s->stack_buf; \
|
|
2796
|
+
if (stack_realloc(s, sp - s->stack_buf + (n))) \
|
|
2797
|
+
return LRE_RET_MEMORY_ERROR; \
|
|
2798
|
+
stack_end = s->stack_buf + s->stack_size; \
|
|
2799
|
+
sp = s->stack_buf + saved_sp; \
|
|
2800
|
+
bp = s->stack_buf + saved_bp; \
|
|
2801
|
+
}
|
|
2802
|
+
|
|
2803
|
+
/* XXX: could test if the value was saved to reduce the stack size
|
|
2804
|
+
but slower */
|
|
2805
|
+
#define SAVE_CAPTURE(idx, value) \
|
|
2806
|
+
{ \
|
|
2807
|
+
CHECK_STACK_SPACE(2); \
|
|
2808
|
+
sp[0].val = idx; \
|
|
2809
|
+
sp[1].ptr = capture[idx]; \
|
|
2810
|
+
sp += 2; \
|
|
2811
|
+
capture[idx] = (value); \
|
|
2812
|
+
}
|
|
2813
|
+
|
|
2814
|
+
/* avoid saving the previous value if already saved */
|
|
2815
|
+
#define SAVE_CAPTURE_CHECK(idx, value) \
|
|
2816
|
+
{ \
|
|
2817
|
+
StackElem *sp1; \
|
|
2818
|
+
sp1 = sp; \
|
|
2819
|
+
for(;;) { \
|
|
2820
|
+
if (sp1 > bp) { \
|
|
2821
|
+
if (sp1[-2].val == idx) \
|
|
2822
|
+
break; \
|
|
2823
|
+
sp1 -= 2; \
|
|
2824
|
+
} else { \
|
|
2825
|
+
CHECK_STACK_SPACE(2); \
|
|
2826
|
+
sp[0].val = idx; \
|
|
2827
|
+
sp[1].ptr = capture[idx]; \
|
|
2828
|
+
sp += 2; \
|
|
2829
|
+
break; \
|
|
2830
|
+
} \
|
|
2831
|
+
} \
|
|
2832
|
+
capture[idx] = (value); \
|
|
2833
|
+
}
|
|
2834
|
+
|
|
2835
|
+
|
|
2836
|
+
#ifdef DUMP_EXEC
|
|
2837
|
+
printf("%5s %5s %5s %5s %s\n", "PC", "CP", "BP", "SP", "OPCODE");
|
|
2838
|
+
#endif
|
|
2738
2839
|
for(;;) {
|
|
2739
|
-
// printf("top=%p: pc=%d\n", th_list.top, (int)(pc - (bc_buf + RE_HEADER_LEN)));
|
|
2740
2840
|
opcode = *pc++;
|
|
2841
|
+
#ifdef DUMP_EXEC
|
|
2842
|
+
printf("%5ld %5ld %5ld %5ld %s\n",
|
|
2843
|
+
pc - 1 - pc_start,
|
|
2844
|
+
cbuf_type == 0 ? cptr - s->cbuf : (cptr - s->cbuf) / 2,
|
|
2845
|
+
bp - s->stack_buf,
|
|
2846
|
+
sp - s->stack_buf,
|
|
2847
|
+
reopcode_info[opcode].name);
|
|
2848
|
+
#endif
|
|
2741
2849
|
switch(opcode) {
|
|
2742
2850
|
case REOP_match:
|
|
2743
|
-
|
|
2744
|
-
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
goto recurse;
|
|
2749
|
-
no_match:
|
|
2750
|
-
if (no_recurse)
|
|
2851
|
+
return 1;
|
|
2852
|
+
no_match:
|
|
2853
|
+
for(;;) {
|
|
2854
|
+
REExecStateEnum type;
|
|
2855
|
+
if (bp == s->stack_buf)
|
|
2751
2856
|
return 0;
|
|
2752
|
-
|
|
2753
|
-
|
|
2857
|
+
/* undo the modifications to capture[] */
|
|
2858
|
+
while (sp > bp) {
|
|
2859
|
+
capture[sp[-2].val] = sp[-1].ptr;
|
|
2860
|
+
sp -= 2;
|
|
2861
|
+
}
|
|
2862
|
+
|
|
2863
|
+
pc = sp[-3].ptr;
|
|
2864
|
+
cptr = sp[-2].ptr;
|
|
2865
|
+
type = sp[-1].bp.type;
|
|
2866
|
+
bp = s->stack_buf + sp[-1].bp.val;
|
|
2867
|
+
sp -= 3;
|
|
2868
|
+
if (type != RE_EXEC_STATE_LOOKAHEAD)
|
|
2869
|
+
break;
|
|
2870
|
+
}
|
|
2871
|
+
if (lre_poll_timeout(s))
|
|
2872
|
+
return LRE_RET_TIMEOUT;
|
|
2873
|
+
break;
|
|
2874
|
+
case REOP_lookahead_match:
|
|
2875
|
+
/* pop all the saved states until reaching the start of
|
|
2876
|
+
the lookahead and keep the updated captures and
|
|
2877
|
+
variables and the corresponding undo info. */
|
|
2878
|
+
{
|
|
2879
|
+
StackElem *sp1, *sp_top, *next_sp;
|
|
2880
|
+
REExecStateEnum type;
|
|
2881
|
+
|
|
2882
|
+
sp_top = sp;
|
|
2754
2883
|
for(;;) {
|
|
2755
|
-
|
|
2756
|
-
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
|
|
2764
|
-
|
|
2765
|
-
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
}
|
|
2775
|
-
} else if (rs->type == RE_EXEC_STATE_GREEDY_QUANT) {
|
|
2776
|
-
if (!ret) {
|
|
2777
|
-
uint32_t char_count, i;
|
|
2778
|
-
memcpy(capture, rs->buf,
|
|
2779
|
-
sizeof(capture[0]) * 2 * s->capture_count);
|
|
2780
|
-
stack_len = rs->stack_len;
|
|
2781
|
-
memcpy(stack, rs->buf + 2 * s->capture_count,
|
|
2782
|
-
stack_len * sizeof(stack[0]));
|
|
2783
|
-
pc = rs->pc;
|
|
2784
|
-
cptr = rs->cptr;
|
|
2785
|
-
/* go backward */
|
|
2786
|
-
char_count = get_u32(pc + 12);
|
|
2787
|
-
for(i = 0; i < char_count; i++) {
|
|
2788
|
-
PREV_CHAR(cptr, s->cbuf, cbuf_type);
|
|
2789
|
-
}
|
|
2790
|
-
pc = (pc + 16) + (int)get_u32(pc);
|
|
2791
|
-
rs->cptr = cptr;
|
|
2792
|
-
rs->count--;
|
|
2793
|
-
if (rs->count == 0) {
|
|
2794
|
-
s->state_stack_len--;
|
|
2795
|
-
}
|
|
2796
|
-
break;
|
|
2797
|
-
}
|
|
2798
|
-
} else {
|
|
2799
|
-
ret = ((rs->type == RE_EXEC_STATE_LOOKAHEAD && ret) ||
|
|
2800
|
-
(rs->type == RE_EXEC_STATE_NEGATIVE_LOOKAHEAD && !ret));
|
|
2801
|
-
if (ret) {
|
|
2802
|
-
/* keep the capture in case of positive lookahead */
|
|
2803
|
-
if (rs->type == RE_EXEC_STATE_LOOKAHEAD)
|
|
2804
|
-
goto pop_state1;
|
|
2805
|
-
else
|
|
2806
|
-
goto pop_state;
|
|
2807
|
-
}
|
|
2884
|
+
sp1 = sp;
|
|
2885
|
+
sp = bp;
|
|
2886
|
+
pc = sp[-3].ptr;
|
|
2887
|
+
cptr = sp[-2].ptr;
|
|
2888
|
+
type = sp[-1].bp.type;
|
|
2889
|
+
bp = s->stack_buf + sp[-1].bp.val;
|
|
2890
|
+
sp[-1].ptr = (void *)sp1; /* save the next value for the copy step */
|
|
2891
|
+
sp -= 3;
|
|
2892
|
+
if (type == RE_EXEC_STATE_LOOKAHEAD)
|
|
2893
|
+
break;
|
|
2894
|
+
}
|
|
2895
|
+
if (sp != s->stack_buf) {
|
|
2896
|
+
/* keep the undo info if there is a saved state */
|
|
2897
|
+
sp1 = sp;
|
|
2898
|
+
while (sp1 < sp_top) {
|
|
2899
|
+
next_sp = (void *)sp1[2].ptr;
|
|
2900
|
+
sp1 += 3;
|
|
2901
|
+
while (sp1 < next_sp)
|
|
2902
|
+
*sp++ = *sp1++;
|
|
2808
2903
|
}
|
|
2809
|
-
s->state_stack_len--;
|
|
2810
2904
|
}
|
|
2811
2905
|
}
|
|
2812
2906
|
break;
|
|
2907
|
+
case REOP_negative_lookahead_match:
|
|
2908
|
+
/* pop all the saved states until reaching start of the negative lookahead */
|
|
2909
|
+
for(;;) {
|
|
2910
|
+
REExecStateEnum type;
|
|
2911
|
+
type = bp[-1].bp.type;
|
|
2912
|
+
/* undo the modifications to capture[] */
|
|
2913
|
+
while (sp > bp) {
|
|
2914
|
+
capture[sp[-2].val] = sp[-1].ptr;
|
|
2915
|
+
sp -= 2;
|
|
2916
|
+
}
|
|
2917
|
+
pc = sp[-3].ptr;
|
|
2918
|
+
cptr = sp[-2].ptr;
|
|
2919
|
+
type = sp[-1].bp.type;
|
|
2920
|
+
bp = s->stack_buf + sp[-1].bp.val;
|
|
2921
|
+
sp -= 3;
|
|
2922
|
+
if (type == RE_EXEC_STATE_NEGATIVE_LOOKAHEAD)
|
|
2923
|
+
break;
|
|
2924
|
+
}
|
|
2925
|
+
goto no_match;
|
|
2813
2926
|
case REOP_char32:
|
|
2814
2927
|
case REOP_char32_i:
|
|
2815
2928
|
val = get_u32(pc);
|
|
@@ -2842,24 +2955,27 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|
|
2842
2955
|
pc1 = pc;
|
|
2843
2956
|
pc = pc + (int)val;
|
|
2844
2957
|
}
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
|
|
2849
|
-
|
|
2958
|
+
CHECK_STACK_SPACE(3);
|
|
2959
|
+
sp[0].ptr = (uint8_t *)pc1;
|
|
2960
|
+
sp[1].ptr = (uint8_t *)cptr;
|
|
2961
|
+
sp[2].bp.val = bp - s->stack_buf;
|
|
2962
|
+
sp[2].bp.type = RE_EXEC_STATE_SPLIT;
|
|
2963
|
+
sp += 3;
|
|
2964
|
+
bp = sp;
|
|
2850
2965
|
}
|
|
2966
|
+
break;
|
|
2851
2967
|
case REOP_lookahead:
|
|
2852
2968
|
case REOP_negative_lookahead:
|
|
2853
2969
|
val = get_u32(pc);
|
|
2854
2970
|
pc += 4;
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
2860
|
-
|
|
2971
|
+
CHECK_STACK_SPACE(3);
|
|
2972
|
+
sp[0].ptr = (uint8_t *)(pc + (int)val);
|
|
2973
|
+
sp[1].ptr = (uint8_t *)cptr;
|
|
2974
|
+
sp[2].bp.val = bp - s->stack_buf;
|
|
2975
|
+
sp[2].bp.type = RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead;
|
|
2976
|
+
sp += 3;
|
|
2977
|
+
bp = sp;
|
|
2861
2978
|
break;
|
|
2862
|
-
|
|
2863
2979
|
case REOP_goto:
|
|
2864
2980
|
val = get_u32(pc);
|
|
2865
2981
|
pc += 4 + (int)val;
|
|
@@ -2898,11 +3014,26 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|
|
2898
3014
|
goto no_match;
|
|
2899
3015
|
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
|
|
2900
3016
|
break;
|
|
3017
|
+
case REOP_space:
|
|
3018
|
+
if (cptr == cbuf_end)
|
|
3019
|
+
goto no_match;
|
|
3020
|
+
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
|
|
3021
|
+
if (!lre_is_space(c))
|
|
3022
|
+
goto no_match;
|
|
3023
|
+
break;
|
|
3024
|
+
case REOP_not_space:
|
|
3025
|
+
if (cptr == cbuf_end)
|
|
3026
|
+
goto no_match;
|
|
3027
|
+
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
|
|
3028
|
+
if (lre_is_space(c))
|
|
3029
|
+
goto no_match;
|
|
3030
|
+
break;
|
|
2901
3031
|
case REOP_save_start:
|
|
2902
3032
|
case REOP_save_end:
|
|
2903
3033
|
val = *pc++;
|
|
2904
3034
|
assert(val < s->capture_count);
|
|
2905
|
-
|
|
3035
|
+
idx = 2 * val + opcode - REOP_save_start;
|
|
3036
|
+
SAVE_CAPTURE(idx, (uint8_t *)cptr);
|
|
2906
3037
|
break;
|
|
2907
3038
|
case REOP_save_reset:
|
|
2908
3039
|
{
|
|
@@ -2911,35 +3042,97 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|
|
2911
3042
|
val2 = pc[1];
|
|
2912
3043
|
pc += 2;
|
|
2913
3044
|
assert(val2 < s->capture_count);
|
|
3045
|
+
CHECK_STACK_SPACE(2 * (val2 - val + 1));
|
|
2914
3046
|
while (val <= val2) {
|
|
2915
|
-
|
|
2916
|
-
|
|
3047
|
+
idx = 2 * val;
|
|
3048
|
+
SAVE_CAPTURE(idx, NULL);
|
|
3049
|
+
idx = 2 * val + 1;
|
|
3050
|
+
SAVE_CAPTURE(idx, NULL);
|
|
2917
3051
|
val++;
|
|
2918
3052
|
}
|
|
2919
3053
|
}
|
|
2920
3054
|
break;
|
|
2921
|
-
case
|
|
2922
|
-
|
|
2923
|
-
pc
|
|
2924
|
-
|
|
2925
|
-
|
|
2926
|
-
case REOP_drop:
|
|
2927
|
-
stack_len--;
|
|
3055
|
+
case REOP_set_i32:
|
|
3056
|
+
idx = 2 * s->capture_count + pc[0];
|
|
3057
|
+
val = get_u32(pc + 1);
|
|
3058
|
+
pc += 5;
|
|
3059
|
+
SAVE_CAPTURE_CHECK(idx, (void *)(uintptr_t)val);
|
|
2928
3060
|
break;
|
|
2929
3061
|
case REOP_loop:
|
|
2930
|
-
|
|
2931
|
-
|
|
2932
|
-
|
|
2933
|
-
pc
|
|
2934
|
-
|
|
2935
|
-
|
|
3062
|
+
{
|
|
3063
|
+
uint32_t val2;
|
|
3064
|
+
idx = 2 * s->capture_count + pc[0];
|
|
3065
|
+
val = get_u32(pc + 1);
|
|
3066
|
+
pc += 5;
|
|
3067
|
+
|
|
3068
|
+
val2 = (uintptr_t)capture[idx] - 1;
|
|
3069
|
+
SAVE_CAPTURE_CHECK(idx, (void *)(uintptr_t)val2);
|
|
3070
|
+
if (val2 != 0) {
|
|
3071
|
+
pc += (int)val;
|
|
3072
|
+
if (lre_poll_timeout(s))
|
|
3073
|
+
return LRE_RET_TIMEOUT;
|
|
3074
|
+
}
|
|
3075
|
+
}
|
|
3076
|
+
break;
|
|
3077
|
+
case REOP_loop_split_goto_first:
|
|
3078
|
+
case REOP_loop_split_next_first:
|
|
3079
|
+
case REOP_loop_check_adv_split_goto_first:
|
|
3080
|
+
case REOP_loop_check_adv_split_next_first:
|
|
3081
|
+
{
|
|
3082
|
+
const uint8_t *pc1;
|
|
3083
|
+
uint32_t val2, limit;
|
|
3084
|
+
idx = 2 * s->capture_count + pc[0];
|
|
3085
|
+
limit = get_u32(pc + 1);
|
|
3086
|
+
val = get_u32(pc + 5);
|
|
3087
|
+
pc += 9;
|
|
3088
|
+
|
|
3089
|
+
/* decrement the counter */
|
|
3090
|
+
val2 = (uintptr_t)capture[idx] - 1;
|
|
3091
|
+
SAVE_CAPTURE_CHECK(idx, (void *)(uintptr_t)val2);
|
|
3092
|
+
|
|
3093
|
+
if (val2 > limit) {
|
|
3094
|
+
/* normal loop if counter > limit */
|
|
3095
|
+
pc += (int)val;
|
|
3096
|
+
if (lre_poll_timeout(s))
|
|
3097
|
+
return LRE_RET_TIMEOUT;
|
|
3098
|
+
} else {
|
|
3099
|
+
/* check advance */
|
|
3100
|
+
if ((opcode == REOP_loop_check_adv_split_goto_first ||
|
|
3101
|
+
opcode == REOP_loop_check_adv_split_next_first) &&
|
|
3102
|
+
capture[idx + 1] == cptr &&
|
|
3103
|
+
val2 != limit) {
|
|
3104
|
+
goto no_match;
|
|
3105
|
+
}
|
|
3106
|
+
|
|
3107
|
+
/* otherwise conditional split */
|
|
3108
|
+
if (val2 != 0) {
|
|
3109
|
+
if (opcode == REOP_loop_split_next_first ||
|
|
3110
|
+
opcode == REOP_loop_check_adv_split_next_first) {
|
|
3111
|
+
pc1 = pc + (int)val;
|
|
3112
|
+
} else {
|
|
3113
|
+
pc1 = pc;
|
|
3114
|
+
pc = pc + (int)val;
|
|
3115
|
+
}
|
|
3116
|
+
CHECK_STACK_SPACE(3);
|
|
3117
|
+
sp[0].ptr = (uint8_t *)pc1;
|
|
3118
|
+
sp[1].ptr = (uint8_t *)cptr;
|
|
3119
|
+
sp[2].bp.val = bp - s->stack_buf;
|
|
3120
|
+
sp[2].bp.type = RE_EXEC_STATE_SPLIT;
|
|
3121
|
+
sp += 3;
|
|
3122
|
+
bp = sp;
|
|
3123
|
+
}
|
|
3124
|
+
}
|
|
2936
3125
|
}
|
|
2937
3126
|
break;
|
|
2938
|
-
case
|
|
2939
|
-
|
|
3127
|
+
case REOP_set_char_pos:
|
|
3128
|
+
idx = 2 * s->capture_count + pc[0];
|
|
3129
|
+
pc++;
|
|
3130
|
+
SAVE_CAPTURE_CHECK(idx, (uint8_t *)cptr);
|
|
2940
3131
|
break;
|
|
2941
3132
|
case REOP_check_advance:
|
|
2942
|
-
|
|
3133
|
+
idx = 2 * s->capture_count + pc[0];
|
|
3134
|
+
pc++;
|
|
3135
|
+
if (capture[idx] == cptr)
|
|
2943
3136
|
goto no_match;
|
|
2944
3137
|
break;
|
|
2945
3138
|
case REOP_word_boundary:
|
|
@@ -2955,18 +3148,22 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|
|
2955
3148
|
v1 = FALSE;
|
|
2956
3149
|
} else {
|
|
2957
3150
|
PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
|
|
2958
|
-
if (
|
|
2959
|
-
|
|
2960
|
-
|
|
3151
|
+
if (c < 256) {
|
|
3152
|
+
v1 = (lre_is_word_byte(c) != 0);
|
|
3153
|
+
} else {
|
|
3154
|
+
v1 = ignore_case && (c == 0x017f || c == 0x212a);
|
|
3155
|
+
}
|
|
2961
3156
|
}
|
|
2962
3157
|
/* current char */
|
|
2963
3158
|
if (cptr >= cbuf_end) {
|
|
2964
3159
|
v2 = FALSE;
|
|
2965
3160
|
} else {
|
|
2966
3161
|
PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
|
|
2967
|
-
if (
|
|
2968
|
-
|
|
2969
|
-
|
|
3162
|
+
if (c < 256) {
|
|
3163
|
+
v2 = (lre_is_word_byte(c) != 0);
|
|
3164
|
+
} else {
|
|
3165
|
+
v2 = ignore_case && (c == 0x017f || c == 0x212a);
|
|
3166
|
+
}
|
|
2970
3167
|
}
|
|
2971
3168
|
if (v1 ^ v2 ^ is_boundary)
|
|
2972
3169
|
goto no_match;
|
|
@@ -2978,43 +3175,53 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|
|
2978
3175
|
case REOP_backward_back_reference_i:
|
|
2979
3176
|
{
|
|
2980
3177
|
const uint8_t *cptr1, *cptr1_end, *cptr1_start;
|
|
3178
|
+
const uint8_t *pc1;
|
|
2981
3179
|
uint32_t c1, c2;
|
|
3180
|
+
int i, n;
|
|
2982
3181
|
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3000
|
-
|
|
3001
|
-
|
|
3002
|
-
|
|
3003
|
-
|
|
3004
|
-
|
|
3005
|
-
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
|
|
3009
|
-
|
|
3010
|
-
|
|
3011
|
-
|
|
3012
|
-
|
|
3013
|
-
|
|
3014
|
-
|
|
3182
|
+
n = *pc++;
|
|
3183
|
+
pc1 = pc;
|
|
3184
|
+
pc += n;
|
|
3185
|
+
|
|
3186
|
+
for(i = 0; i < n; i++) {
|
|
3187
|
+
val = pc1[i];
|
|
3188
|
+
if (val >= s->capture_count)
|
|
3189
|
+
goto no_match;
|
|
3190
|
+
cptr1_start = capture[2 * val];
|
|
3191
|
+
cptr1_end = capture[2 * val + 1];
|
|
3192
|
+
/* test the first not empty capture */
|
|
3193
|
+
if (cptr1_start && cptr1_end) {
|
|
3194
|
+
if (opcode == REOP_back_reference ||
|
|
3195
|
+
opcode == REOP_back_reference_i) {
|
|
3196
|
+
cptr1 = cptr1_start;
|
|
3197
|
+
while (cptr1 < cptr1_end) {
|
|
3198
|
+
if (cptr >= cbuf_end)
|
|
3199
|
+
goto no_match;
|
|
3200
|
+
GET_CHAR(c1, cptr1, cptr1_end, cbuf_type);
|
|
3201
|
+
GET_CHAR(c2, cptr, cbuf_end, cbuf_type);
|
|
3202
|
+
if (opcode == REOP_back_reference_i) {
|
|
3203
|
+
c1 = lre_canonicalize(c1, s->is_unicode);
|
|
3204
|
+
c2 = lre_canonicalize(c2, s->is_unicode);
|
|
3205
|
+
}
|
|
3206
|
+
if (c1 != c2)
|
|
3207
|
+
goto no_match;
|
|
3208
|
+
}
|
|
3209
|
+
} else {
|
|
3210
|
+
cptr1 = cptr1_end;
|
|
3211
|
+
while (cptr1 > cptr1_start) {
|
|
3212
|
+
if (cptr == s->cbuf)
|
|
3213
|
+
goto no_match;
|
|
3214
|
+
GET_PREV_CHAR(c1, cptr1, cptr1_start, cbuf_type);
|
|
3215
|
+
GET_PREV_CHAR(c2, cptr, s->cbuf, cbuf_type);
|
|
3216
|
+
if (opcode == REOP_backward_back_reference_i) {
|
|
3217
|
+
c1 = lre_canonicalize(c1, s->is_unicode);
|
|
3218
|
+
c2 = lre_canonicalize(c2, s->is_unicode);
|
|
3219
|
+
}
|
|
3220
|
+
if (c1 != c2)
|
|
3221
|
+
goto no_match;
|
|
3222
|
+
}
|
|
3015
3223
|
}
|
|
3016
|
-
|
|
3017
|
-
goto no_match;
|
|
3224
|
+
break;
|
|
3018
3225
|
}
|
|
3019
3226
|
}
|
|
3020
3227
|
}
|
|
@@ -3104,50 +3311,10 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|
|
3104
3311
|
goto no_match;
|
|
3105
3312
|
PREV_CHAR(cptr, s->cbuf, cbuf_type);
|
|
3106
3313
|
break;
|
|
3107
|
-
case REOP_simple_greedy_quant:
|
|
3108
|
-
{
|
|
3109
|
-
uint32_t next_pos, quant_min, quant_max;
|
|
3110
|
-
size_t q;
|
|
3111
|
-
intptr_t res;
|
|
3112
|
-
const uint8_t *pc1;
|
|
3113
|
-
|
|
3114
|
-
next_pos = get_u32(pc);
|
|
3115
|
-
quant_min = get_u32(pc + 4);
|
|
3116
|
-
quant_max = get_u32(pc + 8);
|
|
3117
|
-
pc += 16;
|
|
3118
|
-
pc1 = pc;
|
|
3119
|
-
pc += (int)next_pos;
|
|
3120
|
-
|
|
3121
|
-
q = 0;
|
|
3122
|
-
for(;;) {
|
|
3123
|
-
if (lre_poll_timeout(s))
|
|
3124
|
-
return LRE_RET_TIMEOUT;
|
|
3125
|
-
res = lre_exec_backtrack(s, capture, stack, stack_len,
|
|
3126
|
-
pc1, cptr, TRUE);
|
|
3127
|
-
if (res == LRE_RET_MEMORY_ERROR ||
|
|
3128
|
-
res == LRE_RET_TIMEOUT)
|
|
3129
|
-
return res;
|
|
3130
|
-
if (!res)
|
|
3131
|
-
break;
|
|
3132
|
-
cptr = (uint8_t *)res;
|
|
3133
|
-
q++;
|
|
3134
|
-
if (q >= quant_max && quant_max != INT32_MAX)
|
|
3135
|
-
break;
|
|
3136
|
-
}
|
|
3137
|
-
if (q < quant_min)
|
|
3138
|
-
goto no_match;
|
|
3139
|
-
if (q > quant_min) {
|
|
3140
|
-
/* will examine all matches down to quant_min */
|
|
3141
|
-
ret = push_state(s, capture, stack, stack_len,
|
|
3142
|
-
pc1 - 16, cptr,
|
|
3143
|
-
RE_EXEC_STATE_GREEDY_QUANT,
|
|
3144
|
-
q - quant_min);
|
|
3145
|
-
if (ret < 0)
|
|
3146
|
-
return LRE_RET_MEMORY_ERROR;
|
|
3147
|
-
}
|
|
3148
|
-
}
|
|
3149
|
-
break;
|
|
3150
3314
|
default:
|
|
3315
|
+
#ifdef DUMP_EXEC
|
|
3316
|
+
printf("unknown opcode pc=%ld\n", pc - 1 - pc_start);
|
|
3317
|
+
#endif
|
|
3151
3318
|
abort();
|
|
3152
3319
|
}
|
|
3153
3320
|
}
|
|
@@ -3161,14 +3328,12 @@ int lre_exec(uint8_t **capture,
|
|
|
3161
3328
|
int cbuf_type, void *opaque)
|
|
3162
3329
|
{
|
|
3163
3330
|
REExecContext s_s, *s = &s_s;
|
|
3164
|
-
int re_flags, i,
|
|
3165
|
-
StackInt *stack_buf;
|
|
3331
|
+
int re_flags, i, ret;
|
|
3166
3332
|
const uint8_t *cptr;
|
|
3167
3333
|
|
|
3168
3334
|
re_flags = lre_get_flags(bc_buf);
|
|
3169
3335
|
s->is_unicode = (re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0;
|
|
3170
3336
|
s->capture_count = bc_buf[RE_HEADER_CAPTURE_COUNT];
|
|
3171
|
-
s->stack_size_max = bc_buf[RE_HEADER_STACK_SIZE];
|
|
3172
3337
|
s->cbuf = cbuf;
|
|
3173
3338
|
s->cbuf_end = cbuf + (clen << cbuf_type);
|
|
3174
3339
|
s->cbuf_type = cbuf_type;
|
|
@@ -3177,17 +3342,11 @@ int lre_exec(uint8_t **capture,
|
|
|
3177
3342
|
s->interrupt_counter = INTERRUPT_COUNTER_INIT;
|
|
3178
3343
|
s->opaque = opaque;
|
|
3179
3344
|
|
|
3180
|
-
s->
|
|
3181
|
-
|
|
3182
|
-
s->stack_size_max * sizeof(stack_buf[0]);
|
|
3183
|
-
s->state_stack = NULL;
|
|
3184
|
-
s->state_stack_len = 0;
|
|
3185
|
-
s->state_stack_size = 0;
|
|
3345
|
+
s->stack_buf = s->static_stack_buf;
|
|
3346
|
+
s->stack_size = countof(s->static_stack_buf);
|
|
3186
3347
|
|
|
3187
3348
|
for(i = 0; i < s->capture_count * 2; i++)
|
|
3188
3349
|
capture[i] = NULL;
|
|
3189
|
-
alloca_size = s->stack_size_max * sizeof(stack_buf[0]);
|
|
3190
|
-
stack_buf = alloca(alloca_size);
|
|
3191
3350
|
|
|
3192
3351
|
cptr = cbuf + (cindex << cbuf_type);
|
|
3193
3352
|
if (0 < cindex && cindex < clen && s->cbuf_type == 2) {
|
|
@@ -3197,12 +3356,19 @@ int lre_exec(uint8_t **capture,
|
|
|
3197
3356
|
}
|
|
3198
3357
|
}
|
|
3199
3358
|
|
|
3200
|
-
ret = lre_exec_backtrack(s, capture,
|
|
3201
|
-
|
|
3202
|
-
|
|
3359
|
+
ret = lre_exec_backtrack(s, capture, bc_buf + RE_HEADER_LEN, cptr);
|
|
3360
|
+
|
|
3361
|
+
if (s->stack_buf != s->static_stack_buf)
|
|
3362
|
+
lre_realloc(s->opaque, s->stack_buf, 0);
|
|
3203
3363
|
return ret;
|
|
3204
3364
|
}
|
|
3205
3365
|
|
|
3366
|
+
int lre_get_alloc_count(const uint8_t *bc_buf)
|
|
3367
|
+
{
|
|
3368
|
+
return bc_buf[RE_HEADER_CAPTURE_COUNT] * 2 +
|
|
3369
|
+
bc_buf[RE_HEADER_REGISTER_COUNT];
|
|
3370
|
+
}
|
|
3371
|
+
|
|
3206
3372
|
int lre_get_capture_count(const uint8_t *bc_buf)
|
|
3207
3373
|
{
|
|
3208
3374
|
return bc_buf[RE_HEADER_CAPTURE_COUNT];
|
|
@@ -3241,7 +3407,7 @@ int main(int argc, char **argv)
|
|
|
3241
3407
|
int len, flags, ret, i;
|
|
3242
3408
|
uint8_t *bc;
|
|
3243
3409
|
char error_msg[64];
|
|
3244
|
-
uint8_t *capture
|
|
3410
|
+
uint8_t *capture;
|
|
3245
3411
|
const char *input;
|
|
3246
3412
|
int input_len, capture_count;
|
|
3247
3413
|
|
|
@@ -3260,6 +3426,7 @@ int main(int argc, char **argv)
|
|
|
3260
3426
|
input = argv[3];
|
|
3261
3427
|
input_len = strlen(input);
|
|
3262
3428
|
|
|
3429
|
+
capture = malloc(sizeof(capture[0]) * lre_get_alloc_count(bc));
|
|
3263
3430
|
ret = lre_exec(capture, bc, (uint8_t *)input, 0, input_len, 0, NULL);
|
|
3264
3431
|
printf("ret=%d\n", ret);
|
|
3265
3432
|
if (ret == 1) {
|
|
@@ -3275,6 +3442,7 @@ int main(int argc, char **argv)
|
|
|
3275
3442
|
printf("\n");
|
|
3276
3443
|
}
|
|
3277
3444
|
}
|
|
3445
|
+
free(capture);
|
|
3278
3446
|
return 0;
|
|
3279
3447
|
}
|
|
3280
3448
|
#endif
|