prism 0.29.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -1
- data/CONTRIBUTING.md +0 -4
- data/README.md +4 -0
- data/config.yml +498 -145
- data/docs/fuzzing.md +1 -1
- data/docs/parsing_rules.md +4 -1
- data/docs/ripper_translation.md +22 -0
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2858 -2082
- data/ext/prism/extconf.rb +1 -1
- data/ext/prism/extension.c +203 -421
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +1732 -453
- data/include/prism/defines.h +36 -0
- data/include/prism/diagnostic.h +23 -6
- data/include/prism/node.h +0 -21
- data/include/prism/options.h +94 -3
- data/include/prism/parser.h +57 -28
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +22 -15
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +28 -12
- data/include/prism/version.h +3 -3
- data/include/prism.h +0 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +16 -1
- data/lib/prism/dot_visitor.rb +45 -34
- data/lib/prism/dsl.rb +660 -468
- data/lib/prism/ffi.rb +64 -6
- data/lib/prism/inspect_visitor.rb +294 -64
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +11 -6
- data/lib/prism/node.rb +2469 -4973
- data/lib/prism/node_ext.rb +91 -14
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +43 -3
- data/lib/prism/reflection.rb +10 -8
- data/lib/prism/serialize.rb +484 -609
- data/lib/prism/translation/parser/compiler.rb +152 -132
- data/lib/prism/translation/parser/lexer.rb +26 -4
- data/lib/prism/translation/parser.rb +9 -4
- data/lib/prism/translation/ripper.rb +22 -20
- data/lib/prism/translation/ruby_parser.rb +73 -13
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +0 -4
- data/prism.gemspec +3 -5
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +744 -4837
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +36 -30
- data/sig/prism/dsl.rbs +190 -303
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +759 -628
- data/sig/prism/parse_result.rbs +2 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +103 -64
- data/src/diagnostic.c +62 -28
- data/src/node.c +499 -1754
- data/src/options.c +76 -27
- data/src/prettyprint.c +156 -112
- data/src/prism.c +2773 -2081
- data/src/regexp.c +202 -69
- data/src/serialize.c +170 -50
- data/src/static_literals.c +63 -84
- data/src/token_type.c +4 -4
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +53 -25
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +130 -80
- data/src/util/pm_strpbrk.c +32 -6
- metadata +4 -6
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/lib/prism/translation/parser/rubocop.rb +0 -73
- data/src/util/pm_string_list.c +0 -28
data/src/regexp.c
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
#include "prism/regexp.h"
|
2
2
|
|
3
|
+
#define PM_REGEXP_PARSE_DEPTH_MAX 4096
|
4
|
+
|
3
5
|
/**
|
4
6
|
* This is the parser that is going to handle parsing regular expressions.
|
5
7
|
*/
|
6
8
|
typedef struct {
|
9
|
+
/** The parser that is currently being used. */
|
10
|
+
pm_parser_t *parser;
|
11
|
+
|
7
12
|
/** A pointer to the start of the source that we are parsing. */
|
8
13
|
const uint8_t *start;
|
9
14
|
|
@@ -13,39 +18,48 @@ typedef struct {
|
|
13
18
|
/** A pointer to the end of the source that we are parsing. */
|
14
19
|
const uint8_t *end;
|
15
20
|
|
16
|
-
/**
|
17
|
-
|
21
|
+
/**
|
22
|
+
* Whether or not the regular expression currently being parsed is in
|
23
|
+
* extended mode, wherein whitespace is ignored and comments are allowed.
|
24
|
+
*/
|
25
|
+
bool extended_mode;
|
18
26
|
|
19
27
|
/** Whether the encoding has changed from the default. */
|
20
28
|
bool encoding_changed;
|
21
29
|
|
22
30
|
/** The encoding of the source. */
|
23
31
|
const pm_encoding_t *encoding;
|
32
|
+
|
33
|
+
/** The callback to call when a named capture group is found. */
|
34
|
+
pm_regexp_name_callback_t name_callback;
|
35
|
+
|
36
|
+
/** The data to pass to the name callback. */
|
37
|
+
void *name_data;
|
38
|
+
|
39
|
+
/** The callback to call when a parse error is found. */
|
40
|
+
pm_regexp_error_callback_t error_callback;
|
41
|
+
|
42
|
+
/** The data to pass to the error callback. */
|
43
|
+
void *error_data;
|
24
44
|
} pm_regexp_parser_t;
|
25
45
|
|
26
46
|
/**
|
27
|
-
*
|
47
|
+
* Append an error to the parser.
|
28
48
|
*/
|
29
|
-
static void
|
30
|
-
|
31
|
-
|
32
|
-
.start = start,
|
33
|
-
.cursor = start,
|
34
|
-
.end = end,
|
35
|
-
.named_captures = named_captures,
|
36
|
-
.encoding_changed = encoding_changed,
|
37
|
-
.encoding = encoding
|
38
|
-
};
|
49
|
+
static inline void
|
50
|
+
pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, const char *message) {
|
51
|
+
parser->error_callback(start, end, message, parser->error_data);
|
39
52
|
}
|
40
53
|
|
41
54
|
/**
|
42
|
-
* This appends a new string to the list of named captures.
|
55
|
+
* This appends a new string to the list of named captures. This function
|
56
|
+
* assumes the caller has already checked the validity of the name callback.
|
43
57
|
*/
|
44
58
|
static void
|
45
59
|
pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
46
60
|
pm_string_t string;
|
47
61
|
pm_string_shared_init(&string, start, end);
|
48
|
-
|
62
|
+
parser->name_callback(&string, parser->name_data);
|
49
63
|
pm_string_free(&string);
|
50
64
|
}
|
51
65
|
|
@@ -217,21 +231,24 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
|
|
217
231
|
*/
|
218
232
|
static bool
|
219
233
|
pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
+
while (!pm_regexp_char_is_eof(parser)) {
|
235
|
+
switch (*parser->cursor) {
|
236
|
+
case '*':
|
237
|
+
case '+':
|
238
|
+
case '?':
|
239
|
+
parser->cursor++;
|
240
|
+
break;
|
241
|
+
case '{':
|
242
|
+
parser->cursor++;
|
243
|
+
if (!pm_regexp_parse_range_quantifier(parser)) return false;
|
244
|
+
break;
|
245
|
+
default:
|
246
|
+
// In this case there is no quantifier.
|
247
|
+
return true;
|
248
|
+
}
|
234
249
|
}
|
250
|
+
|
251
|
+
return true;
|
235
252
|
}
|
236
253
|
|
237
254
|
/**
|
@@ -255,20 +272,20 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
|
|
255
272
|
|
256
273
|
// Forward declaration because character sets can be nested.
|
257
274
|
static bool
|
258
|
-
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
|
275
|
+
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth);
|
259
276
|
|
260
277
|
/**
|
261
278
|
* match-char-set : '[' '^'? (match-range | match-char)* ']'
|
262
279
|
* ;
|
263
280
|
*/
|
264
281
|
static bool
|
265
|
-
pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
|
282
|
+
pm_regexp_parse_character_set(pm_regexp_parser_t *parser, uint16_t depth) {
|
266
283
|
pm_regexp_char_accept(parser, '^');
|
267
284
|
|
268
285
|
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ']') {
|
269
286
|
switch (*parser->cursor++) {
|
270
287
|
case '[':
|
271
|
-
pm_regexp_parse_lbracket(parser);
|
288
|
+
pm_regexp_parse_lbracket(parser, (uint16_t) (depth + 1));
|
272
289
|
break;
|
273
290
|
case '\\':
|
274
291
|
if (!pm_regexp_char_is_eof(parser)) {
|
@@ -288,7 +305,18 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
|
|
288
305
|
* A left bracket can either mean a POSIX class or a character set.
|
289
306
|
*/
|
290
307
|
static bool
|
291
|
-
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
|
308
|
+
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth) {
|
309
|
+
if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
|
310
|
+
pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
|
311
|
+
return false;
|
312
|
+
}
|
313
|
+
|
314
|
+
if ((parser->cursor < parser->end) && parser->cursor[0] == ']') {
|
315
|
+
parser->cursor++;
|
316
|
+
pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "empty char-class");
|
317
|
+
return true;
|
318
|
+
}
|
319
|
+
|
292
320
|
const uint8_t *reset = parser->cursor;
|
293
321
|
|
294
322
|
if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
|
@@ -298,13 +326,13 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
|
|
298
326
|
parser->cursor = reset;
|
299
327
|
}
|
300
328
|
|
301
|
-
return pm_regexp_parse_character_set(parser);
|
329
|
+
return pm_regexp_parse_character_set(parser, depth);
|
302
330
|
}
|
303
331
|
|
304
332
|
// Forward declaration here since parsing groups needs to go back up the grammar
|
305
333
|
// to parse expressions within them.
|
306
334
|
static bool
|
307
|
-
pm_regexp_parse_expression(pm_regexp_parser_t *parser);
|
335
|
+
pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth);
|
308
336
|
|
309
337
|
/**
|
310
338
|
* These are the states of the options that are configurable on the regular
|
@@ -396,6 +424,19 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
|
396
424
|
return false;
|
397
425
|
}
|
398
426
|
|
427
|
+
/**
|
428
|
+
* True if the given key is set in the options.
|
429
|
+
*/
|
430
|
+
static uint8_t
|
431
|
+
pm_regexp_options_state(pm_regexp_options_t *options, uint8_t key) {
|
432
|
+
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
433
|
+
key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
|
434
|
+
return options->values[key];
|
435
|
+
}
|
436
|
+
|
437
|
+
return false;
|
438
|
+
}
|
439
|
+
|
399
440
|
/**
|
400
441
|
* Groups can have quite a few different patterns for syntax. They basically
|
401
442
|
* just wrap a set of expressions, but they can potentially have options after a
|
@@ -418,17 +459,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
|
418
459
|
* * (?imxdau-imx:subexp) - turn on and off configuration for an expression
|
419
460
|
*/
|
420
461
|
static bool
|
421
|
-
pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
462
|
+
pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
463
|
+
const uint8_t *group_start = parser->cursor;
|
464
|
+
|
465
|
+
pm_regexp_options_t options;
|
466
|
+
pm_regexp_options_init(&options);
|
467
|
+
|
422
468
|
// First, parse any options for the group.
|
423
469
|
if (pm_regexp_char_accept(parser, '?')) {
|
424
470
|
if (pm_regexp_char_is_eof(parser)) {
|
471
|
+
pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
|
425
472
|
return false;
|
426
473
|
}
|
427
|
-
pm_regexp_options_t options;
|
428
|
-
pm_regexp_options_init(&options);
|
429
474
|
|
430
475
|
switch (*parser->cursor) {
|
431
476
|
case '#': { // inline comments
|
477
|
+
parser->cursor++;
|
478
|
+
if (pm_regexp_char_is_eof(parser)) {
|
479
|
+
pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
|
480
|
+
return false;
|
481
|
+
}
|
482
|
+
|
432
483
|
if (parser->encoding_changed && parser->encoding->multibyte) {
|
433
484
|
bool escaped = false;
|
434
485
|
|
@@ -472,6 +523,7 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
|
472
523
|
case '<':
|
473
524
|
parser->cursor++;
|
474
525
|
if (pm_regexp_char_is_eof(parser)) {
|
526
|
+
pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
|
475
527
|
return false;
|
476
528
|
}
|
477
529
|
|
@@ -485,7 +537,15 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
|
485
537
|
if (!pm_regexp_char_find(parser, '>')) {
|
486
538
|
return false;
|
487
539
|
}
|
488
|
-
|
540
|
+
|
541
|
+
if (parser->cursor - start == 1) {
|
542
|
+
pm_regexp_parse_error(parser, start, parser->cursor, "group name is empty");
|
543
|
+
}
|
544
|
+
|
545
|
+
if (parser->name_callback != NULL) {
|
546
|
+
pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
|
547
|
+
}
|
548
|
+
|
489
549
|
break;
|
490
550
|
}
|
491
551
|
}
|
@@ -496,7 +556,10 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
|
496
556
|
return false;
|
497
557
|
}
|
498
558
|
|
499
|
-
|
559
|
+
if (parser->name_callback != NULL) {
|
560
|
+
pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
|
561
|
+
}
|
562
|
+
|
500
563
|
break;
|
501
564
|
}
|
502
565
|
case '(': // conditional expression
|
@@ -516,6 +579,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
|
516
579
|
return false;
|
517
580
|
}
|
518
581
|
|
582
|
+
// If we are at the end of the group of options and there is no
|
583
|
+
// subexpression, then we are going to be setting the options
|
584
|
+
// for the parent group. In this case we are safe to return now.
|
585
|
+
if (*parser->cursor == ')') {
|
586
|
+
if (pm_regexp_options_state(&options, 'x') == PM_REGEXP_OPTION_STATE_ADDED) {
|
587
|
+
parser->extended_mode = true;
|
588
|
+
}
|
589
|
+
|
590
|
+
parser->cursor++;
|
591
|
+
return true;
|
592
|
+
}
|
593
|
+
|
519
594
|
// If we hit a -, then we're done parsing options.
|
520
595
|
if (*parser->cursor != '-') break;
|
521
596
|
|
@@ -533,22 +608,57 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
|
533
608
|
if (pm_regexp_char_is_eof(parser)) {
|
534
609
|
return false;
|
535
610
|
}
|
611
|
+
|
612
|
+
// If we are at the end of the group of options and there is no
|
613
|
+
// subexpression, then we are going to be setting the options
|
614
|
+
// for the parent group. In this case we are safe to return now.
|
615
|
+
if (*parser->cursor == ')') {
|
616
|
+
switch (pm_regexp_options_state(&options, 'x')) {
|
617
|
+
case PM_REGEXP_OPTION_STATE_ADDED:
|
618
|
+
parser->extended_mode = true;
|
619
|
+
break;
|
620
|
+
case PM_REGEXP_OPTION_STATE_REMOVED:
|
621
|
+
parser->extended_mode = false;
|
622
|
+
break;
|
623
|
+
}
|
624
|
+
|
625
|
+
parser->cursor++;
|
626
|
+
return true;
|
627
|
+
}
|
628
|
+
|
536
629
|
break;
|
537
630
|
default:
|
538
|
-
|
631
|
+
parser->cursor++;
|
632
|
+
pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "undefined group option");
|
633
|
+
break;
|
539
634
|
}
|
540
635
|
}
|
541
636
|
|
637
|
+
bool extended_mode = parser->extended_mode;
|
638
|
+
switch (pm_regexp_options_state(&options, 'x')) {
|
639
|
+
case PM_REGEXP_OPTION_STATE_ADDED:
|
640
|
+
parser->extended_mode = true;
|
641
|
+
break;
|
642
|
+
case PM_REGEXP_OPTION_STATE_REMOVED:
|
643
|
+
parser->extended_mode = false;
|
644
|
+
break;
|
645
|
+
}
|
646
|
+
|
542
647
|
// Now, parse the expressions within this group.
|
543
648
|
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
|
544
|
-
if (!pm_regexp_parse_expression(parser)) {
|
649
|
+
if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
|
650
|
+
parser->extended_mode = extended_mode;
|
545
651
|
return false;
|
546
652
|
}
|
547
653
|
pm_regexp_char_accept(parser, '|');
|
548
654
|
}
|
549
655
|
|
550
656
|
// Finally, make sure we have a closing parenthesis.
|
551
|
-
|
657
|
+
parser->extended_mode = extended_mode;
|
658
|
+
if (pm_regexp_char_expect(parser, ')')) return true;
|
659
|
+
|
660
|
+
pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
|
661
|
+
return false;
|
552
662
|
}
|
553
663
|
|
554
664
|
/**
|
@@ -564,12 +674,12 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
|
564
674
|
* ;
|
565
675
|
*/
|
566
676
|
static bool
|
567
|
-
pm_regexp_parse_item(pm_regexp_parser_t *parser) {
|
677
|
+
pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
|
568
678
|
switch (*parser->cursor) {
|
569
679
|
case '^':
|
570
680
|
case '$':
|
571
681
|
parser->cursor++;
|
572
|
-
return
|
682
|
+
return pm_regexp_parse_quantifier(parser);
|
573
683
|
case '\\':
|
574
684
|
parser->cursor++;
|
575
685
|
if (!pm_regexp_char_is_eof(parser)) {
|
@@ -578,10 +688,26 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
|
|
578
688
|
return pm_regexp_parse_quantifier(parser);
|
579
689
|
case '(':
|
580
690
|
parser->cursor++;
|
581
|
-
return pm_regexp_parse_group(parser) && pm_regexp_parse_quantifier(parser);
|
691
|
+
return pm_regexp_parse_group(parser, depth) && pm_regexp_parse_quantifier(parser);
|
582
692
|
case '[':
|
583
693
|
parser->cursor++;
|
584
|
-
return pm_regexp_parse_lbracket(parser) && pm_regexp_parse_quantifier(parser);
|
694
|
+
return pm_regexp_parse_lbracket(parser, depth) && pm_regexp_parse_quantifier(parser);
|
695
|
+
case '*':
|
696
|
+
case '?':
|
697
|
+
case '+':
|
698
|
+
parser->cursor++;
|
699
|
+
pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "target of repeat operator is not specified");
|
700
|
+
return true;
|
701
|
+
case ')':
|
702
|
+
parser->cursor++;
|
703
|
+
pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "unmatched close parenthesis");
|
704
|
+
return true;
|
705
|
+
case '#':
|
706
|
+
if (parser->extended_mode) {
|
707
|
+
if (!pm_regexp_char_find(parser, '\n')) parser->cursor = parser->end;
|
708
|
+
return true;
|
709
|
+
}
|
710
|
+
/* fallthrough */
|
585
711
|
default: {
|
586
712
|
size_t width;
|
587
713
|
if (!parser->encoding_changed) {
|
@@ -603,13 +729,18 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
|
|
603
729
|
* ;
|
604
730
|
*/
|
605
731
|
static bool
|
606
|
-
pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
|
607
|
-
if (
|
732
|
+
pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth) {
|
733
|
+
if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
|
734
|
+
pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
|
735
|
+
return false;
|
736
|
+
}
|
737
|
+
|
738
|
+
if (!pm_regexp_parse_item(parser, depth)) {
|
608
739
|
return false;
|
609
740
|
}
|
610
741
|
|
611
742
|
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')' && *parser->cursor != '|') {
|
612
|
-
if (!pm_regexp_parse_item(parser)) {
|
743
|
+
if (!pm_regexp_parse_item(parser, depth)) {
|
613
744
|
return false;
|
614
745
|
}
|
615
746
|
}
|
@@ -625,29 +756,31 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
|
|
625
756
|
*/
|
626
757
|
static bool
|
627
758
|
pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
|
628
|
-
|
629
|
-
(
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
) &&
|
635
|
-
(
|
636
|
-
// Return now if we've parsed the entire pattern.
|
637
|
-
pm_regexp_char_is_eof(parser) ||
|
638
|
-
// Otherwise, we should have a pipe character.
|
639
|
-
(pm_regexp_char_expect(parser, '|') && pm_regexp_parse_pattern(parser))
|
640
|
-
)
|
641
|
-
);
|
759
|
+
do {
|
760
|
+
if (pm_regexp_char_is_eof(parser)) return true;
|
761
|
+
if (!pm_regexp_parse_expression(parser, 0)) return false;
|
762
|
+
} while (pm_regexp_char_accept(parser, '|'));
|
763
|
+
|
764
|
+
return pm_regexp_char_is_eof(parser);
|
642
765
|
}
|
643
766
|
|
644
767
|
/**
|
645
768
|
* Parse a regular expression and extract the names of all of the named capture
|
646
769
|
* groups.
|
647
770
|
*/
|
648
|
-
PRISM_EXPORTED_FUNCTION
|
649
|
-
|
650
|
-
pm_regexp_parser_t
|
651
|
-
|
652
|
-
|
771
|
+
PRISM_EXPORTED_FUNCTION void
|
772
|
+
pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
|
773
|
+
pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
|
774
|
+
.parser = parser,
|
775
|
+
.start = source,
|
776
|
+
.cursor = source,
|
777
|
+
.end = source + size,
|
778
|
+
.extended_mode = extended_mode,
|
779
|
+
.encoding_changed = parser->encoding_changed,
|
780
|
+
.encoding = parser->encoding,
|
781
|
+
.name_callback = name_callback,
|
782
|
+
.name_data = name_data,
|
783
|
+
.error_callback = error_callback,
|
784
|
+
.error_data = error_data
|
785
|
+
});
|
653
786
|
}
|