prism 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/Makefile +6 -0
- data/README.md +1 -1
- data/config.yml +50 -35
- data/docs/fuzzing.md +1 -1
- data/docs/serialization.md +28 -29
- data/ext/prism/api_node.c +802 -770
- data/ext/prism/api_pack.c +20 -9
- data/ext/prism/extension.c +464 -162
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +3173 -763
- data/include/prism/defines.h +32 -9
- data/include/prism/diagnostic.h +36 -3
- data/include/prism/enc/pm_encoding.h +118 -28
- data/include/prism/node.h +38 -13
- data/include/prism/options.h +204 -0
- data/include/prism/pack.h +44 -33
- data/include/prism/parser.h +445 -200
- data/include/prism/prettyprint.h +12 -1
- data/include/prism/regexp.h +16 -2
- data/include/prism/util/pm_buffer.h +94 -16
- data/include/prism/util/pm_char.h +162 -48
- data/include/prism/util/pm_constant_pool.h +126 -32
- data/include/prism/util/pm_list.h +68 -38
- data/include/prism/util/pm_memchr.h +18 -3
- data/include/prism/util/pm_newline_list.h +70 -27
- data/include/prism/util/pm_state_stack.h +25 -7
- data/include/prism/util/pm_string.h +115 -27
- data/include/prism/util/pm_string_list.h +25 -6
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +31 -17
- data/include/prism/version.h +27 -2
- data/include/prism.h +224 -31
- data/lib/prism/compiler.rb +6 -3
- data/lib/prism/debug.rb +23 -7
- data/lib/prism/dispatcher.rb +33 -18
- data/lib/prism/dsl.rb +10 -5
- data/lib/prism/ffi.rb +132 -80
- data/lib/prism/lex_compat.rb +25 -15
- data/lib/prism/mutation_compiler.rb +10 -5
- data/lib/prism/node.rb +370 -135
- data/lib/prism/node_ext.rb +1 -1
- data/lib/prism/node_inspector.rb +1 -1
- data/lib/prism/pack.rb +79 -40
- data/lib/prism/parse_result/comments.rb +7 -2
- data/lib/prism/parse_result/newlines.rb +4 -0
- data/lib/prism/parse_result.rb +150 -30
- data/lib/prism/pattern.rb +11 -0
- data/lib/prism/ripper_compat.rb +28 -10
- data/lib/prism/serialize.rb +86 -54
- data/lib/prism/visitor.rb +10 -3
- data/lib/prism.rb +20 -2
- data/prism.gemspec +4 -2
- data/rbi/prism.rbi +104 -60
- data/rbi/prism_static.rbi +16 -2
- data/sig/prism.rbs +72 -43
- data/sig/prism_static.rbs +14 -1
- data/src/diagnostic.c +56 -53
- data/src/enc/pm_big5.c +1 -0
- data/src/enc/pm_euc_jp.c +1 -0
- data/src/enc/pm_gbk.c +1 -0
- data/src/enc/pm_shift_jis.c +1 -0
- data/src/enc/pm_tables.c +316 -80
- data/src/enc/pm_unicode.c +53 -8
- data/src/enc/pm_windows_31j.c +1 -0
- data/src/node.c +334 -321
- data/src/options.c +170 -0
- data/src/prettyprint.c +74 -47
- data/src/prism.c +1642 -856
- data/src/regexp.c +151 -95
- data/src/serialize.c +44 -20
- data/src/token_type.c +3 -1
- data/src/util/pm_buffer.c +45 -15
- data/src/util/pm_char.c +103 -57
- data/src/util/pm_constant_pool.c +51 -21
- data/src/util/pm_list.c +12 -4
- data/src/util/pm_memchr.c +5 -3
- data/src/util/pm_newline_list.c +20 -12
- data/src/util/pm_state_stack.c +9 -3
- data/src/util/pm_string.c +95 -85
- data/src/util/pm_string_list.c +14 -15
- data/src/util/pm_strncasecmp.c +10 -3
- data/src/util/pm_strpbrk.c +25 -19
- metadata +5 -3
- data/docs/prism.png +0 -0
data/src/regexp.c
CHANGED
@@ -1,16 +1,31 @@
|
|
1
1
|
#include "prism/regexp.h"
|
2
2
|
|
3
|
-
|
3
|
+
/**
|
4
|
+
* This is the parser that is going to handle parsing regular expressions.
|
5
|
+
*/
|
4
6
|
typedef struct {
|
7
|
+
/** A pointer to the start of the source that we are parsing. */
|
5
8
|
const uint8_t *start;
|
9
|
+
|
10
|
+
/** A pointer to the current position in the source. */
|
6
11
|
const uint8_t *cursor;
|
12
|
+
|
13
|
+
/** A pointer to the end of the source that we are parsing. */
|
7
14
|
const uint8_t *end;
|
15
|
+
|
16
|
+
/** A list of named captures that we've found. */
|
8
17
|
pm_string_list_t *named_captures;
|
18
|
+
|
19
|
+
/** Whether the encoding has changed from the default. */
|
9
20
|
bool encoding_changed;
|
21
|
+
|
22
|
+
/** The encoding of the source. */
|
10
23
|
pm_encoding_t *encoding;
|
11
24
|
} pm_regexp_parser_t;
|
12
25
|
|
13
|
-
|
26
|
+
/**
|
27
|
+
* This initializes a new parser with the given source.
|
28
|
+
*/
|
14
29
|
static void
|
15
30
|
pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
|
16
31
|
*parser = (pm_regexp_parser_t) {
|
@@ -23,7 +38,9 @@ pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const ui
|
|
23
38
|
};
|
24
39
|
}
|
25
40
|
|
26
|
-
|
41
|
+
/**
|
42
|
+
* This appends a new string to the list of named captures.
|
43
|
+
*/
|
27
44
|
static void
|
28
45
|
pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
29
46
|
pm_string_t string;
|
@@ -32,13 +49,17 @@ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start,
|
|
32
49
|
pm_string_free(&string);
|
33
50
|
}
|
34
51
|
|
35
|
-
|
52
|
+
/**
|
53
|
+
* Returns true if the next character is the end of the source.
|
54
|
+
*/
|
36
55
|
static inline bool
|
37
56
|
pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
|
38
57
|
return parser->cursor >= parser->end;
|
39
58
|
}
|
40
59
|
|
41
|
-
|
60
|
+
/**
|
61
|
+
* Optionally accept a char and consume it if it exists.
|
62
|
+
*/
|
42
63
|
static inline bool
|
43
64
|
pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
|
44
65
|
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
@@ -48,7 +69,9 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
|
|
48
69
|
return false;
|
49
70
|
}
|
50
71
|
|
51
|
-
|
72
|
+
/**
|
73
|
+
* Expect a character to be present and consume it.
|
74
|
+
*/
|
52
75
|
static inline bool
|
53
76
|
pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
|
54
77
|
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
@@ -58,7 +81,9 @@ pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
|
|
58
81
|
return false;
|
59
82
|
}
|
60
83
|
|
61
|
-
|
84
|
+
/**
|
85
|
+
* This advances the current token to the next instance of the given character.
|
86
|
+
*/
|
62
87
|
static bool
|
63
88
|
pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
|
64
89
|
if (pm_regexp_char_is_eof(parser)) {
|
@@ -74,37 +99,39 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
|
|
74
99
|
return true;
|
75
100
|
}
|
76
101
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
102
|
+
/**
|
103
|
+
* Range quantifiers are a special class of quantifiers that look like
|
104
|
+
*
|
105
|
+
* * {digit}
|
106
|
+
* * {digit,}
|
107
|
+
* * {digit,digit}
|
108
|
+
* * {,digit}
|
109
|
+
*
|
110
|
+
* Unfortunately, if there are any spaces in between, then this just becomes a
|
111
|
+
* regular character match expression and we have to backtrack. So when this
|
112
|
+
* function first starts running, we'll create a "save" point and then attempt
|
113
|
+
* to parse the quantifier. If it fails, we'll restore the save point and
|
114
|
+
* return.
|
115
|
+
*
|
116
|
+
* To properly track everything, we're going to build a little state machine.
|
117
|
+
* It looks something like the following:
|
118
|
+
*
|
119
|
+
* ┌───────┐ ┌─────────┐ ────────────┐
|
120
|
+
* ──── lbrace ───> │ start │ ──── digit ───> │ minimum │ │
|
121
|
+
* └───────┘ └─────────┘ <─── digit ─┘
|
122
|
+
* │ │ │
|
123
|
+
* ┌───────┐ │ │ rbrace
|
124
|
+
* │ comma │ <───── comma ┌──── comma ───────┘ │
|
125
|
+
* └───────┘ V V
|
126
|
+
* │ ┌─────────┐ ┌─────────┐
|
127
|
+
* └── digit ──> │ maximum │ ── rbrace ──> │| final |│
|
128
|
+
* └─────────┘ └─────────┘
|
129
|
+
* │ ^
|
130
|
+
* └─ digit ─┘
|
131
|
+
*
|
132
|
+
* Note that by the time we've hit this function, the lbrace has already been
|
133
|
+
* consumed so we're in the start state.
|
134
|
+
*/
|
108
135
|
static bool
|
109
136
|
pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
|
110
137
|
const uint8_t *savepoint = parser->cursor;
|
@@ -180,12 +207,14 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
|
|
180
207
|
return true;
|
181
208
|
}
|
182
209
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
210
|
+
/**
|
211
|
+
* quantifier : star-quantifier
|
212
|
+
* | plus-quantifier
|
213
|
+
* | optional-quantifier
|
214
|
+
* | range-quantifier
|
215
|
+
* | <empty>
|
216
|
+
* ;
|
217
|
+
*/
|
189
218
|
static bool
|
190
219
|
pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
|
191
220
|
if (pm_regexp_char_is_eof(parser)) return true;
|
@@ -205,8 +234,10 @@ pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
|
|
205
234
|
}
|
206
235
|
}
|
207
236
|
|
208
|
-
|
209
|
-
|
237
|
+
/**
|
238
|
+
* match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
|
239
|
+
* ;
|
240
|
+
*/
|
210
241
|
static bool
|
211
242
|
pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
|
212
243
|
if (!pm_regexp_char_expect(parser, ':')) {
|
@@ -226,8 +257,10 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
|
|
226
257
|
static bool
|
227
258
|
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
|
228
259
|
|
229
|
-
|
230
|
-
|
260
|
+
/**
|
261
|
+
* match-char-set : '[' '^'? (match-range | match-char)* ']'
|
262
|
+
* ;
|
263
|
+
*/
|
231
264
|
static bool
|
232
265
|
pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
|
233
266
|
pm_regexp_char_accept(parser, '^');
|
@@ -251,7 +284,9 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
|
|
251
284
|
return pm_regexp_char_expect(parser, ']');
|
252
285
|
}
|
253
286
|
|
254
|
-
|
287
|
+
/**
|
288
|
+
* A left bracket can either mean a POSIX class or a character set.
|
289
|
+
*/
|
255
290
|
static bool
|
256
291
|
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
|
257
292
|
const uint8_t *reset = parser->cursor;
|
@@ -271,8 +306,10 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
|
|
271
306
|
static bool
|
272
307
|
pm_regexp_parse_expression(pm_regexp_parser_t *parser);
|
273
308
|
|
274
|
-
|
275
|
-
|
309
|
+
/**
|
310
|
+
* These are the states of the options that are configurable on the regular
|
311
|
+
* expression (or from within a group).
|
312
|
+
*/
|
276
313
|
typedef enum {
|
277
314
|
PM_REGEXP_OPTION_STATE_INVALID,
|
278
315
|
PM_REGEXP_OPTION_STATE_TOGGLEABLE,
|
@@ -283,16 +320,22 @@ typedef enum {
|
|
283
320
|
|
284
321
|
// These are the options that are configurable on the regular expression (or
|
285
322
|
// from within a group).
|
323
|
+
|
286
324
|
#define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
|
287
325
|
#define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
|
288
326
|
#define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
|
289
327
|
|
290
|
-
|
328
|
+
/**
|
329
|
+
* This is the set of options that are configurable on the regular expression.
|
330
|
+
*/
|
291
331
|
typedef struct {
|
332
|
+
/** The current state of each option. */
|
292
333
|
uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
|
293
334
|
} pm_regexp_options_t;
|
294
335
|
|
295
|
-
|
336
|
+
/**
|
337
|
+
* Initialize a new set of options to their default values.
|
338
|
+
*/
|
296
339
|
static void
|
297
340
|
pm_regexp_options_init(pm_regexp_options_t *options) {
|
298
341
|
memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
|
@@ -304,8 +347,10 @@ pm_regexp_options_init(pm_regexp_options_t *options) {
|
|
304
347
|
options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
|
305
348
|
}
|
306
349
|
|
307
|
-
|
308
|
-
|
350
|
+
/**
|
351
|
+
* Attempt to add the given option to the set of options. Returns true if it was
|
352
|
+
* added, false if it was already present.
|
353
|
+
*/
|
309
354
|
static bool
|
310
355
|
pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
|
311
356
|
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
@@ -327,8 +372,10 @@ pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
|
|
327
372
|
return false;
|
328
373
|
}
|
329
374
|
|
330
|
-
|
331
|
-
|
375
|
+
/**
|
376
|
+
* Attempt to remove the given option from the set of options. Returns true if
|
377
|
+
* it was removed, false if it was already absent.
|
378
|
+
*/
|
332
379
|
static bool
|
333
380
|
pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
334
381
|
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
@@ -349,26 +396,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
|
349
396
|
return false;
|
350
397
|
}
|
351
398
|
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
399
|
+
/**
|
400
|
+
* Groups can have quite a few different patterns for syntax. They basically
|
401
|
+
* just wrap a set of expressions, but they can potentially have options after a
|
402
|
+
* question mark. If there _isn't_ a question mark, then it's just a set of
|
403
|
+
* expressions. If there _is_, then here are the options:
|
404
|
+
*
|
405
|
+
* * (?#...) - inline comments
|
406
|
+
* * (?:subexp) - non-capturing group
|
407
|
+
* * (?=subexp) - positive lookahead
|
408
|
+
* * (?!subexp) - negative lookahead
|
409
|
+
* * (?>subexp) - atomic group
|
410
|
+
* * (?~subexp) - absence operator
|
411
|
+
* * (?<=subexp) - positive lookbehind
|
412
|
+
* * (?<!subexp) - negative lookbehind
|
413
|
+
* * (?<name>subexp) - named capturing group
|
414
|
+
* * (?'name'subexp) - named capturing group
|
415
|
+
* * (?(cond)yes-subexp) - conditional expression
|
416
|
+
* * (?(cond)yes-subexp|no-subexp) - conditional expression
|
417
|
+
* * (?imxdau-imx) - turn on and off configuration
|
418
|
+
* * (?imxdau-imx:subexp) - turn on and off configuration for an expression
|
419
|
+
*/
|
372
420
|
static bool
|
373
421
|
pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
374
422
|
// First, parse any options for the group.
|
@@ -503,16 +551,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
|
503
551
|
return pm_regexp_char_expect(parser, ')');
|
504
552
|
}
|
505
553
|
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
554
|
+
/**
|
555
|
+
* item : anchor
|
556
|
+
* | match-posix-class
|
557
|
+
* | match-char-set
|
558
|
+
* | match-char-class
|
559
|
+
* | match-char-prop
|
560
|
+
* | match-char
|
561
|
+
* | match-any
|
562
|
+
* | group
|
563
|
+
* | quantified
|
564
|
+
* ;
|
565
|
+
*/
|
516
566
|
static bool
|
517
567
|
pm_regexp_parse_item(pm_regexp_parser_t *parser) {
|
518
568
|
switch (*parser->cursor++) {
|
@@ -533,8 +583,10 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
|
|
533
583
|
}
|
534
584
|
}
|
535
585
|
|
536
|
-
|
537
|
-
|
586
|
+
/**
|
587
|
+
* expression : item+
|
588
|
+
* ;
|
589
|
+
*/
|
538
590
|
static bool
|
539
591
|
pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
|
540
592
|
if (!pm_regexp_parse_item(parser)) {
|
@@ -550,10 +602,12 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
|
|
550
602
|
return true;
|
551
603
|
}
|
552
604
|
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
605
|
+
/**
|
606
|
+
* pattern : EOF
|
607
|
+
* | expression EOF
|
608
|
+
* | expression '|' pattern
|
609
|
+
* ;
|
610
|
+
*/
|
557
611
|
static bool
|
558
612
|
pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
|
559
613
|
return (
|
@@ -572,8 +626,10 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
|
|
572
626
|
);
|
573
627
|
}
|
574
628
|
|
575
|
-
|
576
|
-
|
629
|
+
/**
|
630
|
+
* Parse a regular expression and extract the names of all of the named capture
|
631
|
+
* groups.
|
632
|
+
*/
|
577
633
|
PRISM_EXPORTED_FUNCTION bool
|
578
634
|
pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
|
579
635
|
pm_regexp_parser_t parser;
|
data/src/serialize.c
CHANGED
@@ -54,7 +54,7 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe
|
|
54
54
|
}
|
55
55
|
}
|
56
56
|
|
57
|
-
void
|
57
|
+
static void
|
58
58
|
pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
59
59
|
pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
|
60
60
|
|
@@ -1131,16 +1131,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1131
1131
|
}
|
1132
1132
|
break;
|
1133
1133
|
}
|
1134
|
-
case PM_KEYWORD_PARAMETER_NODE: {
|
1135
|
-
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_parameter_node_t *)node)->name));
|
1136
|
-
pm_serialize_location(parser, &((pm_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1137
|
-
if (((pm_keyword_parameter_node_t *)node)->value == NULL) {
|
1138
|
-
pm_buffer_append_byte(buffer, 0);
|
1139
|
-
} else {
|
1140
|
-
pm_serialize_node(parser, (pm_node_t *)((pm_keyword_parameter_node_t *)node)->value, buffer);
|
1141
|
-
}
|
1142
|
-
break;
|
1143
|
-
}
|
1144
1134
|
case PM_KEYWORD_REST_PARAMETER_NODE: {
|
1145
1135
|
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
|
1146
1136
|
if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
|
@@ -1348,6 +1338,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1348
1338
|
pm_buffer_append_varint(buffer, ((pm_numbered_reference_read_node_t *)node)->number);
|
1349
1339
|
break;
|
1350
1340
|
}
|
1341
|
+
case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
|
1342
|
+
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
|
1343
|
+
pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1344
|
+
pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
|
1345
|
+
break;
|
1346
|
+
}
|
1351
1347
|
case PM_OPTIONAL_PARAMETER_NODE: {
|
1352
1348
|
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
|
1353
1349
|
pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
|
@@ -1482,6 +1478,11 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1482
1478
|
pm_buffer_append_varint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1483
1479
|
break;
|
1484
1480
|
}
|
1481
|
+
case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
|
1482
|
+
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
|
1483
|
+
pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1484
|
+
break;
|
1485
|
+
}
|
1485
1486
|
case PM_REQUIRED_PARAMETER_NODE: {
|
1486
1487
|
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
|
1487
1488
|
break;
|
@@ -1785,6 +1786,9 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu
|
|
1785
1786
|
pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start));
|
1786
1787
|
}
|
1787
1788
|
|
1789
|
+
/**
|
1790
|
+
* Serialize the given list of comments to the given buffer.
|
1791
|
+
*/
|
1788
1792
|
void
|
1789
1793
|
pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
|
1790
1794
|
pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list)));
|
@@ -1838,6 +1842,9 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *
|
|
1838
1842
|
}
|
1839
1843
|
}
|
1840
1844
|
|
1845
|
+
/**
|
1846
|
+
* Serialize the name of the encoding to the buffer.
|
1847
|
+
*/
|
1841
1848
|
void
|
1842
1849
|
pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
1843
1850
|
size_t encoding_length = strlen(encoding->name);
|
@@ -1845,10 +1852,14 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
|
1845
1852
|
pm_buffer_append_string(buffer, encoding->name, encoding_length);
|
1846
1853
|
}
|
1847
1854
|
|
1848
|
-
#line
|
1855
|
+
#line 206 "serialize.c.erb"
|
1856
|
+
/**
|
1857
|
+
* Serialize the encoding, metadata, nodes, and constant pool.
|
1858
|
+
*/
|
1849
1859
|
void
|
1850
1860
|
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1851
1861
|
pm_serialize_encoding(&parser->encoding, buffer);
|
1862
|
+
pm_buffer_append_varint(buffer, parser->start_line);
|
1852
1863
|
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
|
1853
1864
|
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
|
1854
1865
|
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
|
@@ -1921,10 +1932,16 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
|
|
1921
1932
|
pm_buffer_append_varint(buffer, parser->lex_state);
|
1922
1933
|
}
|
1923
1934
|
|
1935
|
+
/**
|
1936
|
+
* Lex the given source and serialize to the given buffer.
|
1937
|
+
*/
|
1924
1938
|
PRISM_EXPORTED_FUNCTION void
|
1925
|
-
|
1939
|
+
pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
|
1940
|
+
pm_options_t options = { 0 };
|
1941
|
+
if (data != NULL) pm_options_read(&options, data);
|
1942
|
+
|
1926
1943
|
pm_parser_t parser;
|
1927
|
-
pm_parser_init(&parser, source, size,
|
1944
|
+
pm_parser_init(&parser, source, size, &options);
|
1928
1945
|
|
1929
1946
|
pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
|
1930
1947
|
.data = (void *) buffer,
|
@@ -1934,10 +1951,11 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
|
|
1934
1951
|
parser.lex_callback = &lex_callback;
|
1935
1952
|
pm_node_t *node = pm_parse(&parser);
|
1936
1953
|
|
1937
|
-
// Append 0 to mark end of tokens
|
1954
|
+
// Append 0 to mark end of tokens.
|
1938
1955
|
pm_buffer_append_byte(buffer, 0);
|
1939
1956
|
|
1940
1957
|
pm_serialize_encoding(&parser.encoding, buffer);
|
1958
|
+
pm_buffer_append_varint(buffer, parser.start_line);
|
1941
1959
|
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
|
1942
1960
|
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
|
1943
1961
|
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
|
@@ -1945,15 +1963,20 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
|
|
1945
1963
|
|
1946
1964
|
pm_node_destroy(&parser, node);
|
1947
1965
|
pm_parser_free(&parser);
|
1966
|
+
pm_options_free(&options);
|
1948
1967
|
}
|
1949
1968
|
|
1950
|
-
|
1951
|
-
|
1969
|
+
/**
|
1970
|
+
* Parse and serialize both the AST and the tokens represented by the given
|
1971
|
+
* source to the given buffer.
|
1972
|
+
*/
|
1952
1973
|
PRISM_EXPORTED_FUNCTION void
|
1953
|
-
|
1974
|
+
pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
|
1975
|
+
pm_options_t options = { 0 };
|
1976
|
+
if (data != NULL) pm_options_read(&options, data);
|
1977
|
+
|
1954
1978
|
pm_parser_t parser;
|
1955
|
-
pm_parser_init(&parser, source, size,
|
1956
|
-
if (metadata) pm_parser_metadata(&parser, metadata);
|
1979
|
+
pm_parser_init(&parser, source, size, &options);
|
1957
1980
|
|
1958
1981
|
pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
|
1959
1982
|
.data = (void *) buffer,
|
@@ -1968,4 +1991,5 @@ pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer,
|
|
1968
1991
|
|
1969
1992
|
pm_node_destroy(&parser, node);
|
1970
1993
|
pm_parser_free(&parser);
|
1994
|
+
pm_options_free(&options);
|
1971
1995
|
}
|
data/src/token_type.c
CHANGED
@@ -9,7 +9,9 @@
|
|
9
9
|
|
10
10
|
#include "prism/ast.h"
|
11
11
|
|
12
|
-
|
12
|
+
/**
|
13
|
+
* Returns a string representation of the given token type.
|
14
|
+
*/
|
13
15
|
PRISM_EXPORTED_FUNCTION const char *
|
14
16
|
pm_token_type_to_str(pm_token_type_t token_type)
|
15
17
|
{
|