prism 0.16.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/Makefile +6 -0
- data/README.md +1 -1
- data/config.yml +50 -35
- data/docs/fuzzing.md +1 -1
- data/docs/serialization.md +28 -29
- data/ext/prism/api_node.c +802 -770
- data/ext/prism/api_pack.c +20 -9
- data/ext/prism/extension.c +464 -162
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +3173 -763
- data/include/prism/defines.h +32 -9
- data/include/prism/diagnostic.h +36 -3
- data/include/prism/enc/pm_encoding.h +118 -28
- data/include/prism/node.h +38 -13
- data/include/prism/options.h +204 -0
- data/include/prism/pack.h +44 -33
- data/include/prism/parser.h +445 -200
- data/include/prism/prettyprint.h +12 -1
- data/include/prism/regexp.h +16 -2
- data/include/prism/util/pm_buffer.h +94 -16
- data/include/prism/util/pm_char.h +162 -48
- data/include/prism/util/pm_constant_pool.h +126 -32
- data/include/prism/util/pm_list.h +68 -38
- data/include/prism/util/pm_memchr.h +18 -3
- data/include/prism/util/pm_newline_list.h +70 -27
- data/include/prism/util/pm_state_stack.h +25 -7
- data/include/prism/util/pm_string.h +115 -27
- data/include/prism/util/pm_string_list.h +25 -6
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +31 -17
- data/include/prism/version.h +27 -2
- data/include/prism.h +224 -31
- data/lib/prism/compiler.rb +6 -3
- data/lib/prism/debug.rb +23 -7
- data/lib/prism/dispatcher.rb +33 -18
- data/lib/prism/dsl.rb +10 -5
- data/lib/prism/ffi.rb +132 -80
- data/lib/prism/lex_compat.rb +25 -15
- data/lib/prism/mutation_compiler.rb +10 -5
- data/lib/prism/node.rb +370 -135
- data/lib/prism/node_ext.rb +1 -1
- data/lib/prism/node_inspector.rb +1 -1
- data/lib/prism/pack.rb +79 -40
- data/lib/prism/parse_result/comments.rb +7 -2
- data/lib/prism/parse_result/newlines.rb +4 -0
- data/lib/prism/parse_result.rb +150 -30
- data/lib/prism/pattern.rb +11 -0
- data/lib/prism/ripper_compat.rb +28 -10
- data/lib/prism/serialize.rb +86 -54
- data/lib/prism/visitor.rb +10 -3
- data/lib/prism.rb +20 -2
- data/prism.gemspec +4 -2
- data/rbi/prism.rbi +104 -60
- data/rbi/prism_static.rbi +16 -2
- data/sig/prism.rbs +72 -43
- data/sig/prism_static.rbs +14 -1
- data/src/diagnostic.c +56 -53
- data/src/enc/pm_big5.c +1 -0
- data/src/enc/pm_euc_jp.c +1 -0
- data/src/enc/pm_gbk.c +1 -0
- data/src/enc/pm_shift_jis.c +1 -0
- data/src/enc/pm_tables.c +316 -80
- data/src/enc/pm_unicode.c +53 -8
- data/src/enc/pm_windows_31j.c +1 -0
- data/src/node.c +334 -321
- data/src/options.c +170 -0
- data/src/prettyprint.c +74 -47
- data/src/prism.c +1642 -856
- data/src/regexp.c +151 -95
- data/src/serialize.c +44 -20
- data/src/token_type.c +3 -1
- data/src/util/pm_buffer.c +45 -15
- data/src/util/pm_char.c +103 -57
- data/src/util/pm_constant_pool.c +51 -21
- data/src/util/pm_list.c +12 -4
- data/src/util/pm_memchr.c +5 -3
- data/src/util/pm_newline_list.c +20 -12
- data/src/util/pm_state_stack.c +9 -3
- data/src/util/pm_string.c +95 -85
- data/src/util/pm_string_list.c +14 -15
- data/src/util/pm_strncasecmp.c +10 -3
- data/src/util/pm_strpbrk.c +25 -19
- metadata +5 -3
- data/docs/prism.png +0 -0
data/src/regexp.c
CHANGED
@@ -1,16 +1,31 @@
|
|
1
1
|
#include "prism/regexp.h"
|
2
2
|
|
3
|
-
|
3
|
+
/**
|
4
|
+
* This is the parser that is going to handle parsing regular expressions.
|
5
|
+
*/
|
4
6
|
typedef struct {
|
7
|
+
/** A pointer to the start of the source that we are parsing. */
|
5
8
|
const uint8_t *start;
|
9
|
+
|
10
|
+
/** A pointer to the current position in the source. */
|
6
11
|
const uint8_t *cursor;
|
12
|
+
|
13
|
+
/** A pointer to the end of the source that we are parsing. */
|
7
14
|
const uint8_t *end;
|
15
|
+
|
16
|
+
/** A list of named captures that we've found. */
|
8
17
|
pm_string_list_t *named_captures;
|
18
|
+
|
19
|
+
/** Whether the encoding has changed from the default. */
|
9
20
|
bool encoding_changed;
|
21
|
+
|
22
|
+
/** The encoding of the source. */
|
10
23
|
pm_encoding_t *encoding;
|
11
24
|
} pm_regexp_parser_t;
|
12
25
|
|
13
|
-
|
26
|
+
/**
|
27
|
+
* This initializes a new parser with the given source.
|
28
|
+
*/
|
14
29
|
static void
|
15
30
|
pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
|
16
31
|
*parser = (pm_regexp_parser_t) {
|
@@ -23,7 +38,9 @@ pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const ui
|
|
23
38
|
};
|
24
39
|
}
|
25
40
|
|
26
|
-
|
41
|
+
/**
|
42
|
+
* This appends a new string to the list of named captures.
|
43
|
+
*/
|
27
44
|
static void
|
28
45
|
pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
29
46
|
pm_string_t string;
|
@@ -32,13 +49,17 @@ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start,
|
|
32
49
|
pm_string_free(&string);
|
33
50
|
}
|
34
51
|
|
35
|
-
|
52
|
+
/**
|
53
|
+
* Returns true if the next character is the end of the source.
|
54
|
+
*/
|
36
55
|
static inline bool
|
37
56
|
pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
|
38
57
|
return parser->cursor >= parser->end;
|
39
58
|
}
|
40
59
|
|
41
|
-
|
60
|
+
/**
|
61
|
+
* Optionally accept a char and consume it if it exists.
|
62
|
+
*/
|
42
63
|
static inline bool
|
43
64
|
pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
|
44
65
|
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
@@ -48,7 +69,9 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
|
|
48
69
|
return false;
|
49
70
|
}
|
50
71
|
|
51
|
-
|
72
|
+
/**
|
73
|
+
* Expect a character to be present and consume it.
|
74
|
+
*/
|
52
75
|
static inline bool
|
53
76
|
pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
|
54
77
|
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
@@ -58,7 +81,9 @@ pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
|
|
58
81
|
return false;
|
59
82
|
}
|
60
83
|
|
61
|
-
|
84
|
+
/**
|
85
|
+
* This advances the current token to the next instance of the given character.
|
86
|
+
*/
|
62
87
|
static bool
|
63
88
|
pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
|
64
89
|
if (pm_regexp_char_is_eof(parser)) {
|
@@ -74,37 +99,39 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
|
|
74
99
|
return true;
|
75
100
|
}
|
76
101
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
102
|
+
/**
|
103
|
+
* Range quantifiers are a special class of quantifiers that look like
|
104
|
+
*
|
105
|
+
* * {digit}
|
106
|
+
* * {digit,}
|
107
|
+
* * {digit,digit}
|
108
|
+
* * {,digit}
|
109
|
+
*
|
110
|
+
* Unfortunately, if there are any spaces in between, then this just becomes a
|
111
|
+
* regular character match expression and we have to backtrack. So when this
|
112
|
+
* function first starts running, we'll create a "save" point and then attempt
|
113
|
+
* to parse the quantifier. If it fails, we'll restore the save point and
|
114
|
+
* return.
|
115
|
+
*
|
116
|
+
* To properly track everything, we're going to build a little state machine.
|
117
|
+
* It looks something like the following:
|
118
|
+
*
|
119
|
+
*                  ┌───────┐                 ┌─────────┐ ────────────┐
|
120
|
+
* ──── lbrace ───> │ start │ ──── digit ───> │ minimum │             │
|
121
|
+
*                  └───────┘                 └─────────┘ <─── digit ─┘
|
122
|
+
*                      │                       │      │
|
123
|
+
*   ┌───────┐          │                       │   rbrace
|
124
|
+
*   │ comma │ <───── comma  ┌──── comma ───────┘      │
|
125
|
+
*   └───────┘               V                         V
|
126
|
+
*      │               ┌─────────┐               ┌─────────┐
|
127
|
+
*      └── digit ──>   │ maximum │ ── rbrace ──> │| final |│
|
128
|
+
*                      └─────────┘               └─────────┘
|
129
|
+
*                      │         ^
|
130
|
+
*                      └─ digit ─┘
|
131
|
+
*
|
132
|
+
* Note that by the time we've hit this function, the lbrace has already been
|
133
|
+
* consumed so we're in the start state.
|
134
|
+
*/
|
108
135
|
static bool
|
109
136
|
pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
|
110
137
|
const uint8_t *savepoint = parser->cursor;
|
@@ -180,12 +207,14 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
|
|
180
207
|
return true;
|
181
208
|
}
|
182
209
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
210
|
+
/**
|
211
|
+
* quantifier : star-quantifier
|
212
|
+
* | plus-quantifier
|
213
|
+
* | optional-quantifier
|
214
|
+
* | range-quantifier
|
215
|
+
* | <empty>
|
216
|
+
* ;
|
217
|
+
*/
|
189
218
|
static bool
|
190
219
|
pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
|
191
220
|
if (pm_regexp_char_is_eof(parser)) return true;
|
@@ -205,8 +234,10 @@ pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
|
|
205
234
|
}
|
206
235
|
}
|
207
236
|
|
208
|
-
|
209
|
-
|
237
|
+
/**
|
238
|
+
* match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
|
239
|
+
* ;
|
240
|
+
*/
|
210
241
|
static bool
|
211
242
|
pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
|
212
243
|
if (!pm_regexp_char_expect(parser, ':')) {
|
@@ -226,8 +257,10 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
|
|
226
257
|
static bool
|
227
258
|
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
|
228
259
|
|
229
|
-
|
230
|
-
|
260
|
+
/**
|
261
|
+
* match-char-set : '[' '^'? (match-range | match-char)* ']'
|
262
|
+
* ;
|
263
|
+
*/
|
231
264
|
static bool
|
232
265
|
pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
|
233
266
|
pm_regexp_char_accept(parser, '^');
|
@@ -251,7 +284,9 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
|
|
251
284
|
return pm_regexp_char_expect(parser, ']');
|
252
285
|
}
|
253
286
|
|
254
|
-
|
287
|
+
/**
|
288
|
+
* A left bracket can either mean a POSIX class or a character set.
|
289
|
+
*/
|
255
290
|
static bool
|
256
291
|
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
|
257
292
|
const uint8_t *reset = parser->cursor;
|
@@ -271,8 +306,10 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
|
|
271
306
|
static bool
|
272
307
|
pm_regexp_parse_expression(pm_regexp_parser_t *parser);
|
273
308
|
|
274
|
-
|
275
|
-
|
309
|
+
/**
|
310
|
+
* These are the states of the options that are configurable on the regular
|
311
|
+
* expression (or from within a group).
|
312
|
+
*/
|
276
313
|
typedef enum {
|
277
314
|
PM_REGEXP_OPTION_STATE_INVALID,
|
278
315
|
PM_REGEXP_OPTION_STATE_TOGGLEABLE,
|
@@ -283,16 +320,22 @@ typedef enum {
|
|
283
320
|
|
284
321
|
// These are the options that are configurable on the regular expression (or
|
285
322
|
// from within a group).
|
323
|
+
|
286
324
|
#define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
|
287
325
|
#define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
|
288
326
|
#define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
|
289
327
|
|
290
|
-
|
328
|
+
/**
|
329
|
+
* This is the set of options that are configurable on the regular expression.
|
330
|
+
*/
|
291
331
|
typedef struct {
|
332
|
+
/** The current state of each option. */
|
292
333
|
uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
|
293
334
|
} pm_regexp_options_t;
|
294
335
|
|
295
|
-
|
336
|
+
/**
|
337
|
+
* Initialize a new set of options to their default values.
|
338
|
+
*/
|
296
339
|
static void
|
297
340
|
pm_regexp_options_init(pm_regexp_options_t *options) {
|
298
341
|
memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
|
@@ -304,8 +347,10 @@ pm_regexp_options_init(pm_regexp_options_t *options) {
|
|
304
347
|
options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
|
305
348
|
}
|
306
349
|
|
307
|
-
|
308
|
-
|
350
|
+
/**
|
351
|
+
* Attempt to add the given option to the set of options. Returns true if it was
|
352
|
+
* added, false if it was already present.
|
353
|
+
*/
|
309
354
|
static bool
|
310
355
|
pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
|
311
356
|
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
@@ -327,8 +372,10 @@ pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
|
|
327
372
|
return false;
|
328
373
|
}
|
329
374
|
|
330
|
-
|
331
|
-
|
375
|
+
/**
|
376
|
+
* Attempt to remove the given option from the set of options. Returns true if
|
377
|
+
* it was removed, false if it was already absent.
|
378
|
+
*/
|
332
379
|
static bool
|
333
380
|
pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
334
381
|
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
@@ -349,26 +396,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
|
349
396
|
return false;
|
350
397
|
}
|
351
398
|
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
399
|
+
/**
|
400
|
+
* Groups can have quite a few different patterns for syntax. They basically
|
401
|
+
* just wrap a set of expressions, but they can potentially have options after a
|
402
|
+
* question mark. If there _isn't_ a question mark, then it's just a set of
|
403
|
+
* expressions. If there _is_, then here are the options:
|
404
|
+
*
|
405
|
+
* * (?#...) - inline comments
|
406
|
+
* * (?:subexp) - non-capturing group
|
407
|
+
* * (?=subexp) - positive lookahead
|
408
|
+
* * (?!subexp) - negative lookahead
|
409
|
+
* * (?>subexp) - atomic group
|
410
|
+
* * (?~subexp) - absence operator
|
411
|
+
* * (?<=subexp) - positive lookbehind
|
412
|
+
* * (?<!subexp) - negative lookbehind
|
413
|
+
* * (?<name>subexp) - named capturing group
|
414
|
+
* * (?'name'subexp) - named capturing group
|
415
|
+
* * (?(cond)yes-subexp) - conditional expression
|
416
|
+
* * (?(cond)yes-subexp|no-subexp) - conditional expression
|
417
|
+
* * (?imxdau-imx) - turn on and off configuration
|
418
|
+
* * (?imxdau-imx:subexp) - turn on and off configuration for an expression
|
419
|
+
*/
|
372
420
|
static bool
|
373
421
|
pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
374
422
|
// First, parse any options for the group.
|
@@ -503,16 +551,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
|
|
503
551
|
return pm_regexp_char_expect(parser, ')');
|
504
552
|
}
|
505
553
|
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
554
|
+
/**
|
555
|
+
* item : anchor
|
556
|
+
* | match-posix-class
|
557
|
+
* | match-char-set
|
558
|
+
* | match-char-class
|
559
|
+
* | match-char-prop
|
560
|
+
* | match-char
|
561
|
+
* | match-any
|
562
|
+
* | group
|
563
|
+
* | quantified
|
564
|
+
* ;
|
565
|
+
*/
|
516
566
|
static bool
|
517
567
|
pm_regexp_parse_item(pm_regexp_parser_t *parser) {
|
518
568
|
switch (*parser->cursor++) {
|
@@ -533,8 +583,10 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
|
|
533
583
|
}
|
534
584
|
}
|
535
585
|
|
536
|
-
|
537
|
-
|
586
|
+
/**
|
587
|
+
* expression : item+
|
588
|
+
* ;
|
589
|
+
*/
|
538
590
|
static bool
|
539
591
|
pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
|
540
592
|
if (!pm_regexp_parse_item(parser)) {
|
@@ -550,10 +602,12 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
|
|
550
602
|
return true;
|
551
603
|
}
|
552
604
|
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
605
|
+
/**
|
606
|
+
* pattern : EOF
|
607
|
+
* | expression EOF
|
608
|
+
* | expression '|' pattern
|
609
|
+
* ;
|
610
|
+
*/
|
557
611
|
static bool
|
558
612
|
pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
|
559
613
|
return (
|
@@ -572,8 +626,10 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
|
|
572
626
|
);
|
573
627
|
}
|
574
628
|
|
575
|
-
|
576
|
-
|
629
|
+
/**
|
630
|
+
* Parse a regular expression and extract the names of all of the named capture
|
631
|
+
* groups.
|
632
|
+
*/
|
577
633
|
PRISM_EXPORTED_FUNCTION bool
|
578
634
|
pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
|
579
635
|
pm_regexp_parser_t parser;
|
data/src/serialize.c
CHANGED
@@ -54,7 +54,7 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe
|
|
54
54
|
}
|
55
55
|
}
|
56
56
|
|
57
|
-
void
|
57
|
+
static void
|
58
58
|
pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
59
59
|
pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
|
60
60
|
|
@@ -1131,16 +1131,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1131
1131
|
}
|
1132
1132
|
break;
|
1133
1133
|
}
|
1134
|
-
case PM_KEYWORD_PARAMETER_NODE: {
|
1135
|
-
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_parameter_node_t *)node)->name));
|
1136
|
-
pm_serialize_location(parser, &((pm_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1137
|
-
if (((pm_keyword_parameter_node_t *)node)->value == NULL) {
|
1138
|
-
pm_buffer_append_byte(buffer, 0);
|
1139
|
-
} else {
|
1140
|
-
pm_serialize_node(parser, (pm_node_t *)((pm_keyword_parameter_node_t *)node)->value, buffer);
|
1141
|
-
}
|
1142
|
-
break;
|
1143
|
-
}
|
1144
1134
|
case PM_KEYWORD_REST_PARAMETER_NODE: {
|
1145
1135
|
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
|
1146
1136
|
if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
|
@@ -1348,6 +1338,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1348
1338
|
pm_buffer_append_varint(buffer, ((pm_numbered_reference_read_node_t *)node)->number);
|
1349
1339
|
break;
|
1350
1340
|
}
|
1341
|
+
case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
|
1342
|
+
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
|
1343
|
+
pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1344
|
+
pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
|
1345
|
+
break;
|
1346
|
+
}
|
1351
1347
|
case PM_OPTIONAL_PARAMETER_NODE: {
|
1352
1348
|
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
|
1353
1349
|
pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
|
@@ -1482,6 +1478,11 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1482
1478
|
pm_buffer_append_varint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
|
1483
1479
|
break;
|
1484
1480
|
}
|
1481
|
+
case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
|
1482
|
+
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
|
1483
|
+
pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
|
1484
|
+
break;
|
1485
|
+
}
|
1485
1486
|
case PM_REQUIRED_PARAMETER_NODE: {
|
1486
1487
|
pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
|
1487
1488
|
break;
|
@@ -1785,6 +1786,9 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu
|
|
1785
1786
|
pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start));
|
1786
1787
|
}
|
1787
1788
|
|
1789
|
+
/**
|
1790
|
+
* Serialize the given list of comments to the given buffer.
|
1791
|
+
*/
|
1788
1792
|
void
|
1789
1793
|
pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
|
1790
1794
|
pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list)));
|
@@ -1838,6 +1842,9 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *
|
|
1838
1842
|
}
|
1839
1843
|
}
|
1840
1844
|
|
1845
|
+
/**
|
1846
|
+
* Serialize the name of the encoding to the buffer.
|
1847
|
+
*/
|
1841
1848
|
void
|
1842
1849
|
pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
1843
1850
|
size_t encoding_length = strlen(encoding->name);
|
@@ -1845,10 +1852,14 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
|
1845
1852
|
pm_buffer_append_string(buffer, encoding->name, encoding_length);
|
1846
1853
|
}
|
1847
1854
|
|
1848
|
-
#line
|
1855
|
+
#line 206 "serialize.c.erb"
|
1856
|
+
/**
|
1857
|
+
* Serialize the encoding, metadata, nodes, and constant pool.
|
1858
|
+
*/
|
1849
1859
|
void
|
1850
1860
|
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1851
1861
|
pm_serialize_encoding(&parser->encoding, buffer);
|
1862
|
+
pm_buffer_append_varint(buffer, parser->start_line);
|
1852
1863
|
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
|
1853
1864
|
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
|
1854
1865
|
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
|
@@ -1921,10 +1932,16 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
|
|
1921
1932
|
pm_buffer_append_varint(buffer, parser->lex_state);
|
1922
1933
|
}
|
1923
1934
|
|
1935
|
+
/**
|
1936
|
+
* Lex the given source and serialize to the given buffer.
|
1937
|
+
*/
|
1924
1938
|
PRISM_EXPORTED_FUNCTION void
|
1925
|
-
|
1939
|
+
pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
|
1940
|
+
pm_options_t options = { 0 };
|
1941
|
+
if (data != NULL) pm_options_read(&options, data);
|
1942
|
+
|
1926
1943
|
pm_parser_t parser;
|
1927
|
-
pm_parser_init(&parser, source, size,
|
1944
|
+
pm_parser_init(&parser, source, size, &options);
|
1928
1945
|
|
1929
1946
|
pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
|
1930
1947
|
.data = (void *) buffer,
|
@@ -1934,10 +1951,11 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
|
|
1934
1951
|
parser.lex_callback = &lex_callback;
|
1935
1952
|
pm_node_t *node = pm_parse(&parser);
|
1936
1953
|
|
1937
|
-
// Append 0 to mark end of tokens
|
1954
|
+
// Append 0 to mark end of tokens.
|
1938
1955
|
pm_buffer_append_byte(buffer, 0);
|
1939
1956
|
|
1940
1957
|
pm_serialize_encoding(&parser.encoding, buffer);
|
1958
|
+
pm_buffer_append_varint(buffer, parser.start_line);
|
1941
1959
|
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
|
1942
1960
|
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
|
1943
1961
|
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
|
@@ -1945,15 +1963,20 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
|
|
1945
1963
|
|
1946
1964
|
pm_node_destroy(&parser, node);
|
1947
1965
|
pm_parser_free(&parser);
|
1966
|
+
pm_options_free(&options);
|
1948
1967
|
}
|
1949
1968
|
|
1950
|
-
|
1951
|
-
|
1969
|
+
/**
|
1970
|
+
* Parse and serialize both the AST and the tokens represented by the given
|
1971
|
+
* source to the given buffer.
|
1972
|
+
*/
|
1952
1973
|
PRISM_EXPORTED_FUNCTION void
|
1953
|
-
|
1974
|
+
pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
|
1975
|
+
pm_options_t options = { 0 };
|
1976
|
+
if (data != NULL) pm_options_read(&options, data);
|
1977
|
+
|
1954
1978
|
pm_parser_t parser;
|
1955
|
-
pm_parser_init(&parser, source, size,
|
1956
|
-
if (metadata) pm_parser_metadata(&parser, metadata);
|
1979
|
+
pm_parser_init(&parser, source, size, &options);
|
1957
1980
|
|
1958
1981
|
pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
|
1959
1982
|
.data = (void *) buffer,
|
@@ -1968,4 +1991,5 @@ pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer,
|
|
1968
1991
|
|
1969
1992
|
pm_node_destroy(&parser, node);
|
1970
1993
|
pm_parser_free(&parser);
|
1994
|
+
pm_options_free(&options);
|
1971
1995
|
}
|
data/src/token_type.c
CHANGED
@@ -9,7 +9,9 @@
|
|
9
9
|
|
10
10
|
#include "prism/ast.h"
|
11
11
|
|
12
|
-
|
12
|
+
/**
|
13
|
+
* Returns a string representation of the given token type.
|
14
|
+
*/
|
13
15
|
PRISM_EXPORTED_FUNCTION const char *
|
14
16
|
pm_token_type_to_str(pm_token_type_t token_type)
|
15
17
|
{
|