prism 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
data/src/regexp.c CHANGED
@@ -1,16 +1,31 @@
1
1
  #include "prism/regexp.h"
2
2
 
3
- // This is the parser that is going to handle parsing regular expressions.
3
+ /**
4
+ * This is the parser that is going to handle parsing regular expressions.
5
+ */
4
6
  typedef struct {
7
+ /** A pointer to the start of the source that we are parsing. */
5
8
  const uint8_t *start;
9
+
10
+ /** A pointer to the current position in the source. */
6
11
  const uint8_t *cursor;
12
+
13
+ /** A pointer to the end of the source that we are parsing. */
7
14
  const uint8_t *end;
15
+
16
+ /** A list of named captures that we've found. */
8
17
  pm_string_list_t *named_captures;
18
+
19
+ /** Whether the encoding has changed from the default. */
9
20
  bool encoding_changed;
21
+
22
+ /** The encoding of the source. */
10
23
  pm_encoding_t *encoding;
11
24
  } pm_regexp_parser_t;
12
25
 
13
- // This initializes a new parser with the given source.
26
+ /**
27
+ * This initializes a new parser with the given source.
28
+ */
14
29
  static void
15
30
  pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
16
31
  *parser = (pm_regexp_parser_t) {
@@ -23,7 +38,9 @@ pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const ui
23
38
  };
24
39
  }
25
40
 
26
- // This appends a new string to the list of named captures.
41
+ /**
42
+ * This appends a new string to the list of named captures.
43
+ */
27
44
  static void
28
45
  pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
29
46
  pm_string_t string;
@@ -32,13 +49,17 @@ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start,
32
49
  pm_string_free(&string);
33
50
  }
34
51
 
35
- // Returns true if the next character is the end of the source.
52
+ /**
53
+ * Returns true if the next character is the end of the source.
54
+ */
36
55
  static inline bool
37
56
  pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
38
57
  return parser->cursor >= parser->end;
39
58
  }
40
59
 
41
- // Optionally accept a char and consume it if it exists.
60
+ /**
61
+ * Optionally accept a char and consume it if it exists.
62
+ */
42
63
  static inline bool
43
64
  pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
44
65
  if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
@@ -48,7 +69,9 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
48
69
  return false;
49
70
  }
50
71
 
51
- // Expect a character to be present and consume it.
72
+ /**
73
+ * Expect a character to be present and consume it.
74
+ */
52
75
  static inline bool
53
76
  pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
54
77
  if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
@@ -58,7 +81,9 @@ pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
58
81
  return false;
59
82
  }
60
83
 
61
- // This advances the current token to the next instance of the given character.
84
+ /**
85
+ * This advances the current token to the next instance of the given character.
86
+ */
62
87
  static bool
63
88
  pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
64
89
  if (pm_regexp_char_is_eof(parser)) {
@@ -74,37 +99,39 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
74
99
  return true;
75
100
  }
76
101
 
77
- // Range quantifiers are a special class of quantifiers that look like
78
- //
79
- // * {digit}
80
- // * {digit,}
81
- // * {digit,digit}
82
- // * {,digit}
83
- //
84
- // Unfortunately, if there are any spaces in between, then this just becomes a
85
- // regular character match expression and we have to backtrack. So when this
86
- // function first starts running, we'll create a "save" point and then attempt
87
- // to parse the quantifier. If it fails, we'll restore the save point and
88
- // return.
89
- //
90
- // The properly track everything, we're going to build a little state machine.
91
- // It looks something like the following:
92
- //
93
- // ┌───────┐ ┌─────────┐ ────────────┐
94
- // ──── lbrace ───> │ start │ ──── digit ───> │ minimum │ │
95
- // └───────┘ └─────────┘ <─── digit ─┘
96
- // │ │ │
97
- // ┌───────┐ │ │ rbrace
98
- // comma <───── comma ┌──── comma ───────┘ │
99
- // └───────┘ V V
100
- // │ ┌─────────┐ ┌─────────┐
101
- // └── digit ──> maximum │ ── rbrace ──> │| final |│
102
- // └─────────┘ └─────────┘
103
- // │ ^
104
- // └─ digit ─┘
105
- //
106
- // Note that by the time we've hit this function, the lbrace has already been
107
- // consumed so we're in the start state.
102
+ /**
103
+ * Range quantifiers are a special class of quantifiers that look like
104
+ *
105
+ * * {digit}
106
+ * * {digit,}
107
+ * * {digit,digit}
108
+ * * {,digit}
109
+ *
110
+ * Unfortunately, if there are any spaces in between, then this just becomes a
111
+ * regular character match expression and we have to backtrack. So when this
112
+ * function first starts running, we'll create a "save" point and then attempt
113
+ * to parse the quantifier. If it fails, we'll restore the save point and
114
+ * return.
115
+ *
116
+ * To properly track everything, we're going to build a little state machine.
117
+ * It looks something like the following:
118
+ *
119
+ * ┌───────┐ ┌─────────┐ ────────────┐
120
+ * ──── lbrace ───> │ start │ ──── digit ───> │ minimum │ │
121
+ * └───────┘ └─────────┘ <─── digit ─┘
122
+ * │ │
123
+ * ┌───────┐ rbrace
124
+ * │ comma │ <───── comma ┌──── comma ───────┘ │
125
+ * └───────┘ V V
126
+ * ┌─────────┐ ┌─────────┐
127
+ * └── digit ──> │ maximum │ ── rbrace ──> │| final |│
128
+ * └─────────┘ └─────────┘
129
+ * │ ^
130
+ * └─ digit ─┘
131
+ *
132
+ * Note that by the time we've hit this function, the lbrace has already been
133
+ * consumed so we're in the start state.
134
+ */
108
135
  static bool
109
136
  pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
110
137
  const uint8_t *savepoint = parser->cursor;
@@ -180,12 +207,14 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
180
207
  return true;
181
208
  }
182
209
 
183
- // quantifier : star-quantifier
184
- // | plus-quantifier
185
- // | optional-quantifier
186
- // | range-quantifier
187
- // | <empty>
188
- // ;
210
+ /**
211
+ * quantifier : star-quantifier
212
+ * | plus-quantifier
213
+ * | optional-quantifier
214
+ * | range-quantifier
215
+ * | <empty>
216
+ * ;
217
+ */
189
218
  static bool
190
219
  pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
191
220
  if (pm_regexp_char_is_eof(parser)) return true;
@@ -205,8 +234,10 @@ pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
205
234
  }
206
235
  }
207
236
 
208
- // match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
209
- // ;
237
+ /**
238
+ * match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
239
+ * ;
240
+ */
210
241
  static bool
211
242
  pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
212
243
  if (!pm_regexp_char_expect(parser, ':')) {
@@ -226,8 +257,10 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
226
257
  static bool
227
258
  pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
228
259
 
229
- // match-char-set : '[' '^'? (match-range | match-char)* ']'
230
- // ;
260
+ /**
261
+ * match-char-set : '[' '^'? (match-range | match-char)* ']'
262
+ * ;
263
+ */
231
264
  static bool
232
265
  pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
233
266
  pm_regexp_char_accept(parser, '^');
@@ -251,7 +284,9 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
251
284
  return pm_regexp_char_expect(parser, ']');
252
285
  }
253
286
 
254
- // A left bracket can either mean a POSIX class or a character set.
287
+ /**
288
+ * A left bracket can either mean a POSIX class or a character set.
289
+ */
255
290
  static bool
256
291
  pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
257
292
  const uint8_t *reset = parser->cursor;
@@ -271,8 +306,10 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
271
306
  static bool
272
307
  pm_regexp_parse_expression(pm_regexp_parser_t *parser);
273
308
 
274
- // These are the states of the options that are configurable on the regular
275
- // expression (or from within a group).
309
+ /**
310
+ * These are the states of the options that are configurable on the regular
311
+ * expression (or from within a group).
312
+ */
276
313
  typedef enum {
277
314
  PM_REGEXP_OPTION_STATE_INVALID,
278
315
  PM_REGEXP_OPTION_STATE_TOGGLEABLE,
@@ -283,16 +320,22 @@ typedef enum {
283
320
 
284
321
  // These are the options that are configurable on the regular expression (or
285
322
  // from within a group).
323
+
286
324
  #define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
287
325
  #define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
288
326
  #define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
289
327
 
290
- // This is the set of options that are configurable on the regular expression.
328
+ /**
329
+ * This is the set of options that are configurable on the regular expression.
330
+ */
291
331
  typedef struct {
332
+ /** The current state of each option. */
292
333
  uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
293
334
  } pm_regexp_options_t;
294
335
 
295
- // Initialize a new set of options to their default values.
336
+ /**
337
+ * Initialize a new set of options to their default values.
338
+ */
296
339
  static void
297
340
  pm_regexp_options_init(pm_regexp_options_t *options) {
298
341
  memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
@@ -304,8 +347,10 @@ pm_regexp_options_init(pm_regexp_options_t *options) {
304
347
  options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
305
348
  }
306
349
 
307
- // Attempt to add the given option to the set of options. Returns true if it was
308
- // added, false if it was already present.
350
+ /**
351
+ * Attempt to add the given option to the set of options. Returns true if it was
352
+ * added, false if it was already present.
353
+ */
309
354
  static bool
310
355
  pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
311
356
  if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
@@ -327,8 +372,10 @@ pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
327
372
  return false;
328
373
  }
329
374
 
330
- // Attempt to remove the given option from the set of options. Returns true if
331
- // it was removed, false if it was already absent.
375
+ /**
376
+ * Attempt to remove the given option from the set of options. Returns true if
377
+ * it was removed, false if it was already absent.
378
+ */
332
379
  static bool
333
380
  pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
334
381
  if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
@@ -349,26 +396,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
349
396
  return false;
350
397
  }
351
398
 
352
- // Groups can have quite a few different patterns for syntax. They basically
353
- // just wrap a set of expressions, but they can potentially have options after a
354
- // question mark. If there _isn't_ a question mark, then it's just a set of
355
- // expressions. If there _is_, then here are the options:
356
- //
357
- // * (?#...) - inline comments
358
- // * (?:subexp) - non-capturing group
359
- // * (?=subexp) - positive lookahead
360
- // * (?!subexp) - negative lookahead
361
- // * (?>subexp) - atomic group
362
- // * (?~subexp) - absence operator
363
- // * (?<=subexp) - positive lookbehind
364
- // * (?<!subexp) - negative lookbehind
365
- // * (?<name>subexp) - named capturing group
366
- // * (?'name'subexp) - named capturing group
367
- // * (?(cond)yes-subexp) - conditional expression
368
- // * (?(cond)yes-subexp|no-subexp) - conditional expression
369
- // * (?imxdau-imx) - turn on and off configuration
370
- // * (?imxdau-imx:subexp) - turn on and off configuration for an expression
371
- //
399
+ /**
400
+ * Groups can have quite a few different patterns for syntax. They basically
401
+ * just wrap a set of expressions, but they can potentially have options after a
402
+ * question mark. If there _isn't_ a question mark, then it's just a set of
403
+ * expressions. If there _is_, then here are the options:
404
+ *
405
+ * * (?#...) - inline comments
406
+ * * (?:subexp) - non-capturing group
407
+ * * (?=subexp) - positive lookahead
408
+ * * (?!subexp) - negative lookahead
409
+ * * (?>subexp) - atomic group
410
+ * * (?~subexp) - absence operator
411
+ * * (?<=subexp) - positive lookbehind
412
+ * * (?<!subexp) - negative lookbehind
413
+ * * (?<name>subexp) - named capturing group
414
+ * * (?'name'subexp) - named capturing group
415
+ * * (?(cond)yes-subexp) - conditional expression
416
+ * * (?(cond)yes-subexp|no-subexp) - conditional expression
417
+ * * (?imxdau-imx) - turn on and off configuration
418
+ * * (?imxdau-imx:subexp) - turn on and off configuration for an expression
419
+ */
372
420
  static bool
373
421
  pm_regexp_parse_group(pm_regexp_parser_t *parser) {
374
422
  // First, parse any options for the group.
@@ -503,16 +551,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
503
551
  return pm_regexp_char_expect(parser, ')');
504
552
  }
505
553
 
506
- // item : anchor
507
- // | match-posix-class
508
- // | match-char-set
509
- // | match-char-class
510
- // | match-char-prop
511
- // | match-char
512
- // | match-any
513
- // | group
514
- // | quantified
515
- // ;
554
+ /**
555
+ * item : anchor
556
+ * | match-posix-class
557
+ * | match-char-set
558
+ * | match-char-class
559
+ * | match-char-prop
560
+ * | match-char
561
+ * | match-any
562
+ * | group
563
+ * | quantified
564
+ * ;
565
+ */
516
566
  static bool
517
567
  pm_regexp_parse_item(pm_regexp_parser_t *parser) {
518
568
  switch (*parser->cursor++) {
@@ -533,8 +583,10 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
533
583
  }
534
584
  }
535
585
 
536
- // expression : item+
537
- // ;
586
+ /**
587
+ * expression : item+
588
+ * ;
589
+ */
538
590
  static bool
539
591
  pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
540
592
  if (!pm_regexp_parse_item(parser)) {
@@ -550,10 +602,12 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
550
602
  return true;
551
603
  }
552
604
 
553
- // pattern : EOF
554
- // | expression EOF
555
- // | expression '|' pattern
556
- // ;
605
+ /**
606
+ * pattern : EOF
607
+ * | expression EOF
608
+ * | expression '|' pattern
609
+ * ;
610
+ */
557
611
  static bool
558
612
  pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
559
613
  return (
@@ -572,8 +626,10 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
572
626
  );
573
627
  }
574
628
 
575
- // Parse a regular expression and extract the names of all of the named capture
576
- // groups.
629
+ /**
630
+ * Parse a regular expression and extract the names of all of the named capture
631
+ * groups.
632
+ */
577
633
  PRISM_EXPORTED_FUNCTION bool
578
634
  pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
579
635
  pm_regexp_parser_t parser;
data/src/serialize.c CHANGED
@@ -54,7 +54,7 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe
54
54
  }
55
55
  }
56
56
 
57
- void
57
+ static void
58
58
  pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
59
59
  pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
60
60
 
@@ -1131,16 +1131,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1131
1131
  }
1132
1132
  break;
1133
1133
  }
1134
- case PM_KEYWORD_PARAMETER_NODE: {
1135
- pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_parameter_node_t *)node)->name));
1136
- pm_serialize_location(parser, &((pm_keyword_parameter_node_t *)node)->name_loc, buffer);
1137
- if (((pm_keyword_parameter_node_t *)node)->value == NULL) {
1138
- pm_buffer_append_byte(buffer, 0);
1139
- } else {
1140
- pm_serialize_node(parser, (pm_node_t *)((pm_keyword_parameter_node_t *)node)->value, buffer);
1141
- }
1142
- break;
1143
- }
1144
1134
  case PM_KEYWORD_REST_PARAMETER_NODE: {
1145
1135
  pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
1146
1136
  if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
@@ -1348,6 +1338,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1348
1338
  pm_buffer_append_varint(buffer, ((pm_numbered_reference_read_node_t *)node)->number);
1349
1339
  break;
1350
1340
  }
1341
+ case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
1342
+ pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
1343
+ pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
1344
+ pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
1345
+ break;
1346
+ }
1351
1347
  case PM_OPTIONAL_PARAMETER_NODE: {
1352
1348
  pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
1353
1349
  pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
@@ -1482,6 +1478,11 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1482
1478
  pm_buffer_append_varint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1483
1479
  break;
1484
1480
  }
1481
+ case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
1482
+ pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
1483
+ pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
1484
+ break;
1485
+ }
1485
1486
  case PM_REQUIRED_PARAMETER_NODE: {
1486
1487
  pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
1487
1488
  break;
@@ -1785,6 +1786,9 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu
1785
1786
  pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start));
1786
1787
  }
1787
1788
 
1789
+ /**
1790
+ * Serialize the given list of comments to the given buffer.
1791
+ */
1788
1792
  void
1789
1793
  pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
1790
1794
  pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list)));
@@ -1838,6 +1842,9 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *
1838
1842
  }
1839
1843
  }
1840
1844
 
1845
+ /**
1846
+ * Serialize the name of the encoding to the buffer.
1847
+ */
1841
1848
  void
1842
1849
  pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
1843
1850
  size_t encoding_length = strlen(encoding->name);
@@ -1845,10 +1852,14 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
1845
1852
  pm_buffer_append_string(buffer, encoding->name, encoding_length);
1846
1853
  }
1847
1854
 
1848
- #line 200 "serialize.c.erb"
1855
+ #line 206 "serialize.c.erb"
1856
+ /**
1857
+ * Serialize the encoding, metadata, nodes, and constant pool.
1858
+ */
1849
1859
  void
1850
1860
  pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1851
1861
  pm_serialize_encoding(&parser->encoding, buffer);
1862
+ pm_buffer_append_varint(buffer, parser->start_line);
1852
1863
  pm_serialize_comment_list(parser, &parser->comment_list, buffer);
1853
1864
  pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
1854
1865
  pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
@@ -1921,10 +1932,16 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
1921
1932
  pm_buffer_append_varint(buffer, parser->lex_state);
1922
1933
  }
1923
1934
 
1935
+ /**
1936
+ * Lex the given source and serialize to the given buffer.
1937
+ */
1924
1938
  PRISM_EXPORTED_FUNCTION void
1925
- pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer) {
1939
+ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
1940
+ pm_options_t options = { 0 };
1941
+ if (data != NULL) pm_options_read(&options, data);
1942
+
1926
1943
  pm_parser_t parser;
1927
- pm_parser_init(&parser, source, size, filepath);
1944
+ pm_parser_init(&parser, source, size, &options);
1928
1945
 
1929
1946
  pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
1930
1947
  .data = (void *) buffer,
@@ -1934,10 +1951,11 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
1934
1951
  parser.lex_callback = &lex_callback;
1935
1952
  pm_node_t *node = pm_parse(&parser);
1936
1953
 
1937
- // Append 0 to mark end of tokens
1954
+ // Append 0 to mark end of tokens.
1938
1955
  pm_buffer_append_byte(buffer, 0);
1939
1956
 
1940
1957
  pm_serialize_encoding(&parser.encoding, buffer);
1958
+ pm_buffer_append_varint(buffer, parser.start_line);
1941
1959
  pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
1942
1960
  pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
1943
1961
  pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
@@ -1945,15 +1963,20 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
1945
1963
 
1946
1964
  pm_node_destroy(&parser, node);
1947
1965
  pm_parser_free(&parser);
1966
+ pm_options_free(&options);
1948
1967
  }
1949
1968
 
1950
- // Parse and serialize both the AST and the tokens represented by the given
1951
- // source to the given buffer.
1969
+ /**
1970
+ * Parse and serialize both the AST and the tokens represented by the given
1971
+ * source to the given buffer.
1972
+ */
1952
1973
  PRISM_EXPORTED_FUNCTION void
1953
- pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) {
1974
+ pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
1975
+ pm_options_t options = { 0 };
1976
+ if (data != NULL) pm_options_read(&options, data);
1977
+
1954
1978
  pm_parser_t parser;
1955
- pm_parser_init(&parser, source, size, NULL);
1956
- if (metadata) pm_parser_metadata(&parser, metadata);
1979
+ pm_parser_init(&parser, source, size, &options);
1957
1980
 
1958
1981
  pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
1959
1982
  .data = (void *) buffer,
@@ -1968,4 +1991,5 @@ pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer,
1968
1991
 
1969
1992
  pm_node_destroy(&parser, node);
1970
1993
  pm_parser_free(&parser);
1994
+ pm_options_free(&options);
1971
1995
  }
data/src/token_type.c CHANGED
@@ -9,7 +9,9 @@
9
9
 
10
10
  #include "prism/ast.h"
11
11
 
12
- // Returns a string representation of the given token type.
12
+ /**
13
+ * Returns a string representation of the given token type.
14
+ */
13
15
  PRISM_EXPORTED_FUNCTION const char *
14
16
  pm_token_type_to_str(pm_token_type_t token_type)
15
17
  {