prism 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
data/src/regexp.c CHANGED
@@ -1,16 +1,31 @@
1
1
  #include "prism/regexp.h"
2
2
 
3
- // This is the parser that is going to handle parsing regular expressions.
3
+ /**
4
+ * This is the parser that is going to handle parsing regular expressions.
5
+ */
4
6
  typedef struct {
7
+ /** A pointer to the start of the source that we are parsing. */
5
8
  const uint8_t *start;
9
+
10
+ /** A pointer to the current position in the source. */
6
11
  const uint8_t *cursor;
12
+
13
+ /** A pointer to the end of the source that we are parsing. */
7
14
  const uint8_t *end;
15
+
16
+ /** A list of named captures that we've found. */
8
17
  pm_string_list_t *named_captures;
18
+
19
+ /** Whether the encoding has changed from the default. */
9
20
  bool encoding_changed;
21
+
22
+ /** The encoding of the source. */
10
23
  pm_encoding_t *encoding;
11
24
  } pm_regexp_parser_t;
12
25
 
13
- // This initializes a new parser with the given source.
26
+ /**
27
+ * This initializes a new parser with the given source.
28
+ */
14
29
  static void
15
30
  pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
16
31
  *parser = (pm_regexp_parser_t) {
@@ -23,7 +38,9 @@ pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const ui
23
38
  };
24
39
  }
25
40
 
26
- // This appends a new string to the list of named captures.
41
+ /**
42
+ * This appends a new string to the list of named captures.
43
+ */
27
44
  static void
28
45
  pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
29
46
  pm_string_t string;
@@ -32,13 +49,17 @@ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start,
32
49
  pm_string_free(&string);
33
50
  }
34
51
 
35
- // Returns true if the next character is the end of the source.
52
+ /**
53
+ * Returns true if the next character is the end of the source.
54
+ */
36
55
  static inline bool
37
56
  pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
38
57
  return parser->cursor >= parser->end;
39
58
  }
40
59
 
41
- // Optionally accept a char and consume it if it exists.
60
+ /**
61
+ * Optionally accept a char and consume it if it exists.
62
+ */
42
63
  static inline bool
43
64
  pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
44
65
  if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
@@ -48,7 +69,9 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
48
69
  return false;
49
70
  }
50
71
 
51
- // Expect a character to be present and consume it.
72
+ /**
73
+ * Expect a character to be present and consume it.
74
+ */
52
75
  static inline bool
53
76
  pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
54
77
  if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
@@ -58,7 +81,9 @@ pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
58
81
  return false;
59
82
  }
60
83
 
61
- // This advances the current token to the next instance of the given character.
84
+ /**
85
+ * This advances the current token to the next instance of the given character.
86
+ */
62
87
  static bool
63
88
  pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
64
89
  if (pm_regexp_char_is_eof(parser)) {
@@ -74,37 +99,39 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
74
99
  return true;
75
100
  }
76
101
 
77
- // Range quantifiers are a special class of quantifiers that look like
78
- //
79
- // * {digit}
80
- // * {digit,}
81
- // * {digit,digit}
82
- // * {,digit}
83
- //
84
- // Unfortunately, if there are any spaces in between, then this just becomes a
85
- // regular character match expression and we have to backtrack. So when this
86
- // function first starts running, we'll create a "save" point and then attempt
87
- // to parse the quantifier. If it fails, we'll restore the save point and
88
- // return.
89
- //
90
- // To properly track everything, we're going to build a little state machine.
91
- // It looks something like the following:
92
- //
93
- // ┌───────┐ ┌─────────┐ ────────────┐
94
- // ──── lbrace ───> │ start │ ──── digit ───> │ minimum │ │
95
- // └───────┘ └─────────┘ <─── digit ─┘
96
- // │ │ │
97
- // ┌───────┐ │ │ rbrace
98
- // comma <───── comma ┌──── comma ───────┘ │
99
- // └───────┘ V V
100
- // │ ┌─────────┐ ┌─────────┐
101
- // └── digit ──> maximum │ ── rbrace ──> │| final |│
102
- // └─────────┘ └─────────┘
103
- // │ ^
104
- // └─ digit ─┘
105
- //
106
- // Note that by the time we've hit this function, the lbrace has already been
107
- // consumed so we're in the start state.
102
+ /**
103
+ * Range quantifiers are a special class of quantifiers that look like
104
+ *
105
+ * * {digit}
106
+ * * {digit,}
107
+ * * {digit,digit}
108
+ * * {,digit}
109
+ *
110
+ * Unfortunately, if there are any spaces in between, then this just becomes a
111
+ * regular character match expression and we have to backtrack. So when this
112
+ * function first starts running, we'll create a "save" point and then attempt
113
+ * to parse the quantifier. If it fails, we'll restore the save point and
114
+ * return.
115
+ *
116
+ * To properly track everything, we're going to build a little state machine.
117
+ * It looks something like the following:
118
+ *
119
+ * ┌───────┐ ┌─────────┐ ────────────┐
120
+ * ──── lbrace ───> │ start │ ──── digit ───> │ minimum │ │
121
+ * └───────┘ └─────────┘ <─── digit ─┘
122
+ * │ │
123
+ * ┌───────┐ rbrace
124
+ * │ comma │ <───── comma ┌──── comma ───────┘ │
125
+ * └───────┘ V V
126
+ * ┌─────────┐ ┌─────────┐
127
+ * └── digit ──> │ maximum │ ── rbrace ──> │| final |│
128
+ * └─────────┘ └─────────┘
129
+ * │ ^
130
+ * └─ digit ─┘
131
+ *
132
+ * Note that by the time we've hit this function, the lbrace has already been
133
+ * consumed so we're in the start state.
134
+ */
108
135
  static bool
109
136
  pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
110
137
  const uint8_t *savepoint = parser->cursor;
@@ -180,12 +207,14 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
180
207
  return true;
181
208
  }
182
209
 
183
- // quantifier : star-quantifier
184
- // | plus-quantifier
185
- // | optional-quantifier
186
- // | range-quantifier
187
- // | <empty>
188
- // ;
210
+ /**
211
+ * quantifier : star-quantifier
212
+ * | plus-quantifier
213
+ * | optional-quantifier
214
+ * | range-quantifier
215
+ * | <empty>
216
+ * ;
217
+ */
189
218
  static bool
190
219
  pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
191
220
  if (pm_regexp_char_is_eof(parser)) return true;
@@ -205,8 +234,10 @@ pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
205
234
  }
206
235
  }
207
236
 
208
- // match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
209
- // ;
237
+ /**
238
+ * match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
239
+ * ;
240
+ */
210
241
  static bool
211
242
  pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
212
243
  if (!pm_regexp_char_expect(parser, ':')) {
@@ -226,8 +257,10 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
226
257
  static bool
227
258
  pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
228
259
 
229
- // match-char-set : '[' '^'? (match-range | match-char)* ']'
230
- // ;
260
+ /**
261
+ * match-char-set : '[' '^'? (match-range | match-char)* ']'
262
+ * ;
263
+ */
231
264
  static bool
232
265
  pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
233
266
  pm_regexp_char_accept(parser, '^');
@@ -251,7 +284,9 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
251
284
  return pm_regexp_char_expect(parser, ']');
252
285
  }
253
286
 
254
- // A left bracket can either mean a POSIX class or a character set.
287
+ /**
288
+ * A left bracket can either mean a POSIX class or a character set.
289
+ */
255
290
  static bool
256
291
  pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
257
292
  const uint8_t *reset = parser->cursor;
@@ -271,8 +306,10 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
271
306
  static bool
272
307
  pm_regexp_parse_expression(pm_regexp_parser_t *parser);
273
308
 
274
- // These are the states of the options that are configurable on the regular
275
- // expression (or from within a group).
309
+ /**
310
+ * These are the states of the options that are configurable on the regular
311
+ * expression (or from within a group).
312
+ */
276
313
  typedef enum {
277
314
  PM_REGEXP_OPTION_STATE_INVALID,
278
315
  PM_REGEXP_OPTION_STATE_TOGGLEABLE,
@@ -283,16 +320,22 @@ typedef enum {
283
320
 
284
321
  // These are the options that are configurable on the regular expression (or
285
322
  // from within a group).
323
+
286
324
  #define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
287
325
  #define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
288
326
  #define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
289
327
 
290
- // This is the set of options that are configurable on the regular expression.
328
+ /**
329
+ * This is the set of options that are configurable on the regular expression.
330
+ */
291
331
  typedef struct {
332
+ /** The current state of each option. */
292
333
  uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
293
334
  } pm_regexp_options_t;
294
335
 
295
- // Initialize a new set of options to their default values.
336
+ /**
337
+ * Initialize a new set of options to their default values.
338
+ */
296
339
  static void
297
340
  pm_regexp_options_init(pm_regexp_options_t *options) {
298
341
  memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
@@ -304,8 +347,10 @@ pm_regexp_options_init(pm_regexp_options_t *options) {
304
347
  options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
305
348
  }
306
349
 
307
- // Attempt to add the given option to the set of options. Returns true if it was
308
- // added, false if it was already present.
350
+ /**
351
+ * Attempt to add the given option to the set of options. Returns true if it was
352
+ * added, false if it was already present.
353
+ */
309
354
  static bool
310
355
  pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
311
356
  if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
@@ -327,8 +372,10 @@ pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
327
372
  return false;
328
373
  }
329
374
 
330
- // Attempt to remove the given option from the set of options. Returns true if
331
- // it was removed, false if it was already absent.
375
+ /**
376
+ * Attempt to remove the given option from the set of options. Returns true if
377
+ * it was removed, false if it was already absent.
378
+ */
332
379
  static bool
333
380
  pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
334
381
  if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
@@ -349,26 +396,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
349
396
  return false;
350
397
  }
351
398
 
352
- // Groups can have quite a few different patterns for syntax. They basically
353
- // just wrap a set of expressions, but they can potentially have options after a
354
- // question mark. If there _isn't_ a question mark, then it's just a set of
355
- // expressions. If there _is_, then here are the options:
356
- //
357
- // * (?#...) - inline comments
358
- // * (?:subexp) - non-capturing group
359
- // * (?=subexp) - positive lookahead
360
- // * (?!subexp) - negative lookahead
361
- // * (?>subexp) - atomic group
362
- // * (?~subexp) - absence operator
363
- // * (?<=subexp) - positive lookbehind
364
- // * (?<!subexp) - negative lookbehind
365
- // * (?<name>subexp) - named capturing group
366
- // * (?'name'subexp) - named capturing group
367
- // * (?(cond)yes-subexp) - conditional expression
368
- // * (?(cond)yes-subexp|no-subexp) - conditional expression
369
- // * (?imxdau-imx) - turn on and off configuration
370
- // * (?imxdau-imx:subexp) - turn on and off configuration for an expression
371
- //
399
+ /**
400
+ * Groups can have quite a few different patterns for syntax. They basically
401
+ * just wrap a set of expressions, but they can potentially have options after a
402
+ * question mark. If there _isn't_ a question mark, then it's just a set of
403
+ * expressions. If there _is_, then here are the options:
404
+ *
405
+ * * (?#...) - inline comments
406
+ * * (?:subexp) - non-capturing group
407
+ * * (?=subexp) - positive lookahead
408
+ * * (?!subexp) - negative lookahead
409
+ * * (?>subexp) - atomic group
410
+ * * (?~subexp) - absence operator
411
+ * * (?<=subexp) - positive lookbehind
412
+ * * (?<!subexp) - negative lookbehind
413
+ * * (?<name>subexp) - named capturing group
414
+ * * (?'name'subexp) - named capturing group
415
+ * * (?(cond)yes-subexp) - conditional expression
416
+ * * (?(cond)yes-subexp|no-subexp) - conditional expression
417
+ * * (?imxdau-imx) - turn on and off configuration
418
+ * * (?imxdau-imx:subexp) - turn on and off configuration for an expression
419
+ */
372
420
  static bool
373
421
  pm_regexp_parse_group(pm_regexp_parser_t *parser) {
374
422
  // First, parse any options for the group.
@@ -503,16 +551,18 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
503
551
  return pm_regexp_char_expect(parser, ')');
504
552
  }
505
553
 
506
- // item : anchor
507
- // | match-posix-class
508
- // | match-char-set
509
- // | match-char-class
510
- // | match-char-prop
511
- // | match-char
512
- // | match-any
513
- // | group
514
- // | quantified
515
- // ;
554
+ /**
555
+ * item : anchor
556
+ * | match-posix-class
557
+ * | match-char-set
558
+ * | match-char-class
559
+ * | match-char-prop
560
+ * | match-char
561
+ * | match-any
562
+ * | group
563
+ * | quantified
564
+ * ;
565
+ */
516
566
  static bool
517
567
  pm_regexp_parse_item(pm_regexp_parser_t *parser) {
518
568
  switch (*parser->cursor++) {
@@ -533,8 +583,10 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
533
583
  }
534
584
  }
535
585
 
536
- // expression : item+
537
- // ;
586
+ /**
587
+ * expression : item+
588
+ * ;
589
+ */
538
590
  static bool
539
591
  pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
540
592
  if (!pm_regexp_parse_item(parser)) {
@@ -550,10 +602,12 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
550
602
  return true;
551
603
  }
552
604
 
553
- // pattern : EOF
554
- // | expression EOF
555
- // | expression '|' pattern
556
- // ;
605
+ /**
606
+ * pattern : EOF
607
+ * | expression EOF
608
+ * | expression '|' pattern
609
+ * ;
610
+ */
557
611
  static bool
558
612
  pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
559
613
  return (
@@ -572,8 +626,10 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
572
626
  );
573
627
  }
574
628
 
575
- // Parse a regular expression and extract the names of all of the named capture
576
- // groups.
629
+ /**
630
+ * Parse a regular expression and extract the names of all of the named capture
631
+ * groups.
632
+ */
577
633
  PRISM_EXPORTED_FUNCTION bool
578
634
  pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
579
635
  pm_regexp_parser_t parser;
data/src/serialize.c CHANGED
@@ -54,7 +54,7 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe
54
54
  }
55
55
  }
56
56
 
57
- void
57
+ static void
58
58
  pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
59
59
  pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
60
60
 
@@ -1131,16 +1131,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1131
1131
  }
1132
1132
  break;
1133
1133
  }
1134
- case PM_KEYWORD_PARAMETER_NODE: {
1135
- pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_parameter_node_t *)node)->name));
1136
- pm_serialize_location(parser, &((pm_keyword_parameter_node_t *)node)->name_loc, buffer);
1137
- if (((pm_keyword_parameter_node_t *)node)->value == NULL) {
1138
- pm_buffer_append_byte(buffer, 0);
1139
- } else {
1140
- pm_serialize_node(parser, (pm_node_t *)((pm_keyword_parameter_node_t *)node)->value, buffer);
1141
- }
1142
- break;
1143
- }
1144
1134
  case PM_KEYWORD_REST_PARAMETER_NODE: {
1145
1135
  pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
1146
1136
  if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
@@ -1348,6 +1338,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1348
1338
  pm_buffer_append_varint(buffer, ((pm_numbered_reference_read_node_t *)node)->number);
1349
1339
  break;
1350
1340
  }
1341
+ case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
1342
+ pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
1343
+ pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
1344
+ pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
1345
+ break;
1346
+ }
1351
1347
  case PM_OPTIONAL_PARAMETER_NODE: {
1352
1348
  pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
1353
1349
  pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
@@ -1482,6 +1478,11 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1482
1478
  pm_buffer_append_varint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1483
1479
  break;
1484
1480
  }
1481
+ case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
1482
+ pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
1483
+ pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
1484
+ break;
1485
+ }
1485
1486
  case PM_REQUIRED_PARAMETER_NODE: {
1486
1487
  pm_buffer_append_varint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
1487
1488
  break;
@@ -1785,6 +1786,9 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu
1785
1786
  pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start));
1786
1787
  }
1787
1788
 
1789
+ /**
1790
+ * Serialize the given list of comments to the given buffer.
1791
+ */
1788
1792
  void
1789
1793
  pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
1790
1794
  pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list)));
@@ -1838,6 +1842,9 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *
1838
1842
  }
1839
1843
  }
1840
1844
 
1845
+ /**
1846
+ * Serialize the name of the encoding to the buffer.
1847
+ */
1841
1848
  void
1842
1849
  pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
1843
1850
  size_t encoding_length = strlen(encoding->name);
@@ -1845,10 +1852,14 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
1845
1852
  pm_buffer_append_string(buffer, encoding->name, encoding_length);
1846
1853
  }
1847
1854
 
1848
- #line 200 "serialize.c.erb"
1855
+ #line 206 "serialize.c.erb"
1856
+ /**
1857
+ * Serialize the encoding, metadata, nodes, and constant pool.
1858
+ */
1849
1859
  void
1850
1860
  pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1851
1861
  pm_serialize_encoding(&parser->encoding, buffer);
1862
+ pm_buffer_append_varint(buffer, parser->start_line);
1852
1863
  pm_serialize_comment_list(parser, &parser->comment_list, buffer);
1853
1864
  pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
1854
1865
  pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
@@ -1921,10 +1932,16 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
1921
1932
  pm_buffer_append_varint(buffer, parser->lex_state);
1922
1933
  }
1923
1934
 
1935
+ /**
1936
+ * Lex the given source and serialize to the given buffer.
1937
+ */
1924
1938
  PRISM_EXPORTED_FUNCTION void
1925
- pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer) {
1939
+ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
1940
+ pm_options_t options = { 0 };
1941
+ if (data != NULL) pm_options_read(&options, data);
1942
+
1926
1943
  pm_parser_t parser;
1927
- pm_parser_init(&parser, source, size, filepath);
1944
+ pm_parser_init(&parser, source, size, &options);
1928
1945
 
1929
1946
  pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
1930
1947
  .data = (void *) buffer,
@@ -1934,10 +1951,11 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
1934
1951
  parser.lex_callback = &lex_callback;
1935
1952
  pm_node_t *node = pm_parse(&parser);
1936
1953
 
1937
- // Append 0 to mark end of tokens
1954
+ // Append 0 to mark end of tokens.
1938
1955
  pm_buffer_append_byte(buffer, 0);
1939
1956
 
1940
1957
  pm_serialize_encoding(&parser.encoding, buffer);
1958
+ pm_buffer_append_varint(buffer, parser.start_line);
1941
1959
  pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
1942
1960
  pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
1943
1961
  pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
@@ -1945,15 +1963,20 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
1945
1963
 
1946
1964
  pm_node_destroy(&parser, node);
1947
1965
  pm_parser_free(&parser);
1966
+ pm_options_free(&options);
1948
1967
  }
1949
1968
 
1950
- // Parse and serialize both the AST and the tokens represented by the given
1951
- // source to the given buffer.
1969
+ /**
1970
+ * Parse and serialize both the AST and the tokens represented by the given
1971
+ * source to the given buffer.
1972
+ */
1952
1973
  PRISM_EXPORTED_FUNCTION void
1953
- pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) {
1974
+ pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
1975
+ pm_options_t options = { 0 };
1976
+ if (data != NULL) pm_options_read(&options, data);
1977
+
1954
1978
  pm_parser_t parser;
1955
- pm_parser_init(&parser, source, size, NULL);
1956
- if (metadata) pm_parser_metadata(&parser, metadata);
1979
+ pm_parser_init(&parser, source, size, &options);
1957
1980
 
1958
1981
  pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
1959
1982
  .data = (void *) buffer,
@@ -1968,4 +1991,5 @@ pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer,
1968
1991
 
1969
1992
  pm_node_destroy(&parser, node);
1970
1993
  pm_parser_free(&parser);
1994
+ pm_options_free(&options);
1971
1995
  }
data/src/token_type.c CHANGED
@@ -9,7 +9,9 @@
9
9
 
10
10
  #include "prism/ast.h"
11
11
 
12
- // Returns a string representation of the given token type.
12
+ /**
13
+ * Returns a string representation of the given token type.
14
+ */
13
15
  PRISM_EXPORTED_FUNCTION const char *
14
16
  pm_token_type_to_str(pm_token_type_t token_type)
15
17
  {