prism 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/README.md +1 -0
  5. data/config.yml +66 -9
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/ripper_translation.md +22 -0
  8. data/ext/prism/api_node.c +30 -12
  9. data/ext/prism/extension.c +107 -372
  10. data/ext/prism/extension.h +1 -1
  11. data/include/prism/ast.h +138 -70
  12. data/include/prism/diagnostic.h +7 -2
  13. data/include/prism/node.h +0 -21
  14. data/include/prism/parser.h +23 -25
  15. data/include/prism/regexp.h +17 -8
  16. data/include/prism/static_literals.h +3 -2
  17. data/include/prism/util/pm_char.h +1 -2
  18. data/include/prism/util/pm_constant_pool.h +0 -8
  19. data/include/prism/util/pm_integer.h +16 -9
  20. data/include/prism/util/pm_string.h +0 -8
  21. data/include/prism/version.h +2 -2
  22. data/include/prism.h +0 -11
  23. data/lib/prism/compiler.rb +3 -0
  24. data/lib/prism/dispatcher.rb +14 -0
  25. data/lib/prism/dot_visitor.rb +22 -3
  26. data/lib/prism/dsl.rb +7 -2
  27. data/lib/prism/ffi.rb +24 -3
  28. data/lib/prism/inspect_visitor.rb +10 -8
  29. data/lib/prism/mutation_compiler.rb +6 -1
  30. data/lib/prism/node.rb +166 -241
  31. data/lib/prism/node_ext.rb +21 -5
  32. data/lib/prism/parse_result/comments.rb +0 -7
  33. data/lib/prism/parse_result/newlines.rb +101 -11
  34. data/lib/prism/parse_result.rb +17 -0
  35. data/lib/prism/reflection.rb +3 -1
  36. data/lib/prism/serialize.rb +80 -67
  37. data/lib/prism/translation/parser/compiler.rb +134 -114
  38. data/lib/prism/translation/parser.rb +6 -1
  39. data/lib/prism/translation/ripper.rb +8 -6
  40. data/lib/prism/translation/ruby_parser.rb +23 -5
  41. data/lib/prism/visitor.rb +3 -0
  42. data/lib/prism.rb +0 -4
  43. data/prism.gemspec +1 -4
  44. data/rbi/prism/node.rbi +63 -6
  45. data/rbi/prism/visitor.rbi +3 -0
  46. data/rbi/prism.rbi +6 -0
  47. data/sig/prism/dsl.rbs +4 -1
  48. data/sig/prism/mutation_compiler.rbs +1 -0
  49. data/sig/prism/node.rbs +28 -4
  50. data/sig/prism/visitor.rbs +1 -0
  51. data/sig/prism.rbs +21 -0
  52. data/src/diagnostic.c +27 -17
  53. data/src/node.c +408 -1666
  54. data/src/prettyprint.c +49 -6
  55. data/src/prism.c +958 -991
  56. data/src/regexp.c +133 -68
  57. data/src/serialize.c +6 -1
  58. data/src/static_literals.c +63 -84
  59. data/src/token_type.c +2 -2
  60. data/src/util/pm_constant_pool.c +0 -8
  61. data/src/util/pm_integer.c +39 -11
  62. data/src/util/pm_string.c +0 -12
  63. data/src/util/pm_strpbrk.c +32 -6
  64. metadata +2 -5
  65. data/include/prism/util/pm_string_list.h +0 -44
  66. data/lib/prism/debug.rb +0 -249
  67. data/src/util/pm_string_list.c +0 -28
data/src/regexp.c CHANGED
@@ -1,9 +1,14 @@
1
1
  #include "prism/regexp.h"
2
2
 
3
+ #define PM_REGEXP_PARSE_DEPTH_MAX 4096
4
+
3
5
  /**
4
6
  * This is the parser that is going to handle parsing regular expressions.
5
7
  */
6
8
  typedef struct {
9
+ /** The parser that is currently being used. */
10
+ pm_parser_t *parser;
11
+
7
12
  /** A pointer to the start of the source that we are parsing. */
8
13
  const uint8_t *start;
9
14
 
@@ -13,39 +18,42 @@ typedef struct {
13
18
  /** A pointer to the end of the source that we are parsing. */
14
19
  const uint8_t *end;
15
20
 
16
- /** A list of named captures that we've found. */
17
- pm_string_list_t *named_captures;
18
-
19
21
  /** Whether the encoding has changed from the default. */
20
22
  bool encoding_changed;
21
23
 
22
24
  /** The encoding of the source. */
23
25
  const pm_encoding_t *encoding;
26
+
27
+ /** The callback to call when a named capture group is found. */
28
+ pm_regexp_name_callback_t name_callback;
29
+
30
+ /** The data to pass to the name callback. */
31
+ void *name_data;
32
+
33
+ /** The callback to call when a parse error is found. */
34
+ pm_regexp_error_callback_t error_callback;
35
+
36
+ /** The data to pass to the error callback. */
37
+ void *error_data;
24
38
  } pm_regexp_parser_t;
25
39
 
26
40
  /**
27
- * This initializes a new parser with the given source.
41
+ * Append an error to the parser.
28
42
  */
29
- static void
30
- pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding) {
31
- *parser = (pm_regexp_parser_t) {
32
- .start = start,
33
- .cursor = start,
34
- .end = end,
35
- .named_captures = named_captures,
36
- .encoding_changed = encoding_changed,
37
- .encoding = encoding
38
- };
43
+ static inline void
44
+ pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, const char *message) {
45
+ parser->error_callback(start, end, message, parser->error_data);
39
46
  }
40
47
 
41
48
  /**
42
- * This appends a new string to the list of named captures.
49
+ * This appends a new string to the list of named captures. This function
50
+ * assumes the caller has already checked the validity of the name callback.
43
51
  */
44
52
  static void
45
53
  pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
46
54
  pm_string_t string;
47
55
  pm_string_shared_init(&string, start, end);
48
- pm_string_list_append(parser->named_captures, &string);
56
+ parser->name_callback(&string, parser->name_data);
49
57
  pm_string_free(&string);
50
58
  }
51
59
 
@@ -217,21 +225,24 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
217
225
  */
218
226
  static bool
219
227
  pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
220
- if (pm_regexp_char_is_eof(parser)) return true;
221
-
222
- switch (*parser->cursor) {
223
- case '*':
224
- case '+':
225
- case '?':
226
- parser->cursor++;
227
- return true;
228
- case '{':
229
- parser->cursor++;
230
- return pm_regexp_parse_range_quantifier(parser);
231
- default:
232
- // In this case there is no quantifier.
233
- return true;
228
+ while (!pm_regexp_char_is_eof(parser)) {
229
+ switch (*parser->cursor) {
230
+ case '*':
231
+ case '+':
232
+ case '?':
233
+ parser->cursor++;
234
+ break;
235
+ case '{':
236
+ parser->cursor++;
237
+ if (!pm_regexp_parse_range_quantifier(parser)) return false;
238
+ break;
239
+ default:
240
+ // In this case there is no quantifier.
241
+ return true;
242
+ }
234
243
  }
244
+
245
+ return true;
235
246
  }
236
247
 
237
248
  /**
@@ -255,20 +266,20 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
255
266
 
256
267
  // Forward declaration because character sets can be nested.
257
268
  static bool
258
- pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
269
+ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth);
259
270
 
260
271
  /**
261
272
  * match-char-set : '[' '^'? (match-range | match-char)* ']'
262
273
  * ;
263
274
  */
264
275
  static bool
265
- pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
276
+ pm_regexp_parse_character_set(pm_regexp_parser_t *parser, uint16_t depth) {
266
277
  pm_regexp_char_accept(parser, '^');
267
278
 
268
279
  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ']') {
269
280
  switch (*parser->cursor++) {
270
281
  case '[':
271
- pm_regexp_parse_lbracket(parser);
282
+ pm_regexp_parse_lbracket(parser, (uint16_t) (depth + 1));
272
283
  break;
273
284
  case '\\':
274
285
  if (!pm_regexp_char_is_eof(parser)) {
@@ -288,7 +299,18 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
288
299
  * A left bracket can either mean a POSIX class or a character set.
289
300
  */
290
301
  static bool
291
- pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
302
+ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth) {
303
+ if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
304
+ pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
305
+ return false;
306
+ }
307
+
308
+ if ((parser->cursor < parser->end) && parser->cursor[0] == ']') {
309
+ parser->cursor++;
310
+ pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "empty char-class");
311
+ return true;
312
+ }
313
+
292
314
  const uint8_t *reset = parser->cursor;
293
315
 
294
316
  if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
@@ -298,13 +320,13 @@ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
298
320
  parser->cursor = reset;
299
321
  }
300
322
 
301
- return pm_regexp_parse_character_set(parser);
323
+ return pm_regexp_parse_character_set(parser, depth);
302
324
  }
303
325
 
304
326
  // Forward declaration here since parsing groups needs to go back up the grammar
305
327
  // to parse expressions within them.
306
328
  static bool
307
- pm_regexp_parse_expression(pm_regexp_parser_t *parser);
329
+ pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth);
308
330
 
309
331
  /**
310
332
  * These are the states of the options that are configurable on the regular
@@ -418,17 +440,27 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
418
440
  * * (?imxdau-imx:subexp) - turn on and off configuration for an expression
419
441
  */
420
442
  static bool
421
- pm_regexp_parse_group(pm_regexp_parser_t *parser) {
443
+ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
444
+ const uint8_t *group_start = parser->cursor;
445
+
422
446
  // First, parse any options for the group.
423
447
  if (pm_regexp_char_accept(parser, '?')) {
424
448
  if (pm_regexp_char_is_eof(parser)) {
449
+ pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
425
450
  return false;
426
451
  }
452
+
427
453
  pm_regexp_options_t options;
428
454
  pm_regexp_options_init(&options);
429
455
 
430
456
  switch (*parser->cursor) {
431
457
  case '#': { // inline comments
458
+ parser->cursor++;
459
+ if (pm_regexp_char_is_eof(parser)) {
460
+ pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
461
+ return false;
462
+ }
463
+
432
464
  if (parser->encoding_changed && parser->encoding->multibyte) {
433
465
  bool escaped = false;
434
466
 
@@ -472,6 +504,7 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
472
504
  case '<':
473
505
  parser->cursor++;
474
506
  if (pm_regexp_char_is_eof(parser)) {
507
+ pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
475
508
  return false;
476
509
  }
477
510
 
@@ -485,7 +518,15 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
485
518
  if (!pm_regexp_char_find(parser, '>')) {
486
519
  return false;
487
520
  }
488
- pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
521
+
522
+ if (parser->cursor - start == 1) {
523
+ pm_regexp_parse_error(parser, start, parser->cursor, "group name is empty");
524
+ }
525
+
526
+ if (parser->name_callback != NULL) {
527
+ pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
528
+ }
529
+
489
530
  break;
490
531
  }
491
532
  }
@@ -496,7 +537,10 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
496
537
  return false;
497
538
  }
498
539
 
499
- pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
540
+ if (parser->name_callback != NULL) {
541
+ pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
542
+ }
543
+
500
544
  break;
501
545
  }
502
546
  case '(': // conditional expression
@@ -535,20 +579,25 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
535
579
  }
536
580
  break;
537
581
  default:
538
- return false;
582
+ parser->cursor++;
583
+ pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "undefined group option");
584
+ break;
539
585
  }
540
586
  }
541
587
 
542
588
  // Now, parse the expressions within this group.
543
589
  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
544
- if (!pm_regexp_parse_expression(parser)) {
590
+ if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
545
591
  return false;
546
592
  }
547
593
  pm_regexp_char_accept(parser, '|');
548
594
  }
549
595
 
550
596
  // Finally, make sure we have a closing parenthesis.
551
- return pm_regexp_char_expect(parser, ')');
597
+ if (pm_regexp_char_expect(parser, ')')) return true;
598
+
599
+ pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
600
+ return false;
552
601
  }
553
602
 
554
603
  /**
@@ -564,12 +613,12 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
564
613
  * ;
565
614
  */
566
615
  static bool
567
- pm_regexp_parse_item(pm_regexp_parser_t *parser) {
616
+ pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
568
617
  switch (*parser->cursor) {
569
618
  case '^':
570
619
  case '$':
571
620
  parser->cursor++;
572
- return true;
621
+ return pm_regexp_parse_quantifier(parser);
573
622
  case '\\':
574
623
  parser->cursor++;
575
624
  if (!pm_regexp_char_is_eof(parser)) {
@@ -578,10 +627,20 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
578
627
  return pm_regexp_parse_quantifier(parser);
579
628
  case '(':
580
629
  parser->cursor++;
581
- return pm_regexp_parse_group(parser) && pm_regexp_parse_quantifier(parser);
630
+ return pm_regexp_parse_group(parser, depth) && pm_regexp_parse_quantifier(parser);
582
631
  case '[':
583
632
  parser->cursor++;
584
- return pm_regexp_parse_lbracket(parser) && pm_regexp_parse_quantifier(parser);
633
+ return pm_regexp_parse_lbracket(parser, depth) && pm_regexp_parse_quantifier(parser);
634
+ case '*':
635
+ case '?':
636
+ case '+':
637
+ parser->cursor++;
638
+ pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "target of repeat operator is not specified");
639
+ return true;
640
+ case ')':
641
+ parser->cursor++;
642
+ pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "unmatched close parenthesis");
643
+ return true;
585
644
  default: {
586
645
  size_t width;
587
646
  if (!parser->encoding_changed) {
@@ -603,13 +662,18 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
603
662
  * ;
604
663
  */
605
664
  static bool
606
- pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
607
- if (!pm_regexp_parse_item(parser)) {
665
+ pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth) {
666
+ if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
667
+ pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
668
+ return false;
669
+ }
670
+
671
+ if (!pm_regexp_parse_item(parser, depth)) {
608
672
  return false;
609
673
  }
610
674
 
611
675
  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')' && *parser->cursor != '|') {
612
- if (!pm_regexp_parse_item(parser)) {
676
+ if (!pm_regexp_parse_item(parser, depth)) {
613
677
  return false;
614
678
  }
615
679
  }
@@ -625,29 +689,30 @@ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
625
689
  */
626
690
  static bool
627
691
  pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
628
- return (
629
- (
630
- // Exit early if the pattern is empty.
631
- pm_regexp_char_is_eof(parser) ||
632
- // Parse the first expression in the pattern.
633
- pm_regexp_parse_expression(parser)
634
- ) &&
635
- (
636
- // Return now if we've parsed the entire pattern.
637
- pm_regexp_char_is_eof(parser) ||
638
- // Otherwise, we should have a pipe character.
639
- (pm_regexp_char_expect(parser, '|') && pm_regexp_parse_pattern(parser))
640
- )
641
- );
692
+ do {
693
+ if (pm_regexp_char_is_eof(parser)) return true;
694
+ if (!pm_regexp_parse_expression(parser, 0)) return false;
695
+ } while (pm_regexp_char_accept(parser, '|'));
696
+
697
+ return pm_regexp_char_is_eof(parser);
642
698
  }
643
699
 
644
700
  /**
645
701
  * Parse a regular expression and extract the names of all of the named capture
646
702
  * groups.
647
703
  */
648
- PRISM_EXPORTED_FUNCTION bool
649
- pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding) {
650
- pm_regexp_parser_t parser;
651
- pm_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
652
- return pm_regexp_parse_pattern(&parser);
704
+ PRISM_EXPORTED_FUNCTION void
705
+ pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
706
+ pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
707
+ .parser = parser,
708
+ .start = source,
709
+ .cursor = source,
710
+ .end = source + size,
711
+ .encoding_changed = parser->encoding_changed,
712
+ .encoding = parser->encoding,
713
+ .name_callback = name_callback,
714
+ .name_data = name_data,
715
+ .error_callback = error_callback,
716
+ .error_data = error_data
717
+ });
653
718
  }
data/src/serialize.c CHANGED
@@ -1198,6 +1198,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1198
1198
  pm_serialize_location(parser, &((pm_interpolated_x_string_node_t *)node)->closing_loc, buffer);
1199
1199
  break;
1200
1200
  }
1201
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
1202
+ break;
1203
+ }
1201
1204
  case PM_IT_PARAMETERS_NODE: {
1202
1205
  break;
1203
1206
  }
@@ -1550,7 +1553,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1550
1553
  break;
1551
1554
  }
1552
1555
  case PM_RATIONAL_NODE: {
1553
- pm_serialize_node(parser, (pm_node_t *)((pm_rational_node_t *)node)->numeric, buffer);
1556
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1557
+ pm_serialize_integer(&((pm_rational_node_t *)node)->numerator, buffer);
1558
+ pm_serialize_integer(&((pm_rational_node_t *)node)->denominator, buffer);
1554
1559
  break;
1555
1560
  }
1556
1561
  case PM_REDO_NODE: {
data/src/static_literals.c CHANGED
@@ -58,6 +58,25 @@ murmur_hash(const uint8_t *key, size_t length) {
58
58
  return hash;
59
59
  }
60
60
 
61
+ /**
62
+ * Hash the value of an integer and return it.
63
+ */
64
+ static uint32_t
65
+ integer_hash(const pm_integer_t *integer) {
66
+ uint32_t hash;
67
+ if (integer->values) {
68
+ hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
69
+ } else {
70
+ hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
71
+ }
72
+
73
+ if (integer->negative) {
74
+ hash ^= murmur_scramble((uint32_t) 1);
75
+ }
76
+
77
+ return hash;
78
+ }
79
+
61
80
  /**
62
81
  * Return the hash of the given node. It is important that nodes that have
63
82
  * equivalent static literal values have the same hash. This is because we use
@@ -68,19 +87,8 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
68
87
  switch (PM_NODE_TYPE(node)) {
69
88
  case PM_INTEGER_NODE: {
70
89
  // Integers hash their value.
71
- const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
72
- uint32_t hash;
73
- if (integer->values) {
74
- hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
75
- } else {
76
- hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
77
- }
78
-
79
- if (integer->negative) {
80
- hash ^= murmur_scramble((uint32_t) 1);
81
- }
82
-
83
- return hash;
90
+ const pm_integer_node_t *cast = (const pm_integer_node_t *) node;
91
+ return integer_hash(&cast->value);
84
92
  }
85
93
  case PM_SOURCE_LINE_NODE: {
86
94
  // Source lines hash their line number.
@@ -94,11 +102,9 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
94
102
  return murmur_hash((const uint8_t *) value, sizeof(double));
95
103
  }
96
104
  case PM_RATIONAL_NODE: {
97
- // Rationals hash their numeric value. Because their numeric value
98
- // is stored as a subnode, we hash that node and then mix in the
99
- // fact that this is a rational node.
100
- const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
101
- return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
105
+ // Rationals hash their numerator and denominator.
106
+ const pm_rational_node_t *cast = (const pm_rational_node_t *) node;
107
+ return integer_hash(&cast->numerator) ^ integer_hash(&cast->denominator) ^ murmur_scramble((uint32_t) cast->base.type);
102
108
  }
103
109
  case PM_IMAGINARY_NODE: {
104
110
  // Imaginaries hash their numeric value. Because their numeric value
@@ -148,7 +154,7 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
148
154
  * and must be able to compare all node types that will be stored in this hash.
149
155
  */
150
156
  static pm_node_t *
151
- pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
157
+ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, bool replace, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
152
158
  // If we are out of space, we need to resize the hash. This will cause all
153
159
  // of the nodes to be rehashed and reinserted into the new hash.
154
160
  if (hash->size * 2 >= hash->capacity) {
@@ -196,9 +202,14 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *m
196
202
  // already in the hash. Otherwise, we can just increment the size and insert
197
203
  // the new node.
198
204
  pm_node_t *result = hash->nodes[index];
199
- if (result == NULL) hash->size++;
200
205
 
201
- hash->nodes[index] = node;
206
+ if (result == NULL) {
207
+ hash->size++;
208
+ hash->nodes[index] = node;
209
+ } else if (replace) {
210
+ hash->nodes[index] = node;
211
+ }
212
+
202
213
  return result;
203
214
  }
204
215
 
@@ -275,8 +286,15 @@ pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_
275
286
  switch (PM_NODE_TYPE(left)) {
276
287
  case PM_IMAGINARY_NODE:
277
288
  return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
278
- case PM_RATIONAL_NODE:
279
- return pm_compare_number_nodes(metadata, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric);
289
+ case PM_RATIONAL_NODE: {
290
+ const pm_rational_node_t *left_rational = (const pm_rational_node_t *) left;
291
+ const pm_rational_node_t *right_rational = (const pm_rational_node_t *) right;
292
+
293
+ int result = pm_integer_compare(&left_rational->denominator, &right_rational->denominator);
294
+ if (result != 0) return result;
295
+
296
+ return pm_integer_compare(&left_rational->numerator, &right_rational->numerator);
297
+ }
280
298
  case PM_INTEGER_NODE:
281
299
  return pm_compare_integer_nodes(metadata, left, right);
282
300
  case PM_FLOAT_NODE:
@@ -335,7 +353,7 @@ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_liter
335
353
  * Add a node to the set of static literals.
336
354
  */
337
355
  pm_node_t *
338
- pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node) {
356
+ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
339
357
  switch (PM_NODE_TYPE(node)) {
340
358
  case PM_INTEGER_NODE:
341
359
  case PM_SOURCE_LINE_NODE:
@@ -347,6 +365,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
347
365
  .encoding_name = NULL
348
366
  },
349
367
  node,
368
+ replace,
350
369
  pm_compare_integer_nodes
351
370
  );
352
371
  case PM_FLOAT_NODE:
@@ -358,6 +377,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
358
377
  .encoding_name = NULL
359
378
  },
360
379
  node,
380
+ replace,
361
381
  pm_compare_float_nodes
362
382
  );
363
383
  case PM_RATIONAL_NODE:
@@ -370,6 +390,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
370
390
  .encoding_name = NULL
371
391
  },
372
392
  node,
393
+ replace,
373
394
  pm_compare_number_nodes
374
395
  );
375
396
  case PM_STRING_NODE:
@@ -382,6 +403,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
382
403
  .encoding_name = NULL
383
404
  },
384
405
  node,
406
+ replace,
385
407
  pm_compare_string_nodes
386
408
  );
387
409
  case PM_REGULAR_EXPRESSION_NODE:
@@ -393,6 +415,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
393
415
  .encoding_name = NULL
394
416
  },
395
417
  node,
418
+ replace,
396
419
  pm_compare_regular_expression_nodes
397
420
  );
398
421
  case PM_SYMBOL_NODE:
@@ -404,26 +427,27 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
404
427
  .encoding_name = NULL
405
428
  },
406
429
  node,
430
+ replace,
407
431
  pm_compare_string_nodes
408
432
  );
409
433
  case PM_TRUE_NODE: {
410
434
  pm_node_t *duplicated = literals->true_node;
411
- literals->true_node = node;
435
+ if ((duplicated == NULL) || replace) literals->true_node = node;
412
436
  return duplicated;
413
437
  }
414
438
  case PM_FALSE_NODE: {
415
439
  pm_node_t *duplicated = literals->false_node;
416
- literals->false_node = node;
440
+ if ((duplicated == NULL) || replace) literals->false_node = node;
417
441
  return duplicated;
418
442
  }
419
443
  case PM_NIL_NODE: {
420
444
  pm_node_t *duplicated = literals->nil_node;
421
- literals->nil_node = node;
445
+ if ((duplicated == NULL) || replace) literals->nil_node = node;
422
446
  return duplicated;
423
447
  }
424
448
  case PM_SOURCE_ENCODING_NODE: {
425
449
  pm_node_t *duplicated = literals->source_encoding_node;
426
- literals->source_encoding_node = node;
450
+ if ((duplicated == NULL) || replace) literals->source_encoding_node = node;
427
451
  return duplicated;
428
452
  }
429
453
  default:
@@ -456,7 +480,7 @@ pm_static_literal_positive_p(const pm_node_t *node) {
456
480
  case PM_INTEGER_NODE:
457
481
  return !((const pm_integer_node_t *) node)->value.negative;
458
482
  case PM_RATIONAL_NODE:
459
- return pm_static_literal_positive_p(((const pm_rational_node_t *) node)->numeric);
483
+ return !((const pm_rational_node_t *) node)->numerator.negative;
460
484
  case PM_IMAGINARY_NODE:
461
485
  return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
462
486
  default:
@@ -465,43 +489,6 @@ pm_static_literal_positive_p(const pm_node_t *node) {
465
489
  }
466
490
  }
467
491
 
468
- /**
469
- * Inspect a rational node that wraps a float node. This is going to be a
470
- * poor-man's version of the Ruby `Rational#to_s` method, because we're not
471
- * going to try to reduce the rational by finding the GCD. We'll leave that for
472
- * a future improvement.
473
- */
474
- static void
475
- pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) {
476
- const uint8_t *start = node->base.location.start;
477
- const uint8_t *end = node->base.location.end - 1; // r
478
-
479
- while (start < end && *start == '0') start++; // 0.1 -> .1
480
- while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
481
- size_t length = (size_t) (end - start);
482
-
483
- const uint8_t *point = memchr(start, '.', length);
484
- assert(point && "should have a decimal point");
485
-
486
- uint8_t *digits = malloc(length - 1);
487
- if (digits == NULL) return;
488
-
489
- memcpy(digits, start, (unsigned long) (point - start));
490
- memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
491
-
492
- pm_integer_t numerator = { 0 };
493
- pm_integer_parse(&numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + length - 1);
494
-
495
- pm_buffer_append_byte(buffer, '(');
496
- pm_integer_string(buffer, &numerator);
497
- pm_buffer_append_string(buffer, "/1", 2);
498
- for (size_t index = 0; index < (size_t) (end - point - 1); index++) pm_buffer_append_byte(buffer, '0');
499
- pm_buffer_append_byte(buffer, ')');
500
-
501
- pm_integer_free(&numerator);
502
- free(digits);
503
- }
504
-
505
492
  /**
506
493
  * Create a string-based representation of the given static literal.
507
494
  */
@@ -544,7 +531,9 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
544
531
  pm_buffer_append_string(buffer, "(0", 2);
545
532
  if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
546
533
  pm_static_literal_inspect_node(buffer, metadata, numeric);
547
- if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*');
534
+ if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) {
535
+ pm_buffer_append_byte(buffer, '*');
536
+ }
548
537
  pm_buffer_append_string(buffer, "i)", 2);
549
538
  break;
550
539
  }
@@ -555,22 +544,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
555
544
  pm_buffer_append_string(buffer, "nil", 3);
556
545
  break;
557
546
  case PM_RATIONAL_NODE: {
558
- const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
559
-
560
- switch (PM_NODE_TYPE(numeric)) {
561
- case PM_INTEGER_NODE:
562
- pm_buffer_append_byte(buffer, '(');
563
- pm_static_literal_inspect_node(buffer, metadata, numeric);
564
- pm_buffer_append_string(buffer, "/1)", 3);
565
- break;
566
- case PM_FLOAT_NODE:
567
- pm_rational_inspect(buffer, (pm_rational_node_t *) node);
568
- break;
569
- default:
570
- assert(false && "unreachable");
571
- break;
572
- }
573
-
547
+ const pm_rational_node_t *rational = (const pm_rational_node_t *) node;
548
+ pm_buffer_append_byte(buffer, '(');
549
+ pm_integer_string(buffer, &rational->numerator);
550
+ pm_buffer_append_byte(buffer, '/');
551
+ pm_integer_string(buffer, &rational->denominator);
552
+ pm_buffer_append_byte(buffer, ')');
574
553
  break;
575
554
  }
576
555
  case PM_REGULAR_EXPRESSION_NODE: {
@@ -624,7 +603,7 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
624
603
  /**
625
604
  * Create a string-based representation of the given static literal.
626
605
  */
627
- PRISM_EXPORTED_FUNCTION void
606
+ void
628
607
  pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
629
608
  pm_static_literal_inspect_node(
630
609
  buffer,