prism 0.28.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/README.md +1 -0
  5. data/config.yml +95 -26
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/ripper_translation.md +22 -0
  8. data/ext/prism/api_node.c +70 -52
  9. data/ext/prism/extconf.rb +27 -23
  10. data/ext/prism/extension.c +107 -372
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +170 -102
  13. data/include/prism/diagnostic.h +18 -3
  14. data/include/prism/node.h +0 -21
  15. data/include/prism/parser.h +23 -25
  16. data/include/prism/regexp.h +17 -8
  17. data/include/prism/static_literals.h +3 -2
  18. data/include/prism/util/pm_char.h +1 -2
  19. data/include/prism/util/pm_constant_pool.h +0 -8
  20. data/include/prism/util/pm_integer.h +16 -9
  21. data/include/prism/util/pm_string.h +0 -8
  22. data/include/prism/version.h +2 -2
  23. data/include/prism.h +0 -11
  24. data/lib/prism/compiler.rb +3 -0
  25. data/lib/prism/desugar_compiler.rb +4 -4
  26. data/lib/prism/dispatcher.rb +14 -0
  27. data/lib/prism/dot_visitor.rb +54 -35
  28. data/lib/prism/dsl.rb +23 -18
  29. data/lib/prism/ffi.rb +25 -4
  30. data/lib/prism/inspect_visitor.rb +26 -24
  31. data/lib/prism/mutation_compiler.rb +6 -1
  32. data/lib/prism/node.rb +314 -389
  33. data/lib/prism/node_ext.rb +175 -17
  34. data/lib/prism/parse_result/comments.rb +1 -8
  35. data/lib/prism/parse_result/newlines.rb +102 -12
  36. data/lib/prism/parse_result.rb +17 -0
  37. data/lib/prism/reflection.rb +11 -9
  38. data/lib/prism/serialize.rb +91 -68
  39. data/lib/prism/translation/parser/compiler.rb +288 -138
  40. data/lib/prism/translation/parser.rb +7 -2
  41. data/lib/prism/translation/ripper.rb +24 -22
  42. data/lib/prism/translation/ruby_parser.rb +32 -14
  43. data/lib/prism/visitor.rb +3 -0
  44. data/lib/prism.rb +0 -4
  45. data/prism.gemspec +2 -4
  46. data/rbi/prism/node.rbi +114 -57
  47. data/rbi/prism/node_ext.rbi +5 -0
  48. data/rbi/prism/parse_result.rbi +1 -1
  49. data/rbi/prism/visitor.rbi +3 -0
  50. data/rbi/prism.rbi +6 -0
  51. data/sig/prism/dsl.rbs +13 -10
  52. data/sig/prism/lex_compat.rbs +10 -0
  53. data/sig/prism/mutation_compiler.rbs +1 -0
  54. data/sig/prism/node.rbs +72 -48
  55. data/sig/prism/node_ext.rbs +4 -0
  56. data/sig/prism/visitor.rbs +1 -0
  57. data/sig/prism.rbs +21 -0
  58. data/src/diagnostic.c +56 -27
  59. data/src/node.c +432 -1690
  60. data/src/prettyprint.c +97 -54
  61. data/src/prism.c +1286 -1196
  62. data/src/regexp.c +133 -68
  63. data/src/serialize.c +22 -17
  64. data/src/static_literals.c +63 -84
  65. data/src/token_type.c +4 -4
  66. data/src/util/pm_constant_pool.c +0 -8
  67. data/src/util/pm_integer.c +39 -11
  68. data/src/util/pm_string.c +0 -12
  69. data/src/util/pm_strpbrk.c +32 -6
  70. metadata +3 -5
  71. data/include/prism/util/pm_string_list.h +0 -44
  72. data/lib/prism/debug.rb +0 -249
  73. data/src/util/pm_string_list.c +0 -28
@@ -58,6 +58,25 @@ murmur_hash(const uint8_t *key, size_t length) {
58
58
  return hash;
59
59
  }
60
60
 
61
+ /**
62
+ * Hash the value of an integer and return it.
63
+ */
64
+ static uint32_t
65
+ integer_hash(const pm_integer_t *integer) {
66
+ uint32_t hash;
67
+ if (integer->values) {
68
+ hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
69
+ } else {
70
+ hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
71
+ }
72
+
73
+ if (integer->negative) {
74
+ hash ^= murmur_scramble((uint32_t) 1);
75
+ }
76
+
77
+ return hash;
78
+ }
79
+
61
80
  /**
62
81
  * Return the hash of the given node. It is important that nodes that have
63
82
  * equivalent static literal values have the same hash. This is because we use
@@ -68,19 +87,8 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
68
87
  switch (PM_NODE_TYPE(node)) {
69
88
  case PM_INTEGER_NODE: {
70
89
  // Integers hash their value.
71
- const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
72
- uint32_t hash;
73
- if (integer->values) {
74
- hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
75
- } else {
76
- hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
77
- }
78
-
79
- if (integer->negative) {
80
- hash ^= murmur_scramble((uint32_t) 1);
81
- }
82
-
83
- return hash;
90
+ const pm_integer_node_t *cast = (const pm_integer_node_t *) node;
91
+ return integer_hash(&cast->value);
84
92
  }
85
93
  case PM_SOURCE_LINE_NODE: {
86
94
  // Source lines hash their line number.
@@ -94,11 +102,9 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
94
102
  return murmur_hash((const uint8_t *) value, sizeof(double));
95
103
  }
96
104
  case PM_RATIONAL_NODE: {
97
- // Rationals hash their numeric value. Because their numeric value
98
- // is stored as a subnode, we hash that node and then mix in the
99
- // fact that this is a rational node.
100
- const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
101
- return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
105
+ // Rationals hash their numerator and denominator.
106
+ const pm_rational_node_t *cast = (const pm_rational_node_t *) node;
107
+ return integer_hash(&cast->numerator) ^ integer_hash(&cast->denominator) ^ murmur_scramble((uint32_t) cast->base.type);
102
108
  }
103
109
  case PM_IMAGINARY_NODE: {
104
110
  // Imaginaries hash their numeric value. Because their numeric value
@@ -148,7 +154,7 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
148
154
  * and must be able to compare all node types that will be stored in this hash.
149
155
  */
150
156
  static pm_node_t *
151
- pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
157
+ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, bool replace, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
152
158
  // If we are out of space, we need to resize the hash. This will cause all
153
159
  // of the nodes to be rehashed and reinserted into the new hash.
154
160
  if (hash->size * 2 >= hash->capacity) {
@@ -196,9 +202,14 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *m
196
202
  // already in the hash. Otherwise, we can just increment the size and insert
197
203
  // the new node.
198
204
  pm_node_t *result = hash->nodes[index];
199
- if (result == NULL) hash->size++;
200
205
 
201
- hash->nodes[index] = node;
206
+ if (result == NULL) {
207
+ hash->size++;
208
+ hash->nodes[index] = node;
209
+ } else if (replace) {
210
+ hash->nodes[index] = node;
211
+ }
212
+
202
213
  return result;
203
214
  }
204
215
 
@@ -275,8 +286,15 @@ pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_
275
286
  switch (PM_NODE_TYPE(left)) {
276
287
  case PM_IMAGINARY_NODE:
277
288
  return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
278
- case PM_RATIONAL_NODE:
279
- return pm_compare_number_nodes(metadata, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric);
289
+ case PM_RATIONAL_NODE: {
290
+ const pm_rational_node_t *left_rational = (const pm_rational_node_t *) left;
291
+ const pm_rational_node_t *right_rational = (const pm_rational_node_t *) right;
292
+
293
+ int result = pm_integer_compare(&left_rational->denominator, &right_rational->denominator);
294
+ if (result != 0) return result;
295
+
296
+ return pm_integer_compare(&left_rational->numerator, &right_rational->numerator);
297
+ }
280
298
  case PM_INTEGER_NODE:
281
299
  return pm_compare_integer_nodes(metadata, left, right);
282
300
  case PM_FLOAT_NODE:
@@ -335,7 +353,7 @@ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_liter
335
353
  * Add a node to the set of static literals.
336
354
  */
337
355
  pm_node_t *
338
- pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node) {
356
+ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
339
357
  switch (PM_NODE_TYPE(node)) {
340
358
  case PM_INTEGER_NODE:
341
359
  case PM_SOURCE_LINE_NODE:
@@ -347,6 +365,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
347
365
  .encoding_name = NULL
348
366
  },
349
367
  node,
368
+ replace,
350
369
  pm_compare_integer_nodes
351
370
  );
352
371
  case PM_FLOAT_NODE:
@@ -358,6 +377,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
358
377
  .encoding_name = NULL
359
378
  },
360
379
  node,
380
+ replace,
361
381
  pm_compare_float_nodes
362
382
  );
363
383
  case PM_RATIONAL_NODE:
@@ -370,6 +390,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
370
390
  .encoding_name = NULL
371
391
  },
372
392
  node,
393
+ replace,
373
394
  pm_compare_number_nodes
374
395
  );
375
396
  case PM_STRING_NODE:
@@ -382,6 +403,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
382
403
  .encoding_name = NULL
383
404
  },
384
405
  node,
406
+ replace,
385
407
  pm_compare_string_nodes
386
408
  );
387
409
  case PM_REGULAR_EXPRESSION_NODE:
@@ -393,6 +415,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
393
415
  .encoding_name = NULL
394
416
  },
395
417
  node,
418
+ replace,
396
419
  pm_compare_regular_expression_nodes
397
420
  );
398
421
  case PM_SYMBOL_NODE:
@@ -404,26 +427,27 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
404
427
  .encoding_name = NULL
405
428
  },
406
429
  node,
430
+ replace,
407
431
  pm_compare_string_nodes
408
432
  );
409
433
  case PM_TRUE_NODE: {
410
434
  pm_node_t *duplicated = literals->true_node;
411
- literals->true_node = node;
435
+ if ((duplicated == NULL) || replace) literals->true_node = node;
412
436
  return duplicated;
413
437
  }
414
438
  case PM_FALSE_NODE: {
415
439
  pm_node_t *duplicated = literals->false_node;
416
- literals->false_node = node;
440
+ if ((duplicated == NULL) || replace) literals->false_node = node;
417
441
  return duplicated;
418
442
  }
419
443
  case PM_NIL_NODE: {
420
444
  pm_node_t *duplicated = literals->nil_node;
421
- literals->nil_node = node;
445
+ if ((duplicated == NULL) || replace) literals->nil_node = node;
422
446
  return duplicated;
423
447
  }
424
448
  case PM_SOURCE_ENCODING_NODE: {
425
449
  pm_node_t *duplicated = literals->source_encoding_node;
426
- literals->source_encoding_node = node;
450
+ if ((duplicated == NULL) || replace) literals->source_encoding_node = node;
427
451
  return duplicated;
428
452
  }
429
453
  default:
@@ -456,7 +480,7 @@ pm_static_literal_positive_p(const pm_node_t *node) {
456
480
  case PM_INTEGER_NODE:
457
481
  return !((const pm_integer_node_t *) node)->value.negative;
458
482
  case PM_RATIONAL_NODE:
459
- return pm_static_literal_positive_p(((const pm_rational_node_t *) node)->numeric);
483
+ return !((const pm_rational_node_t *) node)->numerator.negative;
460
484
  case PM_IMAGINARY_NODE:
461
485
  return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
462
486
  default:
@@ -465,43 +489,6 @@ pm_static_literal_positive_p(const pm_node_t *node) {
465
489
  }
466
490
  }
467
491
 
468
- /**
469
- * Inspect a rational node that wraps a float node. This is going to be a
470
- * poor-man's version of the Ruby `Rational#to_s` method, because we're not
471
- * going to try to reduce the rational by finding the GCD. We'll leave that for
472
- * a future improvement.
473
- */
474
- static void
475
- pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) {
476
- const uint8_t *start = node->base.location.start;
477
- const uint8_t *end = node->base.location.end - 1; // r
478
-
479
- while (start < end && *start == '0') start++; // 0.1 -> .1
480
- while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
481
- size_t length = (size_t) (end - start);
482
-
483
- const uint8_t *point = memchr(start, '.', length);
484
- assert(point && "should have a decimal point");
485
-
486
- uint8_t *digits = malloc(length - 1);
487
- if (digits == NULL) return;
488
-
489
- memcpy(digits, start, (unsigned long) (point - start));
490
- memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
491
-
492
- pm_integer_t numerator = { 0 };
493
- pm_integer_parse(&numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + length - 1);
494
-
495
- pm_buffer_append_byte(buffer, '(');
496
- pm_integer_string(buffer, &numerator);
497
- pm_buffer_append_string(buffer, "/1", 2);
498
- for (size_t index = 0; index < (size_t) (end - point - 1); index++) pm_buffer_append_byte(buffer, '0');
499
- pm_buffer_append_byte(buffer, ')');
500
-
501
- pm_integer_free(&numerator);
502
- free(digits);
503
- }
504
-
505
492
  /**
506
493
  * Create a string-based representation of the given static literal.
507
494
  */
@@ -544,7 +531,9 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
544
531
  pm_buffer_append_string(buffer, "(0", 2);
545
532
  if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
546
533
  pm_static_literal_inspect_node(buffer, metadata, numeric);
547
- if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*');
534
+ if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) {
535
+ pm_buffer_append_byte(buffer, '*');
536
+ }
548
537
  pm_buffer_append_string(buffer, "i)", 2);
549
538
  break;
550
539
  }
@@ -555,22 +544,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
555
544
  pm_buffer_append_string(buffer, "nil", 3);
556
545
  break;
557
546
  case PM_RATIONAL_NODE: {
558
- const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
559
-
560
- switch (PM_NODE_TYPE(numeric)) {
561
- case PM_INTEGER_NODE:
562
- pm_buffer_append_byte(buffer, '(');
563
- pm_static_literal_inspect_node(buffer, metadata, numeric);
564
- pm_buffer_append_string(buffer, "/1)", 3);
565
- break;
566
- case PM_FLOAT_NODE:
567
- pm_rational_inspect(buffer, (pm_rational_node_t *) node);
568
- break;
569
- default:
570
- assert(false && "unreachable");
571
- break;
572
- }
573
-
547
+ const pm_rational_node_t *rational = (const pm_rational_node_t *) node;
548
+ pm_buffer_append_byte(buffer, '(');
549
+ pm_integer_string(buffer, &rational->numerator);
550
+ pm_buffer_append_byte(buffer, '/');
551
+ pm_integer_string(buffer, &rational->denominator);
552
+ pm_buffer_append_byte(buffer, ')');
574
553
  break;
575
554
  }
576
555
  case PM_REGULAR_EXPRESSION_NODE: {
@@ -624,7 +603,7 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
624
603
  /**
625
604
  * Create a string-based representation of the given static literal.
626
605
  */
627
- PRISM_EXPORTED_FUNCTION void
606
+ void
628
607
  pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
629
608
  pm_static_literal_inspect_node(
630
609
  buffer,
data/src/token_type.c CHANGED
@@ -362,7 +362,7 @@ const char *
362
362
  pm_token_type_human(pm_token_type_t token_type) {
363
363
  switch (token_type) {
364
364
  case PM_TOKEN_EOF:
365
- return "end of file";
365
+ return "end-of-input";
366
366
  case PM_TOKEN_MISSING:
367
367
  return "missing token";
368
368
  case PM_TOKEN_NOT_PROVIDED:
@@ -422,9 +422,9 @@ pm_token_type_human(pm_token_type_t token_type) {
422
422
  case PM_TOKEN_DOT:
423
423
  return "'.'";
424
424
  case PM_TOKEN_DOT_DOT:
425
- return "'..'";
425
+ return "..";
426
426
  case PM_TOKEN_DOT_DOT_DOT:
427
- return "'...'";
427
+ return "...";
428
428
  case PM_TOKEN_EMBDOC_BEGIN:
429
429
  return "'=begin'";
430
430
  case PM_TOKEN_EMBDOC_END:
@@ -684,7 +684,7 @@ pm_token_type_human(pm_token_type_t token_type) {
684
684
  case PM_TOKEN_USTAR:
685
685
  return "*";
686
686
  case PM_TOKEN_USTAR_STAR:
687
- return "'**'";
687
+ return "**";
688
688
  case PM_TOKEN_WORDS_SEP:
689
689
  return "string separator";
690
690
  case PM_TOKEN___END__:
@@ -61,14 +61,6 @@ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
61
61
  return false;
62
62
  }
63
63
 
64
- /**
65
- * Get the memory size of a list of constant ids.
66
- */
67
- size_t
68
- pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
69
- return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
70
- }
71
-
72
64
  /**
73
65
  * Free the memory associated with a list of constant ids.
74
66
  */
@@ -48,7 +48,7 @@ big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint
48
48
 
49
49
  /**
50
50
  * Internal use for karatsuba_multiply. Calculates `a - b - c` with the given
51
- * base. Assume a, b, c, a - b - c all to be poitive.
51
+ * base. Assume a, b, c, a - b - c all to be positive.
52
52
  * Return pm_integer_t with values allocated. Not normalized.
53
53
  */
54
54
  static void
@@ -471,15 +471,18 @@ pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *
471
471
  * has already been validated, as internal validation checks are not performed
472
472
  * here.
473
473
  */
474
- PRISM_EXPORTED_FUNCTION void
474
+ void
475
475
  pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) {
476
- // Ignore unary +. Unary + is parsed differently and will not end up here.
476
+ // Ignore unary +. Unary - is parsed differently and will not end up here.
477
477
  // Instead, it will modify the parsed integer later.
478
478
  if (*start == '+') start++;
479
479
 
480
480
  // Determine the multiplier from the base, and skip past any prefixes.
481
481
  uint32_t multiplier = 10;
482
482
  switch (base) {
483
+ case PM_INTEGER_BASE_DEFAULT:
484
+ while (*start == '0') start++; // 01 -> 1
485
+ break;
483
486
  case PM_INTEGER_BASE_BINARY:
484
487
  start += 2; // 0b
485
488
  multiplier = 2;
@@ -533,14 +536,6 @@ pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *s
533
536
  integer->value = (uint32_t) value;
534
537
  }
535
538
 
536
- /**
537
- * Return the memory size of the integer.
538
- */
539
- size_t
540
- pm_integer_memsize(const pm_integer_t *integer) {
541
- return sizeof(pm_integer_t) + integer->length * sizeof(uint32_t);
542
- }
543
-
544
539
  /**
545
540
  * Compare two integers. This function returns -1 if the left integer is less
546
541
  * than the right integer, 0 if they are equal, and 1 if the left integer is
@@ -572,6 +567,39 @@ pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
572
567
  return 0;
573
568
  }
574
569
 
570
+ /**
571
+ * Reduce a ratio of integers to its simplest form.
572
+ */
573
+ void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) {
574
+ // If either the numerator or denominator do not fit into a 32-bit integer,
575
+ // then this function is a no-op. In the future, we may consider reducing
576
+ // even the larger numbers, but for now we're going to keep it simple.
577
+ if (
578
+ // If the numerator doesn't fit into a 32-bit integer, return early.
579
+ numerator->length != 0 ||
580
+ // If the denominator doesn't fit into a 32-bit integer, return early.
581
+ denominator->length != 0 ||
582
+ // If the numerator is 0, then return early.
583
+ numerator->value == 0 ||
584
+ // If the denominator is 1, then return early.
585
+ denominator->value == 1
586
+ ) return;
587
+
588
+ // Find the greatest common divisor of the numerator and denominator.
589
+ uint32_t divisor = numerator->value;
590
+ uint32_t remainder = denominator->value;
591
+
592
+ while (remainder != 0) {
593
+ uint32_t temporary = remainder;
594
+ remainder = divisor % remainder;
595
+ divisor = temporary;
596
+ }
597
+
598
+ // Divide the numerator and denominator by the greatest common divisor.
599
+ numerator->value /= divisor;
600
+ denominator->value /= divisor;
601
+ }
602
+
575
603
  /**
576
604
  * Convert an integer to a decimal string.
577
605
  */
data/src/util/pm_string.c CHANGED
@@ -245,18 +245,6 @@ pm_string_file_init(pm_string_t *string, const char *filepath) {
245
245
  #endif
246
246
  }
247
247
 
248
- /**
249
- * Returns the memory size associated with the string.
250
- */
251
- size_t
252
- pm_string_memsize(const pm_string_t *string) {
253
- size_t size = sizeof(pm_string_t);
254
- if (string->type == PM_STRING_OWNED) {
255
- size += string->length;
256
- }
257
- return size;
258
- }
259
-
260
248
  /**
261
249
  * Ensure the string is owned. If it is not, then reinitialize it as owned and
262
250
  * copy over the previous source.
@@ -8,6 +8,27 @@ pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start
8
8
  pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
9
9
  }
10
10
 
11
+ /**
12
+ * Set the explicit encoding for the parser to the current encoding.
13
+ */
14
+ static inline void
15
+ pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
16
+ if (parser->explicit_encoding != NULL) {
17
+ if (parser->explicit_encoding == parser->encoding) {
18
+ // Okay, we already locked to this encoding.
19
+ } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
20
+ // Not okay, we already found a Unicode escape sequence and this
21
+ // conflicts.
22
+ pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
23
+ } else {
24
+ // Should not be anything else.
25
+ assert(false && "unreachable");
26
+ }
27
+ }
28
+
29
+ parser->explicit_encoding = parser->encoding;
30
+ }
31
+
11
32
  /**
12
33
  * This is the default path.
13
34
  */
@@ -52,7 +73,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars
52
73
  * This is the path when the encoding is ASCII-8BIT.
53
74
  */
54
75
  static inline const uint8_t *
55
- pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
76
+ pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
56
77
  size_t index = 0;
57
78
 
58
79
  while (index < maximum) {
@@ -60,6 +81,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
60
81
  return source + index;
61
82
  }
62
83
 
84
+ if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
63
85
  index++;
64
86
  }
65
87
 
@@ -72,6 +94,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
72
94
  static inline const uint8_t *
73
95
  pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
74
96
  size_t index = 0;
97
+ const pm_encoding_t *encoding = parser->encoding;
75
98
 
76
99
  while (index < maximum) {
77
100
  if (strchr((const char *) charset, source[index]) != NULL) {
@@ -81,7 +104,8 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
81
104
  if (source[index] < 0x80) {
82
105
  index++;
83
106
  } else {
84
- size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
107
+ size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
108
+ if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
85
109
 
86
110
  if (width > 0) {
87
111
  index += width;
@@ -96,7 +120,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
96
120
 
97
121
  do {
98
122
  index++;
99
- } while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
123
+ } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
100
124
 
101
125
  pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
102
126
  }
@@ -113,6 +137,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
113
137
  static inline const uint8_t *
114
138
  pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
115
139
  size_t index = 0;
140
+ const pm_encoding_t *encoding = parser->encoding;
116
141
 
117
142
  while (index < maximum) {
118
143
  if (strchr((const char *) charset, source[index]) != NULL) {
@@ -122,7 +147,8 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
122
147
  if (source[index] < 0x80 || !validate) {
123
148
  index++;
124
149
  } else {
125
- size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
150
+ size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
151
+ pm_strpbrk_explicit_encoding_set(parser, source, width);
126
152
 
127
153
  if (width > 0) {
128
154
  index += width;
@@ -135,7 +161,7 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
135
161
 
136
162
  do {
137
163
  index++;
138
- } while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
164
+ } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
139
165
 
140
166
  pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
141
167
  }
@@ -171,7 +197,7 @@ pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, p
171
197
  } else if (!parser->encoding_changed) {
172
198
  return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
173
199
  } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
174
- return pm_strpbrk_ascii_8bit(source, charset, (size_t) length);
200
+ return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
175
201
  } else if (parser->encoding->multibyte) {
176
202
  return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
177
203
  } else {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.28.0
4
+ version: 0.30.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-05-03 00:00:00.000000000 Z
11
+ date: 2024-06-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -69,13 +69,11 @@ files:
69
69
  - include/prism/util/pm_memchr.h
70
70
  - include/prism/util/pm_newline_list.h
71
71
  - include/prism/util/pm_string.h
72
- - include/prism/util/pm_string_list.h
73
72
  - include/prism/util/pm_strncasecmp.h
74
73
  - include/prism/util/pm_strpbrk.h
75
74
  - include/prism/version.h
76
75
  - lib/prism.rb
77
76
  - lib/prism/compiler.rb
78
- - lib/prism/debug.rb
79
77
  - lib/prism/desugar_compiler.rb
80
78
  - lib/prism/dispatcher.rb
81
79
  - lib/prism/dot_visitor.rb
@@ -126,6 +124,7 @@ files:
126
124
  - sig/prism/dot_visitor.rbs
127
125
  - sig/prism/dsl.rbs
128
126
  - sig/prism/inspect_visitor.rbs
127
+ - sig/prism/lex_compat.rbs
129
128
  - sig/prism/mutation_compiler.rbs
130
129
  - sig/prism/node.rbs
131
130
  - sig/prism/node_ext.rbs
@@ -154,7 +153,6 @@ files:
154
153
  - src/util/pm_memchr.c
155
154
  - src/util/pm_newline_list.c
156
155
  - src/util/pm_string.c
157
- - src/util/pm_string_list.c
158
156
  - src/util/pm_strncasecmp.c
159
157
  - src/util/pm_strpbrk.c
160
158
  homepage: https://github.com/ruby/prism
@@ -1,44 +0,0 @@
1
- /**
2
- * @file pm_string_list.h
3
- *
4
- * A list of strings.
5
- */
6
- #ifndef PRISM_STRING_LIST_H
7
- #define PRISM_STRING_LIST_H
8
-
9
- #include "prism/defines.h"
10
- #include "prism/util/pm_string.h"
11
-
12
- #include <stddef.h>
13
- #include <stdlib.h>
14
-
15
- /**
16
- * A list of strings.
17
- */
18
- typedef struct {
19
- /** The length of the string list. */
20
- size_t length;
21
-
22
- /** The capacity of the string list that has been allocated. */
23
- size_t capacity;
24
-
25
- /** A pointer to the start of the string list. */
26
- pm_string_t *strings;
27
- } pm_string_list_t;
28
-
29
- /**
30
- * Append a pm_string_t to the given string list.
31
- *
32
- * @param string_list The string list to append to.
33
- * @param string The string to append.
34
- */
35
- void pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string);
36
-
37
- /**
38
- * Free the memory associated with the string list.
39
- *
40
- * @param string_list The string list to free.
41
- */
42
- PRISM_EXPORTED_FUNCTION void pm_string_list_free(pm_string_list_t *string_list);
43
-
44
- #endif