prism 0.29.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +115 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/Makefile +1 -1
  5. data/README.md +4 -0
  6. data/config.yml +920 -148
  7. data/docs/build_system.md +8 -11
  8. data/docs/fuzzing.md +1 -1
  9. data/docs/parsing_rules.md +4 -1
  10. data/docs/relocation.md +34 -0
  11. data/docs/ripper_translation.md +22 -0
  12. data/docs/serialization.md +3 -0
  13. data/ext/prism/api_node.c +2863 -2079
  14. data/ext/prism/extconf.rb +14 -37
  15. data/ext/prism/extension.c +241 -391
  16. data/ext/prism/extension.h +2 -2
  17. data/include/prism/ast.h +2156 -453
  18. data/include/prism/defines.h +58 -7
  19. data/include/prism/diagnostic.h +24 -6
  20. data/include/prism/node.h +0 -21
  21. data/include/prism/options.h +94 -3
  22. data/include/prism/parser.h +82 -40
  23. data/include/prism/regexp.h +18 -8
  24. data/include/prism/static_literals.h +3 -2
  25. data/include/prism/util/pm_char.h +1 -2
  26. data/include/prism/util/pm_constant_pool.h +0 -8
  27. data/include/prism/util/pm_integer.h +22 -15
  28. data/include/prism/util/pm_newline_list.h +11 -0
  29. data/include/prism/util/pm_string.h +28 -12
  30. data/include/prism/version.h +3 -3
  31. data/include/prism.h +47 -11
  32. data/lib/prism/compiler.rb +3 -0
  33. data/lib/prism/desugar_compiler.rb +111 -74
  34. data/lib/prism/dispatcher.rb +16 -1
  35. data/lib/prism/dot_visitor.rb +55 -34
  36. data/lib/prism/dsl.rb +660 -468
  37. data/lib/prism/ffi.rb +113 -8
  38. data/lib/prism/inspect_visitor.rb +296 -64
  39. data/lib/prism/lex_compat.rb +1 -1
  40. data/lib/prism/mutation_compiler.rb +11 -6
  41. data/lib/prism/node.rb +4262 -5023
  42. data/lib/prism/node_ext.rb +91 -14
  43. data/lib/prism/parse_result/comments.rb +0 -7
  44. data/lib/prism/parse_result/errors.rb +65 -0
  45. data/lib/prism/parse_result/newlines.rb +101 -11
  46. data/lib/prism/parse_result.rb +183 -6
  47. data/lib/prism/reflection.rb +12 -10
  48. data/lib/prism/relocation.rb +504 -0
  49. data/lib/prism/serialize.rb +496 -609
  50. data/lib/prism/string_query.rb +30 -0
  51. data/lib/prism/translation/parser/compiler.rb +185 -155
  52. data/lib/prism/translation/parser/lexer.rb +26 -4
  53. data/lib/prism/translation/parser.rb +9 -4
  54. data/lib/prism/translation/ripper.rb +23 -25
  55. data/lib/prism/translation/ruby_parser.rb +86 -17
  56. data/lib/prism/visitor.rb +3 -0
  57. data/lib/prism.rb +6 -8
  58. data/prism.gemspec +9 -5
  59. data/rbi/prism/dsl.rbi +521 -0
  60. data/rbi/prism/node.rbi +1115 -1120
  61. data/rbi/prism/parse_result.rbi +29 -0
  62. data/rbi/prism/string_query.rbi +12 -0
  63. data/rbi/prism/visitor.rbi +3 -0
  64. data/rbi/prism.rbi +36 -30
  65. data/sig/prism/dsl.rbs +190 -303
  66. data/sig/prism/mutation_compiler.rbs +1 -0
  67. data/sig/prism/node.rbs +678 -632
  68. data/sig/prism/parse_result.rbs +22 -0
  69. data/sig/prism/relocation.rbs +185 -0
  70. data/sig/prism/string_query.rbs +11 -0
  71. data/sig/prism/visitor.rbs +1 -0
  72. data/sig/prism.rbs +103 -64
  73. data/src/diagnostic.c +64 -28
  74. data/src/node.c +502 -1739
  75. data/src/options.c +76 -27
  76. data/src/prettyprint.c +188 -112
  77. data/src/prism.c +3376 -2293
  78. data/src/regexp.c +208 -71
  79. data/src/serialize.c +182 -50
  80. data/src/static_literals.c +64 -85
  81. data/src/token_type.c +4 -4
  82. data/src/util/pm_char.c +1 -1
  83. data/src/util/pm_constant_pool.c +0 -8
  84. data/src/util/pm_integer.c +53 -25
  85. data/src/util/pm_newline_list.c +29 -0
  86. data/src/util/pm_string.c +131 -80
  87. data/src/util/pm_strpbrk.c +32 -6
  88. metadata +11 -7
  89. data/include/prism/util/pm_string_list.h +0 -44
  90. data/lib/prism/debug.rb +0 -249
  91. data/lib/prism/translation/parser/rubocop.rb +0 -73
  92. data/src/util/pm_string_list.c +0 -28
@@ -58,6 +58,25 @@ murmur_hash(const uint8_t *key, size_t length) {
58
58
  return hash;
59
59
  }
60
60
 
61
+ /**
62
+ * Hash the value of an integer and return it.
63
+ */
64
+ static uint32_t
65
+ integer_hash(const pm_integer_t *integer) {
66
+ uint32_t hash;
67
+ if (integer->values) {
68
+ hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
69
+ } else {
70
+ hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
71
+ }
72
+
73
+ if (integer->negative) {
74
+ hash ^= murmur_scramble((uint32_t) 1);
75
+ }
76
+
77
+ return hash;
78
+ }
79
+
61
80
  /**
62
81
  * Return the hash of the given node. It is important that nodes that have
63
82
  * equivalent static literal values have the same hash. This is because we use
@@ -68,19 +87,8 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
68
87
  switch (PM_NODE_TYPE(node)) {
69
88
  case PM_INTEGER_NODE: {
70
89
  // Integers hash their value.
71
- const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
72
- uint32_t hash;
73
- if (integer->values) {
74
- hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
75
- } else {
76
- hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
77
- }
78
-
79
- if (integer->negative) {
80
- hash ^= murmur_scramble((uint32_t) 1);
81
- }
82
-
83
- return hash;
90
+ const pm_integer_node_t *cast = (const pm_integer_node_t *) node;
91
+ return integer_hash(&cast->value);
84
92
  }
85
93
  case PM_SOURCE_LINE_NODE: {
86
94
  // Source lines hash their line number.
@@ -94,11 +102,9 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
94
102
  return murmur_hash((const uint8_t *) value, sizeof(double));
95
103
  }
96
104
  case PM_RATIONAL_NODE: {
97
- // Rationals hash their numeric value. Because their numeric value
98
- // is stored as a subnode, we hash that node and then mix in the
99
- // fact that this is a rational node.
100
- const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
101
- return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
105
+ // Rationals hash their numerator and denominator.
106
+ const pm_rational_node_t *cast = (const pm_rational_node_t *) node;
107
+ return integer_hash(&cast->numerator) ^ integer_hash(&cast->denominator) ^ murmur_scramble((uint32_t) cast->base.type);
102
108
  }
103
109
  case PM_IMAGINARY_NODE: {
104
110
  // Imaginaries hash their numeric value. Because their numeric value
@@ -148,7 +154,7 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
148
154
  * and must be able to compare all node types that will be stored in this hash.
149
155
  */
150
156
  static pm_node_t *
151
- pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
157
+ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, bool replace, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
152
158
  // If we are out of space, we need to resize the hash. This will cause all
153
159
  // of the nodes to be rehashed and reinserted into the new hash.
154
160
  if (hash->size * 2 >= hash->capacity) {
@@ -196,9 +202,14 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *m
196
202
  // already in the hash. Otherwise, we can just increment the size and insert
197
203
  // the new node.
198
204
  pm_node_t *result = hash->nodes[index];
199
- if (result == NULL) hash->size++;
200
205
 
201
- hash->nodes[index] = node;
206
+ if (result == NULL) {
207
+ hash->size++;
208
+ hash->nodes[index] = node;
209
+ } else if (replace) {
210
+ hash->nodes[index] = node;
211
+ }
212
+
202
213
  return result;
203
214
  }
204
215
 
@@ -275,8 +286,15 @@ pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_
275
286
  switch (PM_NODE_TYPE(left)) {
276
287
  case PM_IMAGINARY_NODE:
277
288
  return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
278
- case PM_RATIONAL_NODE:
279
- return pm_compare_number_nodes(metadata, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric);
289
+ case PM_RATIONAL_NODE: {
290
+ const pm_rational_node_t *left_rational = (const pm_rational_node_t *) left;
291
+ const pm_rational_node_t *right_rational = (const pm_rational_node_t *) right;
292
+
293
+ int result = pm_integer_compare(&left_rational->denominator, &right_rational->denominator);
294
+ if (result != 0) return result;
295
+
296
+ return pm_integer_compare(&left_rational->numerator, &right_rational->numerator);
297
+ }
280
298
  case PM_INTEGER_NODE:
281
299
  return pm_compare_integer_nodes(metadata, left, right);
282
300
  case PM_FLOAT_NODE:
@@ -335,7 +353,7 @@ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_liter
335
353
  * Add a node to the set of static literals.
336
354
  */
337
355
  pm_node_t *
338
- pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node) {
356
+ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
339
357
  switch (PM_NODE_TYPE(node)) {
340
358
  case PM_INTEGER_NODE:
341
359
  case PM_SOURCE_LINE_NODE:
@@ -347,6 +365,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
347
365
  .encoding_name = NULL
348
366
  },
349
367
  node,
368
+ replace,
350
369
  pm_compare_integer_nodes
351
370
  );
352
371
  case PM_FLOAT_NODE:
@@ -358,6 +377,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
358
377
  .encoding_name = NULL
359
378
  },
360
379
  node,
380
+ replace,
361
381
  pm_compare_float_nodes
362
382
  );
363
383
  case PM_RATIONAL_NODE:
@@ -370,6 +390,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
370
390
  .encoding_name = NULL
371
391
  },
372
392
  node,
393
+ replace,
373
394
  pm_compare_number_nodes
374
395
  );
375
396
  case PM_STRING_NODE:
@@ -382,6 +403,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
382
403
  .encoding_name = NULL
383
404
  },
384
405
  node,
406
+ replace,
385
407
  pm_compare_string_nodes
386
408
  );
387
409
  case PM_REGULAR_EXPRESSION_NODE:
@@ -393,6 +415,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
393
415
  .encoding_name = NULL
394
416
  },
395
417
  node,
418
+ replace,
396
419
  pm_compare_regular_expression_nodes
397
420
  );
398
421
  case PM_SYMBOL_NODE:
@@ -404,26 +427,27 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
404
427
  .encoding_name = NULL
405
428
  },
406
429
  node,
430
+ replace,
407
431
  pm_compare_string_nodes
408
432
  );
409
433
  case PM_TRUE_NODE: {
410
434
  pm_node_t *duplicated = literals->true_node;
411
- literals->true_node = node;
435
+ if ((duplicated == NULL) || replace) literals->true_node = node;
412
436
  return duplicated;
413
437
  }
414
438
  case PM_FALSE_NODE: {
415
439
  pm_node_t *duplicated = literals->false_node;
416
- literals->false_node = node;
440
+ if ((duplicated == NULL) || replace) literals->false_node = node;
417
441
  return duplicated;
418
442
  }
419
443
  case PM_NIL_NODE: {
420
444
  pm_node_t *duplicated = literals->nil_node;
421
- literals->nil_node = node;
445
+ if ((duplicated == NULL) || replace) literals->nil_node = node;
422
446
  return duplicated;
423
447
  }
424
448
  case PM_SOURCE_ENCODING_NODE: {
425
449
  pm_node_t *duplicated = literals->source_encoding_node;
426
- literals->source_encoding_node = node;
450
+ if ((duplicated == NULL) || replace) literals->source_encoding_node = node;
427
451
  return duplicated;
428
452
  }
429
453
  default:
@@ -456,7 +480,7 @@ pm_static_literal_positive_p(const pm_node_t *node) {
456
480
  case PM_INTEGER_NODE:
457
481
  return !((const pm_integer_node_t *) node)->value.negative;
458
482
  case PM_RATIONAL_NODE:
459
- return pm_static_literal_positive_p(((const pm_rational_node_t *) node)->numeric);
483
+ return !((const pm_rational_node_t *) node)->numerator.negative;
460
484
  case PM_IMAGINARY_NODE:
461
485
  return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
462
486
  default:
@@ -465,43 +489,6 @@ pm_static_literal_positive_p(const pm_node_t *node) {
465
489
  }
466
490
  }
467
491
 
468
- /**
469
- * Inspect a rational node that wraps a float node. This is going to be a
470
- * poor-man's version of the Ruby `Rational#to_s` method, because we're not
471
- * going to try to reduce the rational by finding the GCD. We'll leave that for
472
- * a future improvement.
473
- */
474
- static void
475
- pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) {
476
- const uint8_t *start = node->base.location.start;
477
- const uint8_t *end = node->base.location.end - 1; // r
478
-
479
- while (start < end && *start == '0') start++; // 0.1 -> .1
480
- while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
481
- size_t length = (size_t) (end - start);
482
-
483
- const uint8_t *point = memchr(start, '.', length);
484
- assert(point && "should have a decimal point");
485
-
486
- uint8_t *digits = malloc(length - 1);
487
- if (digits == NULL) return;
488
-
489
- memcpy(digits, start, (unsigned long) (point - start));
490
- memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
491
-
492
- pm_integer_t numerator = { 0 };
493
- pm_integer_parse(&numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + length - 1);
494
-
495
- pm_buffer_append_byte(buffer, '(');
496
- pm_integer_string(buffer, &numerator);
497
- pm_buffer_append_string(buffer, "/1", 2);
498
- for (size_t index = 0; index < (size_t) (end - point - 1); index++) pm_buffer_append_byte(buffer, '0');
499
- pm_buffer_append_byte(buffer, ')');
500
-
501
- pm_integer_free(&numerator);
502
- free(digits);
503
- }
504
-
505
492
  /**
506
493
  * Create a string-based representation of the given static literal.
507
494
  */
@@ -514,7 +501,7 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
514
501
  case PM_FLOAT_NODE: {
515
502
  const double value = ((const pm_float_node_t *) node)->value;
516
503
 
517
- if (isinf(value)) {
504
+ if (PRISM_ISINF(value)) {
518
505
  if (*node->location.start == '-') {
519
506
  pm_buffer_append_byte(buffer, '-');
520
507
  }
@@ -544,7 +531,9 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
544
531
  pm_buffer_append_string(buffer, "(0", 2);
545
532
  if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
546
533
  pm_static_literal_inspect_node(buffer, metadata, numeric);
547
- if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*');
534
+ if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) {
535
+ pm_buffer_append_byte(buffer, '*');
536
+ }
548
537
  pm_buffer_append_string(buffer, "i)", 2);
549
538
  break;
550
539
  }
@@ -555,22 +544,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
555
544
  pm_buffer_append_string(buffer, "nil", 3);
556
545
  break;
557
546
  case PM_RATIONAL_NODE: {
558
- const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
559
-
560
- switch (PM_NODE_TYPE(numeric)) {
561
- case PM_INTEGER_NODE:
562
- pm_buffer_append_byte(buffer, '(');
563
- pm_static_literal_inspect_node(buffer, metadata, numeric);
564
- pm_buffer_append_string(buffer, "/1)", 3);
565
- break;
566
- case PM_FLOAT_NODE:
567
- pm_rational_inspect(buffer, (pm_rational_node_t *) node);
568
- break;
569
- default:
570
- assert(false && "unreachable");
571
- break;
572
- }
573
-
547
+ const pm_rational_node_t *rational = (const pm_rational_node_t *) node;
548
+ pm_buffer_append_byte(buffer, '(');
549
+ pm_integer_string(buffer, &rational->numerator);
550
+ pm_buffer_append_byte(buffer, '/');
551
+ pm_integer_string(buffer, &rational->denominator);
552
+ pm_buffer_append_byte(buffer, ')');
574
553
  break;
575
554
  }
576
555
  case PM_REGULAR_EXPRESSION_NODE: {
@@ -624,7 +603,7 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
624
603
  /**
625
604
  * Create a string-based representation of the given static literal.
626
605
  */
627
- PRISM_EXPORTED_FUNCTION void
606
+ void
628
607
  pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
629
608
  pm_static_literal_inspect_node(
630
609
  buffer,
data/src/token_type.c CHANGED
@@ -1,10 +1,10 @@
1
- /******************************************************************************/
1
+ /*----------------------------------------------------------------------------*/
2
2
  /* This file is generated by the templates/template.rb script and should not */
3
3
  /* be modified manually. See */
4
4
  /* templates/src/token_type.c.erb */
5
5
  /* if you are looking to modify the */
6
6
  /* template */
7
- /******************************************************************************/
7
+ /*----------------------------------------------------------------------------*/
8
8
 
9
9
  #include <string.h>
10
10
 
@@ -362,7 +362,7 @@ const char *
362
362
  pm_token_type_human(pm_token_type_t token_type) {
363
363
  switch (token_type) {
364
364
  case PM_TOKEN_EOF:
365
- return "end of file";
365
+ return "end-of-input";
366
366
  case PM_TOKEN_MISSING:
367
367
  return "missing token";
368
368
  case PM_TOKEN_NOT_PROVIDED:
@@ -684,7 +684,7 @@ pm_token_type_human(pm_token_type_t token_type) {
684
684
  case PM_TOKEN_USTAR:
685
685
  return "*";
686
686
  case PM_TOKEN_USTAR_STAR:
687
- return "'**'";
687
+ return "**";
688
688
  case PM_TOKEN_WORDS_SEP:
689
689
  return "string separator";
690
690
  case PM_TOKEN___END__:
data/src/util/pm_char.c CHANGED
@@ -185,7 +185,7 @@ pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const
185
185
  size++;
186
186
  }
187
187
 
188
- if (string[size - 1] == '_') *invalid = string + size - 1;
188
+ if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
189
189
  return size;
190
190
  }
191
191
 
@@ -61,14 +61,6 @@ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
61
61
  return false;
62
62
  }
63
63
 
64
- /**
65
- * Get the memory size of a list of constant ids.
66
- */
67
- size_t
68
- pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
69
- return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
70
- }
71
-
72
64
  /**
73
65
  * Free the memory associated with a list of constant ids.
74
66
  */
@@ -43,12 +43,12 @@ big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint
43
43
  length++;
44
44
  }
45
45
 
46
- *destination = (pm_integer_t) { 0, length, values, false };
46
+ *destination = (pm_integer_t) { length, values, 0, false };
47
47
  }
48
48
 
49
49
  /**
50
50
  * Internal use for karatsuba_multiply. Calculates `a - b - c` with the given
51
- * base. Assume a, b, c, a - b - c all to be poitive.
51
+ * base. Assume a, b, c, a - b - c all to be positive.
52
52
  * Return pm_integer_t with values allocated. Not normalized.
53
53
  */
54
54
  static void
@@ -87,7 +87,7 @@ big_sub2(pm_integer_t *destination, pm_integer_t *a, pm_integer_t *b, pm_integer
87
87
  }
88
88
 
89
89
  while (a_length > 1 && values[a_length - 1] == 0) a_length--;
90
- *destination = (pm_integer_t) { 0, a_length, values, false };
90
+ *destination = (pm_integer_t) { a_length, values, 0, false };
91
91
  }
92
92
 
93
93
  /**
@@ -130,7 +130,7 @@ karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *
130
130
  }
131
131
 
132
132
  while (length > 1 && values[length - 1] == 0) length--;
133
- *destination = (pm_integer_t) { 0, length, values, false };
133
+ *destination = (pm_integer_t) { length, values, 0, false };
134
134
  return;
135
135
  }
136
136
 
@@ -142,16 +142,16 @@ karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *
142
142
  if (end_offset > right_length) end_offset = right_length;
143
143
 
144
144
  pm_integer_t sliced_left = {
145
- .value = 0,
146
145
  .length = left_length,
147
146
  .values = left_values,
147
+ .value = 0,
148
148
  .negative = false
149
149
  };
150
150
 
151
151
  pm_integer_t sliced_right = {
152
- .value = 0,
153
152
  .length = end_offset - start_offset,
154
153
  .values = right_values + start_offset,
154
+ .value = 0,
155
155
  .negative = false
156
156
  };
157
157
 
@@ -169,15 +169,15 @@ karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *
169
169
  pm_integer_free(&product);
170
170
  }
171
171
 
172
- *destination = (pm_integer_t) { 0, left_length + right_length, values, false };
172
+ *destination = (pm_integer_t) { left_length + right_length, values, 0, false };
173
173
  return;
174
174
  }
175
175
 
176
176
  size_t half = left_length / 2;
177
- pm_integer_t x0 = { 0, half, left_values, false };
178
- pm_integer_t x1 = { 0, left_length - half, left_values + half, false };
179
- pm_integer_t y0 = { 0, half, right_values, false };
180
- pm_integer_t y1 = { 0, right_length - half, right_values + half, false };
177
+ pm_integer_t x0 = { half, left_values, 0, false };
178
+ pm_integer_t x1 = { left_length - half, left_values + half, 0, false };
179
+ pm_integer_t y0 = { half, right_values, 0, false };
180
+ pm_integer_t y1 = { right_length - half, right_values + half, 0, false };
181
181
 
182
182
  pm_integer_t z0 = { 0 };
183
183
  karatsuba_multiply(&z0, &x0, &y0, base);
@@ -229,7 +229,7 @@ karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *
229
229
  pm_integer_free(&y01);
230
230
  pm_integer_free(&xy);
231
231
 
232
- *destination = (pm_integer_t) { 0, length, values, false };
232
+ *destination = (pm_integer_t) { length, values, 0, false };
233
233
  }
234
234
 
235
235
  /**
@@ -323,7 +323,7 @@ pm_integer_normalize(pm_integer_t *integer) {
323
323
  bool negative = integer->negative && value != 0;
324
324
 
325
325
  pm_integer_free(integer);
326
- *integer = (pm_integer_t) { .value = value, .length = 0, .values = NULL, .negative = negative };
326
+ *integer = (pm_integer_t) { .values = NULL, .value = value, .length = 0, .negative = negative };
327
327
  }
328
328
 
329
329
  /**
@@ -412,7 +412,7 @@ pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *dig
412
412
  }
413
413
 
414
414
  while (length > 1 && values[length - 1] == 0) length--;
415
- *integer = (pm_integer_t) { .value = 0, .length = length, .values = values, .negative = false };
415
+ *integer = (pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false };
416
416
  pm_integer_normalize(integer);
417
417
  }
418
418
 
@@ -438,7 +438,7 @@ pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t di
438
438
  }
439
439
 
440
440
  // Convert base from 10**9 to 1<<32.
441
- pm_integer_convert_base(integer, &((pm_integer_t) { .value = 0, .length = length, .values = values, .negative = false }), 1000000000, ((uint64_t) 1 << 32));
441
+ pm_integer_convert_base(integer, &((pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false }), 1000000000, ((uint64_t) 1 << 32));
442
442
  xfree(values);
443
443
  }
444
444
 
@@ -471,15 +471,18 @@ pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *
471
471
  * has already been validated, as internal validation checks are not performed
472
472
  * here.
473
473
  */
474
- PRISM_EXPORTED_FUNCTION void
474
+ void
475
475
  pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) {
476
- // Ignore unary +. Unary + is parsed differently and will not end up here.
476
+ // Ignore unary +. Unary - is parsed differently and will not end up here.
477
477
  // Instead, it will modify the parsed integer later.
478
478
  if (*start == '+') start++;
479
479
 
480
480
  // Determine the multiplier from the base, and skip past any prefixes.
481
481
  uint32_t multiplier = 10;
482
482
  switch (base) {
483
+ case PM_INTEGER_BASE_DEFAULT:
484
+ while (*start == '0') start++; // 01 -> 1
485
+ break;
483
486
  case PM_INTEGER_BASE_BINARY:
484
487
  start += 2; // 0b
485
488
  multiplier = 2;
@@ -533,14 +536,6 @@ pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *s
533
536
  integer->value = (uint32_t) value;
534
537
  }
535
538
 
536
- /**
537
- * Return the memory size of the integer.
538
- */
539
- size_t
540
- pm_integer_memsize(const pm_integer_t *integer) {
541
- return sizeof(pm_integer_t) + integer->length * sizeof(uint32_t);
542
- }
543
-
544
539
  /**
545
540
  * Compare two integers. This function returns -1 if the left integer is less
546
541
  * than the right integer, 0 if they are equal, and 1 if the left integer is
@@ -572,6 +567,39 @@ pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
572
567
  return 0;
573
568
  }
574
569
 
570
+ /**
571
+ * Reduce a ratio of integers to its simplest form.
572
+ */
573
+ void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) {
574
+ // If either the numerator or denominator do not fit into a 32-bit integer,
575
+ // then this function is a no-op. In the future, we may consider reducing
576
+ // even the larger numbers, but for now we're going to keep it simple.
577
+ if (
578
+ // If the numerator doesn't fit into a 32-bit integer, return early.
579
+ numerator->length != 0 ||
580
+ // If the denominator doesn't fit into a 32-bit integer, return early.
581
+ denominator->length != 0 ||
582
+ // If the numerator is 0, then return early.
583
+ numerator->value == 0 ||
584
+ // If the denominator is 1, then return early.
585
+ denominator->value == 1
586
+ ) return;
587
+
588
+ // Find the greatest common divisor of the numerator and denominator.
589
+ uint32_t divisor = numerator->value;
590
+ uint32_t remainder = denominator->value;
591
+
592
+ while (remainder != 0) {
593
+ uint32_t temporary = remainder;
594
+ remainder = divisor % remainder;
595
+ divisor = temporary;
596
+ }
597
+
598
+ // Divide the numerator and denominator by the greatest common divisor.
599
+ numerator->value /= divisor;
600
+ denominator->value /= divisor;
601
+ }
602
+
575
603
  /**
576
604
  * Convert an integer to a decimal string.
577
605
  */
@@ -54,6 +54,35 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
54
54
  return true;
55
55
  }
56
56
 
57
+ /**
58
+ * Returns the line of the given offset. If the offset is not in the list, the
59
+ * line of the closest offset less than the given offset is returned.
60
+ */
61
+ int32_t
62
+ pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
63
+ assert(cursor >= list->start);
64
+ size_t offset = (size_t) (cursor - list->start);
65
+
66
+ size_t left = 0;
67
+ size_t right = list->size - 1;
68
+
69
+ while (left <= right) {
70
+ size_t mid = left + (right - left) / 2;
71
+
72
+ if (list->offsets[mid] == offset) {
73
+ return ((int32_t) mid) + start_line;
74
+ }
75
+
76
+ if (list->offsets[mid] < offset) {
77
+ left = mid + 1;
78
+ } else {
79
+ right = mid - 1;
80
+ }
81
+ }
82
+
83
+ return ((int32_t) left) + start_line - 1;
84
+ }
85
+
57
86
  /**
58
87
  * Returns the line and column of the given offset. If the offset is not in the
59
88
  * list, the line and column of the closest offset less than the given offset