prism 0.24.0 → 0.25.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +50 -1
  4. data/Makefile +5 -2
  5. data/README.md +45 -6
  6. data/config.yml +499 -4
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +2 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +2 -2
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +911 -815
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +27 -11
  18. data/ext/prism/extension.c +313 -66
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +213 -64
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +134 -71
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +82 -7
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +198 -53
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +118 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +8 -0
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +1 -1
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +87 -16
  43. data/lib/prism/dsl.rb +24 -12
  44. data/lib/prism/ffi.rb +67 -12
  45. data/lib/prism/lex_compat.rb +17 -15
  46. data/lib/prism/mutation_compiler.rb +11 -0
  47. data/lib/prism/node.rb +2096 -2499
  48. data/lib/prism/node_ext.rb +77 -29
  49. data/lib/prism/pack.rb +4 -0
  50. data/lib/prism/parse_result/comments.rb +34 -17
  51. data/lib/prism/parse_result/newlines.rb +3 -1
  52. data/lib/prism/parse_result.rb +78 -32
  53. data/lib/prism/pattern.rb +16 -4
  54. data/lib/prism/polyfill/string.rb +12 -0
  55. data/lib/prism/serialize.rb +439 -102
  56. data/lib/prism/translation/parser/compiler.rb +152 -50
  57. data/lib/prism/translation/parser/lexer.rb +103 -22
  58. data/lib/prism/translation/parser/rubocop.rb +41 -13
  59. data/lib/prism/translation/parser.rb +119 -7
  60. data/lib/prism/translation/parser33.rb +1 -1
  61. data/lib/prism/translation/parser34.rb +1 -1
  62. data/lib/prism/translation/ripper/sexp.rb +125 -0
  63. data/lib/prism/translation/ripper/shim.rb +5 -0
  64. data/lib/prism/translation/ripper.rb +3212 -462
  65. data/lib/prism/translation/ruby_parser.rb +35 -18
  66. data/lib/prism/translation.rb +3 -1
  67. data/lib/prism/visitor.rb +10 -0
  68. data/lib/prism.rb +8 -2
  69. data/prism.gemspec +33 -4
  70. data/rbi/prism/compiler.rbi +14 -0
  71. data/rbi/prism/desugar_compiler.rbi +5 -0
  72. data/rbi/prism/mutation_compiler.rbi +5 -0
  73. data/rbi/prism/node.rbi +8221 -0
  74. data/rbi/prism/node_ext.rbi +102 -0
  75. data/rbi/prism/parse_result.rbi +304 -0
  76. data/rbi/prism/translation/parser/compiler.rbi +13 -0
  77. data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
  78. data/rbi/prism/translation/ripper.rbi +25 -0
  79. data/rbi/prism/translation/ruby_parser.rbi +11 -0
  80. data/rbi/prism/visitor.rbi +470 -0
  81. data/rbi/prism.rbi +39 -7749
  82. data/sig/prism/compiler.rbs +9 -0
  83. data/sig/prism/dispatcher.rbs +16 -0
  84. data/sig/prism/dot_visitor.rbs +6 -0
  85. data/sig/prism/dsl.rbs +462 -0
  86. data/sig/prism/mutation_compiler.rbs +158 -0
  87. data/sig/prism/node.rbs +3529 -0
  88. data/sig/prism/node_ext.rbs +78 -0
  89. data/sig/prism/pack.rbs +43 -0
  90. data/sig/prism/parse_result.rbs +127 -0
  91. data/sig/prism/pattern.rbs +13 -0
  92. data/sig/prism/serialize.rbs +7 -0
  93. data/sig/prism/visitor.rbs +168 -0
  94. data/sig/prism.rbs +188 -4767
  95. data/src/diagnostic.c +575 -230
  96. data/src/encoding.c +211 -108
  97. data/src/node.c +7526 -447
  98. data/src/options.c +36 -12
  99. data/src/pack.c +33 -17
  100. data/src/prettyprint.c +1294 -1385
  101. data/src/prism.c +3628 -1099
  102. data/src/regexp.c +17 -2
  103. data/src/serialize.c +47 -28
  104. data/src/static_literals.c +552 -0
  105. data/src/token_type.c +1 -0
  106. data/src/util/pm_buffer.c +147 -20
  107. data/src/util/pm_char.c +4 -4
  108. data/src/util/pm_constant_pool.c +35 -11
  109. data/src/util/pm_integer.c +629 -0
  110. data/src/util/pm_list.c +1 -1
  111. data/src/util/pm_newline_list.c +14 -5
  112. data/src/util/pm_string.c +134 -5
  113. data/src/util/pm_string_list.c +2 -2
  114. metadata +35 -6
  115. data/docs/ripper.md +0 -36
  116. data/rbi/prism_static.rbi +0 -207
  117. data/sig/prism_static.rbs +0 -201
@@ -0,0 +1,629 @@
1
+ #include "prism/util/pm_integer.h"
2
+
3
/**
 * Read the digit count and the digit array out of an integer, whichever way it
 * stores them: when `values` is non-NULL the `length`/`values` pair is used,
 * otherwise the single inline `value` field is exposed as an array of length 1.
 */
#define INTEGER_EXTRACT(integer, length_variable, values_variable) \
    if ((integer)->values != NULL) { \
        length_variable = (integer)->length; \
        values_variable = (integer)->values; \
    } else { \
        length_variable = 1; \
        values_variable = &(integer)->value; \
    }
15
+
16
+ /**
17
+ * Adds two positive pm_integer_t with the given base.
18
+ * Return pm_integer_t with values allocated. Not normalized.
19
+ */
20
+ static void
21
+ big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
22
+ size_t left_length;
23
+ uint32_t *left_values;
24
+ INTEGER_EXTRACT(left, left_length, left_values)
25
+
26
+ size_t right_length;
27
+ uint32_t *right_values;
28
+ INTEGER_EXTRACT(right, right_length, right_values)
29
+
30
+ size_t length = left_length < right_length ? right_length : left_length;
31
+ uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * (length + 1));
32
+ if (values == NULL) return;
33
+
34
+ uint64_t carry = 0;
35
+ for (size_t index = 0; index < length; index++) {
36
+ uint64_t sum = carry + (index < left_length ? left_values[index] : 0) + (index < right_length ? right_values[index] : 0);
37
+ values[index] = (uint32_t) (sum % base);
38
+ carry = sum / base;
39
+ }
40
+
41
+ if (carry > 0) {
42
+ values[length] = (uint32_t) carry;
43
+ length++;
44
+ }
45
+
46
+ *destination = (pm_integer_t) { 0, length, values, false };
47
+ }
48
+
49
+ /**
50
+ * Internal use for karatsuba_multiply. Calculates `a - b - c` with the given
51
+ * base. Assume a, b, c, a - b - c all to be poitive.
52
+ * Return pm_integer_t with values allocated. Not normalized.
53
+ */
54
+ static void
55
+ big_sub2(pm_integer_t *destination, pm_integer_t *a, pm_integer_t *b, pm_integer_t *c, uint64_t base) {
56
+ size_t a_length;
57
+ uint32_t *a_values;
58
+ INTEGER_EXTRACT(a, a_length, a_values)
59
+
60
+ size_t b_length;
61
+ uint32_t *b_values;
62
+ INTEGER_EXTRACT(b, b_length, b_values)
63
+
64
+ size_t c_length;
65
+ uint32_t *c_values;
66
+ INTEGER_EXTRACT(c, c_length, c_values)
67
+
68
+ uint32_t *values = (uint32_t*) xmalloc(sizeof(uint32_t) * a_length);
69
+ int64_t carry = 0;
70
+
71
+ for (size_t index = 0; index < a_length; index++) {
72
+ int64_t sub = (
73
+ carry +
74
+ a_values[index] -
75
+ (index < b_length ? b_values[index] : 0) -
76
+ (index < c_length ? c_values[index] : 0)
77
+ );
78
+
79
+ if (sub >= 0) {
80
+ values[index] = (uint32_t) sub;
81
+ carry = 0;
82
+ } else {
83
+ sub += 2 * (int64_t) base;
84
+ values[index] = (uint32_t) ((uint64_t) sub % base);
85
+ carry = sub / (int64_t) base - 2;
86
+ }
87
+ }
88
+
89
+ while (a_length > 1 && values[a_length - 1] == 0) a_length--;
90
+ *destination = (pm_integer_t) { 0, a_length, values, false };
91
+ }
92
+
93
+ /**
94
+ * Multiply two positive integers with the given base using karatsuba algorithm.
95
+ * Return pm_integer_t with values allocated. Not normalized.
96
+ */
97
+ static void
98
+ karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
99
+ size_t left_length;
100
+ uint32_t *left_values;
101
+ INTEGER_EXTRACT(left, left_length, left_values)
102
+
103
+ size_t right_length;
104
+ uint32_t *right_values;
105
+ INTEGER_EXTRACT(right, right_length, right_values)
106
+
107
+ if (left_length > right_length) {
108
+ size_t temporary_length = left_length;
109
+ left_length = right_length;
110
+ right_length = temporary_length;
111
+
112
+ uint32_t *temporary_values = left_values;
113
+ left_values = right_values;
114
+ right_values = temporary_values;
115
+ }
116
+
117
+ if (left_length <= 10) {
118
+ size_t length = left_length + right_length;
119
+ uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
120
+ if (values == NULL) return;
121
+
122
+ for (size_t left_index = 0; left_index < left_length; left_index++) {
123
+ uint32_t carry = 0;
124
+ for (size_t right_index = 0; right_index < right_length; right_index++) {
125
+ uint64_t product = (uint64_t) left_values[left_index] * right_values[right_index] + values[left_index + right_index] + carry;
126
+ values[left_index + right_index] = (uint32_t) (product % base);
127
+ carry = (uint32_t) (product / base);
128
+ }
129
+ values[left_index + right_length] = carry;
130
+ }
131
+
132
+ while (length > 1 && values[length - 1] == 0) length--;
133
+ *destination = (pm_integer_t) { 0, length, values, false };
134
+ return;
135
+ }
136
+
137
+ if (left_length * 2 <= right_length) {
138
+ uint32_t *values = (uint32_t*) xcalloc(left_length + right_length, sizeof(uint32_t));
139
+
140
+ for (size_t start_offset = 0; start_offset < right_length; start_offset += left_length) {
141
+ size_t end_offset = start_offset + left_length;
142
+ if (end_offset > right_length) end_offset = right_length;
143
+
144
+ pm_integer_t sliced_right = {
145
+ .value = 0,
146
+ .length = end_offset - start_offset,
147
+ .values = right_values + start_offset,
148
+ .negative = false
149
+ };
150
+
151
+ pm_integer_t product;
152
+ karatsuba_multiply(&product, left, &sliced_right, base);
153
+
154
+ uint32_t carry = 0;
155
+ for (size_t index = 0; index < product.length; index++) {
156
+ uint64_t sum = (uint64_t) values[start_offset + index] + product.values[index] + carry;
157
+ values[start_offset + index] = (uint32_t) (sum % base);
158
+ carry = (uint32_t) (sum / base);
159
+ }
160
+
161
+ if (carry > 0) values[start_offset + product.length] += carry;
162
+ pm_integer_free(&product);
163
+ }
164
+
165
+ *destination = (pm_integer_t) { 0, left_length + right_length, values, false };
166
+ return;
167
+ }
168
+
169
+ size_t half = left_length / 2;
170
+ pm_integer_t x0 = { 0, half, left_values, false };
171
+ pm_integer_t x1 = { 0, left_length - half, left_values + half, false };
172
+ pm_integer_t y0 = { 0, half, right_values, false };
173
+ pm_integer_t y1 = { 0, right_length - half, right_values + half, false };
174
+
175
+ pm_integer_t z0;
176
+ karatsuba_multiply(&z0, &x0, &y0, base);
177
+
178
+ pm_integer_t z2;
179
+ karatsuba_multiply(&z2, &x1, &y1, base);
180
+
181
+ // For simplicity to avoid considering negative values,
182
+ // use `z1 = (x0 + x1) * (y0 + y1) - z0 - z2` instead of original karatsuba algorithm.
183
+ pm_integer_t x01;
184
+ big_add(&x01, &x0, &x1, base);
185
+
186
+ pm_integer_t y01;
187
+ big_add(&y01, &y0, &y1, base);
188
+
189
+ pm_integer_t xy;
190
+ karatsuba_multiply(&xy, &x01, &y01, base);
191
+
192
+ pm_integer_t z1;
193
+ big_sub2(&z1, &xy, &z0, &z2, base);
194
+
195
+ size_t length = left_length + right_length;
196
+ uint32_t *values = (uint32_t*) xcalloc(length, sizeof(uint32_t));
197
+ memcpy(values, z0.values, sizeof(uint32_t) * z0.length);
198
+ memcpy(values + 2 * half, z2.values, sizeof(uint32_t) * z2.length);
199
+
200
+ uint32_t carry = 0;
201
+ for(size_t index = 0; index < z1.length; index++) {
202
+ uint64_t sum = (uint64_t) carry + values[index + half] + z1.values[index];
203
+ values[index + half] = (uint32_t) (sum % base);
204
+ carry = (uint32_t) (sum / base);
205
+ }
206
+
207
+ for(size_t index = half + z1.length; carry > 0; index++) {
208
+ uint64_t sum = (uint64_t) carry + values[index];
209
+ values[index] = (uint32_t) (sum % base);
210
+ carry = (uint32_t) (sum / base);
211
+ }
212
+
213
+ while (length > 1 && values[length - 1] == 0) length--;
214
+ pm_integer_free(&z0);
215
+ pm_integer_free(&z1);
216
+ pm_integer_free(&z2);
217
+ pm_integer_free(&x01);
218
+ pm_integer_free(&y01);
219
+ pm_integer_free(&xy);
220
+
221
+ *destination = (pm_integer_t) { 0, length, values, false };
222
+ }
223
+
224
+ /**
225
+ * Lookup table giving the value of a hexadecimal digit, indexed by the ASCII byte; every byte that is not a digit maps to -1.
226
+ * Note that there's an odd exception here where _ is mapped to 0. This is
227
+ * because it's possible for us to end up trying to parse a number that has
228
+ * already had an error attached to it, and we want to provide _something_ to
229
+ * the user.
230
+ */
231
+ static const int8_t pm_integer_parse_digit_values[256] = {
232
+ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
233
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
234
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
235
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
236
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 3x
237
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
238
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, // 5x
239
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
240
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
241
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
242
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
243
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
244
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
245
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
246
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
247
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
248
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Fx
249
+ };
250
+
251
+ /**
252
+ * Return the value of a hexadecimal digit in a uint8_t.
253
+ */
254
+ static uint8_t
255
+ pm_integer_parse_digit(const uint8_t character) {
256
+ int8_t value = pm_integer_parse_digit_values[character];
257
+ assert(value != -1 && "invalid digit");
258
+
259
+ return (uint8_t) value;
260
+ }
261
+
262
+ /**
263
+ * Create a pm_integer_t from uint64_t with the given base. It is assumed that
264
+ * the memory for the pm_integer_t pointer has been zeroed.
265
+ */
266
+ static void
267
+ pm_integer_from_uint64(pm_integer_t *integer, uint64_t value, uint64_t base) {
268
+ if (value < base) {
269
+ integer->value = (uint32_t) value;
270
+ return;
271
+ }
272
+
273
+ size_t length = 0;
274
+ uint64_t length_value = value;
275
+ while (length_value > 0) {
276
+ length++;
277
+ length_value /= base;
278
+ }
279
+
280
+ uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * length);
281
+ if (values == NULL) return;
282
+
283
+ for (size_t value_index = 0; value_index < length; value_index++) {
284
+ values[value_index] = (uint32_t) (value % base);
285
+ value /= base;
286
+ }
287
+
288
+ integer->length = length;
289
+ integer->values = values;
290
+ }
291
+
292
+ /**
293
+ * Normalize pm_integer_t.
294
+ * Heading zero values will be removed. If the integer fits into uint32_t,
295
+ * values is set to NULL, length is set to 0, and value field will be used.
296
+ */
297
+ static void
298
+ pm_integer_normalize(pm_integer_t *integer) {
299
+ if (integer->values == NULL) {
300
+ return;
301
+ }
302
+
303
+ while (integer->length > 1 && integer->values[integer->length - 1] == 0) {
304
+ integer->length--;
305
+ }
306
+
307
+ if (integer->length > 1) {
308
+ return;
309
+ }
310
+
311
+ uint32_t value = integer->values[0];
312
+ bool negative = integer->negative && value != 0;
313
+
314
+ pm_integer_free(integer);
315
+ *integer = (pm_integer_t) { .value = value, .length = 0, .values = NULL, .negative = negative };
316
+ }
317
+
318
+ /**
319
+ * Convert base of the integer.
320
+ * In practice, it converts 10**9 to 1<<32 or 1<<32 to 10**9.
321
+ */
322
+ static void
323
+ pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, uint64_t base_from, uint64_t base_to) {
324
+ size_t source_length;
325
+ const uint32_t *source_values;
326
+ INTEGER_EXTRACT(source, source_length, source_values)
327
+
328
+ size_t bigints_length = (source_length + 1) / 2;
329
+ pm_integer_t *bigints = (pm_integer_t *) xcalloc(bigints_length, sizeof(pm_integer_t));
330
+ if (bigints == NULL) return;
331
+
332
+ for (size_t index = 0; index < source_length; index += 2) {
333
+ uint64_t value = source_values[index] + base_from * (index + 1 < source_length ? source_values[index + 1] : 0);
334
+ pm_integer_from_uint64(&bigints[index / 2], value, base_to);
335
+ }
336
+
337
+ pm_integer_t base = { 0 };
338
+ pm_integer_from_uint64(&base, base_from, base_to);
339
+
340
+ while (bigints_length > 1) {
341
+ pm_integer_t next_base;
342
+ karatsuba_multiply(&next_base, &base, &base, base_to);
343
+
344
+ pm_integer_free(&base);
345
+ base = next_base;
346
+
347
+ size_t next_length = (bigints_length + 1) / 2;
348
+ pm_integer_t *next_bigints = (pm_integer_t *) xmalloc(sizeof(pm_integer_t) * next_length);
349
+
350
+ for (size_t bigints_index = 0; bigints_index < bigints_length; bigints_index += 2) {
351
+ if (bigints_index + 1 == bigints_length) {
352
+ next_bigints[bigints_index / 2] = bigints[bigints_index];
353
+ } else {
354
+ pm_integer_t multiplied;
355
+ karatsuba_multiply(&multiplied, &base, &bigints[bigints_index + 1], base_to);
356
+
357
+ big_add(&next_bigints[bigints_index / 2], &bigints[bigints_index], &multiplied, base_to);
358
+ pm_integer_free(&bigints[bigints_index]);
359
+ pm_integer_free(&bigints[bigints_index + 1]);
360
+ pm_integer_free(&multiplied);
361
+ }
362
+ }
363
+
364
+ xfree(bigints);
365
+ bigints = next_bigints;
366
+ bigints_length = next_length;
367
+ }
368
+
369
+ *destination = bigints[0];
370
+ destination->negative = source->negative;
371
+ pm_integer_normalize(destination);
372
+
373
+ xfree(bigints);
374
+ pm_integer_free(&base);
375
+ }
376
+
377
+ #undef INTEGER_EXTRACT
378
+
379
+ /**
380
+ * Convert digits to integer with the given power-of-two base.
381
+ */
382
+ static void
383
+ pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *digits, size_t digits_length) {
384
+ size_t bit = 1;
385
+ while (base > (uint32_t) (1 << bit)) bit++;
386
+
387
+ size_t length = (digits_length * bit + 31) / 32;
388
+ uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
389
+
390
+ for (size_t digit_index = 0; digit_index < digits_length; digit_index++) {
391
+ size_t bit_position = bit * (digits_length - digit_index - 1);
392
+ uint32_t value = digits[digit_index];
393
+
394
+ size_t index = bit_position / 32;
395
+ size_t shift = bit_position % 32;
396
+
397
+ values[index] |= value << shift;
398
+ if (32 - shift < bit) values[index + 1] |= value >> (32 - shift);
399
+ }
400
+
401
+ while (length > 1 && values[length - 1] == 0) length--;
402
+ *integer = (pm_integer_t) { .value = 0, .length = length, .values = values, .negative = false };
403
+ pm_integer_normalize(integer);
404
+ }
405
+
406
+ /**
407
+ * Convert decimal digits to pm_integer_t.
408
+ */
409
+ static void
410
+ pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t digits_length) {
411
+ const size_t batch = 9;
412
+ size_t length = (digits_length + batch - 1) / batch;
413
+
414
+ uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
415
+ uint32_t value = 0;
416
+
417
+ for (size_t digits_index = 0; digits_index < digits_length; digits_index++) {
418
+ value = value * 10 + digits[digits_index];
419
+
420
+ size_t reverse_index = digits_length - digits_index - 1;
421
+ if (reverse_index % batch == 0) {
422
+ values[reverse_index / batch] = value;
423
+ value = 0;
424
+ }
425
+ }
426
+
427
+ // Convert base from 10**9 to 1<<32.
428
+ pm_integer_convert_base(integer, &((pm_integer_t) { .value = 0, .length = length, .values = values, .negative = false }), 1000000000, ((uint64_t) 1 << 32));
429
+ xfree(values);
430
+ }
431
+
432
+ /**
433
+ * Parse a large integer from a string that does not fit into uint32_t.
434
+ */
435
+ static void
436
+ pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) {
437
+ // Allocate an array to store digits.
438
+ uint8_t *digits = xmalloc(sizeof(uint8_t) * (size_t) (end - start));
439
+ size_t digits_length = 0;
440
+
441
+ for (; start < end; start++) {
442
+ if (*start == '_') continue;
443
+ digits[digits_length++] = pm_integer_parse_digit(*start);
444
+ }
445
+
446
+ // Construct pm_integer_t from the digits.
447
+ if (multiplier == 10) {
448
+ pm_integer_parse_decimal(integer, digits, digits_length);
449
+ } else {
450
+ pm_integer_parse_powof2(integer, multiplier, digits, digits_length);
451
+ }
452
+
453
+ xfree(digits);
454
+ }
455
+
456
+ /**
457
+ * Parse an integer from a string. This assumes that the format of the integer
458
+ * has already been validated, as internal validation checks are not performed
459
+ * here.
460
+ */
461
+ PRISM_EXPORTED_FUNCTION void
462
+ pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) {
463
+ // Ignore unary +. Unary + is parsed differently and will not end up here.
464
+ // Instead, it will modify the parsed integer later.
465
+ if (*start == '+') start++;
466
+
467
+ // Determine the multiplier from the base, and skip past any prefixes.
468
+ uint32_t multiplier = 10;
469
+ switch (base) {
470
+ case PM_INTEGER_BASE_BINARY:
471
+ start += 2; // 0b
472
+ multiplier = 2;
473
+ break;
474
+ case PM_INTEGER_BASE_OCTAL:
475
+ start++; // 0
476
+ if (*start == '_' || *start == 'o' || *start == 'O') start++; // o
477
+ multiplier = 8;
478
+ break;
479
+ case PM_INTEGER_BASE_DECIMAL:
480
+ if (*start == '0' && (end - start) > 1) start += 2; // 0d
481
+ break;
482
+ case PM_INTEGER_BASE_HEXADECIMAL:
483
+ start += 2; // 0x
484
+ multiplier = 16;
485
+ break;
486
+ case PM_INTEGER_BASE_UNKNOWN:
487
+ if (*start == '0' && (end - start) > 1) {
488
+ switch (start[1]) {
489
+ case '_': start += 2; multiplier = 8; break;
490
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': start++; multiplier = 8; break;
491
+ case 'b': case 'B': start += 2; multiplier = 2; break;
492
+ case 'o': case 'O': start += 2; multiplier = 8; break;
493
+ case 'd': case 'D': start += 2; break;
494
+ case 'x': case 'X': start += 2; multiplier = 16; break;
495
+ default: assert(false && "unreachable"); break;
496
+ }
497
+ }
498
+ break;
499
+ }
500
+
501
+ // It's possible that we've consumed everything at this point if there is an
502
+ // invalid integer. If this is the case, we'll just return 0.
503
+ if (start >= end) return;
504
+
505
+ const uint8_t *cursor = start;
506
+ uint64_t value = (uint64_t) pm_integer_parse_digit(*cursor++);
507
+
508
+ for (; cursor < end; cursor++) {
509
+ if (*cursor == '_') continue;
510
+ value = value * multiplier + (uint64_t) pm_integer_parse_digit(*cursor);
511
+
512
+ if (value > UINT32_MAX) {
513
+ // If the integer is too large to fit into a single uint32_t, then
514
+ // we'll parse it as a big integer.
515
+ pm_integer_parse_big(integer, multiplier, start, end);
516
+ return;
517
+ }
518
+ }
519
+
520
+ integer->value = (uint32_t) value;
521
+ }
522
+
523
+ /**
524
+ * Return the memory size of the integer.
525
+ */
526
+ size_t
527
+ pm_integer_memsize(const pm_integer_t *integer) {
528
+ return sizeof(pm_integer_t) + integer->length * sizeof(uint32_t);
529
+ }
530
+
531
+ /**
532
+ * Compare two integers. This function returns -1 if the left integer is less
533
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
534
+ * greater than the right integer.
535
+ */
536
+ int
537
+ pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
538
+ if (left->negative != right->negative) return left->negative ? -1 : 1;
539
+ int negative = left->negative ? -1 : 1;
540
+
541
+ if (left->values == NULL && right->values == NULL) {
542
+ if (left->value < right->value) return -1 * negative;
543
+ if (left->value > right->value) return 1 * negative;
544
+ return 0;
545
+ }
546
+
547
+ if (left->values == NULL || left->length < right->length) return -1 * negative;
548
+ if (right->values == NULL || left->length > right->length) return 1 * negative;
549
+
550
+ for (size_t index = 0; index < left->length; index++) {
551
+ size_t value_index = left->length - index - 1;
552
+ uint32_t left_value = left->values[value_index];
553
+ uint32_t right_value = right->values[value_index];
554
+
555
+ if (left_value < right_value) return -1 * negative;
556
+ if (left_value > right_value) return 1 * negative;
557
+ }
558
+
559
+ return 0;
560
+ }
561
+
562
+ /**
563
+ * Convert an integer to a decimal string.
564
+ */
565
+ PRISM_EXPORTED_FUNCTION void
566
+ pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
567
+ if (integer->negative) {
568
+ pm_buffer_append_byte(buffer, '-');
569
+ }
570
+
571
+ // If the integer fits into a single uint32_t, then we can just append the
572
+ // value directly to the buffer.
573
+ if (integer->values == NULL) {
574
+ pm_buffer_append_format(buffer, "%" PRIu32, integer->value);
575
+ return;
576
+ }
577
+
578
+ // If the integer is two uint32_t values, then we can | them together and
579
+ // append the result to the buffer.
580
+ if (integer->length == 2) {
581
+ const uint64_t value = ((uint64_t) integer->values[0]) | ((uint64_t) integer->values[1] << 32);
582
+ pm_buffer_append_format(buffer, "%" PRIu64, value);
583
+ return;
584
+ }
585
+
586
+ // Otherwise, first we'll convert the base from 1<<32 to 10**9.
587
+ pm_integer_t converted;
588
+ pm_integer_convert_base(&converted, integer, (uint64_t) 1 << 32, 1000000000);
589
+
590
+ if (converted.values == NULL) {
591
+ pm_buffer_append_format(buffer, "%" PRIu32, converted.value);
592
+ pm_integer_free(&converted);
593
+ return;
594
+ }
595
+
596
+ // Allocate a buffer that we'll copy the decimal digits into.
597
+ size_t digits_length = converted.length * 9;
598
+ char *digits = xcalloc(digits_length, sizeof(char));
599
+ if (digits == NULL) return;
600
+
601
+ // Pack bigdecimal to digits.
602
+ for (size_t value_index = 0; value_index < converted.length; value_index++) {
603
+ uint32_t value = converted.values[value_index];
604
+
605
+ for (size_t digit_index = 0; digit_index < 9; digit_index++) {
606
+ digits[digits_length - 9 * value_index - digit_index - 1] = (char) ('0' + value % 10);
607
+ value /= 10;
608
+ }
609
+ }
610
+
611
+ size_t start_offset = 0;
612
+ while (start_offset < digits_length - 1 && digits[start_offset] == '0') start_offset++;
613
+
614
+ // Finally, append the string to the buffer and free the digits.
615
+ pm_buffer_append_string(buffer, digits + start_offset, digits_length - start_offset);
616
+ xfree(digits);
617
+ pm_integer_free(&converted);
618
+ }
619
+
620
+ /**
621
+ * Free the internal memory of an integer. This memory will only be allocated if
622
+ * the integer exceeds the size of a single uint32_t.
623
+ */
624
+ PRISM_EXPORTED_FUNCTION void
625
+ pm_integer_free(pm_integer_t *integer) {
626
+ if (integer->values) {
627
+ xfree(integer->values);
628
+ }
629
+ }
data/src/util/pm_list.c CHANGED
@@ -41,7 +41,7 @@ pm_list_free(pm_list_t *list) {
41
41
 
42
42
  while (node != NULL) {
43
43
  next = node->next;
44
- free(node);
44
+ xfree(node);
45
45
  node = next;
46
46
  }
47
47
 
@@ -6,7 +6,7 @@
6
6
  */
7
7
  bool
8
8
  pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
9
- list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
9
+ list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t));
10
10
  if (list->offsets == NULL) return false;
11
11
 
12
12
  list->start = start;
@@ -19,6 +19,14 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac
19
19
  return true;
20
20
  }
21
21
 
22
+ /**
23
+ * Clear out the newlines that have been appended to the list by resetting its size to 1 (presumably keeping only the initial entry set up by pm_newline_list_init; confirm against that function). The offsets storage is not touched.
24
+ */
25
+ void
26
+ pm_newline_list_clear(pm_newline_list_t *list) {
27
+ list->size = 1;
28
+ }
29
+
22
30
  /**
23
31
  * Append a new offset to the newline list. Returns true if the reallocation of
24
32
  * the offsets succeeds (if one was necessary), otherwise returns false.
@@ -29,10 +37,11 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
29
37
  size_t *original_offsets = list->offsets;
30
38
 
31
39
  list->capacity = (list->capacity * 3) / 2;
32
- list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
33
- memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
34
- free(original_offsets);
40
+ list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t));
35
41
  if (list->offsets == NULL) return false;
42
+
43
+ memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
44
+ xfree(original_offsets);
36
45
  }
37
46
 
38
47
  assert(*cursor == '\n');
@@ -83,5 +92,5 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
83
92
  */
84
93
  void
85
94
  pm_newline_list_free(pm_newline_list_t *list) {
86
- free(list->offsets);
95
+ xfree(list->offsets);
87
96
  }