prism 0.24.0 → 0.25.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +50 -1
- data/Makefile +5 -2
- data/README.md +45 -6
- data/config.yml +499 -4
- data/docs/build_system.md +31 -0
- data/docs/configuration.md +2 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/releasing.md +2 -2
- data/docs/ripper_translation.md +50 -0
- data/docs/ruby_api.md +1 -0
- data/docs/serialization.md +26 -5
- data/ext/prism/api_node.c +911 -815
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +27 -11
- data/ext/prism/extension.c +313 -66
- data/ext/prism/extension.h +5 -4
- data/include/prism/ast.h +213 -64
- data/include/prism/defines.h +106 -2
- data/include/prism/diagnostic.h +134 -71
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +93 -0
- data/include/prism/options.h +82 -7
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +198 -53
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/static_literals.h +118 -0
- data/include/prism/util/pm_buffer.h +65 -2
- data/include/prism/util/pm_constant_pool.h +18 -1
- data/include/prism/util/pm_integer.h +119 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +8 -0
- data/include/prism/util/pm_string.h +26 -2
- data/include/prism/version.h +2 -2
- data/include/prism.h +59 -1
- data/lib/prism/compiler.rb +8 -1
- data/lib/prism/debug.rb +46 -3
- data/lib/prism/desugar_compiler.rb +1 -1
- data/lib/prism/dispatcher.rb +29 -0
- data/lib/prism/dot_visitor.rb +87 -16
- data/lib/prism/dsl.rb +24 -12
- data/lib/prism/ffi.rb +67 -12
- data/lib/prism/lex_compat.rb +17 -15
- data/lib/prism/mutation_compiler.rb +11 -0
- data/lib/prism/node.rb +2096 -2499
- data/lib/prism/node_ext.rb +77 -29
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -17
- data/lib/prism/parse_result/newlines.rb +3 -1
- data/lib/prism/parse_result.rb +78 -32
- data/lib/prism/pattern.rb +16 -4
- data/lib/prism/polyfill/string.rb +12 -0
- data/lib/prism/serialize.rb +439 -102
- data/lib/prism/translation/parser/compiler.rb +152 -50
- data/lib/prism/translation/parser/lexer.rb +103 -22
- data/lib/prism/translation/parser/rubocop.rb +41 -13
- data/lib/prism/translation/parser.rb +119 -7
- data/lib/prism/translation/parser33.rb +1 -1
- data/lib/prism/translation/parser34.rb +1 -1
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3212 -462
- data/lib/prism/translation/ruby_parser.rb +35 -18
- data/lib/prism/translation.rb +3 -1
- data/lib/prism/visitor.rb +10 -0
- data/lib/prism.rb +8 -2
- data/prism.gemspec +33 -4
- data/rbi/prism/compiler.rbi +14 -0
- data/rbi/prism/desugar_compiler.rbi +5 -0
- data/rbi/prism/mutation_compiler.rbi +5 -0
- data/rbi/prism/node.rbi +8221 -0
- data/rbi/prism/node_ext.rbi +102 -0
- data/rbi/prism/parse_result.rbi +304 -0
- data/rbi/prism/translation/parser/compiler.rbi +13 -0
- data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
- data/rbi/prism/translation/ripper.rbi +25 -0
- data/rbi/prism/translation/ruby_parser.rbi +11 -0
- data/rbi/prism/visitor.rbi +470 -0
- data/rbi/prism.rbi +39 -7749
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +462 -0
- data/sig/prism/mutation_compiler.rbs +158 -0
- data/sig/prism/node.rbs +3529 -0
- data/sig/prism/node_ext.rbs +78 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +127 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/serialize.rbs +7 -0
- data/sig/prism/visitor.rbs +168 -0
- data/sig/prism.rbs +188 -4767
- data/src/diagnostic.c +575 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7526 -447
- data/src/options.c +36 -12
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1294 -1385
- data/src/prism.c +3628 -1099
- data/src/regexp.c +17 -2
- data/src/serialize.c +47 -28
- data/src/static_literals.c +552 -0
- data/src/token_type.c +1 -0
- data/src/util/pm_buffer.c +147 -20
- data/src/util/pm_char.c +4 -4
- data/src/util/pm_constant_pool.c +35 -11
- data/src/util/pm_integer.c +629 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +14 -5
- data/src/util/pm_string.c +134 -5
- data/src/util/pm_string_list.c +2 -2
- metadata +35 -6
- data/docs/ripper.md +0 -36
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
@@ -0,0 +1,629 @@
|
|
1
|
+
#include "prism/util/pm_integer.h"
|
2
|
+
|
3
|
+
/**
 * Read the limb count and the limb pointer out of an integer, whichever way it
 * happens to be stored. When `values` is set, the `length`/`values` pair is
 * used directly; otherwise the inline `value` field is exposed as a
 * one-element array. Expands to a single if/else statement, so call sites do
 * not need a trailing semicolon.
 */
#define INTEGER_EXTRACT(integer, length_variable, values_variable) \
    if ((integer)->values != NULL) { \
        values_variable = (integer)->values; \
        length_variable = (integer)->length; \
    } else { \
        values_variable = &(integer)->value; \
        length_variable = 1; \
    }
|
15
|
+
|
16
|
+
/**
|
17
|
+
* Adds two positive pm_integer_t with the given base.
|
18
|
+
* Return pm_integer_t with values allocated. Not normalized.
|
19
|
+
*/
|
20
|
+
static void
|
21
|
+
big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
|
22
|
+
size_t left_length;
|
23
|
+
uint32_t *left_values;
|
24
|
+
INTEGER_EXTRACT(left, left_length, left_values)
|
25
|
+
|
26
|
+
size_t right_length;
|
27
|
+
uint32_t *right_values;
|
28
|
+
INTEGER_EXTRACT(right, right_length, right_values)
|
29
|
+
|
30
|
+
size_t length = left_length < right_length ? right_length : left_length;
|
31
|
+
uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * (length + 1));
|
32
|
+
if (values == NULL) return;
|
33
|
+
|
34
|
+
uint64_t carry = 0;
|
35
|
+
for (size_t index = 0; index < length; index++) {
|
36
|
+
uint64_t sum = carry + (index < left_length ? left_values[index] : 0) + (index < right_length ? right_values[index] : 0);
|
37
|
+
values[index] = (uint32_t) (sum % base);
|
38
|
+
carry = sum / base;
|
39
|
+
}
|
40
|
+
|
41
|
+
if (carry > 0) {
|
42
|
+
values[length] = (uint32_t) carry;
|
43
|
+
length++;
|
44
|
+
}
|
45
|
+
|
46
|
+
*destination = (pm_integer_t) { 0, length, values, false };
|
47
|
+
}
|
48
|
+
|
49
|
+
/**
|
50
|
+
* Internal use for karatsuba_multiply. Calculates `a - b - c` with the given
|
51
|
+
* base. Assume a, b, c, a - b - c all to be poitive.
|
52
|
+
* Return pm_integer_t with values allocated. Not normalized.
|
53
|
+
*/
|
54
|
+
static void
|
55
|
+
big_sub2(pm_integer_t *destination, pm_integer_t *a, pm_integer_t *b, pm_integer_t *c, uint64_t base) {
|
56
|
+
size_t a_length;
|
57
|
+
uint32_t *a_values;
|
58
|
+
INTEGER_EXTRACT(a, a_length, a_values)
|
59
|
+
|
60
|
+
size_t b_length;
|
61
|
+
uint32_t *b_values;
|
62
|
+
INTEGER_EXTRACT(b, b_length, b_values)
|
63
|
+
|
64
|
+
size_t c_length;
|
65
|
+
uint32_t *c_values;
|
66
|
+
INTEGER_EXTRACT(c, c_length, c_values)
|
67
|
+
|
68
|
+
uint32_t *values = (uint32_t*) xmalloc(sizeof(uint32_t) * a_length);
|
69
|
+
int64_t carry = 0;
|
70
|
+
|
71
|
+
for (size_t index = 0; index < a_length; index++) {
|
72
|
+
int64_t sub = (
|
73
|
+
carry +
|
74
|
+
a_values[index] -
|
75
|
+
(index < b_length ? b_values[index] : 0) -
|
76
|
+
(index < c_length ? c_values[index] : 0)
|
77
|
+
);
|
78
|
+
|
79
|
+
if (sub >= 0) {
|
80
|
+
values[index] = (uint32_t) sub;
|
81
|
+
carry = 0;
|
82
|
+
} else {
|
83
|
+
sub += 2 * (int64_t) base;
|
84
|
+
values[index] = (uint32_t) ((uint64_t) sub % base);
|
85
|
+
carry = sub / (int64_t) base - 2;
|
86
|
+
}
|
87
|
+
}
|
88
|
+
|
89
|
+
while (a_length > 1 && values[a_length - 1] == 0) a_length--;
|
90
|
+
*destination = (pm_integer_t) { 0, a_length, values, false };
|
91
|
+
}
|
92
|
+
|
93
|
+
/**
|
94
|
+
* Multiply two positive integers with the given base using karatsuba algorithm.
|
95
|
+
* Return pm_integer_t with values allocated. Not normalized.
|
96
|
+
*/
|
97
|
+
static void
|
98
|
+
karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
|
99
|
+
size_t left_length;
|
100
|
+
uint32_t *left_values;
|
101
|
+
INTEGER_EXTRACT(left, left_length, left_values)
|
102
|
+
|
103
|
+
size_t right_length;
|
104
|
+
uint32_t *right_values;
|
105
|
+
INTEGER_EXTRACT(right, right_length, right_values)
|
106
|
+
|
107
|
+
if (left_length > right_length) {
|
108
|
+
size_t temporary_length = left_length;
|
109
|
+
left_length = right_length;
|
110
|
+
right_length = temporary_length;
|
111
|
+
|
112
|
+
uint32_t *temporary_values = left_values;
|
113
|
+
left_values = right_values;
|
114
|
+
right_values = temporary_values;
|
115
|
+
}
|
116
|
+
|
117
|
+
if (left_length <= 10) {
|
118
|
+
size_t length = left_length + right_length;
|
119
|
+
uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
|
120
|
+
if (values == NULL) return;
|
121
|
+
|
122
|
+
for (size_t left_index = 0; left_index < left_length; left_index++) {
|
123
|
+
uint32_t carry = 0;
|
124
|
+
for (size_t right_index = 0; right_index < right_length; right_index++) {
|
125
|
+
uint64_t product = (uint64_t) left_values[left_index] * right_values[right_index] + values[left_index + right_index] + carry;
|
126
|
+
values[left_index + right_index] = (uint32_t) (product % base);
|
127
|
+
carry = (uint32_t) (product / base);
|
128
|
+
}
|
129
|
+
values[left_index + right_length] = carry;
|
130
|
+
}
|
131
|
+
|
132
|
+
while (length > 1 && values[length - 1] == 0) length--;
|
133
|
+
*destination = (pm_integer_t) { 0, length, values, false };
|
134
|
+
return;
|
135
|
+
}
|
136
|
+
|
137
|
+
if (left_length * 2 <= right_length) {
|
138
|
+
uint32_t *values = (uint32_t*) xcalloc(left_length + right_length, sizeof(uint32_t));
|
139
|
+
|
140
|
+
for (size_t start_offset = 0; start_offset < right_length; start_offset += left_length) {
|
141
|
+
size_t end_offset = start_offset + left_length;
|
142
|
+
if (end_offset > right_length) end_offset = right_length;
|
143
|
+
|
144
|
+
pm_integer_t sliced_right = {
|
145
|
+
.value = 0,
|
146
|
+
.length = end_offset - start_offset,
|
147
|
+
.values = right_values + start_offset,
|
148
|
+
.negative = false
|
149
|
+
};
|
150
|
+
|
151
|
+
pm_integer_t product;
|
152
|
+
karatsuba_multiply(&product, left, &sliced_right, base);
|
153
|
+
|
154
|
+
uint32_t carry = 0;
|
155
|
+
for (size_t index = 0; index < product.length; index++) {
|
156
|
+
uint64_t sum = (uint64_t) values[start_offset + index] + product.values[index] + carry;
|
157
|
+
values[start_offset + index] = (uint32_t) (sum % base);
|
158
|
+
carry = (uint32_t) (sum / base);
|
159
|
+
}
|
160
|
+
|
161
|
+
if (carry > 0) values[start_offset + product.length] += carry;
|
162
|
+
pm_integer_free(&product);
|
163
|
+
}
|
164
|
+
|
165
|
+
*destination = (pm_integer_t) { 0, left_length + right_length, values, false };
|
166
|
+
return;
|
167
|
+
}
|
168
|
+
|
169
|
+
size_t half = left_length / 2;
|
170
|
+
pm_integer_t x0 = { 0, half, left_values, false };
|
171
|
+
pm_integer_t x1 = { 0, left_length - half, left_values + half, false };
|
172
|
+
pm_integer_t y0 = { 0, half, right_values, false };
|
173
|
+
pm_integer_t y1 = { 0, right_length - half, right_values + half, false };
|
174
|
+
|
175
|
+
pm_integer_t z0;
|
176
|
+
karatsuba_multiply(&z0, &x0, &y0, base);
|
177
|
+
|
178
|
+
pm_integer_t z2;
|
179
|
+
karatsuba_multiply(&z2, &x1, &y1, base);
|
180
|
+
|
181
|
+
// For simplicity to avoid considering negative values,
|
182
|
+
// use `z1 = (x0 + x1) * (y0 + y1) - z0 - z2` instead of original karatsuba algorithm.
|
183
|
+
pm_integer_t x01;
|
184
|
+
big_add(&x01, &x0, &x1, base);
|
185
|
+
|
186
|
+
pm_integer_t y01;
|
187
|
+
big_add(&y01, &y0, &y1, base);
|
188
|
+
|
189
|
+
pm_integer_t xy;
|
190
|
+
karatsuba_multiply(&xy, &x01, &y01, base);
|
191
|
+
|
192
|
+
pm_integer_t z1;
|
193
|
+
big_sub2(&z1, &xy, &z0, &z2, base);
|
194
|
+
|
195
|
+
size_t length = left_length + right_length;
|
196
|
+
uint32_t *values = (uint32_t*) xcalloc(length, sizeof(uint32_t));
|
197
|
+
memcpy(values, z0.values, sizeof(uint32_t) * z0.length);
|
198
|
+
memcpy(values + 2 * half, z2.values, sizeof(uint32_t) * z2.length);
|
199
|
+
|
200
|
+
uint32_t carry = 0;
|
201
|
+
for(size_t index = 0; index < z1.length; index++) {
|
202
|
+
uint64_t sum = (uint64_t) carry + values[index + half] + z1.values[index];
|
203
|
+
values[index + half] = (uint32_t) (sum % base);
|
204
|
+
carry = (uint32_t) (sum / base);
|
205
|
+
}
|
206
|
+
|
207
|
+
for(size_t index = half + z1.length; carry > 0; index++) {
|
208
|
+
uint64_t sum = (uint64_t) carry + values[index];
|
209
|
+
values[index] = (uint32_t) (sum % base);
|
210
|
+
carry = (uint32_t) (sum / base);
|
211
|
+
}
|
212
|
+
|
213
|
+
while (length > 1 && values[length - 1] == 0) length--;
|
214
|
+
pm_integer_free(&z0);
|
215
|
+
pm_integer_free(&z1);
|
216
|
+
pm_integer_free(&z2);
|
217
|
+
pm_integer_free(&x01);
|
218
|
+
pm_integer_free(&y01);
|
219
|
+
pm_integer_free(&xy);
|
220
|
+
|
221
|
+
*destination = (pm_integer_t) { 0, length, values, false };
|
222
|
+
}
|
223
|
+
|
224
|
+
/**
 * Lookup table from a byte to the numeric value of the hexadecimal digit it
 * spells, or -1 when the byte is not a digit. The underscore is deliberately
 * mapped to 0: a number that already had an error attached to it can still end
 * up being parsed, and we want to provide _something_ to the user.
 */
static const int8_t pm_integer_parse_digit_values[256] = {
//   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 3x
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0, // 5x
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Fx
};

/**
 * Look up the numeric value of a single digit character. Characters that are
 * not digits trip an assertion.
 */
static uint8_t
pm_integer_parse_digit(const uint8_t character) {
    const int8_t digit = pm_integer_parse_digit_values[character];
    assert(digit != -1 && "invalid digit");

    return (uint8_t) digit;
}
|
261
|
+
|
262
|
+
/**
|
263
|
+
* Create a pm_integer_t from uint64_t with the given base. It is assumed that
|
264
|
+
* the memory for the pm_integer_t pointer has been zeroed.
|
265
|
+
*/
|
266
|
+
static void
|
267
|
+
pm_integer_from_uint64(pm_integer_t *integer, uint64_t value, uint64_t base) {
|
268
|
+
if (value < base) {
|
269
|
+
integer->value = (uint32_t) value;
|
270
|
+
return;
|
271
|
+
}
|
272
|
+
|
273
|
+
size_t length = 0;
|
274
|
+
uint64_t length_value = value;
|
275
|
+
while (length_value > 0) {
|
276
|
+
length++;
|
277
|
+
length_value /= base;
|
278
|
+
}
|
279
|
+
|
280
|
+
uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * length);
|
281
|
+
if (values == NULL) return;
|
282
|
+
|
283
|
+
for (size_t value_index = 0; value_index < length; value_index++) {
|
284
|
+
values[value_index] = (uint32_t) (value % base);
|
285
|
+
value /= base;
|
286
|
+
}
|
287
|
+
|
288
|
+
integer->length = length;
|
289
|
+
integer->values = values;
|
290
|
+
}
|
291
|
+
|
292
|
+
/**
|
293
|
+
* Normalize pm_integer_t.
|
294
|
+
* Heading zero values will be removed. If the integer fits into uint32_t,
|
295
|
+
* values is set to NULL, length is set to 0, and value field will be used.
|
296
|
+
*/
|
297
|
+
static void
|
298
|
+
pm_integer_normalize(pm_integer_t *integer) {
|
299
|
+
if (integer->values == NULL) {
|
300
|
+
return;
|
301
|
+
}
|
302
|
+
|
303
|
+
while (integer->length > 1 && integer->values[integer->length - 1] == 0) {
|
304
|
+
integer->length--;
|
305
|
+
}
|
306
|
+
|
307
|
+
if (integer->length > 1) {
|
308
|
+
return;
|
309
|
+
}
|
310
|
+
|
311
|
+
uint32_t value = integer->values[0];
|
312
|
+
bool negative = integer->negative && value != 0;
|
313
|
+
|
314
|
+
pm_integer_free(integer);
|
315
|
+
*integer = (pm_integer_t) { .value = value, .length = 0, .values = NULL, .negative = negative };
|
316
|
+
}
|
317
|
+
|
318
|
+
/**
|
319
|
+
* Convert base of the integer.
|
320
|
+
* In practice, it converts 10**9 to 1<<32 or 1<<32 to 10**9.
|
321
|
+
*/
|
322
|
+
static void
|
323
|
+
pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, uint64_t base_from, uint64_t base_to) {
|
324
|
+
size_t source_length;
|
325
|
+
const uint32_t *source_values;
|
326
|
+
INTEGER_EXTRACT(source, source_length, source_values)
|
327
|
+
|
328
|
+
size_t bigints_length = (source_length + 1) / 2;
|
329
|
+
pm_integer_t *bigints = (pm_integer_t *) xcalloc(bigints_length, sizeof(pm_integer_t));
|
330
|
+
if (bigints == NULL) return;
|
331
|
+
|
332
|
+
for (size_t index = 0; index < source_length; index += 2) {
|
333
|
+
uint64_t value = source_values[index] + base_from * (index + 1 < source_length ? source_values[index + 1] : 0);
|
334
|
+
pm_integer_from_uint64(&bigints[index / 2], value, base_to);
|
335
|
+
}
|
336
|
+
|
337
|
+
pm_integer_t base = { 0 };
|
338
|
+
pm_integer_from_uint64(&base, base_from, base_to);
|
339
|
+
|
340
|
+
while (bigints_length > 1) {
|
341
|
+
pm_integer_t next_base;
|
342
|
+
karatsuba_multiply(&next_base, &base, &base, base_to);
|
343
|
+
|
344
|
+
pm_integer_free(&base);
|
345
|
+
base = next_base;
|
346
|
+
|
347
|
+
size_t next_length = (bigints_length + 1) / 2;
|
348
|
+
pm_integer_t *next_bigints = (pm_integer_t *) xmalloc(sizeof(pm_integer_t) * next_length);
|
349
|
+
|
350
|
+
for (size_t bigints_index = 0; bigints_index < bigints_length; bigints_index += 2) {
|
351
|
+
if (bigints_index + 1 == bigints_length) {
|
352
|
+
next_bigints[bigints_index / 2] = bigints[bigints_index];
|
353
|
+
} else {
|
354
|
+
pm_integer_t multiplied;
|
355
|
+
karatsuba_multiply(&multiplied, &base, &bigints[bigints_index + 1], base_to);
|
356
|
+
|
357
|
+
big_add(&next_bigints[bigints_index / 2], &bigints[bigints_index], &multiplied, base_to);
|
358
|
+
pm_integer_free(&bigints[bigints_index]);
|
359
|
+
pm_integer_free(&bigints[bigints_index + 1]);
|
360
|
+
pm_integer_free(&multiplied);
|
361
|
+
}
|
362
|
+
}
|
363
|
+
|
364
|
+
xfree(bigints);
|
365
|
+
bigints = next_bigints;
|
366
|
+
bigints_length = next_length;
|
367
|
+
}
|
368
|
+
|
369
|
+
*destination = bigints[0];
|
370
|
+
destination->negative = source->negative;
|
371
|
+
pm_integer_normalize(destination);
|
372
|
+
|
373
|
+
xfree(bigints);
|
374
|
+
pm_integer_free(&base);
|
375
|
+
}
|
376
|
+
|
377
|
+
#undef INTEGER_EXTRACT
|
378
|
+
|
379
|
+
/**
|
380
|
+
* Convert digits to integer with the given power-of-two base.
|
381
|
+
*/
|
382
|
+
static void
|
383
|
+
pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *digits, size_t digits_length) {
|
384
|
+
size_t bit = 1;
|
385
|
+
while (base > (uint32_t) (1 << bit)) bit++;
|
386
|
+
|
387
|
+
size_t length = (digits_length * bit + 31) / 32;
|
388
|
+
uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
|
389
|
+
|
390
|
+
for (size_t digit_index = 0; digit_index < digits_length; digit_index++) {
|
391
|
+
size_t bit_position = bit * (digits_length - digit_index - 1);
|
392
|
+
uint32_t value = digits[digit_index];
|
393
|
+
|
394
|
+
size_t index = bit_position / 32;
|
395
|
+
size_t shift = bit_position % 32;
|
396
|
+
|
397
|
+
values[index] |= value << shift;
|
398
|
+
if (32 - shift < bit) values[index + 1] |= value >> (32 - shift);
|
399
|
+
}
|
400
|
+
|
401
|
+
while (length > 1 && values[length - 1] == 0) length--;
|
402
|
+
*integer = (pm_integer_t) { .value = 0, .length = length, .values = values, .negative = false };
|
403
|
+
pm_integer_normalize(integer);
|
404
|
+
}
|
405
|
+
|
406
|
+
/**
|
407
|
+
* Convert decimal digits to pm_integer_t.
|
408
|
+
*/
|
409
|
+
static void
|
410
|
+
pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t digits_length) {
|
411
|
+
const size_t batch = 9;
|
412
|
+
size_t length = (digits_length + batch - 1) / batch;
|
413
|
+
|
414
|
+
uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
|
415
|
+
uint32_t value = 0;
|
416
|
+
|
417
|
+
for (size_t digits_index = 0; digits_index < digits_length; digits_index++) {
|
418
|
+
value = value * 10 + digits[digits_index];
|
419
|
+
|
420
|
+
size_t reverse_index = digits_length - digits_index - 1;
|
421
|
+
if (reverse_index % batch == 0) {
|
422
|
+
values[reverse_index / batch] = value;
|
423
|
+
value = 0;
|
424
|
+
}
|
425
|
+
}
|
426
|
+
|
427
|
+
// Convert base from 10**9 to 1<<32.
|
428
|
+
pm_integer_convert_base(integer, &((pm_integer_t) { .value = 0, .length = length, .values = values, .negative = false }), 1000000000, ((uint64_t) 1 << 32));
|
429
|
+
xfree(values);
|
430
|
+
}
|
431
|
+
|
432
|
+
/**
|
433
|
+
* Parse a large integer from a string that does not fit into uint32_t.
|
434
|
+
*/
|
435
|
+
static void
|
436
|
+
pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) {
|
437
|
+
// Allocate an array to store digits.
|
438
|
+
uint8_t *digits = xmalloc(sizeof(uint8_t) * (size_t) (end - start));
|
439
|
+
size_t digits_length = 0;
|
440
|
+
|
441
|
+
for (; start < end; start++) {
|
442
|
+
if (*start == '_') continue;
|
443
|
+
digits[digits_length++] = pm_integer_parse_digit(*start);
|
444
|
+
}
|
445
|
+
|
446
|
+
// Construct pm_integer_t from the digits.
|
447
|
+
if (multiplier == 10) {
|
448
|
+
pm_integer_parse_decimal(integer, digits, digits_length);
|
449
|
+
} else {
|
450
|
+
pm_integer_parse_powof2(integer, multiplier, digits, digits_length);
|
451
|
+
}
|
452
|
+
|
453
|
+
xfree(digits);
|
454
|
+
}
|
455
|
+
|
456
|
+
/**
|
457
|
+
* Parse an integer from a string. This assumes that the format of the integer
|
458
|
+
* has already been validated, as internal validation checks are not performed
|
459
|
+
* here.
|
460
|
+
*/
|
461
|
+
PRISM_EXPORTED_FUNCTION void
|
462
|
+
pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) {
|
463
|
+
// Ignore unary +. Unary + is parsed differently and will not end up here.
|
464
|
+
// Instead, it will modify the parsed integer later.
|
465
|
+
if (*start == '+') start++;
|
466
|
+
|
467
|
+
// Determine the multiplier from the base, and skip past any prefixes.
|
468
|
+
uint32_t multiplier = 10;
|
469
|
+
switch (base) {
|
470
|
+
case PM_INTEGER_BASE_BINARY:
|
471
|
+
start += 2; // 0b
|
472
|
+
multiplier = 2;
|
473
|
+
break;
|
474
|
+
case PM_INTEGER_BASE_OCTAL:
|
475
|
+
start++; // 0
|
476
|
+
if (*start == '_' || *start == 'o' || *start == 'O') start++; // o
|
477
|
+
multiplier = 8;
|
478
|
+
break;
|
479
|
+
case PM_INTEGER_BASE_DECIMAL:
|
480
|
+
if (*start == '0' && (end - start) > 1) start += 2; // 0d
|
481
|
+
break;
|
482
|
+
case PM_INTEGER_BASE_HEXADECIMAL:
|
483
|
+
start += 2; // 0x
|
484
|
+
multiplier = 16;
|
485
|
+
break;
|
486
|
+
case PM_INTEGER_BASE_UNKNOWN:
|
487
|
+
if (*start == '0' && (end - start) > 1) {
|
488
|
+
switch (start[1]) {
|
489
|
+
case '_': start += 2; multiplier = 8; break;
|
490
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': start++; multiplier = 8; break;
|
491
|
+
case 'b': case 'B': start += 2; multiplier = 2; break;
|
492
|
+
case 'o': case 'O': start += 2; multiplier = 8; break;
|
493
|
+
case 'd': case 'D': start += 2; break;
|
494
|
+
case 'x': case 'X': start += 2; multiplier = 16; break;
|
495
|
+
default: assert(false && "unreachable"); break;
|
496
|
+
}
|
497
|
+
}
|
498
|
+
break;
|
499
|
+
}
|
500
|
+
|
501
|
+
// It's possible that we've consumed everything at this point if there is an
|
502
|
+
// invalid integer. If this is the case, we'll just return 0.
|
503
|
+
if (start >= end) return;
|
504
|
+
|
505
|
+
const uint8_t *cursor = start;
|
506
|
+
uint64_t value = (uint64_t) pm_integer_parse_digit(*cursor++);
|
507
|
+
|
508
|
+
for (; cursor < end; cursor++) {
|
509
|
+
if (*cursor == '_') continue;
|
510
|
+
value = value * multiplier + (uint64_t) pm_integer_parse_digit(*cursor);
|
511
|
+
|
512
|
+
if (value > UINT32_MAX) {
|
513
|
+
// If the integer is too large to fit into a single uint32_t, then
|
514
|
+
// we'll parse it as a big integer.
|
515
|
+
pm_integer_parse_big(integer, multiplier, start, end);
|
516
|
+
return;
|
517
|
+
}
|
518
|
+
}
|
519
|
+
|
520
|
+
integer->value = (uint32_t) value;
|
521
|
+
}
|
522
|
+
|
523
|
+
/**
|
524
|
+
* Return the memory size of the integer.
|
525
|
+
*/
|
526
|
+
size_t
|
527
|
+
pm_integer_memsize(const pm_integer_t *integer) {
|
528
|
+
return sizeof(pm_integer_t) + integer->length * sizeof(uint32_t);
|
529
|
+
}
|
530
|
+
|
531
|
+
/**
|
532
|
+
* Compare two integers. This function returns -1 if the left integer is less
|
533
|
+
* than the right integer, 0 if they are equal, and 1 if the left integer is
|
534
|
+
* greater than the right integer.
|
535
|
+
*/
|
536
|
+
int
|
537
|
+
pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
|
538
|
+
if (left->negative != right->negative) return left->negative ? -1 : 1;
|
539
|
+
int negative = left->negative ? -1 : 1;
|
540
|
+
|
541
|
+
if (left->values == NULL && right->values == NULL) {
|
542
|
+
if (left->value < right->value) return -1 * negative;
|
543
|
+
if (left->value > right->value) return 1 * negative;
|
544
|
+
return 0;
|
545
|
+
}
|
546
|
+
|
547
|
+
if (left->values == NULL || left->length < right->length) return -1 * negative;
|
548
|
+
if (right->values == NULL || left->length > right->length) return 1 * negative;
|
549
|
+
|
550
|
+
for (size_t index = 0; index < left->length; index++) {
|
551
|
+
size_t value_index = left->length - index - 1;
|
552
|
+
uint32_t left_value = left->values[value_index];
|
553
|
+
uint32_t right_value = right->values[value_index];
|
554
|
+
|
555
|
+
if (left_value < right_value) return -1 * negative;
|
556
|
+
if (left_value > right_value) return 1 * negative;
|
557
|
+
}
|
558
|
+
|
559
|
+
return 0;
|
560
|
+
}
|
561
|
+
|
562
|
+
/**
|
563
|
+
* Convert an integer to a decimal string.
|
564
|
+
*/
|
565
|
+
PRISM_EXPORTED_FUNCTION void
|
566
|
+
pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
|
567
|
+
if (integer->negative) {
|
568
|
+
pm_buffer_append_byte(buffer, '-');
|
569
|
+
}
|
570
|
+
|
571
|
+
// If the integer fits into a single uint32_t, then we can just append the
|
572
|
+
// value directly to the buffer.
|
573
|
+
if (integer->values == NULL) {
|
574
|
+
pm_buffer_append_format(buffer, "%" PRIu32, integer->value);
|
575
|
+
return;
|
576
|
+
}
|
577
|
+
|
578
|
+
// If the integer is two uint32_t values, then we can | them together and
|
579
|
+
// append the result to the buffer.
|
580
|
+
if (integer->length == 2) {
|
581
|
+
const uint64_t value = ((uint64_t) integer->values[0]) | ((uint64_t) integer->values[1] << 32);
|
582
|
+
pm_buffer_append_format(buffer, "%" PRIu64, value);
|
583
|
+
return;
|
584
|
+
}
|
585
|
+
|
586
|
+
// Otherwise, first we'll convert the base from 1<<32 to 10**9.
|
587
|
+
pm_integer_t converted;
|
588
|
+
pm_integer_convert_base(&converted, integer, (uint64_t) 1 << 32, 1000000000);
|
589
|
+
|
590
|
+
if (converted.values == NULL) {
|
591
|
+
pm_buffer_append_format(buffer, "%" PRIu32, converted.value);
|
592
|
+
pm_integer_free(&converted);
|
593
|
+
return;
|
594
|
+
}
|
595
|
+
|
596
|
+
// Allocate a buffer that we'll copy the decimal digits into.
|
597
|
+
size_t digits_length = converted.length * 9;
|
598
|
+
char *digits = xcalloc(digits_length, sizeof(char));
|
599
|
+
if (digits == NULL) return;
|
600
|
+
|
601
|
+
// Pack bigdecimal to digits.
|
602
|
+
for (size_t value_index = 0; value_index < converted.length; value_index++) {
|
603
|
+
uint32_t value = converted.values[value_index];
|
604
|
+
|
605
|
+
for (size_t digit_index = 0; digit_index < 9; digit_index++) {
|
606
|
+
digits[digits_length - 9 * value_index - digit_index - 1] = (char) ('0' + value % 10);
|
607
|
+
value /= 10;
|
608
|
+
}
|
609
|
+
}
|
610
|
+
|
611
|
+
size_t start_offset = 0;
|
612
|
+
while (start_offset < digits_length - 1 && digits[start_offset] == '0') start_offset++;
|
613
|
+
|
614
|
+
// Finally, append the string to the buffer and free the digits.
|
615
|
+
pm_buffer_append_string(buffer, digits + start_offset, digits_length - start_offset);
|
616
|
+
xfree(digits);
|
617
|
+
pm_integer_free(&converted);
|
618
|
+
}
|
619
|
+
|
620
|
+
/**
|
621
|
+
* Free the internal memory of an integer. This memory will only be allocated if
|
622
|
+
* the integer exceeds the size of a single uint32_t.
|
623
|
+
*/
|
624
|
+
PRISM_EXPORTED_FUNCTION void
|
625
|
+
pm_integer_free(pm_integer_t *integer) {
|
626
|
+
if (integer->values) {
|
627
|
+
xfree(integer->values);
|
628
|
+
}
|
629
|
+
}
|
data/src/util/pm_list.c
CHANGED
data/src/util/pm_newline_list.c
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
*/
|
7
7
|
bool
|
8
8
|
pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
|
9
|
-
list->offsets = (size_t *)
|
9
|
+
list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t));
|
10
10
|
if (list->offsets == NULL) return false;
|
11
11
|
|
12
12
|
list->start = start;
|
@@ -19,6 +19,14 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac
|
|
19
19
|
return true;
|
20
20
|
}
|
21
21
|
|
22
|
+
/**
|
23
|
+
* Clear out the newlines that have been appended to the list. This only resets the list's size to 1; nothing else in the list is modified here.
|
24
|
+
*/
|
25
|
+
void
|
26
|
+
pm_newline_list_clear(pm_newline_list_t *list) {
|
27
|
+
list->size = 1;
|
28
|
+
}
|
29
|
+
|
22
30
|
/**
|
23
31
|
* Append a new offset to the newline list. Returns true if the reallocation of
|
24
32
|
* the offsets succeeds (if one was necessary), otherwise returns false.
|
@@ -29,10 +37,11 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
|
29
37
|
size_t *original_offsets = list->offsets;
|
30
38
|
|
31
39
|
list->capacity = (list->capacity * 3) / 2;
|
32
|
-
list->offsets = (size_t *)
|
33
|
-
memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
|
34
|
-
free(original_offsets);
|
40
|
+
list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t));
|
35
41
|
if (list->offsets == NULL) return false;
|
42
|
+
|
43
|
+
memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
|
44
|
+
xfree(original_offsets);
|
36
45
|
}
|
37
46
|
|
38
47
|
assert(*cursor == '\n');
|
@@ -83,5 +92,5 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
|
|
83
92
|
*/
|
84
93
|
void
|
85
94
|
pm_newline_list_free(pm_newline_list_t *list) {
|
86
|
-
|
95
|
+
xfree(list->offsets);
|
87
96
|
}
|