prism 0.24.0 → 0.29.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (126) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +132 -1
  4. data/Makefile +25 -18
  5. data/README.md +45 -6
  6. data/config.yml +828 -25
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +4 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +7 -9
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +1037 -936
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +62 -18
  18. data/ext/prism/extension.c +351 -71
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +539 -101
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +168 -74
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +84 -9
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +213 -54
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +120 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +8 -0
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +5 -3
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +141 -54
  43. data/lib/prism/dsl.rb +48 -36
  44. data/lib/prism/ffi.rb +82 -17
  45. data/lib/prism/inspect_visitor.rb +2156 -0
  46. data/lib/prism/lex_compat.rb +34 -15
  47. data/lib/prism/mutation_compiler.rb +13 -2
  48. data/lib/prism/node.rb +4453 -4459
  49. data/lib/prism/node_ext.rb +249 -30
  50. data/lib/prism/pack.rb +4 -0
  51. data/lib/prism/parse_result/comments.rb +35 -18
  52. data/lib/prism/parse_result/newlines.rb +2 -2
  53. data/lib/prism/parse_result.rb +218 -43
  54. data/lib/prism/pattern.rb +28 -10
  55. data/lib/prism/polyfill/byteindex.rb +13 -0
  56. data/lib/prism/polyfill/unpack1.rb +14 -0
  57. data/lib/prism/reflection.rb +411 -0
  58. data/lib/prism/serialize.rb +480 -112
  59. data/lib/prism/translation/parser/compiler.rb +376 -88
  60. data/lib/prism/translation/parser/lexer.rb +103 -22
  61. data/lib/prism/translation/parser/rubocop.rb +41 -13
  62. data/lib/prism/translation/parser.rb +123 -11
  63. data/lib/prism/translation/parser33.rb +1 -1
  64. data/lib/prism/translation/parser34.rb +1 -1
  65. data/lib/prism/translation/ripper/sexp.rb +125 -0
  66. data/lib/prism/translation/ripper/shim.rb +5 -0
  67. data/lib/prism/translation/ripper.rb +3216 -462
  68. data/lib/prism/translation/ruby_parser.rb +111 -56
  69. data/lib/prism/translation.rb +3 -1
  70. data/lib/prism/visitor.rb +10 -0
  71. data/lib/prism.rb +12 -20
  72. data/prism.gemspec +46 -14
  73. data/rbi/prism/compiler.rbi +12 -0
  74. data/rbi/prism/inspect_visitor.rbi +12 -0
  75. data/rbi/prism/node.rbi +8712 -0
  76. data/rbi/prism/node_ext.rbi +107 -0
  77. data/rbi/prism/parse_result.rbi +358 -0
  78. data/rbi/prism/reflection.rbi +58 -0
  79. data/rbi/prism/translation/parser.rbi +11 -0
  80. data/rbi/prism/translation/parser33.rbi +6 -0
  81. data/rbi/prism/translation/parser34.rbi +6 -0
  82. data/rbi/prism/translation/ripper.rbi +15 -0
  83. data/rbi/prism/visitor.rbi +470 -0
  84. data/rbi/prism.rbi +38 -7748
  85. data/sig/prism/compiler.rbs +9 -0
  86. data/sig/prism/dispatcher.rbs +16 -0
  87. data/sig/prism/dot_visitor.rbs +6 -0
  88. data/sig/prism/dsl.rbs +462 -0
  89. data/sig/prism/inspect_visitor.rbs +22 -0
  90. data/sig/prism/lex_compat.rbs +10 -0
  91. data/sig/prism/mutation_compiler.rbs +158 -0
  92. data/sig/prism/node.rbs +3558 -0
  93. data/sig/prism/node_ext.rbs +82 -0
  94. data/sig/prism/pack.rbs +43 -0
  95. data/sig/prism/parse_result.rbs +160 -0
  96. data/sig/prism/pattern.rbs +13 -0
  97. data/sig/prism/reflection.rbs +50 -0
  98. data/sig/prism/serialize.rbs +6 -0
  99. data/sig/prism/visitor.rbs +168 -0
  100. data/sig/prism.rbs +188 -4767
  101. data/src/diagnostic.c +636 -230
  102. data/src/encoding.c +211 -108
  103. data/src/node.c +7555 -451
  104. data/src/options.c +66 -31
  105. data/src/pack.c +33 -17
  106. data/src/prettyprint.c +1383 -1431
  107. data/src/prism.c +4734 -1310
  108. data/src/regexp.c +17 -2
  109. data/src/serialize.c +68 -46
  110. data/src/static_literals.c +638 -0
  111. data/src/token_type.c +10 -9
  112. data/src/util/pm_buffer.c +147 -20
  113. data/src/util/pm_char.c +4 -4
  114. data/src/util/pm_constant_pool.c +35 -11
  115. data/src/util/pm_integer.c +642 -0
  116. data/src/util/pm_list.c +1 -1
  117. data/src/util/pm_newline_list.c +14 -5
  118. data/src/util/pm_string.c +134 -5
  119. data/src/util/pm_string_list.c +2 -2
  120. metadata +41 -9
  121. data/docs/ripper.md +0 -36
  122. data/include/prism/util/pm_state_stack.h +0 -42
  123. data/lib/prism/node_inspector.rb +0 -68
  124. data/rbi/prism_static.rbi +0 -207
  125. data/sig/prism_static.rbs +0 -201
  126. data/src/util/pm_state_stack.c +0 -25
@@ -0,0 +1,638 @@
1
+ #include "prism/static_literals.h"
2
+
3
+ /**
4
+ * A small struct used for passing around a subset of the information that is
5
+ * stored on the parser. We use this to avoid having static literals explicitly
6
+ * depend on the parser struct.
7
+ */
8
+ typedef struct {
9
+ /** The list of newline offsets to use to calculate line numbers. */
10
+ const pm_newline_list_t *newline_list;
11
+
12
+ /** The line number that the parser starts on. */
13
+ int32_t start_line;
14
+
15
+ /** The name of the encoding that the parser is using. */
16
+ const char *encoding_name;
17
+ } pm_static_literals_metadata_t;
18
+
19
/**
 * The per-word mixing step used by murmur_hash: multiply by the first
 * constant, rotate left by 15 bits, then multiply by the second constant.
 */
static inline uint32_t
murmur_scramble(uint32_t value) {
    const uint32_t multiplied = value * 0xcc9e2d51;
    const uint32_t rotated = (multiplied << 15) | (multiplied >> 17);
    return rotated * 0x1b873593;
}
26
+
27
/**
 * Hash a run of bytes with murmur hash
 * (https://en.wikipedia.org/wiki/MurmurHash), a fast, non-cryptographic,
 * general-purpose hash function. Speed is the only property we need here.
 */
static uint32_t
murmur_hash(const uint8_t *key, size_t length) {
    uint32_t hash = 0x9747b28c;
    uint32_t segment;

    // Consume the key one 32-bit word at a time. memcpy is used for the load
    // so that the key does not need to be aligned.
    size_t words = length >> 2;
    while (words > 0) {
        memcpy(&segment, key, sizeof(uint32_t));
        key += sizeof(uint32_t);

        hash ^= murmur_scramble(segment);
        hash = (hash << 13) | (hash >> 19);
        hash = hash * 5 + 0xe6546b64;

        words--;
    }

    // Fold the remaining 0-3 bytes into one word, last byte first.
    segment = 0;
    size_t remaining = length & 3;
    while (remaining > 0) {
        remaining--;
        segment = (segment << 8) | key[remaining];
    }

    hash ^= murmur_scramble(segment);
    hash ^= (uint32_t) length;

    // Final avalanche.
    hash ^= hash >> 16;
    hash *= 0x85ebca6b;
    hash ^= hash >> 13;
    hash *= 0xc2b2ae35;
    hash ^= hash >> 16;

    return hash;
}
60
+
61
+ /**
62
+ * Return the hash of the given node. It is important that nodes that have
63
+ * equivalent static literal values have the same hash. This is because we use
64
+ * these hashes to look for duplicates.
65
+ */
66
+ static uint32_t
67
+ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
68
+ switch (PM_NODE_TYPE(node)) {
69
+ case PM_INTEGER_NODE: {
70
+ // Integers hash their value.
71
+ const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
72
+ uint32_t hash;
73
+ if (integer->values) {
74
+ hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
75
+ } else {
76
+ hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
77
+ }
78
+
79
+ if (integer->negative) {
80
+ hash ^= murmur_scramble((uint32_t) 1);
81
+ }
82
+
83
+ return hash;
84
+ }
85
+ case PM_SOURCE_LINE_NODE: {
86
+ // Source lines hash their line number.
87
+ const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line);
88
+ const int32_t *value = &line_column.line;
89
+ return murmur_hash((const uint8_t *) value, sizeof(int32_t));
90
+ }
91
+ case PM_FLOAT_NODE: {
92
+ // Floats hash their value.
93
+ const double *value = &((const pm_float_node_t *) node)->value;
94
+ return murmur_hash((const uint8_t *) value, sizeof(double));
95
+ }
96
+ case PM_RATIONAL_NODE: {
97
+ // Rationals hash their numeric value. Because their numeric value
98
+ // is stored as a subnode, we hash that node and then mix in the
99
+ // fact that this is a rational node.
100
+ const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
101
+ return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
102
+ }
103
+ case PM_IMAGINARY_NODE: {
104
+ // Imaginaries hash their numeric value. Because their numeric value
105
+ // is stored as a subnode, we hash that node and then mix in the
106
+ // fact that this is an imaginary node.
107
+ const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
108
+ return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
109
+ }
110
+ case PM_STRING_NODE: {
111
+ // Strings hash their value and mix in their flags so that different
112
+ // encodings are not considered equal.
113
+ const pm_string_t *value = &((const pm_string_node_t *) node)->unescaped;
114
+
115
+ pm_node_flags_t flags = node->flags;
116
+ flags &= (PM_STRING_FLAGS_FORCED_BINARY_ENCODING | PM_STRING_FLAGS_FORCED_UTF8_ENCODING);
117
+
118
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) flags);
119
+ }
120
+ case PM_SOURCE_FILE_NODE: {
121
+ // Source files hash their value and mix in their flags so that
122
+ // different encodings are not considered equal.
123
+ const pm_string_t *value = &((const pm_source_file_node_t *) node)->filepath;
124
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t));
125
+ }
126
+ case PM_REGULAR_EXPRESSION_NODE: {
127
+ // Regular expressions hash their value and mix in their flags so
128
+ // that different encodings are not considered equal.
129
+ const pm_string_t *value = &((const pm_regular_expression_node_t *) node)->unescaped;
130
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
131
+ }
132
+ case PM_SYMBOL_NODE: {
133
+ // Symbols hash their value and mix in their flags so that different
134
+ // encodings are not considered equal.
135
+ const pm_string_t *value = &((const pm_symbol_node_t *) node)->unescaped;
136
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
137
+ }
138
+ default:
139
+ assert(false && "unreachable");
140
+ return 0;
141
+ }
142
+ }
143
+
144
+ /**
145
+ * Insert a node into the node hash. It accepts the hash that should hold the
146
+ * new node, the parser that generated the node, the node to insert, and a
147
+ * comparison function. The comparison function is used for collision detection,
148
+ * and must be able to compare all node types that will be stored in this hash.
149
+ */
150
+ static pm_node_t *
151
+ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
152
+ // If we are out of space, we need to resize the hash. This will cause all
153
+ // of the nodes to be rehashed and reinserted into the new hash.
154
+ if (hash->size * 2 >= hash->capacity) {
155
+ // First, allocate space for the new node list.
156
+ uint32_t new_capacity = hash->capacity == 0 ? 4 : hash->capacity * 2;
157
+ pm_node_t **new_nodes = xcalloc(new_capacity, sizeof(pm_node_t *));
158
+ if (new_nodes == NULL) return NULL;
159
+
160
+ // It turns out to be more efficient to mask the hash value than to use
161
+ // the modulo operator. Because our capacities are always powers of two,
162
+ // we can use a bitwise AND to get the same result as the modulo
163
+ // operator.
164
+ uint32_t mask = new_capacity - 1;
165
+
166
+ // Now, rehash all of the nodes into the new list.
167
+ for (uint32_t index = 0; index < hash->capacity; index++) {
168
+ pm_node_t *node = hash->nodes[index];
169
+
170
+ if (node != NULL) {
171
+ uint32_t index = node_hash(metadata, node) & mask;
172
+ new_nodes[index] = node;
173
+ }
174
+ }
175
+
176
+ // Finally, free the old node list and update the hash.
177
+ xfree(hash->nodes);
178
+ hash->nodes = new_nodes;
179
+ hash->capacity = new_capacity;
180
+ }
181
+
182
+ // Now, insert the node into the hash.
183
+ uint32_t mask = hash->capacity - 1;
184
+ uint32_t index = node_hash(metadata, node) & mask;
185
+
186
+ // We use linear probing to resolve collisions. This means that if the
187
+ // current index is occupied, we will move to the next index and try again.
188
+ // We are guaranteed that this will eventually find an empty slot because we
189
+ // resize the hash when it gets too full.
190
+ while (hash->nodes[index] != NULL) {
191
+ if (compare(metadata, hash->nodes[index], node) == 0) break;
192
+ index = (index + 1) & mask;
193
+ }
194
+
195
+ // If the current index is occupied, we need to return the node that was
196
+ // already in the hash. Otherwise, we can just increment the size and insert
197
+ // the new node.
198
+ pm_node_t *result = hash->nodes[index];
199
+ if (result == NULL) hash->size++;
200
+
201
+ hash->nodes[index] = node;
202
+ return result;
203
+ }
204
+
205
+ /**
206
+ * Free the internal memory associated with the given node hash.
207
+ */
208
+ static void
209
+ pm_node_hash_free(pm_node_hash_t *hash) {
210
+ if (hash->capacity > 0) xfree(hash->nodes);
211
+ }
212
+
213
/**
 * Compare two values that can be compared with a simple numeric comparison.
 * Evaluates to -1 when left is less than right, 1 when it is greater, and 0
 * otherwise. Each argument is parenthesized so that arbitrary expressions can
 * be passed, but each may be evaluated twice, so arguments must not have side
 * effects.
 */
#define PM_NUMERIC_COMPARISON(left, right) (((left) < (right)) ? -1 : ((left) > (right)) ? 1 : 0)
217
+
218
+ /**
219
+ * Return the integer value of the given node as an int64_t.
220
+ */
221
+ static int64_t
222
+ pm_int64_value(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
223
+ switch (PM_NODE_TYPE(node)) {
224
+ case PM_INTEGER_NODE: {
225
+ const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
226
+ if (integer->values) return integer->negative ? INT64_MIN : INT64_MAX;
227
+
228
+ int64_t value = (int64_t) integer->value;
229
+ return integer->negative ? -value : value;
230
+ }
231
+ case PM_SOURCE_LINE_NODE:
232
+ return (int64_t) pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line;
233
+ default:
234
+ assert(false && "unreachable");
235
+ return 0;
236
+ }
237
+ }
238
+
239
+ /**
240
+ * A comparison function for comparing two IntegerNode or SourceLineNode
241
+ * instances.
242
+ */
243
+ static int
244
+ pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
245
+ if (PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) || PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE)) {
246
+ int64_t left_value = pm_int64_value(metadata, left);
247
+ int64_t right_value = pm_int64_value(metadata, right);
248
+ return PM_NUMERIC_COMPARISON(left_value, right_value);
249
+ }
250
+
251
+ const pm_integer_t *left_integer = &((const pm_integer_node_t *) left)->value;
252
+ const pm_integer_t *right_integer = &((const pm_integer_node_t *) right)->value;
253
+ return pm_integer_compare(left_integer, right_integer);
254
+ }
255
+
256
+ /**
257
+ * A comparison function for comparing two FloatNode instances.
258
+ */
259
+ static int
260
+ pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
261
+ const double left_value = ((const pm_float_node_t *) left)->value;
262
+ const double right_value = ((const pm_float_node_t *) right)->value;
263
+ return PM_NUMERIC_COMPARISON(left_value, right_value);
264
+ }
265
+
266
+ /**
267
+ * A comparison function for comparing two nodes that have attached numbers.
268
+ */
269
+ static int
270
+ pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
271
+ if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) {
272
+ return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right));
273
+ }
274
+
275
+ switch (PM_NODE_TYPE(left)) {
276
+ case PM_IMAGINARY_NODE:
277
+ return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
278
+ case PM_RATIONAL_NODE:
279
+ return pm_compare_number_nodes(metadata, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric);
280
+ case PM_INTEGER_NODE:
281
+ return pm_compare_integer_nodes(metadata, left, right);
282
+ case PM_FLOAT_NODE:
283
+ return pm_compare_float_nodes(metadata, left, right);
284
+ default:
285
+ assert(false && "unreachable");
286
+ return 0;
287
+ }
288
+ }
289
+
290
+ /**
291
+ * Return a pointer to the string value of the given node.
292
+ */
293
+ static const pm_string_t *
294
+ pm_string_value(const pm_node_t *node) {
295
+ switch (PM_NODE_TYPE(node)) {
296
+ case PM_STRING_NODE:
297
+ return &((const pm_string_node_t *) node)->unescaped;
298
+ case PM_SOURCE_FILE_NODE:
299
+ return &((const pm_source_file_node_t *) node)->filepath;
300
+ case PM_SYMBOL_NODE:
301
+ return &((const pm_symbol_node_t *) node)->unescaped;
302
+ default:
303
+ assert(false && "unreachable");
304
+ return NULL;
305
+ }
306
+ }
307
+
308
+ /**
309
+ * A comparison function for comparing two nodes that have attached strings.
310
+ */
311
+ static int
312
+ pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
313
+ const pm_string_t *left_string = pm_string_value(left);
314
+ const pm_string_t *right_string = pm_string_value(right);
315
+ return pm_string_compare(left_string, right_string);
316
+ }
317
+
318
+ /**
319
+ * A comparison function for comparing two RegularExpressionNode instances.
320
+ */
321
+ static int
322
+ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
323
+ const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left;
324
+ const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right;
325
+
326
+ int result = pm_string_compare(&left_regexp->unescaped, &right_regexp->unescaped);
327
+ if (result != 0) return result;
328
+
329
+ return PM_NUMERIC_COMPARISON(left_regexp->base.flags, right_regexp->base.flags);
330
+ }
331
+
332
+ #undef PM_NUMERIC_COMPARISON
333
+
334
+ /**
335
+ * Add a node to the set of static literals.
336
+ */
337
+ pm_node_t *
338
+ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node) {
339
+ switch (PM_NODE_TYPE(node)) {
340
+ case PM_INTEGER_NODE:
341
+ case PM_SOURCE_LINE_NODE:
342
+ return pm_node_hash_insert(
343
+ &literals->integer_nodes,
344
+ &(pm_static_literals_metadata_t) {
345
+ .newline_list = newline_list,
346
+ .start_line = start_line,
347
+ .encoding_name = NULL
348
+ },
349
+ node,
350
+ pm_compare_integer_nodes
351
+ );
352
+ case PM_FLOAT_NODE:
353
+ return pm_node_hash_insert(
354
+ &literals->float_nodes,
355
+ &(pm_static_literals_metadata_t) {
356
+ .newline_list = newline_list,
357
+ .start_line = start_line,
358
+ .encoding_name = NULL
359
+ },
360
+ node,
361
+ pm_compare_float_nodes
362
+ );
363
+ case PM_RATIONAL_NODE:
364
+ case PM_IMAGINARY_NODE:
365
+ return pm_node_hash_insert(
366
+ &literals->number_nodes,
367
+ &(pm_static_literals_metadata_t) {
368
+ .newline_list = newline_list,
369
+ .start_line = start_line,
370
+ .encoding_name = NULL
371
+ },
372
+ node,
373
+ pm_compare_number_nodes
374
+ );
375
+ case PM_STRING_NODE:
376
+ case PM_SOURCE_FILE_NODE:
377
+ return pm_node_hash_insert(
378
+ &literals->string_nodes,
379
+ &(pm_static_literals_metadata_t) {
380
+ .newline_list = newline_list,
381
+ .start_line = start_line,
382
+ .encoding_name = NULL
383
+ },
384
+ node,
385
+ pm_compare_string_nodes
386
+ );
387
+ case PM_REGULAR_EXPRESSION_NODE:
388
+ return pm_node_hash_insert(
389
+ &literals->regexp_nodes,
390
+ &(pm_static_literals_metadata_t) {
391
+ .newline_list = newline_list,
392
+ .start_line = start_line,
393
+ .encoding_name = NULL
394
+ },
395
+ node,
396
+ pm_compare_regular_expression_nodes
397
+ );
398
+ case PM_SYMBOL_NODE:
399
+ return pm_node_hash_insert(
400
+ &literals->symbol_nodes,
401
+ &(pm_static_literals_metadata_t) {
402
+ .newline_list = newline_list,
403
+ .start_line = start_line,
404
+ .encoding_name = NULL
405
+ },
406
+ node,
407
+ pm_compare_string_nodes
408
+ );
409
+ case PM_TRUE_NODE: {
410
+ pm_node_t *duplicated = literals->true_node;
411
+ literals->true_node = node;
412
+ return duplicated;
413
+ }
414
+ case PM_FALSE_NODE: {
415
+ pm_node_t *duplicated = literals->false_node;
416
+ literals->false_node = node;
417
+ return duplicated;
418
+ }
419
+ case PM_NIL_NODE: {
420
+ pm_node_t *duplicated = literals->nil_node;
421
+ literals->nil_node = node;
422
+ return duplicated;
423
+ }
424
+ case PM_SOURCE_ENCODING_NODE: {
425
+ pm_node_t *duplicated = literals->source_encoding_node;
426
+ literals->source_encoding_node = node;
427
+ return duplicated;
428
+ }
429
+ default:
430
+ return NULL;
431
+ }
432
+ }
433
+
434
+ /**
435
+ * Free the internal memory associated with the given static literals set.
436
+ */
437
+ void
438
+ pm_static_literals_free(pm_static_literals_t *literals) {
439
+ pm_node_hash_free(&literals->integer_nodes);
440
+ pm_node_hash_free(&literals->float_nodes);
441
+ pm_node_hash_free(&literals->number_nodes);
442
+ pm_node_hash_free(&literals->string_nodes);
443
+ pm_node_hash_free(&literals->regexp_nodes);
444
+ pm_node_hash_free(&literals->symbol_nodes);
445
+ }
446
+
447
+ /**
448
+ * A helper to determine if the given node is a static literal that is positive.
449
+ * This is used for formatting imaginary nodes.
450
+ */
451
+ static bool
452
+ pm_static_literal_positive_p(const pm_node_t *node) {
453
+ switch (PM_NODE_TYPE(node)) {
454
+ case PM_FLOAT_NODE:
455
+ return ((const pm_float_node_t *) node)->value > 0;
456
+ case PM_INTEGER_NODE:
457
+ return !((const pm_integer_node_t *) node)->value.negative;
458
+ case PM_RATIONAL_NODE:
459
+ return pm_static_literal_positive_p(((const pm_rational_node_t *) node)->numeric);
460
+ case PM_IMAGINARY_NODE:
461
+ return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
462
+ default:
463
+ assert(false && "unreachable");
464
+ return false;
465
+ }
466
+ }
467
+
468
+ /**
469
+ * Inspect a rational node that wraps a float node. This is going to be a
470
+ * poor-man's version of the Ruby `Rational#to_s` method, because we're not
471
+ * going to try to reduce the rational by finding the GCD. We'll leave that for
472
+ * a future improvement.
473
+ */
474
+ static void
475
+ pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) {
476
+ const uint8_t *start = node->base.location.start;
477
+ const uint8_t *end = node->base.location.end - 1; // r
478
+
479
+ while (start < end && *start == '0') start++; // 0.1 -> .1
480
+ while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
481
+ size_t length = (size_t) (end - start);
482
+
483
+ const uint8_t *point = memchr(start, '.', length);
484
+ assert(point && "should have a decimal point");
485
+
486
+ uint8_t *digits = malloc(length - 1);
487
+ if (digits == NULL) return;
488
+
489
+ memcpy(digits, start, (unsigned long) (point - start));
490
+ memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
491
+
492
+ pm_integer_t numerator = { 0 };
493
+ pm_integer_parse(&numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + length - 1);
494
+
495
+ pm_buffer_append_byte(buffer, '(');
496
+ pm_integer_string(buffer, &numerator);
497
+ pm_buffer_append_string(buffer, "/1", 2);
498
+ for (size_t index = 0; index < (size_t) (end - point - 1); index++) pm_buffer_append_byte(buffer, '0');
499
+ pm_buffer_append_byte(buffer, ')');
500
+
501
+ pm_integer_free(&numerator);
502
+ free(digits);
503
+ }
504
+
505
+ /**
506
+ * Create a string-based representation of the given static literal.
507
+ */
508
+ static inline void
509
+ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
510
+ switch (PM_NODE_TYPE(node)) {
511
+ case PM_FALSE_NODE:
512
+ pm_buffer_append_string(buffer, "false", 5);
513
+ break;
514
+ case PM_FLOAT_NODE: {
515
+ const double value = ((const pm_float_node_t *) node)->value;
516
+
517
+ if (isinf(value)) {
518
+ if (*node->location.start == '-') {
519
+ pm_buffer_append_byte(buffer, '-');
520
+ }
521
+ pm_buffer_append_string(buffer, "Infinity", 8);
522
+ } else if (value == 0.0) {
523
+ if (*node->location.start == '-') {
524
+ pm_buffer_append_byte(buffer, '-');
525
+ }
526
+ pm_buffer_append_string(buffer, "0.0", 3);
527
+ } else {
528
+ pm_buffer_append_format(buffer, "%g", value);
529
+
530
+ // %g will not insert a .0 for 1e100 (we'll get back 1e+100). So
531
+ // we check for the decimal point and add it in here if it's not
532
+ // present.
533
+ if (pm_buffer_index(buffer, '.') == SIZE_MAX) {
534
+ size_t exponent_index = pm_buffer_index(buffer, 'e');
535
+ size_t index = exponent_index == SIZE_MAX ? pm_buffer_length(buffer) : exponent_index;
536
+ pm_buffer_insert(buffer, index, ".0", 2);
537
+ }
538
+ }
539
+
540
+ break;
541
+ }
542
+ case PM_IMAGINARY_NODE: {
543
+ const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
544
+ pm_buffer_append_string(buffer, "(0", 2);
545
+ if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
546
+ pm_static_literal_inspect_node(buffer, metadata, numeric);
547
+ if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*');
548
+ pm_buffer_append_string(buffer, "i)", 2);
549
+ break;
550
+ }
551
+ case PM_INTEGER_NODE:
552
+ pm_integer_string(buffer, &((const pm_integer_node_t *) node)->value);
553
+ break;
554
+ case PM_NIL_NODE:
555
+ pm_buffer_append_string(buffer, "nil", 3);
556
+ break;
557
+ case PM_RATIONAL_NODE: {
558
+ const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
559
+
560
+ switch (PM_NODE_TYPE(numeric)) {
561
+ case PM_INTEGER_NODE:
562
+ pm_buffer_append_byte(buffer, '(');
563
+ pm_static_literal_inspect_node(buffer, metadata, numeric);
564
+ pm_buffer_append_string(buffer, "/1)", 3);
565
+ break;
566
+ case PM_FLOAT_NODE:
567
+ pm_rational_inspect(buffer, (pm_rational_node_t *) node);
568
+ break;
569
+ default:
570
+ assert(false && "unreachable");
571
+ break;
572
+ }
573
+
574
+ break;
575
+ }
576
+ case PM_REGULAR_EXPRESSION_NODE: {
577
+ const pm_string_t *unescaped = &((const pm_regular_expression_node_t *) node)->unescaped;
578
+ pm_buffer_append_byte(buffer, '/');
579
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
580
+ pm_buffer_append_byte(buffer, '/');
581
+
582
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) pm_buffer_append_string(buffer, "m", 1);
583
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) pm_buffer_append_string(buffer, "i", 1);
584
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) pm_buffer_append_string(buffer, "x", 1);
585
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) pm_buffer_append_string(buffer, "n", 1);
586
+
587
+ break;
588
+ }
589
+ case PM_SOURCE_ENCODING_NODE:
590
+ pm_buffer_append_format(buffer, "#<Encoding:%s>", metadata->encoding_name);
591
+ break;
592
+ case PM_SOURCE_FILE_NODE: {
593
+ const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath;
594
+ pm_buffer_append_byte(buffer, '"');
595
+ pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_RUBY);
596
+ pm_buffer_append_byte(buffer, '"');
597
+ break;
598
+ }
599
+ case PM_SOURCE_LINE_NODE:
600
+ pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line);
601
+ break;
602
+ case PM_STRING_NODE: {
603
+ const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
604
+ pm_buffer_append_byte(buffer, '"');
605
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
606
+ pm_buffer_append_byte(buffer, '"');
607
+ break;
608
+ }
609
+ case PM_SYMBOL_NODE: {
610
+ const pm_string_t *unescaped = &((const pm_symbol_node_t *) node)->unescaped;
611
+ pm_buffer_append_byte(buffer, ':');
612
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
613
+ break;
614
+ }
615
+ case PM_TRUE_NODE:
616
+ pm_buffer_append_string(buffer, "true", 4);
617
+ break;
618
+ default:
619
+ assert(false && "unreachable");
620
+ break;
621
+ }
622
+ }
623
+
624
+ /**
625
+ * Create a string-based representation of the given static literal.
626
+ */
627
+ PRISM_EXPORTED_FUNCTION void
628
+ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
629
+ pm_static_literal_inspect_node(
630
+ buffer,
631
+ &(pm_static_literals_metadata_t) {
632
+ .newline_list = newline_list,
633
+ .start_line = start_line,
634
+ .encoding_name = encoding_name
635
+ },
636
+ node
637
+ );
638
+ }