prism 0.23.0 → 0.25.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +65 -1
  4. data/Makefile +5 -2
  5. data/README.md +45 -6
  6. data/config.yml +499 -4
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +2 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +3 -3
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +2342 -1801
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +27 -11
  18. data/ext/prism/extension.c +313 -66
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +213 -64
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +134 -71
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +82 -7
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +198 -53
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +118 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +12 -3
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +225 -80
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +87 -16
  43. data/lib/prism/dsl.rb +315 -300
  44. data/lib/prism/ffi.rb +165 -84
  45. data/lib/prism/lex_compat.rb +17 -15
  46. data/lib/prism/mutation_compiler.rb +11 -0
  47. data/lib/prism/node.rb +4857 -3750
  48. data/lib/prism/node_ext.rb +77 -29
  49. data/lib/prism/pack.rb +4 -0
  50. data/lib/prism/parse_result/comments.rb +34 -17
  51. data/lib/prism/parse_result/newlines.rb +3 -1
  52. data/lib/prism/parse_result.rb +88 -34
  53. data/lib/prism/pattern.rb +16 -4
  54. data/lib/prism/polyfill/string.rb +12 -0
  55. data/lib/prism/serialize.rb +960 -327
  56. data/lib/prism/translation/parser/compiler.rb +152 -50
  57. data/lib/prism/translation/parser/lexer.rb +103 -22
  58. data/lib/prism/translation/parser/rubocop.rb +47 -11
  59. data/lib/prism/translation/parser.rb +134 -10
  60. data/lib/prism/translation/parser33.rb +12 -0
  61. data/lib/prism/translation/parser34.rb +12 -0
  62. data/lib/prism/translation/ripper/sexp.rb +125 -0
  63. data/lib/prism/translation/ripper/shim.rb +5 -0
  64. data/lib/prism/translation/ripper.rb +3248 -379
  65. data/lib/prism/translation/ruby_parser.rb +35 -18
  66. data/lib/prism/translation.rb +3 -1
  67. data/lib/prism/visitor.rb +10 -0
  68. data/lib/prism.rb +8 -2
  69. data/prism.gemspec +35 -4
  70. data/rbi/prism/compiler.rbi +14 -0
  71. data/rbi/prism/desugar_compiler.rbi +5 -0
  72. data/rbi/prism/mutation_compiler.rbi +5 -0
  73. data/rbi/prism/node.rbi +8221 -0
  74. data/rbi/prism/node_ext.rbi +102 -0
  75. data/rbi/prism/parse_result.rbi +304 -0
  76. data/rbi/prism/translation/parser/compiler.rbi +13 -0
  77. data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
  78. data/rbi/prism/translation/ripper.rbi +25 -0
  79. data/rbi/prism/translation/ruby_parser.rbi +11 -0
  80. data/rbi/prism/visitor.rbi +470 -0
  81. data/rbi/prism.rbi +39 -7749
  82. data/sig/prism/compiler.rbs +9 -0
  83. data/sig/prism/dispatcher.rbs +16 -0
  84. data/sig/prism/dot_visitor.rbs +6 -0
  85. data/sig/prism/dsl.rbs +462 -0
  86. data/sig/prism/mutation_compiler.rbs +158 -0
  87. data/sig/prism/node.rbs +3529 -0
  88. data/sig/prism/node_ext.rbs +78 -0
  89. data/sig/prism/pack.rbs +43 -0
  90. data/sig/prism/parse_result.rbs +127 -0
  91. data/sig/prism/pattern.rbs +13 -0
  92. data/sig/prism/serialize.rbs +7 -0
  93. data/sig/prism/visitor.rbs +168 -0
  94. data/sig/prism.rbs +188 -4767
  95. data/src/diagnostic.c +575 -230
  96. data/src/encoding.c +211 -108
  97. data/src/node.c +7526 -447
  98. data/src/options.c +36 -12
  99. data/src/pack.c +33 -17
  100. data/src/prettyprint.c +1297 -1388
  101. data/src/prism.c +3665 -1121
  102. data/src/regexp.c +17 -2
  103. data/src/serialize.c +47 -28
  104. data/src/static_literals.c +552 -0
  105. data/src/token_type.c +1 -0
  106. data/src/util/pm_buffer.c +147 -20
  107. data/src/util/pm_char.c +4 -4
  108. data/src/util/pm_constant_pool.c +35 -11
  109. data/src/util/pm_integer.c +629 -0
  110. data/src/util/pm_list.c +1 -1
  111. data/src/util/pm_newline_list.c +20 -8
  112. data/src/util/pm_string.c +134 -5
  113. data/src/util/pm_string_list.c +2 -2
  114. metadata +37 -6
  115. data/docs/ripper.md +0 -36
  116. data/rbi/prism_static.rbi +0 -207
  117. data/sig/prism_static.rbs +0 -201
@@ -0,0 +1,552 @@
1
+ #include "prism/static_literals.h"
2
+
3
/**
 * Mix a single 32-bit word before it is folded into the running hash:
 * multiply, rotate left by 15 bits, then multiply again.
 */
static inline uint32_t
murmur_scramble(uint32_t value) {
    const uint32_t multiplied = value * 0xcc9e2d51;
    const uint32_t rotated = (multiplied << 15) | (multiplied >> 17);
    return rotated * 0x1b873593;
}

/**
 * Murmur hash (https://en.wikipedia.org/wiki/MurmurHash) of `length` bytes
 * starting at `key`, using a fixed seed. It is a fast, non-cryptographic,
 * general-purpose hash, which is all that is needed here.
 *
 * Whole 4-byte words are loaded with memcpy, so the value of each word (and
 * therefore the resulting hash) follows the byte order of the host.
 */
static uint32_t
murmur_hash(const uint8_t *key, size_t length) {
    const size_t block_count = length >> 2;
    const size_t tail_offset = block_count * sizeof(uint32_t);
    uint32_t hash = 0x9747b28c;

    // Body: fold in the key one 4-byte word at a time.
    for (size_t block = 0; block < block_count; block++) {
        uint32_t word;
        memcpy(&word, key + block * sizeof(uint32_t), sizeof(uint32_t));

        hash ^= murmur_scramble(word);
        hash = (hash << 13) | (hash >> 19);
        hash = hash * 5 + 0xe6546b64;
    }

    // Tail: pack the 0-3 leftover bytes into one word, with the first leftover
    // byte ending up in the lowest 8 bits.
    uint32_t tail = 0;
    for (size_t remaining = length & 3; remaining > 0; remaining--) {
        tail = (tail << 8) | key[tail_offset + remaining - 1];
    }
    hash ^= murmur_scramble(tail);

    // Finalization: mix in the length and avalanche the bits.
    hash ^= (uint32_t) length;
    hash ^= hash >> 16;
    hash *= 0x85ebca6b;
    hash ^= hash >> 13;
    hash *= 0xc2b2ae35;
    hash ^= hash >> 16;

    return hash;
}
44
+
45
+ /**
46
+ * Return the hash of the given node. It is important that nodes that have
47
+ * equivalent static literal values have the same hash. This is because we use
48
+ * these hashes to look for duplicates.
49
+ */
50
+ static uint32_t
51
+ node_hash(const pm_parser_t *parser, const pm_node_t *node) {
52
+ switch (PM_NODE_TYPE(node)) {
53
+ case PM_INTEGER_NODE: {
54
+ // Integers hash their value.
55
+ const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
56
+ uint32_t hash;
57
+ if (integer->values) {
58
+ hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
59
+ } else {
60
+ hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
61
+ }
62
+
63
+ if (integer->negative) {
64
+ hash ^= murmur_scramble((uint32_t) 1);
65
+ }
66
+
67
+ return hash;
68
+ }
69
+ case PM_SOURCE_LINE_NODE: {
70
+ // Source lines hash their line number.
71
+ const pm_line_column_t line_column = pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line);
72
+ const int32_t *value = &line_column.line;
73
+ return murmur_hash((const uint8_t *) value, sizeof(int32_t));
74
+ }
75
+ case PM_FLOAT_NODE: {
76
+ // Floats hash their value.
77
+ const double *value = &((const pm_float_node_t *) node)->value;
78
+ return murmur_hash((const uint8_t *) value, sizeof(double));
79
+ }
80
+ case PM_RATIONAL_NODE: {
81
+ // Rationals hash their numeric value. Because their numeric value
82
+ // is stored as a subnode, we hash that node and then mix in the
83
+ // fact that this is a rational node.
84
+ const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
85
+ return node_hash(parser, numeric) ^ murmur_scramble((uint32_t) node->type);
86
+ }
87
+ case PM_IMAGINARY_NODE: {
88
+ // Imaginaries hash their numeric value. Because their numeric value
89
+ // is stored as a subnode, we hash that node and then mix in the
90
+ // fact that this is an imaginary node.
91
+ const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
92
+ return node_hash(parser, numeric) ^ murmur_scramble((uint32_t) node->type);
93
+ }
94
+ case PM_STRING_NODE: {
95
+ // Strings hash their value and mix in their flags so that different
96
+ // encodings are not considered equal.
97
+ const pm_string_t *value = &((const pm_string_node_t *) node)->unescaped;
98
+
99
+ pm_node_flags_t flags = node->flags;
100
+ flags &= (PM_STRING_FLAGS_FORCED_BINARY_ENCODING | PM_STRING_FLAGS_FORCED_UTF8_ENCODING);
101
+
102
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) flags);
103
+ }
104
+ case PM_SOURCE_FILE_NODE: {
105
+ // Source files hash their value and mix in their flags so that
106
+ // different encodings are not considered equal.
107
+ const pm_string_t *value = &((const pm_source_file_node_t *) node)->filepath;
108
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t));
109
+ }
110
+ case PM_REGULAR_EXPRESSION_NODE: {
111
+ // Regular expressions hash their value and mix in their flags so
112
+ // that different encodings are not considered equal.
113
+ const pm_string_t *value = &((const pm_regular_expression_node_t *) node)->unescaped;
114
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
115
+ }
116
+ case PM_SYMBOL_NODE: {
117
+ // Symbols hash their value and mix in their flags so that different
118
+ // encodings are not considered equal.
119
+ const pm_string_t *value = &((const pm_symbol_node_t *) node)->unescaped;
120
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
121
+ }
122
+ default:
123
+ assert(false && "unreachable");
124
+ return 0;
125
+ }
126
+ }
127
+
128
+ /**
129
+ * Insert a node into the node hash. It accepts the hash that should hold the
130
+ * new node, the parser that generated the node, the node to insert, and a
131
+ * comparison function. The comparison function is used for collision detection,
132
+ * and must be able to compare all node types that will be stored in this hash.
133
+ */
134
+ static pm_node_t *
135
+ pm_node_hash_insert(pm_node_hash_t *hash, const pm_parser_t *parser, pm_node_t *node, int (*compare)(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right)) {
136
+ // If we are out of space, we need to resize the hash. This will cause all
137
+ // of the nodes to be rehashed and reinserted into the new hash.
138
+ if (hash->size * 2 >= hash->capacity) {
139
+ // First, allocate space for the new node list.
140
+ uint32_t new_capacity = hash->capacity == 0 ? 4 : hash->capacity * 2;
141
+ pm_node_t **new_nodes = xcalloc(new_capacity, sizeof(pm_node_t *));
142
+ if (new_nodes == NULL) return NULL;
143
+
144
+ // It turns out to be more efficient to mask the hash value than to use
145
+ // the modulo operator. Because our capacities are always powers of two,
146
+ // we can use a bitwise AND to get the same result as the modulo
147
+ // operator.
148
+ uint32_t mask = new_capacity - 1;
149
+
150
+ // Now, rehash all of the nodes into the new list.
151
+ for (uint32_t index = 0; index < hash->capacity; index++) {
152
+ pm_node_t *node = hash->nodes[index];
153
+
154
+ if (node != NULL) {
155
+ uint32_t index = node_hash(parser, node) & mask;
156
+ new_nodes[index] = node;
157
+ }
158
+ }
159
+
160
+ // Finally, free the old node list and update the hash.
161
+ xfree(hash->nodes);
162
+ hash->nodes = new_nodes;
163
+ hash->capacity = new_capacity;
164
+ }
165
+
166
+ // Now, insert the node into the hash.
167
+ uint32_t mask = hash->capacity - 1;
168
+ uint32_t index = node_hash(parser, node) & mask;
169
+
170
+ // We use linear probing to resolve collisions. This means that if the
171
+ // current index is occupied, we will move to the next index and try again.
172
+ // We are guaranteed that this will eventually find an empty slot because we
173
+ // resize the hash when it gets too full.
174
+ while (hash->nodes[index] != NULL) {
175
+ if (compare(parser, hash->nodes[index], node) == 0) break;
176
+ index = (index + 1) & mask;
177
+ }
178
+
179
+ // If the current index is occupied, we need to return the node that was
180
+ // already in the hash. Otherwise, we can just increment the size and insert
181
+ // the new node.
182
+ pm_node_t *result = hash->nodes[index];
183
+ if (result == NULL) hash->size++;
184
+
185
+ hash->nodes[index] = node;
186
+ return result;
187
+ }
188
+
189
+ /**
190
+ * Free the internal memory associated with the given node hash.
191
+ */
192
+ static void
193
+ pm_node_hash_free(pm_node_hash_t *hash) {
194
+ if (hash->capacity > 0) xfree(hash->nodes);
195
+ }
196
+
197
/**
 * Three-way comparison of two values that support the `<` and `>` operators.
 * Expands to -1 if left is smaller than right, 1 if it is larger, and 0
 * otherwise.
 *
 * Both arguments are fully parenthesized so that arbitrary expressions can be
 * passed safely. Note that each argument may be evaluated twice, so arguments
 * must not have side effects.
 */
#define PM_NUMERIC_COMPARISON(left, right) (((left) < (right)) ? -1 : (((left) > (right)) ? 1 : 0))
201
+
202
+ /**
203
+ * Return the integer value of the given node as an int64_t.
204
+ */
205
+ static int64_t
206
+ pm_int64_value(const pm_parser_t *parser, const pm_node_t *node) {
207
+ switch (PM_NODE_TYPE(node)) {
208
+ case PM_INTEGER_NODE: {
209
+ const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
210
+ if (integer->values) return integer->negative ? INT64_MIN : INT64_MAX;
211
+
212
+ int64_t value = (int64_t) integer->value;
213
+ return integer->negative ? -value : value;
214
+ }
215
+ case PM_SOURCE_LINE_NODE:
216
+ return (int64_t) pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line;
217
+ default:
218
+ assert(false && "unreachable");
219
+ return 0;
220
+ }
221
+ }
222
+
223
+ /**
224
+ * A comparison function for comparing two IntegerNode or SourceLineNode
225
+ * instances.
226
+ */
227
+ static int
228
+ pm_compare_integer_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
229
+ if (PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) || PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE)) {
230
+ int64_t left_value = pm_int64_value(parser, left);
231
+ int64_t right_value = pm_int64_value(parser, right);
232
+ return PM_NUMERIC_COMPARISON(left_value, right_value);
233
+ }
234
+
235
+ const pm_integer_t *left_integer = &((const pm_integer_node_t *) left)->value;
236
+ const pm_integer_t *right_integer = &((const pm_integer_node_t *) right)->value;
237
+ return pm_integer_compare(left_integer, right_integer);
238
+ }
239
+
240
+ /**
241
+ * A comparison function for comparing two FloatNode instances.
242
+ */
243
+ static int
244
+ pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
245
+ const double left_value = ((const pm_float_node_t *) left)->value;
246
+ const double right_value = ((const pm_float_node_t *) right)->value;
247
+ return PM_NUMERIC_COMPARISON(left_value, right_value);
248
+ }
249
+
250
+ /**
251
+ * A comparison function for comparing two nodes that have attached numbers.
252
+ */
253
+ static int
254
+ pm_compare_number_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
255
+ if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) {
256
+ return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right));
257
+ }
258
+
259
+ switch (PM_NODE_TYPE(left)) {
260
+ case PM_IMAGINARY_NODE:
261
+ return pm_compare_number_nodes(parser, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
262
+ case PM_RATIONAL_NODE:
263
+ return pm_compare_number_nodes(parser, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric);
264
+ case PM_INTEGER_NODE:
265
+ return pm_compare_integer_nodes(parser, left, right);
266
+ case PM_FLOAT_NODE:
267
+ return pm_compare_float_nodes(parser, left, right);
268
+ default:
269
+ assert(false && "unreachable");
270
+ return 0;
271
+ }
272
+ }
273
+
274
+ /**
275
+ * Return a pointer to the string value of the given node.
276
+ */
277
+ static const pm_string_t *
278
+ pm_string_value(const pm_node_t *node) {
279
+ switch (PM_NODE_TYPE(node)) {
280
+ case PM_STRING_NODE:
281
+ return &((const pm_string_node_t *) node)->unescaped;
282
+ case PM_SOURCE_FILE_NODE:
283
+ return &((const pm_source_file_node_t *) node)->filepath;
284
+ case PM_SYMBOL_NODE:
285
+ return &((const pm_symbol_node_t *) node)->unescaped;
286
+ default:
287
+ assert(false && "unreachable");
288
+ return NULL;
289
+ }
290
+ }
291
+
292
+ /**
293
+ * A comparison function for comparing two nodes that have attached strings.
294
+ */
295
+ static int
296
+ pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
297
+ const pm_string_t *left_string = pm_string_value(left);
298
+ const pm_string_t *right_string = pm_string_value(right);
299
+ return pm_string_compare(left_string, right_string);
300
+ }
301
+
302
+ /**
303
+ * A comparison function for comparing two RegularExpressionNode instances.
304
+ */
305
+ static int
306
+ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
307
+ const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left;
308
+ const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right;
309
+
310
+ int result = pm_string_compare(&left_regexp->unescaped, &right_regexp->unescaped);
311
+ if (result != 0) return result;
312
+
313
+ return PM_NUMERIC_COMPARISON(left_regexp->base.flags, right_regexp->base.flags);
314
+ }
315
+
316
+ #undef PM_NUMERIC_COMPARISON
317
+
318
+ /**
319
+ * Add a node to the set of static literals.
320
+ */
321
+ pm_node_t *
322
+ pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
323
+ switch (PM_NODE_TYPE(node)) {
324
+ case PM_INTEGER_NODE:
325
+ case PM_SOURCE_LINE_NODE:
326
+ return pm_node_hash_insert(&literals->integer_nodes, parser, node, pm_compare_integer_nodes);
327
+ case PM_FLOAT_NODE:
328
+ return pm_node_hash_insert(&literals->float_nodes, parser, node, pm_compare_float_nodes);
329
+ case PM_RATIONAL_NODE:
330
+ case PM_IMAGINARY_NODE:
331
+ return pm_node_hash_insert(&literals->number_nodes, parser, node, pm_compare_number_nodes);
332
+ case PM_STRING_NODE:
333
+ case PM_SOURCE_FILE_NODE:
334
+ return pm_node_hash_insert(&literals->string_nodes, parser, node, pm_compare_string_nodes);
335
+ case PM_REGULAR_EXPRESSION_NODE:
336
+ return pm_node_hash_insert(&literals->regexp_nodes, parser, node, pm_compare_regular_expression_nodes);
337
+ case PM_SYMBOL_NODE:
338
+ return pm_node_hash_insert(&literals->symbol_nodes, parser, node, pm_compare_string_nodes);
339
+ case PM_TRUE_NODE: {
340
+ pm_node_t *duplicated = literals->true_node;
341
+ literals->true_node = node;
342
+ return duplicated;
343
+ }
344
+ case PM_FALSE_NODE: {
345
+ pm_node_t *duplicated = literals->false_node;
346
+ literals->false_node = node;
347
+ return duplicated;
348
+ }
349
+ case PM_NIL_NODE: {
350
+ pm_node_t *duplicated = literals->nil_node;
351
+ literals->nil_node = node;
352
+ return duplicated;
353
+ }
354
+ case PM_SOURCE_ENCODING_NODE: {
355
+ pm_node_t *duplicated = literals->source_encoding_node;
356
+ literals->source_encoding_node = node;
357
+ return duplicated;
358
+ }
359
+ default:
360
+ return NULL;
361
+ }
362
+ }
363
+
364
+ /**
365
+ * Free the internal memory associated with the given static literals set.
366
+ */
367
+ void
368
+ pm_static_literals_free(pm_static_literals_t *literals) {
369
+ pm_node_hash_free(&literals->integer_nodes);
370
+ pm_node_hash_free(&literals->float_nodes);
371
+ pm_node_hash_free(&literals->number_nodes);
372
+ pm_node_hash_free(&literals->string_nodes);
373
+ pm_node_hash_free(&literals->regexp_nodes);
374
+ pm_node_hash_free(&literals->symbol_nodes);
375
+ }
376
+
377
+ /**
378
+ * A helper to determine if the given node is a static literal that is positive.
379
+ * This is used for formatting imaginary nodes.
380
+ */
381
+ static bool
382
+ pm_static_literal_positive_p(const pm_node_t *node) {
383
+ switch (PM_NODE_TYPE(node)) {
384
+ case PM_FLOAT_NODE:
385
+ return ((const pm_float_node_t *) node)->value > 0;
386
+ case PM_INTEGER_NODE:
387
+ return !((const pm_integer_node_t *) node)->value.negative;
388
+ case PM_RATIONAL_NODE:
389
+ return pm_static_literal_positive_p(((const pm_rational_node_t *) node)->numeric);
390
+ case PM_IMAGINARY_NODE:
391
+ return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
392
+ default:
393
+ assert(false && "unreachable");
394
+ return false;
395
+ }
396
+ }
397
+
398
+ /**
399
+ * Inspect a rational node that wraps a float node. This is going to be a
400
+ * poor-man's version of the Ruby `Rational#to_s` method, because we're not
401
+ * going to try to reduce the rational by finding the GCD. We'll leave that for
402
+ * a future improvement.
403
+ */
404
+ static void
405
+ pm_rational_inspect(pm_buffer_t *buffer, pm_rational_node_t *node) {
406
+ const uint8_t *start = node->base.location.start;
407
+ const uint8_t *end = node->base.location.end - 1; // r
408
+
409
+ while (start < end && *start == '0') start++; // 0.1 -> .1
410
+ while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
411
+ size_t length = (size_t) (end - start);
412
+
413
+ const uint8_t *point = memchr(start, '.', length);
414
+ assert(point && "should have a decimal point");
415
+
416
+ uint8_t *digits = malloc(length - 1);
417
+ if (digits == NULL) return;
418
+
419
+ memcpy(digits, start, (unsigned long) (point - start));
420
+ memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
421
+
422
+ pm_integer_t numerator = { 0 };
423
+ pm_integer_parse(&numerator, PM_INTEGER_BASE_DECIMAL, digits, digits + length - 1);
424
+
425
+ pm_buffer_append_byte(buffer, '(');
426
+ pm_integer_string(buffer, &numerator);
427
+ pm_buffer_append_string(buffer, "/1", 2);
428
+ for (size_t index = 0; index < (size_t) (end - point - 1); index++) pm_buffer_append_byte(buffer, '0');
429
+ pm_buffer_append_byte(buffer, ')');
430
+
431
+ pm_integer_free(&numerator);
432
+ free(digits);
433
+ }
434
+
435
+ /**
436
+ * Create a string-based representation of the given static literal.
437
+ */
438
+ PRISM_EXPORTED_FUNCTION void
439
+ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
440
+ switch (PM_NODE_TYPE(node)) {
441
+ case PM_FALSE_NODE:
442
+ pm_buffer_append_string(buffer, "false", 5);
443
+ break;
444
+ case PM_FLOAT_NODE: {
445
+ const double value = ((const pm_float_node_t *) node)->value;
446
+
447
+ if (isinf(value)) {
448
+ if (*node->location.start == '-') {
449
+ pm_buffer_append_byte(buffer, '-');
450
+ }
451
+ pm_buffer_append_string(buffer, "Infinity", 8);
452
+ } else if (value == 0.0) {
453
+ if (*node->location.start == '-') {
454
+ pm_buffer_append_byte(buffer, '-');
455
+ }
456
+ pm_buffer_append_string(buffer, "0.0", 3);
457
+ } else {
458
+ pm_buffer_append_format(buffer, "%g", value);
459
+
460
+ // %g will not insert a .0 for 1e100 (we'll get back 1e+100). So
461
+ // we check for the decimal point and add it in here if it's not
462
+ // present.
463
+ if (pm_buffer_index(buffer, '.') == SIZE_MAX) {
464
+ size_t exponent_index = pm_buffer_index(buffer, 'e');
465
+ size_t index = exponent_index == SIZE_MAX ? pm_buffer_length(buffer) : exponent_index;
466
+ pm_buffer_insert(buffer, index, ".0", 2);
467
+ }
468
+ }
469
+
470
+ break;
471
+ }
472
+ case PM_IMAGINARY_NODE: {
473
+ const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
474
+ pm_buffer_append_string(buffer, "(0", 2);
475
+ if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
476
+ pm_static_literal_inspect(buffer, parser, numeric);
477
+ if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) pm_buffer_append_byte(buffer, '*');
478
+ pm_buffer_append_string(buffer, "i)", 2);
479
+ break;
480
+ }
481
+ case PM_INTEGER_NODE:
482
+ pm_integer_string(buffer, &((const pm_integer_node_t *) node)->value);
483
+ break;
484
+ case PM_NIL_NODE:
485
+ pm_buffer_append_string(buffer, "nil", 3);
486
+ break;
487
+ case PM_RATIONAL_NODE: {
488
+ const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric;
489
+
490
+ switch (PM_NODE_TYPE(numeric)) {
491
+ case PM_INTEGER_NODE:
492
+ pm_buffer_append_byte(buffer, '(');
493
+ pm_static_literal_inspect(buffer, parser, numeric);
494
+ pm_buffer_append_string(buffer, "/1)", 3);
495
+ break;
496
+ case PM_FLOAT_NODE:
497
+ pm_rational_inspect(buffer, (pm_rational_node_t *) node);
498
+ break;
499
+ default:
500
+ assert(false && "unreachable");
501
+ break;
502
+ }
503
+
504
+ break;
505
+ }
506
+ case PM_REGULAR_EXPRESSION_NODE: {
507
+ const pm_string_t *unescaped = &((const pm_regular_expression_node_t *) node)->unescaped;
508
+ pm_buffer_append_byte(buffer, '/');
509
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
510
+ pm_buffer_append_byte(buffer, '/');
511
+
512
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) pm_buffer_append_string(buffer, "m", 1);
513
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) pm_buffer_append_string(buffer, "i", 1);
514
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) pm_buffer_append_string(buffer, "x", 1);
515
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) pm_buffer_append_string(buffer, "n", 1);
516
+
517
+ break;
518
+ }
519
+ case PM_SOURCE_ENCODING_NODE:
520
+ pm_buffer_append_format(buffer, "#<Encoding:%s>", parser->encoding->name);
521
+ break;
522
+ case PM_SOURCE_FILE_NODE: {
523
+ const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath;
524
+ pm_buffer_append_byte(buffer, '"');
525
+ pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_RUBY);
526
+ pm_buffer_append_byte(buffer, '"');
527
+ break;
528
+ }
529
+ case PM_SOURCE_LINE_NODE:
530
+ pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line);
531
+ break;
532
+ case PM_STRING_NODE: {
533
+ const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
534
+ pm_buffer_append_byte(buffer, '"');
535
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
536
+ pm_buffer_append_byte(buffer, '"');
537
+ break;
538
+ }
539
+ case PM_SYMBOL_NODE: {
540
+ const pm_string_t *unescaped = &((const pm_symbol_node_t *) node)->unescaped;
541
+ pm_buffer_append_byte(buffer, ':');
542
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
543
+ break;
544
+ }
545
+ case PM_TRUE_NODE:
546
+ pm_buffer_append_string(buffer, "true", 4);
547
+ break;
548
+ default:
549
+ assert(false && "unreachable");
550
+ break;
551
+ }
552
+ }
data/src/token_type.c CHANGED
@@ -5,6 +5,7 @@
5
5
  /* if you are looking to modify the */
6
6
  /* template */
7
7
  /******************************************************************************/
8
+
8
9
  #include <string.h>
9
10
 
10
11
  #include "prism/ast.h"