jruby-prism-parser 0.24.0-java → 1.4.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +269 -1
  4. data/CONTRIBUTING.md +0 -4
  5. data/Makefile +25 -18
  6. data/README.md +57 -6
  7. data/config.yml +1724 -140
  8. data/docs/build_system.md +39 -11
  9. data/docs/configuration.md +4 -0
  10. data/docs/cruby_compilation.md +1 -1
  11. data/docs/fuzzing.md +1 -1
  12. data/docs/parser_translation.md +14 -9
  13. data/docs/parsing_rules.md +4 -1
  14. data/docs/releasing.md +8 -10
  15. data/docs/relocation.md +34 -0
  16. data/docs/ripper_translation.md +72 -0
  17. data/docs/ruby_api.md +2 -1
  18. data/docs/serialization.md +29 -5
  19. data/ext/prism/api_node.c +3395 -1999
  20. data/ext/prism/api_pack.c +9 -0
  21. data/ext/prism/extconf.rb +55 -34
  22. data/ext/prism/extension.c +597 -346
  23. data/ext/prism/extension.h +6 -5
  24. data/include/prism/ast.h +2612 -455
  25. data/include/prism/defines.h +160 -2
  26. data/include/prism/diagnostic.h +188 -76
  27. data/include/prism/encoding.h +22 -4
  28. data/include/prism/node.h +89 -17
  29. data/include/prism/options.h +224 -12
  30. data/include/prism/pack.h +11 -0
  31. data/include/prism/parser.h +267 -66
  32. data/include/prism/prettyprint.h +8 -0
  33. data/include/prism/regexp.h +18 -8
  34. data/include/prism/static_literals.h +121 -0
  35. data/include/prism/util/pm_buffer.h +75 -2
  36. data/include/prism/util/pm_char.h +1 -2
  37. data/include/prism/util/pm_constant_pool.h +18 -9
  38. data/include/prism/util/pm_integer.h +126 -0
  39. data/include/prism/util/pm_list.h +1 -1
  40. data/include/prism/util/pm_newline_list.h +19 -0
  41. data/include/prism/util/pm_string.h +48 -8
  42. data/include/prism/version.h +3 -3
  43. data/include/prism.h +99 -5
  44. data/jruby-prism.jar +0 -0
  45. data/lib/prism/compiler.rb +11 -1
  46. data/lib/prism/desugar_compiler.rb +113 -74
  47. data/lib/prism/dispatcher.rb +45 -1
  48. data/lib/prism/dot_visitor.rb +201 -77
  49. data/lib/prism/dsl.rb +673 -461
  50. data/lib/prism/ffi.rb +233 -45
  51. data/lib/prism/inspect_visitor.rb +2389 -0
  52. data/lib/prism/lex_compat.rb +35 -16
  53. data/lib/prism/mutation_compiler.rb +24 -8
  54. data/lib/prism/node.rb +7731 -8460
  55. data/lib/prism/node_ext.rb +328 -32
  56. data/lib/prism/pack.rb +4 -0
  57. data/lib/prism/parse_result/comments.rb +34 -24
  58. data/lib/prism/parse_result/errors.rb +65 -0
  59. data/lib/prism/parse_result/newlines.rb +102 -12
  60. data/lib/prism/parse_result.rb +448 -44
  61. data/lib/prism/pattern.rb +28 -10
  62. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  63. data/lib/prism/polyfill/byteindex.rb +13 -0
  64. data/lib/prism/polyfill/unpack1.rb +14 -0
  65. data/lib/prism/reflection.rb +413 -0
  66. data/lib/prism/relocation.rb +504 -0
  67. data/lib/prism/serialize.rb +1940 -1198
  68. data/lib/prism/string_query.rb +30 -0
  69. data/lib/prism/translation/parser/builder.rb +61 -0
  70. data/lib/prism/translation/parser/compiler.rb +569 -195
  71. data/lib/prism/translation/parser/lexer.rb +516 -39
  72. data/lib/prism/translation/parser.rb +177 -12
  73. data/lib/prism/translation/parser33.rb +1 -1
  74. data/lib/prism/translation/parser34.rb +1 -1
  75. data/lib/prism/translation/parser35.rb +12 -0
  76. data/lib/prism/translation/ripper/sexp.rb +125 -0
  77. data/lib/prism/translation/ripper/shim.rb +5 -0
  78. data/lib/prism/translation/ripper.rb +3224 -462
  79. data/lib/prism/translation/ruby_parser.rb +194 -69
  80. data/lib/prism/translation.rb +4 -1
  81. data/lib/prism/version.rb +1 -1
  82. data/lib/prism/visitor.rb +13 -0
  83. data/lib/prism.rb +17 -27
  84. data/prism.gemspec +57 -17
  85. data/rbi/prism/compiler.rbi +12 -0
  86. data/rbi/prism/dsl.rbi +524 -0
  87. data/rbi/prism/inspect_visitor.rbi +12 -0
  88. data/rbi/prism/node.rbi +8722 -0
  89. data/rbi/prism/node_ext.rbi +107 -0
  90. data/rbi/prism/parse_result.rbi +404 -0
  91. data/rbi/prism/reflection.rbi +58 -0
  92. data/rbi/prism/string_query.rbi +12 -0
  93. data/rbi/prism/translation/parser.rbi +11 -0
  94. data/rbi/prism/translation/parser33.rbi +6 -0
  95. data/rbi/prism/translation/parser34.rbi +6 -0
  96. data/rbi/prism/translation/parser35.rbi +6 -0
  97. data/rbi/prism/translation/ripper.rbi +15 -0
  98. data/rbi/prism/visitor.rbi +473 -0
  99. data/rbi/prism.rbi +44 -7745
  100. data/sig/prism/compiler.rbs +9 -0
  101. data/sig/prism/dispatcher.rbs +16 -0
  102. data/sig/prism/dot_visitor.rbs +6 -0
  103. data/sig/prism/dsl.rbs +351 -0
  104. data/sig/prism/inspect_visitor.rbs +22 -0
  105. data/sig/prism/lex_compat.rbs +10 -0
  106. data/sig/prism/mutation_compiler.rbs +159 -0
  107. data/sig/prism/node.rbs +3614 -0
  108. data/sig/prism/node_ext.rbs +82 -0
  109. data/sig/prism/pack.rbs +43 -0
  110. data/sig/prism/parse_result.rbs +192 -0
  111. data/sig/prism/pattern.rbs +13 -0
  112. data/sig/prism/reflection.rbs +50 -0
  113. data/sig/prism/relocation.rbs +185 -0
  114. data/sig/prism/serialize.rbs +8 -0
  115. data/sig/prism/string_query.rbs +11 -0
  116. data/sig/prism/visitor.rbs +169 -0
  117. data/sig/prism.rbs +248 -4767
  118. data/src/diagnostic.c +672 -230
  119. data/src/encoding.c +211 -108
  120. data/src/node.c +7541 -1653
  121. data/src/options.c +135 -20
  122. data/src/pack.c +33 -17
  123. data/src/prettyprint.c +1543 -1485
  124. data/src/prism.c +7813 -3050
  125. data/src/regexp.c +225 -73
  126. data/src/serialize.c +101 -77
  127. data/src/static_literals.c +617 -0
  128. data/src/token_type.c +14 -13
  129. data/src/util/pm_buffer.c +187 -20
  130. data/src/util/pm_char.c +5 -5
  131. data/src/util/pm_constant_pool.c +39 -19
  132. data/src/util/pm_integer.c +670 -0
  133. data/src/util/pm_list.c +1 -1
  134. data/src/util/pm_newline_list.c +43 -5
  135. data/src/util/pm_string.c +213 -33
  136. data/src/util/pm_strncasecmp.c +13 -1
  137. data/src/util/pm_strpbrk.c +32 -6
  138. metadata +55 -19
  139. data/docs/ripper.md +0 -36
  140. data/include/prism/util/pm_state_stack.h +0 -42
  141. data/include/prism/util/pm_string_list.h +0 -44
  142. data/lib/prism/debug.rb +0 -206
  143. data/lib/prism/node_inspector.rb +0 -68
  144. data/lib/prism/translation/parser/rubocop.rb +0 -45
  145. data/rbi/prism_static.rbi +0 -207
  146. data/sig/prism_static.rbs +0 -201
  147. data/src/util/pm_state_stack.c +0 -25
  148. data/src/util/pm_string_list.c +0 -28
@@ -0,0 +1,617 @@
1
+ #include "prism/static_literals.h"
2
+
3
+ /**
4
+ * A small struct used for passing around a subset of the information that is
5
+ * stored on the parser. We use this to avoid having static literals explicitly
6
+ * depend on the parser struct.
7
+ */
8
+ typedef struct {
9
+ /** The list of newline offsets to use to calculate line numbers. */
10
+ const pm_newline_list_t *newline_list;
11
+
12
+ /** The line number that the parser starts on. */
13
+ int32_t start_line;
14
+
15
+ /** The name of the encoding that the parser is using. */
16
+ const char *encoding_name;
17
+ } pm_static_literals_metadata_t;
18
+
19
/**
 * Mix a single 32-bit word: multiply, rotate left by 15 bits, multiply again.
 * All arithmetic is unsigned and wraps modulo 2^32.
 */
static inline uint32_t
murmur_scramble(uint32_t value) {
    const uint32_t multiplied = value * 0xcc9e2d51;
    const uint32_t rotated = (multiplied << 15) | (multiplied >> 17);
    return rotated * 0x1b873593;
}
26
+
27
/**
 * Murmur hash (https://en.wikipedia.org/wiki/MurmurHash) of the given bytes.
 * It is a fast, non-cryptographic, general-purpose hash, which is all that is
 * needed for duplicate detection here.
 *
 * @param key The bytes to hash.
 * @param length The number of bytes to read from key.
 * @return The 32-bit hash of the bytes.
 */
static uint32_t
murmur_hash(const uint8_t *key, size_t length) {
    uint32_t hash = 0x9747b28c;
    const uint8_t *cursor = key;

    // Consume the input one whole 32-bit word at a time. memcpy is used so
    // that the read is valid regardless of the alignment of the key.
    const size_t word_count = length >> 2;
    for (size_t consumed = 0; consumed < word_count; consumed++) {
        uint32_t word;
        memcpy(&word, cursor, sizeof(uint32_t));
        cursor += sizeof(uint32_t);

        hash ^= murmur_scramble(word);
        hash = (hash << 13) | (hash >> 19);
        hash = hash * 5 + 0xe6546b64;
    }

    // Fold the 0-3 trailing bytes into one word, last byte first, so that the
    // first trailing byte ends up in the lowest 8 bits.
    uint32_t tail = 0;
    for (size_t remaining = length & 3; remaining > 0; remaining--) {
        tail = (tail << 8) | cursor[remaining - 1];
    }
    hash ^= murmur_scramble(tail);

    // Final mixing of the length and of the accumulated bits.
    hash ^= (uint32_t) length;
    hash ^= hash >> 16;
    hash *= 0x85ebca6b;
    hash ^= hash >> 13;
    hash *= 0xc2b2ae35;
    hash ^= hash >> 16;

    return hash;
}
60
+
61
+ /**
62
+ * Hash the value of an integer and return it.
63
+ */
64
+ static uint32_t
65
+ integer_hash(const pm_integer_t *integer) {
66
+ uint32_t hash;
67
+ if (integer->values) {
68
+ hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
69
+ } else {
70
+ hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
71
+ }
72
+
73
+ if (integer->negative) {
74
+ hash ^= murmur_scramble((uint32_t) 1);
75
+ }
76
+
77
+ return hash;
78
+ }
79
+
80
+ /**
81
+ * Return the hash of the given node. It is important that nodes that have
82
+ * equivalent static literal values have the same hash. This is because we use
83
+ * these hashes to look for duplicates.
84
+ */
85
+ static uint32_t
86
+ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
87
+ switch (PM_NODE_TYPE(node)) {
88
+ case PM_INTEGER_NODE: {
89
+ // Integers hash their value.
90
+ const pm_integer_node_t *cast = (const pm_integer_node_t *) node;
91
+ return integer_hash(&cast->value);
92
+ }
93
+ case PM_SOURCE_LINE_NODE: {
94
+ // Source lines hash their line number.
95
+ const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line);
96
+ const int32_t *value = &line_column.line;
97
+ return murmur_hash((const uint8_t *) value, sizeof(int32_t));
98
+ }
99
+ case PM_FLOAT_NODE: {
100
+ // Floats hash their value.
101
+ const double *value = &((const pm_float_node_t *) node)->value;
102
+ return murmur_hash((const uint8_t *) value, sizeof(double));
103
+ }
104
+ case PM_RATIONAL_NODE: {
105
+ // Rationals hash their numerator and denominator.
106
+ const pm_rational_node_t *cast = (const pm_rational_node_t *) node;
107
+ return integer_hash(&cast->numerator) ^ integer_hash(&cast->denominator) ^ murmur_scramble((uint32_t) cast->base.type);
108
+ }
109
+ case PM_IMAGINARY_NODE: {
110
+ // Imaginaries hash their numeric value. Because their numeric value
111
+ // is stored as a subnode, we hash that node and then mix in the
112
+ // fact that this is an imaginary node.
113
+ const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
114
+ return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
115
+ }
116
+ case PM_STRING_NODE: {
117
+ // Strings hash their value and mix in their flags so that different
118
+ // encodings are not considered equal.
119
+ const pm_string_t *value = &((const pm_string_node_t *) node)->unescaped;
120
+
121
+ pm_node_flags_t flags = node->flags;
122
+ flags &= (PM_STRING_FLAGS_FORCED_BINARY_ENCODING | PM_STRING_FLAGS_FORCED_UTF8_ENCODING);
123
+
124
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) flags);
125
+ }
126
+ case PM_SOURCE_FILE_NODE: {
127
+ // Source files hash their value and mix in their flags so that
128
+ // different encodings are not considered equal.
129
+ const pm_string_t *value = &((const pm_source_file_node_t *) node)->filepath;
130
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t));
131
+ }
132
+ case PM_REGULAR_EXPRESSION_NODE: {
133
+ // Regular expressions hash their value and mix in their flags so
134
+ // that different encodings are not considered equal.
135
+ const pm_string_t *value = &((const pm_regular_expression_node_t *) node)->unescaped;
136
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
137
+ }
138
+ case PM_SYMBOL_NODE: {
139
+ // Symbols hash their value and mix in their flags so that different
140
+ // encodings are not considered equal.
141
+ const pm_string_t *value = &((const pm_symbol_node_t *) node)->unescaped;
142
+ return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
143
+ }
144
+ default:
145
+ assert(false && "unreachable");
146
+ return 0;
147
+ }
148
+ }
149
+
150
+ /**
151
+ * Insert a node into the node hash. It accepts the hash that should hold the
152
+ * new node, the parser that generated the node, the node to insert, and a
153
+ * comparison function. The comparison function is used for collision detection,
154
+ * and must be able to compare all node types that will be stored in this hash.
155
+ */
156
+ static pm_node_t *
157
+ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, bool replace, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
158
+ // If we are out of space, we need to resize the hash. This will cause all
159
+ // of the nodes to be rehashed and reinserted into the new hash.
160
+ if (hash->size * 2 >= hash->capacity) {
161
+ // First, allocate space for the new node list.
162
+ uint32_t new_capacity = hash->capacity == 0 ? 4 : hash->capacity * 2;
163
+ pm_node_t **new_nodes = xcalloc(new_capacity, sizeof(pm_node_t *));
164
+ if (new_nodes == NULL) return NULL;
165
+
166
+ // It turns out to be more efficient to mask the hash value than to use
167
+ // the modulo operator. Because our capacities are always powers of two,
168
+ // we can use a bitwise AND to get the same result as the modulo
169
+ // operator.
170
+ uint32_t mask = new_capacity - 1;
171
+
172
+ // Now, rehash all of the nodes into the new list.
173
+ for (uint32_t index = 0; index < hash->capacity; index++) {
174
+ pm_node_t *node = hash->nodes[index];
175
+
176
+ if (node != NULL) {
177
+ uint32_t index = node_hash(metadata, node) & mask;
178
+ new_nodes[index] = node;
179
+ }
180
+ }
181
+
182
+ // Finally, free the old node list and update the hash.
183
+ xfree(hash->nodes);
184
+ hash->nodes = new_nodes;
185
+ hash->capacity = new_capacity;
186
+ }
187
+
188
+ // Now, insert the node into the hash.
189
+ uint32_t mask = hash->capacity - 1;
190
+ uint32_t index = node_hash(metadata, node) & mask;
191
+
192
+ // We use linear probing to resolve collisions. This means that if the
193
+ // current index is occupied, we will move to the next index and try again.
194
+ // We are guaranteed that this will eventually find an empty slot because we
195
+ // resize the hash when it gets too full.
196
+ while (hash->nodes[index] != NULL) {
197
+ if (compare(metadata, hash->nodes[index], node) == 0) break;
198
+ index = (index + 1) & mask;
199
+ }
200
+
201
+ // If the current index is occupied, we need to return the node that was
202
+ // already in the hash. Otherwise, we can just increment the size and insert
203
+ // the new node.
204
+ pm_node_t *result = hash->nodes[index];
205
+
206
+ if (result == NULL) {
207
+ hash->size++;
208
+ hash->nodes[index] = node;
209
+ } else if (replace) {
210
+ hash->nodes[index] = node;
211
+ }
212
+
213
+ return result;
214
+ }
215
+
216
+ /**
217
+ * Free the internal memory associated with the given node hash.
218
+ */
219
+ static void
220
+ pm_node_hash_free(pm_node_hash_t *hash) {
221
+ if (hash->capacity > 0) xfree(hash->nodes);
222
+ }
223
+
224
/**
 * Compare two values that can be compared with a simple numeric comparison.
 * Evaluates to -1 if left is less than right, 1 if left is greater than right,
 * and 0 otherwise. Each argument is parenthesized so that operands containing
 * lower-precedence operators are compared as a whole. Note that each argument
 * may be evaluated twice, so arguments must not have side effects.
 */
#define PM_NUMERIC_COMPARISON(left, right) (((left) < (right)) ? -1 : ((left) > (right)) ? 1 : 0)
228
+
229
+ /**
230
+ * Return the integer value of the given node as an int64_t.
231
+ */
232
+ static int64_t
233
+ pm_int64_value(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
234
+ switch (PM_NODE_TYPE(node)) {
235
+ case PM_INTEGER_NODE: {
236
+ const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
237
+ if (integer->values) return integer->negative ? INT64_MIN : INT64_MAX;
238
+
239
+ int64_t value = (int64_t) integer->value;
240
+ return integer->negative ? -value : value;
241
+ }
242
+ case PM_SOURCE_LINE_NODE:
243
+ return (int64_t) pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line;
244
+ default:
245
+ assert(false && "unreachable");
246
+ return 0;
247
+ }
248
+ }
249
+
250
+ /**
251
+ * A comparison function for comparing two IntegerNode or SourceLineNode
252
+ * instances.
253
+ */
254
+ static int
255
+ pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
256
+ if (PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) || PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE)) {
257
+ int64_t left_value = pm_int64_value(metadata, left);
258
+ int64_t right_value = pm_int64_value(metadata, right);
259
+ return PM_NUMERIC_COMPARISON(left_value, right_value);
260
+ }
261
+
262
+ const pm_integer_t *left_integer = &((const pm_integer_node_t *) left)->value;
263
+ const pm_integer_t *right_integer = &((const pm_integer_node_t *) right)->value;
264
+ return pm_integer_compare(left_integer, right_integer);
265
+ }
266
+
267
+ /**
268
+ * A comparison function for comparing two FloatNode instances.
269
+ */
270
+ static int
271
+ pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
272
+ const double left_value = ((const pm_float_node_t *) left)->value;
273
+ const double right_value = ((const pm_float_node_t *) right)->value;
274
+ return PM_NUMERIC_COMPARISON(left_value, right_value);
275
+ }
276
+
277
+ /**
278
+ * A comparison function for comparing two nodes that have attached numbers.
279
+ */
280
+ static int
281
+ pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
282
+ if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) {
283
+ return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right));
284
+ }
285
+
286
+ switch (PM_NODE_TYPE(left)) {
287
+ case PM_IMAGINARY_NODE:
288
+ return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
289
+ case PM_RATIONAL_NODE: {
290
+ const pm_rational_node_t *left_rational = (const pm_rational_node_t *) left;
291
+ const pm_rational_node_t *right_rational = (const pm_rational_node_t *) right;
292
+
293
+ int result = pm_integer_compare(&left_rational->denominator, &right_rational->denominator);
294
+ if (result != 0) return result;
295
+
296
+ return pm_integer_compare(&left_rational->numerator, &right_rational->numerator);
297
+ }
298
+ case PM_INTEGER_NODE:
299
+ return pm_compare_integer_nodes(metadata, left, right);
300
+ case PM_FLOAT_NODE:
301
+ return pm_compare_float_nodes(metadata, left, right);
302
+ default:
303
+ assert(false && "unreachable");
304
+ return 0;
305
+ }
306
+ }
307
+
308
+ /**
309
+ * Return a pointer to the string value of the given node.
310
+ */
311
+ static const pm_string_t *
312
+ pm_string_value(const pm_node_t *node) {
313
+ switch (PM_NODE_TYPE(node)) {
314
+ case PM_STRING_NODE:
315
+ return &((const pm_string_node_t *) node)->unescaped;
316
+ case PM_SOURCE_FILE_NODE:
317
+ return &((const pm_source_file_node_t *) node)->filepath;
318
+ case PM_SYMBOL_NODE:
319
+ return &((const pm_symbol_node_t *) node)->unescaped;
320
+ default:
321
+ assert(false && "unreachable");
322
+ return NULL;
323
+ }
324
+ }
325
+
326
+ /**
327
+ * A comparison function for comparing two nodes that have attached strings.
328
+ */
329
+ static int
330
+ pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
331
+ const pm_string_t *left_string = pm_string_value(left);
332
+ const pm_string_t *right_string = pm_string_value(right);
333
+ return pm_string_compare(left_string, right_string);
334
+ }
335
+
336
+ /**
337
+ * A comparison function for comparing two RegularExpressionNode instances.
338
+ */
339
+ static int
340
+ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
341
+ const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left;
342
+ const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right;
343
+
344
+ int result = pm_string_compare(&left_regexp->unescaped, &right_regexp->unescaped);
345
+ if (result != 0) return result;
346
+
347
+ return PM_NUMERIC_COMPARISON(left_regexp->base.flags, right_regexp->base.flags);
348
+ }
349
+
350
+ #undef PM_NUMERIC_COMPARISON
351
+
352
+ /**
353
+ * Add a node to the set of static literals.
354
+ */
355
+ pm_node_t *
356
+ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
357
+ switch (PM_NODE_TYPE(node)) {
358
+ case PM_INTEGER_NODE:
359
+ case PM_SOURCE_LINE_NODE:
360
+ return pm_node_hash_insert(
361
+ &literals->integer_nodes,
362
+ &(pm_static_literals_metadata_t) {
363
+ .newline_list = newline_list,
364
+ .start_line = start_line,
365
+ .encoding_name = NULL
366
+ },
367
+ node,
368
+ replace,
369
+ pm_compare_integer_nodes
370
+ );
371
+ case PM_FLOAT_NODE:
372
+ return pm_node_hash_insert(
373
+ &literals->float_nodes,
374
+ &(pm_static_literals_metadata_t) {
375
+ .newline_list = newline_list,
376
+ .start_line = start_line,
377
+ .encoding_name = NULL
378
+ },
379
+ node,
380
+ replace,
381
+ pm_compare_float_nodes
382
+ );
383
+ case PM_RATIONAL_NODE:
384
+ case PM_IMAGINARY_NODE:
385
+ return pm_node_hash_insert(
386
+ &literals->number_nodes,
387
+ &(pm_static_literals_metadata_t) {
388
+ .newline_list = newline_list,
389
+ .start_line = start_line,
390
+ .encoding_name = NULL
391
+ },
392
+ node,
393
+ replace,
394
+ pm_compare_number_nodes
395
+ );
396
+ case PM_STRING_NODE:
397
+ case PM_SOURCE_FILE_NODE:
398
+ return pm_node_hash_insert(
399
+ &literals->string_nodes,
400
+ &(pm_static_literals_metadata_t) {
401
+ .newline_list = newline_list,
402
+ .start_line = start_line,
403
+ .encoding_name = NULL
404
+ },
405
+ node,
406
+ replace,
407
+ pm_compare_string_nodes
408
+ );
409
+ case PM_REGULAR_EXPRESSION_NODE:
410
+ return pm_node_hash_insert(
411
+ &literals->regexp_nodes,
412
+ &(pm_static_literals_metadata_t) {
413
+ .newline_list = newline_list,
414
+ .start_line = start_line,
415
+ .encoding_name = NULL
416
+ },
417
+ node,
418
+ replace,
419
+ pm_compare_regular_expression_nodes
420
+ );
421
+ case PM_SYMBOL_NODE:
422
+ return pm_node_hash_insert(
423
+ &literals->symbol_nodes,
424
+ &(pm_static_literals_metadata_t) {
425
+ .newline_list = newline_list,
426
+ .start_line = start_line,
427
+ .encoding_name = NULL
428
+ },
429
+ node,
430
+ replace,
431
+ pm_compare_string_nodes
432
+ );
433
+ case PM_TRUE_NODE: {
434
+ pm_node_t *duplicated = literals->true_node;
435
+ if ((duplicated == NULL) || replace) literals->true_node = node;
436
+ return duplicated;
437
+ }
438
+ case PM_FALSE_NODE: {
439
+ pm_node_t *duplicated = literals->false_node;
440
+ if ((duplicated == NULL) || replace) literals->false_node = node;
441
+ return duplicated;
442
+ }
443
+ case PM_NIL_NODE: {
444
+ pm_node_t *duplicated = literals->nil_node;
445
+ if ((duplicated == NULL) || replace) literals->nil_node = node;
446
+ return duplicated;
447
+ }
448
+ case PM_SOURCE_ENCODING_NODE: {
449
+ pm_node_t *duplicated = literals->source_encoding_node;
450
+ if ((duplicated == NULL) || replace) literals->source_encoding_node = node;
451
+ return duplicated;
452
+ }
453
+ default:
454
+ return NULL;
455
+ }
456
+ }
457
+
458
+ /**
459
+ * Free the internal memory associated with the given static literals set.
460
+ */
461
+ void
462
+ pm_static_literals_free(pm_static_literals_t *literals) {
463
+ pm_node_hash_free(&literals->integer_nodes);
464
+ pm_node_hash_free(&literals->float_nodes);
465
+ pm_node_hash_free(&literals->number_nodes);
466
+ pm_node_hash_free(&literals->string_nodes);
467
+ pm_node_hash_free(&literals->regexp_nodes);
468
+ pm_node_hash_free(&literals->symbol_nodes);
469
+ }
470
+
471
+ /**
472
+ * A helper to determine if the given node is a static literal that is positive.
473
+ * This is used for formatting imaginary nodes.
474
+ */
475
+ static bool
476
+ pm_static_literal_positive_p(const pm_node_t *node) {
477
+ switch (PM_NODE_TYPE(node)) {
478
+ case PM_FLOAT_NODE:
479
+ return ((const pm_float_node_t *) node)->value > 0;
480
+ case PM_INTEGER_NODE:
481
+ return !((const pm_integer_node_t *) node)->value.negative;
482
+ case PM_RATIONAL_NODE:
483
+ return !((const pm_rational_node_t *) node)->numerator.negative;
484
+ case PM_IMAGINARY_NODE:
485
+ return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
486
+ default:
487
+ assert(false && "unreachable");
488
+ return false;
489
+ }
490
+ }
491
+
492
+ /**
493
+ * Create a string-based representation of the given static literal.
494
+ */
495
+ static inline void
496
+ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
497
+ switch (PM_NODE_TYPE(node)) {
498
+ case PM_FALSE_NODE:
499
+ pm_buffer_append_string(buffer, "false", 5);
500
+ break;
501
+ case PM_FLOAT_NODE: {
502
+ const double value = ((const pm_float_node_t *) node)->value;
503
+
504
+ if (PRISM_ISINF(value)) {
505
+ if (*node->location.start == '-') {
506
+ pm_buffer_append_byte(buffer, '-');
507
+ }
508
+ pm_buffer_append_string(buffer, "Infinity", 8);
509
+ } else if (value == 0.0) {
510
+ if (*node->location.start == '-') {
511
+ pm_buffer_append_byte(buffer, '-');
512
+ }
513
+ pm_buffer_append_string(buffer, "0.0", 3);
514
+ } else {
515
+ pm_buffer_append_format(buffer, "%g", value);
516
+
517
+ // %g will not insert a .0 for 1e100 (we'll get back 1e+100). So
518
+ // we check for the decimal point and add it in here if it's not
519
+ // present.
520
+ if (pm_buffer_index(buffer, '.') == SIZE_MAX) {
521
+ size_t exponent_index = pm_buffer_index(buffer, 'e');
522
+ size_t index = exponent_index == SIZE_MAX ? pm_buffer_length(buffer) : exponent_index;
523
+ pm_buffer_insert(buffer, index, ".0", 2);
524
+ }
525
+ }
526
+
527
+ break;
528
+ }
529
+ case PM_IMAGINARY_NODE: {
530
+ const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
531
+ pm_buffer_append_string(buffer, "(0", 2);
532
+ if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
533
+ pm_static_literal_inspect_node(buffer, metadata, numeric);
534
+ if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) {
535
+ pm_buffer_append_byte(buffer, '*');
536
+ }
537
+ pm_buffer_append_string(buffer, "i)", 2);
538
+ break;
539
+ }
540
+ case PM_INTEGER_NODE:
541
+ pm_integer_string(buffer, &((const pm_integer_node_t *) node)->value);
542
+ break;
543
+ case PM_NIL_NODE:
544
+ pm_buffer_append_string(buffer, "nil", 3);
545
+ break;
546
+ case PM_RATIONAL_NODE: {
547
+ const pm_rational_node_t *rational = (const pm_rational_node_t *) node;
548
+ pm_buffer_append_byte(buffer, '(');
549
+ pm_integer_string(buffer, &rational->numerator);
550
+ pm_buffer_append_byte(buffer, '/');
551
+ pm_integer_string(buffer, &rational->denominator);
552
+ pm_buffer_append_byte(buffer, ')');
553
+ break;
554
+ }
555
+ case PM_REGULAR_EXPRESSION_NODE: {
556
+ const pm_string_t *unescaped = &((const pm_regular_expression_node_t *) node)->unescaped;
557
+ pm_buffer_append_byte(buffer, '/');
558
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
559
+ pm_buffer_append_byte(buffer, '/');
560
+
561
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) pm_buffer_append_string(buffer, "m", 1);
562
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) pm_buffer_append_string(buffer, "i", 1);
563
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) pm_buffer_append_string(buffer, "x", 1);
564
+ if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) pm_buffer_append_string(buffer, "n", 1);
565
+
566
+ break;
567
+ }
568
+ case PM_SOURCE_ENCODING_NODE:
569
+ pm_buffer_append_format(buffer, "#<Encoding:%s>", metadata->encoding_name);
570
+ break;
571
+ case PM_SOURCE_FILE_NODE: {
572
+ const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath;
573
+ pm_buffer_append_byte(buffer, '"');
574
+ pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_RUBY);
575
+ pm_buffer_append_byte(buffer, '"');
576
+ break;
577
+ }
578
+ case PM_SOURCE_LINE_NODE:
579
+ pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line);
580
+ break;
581
+ case PM_STRING_NODE: {
582
+ const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
583
+ pm_buffer_append_byte(buffer, '"');
584
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
585
+ pm_buffer_append_byte(buffer, '"');
586
+ break;
587
+ }
588
+ case PM_SYMBOL_NODE: {
589
+ const pm_string_t *unescaped = &((const pm_symbol_node_t *) node)->unescaped;
590
+ pm_buffer_append_byte(buffer, ':');
591
+ pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
592
+ break;
593
+ }
594
+ case PM_TRUE_NODE:
595
+ pm_buffer_append_string(buffer, "true", 4);
596
+ break;
597
+ default:
598
+ assert(false && "unreachable");
599
+ break;
600
+ }
601
+ }
602
+
603
+ /**
604
+ * Create a string-based representation of the given static literal.
605
+ */
606
+ void
607
+ pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
608
+ pm_static_literal_inspect_node(
609
+ buffer,
610
+ &(pm_static_literals_metadata_t) {
611
+ .newline_list = newline_list,
612
+ .start_line = start_line,
613
+ .encoding_name = encoding_name
614
+ },
615
+ node
616
+ );
617
+ }