jruby-prism-parser 0.24.0-java → 1.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +269 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +25 -18
- data/README.md +57 -6
- data/config.yml +1724 -140
- data/docs/build_system.md +39 -11
- data/docs/configuration.md +4 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/fuzzing.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/parsing_rules.md +4 -1
- data/docs/releasing.md +8 -10
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +72 -0
- data/docs/ruby_api.md +2 -1
- data/docs/serialization.md +29 -5
- data/ext/prism/api_node.c +3395 -1999
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +55 -34
- data/ext/prism/extension.c +597 -346
- data/ext/prism/extension.h +6 -5
- data/include/prism/ast.h +2612 -455
- data/include/prism/defines.h +160 -2
- data/include/prism/diagnostic.h +188 -76
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +89 -17
- data/include/prism/options.h +224 -12
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +267 -66
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +121 -0
- data/include/prism/util/pm_buffer.h +75 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +18 -9
- data/include/prism/util/pm_integer.h +126 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +19 -0
- data/include/prism/util/pm_string.h +48 -8
- data/include/prism/version.h +3 -3
- data/include/prism.h +99 -5
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +11 -1
- data/lib/prism/desugar_compiler.rb +113 -74
- data/lib/prism/dispatcher.rb +45 -1
- data/lib/prism/dot_visitor.rb +201 -77
- data/lib/prism/dsl.rb +673 -461
- data/lib/prism/ffi.rb +233 -45
- data/lib/prism/inspect_visitor.rb +2389 -0
- data/lib/prism/lex_compat.rb +35 -16
- data/lib/prism/mutation_compiler.rb +24 -8
- data/lib/prism/node.rb +7731 -8460
- data/lib/prism/node_ext.rb +328 -32
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -24
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +102 -12
- data/lib/prism/parse_result.rb +448 -44
- data/lib/prism/pattern.rb +28 -10
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +413 -0
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +1940 -1198
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/builder.rb +61 -0
- data/lib/prism/translation/parser/compiler.rb +569 -195
- data/lib/prism/translation/parser/lexer.rb +516 -39
- data/lib/prism/translation/parser.rb +177 -12
- data/lib/prism/translation/parser33.rb +1 -1
- data/lib/prism/translation/parser34.rb +1 -1
- data/lib/prism/translation/parser35.rb +12 -0
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3224 -462
- data/lib/prism/translation/ruby_parser.rb +194 -69
- data/lib/prism/translation.rb +4 -1
- data/lib/prism/version.rb +1 -1
- data/lib/prism/visitor.rb +13 -0
- data/lib/prism.rb +17 -27
- data/prism.gemspec +57 -17
- data/rbi/prism/compiler.rbi +12 -0
- data/rbi/prism/dsl.rbi +524 -0
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +8722 -0
- data/rbi/prism/node_ext.rbi +107 -0
- data/rbi/prism/parse_result.rbi +404 -0
- data/rbi/prism/reflection.rbi +58 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/translation/parser.rbi +11 -0
- data/rbi/prism/translation/parser33.rbi +6 -0
- data/rbi/prism/translation/parser34.rbi +6 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism/translation/ripper.rbi +15 -0
- data/rbi/prism/visitor.rbi +473 -0
- data/rbi/prism.rbi +44 -7745
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +351 -0
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/mutation_compiler.rbs +159 -0
- data/sig/prism/node.rbs +3614 -0
- data/sig/prism/node_ext.rbs +82 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +192 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/reflection.rbs +50 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/serialize.rbs +8 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +169 -0
- data/sig/prism.rbs +248 -4767
- data/src/diagnostic.c +672 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7541 -1653
- data/src/options.c +135 -20
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1543 -1485
- data/src/prism.c +7813 -3050
- data/src/regexp.c +225 -73
- data/src/serialize.c +101 -77
- data/src/static_literals.c +617 -0
- data/src/token_type.c +14 -13
- data/src/util/pm_buffer.c +187 -20
- data/src/util/pm_char.c +5 -5
- data/src/util/pm_constant_pool.c +39 -19
- data/src/util/pm_integer.c +670 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +43 -5
- data/src/util/pm_string.c +213 -33
- data/src/util/pm_strncasecmp.c +13 -1
- data/src/util/pm_strpbrk.c +32 -6
- metadata +55 -19
- data/docs/ripper.md +0 -36
- data/include/prism/util/pm_state_stack.h +0 -42
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -206
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/translation/parser/rubocop.rb +0 -45
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
- data/src/util/pm_state_stack.c +0 -25
- data/src/util/pm_string_list.c +0 -28
@@ -0,0 +1,617 @@
|
|
1
|
+
#include "prism/static_literals.h"
|
2
|
+
|
3
|
+
/**
|
4
|
+
* A small struct used for passing around a subset of the information that is
|
5
|
+
* stored on the parser. We use this to avoid having static literals explicitly
|
6
|
+
* depend on the parser struct.
|
7
|
+
*/
|
8
|
+
typedef struct {
|
9
|
+
/** The list of newline offsets to use to calculate line numbers. */
|
10
|
+
const pm_newline_list_t *newline_list;
|
11
|
+
|
12
|
+
/** The line number that the parser starts on. */
|
13
|
+
int32_t start_line;
|
14
|
+
|
15
|
+
/** The name of the encoding that the parser is using. */
|
16
|
+
const char *encoding_name;
|
17
|
+
} pm_static_literals_metadata_t;
|
18
|
+
|
19
|
+
/**
 * Mix a single 32-bit segment for the murmur hash: multiply by a constant,
 * rotate left by 15 bits, then multiply by a second constant.
 */
static inline uint32_t
murmur_scramble(uint32_t value) {
    const uint32_t multiplied = value * 0xcc9e2d51;
    const uint32_t rotated = (multiplied << 15) | (multiplied >> 17);
    return rotated * 0x1b873593;
}
|
26
|
+
|
27
|
+
/**
 * Murmur hash (https://en.wikipedia.org/wiki/MurmurHash) is a fast,
 * non-cryptographic, general-purpose hash function. Speed is the only property
 * we rely on here.
 *
 * @param key The bytes to hash.
 * @param length The number of bytes readable from key.
 * @return The 32-bit hash of the given bytes.
 */
static uint32_t
murmur_hash(const uint8_t *key, size_t length) {
    uint32_t hash = 0x9747b28c;
    const size_t whole_segments = length >> 2;
    const size_t trailing_bytes = length & 3;

    // Consume the input four bytes at a time. memcpy is used so that the key
    // does not need to be aligned for a uint32_t read.
    for (size_t consumed = 0; consumed < whole_segments; consumed++) {
        uint32_t segment;
        memcpy(&segment, key + consumed * sizeof(uint32_t), sizeof(uint32_t));

        hash ^= murmur_scramble(segment);
        hash = (hash << 13) | (hash >> 19);
        hash = hash * 5 + 0xe6546b64;
    }

    // Fold the remaining 0-3 bytes into one final segment, last byte first.
    const uint8_t *tail = key + whole_segments * sizeof(uint32_t);
    uint32_t remainder = 0;
    for (size_t offset = trailing_bytes; offset > 0; offset--) {
        remainder = (remainder << 8) | tail[offset - 1];
    }

    // Mix in the tail and the length, then run the finalization steps.
    hash ^= murmur_scramble(remainder);
    hash ^= (uint32_t) length;
    hash ^= hash >> 16;
    hash *= 0x85ebca6b;
    hash ^= hash >> 13;
    hash *= 0xc2b2ae35;
    hash ^= hash >> 16;

    return hash;
}
|
60
|
+
|
61
|
+
/**
|
62
|
+
* Hash the value of an integer and return it.
|
63
|
+
*/
|
64
|
+
static uint32_t
|
65
|
+
integer_hash(const pm_integer_t *integer) {
|
66
|
+
uint32_t hash;
|
67
|
+
if (integer->values) {
|
68
|
+
hash = murmur_hash((const uint8_t *) integer->values, sizeof(uint32_t) * integer->length);
|
69
|
+
} else {
|
70
|
+
hash = murmur_hash((const uint8_t *) &integer->value, sizeof(uint32_t));
|
71
|
+
}
|
72
|
+
|
73
|
+
if (integer->negative) {
|
74
|
+
hash ^= murmur_scramble((uint32_t) 1);
|
75
|
+
}
|
76
|
+
|
77
|
+
return hash;
|
78
|
+
}
|
79
|
+
|
80
|
+
/**
|
81
|
+
* Return the hash of the given node. It is important that nodes that have
|
82
|
+
* equivalent static literal values have the same hash. This is because we use
|
83
|
+
* these hashes to look for duplicates.
|
84
|
+
*/
|
85
|
+
static uint32_t
|
86
|
+
node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
|
87
|
+
switch (PM_NODE_TYPE(node)) {
|
88
|
+
case PM_INTEGER_NODE: {
|
89
|
+
// Integers hash their value.
|
90
|
+
const pm_integer_node_t *cast = (const pm_integer_node_t *) node;
|
91
|
+
return integer_hash(&cast->value);
|
92
|
+
}
|
93
|
+
case PM_SOURCE_LINE_NODE: {
|
94
|
+
// Source lines hash their line number.
|
95
|
+
const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line);
|
96
|
+
const int32_t *value = &line_column.line;
|
97
|
+
return murmur_hash((const uint8_t *) value, sizeof(int32_t));
|
98
|
+
}
|
99
|
+
case PM_FLOAT_NODE: {
|
100
|
+
// Floats hash their value.
|
101
|
+
const double *value = &((const pm_float_node_t *) node)->value;
|
102
|
+
return murmur_hash((const uint8_t *) value, sizeof(double));
|
103
|
+
}
|
104
|
+
case PM_RATIONAL_NODE: {
|
105
|
+
// Rationals hash their numerator and denominator.
|
106
|
+
const pm_rational_node_t *cast = (const pm_rational_node_t *) node;
|
107
|
+
return integer_hash(&cast->numerator) ^ integer_hash(&cast->denominator) ^ murmur_scramble((uint32_t) cast->base.type);
|
108
|
+
}
|
109
|
+
case PM_IMAGINARY_NODE: {
|
110
|
+
// Imaginaries hash their numeric value. Because their numeric value
|
111
|
+
// is stored as a subnode, we hash that node and then mix in the
|
112
|
+
// fact that this is an imaginary node.
|
113
|
+
const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
|
114
|
+
return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
|
115
|
+
}
|
116
|
+
case PM_STRING_NODE: {
|
117
|
+
// Strings hash their value and mix in their flags so that different
|
118
|
+
// encodings are not considered equal.
|
119
|
+
const pm_string_t *value = &((const pm_string_node_t *) node)->unescaped;
|
120
|
+
|
121
|
+
pm_node_flags_t flags = node->flags;
|
122
|
+
flags &= (PM_STRING_FLAGS_FORCED_BINARY_ENCODING | PM_STRING_FLAGS_FORCED_UTF8_ENCODING);
|
123
|
+
|
124
|
+
return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) flags);
|
125
|
+
}
|
126
|
+
case PM_SOURCE_FILE_NODE: {
|
127
|
+
// Source files hash their value and mix in their flags so that
|
128
|
+
// different encodings are not considered equal.
|
129
|
+
const pm_string_t *value = &((const pm_source_file_node_t *) node)->filepath;
|
130
|
+
return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t));
|
131
|
+
}
|
132
|
+
case PM_REGULAR_EXPRESSION_NODE: {
|
133
|
+
// Regular expressions hash their value and mix in their flags so
|
134
|
+
// that different encodings are not considered equal.
|
135
|
+
const pm_string_t *value = &((const pm_regular_expression_node_t *) node)->unescaped;
|
136
|
+
return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
|
137
|
+
}
|
138
|
+
case PM_SYMBOL_NODE: {
|
139
|
+
// Symbols hash their value and mix in their flags so that different
|
140
|
+
// encodings are not considered equal.
|
141
|
+
const pm_string_t *value = &((const pm_symbol_node_t *) node)->unescaped;
|
142
|
+
return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
|
143
|
+
}
|
144
|
+
default:
|
145
|
+
assert(false && "unreachable");
|
146
|
+
return 0;
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
/**
|
151
|
+
* Insert a node into the node hash. It accepts the hash that should hold the
|
152
|
+
* new node, the parser that generated the node, the node to insert, and a
|
153
|
+
* comparison function. The comparison function is used for collision detection,
|
154
|
+
* and must be able to compare all node types that will be stored in this hash.
|
155
|
+
*/
|
156
|
+
static pm_node_t *
|
157
|
+
pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, bool replace, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
|
158
|
+
// If we are out of space, we need to resize the hash. This will cause all
|
159
|
+
// of the nodes to be rehashed and reinserted into the new hash.
|
160
|
+
if (hash->size * 2 >= hash->capacity) {
|
161
|
+
// First, allocate space for the new node list.
|
162
|
+
uint32_t new_capacity = hash->capacity == 0 ? 4 : hash->capacity * 2;
|
163
|
+
pm_node_t **new_nodes = xcalloc(new_capacity, sizeof(pm_node_t *));
|
164
|
+
if (new_nodes == NULL) return NULL;
|
165
|
+
|
166
|
+
// It turns out to be more efficient to mask the hash value than to use
|
167
|
+
// the modulo operator. Because our capacities are always powers of two,
|
168
|
+
// we can use a bitwise AND to get the same result as the modulo
|
169
|
+
// operator.
|
170
|
+
uint32_t mask = new_capacity - 1;
|
171
|
+
|
172
|
+
// Now, rehash all of the nodes into the new list.
|
173
|
+
for (uint32_t index = 0; index < hash->capacity; index++) {
|
174
|
+
pm_node_t *node = hash->nodes[index];
|
175
|
+
|
176
|
+
if (node != NULL) {
|
177
|
+
uint32_t index = node_hash(metadata, node) & mask;
|
178
|
+
new_nodes[index] = node;
|
179
|
+
}
|
180
|
+
}
|
181
|
+
|
182
|
+
// Finally, free the old node list and update the hash.
|
183
|
+
xfree(hash->nodes);
|
184
|
+
hash->nodes = new_nodes;
|
185
|
+
hash->capacity = new_capacity;
|
186
|
+
}
|
187
|
+
|
188
|
+
// Now, insert the node into the hash.
|
189
|
+
uint32_t mask = hash->capacity - 1;
|
190
|
+
uint32_t index = node_hash(metadata, node) & mask;
|
191
|
+
|
192
|
+
// We use linear probing to resolve collisions. This means that if the
|
193
|
+
// current index is occupied, we will move to the next index and try again.
|
194
|
+
// We are guaranteed that this will eventually find an empty slot because we
|
195
|
+
// resize the hash when it gets too full.
|
196
|
+
while (hash->nodes[index] != NULL) {
|
197
|
+
if (compare(metadata, hash->nodes[index], node) == 0) break;
|
198
|
+
index = (index + 1) & mask;
|
199
|
+
}
|
200
|
+
|
201
|
+
// If the current index is occupied, we need to return the node that was
|
202
|
+
// already in the hash. Otherwise, we can just increment the size and insert
|
203
|
+
// the new node.
|
204
|
+
pm_node_t *result = hash->nodes[index];
|
205
|
+
|
206
|
+
if (result == NULL) {
|
207
|
+
hash->size++;
|
208
|
+
hash->nodes[index] = node;
|
209
|
+
} else if (replace) {
|
210
|
+
hash->nodes[index] = node;
|
211
|
+
}
|
212
|
+
|
213
|
+
return result;
|
214
|
+
}
|
215
|
+
|
216
|
+
/**
|
217
|
+
* Free the internal memory associated with the given node hash.
|
218
|
+
*/
|
219
|
+
static void
|
220
|
+
pm_node_hash_free(pm_node_hash_t *hash) {
|
221
|
+
if (hash->capacity > 0) xfree(hash->nodes);
|
222
|
+
}
|
223
|
+
|
224
|
+
/**
 * Compare two values that can be compared with a simple numeric comparison.
 * Evaluates to -1 if left is less than right, 1 if left is greater than right,
 * and 0 otherwise. Each argument is parenthesized in the expansion so that
 * arbitrary expressions can be passed safely. Note that each argument may be
 * evaluated more than once, so arguments must not have side effects.
 */
#define PM_NUMERIC_COMPARISON(left, right) (((left) < (right)) ? -1 : ((left) > (right)) ? 1 : 0)
|
228
|
+
|
229
|
+
/**
|
230
|
+
* Return the integer value of the given node as an int64_t.
|
231
|
+
*/
|
232
|
+
static int64_t
|
233
|
+
pm_int64_value(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
|
234
|
+
switch (PM_NODE_TYPE(node)) {
|
235
|
+
case PM_INTEGER_NODE: {
|
236
|
+
const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value;
|
237
|
+
if (integer->values) return integer->negative ? INT64_MIN : INT64_MAX;
|
238
|
+
|
239
|
+
int64_t value = (int64_t) integer->value;
|
240
|
+
return integer->negative ? -value : value;
|
241
|
+
}
|
242
|
+
case PM_SOURCE_LINE_NODE:
|
243
|
+
return (int64_t) pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line;
|
244
|
+
default:
|
245
|
+
assert(false && "unreachable");
|
246
|
+
return 0;
|
247
|
+
}
|
248
|
+
}
|
249
|
+
|
250
|
+
/**
|
251
|
+
* A comparison function for comparing two IntegerNode or SourceLineNode
|
252
|
+
* instances.
|
253
|
+
*/
|
254
|
+
static int
|
255
|
+
pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
|
256
|
+
if (PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) || PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE)) {
|
257
|
+
int64_t left_value = pm_int64_value(metadata, left);
|
258
|
+
int64_t right_value = pm_int64_value(metadata, right);
|
259
|
+
return PM_NUMERIC_COMPARISON(left_value, right_value);
|
260
|
+
}
|
261
|
+
|
262
|
+
const pm_integer_t *left_integer = &((const pm_integer_node_t *) left)->value;
|
263
|
+
const pm_integer_t *right_integer = &((const pm_integer_node_t *) right)->value;
|
264
|
+
return pm_integer_compare(left_integer, right_integer);
|
265
|
+
}
|
266
|
+
|
267
|
+
/**
|
268
|
+
* A comparison function for comparing two FloatNode instances.
|
269
|
+
*/
|
270
|
+
static int
|
271
|
+
pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
|
272
|
+
const double left_value = ((const pm_float_node_t *) left)->value;
|
273
|
+
const double right_value = ((const pm_float_node_t *) right)->value;
|
274
|
+
return PM_NUMERIC_COMPARISON(left_value, right_value);
|
275
|
+
}
|
276
|
+
|
277
|
+
/**
|
278
|
+
* A comparison function for comparing two nodes that have attached numbers.
|
279
|
+
*/
|
280
|
+
static int
|
281
|
+
pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
|
282
|
+
if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) {
|
283
|
+
return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right));
|
284
|
+
}
|
285
|
+
|
286
|
+
switch (PM_NODE_TYPE(left)) {
|
287
|
+
case PM_IMAGINARY_NODE:
|
288
|
+
return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
|
289
|
+
case PM_RATIONAL_NODE: {
|
290
|
+
const pm_rational_node_t *left_rational = (const pm_rational_node_t *) left;
|
291
|
+
const pm_rational_node_t *right_rational = (const pm_rational_node_t *) right;
|
292
|
+
|
293
|
+
int result = pm_integer_compare(&left_rational->denominator, &right_rational->denominator);
|
294
|
+
if (result != 0) return result;
|
295
|
+
|
296
|
+
return pm_integer_compare(&left_rational->numerator, &right_rational->numerator);
|
297
|
+
}
|
298
|
+
case PM_INTEGER_NODE:
|
299
|
+
return pm_compare_integer_nodes(metadata, left, right);
|
300
|
+
case PM_FLOAT_NODE:
|
301
|
+
return pm_compare_float_nodes(metadata, left, right);
|
302
|
+
default:
|
303
|
+
assert(false && "unreachable");
|
304
|
+
return 0;
|
305
|
+
}
|
306
|
+
}
|
307
|
+
|
308
|
+
/**
|
309
|
+
* Return a pointer to the string value of the given node.
|
310
|
+
*/
|
311
|
+
static const pm_string_t *
|
312
|
+
pm_string_value(const pm_node_t *node) {
|
313
|
+
switch (PM_NODE_TYPE(node)) {
|
314
|
+
case PM_STRING_NODE:
|
315
|
+
return &((const pm_string_node_t *) node)->unescaped;
|
316
|
+
case PM_SOURCE_FILE_NODE:
|
317
|
+
return &((const pm_source_file_node_t *) node)->filepath;
|
318
|
+
case PM_SYMBOL_NODE:
|
319
|
+
return &((const pm_symbol_node_t *) node)->unescaped;
|
320
|
+
default:
|
321
|
+
assert(false && "unreachable");
|
322
|
+
return NULL;
|
323
|
+
}
|
324
|
+
}
|
325
|
+
|
326
|
+
/**
|
327
|
+
* A comparison function for comparing two nodes that have attached strings.
|
328
|
+
*/
|
329
|
+
static int
|
330
|
+
pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
|
331
|
+
const pm_string_t *left_string = pm_string_value(left);
|
332
|
+
const pm_string_t *right_string = pm_string_value(right);
|
333
|
+
return pm_string_compare(left_string, right_string);
|
334
|
+
}
|
335
|
+
|
336
|
+
/**
|
337
|
+
* A comparison function for comparing two RegularExpressionNode instances.
|
338
|
+
*/
|
339
|
+
static int
|
340
|
+
pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
|
341
|
+
const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left;
|
342
|
+
const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right;
|
343
|
+
|
344
|
+
int result = pm_string_compare(&left_regexp->unescaped, &right_regexp->unescaped);
|
345
|
+
if (result != 0) return result;
|
346
|
+
|
347
|
+
return PM_NUMERIC_COMPARISON(left_regexp->base.flags, right_regexp->base.flags);
|
348
|
+
}
|
349
|
+
|
350
|
+
#undef PM_NUMERIC_COMPARISON
|
351
|
+
|
352
|
+
/**
|
353
|
+
* Add a node to the set of static literals.
|
354
|
+
*/
|
355
|
+
pm_node_t *
|
356
|
+
pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
|
357
|
+
switch (PM_NODE_TYPE(node)) {
|
358
|
+
case PM_INTEGER_NODE:
|
359
|
+
case PM_SOURCE_LINE_NODE:
|
360
|
+
return pm_node_hash_insert(
|
361
|
+
&literals->integer_nodes,
|
362
|
+
&(pm_static_literals_metadata_t) {
|
363
|
+
.newline_list = newline_list,
|
364
|
+
.start_line = start_line,
|
365
|
+
.encoding_name = NULL
|
366
|
+
},
|
367
|
+
node,
|
368
|
+
replace,
|
369
|
+
pm_compare_integer_nodes
|
370
|
+
);
|
371
|
+
case PM_FLOAT_NODE:
|
372
|
+
return pm_node_hash_insert(
|
373
|
+
&literals->float_nodes,
|
374
|
+
&(pm_static_literals_metadata_t) {
|
375
|
+
.newline_list = newline_list,
|
376
|
+
.start_line = start_line,
|
377
|
+
.encoding_name = NULL
|
378
|
+
},
|
379
|
+
node,
|
380
|
+
replace,
|
381
|
+
pm_compare_float_nodes
|
382
|
+
);
|
383
|
+
case PM_RATIONAL_NODE:
|
384
|
+
case PM_IMAGINARY_NODE:
|
385
|
+
return pm_node_hash_insert(
|
386
|
+
&literals->number_nodes,
|
387
|
+
&(pm_static_literals_metadata_t) {
|
388
|
+
.newline_list = newline_list,
|
389
|
+
.start_line = start_line,
|
390
|
+
.encoding_name = NULL
|
391
|
+
},
|
392
|
+
node,
|
393
|
+
replace,
|
394
|
+
pm_compare_number_nodes
|
395
|
+
);
|
396
|
+
case PM_STRING_NODE:
|
397
|
+
case PM_SOURCE_FILE_NODE:
|
398
|
+
return pm_node_hash_insert(
|
399
|
+
&literals->string_nodes,
|
400
|
+
&(pm_static_literals_metadata_t) {
|
401
|
+
.newline_list = newline_list,
|
402
|
+
.start_line = start_line,
|
403
|
+
.encoding_name = NULL
|
404
|
+
},
|
405
|
+
node,
|
406
|
+
replace,
|
407
|
+
pm_compare_string_nodes
|
408
|
+
);
|
409
|
+
case PM_REGULAR_EXPRESSION_NODE:
|
410
|
+
return pm_node_hash_insert(
|
411
|
+
&literals->regexp_nodes,
|
412
|
+
&(pm_static_literals_metadata_t) {
|
413
|
+
.newline_list = newline_list,
|
414
|
+
.start_line = start_line,
|
415
|
+
.encoding_name = NULL
|
416
|
+
},
|
417
|
+
node,
|
418
|
+
replace,
|
419
|
+
pm_compare_regular_expression_nodes
|
420
|
+
);
|
421
|
+
case PM_SYMBOL_NODE:
|
422
|
+
return pm_node_hash_insert(
|
423
|
+
&literals->symbol_nodes,
|
424
|
+
&(pm_static_literals_metadata_t) {
|
425
|
+
.newline_list = newline_list,
|
426
|
+
.start_line = start_line,
|
427
|
+
.encoding_name = NULL
|
428
|
+
},
|
429
|
+
node,
|
430
|
+
replace,
|
431
|
+
pm_compare_string_nodes
|
432
|
+
);
|
433
|
+
case PM_TRUE_NODE: {
|
434
|
+
pm_node_t *duplicated = literals->true_node;
|
435
|
+
if ((duplicated == NULL) || replace) literals->true_node = node;
|
436
|
+
return duplicated;
|
437
|
+
}
|
438
|
+
case PM_FALSE_NODE: {
|
439
|
+
pm_node_t *duplicated = literals->false_node;
|
440
|
+
if ((duplicated == NULL) || replace) literals->false_node = node;
|
441
|
+
return duplicated;
|
442
|
+
}
|
443
|
+
case PM_NIL_NODE: {
|
444
|
+
pm_node_t *duplicated = literals->nil_node;
|
445
|
+
if ((duplicated == NULL) || replace) literals->nil_node = node;
|
446
|
+
return duplicated;
|
447
|
+
}
|
448
|
+
case PM_SOURCE_ENCODING_NODE: {
|
449
|
+
pm_node_t *duplicated = literals->source_encoding_node;
|
450
|
+
if ((duplicated == NULL) || replace) literals->source_encoding_node = node;
|
451
|
+
return duplicated;
|
452
|
+
}
|
453
|
+
default:
|
454
|
+
return NULL;
|
455
|
+
}
|
456
|
+
}
|
457
|
+
|
458
|
+
/**
|
459
|
+
* Free the internal memory associated with the given static literals set.
|
460
|
+
*/
|
461
|
+
void
|
462
|
+
pm_static_literals_free(pm_static_literals_t *literals) {
|
463
|
+
pm_node_hash_free(&literals->integer_nodes);
|
464
|
+
pm_node_hash_free(&literals->float_nodes);
|
465
|
+
pm_node_hash_free(&literals->number_nodes);
|
466
|
+
pm_node_hash_free(&literals->string_nodes);
|
467
|
+
pm_node_hash_free(&literals->regexp_nodes);
|
468
|
+
pm_node_hash_free(&literals->symbol_nodes);
|
469
|
+
}
|
470
|
+
|
471
|
+
/**
|
472
|
+
* A helper to determine if the given node is a static literal that is positive.
|
473
|
+
* This is used for formatting imaginary nodes.
|
474
|
+
*/
|
475
|
+
static bool
|
476
|
+
pm_static_literal_positive_p(const pm_node_t *node) {
|
477
|
+
switch (PM_NODE_TYPE(node)) {
|
478
|
+
case PM_FLOAT_NODE:
|
479
|
+
return ((const pm_float_node_t *) node)->value > 0;
|
480
|
+
case PM_INTEGER_NODE:
|
481
|
+
return !((const pm_integer_node_t *) node)->value.negative;
|
482
|
+
case PM_RATIONAL_NODE:
|
483
|
+
return !((const pm_rational_node_t *) node)->numerator.negative;
|
484
|
+
case PM_IMAGINARY_NODE:
|
485
|
+
return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
|
486
|
+
default:
|
487
|
+
assert(false && "unreachable");
|
488
|
+
return false;
|
489
|
+
}
|
490
|
+
}
|
491
|
+
|
492
|
+
/**
|
493
|
+
* Create a string-based representation of the given static literal.
|
494
|
+
*/
|
495
|
+
static inline void
|
496
|
+
pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
|
497
|
+
switch (PM_NODE_TYPE(node)) {
|
498
|
+
case PM_FALSE_NODE:
|
499
|
+
pm_buffer_append_string(buffer, "false", 5);
|
500
|
+
break;
|
501
|
+
case PM_FLOAT_NODE: {
|
502
|
+
const double value = ((const pm_float_node_t *) node)->value;
|
503
|
+
|
504
|
+
if (PRISM_ISINF(value)) {
|
505
|
+
if (*node->location.start == '-') {
|
506
|
+
pm_buffer_append_byte(buffer, '-');
|
507
|
+
}
|
508
|
+
pm_buffer_append_string(buffer, "Infinity", 8);
|
509
|
+
} else if (value == 0.0) {
|
510
|
+
if (*node->location.start == '-') {
|
511
|
+
pm_buffer_append_byte(buffer, '-');
|
512
|
+
}
|
513
|
+
pm_buffer_append_string(buffer, "0.0", 3);
|
514
|
+
} else {
|
515
|
+
pm_buffer_append_format(buffer, "%g", value);
|
516
|
+
|
517
|
+
// %g will not insert a .0 for 1e100 (we'll get back 1e+100). So
|
518
|
+
// we check for the decimal point and add it in here if it's not
|
519
|
+
// present.
|
520
|
+
if (pm_buffer_index(buffer, '.') == SIZE_MAX) {
|
521
|
+
size_t exponent_index = pm_buffer_index(buffer, 'e');
|
522
|
+
size_t index = exponent_index == SIZE_MAX ? pm_buffer_length(buffer) : exponent_index;
|
523
|
+
pm_buffer_insert(buffer, index, ".0", 2);
|
524
|
+
}
|
525
|
+
}
|
526
|
+
|
527
|
+
break;
|
528
|
+
}
|
529
|
+
case PM_IMAGINARY_NODE: {
|
530
|
+
const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
|
531
|
+
pm_buffer_append_string(buffer, "(0", 2);
|
532
|
+
if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
|
533
|
+
pm_static_literal_inspect_node(buffer, metadata, numeric);
|
534
|
+
if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) {
|
535
|
+
pm_buffer_append_byte(buffer, '*');
|
536
|
+
}
|
537
|
+
pm_buffer_append_string(buffer, "i)", 2);
|
538
|
+
break;
|
539
|
+
}
|
540
|
+
case PM_INTEGER_NODE:
|
541
|
+
pm_integer_string(buffer, &((const pm_integer_node_t *) node)->value);
|
542
|
+
break;
|
543
|
+
case PM_NIL_NODE:
|
544
|
+
pm_buffer_append_string(buffer, "nil", 3);
|
545
|
+
break;
|
546
|
+
case PM_RATIONAL_NODE: {
|
547
|
+
const pm_rational_node_t *rational = (const pm_rational_node_t *) node;
|
548
|
+
pm_buffer_append_byte(buffer, '(');
|
549
|
+
pm_integer_string(buffer, &rational->numerator);
|
550
|
+
pm_buffer_append_byte(buffer, '/');
|
551
|
+
pm_integer_string(buffer, &rational->denominator);
|
552
|
+
pm_buffer_append_byte(buffer, ')');
|
553
|
+
break;
|
554
|
+
}
|
555
|
+
case PM_REGULAR_EXPRESSION_NODE: {
|
556
|
+
const pm_string_t *unescaped = &((const pm_regular_expression_node_t *) node)->unescaped;
|
557
|
+
pm_buffer_append_byte(buffer, '/');
|
558
|
+
pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
|
559
|
+
pm_buffer_append_byte(buffer, '/');
|
560
|
+
|
561
|
+
if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) pm_buffer_append_string(buffer, "m", 1);
|
562
|
+
if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) pm_buffer_append_string(buffer, "i", 1);
|
563
|
+
if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) pm_buffer_append_string(buffer, "x", 1);
|
564
|
+
if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) pm_buffer_append_string(buffer, "n", 1);
|
565
|
+
|
566
|
+
break;
|
567
|
+
}
|
568
|
+
case PM_SOURCE_ENCODING_NODE:
|
569
|
+
pm_buffer_append_format(buffer, "#<Encoding:%s>", metadata->encoding_name);
|
570
|
+
break;
|
571
|
+
case PM_SOURCE_FILE_NODE: {
|
572
|
+
const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath;
|
573
|
+
pm_buffer_append_byte(buffer, '"');
|
574
|
+
pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_RUBY);
|
575
|
+
pm_buffer_append_byte(buffer, '"');
|
576
|
+
break;
|
577
|
+
}
|
578
|
+
case PM_SOURCE_LINE_NODE:
|
579
|
+
pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line);
|
580
|
+
break;
|
581
|
+
case PM_STRING_NODE: {
|
582
|
+
const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
|
583
|
+
pm_buffer_append_byte(buffer, '"');
|
584
|
+
pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
|
585
|
+
pm_buffer_append_byte(buffer, '"');
|
586
|
+
break;
|
587
|
+
}
|
588
|
+
case PM_SYMBOL_NODE: {
|
589
|
+
const pm_string_t *unescaped = &((const pm_symbol_node_t *) node)->unescaped;
|
590
|
+
pm_buffer_append_byte(buffer, ':');
|
591
|
+
pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
|
592
|
+
break;
|
593
|
+
}
|
594
|
+
case PM_TRUE_NODE:
|
595
|
+
pm_buffer_append_string(buffer, "true", 4);
|
596
|
+
break;
|
597
|
+
default:
|
598
|
+
assert(false && "unreachable");
|
599
|
+
break;
|
600
|
+
}
|
601
|
+
}
|
602
|
+
|
603
|
+
/**
|
604
|
+
* Create a string-based representation of the given static literal.
|
605
|
+
*/
|
606
|
+
void
|
607
|
+
pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
|
608
|
+
pm_static_literal_inspect_node(
|
609
|
+
buffer,
|
610
|
+
&(pm_static_literals_metadata_t) {
|
611
|
+
.newline_list = newline_list,
|
612
|
+
.start_line = start_line,
|
613
|
+
.encoding_name = encoding_name
|
614
|
+
},
|
615
|
+
node
|
616
|
+
);
|
617
|
+
}
|