yarp 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -1
- data/CONTRIBUTING.md +7 -0
- data/config.yml +154 -43
- data/docs/configuration.md +0 -1
- data/docs/mapping.md +91 -91
- data/docs/serialization.md +23 -20
- data/ext/yarp/api_node.c +1074 -391
- data/ext/yarp/extension.c +1 -1
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +501 -301
- data/include/yarp/diagnostic.h +198 -1
- data/include/yarp/node.h +0 -4
- data/include/yarp/util/yp_char.h +1 -1
- data/include/yarp/util/yp_constant_pool.h +11 -4
- data/include/yarp/version.h +2 -2
- data/lib/yarp/desugar_visitor.rb +19 -19
- data/lib/yarp/mutation_visitor.rb +22 -12
- data/lib/yarp/node.rb +2883 -293
- data/lib/yarp/parse_result/comments.rb +172 -0
- data/lib/yarp/parse_result/newlines.rb +60 -0
- data/lib/yarp/pattern.rb +239 -0
- data/lib/yarp/serialize.rb +152 -129
- data/lib/yarp.rb +104 -44
- data/src/diagnostic.c +254 -2
- data/src/node.c +901 -868
- data/src/prettyprint.c +380 -186
- data/src/serialize.c +325 -170
- data/src/unescape.c +20 -20
- data/src/util/yp_char.c +2 -7
- data/src/util/yp_constant_pool.c +41 -8
- data/src/util/yp_newline_list.c +5 -1
- data/src/util/yp_string_list.c +4 -1
- data/src/yarp.c +946 -818
- data/yarp.gemspec +4 -1
- metadata +6 -3
data/src/unescape.c
CHANGED
@@ -94,7 +94,7 @@ static inline size_t
|
|
94
94
|
unescape_hexadecimal(const uint8_t *backslash, uint8_t *value, const uint8_t *end, yp_list_t *error_list) {
|
95
95
|
*value = 0;
|
96
96
|
if (backslash + 2 >= end || !yp_char_is_hexadecimal_digit(backslash[2])) {
|
97
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
97
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_HEXADECIMAL);
|
98
98
|
return 2;
|
99
99
|
}
|
100
100
|
*value = unescape_hexadecimal_digit(backslash[2]);
|
@@ -157,7 +157,7 @@ unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, cons
|
|
157
157
|
// If we get here, then the value is too big. This is an error, but we don't
|
158
158
|
// want to just crash, so instead we'll add an error to the error list and put
|
159
159
|
// in a replacement character instead.
|
160
|
-
if (error_list) yp_diagnostic_list_append(error_list, start, end,
|
160
|
+
if (error_list) yp_diagnostic_list_append(error_list, start, end, YP_ERR_ESCAPE_INVALID_UNICODE);
|
161
161
|
dest[0] = 0xEF;
|
162
162
|
dest[1] = 0xBF;
|
163
163
|
dest[2] = 0xBD;
|
@@ -235,7 +235,7 @@ unescape(
|
|
235
235
|
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
236
236
|
case 'u': {
|
237
237
|
if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
|
238
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
238
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS);
|
239
239
|
return backslash + 2;
|
240
240
|
}
|
241
241
|
|
@@ -252,11 +252,11 @@ unescape(
|
|
252
252
|
|
253
253
|
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
|
254
254
|
if (hexadecimal_length > 6) {
|
255
|
-
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length,
|
255
|
+
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, YP_ERR_ESCAPE_INVALID_UNICODE_LONG);
|
256
256
|
}
|
257
257
|
// there are not hexadecimal characters
|
258
258
|
else if (hexadecimal_length == 0) {
|
259
|
-
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length,
|
259
|
+
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, YP_ERR_ESCAPE_INVALID_UNICODE);
|
260
260
|
return unicode_cursor;
|
261
261
|
}
|
262
262
|
|
@@ -277,13 +277,13 @@ unescape(
|
|
277
277
|
|
278
278
|
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
|
279
279
|
if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1) {
|
280
|
-
if (error_list) yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1,
|
280
|
+
if (error_list) yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, YP_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
|
281
281
|
}
|
282
282
|
|
283
283
|
if (unicode_cursor < end && *unicode_cursor == '}') {
|
284
284
|
unicode_cursor++;
|
285
285
|
} else {
|
286
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, unicode_cursor,
|
286
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, unicode_cursor, YP_ERR_ESCAPE_INVALID_UNICODE_TERM);
|
287
287
|
}
|
288
288
|
|
289
289
|
return unicode_cursor;
|
@@ -298,7 +298,7 @@ unescape(
|
|
298
298
|
return backslash + 6;
|
299
299
|
}
|
300
300
|
|
301
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
301
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_UNICODE);
|
302
302
|
return backslash + 2;
|
303
303
|
}
|
304
304
|
// \c\M-x meta control character, where x is an ASCII printable character
|
@@ -306,12 +306,12 @@ unescape(
|
|
306
306
|
// \cx control character, where x is an ASCII printable character
|
307
307
|
case 'c':
|
308
308
|
if (backslash + 2 >= end) {
|
309
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
309
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
|
310
310
|
return end;
|
311
311
|
}
|
312
312
|
|
313
313
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
314
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
314
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
315
315
|
return backslash + 2;
|
316
316
|
}
|
317
317
|
|
@@ -325,7 +325,7 @@ unescape(
|
|
325
325
|
return backslash + 3;
|
326
326
|
default: {
|
327
327
|
if (!char_is_ascii_printable(backslash[2])) {
|
328
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
328
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
|
329
329
|
return backslash + 2;
|
330
330
|
}
|
331
331
|
|
@@ -339,17 +339,17 @@ unescape(
|
|
339
339
|
// \C-? delete, ASCII 7Fh (DEL)
|
340
340
|
case 'C':
|
341
341
|
if (backslash + 3 >= end) {
|
342
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
342
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
|
343
343
|
return end;
|
344
344
|
}
|
345
345
|
|
346
346
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
347
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
347
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
348
348
|
return backslash + 2;
|
349
349
|
}
|
350
350
|
|
351
351
|
if (backslash[2] != '-') {
|
352
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
352
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
|
353
353
|
return backslash + 2;
|
354
354
|
}
|
355
355
|
|
@@ -363,7 +363,7 @@ unescape(
|
|
363
363
|
return backslash + 4;
|
364
364
|
default:
|
365
365
|
if (!char_is_ascii_printable(backslash[3])) {
|
366
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
366
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_CONTROL);
|
367
367
|
return backslash + 2;
|
368
368
|
}
|
369
369
|
|
@@ -377,17 +377,17 @@ unescape(
|
|
377
377
|
// \M-x meta character, where x is an ASCII printable character
|
378
378
|
case 'M': {
|
379
379
|
if (backslash + 3 >= end) {
|
380
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
380
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_META);
|
381
381
|
return end;
|
382
382
|
}
|
383
383
|
|
384
384
|
if (flags & YP_UNESCAPE_FLAG_META) {
|
385
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
385
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META_REPEAT);
|
386
386
|
return backslash + 2;
|
387
387
|
}
|
388
388
|
|
389
389
|
if (backslash[2] != '-') {
|
390
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
390
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META);
|
391
391
|
return backslash + 2;
|
392
392
|
}
|
393
393
|
|
@@ -402,7 +402,7 @@ unescape(
|
|
402
402
|
return backslash + 4;
|
403
403
|
}
|
404
404
|
|
405
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
405
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META);
|
406
406
|
return backslash + 3;
|
407
407
|
}
|
408
408
|
// \n
|
@@ -474,7 +474,7 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
|
|
474
474
|
// within the string.
|
475
475
|
uint8_t *allocated = malloc(string->length);
|
476
476
|
if (allocated == NULL) {
|
477
|
-
yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length,
|
477
|
+
yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, YP_ERR_MALLOC_FAILED);
|
478
478
|
return;
|
479
479
|
}
|
480
480
|
|
data/src/util/yp_char.c
CHANGED
@@ -75,7 +75,7 @@ yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
|
|
75
75
|
// whitespace while also tracking the location of each newline. Disallows
|
76
76
|
// searching past the given maximum number of characters.
|
77
77
|
size_t
|
78
|
-
yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list
|
78
|
+
yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list) {
|
79
79
|
if (length <= 0) return 0;
|
80
80
|
|
81
81
|
size_t size = 0;
|
@@ -83,12 +83,7 @@ yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newlin
|
|
83
83
|
|
84
84
|
while (size < maximum && (yp_byte_table[string[size]] & YP_CHAR_BIT_WHITESPACE)) {
|
85
85
|
if (string[size] == '\n') {
|
86
|
-
|
87
|
-
return size + 1;
|
88
|
-
}
|
89
|
-
else {
|
90
|
-
yp_newline_list_append(newline_list, string + size);
|
91
|
-
}
|
86
|
+
yp_newline_list_append(newline_list, string + size);
|
92
87
|
}
|
93
88
|
|
94
89
|
size++;
|
data/src/util/yp_constant_pool.c
CHANGED
@@ -106,12 +106,11 @@ yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity) {
|
|
106
106
|
return true;
|
107
107
|
}
|
108
108
|
|
109
|
-
// Insert a constant into a constant pool
|
110
|
-
|
111
|
-
yp_constant_id_t
|
109
|
+
// Insert a constant into a constant pool and return its index in the pool.
|
110
|
+
static size_t
|
112
111
|
yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
113
112
|
if (pool->size >= (pool->capacity / 4 * 3)) {
|
114
|
-
if (!yp_constant_pool_resize(pool)) return
|
113
|
+
if (!yp_constant_pool_resize(pool)) return pool->capacity;
|
115
114
|
}
|
116
115
|
|
117
116
|
size_t hash = yp_constant_pool_hash(start, length);
|
@@ -123,25 +122,59 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t l
|
|
123
122
|
// same as the content we are trying to insert. If it is, then we can
|
124
123
|
// return the id of the existing constant.
|
125
124
|
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
|
126
|
-
return
|
125
|
+
return index;
|
127
126
|
}
|
128
127
|
|
129
128
|
index = (index + 1) % pool->capacity;
|
130
129
|
}
|
131
130
|
|
132
|
-
|
131
|
+
pool->size++;
|
132
|
+
assert(pool->size < ((size_t) (1 << 31)));
|
133
|
+
|
133
134
|
pool->constants[index] = (yp_constant_t) {
|
134
|
-
.id =
|
135
|
+
.id = (unsigned int) (pool->size & 0x7FFFFFFF),
|
135
136
|
.start = start,
|
136
137
|
.length = length,
|
137
138
|
.hash = hash
|
138
139
|
};
|
139
140
|
|
140
|
-
return
|
141
|
+
return index;
|
142
|
+
}
|
143
|
+
|
144
|
+
// Insert a constant into a constant pool. Returns the id of the constant, or 0
|
145
|
+
// if any potential calls to resize fail.
|
146
|
+
yp_constant_id_t
|
147
|
+
yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
148
|
+
size_t index = yp_constant_pool_insert(pool, start, length);
|
149
|
+
return index == pool->capacity ? 0 : ((yp_constant_id_t) pool->constants[index].id);
|
150
|
+
}
|
151
|
+
|
152
|
+
// Insert a constant into a constant pool from memory that is now owned by the
|
153
|
+
// constant pool. Returns the id of the constant, or 0 if any potential calls to
|
154
|
+
// resize fail.
|
155
|
+
yp_constant_id_t
|
156
|
+
yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
157
|
+
size_t index = yp_constant_pool_insert(pool, start, length);
|
158
|
+
if (index == pool->capacity) return 0;
|
159
|
+
|
160
|
+
yp_constant_t *constant = &pool->constants[index];
|
161
|
+
constant->owned = true;
|
162
|
+
return ((yp_constant_id_t) constant->id);
|
141
163
|
}
|
142
164
|
|
143
165
|
// Free the memory associated with a constant pool.
|
144
166
|
void
|
145
167
|
yp_constant_pool_free(yp_constant_pool_t *pool) {
|
168
|
+
// For each constant in the current constant pool, free the contents if the
|
169
|
+
// contents are owned.
|
170
|
+
for (uint32_t index = 0; index < pool->capacity; index++) {
|
171
|
+
yp_constant_t *constant = &pool->constants[index];
|
172
|
+
|
173
|
+
// If an id is set on this constant, then we know we have content here.
|
174
|
+
if (constant->id != 0 && constant->owned) {
|
175
|
+
free((void *) constant->start);
|
176
|
+
}
|
177
|
+
}
|
178
|
+
|
146
179
|
free(pool->constants);
|
147
180
|
}
|
data/src/util/yp_newline_list.c
CHANGED
@@ -25,8 +25,12 @@ yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capac
|
|
25
25
|
bool
|
26
26
|
yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) {
|
27
27
|
if (list->size == list->capacity) {
|
28
|
+
size_t *original_offsets = list->offsets;
|
29
|
+
|
28
30
|
list->capacity = (list->capacity * 3) / 2;
|
29
|
-
list->offsets = (size_t *)
|
31
|
+
list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
|
32
|
+
memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
|
33
|
+
free(original_offsets);
|
30
34
|
if (list->offsets == NULL) return false;
|
31
35
|
}
|
32
36
|
|
data/src/util/yp_string_list.c
CHANGED
@@ -12,8 +12,11 @@ yp_string_list_init(yp_string_list_t *string_list) {
|
|
12
12
|
void
|
13
13
|
yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
|
14
14
|
if (string_list->length + 1 > string_list->capacity) {
|
15
|
+
yp_string_t *original_string = string_list->strings;
|
15
16
|
string_list->capacity *= 2;
|
16
|
-
string_list->strings = (yp_string_t *)
|
17
|
+
string_list->strings = (yp_string_t *) malloc(string_list->capacity * sizeof(yp_string_t));
|
18
|
+
memcpy(string_list->strings, original_string, (string_list->length) * sizeof(yp_string_t));
|
19
|
+
free(original_string);
|
17
20
|
}
|
18
21
|
|
19
22
|
string_list->strings[string_list->length++] = *string;
|