yarp 0.10.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -1
- data/CONTRIBUTING.md +7 -0
- data/config.yml +154 -43
- data/docs/configuration.md +0 -1
- data/docs/mapping.md +91 -91
- data/docs/serialization.md +23 -20
- data/ext/yarp/api_node.c +1074 -391
- data/ext/yarp/extension.c +1 -1
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +501 -301
- data/include/yarp/diagnostic.h +198 -1
- data/include/yarp/node.h +0 -4
- data/include/yarp/util/yp_char.h +1 -1
- data/include/yarp/util/yp_constant_pool.h +11 -4
- data/include/yarp/version.h +2 -2
- data/lib/yarp/desugar_visitor.rb +19 -19
- data/lib/yarp/mutation_visitor.rb +22 -12
- data/lib/yarp/node.rb +2883 -293
- data/lib/yarp/parse_result/comments.rb +172 -0
- data/lib/yarp/parse_result/newlines.rb +60 -0
- data/lib/yarp/pattern.rb +239 -0
- data/lib/yarp/serialize.rb +152 -129
- data/lib/yarp.rb +104 -44
- data/src/diagnostic.c +254 -2
- data/src/node.c +901 -868
- data/src/prettyprint.c +380 -186
- data/src/serialize.c +325 -170
- data/src/unescape.c +20 -20
- data/src/util/yp_char.c +2 -7
- data/src/util/yp_constant_pool.c +41 -8
- data/src/util/yp_newline_list.c +5 -1
- data/src/util/yp_string_list.c +4 -1
- data/src/yarp.c +946 -818
- data/yarp.gemspec +4 -1
- metadata +6 -3
data/src/unescape.c
CHANGED
@@ -94,7 +94,7 @@ static inline size_t
|
|
94
94
|
unescape_hexadecimal(const uint8_t *backslash, uint8_t *value, const uint8_t *end, yp_list_t *error_list) {
|
95
95
|
*value = 0;
|
96
96
|
if (backslash + 2 >= end || !yp_char_is_hexadecimal_digit(backslash[2])) {
|
97
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
97
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_HEXADECIMAL);
|
98
98
|
return 2;
|
99
99
|
}
|
100
100
|
*value = unescape_hexadecimal_digit(backslash[2]);
|
@@ -157,7 +157,7 @@ unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, cons
|
|
157
157
|
// If we get here, then the value is too big. This is an error, but we don't
|
158
158
|
// want to just crash, so instead we'll add an error to the error list and put
|
159
159
|
// in a replacement character instead.
|
160
|
-
if (error_list) yp_diagnostic_list_append(error_list, start, end,
|
160
|
+
if (error_list) yp_diagnostic_list_append(error_list, start, end, YP_ERR_ESCAPE_INVALID_UNICODE);
|
161
161
|
dest[0] = 0xEF;
|
162
162
|
dest[1] = 0xBF;
|
163
163
|
dest[2] = 0xBD;
|
@@ -235,7 +235,7 @@ unescape(
|
|
235
235
|
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
236
236
|
case 'u': {
|
237
237
|
if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
|
238
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
238
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS);
|
239
239
|
return backslash + 2;
|
240
240
|
}
|
241
241
|
|
@@ -252,11 +252,11 @@ unescape(
|
|
252
252
|
|
253
253
|
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
|
254
254
|
if (hexadecimal_length > 6) {
|
255
|
-
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length,
|
255
|
+
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, YP_ERR_ESCAPE_INVALID_UNICODE_LONG);
|
256
256
|
}
|
257
257
|
// there are not hexadecimal characters
|
258
258
|
else if (hexadecimal_length == 0) {
|
259
|
-
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length,
|
259
|
+
if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, YP_ERR_ESCAPE_INVALID_UNICODE);
|
260
260
|
return unicode_cursor;
|
261
261
|
}
|
262
262
|
|
@@ -277,13 +277,13 @@ unescape(
|
|
277
277
|
|
278
278
|
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
|
279
279
|
if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1) {
|
280
|
-
if (error_list) yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1,
|
280
|
+
if (error_list) yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, YP_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
|
281
281
|
}
|
282
282
|
|
283
283
|
if (unicode_cursor < end && *unicode_cursor == '}') {
|
284
284
|
unicode_cursor++;
|
285
285
|
} else {
|
286
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, unicode_cursor,
|
286
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, unicode_cursor, YP_ERR_ESCAPE_INVALID_UNICODE_TERM);
|
287
287
|
}
|
288
288
|
|
289
289
|
return unicode_cursor;
|
@@ -298,7 +298,7 @@ unescape(
|
|
298
298
|
return backslash + 6;
|
299
299
|
}
|
300
300
|
|
301
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
301
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_UNICODE);
|
302
302
|
return backslash + 2;
|
303
303
|
}
|
304
304
|
// \c\M-x meta control character, where x is an ASCII printable character
|
@@ -306,12 +306,12 @@ unescape(
|
|
306
306
|
// \cx control character, where x is an ASCII printable character
|
307
307
|
case 'c':
|
308
308
|
if (backslash + 2 >= end) {
|
309
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
309
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
|
310
310
|
return end;
|
311
311
|
}
|
312
312
|
|
313
313
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
314
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
314
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
315
315
|
return backslash + 2;
|
316
316
|
}
|
317
317
|
|
@@ -325,7 +325,7 @@ unescape(
|
|
325
325
|
return backslash + 3;
|
326
326
|
default: {
|
327
327
|
if (!char_is_ascii_printable(backslash[2])) {
|
328
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
328
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
|
329
329
|
return backslash + 2;
|
330
330
|
}
|
331
331
|
|
@@ -339,17 +339,17 @@ unescape(
|
|
339
339
|
// \C-? delete, ASCII 7Fh (DEL)
|
340
340
|
case 'C':
|
341
341
|
if (backslash + 3 >= end) {
|
342
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
342
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
|
343
343
|
return end;
|
344
344
|
}
|
345
345
|
|
346
346
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
347
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
347
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
348
348
|
return backslash + 2;
|
349
349
|
}
|
350
350
|
|
351
351
|
if (backslash[2] != '-') {
|
352
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
352
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
|
353
353
|
return backslash + 2;
|
354
354
|
}
|
355
355
|
|
@@ -363,7 +363,7 @@ unescape(
|
|
363
363
|
return backslash + 4;
|
364
364
|
default:
|
365
365
|
if (!char_is_ascii_printable(backslash[3])) {
|
366
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
366
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_CONTROL);
|
367
367
|
return backslash + 2;
|
368
368
|
}
|
369
369
|
|
@@ -377,17 +377,17 @@ unescape(
|
|
377
377
|
// \M-x meta character, where x is an ASCII printable character
|
378
378
|
case 'M': {
|
379
379
|
if (backslash + 3 >= end) {
|
380
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1,
|
380
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_META);
|
381
381
|
return end;
|
382
382
|
}
|
383
383
|
|
384
384
|
if (flags & YP_UNESCAPE_FLAG_META) {
|
385
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
385
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META_REPEAT);
|
386
386
|
return backslash + 2;
|
387
387
|
}
|
388
388
|
|
389
389
|
if (backslash[2] != '-') {
|
390
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
390
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META);
|
391
391
|
return backslash + 2;
|
392
392
|
}
|
393
393
|
|
@@ -402,7 +402,7 @@ unescape(
|
|
402
402
|
return backslash + 4;
|
403
403
|
}
|
404
404
|
|
405
|
-
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2,
|
405
|
+
if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META);
|
406
406
|
return backslash + 3;
|
407
407
|
}
|
408
408
|
// \n
|
@@ -474,7 +474,7 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
|
|
474
474
|
// within the string.
|
475
475
|
uint8_t *allocated = malloc(string->length);
|
476
476
|
if (allocated == NULL) {
|
477
|
-
yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length,
|
477
|
+
yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, YP_ERR_MALLOC_FAILED);
|
478
478
|
return;
|
479
479
|
}
|
480
480
|
|
data/src/util/yp_char.c
CHANGED
@@ -75,7 +75,7 @@ yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
|
|
75
75
|
// whitespace while also tracking the location of each newline. Disallows
|
76
76
|
// searching past the given maximum number of characters.
|
77
77
|
size_t
|
78
|
-
yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list
|
78
|
+
yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list) {
|
79
79
|
if (length <= 0) return 0;
|
80
80
|
|
81
81
|
size_t size = 0;
|
@@ -83,12 +83,7 @@ yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newlin
|
|
83
83
|
|
84
84
|
while (size < maximum && (yp_byte_table[string[size]] & YP_CHAR_BIT_WHITESPACE)) {
|
85
85
|
if (string[size] == '\n') {
|
86
|
-
|
87
|
-
return size + 1;
|
88
|
-
}
|
89
|
-
else {
|
90
|
-
yp_newline_list_append(newline_list, string + size);
|
91
|
-
}
|
86
|
+
yp_newline_list_append(newline_list, string + size);
|
92
87
|
}
|
93
88
|
|
94
89
|
size++;
|
data/src/util/yp_constant_pool.c
CHANGED
@@ -106,12 +106,11 @@ yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity) {
|
|
106
106
|
return true;
|
107
107
|
}
|
108
108
|
|
109
|
-
// Insert a constant into a constant pool
|
110
|
-
|
111
|
-
yp_constant_id_t
|
109
|
+
// Insert a constant into a constant pool and return its index in the pool.
|
110
|
+
static size_t
|
112
111
|
yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
113
112
|
if (pool->size >= (pool->capacity / 4 * 3)) {
|
114
|
-
if (!yp_constant_pool_resize(pool)) return
|
113
|
+
if (!yp_constant_pool_resize(pool)) return pool->capacity;
|
115
114
|
}
|
116
115
|
|
117
116
|
size_t hash = yp_constant_pool_hash(start, length);
|
@@ -123,25 +122,59 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t l
|
|
123
122
|
// same as the content we are trying to insert. If it is, then we can
|
124
123
|
// return the id of the existing constant.
|
125
124
|
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
|
126
|
-
return
|
125
|
+
return index;
|
127
126
|
}
|
128
127
|
|
129
128
|
index = (index + 1) % pool->capacity;
|
130
129
|
}
|
131
130
|
|
132
|
-
|
131
|
+
pool->size++;
|
132
|
+
assert(pool->size < ((size_t) (1 << 31)));
|
133
|
+
|
133
134
|
pool->constants[index] = (yp_constant_t) {
|
134
|
-
.id =
|
135
|
+
.id = (unsigned int) (pool->size & 0x7FFFFFFF),
|
135
136
|
.start = start,
|
136
137
|
.length = length,
|
137
138
|
.hash = hash
|
138
139
|
};
|
139
140
|
|
140
|
-
return
|
141
|
+
return index;
|
142
|
+
}
|
143
|
+
|
144
|
+
// Insert a constant into a constant pool. Returns the id of the constant, or 0
|
145
|
+
// if any potential calls to resize fail.
|
146
|
+
yp_constant_id_t
|
147
|
+
yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
148
|
+
size_t index = yp_constant_pool_insert(pool, start, length);
|
149
|
+
return index == pool->capacity ? 0 : ((yp_constant_id_t) pool->constants[index].id);
|
150
|
+
}
|
151
|
+
|
152
|
+
// Insert a constant into a constant pool from memory that is now owned by the
|
153
|
+
// constant pool. Returns the id of the constant, or 0 if any potential calls to
|
154
|
+
// resize fail.
|
155
|
+
yp_constant_id_t
|
156
|
+
yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
157
|
+
size_t index = yp_constant_pool_insert(pool, start, length);
|
158
|
+
if (index == pool->capacity) return 0;
|
159
|
+
|
160
|
+
yp_constant_t *constant = &pool->constants[index];
|
161
|
+
constant->owned = true;
|
162
|
+
return ((yp_constant_id_t) constant->id);
|
141
163
|
}
|
142
164
|
|
143
165
|
// Free the memory associated with a constant pool.
|
144
166
|
void
|
145
167
|
yp_constant_pool_free(yp_constant_pool_t *pool) {
|
168
|
+
// For each constant in the current constant pool, free the contents if the
|
169
|
+
// contents are owned.
|
170
|
+
for (uint32_t index = 0; index < pool->capacity; index++) {
|
171
|
+
yp_constant_t *constant = &pool->constants[index];
|
172
|
+
|
173
|
+
// If an id is set on this constant, then we know we have content here.
|
174
|
+
if (constant->id != 0 && constant->owned) {
|
175
|
+
free((void *) constant->start);
|
176
|
+
}
|
177
|
+
}
|
178
|
+
|
146
179
|
free(pool->constants);
|
147
180
|
}
|
data/src/util/yp_newline_list.c
CHANGED
@@ -25,8 +25,12 @@ yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capac
|
|
25
25
|
bool
|
26
26
|
yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) {
|
27
27
|
if (list->size == list->capacity) {
|
28
|
+
size_t *original_offsets = list->offsets;
|
29
|
+
|
28
30
|
list->capacity = (list->capacity * 3) / 2;
|
29
|
-
list->offsets = (size_t *)
|
31
|
+
list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
|
32
|
+
memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
|
33
|
+
free(original_offsets);
|
30
34
|
if (list->offsets == NULL) return false;
|
31
35
|
}
|
32
36
|
|
data/src/util/yp_string_list.c
CHANGED
@@ -12,8 +12,11 @@ yp_string_list_init(yp_string_list_t *string_list) {
|
|
12
12
|
void
|
13
13
|
yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
|
14
14
|
if (string_list->length + 1 > string_list->capacity) {
|
15
|
+
yp_string_t *original_string = string_list->strings;
|
15
16
|
string_list->capacity *= 2;
|
16
|
-
string_list->strings = (yp_string_t *)
|
17
|
+
string_list->strings = (yp_string_t *) malloc(string_list->capacity * sizeof(yp_string_t));
|
18
|
+
memcpy(string_list->strings, original_string, (string_list->length) * sizeof(yp_string_t));
|
19
|
+
free(original_string);
|
17
20
|
}
|
18
21
|
|
19
22
|
string_list->strings[string_list->length++] = *string;
|