yarp 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -1
- data/README.md +4 -3
- data/config.yml +332 -52
- data/docs/configuration.md +1 -0
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +1 -1
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +703 -136
- data/ext/yarp/extension.c +73 -24
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +331 -137
- data/include/yarp/node.h +10 -0
- data/include/yarp/unescape.h +4 -2
- data/include/yarp/util/yp_newline_list.h +3 -0
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +10 -0
- data/lib/yarp/desugar_visitor.rb +267 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3042 -508
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +48 -2
- data/src/node.c +421 -185
- data/src/prettyprint.c +262 -80
- data/src/serialize.c +410 -270
- data/src/token_type.c +2 -2
- data/src/unescape.c +69 -51
- data/src/util/yp_newline_list.c +10 -0
- data/src/yarp.c +1208 -458
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/src/token_type.c
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/******************************************************************************/
|
2
|
-
/* This file is generated by the bin/template script and should not be */
|
3
|
-
/* modified manually. See */
|
2
|
+
/* This file is generated by the templates/template.rb script and should not */
|
3
|
+
/* be modified manually. See */
|
4
4
|
/* templates/src/token_type.c.erb */
|
5
5
|
/* if you are looking to modify the */
|
6
6
|
/* template */
|
data/src/unescape.c
CHANGED
@@ -14,6 +14,20 @@ yp_char_is_hexadecimal_digits(const char *c, size_t length) {
|
|
14
14
|
return true;
|
15
15
|
}
|
16
16
|
|
17
|
+
// We don't call the char_width function unless we have to because it's
|
18
|
+
// expensive to go through the indirection of the function pointer. Instead we
|
19
|
+
// provide a fast path that will check if we can just return 1.
|
20
|
+
static inline size_t
|
21
|
+
yp_char_width(yp_parser_t *parser, const char *start, const char *end) {
|
22
|
+
const unsigned char *uc = (const unsigned char *) start;
|
23
|
+
|
24
|
+
if (parser->encoding_changed || (*uc >= 0x80)) {
|
25
|
+
return parser->encoding.char_width(start, end - start);
|
26
|
+
} else {
|
27
|
+
return 1;
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
17
31
|
/******************************************************************************/
|
18
32
|
/* Lookup tables for characters */
|
19
33
|
/******************************************************************************/
|
@@ -178,7 +192,7 @@ unescape_char(const unsigned char value, const unsigned char flags) {
|
|
178
192
|
|
179
193
|
// Read a specific escape sequence into the given destination.
|
180
194
|
static const char *
|
181
|
-
unescape(char *dest, size_t *dest_length, const char *backslash, const char *end, yp_list_t *error_list, const unsigned char flags, bool write_to_str) {
|
195
|
+
unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backslash, const char *end, const unsigned char flags, bool write_to_str) {
|
182
196
|
switch (backslash[1]) {
|
183
197
|
case 'a':
|
184
198
|
case 'b':
|
@@ -218,7 +232,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
218
232
|
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
219
233
|
case 'u': {
|
220
234
|
if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
|
221
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
|
235
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
|
222
236
|
return backslash + 2;
|
223
237
|
}
|
224
238
|
|
@@ -235,11 +249,11 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
235
249
|
|
236
250
|
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
|
237
251
|
if (hexadecimal_length > 6)
|
238
|
-
yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
|
252
|
+
yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
|
239
253
|
|
240
254
|
// there are not hexadecimal characters
|
241
255
|
if (hexadecimal_length == 0) {
|
242
|
-
yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
|
256
|
+
yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
|
243
257
|
return unicode_cursor;
|
244
258
|
}
|
245
259
|
|
@@ -252,7 +266,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
252
266
|
uint32_t value;
|
253
267
|
unescape_unicode(unicode_start, (size_t) (unicode_cursor - unicode_start), &value);
|
254
268
|
if (write_to_str) {
|
255
|
-
*dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, error_list);
|
269
|
+
*dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, &parser->error_list);
|
256
270
|
}
|
257
271
|
|
258
272
|
unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
|
@@ -260,7 +274,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
260
274
|
|
261
275
|
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
|
262
276
|
if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1)
|
263
|
-
yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
|
277
|
+
yp_diagnostic_list_append(&parser->error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
|
264
278
|
|
265
279
|
return unicode_cursor + 1;
|
266
280
|
}
|
@@ -270,12 +284,12 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
270
284
|
unescape_unicode(backslash + 2, 4, &value);
|
271
285
|
|
272
286
|
if (write_to_str) {
|
273
|
-
*dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, error_list);
|
287
|
+
*dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, &parser->error_list);
|
274
288
|
}
|
275
289
|
return backslash + 6;
|
276
290
|
}
|
277
291
|
|
278
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
|
292
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
|
279
293
|
return backslash + 2;
|
280
294
|
}
|
281
295
|
// \c\M-x meta control character, where x is an ASCII printable character
|
@@ -283,18 +297,18 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
283
297
|
// \cx control character, where x is an ASCII printable character
|
284
298
|
case 'c':
|
285
299
|
if (backslash + 2 >= end) {
|
286
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
300
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
287
301
|
return end;
|
288
302
|
}
|
289
303
|
|
290
304
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
291
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
305
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
292
306
|
return backslash + 2;
|
293
307
|
}
|
294
308
|
|
295
309
|
switch (backslash[2]) {
|
296
310
|
case '\\':
|
297
|
-
return unescape(dest, dest_length, backslash + 2, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
311
|
+
return unescape(parser, dest, dest_length, backslash + 2, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
298
312
|
case '?':
|
299
313
|
if (write_to_str) {
|
300
314
|
dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
|
@@ -302,7 +316,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
302
316
|
return backslash + 3;
|
303
317
|
default: {
|
304
318
|
if (!char_is_ascii_printable(backslash[2])) {
|
305
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
319
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
306
320
|
return backslash + 2;
|
307
321
|
}
|
308
322
|
|
@@ -316,23 +330,23 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
316
330
|
// \C-? delete, ASCII 7Fh (DEL)
|
317
331
|
case 'C':
|
318
332
|
if (backslash + 3 >= end) {
|
319
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
333
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
320
334
|
return end;
|
321
335
|
}
|
322
336
|
|
323
337
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
324
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
338
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
325
339
|
return backslash + 2;
|
326
340
|
}
|
327
341
|
|
328
342
|
if (backslash[2] != '-') {
|
329
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
343
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
330
344
|
return backslash + 2;
|
331
345
|
}
|
332
346
|
|
333
347
|
switch (backslash[3]) {
|
334
348
|
case '\\':
|
335
|
-
return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
349
|
+
return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
336
350
|
case '?':
|
337
351
|
if (write_to_str) {
|
338
352
|
dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
|
@@ -340,7 +354,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
340
354
|
return backslash + 4;
|
341
355
|
default:
|
342
356
|
if (!char_is_ascii_printable(backslash[3])) {
|
343
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid control escape sequence");
|
357
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid control escape sequence");
|
344
358
|
return backslash + 2;
|
345
359
|
}
|
346
360
|
|
@@ -354,22 +368,22 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
354
368
|
// \M-x meta character, where x is an ASCII printable character
|
355
369
|
case 'M': {
|
356
370
|
if (backslash + 3 >= end) {
|
357
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
371
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
358
372
|
return end;
|
359
373
|
}
|
360
374
|
|
361
375
|
if (flags & YP_UNESCAPE_FLAG_META) {
|
362
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
|
376
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
|
363
377
|
return backslash + 2;
|
364
378
|
}
|
365
379
|
|
366
380
|
if (backslash[2] != '-') {
|
367
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
381
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
368
382
|
return backslash + 2;
|
369
383
|
}
|
370
384
|
|
371
385
|
if (backslash[3] == '\\') {
|
372
|
-
return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_META, write_to_str);
|
386
|
+
return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_META, write_to_str);
|
373
387
|
}
|
374
388
|
|
375
389
|
if (char_is_ascii_printable(backslash[3])) {
|
@@ -379,7 +393,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
379
393
|
return backslash + 4;
|
380
394
|
}
|
381
395
|
|
382
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
396
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
383
397
|
return backslash + 3;
|
384
398
|
}
|
385
399
|
// \n
|
@@ -390,14 +404,17 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
390
404
|
if (backslash + 2 < end && backslash[2] == '\n') {
|
391
405
|
return backslash + 3;
|
392
406
|
}
|
393
|
-
|
394
|
-
/* fallthrough */
|
407
|
+
/* fallthrough */
|
395
408
|
// In this case we're escaping something that doesn't need escaping.
|
396
409
|
default: {
|
410
|
+
size_t width = yp_char_width(parser, backslash + 1, end);
|
411
|
+
|
397
412
|
if (write_to_str) {
|
398
|
-
dest[(*dest_length)++] = backslash[1];
|
413
|
+
memcpy(dest + *dest_length, backslash + 1, width);
|
414
|
+
*dest_length += width;
|
399
415
|
}
|
400
|
-
|
416
|
+
|
417
|
+
return backslash + 1 + width;
|
401
418
|
}
|
402
419
|
}
|
403
420
|
}
|
@@ -431,7 +448,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
431
448
|
// \c? or \C-? delete, ASCII 7Fh (DEL)
|
432
449
|
//
|
433
450
|
YP_EXPORTED_FUNCTION void
|
434
|
-
yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
|
451
|
+
yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
|
435
452
|
if (unescape_type == YP_UNESCAPE_NONE) {
|
436
453
|
// If we're not unescaping then we can reference the source directly.
|
437
454
|
return;
|
@@ -448,7 +465,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
|
|
448
465
|
// within the string.
|
449
466
|
char *allocated = malloc(string->length);
|
450
467
|
if (allocated == NULL) {
|
451
|
-
yp_diagnostic_list_append(error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
|
468
|
+
yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
|
452
469
|
return;
|
453
470
|
}
|
454
471
|
|
@@ -493,7 +510,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
|
|
493
510
|
// This is the only type of unescaping left. In this case we need to
|
494
511
|
// handle all of the different unescapes.
|
495
512
|
assert(unescape_type == YP_UNESCAPE_ALL);
|
496
|
-
cursor = unescape(dest, &dest_length, backslash, end, error_list, YP_UNESCAPE_FLAG_NONE, true);
|
513
|
+
cursor = unescape(parser, dest, &dest_length, backslash, end, YP_UNESCAPE_FLAG_NONE, true);
|
497
514
|
break;
|
498
515
|
}
|
499
516
|
|
@@ -521,29 +538,11 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
|
|
521
538
|
yp_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
|
522
539
|
}
|
523
540
|
|
524
|
-
YP_EXPORTED_FUNCTION bool
|
525
|
-
yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
|
526
|
-
bool success;
|
527
|
-
|
528
|
-
yp_parser_t parser;
|
529
|
-
yp_parser_init(&parser, start, length, "");
|
530
|
-
|
531
|
-
yp_list_t error_list = YP_LIST_EMPTY;
|
532
|
-
yp_string_shared_init(result, start, start + length);
|
533
|
-
yp_unescape_manipulate_string(&parser, result, unescape_type, &error_list);
|
534
|
-
success = yp_list_empty_p(&error_list);
|
535
|
-
|
536
|
-
yp_list_free(&error_list);
|
537
|
-
yp_parser_free(&parser);
|
538
|
-
|
539
|
-
return success;
|
540
|
-
}
|
541
|
-
|
542
541
|
// This function is similar to yp_unescape_manipulate_string, except it doesn't
|
543
542
|
// actually perform any string manipulations. Instead, it calculates how long
|
544
543
|
// the unescaped character is, and returns that value
|
545
|
-
|
546
|
-
yp_unescape_calculate_difference(
|
544
|
+
size_t
|
545
|
+
yp_unescape_calculate_difference(yp_parser_t *parser, const char *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
|
547
546
|
assert(unescape_type != YP_UNESCAPE_NONE);
|
548
547
|
|
549
548
|
switch (backslash[1]) {
|
@@ -551,7 +550,9 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
|
|
551
550
|
case '\'':
|
552
551
|
return 2;
|
553
552
|
default: {
|
554
|
-
if (unescape_type == YP_UNESCAPE_MINIMAL) return 2;
|
553
|
+
if (unescape_type == YP_UNESCAPE_MINIMAL) {
|
554
|
+
return 1 + yp_char_width(parser, backslash + 1, parser->end);
|
555
|
+
}
|
555
556
|
|
556
557
|
// This is the only type of unescaping left. In this case we need to
|
557
558
|
// handle all of the different unescapes.
|
@@ -561,10 +562,27 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
|
|
561
562
|
if (expect_single_codepoint)
|
562
563
|
flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
|
563
564
|
|
564
|
-
const char *cursor = unescape(NULL, 0, backslash, end, error_list, flags, false);
|
565
|
+
const char *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, false);
|
565
566
|
assert(cursor > backslash);
|
566
567
|
|
567
568
|
return (size_t) (cursor - backslash);
|
568
569
|
}
|
569
570
|
}
|
570
571
|
}
|
572
|
+
|
573
|
+
// This is one of the main entry points into the extension. It accepts a source
|
574
|
+
// string, a type of unescaping, and a pointer to a result string. It returns a
|
575
|
+
// boolean indicating whether or not the unescaping was successful.
|
576
|
+
YP_EXPORTED_FUNCTION bool
|
577
|
+
yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
|
578
|
+
yp_parser_t parser;
|
579
|
+
yp_parser_init(&parser, start, length, NULL);
|
580
|
+
|
581
|
+
yp_string_shared_init(result, start, start + length);
|
582
|
+
yp_unescape_manipulate_string(&parser, result, unescape_type);
|
583
|
+
|
584
|
+
bool success = yp_list_empty_p(&parser.error_list);
|
585
|
+
yp_parser_free(&parser);
|
586
|
+
|
587
|
+
return success;
|
588
|
+
}
|
data/src/util/yp_newline_list.c
CHANGED
@@ -30,6 +30,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
|
|
30
30
|
if (list->offsets == NULL) return false;
|
31
31
|
}
|
32
32
|
|
33
|
+
assert(*cursor == '\n');
|
33
34
|
assert(cursor >= list->start);
|
34
35
|
size_t newline_offset = (size_t) (cursor - list->start + 1);
|
35
36
|
assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
|
@@ -38,6 +39,15 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
|
|
38
39
|
return true;
|
39
40
|
}
|
40
41
|
|
42
|
+
// Conditionally append a new offset to the newline list, if the value passed in is a newline.
|
43
|
+
bool
|
44
|
+
yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor) {
|
45
|
+
if (*cursor != '\n') {
|
46
|
+
return true;
|
47
|
+
}
|
48
|
+
return yp_newline_list_append(list, cursor);
|
49
|
+
}
|
50
|
+
|
41
51
|
// Returns the line and column of the given offset, assuming we don't have any
|
42
52
|
// information about the previous index that we found.
|
43
53
|
static yp_line_column_t
|