yarp 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -1
- data/README.md +4 -3
- data/config.yml +332 -52
- data/docs/configuration.md +1 -0
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +1 -1
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +703 -136
- data/ext/yarp/extension.c +73 -24
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +331 -137
- data/include/yarp/node.h +10 -0
- data/include/yarp/unescape.h +4 -2
- data/include/yarp/util/yp_newline_list.h +3 -0
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +10 -0
- data/lib/yarp/desugar_visitor.rb +267 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3042 -508
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +48 -2
- data/src/node.c +421 -185
- data/src/prettyprint.c +262 -80
- data/src/serialize.c +410 -270
- data/src/token_type.c +2 -2
- data/src/unescape.c +69 -51
- data/src/util/yp_newline_list.c +10 -0
- data/src/yarp.c +1208 -458
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/src/token_type.c
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/******************************************************************************/
|
2
|
-
/* This file is generated by the
|
3
|
-
/* modified manually. See */
|
2
|
+
/* This file is generated by the templates/template.rb script and should not */
|
3
|
+
/* be modified manually. See */
|
4
4
|
/* templates/src/token_type.c.erb */
|
5
5
|
/* if you are looking to modify the */
|
6
6
|
/* template */
|
data/src/unescape.c
CHANGED
@@ -14,6 +14,20 @@ yp_char_is_hexadecimal_digits(const char *c, size_t length) {
|
|
14
14
|
return true;
|
15
15
|
}
|
16
16
|
|
17
|
+
// We don't call the char_width function unless we have to because it's
|
18
|
+
// expensive to go through the indirection of the function pointer. Instead we
|
19
|
+
// provide a fast path that will check if we can just return 1.
|
20
|
+
static inline size_t
|
21
|
+
yp_char_width(yp_parser_t *parser, const char *start, const char *end) {
|
22
|
+
const unsigned char *uc = (const unsigned char *) start;
|
23
|
+
|
24
|
+
if (parser->encoding_changed || (*uc >= 0x80)) {
|
25
|
+
return parser->encoding.char_width(start, end - start);
|
26
|
+
} else {
|
27
|
+
return 1;
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
17
31
|
/******************************************************************************/
|
18
32
|
/* Lookup tables for characters */
|
19
33
|
/******************************************************************************/
|
@@ -178,7 +192,7 @@ unescape_char(const unsigned char value, const unsigned char flags) {
|
|
178
192
|
|
179
193
|
// Read a specific escape sequence into the given destination.
|
180
194
|
static const char *
|
181
|
-
unescape(char *dest, size_t *dest_length, const char *backslash, const char *end, yp_list_t *error_list, const unsigned char flags, bool write_to_str) {
|
195
|
+
unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backslash, const char *end, const unsigned char flags, bool write_to_str) {
|
182
196
|
switch (backslash[1]) {
|
183
197
|
case 'a':
|
184
198
|
case 'b':
|
@@ -218,7 +232,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
218
232
|
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
219
233
|
case 'u': {
|
220
234
|
if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
|
221
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
|
235
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
|
222
236
|
return backslash + 2;
|
223
237
|
}
|
224
238
|
|
@@ -235,11 +249,11 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
235
249
|
|
236
250
|
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
|
237
251
|
if (hexadecimal_length > 6)
|
238
|
-
yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
|
252
|
+
yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
|
239
253
|
|
240
254
|
// there are not hexadecimal characters
|
241
255
|
if (hexadecimal_length == 0) {
|
242
|
-
yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
|
256
|
+
yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
|
243
257
|
return unicode_cursor;
|
244
258
|
}
|
245
259
|
|
@@ -252,7 +266,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
252
266
|
uint32_t value;
|
253
267
|
unescape_unicode(unicode_start, (size_t) (unicode_cursor - unicode_start), &value);
|
254
268
|
if (write_to_str) {
|
255
|
-
*dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, error_list);
|
269
|
+
*dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, &parser->error_list);
|
256
270
|
}
|
257
271
|
|
258
272
|
unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
|
@@ -260,7 +274,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
260
274
|
|
261
275
|
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
|
262
276
|
if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1)
|
263
|
-
yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
|
277
|
+
yp_diagnostic_list_append(&parser->error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
|
264
278
|
|
265
279
|
return unicode_cursor + 1;
|
266
280
|
}
|
@@ -270,12 +284,12 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
270
284
|
unescape_unicode(backslash + 2, 4, &value);
|
271
285
|
|
272
286
|
if (write_to_str) {
|
273
|
-
*dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, error_list);
|
287
|
+
*dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, &parser->error_list);
|
274
288
|
}
|
275
289
|
return backslash + 6;
|
276
290
|
}
|
277
291
|
|
278
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
|
292
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
|
279
293
|
return backslash + 2;
|
280
294
|
}
|
281
295
|
// \c\M-x meta control character, where x is an ASCII printable character
|
@@ -283,18 +297,18 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
283
297
|
// \cx control character, where x is an ASCII printable character
|
284
298
|
case 'c':
|
285
299
|
if (backslash + 2 >= end) {
|
286
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
300
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
287
301
|
return end;
|
288
302
|
}
|
289
303
|
|
290
304
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
291
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
305
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
292
306
|
return backslash + 2;
|
293
307
|
}
|
294
308
|
|
295
309
|
switch (backslash[2]) {
|
296
310
|
case '\\':
|
297
|
-
return unescape(dest, dest_length, backslash + 2, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
311
|
+
return unescape(parser, dest, dest_length, backslash + 2, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
298
312
|
case '?':
|
299
313
|
if (write_to_str) {
|
300
314
|
dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
|
@@ -302,7 +316,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
302
316
|
return backslash + 3;
|
303
317
|
default: {
|
304
318
|
if (!char_is_ascii_printable(backslash[2])) {
|
305
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
319
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
306
320
|
return backslash + 2;
|
307
321
|
}
|
308
322
|
|
@@ -316,23 +330,23 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
316
330
|
// \C-? delete, ASCII 7Fh (DEL)
|
317
331
|
case 'C':
|
318
332
|
if (backslash + 3 >= end) {
|
319
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
333
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
320
334
|
return end;
|
321
335
|
}
|
322
336
|
|
323
337
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
324
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
338
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
325
339
|
return backslash + 2;
|
326
340
|
}
|
327
341
|
|
328
342
|
if (backslash[2] != '-') {
|
329
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
343
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
330
344
|
return backslash + 2;
|
331
345
|
}
|
332
346
|
|
333
347
|
switch (backslash[3]) {
|
334
348
|
case '\\':
|
335
|
-
return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
349
|
+
return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
336
350
|
case '?':
|
337
351
|
if (write_to_str) {
|
338
352
|
dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
|
@@ -340,7 +354,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
340
354
|
return backslash + 4;
|
341
355
|
default:
|
342
356
|
if (!char_is_ascii_printable(backslash[3])) {
|
343
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid control escape sequence");
|
357
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid control escape sequence");
|
344
358
|
return backslash + 2;
|
345
359
|
}
|
346
360
|
|
@@ -354,22 +368,22 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
354
368
|
// \M-x meta character, where x is an ASCII printable character
|
355
369
|
case 'M': {
|
356
370
|
if (backslash + 3 >= end) {
|
357
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
371
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
358
372
|
return end;
|
359
373
|
}
|
360
374
|
|
361
375
|
if (flags & YP_UNESCAPE_FLAG_META) {
|
362
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
|
376
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
|
363
377
|
return backslash + 2;
|
364
378
|
}
|
365
379
|
|
366
380
|
if (backslash[2] != '-') {
|
367
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
381
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
368
382
|
return backslash + 2;
|
369
383
|
}
|
370
384
|
|
371
385
|
if (backslash[3] == '\\') {
|
372
|
-
return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_META, write_to_str);
|
386
|
+
return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_META, write_to_str);
|
373
387
|
}
|
374
388
|
|
375
389
|
if (char_is_ascii_printable(backslash[3])) {
|
@@ -379,7 +393,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
379
393
|
return backslash + 4;
|
380
394
|
}
|
381
395
|
|
382
|
-
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
396
|
+
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
383
397
|
return backslash + 3;
|
384
398
|
}
|
385
399
|
// \n
|
@@ -390,14 +404,17 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
390
404
|
if (backslash + 2 < end && backslash[2] == '\n') {
|
391
405
|
return backslash + 3;
|
392
406
|
}
|
393
|
-
|
394
|
-
/* fallthrough */
|
407
|
+
/* fallthrough */
|
395
408
|
// In this case we're escaping something that doesn't need escaping.
|
396
409
|
default: {
|
410
|
+
size_t width = yp_char_width(parser, backslash + 1, end);
|
411
|
+
|
397
412
|
if (write_to_str) {
|
398
|
-
dest
|
413
|
+
memcpy(dest + *dest_length, backslash + 1, width);
|
414
|
+
*dest_length += width;
|
399
415
|
}
|
400
|
-
|
416
|
+
|
417
|
+
return backslash + 1 + width;
|
401
418
|
}
|
402
419
|
}
|
403
420
|
}
|
@@ -431,7 +448,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
|
431
448
|
// \c? or \C-? delete, ASCII 7Fh (DEL)
|
432
449
|
//
|
433
450
|
YP_EXPORTED_FUNCTION void
|
434
|
-
yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
|
451
|
+
yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
|
435
452
|
if (unescape_type == YP_UNESCAPE_NONE) {
|
436
453
|
// If we're not unescaping then we can reference the source directly.
|
437
454
|
return;
|
@@ -448,7 +465,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
|
|
448
465
|
// within the string.
|
449
466
|
char *allocated = malloc(string->length);
|
450
467
|
if (allocated == NULL) {
|
451
|
-
yp_diagnostic_list_append(error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
|
468
|
+
yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
|
452
469
|
return;
|
453
470
|
}
|
454
471
|
|
@@ -493,7 +510,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
|
|
493
510
|
// This is the only type of unescaping left. In this case we need to
|
494
511
|
// handle all of the different unescapes.
|
495
512
|
assert(unescape_type == YP_UNESCAPE_ALL);
|
496
|
-
cursor = unescape(dest, &dest_length, backslash, end, error_list, YP_UNESCAPE_FLAG_NONE, true);
|
513
|
+
cursor = unescape(parser, dest, &dest_length, backslash, end, YP_UNESCAPE_FLAG_NONE, true);
|
497
514
|
break;
|
498
515
|
}
|
499
516
|
|
@@ -521,29 +538,11 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
|
|
521
538
|
yp_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
|
522
539
|
}
|
523
540
|
|
524
|
-
YP_EXPORTED_FUNCTION bool
|
525
|
-
yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
|
526
|
-
bool success;
|
527
|
-
|
528
|
-
yp_parser_t parser;
|
529
|
-
yp_parser_init(&parser, start, length, "");
|
530
|
-
|
531
|
-
yp_list_t error_list = YP_LIST_EMPTY;
|
532
|
-
yp_string_shared_init(result, start, start + length);
|
533
|
-
yp_unescape_manipulate_string(&parser, result, unescape_type, &error_list);
|
534
|
-
success = yp_list_empty_p(&error_list);
|
535
|
-
|
536
|
-
yp_list_free(&error_list);
|
537
|
-
yp_parser_free(&parser);
|
538
|
-
|
539
|
-
return success;
|
540
|
-
}
|
541
|
-
|
542
541
|
// This function is similar to yp_unescape_manipulate_string, except it doesn't
|
543
542
|
// actually perform any string manipulations. Instead, it calculates how long
|
544
543
|
// the unescaped character is, and returns that value
|
545
|
-
|
546
|
-
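yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list) {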
|
544
|
+
size_t
|
545
|
+
yp_unescape_calculate_difference(yp_parser_t *parser, const char *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
|
547
546
|
assert(unescape_type != YP_UNESCAPE_NONE);
|
548
547
|
|
549
548
|
switch (backslash[1]) {
|
@@ -551,7 +550,9 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
|
|
551
550
|
case '\'':
|
552
551
|
return 2;
|
553
552
|
default: {
|
554
|
-
if (unescape_type == YP_UNESCAPE_MINIMAL)
|
553
|
+
if (unescape_type == YP_UNESCAPE_MINIMAL) {
|
554
|
+
return 1 + yp_char_width(parser, backslash + 1, parser->end);
|
555
|
+
}
|
555
556
|
|
556
557
|
// This is the only type of unescaping left. In this case we need to
|
557
558
|
// handle all of the different unescapes.
|
@@ -561,10 +562,27 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
|
|
561
562
|
if (expect_single_codepoint)
|
562
563
|
flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
|
563
564
|
|
564
|
-
const char *cursor = unescape(NULL, 0, backslash, end, error_list, flags, false);
|
565
|
+
const char *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, false);
|
565
566
|
assert(cursor > backslash);
|
566
567
|
|
567
568
|
return (size_t) (cursor - backslash);
|
568
569
|
}
|
569
570
|
}
|
570
571
|
}
|
572
|
+
|
573
|
+
// This is one of the main entry points into the extension. It accepts a source
|
574
|
+
// string, a type of unescaping, and a pointer to a result string. It returns a
|
575
|
+
// boolean indicating whether or not the unescaping was successful.
|
576
|
+
YP_EXPORTED_FUNCTION bool
|
577
|
+
yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
|
578
|
+
yp_parser_t parser;
|
579
|
+
yp_parser_init(&parser, start, length, NULL);
|
580
|
+
|
581
|
+
yp_string_shared_init(result, start, start + length);
|
582
|
+
yp_unescape_manipulate_string(&parser, result, unescape_type);
|
583
|
+
|
584
|
+
bool success = yp_list_empty_p(&parser.error_list);
|
585
|
+
yp_parser_free(&parser);
|
586
|
+
|
587
|
+
return success;
|
588
|
+
}
|
data/src/util/yp_newline_list.c
CHANGED
@@ -30,6 +30,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
|
|
30
30
|
if (list->offsets == NULL) return false;
|
31
31
|
}
|
32
32
|
|
33
|
+
assert(*cursor == '\n');
|
33
34
|
assert(cursor >= list->start);
|
34
35
|
size_t newline_offset = (size_t) (cursor - list->start + 1);
|
35
36
|
assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
|
@@ -38,6 +39,15 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
|
|
38
39
|
return true;
|
39
40
|
}
|
40
41
|
|
42
|
+
// Conditionally append a new offset to the newline list, if the value passed in is a newline.
|
43
|
+
bool
|
44
|
+
yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor) {
|
45
|
+
if (*cursor != '\n') {
|
46
|
+
return true;
|
47
|
+
}
|
48
|
+
return yp_newline_list_append(list, cursor);
|
49
|
+
}
|
50
|
+
|
41
51
|
// Returns the line and column of the given offset, assuming we don't have any
|
42
52
|
// information about the previous index that we found.
|
43
53
|
static yp_line_column_t
|