yarp 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/src/token_type.c CHANGED
@@ -1,6 +1,6 @@
1
1
  /******************************************************************************/
2
- /* This file is generated by the bin/template script and should not be */
3
- /* modified manually. See */
2
+ /* This file is generated by the templates/template.rb script and should not */
3
+ /* be modified manually. See */
4
4
  /* templates/src/token_type.c.erb */
5
5
  /* if you are looking to modify the */
6
6
  /* template */
data/src/unescape.c CHANGED
@@ -14,6 +14,20 @@ yp_char_is_hexadecimal_digits(const char *c, size_t length) {
14
14
  return true;
15
15
  }
16
16
 
17
+ // We don't call the char_width function unless we have to because it's
18
+ // expensive to go through the indirection of the function pointer. Instead we
19
+ // provide a fast path that will check if we can just return 1.
20
+ static inline size_t
21
+ yp_char_width(yp_parser_t *parser, const char *start, const char *end) {
22
+ const unsigned char *uc = (const unsigned char *) start;
23
+
24
+ if (parser->encoding_changed || (*uc >= 0x80)) {
25
+ return parser->encoding.char_width(start, end - start);
26
+ } else {
27
+ return 1;
28
+ }
29
+ }
30
+
17
31
  /******************************************************************************/
18
32
  /* Lookup tables for characters */
19
33
  /******************************************************************************/
@@ -178,7 +192,7 @@ unescape_char(const unsigned char value, const unsigned char flags) {
178
192
 
179
193
  // Read a specific escape sequence into the given destination.
180
194
  static const char *
181
- unescape(char *dest, size_t *dest_length, const char *backslash, const char *end, yp_list_t *error_list, const unsigned char flags, bool write_to_str) {
195
+ unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backslash, const char *end, const unsigned char flags, bool write_to_str) {
182
196
  switch (backslash[1]) {
183
197
  case 'a':
184
198
  case 'b':
@@ -218,7 +232,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
218
232
  // \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
219
233
  case 'u': {
220
234
  if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
221
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
235
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
222
236
  return backslash + 2;
223
237
  }
224
238
 
@@ -235,11 +249,11 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
235
249
 
236
250
  // \u{nnnn} character literal allows only 1-6 hexadecimal digits
237
251
  if (hexadecimal_length > 6)
238
- yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
252
+ yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
239
253
 
240
254
  // there are not hexadecimal characters
241
255
  if (hexadecimal_length == 0) {
242
- yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
256
+ yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
243
257
  return unicode_cursor;
244
258
  }
245
259
 
@@ -252,7 +266,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
252
266
  uint32_t value;
253
267
  unescape_unicode(unicode_start, (size_t) (unicode_cursor - unicode_start), &value);
254
268
  if (write_to_str) {
255
- *dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, error_list);
269
+ *dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, &parser->error_list);
256
270
  }
257
271
 
258
272
  unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
@@ -260,7 +274,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
260
274
 
261
275
  // ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
262
276
  if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1)
263
- yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
277
+ yp_diagnostic_list_append(&parser->error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
264
278
 
265
279
  return unicode_cursor + 1;
266
280
  }
@@ -270,12 +284,12 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
270
284
  unescape_unicode(backslash + 2, 4, &value);
271
285
 
272
286
  if (write_to_str) {
273
- *dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, error_list);
287
+ *dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, &parser->error_list);
274
288
  }
275
289
  return backslash + 6;
276
290
  }
277
291
 
278
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
292
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
279
293
  return backslash + 2;
280
294
  }
281
295
  // \c\M-x meta control character, where x is an ASCII printable character
@@ -283,18 +297,18 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
283
297
  // \cx control character, where x is an ASCII printable character
284
298
  case 'c':
285
299
  if (backslash + 2 >= end) {
286
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
300
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
287
301
  return end;
288
302
  }
289
303
 
290
304
  if (flags & YP_UNESCAPE_FLAG_CONTROL) {
291
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
305
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
292
306
  return backslash + 2;
293
307
  }
294
308
 
295
309
  switch (backslash[2]) {
296
310
  case '\\':
297
- return unescape(dest, dest_length, backslash + 2, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
311
+ return unescape(parser, dest, dest_length, backslash + 2, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
298
312
  case '?':
299
313
  if (write_to_str) {
300
314
  dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
@@ -302,7 +316,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
302
316
  return backslash + 3;
303
317
  default: {
304
318
  if (!char_is_ascii_printable(backslash[2])) {
305
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
319
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
306
320
  return backslash + 2;
307
321
  }
308
322
 
@@ -316,23 +330,23 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
316
330
  // \C-? delete, ASCII 7Fh (DEL)
317
331
  case 'C':
318
332
  if (backslash + 3 >= end) {
319
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
333
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
320
334
  return end;
321
335
  }
322
336
 
323
337
  if (flags & YP_UNESCAPE_FLAG_CONTROL) {
324
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
338
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
325
339
  return backslash + 2;
326
340
  }
327
341
 
328
342
  if (backslash[2] != '-') {
329
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
343
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
330
344
  return backslash + 2;
331
345
  }
332
346
 
333
347
  switch (backslash[3]) {
334
348
  case '\\':
335
- return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
349
+ return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
336
350
  case '?':
337
351
  if (write_to_str) {
338
352
  dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
@@ -340,7 +354,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
340
354
  return backslash + 4;
341
355
  default:
342
356
  if (!char_is_ascii_printable(backslash[3])) {
343
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid control escape sequence");
357
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid control escape sequence");
344
358
  return backslash + 2;
345
359
  }
346
360
 
@@ -354,22 +368,22 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
354
368
  // \M-x meta character, where x is an ASCII printable character
355
369
  case 'M': {
356
370
  if (backslash + 3 >= end) {
357
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
371
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
358
372
  return end;
359
373
  }
360
374
 
361
375
  if (flags & YP_UNESCAPE_FLAG_META) {
362
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
376
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
363
377
  return backslash + 2;
364
378
  }
365
379
 
366
380
  if (backslash[2] != '-') {
367
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
381
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
368
382
  return backslash + 2;
369
383
  }
370
384
 
371
385
  if (backslash[3] == '\\') {
372
- return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_META, write_to_str);
386
+ return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_META, write_to_str);
373
387
  }
374
388
 
375
389
  if (char_is_ascii_printable(backslash[3])) {
@@ -379,7 +393,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
379
393
  return backslash + 4;
380
394
  }
381
395
 
382
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
396
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
383
397
  return backslash + 3;
384
398
  }
385
399
  // \n
@@ -390,14 +404,17 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
390
404
  if (backslash + 2 < end && backslash[2] == '\n') {
391
405
  return backslash + 3;
392
406
  }
393
-
394
- /* fallthrough */
407
+ /* fallthrough */
395
408
  // In this case we're escaping something that doesn't need escaping.
396
409
  default: {
410
+ size_t width = yp_char_width(parser, backslash + 1, end);
411
+
397
412
  if (write_to_str) {
398
- dest[(*dest_length)++] = backslash[1];
413
+ memcpy(dest + *dest_length, backslash + 1, width);
414
+ *dest_length += width;
399
415
  }
400
- return backslash + 2;
416
+
417
+ return backslash + 1 + width;
401
418
  }
402
419
  }
403
420
  }
@@ -431,7 +448,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
431
448
  // \c? or \C-? delete, ASCII 7Fh (DEL)
432
449
  //
433
450
  YP_EXPORTED_FUNCTION void
434
- yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
451
+ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
435
452
  if (unescape_type == YP_UNESCAPE_NONE) {
436
453
  // If we're not unescaping then we can reference the source directly.
437
454
  return;
@@ -448,7 +465,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
448
465
  // within the string.
449
466
  char *allocated = malloc(string->length);
450
467
  if (allocated == NULL) {
451
- yp_diagnostic_list_append(error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
468
+ yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
452
469
  return;
453
470
  }
454
471
 
@@ -493,7 +510,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
493
510
  // This is the only type of unescaping left. In this case we need to
494
511
  // handle all of the different unescapes.
495
512
  assert(unescape_type == YP_UNESCAPE_ALL);
496
- cursor = unescape(dest, &dest_length, backslash, end, error_list, YP_UNESCAPE_FLAG_NONE, true);
513
+ cursor = unescape(parser, dest, &dest_length, backslash, end, YP_UNESCAPE_FLAG_NONE, true);
497
514
  break;
498
515
  }
499
516
 
@@ -521,29 +538,11 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
521
538
  yp_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
522
539
  }
523
540
 
524
- YP_EXPORTED_FUNCTION bool
525
- yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
526
- bool success;
527
-
528
- yp_parser_t parser;
529
- yp_parser_init(&parser, start, length, "");
530
-
531
- yp_list_t error_list = YP_LIST_EMPTY;
532
- yp_string_shared_init(result, start, start + length);
533
- yp_unescape_manipulate_string(&parser, result, unescape_type, &error_list);
534
- success = yp_list_empty_p(&error_list);
535
-
536
- yp_list_free(&error_list);
537
- yp_parser_free(&parser);
538
-
539
- return success;
540
- }
541
-
542
541
  // This function is similar to yp_unescape_manipulate_string, except it doesn't
543
542
  // actually perform any string manipulations. Instead, it calculates how long
544
543
  // the unescaped character is, and returns that value
545
- YP_EXPORTED_FUNCTION size_t
546
- yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list) {
544
+ size_t
545
+ yp_unescape_calculate_difference(yp_parser_t *parser, const char *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
547
546
  assert(unescape_type != YP_UNESCAPE_NONE);
548
547
 
549
548
  switch (backslash[1]) {
@@ -551,7 +550,9 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
551
550
  case '\'':
552
551
  return 2;
553
552
  default: {
554
- if (unescape_type == YP_UNESCAPE_MINIMAL) return 2;
553
+ if (unescape_type == YP_UNESCAPE_MINIMAL) {
554
+ return 1 + yp_char_width(parser, backslash + 1, parser->end);
555
+ }
555
556
 
556
557
  // This is the only type of unescaping left. In this case we need to
557
558
  // handle all of the different unescapes.
@@ -561,10 +562,27 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
561
562
  if (expect_single_codepoint)
562
563
  flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
563
564
 
564
- const char *cursor = unescape(NULL, 0, backslash, end, error_list, flags, false);
565
+ const char *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, false);
565
566
  assert(cursor > backslash);
566
567
 
567
568
  return (size_t) (cursor - backslash);
568
569
  }
569
570
  }
570
571
  }
572
+
573
+ // This is one of the main entry points into the extension. It accepts a source
574
+ // string, a type of unescaping, and a pointer to a result string. It returns a
575
+ // boolean indicating whether or not the unescaping was successful.
576
+ YP_EXPORTED_FUNCTION bool
577
+ yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
578
+ yp_parser_t parser;
579
+ yp_parser_init(&parser, start, length, NULL);
580
+
581
+ yp_string_shared_init(result, start, start + length);
582
+ yp_unescape_manipulate_string(&parser, result, unescape_type);
583
+
584
+ bool success = yp_list_empty_p(&parser.error_list);
585
+ yp_parser_free(&parser);
586
+
587
+ return success;
588
+ }
@@ -30,6 +30,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
30
30
  if (list->offsets == NULL) return false;
31
31
  }
32
32
 
33
+ assert(*cursor == '\n');
33
34
  assert(cursor >= list->start);
34
35
  size_t newline_offset = (size_t) (cursor - list->start + 1);
35
36
  assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
@@ -38,6 +39,15 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
38
39
  return true;
39
40
  }
40
41
 
42
+ // Conditionally append a new offset to the newline list, if the value passed in is a newline.
43
+ bool
44
+ yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor) {
45
+ if (*cursor != '\n') {
46
+ return true;
47
+ }
48
+ return yp_newline_list_append(list, cursor);
49
+ }
50
+
41
51
  // Returns the line and column of the given offset, assuming we don't have any
42
52
  // information about the previous index that we found.
43
53
  static yp_line_column_t