yarp 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/src/token_type.c CHANGED
@@ -1,6 +1,6 @@
1
1
  /******************************************************************************/
2
- /* This file is generated by the bin/template script and should not be */
3
- /* modified manually. See */
2
+ /* This file is generated by the templates/template.rb script and should not */
3
+ /* be modified manually. See */
4
4
  /* templates/src/token_type.c.erb */
5
5
  /* if you are looking to modify the */
6
6
  /* template */
data/src/unescape.c CHANGED
@@ -14,6 +14,20 @@ yp_char_is_hexadecimal_digits(const char *c, size_t length) {
14
14
  return true;
15
15
  }
16
16
 
17
+ // We don't call the char_width function unless we have to because it's
18
+ // expensive to go through the indirection of the function pointer. Instead we
19
+ // provide a fast path that will check if we can just return 1.
20
+ static inline size_t
21
+ yp_char_width(yp_parser_t *parser, const char *start, const char *end) {
22
+ const unsigned char *uc = (const unsigned char *) start;
23
+
24
+ if (parser->encoding_changed || (*uc >= 0x80)) {
25
+ return parser->encoding.char_width(start, end - start);
26
+ } else {
27
+ return 1;
28
+ }
29
+ }
30
+
17
31
  /******************************************************************************/
18
32
  /* Lookup tables for characters */
19
33
  /******************************************************************************/
@@ -178,7 +192,7 @@ unescape_char(const unsigned char value, const unsigned char flags) {
178
192
 
179
193
  // Read a specific escape sequence into the given destination.
180
194
  static const char *
181
- unescape(char *dest, size_t *dest_length, const char *backslash, const char *end, yp_list_t *error_list, const unsigned char flags, bool write_to_str) {
195
+ unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backslash, const char *end, const unsigned char flags, bool write_to_str) {
182
196
  switch (backslash[1]) {
183
197
  case 'a':
184
198
  case 'b':
@@ -218,7 +232,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
218
232
  // \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
219
233
  case 'u': {
220
234
  if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
221
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
235
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
222
236
  return backslash + 2;
223
237
  }
224
238
 
@@ -235,11 +249,11 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
235
249
 
236
250
  // \u{nnnn} character literal allows only 1-6 hexadecimal digits
237
251
  if (hexadecimal_length > 6)
238
- yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
252
+ yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
239
253
 
240
254
  // there are not hexadecimal characters
241
255
  if (hexadecimal_length == 0) {
242
- yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
256
+ yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
243
257
  return unicode_cursor;
244
258
  }
245
259
 
@@ -252,7 +266,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
252
266
  uint32_t value;
253
267
  unescape_unicode(unicode_start, (size_t) (unicode_cursor - unicode_start), &value);
254
268
  if (write_to_str) {
255
- *dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, error_list);
269
+ *dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, &parser->error_list);
256
270
  }
257
271
 
258
272
  unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
@@ -260,7 +274,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
260
274
 
261
275
  // ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
262
276
  if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1)
263
- yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
277
+ yp_diagnostic_list_append(&parser->error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
264
278
 
265
279
  return unicode_cursor + 1;
266
280
  }
@@ -270,12 +284,12 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
270
284
  unescape_unicode(backslash + 2, 4, &value);
271
285
 
272
286
  if (write_to_str) {
273
- *dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, error_list);
287
+ *dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, &parser->error_list);
274
288
  }
275
289
  return backslash + 6;
276
290
  }
277
291
 
278
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
292
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
279
293
  return backslash + 2;
280
294
  }
281
295
  // \c\M-x meta control character, where x is an ASCII printable character
@@ -283,18 +297,18 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
283
297
  // \cx control character, where x is an ASCII printable character
284
298
  case 'c':
285
299
  if (backslash + 2 >= end) {
286
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
300
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
287
301
  return end;
288
302
  }
289
303
 
290
304
  if (flags & YP_UNESCAPE_FLAG_CONTROL) {
291
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
305
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
292
306
  return backslash + 2;
293
307
  }
294
308
 
295
309
  switch (backslash[2]) {
296
310
  case '\\':
297
- return unescape(dest, dest_length, backslash + 2, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
311
+ return unescape(parser, dest, dest_length, backslash + 2, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
298
312
  case '?':
299
313
  if (write_to_str) {
300
314
  dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
@@ -302,7 +316,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
302
316
  return backslash + 3;
303
317
  default: {
304
318
  if (!char_is_ascii_printable(backslash[2])) {
305
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
319
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
306
320
  return backslash + 2;
307
321
  }
308
322
 
@@ -316,23 +330,23 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
316
330
  // \C-? delete, ASCII 7Fh (DEL)
317
331
  case 'C':
318
332
  if (backslash + 3 >= end) {
319
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
333
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
320
334
  return end;
321
335
  }
322
336
 
323
337
  if (flags & YP_UNESCAPE_FLAG_CONTROL) {
324
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
338
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
325
339
  return backslash + 2;
326
340
  }
327
341
 
328
342
  if (backslash[2] != '-') {
329
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
343
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
330
344
  return backslash + 2;
331
345
  }
332
346
 
333
347
  switch (backslash[3]) {
334
348
  case '\\':
335
- return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
349
+ return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
336
350
  case '?':
337
351
  if (write_to_str) {
338
352
  dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
@@ -340,7 +354,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
340
354
  return backslash + 4;
341
355
  default:
342
356
  if (!char_is_ascii_printable(backslash[3])) {
343
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid control escape sequence");
357
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid control escape sequence");
344
358
  return backslash + 2;
345
359
  }
346
360
 
@@ -354,22 +368,22 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
354
368
  // \M-x meta character, where x is an ASCII printable character
355
369
  case 'M': {
356
370
  if (backslash + 3 >= end) {
357
- yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
371
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
358
372
  return end;
359
373
  }
360
374
 
361
375
  if (flags & YP_UNESCAPE_FLAG_META) {
362
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
376
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
363
377
  return backslash + 2;
364
378
  }
365
379
 
366
380
  if (backslash[2] != '-') {
367
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
381
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
368
382
  return backslash + 2;
369
383
  }
370
384
 
371
385
  if (backslash[3] == '\\') {
372
- return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_META, write_to_str);
386
+ return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_META, write_to_str);
373
387
  }
374
388
 
375
389
  if (char_is_ascii_printable(backslash[3])) {
@@ -379,7 +393,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
379
393
  return backslash + 4;
380
394
  }
381
395
 
382
- yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
396
+ yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
383
397
  return backslash + 3;
384
398
  }
385
399
  // \n
@@ -390,14 +404,17 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
390
404
  if (backslash + 2 < end && backslash[2] == '\n') {
391
405
  return backslash + 3;
392
406
  }
393
-
394
- /* fallthrough */
407
+ /* fallthrough */
395
408
  // In this case we're escaping something that doesn't need escaping.
396
409
  default: {
410
+ size_t width = yp_char_width(parser, backslash + 1, end);
411
+
397
412
  if (write_to_str) {
398
- dest[(*dest_length)++] = backslash[1];
413
+ memcpy(dest + *dest_length, backslash + 1, width);
414
+ *dest_length += width;
399
415
  }
400
- return backslash + 2;
416
+
417
+ return backslash + 1 + width;
401
418
  }
402
419
  }
403
420
  }
@@ -431,7 +448,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
431
448
  // \c? or \C-? delete, ASCII 7Fh (DEL)
432
449
  //
433
450
  YP_EXPORTED_FUNCTION void
434
- yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
451
+ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
435
452
  if (unescape_type == YP_UNESCAPE_NONE) {
436
453
  // If we're not unescaping then we can reference the source directly.
437
454
  return;
@@ -448,7 +465,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
448
465
  // within the string.
449
466
  char *allocated = malloc(string->length);
450
467
  if (allocated == NULL) {
451
- yp_diagnostic_list_append(error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
468
+ yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
452
469
  return;
453
470
  }
454
471
 
@@ -493,7 +510,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
493
510
  // This is the only type of unescaping left. In this case we need to
494
511
  // handle all of the different unescapes.
495
512
  assert(unescape_type == YP_UNESCAPE_ALL);
496
- cursor = unescape(dest, &dest_length, backslash, end, error_list, YP_UNESCAPE_FLAG_NONE, true);
513
+ cursor = unescape(parser, dest, &dest_length, backslash, end, YP_UNESCAPE_FLAG_NONE, true);
497
514
  break;
498
515
  }
499
516
 
@@ -521,29 +538,11 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
521
538
  yp_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
522
539
  }
523
540
 
524
- YP_EXPORTED_FUNCTION bool
525
- yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
526
- bool success;
527
-
528
- yp_parser_t parser;
529
- yp_parser_init(&parser, start, length, "");
530
-
531
- yp_list_t error_list = YP_LIST_EMPTY;
532
- yp_string_shared_init(result, start, start + length);
533
- yp_unescape_manipulate_string(&parser, result, unescape_type, &error_list);
534
- success = yp_list_empty_p(&error_list);
535
-
536
- yp_list_free(&error_list);
537
- yp_parser_free(&parser);
538
-
539
- return success;
540
- }
541
-
542
541
  // This function is similar to yp_unescape_manipulate_string, except it doesn't
543
542
  // actually perform any string manipulations. Instead, it calculates how long
544
543
  // the unescaped character is, and returns that value
545
- YP_EXPORTED_FUNCTION size_t
546
- yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list) {
544
+ size_t
545
+ yp_unescape_calculate_difference(yp_parser_t *parser, const char *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
547
546
  assert(unescape_type != YP_UNESCAPE_NONE);
548
547
 
549
548
  switch (backslash[1]) {
@@ -551,7 +550,9 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
551
550
  case '\'':
552
551
  return 2;
553
552
  default: {
554
- if (unescape_type == YP_UNESCAPE_MINIMAL) return 2;
553
+ if (unescape_type == YP_UNESCAPE_MINIMAL) {
554
+ return 1 + yp_char_width(parser, backslash + 1, parser->end);
555
+ }
555
556
 
556
557
  // This is the only type of unescaping left. In this case we need to
557
558
  // handle all of the different unescapes.
@@ -561,10 +562,27 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
561
562
  if (expect_single_codepoint)
562
563
  flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
563
564
 
564
- const char *cursor = unescape(NULL, 0, backslash, end, error_list, flags, false);
565
+ const char *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, false);
565
566
  assert(cursor > backslash);
566
567
 
567
568
  return (size_t) (cursor - backslash);
568
569
  }
569
570
  }
570
571
  }
572
+
573
+ // This is one of the main entry points into the extension. It accepts a source
574
+ // string, a type of unescaping, and a pointer to a result string. It returns a
575
+ // boolean indicating whether or not the unescaping was successful.
576
+ YP_EXPORTED_FUNCTION bool
577
+ yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
578
+ yp_parser_t parser;
579
+ yp_parser_init(&parser, start, length, NULL);
580
+
581
+ yp_string_shared_init(result, start, start + length);
582
+ yp_unescape_manipulate_string(&parser, result, unescape_type);
583
+
584
+ bool success = yp_list_empty_p(&parser.error_list);
585
+ yp_parser_free(&parser);
586
+
587
+ return success;
588
+ }
data/src/util/yp_newline_list.c CHANGED
@@ -30,6 +30,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
30
30
  if (list->offsets == NULL) return false;
31
31
  }
32
32
 
33
+ assert(*cursor == '\n');
33
34
  assert(cursor >= list->start);
34
35
  size_t newline_offset = (size_t) (cursor - list->start + 1);
35
36
  assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
@@ -38,6 +39,15 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
38
39
  return true;
39
40
  }
40
41
 
42
+ // Conditionally append a new offset to the newline list, if the value passed in is a newline.
43
+ bool
44
+ yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor) {
45
+ if (*cursor != '\n') {
46
+ return true;
47
+ }
48
+ return yp_newline_list_append(list, cursor);
49
+ }
50
+
41
51
  // Returns the line and column of the given offset, assuming we don't have any
42
52
  // information about the previous index that we found.
43
53
  static yp_line_column_t