yarp 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -8
  3. data/CONTRIBUTING.md +2 -2
  4. data/Makefile +5 -5
  5. data/README.md +11 -12
  6. data/config.yml +6 -2
  7. data/docs/build_system.md +21 -21
  8. data/docs/building.md +4 -4
  9. data/docs/configuration.md +25 -21
  10. data/docs/design.md +2 -2
  11. data/docs/encoding.md +17 -17
  12. data/docs/fuzzing.md +4 -4
  13. data/docs/heredocs.md +3 -3
  14. data/docs/mapping.md +94 -94
  15. data/docs/ripper.md +4 -4
  16. data/docs/ruby_api.md +11 -11
  17. data/docs/serialization.md +17 -16
  18. data/docs/testing.md +6 -6
  19. data/ext/prism/api_node.c +4725 -0
  20. data/ext/{yarp → prism}/api_pack.c +82 -82
  21. data/ext/{yarp → prism}/extconf.rb +13 -13
  22. data/ext/{yarp → prism}/extension.c +175 -168
  23. data/ext/prism/extension.h +18 -0
  24. data/include/prism/ast.h +1932 -0
  25. data/include/prism/defines.h +45 -0
  26. data/include/prism/diagnostic.h +231 -0
  27. data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
  28. data/include/prism/node.h +41 -0
  29. data/include/prism/pack.h +141 -0
  30. data/include/{yarp → prism}/parser.h +143 -142
  31. data/include/prism/regexp.h +19 -0
  32. data/include/prism/unescape.h +48 -0
  33. data/include/prism/util/pm_buffer.h +51 -0
  34. data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
  35. data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
  36. data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
  37. data/include/prism/util/pm_memchr.h +14 -0
  38. data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
  39. data/include/prism/util/pm_state_stack.h +24 -0
  40. data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
  41. data/include/prism/util/pm_string_list.h +25 -0
  42. data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
  43. data/include/prism/version.h +4 -0
  44. data/include/prism.h +82 -0
  45. data/lib/prism/compiler.rb +465 -0
  46. data/lib/prism/debug.rb +157 -0
  47. data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
  48. data/lib/prism/dispatcher.rb +2051 -0
  49. data/lib/prism/dsl.rb +750 -0
  50. data/lib/{yarp → prism}/ffi.rb +66 -67
  51. data/lib/{yarp → prism}/lex_compat.rb +40 -43
  52. data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
  53. data/lib/{yarp → prism}/node.rb +2012 -2593
  54. data/lib/prism/node_ext.rb +55 -0
  55. data/lib/prism/node_inspector.rb +68 -0
  56. data/lib/{yarp → prism}/pack.rb +1 -1
  57. data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
  58. data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
  59. data/lib/prism/parse_result.rb +266 -0
  60. data/lib/{yarp → prism}/pattern.rb +14 -14
  61. data/lib/{yarp → prism}/ripper_compat.rb +5 -5
  62. data/lib/{yarp → prism}/serialize.rb +12 -7
  63. data/lib/prism/visitor.rb +470 -0
  64. data/lib/prism.rb +64 -0
  65. data/lib/yarp.rb +2 -614
  66. data/src/diagnostic.c +213 -208
  67. data/src/enc/pm_big5.c +52 -0
  68. data/src/enc/pm_euc_jp.c +58 -0
  69. data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
  70. data/src/enc/pm_shift_jis.c +56 -0
  71. data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
  72. data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
  73. data/src/enc/pm_windows_31j.c +56 -0
  74. data/src/node.c +1293 -1233
  75. data/src/pack.c +247 -247
  76. data/src/prettyprint.c +1479 -1479
  77. data/src/{yarp.c → prism.c} +5205 -5083
  78. data/src/regexp.c +132 -132
  79. data/src/serialize.c +1121 -1121
  80. data/src/token_type.c +169 -167
  81. data/src/unescape.c +106 -87
  82. data/src/util/pm_buffer.c +103 -0
  83. data/src/util/{yp_char.c → pm_char.c} +72 -72
  84. data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
  85. data/src/util/{yp_list.c → pm_list.c} +10 -10
  86. data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
  87. data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
  88. data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
  89. data/src/util/{yp_string.c → pm_string.c} +38 -38
  90. data/src/util/pm_string_list.c +29 -0
  91. data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
  92. data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
  93. data/yarp.gemspec +68 -59
  94. metadata +70 -61
  95. data/ext/yarp/api_node.c +0 -4728
  96. data/ext/yarp/extension.h +0 -18
  97. data/include/yarp/ast.h +0 -1929
  98. data/include/yarp/defines.h +0 -45
  99. data/include/yarp/diagnostic.h +0 -226
  100. data/include/yarp/node.h +0 -42
  101. data/include/yarp/pack.h +0 -141
  102. data/include/yarp/regexp.h +0 -19
  103. data/include/yarp/unescape.h +0 -44
  104. data/include/yarp/util/yp_buffer.h +0 -51
  105. data/include/yarp/util/yp_memchr.h +0 -14
  106. data/include/yarp/util/yp_state_stack.h +0 -24
  107. data/include/yarp/util/yp_string_list.h +0 -25
  108. data/include/yarp/version.h +0 -4
  109. data/include/yarp.h +0 -82
  110. data/src/enc/yp_big5.c +0 -52
  111. data/src/enc/yp_euc_jp.c +0 -58
  112. data/src/enc/yp_shift_jis.c +0 -56
  113. data/src/enc/yp_windows_31j.c +0 -56
  114. data/src/util/yp_buffer.c +0 -101
  115. data/src/util/yp_string_list.c +0 -29
data/src/unescape.c CHANGED
@@ -1,13 +1,13 @@
1
- #include "yarp.h"
1
+ #include "prism.h"
2
2
 
3
3
  /******************************************************************************/
4
4
  /* Character checks */
5
5
  /******************************************************************************/
6
6
 
7
7
  static inline bool
8
- yp_char_is_hexadecimal_digits(const uint8_t *string, size_t length) {
8
+ pm_char_is_hexadecimal_digits(const uint8_t *string, size_t length) {
9
9
  for (size_t index = 0; index < length; index++) {
10
- if (!yp_char_is_hexadecimal_digit(string[index])) {
10
+ if (!pm_char_is_hexadecimal_digit(string[index])) {
11
11
  return false;
12
12
  }
13
13
  }
@@ -18,7 +18,7 @@ yp_char_is_hexadecimal_digits(const uint8_t *string, size_t length) {
18
18
  // expensive to go through the indirection of the function pointer. Instead we
19
19
  // provide a fast path that will check if we can just return 1.
20
20
  static inline size_t
21
- yp_char_width(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
21
+ pm_char_width(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
22
22
  if (parser->encoding_changed || (*start >= 0x80)) {
23
23
  return parser->encoding.char_width(start, end - start);
24
24
  } else {
@@ -71,11 +71,11 @@ char_is_ascii_printable(const uint8_t b) {
71
71
  static inline size_t
72
72
  unescape_octal(const uint8_t *backslash, uint8_t *value, const uint8_t *end) {
73
73
  *value = (uint8_t) (backslash[1] - '0');
74
- if (backslash + 2 >= end || !yp_char_is_octal_digit(backslash[2])) {
74
+ if (backslash + 2 >= end || !pm_char_is_octal_digit(backslash[2])) {
75
75
  return 2;
76
76
  }
77
77
  *value = (uint8_t) ((*value << 3) | (backslash[2] - '0'));
78
- if (backslash + 3 >= end || !yp_char_is_octal_digit(backslash[3])) {
78
+ if (backslash + 3 >= end || !pm_char_is_octal_digit(backslash[3])) {
79
79
  return 3;
80
80
  }
81
81
  *value = (uint8_t) ((*value << 3) | (backslash[3] - '0'));
@@ -91,14 +91,14 @@ unescape_hexadecimal_digit(const uint8_t value) {
91
91
  // Scan the 1-2 digits of hexadecimal into the value. Returns the number of
92
92
  // digits scanned.
93
93
  static inline size_t
94
- unescape_hexadecimal(const uint8_t *backslash, uint8_t *value, const uint8_t *end, yp_list_t *error_list) {
94
+ unescape_hexadecimal(const uint8_t *backslash, uint8_t *value, const uint8_t *end, pm_list_t *error_list) {
95
95
  *value = 0;
96
- if (backslash + 2 >= end || !yp_char_is_hexadecimal_digit(backslash[2])) {
97
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_HEXADECIMAL);
96
+ if (backslash + 2 >= end || !pm_char_is_hexadecimal_digit(backslash[2])) {
97
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
98
98
  return 2;
99
99
  }
100
100
  *value = unescape_hexadecimal_digit(backslash[2]);
101
- if (backslash + 3 >= end || !yp_char_is_hexadecimal_digit(backslash[3])) {
101
+ if (backslash + 3 >= end || !pm_char_is_hexadecimal_digit(backslash[3])) {
102
102
  return 3;
103
103
  }
104
104
  *value = (uint8_t) ((*value << 4) | unescape_hexadecimal_digit(backslash[3]));
@@ -121,7 +121,7 @@ unescape_unicode(const uint8_t *string, size_t length, uint32_t *value) {
121
121
  // 32-bit value to write. Writes the UTF-8 representation of the value to the
122
122
  // string and returns the number of bytes written.
123
123
  static inline size_t
124
- unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, const uint8_t *end, yp_list_t *error_list) {
124
+ unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, const uint8_t *end, pm_list_t *error_list) {
125
125
  if (value <= 0x7F) {
126
126
  // 0xxxxxxx
127
127
  dest[0] = (uint8_t) value;
@@ -157,7 +157,7 @@ unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, cons
157
157
  // If we get here, then the value is too big. This is an error, but we don't
158
158
  // want to just crash, so instead we'll add an error to the error list and put
159
159
  // in a replacement character instead.
160
- if (error_list) yp_diagnostic_list_append(error_list, start, end, YP_ERR_ESCAPE_INVALID_UNICODE);
160
+ if (error_list) pm_diagnostic_list_append(error_list, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
161
161
  dest[0] = 0xEF;
162
162
  dest[1] = 0xBF;
163
163
  dest[2] = 0xBD;
@@ -165,20 +165,20 @@ unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, cons
165
165
  }
166
166
 
167
167
  typedef enum {
168
- YP_UNESCAPE_FLAG_NONE = 0,
169
- YP_UNESCAPE_FLAG_CONTROL = 1,
170
- YP_UNESCAPE_FLAG_META = 2,
171
- YP_UNESCAPE_FLAG_EXPECT_SINGLE = 4
172
- } yp_unescape_flag_t;
168
+ PM_UNESCAPE_FLAG_NONE = 0,
169
+ PM_UNESCAPE_FLAG_CONTROL = 1,
170
+ PM_UNESCAPE_FLAG_META = 2,
171
+ PM_UNESCAPE_FLAG_EXPECT_SINGLE = 4
172
+ } pm_unescape_flag_t;
173
173
 
174
174
  // Unescape a single character value based on the given flags.
175
175
  static inline uint8_t
176
176
  unescape_char(uint8_t value, const uint8_t flags) {
177
- if (flags & YP_UNESCAPE_FLAG_CONTROL) {
177
+ if (flags & PM_UNESCAPE_FLAG_CONTROL) {
178
178
  value &= 0x1f;
179
179
  }
180
180
 
181
- if (flags & YP_UNESCAPE_FLAG_META) {
181
+ if (flags & PM_UNESCAPE_FLAG_META) {
182
182
  value |= 0x80;
183
183
  }
184
184
 
@@ -188,13 +188,13 @@ unescape_char(uint8_t value, const uint8_t flags) {
188
188
  // Read a specific escape sequence into the given destination.
189
189
  static const uint8_t *
190
190
  unescape(
191
- yp_parser_t *parser,
191
+ pm_parser_t *parser,
192
192
  uint8_t *dest,
193
193
  size_t *dest_length,
194
194
  const uint8_t *backslash,
195
195
  const uint8_t *end,
196
196
  const uint8_t flags,
197
- yp_list_t *error_list
197
+ pm_list_t *error_list
198
198
  ) {
199
199
  switch (backslash[1]) {
200
200
  case 'a':
@@ -234,8 +234,8 @@ unescape(
234
234
  // \u{nnnn ...} Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
235
235
  // \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
236
236
  case 'u': {
237
- if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
238
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS);
237
+ if ((flags & PM_UNESCAPE_FLAG_CONTROL) | (flags & PM_UNESCAPE_FLAG_META)) {
238
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS);
239
239
  return backslash + 2;
240
240
  }
241
241
 
@@ -244,26 +244,26 @@ unescape(
244
244
  const uint8_t *extra_codepoints_start = NULL;
245
245
  int codepoints_count = 0;
246
246
 
247
- unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
247
+ unicode_cursor += pm_strspn_whitespace(unicode_cursor, end - unicode_cursor);
248
248
 
249
249
  while ((unicode_cursor < end) && (*unicode_cursor != '}')) {
250
250
  const uint8_t *unicode_start = unicode_cursor;
251
- size_t hexadecimal_length = yp_strspn_hexadecimal_digit(unicode_cursor, end - unicode_cursor);
251
+ size_t hexadecimal_length = pm_strspn_hexadecimal_digit(unicode_cursor, end - unicode_cursor);
252
252
 
253
253
  // \u{nnnn} character literal allows only 1-6 hexadecimal digits
254
254
  if (hexadecimal_length > 6) {
255
- if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, YP_ERR_ESCAPE_INVALID_UNICODE_LONG);
255
+ if (error_list) pm_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
256
256
  }
257
257
  // there are not hexadecimal characters
258
258
  else if (hexadecimal_length == 0) {
259
- if (error_list) yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, YP_ERR_ESCAPE_INVALID_UNICODE);
259
+ if (error_list) pm_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
260
260
  return unicode_cursor;
261
261
  }
262
262
 
263
263
  unicode_cursor += hexadecimal_length;
264
264
 
265
265
  codepoints_count++;
266
- if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count == 2)
266
+ if (flags & PM_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count == 2)
267
267
  extra_codepoints_start = unicode_start;
268
268
 
269
269
  uint32_t value;
@@ -272,23 +272,23 @@ unescape(
272
272
  *dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, error_list);
273
273
  }
274
274
 
275
- unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
275
+ unicode_cursor += pm_strspn_whitespace(unicode_cursor, end - unicode_cursor);
276
276
  }
277
277
 
278
278
  // ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
279
- if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1) {
280
- if (error_list) yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, YP_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
279
+ if (flags & PM_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1) {
280
+ if (error_list) pm_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
281
281
  }
282
282
 
283
283
  if (unicode_cursor < end && *unicode_cursor == '}') {
284
284
  unicode_cursor++;
285
285
  } else {
286
- if (error_list) yp_diagnostic_list_append(error_list, backslash, unicode_cursor, YP_ERR_ESCAPE_INVALID_UNICODE_TERM);
286
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, unicode_cursor, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
287
287
  }
288
288
 
289
289
  return unicode_cursor;
290
290
  }
291
- else if ((backslash + 5) < end && yp_char_is_hexadecimal_digits(backslash + 2, 4)) {
291
+ else if ((backslash + 5) < end && pm_char_is_hexadecimal_digits(backslash + 2, 4)) {
292
292
  uint32_t value;
293
293
  unescape_unicode(backslash + 2, 4, &value);
294
294
 
@@ -298,7 +298,7 @@ unescape(
298
298
  return backslash + 6;
299
299
  }
300
300
 
301
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_UNICODE);
301
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_UNICODE);
302
302
  return backslash + 2;
303
303
  }
304
304
  // \c\M-x meta control character, where x is an ASCII printable character
@@ -306,18 +306,18 @@ unescape(
306
306
  // \cx control character, where x is an ASCII printable character
307
307
  case 'c':
308
308
  if (backslash + 2 >= end) {
309
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
309
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL);
310
310
  return end;
311
311
  }
312
312
 
313
- if (flags & YP_UNESCAPE_FLAG_CONTROL) {
314
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
313
+ if (flags & PM_UNESCAPE_FLAG_CONTROL) {
314
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
315
315
  return backslash + 2;
316
316
  }
317
317
 
318
318
  switch (backslash[2]) {
319
319
  case '\\':
320
- return unescape(parser, dest, dest_length, backslash + 2, end, flags | YP_UNESCAPE_FLAG_CONTROL, error_list);
320
+ return unescape(parser, dest, dest_length, backslash + 2, end, flags | PM_UNESCAPE_FLAG_CONTROL, error_list);
321
321
  case '?':
322
322
  if (dest) {
323
323
  dest[(*dest_length)++] = unescape_char(0x7f, flags);
@@ -325,12 +325,12 @@ unescape(
325
325
  return backslash + 3;
326
326
  default: {
327
327
  if (!char_is_ascii_printable(backslash[2])) {
328
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
328
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL);
329
329
  return backslash + 2;
330
330
  }
331
331
 
332
332
  if (dest) {
333
- dest[(*dest_length)++] = unescape_char(backslash[2], flags | YP_UNESCAPE_FLAG_CONTROL);
333
+ dest[(*dest_length)++] = unescape_char(backslash[2], flags | PM_UNESCAPE_FLAG_CONTROL);
334
334
  }
335
335
  return backslash + 3;
336
336
  }
@@ -339,23 +339,23 @@ unescape(
339
339
  // \C-? delete, ASCII 7Fh (DEL)
340
340
  case 'C':
341
341
  if (backslash + 3 >= end) {
342
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
342
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL);
343
343
  return end;
344
344
  }
345
345
 
346
- if (flags & YP_UNESCAPE_FLAG_CONTROL) {
347
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
346
+ if (flags & PM_UNESCAPE_FLAG_CONTROL) {
347
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
348
348
  return backslash + 2;
349
349
  }
350
350
 
351
351
  if (backslash[2] != '-') {
352
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_CONTROL);
352
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_CONTROL);
353
353
  return backslash + 2;
354
354
  }
355
355
 
356
356
  switch (backslash[3]) {
357
357
  case '\\':
358
- return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_CONTROL, error_list);
358
+ return unescape(parser, dest, dest_length, backslash + 3, end, flags | PM_UNESCAPE_FLAG_CONTROL, error_list);
359
359
  case '?':
360
360
  if (dest) {
361
361
  dest[(*dest_length)++] = unescape_char(0x7f, flags);
@@ -363,12 +363,12 @@ unescape(
363
363
  return backslash + 4;
364
364
  default:
365
365
  if (!char_is_ascii_printable(backslash[3])) {
366
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_CONTROL);
366
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_CONTROL);
367
367
  return backslash + 2;
368
368
  }
369
369
 
370
370
  if (dest) {
371
- dest[(*dest_length)++] = unescape_char(backslash[3], flags | YP_UNESCAPE_FLAG_CONTROL);
371
+ dest[(*dest_length)++] = unescape_char(backslash[3], flags | PM_UNESCAPE_FLAG_CONTROL);
372
372
  }
373
373
  return backslash + 4;
374
374
  }
@@ -377,32 +377,32 @@ unescape(
377
377
  // \M-x meta character, where x is an ASCII printable character
378
378
  case 'M': {
379
379
  if (backslash + 3 >= end) {
380
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 1, YP_ERR_ESCAPE_INVALID_META);
380
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 1, PM_ERR_ESCAPE_INVALID_META);
381
381
  return end;
382
382
  }
383
383
 
384
- if (flags & YP_UNESCAPE_FLAG_META) {
385
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META_REPEAT);
384
+ if (flags & PM_UNESCAPE_FLAG_META) {
385
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_META_REPEAT);
386
386
  return backslash + 2;
387
387
  }
388
388
 
389
389
  if (backslash[2] != '-') {
390
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META);
390
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_META);
391
391
  return backslash + 2;
392
392
  }
393
393
 
394
394
  if (backslash[3] == '\\') {
395
- return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_META, error_list);
395
+ return unescape(parser, dest, dest_length, backslash + 3, end, flags | PM_UNESCAPE_FLAG_META, error_list);
396
396
  }
397
397
 
398
398
  if (char_is_ascii_printable(backslash[3])) {
399
399
  if (dest) {
400
- dest[(*dest_length)++] = unescape_char(backslash[3], flags | YP_UNESCAPE_FLAG_META);
400
+ dest[(*dest_length)++] = unescape_char(backslash[3], flags | PM_UNESCAPE_FLAG_META);
401
401
  }
402
402
  return backslash + 4;
403
403
  }
404
404
 
405
- if (error_list) yp_diagnostic_list_append(error_list, backslash, backslash + 2, YP_ERR_ESCAPE_INVALID_META);
405
+ if (error_list) pm_diagnostic_list_append(error_list, backslash, backslash + 2, PM_ERR_ESCAPE_INVALID_META);
406
406
  return backslash + 3;
407
407
  }
408
408
  // \n
@@ -416,7 +416,7 @@ unescape(
416
416
  /* fallthrough */
417
417
  // In this case we're escaping something that doesn't need escaping.
418
418
  default: {
419
- size_t width = yp_char_width(parser, backslash + 1, end);
419
+ size_t width = pm_char_width(parser, backslash + 1, end);
420
420
 
421
421
  if (dest) {
422
422
  memcpy(dest + *dest_length, backslash + 1, width);
@@ -457,13 +457,13 @@ unescape(
457
457
  // \c? or \C-? delete, ASCII 7Fh (DEL)
458
458
  //
459
459
  static void
460
- yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
461
- if (unescape_type == YP_UNESCAPE_NONE) {
460
+ pm_unescape_manipulate_string_or_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type, bool expect_single_codepoint) {
461
+ if (unescape_type == PM_UNESCAPE_NONE) {
462
462
  // If we're not unescaping then we can reference the source directly.
463
463
  return;
464
464
  }
465
465
 
466
- const uint8_t *backslash = yp_memchr(string->source, '\\', string->length, parser->encoding_changed, &parser->encoding);
466
+ const uint8_t *backslash = pm_memchr(string->source, '\\', string->length, parser->encoding_changed, &parser->encoding);
467
467
 
468
468
  if (backslash == NULL) {
469
469
  // Here there are no escapes, so we can reference the source directly.
@@ -474,7 +474,7 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
474
474
  // within the string.
475
475
  uint8_t *allocated = malloc(string->length);
476
476
  if (allocated == NULL) {
477
- yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, YP_ERR_MALLOC_FAILED);
477
+ pm_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, PM_ERR_MALLOC_FAILED);
478
478
  return;
479
479
  }
480
480
 
@@ -509,7 +509,17 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
509
509
  cursor = backslash + 2;
510
510
  break;
511
511
  default:
512
- if (unescape_type == YP_UNESCAPE_MINIMAL) {
512
+ if (unescape_type == PM_UNESCAPE_WHITESPACE) {
513
+ if (backslash[1] == '\r' && backslash[2] == '\n') {
514
+ cursor = backslash + 2;
515
+ break;
516
+ }
517
+ if (pm_strspn_whitespace(backslash + 1, 1)) {
518
+ cursor = backslash + 1;
519
+ break;
520
+ }
521
+ }
522
+ if (unescape_type == PM_UNESCAPE_WHITESPACE || unescape_type == PM_UNESCAPE_MINIMAL) {
513
523
  // In this case we're escaping something that doesn't need escaping.
514
524
  dest[dest_length++] = '\\';
515
525
  cursor = backslash + 1;
@@ -518,11 +528,11 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
518
528
 
519
529
  // This is the only type of unescaping left. In this case we need to
520
530
  // handle all of the different unescapes.
521
- assert(unescape_type == YP_UNESCAPE_ALL);
531
+ assert(unescape_type == PM_UNESCAPE_ALL);
522
532
 
523
- uint8_t flags = YP_UNESCAPE_FLAG_NONE;
533
+ uint8_t flags = PM_UNESCAPE_FLAG_NONE;
524
534
  if (expect_single_codepoint) {
525
- flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
535
+ flags |= PM_UNESCAPE_FLAG_EXPECT_SINGLE;
526
536
  }
527
537
 
528
538
  cursor = unescape(parser, dest, &dest_length, backslash, end, flags, &parser->error_list);
@@ -530,7 +540,7 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
530
540
  }
531
541
 
532
542
  if (end > cursor) {
533
- backslash = yp_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, &parser->encoding);
543
+ backslash = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, &parser->encoding);
534
544
  } else {
535
545
  backslash = NULL;
536
546
  }
@@ -545,30 +555,30 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
545
555
 
546
556
  // If the string was already allocated, then we need to free that memory
547
557
  // here. That's because we're about to override it with the escaped string.
548
- yp_string_free(string);
558
+ pm_string_free(string);
549
559
 
550
560
  // We also need to update the length at the end. This is because every escape
551
561
  // reduces the length of the final string, and we don't want garbage at the
552
562
  // end.
553
- yp_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
563
+ pm_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
554
564
  }
555
565
 
556
- YP_EXPORTED_FUNCTION void
557
- yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
558
- yp_unescape_manipulate_string_or_char_literal(parser, string, unescape_type, false);
566
+ PRISM_EXPORTED_FUNCTION void
567
+ pm_unescape_manipulate_string(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type) {
568
+ pm_unescape_manipulate_string_or_char_literal(parser, string, unescape_type, false);
559
569
  }
560
570
 
561
571
  void
562
- yp_unescape_manipulate_char_literal(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
563
- yp_unescape_manipulate_string_or_char_literal(parser, string, unescape_type, true);
572
+ pm_unescape_manipulate_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type) {
573
+ pm_unescape_manipulate_string_or_char_literal(parser, string, unescape_type, true);
564
574
  }
565
575
 
566
- // This function is similar to yp_unescape_manipulate_string, except it doesn't
576
+ // This function is similar to pm_unescape_manipulate_string, except it doesn't
567
577
  // actually perform any string manipulations. Instead, it calculates how long
568
578
  // the unescaped character is, and returns that value
569
579
  size_t
570
- yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
571
- assert(unescape_type != YP_UNESCAPE_NONE);
580
+ pm_unescape_calculate_difference(pm_parser_t *parser, const uint8_t *backslash, pm_unescape_type_t unescape_type, bool expect_single_codepoint) {
581
+ assert(unescape_type != PM_UNESCAPE_NONE);
572
582
 
573
583
  if (backslash + 1 >= parser->end) {
574
584
  return 0;
@@ -579,17 +589,26 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash,
579
589
  case '\'':
580
590
  return 2;
581
591
  default: {
582
- if (unescape_type == YP_UNESCAPE_MINIMAL) {
583
- return 1 + yp_char_width(parser, backslash + 1, parser->end);
592
+ if (unescape_type == PM_UNESCAPE_WHITESPACE) {
593
+ if (backslash[1] == '\r' && backslash[2] == '\n') {
594
+ return 2;
595
+ }
596
+ size_t whitespace = pm_strspn_whitespace(backslash + 1, 1);
597
+ if (whitespace > 0) {
598
+ return whitespace;
599
+ }
600
+ }
601
+ if (unescape_type == PM_UNESCAPE_WHITESPACE || unescape_type == PM_UNESCAPE_MINIMAL) {
602
+ return 1 + pm_char_width(parser, backslash + 1, parser->end);
584
603
  }
585
604
 
586
605
  // This is the only type of unescaping left. In this case we need to
587
606
  // handle all of the different unescapes.
588
- assert(unescape_type == YP_UNESCAPE_ALL);
607
+ assert(unescape_type == PM_UNESCAPE_ALL);
589
608
 
590
- uint8_t flags = YP_UNESCAPE_FLAG_NONE;
609
+ uint8_t flags = PM_UNESCAPE_FLAG_NONE;
591
610
  if (expect_single_codepoint) {
592
- flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
611
+ flags |= PM_UNESCAPE_FLAG_EXPECT_SINGLE;
593
612
  }
594
613
 
595
614
  const uint8_t *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, NULL);
@@ -603,16 +622,16 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash,
603
622
  // This is one of the main entry points into the extension. It accepts a source
604
623
  // string, a type of unescaping, and a pointer to a result string. It returns a
605
624
  // boolean indicating whether or not the unescaping was successful.
606
- YP_EXPORTED_FUNCTION bool
607
- yp_unescape_string(const uint8_t *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
608
- yp_parser_t parser;
609
- yp_parser_init(&parser, start, length, NULL);
625
+ PRISM_EXPORTED_FUNCTION bool
626
+ pm_unescape_string(const uint8_t *start, size_t length, pm_unescape_type_t unescape_type, pm_string_t *result) {
627
+ pm_parser_t parser;
628
+ pm_parser_init(&parser, start, length, NULL);
610
629
 
611
- yp_string_shared_init(result, start, start + length);
612
- yp_unescape_manipulate_string(&parser, result, unescape_type);
630
+ pm_string_shared_init(result, start, start + length);
631
+ pm_unescape_manipulate_string(&parser, result, unescape_type);
613
632
 
614
- bool success = yp_list_empty_p(&parser.error_list);
615
- yp_parser_free(&parser);
633
+ bool success = pm_list_empty_p(&parser.error_list);
634
+ pm_parser_free(&parser);
616
635
 
617
636
  return success;
618
637
  }
@@ -0,0 +1,103 @@
1
+ #include "prism/util/pm_buffer.h"
2
+
3
+ #define PRISM_BUFFER_INITIAL_SIZE 1024
4
+
5
+ // Return the size of the pm_buffer_t struct.
6
+ size_t
7
+ pm_buffer_sizeof(void) {
8
+ return sizeof(pm_buffer_t);
9
+ }
10
+
11
+ // Initialize a pm_buffer_t with its default values.
12
+ bool
13
+ pm_buffer_init(pm_buffer_t *buffer) {
14
+ buffer->length = 0;
15
+ buffer->capacity = PRISM_BUFFER_INITIAL_SIZE;
16
+
17
+ buffer->value = (char *) malloc(PRISM_BUFFER_INITIAL_SIZE);
18
+ return buffer->value != NULL;
19
+ }
20
+
21
+ #undef PRISM_BUFFER_INITIAL_SIZE
22
+
23
+ // Return the value of the buffer.
24
+ char *
25
+ pm_buffer_value(pm_buffer_t *buffer) {
26
+ return buffer->value;
27
+ }
28
+
29
+ // Return the length of the buffer.
30
+ size_t
31
+ pm_buffer_length(pm_buffer_t *buffer) {
32
+ return buffer->length;
33
+ }
34
+
35
+ // Append the given amount of space to the buffer.
36
+ static inline void
37
+ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
38
+ size_t next_length = buffer->length + length;
39
+
40
+ if (next_length > buffer->capacity) {
41
+ do {
42
+ buffer->capacity *= 2;
43
+ } while (next_length > buffer->capacity);
44
+
45
+ buffer->value = realloc(buffer->value, buffer->capacity);
46
+ }
47
+
48
+ buffer->length = next_length;
49
+ }
50
+
51
+ // Append a generic pointer to memory to the buffer.
52
+ static inline void
53
+ pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
54
+ pm_buffer_append_length(buffer, length);
55
+ memcpy(buffer->value + (buffer->length - length), source, length);
56
+ }
57
+
58
+ // Append the given amount of space as zeroes to the buffer.
59
+ void
60
+ pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) {
61
+ pm_buffer_append_length(buffer, length);
62
+ memset(buffer->value + (buffer->length - length), 0, length);
63
+ }
64
+
65
+ // Append a string to the buffer.
66
+ void
67
+ pm_buffer_append_str(pm_buffer_t *buffer, const char *value, size_t length) {
68
+ pm_buffer_append(buffer, value, length);
69
+ }
70
+
71
+ // Append a list of bytes to the buffer.
72
+ void
73
+ pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length) {
74
+ pm_buffer_append(buffer, (const char *) value, length);
75
+ }
76
+
77
+ // Append a single byte to the buffer.
78
+ void
79
+ pm_buffer_append_u8(pm_buffer_t *buffer, uint8_t value) {
80
+ const void *source = &value;
81
+ pm_buffer_append(buffer, source, sizeof(uint8_t));
82
+ }
83
+
84
+ // Append a 32-bit unsigned integer to the buffer.
85
+ void
86
+ pm_buffer_append_u32(pm_buffer_t *buffer, uint32_t value) {
87
+ if (value < 128) {
88
+ pm_buffer_append_u8(buffer, (uint8_t) value);
89
+ } else {
90
+ uint32_t n = value;
91
+ while (n >= 128) {
92
+ pm_buffer_append_u8(buffer, (uint8_t) (n | 128));
93
+ n >>= 7;
94
+ }
95
+ pm_buffer_append_u8(buffer, (uint8_t) n);
96
+ }
97
+ }
98
+
99
+ // Free the memory associated with the buffer.
100
+ void
101
+ pm_buffer_free(pm_buffer_t *buffer) {
102
+ free(buffer->value);
103
+ }