yarp 0.8.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/ext/yarp/extension.c CHANGED
@@ -83,7 +83,21 @@ dump(int argc, VALUE *argv, VALUE self) {
83
83
 
84
84
  yp_string_t input;
85
85
  input_load_string(&input, string);
86
- return dump_input(&input, check_string(filepath));
86
+
87
+ #ifdef YARP_DEBUG_MODE_BUILD
88
+ size_t length = yp_string_length(&input);
89
+ char* dup = malloc(length);
90
+ memcpy(dup, yp_string_source(&input), length);
91
+ yp_string_constant_init(&input, dup, length);
92
+ #endif
93
+
94
+ VALUE value = dump_input(&input, check_string(filepath));
95
+
96
+ #ifdef YARP_DEBUG_MODE_BUILD
97
+ free(dup);
98
+ #endif
99
+
100
+ return value;
87
101
  }
88
102
 
89
103
  // Dump the AST corresponding to the given file to a string.
@@ -198,52 +212,67 @@ typedef struct {
198
212
  VALUE source;
199
213
  VALUE tokens;
200
214
  rb_encoding *encoding;
201
- } lex_data_t;
215
+ } parse_lex_data_t;
202
216
 
203
217
  // This is passed as a callback to the parser. It gets called every time a new
204
218
  // token is found. Once found, we initialize a new instance of Token and push it
205
219
  // onto the tokens array.
206
220
  static void
207
- lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
208
- lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
221
+ parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
222
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
209
223
 
210
224
  VALUE yields = rb_ary_new_capa(2);
211
- rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
225
+ rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
212
226
  rb_ary_push(yields, INT2FIX(parser->lex_state));
213
227
 
214
- rb_ary_push(lex_data->tokens, yields);
228
+ rb_ary_push(parse_lex_data->tokens, yields);
215
229
  }
216
230
 
217
231
  // This is called whenever the encoding changes based on the magic comment at
218
232
  // the top of the file. We use it to update the encoding that we are using to
219
233
  // create tokens.
220
234
  static void
221
- lex_encoding_changed_callback(yp_parser_t *parser) {
222
- lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
223
- lex_data->encoding = rb_enc_find(parser->encoding.name);
235
+ parse_lex_encoding_changed_callback(yp_parser_t *parser) {
236
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
237
+ parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
238
+
239
+ // Since the encoding changed, we need to go back and change the encoding of
240
+ // the tokens that were already lexed. This is only going to end up being
241
+ // one or two tokens, since the encoding can only change at the top of the
242
+ // file.
243
+ VALUE tokens = parse_lex_data->tokens;
244
+ for (long index = 0; index < RARRAY_LEN(tokens); index++) {
245
+ VALUE yields = rb_ary_entry(tokens, index);
246
+ VALUE token = rb_ary_entry(yields, 0);
247
+
248
+ VALUE value = rb_ivar_get(token, rb_intern("@value"));
249
+ rb_enc_associate(value, parse_lex_data->encoding);
250
+ ENC_CODERANGE_CLEAR(value);
251
+ }
224
252
  }
225
253
 
226
- // Return an array of tokens corresponding to the given source.
254
+ // Parse the given input and return a ParseResult containing just the tokens or
255
+ // the nodes and tokens.
227
256
  static VALUE
228
- lex_input(yp_string_t *input, const char *filepath) {
257
+ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
229
258
  yp_parser_t parser;
230
259
  yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
231
- yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
260
+ yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
232
261
 
233
262
  VALUE offsets = rb_ary_new();
234
- VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
263
+ VALUE source_argv[] = { rb_str_new((const char *) yp_string_source(input), yp_string_length(input)), offsets };
235
264
  VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
236
265
 
237
- lex_data_t lex_data = {
266
+ parse_lex_data_t parse_lex_data = {
238
267
  .source = source,
239
268
  .tokens = rb_ary_new(),
240
269
  .encoding = rb_utf8_encoding()
241
270
  };
242
271
 
243
- lex_data_t *data = &lex_data;
272
+ parse_lex_data_t *data = &parse_lex_data;
244
273
  yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
245
274
  .data = (void *) data,
246
- .callback = lex_token,
275
+ .callback = parse_lex_token,
247
276
  };
248
277
 
249
278
  parser.lex_callback = &lex_callback;
@@ -256,20 +285,26 @@ lex_input(yp_string_t *input, const char *filepath) {
256
285
  rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
257
286
  }
258
287
 
288
+ VALUE value;
289
+ if (return_nodes) {
290
+ value = rb_ary_new_capa(2);
291
+ rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
292
+ rb_ary_push(value, parse_lex_data.tokens);
293
+ } else {
294
+ value = parse_lex_data.tokens;
295
+ }
296
+
259
297
  VALUE result_argv[] = {
260
- lex_data.tokens,
298
+ value,
261
299
  parser_comments(&parser, source),
262
- parser_errors(&parser, lex_data.encoding, source),
263
- parser_warnings(&parser, lex_data.encoding, source),
300
+ parser_errors(&parser, parse_lex_data.encoding, source),
301
+ parser_warnings(&parser, parse_lex_data.encoding, source),
264
302
  source
265
303
  };
266
304
 
267
- VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
268
-
269
305
  yp_node_destroy(&parser, node);
270
306
  yp_parser_free(&parser);
271
-
272
- return result;
307
+ return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
273
308
  }
274
309
 
275
310
  // Return an array of tokens corresponding to the given string.
@@ -281,7 +316,8 @@ lex(int argc, VALUE *argv, VALUE self) {
281
316
 
282
317
  yp_string_t input;
283
318
  input_load_string(&input, string);
284
- return lex_input(&input, check_string(filepath));
319
+
320
+ return parse_lex_input(&input, check_string(filepath), false);
285
321
  }
286
322
 
287
323
  // Return an array of tokens corresponding to the given file.
@@ -292,7 +328,7 @@ lex_file(VALUE self, VALUE filepath) {
292
328
  const char *checked = check_string(filepath);
293
329
  if (!yp_string_mapped_init(&input, checked)) return Qnil;
294
330
 
295
- VALUE value = lex_input(&input, checked);
331
+ VALUE value = parse_lex_input(&input, checked, false);
296
332
  yp_string_free(&input);
297
333
 
298
334
  return value;
@@ -368,6 +404,32 @@ parse_file(VALUE self, VALUE filepath) {
368
404
  return value;
369
405
  }
370
406
 
407
+ // Parse the given string and return a ParseResult instance.
408
+ static VALUE
409
+ parse_lex(int argc, VALUE *argv, VALUE self) {
410
+ VALUE string;
411
+ VALUE filepath;
412
+ rb_scan_args(argc, argv, "11", &string, &filepath);
413
+
414
+ yp_string_t input;
415
+ input_load_string(&input, string);
416
+ return parse_lex_input(&input, check_string(filepath), true);
417
+ }
418
+
419
+ // Parse and lex the given file and return a ParseResult instance.
420
+ static VALUE
421
+ parse_lex_file(VALUE self, VALUE filepath) {
422
+ yp_string_t input;
423
+
424
+ const char *checked = check_string(filepath);
425
+ if (!yp_string_mapped_init(&input, checked)) return Qnil;
426
+
427
+ VALUE value = parse_lex_input(&input, checked, true);
428
+ yp_string_free(&input);
429
+
430
+ return value;
431
+ }
432
+
371
433
  /******************************************************************************/
372
434
  /* Utility functions exposed to make testing easier */
373
435
  /******************************************************************************/
@@ -380,7 +442,7 @@ named_captures(VALUE self, VALUE source) {
380
442
  yp_string_list_t string_list;
381
443
  yp_string_list_init(&string_list);
382
444
 
383
- if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
445
+ if (!yp_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
384
446
  yp_string_list_free(&string_list);
385
447
  return Qnil;
386
448
  }
@@ -388,7 +450,7 @@ named_captures(VALUE self, VALUE source) {
388
450
  VALUE names = rb_ary_new();
389
451
  for (size_t index = 0; index < string_list.length; index++) {
390
452
  const yp_string_t *string = &string_list.strings[index];
391
- rb_ary_push(names, rb_str_new(yp_string_source(string), yp_string_length(string)));
453
+ rb_ary_push(names, rb_str_new((const char *) yp_string_source(string), yp_string_length(string)));
392
454
  }
393
455
 
394
456
  yp_string_list_free(&string_list);
@@ -401,8 +463,8 @@ static VALUE
401
463
  unescape(VALUE source, yp_unescape_type_t unescape_type) {
402
464
  yp_string_t result;
403
465
 
404
- if (yp_unescape_string(RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
405
- VALUE str = rb_str_new(yp_string_source(&result), yp_string_length(&result));
466
+ if (yp_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
467
+ VALUE str = rb_str_new((const char *) yp_string_source(&result), yp_string_length(&result));
406
468
  yp_string_free(&result);
407
469
  return str;
408
470
  } else {
@@ -436,7 +498,7 @@ static VALUE
436
498
  memsize(VALUE self, VALUE string) {
437
499
  yp_parser_t parser;
438
500
  size_t length = RSTRING_LEN(string);
439
- yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
501
+ yp_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
440
502
 
441
503
  yp_node_t *node = yp_parse(&parser);
442
504
  yp_memsize_t memsize;
@@ -521,7 +583,6 @@ Init_yarp(void) {
521
583
  // Define the version string here so that we can use the constants defined
522
584
  // in yarp.h.
523
585
  rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
524
-
525
586
  rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
526
587
 
527
588
  // First, the functions that have to do with lexing and parsing.
@@ -531,6 +592,8 @@ Init_yarp(void) {
531
592
  rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
532
593
  rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
533
594
  rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
595
+ rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
596
+ rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
534
597
 
535
598
  // Next, the functions that will be called by the parser to perform various
536
599
  // internal tasks. We expose these to make them easier to test.
data/ext/yarp/extension.h CHANGED
@@ -1,12 +1,12 @@
1
1
  #ifndef YARP_EXT_NODE_H
2
2
  #define YARP_EXT_NODE_H
3
3
 
4
+ #define EXPECTED_YARP_VERSION "0.10.0"
5
+
4
6
  #include <ruby.h>
5
7
  #include <ruby/encoding.h>
6
8
  #include "yarp.h"
7
9
 
8
- #define EXPECTED_YARP_VERSION "0.8.0"
9
-
10
10
  VALUE yp_source_new(yp_parser_t *parser);
11
11
  VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
12
12
  VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);