prism 0.19.0 → 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +102 -1
  3. data/Makefile +5 -0
  4. data/README.md +9 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +84 -16
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/ruby_parser_translation.md +19 -0
  13. data/docs/serialization.md +19 -5
  14. data/ext/prism/api_node.c +1989 -1525
  15. data/ext/prism/extension.c +130 -30
  16. data/ext/prism/extension.h +2 -2
  17. data/include/prism/ast.h +1700 -505
  18. data/include/prism/defines.h +8 -0
  19. data/include/prism/diagnostic.h +49 -7
  20. data/include/prism/encoding.h +17 -0
  21. data/include/prism/options.h +40 -14
  22. data/include/prism/parser.h +34 -18
  23. data/include/prism/util/pm_buffer.h +9 -0
  24. data/include/prism/util/pm_constant_pool.h +18 -0
  25. data/include/prism/util/pm_newline_list.h +4 -14
  26. data/include/prism/util/pm_strpbrk.h +4 -1
  27. data/include/prism/version.h +2 -2
  28. data/include/prism.h +19 -2
  29. data/lib/prism/debug.rb +11 -5
  30. data/lib/prism/desugar_compiler.rb +225 -80
  31. data/lib/prism/dot_visitor.rb +36 -14
  32. data/lib/prism/dsl.rb +302 -299
  33. data/lib/prism/ffi.rb +107 -76
  34. data/lib/prism/lex_compat.rb +17 -1
  35. data/lib/prism/node.rb +4580 -2607
  36. data/lib/prism/node_ext.rb +27 -4
  37. data/lib/prism/parse_result.rb +75 -29
  38. data/lib/prism/serialize.rb +633 -305
  39. data/lib/prism/translation/parser/compiler.rb +1838 -0
  40. data/lib/prism/translation/parser/lexer.rb +335 -0
  41. data/lib/prism/translation/parser/rubocop.rb +45 -0
  42. data/lib/prism/translation/parser.rb +190 -0
  43. data/lib/prism/translation/parser33.rb +12 -0
  44. data/lib/prism/translation/parser34.rb +12 -0
  45. data/lib/prism/translation/ripper.rb +696 -0
  46. data/lib/prism/translation/ruby_parser.rb +1521 -0
  47. data/lib/prism/translation.rb +11 -0
  48. data/lib/prism.rb +1 -1
  49. data/prism.gemspec +18 -7
  50. data/rbi/prism.rbi +150 -88
  51. data/rbi/prism_static.rbi +15 -3
  52. data/sig/prism.rbs +996 -961
  53. data/sig/prism_static.rbs +123 -46
  54. data/src/diagnostic.c +264 -219
  55. data/src/encoding.c +21 -26
  56. data/src/node.c +2 -6
  57. data/src/options.c +29 -5
  58. data/src/prettyprint.c +176 -44
  59. data/src/prism.c +1499 -564
  60. data/src/serialize.c +35 -21
  61. data/src/token_type.c +353 -4
  62. data/src/util/pm_buffer.c +11 -0
  63. data/src/util/pm_constant_pool.c +37 -11
  64. data/src/util/pm_newline_list.c +6 -15
  65. data/src/util/pm_string.c +0 -7
  66. data/src/util/pm_strpbrk.c +122 -14
  67. metadata +16 -5
  68. data/docs/building.md +0 -29
  69. data/lib/prism/ripper_compat.rb +0 -207
@@ -1,5 +1,9 @@
1
1
  #include "prism/extension.h"
2
2
 
3
+ #ifdef _WIN32
4
+ #include <ruby/win32.h>
5
+ #endif
6
+
3
7
  // NOTE: this file should contain only bindings. All non-trivial logic should be
4
8
  // in libprism so it can be shared among its various callers.
5
9
 
@@ -21,7 +25,7 @@ ID rb_option_id_filepath;
21
25
  ID rb_option_id_encoding;
22
26
  ID rb_option_id_line;
23
27
  ID rb_option_id_frozen_string_literal;
24
- ID rb_option_id_verbose;
28
+ ID rb_option_id_version;
25
29
  ID rb_option_id_scopes;
26
30
 
27
31
  /******************************************************************************/
@@ -129,8 +133,14 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
129
133
  if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
130
134
  } else if (key_id == rb_option_id_frozen_string_literal) {
131
135
  if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
132
- } else if (key_id == rb_option_id_verbose) {
133
- pm_options_suppress_warnings_set(options, value != Qtrue);
136
+ } else if (key_id == rb_option_id_version) {
137
+ if (!NIL_P(value)) {
138
+ const char *version = check_string(value);
139
+
140
+ if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
141
+ rb_raise(rb_eArgError, "invalid version: %"PRIsVALUE, value);
142
+ }
143
+ }
134
144
  } else if (key_id == rb_option_id_scopes) {
135
145
  if (!NIL_P(value)) build_options_scopes(options, value);
136
146
  } else {
@@ -206,20 +216,29 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options)
206
216
  /**
207
217
  * Read options for methods that look like (filepath, **options).
208
218
  */
209
- static bool
219
+ static void
210
220
  file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
211
221
  VALUE filepath;
212
222
  VALUE keywords;
213
223
  rb_scan_args(argc, argv, "1:", &filepath, &keywords);
214
224
 
225
+ Check_Type(filepath, T_STRING);
226
+
215
227
  extract_options(options, filepath, keywords);
216
228
 
217
- if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
229
+ const char * string_source = (const char *) pm_string_source(&options->filepath);
230
+
231
+ if (!pm_string_mapped_init(input, string_source)) {
218
232
  pm_options_free(options);
219
- return false;
220
- }
221
233
 
222
- return true;
234
+ #ifdef _WIN32
235
+ int e = rb_w32_map_errno(GetLastError());
236
+ #else
237
+ int e = errno;
238
+ #endif
239
+
240
+ rb_syserr_fail(e, string_source);
241
+ }
223
242
  }
224
243
 
225
244
  /******************************************************************************/
@@ -293,7 +312,8 @@ static VALUE
293
312
  dump_file(int argc, VALUE *argv, VALUE self) {
294
313
  pm_string_t input;
295
314
  pm_options_t options = { 0 };
296
- if (!file_options(argc, argv, &input, &options)) return Qnil;
315
+
316
+ file_options(argc, argv, &input, &options);
297
317
 
298
318
  VALUE value = dump_input(&input, &options);
299
319
  pm_string_free(&input);
@@ -393,12 +413,25 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
393
413
  LONG2FIX(error->location.end - error->location.start)
394
414
  };
395
415
 
416
+ VALUE level = Qnil;
417
+ switch (error->level) {
418
+ case PM_ERROR_LEVEL_FATAL:
419
+ level = ID2SYM(rb_intern("fatal"));
420
+ break;
421
+ case PM_ERROR_LEVEL_ARGUMENT:
422
+ level = ID2SYM(rb_intern("argument"));
423
+ break;
424
+ default:
425
+ rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
426
+ }
427
+
396
428
  VALUE error_argv[] = {
397
429
  rb_enc_str_new_cstr(error->message, encoding),
398
- rb_class_new_instance(3, location_argv, rb_cPrismLocation)
430
+ rb_class_new_instance(3, location_argv, rb_cPrismLocation),
431
+ level
399
432
  };
400
433
 
401
- rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cPrismParseError));
434
+ rb_ary_push(errors, rb_class_new_instance(3, error_argv, rb_cPrismParseError));
402
435
  }
403
436
 
404
437
  return errors;
@@ -419,12 +452,25 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
419
452
  LONG2FIX(warning->location.end - warning->location.start)
420
453
  };
421
454
 
455
+ VALUE level = Qnil;
456
+ switch (warning->level) {
457
+ case PM_WARNING_LEVEL_DEFAULT:
458
+ level = ID2SYM(rb_intern("default"));
459
+ break;
460
+ case PM_WARNING_LEVEL_VERBOSE:
461
+ level = ID2SYM(rb_intern("verbose"));
462
+ break;
463
+ default:
464
+ rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level);
465
+ }
466
+
422
467
  VALUE warning_argv[] = {
423
468
  rb_enc_str_new_cstr(warning->message, encoding),
424
- rb_class_new_instance(3, location_argv, rb_cPrismLocation)
469
+ rb_class_new_instance(3, location_argv, rb_cPrismLocation),
470
+ level
425
471
  };
426
472
 
427
- rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cPrismParseWarning));
473
+ rb_ary_push(warnings, rb_class_new_instance(3, warning_argv, rb_cPrismParseWarning));
428
474
  }
429
475
 
430
476
  return warnings;
@@ -496,8 +542,9 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
496
542
  pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
497
543
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
498
544
 
545
+ VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
499
546
  VALUE offsets = rb_ary_new();
500
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
547
+ VALUE source_argv[] = { source_string, LONG2NUM(parser.start_line), offsets };
501
548
  VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
502
549
 
503
550
  parse_lex_data_t parse_lex_data = {
@@ -515,17 +562,21 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
515
562
  parser.lex_callback = &lex_callback;
516
563
  pm_node_t *node = pm_parse(&parser);
517
564
 
518
- // Here we need to update the source range to have the correct newline
519
- // offsets. We do it here because we've already created the object and given
520
- // it over to all of the tokens.
565
+ // Here we need to update the Source object to have the correct
566
+ // encoding for the source string and the correct newline offsets.
567
+ // We do it here because we've already created the Source object and given
568
+ // it over to all of the tokens, and both of these are only set after pm_parse().
569
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
570
+ rb_enc_associate(source_string, encoding);
571
+
521
572
  for (size_t index = 0; index < parser.newline_list.size; index++) {
522
- rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
573
+ rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
523
574
  }
524
575
 
525
576
  VALUE value;
526
577
  if (return_nodes) {
527
578
  value = rb_ary_new_capa(2);
528
- rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
579
+ rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
529
580
  rb_ary_push(value, parse_lex_data.tokens);
530
581
  } else {
531
582
  value = parse_lex_data.tokens;
@@ -577,7 +628,8 @@ static VALUE
577
628
  lex_file(int argc, VALUE *argv, VALUE self) {
578
629
  pm_string_t input;
579
630
  pm_options_t options = { 0 };
580
- if (!file_options(argc, argv, &input, &options)) return Qnil;
631
+
632
+ file_options(argc, argv, &input, &options);
581
633
 
582
634
  VALUE value = parse_lex_input(&input, &options, false);
583
635
  pm_string_free(&input);
@@ -603,7 +655,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
603
655
 
604
656
  VALUE source = pm_source_new(&parser, encoding);
605
657
  VALUE result_argv[] = {
606
- pm_ast_new(&parser, node, encoding),
658
+ pm_ast_new(&parser, node, encoding, source),
607
659
  parser_comments(&parser, source),
608
660
  parser_magic_comments(&parser, source),
609
661
  parser_data_loc(&parser, source),
@@ -635,10 +687,13 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
635
687
  * integer or nil. Note that this is 1-indexed.
636
688
  * * `frozen_string_literal` - whether or not the frozen string literal pragma
637
689
  * has been set. This should be a boolean or nil.
638
- * * `verbose` - the current level of verbosity. This controls whether or not
639
- * the parser emits warnings. This should be a boolean or nil.
690
+ * * `version` - the version of prism that should be used to parse Ruby code. By
691
+ default prism assumes you want to parse with the latest version of
692
+ * prism (which you can trigger with `nil` or `"latest"`). If you want to
693
+ * parse exactly as CRuby 3.3.0 would, then you can pass `"3.3.0"`.
640
694
  * * `scopes` - the locals that are in scope surrounding the code that is being
641
- * parsed. This should be an array of arrays of symbols or nil.
695
+ * parsed. This should be an array of arrays of symbols or nil. Scopes are
696
+ * ordered from the outermost scope to the innermost one.
642
697
  */
643
698
  static VALUE
644
699
  parse(int argc, VALUE *argv, VALUE self) {
@@ -675,7 +730,8 @@ static VALUE
675
730
  parse_file(int argc, VALUE *argv, VALUE self) {
676
731
  pm_string_t input;
677
732
  pm_options_t options = { 0 };
678
- if (!file_options(argc, argv, &input, &options)) return Qnil;
733
+
734
+ file_options(argc, argv, &input, &options);
679
735
 
680
736
  VALUE value = parse_input(&input, &options);
681
737
  pm_string_free(&input);
@@ -735,7 +791,8 @@ static VALUE
735
791
  parse_file_comments(int argc, VALUE *argv, VALUE self) {
736
792
  pm_string_t input;
737
793
  pm_options_t options = { 0 };
738
- if (!file_options(argc, argv, &input, &options)) return Qnil;
794
+
795
+ file_options(argc, argv, &input, &options);
739
796
 
740
797
  VALUE value = parse_input_comments(&input, &options);
741
798
  pm_string_free(&input);
@@ -789,7 +846,8 @@ static VALUE
789
846
  parse_lex_file(int argc, VALUE *argv, VALUE self) {
790
847
  pm_string_t input;
791
848
  pm_options_t options = { 0 };
792
- if (!file_options(argc, argv, &input, &options)) return Qnil;
849
+
850
+ file_options(argc, argv, &input, &options);
793
851
 
794
852
  VALUE value = parse_lex_input(&input, &options, true);
795
853
  pm_string_free(&input);
@@ -846,7 +904,8 @@ static VALUE
846
904
  parse_file_success_p(int argc, VALUE *argv, VALUE self) {
847
905
  pm_string_t input;
848
906
  pm_options_t options = { 0 };
849
- if (!file_options(argc, argv, &input, &options)) return Qnil;
907
+
908
+ file_options(argc, argv, &input, &options);
850
909
 
851
910
  VALUE result = parse_input_success_p(&input, &options);
852
911
  pm_string_free(&input);
@@ -924,7 +983,17 @@ profile_file(VALUE self, VALUE filepath) {
924
983
  pm_string_t input;
925
984
 
926
985
  const char *checked = check_string(filepath);
927
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
986
+ Check_Type(filepath, T_STRING);
987
+
988
+ if (!pm_string_mapped_init(&input, checked)) {
989
+ #ifdef _WIN32
990
+ int e = rb_w32_map_errno(GetLastError());
991
+ #else
992
+ int e = errno;
993
+ #endif
994
+
995
+ rb_syserr_fail(e, checked);
996
+ }
928
997
 
929
998
  pm_options_t options = { 0 };
930
999
  pm_options_filepath_set(&options, checked);
@@ -971,6 +1040,36 @@ inspect_node(VALUE self, VALUE source) {
971
1040
  return string;
972
1041
  }
973
1042
 
1043
+ /**
1044
+ * call-seq:
1045
+ * Debug::format_errors(source, colorize) -> String
1046
+ *
1047
+ * Format the errors that are found when parsing the given source string.
1048
+ */
1049
+ static VALUE
1050
+ format_errors(VALUE self, VALUE source, VALUE colorize) {
1051
+ pm_string_t input;
1052
+ input_load_string(&input, source);
1053
+
1054
+ pm_parser_t parser;
1055
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
1056
+
1057
+ pm_node_t *node = pm_parse(&parser);
1058
+ pm_buffer_t buffer = { 0 };
1059
+
1060
+ pm_parser_errors_format(&parser, &buffer, RTEST(colorize));
1061
+
1062
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
1063
+ VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
1064
+
1065
+ pm_buffer_free(&buffer);
1066
+ pm_node_destroy(&parser, node);
1067
+ pm_parser_free(&parser);
1068
+ pm_string_free(&input);
1069
+
1070
+ return result;
1071
+ }
1072
+
974
1073
  /******************************************************************************/
975
1074
  /* Initialization of the extension */
976
1075
  /******************************************************************************/
@@ -1012,7 +1111,7 @@ Init_prism(void) {
1012
1111
  rb_option_id_encoding = rb_intern_const("encoding");
1013
1112
  rb_option_id_line = rb_intern_const("line");
1014
1113
  rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
1015
- rb_option_id_verbose = rb_intern_const("verbose");
1114
+ rb_option_id_version = rb_intern_const("version");
1016
1115
  rb_option_id_scopes = rb_intern_const("scopes");
1017
1116
 
1018
1117
  /**
@@ -1048,6 +1147,7 @@ Init_prism(void) {
1048
1147
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
1049
1148
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
1050
1149
  rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
1150
+ rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
1051
1151
 
1052
1152
  // Next, initialize the other APIs.
1053
1153
  Init_prism_api_node();
@@ -1,7 +1,7 @@
1
1
  #ifndef PRISM_EXT_NODE_H
2
2
  #define PRISM_EXT_NODE_H
3
3
 
4
- #define EXPECTED_PRISM_VERSION "0.19.0"
4
+ #define EXPECTED_PRISM_VERSION "0.24.0"
5
5
 
6
6
  #include <ruby.h>
7
7
  #include <ruby/encoding.h>
@@ -9,7 +9,7 @@
9
9
 
10
10
  VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
11
11
  VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
12
- VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
12
+ VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source);
13
13
 
14
14
  void Init_prism_api_node(void);
15
15
  void Init_prism_pack(void);