prism 0.19.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +102 -1
  3. data/Makefile +5 -0
  4. data/README.md +9 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +84 -16
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/ruby_parser_translation.md +19 -0
  13. data/docs/serialization.md +19 -5
  14. data/ext/prism/api_node.c +1989 -1525
  15. data/ext/prism/extension.c +130 -30
  16. data/ext/prism/extension.h +2 -2
  17. data/include/prism/ast.h +1700 -505
  18. data/include/prism/defines.h +8 -0
  19. data/include/prism/diagnostic.h +49 -7
  20. data/include/prism/encoding.h +17 -0
  21. data/include/prism/options.h +40 -14
  22. data/include/prism/parser.h +34 -18
  23. data/include/prism/util/pm_buffer.h +9 -0
  24. data/include/prism/util/pm_constant_pool.h +18 -0
  25. data/include/prism/util/pm_newline_list.h +4 -14
  26. data/include/prism/util/pm_strpbrk.h +4 -1
  27. data/include/prism/version.h +2 -2
  28. data/include/prism.h +19 -2
  29. data/lib/prism/debug.rb +11 -5
  30. data/lib/prism/desugar_compiler.rb +225 -80
  31. data/lib/prism/dot_visitor.rb +36 -14
  32. data/lib/prism/dsl.rb +302 -299
  33. data/lib/prism/ffi.rb +107 -76
  34. data/lib/prism/lex_compat.rb +17 -1
  35. data/lib/prism/node.rb +4580 -2607
  36. data/lib/prism/node_ext.rb +27 -4
  37. data/lib/prism/parse_result.rb +75 -29
  38. data/lib/prism/serialize.rb +633 -305
  39. data/lib/prism/translation/parser/compiler.rb +1838 -0
  40. data/lib/prism/translation/parser/lexer.rb +335 -0
  41. data/lib/prism/translation/parser/rubocop.rb +45 -0
  42. data/lib/prism/translation/parser.rb +190 -0
  43. data/lib/prism/translation/parser33.rb +12 -0
  44. data/lib/prism/translation/parser34.rb +12 -0
  45. data/lib/prism/translation/ripper.rb +696 -0
  46. data/lib/prism/translation/ruby_parser.rb +1521 -0
  47. data/lib/prism/translation.rb +11 -0
  48. data/lib/prism.rb +1 -1
  49. data/prism.gemspec +18 -7
  50. data/rbi/prism.rbi +150 -88
  51. data/rbi/prism_static.rbi +15 -3
  52. data/sig/prism.rbs +996 -961
  53. data/sig/prism_static.rbs +123 -46
  54. data/src/diagnostic.c +264 -219
  55. data/src/encoding.c +21 -26
  56. data/src/node.c +2 -6
  57. data/src/options.c +29 -5
  58. data/src/prettyprint.c +176 -44
  59. data/src/prism.c +1499 -564
  60. data/src/serialize.c +35 -21
  61. data/src/token_type.c +353 -4
  62. data/src/util/pm_buffer.c +11 -0
  63. data/src/util/pm_constant_pool.c +37 -11
  64. data/src/util/pm_newline_list.c +6 -15
  65. data/src/util/pm_string.c +0 -7
  66. data/src/util/pm_strpbrk.c +122 -14
  67. metadata +16 -5
  68. data/docs/building.md +0 -29
  69. data/lib/prism/ripper_compat.rb +0 -207
@@ -1,5 +1,9 @@
1
1
  #include "prism/extension.h"
2
2
 
3
+ #ifdef _WIN32
4
+ #include <ruby/win32.h>
5
+ #endif
6
+
3
7
  // NOTE: this file should contain only bindings. All non-trivial logic should be
4
8
  // in libprism so it can be shared with its various callers.
5
9
 
@@ -21,7 +25,7 @@ ID rb_option_id_filepath;
21
25
  ID rb_option_id_encoding;
22
26
  ID rb_option_id_line;
23
27
  ID rb_option_id_frozen_string_literal;
24
- ID rb_option_id_verbose;
28
+ ID rb_option_id_version;
25
29
  ID rb_option_id_scopes;
26
30
 
27
31
  /******************************************************************************/
@@ -129,8 +133,14 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
129
133
  if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
130
134
  } else if (key_id == rb_option_id_frozen_string_literal) {
131
135
  if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
132
- } else if (key_id == rb_option_id_verbose) {
133
- pm_options_suppress_warnings_set(options, value != Qtrue);
136
+ } else if (key_id == rb_option_id_version) {
137
+ if (!NIL_P(value)) {
138
+ const char *version = check_string(value);
139
+
140
+ if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
141
+ rb_raise(rb_eArgError, "invalid version: %"PRIsVALUE, value);
142
+ }
143
+ }
134
144
  } else if (key_id == rb_option_id_scopes) {
135
145
  if (!NIL_P(value)) build_options_scopes(options, value);
136
146
  } else {
@@ -206,20 +216,29 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options)
206
216
  /**
207
217
  * Read options for methods that look like (filepath, **options).
208
218
  */
209
- static bool
219
+ static void
210
220
  file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
211
221
  VALUE filepath;
212
222
  VALUE keywords;
213
223
  rb_scan_args(argc, argv, "1:", &filepath, &keywords);
214
224
 
225
+ Check_Type(filepath, T_STRING);
226
+
215
227
  extract_options(options, filepath, keywords);
216
228
 
217
- if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
229
+ const char * string_source = (const char *) pm_string_source(&options->filepath);
230
+
231
+ if (!pm_string_mapped_init(input, string_source)) {
218
232
  pm_options_free(options);
219
- return false;
220
- }
221
233
 
222
- return true;
234
+ #ifdef _WIN32
235
+ int e = rb_w32_map_errno(GetLastError());
236
+ #else
237
+ int e = errno;
238
+ #endif
239
+
240
+ rb_syserr_fail(e, string_source);
241
+ }
223
242
  }
224
243
 
225
244
  /******************************************************************************/
@@ -293,7 +312,8 @@ static VALUE
293
312
  dump_file(int argc, VALUE *argv, VALUE self) {
294
313
  pm_string_t input;
295
314
  pm_options_t options = { 0 };
296
- if (!file_options(argc, argv, &input, &options)) return Qnil;
315
+
316
+ file_options(argc, argv, &input, &options);
297
317
 
298
318
  VALUE value = dump_input(&input, &options);
299
319
  pm_string_free(&input);
@@ -393,12 +413,25 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
393
413
  LONG2FIX(error->location.end - error->location.start)
394
414
  };
395
415
 
416
+ VALUE level = Qnil;
417
+ switch (error->level) {
418
+ case PM_ERROR_LEVEL_FATAL:
419
+ level = ID2SYM(rb_intern("fatal"));
420
+ break;
421
+ case PM_ERROR_LEVEL_ARGUMENT:
422
+ level = ID2SYM(rb_intern("argument"));
423
+ break;
424
+ default:
425
+ rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
426
+ }
427
+
396
428
  VALUE error_argv[] = {
397
429
  rb_enc_str_new_cstr(error->message, encoding),
398
- rb_class_new_instance(3, location_argv, rb_cPrismLocation)
430
+ rb_class_new_instance(3, location_argv, rb_cPrismLocation),
431
+ level
399
432
  };
400
433
 
401
- rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cPrismParseError));
434
+ rb_ary_push(errors, rb_class_new_instance(3, error_argv, rb_cPrismParseError));
402
435
  }
403
436
 
404
437
  return errors;
@@ -419,12 +452,25 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
419
452
  LONG2FIX(warning->location.end - warning->location.start)
420
453
  };
421
454
 
455
+ VALUE level = Qnil;
456
+ switch (warning->level) {
457
+ case PM_WARNING_LEVEL_DEFAULT:
458
+ level = ID2SYM(rb_intern("default"));
459
+ break;
460
+ case PM_WARNING_LEVEL_VERBOSE:
461
+ level = ID2SYM(rb_intern("verbose"));
462
+ break;
463
+ default:
464
+ rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level);
465
+ }
466
+
422
467
  VALUE warning_argv[] = {
423
468
  rb_enc_str_new_cstr(warning->message, encoding),
424
- rb_class_new_instance(3, location_argv, rb_cPrismLocation)
469
+ rb_class_new_instance(3, location_argv, rb_cPrismLocation),
470
+ level
425
471
  };
426
472
 
427
- rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cPrismParseWarning));
473
+ rb_ary_push(warnings, rb_class_new_instance(3, warning_argv, rb_cPrismParseWarning));
428
474
  }
429
475
 
430
476
  return warnings;
@@ -496,8 +542,9 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
496
542
  pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
497
543
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
498
544
 
545
+ VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
499
546
  VALUE offsets = rb_ary_new();
500
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
547
+ VALUE source_argv[] = { source_string, LONG2NUM(parser.start_line), offsets };
501
548
  VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
502
549
 
503
550
  parse_lex_data_t parse_lex_data = {
@@ -515,17 +562,21 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
515
562
  parser.lex_callback = &lex_callback;
516
563
  pm_node_t *node = pm_parse(&parser);
517
564
 
518
- // Here we need to update the source range to have the correct newline
519
- // offsets. We do it here because we've already created the object and given
520
- // it over to all of the tokens.
565
+ // Here we need to update the Source object to have the correct
566
+ // encoding for the source string and the correct newline offsets.
567
+ // We do it here because we've already created the Source object and given
568
+ // it over to all of the tokens, and both of these are only set after pm_parse().
569
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
570
+ rb_enc_associate(source_string, encoding);
571
+
521
572
  for (size_t index = 0; index < parser.newline_list.size; index++) {
522
- rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
573
+ rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
523
574
  }
524
575
 
525
576
  VALUE value;
526
577
  if (return_nodes) {
527
578
  value = rb_ary_new_capa(2);
528
- rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
579
+ rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
529
580
  rb_ary_push(value, parse_lex_data.tokens);
530
581
  } else {
531
582
  value = parse_lex_data.tokens;
@@ -577,7 +628,8 @@ static VALUE
577
628
  lex_file(int argc, VALUE *argv, VALUE self) {
578
629
  pm_string_t input;
579
630
  pm_options_t options = { 0 };
580
- if (!file_options(argc, argv, &input, &options)) return Qnil;
631
+
632
+ file_options(argc, argv, &input, &options);
581
633
 
582
634
  VALUE value = parse_lex_input(&input, &options, false);
583
635
  pm_string_free(&input);
@@ -603,7 +655,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
603
655
 
604
656
  VALUE source = pm_source_new(&parser, encoding);
605
657
  VALUE result_argv[] = {
606
- pm_ast_new(&parser, node, encoding),
658
+ pm_ast_new(&parser, node, encoding, source),
607
659
  parser_comments(&parser, source),
608
660
  parser_magic_comments(&parser, source),
609
661
  parser_data_loc(&parser, source),
@@ -635,10 +687,13 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
635
687
  * integer or nil. Note that this is 1-indexed.
636
688
  * * `frozen_string_literal` - whether or not the frozen string literal pragma
637
689
  * has been set. This should be a boolean or nil.
638
- * * `verbose` - the current level of verbosity. This controls whether or not
639
- * the parser emits warnings. This should be a boolean or nil.
690
+ * * `version` - the version of prism that should be used to parse Ruby code. By
691
+ default prism assumes you want to parse with the latest version of
692
+ * prism (which you can trigger with `nil` or `"latest"`). If you want to
693
+ * parse exactly as CRuby 3.3.0 would, then you can pass `"3.3.0"`.
640
694
  * * `scopes` - the locals that are in scope surrounding the code that is being
641
- * parsed. This should be an array of arrays of symbols or nil.
695
+ * parsed. This should be an array of arrays of symbols or nil. Scopes are
696
+ * ordered from the outermost scope to the innermost one.
642
697
  */
643
698
  static VALUE
644
699
  parse(int argc, VALUE *argv, VALUE self) {
@@ -675,7 +730,8 @@ static VALUE
675
730
  parse_file(int argc, VALUE *argv, VALUE self) {
676
731
  pm_string_t input;
677
732
  pm_options_t options = { 0 };
678
- if (!file_options(argc, argv, &input, &options)) return Qnil;
733
+
734
+ file_options(argc, argv, &input, &options);
679
735
 
680
736
  VALUE value = parse_input(&input, &options);
681
737
  pm_string_free(&input);
@@ -735,7 +791,8 @@ static VALUE
735
791
  parse_file_comments(int argc, VALUE *argv, VALUE self) {
736
792
  pm_string_t input;
737
793
  pm_options_t options = { 0 };
738
- if (!file_options(argc, argv, &input, &options)) return Qnil;
794
+
795
+ file_options(argc, argv, &input, &options);
739
796
 
740
797
  VALUE value = parse_input_comments(&input, &options);
741
798
  pm_string_free(&input);
@@ -789,7 +846,8 @@ static VALUE
789
846
  parse_lex_file(int argc, VALUE *argv, VALUE self) {
790
847
  pm_string_t input;
791
848
  pm_options_t options = { 0 };
792
- if (!file_options(argc, argv, &input, &options)) return Qnil;
849
+
850
+ file_options(argc, argv, &input, &options);
793
851
 
794
852
  VALUE value = parse_lex_input(&input, &options, true);
795
853
  pm_string_free(&input);
@@ -846,7 +904,8 @@ static VALUE
846
904
  parse_file_success_p(int argc, VALUE *argv, VALUE self) {
847
905
  pm_string_t input;
848
906
  pm_options_t options = { 0 };
849
- if (!file_options(argc, argv, &input, &options)) return Qnil;
907
+
908
+ file_options(argc, argv, &input, &options);
850
909
 
851
910
  VALUE result = parse_input_success_p(&input, &options);
852
911
  pm_string_free(&input);
@@ -924,7 +983,17 @@ profile_file(VALUE self, VALUE filepath) {
924
983
  pm_string_t input;
925
984
 
926
985
  const char *checked = check_string(filepath);
927
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
986
+ Check_Type(filepath, T_STRING);
987
+
988
+ if (!pm_string_mapped_init(&input, checked)) {
989
+ #ifdef _WIN32
990
+ int e = rb_w32_map_errno(GetLastError());
991
+ #else
992
+ int e = errno;
993
+ #endif
994
+
995
+ rb_syserr_fail(e, checked);
996
+ }
928
997
 
929
998
  pm_options_t options = { 0 };
930
999
  pm_options_filepath_set(&options, checked);
@@ -971,6 +1040,36 @@ inspect_node(VALUE self, VALUE source) {
971
1040
  return string;
972
1041
  }
973
1042
 
1043
+ /**
1044
+ * call-seq:
1045
+ * Debug::format_errors(source, colorize) -> String
1046
+ *
1047
+ * Format the errors that are found when parsing the given source string.
1048
+ */
1049
+ static VALUE
1050
+ format_errors(VALUE self, VALUE source, VALUE colorize) {
1051
+ pm_string_t input;
1052
+ input_load_string(&input, source);
1053
+
1054
+ pm_parser_t parser;
1055
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
1056
+
1057
+ pm_node_t *node = pm_parse(&parser);
1058
+ pm_buffer_t buffer = { 0 };
1059
+
1060
+ pm_parser_errors_format(&parser, &buffer, RTEST(colorize));
1061
+
1062
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
1063
+ VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
1064
+
1065
+ pm_buffer_free(&buffer);
1066
+ pm_node_destroy(&parser, node);
1067
+ pm_parser_free(&parser);
1068
+ pm_string_free(&input);
1069
+
1070
+ return result;
1071
+ }
1072
+
974
1073
  /******************************************************************************/
975
1074
  /* Initialization of the extension */
976
1075
  /******************************************************************************/
@@ -1012,7 +1111,7 @@ Init_prism(void) {
1012
1111
  rb_option_id_encoding = rb_intern_const("encoding");
1013
1112
  rb_option_id_line = rb_intern_const("line");
1014
1113
  rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
1015
- rb_option_id_verbose = rb_intern_const("verbose");
1114
+ rb_option_id_version = rb_intern_const("version");
1016
1115
  rb_option_id_scopes = rb_intern_const("scopes");
1017
1116
 
1018
1117
  /**
@@ -1048,6 +1147,7 @@ Init_prism(void) {
1048
1147
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
1049
1148
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
1050
1149
  rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
1150
+ rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
1051
1151
 
1052
1152
  // Next, initialize the other APIs.
1053
1153
  Init_prism_api_node();
@@ -1,7 +1,7 @@
1
1
  #ifndef PRISM_EXT_NODE_H
2
2
  #define PRISM_EXT_NODE_H
3
3
 
4
- #define EXPECTED_PRISM_VERSION "0.19.0"
4
+ #define EXPECTED_PRISM_VERSION "0.24.0"
5
5
 
6
6
  #include <ruby.h>
7
7
  #include <ruby/encoding.h>
@@ -9,7 +9,7 @@
9
9
 
10
10
  VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
11
11
  VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
12
- VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
12
+ VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source);
13
13
 
14
14
  void Init_prism_api_node(void);
15
15
  void Init_prism_pack(void);