prism 0.18.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -1
  3. data/README.md +2 -1
  4. data/config.yml +188 -55
  5. data/docs/building.md +9 -2
  6. data/docs/configuration.md +10 -9
  7. data/docs/encoding.md +24 -56
  8. data/docs/local_variable_depth.md +229 -0
  9. data/docs/ruby_api.md +2 -0
  10. data/docs/serialization.md +18 -13
  11. data/ext/prism/api_node.c +337 -195
  12. data/ext/prism/extconf.rb +13 -7
  13. data/ext/prism/extension.c +96 -32
  14. data/ext/prism/extension.h +1 -1
  15. data/include/prism/ast.h +340 -137
  16. data/include/prism/defines.h +17 -0
  17. data/include/prism/diagnostic.h +11 -5
  18. data/include/prism/encoding.h +248 -0
  19. data/include/prism/options.h +2 -2
  20. data/include/prism/parser.h +62 -42
  21. data/include/prism/regexp.h +2 -2
  22. data/include/prism/util/pm_buffer.h +9 -1
  23. data/include/prism/util/pm_memchr.h +2 -2
  24. data/include/prism/util/pm_strpbrk.h +3 -3
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +13 -15
  27. data/lib/prism/compiler.rb +12 -0
  28. data/lib/prism/debug.rb +9 -4
  29. data/lib/prism/desugar_compiler.rb +3 -3
  30. data/lib/prism/dispatcher.rb +56 -0
  31. data/lib/prism/dot_visitor.rb +476 -198
  32. data/lib/prism/dsl.rb +66 -46
  33. data/lib/prism/ffi.rb +16 -3
  34. data/lib/prism/lex_compat.rb +19 -9
  35. data/lib/prism/mutation_compiler.rb +20 -0
  36. data/lib/prism/node.rb +1173 -450
  37. data/lib/prism/node_ext.rb +41 -16
  38. data/lib/prism/parse_result.rb +12 -15
  39. data/lib/prism/ripper_compat.rb +49 -34
  40. data/lib/prism/serialize.rb +242 -212
  41. data/lib/prism/visitor.rb +12 -0
  42. data/lib/prism.rb +20 -4
  43. data/prism.gemspec +4 -10
  44. data/rbi/prism.rbi +605 -230
  45. data/rbi/prism_static.rbi +3 -0
  46. data/sig/prism.rbs +379 -124
  47. data/sig/prism_static.rbs +1 -0
  48. data/src/diagnostic.c +228 -222
  49. data/src/encoding.c +5137 -0
  50. data/src/node.c +66 -0
  51. data/src/options.c +21 -2
  52. data/src/prettyprint.c +806 -406
  53. data/src/prism.c +1092 -700
  54. data/src/regexp.c +3 -3
  55. data/src/serialize.c +227 -157
  56. data/src/util/pm_buffer.c +10 -1
  57. data/src/util/pm_memchr.c +1 -1
  58. data/src/util/pm_strpbrk.c +4 -4
  59. metadata +5 -11
  60. data/include/prism/enc/pm_encoding.h +0 -227
  61. data/src/enc/pm_big5.c +0 -116
  62. data/src/enc/pm_cp51932.c +0 -57
  63. data/src/enc/pm_euc_jp.c +0 -69
  64. data/src/enc/pm_gbk.c +0 -65
  65. data/src/enc/pm_shift_jis.c +0 -57
  66. data/src/enc/pm_tables.c +0 -2073
  67. data/src/enc/pm_unicode.c +0 -2369
  68. data/src/enc/pm_windows_31j.c +0 -57
data/ext/prism/extconf.rb CHANGED
@@ -22,18 +22,22 @@ if ARGV.delete("--help")
22
22
  exit!(0)
23
23
  end
24
24
 
25
- # Runs `make` in the root directory of the project. Note that this is the
26
- # `Makefile` for the overall project, not the `Makefile` that is being generated
27
- # by this script.`
28
- def make(target)
25
+ # If this gem is being built from a git source, then we need to run
26
+ # templating if it hasn't been run yet. In normal packaging, we would have
27
+ # shipped the templated files with the gem, so this wouldn't be necessary.
28
+ def generate_templates
29
29
  Dir.chdir(File.expand_path("../..", __dir__)) do
30
- # If this gem is being build from a git source, then we need to run
31
- # templating if it hasn't been run yet. In normal packaging, we would have
32
- # shipped the templated files with the gem, so this wouldn't be necessary.
33
30
  if !File.exist?("include/prism/ast.h") && Dir.exist?(".git")
34
31
  system("templates/template.rb", exception: true)
35
32
  end
33
+ end
34
+ end
36
35
 
36
+ # Runs `make` in the root directory of the project. Note that this is the
37
+ # `Makefile` for the overall project, not the `Makefile` that is being generated
38
+ # by this script.
39
+ def make(target)
40
+ Dir.chdir(File.expand_path("../..", __dir__)) do
37
41
  system("make", target, exception: true)
38
42
  end
39
43
  end
@@ -45,6 +49,7 @@ require "rbconfig"
45
49
  # `require "mkmf"` as that prepends the LLVM toolchain to PATH on TruffleRuby,
46
50
  # but we want to use the native toolchain here since libprism is run natively.
47
51
  if RUBY_ENGINE != "ruby"
52
+ generate_templates
48
53
  make("build/libprism.#{RbConfig::CONFIG["SOEXT"]}")
49
54
  File.write("Makefile", "all install clean:\n\t@#{RbConfig::CONFIG["NULLCMD"]}\n")
50
55
  return
@@ -53,6 +58,7 @@ end
53
58
  require "mkmf"
54
59
 
55
60
  # First, ensure that we can find the header for the prism library.
61
+ generate_templates # Templates should be generated before find_header.
56
62
  unless find_header("prism.h", File.expand_path("../../include", __dir__))
57
63
  raise "prism.h is required"
58
64
  end
@@ -12,7 +12,6 @@ VALUE rb_cPrismLocation;
12
12
  VALUE rb_cPrismComment;
13
13
  VALUE rb_cPrismInlineComment;
14
14
  VALUE rb_cPrismEmbDocComment;
15
- VALUE rb_cPrismDATAComment;
16
15
  VALUE rb_cPrismMagicComment;
17
16
  VALUE rb_cPrismParseError;
18
17
  VALUE rb_cPrismParseWarning;
@@ -127,7 +126,7 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
127
126
  } else if (key_id == rb_option_id_encoding) {
128
127
  if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
129
128
  } else if (key_id == rb_option_id_line) {
130
- if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
129
+ if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
131
130
  } else if (key_id == rb_option_id_frozen_string_literal) {
132
131
  if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
133
132
  } else if (key_id == rb_option_id_verbose) {
@@ -167,6 +166,7 @@ build_options(VALUE argument) {
167
166
  */
168
167
  static void
169
168
  extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
169
+ options->line = 1; // default
170
170
  if (!NIL_P(keywords)) {
171
171
  struct build_options_data data = { .options = options, .keywords = keywords };
172
172
  struct build_options_data *argument = &data;
@@ -316,26 +316,11 @@ parser_comments(pm_parser_t *parser, VALUE source) {
316
316
  for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
317
317
  VALUE location_argv[] = {
318
318
  source,
319
- LONG2FIX(comment->start - parser->start),
320
- LONG2FIX(comment->end - comment->start)
319
+ LONG2FIX(comment->location.start - parser->start),
320
+ LONG2FIX(comment->location.end - comment->location.start)
321
321
  };
322
322
 
323
- VALUE type;
324
- switch (comment->type) {
325
- case PM_COMMENT_INLINE:
326
- type = rb_cPrismInlineComment;
327
- break;
328
- case PM_COMMENT_EMBDOC:
329
- type = rb_cPrismEmbDocComment;
330
- break;
331
- case PM_COMMENT___END__:
332
- type = rb_cPrismDATAComment;
333
- break;
334
- default:
335
- type = rb_cPrismInlineComment;
336
- break;
337
- }
338
-
323
+ VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
339
324
  VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
340
325
  rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
341
326
  }
@@ -374,6 +359,25 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
374
359
  return magic_comments;
375
360
  }
376
361
 
362
+ /**
363
+ * Extract out the data location from the parser into a Location instance if one
364
+ * exists.
365
+ */
366
+ static VALUE
367
+ parser_data_loc(const pm_parser_t *parser, VALUE source) {
368
+ if (parser->data_loc.end == NULL) {
369
+ return Qnil;
370
+ } else {
371
+ VALUE argv[] = {
372
+ source,
373
+ LONG2FIX(parser->data_loc.start - parser->start),
374
+ LONG2FIX(parser->data_loc.end - parser->data_loc.start)
375
+ };
376
+
377
+ return rb_class_new_instance(3, argv, rb_cPrismLocation);
378
+ }
379
+ }
380
+
377
381
  /**
378
382
  * Extract the errors out of the parser into an array.
379
383
  */
@@ -385,8 +389,8 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
385
389
  for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
386
390
  VALUE location_argv[] = {
387
391
  source,
388
- LONG2FIX(error->start - parser->start),
389
- LONG2FIX(error->end - error->start)
392
+ LONG2FIX(error->location.start - parser->start),
393
+ LONG2FIX(error->location.end - error->location.start)
390
394
  };
391
395
 
392
396
  VALUE error_argv[] = {
@@ -411,8 +415,8 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
411
415
  for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
412
416
  VALUE location_argv[] = {
413
417
  source,
414
- LONG2FIX(warning->start - parser->start),
415
- LONG2FIX(warning->end - warning->start)
418
+ LONG2FIX(warning->location.start - parser->start),
419
+ LONG2FIX(warning->location.end - warning->location.start)
416
420
  };
417
421
 
418
422
  VALUE warning_argv[] = {
@@ -465,7 +469,7 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
465
469
  static void
466
470
  parse_lex_encoding_changed_callback(pm_parser_t *parser) {
467
471
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
468
- parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
472
+ parse_lex_data->encoding = rb_enc_find(parser->encoding->name);
469
473
 
470
474
  // Since the encoding changed, we need to go back and change the encoding of
471
475
  // the tokens that were already lexed. This is only going to end up being
@@ -531,6 +535,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
531
535
  value,
532
536
  parser_comments(&parser, source),
533
537
  parser_magic_comments(&parser, source),
538
+ parser_data_loc(&parser, source),
534
539
  parser_errors(&parser, parse_lex_data.encoding, source),
535
540
  parser_warnings(&parser, parse_lex_data.encoding, source),
536
541
  source
@@ -538,7 +543,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
538
543
 
539
544
  pm_node_destroy(&parser, node);
540
545
  pm_parser_free(&parser);
541
- return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
546
+ return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
542
547
  }
543
548
 
544
549
  /**
@@ -594,19 +599,20 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
594
599
  pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
595
600
 
596
601
  pm_node_t *node = pm_parse(&parser);
597
- rb_encoding *encoding = rb_enc_find(parser.encoding.name);
602
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
598
603
 
599
604
  VALUE source = pm_source_new(&parser, encoding);
600
605
  VALUE result_argv[] = {
601
606
  pm_ast_new(&parser, node, encoding),
602
607
  parser_comments(&parser, source),
603
608
  parser_magic_comments(&parser, source),
609
+ parser_data_loc(&parser, source),
604
610
  parser_errors(&parser, encoding, source),
605
611
  parser_warnings(&parser, encoding, source),
606
612
  source
607
613
  };
608
614
 
609
- VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
615
+ VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
610
616
 
611
617
  pm_node_destroy(&parser, node);
612
618
  pm_parser_free(&parser);
@@ -687,7 +693,7 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) {
687
693
  pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
688
694
 
689
695
  pm_node_t *node = pm_parse(&parser);
690
- rb_encoding *encoding = rb_enc_find(parser.encoding.name);
696
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
691
697
 
692
698
  VALUE source = pm_source_new(&parser, encoding);
693
699
  VALUE comments = parser_comments(&parser, source);
@@ -792,6 +798,63 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) {
792
798
  return value;
793
799
  }
794
800
 
801
+ /**
802
+ * Parse the given input and return true if it parses without errors.
803
+ */
804
+ static VALUE
805
+ parse_input_success_p(pm_string_t *input, const pm_options_t *options) {
806
+ pm_parser_t parser;
807
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
808
+
809
+ pm_node_t *node = pm_parse(&parser);
810
+ pm_node_destroy(&parser, node);
811
+
812
+ VALUE result = parser.error_list.size == 0 ? Qtrue : Qfalse;
813
+ pm_parser_free(&parser);
814
+
815
+ return result;
816
+ }
817
+
818
+ /**
819
+ * call-seq:
820
+ * Prism::parse_success?(source, **options) -> bool
821
+ *
822
+ * Parse the given string and return true if it parses without errors. For
823
+ * supported options, see Prism::parse.
824
+ */
825
+ static VALUE
826
+ parse_success_p(int argc, VALUE *argv, VALUE self) {
827
+ pm_string_t input;
828
+ pm_options_t options = { 0 };
829
+ string_options(argc, argv, &input, &options);
830
+
831
+ VALUE result = parse_input_success_p(&input, &options);
832
+ pm_string_free(&input);
833
+ pm_options_free(&options);
834
+
835
+ return result;
836
+ }
837
+
838
+ /**
839
+ * call-seq:
840
+ * Prism::parse_file_success?(filepath, **options) -> bool
841
+ *
842
+ * Parse the given file and return true if it parses without errors. For
843
+ * supported options, see Prism::parse.
844
+ */
845
+ static VALUE
846
+ parse_file_success_p(int argc, VALUE *argv, VALUE self) {
847
+ pm_string_t input;
848
+ pm_options_t options = { 0 };
849
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
850
+
851
+ VALUE result = parse_input_success_p(&input, &options);
852
+ pm_string_free(&input);
853
+ pm_options_free(&options);
854
+
855
+ return result;
856
+ }
857
+
795
858
  /******************************************************************************/
796
859
  /* Utility functions exposed to make testing easier */
797
860
  /******************************************************************************/
@@ -808,7 +871,7 @@ static VALUE
808
871
  named_captures(VALUE self, VALUE source) {
809
872
  pm_string_list_t string_list = { 0 };
810
873
 
811
- if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
874
+ if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, PM_ENCODING_UTF_8_ENTRY)) {
812
875
  pm_string_list_free(&string_list);
813
876
  return Qnil;
814
877
  }
@@ -898,7 +961,7 @@ inspect_node(VALUE self, VALUE source) {
898
961
 
899
962
  pm_prettyprint(&buffer, &parser, node);
900
963
 
901
- rb_encoding *encoding = rb_enc_find(parser.encoding.name);
964
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
902
965
  VALUE string = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
903
966
 
904
967
  pm_buffer_free(&buffer);
@@ -938,7 +1001,6 @@ Init_prism(void) {
938
1001
  rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
939
1002
  rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
940
1003
  rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
941
- rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
942
1004
  rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
943
1005
  rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
944
1006
  rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
@@ -976,6 +1038,8 @@ Init_prism(void) {
976
1038
  rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
977
1039
  rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
978
1040
  rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
1041
+ rb_define_singleton_method(rb_cPrism, "parse_success?", parse_success_p, -1);
1042
+ rb_define_singleton_method(rb_cPrism, "parse_file_success?", parse_file_success_p, -1);
979
1043
 
980
1044
  // Next, the functions that will be called by the parser to perform various
981
1045
  // internal tasks. We expose these to make them easier to test.
@@ -1,7 +1,7 @@
1
1
  #ifndef PRISM_EXT_NODE_H
2
2
  #define PRISM_EXT_NODE_H
3
3
 
4
- #define EXPECTED_PRISM_VERSION "0.18.0"
4
+ #define EXPECTED_PRISM_VERSION "0.19.0"
5
5
 
6
6
  #include <ruby.h>
7
7
  #include <ruby/encoding.h>