prism 0.23.0 → 0.25.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +65 -1
  4. data/Makefile +5 -2
  5. data/README.md +45 -6
  6. data/config.yml +499 -4
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +2 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +3 -3
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +2342 -1801
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +27 -11
  18. data/ext/prism/extension.c +313 -66
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +213 -64
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +134 -71
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +82 -7
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +198 -53
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +118 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +12 -3
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +225 -80
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +87 -16
  43. data/lib/prism/dsl.rb +315 -300
  44. data/lib/prism/ffi.rb +165 -84
  45. data/lib/prism/lex_compat.rb +17 -15
  46. data/lib/prism/mutation_compiler.rb +11 -0
  47. data/lib/prism/node.rb +4857 -3750
  48. data/lib/prism/node_ext.rb +77 -29
  49. data/lib/prism/pack.rb +4 -0
  50. data/lib/prism/parse_result/comments.rb +34 -17
  51. data/lib/prism/parse_result/newlines.rb +3 -1
  52. data/lib/prism/parse_result.rb +88 -34
  53. data/lib/prism/pattern.rb +16 -4
  54. data/lib/prism/polyfill/string.rb +12 -0
  55. data/lib/prism/serialize.rb +960 -327
  56. data/lib/prism/translation/parser/compiler.rb +152 -50
  57. data/lib/prism/translation/parser/lexer.rb +103 -22
  58. data/lib/prism/translation/parser/rubocop.rb +47 -11
  59. data/lib/prism/translation/parser.rb +134 -10
  60. data/lib/prism/translation/parser33.rb +12 -0
  61. data/lib/prism/translation/parser34.rb +12 -0
  62. data/lib/prism/translation/ripper/sexp.rb +125 -0
  63. data/lib/prism/translation/ripper/shim.rb +5 -0
  64. data/lib/prism/translation/ripper.rb +3248 -379
  65. data/lib/prism/translation/ruby_parser.rb +35 -18
  66. data/lib/prism/translation.rb +3 -1
  67. data/lib/prism/visitor.rb +10 -0
  68. data/lib/prism.rb +8 -2
  69. data/prism.gemspec +35 -4
  70. data/rbi/prism/compiler.rbi +14 -0
  71. data/rbi/prism/desugar_compiler.rbi +5 -0
  72. data/rbi/prism/mutation_compiler.rbi +5 -0
  73. data/rbi/prism/node.rbi +8221 -0
  74. data/rbi/prism/node_ext.rbi +102 -0
  75. data/rbi/prism/parse_result.rbi +304 -0
  76. data/rbi/prism/translation/parser/compiler.rbi +13 -0
  77. data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
  78. data/rbi/prism/translation/ripper.rbi +25 -0
  79. data/rbi/prism/translation/ruby_parser.rbi +11 -0
  80. data/rbi/prism/visitor.rbi +470 -0
  81. data/rbi/prism.rbi +39 -7749
  82. data/sig/prism/compiler.rbs +9 -0
  83. data/sig/prism/dispatcher.rbs +16 -0
  84. data/sig/prism/dot_visitor.rbs +6 -0
  85. data/sig/prism/dsl.rbs +462 -0
  86. data/sig/prism/mutation_compiler.rbs +158 -0
  87. data/sig/prism/node.rbs +3529 -0
  88. data/sig/prism/node_ext.rbs +78 -0
  89. data/sig/prism/pack.rbs +43 -0
  90. data/sig/prism/parse_result.rbs +127 -0
  91. data/sig/prism/pattern.rbs +13 -0
  92. data/sig/prism/serialize.rbs +7 -0
  93. data/sig/prism/visitor.rbs +168 -0
  94. data/sig/prism.rbs +188 -4767
  95. data/src/diagnostic.c +575 -230
  96. data/src/encoding.c +211 -108
  97. data/src/node.c +7526 -447
  98. data/src/options.c +36 -12
  99. data/src/pack.c +33 -17
  100. data/src/prettyprint.c +1297 -1388
  101. data/src/prism.c +3665 -1121
  102. data/src/regexp.c +17 -2
  103. data/src/serialize.c +47 -28
  104. data/src/static_literals.c +552 -0
  105. data/src/token_type.c +1 -0
  106. data/src/util/pm_buffer.c +147 -20
  107. data/src/util/pm_char.c +4 -4
  108. data/src/util/pm_constant_pool.c +35 -11
  109. data/src/util/pm_integer.c +629 -0
  110. data/src/util/pm_list.c +1 -1
  111. data/src/util/pm_newline_list.c +20 -8
  112. data/src/util/pm_string.c +134 -5
  113. data/src/util/pm_string_list.c +2 -2
  114. metadata +37 -6
  115. data/docs/ripper.md +0 -36
  116. data/rbi/prism_static.rbi +0 -207
  117. data/sig/prism_static.rbs +0 -201
data/ext/prism/api_pack.c CHANGED
@@ -1,5 +1,12 @@
1
1
  #include "prism/extension.h"
2
2
 
3
+ #ifdef PRISM_EXCLUDE_PACK
4
+
5
+ void
6
+ Init_prism_pack(void) {}
7
+
8
+ #else
9
+
3
10
  static VALUE rb_cPrism;
4
11
  static VALUE rb_cPrismPack;
5
12
  static VALUE rb_cPrismPackDirective;
@@ -265,3 +272,5 @@ Init_prism_pack(void) {
265
272
  pack_symbol = ID2SYM(rb_intern("pack"));
266
273
  unpack_symbol = ID2SYM(rb_intern("unpack"));
267
274
  }
275
+
276
+ #endif
data/ext/prism/extconf.rb CHANGED
@@ -6,17 +6,24 @@ if ARGV.delete("--help")
6
6
 
7
7
  Flags that are always valid:
8
8
 
9
- --enable-debug-mode-build
10
- Enable debug mode build.
11
- You may also use set PRISM_DEBUG_MODE_BUILD environment variable.
9
+ --enable-build-debug
10
+ Enable debug build.
11
+ You may also set the PRISM_BUILD_DEBUG environment variable.
12
+
13
+ --enable-build-minimal
14
+ Enable minimal build.
15
+ You may also set the PRISM_BUILD_MINIMAL environment variable.
12
16
 
13
17
  --help
14
18
  Display this message.
15
19
 
16
20
  Environment variables used:
17
21
 
18
- PRISM_DEBUG_MODE_BUILD
19
- Equivalent to `--enable-debug-mode-build` when set, even if nil or blank.
22
+ PRISM_BUILD_DEBUG
23
+ Equivalent to `--enable-build-debug` when set, even if nil or blank.
24
+
25
+ PRISM_BUILD_MINIMAL
26
+ Equivalent to `--enable-build-minimal` when set, even if nil or blank.
20
27
 
21
28
  TEXT
22
29
  exit!(0)
@@ -38,7 +45,7 @@ end
38
45
  # by this script.`
39
46
  def make(target)
40
47
  Dir.chdir(File.expand_path("../..", __dir__)) do
41
- system("make", target, exception: true)
48
+ system(RUBY_PLATFORM.include?("openbsd") ? "gmake" : "make", target, exception: true)
42
49
  end
43
50
  end
44
51
 
@@ -71,13 +78,22 @@ unless find_header("prism/extension.h", File.expand_path("..", __dir__))
71
78
  raise "prism/extension.h is required"
72
79
  end
73
80
 
74
- # If `--enable-debug-mode-build` is passed to this script or the
75
- # `PRISM_DEBUG_MODE_BUILD` environment variable is defined, we'll build with the
76
- # `PRISM_DEBUG_MODE_BUILD` macro defined. This causes parse functions to
81
+ # If `--enable-build-debug` is passed to this script or the
82
+ # `PRISM_BUILD_DEBUG` environment variable is defined, we'll build with the
83
+ # `PRISM_BUILD_DEBUG` macro defined. This causes parse functions to
77
84
  # duplicate their input so that they have clearly set bounds, which is useful
78
85
  # for finding bugs that cause the parser to read off the end of the input.
79
- if enable_config("debug-mode-build", ENV["PRISM_DEBUG_MODE_BUILD"] || false)
80
- append_cflags("-DPRISM_DEBUG_MODE_BUILD")
86
+ if enable_config("build-debug", ENV["PRISM_BUILD_DEBUG"] || false)
87
+ append_cflags("-DPRISM_BUILD_DEBUG")
88
+ end
89
+
90
+ # If `--enable-build-minimal` is passed to this script or the
91
+ # `PRISM_BUILD_MINIMAL` environment variable is defined, we'll build with the
92
+ # set of defines that comprise the minimal set. This causes the parser to be
93
+ # built with minimal features, necessary for stripping out functionality when
94
+ # the size of the final built artifact is a concern.
95
+ if enable_config("build-minimal", ENV["PRISM_BUILD_MINIMAL"] || false)
96
+ append_cflags("-DPRISM_BUILD_MINIMAL")
81
97
  end
82
98
 
83
99
  # By default, all symbols are hidden in the shared library.
@@ -21,12 +21,15 @@ VALUE rb_cPrismParseError;
21
21
  VALUE rb_cPrismParseWarning;
22
22
  VALUE rb_cPrismParseResult;
23
23
 
24
- ID rb_option_id_filepath;
24
+ VALUE rb_cPrismDebugEncoding;
25
+
26
+ ID rb_option_id_command_line;
25
27
  ID rb_option_id_encoding;
26
- ID rb_option_id_line;
28
+ ID rb_option_id_filepath;
27
29
  ID rb_option_id_frozen_string_literal;
28
- ID rb_option_id_version;
30
+ ID rb_option_id_line;
29
31
  ID rb_option_id_scopes;
32
+ ID rb_option_id_version;
30
33
 
31
34
  /******************************************************************************/
32
35
  /* IO of Ruby code */
@@ -82,7 +85,9 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
82
85
 
83
86
  // Initialize the scopes array.
84
87
  size_t scopes_count = RARRAY_LEN(scopes);
85
- pm_options_scopes_init(options, scopes_count);
88
+ if (!pm_options_scopes_init(options, scopes_count)) {
89
+ rb_raise(rb_eNoMemError, "failed to allocate memory");
90
+ }
86
91
 
87
92
  // Iterate over the scopes and add them to the options.
88
93
  for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
@@ -97,7 +102,9 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
97
102
  // Initialize the scope array.
98
103
  size_t locals_count = RARRAY_LEN(scope);
99
104
  pm_options_scope_t *options_scope = &options->scopes[scope_index];
100
- pm_options_scope_init(options_scope, locals_count);
105
+ if (!pm_options_scope_init(options_scope, locals_count)) {
106
+ rb_raise(rb_eNoMemError, "failed to allocate memory");
107
+ }
101
108
 
102
109
  // Iterate over the locals and add them to the scope.
103
110
  for (size_t local_index = 0; local_index < locals_count; local_index++) {
@@ -132,19 +139,38 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
132
139
  } else if (key_id == rb_option_id_line) {
133
140
  if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
134
141
  } else if (key_id == rb_option_id_frozen_string_literal) {
135
- if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
142
+ if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, RTEST(value));
136
143
  } else if (key_id == rb_option_id_version) {
137
144
  if (!NIL_P(value)) {
138
145
  const char *version = check_string(value);
139
146
 
140
147
  if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
141
- rb_raise(rb_eArgError, "invalid version: %"PRIsVALUE, value);
148
+ rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value);
142
149
  }
143
150
  }
144
151
  } else if (key_id == rb_option_id_scopes) {
145
152
  if (!NIL_P(value)) build_options_scopes(options, value);
153
+ } else if (key_id == rb_option_id_command_line) {
154
+ if (!NIL_P(value)) {
155
+ const char *string = check_string(value);
156
+ uint8_t command_line = 0;
157
+
158
+ for (size_t index = 0; index < strlen(string); index++) {
159
+ switch (string[index]) {
160
+ case 'a': command_line |= PM_OPTIONS_COMMAND_LINE_A; break;
161
+ case 'e': command_line |= PM_OPTIONS_COMMAND_LINE_E; break;
162
+ case 'l': command_line |= PM_OPTIONS_COMMAND_LINE_L; break;
163
+ case 'n': command_line |= PM_OPTIONS_COMMAND_LINE_N; break;
164
+ case 'p': command_line |= PM_OPTIONS_COMMAND_LINE_P; break;
165
+ case 'x': command_line |= PM_OPTIONS_COMMAND_LINE_X; break;
166
+ default: rb_raise(rb_eArgError, "invalid command line flag: '%c'", string[index]); break;
167
+ }
168
+ }
169
+
170
+ pm_options_command_line_set(options, command_line);
171
+ }
146
172
  } else {
147
- rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
173
+ rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key);
148
174
  }
149
175
 
150
176
  return ST_CONTINUE;
@@ -228,7 +254,7 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
228
254
 
229
255
  const char * string_source = (const char *) pm_string_source(&options->filepath);
230
256
 
231
- if (!pm_string_mapped_init(input, string_source)) {
257
+ if (!pm_string_file_init(input, string_source)) {
232
258
  pm_options_free(options);
233
259
 
234
260
  #ifdef _WIN32
@@ -241,6 +267,8 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
241
267
  }
242
268
  }
243
269
 
270
+ #ifndef PRISM_EXCLUDE_SERIALIZATION
271
+
244
272
  /******************************************************************************/
245
273
  /* Serializing the AST */
246
274
  /******************************************************************************/
@@ -282,17 +310,17 @@ dump(int argc, VALUE *argv, VALUE self) {
282
310
  pm_options_t options = { 0 };
283
311
  string_options(argc, argv, &input, &options);
284
312
 
285
- #ifdef PRISM_DEBUG_MODE_BUILD
313
+ #ifdef PRISM_BUILD_DEBUG
286
314
  size_t length = pm_string_length(&input);
287
- char* dup = malloc(length);
315
+ char* dup = xmalloc(length);
288
316
  memcpy(dup, pm_string_source(&input), length);
289
317
  pm_string_constant_init(&input, dup, length);
290
318
  #endif
291
319
 
292
320
  VALUE value = dump_input(&input, &options);
293
321
 
294
- #ifdef PRISM_DEBUG_MODE_BUILD
295
- free(dup);
322
+ #ifdef PRISM_BUILD_DEBUG
323
+ xfree(dup);
296
324
  #endif
297
325
 
298
326
  pm_string_free(&input);
@@ -322,6 +350,8 @@ dump_file(int argc, VALUE *argv, VALUE self) {
322
350
  return value;
323
351
  }
324
352
 
353
+ #endif
354
+
325
355
  /******************************************************************************/
326
356
  /* Extracting values for the parse result */
327
357
  /******************************************************************************/
@@ -415,23 +445,27 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
415
445
 
416
446
  VALUE level = Qnil;
417
447
  switch (error->level) {
418
- case PM_ERROR_LEVEL_FATAL:
419
- level = ID2SYM(rb_intern("fatal"));
448
+ case PM_ERROR_LEVEL_SYNTAX:
449
+ level = ID2SYM(rb_intern("syntax"));
420
450
  break;
421
451
  case PM_ERROR_LEVEL_ARGUMENT:
422
452
  level = ID2SYM(rb_intern("argument"));
423
453
  break;
454
+ case PM_ERROR_LEVEL_LOAD:
455
+ level = ID2SYM(rb_intern("load"));
456
+ break;
424
457
  default:
425
458
  rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
426
459
  }
427
460
 
428
461
  VALUE error_argv[] = {
462
+ ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id))),
429
463
  rb_enc_str_new_cstr(error->message, encoding),
430
464
  rb_class_new_instance(3, location_argv, rb_cPrismLocation),
431
465
  level
432
466
  };
433
467
 
434
- rb_ary_push(errors, rb_class_new_instance(3, error_argv, rb_cPrismParseError));
468
+ rb_ary_push(errors, rb_class_new_instance(4, error_argv, rb_cPrismParseError));
435
469
  }
436
470
 
437
471
  return errors;
@@ -465,17 +499,36 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
465
499
  }
466
500
 
467
501
  VALUE warning_argv[] = {
502
+ ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id))),
468
503
  rb_enc_str_new_cstr(warning->message, encoding),
469
504
  rb_class_new_instance(3, location_argv, rb_cPrismLocation),
470
505
  level
471
506
  };
472
507
 
473
- rb_ary_push(warnings, rb_class_new_instance(3, warning_argv, rb_cPrismParseWarning));
508
+ rb_ary_push(warnings, rb_class_new_instance(4, warning_argv, rb_cPrismParseWarning));
474
509
  }
475
510
 
476
511
  return warnings;
477
512
  }
478
513
 
514
+ /**
515
+ * Create a new parse result from the given parser, value, encoding, and source.
516
+ */
517
+ static VALUE
518
+ parse_result_create(pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
519
+ VALUE result_argv[] = {
520
+ value,
521
+ parser_comments(parser, source),
522
+ parser_magic_comments(parser, source),
523
+ parser_data_loc(parser, source),
524
+ parser_errors(parser, encoding, source),
525
+ parser_warnings(parser, encoding, source),
526
+ source
527
+ };
528
+
529
+ return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
530
+ }
531
+
479
532
  /******************************************************************************/
480
533
  /* Lexing Ruby code */
481
534
  /******************************************************************************/
@@ -582,19 +635,11 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
582
635
  value = parse_lex_data.tokens;
583
636
  }
584
637
 
585
- VALUE result_argv[] = {
586
- value,
587
- parser_comments(&parser, source),
588
- parser_magic_comments(&parser, source),
589
- parser_data_loc(&parser, source),
590
- parser_errors(&parser, parse_lex_data.encoding, source),
591
- parser_warnings(&parser, parse_lex_data.encoding, source),
592
- source
593
- };
594
-
638
+ VALUE result = parse_result_create(&parser, value, parse_lex_data.encoding, source);
595
639
  pm_node_destroy(&parser, node);
596
640
  pm_parser_free(&parser);
597
- return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
641
+
642
+ return result;
598
643
  }
599
644
 
600
645
  /**
@@ -654,17 +699,8 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
654
699
  rb_encoding *encoding = rb_enc_find(parser.encoding->name);
655
700
 
656
701
  VALUE source = pm_source_new(&parser, encoding);
657
- VALUE result_argv[] = {
658
- pm_ast_new(&parser, node, encoding, source),
659
- parser_comments(&parser, source),
660
- parser_magic_comments(&parser, source),
661
- parser_data_loc(&parser, source),
662
- parser_errors(&parser, encoding, source),
663
- parser_warnings(&parser, encoding, source),
664
- source
665
- };
666
-
667
- VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
702
+ VALUE value = pm_ast_new(&parser, node, encoding, source);
703
+ VALUE result = parse_result_create(&parser, value, encoding, source) ;
668
704
 
669
705
  pm_node_destroy(&parser, node);
670
706
  pm_parser_free(&parser);
@@ -679,21 +715,25 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
679
715
  * Parse the given string and return a ParseResult instance. The options that
680
716
  * are supported are:
681
717
  *
682
- * * `filepath` - the filepath of the source being parsed. This should be a
683
- * string or nil
718
+ * * `command_line` - either nil or a string of the various options that were
719
+ * set on the command line. Valid values are combinations of "a", "e", "l",
720
+ * "n", "p", and "x".
684
721
  * * `encoding` - the encoding of the source being parsed. This should be an
685
- * encoding or nil
686
- * * `line` - the line number that the parse starts on. This should be an
687
- * integer or nil. Note that this is 1-indexed.
722
+ * encoding or nil.
723
+ * * `filepath` - the filepath of the source being parsed. This should be a
724
+ * string or nil.
688
725
  * * `frozen_string_literal` - whether or not the frozen string literal pragma
689
726
  * has been set. This should be a boolean or nil.
690
- * * `version` - the version of prism that should be used to parse Ruby code. By
691
- * default prism assumes you want to parse with the latest vesion of
692
- * prism (which you can trigger with `nil` or `"latest"`). If you want to
693
- * parse exactly as CRuby 3.3.0 would, then you can pass `"3.3.0"`.
727
+ * * `line` - the line number that the parse starts on. This should be an
728
+ * integer or nil. Note that this is 1-indexed.
694
729
  * * `scopes` - the locals that are in scope surrounding the code that is being
695
730
  * parsed. This should be an array of arrays of symbols or nil. Scopes are
696
731
  * ordered from the outermost scope to the innermost one.
732
+ * * `version` - the version of Ruby syntax that prism should use to parse Ruby
733
+ * code. By default prism assumes you want to parse with the latest version
734
+ * of Ruby syntax (which you can trigger with `nil` or `"latest"`). You
735
+ * may also restrict the syntax to a specific version of Ruby. The
736
+ * supported values are `"3.3.0"` and `"3.4.0"`.
697
737
  */
698
738
  static VALUE
699
739
  parse(int argc, VALUE *argv, VALUE self) {
@@ -701,17 +741,17 @@ parse(int argc, VALUE *argv, VALUE self) {
701
741
  pm_options_t options = { 0 };
702
742
  string_options(argc, argv, &input, &options);
703
743
 
704
- #ifdef PRISM_DEBUG_MODE_BUILD
744
+ #ifdef PRISM_BUILD_DEBUG
705
745
  size_t length = pm_string_length(&input);
706
- char* dup = malloc(length);
746
+ char* dup = xmalloc(length);
707
747
  memcpy(dup, pm_string_source(&input), length);
708
748
  pm_string_constant_init(&input, dup, length);
709
749
  #endif
710
750
 
711
751
  VALUE value = parse_input(&input, &options);
712
752
 
713
- #ifdef PRISM_DEBUG_MODE_BUILD
714
- free(dup);
753
+ #ifdef PRISM_BUILD_DEBUG
754
+ xfree(dup);
715
755
  #endif
716
756
 
717
757
  pm_string_free(&input);
@@ -719,6 +759,60 @@ parse(int argc, VALUE *argv, VALUE self) {
719
759
  return value;
720
760
  }
721
761
 
762
+ /**
763
+ * An implementation of fgets that is suitable for use with Ruby IO objects.
764
+ */
765
+ static char *
766
+ parse_stream_fgets(char *string, int size, void *stream) {
767
+ RUBY_ASSERT(size > 0);
768
+
769
+ VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
770
+ if (NIL_P(line)) {
771
+ return NULL;
772
+ }
773
+
774
+ const char *cstr = StringValueCStr(line);
775
+ size_t length = strlen(cstr);
776
+
777
+ memcpy(string, cstr, length);
778
+ string[length] = '\0';
779
+
780
+ return string;
781
+ }
782
+
783
+ /**
784
+ * call-seq:
785
+ * Prism::parse_stream(stream, **options) -> ParseResult
786
+ *
787
+ * Parse the given object that responds to `gets` and return a ParseResult
788
+ * instance. The options that are supported are the same as Prism::parse.
789
+ */
790
+ static VALUE
791
+ parse_stream(int argc, VALUE *argv, VALUE self) {
792
+ VALUE stream;
793
+ VALUE keywords;
794
+ rb_scan_args(argc, argv, "1:", &stream, &keywords);
795
+
796
+ pm_options_t options = { 0 };
797
+ extract_options(&options, Qnil, keywords);
798
+
799
+ pm_parser_t parser;
800
+ pm_buffer_t buffer;
801
+
802
+ pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
803
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
804
+
805
+ VALUE source = pm_source_new(&parser, encoding);
806
+ VALUE value = pm_ast_new(&parser, node, encoding, source);
807
+ VALUE result = parse_result_create(&parser, value, encoding, source);
808
+
809
+ pm_node_destroy(&parser, node);
810
+ pm_buffer_free(&buffer);
811
+ pm_parser_free(&parser);
812
+
813
+ return result;
814
+ }
815
+
722
816
  /**
723
817
  * call-seq:
724
818
  * Prism::parse_file(filepath, **options) -> ParseResult
@@ -945,6 +1039,35 @@ named_captures(VALUE self, VALUE source) {
945
1039
  return names;
946
1040
  }
947
1041
 
1042
+ /**
1043
+ * call-seq:
1044
+ * Debug::integer_parse(source) -> [Integer, String]
1045
+ *
1046
+ * Parses the given source string and returns the integer it represents, as well
1047
+ * as a decimal string representation.
1048
+ */
1049
+ static VALUE
1050
+ integer_parse(VALUE self, VALUE source) {
1051
+ const uint8_t *start = (const uint8_t *) RSTRING_PTR(source);
1052
+ size_t length = RSTRING_LEN(source);
1053
+
1054
+ pm_integer_t integer = { 0 };
1055
+ pm_integer_parse(&integer, PM_INTEGER_BASE_UNKNOWN, start, start + length);
1056
+
1057
+ pm_buffer_t buffer = { 0 };
1058
+ pm_integer_string(&buffer, &integer);
1059
+
1060
+ VALUE string = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
1061
+ pm_buffer_free(&buffer);
1062
+
1063
+ VALUE result = rb_ary_new_capa(2);
1064
+ rb_ary_push(result, pm_integer_new(&integer));
1065
+ rb_ary_push(result, string);
1066
+ pm_integer_free(&integer);
1067
+
1068
+ return result;
1069
+ }
1070
+
948
1071
  /**
949
1072
  * call-seq:
950
1073
  * Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
@@ -1010,6 +1133,8 @@ profile_file(VALUE self, VALUE filepath) {
1010
1133
  return Qnil;
1011
1134
  }
1012
1135
 
1136
+ #ifndef PRISM_EXCLUDE_PRETTYPRINT
1137
+
1013
1138
  /**
1014
1139
  * call-seq:
1015
1140
  * Debug::inspect_node(source) -> inspected
@@ -1040,6 +1165,8 @@ inspect_node(VALUE self, VALUE source) {
1040
1165
  return string;
1041
1166
  }
1042
1167
 
1168
+ #endif
1169
+
1043
1170
  /**
1044
1171
  * call-seq:
1045
1172
  * Debug::format_errors(source, colorize) -> String
@@ -1057,7 +1184,7 @@ format_errors(VALUE self, VALUE source, VALUE colorize) {
1057
1184
  pm_node_t *node = pm_parse(&parser);
1058
1185
  pm_buffer_t buffer = { 0 };
1059
1186
 
1060
- pm_parser_errors_format(&parser, &buffer, RTEST(colorize));
1187
+ pm_parser_errors_format(&parser, &parser.error_list, &buffer, RTEST(colorize), true);
1061
1188
 
1062
1189
  rb_encoding *encoding = rb_enc_find(parser.encoding->name);
1063
1190
  VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
@@ -1070,6 +1197,114 @@ format_errors(VALUE self, VALUE source, VALUE colorize) {
1070
1197
  return result;
1071
1198
  }
1072
1199
 
1200
+ /**
1201
+ * call-seq:
1202
+ * Debug::static_inspect(source) -> String
1203
+ *
1204
+ * Inspect the node as it would be inspected by the warnings used in static
1205
+ * literal sets.
1206
+ */
1207
+ static VALUE
1208
+ static_inspect(int argc, VALUE *argv, VALUE self) {
1209
+ pm_string_t input;
1210
+ pm_options_t options = { 0 };
1211
+ string_options(argc, argv, &input, &options);
1212
+
1213
+ pm_parser_t parser;
1214
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
1215
+
1216
+ pm_node_t *program = pm_parse(&parser);
1217
+ pm_node_t *node = ((pm_program_node_t *) program)->statements->body.nodes[0];
1218
+
1219
+ pm_buffer_t buffer = { 0 };
1220
+ pm_static_literal_inspect(&buffer, &parser, node);
1221
+
1222
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
1223
+ VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
1224
+
1225
+ pm_buffer_free(&buffer);
1226
+ pm_node_destroy(&parser, program);
1227
+ pm_parser_free(&parser);
1228
+ pm_string_free(&input);
1229
+ pm_options_free(&options);
1230
+
1231
+ return result;
1232
+ }
1233
+
1234
+ /**
1235
+ * call-seq: Debug::Encoding.all -> Array[Debug::Encoding]
1236
+ *
1237
+ * Return an array of all of the encodings that prism knows about.
1238
+ */
1239
+ static VALUE
1240
+ encoding_all(VALUE self) {
1241
+ VALUE encodings = rb_ary_new();
1242
+
1243
+ for (size_t index = 0; index < PM_ENCODING_MAXIMUM; index++) {
1244
+ const pm_encoding_t *encoding = &pm_encodings[index];
1245
+
1246
+ VALUE encoding_argv[] = { rb_str_new_cstr(encoding->name), encoding->multibyte ? Qtrue : Qfalse };
1247
+ rb_ary_push(encodings, rb_class_new_instance(2, encoding_argv, rb_cPrismDebugEncoding));
1248
+ }
1249
+
1250
+ return encodings;
1251
+ }
1252
+
1253
+ static const pm_encoding_t *
1254
+ encoding_find(VALUE name) {
1255
+ const uint8_t *source = (const uint8_t *) RSTRING_PTR(name);
1256
+ size_t length = RSTRING_LEN(name);
1257
+
1258
+ const pm_encoding_t *encoding = pm_encoding_find(source, source + length);
1259
+ if (encoding == NULL) { rb_raise(rb_eArgError, "Unknown encoding: %s", source); }
1260
+
1261
+ return encoding;
1262
+ }
1263
+
1264
+ /**
1265
+ * call-seq: Debug::Encoding.width(source) -> Integer
1266
+ *
1267
+ * Returns the width of the first character in the given string if it is valid
1268
+ * in the encoding. If it is not, this function returns 0.
1269
+ */
1270
+ static VALUE
1271
+ encoding_char_width(VALUE self, VALUE name, VALUE value) {
1272
+ return ULONG2NUM(encoding_find(name)->char_width((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)));
1273
+ }
1274
+
1275
+ /**
1276
+ * call-seq: Debug::Encoding.alnum?(source) -> true | false
1277
+ *
1278
+ * Returns true if the first character in the given string is an alphanumeric
1279
+ * character in the encoding.
1280
+ */
1281
+ static VALUE
1282
+ encoding_alnum_char(VALUE self, VALUE name, VALUE value) {
1283
+ return encoding_find(name)->alnum_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
1284
+ }
1285
+
1286
+ /**
1287
+ * call-seq: Debug::Encoding.alpha?(source) -> true | false
1288
+ *
1289
+ * Returns true if the first character in the given string is an alphabetic
1290
+ * character in the encoding.
1291
+ */
1292
+ static VALUE
1293
+ encoding_alpha_char(VALUE self, VALUE name, VALUE value) {
1294
+ return encoding_find(name)->alpha_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
1295
+ }
1296
+
1297
+ /**
1298
+ * call-seq: Debug::Encoding.upper?(source) -> true | false
1299
+ *
1300
+ * Returns true if the first character in the given string is an uppercase
1301
+ * character in the encoding.
1302
+ */
1303
+ static VALUE
1304
+ encoding_isupper_char(VALUE self, VALUE name, VALUE value) {
1305
+ return encoding_find(name)->isupper_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) ? Qtrue : Qfalse;
1306
+ }
1307
+
1073
1308
  /******************************************************************************/
1074
1309
  /* Initialization of the extension */
1075
1310
  /******************************************************************************/
@@ -1107,31 +1342,24 @@ Init_prism(void) {
1107
1342
 
1108
1343
  // Intern all of the options that we support so that we don't have to do it
1109
1344
  // every time we parse.
1110
- rb_option_id_filepath = rb_intern_const("filepath");
1345
+ rb_option_id_command_line = rb_intern_const("command_line");
1111
1346
  rb_option_id_encoding = rb_intern_const("encoding");
1112
- rb_option_id_line = rb_intern_const("line");
1347
+ rb_option_id_filepath = rb_intern_const("filepath");
1113
1348
  rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
1114
- rb_option_id_version = rb_intern_const("version");
1349
+ rb_option_id_line = rb_intern_const("line");
1115
1350
  rb_option_id_scopes = rb_intern_const("scopes");
1351
+ rb_option_id_version = rb_intern_const("version");
1116
1352
 
1117
1353
  /**
1118
1354
  * The version of the prism library.
1119
1355
  */
1120
1356
  rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
1121
1357
 
1122
- /**
1123
- * The backend of the parser that prism is using to parse Ruby code. This
1124
- * can be either :CEXT or :FFI. On runtimes that support C extensions, we
1125
- * default to :CEXT. Otherwise we use :FFI.
1126
- */
1127
- rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
1128
-
1129
1358
  // First, the functions that have to do with lexing and parsing.
1130
- rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
1131
- rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
1132
1359
  rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
1133
1360
  rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
1134
1361
  rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
1362
+ rb_define_singleton_method(rb_cPrism, "parse_stream", parse_stream, -1);
1135
1363
  rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
1136
1364
  rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
1137
1365
  rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
@@ -1140,14 +1368,33 @@ Init_prism(void) {
1140
1368
  rb_define_singleton_method(rb_cPrism, "parse_success?", parse_success_p, -1);
1141
1369
  rb_define_singleton_method(rb_cPrism, "parse_file_success?", parse_file_success_p, -1);
1142
1370
 
1371
+ #ifndef PRISM_EXCLUDE_SERIALIZATION
1372
+ rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
1373
+ rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
1374
+ #endif
1375
+
1143
1376
  // Next, the functions that will be called by the parser to perform various
1144
1377
  // internal tasks. We expose these to make them easier to test.
1145
1378
  VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
1146
1379
  rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
1380
+ rb_define_singleton_method(rb_cPrismDebug, "integer_parse", integer_parse, 1);
1147
1381
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
1148
1382
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
1149
- rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
1150
1383
  rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
1384
+ rb_define_singleton_method(rb_cPrismDebug, "static_inspect", static_inspect, -1);
1385
+
1386
+ #ifndef PRISM_EXCLUDE_PRETTYPRINT
1387
+ rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
1388
+ #endif
1389
+
1390
+ // Next, define the functions that are exposed through the private
1391
+ // Debug::Encoding class.
1392
+ rb_cPrismDebugEncoding = rb_define_class_under(rb_cPrismDebug, "Encoding", rb_cObject);
1393
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "all", encoding_all, 0);
1394
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "_width", encoding_char_width, 2);
1395
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "_alnum?", encoding_alnum_char, 2);
1396
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "_alpha?", encoding_alpha_char, 2);
1397
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "_upper?", encoding_isupper_char, 2);
1151
1398
 
1152
1399
  // Next, initialize the other APIs.
1153
1400
  Init_prism_api_node();