RubyGems - prism - Versions diffs - 0.29.0 → 1.1.0 - Mend

prism 0.29.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82)

checksums.yaml +4 -4
data/CHANGELOG.md +77 -1
data/CONTRIBUTING.md +0 -4
data/README.md +4 -0
data/config.yml +498 -145
data/docs/fuzzing.md +1 -1
data/docs/parsing_rules.md +4 -1
data/docs/ripper_translation.md +22 -0
data/docs/serialization.md +3 -0
data/ext/prism/api_node.c +2858 -2082
data/ext/prism/extconf.rb +1 -1
data/ext/prism/extension.c +203 -421
data/ext/prism/extension.h +2 -2
data/include/prism/ast.h +1732 -453
data/include/prism/defines.h +36 -0
data/include/prism/diagnostic.h +23 -6
data/include/prism/node.h +0 -21
data/include/prism/options.h +94 -3
data/include/prism/parser.h +57 -28
data/include/prism/regexp.h +18 -8
data/include/prism/static_literals.h +3 -2
data/include/prism/util/pm_char.h +1 -2
data/include/prism/util/pm_constant_pool.h +0 -8
data/include/prism/util/pm_integer.h +22 -15
data/include/prism/util/pm_newline_list.h +11 -0
data/include/prism/util/pm_string.h +28 -12
data/include/prism/version.h +3 -3
data/include/prism.h +0 -11
data/lib/prism/compiler.rb +3 -0
data/lib/prism/desugar_compiler.rb +111 -74
data/lib/prism/dispatcher.rb +16 -1
data/lib/prism/dot_visitor.rb +45 -34
data/lib/prism/dsl.rb +660 -468
data/lib/prism/ffi.rb +64 -6
data/lib/prism/inspect_visitor.rb +294 -64
data/lib/prism/lex_compat.rb +1 -1
data/lib/prism/mutation_compiler.rb +11 -6
data/lib/prism/node.rb +2469 -4973
data/lib/prism/node_ext.rb +91 -14
data/lib/prism/parse_result/comments.rb +0 -7
data/lib/prism/parse_result/errors.rb +65 -0
data/lib/prism/parse_result/newlines.rb +101 -11
data/lib/prism/parse_result.rb +43 -3
data/lib/prism/reflection.rb +10 -8
data/lib/prism/serialize.rb +484 -609
data/lib/prism/translation/parser/compiler.rb +152 -132
data/lib/prism/translation/parser/lexer.rb +26 -4
data/lib/prism/translation/parser.rb +9 -4
data/lib/prism/translation/ripper.rb +22 -20
data/lib/prism/translation/ruby_parser.rb +73 -13
data/lib/prism/visitor.rb +3 -0
data/lib/prism.rb +0 -4
data/prism.gemspec +3 -5
data/rbi/prism/dsl.rbi +521 -0
data/rbi/prism/node.rbi +744 -4837
data/rbi/prism/visitor.rbi +3 -0
data/rbi/prism.rbi +36 -30
data/sig/prism/dsl.rbs +190 -303
data/sig/prism/mutation_compiler.rbs +1 -0
data/sig/prism/node.rbs +759 -628
data/sig/prism/parse_result.rbs +2 -0
data/sig/prism/visitor.rbs +1 -0
data/sig/prism.rbs +103 -64
data/src/diagnostic.c +62 -28
data/src/node.c +499 -1754
data/src/options.c +76 -27
data/src/prettyprint.c +156 -112
data/src/prism.c +2773 -2081
data/src/regexp.c +202 -69
data/src/serialize.c +170 -50
data/src/static_literals.c +63 -84
data/src/token_type.c +4 -4
data/src/util/pm_constant_pool.c +0 -8
data/src/util/pm_integer.c +53 -25
data/src/util/pm_newline_list.c +29 -0
data/src/util/pm_string.c +130 -80
data/src/util/pm_strpbrk.c +32 -6
metadata +4 -6
data/include/prism/util/pm_string_list.h +0 -44
data/lib/prism/debug.rb +0 -249
data/lib/prism/translation/parser/rubocop.rb +0 -73
data/src/util/pm_string_list.c +0 -28

data/ext/prism/extension.c CHANGED Viewed

@@ -21,38 +21,35 @@ VALUE rb_cPrismParseError;
 VALUE rb_cPrismParseWarning;
 VALUE rb_cPrismResult;
 VALUE rb_cPrismParseResult;
+VALUE rb_cPrismLexResult;
 VALUE rb_cPrismParseLexResult;
 VALUE rb_cPrismDebugEncoding;
-ID rb_option_id_command_line;
-ID rb_option_id_encoding;
-ID rb_option_id_filepath;
-ID rb_option_id_frozen_string_literal;
-ID rb_option_id_line;
-ID rb_option_id_scopes;
-ID rb_option_id_version;
-ID rb_prism_source_id_for;
+ID rb_id_option_command_line;
+ID rb_id_option_encoding;
+ID rb_id_option_filepath;
+ID rb_id_option_frozen_string_literal;
+ID rb_id_option_line;
+ID rb_id_option_main_script;
+ID rb_id_option_partial_script;
+ID rb_id_option_scopes;
+ID rb_id_option_version;
+ID rb_id_source_for;
 /******************************************************************************/
 /* IO of Ruby code                                                            */
 /******************************************************************************/
 /**
- * Check if the given VALUE is a string. If it's nil, then return NULL. If it's
- * not a string, then raise a type error. Otherwise return the VALUE as a C
- * string.
+ * Check if the given VALUE is a string. If it's not a string, then raise a
+ * TypeError. Otherwise return the VALUE as a C string.
  */
 static const char *
 check_string(VALUE value) {
-    // If the value is nil, then we don't need to do anything.
-    if (NIL_P(value)) {
-        return NULL;
-    }
     // Check if the value is a string. If it's not, then raise a type error.
     if (!RB_TYPE_P(value, T_STRING)) {
-        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(value));
+        rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(value));
     }
     // Otherwise, return the value as a C string.
@@ -66,7 +63,7 @@ static void
 input_load_string(pm_string_t *input, VALUE string) {
     // Check if the string is a string. If it's not, then raise a type error.
     if (!RB_TYPE_P(string, T_STRING)) {
-        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
+        rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(string));
     }
     pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
@@ -135,15 +132,21 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
     pm_options_t *options = (pm_options_t *) argument;
     ID key_id = SYM2ID(key);
-    if (key_id == rb_option_id_filepath) {
+    if (key_id == rb_id_option_filepath) {
         if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
-    } else if (key_id == rb_option_id_encoding) {
-        if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
-    } else if (key_id == rb_option_id_line) {
+    } else if (key_id == rb_id_option_encoding) {
+        if (!NIL_P(value)) {
+            if (value == Qfalse) {
+                pm_options_encoding_locked_set(options, true);
+            } else {
+                pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
+            }
+        }
+    } else if (key_id == rb_id_option_line) {
         if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
-    } else if (key_id == rb_option_id_frozen_string_literal) {
+    } else if (key_id == rb_id_option_frozen_string_literal) {
         if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, RTEST(value));
-    } else if (key_id == rb_option_id_version) {
+    } else if (key_id == rb_id_option_version) {
         if (!NIL_P(value)) {
             const char *version = check_string(value);
@@ -151,9 +154,9 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
                 rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value);
             }
         }
-    } else if (key_id == rb_option_id_scopes) {
+    } else if (key_id == rb_id_option_scopes) {
         if (!NIL_P(value)) build_options_scopes(options, value);
-    } else if (key_id == rb_option_id_command_line) {
+    } else if (key_id == rb_id_option_command_line) {
         if (!NIL_P(value)) {
             const char *string = check_string(value);
             uint8_t command_line = 0;
@@ -172,6 +175,10 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
             pm_options_command_line_set(options, command_line);
         }
+    } else if (key_id == rb_id_option_main_script) {
+        if (!NIL_P(value)) pm_options_main_script_set(options, RTEST(value));
+    } else if (key_id == rb_id_option_partial_script) {
+        if (!NIL_P(value)) pm_options_partial_script_set(options, RTEST(value));
     } else {
         rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key);
     }
@@ -206,6 +213,7 @@ build_options(VALUE argument) {
 static void
 extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
     options->line = 1; // default
     if (!NIL_P(keywords)) {
         struct build_options_data data = { .options = options, .keywords = keywords };
         struct build_options_data *argument = &data;
@@ -246,27 +254,41 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options)
  * Read options for methods that look like (filepath, **options).
  */
 static void
-file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
+file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, VALUE *encoded_filepath) {
     VALUE filepath;
     VALUE keywords;
     rb_scan_args(argc, argv, "1:", &filepath, &keywords);
     Check_Type(filepath, T_STRING);
+    *encoded_filepath = rb_str_encode_ospath(filepath);
+    extract_options(options, *encoded_filepath, keywords);
-    extract_options(options, filepath, keywords);
+    const char *source = (const char *) pm_string_source(&options->filepath);
+    pm_string_init_result_t result;
-    const char * string_source = (const char *) pm_string_source(&options->filepath);
-    if (!pm_string_file_init(input, string_source)) {
-        pm_options_free(options);
+    switch (result = pm_string_file_init(input, source)) {
+        case PM_STRING_INIT_SUCCESS:
+            break;
+        case PM_STRING_INIT_ERROR_GENERIC: {
+            pm_options_free(options);
 #ifdef _WIN32
-        int e = rb_w32_map_errno(GetLastError());
+            int e = rb_w32_map_errno(GetLastError());
 #else
-        int e = errno;
+            int e = errno;
 #endif
-        rb_syserr_fail(e, string_source);
+            rb_syserr_fail(e, source);
+            break;
+        }
+        case PM_STRING_INIT_ERROR_DIRECTORY:
+            pm_options_free(options);
+            rb_syserr_fail(EISDIR, source);
+            break;
+        default:
+            pm_options_free(options);
+            rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing file: %s", result, source);
+            break;
     }
 }
@@ -344,7 +366,8 @@ dump_file(int argc, VALUE *argv, VALUE self) {
     pm_string_t input;
     pm_options_t options = { 0 };
-    file_options(argc, argv, &input, &options);
+    VALUE encoded_filepath;
+    file_options(argc, argv, &input, &options, &encoded_filepath);
     VALUE value = dump_input(&input, &options);
     pm_string_free(&input);
@@ -364,7 +387,7 @@ dump_file(int argc, VALUE *argv, VALUE self) {
  */
 static VALUE
 parser_comments(pm_parser_t *parser, VALUE source) {
-    VALUE comments = rb_ary_new();
+    VALUE comments = rb_ary_new_capa(parser->comment_list.size);
     for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
         VALUE location_argv[] = {
@@ -386,7 +409,7 @@ parser_comments(pm_parser_t *parser, VALUE source) {
  */
 static VALUE
 parser_magic_comments(pm_parser_t *parser, VALUE source) {
-    VALUE magic_comments = rb_ary_new();
+    VALUE magic_comments = rb_ary_new_capa(parser->magic_comment_list.size);
     for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
         VALUE key_loc_argv[] = {
@@ -436,7 +459,7 @@ parser_data_loc(const pm_parser_t *parser, VALUE source) {
  */
 static VALUE
 parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
-    VALUE errors = rb_ary_new();
+    VALUE errors = rb_ary_new_capa(parser->error_list.size);
     pm_diagnostic_t *error;
     for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
@@ -479,7 +502,7 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
  */
 static VALUE
 parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
-    VALUE warnings = rb_ary_new();
+    VALUE warnings = rb_ary_new_capa(parser->warning_list.size);
     pm_diagnostic_t *warning;
     for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
@@ -556,9 +579,10 @@ static void
 parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
     parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
-    VALUE yields = rb_ary_new_capa(2);
-    rb_ary_push(yields, pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
-    rb_ary_push(yields, INT2FIX(parser->lex_state));
+    VALUE yields = rb_assoc_new(
+        pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source),
+        INT2FIX(parser->lex_state)
+    );
     rb_ary_push(parse_lex_data->tokens, yields);
 }
@@ -599,8 +623,8 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
     pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
     VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
-    VALUE offsets = rb_ary_new();
-    VALUE source = rb_funcall(rb_cPrismSource, rb_prism_source_id_for, 3, source_string, LONG2NUM(parser.start_line), offsets);
+    VALUE offsets = rb_ary_new_capa(parser.newline_list.size);
+    VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(parser.start_line), offsets);
     parse_lex_data_t parse_lex_data = {
         .source = source,
@@ -628,16 +652,16 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
         rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
     }
-    VALUE value;
+    VALUE result;
     if (return_nodes) {
-        value = rb_ary_new_capa(2);
+        VALUE value = rb_ary_new_capa(2);
         rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
         rb_ary_push(value, parse_lex_data.tokens);
+        result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source);
     } else {
-        value = parse_lex_data.tokens;
+        result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source);
     }
-    VALUE result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source);
     pm_node_destroy(&parser, node);
     pm_parser_free(&parser);
@@ -646,10 +670,10 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
 /**
  * call-seq:
- *   Prism::lex(source, **options) -> Array
+ *   Prism::lex(source, **options) -> LexResult
  *
- * Return an array of Token instances corresponding to the given string. For
- * supported options, see Prism::parse.
+ * Return a LexResult instance that contains an array of Token instances
+ * corresponding to the given string. For supported options, see Prism::parse.
  */
 static VALUE
 lex(int argc, VALUE *argv, VALUE self) {
@@ -666,17 +690,18 @@ lex(int argc, VALUE *argv, VALUE self) {
 /**
  * call-seq:
- *   Prism::lex_file(filepath, **options) -> Array
+ *   Prism::lex_file(filepath, **options) -> LexResult
  *
- * Return an array of Token instances corresponding to the given file. For
- * supported options, see Prism::parse.
+ * Return a LexResult instance that contains an array of Token instances
+ * corresponding to the given file. For supported options, see Prism::parse.
  */
 static VALUE
 lex_file(int argc, VALUE *argv, VALUE self) {
     pm_string_t input;
     pm_options_t options = { 0 };
-    file_options(argc, argv, &input, &options);
+    VALUE encoded_filepath;
+    file_options(argc, argv, &input, &options, &encoded_filepath);
     VALUE value = parse_lex_input(&input, &options, false);
     pm_string_free(&input);
@@ -728,14 +753,27 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
  *       has been set. This should be a boolean or nil.
  * * `line` - the line number that the parse starts on. This should be an
  *       integer or nil. Note that this is 1-indexed.
+ * * `main_script` - a boolean indicating whether or not the source being parsed
+ *       is the main script being run by the interpreter. This controls whether
+ *       or not shebangs are parsed for additional flags and whether or not the
+ *       parser will attempt to find a matching shebang if the first one does
+ *       not contain the word "ruby".
+ * * `partial_script` - when the file being parsed is considered a "partial"
+ *       script, jumps will not be marked as errors if they are not contained
+ *       within loops/blocks. This is used in the case that you're parsing a
+ *       script that you know will be embedded inside another script later, but
+ *       you do not have that context yet. For example, when parsing an ERB
+ *       template that will be evaluated inside another script.
  * * `scopes` - the locals that are in scope surrounding the code that is being
  *       parsed. This should be an array of arrays of symbols or nil. Scopes are
  *       ordered from the outermost scope to the innermost one.
  * * `version` - the version of Ruby syntax that prism should used to parse Ruby
  *       code. By default prism assumes you want to parse with the latest version
  *       of Ruby syntax (which you can trigger with `nil` or `"latest"`). You
- *       may also restrict the syntax to a specific version of Ruby. The
- *       supported values are `"3.3.0"` and `"3.4.0"`.
+ *       may also restrict the syntax to a specific version of Ruby, e.g., with `"3.3.0"`.
+ *       To parse with the same syntax version that the current Ruby is running
+ *       use `version: RUBY_VERSION`. Raises ArgumentError if the version is not
+ *       currently supported by Prism.
  */
 static VALUE
 parse(int argc, VALUE *argv, VALUE self) {
@@ -761,6 +799,85 @@ parse(int argc, VALUE *argv, VALUE self) {
     return value;
 }
+/**
+ * call-seq:
+ *   Prism::parse_file(filepath, **options) -> ParseResult
+ *
+ * Parse the given file and return a ParseResult instance. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+parse_file(int argc, VALUE *argv, VALUE self) {
+    pm_string_t input;
+    pm_options_t options = { 0 };
+    VALUE encoded_filepath;
+    file_options(argc, argv, &input, &options, &encoded_filepath);
+    VALUE value = parse_input(&input, &options);
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return value;
+}
+/**
+ * Parse the given input and return nothing.
+ */
+static void
+profile_input(pm_string_t *input, const pm_options_t *options) {
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+    pm_node_t *node = pm_parse(&parser);
+    pm_node_destroy(&parser, node);
+    pm_parser_free(&parser);
+}
+/**
+ * call-seq:
+ *   Prism::profile(source, **options) -> nil
+ *
+ * Parse the given string and return nothing. This method is meant to allow
+ * profilers to avoid the overhead of reifying the AST to Ruby. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+profile(int argc, VALUE *argv, VALUE self) {
+    pm_string_t input;
+    pm_options_t options = { 0 };
+    string_options(argc, argv, &input, &options);
+    profile_input(&input, &options);
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return Qnil;
+}
+/**
+ * call-seq:
+ *   Prism::profile_file(filepath, **options) -> nil
+ *
+ * Parse the given file and return nothing. This method is meant to allow
+ * profilers to avoid the overhead of reifying the AST to Ruby. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+profile_file(int argc, VALUE *argv, VALUE self) {
+    pm_string_t input;
+    pm_options_t options = { 0 };
+    VALUE encoded_filepath;
+    file_options(argc, argv, &input, &options, &encoded_filepath);
+    profile_input(&input, &options);
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return Qnil;
+}
 /**
  * An implementation of fgets that is suitable for use with Ruby IO objects.
  */
@@ -773,8 +890,8 @@ parse_stream_fgets(char *string, int size, void *stream) {
         return NULL;
     }
-    const char *cstr = StringValueCStr(line);
-    size_t length = strlen(cstr);
+    const char *cstr = RSTRING_PTR(line);
+    long length = RSTRING_LEN(line);
     memcpy(string, cstr, length);
     string[length] = '\0';
@@ -815,27 +932,6 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
     return result;
 }
-/**
- * call-seq:
- *   Prism::parse_file(filepath, **options) -> ParseResult
- *
- * Parse the given file and return a ParseResult instance. For supported
- * options, see Prism::parse.
- */
-static VALUE
-parse_file(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
-    file_options(argc, argv, &input, &options);
-    VALUE value = parse_input(&input, &options);
-    pm_string_free(&input);
-    pm_options_free(&options);
-    return value;
-}
 /**
  * Parse the given input and return an array of Comment objects.
  */
@@ -888,7 +984,8 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) {
     pm_string_t input;
     pm_options_t options = { 0 };
-    file_options(argc, argv, &input, &options);
+    VALUE encoded_filepath;
+    file_options(argc, argv, &input, &options, &encoded_filepath);
     VALUE value = parse_input_comments(&input, &options);
     pm_string_free(&input);
@@ -899,9 +996,9 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) {
 /**
  * call-seq:
- *   Prism::parse_lex(source, **options) -> ParseResult
+ *   Prism::parse_lex(source, **options) -> ParseLexResult
  *
- * Parse the given string and return a ParseResult instance that contains a
+ * Parse the given string and return a ParseLexResult instance that contains a
  * 2-element array, where the first element is the AST and the second element is
  * an array of Token instances.
  *
@@ -926,9 +1023,9 @@ parse_lex(int argc, VALUE *argv, VALUE self) {
 /**
  * call-seq:
- *   Prism::parse_lex_file(filepath, **options) -> ParseResult
+ *   Prism::parse_lex_file(filepath, **options) -> ParseLexResult
  *
- * Parse the given file and return a ParseResult instance that contains a
+ * Parse the given file and return a ParseLexResult instance that contains a
  * 2-element array, where the first element is the AST and the second element is
  * an array of Token instances.
  *
@@ -943,7 +1040,8 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) {
     pm_string_t input;
     pm_options_t options = { 0 };
-    file_options(argc, argv, &input, &options);
+    VALUE encoded_filepath;
+    file_options(argc, argv, &input, &options, &encoded_filepath);
     VALUE value = parse_lex_input(&input, &options, true);
     pm_string_free(&input);
@@ -1013,7 +1111,8 @@ parse_file_success_p(int argc, VALUE *argv, VALUE self) {
     pm_string_t input;
     pm_options_t options = { 0 };
-    file_options(argc, argv, &input, &options);
+    VALUE encoded_filepath;
+    file_options(argc, argv, &input, &options, &encoded_filepath);
     VALUE result = parse_input_success_p(&input, &options);
     pm_string_free(&input);
@@ -1034,303 +1133,6 @@ parse_file_failure_p(int argc, VALUE *argv, VALUE self) {
     return RTEST(parse_file_success_p(argc, argv, self)) ? Qfalse : Qtrue;
 }
-/******************************************************************************/
-/* Utility functions exposed to make testing easier                           */
-/******************************************************************************/
-/**
- * call-seq:
- *   Debug::named_captures(source) -> Array
- *
- * Returns an array of strings corresponding to the named capture groups in the
- * given source string. If prism was unable to parse the regular expression,
- * this function returns nil.
- */
-static VALUE
-named_captures(VALUE self, VALUE source) {
-    pm_string_list_t string_list = { 0 };
-    if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, PM_ENCODING_UTF_8_ENTRY)) {
-        pm_string_list_free(&string_list);
-        return Qnil;
-    }
-    VALUE names = rb_ary_new();
-    for (size_t index = 0; index < string_list.length; index++) {
-        const pm_string_t *string = &string_list.strings[index];
-        rb_ary_push(names, rb_str_new((const char *) pm_string_source(string), pm_string_length(string)));
-    }
-    pm_string_list_free(&string_list);
-    return names;
-}
-/**
- * call-seq:
- *   Debug::integer_parse(source) -> [Integer, String]
- *
- * Parses the given source string and returns the integer it represents, as well
- * as a decimal string representation.
- */
-static VALUE
-integer_parse(VALUE self, VALUE source) {
-    const uint8_t *start = (const uint8_t *) RSTRING_PTR(source);
-    size_t length = RSTRING_LEN(source);
-    pm_integer_t integer = { 0 };
-    pm_integer_parse(&integer, PM_INTEGER_BASE_UNKNOWN, start, start + length);
-    pm_buffer_t buffer = { 0 };
-    pm_integer_string(&buffer, &integer);
-    VALUE string = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
-    pm_buffer_free(&buffer);
-    VALUE result = rb_ary_new_capa(2);
-    rb_ary_push(result, pm_integer_new(&integer));
-    rb_ary_push(result, string);
-    pm_integer_free(&integer);
-    return result;
-}
-/**
- * call-seq:
- *   Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
- *
- * Return a hash of information about the given source string's memory usage.
- */
-static VALUE
-memsize(VALUE self, VALUE string) {
-    pm_parser_t parser;
-    size_t length = RSTRING_LEN(string);
-    pm_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
-    pm_node_t *node = pm_parse(&parser);
-    pm_memsize_t memsize;
-    pm_node_memsize(node, &memsize);
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
-    VALUE result = rb_hash_new();
-    rb_hash_aset(result, ID2SYM(rb_intern("length")), INT2FIX(length));
-    rb_hash_aset(result, ID2SYM(rb_intern("memsize")), INT2FIX(memsize.memsize));
-    rb_hash_aset(result, ID2SYM(rb_intern("node_count")), INT2FIX(memsize.node_count));
-    return result;
-}
-/**
- * call-seq:
- *   Debug::profile_file(filepath) -> nil
- *
- * Parse the file, but do nothing with the result. This is used to profile the
- * parser for memory and speed.
- */
-static VALUE
-profile_file(VALUE self, VALUE filepath) {
-    pm_string_t input;
-    const char *checked = check_string(filepath);
-    Check_Type(filepath, T_STRING);
-    if (!pm_string_mapped_init(&input, checked)) {
-#ifdef _WIN32
-        int e = rb_w32_map_errno(GetLastError());
-#else
-        int e = errno;
-#endif
-        rb_syserr_fail(e, checked);
-    }
-    pm_options_t options = { 0 };
-    pm_options_filepath_set(&options, checked);
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
-    pm_node_t *node = pm_parse(&parser);
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
-    pm_options_free(&options);
-    pm_string_free(&input);
-    return Qnil;
-}
-#ifndef PRISM_EXCLUDE_PRETTYPRINT
-/**
- * call-seq:
- *   Debug::inspect_node(source) -> inspected
- *
- * Inspect the AST that represents the given source using the prism pretty print
- * as opposed to the Ruby implementation.
- */
-static VALUE
-inspect_node(VALUE self, VALUE source) {
-    pm_string_t input;
-    input_load_string(&input, source);
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
-    pm_node_t *node = pm_parse(&parser);
-    pm_buffer_t buffer = { 0 };
-    pm_prettyprint(&buffer, &parser, node);
-    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
-    VALUE string = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
-    pm_buffer_free(&buffer);
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
-    return string;
-}
-#endif
-/**
- * call-seq:
- *   Debug::format_errors(source, colorize) -> String
- *
- * Format the errors that are found when parsing the given source string.
- */
-static VALUE
-format_errors(VALUE self, VALUE source, VALUE colorize) {
-    pm_string_t input;
-    input_load_string(&input, source);
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
-    pm_node_t *node = pm_parse(&parser);
-    pm_buffer_t buffer = { 0 };
-    pm_parser_errors_format(&parser, &parser.error_list, &buffer, RTEST(colorize), true);
-    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
-    VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
-    pm_buffer_free(&buffer);
-    pm_node_destroy(&parser, node);
-    pm_parser_free(&parser);
-    pm_string_free(&input);
-    return result;
-}
-/**
- * call-seq:
- *   Debug::static_inspect(source) -> String
- *
- * Inspect the node as it would be inspected by the warnings used in static
- * literal sets.
- */
-static VALUE
-static_inspect(int argc, VALUE *argv, VALUE self) {
-    pm_string_t input;
-    pm_options_t options = { 0 };
-    string_options(argc, argv, &input, &options);
-    pm_parser_t parser;
-    pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
-    pm_node_t *program = pm_parse(&parser);
-    pm_node_t *node = ((pm_program_node_t *) program)->statements->body.nodes[0];
-    pm_buffer_t buffer = { 0 };
-    pm_static_literal_inspect(&buffer, &parser.newline_list, parser.start_line, parser.encoding->name, node);
-    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
-    VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
-    pm_buffer_free(&buffer);
-    pm_node_destroy(&parser, program);
-    pm_parser_free(&parser);
-    pm_string_free(&input);
-    pm_options_free(&options);
-    return result;
-}
-/**
- * call-seq: Debug::Encoding.all -> Array[Debug::Encoding]
- *
- * Return an array of all of the encodings that prism knows about.
- */
-static VALUE
-encoding_all(VALUE self) {
-    VALUE encodings = rb_ary_new();
-    for (size_t index = 0; index < PM_ENCODING_MAXIMUM; index++) {
-        const pm_encoding_t *encoding = &pm_encodings[index];
-        VALUE encoding_argv[] = { rb_str_new_cstr(encoding->name), encoding->multibyte ? Qtrue : Qfalse };
-        rb_ary_push(encodings, rb_class_new_instance(2, encoding_argv, rb_cPrismDebugEncoding));
-    }
-    return encodings;
-}
-static const pm_encoding_t *
-encoding_find(VALUE name) {
-    const uint8_t *source = (const uint8_t *) RSTRING_PTR(name);
-    size_t length = RSTRING_LEN(name);
-    const pm_encoding_t *encoding = pm_encoding_find(source, source + length);
-    if (encoding == NULL) { rb_raise(rb_eArgError, "Unknown encoding: %s", source); }
-    return encoding;
-}
-/**
- * call-seq: Debug::Encoding.width(source) -> Integer
- *
- * Returns the width of the first character in the given string if it is valid
- * in the encoding. If it is not, this function returns 0.
- */
-static VALUE
-encoding_char_width(VALUE self, VALUE name, VALUE value) {
-    return ULONG2NUM(encoding_find(name)->char_width((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)));
-}
-/**
- * call-seq: Debug::Encoding.alnum?(source) -> true | false
- *
- * Returns true if the first character in the given string is an alphanumeric
- * character in the encoding.
- */
-static VALUE
-encoding_alnum_char(VALUE self, VALUE name, VALUE value) {
-    return encoding_find(name)->alnum_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
-}
-/**
- * call-seq: Debug::Encoding.alpha?(source) -> true | false
- *
- * Returns true if the first character in the given string is an alphabetic
- * character in the encoding.
- */
-static VALUE
-encoding_alpha_char(VALUE self, VALUE name, VALUE value) {
-    return encoding_find(name)->alpha_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
-}
-/**
- * call-seq: Debug::Encoding.upper?(source) -> true | false
- *
- * Returns true if the first character in the given string is an uppercase
- * character in the encoding.
- */
-static VALUE
-encoding_isupper_char(VALUE self, VALUE name, VALUE value) {
-    return encoding_find(name)->isupper_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) ? Qtrue : Qfalse;
-}
 /******************************************************************************/
 /* Initialization of the extension                                            */
 /******************************************************************************/
@@ -1364,22 +1166,23 @@ Init_prism(void) {
     rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
     rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
     rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
     rb_cPrismResult = rb_define_class_under(rb_cPrism, "Result", rb_cObject);
     rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cPrismResult);
+    rb_cPrismLexResult = rb_define_class_under(rb_cPrism, "LexResult", rb_cPrismResult);
     rb_cPrismParseLexResult = rb_define_class_under(rb_cPrism, "ParseLexResult", rb_cPrismResult);
-    // Intern all of the options that we support so that we don't have to do it
-    // every time we parse.
-    rb_option_id_command_line = rb_intern_const("command_line");
-    rb_option_id_encoding = rb_intern_const("encoding");
-    rb_option_id_filepath = rb_intern_const("filepath");
-    rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
-    rb_option_id_line = rb_intern_const("line");
-    rb_option_id_scopes = rb_intern_const("scopes");
-    rb_option_id_version = rb_intern_const("version");
-    rb_prism_source_id_for = rb_intern("for");
+    // Intern all of the IDs eagerly that we support so that we don't have to do
+    // it every time we parse.
+    rb_id_option_command_line = rb_intern_const("command_line");
+    rb_id_option_encoding = rb_intern_const("encoding");
+    rb_id_option_filepath = rb_intern_const("filepath");
+    rb_id_option_frozen_string_literal = rb_intern_const("frozen_string_literal");
+    rb_id_option_line = rb_intern_const("line");
+    rb_id_option_main_script = rb_intern_const("main_script");
+    rb_id_option_partial_script = rb_intern_const("partial_script");
+    rb_id_option_scopes = rb_intern_const("scopes");
+    rb_id_option_version = rb_intern_const("version");
+    rb_id_source_for = rb_intern("for");
     /**
      * The version of the prism library.
@@ -1390,8 +1193,10 @@ Init_prism(void) {
     rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
     rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
     rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
-    rb_define_singleton_method(rb_cPrism, "parse_stream", parse_stream, -1);
     rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
+    rb_define_singleton_method(rb_cPrism, "profile", profile, -1);
+    rb_define_singleton_method(rb_cPrism, "profile_file", profile_file, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_stream", parse_stream, -1);
     rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
     rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
     rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
@@ -1406,29 +1211,6 @@ Init_prism(void) {
     rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
 #endif
-    // Next, the functions that will be called by the parser to perform various
-    // internal tasks. We expose these to make them easier to test.
-    VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
-    rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
-    rb_define_singleton_method(rb_cPrismDebug, "integer_parse", integer_parse, 1);
-    rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
-    rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
-    rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
-    rb_define_singleton_method(rb_cPrismDebug, "static_inspect", static_inspect, -1);
-#ifndef PRISM_EXCLUDE_PRETTYPRINT
-    rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
-#endif
-    // Next, define the functions that are exposed through the private
-    // Debug::Encoding class.
-    rb_cPrismDebugEncoding = rb_define_class_under(rb_cPrismDebug, "Encoding", rb_cObject);
-    rb_define_singleton_method(rb_cPrismDebugEncoding, "all", encoding_all, 0);
-    rb_define_singleton_method(rb_cPrismDebugEncoding, "_width", encoding_char_width, 2);
-    rb_define_singleton_method(rb_cPrismDebugEncoding, "_alnum?", encoding_alnum_char, 2);
-    rb_define_singleton_method(rb_cPrismDebugEncoding, "_alpha?", encoding_alpha_char, 2);
-    rb_define_singleton_method(rb_cPrismDebugEncoding, "_upper?", encoding_isupper_char, 2);
     // Next, initialize the other APIs.
     Init_prism_api_node();
     Init_prism_pack();