prism 0.16.0 → 0.17.0
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/Makefile +6 -0
- data/README.md +1 -1
- data/config.yml +50 -35
- data/docs/fuzzing.md +1 -1
- data/docs/serialization.md +28 -29
- data/ext/prism/api_node.c +802 -770
- data/ext/prism/api_pack.c +20 -9
- data/ext/prism/extension.c +464 -162
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +3173 -763
- data/include/prism/defines.h +32 -9
- data/include/prism/diagnostic.h +36 -3
- data/include/prism/enc/pm_encoding.h +118 -28
- data/include/prism/node.h +38 -13
- data/include/prism/options.h +204 -0
- data/include/prism/pack.h +44 -33
- data/include/prism/parser.h +445 -200
- data/include/prism/prettyprint.h +12 -1
- data/include/prism/regexp.h +16 -2
- data/include/prism/util/pm_buffer.h +94 -16
- data/include/prism/util/pm_char.h +162 -48
- data/include/prism/util/pm_constant_pool.h +126 -32
- data/include/prism/util/pm_list.h +68 -38
- data/include/prism/util/pm_memchr.h +18 -3
- data/include/prism/util/pm_newline_list.h +70 -27
- data/include/prism/util/pm_state_stack.h +25 -7
- data/include/prism/util/pm_string.h +115 -27
- data/include/prism/util/pm_string_list.h +25 -6
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +31 -17
- data/include/prism/version.h +27 -2
- data/include/prism.h +224 -31
- data/lib/prism/compiler.rb +6 -3
- data/lib/prism/debug.rb +23 -7
- data/lib/prism/dispatcher.rb +33 -18
- data/lib/prism/dsl.rb +10 -5
- data/lib/prism/ffi.rb +132 -80
- data/lib/prism/lex_compat.rb +25 -15
- data/lib/prism/mutation_compiler.rb +10 -5
- data/lib/prism/node.rb +370 -135
- data/lib/prism/node_ext.rb +1 -1
- data/lib/prism/node_inspector.rb +1 -1
- data/lib/prism/pack.rb +79 -40
- data/lib/prism/parse_result/comments.rb +7 -2
- data/lib/prism/parse_result/newlines.rb +4 -0
- data/lib/prism/parse_result.rb +150 -30
- data/lib/prism/pattern.rb +11 -0
- data/lib/prism/ripper_compat.rb +28 -10
- data/lib/prism/serialize.rb +86 -54
- data/lib/prism/visitor.rb +10 -3
- data/lib/prism.rb +20 -2
- data/prism.gemspec +4 -2
- data/rbi/prism.rbi +104 -60
- data/rbi/prism_static.rbi +16 -2
- data/sig/prism.rbs +72 -43
- data/sig/prism_static.rbs +14 -1
- data/src/diagnostic.c +56 -53
- data/src/enc/pm_big5.c +1 -0
- data/src/enc/pm_euc_jp.c +1 -0
- data/src/enc/pm_gbk.c +1 -0
- data/src/enc/pm_shift_jis.c +1 -0
- data/src/enc/pm_tables.c +316 -80
- data/src/enc/pm_unicode.c +53 -8
- data/src/enc/pm_windows_31j.c +1 -0
- data/src/node.c +334 -321
- data/src/options.c +170 -0
- data/src/prettyprint.c +74 -47
- data/src/prism.c +1642 -856
- data/src/regexp.c +151 -95
- data/src/serialize.c +44 -20
- data/src/token_type.c +3 -1
- data/src/util/pm_buffer.c +45 -15
- data/src/util/pm_char.c +103 -57
- data/src/util/pm_constant_pool.c +51 -21
- data/src/util/pm_list.c +12 -4
- data/src/util/pm_memchr.c +5 -3
- data/src/util/pm_newline_list.c +20 -12
- data/src/util/pm_state_stack.c +9 -3
- data/src/util/pm_string.c +95 -85
- data/src/util/pm_string_list.c +14 -15
- data/src/util/pm_strncasecmp.c +10 -3
- data/src/util/pm_strpbrk.c +25 -19
- metadata +5 -3
- data/docs/prism.png +0 -0
    
        data/ext/prism/extension.c
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            #include "prism/extension.h"
         | 
| 2 2 |  | 
| 3 | 
            -
            // NOTE: this file should contain only bindings.
         | 
| 4 | 
            -
            //  | 
| 3 | 
            +
            // NOTE: this file should contain only bindings. All non-trivial logic should be
         | 
| 4 | 
            +
            // in librubyparser so it can be shared with its various callers.
         | 
| 5 5 |  | 
| 6 6 | 
             
            VALUE rb_cPrism;
         | 
| 7 7 | 
             
            VALUE rb_cPrismNode;
         | 
| @@ -10,18 +10,30 @@ VALUE rb_cPrismToken; | |
| 10 10 | 
             
            VALUE rb_cPrismLocation;
         | 
| 11 11 |  | 
| 12 12 | 
             
            VALUE rb_cPrismComment;
         | 
| 13 | 
            +
            VALUE rb_cPrismInlineComment;
         | 
| 14 | 
            +
            VALUE rb_cPrismEmbDocComment;
         | 
| 15 | 
            +
            VALUE rb_cPrismDATAComment;
         | 
| 13 16 | 
             
            VALUE rb_cPrismMagicComment;
         | 
| 14 17 | 
             
            VALUE rb_cPrismParseError;
         | 
| 15 18 | 
             
            VALUE rb_cPrismParseWarning;
         | 
| 16 19 | 
             
            VALUE rb_cPrismParseResult;
         | 
| 17 20 |  | 
| 21 | 
            +
            ID rb_option_id_filepath;
         | 
| 22 | 
            +
            ID rb_option_id_encoding;
         | 
| 23 | 
            +
            ID rb_option_id_line;
         | 
| 24 | 
            +
            ID rb_option_id_frozen_string_literal;
         | 
| 25 | 
            +
            ID rb_option_id_verbose;
         | 
| 26 | 
            +
            ID rb_option_id_scopes;
         | 
| 27 | 
            +
             | 
| 18 28 | 
             
            /******************************************************************************/
         | 
| 19 29 | 
             
            /* IO of Ruby code                                                            */
         | 
| 20 30 | 
             
            /******************************************************************************/
         | 
| 21 31 |  | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 32 | 
            +
            /**
         | 
| 33 | 
            +
             * Check if the given VALUE is a string. If it's nil, then return NULL. If it's
         | 
| 34 | 
            +
             * not a string, then raise a type error. Otherwise return the VALUE as a C
         | 
| 35 | 
            +
             * string.
         | 
| 36 | 
            +
             */
         | 
| 25 37 | 
             
            static const char *
         | 
| 26 38 | 
             
            check_string(VALUE value) {
         | 
| 27 39 | 
             
                // If the value is nil, then we don't need to do anything.
         | 
| @@ -38,7 +50,9 @@ check_string(VALUE value) { | |
| 38 50 | 
             
                return RSTRING_PTR(value);
         | 
| 39 51 | 
             
            }
         | 
| 40 52 |  | 
| 41 | 
            -
             | 
| 53 | 
            +
            /**
         | 
| 54 | 
            +
             * Load the contents and size of the given string into the given pm_string_t.
         | 
| 55 | 
            +
             */
         | 
| 42 56 | 
             
            static void
         | 
| 43 57 | 
             
            input_load_string(pm_string_t *input, VALUE string) {
         | 
| 44 58 | 
             
                // Check if the string is a string. If it's not, then raise a type error.
         | 
| @@ -49,20 +63,181 @@ input_load_string(pm_string_t *input, VALUE string) { | |
| 49 63 | 
             
                pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
         | 
| 50 64 | 
             
            }
         | 
| 51 65 |  | 
| 66 | 
            +
            /******************************************************************************/
         | 
| 67 | 
            +
            /* Building C options from Ruby options                                       */
         | 
| 68 | 
            +
            /******************************************************************************/
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            /**
         | 
| 71 | 
            +
             * Build the scopes associated with the provided Ruby keyword value.
         | 
| 72 | 
            +
             */
         | 
| 73 | 
            +
            static void
         | 
| 74 | 
            +
            build_options_scopes(pm_options_t *options, VALUE scopes) {
         | 
| 75 | 
            +
                // Check if the value is an array. If it's not, then raise a type error.
         | 
| 76 | 
            +
                if (!RB_TYPE_P(scopes, T_ARRAY)) {
         | 
| 77 | 
            +
                    rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
         | 
| 78 | 
            +
                }
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                // Initialize the scopes array.
         | 
| 81 | 
            +
                size_t scopes_count = RARRAY_LEN(scopes);
         | 
| 82 | 
            +
                pm_options_scopes_init(options, scopes_count);
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                // Iterate over the scopes and add them to the options.
         | 
| 85 | 
            +
                for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
         | 
| 86 | 
            +
                    VALUE scope = rb_ary_entry(scopes, scope_index);
         | 
| 87 | 
            +
             | 
| 88 | 
            +
                    // Check that the scope is an array. If it's not, then raise a type
         | 
| 89 | 
            +
                    // error.
         | 
| 90 | 
            +
                    if (!RB_TYPE_P(scope, T_ARRAY)) {
         | 
| 91 | 
            +
                        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
         | 
| 92 | 
            +
                    }
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                    // Initialize the scope array.
         | 
| 95 | 
            +
                    size_t locals_count = RARRAY_LEN(scope);
         | 
| 96 | 
            +
                    pm_options_scope_t *options_scope = &options->scopes[scope_index];
         | 
| 97 | 
            +
                    pm_options_scope_init(options_scope, locals_count);
         | 
| 98 | 
            +
             | 
| 99 | 
            +
                    // Iterate over the locals and add them to the scope.
         | 
| 100 | 
            +
                    for (size_t local_index = 0; local_index < locals_count; local_index++) {
         | 
| 101 | 
            +
                        VALUE local = rb_ary_entry(scope, local_index);
         | 
| 102 | 
            +
             | 
| 103 | 
            +
                        // Check that the local is a symbol. If it's not, then raise a
         | 
| 104 | 
            +
                        // type error.
         | 
| 105 | 
            +
                        if (!RB_TYPE_P(local, T_SYMBOL)) {
         | 
| 106 | 
            +
                            rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
         | 
| 107 | 
            +
                        }
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                        // Add the local to the scope.
         | 
| 110 | 
            +
                        pm_string_t *scope_local = &options_scope->locals[local_index];
         | 
| 111 | 
            +
                        const char *name = rb_id2name(SYM2ID(local));
         | 
| 112 | 
            +
                        pm_string_constant_init(scope_local, name, strlen(name));
         | 
| 113 | 
            +
                    }
         | 
| 114 | 
            +
                }
         | 
| 115 | 
            +
            }
         | 
| 116 | 
            +
             | 
| 117 | 
            +
            /**
         | 
| 118 | 
            +
             * An iterator function that is called for each key-value in the keywords hash.
         | 
| 119 | 
            +
             */
         | 
| 120 | 
            +
            static int
         | 
| 121 | 
            +
            build_options_i(VALUE key, VALUE value, VALUE argument) {
         | 
| 122 | 
            +
                pm_options_t *options = (pm_options_t *) argument;
         | 
| 123 | 
            +
                ID key_id = SYM2ID(key);
         | 
| 124 | 
            +
             | 
| 125 | 
            +
                if (key_id == rb_option_id_filepath) {
         | 
| 126 | 
            +
                    if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
         | 
| 127 | 
            +
                } else if (key_id == rb_option_id_encoding) {
         | 
| 128 | 
            +
                    if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
         | 
| 129 | 
            +
                } else if (key_id == rb_option_id_line) {
         | 
| 130 | 
            +
                    if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
         | 
| 131 | 
            +
                } else if (key_id == rb_option_id_frozen_string_literal) {
         | 
| 132 | 
            +
                    if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
         | 
| 133 | 
            +
                } else if (key_id == rb_option_id_verbose) {
         | 
| 134 | 
            +
                    pm_options_suppress_warnings_set(options, value != Qtrue);
         | 
| 135 | 
            +
                } else if (key_id == rb_option_id_scopes) {
         | 
| 136 | 
            +
                    if (!NIL_P(value)) build_options_scopes(options, value);
         | 
| 137 | 
            +
                } else {
         | 
| 138 | 
            +
                    rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
         | 
| 139 | 
            +
                }
         | 
| 140 | 
            +
             | 
| 141 | 
            +
                return ST_CONTINUE;
         | 
| 142 | 
            +
            }
         | 
| 143 | 
            +
             | 
| 144 | 
            +
            /**
         | 
| 145 | 
            +
             * We need a struct here to pass through rb_protect and it has to be a single
         | 
| 146 | 
            +
             * value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this
         | 
| 147 | 
            +
             * through as an opaque pointer and cast it on both sides.
         | 
| 148 | 
            +
             */
         | 
| 149 | 
            +
            struct build_options_data {
         | 
| 150 | 
            +
                pm_options_t *options;
         | 
| 151 | 
            +
                VALUE keywords;
         | 
| 152 | 
            +
            };
         | 
| 153 | 
            +
             | 
| 154 | 
            +
            /**
         | 
| 155 | 
            +
             * Build the set of options from the given keywords. Note that this can raise a
         | 
| 156 | 
            +
             * Ruby error if the options are not valid.
         | 
| 157 | 
            +
             */
         | 
| 158 | 
            +
            static VALUE
         | 
| 159 | 
            +
            build_options(VALUE argument) {
         | 
| 160 | 
            +
                struct build_options_data *data = (struct build_options_data *) argument;
         | 
| 161 | 
            +
                rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
         | 
| 162 | 
            +
                return Qnil;
         | 
| 163 | 
            +
            }
         | 
| 164 | 
            +
             | 
| 165 | 
            +
            /**
         | 
| 166 | 
            +
             * Extract the options from the given keyword arguments.
         | 
| 167 | 
            +
             */
         | 
| 168 | 
            +
            static void
         | 
| 169 | 
            +
            extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
         | 
| 170 | 
            +
                if (!NIL_P(keywords)) {
         | 
| 171 | 
            +
                    struct build_options_data data = { .options = options, .keywords = keywords };
         | 
| 172 | 
            +
                    struct build_options_data *argument = &data;
         | 
| 173 | 
            +
             | 
| 174 | 
            +
                    int state = 0;
         | 
| 175 | 
            +
                    rb_protect(build_options, (VALUE) argument, &state);
         | 
| 176 | 
            +
             | 
| 177 | 
            +
                    if (state != 0) {
         | 
| 178 | 
            +
                        pm_options_free(options);
         | 
| 179 | 
            +
                        rb_jump_tag(state);
         | 
| 180 | 
            +
                    }
         | 
| 181 | 
            +
                }
         | 
| 182 | 
            +
             | 
| 183 | 
            +
                if (!NIL_P(filepath)) {
         | 
| 184 | 
            +
                    if (!RB_TYPE_P(filepath, T_STRING)) {
         | 
| 185 | 
            +
                        pm_options_free(options);
         | 
| 186 | 
            +
                        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
         | 
| 187 | 
            +
                    }
         | 
| 188 | 
            +
             | 
| 189 | 
            +
                    pm_options_filepath_set(options, RSTRING_PTR(filepath));
         | 
| 190 | 
            +
                }
         | 
| 191 | 
            +
            }
         | 
| 192 | 
            +
             | 
| 193 | 
            +
            /**
         | 
| 194 | 
            +
             * Read options for methods that look like (source, **options).
         | 
| 195 | 
            +
             */
         | 
| 196 | 
            +
            static void
         | 
| 197 | 
            +
            string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
         | 
| 198 | 
            +
                VALUE string;
         | 
| 199 | 
            +
                VALUE keywords;
         | 
| 200 | 
            +
                rb_scan_args(argc, argv, "1:", &string, &keywords);
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                extract_options(options, Qnil, keywords);
         | 
| 203 | 
            +
                input_load_string(input, string);
         | 
| 204 | 
            +
            }
         | 
| 205 | 
            +
             | 
| 206 | 
            +
            /**
         | 
| 207 | 
            +
             * Read options for methods that look like (filepath, **options).
         | 
| 208 | 
            +
             */
         | 
| 209 | 
            +
            static bool
         | 
| 210 | 
            +
            file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
         | 
| 211 | 
            +
                VALUE filepath;
         | 
| 212 | 
            +
                VALUE keywords;
         | 
| 213 | 
            +
                rb_scan_args(argc, argv, "1:", &filepath, &keywords);
         | 
| 214 | 
            +
             | 
| 215 | 
            +
                extract_options(options, filepath, keywords);
         | 
| 216 | 
            +
             | 
| 217 | 
            +
                if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
         | 
| 218 | 
            +
                    pm_options_free(options);
         | 
| 219 | 
            +
                    return false;
         | 
| 220 | 
            +
                }
         | 
| 221 | 
            +
             | 
| 222 | 
            +
                return true;
         | 
| 223 | 
            +
            }
         | 
| 224 | 
            +
             | 
| 52 225 | 
             
            /******************************************************************************/
         | 
| 53 226 | 
             
            /* Serializing the AST                                                        */
         | 
| 54 227 | 
             
            /******************************************************************************/
         | 
| 55 228 |  | 
| 56 | 
            -
             | 
| 229 | 
            +
            /**
         | 
| 230 | 
            +
             * Dump the AST corresponding to the given input to a string.
         | 
| 231 | 
            +
             */
         | 
| 57 232 | 
             
            static VALUE
         | 
| 58 | 
            -
            dump_input(pm_string_t *input, const  | 
| 233 | 
            +
            dump_input(pm_string_t *input, const pm_options_t *options) {
         | 
| 59 234 | 
             
                pm_buffer_t buffer;
         | 
| 60 235 | 
             
                if (!pm_buffer_init(&buffer)) {
         | 
| 61 236 | 
             
                    rb_raise(rb_eNoMemError, "failed to allocate memory");
         | 
| 62 237 | 
             
                }
         | 
| 63 238 |  | 
| 64 239 | 
             
                pm_parser_t parser;
         | 
| 65 | 
            -
                pm_parser_init(&parser, pm_string_source(input), pm_string_length(input),  | 
| 240 | 
            +
                pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
         | 
| 66 241 |  | 
| 67 242 | 
             
                pm_node_t *node = pm_parse(&parser);
         | 
| 68 243 | 
             
                pm_serialize(&parser, node, &buffer);
         | 
| @@ -75,15 +250,18 @@ dump_input(pm_string_t *input, const char *filepath) { | |
| 75 250 | 
             
                return result;
         | 
| 76 251 | 
             
            }
         | 
| 77 252 |  | 
| 78 | 
            -
             | 
| 253 | 
            +
            /**
         | 
| 254 | 
            +
             * call-seq:
         | 
| 255 | 
            +
             *   Prism::dump(source, **options) -> String
         | 
| 256 | 
            +
             *
         | 
| 257 | 
            +
             * Dump the AST corresponding to the given string to a string. For supported
         | 
| 258 | 
            +
             * options, see Prism::parse.
         | 
| 259 | 
            +
             */
         | 
| 79 260 | 
             
            static VALUE
         | 
| 80 261 | 
             
            dump(int argc, VALUE *argv, VALUE self) {
         | 
| 81 | 
            -
                VALUE string;
         | 
| 82 | 
            -
                VALUE filepath;
         | 
| 83 | 
            -
                rb_scan_args(argc, argv, "11", &string, &filepath);
         | 
| 84 | 
            -
             | 
| 85 262 | 
             
                pm_string_t input;
         | 
| 86 | 
            -
                 | 
| 263 | 
            +
                pm_options_t options = { 0 };
         | 
| 264 | 
            +
                string_options(argc, argv, &input, &options);
         | 
| 87 265 |  | 
| 88 266 | 
             
            #ifdef PRISM_DEBUG_MODE_BUILD
         | 
| 89 267 | 
             
                size_t length = pm_string_length(&input);
         | 
| @@ -92,25 +270,34 @@ dump(int argc, VALUE *argv, VALUE self) { | |
| 92 270 | 
             
                pm_string_constant_init(&input, dup, length);
         | 
| 93 271 | 
             
            #endif
         | 
| 94 272 |  | 
| 95 | 
            -
                VALUE value = dump_input(&input,  | 
| 273 | 
            +
                VALUE value = dump_input(&input, &options);
         | 
| 96 274 |  | 
| 97 275 | 
             
            #ifdef PRISM_DEBUG_MODE_BUILD
         | 
| 98 276 | 
             
                free(dup);
         | 
| 99 277 | 
             
            #endif
         | 
| 100 278 |  | 
| 279 | 
            +
                pm_string_free(&input);
         | 
| 280 | 
            +
                pm_options_free(&options);
         | 
| 281 | 
            +
             | 
| 101 282 | 
             
                return value;
         | 
| 102 283 | 
             
            }
         | 
| 103 284 |  | 
| 104 | 
            -
             | 
| 285 | 
            +
            /**
         | 
| 286 | 
            +
             * call-seq:
         | 
| 287 | 
            +
             *   Prism::dump_file(filepath, **options) -> String
         | 
| 288 | 
            +
             *
         | 
| 289 | 
            +
             * Dump the AST corresponding to the given file to a string. For supported
         | 
| 290 | 
            +
             * options, see Prism::parse.
         | 
| 291 | 
            +
             */
         | 
| 105 292 | 
             
            static VALUE
         | 
| 106 | 
            -
            dump_file(VALUE  | 
| 293 | 
            +
            dump_file(int argc, VALUE *argv, VALUE self) {
         | 
| 107 294 | 
             
                pm_string_t input;
         | 
| 295 | 
            +
                pm_options_t options = { 0 };
         | 
| 296 | 
            +
                if (!file_options(argc, argv, &input, &options)) return Qnil;
         | 
| 108 297 |  | 
| 109 | 
            -
                 | 
| 110 | 
            -
                if (!pm_string_mapped_init(&input, checked)) return Qnil;
         | 
| 111 | 
            -
             | 
| 112 | 
            -
                VALUE value = dump_input(&input, checked);
         | 
| 298 | 
            +
                VALUE value = dump_input(&input, &options);
         | 
| 113 299 | 
             
                pm_string_free(&input);
         | 
| 300 | 
            +
                pm_options_free(&options);
         | 
| 114 301 |  | 
| 115 302 | 
             
                return value;
         | 
| 116 303 | 
             
            }
         | 
| @@ -119,7 +306,9 @@ dump_file(VALUE self, VALUE filepath) { | |
| 119 306 | 
             
            /* Extracting values for the parse result                                     */
         | 
| 120 307 | 
             
            /******************************************************************************/
         | 
| 121 308 |  | 
| 122 | 
            -
             | 
| 309 | 
            +
            /**
         | 
| 310 | 
            +
             * Extract the comments out of the parser into an array.
         | 
| 311 | 
            +
             */
         | 
| 123 312 | 
             
            static VALUE
         | 
| 124 313 | 
             
            parser_comments(pm_parser_t *parser, VALUE source) {
         | 
| 125 314 | 
             
                VALUE comments = rb_ary_new();
         | 
| @@ -134,27 +323,26 @@ parser_comments(pm_parser_t *parser, VALUE source) { | |
| 134 323 | 
             
                    VALUE type;
         | 
| 135 324 | 
             
                    switch (comment->type) {
         | 
| 136 325 | 
             
                        case PM_COMMENT_INLINE:
         | 
| 137 | 
            -
                            type =  | 
| 326 | 
            +
                            type = rb_cPrismInlineComment;
         | 
| 138 327 | 
             
                            break;
         | 
| 139 328 | 
             
                        case PM_COMMENT_EMBDOC:
         | 
| 140 | 
            -
                            type =  | 
| 329 | 
            +
                            type = rb_cPrismEmbDocComment;
         | 
| 141 330 | 
             
                            break;
         | 
| 142 331 | 
             
                        case PM_COMMENT___END__:
         | 
| 143 | 
            -
                            type =  | 
| 144 | 
            -
                            break;
         | 
| 145 | 
            -
                        default:
         | 
| 146 | 
            -
                            type = ID2SYM(rb_intern("inline"));
         | 
| 332 | 
            +
                            type = rb_cPrismDATAComment;
         | 
| 147 333 | 
             
                            break;
         | 
| 148 334 | 
             
                    }
         | 
| 149 335 |  | 
| 150 | 
            -
                    VALUE comment_argv[] = {  | 
| 151 | 
            -
                    rb_ary_push(comments, rb_class_new_instance( | 
| 336 | 
            +
                    VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
         | 
| 337 | 
            +
                    rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
         | 
| 152 338 | 
             
                }
         | 
| 153 339 |  | 
| 154 340 | 
             
                return comments;
         | 
| 155 341 | 
             
            }
         | 
| 156 342 |  | 
| 157 | 
            -
             | 
| 343 | 
            +
            /**
         | 
| 344 | 
            +
             * Extract the magic comments out of the parser into an array.
         | 
| 345 | 
            +
             */
         | 
| 158 346 | 
             
            static VALUE
         | 
| 159 347 | 
             
            parser_magic_comments(pm_parser_t *parser, VALUE source) {
         | 
| 160 348 | 
             
                VALUE magic_comments = rb_ary_new();
         | 
| @@ -183,7 +371,9 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) { | |
| 183 371 | 
             
                return magic_comments;
         | 
| 184 372 | 
             
            }
         | 
| 185 373 |  | 
| 186 | 
            -
             | 
| 374 | 
            +
            /**
         | 
| 375 | 
            +
             * Extract the errors out of the parser into an array.
         | 
| 376 | 
            +
             */
         | 
| 187 377 | 
             
            static VALUE
         | 
| 188 378 | 
             
            parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
         | 
| 189 379 | 
             
                VALUE errors = rb_ary_new();
         | 
| @@ -207,7 +397,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { | |
| 207 397 | 
             
                return errors;
         | 
| 208 398 | 
             
            }
         | 
| 209 399 |  | 
| 210 | 
            -
             | 
| 400 | 
            +
            /**
         | 
| 401 | 
            +
             * Extract the warnings out of the parser into an array.
         | 
| 402 | 
            +
             */
         | 
| 211 403 | 
             
            static VALUE
         | 
| 212 404 | 
             
            parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
         | 
| 213 405 | 
             
                VALUE warnings = rb_ary_new();
         | 
| @@ -235,18 +427,22 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { | |
| 235 427 | 
             
            /* Lexing Ruby code                                                           */
         | 
| 236 428 | 
             
            /******************************************************************************/
         | 
| 237 429 |  | 
| 238 | 
            -
             | 
| 239 | 
            -
             | 
| 240 | 
            -
             | 
| 430 | 
            +
            /**
         | 
| 431 | 
            +
             * This struct gets stored in the parser and passed in to the lex callback any
         | 
| 432 | 
            +
             * time a new token is found. We use it to store the necessary information to
         | 
| 433 | 
            +
             * initialize a Token instance.
         | 
| 434 | 
            +
             */
         | 
| 241 435 | 
             
            typedef struct {
         | 
| 242 436 | 
             
                VALUE source;
         | 
| 243 437 | 
             
                VALUE tokens;
         | 
| 244 438 | 
             
                rb_encoding *encoding;
         | 
| 245 439 | 
             
            } parse_lex_data_t;
         | 
| 246 440 |  | 
| 247 | 
            -
             | 
| 248 | 
            -
             | 
| 249 | 
            -
             | 
| 441 | 
            +
            /**
         | 
| 442 | 
            +
             * This is passed as a callback to the parser. It gets called every time a new
         | 
| 443 | 
            +
             * token is found. Once found, we initialize a new instance of Token and push it
         | 
| 444 | 
            +
             * onto the tokens array.
         | 
| 445 | 
            +
             */
         | 
| 250 446 | 
             
            static void
         | 
| 251 447 | 
             
            parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
         | 
| 252 448 | 
             
                parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
         | 
| @@ -258,9 +454,11 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) { | |
| 258 454 | 
             
                rb_ary_push(parse_lex_data->tokens, yields);
         | 
| 259 455 | 
             
            }
         | 
| 260 456 |  | 
| 261 | 
            -
             | 
| 262 | 
            -
             | 
| 263 | 
            -
             | 
| 457 | 
            +
            /**
         | 
| 458 | 
            +
             * This is called whenever the encoding changes based on the magic comment at
         | 
| 459 | 
            +
             * the top of the file. We use it to update the encoding that we are using to
         | 
| 460 | 
            +
             * create tokens.
         | 
| 461 | 
            +
             */
         | 
| 264 462 | 
             
            static void
         | 
| 265 463 | 
             
            parse_lex_encoding_changed_callback(pm_parser_t *parser) {
         | 
| 266 464 | 
             
                parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
         | 
| @@ -281,17 +479,19 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) { | |
| 281 479 | 
             
                }
         | 
| 282 480 | 
             
            }
         | 
| 283 481 |  | 
| 284 | 
            -
             | 
| 285 | 
            -
             | 
| 482 | 
            +
            /**
         | 
| 483 | 
            +
             * Parse the given input and return a ParseResult containing just the tokens or
         | 
| 484 | 
            +
             * the nodes and tokens.
         | 
| 485 | 
            +
             */
         | 
| 286 486 | 
             
            static VALUE
         | 
| 287 | 
            -
            parse_lex_input(pm_string_t *input, const  | 
| 487 | 
            +
            parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
         | 
| 288 488 | 
             
                pm_parser_t parser;
         | 
| 289 | 
            -
                pm_parser_init(&parser, pm_string_source(input), pm_string_length(input),  | 
| 489 | 
            +
                pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
         | 
| 290 490 | 
             
                pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
         | 
| 291 491 |  | 
| 292 492 | 
             
                VALUE offsets = rb_ary_new();
         | 
| 293 | 
            -
                VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
         | 
| 294 | 
            -
                VALUE source = rb_class_new_instance( | 
| 493 | 
            +
                VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
         | 
| 494 | 
            +
                VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
         | 
| 295 495 |  | 
| 296 496 | 
             
                parse_lex_data_t parse_lex_data = {
         | 
| 297 497 | 
             
                    .source = source,
         | 
| @@ -338,29 +538,42 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) { | |
| 338 538 | 
             
                return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
         | 
| 339 539 | 
             
            }
         | 
| 340 540 |  | 
| 341 | 
            -
             | 
| 541 | 
            +
            /**
         | 
| 542 | 
            +
             * call-seq:
         | 
| 543 | 
            +
             *   Prism::lex(source, **options) -> Array
         | 
| 544 | 
            +
             *
         | 
| 545 | 
            +
             * Return an array of Token instances corresponding to the given string. For
         | 
| 546 | 
            +
             * supported options, see Prism::parse.
         | 
| 547 | 
            +
             */
         | 
| 342 548 | 
             
            static VALUE
         | 
| 343 549 | 
             
            lex(int argc, VALUE *argv, VALUE self) {
         | 
| 344 | 
            -
                VALUE string;
         | 
| 345 | 
            -
                VALUE filepath;
         | 
| 346 | 
            -
                rb_scan_args(argc, argv, "11", &string, &filepath);
         | 
| 347 | 
            -
             | 
| 348 550 | 
             
                pm_string_t input;
         | 
| 349 | 
            -
                 | 
| 551 | 
            +
                pm_options_t options = { 0 };
         | 
| 552 | 
            +
                string_options(argc, argv, &input, &options);
         | 
| 350 553 |  | 
| 351 | 
            -
                 | 
| 554 | 
            +
                VALUE result = parse_lex_input(&input, &options, false);
         | 
| 555 | 
            +
                pm_string_free(&input);
         | 
| 556 | 
            +
                pm_options_free(&options);
         | 
| 557 | 
            +
             | 
| 558 | 
            +
                return result;
         | 
| 352 559 | 
             
            }
         | 
| 353 560 |  | 
| 354 | 
            -
             | 
| 561 | 
            +
            /**
         | 
| 562 | 
            +
             * call-seq:
         | 
| 563 | 
            +
             *   Prism::lex_file(filepath, **options) -> Array
         | 
| 564 | 
            +
             *
         | 
| 565 | 
            +
             * Return an array of Token instances corresponding to the given file. For
         | 
| 566 | 
            +
             * supported options, see Prism::parse.
         | 
| 567 | 
            +
             */
         | 
| 355 568 | 
             
            static VALUE
         | 
| 356 | 
            -
            lex_file(VALUE  | 
| 569 | 
            +
            lex_file(int argc, VALUE *argv, VALUE self) {
         | 
| 357 570 | 
             
                pm_string_t input;
         | 
| 571 | 
            +
                pm_options_t options = { 0 };
         | 
| 572 | 
            +
                if (!file_options(argc, argv, &input, &options)) return Qnil;
         | 
| 358 573 |  | 
| 359 | 
            -
                 | 
| 360 | 
            -
                if (!pm_string_mapped_init(&input, checked)) return Qnil;
         | 
| 361 | 
            -
             | 
| 362 | 
            -
                VALUE value = parse_lex_input(&input, checked, false);
         | 
| 574 | 
            +
                VALUE value = parse_lex_input(&input, &options, false);
         | 
| 363 575 | 
             
                pm_string_free(&input);
         | 
| 576 | 
            +
                pm_options_free(&options);
         | 
| 364 577 |  | 
| 365 578 | 
             
                return value;
         | 
| 366 579 | 
             
            }
         | 
| @@ -369,11 +582,13 @@ lex_file(VALUE self, VALUE filepath) { | |
| 369 582 | 
             
            /* Parsing Ruby code                                                          */
         | 
| 370 583 | 
             
            /******************************************************************************/
         | 
| 371 584 |  | 
| 372 | 
            -
             | 
| 585 | 
            +
            /**
         | 
| 586 | 
            +
             * Parse the given input and return a ParseResult instance.
         | 
| 587 | 
            +
             */
         | 
| 373 588 | 
             
            static VALUE
         | 
| 374 | 
            -
            parse_input(pm_string_t *input, const  | 
| 589 | 
            +
            parse_input(pm_string_t *input, const pm_options_t *options) {
         | 
| 375 590 | 
             
                pm_parser_t parser;
         | 
| 376 | 
            -
                pm_parser_init(&parser, pm_string_source(input), pm_string_length(input),  | 
| 591 | 
            +
                pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
         | 
| 377 592 |  | 
| 378 593 | 
             
                pm_node_t *node = pm_parse(&parser);
         | 
| 379 594 | 
             
                rb_encoding *encoding = rb_enc_find(parser.encoding.name);
         | 
| @@ -396,33 +611,31 @@ parse_input(pm_string_t *input, const char *filepath) { | |
| 396 611 | 
             
                return result;
         | 
| 397 612 | 
             
            }
         | 
| 398 613 |  | 
| 399 | 
            -
             | 
| 400 | 
            -
             | 
| 401 | 
            -
             | 
| 402 | 
            -
             | 
| 403 | 
            -
             | 
| 404 | 
            -
             | 
| 405 | 
            -
             | 
| 406 | 
            -
             | 
| 407 | 
            -
             | 
| 408 | 
            -
             | 
| 409 | 
            -
             | 
| 410 | 
            -
             | 
| 411 | 
            -
             | 
| 412 | 
            -
             | 
| 413 | 
            -
             | 
| 414 | 
            -
             | 
| 415 | 
            -
             | 
| 416 | 
            -
             | 
| 417 | 
            -
             | 
| 614 | 
            +
            /**
         | 
| 615 | 
            +
             * call-seq:
         | 
| 616 | 
            +
             *   Prism::parse(source, **options) -> ParseResult
         | 
| 617 | 
            +
             *
         | 
| 618 | 
            +
             * Parse the given string and return a ParseResult instance. The options that
         | 
| 619 | 
            +
             * are supported are:
         | 
| 620 | 
            +
             *
         | 
| 621 | 
            +
             * * `filepath` - the filepath of the source being parsed. This should be a
         | 
| 622 | 
            +
             *       string or nil
         | 
| 623 | 
            +
             * * `encoding` - the encoding of the source being parsed. This should be an
         | 
| 624 | 
            +
             *       encoding or nil
         | 
| 625 | 
            +
             * * `line` - the line number that the parse starts on. This should be an
         | 
| 626 | 
            +
             *       integer or nil. Note that this is 1-indexed.
         | 
| 627 | 
            +
             * * `frozen_string_literal` - whether or not the frozen string literal pragma
         | 
| 628 | 
            +
             *       has been set. This should be a boolean or nil.
         | 
| 629 | 
            +
             * * `verbose` - the current level of verbosity. This controls whether or not
         | 
| 630 | 
            +
             *       the parser emits warnings. This should be a boolean or nil.
         | 
| 631 | 
            +
             * * `scopes` - the locals that are in scope surrounding the code that is being
         | 
| 632 | 
            +
             *       parsed. This should be an array of arrays of symbols or nil.
         | 
| 633 | 
            +
             */
         | 
| 418 634 | 
             
            static VALUE
         | 
| 419 635 | 
             
            parse(int argc, VALUE *argv, VALUE self) {
         | 
| 420 | 
            -
                VALUE string;
         | 
| 421 | 
            -
                VALUE filepath;
         | 
| 422 | 
            -
                rb_scan_args(argc, argv, "11", &string, &filepath);
         | 
| 423 | 
            -
             | 
| 424 636 | 
             
                pm_string_t input;
         | 
| 425 | 
            -
                 | 
| 637 | 
            +
                pm_options_t options = { 0 };
         | 
| 638 | 
            +
                string_options(argc, argv, &input, &options);
         | 
| 426 639 |  | 
| 427 640 | 
             
            #ifdef PRISM_DEBUG_MODE_BUILD
         | 
| 428 641 | 
             
                size_t length = pm_string_length(&input);
         | 
| @@ -431,82 +644,147 @@ parse(int argc, VALUE *argv, VALUE self) { | |
| 431 644 | 
             
                pm_string_constant_init(&input, dup, length);
         | 
| 432 645 | 
             
            #endif
         | 
| 433 646 |  | 
| 434 | 
            -
                VALUE value = parse_input(&input,  | 
| 647 | 
            +
                VALUE value = parse_input(&input, &options);
         | 
| 435 648 |  | 
| 436 649 | 
             
            #ifdef PRISM_DEBUG_MODE_BUILD
         | 
| 437 650 | 
             
                free(dup);
         | 
| 438 651 | 
             
            #endif
         | 
| 439 652 |  | 
| 653 | 
            +
                pm_string_free(&input);
         | 
| 654 | 
            +
                pm_options_free(&options);
         | 
| 440 655 | 
             
                return value;
         | 
| 441 656 | 
             
            }
         | 
| 442 657 |  | 
| 443 | 
            -
             | 
| 658 | 
            +
            /**
         | 
| 659 | 
            +
             * call-seq:
         | 
| 660 | 
            +
             *   Prism::parse_file(filepath, **options) -> ParseResult
         | 
| 661 | 
            +
             *
         | 
| 662 | 
            +
             * Parse the given file and return a ParseResult instance. For supported
         | 
| 663 | 
            +
             * options, see Prism::parse.
         | 
| 664 | 
            +
             */
         | 
| 444 665 | 
             
            static VALUE
         | 
| 445 | 
            -
            parse_file(VALUE  | 
| 666 | 
            +
            parse_file(int argc, VALUE *argv, VALUE self) {
         | 
| 446 667 | 
             
                pm_string_t input;
         | 
| 668 | 
            +
                pm_options_t options = { 0 };
         | 
| 669 | 
            +
                if (!file_options(argc, argv, &input, &options)) return Qnil;
         | 
| 447 670 |  | 
| 448 | 
            -
                 | 
| 449 | 
            -
                if (!pm_string_mapped_init(&input, checked)) return Qnil;
         | 
| 450 | 
            -
             | 
| 451 | 
            -
                VALUE value = parse_input(&input, checked);
         | 
| 671 | 
            +
                VALUE value = parse_input(&input, &options);
         | 
| 452 672 | 
             
                pm_string_free(&input);
         | 
| 673 | 
            +
                pm_options_free(&options);
         | 
| 453 674 |  | 
| 454 675 | 
             
                return value;
         | 
| 455 676 | 
             
            }
         | 
| 456 677 |  | 
| 457 | 
            -
             | 
| 678 | 
            +
            /**
         | 
| 679 | 
            +
             * Parse the given input and return an array of Comment objects.
         | 
| 680 | 
            +
             */
         | 
| 458 681 | 
             
            static VALUE
         | 
| 459 | 
            -
             | 
| 460 | 
            -
                 | 
| 461 | 
            -
                 | 
| 462 | 
            -
                rb_scan_args(argc, argv, "11", &string, &filepath);
         | 
| 682 | 
            +
            parse_input_comments(pm_string_t *input, const pm_options_t *options) {
         | 
| 683 | 
            +
                pm_parser_t parser;
         | 
| 684 | 
            +
                pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
         | 
| 463 685 |  | 
| 464 | 
            -
                 | 
| 465 | 
            -
                 | 
| 686 | 
            +
                pm_node_t *node = pm_parse(&parser);
         | 
| 687 | 
            +
                rb_encoding *encoding = rb_enc_find(parser.encoding.name);
         | 
| 688 | 
            +
             | 
| 689 | 
            +
                VALUE source = pm_source_new(&parser, encoding);
         | 
| 690 | 
            +
                VALUE comments = parser_comments(&parser, source);
         | 
| 466 691 |  | 
| 467 | 
            -
                 | 
| 692 | 
            +
                pm_node_destroy(&parser, node);
         | 
| 693 | 
            +
                pm_parser_free(&parser);
         | 
| 694 | 
            +
             | 
| 695 | 
            +
                return comments;
         | 
| 468 696 | 
             
            }
         | 
| 469 697 |  | 
| 470 | 
            -
             | 
| 698 | 
            +
            /**
         | 
| 699 | 
            +
             * call-seq:
         | 
| 700 | 
            +
             *   Prism::parse_comments(source, **options) -> Array
         | 
| 701 | 
            +
             *
         | 
| 702 | 
            +
             * Parse the given string and return an array of Comment objects. For supported
         | 
| 703 | 
            +
             * options, see Prism::parse.
         | 
| 704 | 
            +
             */
         | 
| 471 705 | 
             
            static VALUE
         | 
| 472 | 
            -
             | 
| 706 | 
            +
            parse_comments(int argc, VALUE *argv, VALUE self) {
         | 
| 473 707 | 
             
                pm_string_t input;
         | 
| 708 | 
            +
                pm_options_t options = { 0 };
         | 
| 709 | 
            +
                string_options(argc, argv, &input, &options);
         | 
| 474 710 |  | 
| 475 | 
            -
                 | 
| 476 | 
            -
                 | 
| 711 | 
            +
                VALUE result = parse_input_comments(&input, &options);
         | 
| 712 | 
            +
                pm_string_free(&input);
         | 
| 713 | 
            +
                pm_options_free(&options);
         | 
| 477 714 |  | 
| 478 | 
            -
                 | 
| 715 | 
            +
                return result;
         | 
| 716 | 
            +
            }
         | 
| 717 | 
            +
             | 
| 718 | 
            +
            /**
         | 
| 719 | 
            +
             * call-seq:
         | 
| 720 | 
            +
             *   Prism::parse_file_comments(filepath, **options) -> Array
         | 
| 721 | 
            +
             *
         | 
| 722 | 
            +
             * Parse the given file and return an array of Comment objects. For supported
         | 
| 723 | 
            +
             * options, see Prism::parse.
         | 
| 724 | 
            +
             */
         | 
| 725 | 
            +
            static VALUE
         | 
| 726 | 
            +
            parse_file_comments(int argc, VALUE *argv, VALUE self) {
         | 
| 727 | 
            +
                pm_string_t input;
         | 
| 728 | 
            +
                pm_options_t options = { 0 };
         | 
| 729 | 
            +
                if (!file_options(argc, argv, &input, &options)) return Qnil;
         | 
| 730 | 
            +
             | 
| 731 | 
            +
                VALUE value = parse_input_comments(&input, &options);
         | 
| 479 732 | 
             
                pm_string_free(&input);
         | 
| 733 | 
            +
                pm_options_free(&options);
         | 
| 480 734 |  | 
| 481 735 | 
             
                return value;
         | 
| 482 736 | 
             
            }
         | 
| 483 737 |  | 
| 484 | 
            -
             | 
| 738 | 
            +
            /**
         | 
| 739 | 
            +
             * call-seq:
         | 
| 740 | 
            +
             *   Prism::parse_lex(source, **options) -> ParseResult
         | 
| 741 | 
            +
             *
         | 
| 742 | 
            +
             * Parse the given string and return a ParseResult instance that contains a
         | 
| 743 | 
            +
             * 2-element array, where the first element is the AST and the second element is
         | 
| 744 | 
            +
             * an array of Token instances.
         | 
| 745 | 
            +
             *
         | 
| 746 | 
            +
             * This API is only meant to be used in the case where you need both the AST and
         | 
| 747 | 
            +
             * the tokens. If you only need one or the other, use either Prism::parse or
         | 
| 748 | 
            +
             * Prism::lex.
         | 
| 749 | 
            +
             *
         | 
| 750 | 
            +
             * For supported options, see Prism::parse.
         | 
| 751 | 
            +
             */
         | 
| 485 752 | 
             
            static VALUE
         | 
| 486 753 | 
             
            parse_lex(int argc, VALUE *argv, VALUE self) {
         | 
| 487 | 
            -
                VALUE string;
         | 
| 488 | 
            -
                VALUE filepath;
         | 
| 489 | 
            -
                rb_scan_args(argc, argv, "11", &string, &filepath);
         | 
| 490 | 
            -
             | 
| 491 754 | 
             
                pm_string_t input;
         | 
| 492 | 
            -
                 | 
| 755 | 
            +
                pm_options_t options = { 0 };
         | 
| 756 | 
            +
                string_options(argc, argv, &input, &options);
         | 
| 493 757 |  | 
| 494 | 
            -
                VALUE value = parse_lex_input(&input,  | 
| 758 | 
            +
                VALUE value = parse_lex_input(&input, &options, true);
         | 
| 495 759 | 
             
                pm_string_free(&input);
         | 
| 760 | 
            +
                pm_options_free(&options);
         | 
| 496 761 |  | 
| 497 762 | 
             
                return value;
         | 
| 498 763 | 
             
            }
         | 
| 499 764 |  | 
| 500 | 
            -
             | 
| 765 | 
            +
            /**
         | 
| 766 | 
            +
             * call-seq:
         | 
| 767 | 
            +
             *   Prism::parse_lex_file(filepath, **options) -> ParseResult
         | 
| 768 | 
            +
             *
         | 
| 769 | 
            +
             * Parse the given file and return a ParseResult instance that contains a
         | 
| 770 | 
            +
             * 2-element array, where the first element is the AST and the second element is
         | 
| 771 | 
            +
             * an array of Token instances.
         | 
| 772 | 
            +
             *
         | 
| 773 | 
            +
             * This API is only meant to be used in the case where you need both the AST and
         | 
| 774 | 
            +
             * the tokens. If you only need one or the other, use either Prism::parse_file
         | 
| 775 | 
            +
             * or Prism::lex_file.
         | 
| 776 | 
            +
             *
         | 
| 777 | 
            +
             * For supported options, see Prism::parse.
         | 
| 778 | 
            +
             */
         | 
| 501 779 | 
             
            static VALUE
         | 
| 502 | 
            -
            parse_lex_file(VALUE  | 
| 780 | 
            +
            parse_lex_file(int argc, VALUE *argv, VALUE self) {
         | 
| 503 781 | 
             
                pm_string_t input;
         | 
| 782 | 
            +
                pm_options_t options = { 0 };
         | 
| 783 | 
            +
                if (!file_options(argc, argv, &input, &options)) return Qnil;
         | 
| 504 784 |  | 
| 505 | 
            -
                 | 
| 506 | 
            -
                if (!pm_string_mapped_init(&input, checked)) return Qnil;
         | 
| 507 | 
            -
             | 
| 508 | 
            -
                VALUE value = parse_lex_input(&input, checked, true);
         | 
| 785 | 
            +
                VALUE value = parse_lex_input(&input, &options, true);
         | 
| 509 786 | 
             
                pm_string_free(&input);
         | 
| 787 | 
            +
                pm_options_free(&options);
         | 
| 510 788 |  | 
| 511 789 | 
             
                return value;
         | 
| 512 790 | 
             
            }
         | 
| @@ -515,13 +793,17 @@ parse_lex_file(VALUE self, VALUE filepath) { | |
| 515 793 | 
             
            /* Utility functions exposed to make testing easier                           */
         | 
| 516 794 | 
             
            /******************************************************************************/
         | 
| 517 795 |  | 
| 518 | 
            -
             | 
| 519 | 
            -
             | 
| 520 | 
            -
             | 
| 796 | 
            +
            /**
         | 
| 797 | 
            +
             * call-seq:
         | 
| 798 | 
            +
             *   Debug::named_captures(source) -> Array
         | 
| 799 | 
            +
             *
         | 
| 800 | 
            +
             * Returns an array of strings corresponding to the named capture groups in the
         | 
| 801 | 
            +
             * given source string. If prism was unable to parse the regular expression,
         | 
| 802 | 
            +
             * this function returns nil.
         | 
| 803 | 
            +
             */
         | 
| 521 804 | 
             
            static VALUE
         | 
| 522 805 | 
             
            named_captures(VALUE self, VALUE source) {
         | 
| 523 | 
            -
                pm_string_list_t string_list;
         | 
| 524 | 
            -
                pm_string_list_init(&string_list);
         | 
| 806 | 
            +
                pm_string_list_t string_list = { 0 };
         | 
| 525 807 |  | 
| 526 808 | 
             
                if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
         | 
| 527 809 | 
             
                    pm_string_list_free(&string_list);
         | 
| @@ -538,7 +820,12 @@ named_captures(VALUE self, VALUE source) { | |
| 538 820 | 
             
                return names;
         | 
| 539 821 | 
             
            }
         | 
| 540 822 |  | 
| 541 | 
            -
             | 
| 823 | 
            +
            /**
         | 
| 824 | 
            +
             * call-seq:
         | 
| 825 | 
            +
             *   Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
         | 
| 826 | 
            +
             *
         | 
| 827 | 
            +
             * Return a hash of information about the given source string's memory usage.
         | 
| 828 | 
            +
             */
         | 
| 542 829 | 
             
            static VALUE
         | 
| 543 830 | 
             
            memsize(VALUE self, VALUE string) {
         | 
| 544 831 | 
             
                pm_parser_t parser;
         | 
| @@ -559,8 +846,13 @@ memsize(VALUE self, VALUE string) { | |
| 559 846 | 
             
                return result;
         | 
| 560 847 | 
             
            }
         | 
| 561 848 |  | 
| 562 | 
            -
             | 
| 563 | 
            -
             | 
| 849 | 
            +
            /**
         | 
| 850 | 
            +
             * call-seq:
         | 
| 851 | 
            +
             *   Debug::profile_file(filepath) -> nil
         | 
| 852 | 
            +
             *
         | 
| 853 | 
            +
             * Parse the file, but do nothing with the result. This is used to profile the
         | 
| 854 | 
            +
             * parser for memory and speed.
         | 
| 855 | 
            +
             */
         | 
| 564 856 | 
             
            static VALUE
         | 
| 565 857 | 
             
            profile_file(VALUE self, VALUE filepath) {
         | 
| 566 858 | 
             
                pm_string_t input;
         | 
| @@ -568,39 +860,28 @@ profile_file(VALUE self, VALUE filepath) { | |
| 568 860 | 
             
                const char *checked = check_string(filepath);
         | 
| 569 861 | 
             
                if (!pm_string_mapped_init(&input, checked)) return Qnil;
         | 
| 570 862 |  | 
| 863 | 
            +
                pm_options_t options = { 0 };
         | 
| 864 | 
            +
                pm_options_filepath_set(&options, checked);
         | 
| 865 | 
            +
             | 
| 571 866 | 
             
                pm_parser_t parser;
         | 
| 572 | 
            -
                pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input),  | 
| 867 | 
            +
                pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
         | 
| 573 868 |  | 
| 574 869 | 
             
                pm_node_t *node = pm_parse(&parser);
         | 
| 575 870 | 
             
                pm_node_destroy(&parser, node);
         | 
| 576 871 | 
             
                pm_parser_free(&parser);
         | 
| 577 | 
            -
             | 
| 872 | 
            +
                pm_options_free(&options);
         | 
| 578 873 | 
             
                pm_string_free(&input);
         | 
| 579 874 |  | 
| 580 875 | 
             
                return Qnil;
         | 
| 581 876 | 
             
            }
         | 
| 582 877 |  | 
| 583 | 
            -
             | 
| 584 | 
            -
             | 
| 585 | 
            -
             | 
| 586 | 
            -
             | 
| 587 | 
            -
             | 
| 588 | 
            -
             | 
| 589 | 
            -
             | 
| 590 | 
            -
             | 
| 591 | 
            -
                const char *checked = check_string(filepath);
         | 
| 592 | 
            -
                if (!pm_string_mapped_init(&input, checked)) return Qnil;
         | 
| 593 | 
            -
             | 
| 594 | 
            -
                pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
         | 
| 595 | 
            -
                VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
         | 
| 596 | 
            -
             | 
| 597 | 
            -
                pm_string_free(&input);
         | 
| 598 | 
            -
                pm_buffer_free(&buffer);
         | 
| 599 | 
            -
                return result;
         | 
| 600 | 
            -
            }
         | 
| 601 | 
            -
             | 
| 602 | 
            -
            // Inspect the AST that represents the given source using the prism pretty print
         | 
| 603 | 
            -
            // as opposed to the Ruby implementation.
         | 
| 878 | 
            +
            /**
         | 
| 879 | 
            +
             * call-seq:
         | 
| 880 | 
            +
             *   Debug::inspect_node(source) -> inspected
         | 
| 881 | 
            +
             *
         | 
| 882 | 
            +
             * Inspect the AST that represents the given source using the prism pretty print
         | 
| 883 | 
            +
             * as opposed to the Ruby implementation.
         | 
| 884 | 
            +
             */
         | 
| 604 885 | 
             
            static VALUE
         | 
| 605 886 | 
             
            inspect_node(VALUE self, VALUE source) {
         | 
| 606 887 | 
             
                pm_string_t input;
         | 
| @@ -628,6 +909,9 @@ inspect_node(VALUE self, VALUE source) { | |
| 628 909 | 
             
            /* Initialization of the extension                                            */
         | 
| 629 910 | 
             
            /******************************************************************************/
         | 
| 630 911 |  | 
| 912 | 
            +
            /**
         | 
| 913 | 
            +
             * The init function that Ruby calls when loading this extension.
         | 
| 914 | 
            +
             */
         | 
| 631 915 | 
             
            RUBY_FUNC_EXPORTED void
         | 
| 632 916 | 
             
            Init_prism(void) {
         | 
| 633 917 | 
             
                // Make sure that the prism library version matches the expected version.
         | 
| @@ -649,27 +933,46 @@ Init_prism(void) { | |
| 649 933 | 
             
                rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
         | 
| 650 934 | 
             
                rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
         | 
| 651 935 | 
             
                rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
         | 
| 936 | 
            +
                rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
         | 
| 937 | 
            +
                rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
         | 
| 938 | 
            +
                rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
         | 
| 652 939 | 
             
                rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
         | 
| 653 940 | 
             
                rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
         | 
| 654 941 | 
             
                rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
         | 
| 655 942 | 
             
                rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
         | 
| 656 943 |  | 
| 657 | 
            -
                //  | 
| 658 | 
            -
                //  | 
| 944 | 
            +
                // Intern all of the options that we support so that we don't have to do it
         | 
| 945 | 
            +
                // every time we parse.
         | 
| 946 | 
            +
                rb_option_id_filepath = rb_intern_const("filepath");
         | 
| 947 | 
            +
                rb_option_id_encoding = rb_intern_const("encoding");
         | 
| 948 | 
            +
                rb_option_id_line = rb_intern_const("line");
         | 
| 949 | 
            +
                rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
         | 
| 950 | 
            +
                rb_option_id_verbose = rb_intern_const("verbose");
         | 
| 951 | 
            +
                rb_option_id_scopes = rb_intern_const("scopes");
         | 
| 952 | 
            +
             | 
| 953 | 
            +
                /**
         | 
| 954 | 
            +
                 * The version of the prism library.
         | 
| 955 | 
            +
                 */
         | 
| 659 956 | 
             
                rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
         | 
| 660 | 
            -
             | 
| 957 | 
            +
             | 
| 958 | 
            +
                /**
         | 
| 959 | 
            +
                 * The backend of the parser that prism is using to parse Ruby code. This
         | 
| 960 | 
            +
                 * can be either :CEXT or :FFI. On runtimes that support C extensions, we
         | 
| 961 | 
            +
                 * default to :CEXT. Otherwise we use :FFI.
         | 
| 962 | 
            +
                 */
         | 
| 963 | 
            +
                rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
         | 
| 661 964 |  | 
| 662 965 | 
             
                // First, the functions that have to do with lexing and parsing.
         | 
| 663 966 | 
             
                rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
         | 
| 664 | 
            -
                rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
         | 
| 967 | 
            +
                rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
         | 
| 665 968 | 
             
                rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
         | 
| 666 | 
            -
                rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
         | 
| 969 | 
            +
                rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
         | 
| 667 970 | 
             
                rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
         | 
| 668 | 
            -
                rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
         | 
| 971 | 
            +
                rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
         | 
| 669 972 | 
             
                rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
         | 
| 670 | 
            -
                rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, 1);
         | 
| 973 | 
            +
                rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
         | 
| 671 974 | 
             
                rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
         | 
| 672 | 
            -
                rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
         | 
| 975 | 
            +
                rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
         | 
| 673 976 |  | 
| 674 977 | 
             
                // Next, the functions that will be called by the parser to perform various
         | 
| 675 978 | 
             
                // internal tasks. We expose these to make them easier to test.
         | 
| @@ -677,7 +980,6 @@ Init_prism(void) { | |
| 677 980 | 
             
                rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
         | 
| 678 981 | 
             
                rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
         | 
| 679 982 | 
             
                rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
         | 
| 680 | 
            -
                rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
         | 
| 681 983 | 
             
                rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
         | 
| 682 984 |  | 
| 683 985 | 
             
                // Next, initialize the other APIs.
         |