yarp 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -1
- data/README.md +4 -3
- data/config.yml +332 -52
- data/docs/configuration.md +1 -0
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +1 -1
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +703 -136
- data/ext/yarp/extension.c +73 -24
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +331 -137
- data/include/yarp/node.h +10 -0
- data/include/yarp/unescape.h +4 -2
- data/include/yarp/util/yp_newline_list.h +3 -0
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +10 -0
- data/lib/yarp/desugar_visitor.rb +267 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3042 -508
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +48 -2
- data/src/node.c +421 -185
- data/src/prettyprint.c +262 -80
- data/src/serialize.c +410 -270
- data/src/token_type.c +2 -2
- data/src/unescape.c +69 -51
- data/src/util/yp_newline_list.c +10 -0
- data/src/yarp.c +1208 -458
- data/yarp.gemspec +3 -1
- metadata +4 -2
    
        data/ext/yarp/extension.c
    CHANGED
    
    | @@ -198,52 +198,67 @@ typedef struct { | |
| 198 198 | 
             
                VALUE source;
         | 
| 199 199 | 
             
                VALUE tokens;
         | 
| 200 200 | 
             
                rb_encoding *encoding;
         | 
| 201 | 
            -
            }  | 
| 201 | 
            +
            } parse_lex_data_t;
         | 
| 202 202 |  | 
| 203 203 | 
             
            // This is passed as a callback to the parser. It gets called every time a new
         | 
| 204 204 | 
             
            // token is found. Once found, we initialize a new instance of Token and push it
         | 
| 205 205 | 
             
            // onto the tokens array.
         | 
| 206 206 | 
             
            static void
         | 
| 207 | 
            -
             | 
| 208 | 
            -
                 | 
| 207 | 
            +
            parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
         | 
| 208 | 
            +
                parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
         | 
| 209 209 |  | 
| 210 210 | 
             
                VALUE yields = rb_ary_new_capa(2);
         | 
| 211 | 
            -
                rb_ary_push(yields, yp_token_new(parser, token,  | 
| 211 | 
            +
                rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
         | 
| 212 212 | 
             
                rb_ary_push(yields, INT2FIX(parser->lex_state));
         | 
| 213 213 |  | 
| 214 | 
            -
                rb_ary_push( | 
| 214 | 
            +
                rb_ary_push(parse_lex_data->tokens, yields);
         | 
| 215 215 | 
             
            }
         | 
| 216 216 |  | 
| 217 217 | 
             
            // This is called whenever the encoding changes based on the magic comment at
         | 
| 218 218 | 
             
            // the top of the file. We use it to update the encoding that we are using to
         | 
| 219 219 | 
             
            // create tokens.
         | 
| 220 220 | 
             
            static void
         | 
| 221 | 
            -
             | 
| 222 | 
            -
                 | 
| 223 | 
            -
                 | 
| 221 | 
            +
            parse_lex_encoding_changed_callback(yp_parser_t *parser) {
         | 
| 222 | 
            +
                parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
         | 
| 223 | 
            +
                parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
         | 
| 224 | 
            +
             | 
| 225 | 
            +
                // Since the encoding changed, we need to go back and change the encoding of
         | 
| 226 | 
            +
                // the tokens that were already lexed. This is only going to end up being
         | 
| 227 | 
            +
                // one or two tokens, since the encoding can only change at the top of the
         | 
| 228 | 
            +
                // file.
         | 
| 229 | 
            +
                VALUE tokens = parse_lex_data->tokens;
         | 
| 230 | 
            +
                for (long index = 0; index < RARRAY_LEN(tokens); index++) {
         | 
| 231 | 
            +
                    VALUE yields = rb_ary_entry(tokens, index);
         | 
| 232 | 
            +
                    VALUE token = rb_ary_entry(yields, 0);
         | 
| 233 | 
            +
             | 
| 234 | 
            +
                    VALUE value = rb_ivar_get(token, rb_intern("@value"));
         | 
| 235 | 
            +
                    rb_enc_associate(value, parse_lex_data->encoding);
         | 
| 236 | 
            +
                    ENC_CODERANGE_CLEAR(value);
         | 
| 237 | 
            +
                }
         | 
| 224 238 | 
             
            }
         | 
| 225 239 |  | 
| 226 | 
            -
            //  | 
| 240 | 
            +
            // Parse the given input and return a ParseResult containing just the tokens or
         | 
| 241 | 
            +
            // the nodes and tokens.
         | 
| 227 242 | 
             
            static VALUE
         | 
| 228 | 
            -
             | 
| 243 | 
            +
            parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
         | 
| 229 244 | 
             
                yp_parser_t parser;
         | 
| 230 245 | 
             
                yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
         | 
| 231 | 
            -
                yp_parser_register_encoding_changed_callback(&parser,  | 
| 246 | 
            +
                yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
         | 
| 232 247 |  | 
| 233 248 | 
             
                VALUE offsets = rb_ary_new();
         | 
| 234 249 | 
             
                VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
         | 
| 235 250 | 
             
                VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
         | 
| 236 251 |  | 
| 237 | 
            -
                 | 
| 252 | 
            +
                parse_lex_data_t parse_lex_data = {
         | 
| 238 253 | 
             
                    .source = source,
         | 
| 239 254 | 
             
                    .tokens = rb_ary_new(),
         | 
| 240 255 | 
             
                    .encoding = rb_utf8_encoding()
         | 
| 241 256 | 
             
                };
         | 
| 242 257 |  | 
| 243 | 
            -
                 | 
| 258 | 
            +
                parse_lex_data_t *data = &parse_lex_data;
         | 
| 244 259 | 
             
                yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
         | 
| 245 260 | 
             
                    .data = (void *) data,
         | 
| 246 | 
            -
                    .callback =  | 
| 261 | 
            +
                    .callback = parse_lex_token,
         | 
| 247 262 | 
             
                };
         | 
| 248 263 |  | 
| 249 264 | 
             
                parser.lex_callback = &lex_callback;
         | 
| @@ -256,20 +271,26 @@ lex_input(yp_string_t *input, const char *filepath) { | |
| 256 271 | 
             
                    rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
         | 
| 257 272 | 
             
                }
         | 
| 258 273 |  | 
| 274 | 
            +
                VALUE value;
         | 
| 275 | 
            +
                if (return_nodes) {
         | 
| 276 | 
            +
                    value = rb_ary_new_capa(2);
         | 
| 277 | 
            +
                    rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
         | 
| 278 | 
            +
                    rb_ary_push(value, parse_lex_data.tokens);
         | 
| 279 | 
            +
                } else {
         | 
| 280 | 
            +
                    value = parse_lex_data.tokens;
         | 
| 281 | 
            +
                }
         | 
| 282 | 
            +
             | 
| 259 283 | 
             
                VALUE result_argv[] = {
         | 
| 260 | 
            -
                     | 
| 284 | 
            +
                    value,
         | 
| 261 285 | 
             
                    parser_comments(&parser, source),
         | 
| 262 | 
            -
                    parser_errors(&parser,  | 
| 263 | 
            -
                    parser_warnings(&parser,  | 
| 286 | 
            +
                    parser_errors(&parser, parse_lex_data.encoding, source),
         | 
| 287 | 
            +
                    parser_warnings(&parser, parse_lex_data.encoding, source),
         | 
| 264 288 | 
             
                    source
         | 
| 265 289 | 
             
                };
         | 
| 266 290 |  | 
| 267 | 
            -
                VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
         | 
| 268 | 
            -
             | 
| 269 291 | 
             
                yp_node_destroy(&parser, node);
         | 
| 270 292 | 
             
                yp_parser_free(&parser);
         | 
| 271 | 
            -
             | 
| 272 | 
            -
                return result;
         | 
| 293 | 
            +
                return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
         | 
| 273 294 | 
             
            }
         | 
| 274 295 |  | 
| 275 296 | 
             
            // Return a ParseResult whose value is the array of tokens corresponding to the given string.
         | 
| @@ -281,7 +302,8 @@ lex(int argc, VALUE *argv, VALUE self) { | |
| 281 302 |  | 
| 282 303 | 
             
                yp_string_t input;
         | 
| 283 304 | 
             
                input_load_string(&input, string);
         | 
| 284 | 
            -
             | 
| 305 | 
            +
             | 
| 306 | 
            +
                return parse_lex_input(&input, check_string(filepath), false);
         | 
| 285 307 | 
             
            }
         | 
| 286 308 |  | 
| 287 309 | 
             
            // Return a ParseResult whose value is the array of tokens corresponding to the given file.
         | 
| @@ -292,7 +314,7 @@ lex_file(VALUE self, VALUE filepath) { | |
| 292 314 | 
             
                const char *checked = check_string(filepath);
         | 
| 293 315 | 
             
                if (!yp_string_mapped_init(&input, checked)) return Qnil;
         | 
| 294 316 |  | 
| 295 | 
            -
                VALUE value =  | 
| 317 | 
            +
                VALUE value = parse_lex_input(&input, checked, false);
         | 
| 296 318 | 
             
                yp_string_free(&input);
         | 
| 297 319 |  | 
| 298 320 | 
             
                return value;
         | 
| @@ -368,6 +390,32 @@ parse_file(VALUE self, VALUE filepath) { | |
| 368 390 | 
             
                return value;
         | 
| 369 391 | 
             
            }
         | 
| 370 392 |  | 
| 393 | 
            +
            // Parse and lex the given string and return a ParseResult instance whose value holds both the nodes and the tokens.
         | 
| 394 | 
            +
            static VALUE
         | 
| 395 | 
            +
            parse_lex(int argc, VALUE *argv, VALUE self) {
         | 
| 396 | 
            +
                VALUE string;
         | 
| 397 | 
            +
                VALUE filepath;
         | 
| 398 | 
            +
                rb_scan_args(argc, argv, "11", &string, &filepath);
         | 
| 399 | 
            +
             | 
| 400 | 
            +
                yp_string_t input;
         | 
| 401 | 
            +
                input_load_string(&input, string);
         | 
| 402 | 
            +
                return parse_lex_input(&input, check_string(filepath), true);
         | 
| 403 | 
            +
            }
         | 
| 404 | 
            +
             | 
| 405 | 
            +
            // Parse and lex the given file and return a ParseResult instance.
         | 
| 406 | 
            +
            static VALUE
         | 
| 407 | 
            +
            parse_lex_file(VALUE self, VALUE filepath) {
         | 
| 408 | 
            +
                yp_string_t input;
         | 
| 409 | 
            +
             | 
| 410 | 
            +
                const char *checked = check_string(filepath);
         | 
| 411 | 
            +
                if (!yp_string_mapped_init(&input, checked)) return Qnil;
         | 
| 412 | 
            +
             | 
| 413 | 
            +
                VALUE value = parse_lex_input(&input, checked, true);
         | 
| 414 | 
            +
                yp_string_free(&input);
         | 
| 415 | 
            +
             | 
| 416 | 
            +
                return value;
         | 
| 417 | 
            +
            }
         | 
| 418 | 
            +
             | 
| 371 419 | 
             
            /******************************************************************************/
         | 
| 372 420 | 
             
            /* Utility functions exposed to make testing easier                           */
         | 
| 373 421 | 
             
            /******************************************************************************/
         | 
| @@ -521,7 +569,6 @@ Init_yarp(void) { | |
| 521 569 | 
             
                // Define the version string here so that we can use the constants defined
         | 
| 522 570 | 
             
                // in yarp.h.
         | 
| 523 571 | 
             
                rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
         | 
| 524 | 
            -
             | 
| 525 572 | 
             
                rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
         | 
| 526 573 |  | 
| 527 574 | 
             
                // First, the functions that have to do with lexing and parsing.
         | 
| @@ -531,6 +578,8 @@ Init_yarp(void) { | |
| 531 578 | 
             
                rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
         | 
| 532 579 | 
             
                rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
         | 
| 533 580 | 
             
                rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
         | 
| 581 | 
            +
                rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
         | 
| 582 | 
            +
                rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
         | 
| 534 583 |  | 
| 535 584 | 
             
                // Next, the functions that will be called by the parser to perform various
         | 
| 536 585 | 
             
                // internal tasks. We expose these to make them easier to test.
         | 
    
        data/ext/yarp/extension.h
    CHANGED
    
    | @@ -1,12 +1,12 @@ | |
| 1 1 | 
             
            #ifndef YARP_EXT_NODE_H
         | 
| 2 2 | 
             
            #define YARP_EXT_NODE_H
         | 
| 3 3 |  | 
| 4 | 
            +
            #define EXPECTED_YARP_VERSION "0.9.0"
         | 
| 5 | 
            +
             | 
| 4 6 | 
             
            #include <ruby.h>
         | 
| 5 7 | 
             
            #include <ruby/encoding.h>
         | 
| 6 8 | 
             
            #include "yarp.h"
         | 
| 7 9 |  | 
| 8 | 
            -
            #define EXPECTED_YARP_VERSION "0.8.0"
         | 
| 9 | 
            -
             | 
| 10 10 | 
             
            VALUE yp_source_new(yp_parser_t *parser);
         | 
| 11 11 | 
             
            VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
         | 
| 12 12 | 
             
            VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
         |