yarp 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -1
- data/README.md +4 -3
- data/config.yml +332 -52
- data/docs/configuration.md +1 -0
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +1 -1
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +703 -136
- data/ext/yarp/extension.c +73 -24
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +331 -137
- data/include/yarp/node.h +10 -0
- data/include/yarp/unescape.h +4 -2
- data/include/yarp/util/yp_newline_list.h +3 -0
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +10 -0
- data/lib/yarp/desugar_visitor.rb +267 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3042 -508
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +48 -2
- data/src/node.c +421 -185
- data/src/prettyprint.c +262 -80
- data/src/serialize.c +410 -270
- data/src/token_type.c +2 -2
- data/src/unescape.c +69 -51
- data/src/util/yp_newline_list.c +10 -0
- data/src/yarp.c +1208 -458
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/ext/yarp/extension.c
CHANGED
@@ -198,52 +198,67 @@ typedef struct {
|
|
198
198
|
VALUE source;
|
199
199
|
VALUE tokens;
|
200
200
|
rb_encoding *encoding;
|
201
|
-
}
|
201
|
+
} parse_lex_data_t;
|
202
202
|
|
203
203
|
// This is passed as a callback to the parser. It gets called every time a new
|
204
204
|
// token is found. Once found, we initialize a new instance of Token and push it
|
205
205
|
// onto the tokens array.
|
206
206
|
static void
|
207
|
-
|
208
|
-
|
207
|
+
parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
|
208
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
209
209
|
|
210
210
|
VALUE yields = rb_ary_new_capa(2);
|
211
|
-
rb_ary_push(yields, yp_token_new(parser, token,
|
211
|
+
rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
|
212
212
|
rb_ary_push(yields, INT2FIX(parser->lex_state));
|
213
213
|
|
214
|
-
rb_ary_push(
|
214
|
+
rb_ary_push(parse_lex_data->tokens, yields);
|
215
215
|
}
|
216
216
|
|
217
217
|
// This is called whenever the encoding changes based on the magic comment at
|
218
218
|
// the top of the file. We use it to update the encoding that we are using to
|
219
219
|
// create tokens.
|
220
220
|
static void
|
221
|
-
|
222
|
-
|
223
|
-
|
221
|
+
parse_lex_encoding_changed_callback(yp_parser_t *parser) {
|
222
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
223
|
+
parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
|
224
|
+
|
225
|
+
// Since the encoding changed, we need to go back and change the encoding of
|
226
|
+
// the tokens that were already lexed. This is only going to end up being
|
227
|
+
// one or two tokens, since the encoding can only change at the top of the
|
228
|
+
// file.
|
229
|
+
VALUE tokens = parse_lex_data->tokens;
|
230
|
+
for (long index = 0; index < RARRAY_LEN(tokens); index++) {
|
231
|
+
VALUE yields = rb_ary_entry(tokens, index);
|
232
|
+
VALUE token = rb_ary_entry(yields, 0);
|
233
|
+
|
234
|
+
VALUE value = rb_ivar_get(token, rb_intern("@value"));
|
235
|
+
rb_enc_associate(value, parse_lex_data->encoding);
|
236
|
+
ENC_CODERANGE_CLEAR(value);
|
237
|
+
}
|
224
238
|
}
|
225
239
|
|
226
|
-
//
|
240
|
+
// Parse the given input and return a ParseResult containing just the tokens or
|
241
|
+
// the nodes and tokens.
|
227
242
|
static VALUE
|
228
|
-
|
243
|
+
parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
|
229
244
|
yp_parser_t parser;
|
230
245
|
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
|
231
|
-
yp_parser_register_encoding_changed_callback(&parser,
|
246
|
+
yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
|
232
247
|
|
233
248
|
VALUE offsets = rb_ary_new();
|
234
249
|
VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
|
235
250
|
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
|
236
251
|
|
237
|
-
|
252
|
+
parse_lex_data_t parse_lex_data = {
|
238
253
|
.source = source,
|
239
254
|
.tokens = rb_ary_new(),
|
240
255
|
.encoding = rb_utf8_encoding()
|
241
256
|
};
|
242
257
|
|
243
|
-
|
258
|
+
parse_lex_data_t *data = &parse_lex_data;
|
244
259
|
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
|
245
260
|
.data = (void *) data,
|
246
|
-
.callback =
|
261
|
+
.callback = parse_lex_token,
|
247
262
|
};
|
248
263
|
|
249
264
|
parser.lex_callback = &lex_callback;
|
@@ -256,20 +271,26 @@ lex_input(yp_string_t *input, const char *filepath) {
|
|
256
271
|
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
|
257
272
|
}
|
258
273
|
|
274
|
+
VALUE value;
|
275
|
+
if (return_nodes) {
|
276
|
+
value = rb_ary_new_capa(2);
|
277
|
+
rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
|
278
|
+
rb_ary_push(value, parse_lex_data.tokens);
|
279
|
+
} else {
|
280
|
+
value = parse_lex_data.tokens;
|
281
|
+
}
|
282
|
+
|
259
283
|
VALUE result_argv[] = {
|
260
|
-
|
284
|
+
value,
|
261
285
|
parser_comments(&parser, source),
|
262
|
-
parser_errors(&parser,
|
263
|
-
parser_warnings(&parser,
|
286
|
+
parser_errors(&parser, parse_lex_data.encoding, source),
|
287
|
+
parser_warnings(&parser, parse_lex_data.encoding, source),
|
264
288
|
source
|
265
289
|
};
|
266
290
|
|
267
|
-
VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
268
|
-
|
269
291
|
yp_node_destroy(&parser, node);
|
270
292
|
yp_parser_free(&parser);
|
271
|
-
|
272
|
-
return result;
|
293
|
+
return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
273
294
|
}
|
274
295
|
|
275
296
|
// Return an array of tokens corresponding to the given string.
|
@@ -281,7 +302,8 @@ lex(int argc, VALUE *argv, VALUE self) {
|
|
281
302
|
|
282
303
|
yp_string_t input;
|
283
304
|
input_load_string(&input, string);
|
284
|
-
|
305
|
+
|
306
|
+
return parse_lex_input(&input, check_string(filepath), false);
|
285
307
|
}
|
286
308
|
|
287
309
|
// Return an array of tokens corresponding to the given file.
|
@@ -292,7 +314,7 @@ lex_file(VALUE self, VALUE filepath) {
|
|
292
314
|
const char *checked = check_string(filepath);
|
293
315
|
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
294
316
|
|
295
|
-
VALUE value =
|
317
|
+
VALUE value = parse_lex_input(&input, checked, false);
|
296
318
|
yp_string_free(&input);
|
297
319
|
|
298
320
|
return value;
|
@@ -368,6 +390,32 @@ parse_file(VALUE self, VALUE filepath) {
|
|
368
390
|
return value;
|
369
391
|
}
|
370
392
|
|
393
|
+
// Parse and lex the given string and return a ParseResult instance.
|
394
|
+
static VALUE
|
395
|
+
parse_lex(int argc, VALUE *argv, VALUE self) {
|
396
|
+
VALUE string;
|
397
|
+
VALUE filepath;
|
398
|
+
rb_scan_args(argc, argv, "11", &string, &filepath);
|
399
|
+
|
400
|
+
yp_string_t input;
|
401
|
+
input_load_string(&input, string);
|
402
|
+
return parse_lex_input(&input, check_string(filepath), true);
|
403
|
+
}
|
404
|
+
|
405
|
+
// Parse and lex the given file and return a ParseResult instance.
|
406
|
+
static VALUE
|
407
|
+
parse_lex_file(VALUE self, VALUE filepath) {
|
408
|
+
yp_string_t input;
|
409
|
+
|
410
|
+
const char *checked = check_string(filepath);
|
411
|
+
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
412
|
+
|
413
|
+
VALUE value = parse_lex_input(&input, checked, true);
|
414
|
+
yp_string_free(&input);
|
415
|
+
|
416
|
+
return value;
|
417
|
+
}
|
418
|
+
|
371
419
|
/******************************************************************************/
|
372
420
|
/* Utility functions exposed to make testing easier */
|
373
421
|
/******************************************************************************/
|
@@ -521,7 +569,6 @@ Init_yarp(void) {
|
|
521
569
|
// Define the version string here so that we can use the constants defined
|
522
570
|
// in yarp.h.
|
523
571
|
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
|
524
|
-
|
525
572
|
rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
|
526
573
|
|
527
574
|
// First, the functions that have to do with lexing and parsing.
|
@@ -531,6 +578,8 @@ Init_yarp(void) {
|
|
531
578
|
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
|
532
579
|
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
|
533
580
|
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
|
581
|
+
rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
|
582
|
+
rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
|
534
583
|
|
535
584
|
// Next, the functions that will be called by the parser to perform various
|
536
585
|
// internal tasks. We expose these to make them easier to test.
|
data/ext/yarp/extension.h
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
#ifndef YARP_EXT_NODE_H
|
2
2
|
#define YARP_EXT_NODE_H
|
3
3
|
|
4
|
+
#define EXPECTED_YARP_VERSION "0.9.0"
|
5
|
+
|
4
6
|
#include <ruby.h>
|
5
7
|
#include <ruby/encoding.h>
|
6
8
|
#include "yarp.h"
|
7
9
|
|
8
|
-
#define EXPECTED_YARP_VERSION "0.8.0"
|
9
|
-
|
10
10
|
VALUE yp_source_new(yp_parser_t *parser);
|
11
11
|
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
|
12
12
|
VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
|