yarp 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -1
- data/README.md +4 -3
- data/config.yml +332 -52
- data/docs/configuration.md +1 -0
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +1 -1
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +703 -136
- data/ext/yarp/extension.c +73 -24
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +331 -137
- data/include/yarp/node.h +10 -0
- data/include/yarp/unescape.h +4 -2
- data/include/yarp/util/yp_newline_list.h +3 -0
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +10 -0
- data/lib/yarp/desugar_visitor.rb +267 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3042 -508
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +48 -2
- data/src/node.c +421 -185
- data/src/prettyprint.c +262 -80
- data/src/serialize.c +410 -270
- data/src/token_type.c +2 -2
- data/src/unescape.c +69 -51
- data/src/util/yp_newline_list.c +10 -0
- data/src/yarp.c +1208 -458
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/ext/yarp/extension.c
CHANGED
@@ -198,52 +198,67 @@ typedef struct {
|
|
198
198
|
VALUE source;
|
199
199
|
VALUE tokens;
|
200
200
|
rb_encoding *encoding;
|
201
|
-
}
|
201
|
+
} parse_lex_data_t;
|
202
202
|
|
203
203
|
// This is passed as a callback to the parser. It gets called every time a new
|
204
204
|
// token is found. Once found, we initialize a new instance of Token and push it
|
205
205
|
// onto the tokens array.
|
206
206
|
static void
|
207
|
-
|
208
|
-
|
207
|
+
parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
|
208
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
209
209
|
|
210
210
|
VALUE yields = rb_ary_new_capa(2);
|
211
|
-
rb_ary_push(yields, yp_token_new(parser, token,
|
211
|
+
rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
|
212
212
|
rb_ary_push(yields, INT2FIX(parser->lex_state));
|
213
213
|
|
214
|
-
rb_ary_push(
|
214
|
+
rb_ary_push(parse_lex_data->tokens, yields);
|
215
215
|
}
|
216
216
|
|
217
217
|
// This is called whenever the encoding changes based on the magic comment at
|
218
218
|
// the top of the file. We use it to update the encoding that we are using to
|
219
219
|
// create tokens.
|
220
220
|
static void
|
221
|
-
|
222
|
-
|
223
|
-
|
221
|
+
parse_lex_encoding_changed_callback(yp_parser_t *parser) {
|
222
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
223
|
+
parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
|
224
|
+
|
225
|
+
// Since the encoding changed, we need to go back and change the encoding of
|
226
|
+
// the tokens that were already lexed. This is only going to end up being
|
227
|
+
// one or two tokens, since the encoding can only change at the top of the
|
228
|
+
// file.
|
229
|
+
VALUE tokens = parse_lex_data->tokens;
|
230
|
+
for (long index = 0; index < RARRAY_LEN(tokens); index++) {
|
231
|
+
VALUE yields = rb_ary_entry(tokens, index);
|
232
|
+
VALUE token = rb_ary_entry(yields, 0);
|
233
|
+
|
234
|
+
VALUE value = rb_ivar_get(token, rb_intern("@value"));
|
235
|
+
rb_enc_associate(value, parse_lex_data->encoding);
|
236
|
+
ENC_CODERANGE_CLEAR(value);
|
237
|
+
}
|
224
238
|
}
|
225
239
|
|
226
|
-
//
|
240
|
+
// Parse the given input and return a ParseResult containing just the tokens or
|
241
|
+
// the nodes and tokens.
|
227
242
|
static VALUE
|
228
|
-
|
243
|
+
parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
|
229
244
|
yp_parser_t parser;
|
230
245
|
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
|
231
|
-
yp_parser_register_encoding_changed_callback(&parser,
|
246
|
+
yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
|
232
247
|
|
233
248
|
VALUE offsets = rb_ary_new();
|
234
249
|
VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
|
235
250
|
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
|
236
251
|
|
237
|
-
|
252
|
+
parse_lex_data_t parse_lex_data = {
|
238
253
|
.source = source,
|
239
254
|
.tokens = rb_ary_new(),
|
240
255
|
.encoding = rb_utf8_encoding()
|
241
256
|
};
|
242
257
|
|
243
|
-
|
258
|
+
parse_lex_data_t *data = &parse_lex_data;
|
244
259
|
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
|
245
260
|
.data = (void *) data,
|
246
|
-
.callback =
|
261
|
+
.callback = parse_lex_token,
|
247
262
|
};
|
248
263
|
|
249
264
|
parser.lex_callback = &lex_callback;
|
@@ -256,20 +271,26 @@ lex_input(yp_string_t *input, const char *filepath) {
|
|
256
271
|
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
|
257
272
|
}
|
258
273
|
|
274
|
+
VALUE value;
|
275
|
+
if (return_nodes) {
|
276
|
+
value = rb_ary_new_capa(2);
|
277
|
+
rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
|
278
|
+
rb_ary_push(value, parse_lex_data.tokens);
|
279
|
+
} else {
|
280
|
+
value = parse_lex_data.tokens;
|
281
|
+
}
|
282
|
+
|
259
283
|
VALUE result_argv[] = {
|
260
|
-
|
284
|
+
value,
|
261
285
|
parser_comments(&parser, source),
|
262
|
-
parser_errors(&parser,
|
263
|
-
parser_warnings(&parser,
|
286
|
+
parser_errors(&parser, parse_lex_data.encoding, source),
|
287
|
+
parser_warnings(&parser, parse_lex_data.encoding, source),
|
264
288
|
source
|
265
289
|
};
|
266
290
|
|
267
|
-
VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
268
|
-
|
269
291
|
yp_node_destroy(&parser, node);
|
270
292
|
yp_parser_free(&parser);
|
271
|
-
|
272
|
-
return result;
|
293
|
+
return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
273
294
|
}
|
274
295
|
|
275
296
|
// Return an array of tokens corresponding to the given string.
|
@@ -281,7 +302,8 @@ lex(int argc, VALUE *argv, VALUE self) {
|
|
281
302
|
|
282
303
|
yp_string_t input;
|
283
304
|
input_load_string(&input, string);
|
284
|
-
|
305
|
+
|
306
|
+
return parse_lex_input(&input, check_string(filepath), false);
|
285
307
|
}
|
286
308
|
|
287
309
|
// Return an array of tokens corresponding to the given file.
|
@@ -292,7 +314,7 @@ lex_file(VALUE self, VALUE filepath) {
|
|
292
314
|
const char *checked = check_string(filepath);
|
293
315
|
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
294
316
|
|
295
|
-
VALUE value =
|
317
|
+
VALUE value = parse_lex_input(&input, checked, false);
|
296
318
|
yp_string_free(&input);
|
297
319
|
|
298
320
|
return value;
|
@@ -368,6 +390,32 @@ parse_file(VALUE self, VALUE filepath) {
|
|
368
390
|
return value;
|
369
391
|
}
|
370
392
|
|
393
|
+
// Parse and lex the given string and return a ParseResult instance.
|
394
|
+
static VALUE
|
395
|
+
parse_lex(int argc, VALUE *argv, VALUE self) {
|
396
|
+
VALUE string;
|
397
|
+
VALUE filepath;
|
398
|
+
rb_scan_args(argc, argv, "11", &string, &filepath);
|
399
|
+
|
400
|
+
yp_string_t input;
|
401
|
+
input_load_string(&input, string);
|
402
|
+
return parse_lex_input(&input, check_string(filepath), true);
|
403
|
+
}
|
404
|
+
|
405
|
+
// Parse and lex the given file and return a ParseResult instance.
|
406
|
+
static VALUE
|
407
|
+
parse_lex_file(VALUE self, VALUE filepath) {
|
408
|
+
yp_string_t input;
|
409
|
+
|
410
|
+
const char *checked = check_string(filepath);
|
411
|
+
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
412
|
+
|
413
|
+
VALUE value = parse_lex_input(&input, checked, true);
|
414
|
+
yp_string_free(&input);
|
415
|
+
|
416
|
+
return value;
|
417
|
+
}
|
418
|
+
|
371
419
|
/******************************************************************************/
|
372
420
|
/* Utility functions exposed to make testing easier */
|
373
421
|
/******************************************************************************/
|
@@ -521,7 +569,6 @@ Init_yarp(void) {
|
|
521
569
|
// Define the version string here so that we can use the constants defined
|
522
570
|
// in yarp.h.
|
523
571
|
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
|
524
|
-
|
525
572
|
rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
|
526
573
|
|
527
574
|
// First, the functions that have to do with lexing and parsing.
|
@@ -531,6 +578,8 @@ Init_yarp(void) {
|
|
531
578
|
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
|
532
579
|
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
|
533
580
|
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
|
581
|
+
rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
|
582
|
+
rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
|
534
583
|
|
535
584
|
// Next, the functions that will be called by the parser to perform various
|
536
585
|
// internal tasks. We expose these to make them easier to test.
|
data/ext/yarp/extension.h
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
#ifndef YARP_EXT_NODE_H
|
2
2
|
#define YARP_EXT_NODE_H
|
3
3
|
|
4
|
+
#define EXPECTED_YARP_VERSION "0.9.0"
|
5
|
+
|
4
6
|
#include <ruby.h>
|
5
7
|
#include <ruby/encoding.h>
|
6
8
|
#include "yarp.h"
|
7
9
|
|
8
|
-
#define EXPECTED_YARP_VERSION "0.8.0"
|
9
|
-
|
10
10
|
VALUE yp_source_new(yp_parser_t *parser);
|
11
11
|
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
|
12
12
|
VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
|