yarp 0.8.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/ext/yarp/extension.c
CHANGED
@@ -83,7 +83,21 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|
83
83
|
|
84
84
|
yp_string_t input;
|
85
85
|
input_load_string(&input, string);
|
86
|
-
|
86
|
+
|
87
|
+
#ifdef YARP_DEBUG_MODE_BUILD
|
88
|
+
size_t length = yp_string_length(&input);
|
89
|
+
char* dup = malloc(length);
|
90
|
+
memcpy(dup, yp_string_source(&input), length);
|
91
|
+
yp_string_constant_init(&input, dup, length);
|
92
|
+
#endif
|
93
|
+
|
94
|
+
VALUE value = dump_input(&input, check_string(filepath));
|
95
|
+
|
96
|
+
#ifdef YARP_DEBUG_MODE_BUILD
|
97
|
+
free(dup);
|
98
|
+
#endif
|
99
|
+
|
100
|
+
return value;
|
87
101
|
}
|
88
102
|
|
89
103
|
// Dump the AST corresponding to the given file to a string.
|
@@ -198,52 +212,67 @@ typedef struct {
|
|
198
212
|
VALUE source;
|
199
213
|
VALUE tokens;
|
200
214
|
rb_encoding *encoding;
|
201
|
-
}
|
215
|
+
} parse_lex_data_t;
|
202
216
|
|
203
217
|
// This is passed as a callback to the parser. It gets called every time a new
|
204
218
|
// token is found. Once found, we initialize a new instance of Token and push it
|
205
219
|
// onto the tokens array.
|
206
220
|
static void
|
207
|
-
|
208
|
-
|
221
|
+
parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
|
222
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
209
223
|
|
210
224
|
VALUE yields = rb_ary_new_capa(2);
|
211
|
-
rb_ary_push(yields, yp_token_new(parser, token,
|
225
|
+
rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
|
212
226
|
rb_ary_push(yields, INT2FIX(parser->lex_state));
|
213
227
|
|
214
|
-
rb_ary_push(
|
228
|
+
rb_ary_push(parse_lex_data->tokens, yields);
|
215
229
|
}
|
216
230
|
|
217
231
|
// This is called whenever the encoding changes based on the magic comment at
|
218
232
|
// the top of the file. We use it to update the encoding that we are using to
|
219
233
|
// create tokens.
|
220
234
|
static void
|
221
|
-
|
222
|
-
|
223
|
-
|
235
|
+
parse_lex_encoding_changed_callback(yp_parser_t *parser) {
|
236
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
237
|
+
parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
|
238
|
+
|
239
|
+
// Since the encoding changed, we need to go back and change the encoding of
|
240
|
+
// the tokens that were already lexed. This is only going to end up being
|
241
|
+
// one or two tokens, since the encoding can only change at the top of the
|
242
|
+
// file.
|
243
|
+
VALUE tokens = parse_lex_data->tokens;
|
244
|
+
for (long index = 0; index < RARRAY_LEN(tokens); index++) {
|
245
|
+
VALUE yields = rb_ary_entry(tokens, index);
|
246
|
+
VALUE token = rb_ary_entry(yields, 0);
|
247
|
+
|
248
|
+
VALUE value = rb_ivar_get(token, rb_intern("@value"));
|
249
|
+
rb_enc_associate(value, parse_lex_data->encoding);
|
250
|
+
ENC_CODERANGE_CLEAR(value);
|
251
|
+
}
|
224
252
|
}
|
225
253
|
|
226
|
-
//
|
254
|
+
// Parse the given input and return a ParseResult containing just the tokens or
|
255
|
+
// the nodes and tokens.
|
227
256
|
static VALUE
|
228
|
-
|
257
|
+
parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
|
229
258
|
yp_parser_t parser;
|
230
259
|
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
|
231
|
-
yp_parser_register_encoding_changed_callback(&parser,
|
260
|
+
yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
|
232
261
|
|
233
262
|
VALUE offsets = rb_ary_new();
|
234
|
-
VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
|
263
|
+
VALUE source_argv[] = { rb_str_new((const char *) yp_string_source(input), yp_string_length(input)), offsets };
|
235
264
|
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
|
236
265
|
|
237
|
-
|
266
|
+
parse_lex_data_t parse_lex_data = {
|
238
267
|
.source = source,
|
239
268
|
.tokens = rb_ary_new(),
|
240
269
|
.encoding = rb_utf8_encoding()
|
241
270
|
};
|
242
271
|
|
243
|
-
|
272
|
+
parse_lex_data_t *data = &parse_lex_data;
|
244
273
|
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
|
245
274
|
.data = (void *) data,
|
246
|
-
.callback =
|
275
|
+
.callback = parse_lex_token,
|
247
276
|
};
|
248
277
|
|
249
278
|
parser.lex_callback = &lex_callback;
|
@@ -256,20 +285,26 @@ lex_input(yp_string_t *input, const char *filepath) {
|
|
256
285
|
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
|
257
286
|
}
|
258
287
|
|
288
|
+
VALUE value;
|
289
|
+
if (return_nodes) {
|
290
|
+
value = rb_ary_new_capa(2);
|
291
|
+
rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
|
292
|
+
rb_ary_push(value, parse_lex_data.tokens);
|
293
|
+
} else {
|
294
|
+
value = parse_lex_data.tokens;
|
295
|
+
}
|
296
|
+
|
259
297
|
VALUE result_argv[] = {
|
260
|
-
|
298
|
+
value,
|
261
299
|
parser_comments(&parser, source),
|
262
|
-
parser_errors(&parser,
|
263
|
-
parser_warnings(&parser,
|
300
|
+
parser_errors(&parser, parse_lex_data.encoding, source),
|
301
|
+
parser_warnings(&parser, parse_lex_data.encoding, source),
|
264
302
|
source
|
265
303
|
};
|
266
304
|
|
267
|
-
VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
268
|
-
|
269
305
|
yp_node_destroy(&parser, node);
|
270
306
|
yp_parser_free(&parser);
|
271
|
-
|
272
|
-
return result;
|
307
|
+
return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
273
308
|
}
|
274
309
|
|
275
310
|
// Return an array of tokens corresponding to the given string.
|
@@ -281,7 +316,8 @@ lex(int argc, VALUE *argv, VALUE self) {
|
|
281
316
|
|
282
317
|
yp_string_t input;
|
283
318
|
input_load_string(&input, string);
|
284
|
-
|
319
|
+
|
320
|
+
return parse_lex_input(&input, check_string(filepath), false);
|
285
321
|
}
|
286
322
|
|
287
323
|
// Return an array of tokens corresponding to the given file.
|
@@ -292,7 +328,7 @@ lex_file(VALUE self, VALUE filepath) {
|
|
292
328
|
const char *checked = check_string(filepath);
|
293
329
|
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
294
330
|
|
295
|
-
VALUE value =
|
331
|
+
VALUE value = parse_lex_input(&input, checked, false);
|
296
332
|
yp_string_free(&input);
|
297
333
|
|
298
334
|
return value;
|
@@ -368,6 +404,32 @@ parse_file(VALUE self, VALUE filepath) {
|
|
368
404
|
return value;
|
369
405
|
}
|
370
406
|
|
407
|
+
// Parse the given string and return a ParseResult instance.
|
408
|
+
static VALUE
|
409
|
+
parse_lex(int argc, VALUE *argv, VALUE self) {
|
410
|
+
VALUE string;
|
411
|
+
VALUE filepath;
|
412
|
+
rb_scan_args(argc, argv, "11", &string, &filepath);
|
413
|
+
|
414
|
+
yp_string_t input;
|
415
|
+
input_load_string(&input, string);
|
416
|
+
return parse_lex_input(&input, check_string(filepath), true);
|
417
|
+
}
|
418
|
+
|
419
|
+
// Parse and lex the given file and return a ParseResult instance.
|
420
|
+
static VALUE
|
421
|
+
parse_lex_file(VALUE self, VALUE filepath) {
|
422
|
+
yp_string_t input;
|
423
|
+
|
424
|
+
const char *checked = check_string(filepath);
|
425
|
+
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
426
|
+
|
427
|
+
VALUE value = parse_lex_input(&input, checked, true);
|
428
|
+
yp_string_free(&input);
|
429
|
+
|
430
|
+
return value;
|
431
|
+
}
|
432
|
+
|
371
433
|
/******************************************************************************/
|
372
434
|
/* Utility functions exposed to make testing easier */
|
373
435
|
/******************************************************************************/
|
@@ -380,7 +442,7 @@ named_captures(VALUE self, VALUE source) {
|
|
380
442
|
yp_string_list_t string_list;
|
381
443
|
yp_string_list_init(&string_list);
|
382
444
|
|
383
|
-
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
|
445
|
+
if (!yp_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
|
384
446
|
yp_string_list_free(&string_list);
|
385
447
|
return Qnil;
|
386
448
|
}
|
@@ -388,7 +450,7 @@ named_captures(VALUE self, VALUE source) {
|
|
388
450
|
VALUE names = rb_ary_new();
|
389
451
|
for (size_t index = 0; index < string_list.length; index++) {
|
390
452
|
const yp_string_t *string = &string_list.strings[index];
|
391
|
-
rb_ary_push(names, rb_str_new(yp_string_source(string), yp_string_length(string)));
|
453
|
+
rb_ary_push(names, rb_str_new((const char *) yp_string_source(string), yp_string_length(string)));
|
392
454
|
}
|
393
455
|
|
394
456
|
yp_string_list_free(&string_list);
|
@@ -401,8 +463,8 @@ static VALUE
|
|
401
463
|
unescape(VALUE source, yp_unescape_type_t unescape_type) {
|
402
464
|
yp_string_t result;
|
403
465
|
|
404
|
-
if (yp_unescape_string(RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
|
405
|
-
VALUE str = rb_str_new(yp_string_source(&result), yp_string_length(&result));
|
466
|
+
if (yp_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
|
467
|
+
VALUE str = rb_str_new((const char *) yp_string_source(&result), yp_string_length(&result));
|
406
468
|
yp_string_free(&result);
|
407
469
|
return str;
|
408
470
|
} else {
|
@@ -436,7 +498,7 @@ static VALUE
|
|
436
498
|
memsize(VALUE self, VALUE string) {
|
437
499
|
yp_parser_t parser;
|
438
500
|
size_t length = RSTRING_LEN(string);
|
439
|
-
yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
|
501
|
+
yp_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
|
440
502
|
|
441
503
|
yp_node_t *node = yp_parse(&parser);
|
442
504
|
yp_memsize_t memsize;
|
@@ -521,7 +583,6 @@ Init_yarp(void) {
|
|
521
583
|
// Define the version string here so that we can use the constants defined
|
522
584
|
// in yarp.h.
|
523
585
|
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
|
524
|
-
|
525
586
|
rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
|
526
587
|
|
527
588
|
// First, the functions that have to do with lexing and parsing.
|
@@ -531,6 +592,8 @@ Init_yarp(void) {
|
|
531
592
|
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
|
532
593
|
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
|
533
594
|
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
|
595
|
+
rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
|
596
|
+
rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
|
534
597
|
|
535
598
|
// Next, the functions that will be called by the parser to perform various
|
536
599
|
// internal tasks. We expose these to make them easier to test.
|
data/ext/yarp/extension.h
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
#ifndef YARP_EXT_NODE_H
|
2
2
|
#define YARP_EXT_NODE_H
|
3
3
|
|
4
|
+
#define EXPECTED_YARP_VERSION "0.10.0"
|
5
|
+
|
4
6
|
#include <ruby.h>
|
5
7
|
#include <ruby/encoding.h>
|
6
8
|
#include "yarp.h"
|
7
9
|
|
8
|
-
#define EXPECTED_YARP_VERSION "0.8.0"
|
9
|
-
|
10
10
|
VALUE yp_source_new(yp_parser_t *parser);
|
11
11
|
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
|
12
12
|
VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
|