yarp 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/ext/yarp/extension.c
CHANGED
@@ -83,7 +83,21 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|
83
83
|
|
84
84
|
yp_string_t input;
|
85
85
|
input_load_string(&input, string);
|
86
|
-
|
86
|
+
|
87
|
+
#ifdef YARP_DEBUG_MODE_BUILD
|
88
|
+
size_t length = yp_string_length(&input);
|
89
|
+
char* dup = malloc(length);
|
90
|
+
memcpy(dup, yp_string_source(&input), length);
|
91
|
+
yp_string_constant_init(&input, dup, length);
|
92
|
+
#endif
|
93
|
+
|
94
|
+
VALUE value = dump_input(&input, check_string(filepath));
|
95
|
+
|
96
|
+
#ifdef YARP_DEBUG_MODE_BUILD
|
97
|
+
free(dup);
|
98
|
+
#endif
|
99
|
+
|
100
|
+
return value;
|
87
101
|
}
|
88
102
|
|
89
103
|
// Dump the AST corresponding to the given file to a string.
|
@@ -198,52 +212,67 @@ typedef struct {
|
|
198
212
|
VALUE source;
|
199
213
|
VALUE tokens;
|
200
214
|
rb_encoding *encoding;
|
201
|
-
}
|
215
|
+
} parse_lex_data_t;
|
202
216
|
|
203
217
|
// This is passed as a callback to the parser. It gets called every time a new
|
204
218
|
// token is found. Once found, we initialize a new instance of Token and push it
|
205
219
|
// onto the tokens array.
|
206
220
|
static void
|
207
|
-
|
208
|
-
|
221
|
+
parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
|
222
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
209
223
|
|
210
224
|
VALUE yields = rb_ary_new_capa(2);
|
211
|
-
rb_ary_push(yields, yp_token_new(parser, token,
|
225
|
+
rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
|
212
226
|
rb_ary_push(yields, INT2FIX(parser->lex_state));
|
213
227
|
|
214
|
-
rb_ary_push(
|
228
|
+
rb_ary_push(parse_lex_data->tokens, yields);
|
215
229
|
}
|
216
230
|
|
217
231
|
// This is called whenever the encoding changes based on the magic comment at
|
218
232
|
// the top of the file. We use it to update the encoding that we are using to
|
219
233
|
// create tokens.
|
220
234
|
static void
|
221
|
-
|
222
|
-
|
223
|
-
|
235
|
+
parse_lex_encoding_changed_callback(yp_parser_t *parser) {
|
236
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
237
|
+
parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
|
238
|
+
|
239
|
+
// Since the encoding changed, we need to go back and change the encoding of
|
240
|
+
// the tokens that were already lexed. This is only going to end up being
|
241
|
+
// one or two tokens, since the encoding can only change at the top of the
|
242
|
+
// file.
|
243
|
+
VALUE tokens = parse_lex_data->tokens;
|
244
|
+
for (long index = 0; index < RARRAY_LEN(tokens); index++) {
|
245
|
+
VALUE yields = rb_ary_entry(tokens, index);
|
246
|
+
VALUE token = rb_ary_entry(yields, 0);
|
247
|
+
|
248
|
+
VALUE value = rb_ivar_get(token, rb_intern("@value"));
|
249
|
+
rb_enc_associate(value, parse_lex_data->encoding);
|
250
|
+
ENC_CODERANGE_CLEAR(value);
|
251
|
+
}
|
224
252
|
}
|
225
253
|
|
226
|
-
//
|
254
|
+
// Parse the given input and return a ParseResult containing just the tokens or
|
255
|
+
// the nodes and tokens.
|
227
256
|
static VALUE
|
228
|
-
|
257
|
+
parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
|
229
258
|
yp_parser_t parser;
|
230
259
|
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
|
231
|
-
yp_parser_register_encoding_changed_callback(&parser,
|
260
|
+
yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
|
232
261
|
|
233
262
|
VALUE offsets = rb_ary_new();
|
234
|
-
VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
|
263
|
+
VALUE source_argv[] = { rb_str_new((const char *) yp_string_source(input), yp_string_length(input)), offsets };
|
235
264
|
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
|
236
265
|
|
237
|
-
|
266
|
+
parse_lex_data_t parse_lex_data = {
|
238
267
|
.source = source,
|
239
268
|
.tokens = rb_ary_new(),
|
240
269
|
.encoding = rb_utf8_encoding()
|
241
270
|
};
|
242
271
|
|
243
|
-
|
272
|
+
parse_lex_data_t *data = &parse_lex_data;
|
244
273
|
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
|
245
274
|
.data = (void *) data,
|
246
|
-
.callback =
|
275
|
+
.callback = parse_lex_token,
|
247
276
|
};
|
248
277
|
|
249
278
|
parser.lex_callback = &lex_callback;
|
@@ -256,20 +285,26 @@ lex_input(yp_string_t *input, const char *filepath) {
|
|
256
285
|
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
|
257
286
|
}
|
258
287
|
|
288
|
+
VALUE value;
|
289
|
+
if (return_nodes) {
|
290
|
+
value = rb_ary_new_capa(2);
|
291
|
+
rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
|
292
|
+
rb_ary_push(value, parse_lex_data.tokens);
|
293
|
+
} else {
|
294
|
+
value = parse_lex_data.tokens;
|
295
|
+
}
|
296
|
+
|
259
297
|
VALUE result_argv[] = {
|
260
|
-
|
298
|
+
value,
|
261
299
|
parser_comments(&parser, source),
|
262
|
-
parser_errors(&parser,
|
263
|
-
parser_warnings(&parser,
|
300
|
+
parser_errors(&parser, parse_lex_data.encoding, source),
|
301
|
+
parser_warnings(&parser, parse_lex_data.encoding, source),
|
264
302
|
source
|
265
303
|
};
|
266
304
|
|
267
|
-
VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
268
|
-
|
269
305
|
yp_node_destroy(&parser, node);
|
270
306
|
yp_parser_free(&parser);
|
271
|
-
|
272
|
-
return result;
|
307
|
+
return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
273
308
|
}
|
274
309
|
|
275
310
|
// Return an array of tokens corresponding to the given string.
|
@@ -281,7 +316,8 @@ lex(int argc, VALUE *argv, VALUE self) {
|
|
281
316
|
|
282
317
|
yp_string_t input;
|
283
318
|
input_load_string(&input, string);
|
284
|
-
|
319
|
+
|
320
|
+
return parse_lex_input(&input, check_string(filepath), false);
|
285
321
|
}
|
286
322
|
|
287
323
|
// Return an array of tokens corresponding to the given file.
|
@@ -292,7 +328,7 @@ lex_file(VALUE self, VALUE filepath) {
|
|
292
328
|
const char *checked = check_string(filepath);
|
293
329
|
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
294
330
|
|
295
|
-
VALUE value =
|
331
|
+
VALUE value = parse_lex_input(&input, checked, false);
|
296
332
|
yp_string_free(&input);
|
297
333
|
|
298
334
|
return value;
|
@@ -368,6 +404,32 @@ parse_file(VALUE self, VALUE filepath) {
|
|
368
404
|
return value;
|
369
405
|
}
|
370
406
|
|
407
|
+
// Parse and lex the given string and return a ParseResult instance.
|
408
|
+
static VALUE
|
409
|
+
parse_lex(int argc, VALUE *argv, VALUE self) {
|
410
|
+
VALUE string;
|
411
|
+
VALUE filepath;
|
412
|
+
rb_scan_args(argc, argv, "11", &string, &filepath);
|
413
|
+
|
414
|
+
yp_string_t input;
|
415
|
+
input_load_string(&input, string);
|
416
|
+
return parse_lex_input(&input, check_string(filepath), true);
|
417
|
+
}
|
418
|
+
|
419
|
+
// Parse and lex the given file and return a ParseResult instance.
|
420
|
+
static VALUE
|
421
|
+
parse_lex_file(VALUE self, VALUE filepath) {
|
422
|
+
yp_string_t input;
|
423
|
+
|
424
|
+
const char *checked = check_string(filepath);
|
425
|
+
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
426
|
+
|
427
|
+
VALUE value = parse_lex_input(&input, checked, true);
|
428
|
+
yp_string_free(&input);
|
429
|
+
|
430
|
+
return value;
|
431
|
+
}
|
432
|
+
|
371
433
|
/******************************************************************************/
|
372
434
|
/* Utility functions exposed to make testing easier */
|
373
435
|
/******************************************************************************/
|
@@ -380,7 +442,7 @@ named_captures(VALUE self, VALUE source) {
|
|
380
442
|
yp_string_list_t string_list;
|
381
443
|
yp_string_list_init(&string_list);
|
382
444
|
|
383
|
-
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
|
445
|
+
if (!yp_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
|
384
446
|
yp_string_list_free(&string_list);
|
385
447
|
return Qnil;
|
386
448
|
}
|
@@ -388,7 +450,7 @@ named_captures(VALUE self, VALUE source) {
|
|
388
450
|
VALUE names = rb_ary_new();
|
389
451
|
for (size_t index = 0; index < string_list.length; index++) {
|
390
452
|
const yp_string_t *string = &string_list.strings[index];
|
391
|
-
rb_ary_push(names, rb_str_new(yp_string_source(string), yp_string_length(string)));
|
453
|
+
rb_ary_push(names, rb_str_new((const char *) yp_string_source(string), yp_string_length(string)));
|
392
454
|
}
|
393
455
|
|
394
456
|
yp_string_list_free(&string_list);
|
@@ -401,8 +463,8 @@ static VALUE
|
|
401
463
|
unescape(VALUE source, yp_unescape_type_t unescape_type) {
|
402
464
|
yp_string_t result;
|
403
465
|
|
404
|
-
if (yp_unescape_string(RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
|
405
|
-
VALUE str = rb_str_new(yp_string_source(&result), yp_string_length(&result));
|
466
|
+
if (yp_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
|
467
|
+
VALUE str = rb_str_new((const char *) yp_string_source(&result), yp_string_length(&result));
|
406
468
|
yp_string_free(&result);
|
407
469
|
return str;
|
408
470
|
} else {
|
@@ -436,7 +498,7 @@ static VALUE
|
|
436
498
|
memsize(VALUE self, VALUE string) {
|
437
499
|
yp_parser_t parser;
|
438
500
|
size_t length = RSTRING_LEN(string);
|
439
|
-
yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
|
501
|
+
yp_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
|
440
502
|
|
441
503
|
yp_node_t *node = yp_parse(&parser);
|
442
504
|
yp_memsize_t memsize;
|
@@ -521,7 +583,6 @@ Init_yarp(void) {
|
|
521
583
|
// Define the version string here so that we can use the constants defined
|
522
584
|
// in yarp.h.
|
523
585
|
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
|
524
|
-
|
525
586
|
rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
|
526
587
|
|
527
588
|
// First, the functions that have to do with lexing and parsing.
|
@@ -531,6 +592,8 @@ Init_yarp(void) {
|
|
531
592
|
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
|
532
593
|
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
|
533
594
|
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
|
595
|
+
rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
|
596
|
+
rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
|
534
597
|
|
535
598
|
// Next, the functions that will be called by the parser to perform various
|
536
599
|
// internal tasks. We expose these to make them easier to test.
|
data/ext/yarp/extension.h
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
#ifndef YARP_EXT_NODE_H
|
2
2
|
#define YARP_EXT_NODE_H
|
3
3
|
|
4
|
+
#define EXPECTED_YARP_VERSION "0.10.0"
|
5
|
+
|
4
6
|
#include <ruby.h>
|
5
7
|
#include <ruby/encoding.h>
|
6
8
|
#include "yarp.h"
|
7
9
|
|
8
|
-
#define EXPECTED_YARP_VERSION "0.8.0"
|
9
|
-
|
10
10
|
VALUE yp_source_new(yp_parser_t *parser);
|
11
11
|
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
|
12
12
|
VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
|