prism 0.23.0 → 0.25.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +65 -1
- data/Makefile +5 -2
- data/README.md +45 -6
- data/config.yml +499 -4
- data/docs/build_system.md +31 -0
- data/docs/configuration.md +2 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/releasing.md +3 -3
- data/docs/ripper_translation.md +50 -0
- data/docs/ruby_api.md +1 -0
- data/docs/serialization.md +26 -5
- data/ext/prism/api_node.c +2342 -1801
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +27 -11
- data/ext/prism/extension.c +313 -66
- data/ext/prism/extension.h +5 -4
- data/include/prism/ast.h +213 -64
- data/include/prism/defines.h +106 -2
- data/include/prism/diagnostic.h +134 -71
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +93 -0
- data/include/prism/options.h +82 -7
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +198 -53
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/static_literals.h +118 -0
- data/include/prism/util/pm_buffer.h +65 -2
- data/include/prism/util/pm_constant_pool.h +18 -1
- data/include/prism/util/pm_integer.h +119 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +12 -3
- data/include/prism/util/pm_string.h +26 -2
- data/include/prism/version.h +2 -2
- data/include/prism.h +59 -1
- data/lib/prism/compiler.rb +8 -1
- data/lib/prism/debug.rb +46 -3
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dispatcher.rb +29 -0
- data/lib/prism/dot_visitor.rb +87 -16
- data/lib/prism/dsl.rb +315 -300
- data/lib/prism/ffi.rb +165 -84
- data/lib/prism/lex_compat.rb +17 -15
- data/lib/prism/mutation_compiler.rb +11 -0
- data/lib/prism/node.rb +4857 -3750
- data/lib/prism/node_ext.rb +77 -29
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -17
- data/lib/prism/parse_result/newlines.rb +3 -1
- data/lib/prism/parse_result.rb +88 -34
- data/lib/prism/pattern.rb +16 -4
- data/lib/prism/polyfill/string.rb +12 -0
- data/lib/prism/serialize.rb +960 -327
- data/lib/prism/translation/parser/compiler.rb +152 -50
- data/lib/prism/translation/parser/lexer.rb +103 -22
- data/lib/prism/translation/parser/rubocop.rb +47 -11
- data/lib/prism/translation/parser.rb +134 -10
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3248 -379
- data/lib/prism/translation/ruby_parser.rb +35 -18
- data/lib/prism/translation.rb +3 -1
- data/lib/prism/visitor.rb +10 -0
- data/lib/prism.rb +8 -2
- data/prism.gemspec +35 -4
- data/rbi/prism/compiler.rbi +14 -0
- data/rbi/prism/desugar_compiler.rbi +5 -0
- data/rbi/prism/mutation_compiler.rbi +5 -0
- data/rbi/prism/node.rbi +8221 -0
- data/rbi/prism/node_ext.rbi +102 -0
- data/rbi/prism/parse_result.rbi +304 -0
- data/rbi/prism/translation/parser/compiler.rbi +13 -0
- data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
- data/rbi/prism/translation/ripper.rbi +25 -0
- data/rbi/prism/translation/ruby_parser.rbi +11 -0
- data/rbi/prism/visitor.rbi +470 -0
- data/rbi/prism.rbi +39 -7749
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +462 -0
- data/sig/prism/mutation_compiler.rbs +158 -0
- data/sig/prism/node.rbs +3529 -0
- data/sig/prism/node_ext.rbs +78 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +127 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/serialize.rbs +7 -0
- data/sig/prism/visitor.rbs +168 -0
- data/sig/prism.rbs +188 -4767
- data/src/diagnostic.c +575 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7526 -447
- data/src/options.c +36 -12
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1297 -1388
- data/src/prism.c +3665 -1121
- data/src/regexp.c +17 -2
- data/src/serialize.c +47 -28
- data/src/static_literals.c +552 -0
- data/src/token_type.c +1 -0
- data/src/util/pm_buffer.c +147 -20
- data/src/util/pm_char.c +4 -4
- data/src/util/pm_constant_pool.c +35 -11
- data/src/util/pm_integer.c +629 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +20 -8
- data/src/util/pm_string.c +134 -5
- data/src/util/pm_string_list.c +2 -2
- metadata +37 -6
- data/docs/ripper.md +0 -36
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
data/ext/prism/api_pack.c
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
#include "prism/extension.h"
|
2
2
|
|
3
|
+
#ifdef PRISM_EXCLUDE_PACK
|
4
|
+
|
5
|
+
void
|
6
|
+
Init_prism_pack(void) {}
|
7
|
+
|
8
|
+
#else
|
9
|
+
|
3
10
|
static VALUE rb_cPrism;
|
4
11
|
static VALUE rb_cPrismPack;
|
5
12
|
static VALUE rb_cPrismPackDirective;
|
@@ -265,3 +272,5 @@ Init_prism_pack(void) {
|
|
265
272
|
pack_symbol = ID2SYM(rb_intern("pack"));
|
266
273
|
unpack_symbol = ID2SYM(rb_intern("unpack"));
|
267
274
|
}
|
275
|
+
|
276
|
+
#endif
|
data/ext/prism/extconf.rb
CHANGED
@@ -6,17 +6,24 @@ if ARGV.delete("--help")
|
|
6
6
|
|
7
7
|
Flags that are always valid:
|
8
8
|
|
9
|
-
--enable-debug
|
10
|
-
Enable debug
|
11
|
-
You may also
|
9
|
+
--enable-build-debug
|
10
|
+
Enable debug build.
|
11
|
+
You may also set the PRISM_BUILD_DEBUG environment variable.
|
12
|
+
|
13
|
+
--enable-build-minimal
|
14
|
+
Enable minimal build.
|
15
|
+
You may also set the PRISM_BUILD_MINIMAL environment variable.
|
12
16
|
|
13
17
|
--help
|
14
18
|
Display this message.
|
15
19
|
|
16
20
|
Environment variables used:
|
17
21
|
|
18
|
-
|
19
|
-
Equivalent to `--enable-debug
|
22
|
+
PRISM_BUILD_DEBUG
|
23
|
+
Equivalent to `--enable-build-debug` when set, even if nil or blank.
|
24
|
+
|
25
|
+
PRISM_BUILD_MINIMAL
|
26
|
+
Equivalent to `--enable-build-minimal` when set, even if nil or blank.
|
20
27
|
|
21
28
|
TEXT
|
22
29
|
exit!(0)
|
@@ -38,7 +45,7 @@ end
|
|
38
45
|
# by this script.
|
39
46
|
def make(target)
|
40
47
|
Dir.chdir(File.expand_path("../..", __dir__)) do
|
41
|
-
system("make", target, exception: true)
|
48
|
+
system(RUBY_PLATFORM.include?("openbsd") ? "gmake" : "make", target, exception: true)
|
42
49
|
end
|
43
50
|
end
|
44
51
|
|
@@ -71,13 +78,22 @@ unless find_header("prism/extension.h", File.expand_path("..", __dir__))
|
|
71
78
|
raise "prism/extension.h is required"
|
72
79
|
end
|
73
80
|
|
74
|
-
# If `--enable-debug
|
75
|
-
# `
|
76
|
-
# `
|
81
|
+
# If `--enable-build-debug` is passed to this script or the
|
82
|
+
# `PRISM_BUILD_DEBUG` environment variable is defined, we'll build with the
|
83
|
+
# `PRISM_BUILD_DEBUG` macro defined. This causes parse functions to
|
77
84
|
# duplicate their input so that they have clearly set bounds, which is useful
|
78
85
|
# for finding bugs that cause the parser to read off the end of the input.
|
79
|
-
if enable_config("debug
|
80
|
-
append_cflags("-
|
86
|
+
if enable_config("build-debug", ENV["PRISM_BUILD_DEBUG"] || false)
|
87
|
+
append_cflags("-DPRISM_BUILD_DEBUG")
|
88
|
+
end
|
89
|
+
|
90
|
+
# If `--enable-build-minimal` is passed to this script or the
|
91
|
+
# `PRISM_BUILD_MINIMAL` environment variable is defined, we'll build with the
|
92
|
+
# set of defines that comprise the minimal set. This causes the parser to be
|
93
|
+
# built with minimal features, necessary for stripping out functionality when
|
94
|
+
# the size of the final built artifact is a concern.
|
95
|
+
if enable_config("build-minimal", ENV["PRISM_BUILD_MINIMAL"] || false)
|
96
|
+
append_cflags("-DPRISM_BUILD_MINIMAL")
|
81
97
|
end
|
82
98
|
|
83
99
|
# By default, all symbols are hidden in the shared library.
|
data/ext/prism/extension.c
CHANGED
@@ -21,12 +21,15 @@ VALUE rb_cPrismParseError;
|
|
21
21
|
VALUE rb_cPrismParseWarning;
|
22
22
|
VALUE rb_cPrismParseResult;
|
23
23
|
|
24
|
-
|
24
|
+
VALUE rb_cPrismDebugEncoding;
|
25
|
+
|
26
|
+
ID rb_option_id_command_line;
|
25
27
|
ID rb_option_id_encoding;
|
26
|
-
ID
|
28
|
+
ID rb_option_id_filepath;
|
27
29
|
ID rb_option_id_frozen_string_literal;
|
28
|
-
ID
|
30
|
+
ID rb_option_id_line;
|
29
31
|
ID rb_option_id_scopes;
|
32
|
+
ID rb_option_id_version;
|
30
33
|
|
31
34
|
/******************************************************************************/
|
32
35
|
/* IO of Ruby code */
|
@@ -82,7 +85,9 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
|
|
82
85
|
|
83
86
|
// Initialize the scopes array.
|
84
87
|
size_t scopes_count = RARRAY_LEN(scopes);
|
85
|
-
pm_options_scopes_init(options, scopes_count)
|
88
|
+
if (!pm_options_scopes_init(options, scopes_count)) {
|
89
|
+
rb_raise(rb_eNoMemError, "failed to allocate memory");
|
90
|
+
}
|
86
91
|
|
87
92
|
// Iterate over the scopes and add them to the options.
|
88
93
|
for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
|
@@ -97,7 +102,9 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
|
|
97
102
|
// Initialize the scope array.
|
98
103
|
size_t locals_count = RARRAY_LEN(scope);
|
99
104
|
pm_options_scope_t *options_scope = &options->scopes[scope_index];
|
100
|
-
pm_options_scope_init(options_scope, locals_count)
|
105
|
+
if (!pm_options_scope_init(options_scope, locals_count)) {
|
106
|
+
rb_raise(rb_eNoMemError, "failed to allocate memory");
|
107
|
+
}
|
101
108
|
|
102
109
|
// Iterate over the locals and add them to the scope.
|
103
110
|
for (size_t local_index = 0; local_index < locals_count; local_index++) {
|
@@ -132,19 +139,38 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
|
|
132
139
|
} else if (key_id == rb_option_id_line) {
|
133
140
|
if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
|
134
141
|
} else if (key_id == rb_option_id_frozen_string_literal) {
|
135
|
-
if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value
|
142
|
+
if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, RTEST(value));
|
136
143
|
} else if (key_id == rb_option_id_version) {
|
137
144
|
if (!NIL_P(value)) {
|
138
145
|
const char *version = check_string(value);
|
139
146
|
|
140
147
|
if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
|
141
|
-
rb_raise(rb_eArgError, "invalid version: %"PRIsVALUE, value);
|
148
|
+
rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value);
|
142
149
|
}
|
143
150
|
}
|
144
151
|
} else if (key_id == rb_option_id_scopes) {
|
145
152
|
if (!NIL_P(value)) build_options_scopes(options, value);
|
153
|
+
} else if (key_id == rb_option_id_command_line) {
|
154
|
+
if (!NIL_P(value)) {
|
155
|
+
const char *string = check_string(value);
|
156
|
+
uint8_t command_line = 0;
|
157
|
+
|
158
|
+
for (size_t index = 0; index < strlen(string); index++) {
|
159
|
+
switch (string[index]) {
|
160
|
+
case 'a': command_line |= PM_OPTIONS_COMMAND_LINE_A; break;
|
161
|
+
case 'e': command_line |= PM_OPTIONS_COMMAND_LINE_E; break;
|
162
|
+
case 'l': command_line |= PM_OPTIONS_COMMAND_LINE_L; break;
|
163
|
+
case 'n': command_line |= PM_OPTIONS_COMMAND_LINE_N; break;
|
164
|
+
case 'p': command_line |= PM_OPTIONS_COMMAND_LINE_P; break;
|
165
|
+
case 'x': command_line |= PM_OPTIONS_COMMAND_LINE_X; break;
|
166
|
+
default: rb_raise(rb_eArgError, "invalid command line flag: '%c'", string[index]); break;
|
167
|
+
}
|
168
|
+
}
|
169
|
+
|
170
|
+
pm_options_command_line_set(options, command_line);
|
171
|
+
}
|
146
172
|
} else {
|
147
|
-
rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
|
173
|
+
rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key);
|
148
174
|
}
|
149
175
|
|
150
176
|
return ST_CONTINUE;
|
@@ -228,7 +254,7 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
|
|
228
254
|
|
229
255
|
const char * string_source = (const char *) pm_string_source(&options->filepath);
|
230
256
|
|
231
|
-
if (!
|
257
|
+
if (!pm_string_file_init(input, string_source)) {
|
232
258
|
pm_options_free(options);
|
233
259
|
|
234
260
|
#ifdef _WIN32
|
@@ -241,6 +267,8 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
|
|
241
267
|
}
|
242
268
|
}
|
243
269
|
|
270
|
+
#ifndef PRISM_EXCLUDE_SERIALIZATION
|
271
|
+
|
244
272
|
/******************************************************************************/
|
245
273
|
/* Serializing the AST */
|
246
274
|
/******************************************************************************/
|
@@ -282,17 +310,17 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|
282
310
|
pm_options_t options = { 0 };
|
283
311
|
string_options(argc, argv, &input, &options);
|
284
312
|
|
285
|
-
#ifdef
|
313
|
+
#ifdef PRISM_BUILD_DEBUG
|
286
314
|
size_t length = pm_string_length(&input);
|
287
|
-
char* dup =
|
315
|
+
char* dup = xmalloc(length);
|
288
316
|
memcpy(dup, pm_string_source(&input), length);
|
289
317
|
pm_string_constant_init(&input, dup, length);
|
290
318
|
#endif
|
291
319
|
|
292
320
|
VALUE value = dump_input(&input, &options);
|
293
321
|
|
294
|
-
#ifdef
|
295
|
-
|
322
|
+
#ifdef PRISM_BUILD_DEBUG
|
323
|
+
xfree(dup);
|
296
324
|
#endif
|
297
325
|
|
298
326
|
pm_string_free(&input);
|
@@ -322,6 +350,8 @@ dump_file(int argc, VALUE *argv, VALUE self) {
|
|
322
350
|
return value;
|
323
351
|
}
|
324
352
|
|
353
|
+
#endif
|
354
|
+
|
325
355
|
/******************************************************************************/
|
326
356
|
/* Extracting values for the parse result */
|
327
357
|
/******************************************************************************/
|
@@ -415,23 +445,27 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
|
415
445
|
|
416
446
|
VALUE level = Qnil;
|
417
447
|
switch (error->level) {
|
418
|
-
case
|
419
|
-
level = ID2SYM(rb_intern("
|
448
|
+
case PM_ERROR_LEVEL_SYNTAX:
|
449
|
+
level = ID2SYM(rb_intern("syntax"));
|
420
450
|
break;
|
421
451
|
case PM_ERROR_LEVEL_ARGUMENT:
|
422
452
|
level = ID2SYM(rb_intern("argument"));
|
423
453
|
break;
|
454
|
+
case PM_ERROR_LEVEL_LOAD:
|
455
|
+
level = ID2SYM(rb_intern("load"));
|
456
|
+
break;
|
424
457
|
default:
|
425
458
|
rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
|
426
459
|
}
|
427
460
|
|
428
461
|
VALUE error_argv[] = {
|
462
|
+
ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id))),
|
429
463
|
rb_enc_str_new_cstr(error->message, encoding),
|
430
464
|
rb_class_new_instance(3, location_argv, rb_cPrismLocation),
|
431
465
|
level
|
432
466
|
};
|
433
467
|
|
434
|
-
rb_ary_push(errors, rb_class_new_instance(
|
468
|
+
rb_ary_push(errors, rb_class_new_instance(4, error_argv, rb_cPrismParseError));
|
435
469
|
}
|
436
470
|
|
437
471
|
return errors;
|
@@ -465,17 +499,36 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
|
465
499
|
}
|
466
500
|
|
467
501
|
VALUE warning_argv[] = {
|
502
|
+
ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id))),
|
468
503
|
rb_enc_str_new_cstr(warning->message, encoding),
|
469
504
|
rb_class_new_instance(3, location_argv, rb_cPrismLocation),
|
470
505
|
level
|
471
506
|
};
|
472
507
|
|
473
|
-
rb_ary_push(warnings, rb_class_new_instance(
|
508
|
+
rb_ary_push(warnings, rb_class_new_instance(4, warning_argv, rb_cPrismParseWarning));
|
474
509
|
}
|
475
510
|
|
476
511
|
return warnings;
|
477
512
|
}
|
478
513
|
|
514
|
+
/**
|
515
|
+
* Create a new parse result from the given parser, value, encoding, and source.
|
516
|
+
*/
|
517
|
+
static VALUE
|
518
|
+
parse_result_create(pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
|
519
|
+
VALUE result_argv[] = {
|
520
|
+
value,
|
521
|
+
parser_comments(parser, source),
|
522
|
+
parser_magic_comments(parser, source),
|
523
|
+
parser_data_loc(parser, source),
|
524
|
+
parser_errors(parser, encoding, source),
|
525
|
+
parser_warnings(parser, encoding, source),
|
526
|
+
source
|
527
|
+
};
|
528
|
+
|
529
|
+
return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
|
530
|
+
}
|
531
|
+
|
479
532
|
/******************************************************************************/
|
480
533
|
/* Lexing Ruby code */
|
481
534
|
/******************************************************************************/
|
@@ -582,19 +635,11 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
|
|
582
635
|
value = parse_lex_data.tokens;
|
583
636
|
}
|
584
637
|
|
585
|
-
VALUE
|
586
|
-
value,
|
587
|
-
parser_comments(&parser, source),
|
588
|
-
parser_magic_comments(&parser, source),
|
589
|
-
parser_data_loc(&parser, source),
|
590
|
-
parser_errors(&parser, parse_lex_data.encoding, source),
|
591
|
-
parser_warnings(&parser, parse_lex_data.encoding, source),
|
592
|
-
source
|
593
|
-
};
|
594
|
-
|
638
|
+
VALUE result = parse_result_create(&parser, value, parse_lex_data.encoding, source);
|
595
639
|
pm_node_destroy(&parser, node);
|
596
640
|
pm_parser_free(&parser);
|
597
|
-
|
641
|
+
|
642
|
+
return result;
|
598
643
|
}
|
599
644
|
|
600
645
|
/**
|
@@ -654,17 +699,8 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
|
|
654
699
|
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
655
700
|
|
656
701
|
VALUE source = pm_source_new(&parser, encoding);
|
657
|
-
VALUE
|
658
|
-
|
659
|
-
parser_comments(&parser, source),
|
660
|
-
parser_magic_comments(&parser, source),
|
661
|
-
parser_data_loc(&parser, source),
|
662
|
-
parser_errors(&parser, encoding, source),
|
663
|
-
parser_warnings(&parser, encoding, source),
|
664
|
-
source
|
665
|
-
};
|
666
|
-
|
667
|
-
VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
|
702
|
+
VALUE value = pm_ast_new(&parser, node, encoding, source);
|
703
|
+
VALUE result = parse_result_create(&parser, value, encoding, source) ;
|
668
704
|
|
669
705
|
pm_node_destroy(&parser, node);
|
670
706
|
pm_parser_free(&parser);
|
@@ -679,21 +715,25 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
|
|
679
715
|
* Parse the given string and return a ParseResult instance. The options that
|
680
716
|
* are supported are:
|
681
717
|
*
|
682
|
-
* * `
|
683
|
-
*
|
718
|
+
* * `command_line` - either nil or a string of the various options that were
|
719
|
+
* set on the command line. Valid values are combinations of "a", "e", "l",
|
720
|
+
* "n", "p", and "x".
|
684
721
|
* * `encoding` - the encoding of the source being parsed. This should be an
|
685
|
-
* encoding or nil
|
686
|
-
* * `
|
687
|
-
*
|
722
|
+
* encoding or nil.
|
723
|
+
* * `filepath` - the filepath of the source being parsed. This should be a
|
724
|
+
* string or nil.
|
688
725
|
* * `frozen_string_literal` - whether or not the frozen string literal pragma
|
689
726
|
* has been set. This should be a boolean or nil.
|
690
|
-
* * `
|
691
|
-
*
|
692
|
-
* prism (which you can trigger with `nil` or `"latest"`). If you want to
|
693
|
-
* parse exactly as CRuby 3.3.0 would, then you can pass `"3.3.0"`.
|
727
|
+
* * `line` - the line number that the parse starts on. This should be an
|
728
|
+
* integer or nil. Note that this is 1-indexed.
|
694
729
|
* * `scopes` - the locals that are in scope surrounding the code that is being
|
695
730
|
* parsed. This should be an array of arrays of symbols or nil. Scopes are
|
696
731
|
* ordered from the outermost scope to the innermost one.
|
732
|
+
* * `version` - the version of Ruby syntax that prism should use to parse Ruby
|
733
|
+
* code. By default prism assumes you want to parse with the latest version
|
734
|
+
* of Ruby syntax (which you can trigger with `nil` or `"latest"`). You
|
735
|
+
* may also restrict the syntax to a specific version of Ruby. The
|
736
|
+
* supported values are `"3.3.0"` and `"3.4.0"`.
|
697
737
|
*/
|
698
738
|
static VALUE
|
699
739
|
parse(int argc, VALUE *argv, VALUE self) {
|
@@ -701,17 +741,17 @@ parse(int argc, VALUE *argv, VALUE self) {
|
|
701
741
|
pm_options_t options = { 0 };
|
702
742
|
string_options(argc, argv, &input, &options);
|
703
743
|
|
704
|
-
#ifdef
|
744
|
+
#ifdef PRISM_BUILD_DEBUG
|
705
745
|
size_t length = pm_string_length(&input);
|
706
|
-
char* dup =
|
746
|
+
char* dup = xmalloc(length);
|
707
747
|
memcpy(dup, pm_string_source(&input), length);
|
708
748
|
pm_string_constant_init(&input, dup, length);
|
709
749
|
#endif
|
710
750
|
|
711
751
|
VALUE value = parse_input(&input, &options);
|
712
752
|
|
713
|
-
#ifdef
|
714
|
-
|
753
|
+
#ifdef PRISM_BUILD_DEBUG
|
754
|
+
xfree(dup);
|
715
755
|
#endif
|
716
756
|
|
717
757
|
pm_string_free(&input);
|
@@ -719,6 +759,60 @@ parse(int argc, VALUE *argv, VALUE self) {
|
|
719
759
|
return value;
|
720
760
|
}
|
721
761
|
|
762
|
+
/**
|
763
|
+
* An implementation of fgets that is suitable for use with Ruby IO objects.
|
764
|
+
*/
|
765
|
+
static char *
|
766
|
+
parse_stream_fgets(char *string, int size, void *stream) {
|
767
|
+
RUBY_ASSERT(size > 0);
|
768
|
+
|
769
|
+
VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
|
770
|
+
if (NIL_P(line)) {
|
771
|
+
return NULL;
|
772
|
+
}
|
773
|
+
|
774
|
+
const char *cstr = StringValueCStr(line);
|
775
|
+
size_t length = strlen(cstr);
|
776
|
+
|
777
|
+
memcpy(string, cstr, length);
|
778
|
+
string[length] = '\0';
|
779
|
+
|
780
|
+
return string;
|
781
|
+
}
|
782
|
+
|
783
|
+
/**
|
784
|
+
* call-seq:
|
785
|
+
* Prism::parse_stream(stream, **options) -> ParseResult
|
786
|
+
*
|
787
|
+
* Parse the given object that responds to `gets` and return a ParseResult
|
788
|
+
* instance. The options that are supported are the same as Prism::parse.
|
789
|
+
*/
|
790
|
+
static VALUE
|
791
|
+
parse_stream(int argc, VALUE *argv, VALUE self) {
|
792
|
+
VALUE stream;
|
793
|
+
VALUE keywords;
|
794
|
+
rb_scan_args(argc, argv, "1:", &stream, &keywords);
|
795
|
+
|
796
|
+
pm_options_t options = { 0 };
|
797
|
+
extract_options(&options, Qnil, keywords);
|
798
|
+
|
799
|
+
pm_parser_t parser;
|
800
|
+
pm_buffer_t buffer;
|
801
|
+
|
802
|
+
pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
|
803
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
804
|
+
|
805
|
+
VALUE source = pm_source_new(&parser, encoding);
|
806
|
+
VALUE value = pm_ast_new(&parser, node, encoding, source);
|
807
|
+
VALUE result = parse_result_create(&parser, value, encoding, source);
|
808
|
+
|
809
|
+
pm_node_destroy(&parser, node);
|
810
|
+
pm_buffer_free(&buffer);
|
811
|
+
pm_parser_free(&parser);
|
812
|
+
|
813
|
+
return result;
|
814
|
+
}
|
815
|
+
|
722
816
|
/**
|
723
817
|
* call-seq:
|
724
818
|
* Prism::parse_file(filepath, **options) -> ParseResult
|
@@ -945,6 +1039,35 @@ named_captures(VALUE self, VALUE source) {
|
|
945
1039
|
return names;
|
946
1040
|
}
|
947
1041
|
|
1042
|
+
/**
|
1043
|
+
* call-seq:
|
1044
|
+
* Debug::integer_parse(source) -> [Integer, String]
|
1045
|
+
*
|
1046
|
+
* Parses the given source string and returns the integer it represents, as well
|
1047
|
+
* as a decimal string representation.
|
1048
|
+
*/
|
1049
|
+
static VALUE
|
1050
|
+
integer_parse(VALUE self, VALUE source) {
|
1051
|
+
const uint8_t *start = (const uint8_t *) RSTRING_PTR(source);
|
1052
|
+
size_t length = RSTRING_LEN(source);
|
1053
|
+
|
1054
|
+
pm_integer_t integer = { 0 };
|
1055
|
+
pm_integer_parse(&integer, PM_INTEGER_BASE_UNKNOWN, start, start + length);
|
1056
|
+
|
1057
|
+
pm_buffer_t buffer = { 0 };
|
1058
|
+
pm_integer_string(&buffer, &integer);
|
1059
|
+
|
1060
|
+
VALUE string = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
|
1061
|
+
pm_buffer_free(&buffer);
|
1062
|
+
|
1063
|
+
VALUE result = rb_ary_new_capa(2);
|
1064
|
+
rb_ary_push(result, pm_integer_new(&integer));
|
1065
|
+
rb_ary_push(result, string);
|
1066
|
+
pm_integer_free(&integer);
|
1067
|
+
|
1068
|
+
return result;
|
1069
|
+
}
|
1070
|
+
|
948
1071
|
/**
|
949
1072
|
* call-seq:
|
950
1073
|
* Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
|
@@ -1010,6 +1133,8 @@ profile_file(VALUE self, VALUE filepath) {
|
|
1010
1133
|
return Qnil;
|
1011
1134
|
}
|
1012
1135
|
|
1136
|
+
#ifndef PRISM_EXCLUDE_PRETTYPRINT
|
1137
|
+
|
1013
1138
|
/**
|
1014
1139
|
* call-seq:
|
1015
1140
|
* Debug::inspect_node(source) -> inspected
|
@@ -1040,6 +1165,8 @@ inspect_node(VALUE self, VALUE source) {
|
|
1040
1165
|
return string;
|
1041
1166
|
}
|
1042
1167
|
|
1168
|
+
#endif
|
1169
|
+
|
1043
1170
|
/**
|
1044
1171
|
* call-seq:
|
1045
1172
|
* Debug::format_errors(source, colorize) -> String
|
@@ -1057,7 +1184,7 @@ format_errors(VALUE self, VALUE source, VALUE colorize) {
|
|
1057
1184
|
pm_node_t *node = pm_parse(&parser);
|
1058
1185
|
pm_buffer_t buffer = { 0 };
|
1059
1186
|
|
1060
|
-
pm_parser_errors_format(&parser, &buffer, RTEST(colorize));
|
1187
|
+
pm_parser_errors_format(&parser, &parser.error_list, &buffer, RTEST(colorize), true);
|
1061
1188
|
|
1062
1189
|
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
1063
1190
|
VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
|
@@ -1070,6 +1197,114 @@ format_errors(VALUE self, VALUE source, VALUE colorize) {
|
|
1070
1197
|
return result;
|
1071
1198
|
}
|
1072
1199
|
|
1200
|
+
/**
|
1201
|
+
* call-seq:
|
1202
|
+
* Debug::static_inspect(source) -> String
|
1203
|
+
*
|
1204
|
+
* Inspect the node as it would be inspected by the warnings used in static
|
1205
|
+
* literal sets.
|
1206
|
+
*/
|
1207
|
+
static VALUE
|
1208
|
+
static_inspect(int argc, VALUE *argv, VALUE self) {
|
1209
|
+
pm_string_t input;
|
1210
|
+
pm_options_t options = { 0 };
|
1211
|
+
string_options(argc, argv, &input, &options);
|
1212
|
+
|
1213
|
+
pm_parser_t parser;
|
1214
|
+
pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
|
1215
|
+
|
1216
|
+
pm_node_t *program = pm_parse(&parser);
|
1217
|
+
pm_node_t *node = ((pm_program_node_t *) program)->statements->body.nodes[0];
|
1218
|
+
|
1219
|
+
pm_buffer_t buffer = { 0 };
|
1220
|
+
pm_static_literal_inspect(&buffer, &parser, node);
|
1221
|
+
|
1222
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
1223
|
+
VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
|
1224
|
+
|
1225
|
+
pm_buffer_free(&buffer);
|
1226
|
+
pm_node_destroy(&parser, program);
|
1227
|
+
pm_parser_free(&parser);
|
1228
|
+
pm_string_free(&input);
|
1229
|
+
pm_options_free(&options);
|
1230
|
+
|
1231
|
+
return result;
|
1232
|
+
}
|
1233
|
+
|
1234
|
+
/**
|
1235
|
+
* call-seq: Debug::Encoding.all -> Array[Debug::Encoding]
|
1236
|
+
*
|
1237
|
+
* Return an array of all of the encodings that prism knows about.
|
1238
|
+
*/
|
1239
|
+
static VALUE
|
1240
|
+
encoding_all(VALUE self) {
|
1241
|
+
VALUE encodings = rb_ary_new();
|
1242
|
+
|
1243
|
+
for (size_t index = 0; index < PM_ENCODING_MAXIMUM; index++) {
|
1244
|
+
const pm_encoding_t *encoding = &pm_encodings[index];
|
1245
|
+
|
1246
|
+
VALUE encoding_argv[] = { rb_str_new_cstr(encoding->name), encoding->multibyte ? Qtrue : Qfalse };
|
1247
|
+
rb_ary_push(encodings, rb_class_new_instance(2, encoding_argv, rb_cPrismDebugEncoding));
|
1248
|
+
}
|
1249
|
+
|
1250
|
+
return encodings;
|
1251
|
+
}
|
1252
|
+
|
1253
|
+
static const pm_encoding_t *
|
1254
|
+
encoding_find(VALUE name) {
|
1255
|
+
const uint8_t *source = (const uint8_t *) RSTRING_PTR(name);
|
1256
|
+
size_t length = RSTRING_LEN(name);
|
1257
|
+
|
1258
|
+
const pm_encoding_t *encoding = pm_encoding_find(source, source + length);
|
1259
|
+
if (encoding == NULL) { rb_raise(rb_eArgError, "Unknown encoding: %s", source); }
|
1260
|
+
|
1261
|
+
return encoding;
|
1262
|
+
}
|
1263
|
+
|
1264
|
+
/**
|
1265
|
+
* call-seq: Debug::Encoding.width(source) -> Integer
|
1266
|
+
*
|
1267
|
+
* Returns the width of the first character in the given string if it is valid
|
1268
|
+
* in the encoding. If it is not, this function returns 0.
|
1269
|
+
*/
|
1270
|
+
static VALUE
|
1271
|
+
encoding_char_width(VALUE self, VALUE name, VALUE value) {
|
1272
|
+
return ULONG2NUM(encoding_find(name)->char_width((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)));
|
1273
|
+
}
|
1274
|
+
|
1275
|
+
/**
|
1276
|
+
* call-seq: Debug::Encoding.alnum?(source) -> true | false
|
1277
|
+
*
|
1278
|
+
* Returns true if the first character in the given string is an alphanumeric
|
1279
|
+
* character in the encoding.
|
1280
|
+
*/
|
1281
|
+
static VALUE
|
1282
|
+
encoding_alnum_char(VALUE self, VALUE name, VALUE value) {
|
1283
|
+
return encoding_find(name)->alnum_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
|
1284
|
+
}
|
1285
|
+
|
1286
|
+
/**
|
1287
|
+
* call-seq: Debug::Encoding.alpha?(source) -> true | false
|
1288
|
+
*
|
1289
|
+
* Returns true if the first character in the given string is an alphabetic
|
1290
|
+
* character in the encoding.
|
1291
|
+
*/
|
1292
|
+
static VALUE
|
1293
|
+
encoding_alpha_char(VALUE self, VALUE name, VALUE value) {
|
1294
|
+
return encoding_find(name)->alpha_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
|
1295
|
+
}
|
1296
|
+
|
1297
|
+
/**
|
1298
|
+
* call-seq: Debug::Encoding.upper?(source) -> true | false
|
1299
|
+
*
|
1300
|
+
* Returns true if the first character in the given string is an uppercase
|
1301
|
+
* character in the encoding.
|
1302
|
+
*/
|
1303
|
+
static VALUE
|
1304
|
+
encoding_isupper_char(VALUE self, VALUE name, VALUE value) {
|
1305
|
+
return encoding_find(name)->isupper_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) ? Qtrue : Qfalse;
|
1306
|
+
}
|
1307
|
+
|
1073
1308
|
/******************************************************************************/
|
1074
1309
|
/* Initialization of the extension */
|
1075
1310
|
/******************************************************************************/
|
@@ -1107,31 +1342,24 @@ Init_prism(void) {
|
|
1107
1342
|
|
1108
1343
|
// Intern all of the options that we support so that we don't have to do it
|
1109
1344
|
// every time we parse.
|
1110
|
-
|
1345
|
+
rb_option_id_command_line = rb_intern_const("command_line");
|
1111
1346
|
rb_option_id_encoding = rb_intern_const("encoding");
|
1112
|
-
|
1347
|
+
rb_option_id_filepath = rb_intern_const("filepath");
|
1113
1348
|
rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
|
1114
|
-
|
1349
|
+
rb_option_id_line = rb_intern_const("line");
|
1115
1350
|
rb_option_id_scopes = rb_intern_const("scopes");
|
1351
|
+
rb_option_id_version = rb_intern_const("version");
|
1116
1352
|
|
1117
1353
|
/**
|
1118
1354
|
* The version of the prism library.
|
1119
1355
|
*/
|
1120
1356
|
rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
|
1121
1357
|
|
1122
|
-
/**
|
1123
|
-
* The backend of the parser that prism is using to parse Ruby code. This
|
1124
|
-
* can be either :CEXT or :FFI. On runtimes that support C extensions, we
|
1125
|
-
* default to :CEXT. Otherwise we use :FFI.
|
1126
|
-
*/
|
1127
|
-
rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
|
1128
|
-
|
1129
1358
|
// First, the functions that have to do with lexing and parsing.
|
1130
|
-
rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
|
1131
|
-
rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
|
1132
1359
|
rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
|
1133
1360
|
rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
|
1134
1361
|
rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
|
1362
|
+
rb_define_singleton_method(rb_cPrism, "parse_stream", parse_stream, -1);
|
1135
1363
|
rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
|
1136
1364
|
rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
|
1137
1365
|
rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
|
@@ -1140,14 +1368,33 @@ Init_prism(void) {
|
|
1140
1368
|
rb_define_singleton_method(rb_cPrism, "parse_success?", parse_success_p, -1);
|
1141
1369
|
rb_define_singleton_method(rb_cPrism, "parse_file_success?", parse_file_success_p, -1);
|
1142
1370
|
|
1371
|
+
#ifndef PRISM_EXCLUDE_SERIALIZATION
|
1372
|
+
rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
|
1373
|
+
rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
|
1374
|
+
#endif
|
1375
|
+
|
1143
1376
|
// Next, the functions that will be called by the parser to perform various
|
1144
1377
|
// internal tasks. We expose these to make them easier to test.
|
1145
1378
|
VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
|
1146
1379
|
rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
|
1380
|
+
rb_define_singleton_method(rb_cPrismDebug, "integer_parse", integer_parse, 1);
|
1147
1381
|
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
|
1148
1382
|
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
|
1149
|
-
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
|
1150
1383
|
rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
|
1384
|
+
rb_define_singleton_method(rb_cPrismDebug, "static_inspect", static_inspect, -1);
|
1385
|
+
|
1386
|
+
#ifndef PRISM_EXCLUDE_PRETTYPRINT
|
1387
|
+
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
|
1388
|
+
#endif
|
1389
|
+
|
1390
|
+
// Next, define the functions that are exposed through the private
|
1391
|
+
// Debug::Encoding class.
|
1392
|
+
rb_cPrismDebugEncoding = rb_define_class_under(rb_cPrismDebug, "Encoding", rb_cObject);
|
1393
|
+
rb_define_singleton_method(rb_cPrismDebugEncoding, "all", encoding_all, 0);
|
1394
|
+
rb_define_singleton_method(rb_cPrismDebugEncoding, "_width", encoding_char_width, 2);
|
1395
|
+
rb_define_singleton_method(rb_cPrismDebugEncoding, "_alnum?", encoding_alnum_char, 2);
|
1396
|
+
rb_define_singleton_method(rb_cPrismDebugEncoding, "_alpha?", encoding_alpha_char, 2);
|
1397
|
+
rb_define_singleton_method(rb_cPrismDebugEncoding, "_upper?", encoding_isupper_char, 2);
|
1151
1398
|
|
1152
1399
|
// Next, initialize the other APIs.
|
1153
1400
|
Init_prism_api_node();
|