prism 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -1
- data/README.md +1 -0
- data/config.yml +257 -20
- data/docs/parsing_rules.md +4 -1
- data/ext/prism/extension.c +63 -26
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +559 -327
- data/include/prism/defines.h +27 -0
- data/include/prism/diagnostic.h +5 -1
- data/include/prism/options.h +39 -2
- data/include/prism/parser.h +7 -0
- data/include/prism/util/pm_string.h +27 -4
- data/include/prism/version.h +2 -2
- data/lib/prism/dot_visitor.rb +2 -0
- data/lib/prism/dsl.rb +10 -8
- data/lib/prism/ffi.rb +37 -3
- data/lib/prism/inspect_visitor.rb +1 -1
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/node.rb +132 -89
- data/lib/prism/parse_result.rb +1 -1
- data/lib/prism/reflection.rb +1 -1
- data/lib/prism/serialize.rb +6 -2
- data/lib/prism/translation/parser/lexer.rb +25 -3
- data/lib/prism/translation/ruby_parser.rb +7 -1
- data/prism.gemspec +1 -2
- data/rbi/prism/dsl.rbi +32 -32
- data/rbi/prism/node.rbi +69 -59
- data/rbi/prism.rbi +34 -34
- data/sig/prism/dsl.rbs +24 -24
- data/sig/prism/node.rbs +113 -105
- data/sig/prism.rbs +90 -72
- data/src/diagnostic.c +15 -7
- data/src/node.c +10 -0
- data/src/options.c +58 -27
- data/src/prettyprint.c +10 -0
- data/src/prism.c +588 -385
- data/src/util/pm_string.c +123 -65
- metadata +2 -3
- data/lib/prism/translation/parser/rubocop.rb +0 -73
data/docs/parsing_rules.md
CHANGED
@@ -12,7 +12,10 @@ Constants in Ruby begin with an upper-case letter. This is followed by any numbe
|
|
12
12
|
|
13
13
|
Most expressions in CRuby are non-void. This means the expression they represent resolves to a value. For example, `1 + 2` is a non-void expression, because it resolves to a method call. Even something like `class Foo; end` is a non-void expression, because it returns the last evaluated expression in the body of the class (or `nil`).
|
14
14
|
|
15
|
-
Certain nodes, however, are void expressions, and cannot be combined to form larger expressions.
|
15
|
+
Certain nodes, however, are void expressions, and cannot be combined to form larger expressions.
|
16
|
+
* `BEGIN {}`, `END {}`, `alias foo bar`, and `undef foo` can only be at a statement position.
|
17
|
+
* The "jumps": `return`, `break`, `next`, `redo`, `retry` are void expressions.
|
18
|
+
* `value => pattern` is also considered a void expression.
|
16
19
|
|
17
20
|
## Identifiers
|
18
21
|
|
data/ext/prism/extension.c
CHANGED
@@ -31,6 +31,8 @@ ID rb_id_option_encoding;
|
|
31
31
|
ID rb_id_option_filepath;
|
32
32
|
ID rb_id_option_frozen_string_literal;
|
33
33
|
ID rb_id_option_line;
|
34
|
+
ID rb_id_option_main_script;
|
35
|
+
ID rb_id_option_partial_script;
|
34
36
|
ID rb_id_option_scopes;
|
35
37
|
ID rb_id_option_version;
|
36
38
|
ID rb_id_source_for;
|
@@ -40,17 +42,11 @@ ID rb_id_source_for;
|
|
40
42
|
/******************************************************************************/
|
41
43
|
|
42
44
|
/**
|
43
|
-
* Check if the given VALUE is a string. If it's
|
44
|
-
*
|
45
|
-
* string.
|
45
|
+
* Check if the given VALUE is a string. If it's not a string, then raise a
|
46
|
+
* TypeError. Otherwise return the VALUE as a C string.
|
46
47
|
*/
|
47
48
|
static const char *
|
48
49
|
check_string(VALUE value) {
|
49
|
-
// If the value is nil, then we don't need to do anything.
|
50
|
-
if (NIL_P(value)) {
|
51
|
-
return NULL;
|
52
|
-
}
|
53
|
-
|
54
50
|
// Check if the value is a string. If it's not, then raise a type error.
|
55
51
|
if (!RB_TYPE_P(value, T_STRING)) {
|
56
52
|
rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(value));
|
@@ -179,6 +175,10 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
|
|
179
175
|
|
180
176
|
pm_options_command_line_set(options, command_line);
|
181
177
|
}
|
178
|
+
} else if (key_id == rb_id_option_main_script) {
|
179
|
+
if (!NIL_P(value)) pm_options_main_script_set(options, RTEST(value));
|
180
|
+
} else if (key_id == rb_id_option_partial_script) {
|
181
|
+
if (!NIL_P(value)) pm_options_partial_script_set(options, RTEST(value));
|
182
182
|
} else {
|
183
183
|
rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key);
|
184
184
|
}
|
@@ -254,27 +254,41 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options)
|
|
254
254
|
* Read options for methods that look like (filepath, **options).
|
255
255
|
*/
|
256
256
|
static void
|
257
|
-
file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
|
257
|
+
file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, VALUE *encoded_filepath) {
|
258
258
|
VALUE filepath;
|
259
259
|
VALUE keywords;
|
260
260
|
rb_scan_args(argc, argv, "1:", &filepath, &keywords);
|
261
261
|
|
262
262
|
Check_Type(filepath, T_STRING);
|
263
|
+
*encoded_filepath = rb_str_encode_ospath(filepath);
|
264
|
+
extract_options(options, *encoded_filepath, keywords);
|
263
265
|
|
264
|
-
|
266
|
+
const char *source = (const char *) pm_string_source(&options->filepath);
|
267
|
+
pm_string_init_result_t result;
|
265
268
|
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
269
|
+
switch (result = pm_string_file_init(input, source)) {
|
270
|
+
case PM_STRING_INIT_SUCCESS:
|
271
|
+
break;
|
272
|
+
case PM_STRING_INIT_ERROR_GENERIC: {
|
273
|
+
pm_options_free(options);
|
270
274
|
|
271
275
|
#ifdef _WIN32
|
272
|
-
|
276
|
+
int e = rb_w32_map_errno(GetLastError());
|
273
277
|
#else
|
274
|
-
|
278
|
+
int e = errno;
|
275
279
|
#endif
|
276
280
|
|
277
|
-
|
281
|
+
rb_syserr_fail(e, source);
|
282
|
+
break;
|
283
|
+
}
|
284
|
+
case PM_STRING_INIT_ERROR_DIRECTORY:
|
285
|
+
pm_options_free(options);
|
286
|
+
rb_syserr_fail(EISDIR, source);
|
287
|
+
break;
|
288
|
+
default:
|
289
|
+
pm_options_free(options);
|
290
|
+
rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing file: %s", result, source);
|
291
|
+
break;
|
278
292
|
}
|
279
293
|
}
|
280
294
|
|
@@ -352,7 +366,8 @@ dump_file(int argc, VALUE *argv, VALUE self) {
|
|
352
366
|
pm_string_t input;
|
353
367
|
pm_options_t options = { 0 };
|
354
368
|
|
355
|
-
|
369
|
+
VALUE encoded_filepath;
|
370
|
+
file_options(argc, argv, &input, &options, &encoded_filepath);
|
356
371
|
|
357
372
|
VALUE value = dump_input(&input, &options);
|
358
373
|
pm_string_free(&input);
|
@@ -685,7 +700,8 @@ lex_file(int argc, VALUE *argv, VALUE self) {
|
|
685
700
|
pm_string_t input;
|
686
701
|
pm_options_t options = { 0 };
|
687
702
|
|
688
|
-
|
703
|
+
VALUE encoded_filepath;
|
704
|
+
file_options(argc, argv, &input, &options, &encoded_filepath);
|
689
705
|
|
690
706
|
VALUE value = parse_lex_input(&input, &options, false);
|
691
707
|
pm_string_free(&input);
|
@@ -737,14 +753,27 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
|
|
737
753
|
* has been set. This should be a boolean or nil.
|
738
754
|
* * `line` - the line number that the parse starts on. This should be an
|
739
755
|
* integer or nil. Note that this is 1-indexed.
|
756
|
+
* * `main_script` - a boolean indicating whether or not the source being parsed
|
757
|
+
* is the main script being run by the interpreter. This controls whether
|
758
|
+
* or not shebangs are parsed for additional flags and whether or not the
|
759
|
+
* parser will attempt to find a matching shebang if the first one does
|
760
|
+
* not contain the word "ruby".
|
761
|
+
* * `partial_script` - when the file being parsed is considered a "partial"
|
762
|
+
* script, jumps will not be marked as errors if they are not contained
|
763
|
+
* within loops/blocks. This is used in the case that you're parsing a
|
764
|
+
* script that you know will be embedded inside another script later, but
|
765
|
+
* you do not have that context yet. For example, when parsing an ERB
|
766
|
+
* template that will be evaluated inside another script.
|
740
767
|
* * `scopes` - the locals that are in scope surrounding the code that is being
|
741
768
|
* parsed. This should be an array of arrays of symbols or nil. Scopes are
|
742
769
|
* ordered from the outermost scope to the innermost one.
|
743
770
|
* * `version` - the version of Ruby syntax that prism should use to parse Ruby
|
744
771
|
* code. By default prism assumes you want to parse with the latest version
|
745
772
|
* of Ruby syntax (which you can trigger with `nil` or `"latest"`). You
|
746
|
-
* may also restrict the syntax to a specific version of Ruby.
|
747
|
-
*
|
773
|
+
* may also restrict the syntax to a specific version of Ruby, e.g., with `"3.3.0"`.
|
774
|
+
* To parse with the same syntax version that the current Ruby is running
|
775
|
+
* use `version: RUBY_VERSION`. Raises ArgumentError if the version is not
|
776
|
+
* currently supported by Prism.
|
748
777
|
*/
|
749
778
|
static VALUE
|
750
779
|
parse(int argc, VALUE *argv, VALUE self) {
|
@@ -782,7 +811,8 @@ parse_file(int argc, VALUE *argv, VALUE self) {
|
|
782
811
|
pm_string_t input;
|
783
812
|
pm_options_t options = { 0 };
|
784
813
|
|
785
|
-
|
814
|
+
VALUE encoded_filepath;
|
815
|
+
file_options(argc, argv, &input, &options, &encoded_filepath);
|
786
816
|
|
787
817
|
VALUE value = parse_input(&input, &options);
|
788
818
|
pm_string_free(&input);
|
@@ -838,7 +868,9 @@ profile_file(int argc, VALUE *argv, VALUE self) {
|
|
838
868
|
pm_string_t input;
|
839
869
|
pm_options_t options = { 0 };
|
840
870
|
|
841
|
-
|
871
|
+
VALUE encoded_filepath;
|
872
|
+
file_options(argc, argv, &input, &options, &encoded_filepath);
|
873
|
+
|
842
874
|
profile_input(&input, &options);
|
843
875
|
pm_string_free(&input);
|
844
876
|
pm_options_free(&options);
|
@@ -952,7 +984,8 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) {
|
|
952
984
|
pm_string_t input;
|
953
985
|
pm_options_t options = { 0 };
|
954
986
|
|
955
|
-
|
987
|
+
VALUE encoded_filepath;
|
988
|
+
file_options(argc, argv, &input, &options, &encoded_filepath);
|
956
989
|
|
957
990
|
VALUE value = parse_input_comments(&input, &options);
|
958
991
|
pm_string_free(&input);
|
@@ -1007,7 +1040,8 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) {
|
|
1007
1040
|
pm_string_t input;
|
1008
1041
|
pm_options_t options = { 0 };
|
1009
1042
|
|
1010
|
-
|
1043
|
+
VALUE encoded_filepath;
|
1044
|
+
file_options(argc, argv, &input, &options, &encoded_filepath);
|
1011
1045
|
|
1012
1046
|
VALUE value = parse_lex_input(&input, &options, true);
|
1013
1047
|
pm_string_free(&input);
|
@@ -1077,7 +1111,8 @@ parse_file_success_p(int argc, VALUE *argv, VALUE self) {
|
|
1077
1111
|
pm_string_t input;
|
1078
1112
|
pm_options_t options = { 0 };
|
1079
1113
|
|
1080
|
-
|
1114
|
+
VALUE encoded_filepath;
|
1115
|
+
file_options(argc, argv, &input, &options, &encoded_filepath);
|
1081
1116
|
|
1082
1117
|
VALUE result = parse_input_success_p(&input, &options);
|
1083
1118
|
pm_string_free(&input);
|
@@ -1143,6 +1178,8 @@ Init_prism(void) {
|
|
1143
1178
|
rb_id_option_filepath = rb_intern_const("filepath");
|
1144
1179
|
rb_id_option_frozen_string_literal = rb_intern_const("frozen_string_literal");
|
1145
1180
|
rb_id_option_line = rb_intern_const("line");
|
1181
|
+
rb_id_option_main_script = rb_intern_const("main_script");
|
1182
|
+
rb_id_option_partial_script = rb_intern_const("partial_script");
|
1146
1183
|
rb_id_option_scopes = rb_intern_const("scopes");
|
1147
1184
|
rb_id_option_version = rb_intern_const("version");
|
1148
1185
|
rb_id_source_for = rb_intern("for");
|