jruby-prism-parser 0.23.0.pre.SNAPSHOT-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +401 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +62 -0
- data/LICENSE.md +7 -0
- data/Makefile +101 -0
- data/README.md +98 -0
- data/config.yml +2902 -0
- data/docs/build_system.md +91 -0
- data/docs/configuration.md +64 -0
- data/docs/cruby_compilation.md +27 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +121 -0
- data/docs/fuzzing.md +88 -0
- data/docs/heredocs.md +36 -0
- data/docs/javascript.md +118 -0
- data/docs/local_variable_depth.md +229 -0
- data/docs/mapping.md +117 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +98 -0
- data/docs/ripper.md +36 -0
- data/docs/ruby_api.md +43 -0
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +209 -0
- data/docs/testing.md +55 -0
- data/ext/prism/api_node.c +5098 -0
- data/ext/prism/api_pack.c +267 -0
- data/ext/prism/extconf.rb +110 -0
- data/ext/prism/extension.c +1155 -0
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +5807 -0
- data/include/prism/defines.h +102 -0
- data/include/prism/diagnostic.h +339 -0
- data/include/prism/encoding.h +265 -0
- data/include/prism/node.h +57 -0
- data/include/prism/options.h +230 -0
- data/include/prism/pack.h +152 -0
- data/include/prism/parser.h +732 -0
- data/include/prism/prettyprint.h +26 -0
- data/include/prism/regexp.h +33 -0
- data/include/prism/util/pm_buffer.h +155 -0
- data/include/prism/util/pm_char.h +205 -0
- data/include/prism/util/pm_constant_pool.h +209 -0
- data/include/prism/util/pm_list.h +97 -0
- data/include/prism/util/pm_memchr.h +29 -0
- data/include/prism/util/pm_newline_list.h +93 -0
- data/include/prism/util/pm_state_stack.h +42 -0
- data/include/prism/util/pm_string.h +150 -0
- data/include/prism/util/pm_string_list.h +44 -0
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +46 -0
- data/include/prism/version.h +29 -0
- data/include/prism.h +289 -0
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +486 -0
- data/lib/prism/debug.rb +206 -0
- data/lib/prism/desugar_compiler.rb +207 -0
- data/lib/prism/dispatcher.rb +2150 -0
- data/lib/prism/dot_visitor.rb +4634 -0
- data/lib/prism/dsl.rb +785 -0
- data/lib/prism/ffi.rb +346 -0
- data/lib/prism/lex_compat.rb +908 -0
- data/lib/prism/mutation_compiler.rb +753 -0
- data/lib/prism/node.rb +17864 -0
- data/lib/prism/node_ext.rb +212 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/prism/pack.rb +224 -0
- data/lib/prism/parse_result/comments.rb +177 -0
- data/lib/prism/parse_result/newlines.rb +64 -0
- data/lib/prism/parse_result.rb +498 -0
- data/lib/prism/pattern.rb +250 -0
- data/lib/prism/serialize.rb +1354 -0
- data/lib/prism/translation/parser/compiler.rb +1838 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +37 -0
- data/lib/prism/translation/parser.rb +178 -0
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism/version.rb +3 -0
- data/lib/prism/visitor.rb +495 -0
- data/lib/prism.rb +99 -0
- data/prism.gemspec +135 -0
- data/rbi/prism.rbi +7767 -0
- data/rbi/prism_static.rbi +207 -0
- data/sig/prism.rbs +4773 -0
- data/sig/prism_static.rbs +201 -0
- data/src/diagnostic.c +400 -0
- data/src/encoding.c +5132 -0
- data/src/node.c +2786 -0
- data/src/options.c +213 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +8881 -0
- data/src/prism.c +18406 -0
- data/src/regexp.c +638 -0
- data/src/serialize.c +1554 -0
- data/src/token_type.c +700 -0
- data/src/util/pm_buffer.c +190 -0
- data/src/util/pm_char.c +318 -0
- data/src/util/pm_constant_pool.c +322 -0
- data/src/util/pm_list.c +49 -0
- data/src/util/pm_memchr.c +35 -0
- data/src/util/pm_newline_list.c +84 -0
- data/src/util/pm_state_stack.c +25 -0
- data/src/util/pm_string.c +203 -0
- data/src/util/pm_string_list.c +28 -0
- data/src/util/pm_strncasecmp.c +24 -0
- data/src/util/pm_strpbrk.c +180 -0
- metadata +156 -0
@@ -0,0 +1,1155 @@
|
|
1
|
+
#include "prism/extension.h"
|
2
|
+
|
3
|
+
#ifdef _WIN32
|
4
|
+
#include <ruby/win32.h>
|
5
|
+
#endif
|
6
|
+
|
7
|
+
// NOTE: this file should contain only bindings. All non-trivial logic should be
|
8
|
+
// in libprism so it can be shared with its various callers.
|
9
|
+
|
10
|
+
VALUE rb_cPrism;
|
11
|
+
VALUE rb_cPrismNode;
|
12
|
+
VALUE rb_cPrismSource;
|
13
|
+
VALUE rb_cPrismToken;
|
14
|
+
VALUE rb_cPrismLocation;
|
15
|
+
|
16
|
+
VALUE rb_cPrismComment;
|
17
|
+
VALUE rb_cPrismInlineComment;
|
18
|
+
VALUE rb_cPrismEmbDocComment;
|
19
|
+
VALUE rb_cPrismMagicComment;
|
20
|
+
VALUE rb_cPrismParseError;
|
21
|
+
VALUE rb_cPrismParseWarning;
|
22
|
+
VALUE rb_cPrismParseResult;
|
23
|
+
|
24
|
+
ID rb_option_id_filepath;
|
25
|
+
ID rb_option_id_encoding;
|
26
|
+
ID rb_option_id_line;
|
27
|
+
ID rb_option_id_frozen_string_literal;
|
28
|
+
ID rb_option_id_version;
|
29
|
+
ID rb_option_id_scopes;
|
30
|
+
|
31
|
+
/******************************************************************************/
|
32
|
+
/* IO of Ruby code */
|
33
|
+
/******************************************************************************/
|
34
|
+
|
35
|
+
/**
|
36
|
+
* Check if the given VALUE is a string. If it's nil, then return NULL. If it's
|
37
|
+
* not a string, then raise a type error. Otherwise return the VALUE as a C
|
38
|
+
* string.
|
39
|
+
*/
|
40
|
+
static const char *
|
41
|
+
check_string(VALUE value) {
|
42
|
+
// If the value is nil, then we don't need to do anything.
|
43
|
+
if (NIL_P(value)) {
|
44
|
+
return NULL;
|
45
|
+
}
|
46
|
+
|
47
|
+
// Check if the value is a string. If it's not, then raise a type error.
|
48
|
+
if (!RB_TYPE_P(value, T_STRING)) {
|
49
|
+
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(value));
|
50
|
+
}
|
51
|
+
|
52
|
+
// Otherwise, return the value as a C string.
|
53
|
+
return RSTRING_PTR(value);
|
54
|
+
}
|
55
|
+
|
56
|
+
/**
|
57
|
+
* Load the contents and size of the given string into the given pm_string_t.
|
58
|
+
*/
|
59
|
+
static void
|
60
|
+
input_load_string(pm_string_t *input, VALUE string) {
|
61
|
+
// Check if the string is a string. If it's not, then raise a type error.
|
62
|
+
if (!RB_TYPE_P(string, T_STRING)) {
|
63
|
+
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
|
64
|
+
}
|
65
|
+
|
66
|
+
pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
|
67
|
+
}
|
68
|
+
|
69
|
+
/******************************************************************************/
|
70
|
+
/* Building C options from Ruby options */
|
71
|
+
/******************************************************************************/
|
72
|
+
|
73
|
+
/**
|
74
|
+
* Build the scopes associated with the provided Ruby keyword value.
|
75
|
+
*/
|
76
|
+
static void
|
77
|
+
build_options_scopes(pm_options_t *options, VALUE scopes) {
|
78
|
+
// Check if the value is an array. If it's not, then raise a type error.
|
79
|
+
if (!RB_TYPE_P(scopes, T_ARRAY)) {
|
80
|
+
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
|
81
|
+
}
|
82
|
+
|
83
|
+
// Initialize the scopes array.
|
84
|
+
size_t scopes_count = RARRAY_LEN(scopes);
|
85
|
+
pm_options_scopes_init(options, scopes_count);
|
86
|
+
|
87
|
+
// Iterate over the scopes and add them to the options.
|
88
|
+
for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
|
89
|
+
VALUE scope = rb_ary_entry(scopes, scope_index);
|
90
|
+
|
91
|
+
// Check that the scope is an array. If it's not, then raise a type
|
92
|
+
// error.
|
93
|
+
if (!RB_TYPE_P(scope, T_ARRAY)) {
|
94
|
+
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
|
95
|
+
}
|
96
|
+
|
97
|
+
// Initialize the scope array.
|
98
|
+
size_t locals_count = RARRAY_LEN(scope);
|
99
|
+
pm_options_scope_t *options_scope = &options->scopes[scope_index];
|
100
|
+
pm_options_scope_init(options_scope, locals_count);
|
101
|
+
|
102
|
+
// Iterate over the locals and add them to the scope.
|
103
|
+
for (size_t local_index = 0; local_index < locals_count; local_index++) {
|
104
|
+
VALUE local = rb_ary_entry(scope, local_index);
|
105
|
+
|
106
|
+
// Check that the local is a symbol. If it's not, then raise a
|
107
|
+
// type error.
|
108
|
+
if (!RB_TYPE_P(local, T_SYMBOL)) {
|
109
|
+
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
|
110
|
+
}
|
111
|
+
|
112
|
+
// Add the local to the scope.
|
113
|
+
pm_string_t *scope_local = &options_scope->locals[local_index];
|
114
|
+
const char *name = rb_id2name(SYM2ID(local));
|
115
|
+
pm_string_constant_init(scope_local, name, strlen(name));
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
/**
|
121
|
+
* An iterator function that is called for each key-value in the keywords hash.
|
122
|
+
*/
|
123
|
+
static int
|
124
|
+
build_options_i(VALUE key, VALUE value, VALUE argument) {
|
125
|
+
pm_options_t *options = (pm_options_t *) argument;
|
126
|
+
ID key_id = SYM2ID(key);
|
127
|
+
|
128
|
+
if (key_id == rb_option_id_filepath) {
|
129
|
+
if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
|
130
|
+
} else if (key_id == rb_option_id_encoding) {
|
131
|
+
if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
|
132
|
+
} else if (key_id == rb_option_id_line) {
|
133
|
+
if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
|
134
|
+
} else if (key_id == rb_option_id_frozen_string_literal) {
|
135
|
+
if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
|
136
|
+
} else if (key_id == rb_option_id_version) {
|
137
|
+
if (!NIL_P(value)) {
|
138
|
+
const char *version = check_string(value);
|
139
|
+
|
140
|
+
if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
|
141
|
+
rb_raise(rb_eArgError, "invalid version: %"PRIsVALUE, value);
|
142
|
+
}
|
143
|
+
}
|
144
|
+
} else if (key_id == rb_option_id_scopes) {
|
145
|
+
if (!NIL_P(value)) build_options_scopes(options, value);
|
146
|
+
} else {
|
147
|
+
rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
|
148
|
+
}
|
149
|
+
|
150
|
+
return ST_CONTINUE;
|
151
|
+
}
|
152
|
+
|
153
|
+
/**
|
154
|
+
* We need a struct here to pass through rb_protect and it has to be a single
|
155
|
+
* value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this
|
156
|
+
* through as an opaque pointer and cast it on both sides.
|
157
|
+
*/
|
158
|
+
struct build_options_data {
|
159
|
+
pm_options_t *options;
|
160
|
+
VALUE keywords;
|
161
|
+
};
|
162
|
+
|
163
|
+
/**
|
164
|
+
* Build the set of options from the given keywords. Note that this can raise a
|
165
|
+
* Ruby error if the options are not valid.
|
166
|
+
*/
|
167
|
+
static VALUE
|
168
|
+
build_options(VALUE argument) {
|
169
|
+
struct build_options_data *data = (struct build_options_data *) argument;
|
170
|
+
rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
|
171
|
+
return Qnil;
|
172
|
+
}
|
173
|
+
|
174
|
+
/**
|
175
|
+
* Extract the options from the given keyword arguments.
|
176
|
+
*/
|
177
|
+
static void
|
178
|
+
extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
|
179
|
+
options->line = 1; // default
|
180
|
+
if (!NIL_P(keywords)) {
|
181
|
+
struct build_options_data data = { .options = options, .keywords = keywords };
|
182
|
+
struct build_options_data *argument = &data;
|
183
|
+
|
184
|
+
int state = 0;
|
185
|
+
rb_protect(build_options, (VALUE) argument, &state);
|
186
|
+
|
187
|
+
if (state != 0) {
|
188
|
+
pm_options_free(options);
|
189
|
+
rb_jump_tag(state);
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
if (!NIL_P(filepath)) {
|
194
|
+
if (!RB_TYPE_P(filepath, T_STRING)) {
|
195
|
+
pm_options_free(options);
|
196
|
+
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
|
197
|
+
}
|
198
|
+
|
199
|
+
pm_options_filepath_set(options, RSTRING_PTR(filepath));
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
203
|
+
/**
|
204
|
+
* Read options for methods that look like (source, **options).
|
205
|
+
*/
|
206
|
+
static void
|
207
|
+
string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
|
208
|
+
VALUE string;
|
209
|
+
VALUE keywords;
|
210
|
+
rb_scan_args(argc, argv, "1:", &string, &keywords);
|
211
|
+
|
212
|
+
extract_options(options, Qnil, keywords);
|
213
|
+
input_load_string(input, string);
|
214
|
+
}
|
215
|
+
|
216
|
+
/**
|
217
|
+
* Read options for methods that look like (filepath, **options).
|
218
|
+
*/
|
219
|
+
static void
|
220
|
+
file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
|
221
|
+
VALUE filepath;
|
222
|
+
VALUE keywords;
|
223
|
+
rb_scan_args(argc, argv, "1:", &filepath, &keywords);
|
224
|
+
|
225
|
+
Check_Type(filepath, T_STRING);
|
226
|
+
|
227
|
+
extract_options(options, filepath, keywords);
|
228
|
+
|
229
|
+
const char * string_source = (const char *) pm_string_source(&options->filepath);
|
230
|
+
|
231
|
+
if (!pm_string_mapped_init(input, string_source)) {
|
232
|
+
pm_options_free(options);
|
233
|
+
|
234
|
+
#ifdef _WIN32
|
235
|
+
int e = rb_w32_map_errno(GetLastError());
|
236
|
+
#else
|
237
|
+
int e = errno;
|
238
|
+
#endif
|
239
|
+
|
240
|
+
rb_syserr_fail(e, string_source);
|
241
|
+
}
|
242
|
+
}
|
243
|
+
|
244
|
+
/******************************************************************************/
|
245
|
+
/* Serializing the AST */
|
246
|
+
/******************************************************************************/
|
247
|
+
|
248
|
+
/**
|
249
|
+
* Dump the AST corresponding to the given input to a string.
|
250
|
+
*/
|
251
|
+
static VALUE
|
252
|
+
dump_input(pm_string_t *input, const pm_options_t *options) {
|
253
|
+
pm_buffer_t buffer;
|
254
|
+
if (!pm_buffer_init(&buffer)) {
|
255
|
+
rb_raise(rb_eNoMemError, "failed to allocate memory");
|
256
|
+
}
|
257
|
+
|
258
|
+
pm_parser_t parser;
|
259
|
+
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
|
260
|
+
|
261
|
+
pm_node_t *node = pm_parse(&parser);
|
262
|
+
pm_serialize(&parser, node, &buffer);
|
263
|
+
|
264
|
+
VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
|
265
|
+
pm_node_destroy(&parser, node);
|
266
|
+
pm_buffer_free(&buffer);
|
267
|
+
pm_parser_free(&parser);
|
268
|
+
|
269
|
+
return result;
|
270
|
+
}
|
271
|
+
|
272
|
+
/**
|
273
|
+
* call-seq:
|
274
|
+
* Prism::dump(source, **options) -> String
|
275
|
+
*
|
276
|
+
* Dump the AST corresponding to the given string to a string. For supported
|
277
|
+
* options, see Prism::parse.
|
278
|
+
*/
|
279
|
+
static VALUE
|
280
|
+
dump(int argc, VALUE *argv, VALUE self) {
|
281
|
+
pm_string_t input;
|
282
|
+
pm_options_t options = { 0 };
|
283
|
+
string_options(argc, argv, &input, &options);
|
284
|
+
|
285
|
+
#ifdef PRISM_DEBUG_MODE_BUILD
|
286
|
+
size_t length = pm_string_length(&input);
|
287
|
+
char* dup = malloc(length);
|
288
|
+
memcpy(dup, pm_string_source(&input), length);
|
289
|
+
pm_string_constant_init(&input, dup, length);
|
290
|
+
#endif
|
291
|
+
|
292
|
+
VALUE value = dump_input(&input, &options);
|
293
|
+
|
294
|
+
#ifdef PRISM_DEBUG_MODE_BUILD
|
295
|
+
free(dup);
|
296
|
+
#endif
|
297
|
+
|
298
|
+
pm_string_free(&input);
|
299
|
+
pm_options_free(&options);
|
300
|
+
|
301
|
+
return value;
|
302
|
+
}
|
303
|
+
|
304
|
+
/**
|
305
|
+
* call-seq:
|
306
|
+
* Prism::dump_file(filepath, **options) -> String
|
307
|
+
*
|
308
|
+
* Dump the AST corresponding to the given file to a string. For supported
|
309
|
+
* options, see Prism::parse.
|
310
|
+
*/
|
311
|
+
static VALUE
|
312
|
+
dump_file(int argc, VALUE *argv, VALUE self) {
|
313
|
+
pm_string_t input;
|
314
|
+
pm_options_t options = { 0 };
|
315
|
+
|
316
|
+
file_options(argc, argv, &input, &options);
|
317
|
+
|
318
|
+
VALUE value = dump_input(&input, &options);
|
319
|
+
pm_string_free(&input);
|
320
|
+
pm_options_free(&options);
|
321
|
+
|
322
|
+
return value;
|
323
|
+
}
|
324
|
+
|
325
|
+
/******************************************************************************/
|
326
|
+
/* Extracting values for the parse result */
|
327
|
+
/******************************************************************************/
|
328
|
+
|
329
|
+
/**
|
330
|
+
* Extract the comments out of the parser into an array.
|
331
|
+
*/
|
332
|
+
static VALUE
|
333
|
+
parser_comments(pm_parser_t *parser, VALUE source) {
|
334
|
+
VALUE comments = rb_ary_new();
|
335
|
+
|
336
|
+
for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
|
337
|
+
VALUE location_argv[] = {
|
338
|
+
source,
|
339
|
+
LONG2FIX(comment->location.start - parser->start),
|
340
|
+
LONG2FIX(comment->location.end - comment->location.start)
|
341
|
+
};
|
342
|
+
|
343
|
+
VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
|
344
|
+
VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
|
345
|
+
rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
|
346
|
+
}
|
347
|
+
|
348
|
+
return comments;
|
349
|
+
}
|
350
|
+
|
351
|
+
/**
|
352
|
+
* Extract the magic comments out of the parser into an array.
|
353
|
+
*/
|
354
|
+
static VALUE
|
355
|
+
parser_magic_comments(pm_parser_t *parser, VALUE source) {
|
356
|
+
VALUE magic_comments = rb_ary_new();
|
357
|
+
|
358
|
+
for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
|
359
|
+
VALUE key_loc_argv[] = {
|
360
|
+
source,
|
361
|
+
LONG2FIX(magic_comment->key_start - parser->start),
|
362
|
+
LONG2FIX(magic_comment->key_length)
|
363
|
+
};
|
364
|
+
|
365
|
+
VALUE value_loc_argv[] = {
|
366
|
+
source,
|
367
|
+
LONG2FIX(magic_comment->value_start - parser->start),
|
368
|
+
LONG2FIX(magic_comment->value_length)
|
369
|
+
};
|
370
|
+
|
371
|
+
VALUE magic_comment_argv[] = {
|
372
|
+
rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
|
373
|
+
rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
|
374
|
+
};
|
375
|
+
|
376
|
+
rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
|
377
|
+
}
|
378
|
+
|
379
|
+
return magic_comments;
|
380
|
+
}
|
381
|
+
|
382
|
+
/**
|
383
|
+
* Extract out the data location from the parser into a Location instance if one
|
384
|
+
* exists.
|
385
|
+
*/
|
386
|
+
static VALUE
|
387
|
+
parser_data_loc(const pm_parser_t *parser, VALUE source) {
|
388
|
+
if (parser->data_loc.end == NULL) {
|
389
|
+
return Qnil;
|
390
|
+
} else {
|
391
|
+
VALUE argv[] = {
|
392
|
+
source,
|
393
|
+
LONG2FIX(parser->data_loc.start - parser->start),
|
394
|
+
LONG2FIX(parser->data_loc.end - parser->data_loc.start)
|
395
|
+
};
|
396
|
+
|
397
|
+
return rb_class_new_instance(3, argv, rb_cPrismLocation);
|
398
|
+
}
|
399
|
+
}
|
400
|
+
|
401
|
+
/**
|
402
|
+
* Extract the errors out of the parser into an array.
|
403
|
+
*/
|
404
|
+
static VALUE
|
405
|
+
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
406
|
+
VALUE errors = rb_ary_new();
|
407
|
+
pm_diagnostic_t *error;
|
408
|
+
|
409
|
+
for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
410
|
+
VALUE location_argv[] = {
|
411
|
+
source,
|
412
|
+
LONG2FIX(error->location.start - parser->start),
|
413
|
+
LONG2FIX(error->location.end - error->location.start)
|
414
|
+
};
|
415
|
+
|
416
|
+
VALUE level = Qnil;
|
417
|
+
switch (error->level) {
|
418
|
+
case PM_ERROR_LEVEL_FATAL:
|
419
|
+
level = ID2SYM(rb_intern("fatal"));
|
420
|
+
break;
|
421
|
+
case PM_ERROR_LEVEL_ARGUMENT:
|
422
|
+
level = ID2SYM(rb_intern("argument"));
|
423
|
+
break;
|
424
|
+
default:
|
425
|
+
rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
|
426
|
+
}
|
427
|
+
|
428
|
+
VALUE error_argv[] = {
|
429
|
+
rb_enc_str_new_cstr(error->message, encoding),
|
430
|
+
rb_class_new_instance(3, location_argv, rb_cPrismLocation),
|
431
|
+
level
|
432
|
+
};
|
433
|
+
|
434
|
+
rb_ary_push(errors, rb_class_new_instance(3, error_argv, rb_cPrismParseError));
|
435
|
+
}
|
436
|
+
|
437
|
+
return errors;
|
438
|
+
}
|
439
|
+
|
440
|
+
/**
|
441
|
+
* Extract the warnings out of the parser into an array.
|
442
|
+
*/
|
443
|
+
static VALUE
|
444
|
+
parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
445
|
+
VALUE warnings = rb_ary_new();
|
446
|
+
pm_diagnostic_t *warning;
|
447
|
+
|
448
|
+
for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
|
449
|
+
VALUE location_argv[] = {
|
450
|
+
source,
|
451
|
+
LONG2FIX(warning->location.start - parser->start),
|
452
|
+
LONG2FIX(warning->location.end - warning->location.start)
|
453
|
+
};
|
454
|
+
|
455
|
+
VALUE level = Qnil;
|
456
|
+
switch (warning->level) {
|
457
|
+
case PM_WARNING_LEVEL_DEFAULT:
|
458
|
+
level = ID2SYM(rb_intern("default"));
|
459
|
+
break;
|
460
|
+
case PM_WARNING_LEVEL_VERBOSE:
|
461
|
+
level = ID2SYM(rb_intern("verbose"));
|
462
|
+
break;
|
463
|
+
default:
|
464
|
+
rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level);
|
465
|
+
}
|
466
|
+
|
467
|
+
VALUE warning_argv[] = {
|
468
|
+
rb_enc_str_new_cstr(warning->message, encoding),
|
469
|
+
rb_class_new_instance(3, location_argv, rb_cPrismLocation),
|
470
|
+
level
|
471
|
+
};
|
472
|
+
|
473
|
+
rb_ary_push(warnings, rb_class_new_instance(3, warning_argv, rb_cPrismParseWarning));
|
474
|
+
}
|
475
|
+
|
476
|
+
return warnings;
|
477
|
+
}
|
478
|
+
|
479
|
+
/******************************************************************************/
|
480
|
+
/* Lexing Ruby code */
|
481
|
+
/******************************************************************************/
|
482
|
+
|
483
|
+
/**
|
484
|
+
* This struct gets stored in the parser and passed in to the lex callback any
|
485
|
+
* time a new token is found. We use it to store the necessary information to
|
486
|
+
* initialize a Token instance.
|
487
|
+
*/
|
488
|
+
typedef struct {
|
489
|
+
VALUE source;
|
490
|
+
VALUE tokens;
|
491
|
+
rb_encoding *encoding;
|
492
|
+
} parse_lex_data_t;
|
493
|
+
|
494
|
+
/**
|
495
|
+
* This is passed as a callback to the parser. It gets called every time a new
|
496
|
+
* token is found. Once found, we initialize a new instance of Token and push it
|
497
|
+
* onto the tokens array.
|
498
|
+
*/
|
499
|
+
static void
|
500
|
+
parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
|
501
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
502
|
+
|
503
|
+
VALUE yields = rb_ary_new_capa(2);
|
504
|
+
rb_ary_push(yields, pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
|
505
|
+
rb_ary_push(yields, INT2FIX(parser->lex_state));
|
506
|
+
|
507
|
+
rb_ary_push(parse_lex_data->tokens, yields);
|
508
|
+
}
|
509
|
+
|
510
|
+
/**
|
511
|
+
* This is called whenever the encoding changes based on the magic comment at
|
512
|
+
* the top of the file. We use it to update the encoding that we are using to
|
513
|
+
* create tokens.
|
514
|
+
*/
|
515
|
+
static void
|
516
|
+
parse_lex_encoding_changed_callback(pm_parser_t *parser) {
|
517
|
+
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
518
|
+
parse_lex_data->encoding = rb_enc_find(parser->encoding->name);
|
519
|
+
|
520
|
+
// Since the encoding changed, we need to go back and change the encoding of
|
521
|
+
// the tokens that were already lexed. This is only going to end up being
|
522
|
+
// one or two tokens, since the encoding can only change at the top of the
|
523
|
+
// file.
|
524
|
+
VALUE tokens = parse_lex_data->tokens;
|
525
|
+
for (long index = 0; index < RARRAY_LEN(tokens); index++) {
|
526
|
+
VALUE yields = rb_ary_entry(tokens, index);
|
527
|
+
VALUE token = rb_ary_entry(yields, 0);
|
528
|
+
|
529
|
+
VALUE value = rb_ivar_get(token, rb_intern("@value"));
|
530
|
+
rb_enc_associate(value, parse_lex_data->encoding);
|
531
|
+
ENC_CODERANGE_CLEAR(value);
|
532
|
+
}
|
533
|
+
}
|
534
|
+
|
535
|
+
/**
|
536
|
+
* Parse the given input and return a ParseResult containing just the tokens or
|
537
|
+
* the nodes and tokens.
|
538
|
+
*/
|
539
|
+
static VALUE
|
540
|
+
parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
|
541
|
+
pm_parser_t parser;
|
542
|
+
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
|
543
|
+
pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
|
544
|
+
|
545
|
+
VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
|
546
|
+
VALUE offsets = rb_ary_new();
|
547
|
+
VALUE source_argv[] = { source_string, LONG2NUM(parser.start_line), offsets };
|
548
|
+
VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
|
549
|
+
|
550
|
+
parse_lex_data_t parse_lex_data = {
|
551
|
+
.source = source,
|
552
|
+
.tokens = rb_ary_new(),
|
553
|
+
.encoding = rb_utf8_encoding()
|
554
|
+
};
|
555
|
+
|
556
|
+
parse_lex_data_t *data = &parse_lex_data;
|
557
|
+
pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
|
558
|
+
.data = (void *) data,
|
559
|
+
.callback = parse_lex_token,
|
560
|
+
};
|
561
|
+
|
562
|
+
parser.lex_callback = &lex_callback;
|
563
|
+
pm_node_t *node = pm_parse(&parser);
|
564
|
+
|
565
|
+
// Here we need to update the Source object to have the correct
|
566
|
+
// encoding for the source string and the correct newline offsets.
|
567
|
+
// We do it here because we've already created the Source object and given
|
568
|
+
// it over to all of the tokens, and both of these are only set after pm_parse().
|
569
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
570
|
+
rb_enc_associate(source_string, encoding);
|
571
|
+
|
572
|
+
for (size_t index = 0; index < parser.newline_list.size; index++) {
|
573
|
+
rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
|
574
|
+
}
|
575
|
+
|
576
|
+
VALUE value;
|
577
|
+
if (return_nodes) {
|
578
|
+
value = rb_ary_new_capa(2);
|
579
|
+
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
|
580
|
+
rb_ary_push(value, parse_lex_data.tokens);
|
581
|
+
} else {
|
582
|
+
value = parse_lex_data.tokens;
|
583
|
+
}
|
584
|
+
|
585
|
+
VALUE result_argv[] = {
|
586
|
+
value,
|
587
|
+
parser_comments(&parser, source),
|
588
|
+
parser_magic_comments(&parser, source),
|
589
|
+
parser_data_loc(&parser, source),
|
590
|
+
parser_errors(&parser, parse_lex_data.encoding, source),
|
591
|
+
parser_warnings(&parser, parse_lex_data.encoding, source),
|
592
|
+
source
|
593
|
+
};
|
594
|
+
|
595
|
+
pm_node_destroy(&parser, node);
|
596
|
+
pm_parser_free(&parser);
|
597
|
+
return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
|
598
|
+
}
|
599
|
+
|
600
|
+
/**
|
601
|
+
* call-seq:
|
602
|
+
* Prism::lex(source, **options) -> Array
|
603
|
+
*
|
604
|
+
* Return an array of Token instances corresponding to the given string. For
|
605
|
+
* supported options, see Prism::parse.
|
606
|
+
*/
|
607
|
+
static VALUE
|
608
|
+
lex(int argc, VALUE *argv, VALUE self) {
|
609
|
+
pm_string_t input;
|
610
|
+
pm_options_t options = { 0 };
|
611
|
+
string_options(argc, argv, &input, &options);
|
612
|
+
|
613
|
+
VALUE result = parse_lex_input(&input, &options, false);
|
614
|
+
pm_string_free(&input);
|
615
|
+
pm_options_free(&options);
|
616
|
+
|
617
|
+
return result;
|
618
|
+
}
|
619
|
+
|
620
|
+
/**
|
621
|
+
* call-seq:
|
622
|
+
* Prism::lex_file(filepath, **options) -> Array
|
623
|
+
*
|
624
|
+
* Return an array of Token instances corresponding to the given file. For
|
625
|
+
* supported options, see Prism::parse.
|
626
|
+
*/
|
627
|
+
static VALUE
|
628
|
+
lex_file(int argc, VALUE *argv, VALUE self) {
|
629
|
+
pm_string_t input;
|
630
|
+
pm_options_t options = { 0 };
|
631
|
+
|
632
|
+
file_options(argc, argv, &input, &options);
|
633
|
+
|
634
|
+
VALUE value = parse_lex_input(&input, &options, false);
|
635
|
+
pm_string_free(&input);
|
636
|
+
pm_options_free(&options);
|
637
|
+
|
638
|
+
return value;
|
639
|
+
}
|
640
|
+
|
641
|
+
/******************************************************************************/
|
642
|
+
/* Parsing Ruby code */
|
643
|
+
/******************************************************************************/
|
644
|
+
|
645
|
+
/**
|
646
|
+
* Parse the given input and return a ParseResult instance.
|
647
|
+
*/
|
648
|
+
static VALUE
|
649
|
+
parse_input(pm_string_t *input, const pm_options_t *options) {
|
650
|
+
pm_parser_t parser;
|
651
|
+
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
|
652
|
+
|
653
|
+
pm_node_t *node = pm_parse(&parser);
|
654
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
655
|
+
|
656
|
+
VALUE source = pm_source_new(&parser, encoding);
|
657
|
+
VALUE result_argv[] = {
|
658
|
+
pm_ast_new(&parser, node, encoding, source),
|
659
|
+
parser_comments(&parser, source),
|
660
|
+
parser_magic_comments(&parser, source),
|
661
|
+
parser_data_loc(&parser, source),
|
662
|
+
parser_errors(&parser, encoding, source),
|
663
|
+
parser_warnings(&parser, encoding, source),
|
664
|
+
source
|
665
|
+
};
|
666
|
+
|
667
|
+
VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
|
668
|
+
|
669
|
+
pm_node_destroy(&parser, node);
|
670
|
+
pm_parser_free(&parser);
|
671
|
+
|
672
|
+
return result;
|
673
|
+
}
|
674
|
+
|
675
|
+
/**
|
676
|
+
* call-seq:
|
677
|
+
* Prism::parse(source, **options) -> ParseResult
|
678
|
+
*
|
679
|
+
* Parse the given string and return a ParseResult instance. The options that
|
680
|
+
* are supported are:
|
681
|
+
*
|
682
|
+
* * `filepath` - the filepath of the source being parsed. This should be a
|
683
|
+
* string or nil
|
684
|
+
* * `encoding` - the encoding of the source being parsed. This should be an
|
685
|
+
* encoding or nil
|
686
|
+
* * `line` - the line number that the parse starts on. This should be an
|
687
|
+
* integer or nil. Note that this is 1-indexed.
|
688
|
+
* * `frozen_string_literal` - whether or not the frozen string literal pragma
|
689
|
+
* has been set. This should be a boolean or nil.
|
690
|
+
* * `version` - the version of prism that should be used to parse Ruby code. By
|
691
|
+
* default prism assumes you want to parse with the latest vesion of
|
692
|
+
* prism (which you can trigger with `nil` or `"latest"`). If you want to
|
693
|
+
* parse exactly as CRuby 3.3.0 would, then you can pass `"3.3.0"`.
|
694
|
+
* * `scopes` - the locals that are in scope surrounding the code that is being
|
695
|
+
* parsed. This should be an array of arrays of symbols or nil. Scopes are
|
696
|
+
* ordered from the outermost scope to the innermost one.
|
697
|
+
*/
|
698
|
+
static VALUE
|
699
|
+
parse(int argc, VALUE *argv, VALUE self) {
|
700
|
+
pm_string_t input;
|
701
|
+
pm_options_t options = { 0 };
|
702
|
+
string_options(argc, argv, &input, &options);
|
703
|
+
|
704
|
+
#ifdef PRISM_DEBUG_MODE_BUILD
|
705
|
+
size_t length = pm_string_length(&input);
|
706
|
+
char* dup = malloc(length);
|
707
|
+
memcpy(dup, pm_string_source(&input), length);
|
708
|
+
pm_string_constant_init(&input, dup, length);
|
709
|
+
#endif
|
710
|
+
|
711
|
+
VALUE value = parse_input(&input, &options);
|
712
|
+
|
713
|
+
#ifdef PRISM_DEBUG_MODE_BUILD
|
714
|
+
free(dup);
|
715
|
+
#endif
|
716
|
+
|
717
|
+
pm_string_free(&input);
|
718
|
+
pm_options_free(&options);
|
719
|
+
return value;
|
720
|
+
}
|
721
|
+
|
722
|
+
/**
|
723
|
+
* call-seq:
|
724
|
+
* Prism::parse_file(filepath, **options) -> ParseResult
|
725
|
+
*
|
726
|
+
* Parse the given file and return a ParseResult instance. For supported
|
727
|
+
* options, see Prism::parse.
|
728
|
+
*/
|
729
|
+
static VALUE
|
730
|
+
parse_file(int argc, VALUE *argv, VALUE self) {
|
731
|
+
pm_string_t input;
|
732
|
+
pm_options_t options = { 0 };
|
733
|
+
|
734
|
+
file_options(argc, argv, &input, &options);
|
735
|
+
|
736
|
+
VALUE value = parse_input(&input, &options);
|
737
|
+
pm_string_free(&input);
|
738
|
+
pm_options_free(&options);
|
739
|
+
|
740
|
+
return value;
|
741
|
+
}
|
742
|
+
|
743
|
+
/**
|
744
|
+
* Parse the given input and return an array of Comment objects.
|
745
|
+
*/
|
746
|
+
static VALUE
|
747
|
+
parse_input_comments(pm_string_t *input, const pm_options_t *options) {
|
748
|
+
pm_parser_t parser;
|
749
|
+
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
|
750
|
+
|
751
|
+
pm_node_t *node = pm_parse(&parser);
|
752
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
753
|
+
|
754
|
+
VALUE source = pm_source_new(&parser, encoding);
|
755
|
+
VALUE comments = parser_comments(&parser, source);
|
756
|
+
|
757
|
+
pm_node_destroy(&parser, node);
|
758
|
+
pm_parser_free(&parser);
|
759
|
+
|
760
|
+
return comments;
|
761
|
+
}
|
762
|
+
|
763
|
+
/**
|
764
|
+
* call-seq:
|
765
|
+
* Prism::parse_comments(source, **options) -> Array
|
766
|
+
*
|
767
|
+
* Parse the given string and return an array of Comment objects. For supported
|
768
|
+
* options, see Prism::parse.
|
769
|
+
*/
|
770
|
+
static VALUE
|
771
|
+
parse_comments(int argc, VALUE *argv, VALUE self) {
|
772
|
+
pm_string_t input;
|
773
|
+
pm_options_t options = { 0 };
|
774
|
+
string_options(argc, argv, &input, &options);
|
775
|
+
|
776
|
+
VALUE result = parse_input_comments(&input, &options);
|
777
|
+
pm_string_free(&input);
|
778
|
+
pm_options_free(&options);
|
779
|
+
|
780
|
+
return result;
|
781
|
+
}
|
782
|
+
|
783
|
+
/**
|
784
|
+
* call-seq:
|
785
|
+
* Prism::parse_file_comments(filepath, **options) -> Array
|
786
|
+
*
|
787
|
+
* Parse the given file and return an array of Comment objects. For supported
|
788
|
+
* options, see Prism::parse.
|
789
|
+
*/
|
790
|
+
static VALUE
|
791
|
+
parse_file_comments(int argc, VALUE *argv, VALUE self) {
|
792
|
+
pm_string_t input;
|
793
|
+
pm_options_t options = { 0 };
|
794
|
+
|
795
|
+
file_options(argc, argv, &input, &options);
|
796
|
+
|
797
|
+
VALUE value = parse_input_comments(&input, &options);
|
798
|
+
pm_string_free(&input);
|
799
|
+
pm_options_free(&options);
|
800
|
+
|
801
|
+
return value;
|
802
|
+
}
|
803
|
+
|
804
|
+
/**
|
805
|
+
* call-seq:
|
806
|
+
* Prism::parse_lex(source, **options) -> ParseResult
|
807
|
+
*
|
808
|
+
* Parse the given string and return a ParseResult instance that contains a
|
809
|
+
* 2-element array, where the first element is the AST and the second element is
|
810
|
+
* an array of Token instances.
|
811
|
+
*
|
812
|
+
* This API is only meant to be used in the case where you need both the AST and
|
813
|
+
* the tokens. If you only need one or the other, use either Prism::parse or
|
814
|
+
* Prism::lex.
|
815
|
+
*
|
816
|
+
* For supported options, see Prism::parse.
|
817
|
+
*/
|
818
|
+
static VALUE
|
819
|
+
parse_lex(int argc, VALUE *argv, VALUE self) {
|
820
|
+
pm_string_t input;
|
821
|
+
pm_options_t options = { 0 };
|
822
|
+
string_options(argc, argv, &input, &options);
|
823
|
+
|
824
|
+
VALUE value = parse_lex_input(&input, &options, true);
|
825
|
+
pm_string_free(&input);
|
826
|
+
pm_options_free(&options);
|
827
|
+
|
828
|
+
return value;
|
829
|
+
}
|
830
|
+
|
831
|
+
/**
|
832
|
+
* call-seq:
|
833
|
+
* Prism::parse_lex_file(filepath, **options) -> ParseResult
|
834
|
+
*
|
835
|
+
* Parse the given file and return a ParseResult instance that contains a
|
836
|
+
* 2-element array, where the first element is the AST and the second element is
|
837
|
+
* an array of Token instances.
|
838
|
+
*
|
839
|
+
* This API is only meant to be used in the case where you need both the AST and
|
840
|
+
* the tokens. If you only need one or the other, use either Prism::parse_file
|
841
|
+
* or Prism::lex_file.
|
842
|
+
*
|
843
|
+
* For supported options, see Prism::parse.
|
844
|
+
*/
|
845
|
+
static VALUE
|
846
|
+
parse_lex_file(int argc, VALUE *argv, VALUE self) {
|
847
|
+
pm_string_t input;
|
848
|
+
pm_options_t options = { 0 };
|
849
|
+
|
850
|
+
file_options(argc, argv, &input, &options);
|
851
|
+
|
852
|
+
VALUE value = parse_lex_input(&input, &options, true);
|
853
|
+
pm_string_free(&input);
|
854
|
+
pm_options_free(&options);
|
855
|
+
|
856
|
+
return value;
|
857
|
+
}
|
858
|
+
|
859
|
+
/**
|
860
|
+
* Parse the given input and return true if it parses without errors.
|
861
|
+
*/
|
862
|
+
static VALUE
|
863
|
+
parse_input_success_p(pm_string_t *input, const pm_options_t *options) {
|
864
|
+
pm_parser_t parser;
|
865
|
+
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
|
866
|
+
|
867
|
+
pm_node_t *node = pm_parse(&parser);
|
868
|
+
pm_node_destroy(&parser, node);
|
869
|
+
|
870
|
+
VALUE result = parser.error_list.size == 0 ? Qtrue : Qfalse;
|
871
|
+
pm_parser_free(&parser);
|
872
|
+
|
873
|
+
return result;
|
874
|
+
}
|
875
|
+
|
876
|
+
/**
|
877
|
+
* call-seq:
|
878
|
+
* Prism::parse_success?(source, **options) -> Array
|
879
|
+
*
|
880
|
+
* Parse the given string and return true if it parses without errors. For
|
881
|
+
* supported options, see Prism::parse.
|
882
|
+
*/
|
883
|
+
static VALUE
|
884
|
+
parse_success_p(int argc, VALUE *argv, VALUE self) {
|
885
|
+
pm_string_t input;
|
886
|
+
pm_options_t options = { 0 };
|
887
|
+
string_options(argc, argv, &input, &options);
|
888
|
+
|
889
|
+
VALUE result = parse_input_success_p(&input, &options);
|
890
|
+
pm_string_free(&input);
|
891
|
+
pm_options_free(&options);
|
892
|
+
|
893
|
+
return result;
|
894
|
+
}
|
895
|
+
|
896
|
+
/**
|
897
|
+
* call-seq:
|
898
|
+
* Prism::parse_file_success?(filepath, **options) -> Array
|
899
|
+
*
|
900
|
+
* Parse the given file and return true if it parses without errors. For
|
901
|
+
* supported options, see Prism::parse.
|
902
|
+
*/
|
903
|
+
static VALUE
|
904
|
+
parse_file_success_p(int argc, VALUE *argv, VALUE self) {
|
905
|
+
pm_string_t input;
|
906
|
+
pm_options_t options = { 0 };
|
907
|
+
|
908
|
+
file_options(argc, argv, &input, &options);
|
909
|
+
|
910
|
+
VALUE result = parse_input_success_p(&input, &options);
|
911
|
+
pm_string_free(&input);
|
912
|
+
pm_options_free(&options);
|
913
|
+
|
914
|
+
return result;
|
915
|
+
}
|
916
|
+
|
917
|
+
/******************************************************************************/
|
918
|
+
/* Utility functions exposed to make testing easier */
|
919
|
+
/******************************************************************************/
|
920
|
+
|
921
|
+
/**
|
922
|
+
* call-seq:
|
923
|
+
* Debug::named_captures(source) -> Array
|
924
|
+
*
|
925
|
+
* Returns an array of strings corresponding to the named capture groups in the
|
926
|
+
* given source string. If prism was unable to parse the regular expression,
|
927
|
+
* this function returns nil.
|
928
|
+
*/
|
929
|
+
static VALUE
|
930
|
+
named_captures(VALUE self, VALUE source) {
|
931
|
+
pm_string_list_t string_list = { 0 };
|
932
|
+
|
933
|
+
if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, PM_ENCODING_UTF_8_ENTRY)) {
|
934
|
+
pm_string_list_free(&string_list);
|
935
|
+
return Qnil;
|
936
|
+
}
|
937
|
+
|
938
|
+
VALUE names = rb_ary_new();
|
939
|
+
for (size_t index = 0; index < string_list.length; index++) {
|
940
|
+
const pm_string_t *string = &string_list.strings[index];
|
941
|
+
rb_ary_push(names, rb_str_new((const char *) pm_string_source(string), pm_string_length(string)));
|
942
|
+
}
|
943
|
+
|
944
|
+
pm_string_list_free(&string_list);
|
945
|
+
return names;
|
946
|
+
}
|
947
|
+
|
948
|
+
/**
|
949
|
+
* call-seq:
|
950
|
+
* Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
|
951
|
+
*
|
952
|
+
* Return a hash of information about the given source string's memory usage.
|
953
|
+
*/
|
954
|
+
static VALUE
|
955
|
+
memsize(VALUE self, VALUE string) {
|
956
|
+
pm_parser_t parser;
|
957
|
+
size_t length = RSTRING_LEN(string);
|
958
|
+
pm_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
|
959
|
+
|
960
|
+
pm_node_t *node = pm_parse(&parser);
|
961
|
+
pm_memsize_t memsize;
|
962
|
+
pm_node_memsize(node, &memsize);
|
963
|
+
|
964
|
+
pm_node_destroy(&parser, node);
|
965
|
+
pm_parser_free(&parser);
|
966
|
+
|
967
|
+
VALUE result = rb_hash_new();
|
968
|
+
rb_hash_aset(result, ID2SYM(rb_intern("length")), INT2FIX(length));
|
969
|
+
rb_hash_aset(result, ID2SYM(rb_intern("memsize")), INT2FIX(memsize.memsize));
|
970
|
+
rb_hash_aset(result, ID2SYM(rb_intern("node_count")), INT2FIX(memsize.node_count));
|
971
|
+
return result;
|
972
|
+
}
|
973
|
+
|
974
|
+
/**
|
975
|
+
* call-seq:
|
976
|
+
* Debug::profile_file(filepath) -> nil
|
977
|
+
*
|
978
|
+
* Parse the file, but do nothing with the result. This is used to profile the
|
979
|
+
* parser for memory and speed.
|
980
|
+
*/
|
981
|
+
static VALUE
|
982
|
+
profile_file(VALUE self, VALUE filepath) {
|
983
|
+
pm_string_t input;
|
984
|
+
|
985
|
+
const char *checked = check_string(filepath);
|
986
|
+
Check_Type(filepath, T_STRING);
|
987
|
+
|
988
|
+
if (!pm_string_mapped_init(&input, checked)) {
|
989
|
+
#ifdef _WIN32
|
990
|
+
int e = rb_w32_map_errno(GetLastError());
|
991
|
+
#else
|
992
|
+
int e = errno;
|
993
|
+
#endif
|
994
|
+
|
995
|
+
rb_syserr_fail(e, checked);
|
996
|
+
}
|
997
|
+
|
998
|
+
pm_options_t options = { 0 };
|
999
|
+
pm_options_filepath_set(&options, checked);
|
1000
|
+
|
1001
|
+
pm_parser_t parser;
|
1002
|
+
pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
|
1003
|
+
|
1004
|
+
pm_node_t *node = pm_parse(&parser);
|
1005
|
+
pm_node_destroy(&parser, node);
|
1006
|
+
pm_parser_free(&parser);
|
1007
|
+
pm_options_free(&options);
|
1008
|
+
pm_string_free(&input);
|
1009
|
+
|
1010
|
+
return Qnil;
|
1011
|
+
}
|
1012
|
+
|
1013
|
+
/**
|
1014
|
+
* call-seq:
|
1015
|
+
* Debug::inspect_node(source) -> inspected
|
1016
|
+
*
|
1017
|
+
* Inspect the AST that represents the given source using the prism pretty print
|
1018
|
+
* as opposed to the Ruby implementation.
|
1019
|
+
*/
|
1020
|
+
static VALUE
|
1021
|
+
inspect_node(VALUE self, VALUE source) {
|
1022
|
+
pm_string_t input;
|
1023
|
+
input_load_string(&input, source);
|
1024
|
+
|
1025
|
+
pm_parser_t parser;
|
1026
|
+
pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
|
1027
|
+
|
1028
|
+
pm_node_t *node = pm_parse(&parser);
|
1029
|
+
pm_buffer_t buffer = { 0 };
|
1030
|
+
|
1031
|
+
pm_prettyprint(&buffer, &parser, node);
|
1032
|
+
|
1033
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
1034
|
+
VALUE string = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
|
1035
|
+
|
1036
|
+
pm_buffer_free(&buffer);
|
1037
|
+
pm_node_destroy(&parser, node);
|
1038
|
+
pm_parser_free(&parser);
|
1039
|
+
|
1040
|
+
return string;
|
1041
|
+
}
|
1042
|
+
|
1043
|
+
/**
|
1044
|
+
* call-seq:
|
1045
|
+
* Debug::format_errors(source, colorize) -> String
|
1046
|
+
*
|
1047
|
+
* Format the errors that are found when parsing the given source string.
|
1048
|
+
*/
|
1049
|
+
static VALUE
|
1050
|
+
format_errors(VALUE self, VALUE source, VALUE colorize) {
|
1051
|
+
pm_string_t input;
|
1052
|
+
input_load_string(&input, source);
|
1053
|
+
|
1054
|
+
pm_parser_t parser;
|
1055
|
+
pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
|
1056
|
+
|
1057
|
+
pm_node_t *node = pm_parse(&parser);
|
1058
|
+
pm_buffer_t buffer = { 0 };
|
1059
|
+
|
1060
|
+
pm_parser_errors_format(&parser, &buffer, RTEST(colorize));
|
1061
|
+
|
1062
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
1063
|
+
VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
|
1064
|
+
|
1065
|
+
pm_buffer_free(&buffer);
|
1066
|
+
pm_node_destroy(&parser, node);
|
1067
|
+
pm_parser_free(&parser);
|
1068
|
+
pm_string_free(&input);
|
1069
|
+
|
1070
|
+
return result;
|
1071
|
+
}
|
1072
|
+
|
1073
|
+
/******************************************************************************/
|
1074
|
+
/* Initialization of the extension */
|
1075
|
+
/******************************************************************************/
|
1076
|
+
|
1077
|
+
/**
|
1078
|
+
* The init function that Ruby calls when loading this extension.
|
1079
|
+
*/
|
1080
|
+
RUBY_FUNC_EXPORTED void
|
1081
|
+
Init_prism(void) {
|
1082
|
+
// Make sure that the prism library version matches the expected version.
|
1083
|
+
// Otherwise something was compiled incorrectly.
|
1084
|
+
if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) {
|
1085
|
+
rb_raise(
|
1086
|
+
rb_eRuntimeError,
|
1087
|
+
"The prism library version (%s) does not match the expected version (%s)",
|
1088
|
+
pm_version(),
|
1089
|
+
EXPECTED_PRISM_VERSION
|
1090
|
+
);
|
1091
|
+
}
|
1092
|
+
|
1093
|
+
// Grab up references to all of the constants that we're going to need to
|
1094
|
+
// reference throughout this extension.
|
1095
|
+
rb_cPrism = rb_define_module("Prism");
|
1096
|
+
rb_cPrismNode = rb_define_class_under(rb_cPrism, "Node", rb_cObject);
|
1097
|
+
rb_cPrismSource = rb_define_class_under(rb_cPrism, "Source", rb_cObject);
|
1098
|
+
rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
|
1099
|
+
rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
|
1100
|
+
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
|
1101
|
+
rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
|
1102
|
+
rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
|
1103
|
+
rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
|
1104
|
+
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
|
1105
|
+
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
|
1106
|
+
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
|
1107
|
+
|
1108
|
+
// Intern all of the options that we support so that we don't have to do it
|
1109
|
+
// every time we parse.
|
1110
|
+
rb_option_id_filepath = rb_intern_const("filepath");
|
1111
|
+
rb_option_id_encoding = rb_intern_const("encoding");
|
1112
|
+
rb_option_id_line = rb_intern_const("line");
|
1113
|
+
rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
|
1114
|
+
rb_option_id_version = rb_intern_const("version");
|
1115
|
+
rb_option_id_scopes = rb_intern_const("scopes");
|
1116
|
+
|
1117
|
+
/**
|
1118
|
+
* The version of the prism library.
|
1119
|
+
*/
|
1120
|
+
rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
|
1121
|
+
|
1122
|
+
/**
|
1123
|
+
* The backend of the parser that prism is using to parse Ruby code. This
|
1124
|
+
* can be either :CEXT or :FFI. On runtimes that support C extensions, we
|
1125
|
+
* default to :CEXT. Otherwise we use :FFI.
|
1126
|
+
*/
|
1127
|
+
rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
|
1128
|
+
|
1129
|
+
// First, the functions that have to do with lexing and parsing.
|
1130
|
+
rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
|
1131
|
+
rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
|
1132
|
+
rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
|
1133
|
+
rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
|
1134
|
+
rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
|
1135
|
+
rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
|
1136
|
+
rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
|
1137
|
+
rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
|
1138
|
+
rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
|
1139
|
+
rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
|
1140
|
+
rb_define_singleton_method(rb_cPrism, "parse_success?", parse_success_p, -1);
|
1141
|
+
rb_define_singleton_method(rb_cPrism, "parse_file_success?", parse_file_success_p, -1);
|
1142
|
+
|
1143
|
+
// Next, the functions that will be called by the parser to perform various
|
1144
|
+
// internal tasks. We expose these to make them easier to test.
|
1145
|
+
VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
|
1146
|
+
rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
|
1147
|
+
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
|
1148
|
+
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
|
1149
|
+
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
|
1150
|
+
rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
|
1151
|
+
|
1152
|
+
// Next, initialize the other APIs.
|
1153
|
+
Init_prism_api_node();
|
1154
|
+
Init_prism_pack();
|
1155
|
+
}
|