prism 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
@@ -1,7 +1,7 @@
1
1
  #include "prism/extension.h"
2
2
 
3
- // NOTE: this file should contain only bindings.
4
- // All non-trivial logic should be in librubyparser so it can be shared its the various callers.
3
+ // NOTE: this file should contain only bindings. All non-trivial logic should be
4
+ // in librubyparser so it can be shared its the various callers.
5
5
 
6
6
  VALUE rb_cPrism;
7
7
  VALUE rb_cPrismNode;
@@ -10,18 +10,30 @@ VALUE rb_cPrismToken;
10
10
  VALUE rb_cPrismLocation;
11
11
 
12
12
  VALUE rb_cPrismComment;
13
+ VALUE rb_cPrismInlineComment;
14
+ VALUE rb_cPrismEmbDocComment;
15
+ VALUE rb_cPrismDATAComment;
13
16
  VALUE rb_cPrismMagicComment;
14
17
  VALUE rb_cPrismParseError;
15
18
  VALUE rb_cPrismParseWarning;
16
19
  VALUE rb_cPrismParseResult;
17
20
 
21
+ ID rb_option_id_filepath;
22
+ ID rb_option_id_encoding;
23
+ ID rb_option_id_line;
24
+ ID rb_option_id_frozen_string_literal;
25
+ ID rb_option_id_verbose;
26
+ ID rb_option_id_scopes;
27
+
18
28
  /******************************************************************************/
19
29
  /* IO of Ruby code */
20
30
  /******************************************************************************/
21
31
 
22
- // Check if the given VALUE is a string. If it's nil, then return NULL. If it's
23
- // not a string, then raise a type error. Otherwise return the VALUE as a C
24
- // string.
32
+ /**
33
+ * Check if the given VALUE is a string. If it's nil, then return NULL. If it's
34
+ * not a string, then raise a type error. Otherwise return the VALUE as a C
35
+ * string.
36
+ */
25
37
  static const char *
26
38
  check_string(VALUE value) {
27
39
  // If the value is nil, then we don't need to do anything.
@@ -38,7 +50,9 @@ check_string(VALUE value) {
38
50
  return RSTRING_PTR(value);
39
51
  }
40
52
 
41
- // Load the contents and size of the given string into the given pm_string_t.
53
+ /**
54
+ * Load the contents and size of the given string into the given pm_string_t.
55
+ */
42
56
  static void
43
57
  input_load_string(pm_string_t *input, VALUE string) {
44
58
  // Check if the string is a string. If it's not, then raise a type error.
@@ -49,20 +63,181 @@ input_load_string(pm_string_t *input, VALUE string) {
49
63
  pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
50
64
  }
51
65
 
66
+ /******************************************************************************/
67
+ /* Building C options from Ruby options */
68
+ /******************************************************************************/
69
+
70
+ /**
71
+ * Build the scopes associated with the provided Ruby keyword value.
72
+ */
73
+ static void
74
+ build_options_scopes(pm_options_t *options, VALUE scopes) {
75
+ // Check if the value is an array. If it's not, then raise a type error.
76
+ if (!RB_TYPE_P(scopes, T_ARRAY)) {
77
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
78
+ }
79
+
80
+ // Initialize the scopes array.
81
+ size_t scopes_count = RARRAY_LEN(scopes);
82
+ pm_options_scopes_init(options, scopes_count);
83
+
84
+ // Iterate over the scopes and add them to the options.
85
+ for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
86
+ VALUE scope = rb_ary_entry(scopes, scope_index);
87
+
88
+ // Check that the scope is an array. If it's not, then raise a type
89
+ // error.
90
+ if (!RB_TYPE_P(scope, T_ARRAY)) {
91
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
92
+ }
93
+
94
+ // Initialize the scope array.
95
+ size_t locals_count = RARRAY_LEN(scope);
96
+ pm_options_scope_t *options_scope = &options->scopes[scope_index];
97
+ pm_options_scope_init(options_scope, locals_count);
98
+
99
+ // Iterate over the locals and add them to the scope.
100
+ for (size_t local_index = 0; local_index < locals_count; local_index++) {
101
+ VALUE local = rb_ary_entry(scope, local_index);
102
+
103
+ // Check that the local is a symbol. If it's not, then raise a
104
+ // type error.
105
+ if (!RB_TYPE_P(local, T_SYMBOL)) {
106
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
107
+ }
108
+
109
+ // Add the local to the scope.
110
+ pm_string_t *scope_local = &options_scope->locals[local_index];
111
+ const char *name = rb_id2name(SYM2ID(local));
112
+ pm_string_constant_init(scope_local, name, strlen(name));
113
+ }
114
+ }
115
+ }
116
+
117
+ /**
118
+ * An iterator function that is called for each key-value in the keywords hash.
119
+ */
120
+ static int
121
+ build_options_i(VALUE key, VALUE value, VALUE argument) {
122
+ pm_options_t *options = (pm_options_t *) argument;
123
+ ID key_id = SYM2ID(key);
124
+
125
+ if (key_id == rb_option_id_filepath) {
126
+ if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
127
+ } else if (key_id == rb_option_id_encoding) {
128
+ if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
129
+ } else if (key_id == rb_option_id_line) {
130
+ if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
131
+ } else if (key_id == rb_option_id_frozen_string_literal) {
132
+ if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
133
+ } else if (key_id == rb_option_id_verbose) {
134
+ pm_options_suppress_warnings_set(options, value != Qtrue);
135
+ } else if (key_id == rb_option_id_scopes) {
136
+ if (!NIL_P(value)) build_options_scopes(options, value);
137
+ } else {
138
+ rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
139
+ }
140
+
141
+ return ST_CONTINUE;
142
+ }
143
+
144
+ /**
145
+ * We need a struct here to pass through rb_protect and it has to be a single
146
+ * value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this
147
+ * through as an opaque pointer and cast it on both sides.
148
+ */
149
+ struct build_options_data {
150
+ pm_options_t *options;
151
+ VALUE keywords;
152
+ };
153
+
154
+ /**
155
+ * Build the set of options from the given keywords. Note that this can raise a
156
+ * Ruby error if the options are not valid.
157
+ */
158
+ static VALUE
159
+ build_options(VALUE argument) {
160
+ struct build_options_data *data = (struct build_options_data *) argument;
161
+ rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
162
+ return Qnil;
163
+ }
164
+
165
+ /**
166
+ * Extract the options from the given keyword arguments.
167
+ */
168
+ static void
169
+ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
170
+ if (!NIL_P(keywords)) {
171
+ struct build_options_data data = { .options = options, .keywords = keywords };
172
+ struct build_options_data *argument = &data;
173
+
174
+ int state = 0;
175
+ rb_protect(build_options, (VALUE) argument, &state);
176
+
177
+ if (state != 0) {
178
+ pm_options_free(options);
179
+ rb_jump_tag(state);
180
+ }
181
+ }
182
+
183
+ if (!NIL_P(filepath)) {
184
+ if (!RB_TYPE_P(filepath, T_STRING)) {
185
+ pm_options_free(options);
186
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
187
+ }
188
+
189
+ pm_options_filepath_set(options, RSTRING_PTR(filepath));
190
+ }
191
+ }
192
+
193
+ /**
194
+ * Read options for methods that look like (source, **options).
195
+ */
196
+ static void
197
+ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
198
+ VALUE string;
199
+ VALUE keywords;
200
+ rb_scan_args(argc, argv, "1:", &string, &keywords);
201
+
202
+ extract_options(options, Qnil, keywords);
203
+ input_load_string(input, string);
204
+ }
205
+
206
+ /**
207
+ * Read options for methods that look like (filepath, **options).
208
+ */
209
+ static bool
210
+ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
211
+ VALUE filepath;
212
+ VALUE keywords;
213
+ rb_scan_args(argc, argv, "1:", &filepath, &keywords);
214
+
215
+ extract_options(options, filepath, keywords);
216
+
217
+ if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
218
+ pm_options_free(options);
219
+ return false;
220
+ }
221
+
222
+ return true;
223
+ }
224
+
52
225
  /******************************************************************************/
53
226
  /* Serializing the AST */
54
227
  /******************************************************************************/
55
228
 
56
- // Dump the AST corresponding to the given input to a string.
229
+ /**
230
+ * Dump the AST corresponding to the given input to a string.
231
+ */
57
232
  static VALUE
58
- dump_input(pm_string_t *input, const char *filepath) {
233
+ dump_input(pm_string_t *input, const pm_options_t *options) {
59
234
  pm_buffer_t buffer;
60
235
  if (!pm_buffer_init(&buffer)) {
61
236
  rb_raise(rb_eNoMemError, "failed to allocate memory");
62
237
  }
63
238
 
64
239
  pm_parser_t parser;
65
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
240
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
66
241
 
67
242
  pm_node_t *node = pm_parse(&parser);
68
243
  pm_serialize(&parser, node, &buffer);
@@ -75,15 +250,18 @@ dump_input(pm_string_t *input, const char *filepath) {
75
250
  return result;
76
251
  }
77
252
 
78
- // Dump the AST corresponding to the given string to a string.
253
+ /**
254
+ * call-seq:
255
+ * Prism::dump(source, **options) -> String
256
+ *
257
+ * Dump the AST corresponding to the given string to a string. For supported
258
+ * options, see Prism::parse.
259
+ */
79
260
  static VALUE
80
261
  dump(int argc, VALUE *argv, VALUE self) {
81
- VALUE string;
82
- VALUE filepath;
83
- rb_scan_args(argc, argv, "11", &string, &filepath);
84
-
85
262
  pm_string_t input;
86
- input_load_string(&input, string);
263
+ pm_options_t options = { 0 };
264
+ string_options(argc, argv, &input, &options);
87
265
 
88
266
  #ifdef PRISM_DEBUG_MODE_BUILD
89
267
  size_t length = pm_string_length(&input);
@@ -92,25 +270,34 @@ dump(int argc, VALUE *argv, VALUE self) {
92
270
  pm_string_constant_init(&input, dup, length);
93
271
  #endif
94
272
 
95
- VALUE value = dump_input(&input, check_string(filepath));
273
+ VALUE value = dump_input(&input, &options);
96
274
 
97
275
  #ifdef PRISM_DEBUG_MODE_BUILD
98
276
  free(dup);
99
277
  #endif
100
278
 
279
+ pm_string_free(&input);
280
+ pm_options_free(&options);
281
+
101
282
  return value;
102
283
  }
103
284
 
104
- // Dump the AST corresponding to the given file to a string.
285
+ /**
286
+ * call-seq:
287
+ * Prism::dump_file(filepath, **options) -> String
288
+ *
289
+ * Dump the AST corresponding to the given file to a string. For supported
290
+ * options, see Prism::parse.
291
+ */
105
292
  static VALUE
106
- dump_file(VALUE self, VALUE filepath) {
293
+ dump_file(int argc, VALUE *argv, VALUE self) {
107
294
  pm_string_t input;
295
+ pm_options_t options = { 0 };
296
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
108
297
 
109
- const char *checked = check_string(filepath);
110
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
111
-
112
- VALUE value = dump_input(&input, checked);
298
+ VALUE value = dump_input(&input, &options);
113
299
  pm_string_free(&input);
300
+ pm_options_free(&options);
114
301
 
115
302
  return value;
116
303
  }
@@ -119,7 +306,9 @@ dump_file(VALUE self, VALUE filepath) {
119
306
  /* Extracting values for the parse result */
120
307
  /******************************************************************************/
121
308
 
122
- // Extract the comments out of the parser into an array.
309
+ /**
310
+ * Extract the comments out of the parser into an array.
311
+ */
123
312
  static VALUE
124
313
  parser_comments(pm_parser_t *parser, VALUE source) {
125
314
  VALUE comments = rb_ary_new();
@@ -134,27 +323,26 @@ parser_comments(pm_parser_t *parser, VALUE source) {
134
323
  VALUE type;
135
324
  switch (comment->type) {
136
325
  case PM_COMMENT_INLINE:
137
- type = ID2SYM(rb_intern("inline"));
326
+ type = rb_cPrismInlineComment;
138
327
  break;
139
328
  case PM_COMMENT_EMBDOC:
140
- type = ID2SYM(rb_intern("embdoc"));
329
+ type = rb_cPrismEmbDocComment;
141
330
  break;
142
331
  case PM_COMMENT___END__:
143
- type = ID2SYM(rb_intern("__END__"));
144
- break;
145
- default:
146
- type = ID2SYM(rb_intern("inline"));
332
+ type = rb_cPrismDATAComment;
147
333
  break;
148
334
  }
149
335
 
150
- VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
151
- rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
336
+ VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
337
+ rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
152
338
  }
153
339
 
154
340
  return comments;
155
341
  }
156
342
 
157
- // Extract the magic comments out of the parser into an array.
343
+ /**
344
+ * Extract the magic comments out of the parser into an array.
345
+ */
158
346
  static VALUE
159
347
  parser_magic_comments(pm_parser_t *parser, VALUE source) {
160
348
  VALUE magic_comments = rb_ary_new();
@@ -183,7 +371,9 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
183
371
  return magic_comments;
184
372
  }
185
373
 
186
- // Extract the errors out of the parser into an array.
374
+ /**
375
+ * Extract the errors out of the parser into an array.
376
+ */
187
377
  static VALUE
188
378
  parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
189
379
  VALUE errors = rb_ary_new();
@@ -207,7 +397,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
207
397
  return errors;
208
398
  }
209
399
 
210
- // Extract the warnings out of the parser into an array.
400
+ /**
401
+ * Extract the warnings out of the parser into an array.
402
+ */
211
403
  static VALUE
212
404
  parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
213
405
  VALUE warnings = rb_ary_new();
@@ -235,18 +427,22 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
235
427
  /* Lexing Ruby code */
236
428
  /******************************************************************************/
237
429
 
238
- // This struct gets stored in the parser and passed in to the lex callback any
239
- // time a new token is found. We use it to store the necessary information to
240
- // initialize a Token instance.
430
+ /**
431
+ * This struct gets stored in the parser and passed in to the lex callback any
432
+ * time a new token is found. We use it to store the necessary information to
433
+ * initialize a Token instance.
434
+ */
241
435
  typedef struct {
242
436
  VALUE source;
243
437
  VALUE tokens;
244
438
  rb_encoding *encoding;
245
439
  } parse_lex_data_t;
246
440
 
247
- // This is passed as a callback to the parser. It gets called every time a new
248
- // token is found. Once found, we initialize a new instance of Token and push it
249
- // onto the tokens array.
441
+ /**
442
+ * This is passed as a callback to the parser. It gets called every time a new
443
+ * token is found. Once found, we initialize a new instance of Token and push it
444
+ * onto the tokens array.
445
+ */
250
446
  static void
251
447
  parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
252
448
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -258,9 +454,11 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
258
454
  rb_ary_push(parse_lex_data->tokens, yields);
259
455
  }
260
456
 
261
- // This is called whenever the encoding changes based on the magic comment at
262
- // the top of the file. We use it to update the encoding that we are using to
263
- // create tokens.
457
+ /**
458
+ * This is called whenever the encoding changes based on the magic comment at
459
+ * the top of the file. We use it to update the encoding that we are using to
460
+ * create tokens.
461
+ */
264
462
  static void
265
463
  parse_lex_encoding_changed_callback(pm_parser_t *parser) {
266
464
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -281,17 +479,19 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
281
479
  }
282
480
  }
283
481
 
284
- // Parse the given input and return a ParseResult containing just the tokens or
285
- // the nodes and tokens.
482
+ /**
483
+ * Parse the given input and return a ParseResult containing just the tokens or
484
+ * the nodes and tokens.
485
+ */
286
486
  static VALUE
287
- parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
487
+ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
288
488
  pm_parser_t parser;
289
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
489
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
290
490
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
291
491
 
292
492
  VALUE offsets = rb_ary_new();
293
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
294
- VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
493
+ VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
494
+ VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
295
495
 
296
496
  parse_lex_data_t parse_lex_data = {
297
497
  .source = source,
@@ -338,29 +538,42 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
338
538
  return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
339
539
  }
340
540
 
341
- // Return an array of tokens corresponding to the given string.
541
+ /**
542
+ * call-seq:
543
+ * Prism::lex(source, **options) -> Array
544
+ *
545
+ * Return an array of Token instances corresponding to the given string. For
546
+ * supported options, see Prism::parse.
547
+ */
342
548
  static VALUE
343
549
  lex(int argc, VALUE *argv, VALUE self) {
344
- VALUE string;
345
- VALUE filepath;
346
- rb_scan_args(argc, argv, "11", &string, &filepath);
347
-
348
550
  pm_string_t input;
349
- input_load_string(&input, string);
551
+ pm_options_t options = { 0 };
552
+ string_options(argc, argv, &input, &options);
350
553
 
351
- return parse_lex_input(&input, check_string(filepath), false);
554
+ VALUE result = parse_lex_input(&input, &options, false);
555
+ pm_string_free(&input);
556
+ pm_options_free(&options);
557
+
558
+ return result;
352
559
  }
353
560
 
354
- // Return an array of tokens corresponding to the given file.
561
+ /**
562
+ * call-seq:
563
+ * Prism::lex_file(filepath, **options) -> Array
564
+ *
565
+ * Return an array of Token instances corresponding to the given file. For
566
+ * supported options, see Prism::parse.
567
+ */
355
568
  static VALUE
356
- lex_file(VALUE self, VALUE filepath) {
569
+ lex_file(int argc, VALUE *argv, VALUE self) {
357
570
  pm_string_t input;
571
+ pm_options_t options = { 0 };
572
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
358
573
 
359
- const char *checked = check_string(filepath);
360
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
361
-
362
- VALUE value = parse_lex_input(&input, checked, false);
574
+ VALUE value = parse_lex_input(&input, &options, false);
363
575
  pm_string_free(&input);
576
+ pm_options_free(&options);
364
577
 
365
578
  return value;
366
579
  }
@@ -369,11 +582,13 @@ lex_file(VALUE self, VALUE filepath) {
369
582
  /* Parsing Ruby code */
370
583
  /******************************************************************************/
371
584
 
372
- // Parse the given input and return a ParseResult instance.
585
+ /**
586
+ * Parse the given input and return a ParseResult instance.
587
+ */
373
588
  static VALUE
374
- parse_input(pm_string_t *input, const char *filepath) {
589
+ parse_input(pm_string_t *input, const pm_options_t *options) {
375
590
  pm_parser_t parser;
376
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
591
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
377
592
 
378
593
  pm_node_t *node = pm_parse(&parser);
379
594
  rb_encoding *encoding = rb_enc_find(parser.encoding.name);
@@ -396,33 +611,31 @@ parse_input(pm_string_t *input, const char *filepath) {
396
611
  return result;
397
612
  }
398
613
 
399
- // Parse the given input and return an array of Comment objects.
400
- static VALUE
401
- parse_input_comments(pm_string_t *input, const char *filepath) {
402
- pm_parser_t parser;
403
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
404
-
405
- pm_node_t *node = pm_parse(&parser);
406
- rb_encoding *encoding = rb_enc_find(parser.encoding.name);
407
-
408
- VALUE source = pm_source_new(&parser, encoding);
409
- VALUE comments = parser_comments(&parser, source);
410
-
411
- pm_node_destroy(&parser, node);
412
- pm_parser_free(&parser);
413
-
414
- return comments;
415
- }
416
-
417
- // Parse the given string and return a ParseResult instance.
614
+ /**
615
+ * call-seq:
616
+ * Prism::parse(source, **options) -> ParseResult
617
+ *
618
+ * Parse the given string and return a ParseResult instance. The options that
619
+ * are supported are:
620
+ *
621
+ * * `filepath` - the filepath of the source being parsed. This should be a
622
+ * string or nil
623
+ * * `encoding` - the encoding of the source being parsed. This should be an
624
+ * encoding or nil
625
+ * * `line` - the line number that the parse starts on. This should be an
626
+ * integer or nil. Note that this is 1-indexed.
627
+ * * `frozen_string_literal` - whether or not the frozen string literal pragma
628
+ * has been set. This should be a boolean or nil.
629
+ * * `verbose` - the current level of verbosity. This controls whether or not
630
+ * the parser emits warnings. This should be a boolean or nil.
631
+ * * `scopes` - the locals that are in scope surrounding the code that is being
632
+ * parsed. This should be an array of arrays of symbols or nil.
633
+ */
418
634
  static VALUE
419
635
  parse(int argc, VALUE *argv, VALUE self) {
420
- VALUE string;
421
- VALUE filepath;
422
- rb_scan_args(argc, argv, "11", &string, &filepath);
423
-
424
636
  pm_string_t input;
425
- input_load_string(&input, string);
637
+ pm_options_t options = { 0 };
638
+ string_options(argc, argv, &input, &options);
426
639
 
427
640
  #ifdef PRISM_DEBUG_MODE_BUILD
428
641
  size_t length = pm_string_length(&input);
@@ -431,82 +644,147 @@ parse(int argc, VALUE *argv, VALUE self) {
431
644
  pm_string_constant_init(&input, dup, length);
432
645
  #endif
433
646
 
434
- VALUE value = parse_input(&input, check_string(filepath));
647
+ VALUE value = parse_input(&input, &options);
435
648
 
436
649
  #ifdef PRISM_DEBUG_MODE_BUILD
437
650
  free(dup);
438
651
  #endif
439
652
 
653
+ pm_string_free(&input);
654
+ pm_options_free(&options);
440
655
  return value;
441
656
  }
442
657
 
443
- // Parse the given file and return a ParseResult instance.
658
+ /**
659
+ * call-seq:
660
+ * Prism::parse_file(filepath, **options) -> ParseResult
661
+ *
662
+ * Parse the given file and return a ParseResult instance. For supported
663
+ * options, see Prism::parse.
664
+ */
444
665
  static VALUE
445
- parse_file(VALUE self, VALUE filepath) {
666
+ parse_file(int argc, VALUE *argv, VALUE self) {
446
667
  pm_string_t input;
668
+ pm_options_t options = { 0 };
669
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
447
670
 
448
- const char *checked = check_string(filepath);
449
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
450
-
451
- VALUE value = parse_input(&input, checked);
671
+ VALUE value = parse_input(&input, &options);
452
672
  pm_string_free(&input);
673
+ pm_options_free(&options);
453
674
 
454
675
  return value;
455
676
  }
456
677
 
457
- // Parse the given string and return an array of Comment objects.
678
+ /**
679
+ * Parse the given input and return an array of Comment objects.
680
+ */
458
681
  static VALUE
459
- parse_comments(int argc, VALUE *argv, VALUE self) {
460
- VALUE string;
461
- VALUE filepath;
462
- rb_scan_args(argc, argv, "11", &string, &filepath);
682
+ parse_input_comments(pm_string_t *input, const pm_options_t *options) {
683
+ pm_parser_t parser;
684
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
463
685
 
464
- pm_string_t input;
465
- input_load_string(&input, string);
686
+ pm_node_t *node = pm_parse(&parser);
687
+ rb_encoding *encoding = rb_enc_find(parser.encoding.name);
688
+
689
+ VALUE source = pm_source_new(&parser, encoding);
690
+ VALUE comments = parser_comments(&parser, source);
466
691
 
467
- return parse_input_comments(&input, check_string(filepath));
692
+ pm_node_destroy(&parser, node);
693
+ pm_parser_free(&parser);
694
+
695
+ return comments;
468
696
  }
469
697
 
470
- // Parse the given file and return an array of Comment objects.
698
+ /**
699
+ * call-seq:
700
+ * Prism::parse_comments(source, **options) -> Array
701
+ *
702
+ * Parse the given string and return an array of Comment objects. For supported
703
+ * options, see Prism::parse.
704
+ */
471
705
  static VALUE
472
- parse_file_comments(VALUE self, VALUE filepath) {
706
+ parse_comments(int argc, VALUE *argv, VALUE self) {
473
707
  pm_string_t input;
708
+ pm_options_t options = { 0 };
709
+ string_options(argc, argv, &input, &options);
474
710
 
475
- const char *checked = check_string(filepath);
476
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
711
+ VALUE result = parse_input_comments(&input, &options);
712
+ pm_string_free(&input);
713
+ pm_options_free(&options);
477
714
 
478
- VALUE value = parse_input_comments(&input, checked);
715
+ return result;
716
+ }
717
+
718
+ /**
719
+ * call-seq:
720
+ * Prism::parse_file_comments(filepath, **options) -> Array
721
+ *
722
+ * Parse the given file and return an array of Comment objects. For supported
723
+ * options, see Prism::parse.
724
+ */
725
+ static VALUE
726
+ parse_file_comments(int argc, VALUE *argv, VALUE self) {
727
+ pm_string_t input;
728
+ pm_options_t options = { 0 };
729
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
730
+
731
+ VALUE value = parse_input_comments(&input, &options);
479
732
  pm_string_free(&input);
733
+ pm_options_free(&options);
480
734
 
481
735
  return value;
482
736
  }
483
737
 
484
- // Parse the given string and return a ParseResult instance.
738
+ /**
739
+ * call-seq:
740
+ * Prism::parse_lex(source, **options) -> ParseResult
741
+ *
742
+ * Parse the given string and return a ParseResult instance that contains a
743
+ * 2-element array, where the first element is the AST and the second element is
744
+ * an array of Token instances.
745
+ *
746
+ * This API is only meant to be used in the case where you need both the AST and
747
+ * the tokens. If you only need one or the other, use either Prism::parse or
748
+ * Prism::lex.
749
+ *
750
+ * For supported options, see Prism::parse.
751
+ */
485
752
  static VALUE
486
753
  parse_lex(int argc, VALUE *argv, VALUE self) {
487
- VALUE string;
488
- VALUE filepath;
489
- rb_scan_args(argc, argv, "11", &string, &filepath);
490
-
491
754
  pm_string_t input;
492
- input_load_string(&input, string);
755
+ pm_options_t options = { 0 };
756
+ string_options(argc, argv, &input, &options);
493
757
 
494
- VALUE value = parse_lex_input(&input, check_string(filepath), true);
758
+ VALUE value = parse_lex_input(&input, &options, true);
495
759
  pm_string_free(&input);
760
+ pm_options_free(&options);
496
761
 
497
762
  return value;
498
763
  }
499
764
 
500
- // Parse and lex the given file and return a ParseResult instance.
765
+ /**
766
+ * call-seq:
767
+ * Prism::parse_lex_file(filepath, **options) -> ParseResult
768
+ *
769
+ * Parse the given file and return a ParseResult instance that contains a
770
+ * 2-element array, where the first element is the AST and the second element is
771
+ * an array of Token instances.
772
+ *
773
+ * This API is only meant to be used in the case where you need both the AST and
774
+ * the tokens. If you only need one or the other, use either Prism::parse_file
775
+ * or Prism::lex_file.
776
+ *
777
+ * For supported options, see Prism::parse.
778
+ */
501
779
  static VALUE
502
- parse_lex_file(VALUE self, VALUE filepath) {
780
+ parse_lex_file(int argc, VALUE *argv, VALUE self) {
503
781
  pm_string_t input;
782
+ pm_options_t options = { 0 };
783
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
504
784
 
505
- const char *checked = check_string(filepath);
506
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
507
-
508
- VALUE value = parse_lex_input(&input, checked, true);
785
+ VALUE value = parse_lex_input(&input, &options, true);
509
786
  pm_string_free(&input);
787
+ pm_options_free(&options);
510
788
 
511
789
  return value;
512
790
  }
@@ -515,13 +793,17 @@ parse_lex_file(VALUE self, VALUE filepath) {
515
793
  /* Utility functions exposed to make testing easier */
516
794
  /******************************************************************************/
517
795
 
518
- // Returns an array of strings corresponding to the named capture groups in the
519
- // given source string. If prism was unable to parse the regular expression, this
520
- // function returns nil.
796
+ /**
797
+ * call-seq:
798
+ * Debug::named_captures(source) -> Array
799
+ *
800
+ * Returns an array of strings corresponding to the named capture groups in the
801
+ * given source string. If prism was unable to parse the regular expression,
802
+ * this function returns nil.
803
+ */
521
804
  static VALUE
522
805
  named_captures(VALUE self, VALUE source) {
523
- pm_string_list_t string_list;
524
- pm_string_list_init(&string_list);
806
+ pm_string_list_t string_list = { 0 };
525
807
 
526
808
  if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
527
809
  pm_string_list_free(&string_list);
@@ -538,7 +820,12 @@ named_captures(VALUE self, VALUE source) {
538
820
  return names;
539
821
  }
540
822
 
541
- // Return a hash of information about the given source string's memory usage.
823
+ /**
824
+ * call-seq:
825
+ * Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
826
+ *
827
+ * Return a hash of information about the given source string's memory usage.
828
+ */
542
829
  static VALUE
543
830
  memsize(VALUE self, VALUE string) {
544
831
  pm_parser_t parser;
@@ -559,8 +846,13 @@ memsize(VALUE self, VALUE string) {
559
846
  return result;
560
847
  }
561
848
 
562
- // Parse the file, but do nothing with the result. This is used to profile the
563
- // parser for memory and speed.
849
+ /**
850
+ * call-seq:
851
+ * Debug::profile_file(filepath) -> nil
852
+ *
853
+ * Parse the file, but do nothing with the result. This is used to profile the
854
+ * parser for memory and speed.
855
+ */
564
856
  static VALUE
565
857
  profile_file(VALUE self, VALUE filepath) {
566
858
  pm_string_t input;
@@ -568,39 +860,28 @@ profile_file(VALUE self, VALUE filepath) {
568
860
  const char *checked = check_string(filepath);
569
861
  if (!pm_string_mapped_init(&input, checked)) return Qnil;
570
862
 
863
+ pm_options_t options = { 0 };
864
+ pm_options_filepath_set(&options, checked);
865
+
571
866
  pm_parser_t parser;
572
- pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked);
867
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
573
868
 
574
869
  pm_node_t *node = pm_parse(&parser);
575
870
  pm_node_destroy(&parser, node);
576
871
  pm_parser_free(&parser);
577
-
872
+ pm_options_free(&options);
578
873
  pm_string_free(&input);
579
874
 
580
875
  return Qnil;
581
876
  }
582
877
 
583
- // Parse the file and serialize the result. This is mostly used to test this
584
- // path since it is used by client libraries.
585
- static VALUE
586
- parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
587
- pm_string_t input;
588
- pm_buffer_t buffer;
589
- pm_buffer_init(&buffer);
590
-
591
- const char *checked = check_string(filepath);
592
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
593
-
594
- pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
595
- VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
596
-
597
- pm_string_free(&input);
598
- pm_buffer_free(&buffer);
599
- return result;
600
- }
601
-
602
- // Inspect the AST that represents the given source using the prism pretty print
603
- // as opposed to the Ruby implementation.
878
+ /**
879
+ * call-seq:
880
+ * Debug::inspect_node(source) -> inspected
881
+ *
882
+ * Inspect the AST that represents the given source using the prism pretty print
883
+ * as opposed to the Ruby implementation.
884
+ */
604
885
  static VALUE
605
886
  inspect_node(VALUE self, VALUE source) {
606
887
  pm_string_t input;
@@ -628,6 +909,9 @@ inspect_node(VALUE self, VALUE source) {
628
909
  /* Initialization of the extension */
629
910
  /******************************************************************************/
630
911
 
912
+ /**
913
+ * The init function that Ruby calls when loading this extension.
914
+ */
631
915
  RUBY_FUNC_EXPORTED void
632
916
  Init_prism(void) {
633
917
  // Make sure that the prism library version matches the expected version.
@@ -649,27 +933,46 @@ Init_prism(void) {
649
933
  rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
650
934
  rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
651
935
  rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
936
+ rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
937
+ rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
938
+ rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
652
939
  rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
653
940
  rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
654
941
  rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
655
942
  rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
656
943
 
657
- // Define the version string here so that we can use the constants defined
658
- // in prism.h.
944
+ // Intern all of the options that we support so that we don't have to do it
945
+ // every time we parse.
946
+ rb_option_id_filepath = rb_intern_const("filepath");
947
+ rb_option_id_encoding = rb_intern_const("encoding");
948
+ rb_option_id_line = rb_intern_const("line");
949
+ rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
950
+ rb_option_id_verbose = rb_intern_const("verbose");
951
+ rb_option_id_scopes = rb_intern_const("scopes");
952
+
953
+ /**
954
+ * The version of the prism library.
955
+ */
659
956
  rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
660
- rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CExtension")));
957
+
958
+ /**
959
+ * The backend of the parser that prism is using to parse Ruby code. This
960
+ * can be either :CEXT or :FFI. On runtimes that support C extensions, we
961
+ * default to :CEXT. Otherwise we use :FFI.
962
+ */
963
+ rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
661
964
 
662
965
  // First, the functions that have to do with lexing and parsing.
663
966
  rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
664
- rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
967
+ rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
665
968
  rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
666
- rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
969
+ rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
667
970
  rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
668
- rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
971
+ rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
669
972
  rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
670
- rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, 1);
973
+ rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
671
974
  rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
672
- rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
975
+ rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
673
976
 
674
977
  // Next, the functions that will be called by the parser to perform various
675
978
  // internal tasks. We expose these to make them easier to test.
@@ -677,7 +980,6 @@ Init_prism(void) {
677
980
  rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
678
981
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
679
982
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
680
- rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
681
983
  rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
682
984
 
683
985
  // Next, initialize the other APIs.