prism 0.15.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -1
  3. data/Makefile +12 -0
  4. data/README.md +3 -1
  5. data/config.yml +66 -50
  6. data/docs/configuration.md +2 -0
  7. data/docs/fuzzing.md +1 -1
  8. data/docs/javascript.md +90 -0
  9. data/docs/releasing.md +27 -0
  10. data/docs/ruby_api.md +2 -0
  11. data/docs/serialization.md +28 -29
  12. data/ext/prism/api_node.c +856 -826
  13. data/ext/prism/api_pack.c +20 -9
  14. data/ext/prism/extension.c +494 -119
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +3157 -747
  17. data/include/prism/defines.h +40 -8
  18. data/include/prism/diagnostic.h +36 -3
  19. data/include/prism/enc/pm_encoding.h +119 -28
  20. data/include/prism/node.h +38 -30
  21. data/include/prism/options.h +204 -0
  22. data/include/prism/pack.h +44 -33
  23. data/include/prism/parser.h +445 -199
  24. data/include/prism/prettyprint.h +26 -0
  25. data/include/prism/regexp.h +16 -2
  26. data/include/prism/util/pm_buffer.h +102 -18
  27. data/include/prism/util/pm_char.h +162 -48
  28. data/include/prism/util/pm_constant_pool.h +128 -34
  29. data/include/prism/util/pm_list.h +68 -38
  30. data/include/prism/util/pm_memchr.h +18 -3
  31. data/include/prism/util/pm_newline_list.h +71 -28
  32. data/include/prism/util/pm_state_stack.h +25 -7
  33. data/include/prism/util/pm_string.h +115 -27
  34. data/include/prism/util/pm_string_list.h +25 -6
  35. data/include/prism/util/pm_strncasecmp.h +32 -0
  36. data/include/prism/util/pm_strpbrk.h +31 -17
  37. data/include/prism/version.h +28 -3
  38. data/include/prism.h +229 -36
  39. data/lib/prism/compiler.rb +5 -5
  40. data/lib/prism/debug.rb +43 -13
  41. data/lib/prism/desugar_compiler.rb +1 -1
  42. data/lib/prism/dispatcher.rb +27 -26
  43. data/lib/prism/dsl.rb +16 -16
  44. data/lib/prism/ffi.rb +138 -61
  45. data/lib/prism/lex_compat.rb +26 -16
  46. data/lib/prism/mutation_compiler.rb +11 -11
  47. data/lib/prism/node.rb +426 -227
  48. data/lib/prism/node_ext.rb +23 -16
  49. data/lib/prism/node_inspector.rb +1 -1
  50. data/lib/prism/pack.rb +79 -40
  51. data/lib/prism/parse_result/comments.rb +7 -2
  52. data/lib/prism/parse_result/newlines.rb +4 -0
  53. data/lib/prism/parse_result.rb +157 -21
  54. data/lib/prism/pattern.rb +14 -3
  55. data/lib/prism/ripper_compat.rb +28 -10
  56. data/lib/prism/serialize.rb +935 -307
  57. data/lib/prism/visitor.rb +9 -5
  58. data/lib/prism.rb +20 -2
  59. data/prism.gemspec +11 -2
  60. data/rbi/prism.rbi +7305 -0
  61. data/rbi/prism_static.rbi +196 -0
  62. data/sig/prism.rbs +4468 -0
  63. data/sig/prism_static.rbs +123 -0
  64. data/src/diagnostic.c +56 -53
  65. data/src/enc/pm_big5.c +1 -0
  66. data/src/enc/pm_euc_jp.c +1 -0
  67. data/src/enc/pm_gbk.c +1 -0
  68. data/src/enc/pm_shift_jis.c +1 -0
  69. data/src/enc/pm_tables.c +316 -80
  70. data/src/enc/pm_unicode.c +54 -9
  71. data/src/enc/pm_windows_31j.c +1 -0
  72. data/src/node.c +357 -345
  73. data/src/options.c +170 -0
  74. data/src/prettyprint.c +7697 -1643
  75. data/src/prism.c +1964 -1125
  76. data/src/regexp.c +153 -95
  77. data/src/serialize.c +432 -397
  78. data/src/token_type.c +3 -1
  79. data/src/util/pm_buffer.c +88 -23
  80. data/src/util/pm_char.c +103 -57
  81. data/src/util/pm_constant_pool.c +52 -22
  82. data/src/util/pm_list.c +12 -4
  83. data/src/util/pm_memchr.c +5 -3
  84. data/src/util/pm_newline_list.c +25 -63
  85. data/src/util/pm_state_stack.c +9 -3
  86. data/src/util/pm_string.c +95 -85
  87. data/src/util/pm_string_list.c +14 -15
  88. data/src/util/pm_strncasecmp.c +10 -3
  89. data/src/util/pm_strpbrk.c +25 -19
  90. metadata +12 -3
  91. data/docs/prism.png +0 -0
@@ -1,7 +1,7 @@
1
1
  #include "prism/extension.h"
2
2
 
3
- // NOTE: this file should contain only bindings.
4
- // All non-trivial logic should be in librubyparser so it can be shared its the various callers.
3
+ // NOTE: this file should contain only bindings. All non-trivial logic should be
4
+ // in librubyparser so it can be shared its the various callers.
5
5
 
6
6
  VALUE rb_cPrism;
7
7
  VALUE rb_cPrismNode;
@@ -10,18 +10,30 @@ VALUE rb_cPrismToken;
10
10
  VALUE rb_cPrismLocation;
11
11
 
12
12
  VALUE rb_cPrismComment;
13
+ VALUE rb_cPrismInlineComment;
14
+ VALUE rb_cPrismEmbDocComment;
15
+ VALUE rb_cPrismDATAComment;
13
16
  VALUE rb_cPrismMagicComment;
14
17
  VALUE rb_cPrismParseError;
15
18
  VALUE rb_cPrismParseWarning;
16
19
  VALUE rb_cPrismParseResult;
17
20
 
21
+ ID rb_option_id_filepath;
22
+ ID rb_option_id_encoding;
23
+ ID rb_option_id_line;
24
+ ID rb_option_id_frozen_string_literal;
25
+ ID rb_option_id_verbose;
26
+ ID rb_option_id_scopes;
27
+
18
28
  /******************************************************************************/
19
29
  /* IO of Ruby code */
20
30
  /******************************************************************************/
21
31
 
22
- // Check if the given VALUE is a string. If it's nil, then return NULL. If it's
23
- // not a string, then raise a type error. Otherwise return the VALUE as a C
24
- // string.
32
+ /**
33
+ * Check if the given VALUE is a string. If it's nil, then return NULL. If it's
34
+ * not a string, then raise a type error. Otherwise return the VALUE as a C
35
+ * string.
36
+ */
25
37
  static const char *
26
38
  check_string(VALUE value) {
27
39
  // If the value is nil, then we don't need to do anything.
@@ -38,7 +50,9 @@ check_string(VALUE value) {
38
50
  return RSTRING_PTR(value);
39
51
  }
40
52
 
41
- // Load the contents and size of the given string into the given pm_string_t.
53
+ /**
54
+ * Load the contents and size of the given string into the given pm_string_t.
55
+ */
42
56
  static void
43
57
  input_load_string(pm_string_t *input, VALUE string) {
44
58
  // Check if the string is a string. If it's not, then raise a type error.
@@ -49,20 +63,181 @@ input_load_string(pm_string_t *input, VALUE string) {
49
63
  pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
50
64
  }
51
65
 
66
+ /******************************************************************************/
67
+ /* Building C options from Ruby options */
68
+ /******************************************************************************/
69
+
70
+ /**
71
+ * Build the scopes associated with the provided Ruby keyword value.
72
+ */
73
+ static void
74
+ build_options_scopes(pm_options_t *options, VALUE scopes) {
75
+ // Check if the value is an array. If it's not, then raise a type error.
76
+ if (!RB_TYPE_P(scopes, T_ARRAY)) {
77
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
78
+ }
79
+
80
+ // Initialize the scopes array.
81
+ size_t scopes_count = RARRAY_LEN(scopes);
82
+ pm_options_scopes_init(options, scopes_count);
83
+
84
+ // Iterate over the scopes and add them to the options.
85
+ for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
86
+ VALUE scope = rb_ary_entry(scopes, scope_index);
87
+
88
+ // Check that the scope is an array. If it's not, then raise a type
89
+ // error.
90
+ if (!RB_TYPE_P(scope, T_ARRAY)) {
91
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
92
+ }
93
+
94
+ // Initialize the scope array.
95
+ size_t locals_count = RARRAY_LEN(scope);
96
+ pm_options_scope_t *options_scope = &options->scopes[scope_index];
97
+ pm_options_scope_init(options_scope, locals_count);
98
+
99
+ // Iterate over the locals and add them to the scope.
100
+ for (size_t local_index = 0; local_index < locals_count; local_index++) {
101
+ VALUE local = rb_ary_entry(scope, local_index);
102
+
103
+ // Check that the local is a symbol. If it's not, then raise a
104
+ // type error.
105
+ if (!RB_TYPE_P(local, T_SYMBOL)) {
106
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
107
+ }
108
+
109
+ // Add the local to the scope.
110
+ pm_string_t *scope_local = &options_scope->locals[local_index];
111
+ const char *name = rb_id2name(SYM2ID(local));
112
+ pm_string_constant_init(scope_local, name, strlen(name));
113
+ }
114
+ }
115
+ }
116
+
117
+ /**
118
+ * An iterator function that is called for each key-value in the keywords hash.
119
+ */
120
+ static int
121
+ build_options_i(VALUE key, VALUE value, VALUE argument) {
122
+ pm_options_t *options = (pm_options_t *) argument;
123
+ ID key_id = SYM2ID(key);
124
+
125
+ if (key_id == rb_option_id_filepath) {
126
+ if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
127
+ } else if (key_id == rb_option_id_encoding) {
128
+ if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
129
+ } else if (key_id == rb_option_id_line) {
130
+ if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
131
+ } else if (key_id == rb_option_id_frozen_string_literal) {
132
+ if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
133
+ } else if (key_id == rb_option_id_verbose) {
134
+ pm_options_suppress_warnings_set(options, value != Qtrue);
135
+ } else if (key_id == rb_option_id_scopes) {
136
+ if (!NIL_P(value)) build_options_scopes(options, value);
137
+ } else {
138
+ rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
139
+ }
140
+
141
+ return ST_CONTINUE;
142
+ }
143
+
144
+ /**
145
+ * We need a struct here to pass through rb_protect and it has to be a single
146
+ * value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this
147
+ * through as an opaque pointer and cast it on both sides.
148
+ */
149
+ struct build_options_data {
150
+ pm_options_t *options;
151
+ VALUE keywords;
152
+ };
153
+
154
+ /**
155
+ * Build the set of options from the given keywords. Note that this can raise a
156
+ * Ruby error if the options are not valid.
157
+ */
158
+ static VALUE
159
+ build_options(VALUE argument) {
160
+ struct build_options_data *data = (struct build_options_data *) argument;
161
+ rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
162
+ return Qnil;
163
+ }
164
+
165
+ /**
166
+ * Extract the options from the given keyword arguments.
167
+ */
168
+ static void
169
+ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
170
+ if (!NIL_P(keywords)) {
171
+ struct build_options_data data = { .options = options, .keywords = keywords };
172
+ struct build_options_data *argument = &data;
173
+
174
+ int state = 0;
175
+ rb_protect(build_options, (VALUE) argument, &state);
176
+
177
+ if (state != 0) {
178
+ pm_options_free(options);
179
+ rb_jump_tag(state);
180
+ }
181
+ }
182
+
183
+ if (!NIL_P(filepath)) {
184
+ if (!RB_TYPE_P(filepath, T_STRING)) {
185
+ pm_options_free(options);
186
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
187
+ }
188
+
189
+ pm_options_filepath_set(options, RSTRING_PTR(filepath));
190
+ }
191
+ }
192
+
193
+ /**
194
+ * Read options for methods that look like (source, **options).
195
+ */
196
+ static void
197
+ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
198
+ VALUE string;
199
+ VALUE keywords;
200
+ rb_scan_args(argc, argv, "1:", &string, &keywords);
201
+
202
+ extract_options(options, Qnil, keywords);
203
+ input_load_string(input, string);
204
+ }
205
+
206
+ /**
207
+ * Read options for methods that look like (filepath, **options).
208
+ */
209
+ static bool
210
+ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
211
+ VALUE filepath;
212
+ VALUE keywords;
213
+ rb_scan_args(argc, argv, "1:", &filepath, &keywords);
214
+
215
+ extract_options(options, filepath, keywords);
216
+
217
+ if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
218
+ pm_options_free(options);
219
+ return false;
220
+ }
221
+
222
+ return true;
223
+ }
224
+
52
225
  /******************************************************************************/
53
226
  /* Serializing the AST */
54
227
  /******************************************************************************/
55
228
 
56
- // Dump the AST corresponding to the given input to a string.
229
+ /**
230
+ * Dump the AST corresponding to the given input to a string.
231
+ */
57
232
  static VALUE
58
- dump_input(pm_string_t *input, const char *filepath) {
233
+ dump_input(pm_string_t *input, const pm_options_t *options) {
59
234
  pm_buffer_t buffer;
60
235
  if (!pm_buffer_init(&buffer)) {
61
236
  rb_raise(rb_eNoMemError, "failed to allocate memory");
62
237
  }
63
238
 
64
239
  pm_parser_t parser;
65
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
240
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
66
241
 
67
242
  pm_node_t *node = pm_parse(&parser);
68
243
  pm_serialize(&parser, node, &buffer);
@@ -75,15 +250,18 @@ dump_input(pm_string_t *input, const char *filepath) {
75
250
  return result;
76
251
  }
77
252
 
78
- // Dump the AST corresponding to the given string to a string.
253
+ /**
254
+ * call-seq:
255
+ * Prism::dump(source, **options) -> String
256
+ *
257
+ * Dump the AST corresponding to the given string to a string. For supported
258
+ * options, see Prism::parse.
259
+ */
79
260
  static VALUE
80
261
  dump(int argc, VALUE *argv, VALUE self) {
81
- VALUE string;
82
- VALUE filepath;
83
- rb_scan_args(argc, argv, "11", &string, &filepath);
84
-
85
262
  pm_string_t input;
86
- input_load_string(&input, string);
263
+ pm_options_t options = { 0 };
264
+ string_options(argc, argv, &input, &options);
87
265
 
88
266
  #ifdef PRISM_DEBUG_MODE_BUILD
89
267
  size_t length = pm_string_length(&input);
@@ -92,25 +270,34 @@ dump(int argc, VALUE *argv, VALUE self) {
92
270
  pm_string_constant_init(&input, dup, length);
93
271
  #endif
94
272
 
95
- VALUE value = dump_input(&input, check_string(filepath));
273
+ VALUE value = dump_input(&input, &options);
96
274
 
97
275
  #ifdef PRISM_DEBUG_MODE_BUILD
98
276
  free(dup);
99
277
  #endif
100
278
 
279
+ pm_string_free(&input);
280
+ pm_options_free(&options);
281
+
101
282
  return value;
102
283
  }
103
284
 
104
- // Dump the AST corresponding to the given file to a string.
285
+ /**
286
+ * call-seq:
287
+ * Prism::dump_file(filepath, **options) -> String
288
+ *
289
+ * Dump the AST corresponding to the given file to a string. For supported
290
+ * options, see Prism::parse.
291
+ */
105
292
  static VALUE
106
- dump_file(VALUE self, VALUE filepath) {
293
+ dump_file(int argc, VALUE *argv, VALUE self) {
107
294
  pm_string_t input;
295
+ pm_options_t options = { 0 };
296
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
108
297
 
109
- const char *checked = check_string(filepath);
110
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
111
-
112
- VALUE value = dump_input(&input, checked);
298
+ VALUE value = dump_input(&input, &options);
113
299
  pm_string_free(&input);
300
+ pm_options_free(&options);
114
301
 
115
302
  return value;
116
303
  }
@@ -119,7 +306,9 @@ dump_file(VALUE self, VALUE filepath) {
119
306
  /* Extracting values for the parse result */
120
307
  /******************************************************************************/
121
308
 
122
- // Extract the comments out of the parser into an array.
309
+ /**
310
+ * Extract the comments out of the parser into an array.
311
+ */
123
312
  static VALUE
124
313
  parser_comments(pm_parser_t *parser, VALUE source) {
125
314
  VALUE comments = rb_ary_new();
@@ -134,27 +323,26 @@ parser_comments(pm_parser_t *parser, VALUE source) {
134
323
  VALUE type;
135
324
  switch (comment->type) {
136
325
  case PM_COMMENT_INLINE:
137
- type = ID2SYM(rb_intern("inline"));
326
+ type = rb_cPrismInlineComment;
138
327
  break;
139
328
  case PM_COMMENT_EMBDOC:
140
- type = ID2SYM(rb_intern("embdoc"));
329
+ type = rb_cPrismEmbDocComment;
141
330
  break;
142
331
  case PM_COMMENT___END__:
143
- type = ID2SYM(rb_intern("__END__"));
144
- break;
145
- default:
146
- type = ID2SYM(rb_intern("inline"));
332
+ type = rb_cPrismDATAComment;
147
333
  break;
148
334
  }
149
335
 
150
- VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
151
- rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
336
+ VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
337
+ rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
152
338
  }
153
339
 
154
340
  return comments;
155
341
  }
156
342
 
157
- // Extract the magic comments out of the parser into an array.
343
+ /**
344
+ * Extract the magic comments out of the parser into an array.
345
+ */
158
346
  static VALUE
159
347
  parser_magic_comments(pm_parser_t *parser, VALUE source) {
160
348
  VALUE magic_comments = rb_ary_new();
@@ -183,7 +371,9 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
183
371
  return magic_comments;
184
372
  }
185
373
 
186
- // Extract the errors out of the parser into an array.
374
+ /**
375
+ * Extract the errors out of the parser into an array.
376
+ */
187
377
  static VALUE
188
378
  parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
189
379
  VALUE errors = rb_ary_new();
@@ -207,7 +397,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
207
397
  return errors;
208
398
  }
209
399
 
210
- // Extract the warnings out of the parser into an array.
400
+ /**
401
+ * Extract the warnings out of the parser into an array.
402
+ */
211
403
  static VALUE
212
404
  parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
213
405
  VALUE warnings = rb_ary_new();
@@ -235,18 +427,22 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
235
427
  /* Lexing Ruby code */
236
428
  /******************************************************************************/
237
429
 
238
- // This struct gets stored in the parser and passed in to the lex callback any
239
- // time a new token is found. We use it to store the necessary information to
240
- // initialize a Token instance.
430
+ /**
431
+ * This struct gets stored in the parser and passed in to the lex callback any
432
+ * time a new token is found. We use it to store the necessary information to
433
+ * initialize a Token instance.
434
+ */
241
435
  typedef struct {
242
436
  VALUE source;
243
437
  VALUE tokens;
244
438
  rb_encoding *encoding;
245
439
  } parse_lex_data_t;
246
440
 
247
- // This is passed as a callback to the parser. It gets called every time a new
248
- // token is found. Once found, we initialize a new instance of Token and push it
249
- // onto the tokens array.
441
+ /**
442
+ * This is passed as a callback to the parser. It gets called every time a new
443
+ * token is found. Once found, we initialize a new instance of Token and push it
444
+ * onto the tokens array.
445
+ */
250
446
  static void
251
447
  parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
252
448
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -258,9 +454,11 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
258
454
  rb_ary_push(parse_lex_data->tokens, yields);
259
455
  }
260
456
 
261
- // This is called whenever the encoding changes based on the magic comment at
262
- // the top of the file. We use it to update the encoding that we are using to
263
- // create tokens.
457
+ /**
458
+ * This is called whenever the encoding changes based on the magic comment at
459
+ * the top of the file. We use it to update the encoding that we are using to
460
+ * create tokens.
461
+ */
264
462
  static void
265
463
  parse_lex_encoding_changed_callback(pm_parser_t *parser) {
266
464
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -281,17 +479,19 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
281
479
  }
282
480
  }
283
481
 
284
- // Parse the given input and return a ParseResult containing just the tokens or
285
- // the nodes and tokens.
482
+ /**
483
+ * Parse the given input and return a ParseResult containing just the tokens or
484
+ * the nodes and tokens.
485
+ */
286
486
  static VALUE
287
- parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
487
+ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
288
488
  pm_parser_t parser;
289
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
489
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
290
490
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
291
491
 
292
492
  VALUE offsets = rb_ary_new();
293
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
294
- VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
493
+ VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
494
+ VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
295
495
 
296
496
  parse_lex_data_t parse_lex_data = {
297
497
  .source = source,
@@ -338,29 +538,42 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
338
538
  return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
339
539
  }
340
540
 
341
- // Return an array of tokens corresponding to the given string.
541
+ /**
542
+ * call-seq:
543
+ * Prism::lex(source, **options) -> Array
544
+ *
545
+ * Return an array of Token instances corresponding to the given string. For
546
+ * supported options, see Prism::parse.
547
+ */
342
548
  static VALUE
343
549
  lex(int argc, VALUE *argv, VALUE self) {
344
- VALUE string;
345
- VALUE filepath;
346
- rb_scan_args(argc, argv, "11", &string, &filepath);
347
-
348
550
  pm_string_t input;
349
- input_load_string(&input, string);
551
+ pm_options_t options = { 0 };
552
+ string_options(argc, argv, &input, &options);
350
553
 
351
- return parse_lex_input(&input, check_string(filepath), false);
554
+ VALUE result = parse_lex_input(&input, &options, false);
555
+ pm_string_free(&input);
556
+ pm_options_free(&options);
557
+
558
+ return result;
352
559
  }
353
560
 
354
- // Return an array of tokens corresponding to the given file.
561
+ /**
562
+ * call-seq:
563
+ * Prism::lex_file(filepath, **options) -> Array
564
+ *
565
+ * Return an array of Token instances corresponding to the given file. For
566
+ * supported options, see Prism::parse.
567
+ */
355
568
  static VALUE
356
- lex_file(VALUE self, VALUE filepath) {
569
+ lex_file(int argc, VALUE *argv, VALUE self) {
357
570
  pm_string_t input;
571
+ pm_options_t options = { 0 };
572
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
358
573
 
359
- const char *checked = check_string(filepath);
360
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
361
-
362
- VALUE value = parse_lex_input(&input, checked, false);
574
+ VALUE value = parse_lex_input(&input, &options, false);
363
575
  pm_string_free(&input);
576
+ pm_options_free(&options);
364
577
 
365
578
  return value;
366
579
  }
@@ -369,11 +582,13 @@ lex_file(VALUE self, VALUE filepath) {
369
582
  /* Parsing Ruby code */
370
583
  /******************************************************************************/
371
584
 
372
- // Parse the given input and return a ParseResult instance.
585
+ /**
586
+ * Parse the given input and return a ParseResult instance.
587
+ */
373
588
  static VALUE
374
- parse_input(pm_string_t *input, const char *filepath) {
589
+ parse_input(pm_string_t *input, const pm_options_t *options) {
375
590
  pm_parser_t parser;
376
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
591
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
377
592
 
378
593
  pm_node_t *node = pm_parse(&parser);
379
594
  rb_encoding *encoding = rb_enc_find(parser.encoding.name);
@@ -396,15 +611,31 @@ parse_input(pm_string_t *input, const char *filepath) {
396
611
  return result;
397
612
  }
398
613
 
399
- // Parse the given string and return a ParseResult instance.
614
+ /**
615
+ * call-seq:
616
+ * Prism::parse(source, **options) -> ParseResult
617
+ *
618
+ * Parse the given string and return a ParseResult instance. The options that
619
+ * are supported are:
620
+ *
621
+ * * `filepath` - the filepath of the source being parsed. This should be a
622
+ * string or nil
623
+ * * `encoding` - the encoding of the source being parsed. This should be an
624
+ * encoding or nil
625
+ * * `line` - the line number that the parse starts on. This should be an
626
+ * integer or nil. Note that this is 1-indexed.
627
+ * * `frozen_string_literal` - whether or not the frozen string literal pragma
628
+ * has been set. This should be a boolean or nil.
629
+ * * `verbose` - the current level of verbosity. This controls whether or not
630
+ * the parser emits warnings. This should be a boolean or nil.
631
+ * * `scopes` - the locals that are in scope surrounding the code that is being
632
+ * parsed. This should be an array of arrays of symbols or nil.
633
+ */
400
634
  static VALUE
401
635
  parse(int argc, VALUE *argv, VALUE self) {
402
- VALUE string;
403
- VALUE filepath;
404
- rb_scan_args(argc, argv, "11", &string, &filepath);
405
-
406
636
  pm_string_t input;
407
- input_load_string(&input, string);
637
+ pm_options_t options = { 0 };
638
+ string_options(argc, argv, &input, &options);
408
639
 
409
640
  #ifdef PRISM_DEBUG_MODE_BUILD
410
641
  size_t length = pm_string_length(&input);
@@ -413,55 +644,147 @@ parse(int argc, VALUE *argv, VALUE self) {
413
644
  pm_string_constant_init(&input, dup, length);
414
645
  #endif
415
646
 
416
- VALUE value = parse_input(&input, check_string(filepath));
647
+ VALUE value = parse_input(&input, &options);
417
648
 
418
649
  #ifdef PRISM_DEBUG_MODE_BUILD
419
650
  free(dup);
420
651
  #endif
421
652
 
653
+ pm_string_free(&input);
654
+ pm_options_free(&options);
422
655
  return value;
423
656
  }
424
657
 
425
- // Parse the given file and return a ParseResult instance.
658
+ /**
659
+ * call-seq:
660
+ * Prism::parse_file(filepath, **options) -> ParseResult
661
+ *
662
+ * Parse the given file and return a ParseResult instance. For supported
663
+ * options, see Prism::parse.
664
+ */
426
665
  static VALUE
427
- parse_file(VALUE self, VALUE filepath) {
666
+ parse_file(int argc, VALUE *argv, VALUE self) {
428
667
  pm_string_t input;
668
+ pm_options_t options = { 0 };
669
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
429
670
 
430
- const char *checked = check_string(filepath);
431
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
432
-
433
- VALUE value = parse_input(&input, checked);
671
+ VALUE value = parse_input(&input, &options);
434
672
  pm_string_free(&input);
673
+ pm_options_free(&options);
435
674
 
436
675
  return value;
437
676
  }
438
677
 
439
- // Parse the given string and return a ParseResult instance.
678
+ /**
679
+ * Parse the given input and return an array of Comment objects.
680
+ */
440
681
  static VALUE
441
- parse_lex(int argc, VALUE *argv, VALUE self) {
442
- VALUE string;
443
- VALUE filepath;
444
- rb_scan_args(argc, argv, "11", &string, &filepath);
682
+ parse_input_comments(pm_string_t *input, const pm_options_t *options) {
683
+ pm_parser_t parser;
684
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
445
685
 
686
+ pm_node_t *node = pm_parse(&parser);
687
+ rb_encoding *encoding = rb_enc_find(parser.encoding.name);
688
+
689
+ VALUE source = pm_source_new(&parser, encoding);
690
+ VALUE comments = parser_comments(&parser, source);
691
+
692
+ pm_node_destroy(&parser, node);
693
+ pm_parser_free(&parser);
694
+
695
+ return comments;
696
+ }
697
+
698
+ /**
699
+ * call-seq:
700
+ * Prism::parse_comments(source, **options) -> Array
701
+ *
702
+ * Parse the given string and return an array of Comment objects. For supported
703
+ * options, see Prism::parse.
704
+ */
705
+ static VALUE
706
+ parse_comments(int argc, VALUE *argv, VALUE self) {
446
707
  pm_string_t input;
447
- input_load_string(&input, string);
708
+ pm_options_t options = { 0 };
709
+ string_options(argc, argv, &input, &options);
448
710
 
449
- VALUE value = parse_lex_input(&input, check_string(filepath), true);
711
+ VALUE result = parse_input_comments(&input, &options);
450
712
  pm_string_free(&input);
713
+ pm_options_free(&options);
714
+
715
+ return result;
716
+ }
717
+
718
+ /**
719
+ * call-seq:
720
+ * Prism::parse_file_comments(filepath, **options) -> Array
721
+ *
722
+ * Parse the given file and return an array of Comment objects. For supported
723
+ * options, see Prism::parse.
724
+ */
725
+ static VALUE
726
+ parse_file_comments(int argc, VALUE *argv, VALUE self) {
727
+ pm_string_t input;
728
+ pm_options_t options = { 0 };
729
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
730
+
731
+ VALUE value = parse_input_comments(&input, &options);
732
+ pm_string_free(&input);
733
+ pm_options_free(&options);
451
734
 
452
735
  return value;
453
736
  }
454
737
 
455
- // Parse and lex the given file and return a ParseResult instance.
738
+ /**
739
+ * call-seq:
740
+ * Prism::parse_lex(source, **options) -> ParseResult
741
+ *
742
+ * Parse the given string and return a ParseResult instance that contains a
743
+ * 2-element array, where the first element is the AST and the second element is
744
+ * an array of Token instances.
745
+ *
746
+ * This API is only meant to be used in the case where you need both the AST and
747
+ * the tokens. If you only need one or the other, use either Prism::parse or
748
+ * Prism::lex.
749
+ *
750
+ * For supported options, see Prism::parse.
751
+ */
456
752
  static VALUE
457
- parse_lex_file(VALUE self, VALUE filepath) {
753
+ parse_lex(int argc, VALUE *argv, VALUE self) {
458
754
  pm_string_t input;
755
+ pm_options_t options = { 0 };
756
+ string_options(argc, argv, &input, &options);
459
757
 
460
- const char *checked = check_string(filepath);
461
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
758
+ VALUE value = parse_lex_input(&input, &options, true);
759
+ pm_string_free(&input);
760
+ pm_options_free(&options);
462
761
 
463
- VALUE value = parse_lex_input(&input, checked, true);
762
+ return value;
763
+ }
764
+
765
+ /**
766
+ * call-seq:
767
+ * Prism::parse_lex_file(filepath, **options) -> ParseResult
768
+ *
769
+ * Parse the given file and return a ParseResult instance that contains a
770
+ * 2-element array, where the first element is the AST and the second element is
771
+ * an array of Token instances.
772
+ *
773
+ * This API is only meant to be used in the case where you need both the AST and
774
+ * the tokens. If you only need one or the other, use either Prism::parse_file
775
+ * or Prism::lex_file.
776
+ *
777
+ * For supported options, see Prism::parse.
778
+ */
779
+ static VALUE
780
+ parse_lex_file(int argc, VALUE *argv, VALUE self) {
781
+ pm_string_t input;
782
+ pm_options_t options = { 0 };
783
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
784
+
785
+ VALUE value = parse_lex_input(&input, &options, true);
464
786
  pm_string_free(&input);
787
+ pm_options_free(&options);
465
788
 
466
789
  return value;
467
790
  }
@@ -470,13 +793,17 @@ parse_lex_file(VALUE self, VALUE filepath) {
470
793
  /* Utility functions exposed to make testing easier */
471
794
  /******************************************************************************/
472
795
 
473
- // Returns an array of strings corresponding to the named capture groups in the
474
- // given source string. If prism was unable to parse the regular expression, this
475
- // function returns nil.
796
+ /**
797
+ * call-seq:
798
+ * Debug::named_captures(source) -> Array
799
+ *
800
+ * Returns an array of strings corresponding to the named capture groups in the
801
+ * given source string. If prism was unable to parse the regular expression,
802
+ * this function returns nil.
803
+ */
476
804
  static VALUE
477
805
  named_captures(VALUE self, VALUE source) {
478
- pm_string_list_t string_list;
479
- pm_string_list_init(&string_list);
806
+ pm_string_list_t string_list = { 0 };
480
807
 
481
808
  if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
482
809
  pm_string_list_free(&string_list);
@@ -493,7 +820,12 @@ named_captures(VALUE self, VALUE source) {
493
820
  return names;
494
821
  }
495
822
 
496
- // Return a hash of information about the given source string's memory usage.
823
+ /**
824
+ * call-seq:
825
+ * Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
826
+ *
827
+ * Return a hash of information about the given source string's memory usage.
828
+ */
497
829
  static VALUE
498
830
  memsize(VALUE self, VALUE string) {
499
831
  pm_parser_t parser;
@@ -514,8 +846,13 @@ memsize(VALUE self, VALUE string) {
514
846
  return result;
515
847
  }
516
848
 
517
- // Parse the file, but do nothing with the result. This is used to profile the
518
- // parser for memory and speed.
849
+ /**
850
+ * call-seq:
851
+ * Debug::profile_file(filepath) -> nil
852
+ *
853
+ * Parse the file, but do nothing with the result. This is used to profile the
854
+ * parser for memory and speed.
855
+ */
519
856
  static VALUE
520
857
  profile_file(VALUE self, VALUE filepath) {
521
858
  pm_string_t input;
@@ -523,41 +860,58 @@ profile_file(VALUE self, VALUE filepath) {
523
860
  const char *checked = check_string(filepath);
524
861
  if (!pm_string_mapped_init(&input, checked)) return Qnil;
525
862
 
863
+ pm_options_t options = { 0 };
864
+ pm_options_filepath_set(&options, checked);
865
+
526
866
  pm_parser_t parser;
527
- pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked);
867
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
528
868
 
529
869
  pm_node_t *node = pm_parse(&parser);
530
870
  pm_node_destroy(&parser, node);
531
871
  pm_parser_free(&parser);
532
-
872
+ pm_options_free(&options);
533
873
  pm_string_free(&input);
534
874
 
535
875
  return Qnil;
536
876
  }
537
877
 
538
- // Parse the file and serialize the result. This is mostly used to test this
539
- // path since it is used by client libraries.
878
+ /**
879
+ * call-seq:
880
+ * Debug::inspect_node(source) -> inspected
881
+ *
882
+ * Inspect the AST that represents the given source using the prism pretty print
883
+ * as opposed to the Ruby implementation.
884
+ */
540
885
  static VALUE
541
- parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
886
+ inspect_node(VALUE self, VALUE source) {
542
887
  pm_string_t input;
543
- pm_buffer_t buffer;
544
- pm_buffer_init(&buffer);
888
+ input_load_string(&input, source);
545
889
 
546
- const char *checked = check_string(filepath);
547
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
890
+ pm_parser_t parser;
891
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
548
892
 
549
- pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
550
- VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
893
+ pm_node_t *node = pm_parse(&parser);
894
+ pm_buffer_t buffer = { 0 };
895
+
896
+ pm_prettyprint(&buffer, &parser, node);
897
+
898
+ rb_encoding *encoding = rb_enc_find(parser.encoding.name);
899
+ VALUE string = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
551
900
 
552
- pm_string_free(&input);
553
901
  pm_buffer_free(&buffer);
554
- return result;
902
+ pm_node_destroy(&parser, node);
903
+ pm_parser_free(&parser);
904
+
905
+ return string;
555
906
  }
556
907
 
557
908
  /******************************************************************************/
558
909
  /* Initialization of the extension */
559
910
  /******************************************************************************/
560
911
 
912
+ /**
913
+ * The init function that Ruby calls when loading this extension.
914
+ */
561
915
  RUBY_FUNC_EXPORTED void
562
916
  Init_prism(void) {
563
917
  // Make sure that the prism library version matches the expected version.
@@ -579,25 +933,46 @@ Init_prism(void) {
579
933
  rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
580
934
  rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
581
935
  rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
936
+ rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
937
+ rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
938
+ rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
582
939
  rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
583
940
  rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
584
941
  rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
585
942
  rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
586
943
 
587
- // Define the version string here so that we can use the constants defined
588
- // in prism.h.
944
+ // Intern all of the options that we support so that we don't have to do it
945
+ // every time we parse.
946
+ rb_option_id_filepath = rb_intern_const("filepath");
947
+ rb_option_id_encoding = rb_intern_const("encoding");
948
+ rb_option_id_line = rb_intern_const("line");
949
+ rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
950
+ rb_option_id_verbose = rb_intern_const("verbose");
951
+ rb_option_id_scopes = rb_intern_const("scopes");
952
+
953
+ /**
954
+ * The version of the prism library.
955
+ */
589
956
  rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
590
- rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CExtension")));
957
+
958
+ /**
959
+ * The backend of the parser that prism is using to parse Ruby code. This
960
+ * can be either :CEXT or :FFI. On runtimes that support C extensions, we
961
+ * default to :CEXT. Otherwise we use :FFI.
962
+ */
963
+ rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
591
964
 
592
965
  // First, the functions that have to do with lexing and parsing.
593
966
  rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
594
- rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
967
+ rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
595
968
  rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
596
- rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
969
+ rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
597
970
  rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
598
- rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
971
+ rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
972
+ rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
973
+ rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
599
974
  rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
600
- rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
975
+ rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
601
976
 
602
977
  // Next, the functions that will be called by the parser to perform various
603
978
  // internal tasks. We expose these to make them easier to test.
@@ -605,7 +980,7 @@ Init_prism(void) {
605
980
  rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
606
981
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
607
982
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
608
- rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
983
+ rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
609
984
 
610
985
  // Next, initialize the other APIs.
611
986
  Init_prism_api_node();