prism 0.15.1 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -1
  3. data/Makefile +12 -0
  4. data/README.md +3 -1
  5. data/config.yml +66 -50
  6. data/docs/configuration.md +2 -0
  7. data/docs/fuzzing.md +1 -1
  8. data/docs/javascript.md +90 -0
  9. data/docs/releasing.md +27 -0
  10. data/docs/ruby_api.md +2 -0
  11. data/docs/serialization.md +28 -29
  12. data/ext/prism/api_node.c +856 -826
  13. data/ext/prism/api_pack.c +20 -9
  14. data/ext/prism/extension.c +494 -119
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +3157 -747
  17. data/include/prism/defines.h +40 -8
  18. data/include/prism/diagnostic.h +36 -3
  19. data/include/prism/enc/pm_encoding.h +119 -28
  20. data/include/prism/node.h +38 -30
  21. data/include/prism/options.h +204 -0
  22. data/include/prism/pack.h +44 -33
  23. data/include/prism/parser.h +445 -199
  24. data/include/prism/prettyprint.h +26 -0
  25. data/include/prism/regexp.h +16 -2
  26. data/include/prism/util/pm_buffer.h +102 -18
  27. data/include/prism/util/pm_char.h +162 -48
  28. data/include/prism/util/pm_constant_pool.h +128 -34
  29. data/include/prism/util/pm_list.h +68 -38
  30. data/include/prism/util/pm_memchr.h +18 -3
  31. data/include/prism/util/pm_newline_list.h +71 -28
  32. data/include/prism/util/pm_state_stack.h +25 -7
  33. data/include/prism/util/pm_string.h +115 -27
  34. data/include/prism/util/pm_string_list.h +25 -6
  35. data/include/prism/util/pm_strncasecmp.h +32 -0
  36. data/include/prism/util/pm_strpbrk.h +31 -17
  37. data/include/prism/version.h +28 -3
  38. data/include/prism.h +229 -36
  39. data/lib/prism/compiler.rb +5 -5
  40. data/lib/prism/debug.rb +43 -13
  41. data/lib/prism/desugar_compiler.rb +1 -1
  42. data/lib/prism/dispatcher.rb +27 -26
  43. data/lib/prism/dsl.rb +16 -16
  44. data/lib/prism/ffi.rb +138 -61
  45. data/lib/prism/lex_compat.rb +26 -16
  46. data/lib/prism/mutation_compiler.rb +11 -11
  47. data/lib/prism/node.rb +426 -227
  48. data/lib/prism/node_ext.rb +23 -16
  49. data/lib/prism/node_inspector.rb +1 -1
  50. data/lib/prism/pack.rb +79 -40
  51. data/lib/prism/parse_result/comments.rb +7 -2
  52. data/lib/prism/parse_result/newlines.rb +4 -0
  53. data/lib/prism/parse_result.rb +157 -21
  54. data/lib/prism/pattern.rb +14 -3
  55. data/lib/prism/ripper_compat.rb +28 -10
  56. data/lib/prism/serialize.rb +935 -307
  57. data/lib/prism/visitor.rb +9 -5
  58. data/lib/prism.rb +20 -2
  59. data/prism.gemspec +11 -2
  60. data/rbi/prism.rbi +7305 -0
  61. data/rbi/prism_static.rbi +196 -0
  62. data/sig/prism.rbs +4468 -0
  63. data/sig/prism_static.rbs +123 -0
  64. data/src/diagnostic.c +56 -53
  65. data/src/enc/pm_big5.c +1 -0
  66. data/src/enc/pm_euc_jp.c +1 -0
  67. data/src/enc/pm_gbk.c +1 -0
  68. data/src/enc/pm_shift_jis.c +1 -0
  69. data/src/enc/pm_tables.c +316 -80
  70. data/src/enc/pm_unicode.c +54 -9
  71. data/src/enc/pm_windows_31j.c +1 -0
  72. data/src/node.c +357 -345
  73. data/src/options.c +170 -0
  74. data/src/prettyprint.c +7697 -1643
  75. data/src/prism.c +1964 -1125
  76. data/src/regexp.c +153 -95
  77. data/src/serialize.c +432 -397
  78. data/src/token_type.c +3 -1
  79. data/src/util/pm_buffer.c +88 -23
  80. data/src/util/pm_char.c +103 -57
  81. data/src/util/pm_constant_pool.c +52 -22
  82. data/src/util/pm_list.c +12 -4
  83. data/src/util/pm_memchr.c +5 -3
  84. data/src/util/pm_newline_list.c +25 -63
  85. data/src/util/pm_state_stack.c +9 -3
  86. data/src/util/pm_string.c +95 -85
  87. data/src/util/pm_string_list.c +14 -15
  88. data/src/util/pm_strncasecmp.c +10 -3
  89. data/src/util/pm_strpbrk.c +25 -19
  90. metadata +12 -3
  91. data/docs/prism.png +0 -0
@@ -1,7 +1,7 @@
1
1
  #include "prism/extension.h"
2
2
 
3
- // NOTE: this file should contain only bindings.
4
- // All non-trivial logic should be in librubyparser so it can be shared its the various callers.
3
+ // NOTE: this file should contain only bindings. All non-trivial logic should be
4
+ // in librubyparser so it can be shared with its various callers.
5
5
 
6
6
  VALUE rb_cPrism;
7
7
  VALUE rb_cPrismNode;
@@ -10,18 +10,30 @@ VALUE rb_cPrismToken;
10
10
  VALUE rb_cPrismLocation;
11
11
 
12
12
  VALUE rb_cPrismComment;
13
+ VALUE rb_cPrismInlineComment;
14
+ VALUE rb_cPrismEmbDocComment;
15
+ VALUE rb_cPrismDATAComment;
13
16
  VALUE rb_cPrismMagicComment;
14
17
  VALUE rb_cPrismParseError;
15
18
  VALUE rb_cPrismParseWarning;
16
19
  VALUE rb_cPrismParseResult;
17
20
 
21
+ ID rb_option_id_filepath;
22
+ ID rb_option_id_encoding;
23
+ ID rb_option_id_line;
24
+ ID rb_option_id_frozen_string_literal;
25
+ ID rb_option_id_verbose;
26
+ ID rb_option_id_scopes;
27
+
18
28
  /******************************************************************************/
19
29
  /* IO of Ruby code */
20
30
  /******************************************************************************/
21
31
 
22
- // Check if the given VALUE is a string. If it's nil, then return NULL. If it's
23
- // not a string, then raise a type error. Otherwise return the VALUE as a C
24
- // string.
32
+ /**
33
+ * Check if the given VALUE is a string. If it's nil, then return NULL. If it's
34
+ * not a string, then raise a type error. Otherwise return the VALUE as a C
35
+ * string.
36
+ */
25
37
  static const char *
26
38
  check_string(VALUE value) {
27
39
  // If the value is nil, then we don't need to do anything.
@@ -38,7 +50,9 @@ check_string(VALUE value) {
38
50
  return RSTRING_PTR(value);
39
51
  }
40
52
 
41
- // Load the contents and size of the given string into the given pm_string_t.
53
+ /**
54
+ * Load the contents and size of the given string into the given pm_string_t.
55
+ */
42
56
  static void
43
57
  input_load_string(pm_string_t *input, VALUE string) {
44
58
  // Check if the string is a string. If it's not, then raise a type error.
@@ -49,20 +63,181 @@ input_load_string(pm_string_t *input, VALUE string) {
49
63
  pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
50
64
  }
51
65
 
66
+ /******************************************************************************/
67
+ /* Building C options from Ruby options */
68
+ /******************************************************************************/
69
+
70
+ /**
71
+ * Build the scopes associated with the provided Ruby keyword value.
72
+ */
73
+ static void
74
+ build_options_scopes(pm_options_t *options, VALUE scopes) {
75
+ // Check if the value is an array. If it's not, then raise a type error.
76
+ if (!RB_TYPE_P(scopes, T_ARRAY)) {
77
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
78
+ }
79
+
80
+ // Initialize the scopes array.
81
+ size_t scopes_count = RARRAY_LEN(scopes);
82
+ pm_options_scopes_init(options, scopes_count);
83
+
84
+ // Iterate over the scopes and add them to the options.
85
+ for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
86
+ VALUE scope = rb_ary_entry(scopes, scope_index);
87
+
88
+ // Check that the scope is an array. If it's not, then raise a type
89
+ // error.
90
+ if (!RB_TYPE_P(scope, T_ARRAY)) {
91
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
92
+ }
93
+
94
+ // Initialize the scope array.
95
+ size_t locals_count = RARRAY_LEN(scope);
96
+ pm_options_scope_t *options_scope = &options->scopes[scope_index];
97
+ pm_options_scope_init(options_scope, locals_count);
98
+
99
+ // Iterate over the locals and add them to the scope.
100
+ for (size_t local_index = 0; local_index < locals_count; local_index++) {
101
+ VALUE local = rb_ary_entry(scope, local_index);
102
+
103
+ // Check that the local is a symbol. If it's not, then raise a
104
+ // type error.
105
+ if (!RB_TYPE_P(local, T_SYMBOL)) {
106
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
107
+ }
108
+
109
+ // Add the local to the scope.
110
+ pm_string_t *scope_local = &options_scope->locals[local_index];
111
+ const char *name = rb_id2name(SYM2ID(local));
112
+ pm_string_constant_init(scope_local, name, strlen(name));
113
+ }
114
+ }
115
+ }
116
+
117
+ /**
118
+ * An iterator function that is called for each key-value in the keywords hash.
119
+ */
120
+ static int
121
+ build_options_i(VALUE key, VALUE value, VALUE argument) {
122
+ pm_options_t *options = (pm_options_t *) argument;
123
+ ID key_id = SYM2ID(key);
124
+
125
+ if (key_id == rb_option_id_filepath) {
126
+ if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
127
+ } else if (key_id == rb_option_id_encoding) {
128
+ if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
129
+ } else if (key_id == rb_option_id_line) {
130
+ if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
131
+ } else if (key_id == rb_option_id_frozen_string_literal) {
132
+ if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
133
+ } else if (key_id == rb_option_id_verbose) {
134
+ pm_options_suppress_warnings_set(options, value != Qtrue);
135
+ } else if (key_id == rb_option_id_scopes) {
136
+ if (!NIL_P(value)) build_options_scopes(options, value);
137
+ } else {
138
+ rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
139
+ }
140
+
141
+ return ST_CONTINUE;
142
+ }
143
+
144
+ /**
145
+ * We need a struct here to pass through rb_protect and it has to be a single
146
+ * value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this
147
+ * through as an opaque pointer and cast it on both sides.
148
+ */
149
+ struct build_options_data {
150
+ pm_options_t *options;
151
+ VALUE keywords;
152
+ };
153
+
154
+ /**
155
+ * Build the set of options from the given keywords. Note that this can raise a
156
+ * Ruby error if the options are not valid.
157
+ */
158
+ static VALUE
159
+ build_options(VALUE argument) {
160
+ struct build_options_data *data = (struct build_options_data *) argument;
161
+ rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
162
+ return Qnil;
163
+ }
164
+
165
+ /**
166
+ * Extract the options from the given keyword arguments.
167
+ */
168
+ static void
169
+ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
170
+ if (!NIL_P(keywords)) {
171
+ struct build_options_data data = { .options = options, .keywords = keywords };
172
+ struct build_options_data *argument = &data;
173
+
174
+ int state = 0;
175
+ rb_protect(build_options, (VALUE) argument, &state);
176
+
177
+ if (state != 0) {
178
+ pm_options_free(options);
179
+ rb_jump_tag(state);
180
+ }
181
+ }
182
+
183
+ if (!NIL_P(filepath)) {
184
+ if (!RB_TYPE_P(filepath, T_STRING)) {
185
+ pm_options_free(options);
186
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
187
+ }
188
+
189
+ pm_options_filepath_set(options, RSTRING_PTR(filepath));
190
+ }
191
+ }
192
+
193
+ /**
194
+ * Read options for methods that look like (source, **options).
195
+ */
196
+ static void
197
+ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
198
+ VALUE string;
199
+ VALUE keywords;
200
+ rb_scan_args(argc, argv, "1:", &string, &keywords);
201
+
202
+ extract_options(options, Qnil, keywords);
203
+ input_load_string(input, string);
204
+ }
205
+
206
+ /**
207
+ * Read options for methods that look like (filepath, **options).
208
+ */
209
+ static bool
210
+ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
211
+ VALUE filepath;
212
+ VALUE keywords;
213
+ rb_scan_args(argc, argv, "1:", &filepath, &keywords);
214
+
215
+ extract_options(options, filepath, keywords);
216
+
217
+ if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
218
+ pm_options_free(options);
219
+ return false;
220
+ }
221
+
222
+ return true;
223
+ }
224
+
52
225
  /******************************************************************************/
53
226
  /* Serializing the AST */
54
227
  /******************************************************************************/
55
228
 
56
- // Dump the AST corresponding to the given input to a string.
229
+ /**
230
+ * Dump the AST corresponding to the given input to a string.
231
+ */
57
232
  static VALUE
58
- dump_input(pm_string_t *input, const char *filepath) {
233
+ dump_input(pm_string_t *input, const pm_options_t *options) {
59
234
  pm_buffer_t buffer;
60
235
  if (!pm_buffer_init(&buffer)) {
61
236
  rb_raise(rb_eNoMemError, "failed to allocate memory");
62
237
  }
63
238
 
64
239
  pm_parser_t parser;
65
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
240
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
66
241
 
67
242
  pm_node_t *node = pm_parse(&parser);
68
243
  pm_serialize(&parser, node, &buffer);
@@ -75,15 +250,18 @@ dump_input(pm_string_t *input, const char *filepath) {
75
250
  return result;
76
251
  }
77
252
 
78
- // Dump the AST corresponding to the given string to a string.
253
+ /**
254
+ * call-seq:
255
+ * Prism::dump(source, **options) -> String
256
+ *
257
+ * Dump the AST corresponding to the given string to a string. For supported
258
+ * options, see Prism::parse.
259
+ */
79
260
  static VALUE
80
261
  dump(int argc, VALUE *argv, VALUE self) {
81
- VALUE string;
82
- VALUE filepath;
83
- rb_scan_args(argc, argv, "11", &string, &filepath);
84
-
85
262
  pm_string_t input;
86
- input_load_string(&input, string);
263
+ pm_options_t options = { 0 };
264
+ string_options(argc, argv, &input, &options);
87
265
 
88
266
  #ifdef PRISM_DEBUG_MODE_BUILD
89
267
  size_t length = pm_string_length(&input);
@@ -92,25 +270,34 @@ dump(int argc, VALUE *argv, VALUE self) {
92
270
  pm_string_constant_init(&input, dup, length);
93
271
  #endif
94
272
 
95
- VALUE value = dump_input(&input, check_string(filepath));
273
+ VALUE value = dump_input(&input, &options);
96
274
 
97
275
  #ifdef PRISM_DEBUG_MODE_BUILD
98
276
  free(dup);
99
277
  #endif
100
278
 
279
+ pm_string_free(&input);
280
+ pm_options_free(&options);
281
+
101
282
  return value;
102
283
  }
103
284
 
104
- // Dump the AST corresponding to the given file to a string.
285
+ /**
286
+ * call-seq:
287
+ * Prism::dump_file(filepath, **options) -> String
288
+ *
289
+ * Dump the AST corresponding to the given file to a string. For supported
290
+ * options, see Prism::parse.
291
+ */
105
292
  static VALUE
106
- dump_file(VALUE self, VALUE filepath) {
293
+ dump_file(int argc, VALUE *argv, VALUE self) {
107
294
  pm_string_t input;
295
+ pm_options_t options = { 0 };
296
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
108
297
 
109
- const char *checked = check_string(filepath);
110
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
111
-
112
- VALUE value = dump_input(&input, checked);
298
+ VALUE value = dump_input(&input, &options);
113
299
  pm_string_free(&input);
300
+ pm_options_free(&options);
114
301
 
115
302
  return value;
116
303
  }
@@ -119,7 +306,9 @@ dump_file(VALUE self, VALUE filepath) {
119
306
  /* Extracting values for the parse result */
120
307
  /******************************************************************************/
121
308
 
122
- // Extract the comments out of the parser into an array.
309
+ /**
310
+ * Extract the comments out of the parser into an array.
311
+ */
123
312
  static VALUE
124
313
  parser_comments(pm_parser_t *parser, VALUE source) {
125
314
  VALUE comments = rb_ary_new();
@@ -134,27 +323,26 @@ parser_comments(pm_parser_t *parser, VALUE source) {
134
323
  VALUE type;
135
324
  switch (comment->type) {
136
325
  case PM_COMMENT_INLINE:
137
- type = ID2SYM(rb_intern("inline"));
326
+ type = rb_cPrismInlineComment;
138
327
  break;
139
328
  case PM_COMMENT_EMBDOC:
140
- type = ID2SYM(rb_intern("embdoc"));
329
+ type = rb_cPrismEmbDocComment;
141
330
  break;
142
331
  case PM_COMMENT___END__:
143
- type = ID2SYM(rb_intern("__END__"));
144
- break;
145
- default:
146
- type = ID2SYM(rb_intern("inline"));
332
+ type = rb_cPrismDATAComment;
147
333
  break;
148
334
  }
149
335
 
150
- VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
151
- rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
336
+ VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
337
+ rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
152
338
  }
153
339
 
154
340
  return comments;
155
341
  }
156
342
 
157
- // Extract the magic comments out of the parser into an array.
343
+ /**
344
+ * Extract the magic comments out of the parser into an array.
345
+ */
158
346
  static VALUE
159
347
  parser_magic_comments(pm_parser_t *parser, VALUE source) {
160
348
  VALUE magic_comments = rb_ary_new();
@@ -183,7 +371,9 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
183
371
  return magic_comments;
184
372
  }
185
373
 
186
- // Extract the errors out of the parser into an array.
374
+ /**
375
+ * Extract the errors out of the parser into an array.
376
+ */
187
377
  static VALUE
188
378
  parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
189
379
  VALUE errors = rb_ary_new();
@@ -207,7 +397,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
207
397
  return errors;
208
398
  }
209
399
 
210
- // Extract the warnings out of the parser into an array.
400
+ /**
401
+ * Extract the warnings out of the parser into an array.
402
+ */
211
403
  static VALUE
212
404
  parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
213
405
  VALUE warnings = rb_ary_new();
@@ -235,18 +427,22 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
235
427
  /* Lexing Ruby code */
236
428
  /******************************************************************************/
237
429
 
238
- // This struct gets stored in the parser and passed in to the lex callback any
239
- // time a new token is found. We use it to store the necessary information to
240
- // initialize a Token instance.
430
+ /**
431
+ * This struct gets stored in the parser and passed in to the lex callback any
432
+ * time a new token is found. We use it to store the necessary information to
433
+ * initialize a Token instance.
434
+ */
241
435
  typedef struct {
242
436
  VALUE source;
243
437
  VALUE tokens;
244
438
  rb_encoding *encoding;
245
439
  } parse_lex_data_t;
246
440
 
247
- // This is passed as a callback to the parser. It gets called every time a new
248
- // token is found. Once found, we initialize a new instance of Token and push it
249
- // onto the tokens array.
441
+ /**
442
+ * This is passed as a callback to the parser. It gets called every time a new
443
+ * token is found. Once found, we initialize a new instance of Token and push it
444
+ * onto the tokens array.
445
+ */
250
446
  static void
251
447
  parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
252
448
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -258,9 +454,11 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
258
454
  rb_ary_push(parse_lex_data->tokens, yields);
259
455
  }
260
456
 
261
- // This is called whenever the encoding changes based on the magic comment at
262
- // the top of the file. We use it to update the encoding that we are using to
263
- // create tokens.
457
+ /**
458
+ * This is called whenever the encoding changes based on the magic comment at
459
+ * the top of the file. We use it to update the encoding that we are using to
460
+ * create tokens.
461
+ */
264
462
  static void
265
463
  parse_lex_encoding_changed_callback(pm_parser_t *parser) {
266
464
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -281,17 +479,19 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
281
479
  }
282
480
  }
283
481
 
284
- // Parse the given input and return a ParseResult containing just the tokens or
285
- // the nodes and tokens.
482
+ /**
483
+ * Parse the given input and return a ParseResult containing just the tokens or
484
+ * the nodes and tokens.
485
+ */
286
486
  static VALUE
287
- parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
487
+ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
288
488
  pm_parser_t parser;
289
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
489
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
290
490
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
291
491
 
292
492
  VALUE offsets = rb_ary_new();
293
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
294
- VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
493
+ VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
494
+ VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
295
495
 
296
496
  parse_lex_data_t parse_lex_data = {
297
497
  .source = source,
@@ -338,29 +538,42 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
338
538
  return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
339
539
  }
340
540
 
341
- // Return an array of tokens corresponding to the given string.
541
+ /**
542
+ * call-seq:
543
+ * Prism::lex(source, **options) -> ParseResult
544
+ *
545
+ * Return a ParseResult containing the tokens for the given string. For
546
+ * supported options, see Prism::parse.
547
+ */
342
548
  static VALUE
343
549
  lex(int argc, VALUE *argv, VALUE self) {
344
- VALUE string;
345
- VALUE filepath;
346
- rb_scan_args(argc, argv, "11", &string, &filepath);
347
-
348
550
  pm_string_t input;
349
- input_load_string(&input, string);
551
+ pm_options_t options = { 0 };
552
+ string_options(argc, argv, &input, &options);
350
553
 
351
- return parse_lex_input(&input, check_string(filepath), false);
554
+ VALUE result = parse_lex_input(&input, &options, false);
555
+ pm_string_free(&input);
556
+ pm_options_free(&options);
557
+
558
+ return result;
352
559
  }
353
560
 
354
- // Return an array of tokens corresponding to the given file.
561
+ /**
562
+ * call-seq:
563
+ * Prism::lex_file(filepath, **options) -> Array
564
+ *
565
+ * Return an array of Token instances corresponding to the given file. For
566
+ * supported options, see Prism::parse.
567
+ */
355
568
  static VALUE
356
- lex_file(VALUE self, VALUE filepath) {
569
+ lex_file(int argc, VALUE *argv, VALUE self) {
357
570
  pm_string_t input;
571
+ pm_options_t options = { 0 };
572
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
358
573
 
359
- const char *checked = check_string(filepath);
360
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
361
-
362
- VALUE value = parse_lex_input(&input, checked, false);
574
+ VALUE value = parse_lex_input(&input, &options, false);
363
575
  pm_string_free(&input);
576
+ pm_options_free(&options);
364
577
 
365
578
  return value;
366
579
  }
@@ -369,11 +582,13 @@ lex_file(VALUE self, VALUE filepath) {
369
582
  /* Parsing Ruby code */
370
583
  /******************************************************************************/
371
584
 
372
- // Parse the given input and return a ParseResult instance.
585
+ /**
586
+ * Parse the given input and return a ParseResult instance.
587
+ */
373
588
  static VALUE
374
- parse_input(pm_string_t *input, const char *filepath) {
589
+ parse_input(pm_string_t *input, const pm_options_t *options) {
375
590
  pm_parser_t parser;
376
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
591
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
377
592
 
378
593
  pm_node_t *node = pm_parse(&parser);
379
594
  rb_encoding *encoding = rb_enc_find(parser.encoding.name);
@@ -396,15 +611,31 @@ parse_input(pm_string_t *input, const char *filepath) {
396
611
  return result;
397
612
  }
398
613
 
399
- // Parse the given string and return a ParseResult instance.
614
+ /**
615
+ * call-seq:
616
+ * Prism::parse(source, **options) -> ParseResult
617
+ *
618
+ * Parse the given string and return a ParseResult instance. The options that
619
+ * are supported are:
620
+ *
621
+ * * `filepath` - the filepath of the source being parsed. This should be a
622
+ * string or nil
623
+ * * `encoding` - the encoding of the source being parsed. This should be an
624
+ * encoding or nil
625
+ * * `line` - the line number that the parse starts on. This should be an
626
+ * integer or nil. Note that this is 1-indexed.
627
+ * * `frozen_string_literal` - whether or not the frozen string literal pragma
628
+ * has been set. This should be a boolean or nil.
629
+ * * `verbose` - the current level of verbosity. This controls whether or not
630
+ * the parser emits warnings. This should be a boolean or nil.
631
+ * * `scopes` - the locals that are in scope surrounding the code that is being
632
+ * parsed. This should be an array of arrays of symbols or nil.
633
+ */
400
634
  static VALUE
401
635
  parse(int argc, VALUE *argv, VALUE self) {
402
- VALUE string;
403
- VALUE filepath;
404
- rb_scan_args(argc, argv, "11", &string, &filepath);
405
-
406
636
  pm_string_t input;
407
- input_load_string(&input, string);
637
+ pm_options_t options = { 0 };
638
+ string_options(argc, argv, &input, &options);
408
639
 
409
640
  #ifdef PRISM_DEBUG_MODE_BUILD
410
641
  size_t length = pm_string_length(&input);
@@ -413,55 +644,147 @@ parse(int argc, VALUE *argv, VALUE self) {
413
644
  pm_string_constant_init(&input, dup, length);
414
645
  #endif
415
646
 
416
- VALUE value = parse_input(&input, check_string(filepath));
647
+ VALUE value = parse_input(&input, &options);
417
648
 
418
649
  #ifdef PRISM_DEBUG_MODE_BUILD
419
650
  free(dup);
420
651
  #endif
421
652
 
653
+ pm_string_free(&input);
654
+ pm_options_free(&options);
422
655
  return value;
423
656
  }
424
657
 
425
- // Parse the given file and return a ParseResult instance.
658
+ /**
659
+ * call-seq:
660
+ * Prism::parse_file(filepath, **options) -> ParseResult
661
+ *
662
+ * Parse the given file and return a ParseResult instance. For supported
663
+ * options, see Prism::parse.
664
+ */
426
665
  static VALUE
427
- parse_file(VALUE self, VALUE filepath) {
666
+ parse_file(int argc, VALUE *argv, VALUE self) {
428
667
  pm_string_t input;
668
+ pm_options_t options = { 0 };
669
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
429
670
 
430
- const char *checked = check_string(filepath);
431
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
432
-
433
- VALUE value = parse_input(&input, checked);
671
+ VALUE value = parse_input(&input, &options);
434
672
  pm_string_free(&input);
673
+ pm_options_free(&options);
435
674
 
436
675
  return value;
437
676
  }
438
677
 
439
- // Parse the given string and return a ParseResult instance.
678
+ /**
679
+ * Parse the given input and return an array of Comment objects.
680
+ */
440
681
  static VALUE
441
- parse_lex(int argc, VALUE *argv, VALUE self) {
442
- VALUE string;
443
- VALUE filepath;
444
- rb_scan_args(argc, argv, "11", &string, &filepath);
682
+ parse_input_comments(pm_string_t *input, const pm_options_t *options) {
683
+ pm_parser_t parser;
684
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
445
685
 
686
+ pm_node_t *node = pm_parse(&parser);
687
+ rb_encoding *encoding = rb_enc_find(parser.encoding.name);
688
+
689
+ VALUE source = pm_source_new(&parser, encoding);
690
+ VALUE comments = parser_comments(&parser, source);
691
+
692
+ pm_node_destroy(&parser, node);
693
+ pm_parser_free(&parser);
694
+
695
+ return comments;
696
+ }
697
+
698
+ /**
699
+ * call-seq:
700
+ * Prism::parse_comments(source, **options) -> Array
701
+ *
702
+ * Parse the given string and return an array of Comment objects. For supported
703
+ * options, see Prism::parse.
704
+ */
705
+ static VALUE
706
+ parse_comments(int argc, VALUE *argv, VALUE self) {
446
707
  pm_string_t input;
447
- input_load_string(&input, string);
708
+ pm_options_t options = { 0 };
709
+ string_options(argc, argv, &input, &options);
448
710
 
449
- VALUE value = parse_lex_input(&input, check_string(filepath), true);
711
+ VALUE result = parse_input_comments(&input, &options);
450
712
  pm_string_free(&input);
713
+ pm_options_free(&options);
714
+
715
+ return result;
716
+ }
717
+
718
+ /**
719
+ * call-seq:
720
+ * Prism::parse_file_comments(filepath, **options) -> Array
721
+ *
722
+ * Parse the given file and return an array of Comment objects. For supported
723
+ * options, see Prism::parse.
724
+ */
725
+ static VALUE
726
+ parse_file_comments(int argc, VALUE *argv, VALUE self) {
727
+ pm_string_t input;
728
+ pm_options_t options = { 0 };
729
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
730
+
731
+ VALUE value = parse_input_comments(&input, &options);
732
+ pm_string_free(&input);
733
+ pm_options_free(&options);
451
734
 
452
735
  return value;
453
736
  }
454
737
 
455
- // Parse and lex the given file and return a ParseResult instance.
738
+ /**
739
+ * call-seq:
740
+ * Prism::parse_lex(source, **options) -> ParseResult
741
+ *
742
+ * Parse the given string and return a ParseResult instance that contains a
743
+ * 2-element array, where the first element is the AST and the second element is
744
+ * an array of Token instances.
745
+ *
746
+ * This API is only meant to be used in the case where you need both the AST and
747
+ * the tokens. If you only need one or the other, use either Prism::parse or
748
+ * Prism::lex.
749
+ *
750
+ * For supported options, see Prism::parse.
751
+ */
456
752
  static VALUE
457
- parse_lex_file(VALUE self, VALUE filepath) {
753
+ parse_lex(int argc, VALUE *argv, VALUE self) {
458
754
  pm_string_t input;
755
+ pm_options_t options = { 0 };
756
+ string_options(argc, argv, &input, &options);
459
757
 
460
- const char *checked = check_string(filepath);
461
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
758
+ VALUE value = parse_lex_input(&input, &options, true);
759
+ pm_string_free(&input);
760
+ pm_options_free(&options);
462
761
 
463
- VALUE value = parse_lex_input(&input, checked, true);
762
+ return value;
763
+ }
764
+
765
+ /**
766
+ * call-seq:
767
+ * Prism::parse_lex_file(filepath, **options) -> ParseResult
768
+ *
769
+ * Parse the given file and return a ParseResult instance that contains a
770
+ * 2-element array, where the first element is the AST and the second element is
771
+ * an array of Token instances.
772
+ *
773
+ * This API is only meant to be used in the case where you need both the AST and
774
+ * the tokens. If you only need one or the other, use either Prism::parse_file
775
+ * or Prism::lex_file.
776
+ *
777
+ * For supported options, see Prism::parse.
778
+ */
779
+ static VALUE
780
+ parse_lex_file(int argc, VALUE *argv, VALUE self) {
781
+ pm_string_t input;
782
+ pm_options_t options = { 0 };
783
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
784
+
785
+ VALUE value = parse_lex_input(&input, &options, true);
464
786
  pm_string_free(&input);
787
+ pm_options_free(&options);
465
788
 
466
789
  return value;
467
790
  }
@@ -470,13 +793,17 @@ parse_lex_file(VALUE self, VALUE filepath) {
470
793
  /* Utility functions exposed to make testing easier */
471
794
  /******************************************************************************/
472
795
 
473
- // Returns an array of strings corresponding to the named capture groups in the
474
- // given source string. If prism was unable to parse the regular expression, this
475
- // function returns nil.
796
+ /**
797
+ * call-seq:
798
+ * Debug::named_captures(source) -> Array
799
+ *
800
+ * Returns an array of strings corresponding to the named capture groups in the
801
+ * given source string. If prism was unable to parse the regular expression,
802
+ * this function returns nil.
803
+ */
476
804
  static VALUE
477
805
  named_captures(VALUE self, VALUE source) {
478
- pm_string_list_t string_list;
479
- pm_string_list_init(&string_list);
806
+ pm_string_list_t string_list = { 0 };
480
807
 
481
808
  if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
482
809
  pm_string_list_free(&string_list);
@@ -493,7 +820,12 @@ named_captures(VALUE self, VALUE source) {
493
820
  return names;
494
821
  }
495
822
 
496
- // Return a hash of information about the given source string's memory usage.
823
+ /**
824
+ * call-seq:
825
+ * Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
826
+ *
827
+ * Return a hash of information about the given source string's memory usage.
828
+ */
497
829
  static VALUE
498
830
  memsize(VALUE self, VALUE string) {
499
831
  pm_parser_t parser;
@@ -514,8 +846,13 @@ memsize(VALUE self, VALUE string) {
514
846
  return result;
515
847
  }
516
848
 
517
- // Parse the file, but do nothing with the result. This is used to profile the
518
- // parser for memory and speed.
849
+ /**
850
+ * call-seq:
851
+ * Debug::profile_file(filepath) -> nil
852
+ *
853
+ * Parse the file, but do nothing with the result. This is used to profile the
854
+ * parser for memory and speed.
855
+ */
519
856
  static VALUE
520
857
  profile_file(VALUE self, VALUE filepath) {
521
858
  pm_string_t input;
@@ -523,41 +860,58 @@ profile_file(VALUE self, VALUE filepath) {
523
860
  const char *checked = check_string(filepath);
524
861
  if (!pm_string_mapped_init(&input, checked)) return Qnil;
525
862
 
863
+ pm_options_t options = { 0 };
864
+ pm_options_filepath_set(&options, checked);
865
+
526
866
  pm_parser_t parser;
527
- pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked);
867
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
528
868
 
529
869
  pm_node_t *node = pm_parse(&parser);
530
870
  pm_node_destroy(&parser, node);
531
871
  pm_parser_free(&parser);
532
-
872
+ pm_options_free(&options);
533
873
  pm_string_free(&input);
534
874
 
535
875
  return Qnil;
536
876
  }
537
877
 
538
- // Parse the file and serialize the result. This is mostly used to test this
539
- // path since it is used by client libraries.
878
+ /**
879
+ * call-seq:
880
+ * Debug::inspect_node(source) -> inspected
881
+ *
882
+ * Inspect the AST that represents the given source using the prism pretty print
883
+ * as opposed to the Ruby implementation.
884
+ */
540
885
  static VALUE
541
- parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
886
+ inspect_node(VALUE self, VALUE source) {
542
887
  pm_string_t input;
543
- pm_buffer_t buffer;
544
- pm_buffer_init(&buffer);
888
+ input_load_string(&input, source);
545
889
 
546
- const char *checked = check_string(filepath);
547
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
890
+ pm_parser_t parser;
891
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
548
892
 
549
- pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
550
- VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
893
+ pm_node_t *node = pm_parse(&parser);
894
+ pm_buffer_t buffer = { 0 };
895
+
896
+ pm_prettyprint(&buffer, &parser, node);
897
+
898
+ rb_encoding *encoding = rb_enc_find(parser.encoding.name);
899
+ VALUE string = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
551
900
 
552
- pm_string_free(&input);
553
901
  pm_buffer_free(&buffer);
554
- return result;
902
+ pm_node_destroy(&parser, node);
903
+ pm_parser_free(&parser);
904
+
905
+ return string;
555
906
  }
556
907
 
557
908
  /******************************************************************************/
558
909
  /* Initialization of the extension */
559
910
  /******************************************************************************/
560
911
 
912
+ /**
913
+ * The init function that Ruby calls when loading this extension.
914
+ */
561
915
  RUBY_FUNC_EXPORTED void
562
916
  Init_prism(void) {
563
917
  // Make sure that the prism library version matches the expected version.
@@ -579,25 +933,46 @@ Init_prism(void) {
579
933
  rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
580
934
  rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
581
935
  rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
936
+ rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
937
+ rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
938
+ rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
582
939
  rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
583
940
  rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
584
941
  rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
585
942
  rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
586
943
 
587
- // Define the version string here so that we can use the constants defined
588
- // in prism.h.
944
+ // Intern all of the options that we support so that we don't have to do it
945
+ // every time we parse.
946
+ rb_option_id_filepath = rb_intern_const("filepath");
947
+ rb_option_id_encoding = rb_intern_const("encoding");
948
+ rb_option_id_line = rb_intern_const("line");
949
+ rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
950
+ rb_option_id_verbose = rb_intern_const("verbose");
951
+ rb_option_id_scopes = rb_intern_const("scopes");
952
+
953
+ /**
954
+ * The version of the prism library.
955
+ */
589
956
  rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
590
- rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CExtension")));
957
+
958
+ /**
959
+ * The backend of the parser that prism is using to parse Ruby code. This
960
+ * can be either :CEXT or :FFI. On runtimes that support C extensions, we
961
+ * default to :CEXT. Otherwise we use :FFI.
962
+ */
963
+ rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
591
964
 
592
965
  // First, the functions that have to do with lexing and parsing.
593
966
  rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
594
- rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
967
+ rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
595
968
  rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
596
- rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
969
+ rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
597
970
  rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
598
- rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
971
+ rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
972
+ rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
973
+ rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
599
974
  rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
600
- rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
975
+ rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
601
976
 
602
977
  // Next, the functions that will be called by the parser to perform various
603
978
  // internal tasks. We expose these to make them easier to test.
@@ -605,7 +980,7 @@ Init_prism(void) {
605
980
  rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
606
981
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
607
982
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
608
- rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
983
+ rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
609
984
 
610
985
  // Next, initialize the other APIs.
611
986
  Init_prism_api_node();