prism 0.16.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/releasing.md +4 -1
  8. data/docs/serialization.md +28 -29
  9. data/ext/prism/api_node.c +802 -770
  10. data/ext/prism/api_pack.c +20 -9
  11. data/ext/prism/extension.c +465 -160
  12. data/ext/prism/extension.h +1 -1
  13. data/include/prism/ast.h +3173 -763
  14. data/include/prism/defines.h +32 -9
  15. data/include/prism/diagnostic.h +36 -3
  16. data/include/prism/enc/pm_encoding.h +118 -28
  17. data/include/prism/node.h +38 -13
  18. data/include/prism/options.h +204 -0
  19. data/include/prism/pack.h +44 -33
  20. data/include/prism/parser.h +445 -200
  21. data/include/prism/prettyprint.h +12 -1
  22. data/include/prism/regexp.h +16 -2
  23. data/include/prism/util/pm_buffer.h +94 -16
  24. data/include/prism/util/pm_char.h +162 -48
  25. data/include/prism/util/pm_constant_pool.h +126 -32
  26. data/include/prism/util/pm_list.h +68 -38
  27. data/include/prism/util/pm_memchr.h +18 -3
  28. data/include/prism/util/pm_newline_list.h +70 -27
  29. data/include/prism/util/pm_state_stack.h +25 -7
  30. data/include/prism/util/pm_string.h +115 -27
  31. data/include/prism/util/pm_string_list.h +25 -6
  32. data/include/prism/util/pm_strncasecmp.h +32 -0
  33. data/include/prism/util/pm_strpbrk.h +31 -17
  34. data/include/prism/version.h +28 -3
  35. data/include/prism.h +224 -31
  36. data/lib/prism/compiler.rb +6 -3
  37. data/lib/prism/debug.rb +23 -7
  38. data/lib/prism/dispatcher.rb +33 -18
  39. data/lib/prism/dsl.rb +10 -5
  40. data/lib/prism/ffi.rb +132 -80
  41. data/lib/prism/lex_compat.rb +25 -15
  42. data/lib/prism/mutation_compiler.rb +10 -5
  43. data/lib/prism/node.rb +370 -135
  44. data/lib/prism/node_ext.rb +1 -1
  45. data/lib/prism/node_inspector.rb +1 -1
  46. data/lib/prism/pack.rb +79 -40
  47. data/lib/prism/parse_result/comments.rb +7 -2
  48. data/lib/prism/parse_result/newlines.rb +4 -0
  49. data/lib/prism/parse_result.rb +150 -30
  50. data/lib/prism/pattern.rb +11 -0
  51. data/lib/prism/ripper_compat.rb +28 -10
  52. data/lib/prism/serialize.rb +87 -55
  53. data/lib/prism/visitor.rb +10 -3
  54. data/lib/prism.rb +20 -2
  55. data/prism.gemspec +4 -2
  56. data/rbi/prism.rbi +5545 -5505
  57. data/rbi/prism_static.rbi +141 -131
  58. data/sig/prism.rbs +72 -43
  59. data/sig/prism_static.rbs +14 -1
  60. data/src/diagnostic.c +56 -53
  61. data/src/enc/pm_big5.c +1 -0
  62. data/src/enc/pm_euc_jp.c +1 -0
  63. data/src/enc/pm_gbk.c +1 -0
  64. data/src/enc/pm_shift_jis.c +1 -0
  65. data/src/enc/pm_tables.c +316 -80
  66. data/src/enc/pm_unicode.c +53 -8
  67. data/src/enc/pm_windows_31j.c +1 -0
  68. data/src/node.c +334 -321
  69. data/src/options.c +170 -0
  70. data/src/prettyprint.c +74 -47
  71. data/src/prism.c +1642 -856
  72. data/src/regexp.c +151 -95
  73. data/src/serialize.c +44 -20
  74. data/src/token_type.c +3 -1
  75. data/src/util/pm_buffer.c +45 -15
  76. data/src/util/pm_char.c +103 -57
  77. data/src/util/pm_constant_pool.c +51 -21
  78. data/src/util/pm_list.c +12 -4
  79. data/src/util/pm_memchr.c +5 -3
  80. data/src/util/pm_newline_list.c +20 -12
  81. data/src/util/pm_state_stack.c +9 -3
  82. data/src/util/pm_string.c +95 -85
  83. data/src/util/pm_string_list.c +14 -15
  84. data/src/util/pm_strncasecmp.c +10 -3
  85. data/src/util/pm_strpbrk.c +25 -19
  86. metadata +5 -3
  87. data/docs/prism.png +0 -0
@@ -1,7 +1,7 @@
1
1
  #include "prism/extension.h"
2
2
 
3
- // NOTE: this file should contain only bindings.
4
- // All non-trivial logic should be in librubyparser so it can be shared its the various callers.
3
+ // NOTE: this file should contain only bindings. All non-trivial logic should be
4
+ // in librubyparser so it can be shared its the various callers.
5
5
 
6
6
  VALUE rb_cPrism;
7
7
  VALUE rb_cPrismNode;
@@ -10,18 +10,30 @@ VALUE rb_cPrismToken;
10
10
  VALUE rb_cPrismLocation;
11
11
 
12
12
  VALUE rb_cPrismComment;
13
+ VALUE rb_cPrismInlineComment;
14
+ VALUE rb_cPrismEmbDocComment;
15
+ VALUE rb_cPrismDATAComment;
13
16
  VALUE rb_cPrismMagicComment;
14
17
  VALUE rb_cPrismParseError;
15
18
  VALUE rb_cPrismParseWarning;
16
19
  VALUE rb_cPrismParseResult;
17
20
 
21
+ ID rb_option_id_filepath;
22
+ ID rb_option_id_encoding;
23
+ ID rb_option_id_line;
24
+ ID rb_option_id_frozen_string_literal;
25
+ ID rb_option_id_verbose;
26
+ ID rb_option_id_scopes;
27
+
18
28
  /******************************************************************************/
19
29
  /* IO of Ruby code */
20
30
  /******************************************************************************/
21
31
 
22
- // Check if the given VALUE is a string. If it's nil, then return NULL. If it's
23
- // not a string, then raise a type error. Otherwise return the VALUE as a C
24
- // string.
32
+ /**
33
+ * Check if the given VALUE is a string. If it's nil, then return NULL. If it's
34
+ * not a string, then raise a type error. Otherwise return the VALUE as a C
35
+ * string.
36
+ */
25
37
  static const char *
26
38
  check_string(VALUE value) {
27
39
  // If the value is nil, then we don't need to do anything.
@@ -38,7 +50,9 @@ check_string(VALUE value) {
38
50
  return RSTRING_PTR(value);
39
51
  }
40
52
 
41
- // Load the contents and size of the given string into the given pm_string_t.
53
+ /**
54
+ * Load the contents and size of the given string into the given pm_string_t.
55
+ */
42
56
  static void
43
57
  input_load_string(pm_string_t *input, VALUE string) {
44
58
  // Check if the string is a string. If it's not, then raise a type error.
@@ -49,20 +63,181 @@ input_load_string(pm_string_t *input, VALUE string) {
49
63
  pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
50
64
  }
51
65
 
66
+ /******************************************************************************/
67
+ /* Building C options from Ruby options */
68
+ /******************************************************************************/
69
+
70
+ /**
71
+ * Build the scopes associated with the provided Ruby keyword value.
72
+ */
73
+ static void
74
+ build_options_scopes(pm_options_t *options, VALUE scopes) {
75
+ // Check if the value is an array. If it's not, then raise a type error.
76
+ if (!RB_TYPE_P(scopes, T_ARRAY)) {
77
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
78
+ }
79
+
80
+ // Initialize the scopes array.
81
+ size_t scopes_count = RARRAY_LEN(scopes);
82
+ pm_options_scopes_init(options, scopes_count);
83
+
84
+ // Iterate over the scopes and add them to the options.
85
+ for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
86
+ VALUE scope = rb_ary_entry(scopes, scope_index);
87
+
88
+ // Check that the scope is an array. If it's not, then raise a type
89
+ // error.
90
+ if (!RB_TYPE_P(scope, T_ARRAY)) {
91
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
92
+ }
93
+
94
+ // Initialize the scope array.
95
+ size_t locals_count = RARRAY_LEN(scope);
96
+ pm_options_scope_t *options_scope = &options->scopes[scope_index];
97
+ pm_options_scope_init(options_scope, locals_count);
98
+
99
+ // Iterate over the locals and add them to the scope.
100
+ for (size_t local_index = 0; local_index < locals_count; local_index++) {
101
+ VALUE local = rb_ary_entry(scope, local_index);
102
+
103
+ // Check that the local is a symbol. If it's not, then raise a
104
+ // type error.
105
+ if (!RB_TYPE_P(local, T_SYMBOL)) {
106
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
107
+ }
108
+
109
+ // Add the local to the scope.
110
+ pm_string_t *scope_local = &options_scope->locals[local_index];
111
+ const char *name = rb_id2name(SYM2ID(local));
112
+ pm_string_constant_init(scope_local, name, strlen(name));
113
+ }
114
+ }
115
+ }
116
+
117
+ /**
118
+ * An iterator function that is called for each key-value in the keywords hash.
119
+ */
120
+ static int
121
+ build_options_i(VALUE key, VALUE value, VALUE argument) {
122
+ pm_options_t *options = (pm_options_t *) argument;
123
+ ID key_id = SYM2ID(key);
124
+
125
+ if (key_id == rb_option_id_filepath) {
126
+ if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
127
+ } else if (key_id == rb_option_id_encoding) {
128
+ if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
129
+ } else if (key_id == rb_option_id_line) {
130
+ if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
131
+ } else if (key_id == rb_option_id_frozen_string_literal) {
132
+ if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
133
+ } else if (key_id == rb_option_id_verbose) {
134
+ pm_options_suppress_warnings_set(options, value != Qtrue);
135
+ } else if (key_id == rb_option_id_scopes) {
136
+ if (!NIL_P(value)) build_options_scopes(options, value);
137
+ } else {
138
+ rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
139
+ }
140
+
141
+ return ST_CONTINUE;
142
+ }
143
+
144
+ /**
145
+ * We need a struct here to pass through rb_protect and it has to be a single
146
+ * value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this
147
+ * through as an opaque pointer and cast it on both sides.
148
+ */
149
+ struct build_options_data {
150
+ pm_options_t *options;
151
+ VALUE keywords;
152
+ };
153
+
154
+ /**
155
+ * Build the set of options from the given keywords. Note that this can raise a
156
+ * Ruby error if the options are not valid.
157
+ */
158
+ static VALUE
159
+ build_options(VALUE argument) {
160
+ struct build_options_data *data = (struct build_options_data *) argument;
161
+ rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
162
+ return Qnil;
163
+ }
164
+
165
+ /**
166
+ * Extract the options from the given keyword arguments.
167
+ */
168
+ static void
169
+ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
170
+ if (!NIL_P(keywords)) {
171
+ struct build_options_data data = { .options = options, .keywords = keywords };
172
+ struct build_options_data *argument = &data;
173
+
174
+ int state = 0;
175
+ rb_protect(build_options, (VALUE) argument, &state);
176
+
177
+ if (state != 0) {
178
+ pm_options_free(options);
179
+ rb_jump_tag(state);
180
+ }
181
+ }
182
+
183
+ if (!NIL_P(filepath)) {
184
+ if (!RB_TYPE_P(filepath, T_STRING)) {
185
+ pm_options_free(options);
186
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
187
+ }
188
+
189
+ pm_options_filepath_set(options, RSTRING_PTR(filepath));
190
+ }
191
+ }
192
+
193
+ /**
194
+ * Read options for methods that look like (source, **options).
195
+ */
196
+ static void
197
+ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
198
+ VALUE string;
199
+ VALUE keywords;
200
+ rb_scan_args(argc, argv, "1:", &string, &keywords);
201
+
202
+ extract_options(options, Qnil, keywords);
203
+ input_load_string(input, string);
204
+ }
205
+
206
+ /**
207
+ * Read options for methods that look like (filepath, **options).
208
+ */
209
+ static bool
210
+ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
211
+ VALUE filepath;
212
+ VALUE keywords;
213
+ rb_scan_args(argc, argv, "1:", &filepath, &keywords);
214
+
215
+ extract_options(options, filepath, keywords);
216
+
217
+ if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
218
+ pm_options_free(options);
219
+ return false;
220
+ }
221
+
222
+ return true;
223
+ }
224
+
52
225
  /******************************************************************************/
53
226
  /* Serializing the AST */
54
227
  /******************************************************************************/
55
228
 
56
- // Dump the AST corresponding to the given input to a string.
229
+ /**
230
+ * Dump the AST corresponding to the given input to a string.
231
+ */
57
232
  static VALUE
58
- dump_input(pm_string_t *input, const char *filepath) {
233
+ dump_input(pm_string_t *input, const pm_options_t *options) {
59
234
  pm_buffer_t buffer;
60
235
  if (!pm_buffer_init(&buffer)) {
61
236
  rb_raise(rb_eNoMemError, "failed to allocate memory");
62
237
  }
63
238
 
64
239
  pm_parser_t parser;
65
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
240
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
66
241
 
67
242
  pm_node_t *node = pm_parse(&parser);
68
243
  pm_serialize(&parser, node, &buffer);
@@ -75,15 +250,18 @@ dump_input(pm_string_t *input, const char *filepath) {
75
250
  return result;
76
251
  }
77
252
 
78
- // Dump the AST corresponding to the given string to a string.
253
+ /**
254
+ * call-seq:
255
+ * Prism::dump(source, **options) -> String
256
+ *
257
+ * Dump the AST corresponding to the given string to a string. For supported
258
+ * options, see Prism::parse.
259
+ */
79
260
  static VALUE
80
261
  dump(int argc, VALUE *argv, VALUE self) {
81
- VALUE string;
82
- VALUE filepath;
83
- rb_scan_args(argc, argv, "11", &string, &filepath);
84
-
85
262
  pm_string_t input;
86
- input_load_string(&input, string);
263
+ pm_options_t options = { 0 };
264
+ string_options(argc, argv, &input, &options);
87
265
 
88
266
  #ifdef PRISM_DEBUG_MODE_BUILD
89
267
  size_t length = pm_string_length(&input);
@@ -92,25 +270,34 @@ dump(int argc, VALUE *argv, VALUE self) {
92
270
  pm_string_constant_init(&input, dup, length);
93
271
  #endif
94
272
 
95
- VALUE value = dump_input(&input, check_string(filepath));
273
+ VALUE value = dump_input(&input, &options);
96
274
 
97
275
  #ifdef PRISM_DEBUG_MODE_BUILD
98
276
  free(dup);
99
277
  #endif
100
278
 
279
+ pm_string_free(&input);
280
+ pm_options_free(&options);
281
+
101
282
  return value;
102
283
  }
103
284
 
104
- // Dump the AST corresponding to the given file to a string.
285
+ /**
286
+ * call-seq:
287
+ * Prism::dump_file(filepath, **options) -> String
288
+ *
289
+ * Dump the AST corresponding to the given file to a string. For supported
290
+ * options, see Prism::parse.
291
+ */
105
292
  static VALUE
106
- dump_file(VALUE self, VALUE filepath) {
293
+ dump_file(int argc, VALUE *argv, VALUE self) {
107
294
  pm_string_t input;
295
+ pm_options_t options = { 0 };
296
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
108
297
 
109
- const char *checked = check_string(filepath);
110
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
111
-
112
- VALUE value = dump_input(&input, checked);
298
+ VALUE value = dump_input(&input, &options);
113
299
  pm_string_free(&input);
300
+ pm_options_free(&options);
114
301
 
115
302
  return value;
116
303
  }
@@ -119,7 +306,9 @@ dump_file(VALUE self, VALUE filepath) {
119
306
  /* Extracting values for the parse result */
120
307
  /******************************************************************************/
121
308
 
122
- // Extract the comments out of the parser into an array.
309
+ /**
310
+ * Extract the comments out of the parser into an array.
311
+ */
123
312
  static VALUE
124
313
  parser_comments(pm_parser_t *parser, VALUE source) {
125
314
  VALUE comments = rb_ary_new();
@@ -134,27 +323,29 @@ parser_comments(pm_parser_t *parser, VALUE source) {
134
323
  VALUE type;
135
324
  switch (comment->type) {
136
325
  case PM_COMMENT_INLINE:
137
- type = ID2SYM(rb_intern("inline"));
326
+ type = rb_cPrismInlineComment;
138
327
  break;
139
328
  case PM_COMMENT_EMBDOC:
140
- type = ID2SYM(rb_intern("embdoc"));
329
+ type = rb_cPrismEmbDocComment;
141
330
  break;
142
331
  case PM_COMMENT___END__:
143
- type = ID2SYM(rb_intern("__END__"));
332
+ type = rb_cPrismDATAComment;
144
333
  break;
145
334
  default:
146
- type = ID2SYM(rb_intern("inline"));
335
+ type = rb_cPrismInlineComment;
147
336
  break;
148
337
  }
149
338
 
150
- VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
151
- rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
339
+ VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
340
+ rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
152
341
  }
153
342
 
154
343
  return comments;
155
344
  }
156
345
 
157
- // Extract the magic comments out of the parser into an array.
346
+ /**
347
+ * Extract the magic comments out of the parser into an array.
348
+ */
158
349
  static VALUE
159
350
  parser_magic_comments(pm_parser_t *parser, VALUE source) {
160
351
  VALUE magic_comments = rb_ary_new();
@@ -183,7 +374,9 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
183
374
  return magic_comments;
184
375
  }
185
376
 
186
- // Extract the errors out of the parser into an array.
377
+ /**
378
+ * Extract the errors out of the parser into an array.
379
+ */
187
380
  static VALUE
188
381
  parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
189
382
  VALUE errors = rb_ary_new();
@@ -207,7 +400,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
207
400
  return errors;
208
401
  }
209
402
 
210
- // Extract the warnings out of the parser into an array.
403
+ /**
404
+ * Extract the warnings out of the parser into an array.
405
+ */
211
406
  static VALUE
212
407
  parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
213
408
  VALUE warnings = rb_ary_new();
@@ -235,18 +430,22 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
235
430
  /* Lexing Ruby code */
236
431
  /******************************************************************************/
237
432
 
238
- // This struct gets stored in the parser and passed in to the lex callback any
239
- // time a new token is found. We use it to store the necessary information to
240
- // initialize a Token instance.
433
+ /**
434
+ * This struct gets stored in the parser and passed in to the lex callback any
435
+ * time a new token is found. We use it to store the necessary information to
436
+ * initialize a Token instance.
437
+ */
241
438
  typedef struct {
242
439
  VALUE source;
243
440
  VALUE tokens;
244
441
  rb_encoding *encoding;
245
442
  } parse_lex_data_t;
246
443
 
247
- // This is passed as a callback to the parser. It gets called every time a new
248
- // token is found. Once found, we initialize a new instance of Token and push it
249
- // onto the tokens array.
444
+ /**
445
+ * This is passed as a callback to the parser. It gets called every time a new
446
+ * token is found. Once found, we initialize a new instance of Token and push it
447
+ * onto the tokens array.
448
+ */
250
449
  static void
251
450
  parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
252
451
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -258,9 +457,11 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
258
457
  rb_ary_push(parse_lex_data->tokens, yields);
259
458
  }
260
459
 
261
- // This is called whenever the encoding changes based on the magic comment at
262
- // the top of the file. We use it to update the encoding that we are using to
263
- // create tokens.
460
+ /**
461
+ * This is called whenever the encoding changes based on the magic comment at
462
+ * the top of the file. We use it to update the encoding that we are using to
463
+ * create tokens.
464
+ */
264
465
  static void
265
466
  parse_lex_encoding_changed_callback(pm_parser_t *parser) {
266
467
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -281,17 +482,19 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
281
482
  }
282
483
  }
283
484
 
284
- // Parse the given input and return a ParseResult containing just the tokens or
285
- // the nodes and tokens.
485
+ /**
486
+ * Parse the given input and return a ParseResult containing just the tokens or
487
+ * the nodes and tokens.
488
+ */
286
489
  static VALUE
287
- parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
490
+ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
288
491
  pm_parser_t parser;
289
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
492
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
290
493
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
291
494
 
292
495
  VALUE offsets = rb_ary_new();
293
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
294
- VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
496
+ VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
497
+ VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
295
498
 
296
499
  parse_lex_data_t parse_lex_data = {
297
500
  .source = source,
@@ -338,29 +541,42 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
338
541
  return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
339
542
  }
340
543
 
341
- // Return an array of tokens corresponding to the given string.
544
+ /**
545
+ * call-seq:
546
+ * Prism::lex(source, **options) -> Array
547
+ *
548
+ * Return an array of Token instances corresponding to the given string. For
549
+ * supported options, see Prism::parse.
550
+ */
342
551
  static VALUE
343
552
  lex(int argc, VALUE *argv, VALUE self) {
344
- VALUE string;
345
- VALUE filepath;
346
- rb_scan_args(argc, argv, "11", &string, &filepath);
347
-
348
553
  pm_string_t input;
349
- input_load_string(&input, string);
554
+ pm_options_t options = { 0 };
555
+ string_options(argc, argv, &input, &options);
556
+
557
+ VALUE result = parse_lex_input(&input, &options, false);
558
+ pm_string_free(&input);
559
+ pm_options_free(&options);
350
560
 
351
- return parse_lex_input(&input, check_string(filepath), false);
561
+ return result;
352
562
  }
353
563
 
354
- // Return an array of tokens corresponding to the given file.
564
+ /**
565
+ * call-seq:
566
+ * Prism::lex_file(filepath, **options) -> Array
567
+ *
568
+ * Return an array of Token instances corresponding to the given file. For
569
+ * supported options, see Prism::parse.
570
+ */
355
571
  static VALUE
356
- lex_file(VALUE self, VALUE filepath) {
572
+ lex_file(int argc, VALUE *argv, VALUE self) {
357
573
  pm_string_t input;
574
+ pm_options_t options = { 0 };
575
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
358
576
 
359
- const char *checked = check_string(filepath);
360
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
361
-
362
- VALUE value = parse_lex_input(&input, checked, false);
577
+ VALUE value = parse_lex_input(&input, &options, false);
363
578
  pm_string_free(&input);
579
+ pm_options_free(&options);
364
580
 
365
581
  return value;
366
582
  }
@@ -369,11 +585,13 @@ lex_file(VALUE self, VALUE filepath) {
369
585
  /* Parsing Ruby code */
370
586
  /******************************************************************************/
371
587
 
372
- // Parse the given input and return a ParseResult instance.
588
+ /**
589
+ * Parse the given input and return a ParseResult instance.
590
+ */
373
591
  static VALUE
374
- parse_input(pm_string_t *input, const char *filepath) {
592
+ parse_input(pm_string_t *input, const pm_options_t *options) {
375
593
  pm_parser_t parser;
376
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
594
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
377
595
 
378
596
  pm_node_t *node = pm_parse(&parser);
379
597
  rb_encoding *encoding = rb_enc_find(parser.encoding.name);
@@ -396,33 +614,31 @@ parse_input(pm_string_t *input, const char *filepath) {
396
614
  return result;
397
615
  }
398
616
 
399
- // Parse the given input and return an array of Comment objects.
400
- static VALUE
401
- parse_input_comments(pm_string_t *input, const char *filepath) {
402
- pm_parser_t parser;
403
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
404
-
405
- pm_node_t *node = pm_parse(&parser);
406
- rb_encoding *encoding = rb_enc_find(parser.encoding.name);
407
-
408
- VALUE source = pm_source_new(&parser, encoding);
409
- VALUE comments = parser_comments(&parser, source);
410
-
411
- pm_node_destroy(&parser, node);
412
- pm_parser_free(&parser);
413
-
414
- return comments;
415
- }
416
-
417
- // Parse the given string and return a ParseResult instance.
617
+ /**
618
+ * call-seq:
619
+ * Prism::parse(source, **options) -> ParseResult
620
+ *
621
+ * Parse the given string and return a ParseResult instance. The options that
622
+ * are supported are:
623
+ *
624
+ * * `filepath` - the filepath of the source being parsed. This should be a
625
+ * string or nil
626
+ * * `encoding` - the encoding of the source being parsed. This should be an
627
+ * encoding or nil
628
+ * * `line` - the line number that the parse starts on. This should be an
629
+ * integer or nil. Note that this is 1-indexed.
630
+ * * `frozen_string_literal` - whether or not the frozen string literal pragma
631
+ * has been set. This should be a boolean or nil.
632
+ * * `verbose` - the current level of verbosity. This controls whether or not
633
+ * the parser emits warnings. This should be a boolean or nil.
634
+ * * `scopes` - the locals that are in scope surrounding the code that is being
635
+ * parsed. This should be an array of arrays of symbols or nil.
636
+ */
418
637
  static VALUE
419
638
  parse(int argc, VALUE *argv, VALUE self) {
420
- VALUE string;
421
- VALUE filepath;
422
- rb_scan_args(argc, argv, "11", &string, &filepath);
423
-
424
639
  pm_string_t input;
425
- input_load_string(&input, string);
640
+ pm_options_t options = { 0 };
641
+ string_options(argc, argv, &input, &options);
426
642
 
427
643
  #ifdef PRISM_DEBUG_MODE_BUILD
428
644
  size_t length = pm_string_length(&input);
@@ -431,82 +647,147 @@ parse(int argc, VALUE *argv, VALUE self) {
431
647
  pm_string_constant_init(&input, dup, length);
432
648
  #endif
433
649
 
434
- VALUE value = parse_input(&input, check_string(filepath));
650
+ VALUE value = parse_input(&input, &options);
435
651
 
436
652
  #ifdef PRISM_DEBUG_MODE_BUILD
437
653
  free(dup);
438
654
  #endif
439
655
 
656
+ pm_string_free(&input);
657
+ pm_options_free(&options);
440
658
  return value;
441
659
  }
442
660
 
443
- // Parse the given file and return a ParseResult instance.
661
+ /**
662
+ * call-seq:
663
+ * Prism::parse_file(filepath, **options) -> ParseResult
664
+ *
665
+ * Parse the given file and return a ParseResult instance. For supported
666
+ * options, see Prism::parse.
667
+ */
444
668
  static VALUE
445
- parse_file(VALUE self, VALUE filepath) {
669
+ parse_file(int argc, VALUE *argv, VALUE self) {
446
670
  pm_string_t input;
671
+ pm_options_t options = { 0 };
672
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
447
673
 
448
- const char *checked = check_string(filepath);
449
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
450
-
451
- VALUE value = parse_input(&input, checked);
674
+ VALUE value = parse_input(&input, &options);
452
675
  pm_string_free(&input);
676
+ pm_options_free(&options);
453
677
 
454
678
  return value;
455
679
  }
456
680
 
457
- // Parse the given string and return an array of Comment objects.
681
+ /**
682
+ * Parse the given input and return an array of Comment objects.
683
+ */
458
684
  static VALUE
459
- parse_comments(int argc, VALUE *argv, VALUE self) {
460
- VALUE string;
461
- VALUE filepath;
462
- rb_scan_args(argc, argv, "11", &string, &filepath);
685
+ parse_input_comments(pm_string_t *input, const pm_options_t *options) {
686
+ pm_parser_t parser;
687
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
463
688
 
464
- pm_string_t input;
465
- input_load_string(&input, string);
689
+ pm_node_t *node = pm_parse(&parser);
690
+ rb_encoding *encoding = rb_enc_find(parser.encoding.name);
691
+
692
+ VALUE source = pm_source_new(&parser, encoding);
693
+ VALUE comments = parser_comments(&parser, source);
466
694
 
467
- return parse_input_comments(&input, check_string(filepath));
695
+ pm_node_destroy(&parser, node);
696
+ pm_parser_free(&parser);
697
+
698
+ return comments;
468
699
  }
469
700
 
470
- // Parse the given file and return an array of Comment objects.
701
+ /**
702
+ * call-seq:
703
+ * Prism::parse_comments(source, **options) -> Array
704
+ *
705
+ * Parse the given string and return an array of Comment objects. For supported
706
+ * options, see Prism::parse.
707
+ */
471
708
  static VALUE
472
- parse_file_comments(VALUE self, VALUE filepath) {
709
+ parse_comments(int argc, VALUE *argv, VALUE self) {
473
710
  pm_string_t input;
711
+ pm_options_t options = { 0 };
712
+ string_options(argc, argv, &input, &options);
474
713
 
475
- const char *checked = check_string(filepath);
476
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
714
+ VALUE result = parse_input_comments(&input, &options);
715
+ pm_string_free(&input);
716
+ pm_options_free(&options);
477
717
 
478
- VALUE value = parse_input_comments(&input, checked);
718
+ return result;
719
+ }
720
+
721
+ /**
722
+ * call-seq:
723
+ * Prism::parse_file_comments(filepath, **options) -> Array
724
+ *
725
+ * Parse the given file and return an array of Comment objects. For supported
726
+ * options, see Prism::parse.
727
+ */
728
+ static VALUE
729
+ parse_file_comments(int argc, VALUE *argv, VALUE self) {
730
+ pm_string_t input;
731
+ pm_options_t options = { 0 };
732
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
733
+
734
+ VALUE value = parse_input_comments(&input, &options);
479
735
  pm_string_free(&input);
736
+ pm_options_free(&options);
480
737
 
481
738
  return value;
482
739
  }
483
740
 
484
- // Parse the given string and return a ParseResult instance.
741
+ /**
742
+ * call-seq:
743
+ * Prism::parse_lex(source, **options) -> ParseResult
744
+ *
745
+ * Parse the given string and return a ParseResult instance that contains a
746
+ * 2-element array, where the first element is the AST and the second element is
747
+ * an array of Token instances.
748
+ *
749
+ * This API is only meant to be used in the case where you need both the AST and
750
+ * the tokens. If you only need one or the other, use either Prism::parse or
751
+ * Prism::lex.
752
+ *
753
+ * For supported options, see Prism::parse.
754
+ */
485
755
  static VALUE
486
756
  parse_lex(int argc, VALUE *argv, VALUE self) {
487
- VALUE string;
488
- VALUE filepath;
489
- rb_scan_args(argc, argv, "11", &string, &filepath);
490
-
491
757
  pm_string_t input;
492
- input_load_string(&input, string);
758
+ pm_options_t options = { 0 };
759
+ string_options(argc, argv, &input, &options);
493
760
 
494
- VALUE value = parse_lex_input(&input, check_string(filepath), true);
761
+ VALUE value = parse_lex_input(&input, &options, true);
495
762
  pm_string_free(&input);
763
+ pm_options_free(&options);
496
764
 
497
765
  return value;
498
766
  }
499
767
 
500
- // Parse and lex the given file and return a ParseResult instance.
768
+ /**
769
+ * call-seq:
770
+ * Prism::parse_lex_file(filepath, **options) -> ParseResult
771
+ *
772
+ * Parse the given file and return a ParseResult instance that contains a
773
+ * 2-element array, where the first element is the AST and the second element is
774
+ * an array of Token instances.
775
+ *
776
+ * This API is only meant to be used in the case where you need both the AST and
777
+ * the tokens. If you only need one or the other, use either Prism::parse_file
778
+ * or Prism::lex_file.
779
+ *
780
+ * For supported options, see Prism::parse.
781
+ */
501
782
  static VALUE
502
- parse_lex_file(VALUE self, VALUE filepath) {
783
+ parse_lex_file(int argc, VALUE *argv, VALUE self) {
503
784
  pm_string_t input;
785
+ pm_options_t options = { 0 };
786
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
504
787
 
505
- const char *checked = check_string(filepath);
506
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
507
-
508
- VALUE value = parse_lex_input(&input, checked, true);
788
+ VALUE value = parse_lex_input(&input, &options, true);
509
789
  pm_string_free(&input);
790
+ pm_options_free(&options);
510
791
 
511
792
  return value;
512
793
  }
@@ -515,13 +796,17 @@ parse_lex_file(VALUE self, VALUE filepath) {
515
796
  /* Utility functions exposed to make testing easier */
516
797
  /******************************************************************************/
517
798
 
518
- // Returns an array of strings corresponding to the named capture groups in the
519
- // given source string. If prism was unable to parse the regular expression, this
520
- // function returns nil.
799
+ /**
800
+ * call-seq:
801
+ * Debug::named_captures(source) -> Array | nil
802
+ *
803
+ * Returns an array of strings corresponding to the named capture groups in the
804
+ * given source string. If prism was unable to parse the regular expression,
805
+ * this function returns nil.
806
+ */
521
807
  static VALUE
522
808
  named_captures(VALUE self, VALUE source) {
523
- pm_string_list_t string_list;
524
- pm_string_list_init(&string_list);
809
+ pm_string_list_t string_list = { 0 };
525
810
 
526
811
  if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
527
812
  pm_string_list_free(&string_list);
@@ -538,7 +823,12 @@ named_captures(VALUE self, VALUE source) {
538
823
  return names;
539
824
  }
540
825
 
541
- // Return a hash of information about the given source string's memory usage.
826
+ /**
827
+ * call-seq:
828
+ * Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
829
+ *
830
+ * Return a hash of information about the given source string's memory usage.
831
+ */
542
832
  static VALUE
543
833
  memsize(VALUE self, VALUE string) {
544
834
  pm_parser_t parser;
@@ -559,8 +849,13 @@ memsize(VALUE self, VALUE string) {
559
849
  return result;
560
850
  }
561
851
 
562
- // Parse the file, but do nothing with the result. This is used to profile the
563
- // parser for memory and speed.
852
+ /**
853
+ * call-seq:
854
+ * Debug::profile_file(filepath) -> nil
855
+ *
856
+ * Parse the file, but do nothing with the result. This is used to profile the
857
+ * parser for memory and speed.
858
+ */
564
859
  static VALUE
565
860
  profile_file(VALUE self, VALUE filepath) {
566
861
  pm_string_t input;
@@ -568,39 +863,28 @@ profile_file(VALUE self, VALUE filepath) {
568
863
  const char *checked = check_string(filepath);
569
864
  if (!pm_string_mapped_init(&input, checked)) return Qnil;
570
865
 
866
+ pm_options_t options = { 0 };
867
+ pm_options_filepath_set(&options, checked);
868
+
571
869
  pm_parser_t parser;
572
- pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked);
870
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
573
871
 
574
872
  pm_node_t *node = pm_parse(&parser);
575
873
  pm_node_destroy(&parser, node);
576
874
  pm_parser_free(&parser);
577
-
875
+ pm_options_free(&options);
578
876
  pm_string_free(&input);
579
877
 
580
878
  return Qnil;
581
879
  }
582
880
 
583
- // Parse the file and serialize the result. This is mostly used to test this
584
- // path since it is used by client libraries.
585
- static VALUE
586
- parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
587
- pm_string_t input;
588
- pm_buffer_t buffer;
589
- pm_buffer_init(&buffer);
590
-
591
- const char *checked = check_string(filepath);
592
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
593
-
594
- pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
595
- VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
596
-
597
- pm_string_free(&input);
598
- pm_buffer_free(&buffer);
599
- return result;
600
- }
601
-
602
- // Inspect the AST that represents the given source using the prism pretty print
603
- // as opposed to the Ruby implementation.
881
+ /**
882
+ * call-seq:
883
+ * Debug::inspect_node(source) -> inspected
884
+ *
885
+ * Inspect the AST that represents the given source using the prism pretty print
886
+ * as opposed to the Ruby implementation.
887
+ */
604
888
  static VALUE
605
889
  inspect_node(VALUE self, VALUE source) {
606
890
  pm_string_t input;
@@ -628,6 +912,9 @@ inspect_node(VALUE self, VALUE source) {
628
912
  /* Initialization of the extension */
629
913
  /******************************************************************************/
630
914
 
915
+ /**
916
+ * The init function that Ruby calls when loading this extension.
917
+ */
631
918
  RUBY_FUNC_EXPORTED void
632
919
  Init_prism(void) {
633
920
  // Make sure that the prism library version matches the expected version.
@@ -649,27 +936,46 @@ Init_prism(void) {
649
936
  rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
650
937
  rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
651
938
  rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
939
+ rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
940
+ rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
941
+ rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
652
942
  rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
653
943
  rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
654
944
  rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
655
945
  rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
656
946
 
657
- // Define the version string here so that we can use the constants defined
658
- // in prism.h.
947
+ // Intern all of the options that we support so that we don't have to do it
948
+ // every time we parse.
949
+ rb_option_id_filepath = rb_intern_const("filepath");
950
+ rb_option_id_encoding = rb_intern_const("encoding");
951
+ rb_option_id_line = rb_intern_const("line");
952
+ rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
953
+ rb_option_id_verbose = rb_intern_const("verbose");
954
+ rb_option_id_scopes = rb_intern_const("scopes");
955
+
956
+ /**
957
+ * The version of the prism library.
958
+ */
659
959
  rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
660
- rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CExtension")));
960
+
961
+ /**
962
+ * The backend of the parser that prism is using to parse Ruby code. This
963
+ * can be either :CEXT or :FFI. On runtimes that support C extensions, we
964
+ * default to :CEXT. Otherwise we use :FFI.
965
+ */
966
+ rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
661
967
 
662
968
  // First, the functions that have to do with lexing and parsing.
663
969
  rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
664
- rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
970
+ rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
665
971
  rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
666
- rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
972
+ rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
667
973
  rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
668
- rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
974
+ rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
669
975
  rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
670
- rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, 1);
976
+ rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
671
977
  rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
672
- rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
978
+ rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
673
979
 
674
980
  // Next, the functions that will be called by the parser to perform various
675
981
  // internal tasks. We expose these to make them easier to test.
@@ -677,7 +983,6 @@ Init_prism(void) {
677
983
  rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
678
984
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
679
985
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
680
- rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
681
986
  rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
682
987
 
683
988
  // Next, initialize the other APIs.