prism 0.16.0 → 0.17.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/releasing.md +4 -1
  8. data/docs/serialization.md +28 -29
  9. data/ext/prism/api_node.c +802 -770
  10. data/ext/prism/api_pack.c +20 -9
  11. data/ext/prism/extension.c +465 -160
  12. data/ext/prism/extension.h +1 -1
  13. data/include/prism/ast.h +3173 -763
  14. data/include/prism/defines.h +32 -9
  15. data/include/prism/diagnostic.h +36 -3
  16. data/include/prism/enc/pm_encoding.h +118 -28
  17. data/include/prism/node.h +38 -13
  18. data/include/prism/options.h +204 -0
  19. data/include/prism/pack.h +44 -33
  20. data/include/prism/parser.h +445 -200
  21. data/include/prism/prettyprint.h +12 -1
  22. data/include/prism/regexp.h +16 -2
  23. data/include/prism/util/pm_buffer.h +94 -16
  24. data/include/prism/util/pm_char.h +162 -48
  25. data/include/prism/util/pm_constant_pool.h +126 -32
  26. data/include/prism/util/pm_list.h +68 -38
  27. data/include/prism/util/pm_memchr.h +18 -3
  28. data/include/prism/util/pm_newline_list.h +70 -27
  29. data/include/prism/util/pm_state_stack.h +25 -7
  30. data/include/prism/util/pm_string.h +115 -27
  31. data/include/prism/util/pm_string_list.h +25 -6
  32. data/include/prism/util/pm_strncasecmp.h +32 -0
  33. data/include/prism/util/pm_strpbrk.h +31 -17
  34. data/include/prism/version.h +28 -3
  35. data/include/prism.h +224 -31
  36. data/lib/prism/compiler.rb +6 -3
  37. data/lib/prism/debug.rb +23 -7
  38. data/lib/prism/dispatcher.rb +33 -18
  39. data/lib/prism/dsl.rb +10 -5
  40. data/lib/prism/ffi.rb +132 -80
  41. data/lib/prism/lex_compat.rb +25 -15
  42. data/lib/prism/mutation_compiler.rb +10 -5
  43. data/lib/prism/node.rb +370 -135
  44. data/lib/prism/node_ext.rb +1 -1
  45. data/lib/prism/node_inspector.rb +1 -1
  46. data/lib/prism/pack.rb +79 -40
  47. data/lib/prism/parse_result/comments.rb +7 -2
  48. data/lib/prism/parse_result/newlines.rb +4 -0
  49. data/lib/prism/parse_result.rb +150 -30
  50. data/lib/prism/pattern.rb +11 -0
  51. data/lib/prism/ripper_compat.rb +28 -10
  52. data/lib/prism/serialize.rb +87 -55
  53. data/lib/prism/visitor.rb +10 -3
  54. data/lib/prism.rb +20 -2
  55. data/prism.gemspec +4 -2
  56. data/rbi/prism.rbi +5545 -5505
  57. data/rbi/prism_static.rbi +141 -131
  58. data/sig/prism.rbs +72 -43
  59. data/sig/prism_static.rbs +14 -1
  60. data/src/diagnostic.c +56 -53
  61. data/src/enc/pm_big5.c +1 -0
  62. data/src/enc/pm_euc_jp.c +1 -0
  63. data/src/enc/pm_gbk.c +1 -0
  64. data/src/enc/pm_shift_jis.c +1 -0
  65. data/src/enc/pm_tables.c +316 -80
  66. data/src/enc/pm_unicode.c +53 -8
  67. data/src/enc/pm_windows_31j.c +1 -0
  68. data/src/node.c +334 -321
  69. data/src/options.c +170 -0
  70. data/src/prettyprint.c +74 -47
  71. data/src/prism.c +1642 -856
  72. data/src/regexp.c +151 -95
  73. data/src/serialize.c +44 -20
  74. data/src/token_type.c +3 -1
  75. data/src/util/pm_buffer.c +45 -15
  76. data/src/util/pm_char.c +103 -57
  77. data/src/util/pm_constant_pool.c +51 -21
  78. data/src/util/pm_list.c +12 -4
  79. data/src/util/pm_memchr.c +5 -3
  80. data/src/util/pm_newline_list.c +20 -12
  81. data/src/util/pm_state_stack.c +9 -3
  82. data/src/util/pm_string.c +95 -85
  83. data/src/util/pm_string_list.c +14 -15
  84. data/src/util/pm_strncasecmp.c +10 -3
  85. data/src/util/pm_strpbrk.c +25 -19
  86. metadata +5 -3
  87. data/docs/prism.png +0 -0
@@ -1,7 +1,7 @@
1
1
  #include "prism/extension.h"
2
2
 
3
- // NOTE: this file should contain only bindings.
4
- // All non-trivial logic should be in librubyparser so it can be shared its the various callers.
3
+ // NOTE: this file should contain only bindings. All non-trivial logic should be
4
+ // in librubyparser so it can be shared with its various callers.
5
5
 
6
6
  VALUE rb_cPrism;
7
7
  VALUE rb_cPrismNode;
@@ -10,18 +10,30 @@ VALUE rb_cPrismToken;
10
10
  VALUE rb_cPrismLocation;
11
11
 
12
12
  VALUE rb_cPrismComment;
13
+ VALUE rb_cPrismInlineComment;
14
+ VALUE rb_cPrismEmbDocComment;
15
+ VALUE rb_cPrismDATAComment;
13
16
  VALUE rb_cPrismMagicComment;
14
17
  VALUE rb_cPrismParseError;
15
18
  VALUE rb_cPrismParseWarning;
16
19
  VALUE rb_cPrismParseResult;
17
20
 
21
+ ID rb_option_id_filepath;
22
+ ID rb_option_id_encoding;
23
+ ID rb_option_id_line;
24
+ ID rb_option_id_frozen_string_literal;
25
+ ID rb_option_id_verbose;
26
+ ID rb_option_id_scopes;
27
+
18
28
  /******************************************************************************/
19
29
  /* IO of Ruby code */
20
30
  /******************************************************************************/
21
31
 
22
- // Check if the given VALUE is a string. If it's nil, then return NULL. If it's
23
- // not a string, then raise a type error. Otherwise return the VALUE as a C
24
- // string.
32
+ /**
33
+ * Check if the given VALUE is a string. If it's nil, then return NULL. If it's
34
+ * not a string, then raise a type error. Otherwise return the VALUE as a C
35
+ * string.
36
+ */
25
37
  static const char *
26
38
  check_string(VALUE value) {
27
39
  // If the value is nil, then we don't need to do anything.
@@ -38,7 +50,9 @@ check_string(VALUE value) {
38
50
  return RSTRING_PTR(value);
39
51
  }
40
52
 
41
- // Load the contents and size of the given string into the given pm_string_t.
53
+ /**
54
+ * Load the contents and size of the given string into the given pm_string_t.
55
+ */
42
56
  static void
43
57
  input_load_string(pm_string_t *input, VALUE string) {
44
58
  // Check if the string is a string. If it's not, then raise a type error.
@@ -49,20 +63,181 @@ input_load_string(pm_string_t *input, VALUE string) {
49
63
  pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
50
64
  }
51
65
 
66
+ /******************************************************************************/
67
+ /* Building C options from Ruby options */
68
+ /******************************************************************************/
69
+
70
+ /**
71
+ * Build the scopes associated with the provided Ruby keyword value.
72
+ */
73
+ static void
74
+ build_options_scopes(pm_options_t *options, VALUE scopes) {
75
+ // Check if the value is an array. If it's not, then raise a type error.
76
+ if (!RB_TYPE_P(scopes, T_ARRAY)) {
77
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
78
+ }
79
+
80
+ // Initialize the scopes array.
81
+ size_t scopes_count = RARRAY_LEN(scopes);
82
+ pm_options_scopes_init(options, scopes_count);
83
+
84
+ // Iterate over the scopes and add them to the options.
85
+ for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
86
+ VALUE scope = rb_ary_entry(scopes, scope_index);
87
+
88
+ // Check that the scope is an array. If it's not, then raise a type
89
+ // error.
90
+ if (!RB_TYPE_P(scope, T_ARRAY)) {
91
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
92
+ }
93
+
94
+ // Initialize the scope array.
95
+ size_t locals_count = RARRAY_LEN(scope);
96
+ pm_options_scope_t *options_scope = &options->scopes[scope_index];
97
+ pm_options_scope_init(options_scope, locals_count);
98
+
99
+ // Iterate over the locals and add them to the scope.
100
+ for (size_t local_index = 0; local_index < locals_count; local_index++) {
101
+ VALUE local = rb_ary_entry(scope, local_index);
102
+
103
+ // Check that the local is a symbol. If it's not, then raise a
104
+ // type error.
105
+ if (!RB_TYPE_P(local, T_SYMBOL)) {
106
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
107
+ }
108
+
109
+ // Add the local to the scope.
110
+ pm_string_t *scope_local = &options_scope->locals[local_index];
111
+ const char *name = rb_id2name(SYM2ID(local));
112
+ pm_string_constant_init(scope_local, name, strlen(name));
113
+ }
114
+ }
115
+ }
116
+
117
+ /**
118
+ * An iterator function that is called for each key-value in the keywords hash.
119
+ */
120
+ static int
121
+ build_options_i(VALUE key, VALUE value, VALUE argument) {
122
+ pm_options_t *options = (pm_options_t *) argument;
123
+ ID key_id = SYM2ID(key);
124
+
125
+ if (key_id == rb_option_id_filepath) {
126
+ if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
127
+ } else if (key_id == rb_option_id_encoding) {
128
+ if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
129
+ } else if (key_id == rb_option_id_line) {
130
+ if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
131
+ } else if (key_id == rb_option_id_frozen_string_literal) {
132
+ if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
133
+ } else if (key_id == rb_option_id_verbose) {
134
+ pm_options_suppress_warnings_set(options, value != Qtrue);
135
+ } else if (key_id == rb_option_id_scopes) {
136
+ if (!NIL_P(value)) build_options_scopes(options, value);
137
+ } else {
138
+ rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
139
+ }
140
+
141
+ return ST_CONTINUE;
142
+ }
143
+
144
+ /**
145
+ * We need a struct here to pass through rb_protect and it has to be a single
146
+ * value. Because sizeof(VALUE) == sizeof(void *), we're going to pass this
147
+ * through as an opaque pointer and cast it on both sides.
148
+ */
149
+ struct build_options_data {
150
+ pm_options_t *options;
151
+ VALUE keywords;
152
+ };
153
+
154
+ /**
155
+ * Build the set of options from the given keywords. Note that this can raise a
156
+ * Ruby error if the options are not valid.
157
+ */
158
+ static VALUE
159
+ build_options(VALUE argument) {
160
+ struct build_options_data *data = (struct build_options_data *) argument;
161
+ rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
162
+ return Qnil;
163
+ }
164
+
165
+ /**
166
+ * Extract the options from the given keyword arguments.
167
+ */
168
+ static void
169
+ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
170
+ if (!NIL_P(keywords)) {
171
+ struct build_options_data data = { .options = options, .keywords = keywords };
172
+ struct build_options_data *argument = &data;
173
+
174
+ int state = 0;
175
+ rb_protect(build_options, (VALUE) argument, &state);
176
+
177
+ if (state != 0) {
178
+ pm_options_free(options);
179
+ rb_jump_tag(state);
180
+ }
181
+ }
182
+
183
+ if (!NIL_P(filepath)) {
184
+ if (!RB_TYPE_P(filepath, T_STRING)) {
185
+ pm_options_free(options);
186
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
187
+ }
188
+
189
+ pm_options_filepath_set(options, RSTRING_PTR(filepath));
190
+ }
191
+ }
192
+
193
+ /**
194
+ * Read options for methods that look like (source, **options).
195
+ */
196
+ static void
197
+ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
198
+ VALUE string;
199
+ VALUE keywords;
200
+ rb_scan_args(argc, argv, "1:", &string, &keywords);
201
+
202
+ extract_options(options, Qnil, keywords);
203
+ input_load_string(input, string);
204
+ }
205
+
206
+ /**
207
+ * Read options for methods that look like (filepath, **options).
208
+ */
209
+ static bool
210
+ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
211
+ VALUE filepath;
212
+ VALUE keywords;
213
+ rb_scan_args(argc, argv, "1:", &filepath, &keywords);
214
+
215
+ extract_options(options, filepath, keywords);
216
+
217
+ if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
218
+ pm_options_free(options);
219
+ return false;
220
+ }
221
+
222
+ return true;
223
+ }
224
+
52
225
  /******************************************************************************/
53
226
  /* Serializing the AST */
54
227
  /******************************************************************************/
55
228
 
56
- // Dump the AST corresponding to the given input to a string.
229
+ /**
230
+ * Dump the AST corresponding to the given input to a string.
231
+ */
57
232
  static VALUE
58
- dump_input(pm_string_t *input, const char *filepath) {
233
+ dump_input(pm_string_t *input, const pm_options_t *options) {
59
234
  pm_buffer_t buffer;
60
235
  if (!pm_buffer_init(&buffer)) {
61
236
  rb_raise(rb_eNoMemError, "failed to allocate memory");
62
237
  }
63
238
 
64
239
  pm_parser_t parser;
65
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
240
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
66
241
 
67
242
  pm_node_t *node = pm_parse(&parser);
68
243
  pm_serialize(&parser, node, &buffer);
@@ -75,15 +250,18 @@ dump_input(pm_string_t *input, const char *filepath) {
75
250
  return result;
76
251
  }
77
252
 
78
- // Dump the AST corresponding to the given string to a string.
253
+ /**
254
+ * call-seq:
255
+ * Prism::dump(source, **options) -> String
256
+ *
257
+ * Dump the AST corresponding to the given string to a string. For supported
258
+ * options, see Prism::parse.
259
+ */
79
260
  static VALUE
80
261
  dump(int argc, VALUE *argv, VALUE self) {
81
- VALUE string;
82
- VALUE filepath;
83
- rb_scan_args(argc, argv, "11", &string, &filepath);
84
-
85
262
  pm_string_t input;
86
- input_load_string(&input, string);
263
+ pm_options_t options = { 0 };
264
+ string_options(argc, argv, &input, &options);
87
265
 
88
266
  #ifdef PRISM_DEBUG_MODE_BUILD
89
267
  size_t length = pm_string_length(&input);
@@ -92,25 +270,34 @@ dump(int argc, VALUE *argv, VALUE self) {
92
270
  pm_string_constant_init(&input, dup, length);
93
271
  #endif
94
272
 
95
- VALUE value = dump_input(&input, check_string(filepath));
273
+ VALUE value = dump_input(&input, &options);
96
274
 
97
275
  #ifdef PRISM_DEBUG_MODE_BUILD
98
276
  free(dup);
99
277
  #endif
100
278
 
279
+ pm_string_free(&input);
280
+ pm_options_free(&options);
281
+
101
282
  return value;
102
283
  }
103
284
 
104
- // Dump the AST corresponding to the given file to a string.
285
+ /**
286
+ * call-seq:
287
+ * Prism::dump_file(filepath, **options) -> String
288
+ *
289
+ * Dump the AST corresponding to the given file to a string. For supported
290
+ * options, see Prism::parse.
291
+ */
105
292
  static VALUE
106
- dump_file(VALUE self, VALUE filepath) {
293
+ dump_file(int argc, VALUE *argv, VALUE self) {
107
294
  pm_string_t input;
295
+ pm_options_t options = { 0 };
296
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
108
297
 
109
- const char *checked = check_string(filepath);
110
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
111
-
112
- VALUE value = dump_input(&input, checked);
298
+ VALUE value = dump_input(&input, &options);
113
299
  pm_string_free(&input);
300
+ pm_options_free(&options);
114
301
 
115
302
  return value;
116
303
  }
@@ -119,7 +306,9 @@ dump_file(VALUE self, VALUE filepath) {
119
306
  /* Extracting values for the parse result */
120
307
  /******************************************************************************/
121
308
 
122
- // Extract the comments out of the parser into an array.
309
+ /**
310
+ * Extract the comments out of the parser into an array.
311
+ */
123
312
  static VALUE
124
313
  parser_comments(pm_parser_t *parser, VALUE source) {
125
314
  VALUE comments = rb_ary_new();
@@ -134,27 +323,29 @@ parser_comments(pm_parser_t *parser, VALUE source) {
134
323
  VALUE type;
135
324
  switch (comment->type) {
136
325
  case PM_COMMENT_INLINE:
137
- type = ID2SYM(rb_intern("inline"));
326
+ type = rb_cPrismInlineComment;
138
327
  break;
139
328
  case PM_COMMENT_EMBDOC:
140
- type = ID2SYM(rb_intern("embdoc"));
329
+ type = rb_cPrismEmbDocComment;
141
330
  break;
142
331
  case PM_COMMENT___END__:
143
- type = ID2SYM(rb_intern("__END__"));
332
+ type = rb_cPrismDATAComment;
144
333
  break;
145
334
  default:
146
- type = ID2SYM(rb_intern("inline"));
335
+ type = rb_cPrismInlineComment;
147
336
  break;
148
337
  }
149
338
 
150
- VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
151
- rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
339
+ VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
340
+ rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
152
341
  }
153
342
 
154
343
  return comments;
155
344
  }
156
345
 
157
- // Extract the magic comments out of the parser into an array.
346
+ /**
347
+ * Extract the magic comments out of the parser into an array.
348
+ */
158
349
  static VALUE
159
350
  parser_magic_comments(pm_parser_t *parser, VALUE source) {
160
351
  VALUE magic_comments = rb_ary_new();
@@ -183,7 +374,9 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
183
374
  return magic_comments;
184
375
  }
185
376
 
186
- // Extract the errors out of the parser into an array.
377
+ /**
378
+ * Extract the errors out of the parser into an array.
379
+ */
187
380
  static VALUE
188
381
  parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
189
382
  VALUE errors = rb_ary_new();
@@ -207,7 +400,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
207
400
  return errors;
208
401
  }
209
402
 
210
- // Extract the warnings out of the parser into an array.
403
+ /**
404
+ * Extract the warnings out of the parser into an array.
405
+ */
211
406
  static VALUE
212
407
  parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
213
408
  VALUE warnings = rb_ary_new();
@@ -235,18 +430,22 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
235
430
  /* Lexing Ruby code */
236
431
  /******************************************************************************/
237
432
 
238
- // This struct gets stored in the parser and passed in to the lex callback any
239
- // time a new token is found. We use it to store the necessary information to
240
- // initialize a Token instance.
433
+ /**
434
+ * This struct gets stored in the parser and passed in to the lex callback any
435
+ * time a new token is found. We use it to store the necessary information to
436
+ * initialize a Token instance.
437
+ */
241
438
  typedef struct {
242
439
  VALUE source;
243
440
  VALUE tokens;
244
441
  rb_encoding *encoding;
245
442
  } parse_lex_data_t;
246
443
 
247
- // This is passed as a callback to the parser. It gets called every time a new
248
- // token is found. Once found, we initialize a new instance of Token and push it
249
- // onto the tokens array.
444
+ /**
445
+ * This is passed as a callback to the parser. It gets called every time a new
446
+ * token is found. Once found, we initialize a new instance of Token and push it
447
+ * onto the tokens array.
448
+ */
250
449
  static void
251
450
  parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
252
451
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -258,9 +457,11 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
258
457
  rb_ary_push(parse_lex_data->tokens, yields);
259
458
  }
260
459
 
261
- // This is called whenever the encoding changes based on the magic comment at
262
- // the top of the file. We use it to update the encoding that we are using to
263
- // create tokens.
460
+ /**
461
+ * This is called whenever the encoding changes based on the magic comment at
462
+ * the top of the file. We use it to update the encoding that we are using to
463
+ * create tokens.
464
+ */
264
465
  static void
265
466
  parse_lex_encoding_changed_callback(pm_parser_t *parser) {
266
467
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -281,17 +482,19 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
281
482
  }
282
483
  }
283
484
 
284
- // Parse the given input and return a ParseResult containing just the tokens or
285
- // the nodes and tokens.
485
+ /**
486
+ * Parse the given input and return a ParseResult containing just the tokens or
487
+ * the nodes and tokens.
488
+ */
286
489
  static VALUE
287
- parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
490
+ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
288
491
  pm_parser_t parser;
289
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
492
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
290
493
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
291
494
 
292
495
  VALUE offsets = rb_ary_new();
293
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
294
- VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
496
+ VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
497
+ VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
295
498
 
296
499
  parse_lex_data_t parse_lex_data = {
297
500
  .source = source,
@@ -338,29 +541,42 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
338
541
  return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
339
542
  }
340
543
 
341
- // Return an array of tokens corresponding to the given string.
544
+ /**
545
+ * call-seq:
546
+ * Prism::lex(source, **options) -> Array
547
+ *
548
+ * Return an array of Token instances corresponding to the given string. For
549
+ * supported options, see Prism::parse.
550
+ */
342
551
  static VALUE
343
552
  lex(int argc, VALUE *argv, VALUE self) {
344
- VALUE string;
345
- VALUE filepath;
346
- rb_scan_args(argc, argv, "11", &string, &filepath);
347
-
348
553
  pm_string_t input;
349
- input_load_string(&input, string);
554
+ pm_options_t options = { 0 };
555
+ string_options(argc, argv, &input, &options);
556
+
557
+ VALUE result = parse_lex_input(&input, &options, false);
558
+ pm_string_free(&input);
559
+ pm_options_free(&options);
350
560
 
351
- return parse_lex_input(&input, check_string(filepath), false);
561
+ return result;
352
562
  }
353
563
 
354
- // Return an array of tokens corresponding to the given file.
564
+ /**
565
+ * call-seq:
566
+ * Prism::lex_file(filepath, **options) -> Array
567
+ *
568
+ * Return an array of Token instances corresponding to the given file. For
569
+ * supported options, see Prism::parse.
570
+ */
355
571
  static VALUE
356
- lex_file(VALUE self, VALUE filepath) {
572
+ lex_file(int argc, VALUE *argv, VALUE self) {
357
573
  pm_string_t input;
574
+ pm_options_t options = { 0 };
575
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
358
576
 
359
- const char *checked = check_string(filepath);
360
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
361
-
362
- VALUE value = parse_lex_input(&input, checked, false);
577
+ VALUE value = parse_lex_input(&input, &options, false);
363
578
  pm_string_free(&input);
579
+ pm_options_free(&options);
364
580
 
365
581
  return value;
366
582
  }
@@ -369,11 +585,13 @@ lex_file(VALUE self, VALUE filepath) {
369
585
  /* Parsing Ruby code */
370
586
  /******************************************************************************/
371
587
 
372
- // Parse the given input and return a ParseResult instance.
588
+ /**
589
+ * Parse the given input and return a ParseResult instance.
590
+ */
373
591
  static VALUE
374
- parse_input(pm_string_t *input, const char *filepath) {
592
+ parse_input(pm_string_t *input, const pm_options_t *options) {
375
593
  pm_parser_t parser;
376
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
594
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
377
595
 
378
596
  pm_node_t *node = pm_parse(&parser);
379
597
  rb_encoding *encoding = rb_enc_find(parser.encoding.name);
@@ -396,33 +614,31 @@ parse_input(pm_string_t *input, const char *filepath) {
396
614
  return result;
397
615
  }
398
616
 
399
- // Parse the given input and return an array of Comment objects.
400
- static VALUE
401
- parse_input_comments(pm_string_t *input, const char *filepath) {
402
- pm_parser_t parser;
403
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
404
-
405
- pm_node_t *node = pm_parse(&parser);
406
- rb_encoding *encoding = rb_enc_find(parser.encoding.name);
407
-
408
- VALUE source = pm_source_new(&parser, encoding);
409
- VALUE comments = parser_comments(&parser, source);
410
-
411
- pm_node_destroy(&parser, node);
412
- pm_parser_free(&parser);
413
-
414
- return comments;
415
- }
416
-
417
- // Parse the given string and return a ParseResult instance.
617
+ /**
618
+ * call-seq:
619
+ * Prism::parse(source, **options) -> ParseResult
620
+ *
621
+ * Parse the given string and return a ParseResult instance. The options that
622
+ * are supported are:
623
+ *
624
+ * * `filepath` - the filepath of the source being parsed. This should be a
625
+ * string or nil.
626
+ * * `encoding` - the encoding of the source being parsed. This should be an
627
+ * encoding or nil.
628
+ * * `line` - the line number that the parse starts on. This should be an
629
+ * integer or nil. Note that this is 1-indexed.
630
+ * * `frozen_string_literal` - whether or not the frozen string literal pragma
631
+ * has been set. This should be a boolean or nil.
632
+ * * `verbose` - the current level of verbosity. This controls whether or not
633
+ * the parser emits warnings. This should be a boolean or nil.
634
+ * * `scopes` - the locals that are in scope surrounding the code that is being
635
+ * parsed. This should be an array of arrays of symbols or nil.
636
+ */
418
637
  static VALUE
419
638
  parse(int argc, VALUE *argv, VALUE self) {
420
- VALUE string;
421
- VALUE filepath;
422
- rb_scan_args(argc, argv, "11", &string, &filepath);
423
-
424
639
  pm_string_t input;
425
- input_load_string(&input, string);
640
+ pm_options_t options = { 0 };
641
+ string_options(argc, argv, &input, &options);
426
642
 
427
643
  #ifdef PRISM_DEBUG_MODE_BUILD
428
644
  size_t length = pm_string_length(&input);
@@ -431,82 +647,147 @@ parse(int argc, VALUE *argv, VALUE self) {
431
647
  pm_string_constant_init(&input, dup, length);
432
648
  #endif
433
649
 
434
- VALUE value = parse_input(&input, check_string(filepath));
650
+ VALUE value = parse_input(&input, &options);
435
651
 
436
652
  #ifdef PRISM_DEBUG_MODE_BUILD
437
653
  free(dup);
438
654
  #endif
439
655
 
656
+ pm_string_free(&input);
657
+ pm_options_free(&options);
440
658
  return value;
441
659
  }
442
660
 
443
- // Parse the given file and return a ParseResult instance.
661
+ /**
662
+ * call-seq:
663
+ * Prism::parse_file(filepath, **options) -> ParseResult
664
+ *
665
+ * Parse the given file and return a ParseResult instance. For supported
666
+ * options, see Prism::parse.
667
+ */
444
668
  static VALUE
445
- parse_file(VALUE self, VALUE filepath) {
669
+ parse_file(int argc, VALUE *argv, VALUE self) {
446
670
  pm_string_t input;
671
+ pm_options_t options = { 0 };
672
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
447
673
 
448
- const char *checked = check_string(filepath);
449
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
450
-
451
- VALUE value = parse_input(&input, checked);
674
+ VALUE value = parse_input(&input, &options);
452
675
  pm_string_free(&input);
676
+ pm_options_free(&options);
453
677
 
454
678
  return value;
455
679
  }
456
680
 
457
- // Parse the given string and return an array of Comment objects.
681
+ /**
682
+ * Parse the given input and return an array of Comment objects.
683
+ */
458
684
  static VALUE
459
- parse_comments(int argc, VALUE *argv, VALUE self) {
460
- VALUE string;
461
- VALUE filepath;
462
- rb_scan_args(argc, argv, "11", &string, &filepath);
685
+ parse_input_comments(pm_string_t *input, const pm_options_t *options) {
686
+ pm_parser_t parser;
687
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
463
688
 
464
- pm_string_t input;
465
- input_load_string(&input, string);
689
+ pm_node_t *node = pm_parse(&parser);
690
+ rb_encoding *encoding = rb_enc_find(parser.encoding.name);
691
+
692
+ VALUE source = pm_source_new(&parser, encoding);
693
+ VALUE comments = parser_comments(&parser, source);
466
694
 
467
- return parse_input_comments(&input, check_string(filepath));
695
+ pm_node_destroy(&parser, node);
696
+ pm_parser_free(&parser);
697
+
698
+ return comments;
468
699
  }
469
700
 
470
- // Parse the given file and return an array of Comment objects.
701
+ /**
702
+ * call-seq:
703
+ * Prism::parse_comments(source, **options) -> Array
704
+ *
705
+ * Parse the given string and return an array of Comment objects. For supported
706
+ * options, see Prism::parse.
707
+ */
471
708
  static VALUE
472
- parse_file_comments(VALUE self, VALUE filepath) {
709
+ parse_comments(int argc, VALUE *argv, VALUE self) {
473
710
  pm_string_t input;
711
+ pm_options_t options = { 0 };
712
+ string_options(argc, argv, &input, &options);
474
713
 
475
- const char *checked = check_string(filepath);
476
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
714
+ VALUE result = parse_input_comments(&input, &options);
715
+ pm_string_free(&input);
716
+ pm_options_free(&options);
477
717
 
478
- VALUE value = parse_input_comments(&input, checked);
718
+ return result;
719
+ }
720
+
721
+ /**
722
+ * call-seq:
723
+ * Prism::parse_file_comments(filepath, **options) -> Array
724
+ *
725
+ * Parse the given file and return an array of Comment objects. For supported
726
+ * options, see Prism::parse.
727
+ */
728
+ static VALUE
729
+ parse_file_comments(int argc, VALUE *argv, VALUE self) {
730
+ pm_string_t input;
731
+ pm_options_t options = { 0 };
732
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
733
+
734
+ VALUE value = parse_input_comments(&input, &options);
479
735
  pm_string_free(&input);
736
+ pm_options_free(&options);
480
737
 
481
738
  return value;
482
739
  }
483
740
 
484
- // Parse the given string and return a ParseResult instance.
741
+ /**
742
+ * call-seq:
743
+ * Prism::parse_lex(source, **options) -> ParseResult
744
+ *
745
+ * Parse the given string and return a ParseResult instance that contains a
746
+ * 2-element array, where the first element is the AST and the second element is
747
+ * an array of Token instances.
748
+ *
749
+ * This API is only meant to be used in the case where you need both the AST and
750
+ * the tokens. If you only need one or the other, use either Prism::parse or
751
+ * Prism::lex.
752
+ *
753
+ * For supported options, see Prism::parse.
754
+ */
485
755
  static VALUE
486
756
  parse_lex(int argc, VALUE *argv, VALUE self) {
487
- VALUE string;
488
- VALUE filepath;
489
- rb_scan_args(argc, argv, "11", &string, &filepath);
490
-
491
757
  pm_string_t input;
492
- input_load_string(&input, string);
758
+ pm_options_t options = { 0 };
759
+ string_options(argc, argv, &input, &options);
493
760
 
494
- VALUE value = parse_lex_input(&input, check_string(filepath), true);
761
+ VALUE value = parse_lex_input(&input, &options, true);
495
762
  pm_string_free(&input);
763
+ pm_options_free(&options);
496
764
 
497
765
  return value;
498
766
  }
499
767
 
500
- // Parse and lex the given file and return a ParseResult instance.
768
+ /**
769
+ * call-seq:
770
+ * Prism::parse_lex_file(filepath, **options) -> ParseResult
771
+ *
772
+ * Parse the given file and return a ParseResult instance that contains a
773
+ * 2-element array, where the first element is the AST and the second element is
774
+ * an array of Token instances.
775
+ *
776
+ * This API is only meant to be used in the case where you need both the AST and
777
+ * the tokens. If you only need one or the other, use either Prism::parse_file
778
+ * or Prism::lex_file.
779
+ *
780
+ * For supported options, see Prism::parse.
781
+ */
501
782
  static VALUE
502
- parse_lex_file(VALUE self, VALUE filepath) {
783
+ parse_lex_file(int argc, VALUE *argv, VALUE self) {
503
784
  pm_string_t input;
785
+ pm_options_t options = { 0 };
786
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
504
787
 
505
- const char *checked = check_string(filepath);
506
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
507
-
508
- VALUE value = parse_lex_input(&input, checked, true);
788
+ VALUE value = parse_lex_input(&input, &options, true);
509
789
  pm_string_free(&input);
790
+ pm_options_free(&options);
510
791
 
511
792
  return value;
512
793
  }
@@ -515,13 +796,17 @@ parse_lex_file(VALUE self, VALUE filepath) {
515
796
  /* Utility functions exposed to make testing easier */
516
797
  /******************************************************************************/
517
798
 
518
- // Returns an array of strings corresponding to the named capture groups in the
519
- // given source string. If prism was unable to parse the regular expression, this
520
- // function returns nil.
799
+ /**
800
+ * call-seq:
801
+ * Debug::named_captures(source) -> Array or nil
802
+ *
803
+ * Returns an array of strings corresponding to the named capture groups in the
804
+ * given source string. If prism was unable to parse the regular expression,
805
+ * this function returns nil.
806
+ */
521
807
  static VALUE
522
808
  named_captures(VALUE self, VALUE source) {
523
- pm_string_list_t string_list;
524
- pm_string_list_init(&string_list);
809
+ pm_string_list_t string_list = { 0 };
525
810
 
526
811
  if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
527
812
  pm_string_list_free(&string_list);
@@ -538,7 +823,12 @@ named_captures(VALUE self, VALUE source) {
538
823
  return names;
539
824
  }
540
825
 
541
- // Return a hash of information about the given source string's memory usage.
826
+ /**
827
+ * call-seq:
828
+ * Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
829
+ *
830
+ * Return a hash of information about the given source string's memory usage.
831
+ */
542
832
  static VALUE
543
833
  memsize(VALUE self, VALUE string) {
544
834
  pm_parser_t parser;
@@ -559,8 +849,13 @@ memsize(VALUE self, VALUE string) {
559
849
  return result;
560
850
  }
561
851
 
562
- // Parse the file, but do nothing with the result. This is used to profile the
563
- // parser for memory and speed.
852
+ /**
853
+ * call-seq:
854
+ * Debug::profile_file(filepath) -> nil
855
+ *
856
+ * Parse the file, but do nothing with the result. This is used to profile the
857
+ * parser for memory and speed.
858
+ */
564
859
  static VALUE
565
860
  profile_file(VALUE self, VALUE filepath) {
566
861
  pm_string_t input;
@@ -568,39 +863,28 @@ profile_file(VALUE self, VALUE filepath) {
568
863
  const char *checked = check_string(filepath);
569
864
  if (!pm_string_mapped_init(&input, checked)) return Qnil;
570
865
 
866
+ pm_options_t options = { 0 };
867
+ pm_options_filepath_set(&options, checked);
868
+
571
869
  pm_parser_t parser;
572
- pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked);
870
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
573
871
 
574
872
  pm_node_t *node = pm_parse(&parser);
575
873
  pm_node_destroy(&parser, node);
576
874
  pm_parser_free(&parser);
577
-
875
+ pm_options_free(&options);
578
876
  pm_string_free(&input);
579
877
 
580
878
  return Qnil;
581
879
  }
582
880
 
583
- // Parse the file and serialize the result. This is mostly used to test this
584
- // path since it is used by client libraries.
585
- static VALUE
586
- parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
587
- pm_string_t input;
588
- pm_buffer_t buffer;
589
- pm_buffer_init(&buffer);
590
-
591
- const char *checked = check_string(filepath);
592
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
593
-
594
- pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
595
- VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
596
-
597
- pm_string_free(&input);
598
- pm_buffer_free(&buffer);
599
- return result;
600
- }
601
-
602
- // Inspect the AST that represents the given source using the prism pretty print
603
- // as opposed to the Ruby implementation.
881
+ /**
882
+ * call-seq:
883
+ * Debug::inspect_node(source) -> inspected
884
+ *
885
+ * Inspect the AST that represents the given source using the prism pretty print
886
+ * as opposed to the Ruby implementation.
887
+ */
604
888
  static VALUE
605
889
  inspect_node(VALUE self, VALUE source) {
606
890
  pm_string_t input;
@@ -628,6 +912,9 @@ inspect_node(VALUE self, VALUE source) {
628
912
  /* Initialization of the extension */
629
913
  /******************************************************************************/
630
914
 
915
+ /**
916
+ * The init function that Ruby calls when loading this extension.
917
+ */
631
918
  RUBY_FUNC_EXPORTED void
632
919
  Init_prism(void) {
633
920
  // Make sure that the prism library version matches the expected version.
@@ -649,27 +936,46 @@ Init_prism(void) {
649
936
  rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
650
937
  rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
651
938
  rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
939
+ rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
940
+ rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
941
+ rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
652
942
  rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
653
943
  rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
654
944
  rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
655
945
  rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
656
946
 
657
- // Define the version string here so that we can use the constants defined
658
- // in prism.h.
947
+ // Intern all of the options that we support so that we don't have to do it
948
+ // every time we parse.
949
+ rb_option_id_filepath = rb_intern_const("filepath");
950
+ rb_option_id_encoding = rb_intern_const("encoding");
951
+ rb_option_id_line = rb_intern_const("line");
952
+ rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
953
+ rb_option_id_verbose = rb_intern_const("verbose");
954
+ rb_option_id_scopes = rb_intern_const("scopes");
955
+
956
+ /**
957
+ * The version of the prism library.
958
+ */
659
959
  rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
660
- rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CExtension")));
960
+
961
+ /**
962
+ * The backend of the parser that prism is using to parse Ruby code. This
963
+ * can be either :CEXT or :FFI. On runtimes that support C extensions, we
964
+ * default to :CEXT. Otherwise we use :FFI.
965
+ */
966
+ rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
661
967
 
662
968
  // First, the functions that have to do with lexing and parsing.
663
969
  rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
664
- rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
970
+ rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
665
971
  rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
666
- rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
972
+ rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
667
973
  rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
668
- rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
974
+ rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
669
975
  rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
670
- rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, 1);
976
+ rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
671
977
  rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
672
- rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
978
+ rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
673
979
 
674
980
  // Next, the functions that will be called by the parser to perform various
675
981
  // internal tasks. We expose these to make them easier to test.
@@ -677,7 +983,6 @@ Init_prism(void) {
677
983
  rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
678
984
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
679
985
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
680
- rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
681
986
  rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
682
987
 
683
988
  // Next, initialize the other APIs.