prism 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -1
  3. data/Makefile +6 -0
  4. data/README.md +1 -1
  5. data/config.yml +50 -35
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/serialization.md +28 -29
  8. data/ext/prism/api_node.c +802 -770
  9. data/ext/prism/api_pack.c +20 -9
  10. data/ext/prism/extension.c +464 -162
  11. data/ext/prism/extension.h +1 -1
  12. data/include/prism/ast.h +3173 -763
  13. data/include/prism/defines.h +32 -9
  14. data/include/prism/diagnostic.h +36 -3
  15. data/include/prism/enc/pm_encoding.h +118 -28
  16. data/include/prism/node.h +38 -13
  17. data/include/prism/options.h +204 -0
  18. data/include/prism/pack.h +44 -33
  19. data/include/prism/parser.h +445 -200
  20. data/include/prism/prettyprint.h +12 -1
  21. data/include/prism/regexp.h +16 -2
  22. data/include/prism/util/pm_buffer.h +94 -16
  23. data/include/prism/util/pm_char.h +162 -48
  24. data/include/prism/util/pm_constant_pool.h +126 -32
  25. data/include/prism/util/pm_list.h +68 -38
  26. data/include/prism/util/pm_memchr.h +18 -3
  27. data/include/prism/util/pm_newline_list.h +70 -27
  28. data/include/prism/util/pm_state_stack.h +25 -7
  29. data/include/prism/util/pm_string.h +115 -27
  30. data/include/prism/util/pm_string_list.h +25 -6
  31. data/include/prism/util/pm_strncasecmp.h +32 -0
  32. data/include/prism/util/pm_strpbrk.h +31 -17
  33. data/include/prism/version.h +27 -2
  34. data/include/prism.h +224 -31
  35. data/lib/prism/compiler.rb +6 -3
  36. data/lib/prism/debug.rb +23 -7
  37. data/lib/prism/dispatcher.rb +33 -18
  38. data/lib/prism/dsl.rb +10 -5
  39. data/lib/prism/ffi.rb +132 -80
  40. data/lib/prism/lex_compat.rb +25 -15
  41. data/lib/prism/mutation_compiler.rb +10 -5
  42. data/lib/prism/node.rb +370 -135
  43. data/lib/prism/node_ext.rb +1 -1
  44. data/lib/prism/node_inspector.rb +1 -1
  45. data/lib/prism/pack.rb +79 -40
  46. data/lib/prism/parse_result/comments.rb +7 -2
  47. data/lib/prism/parse_result/newlines.rb +4 -0
  48. data/lib/prism/parse_result.rb +150 -30
  49. data/lib/prism/pattern.rb +11 -0
  50. data/lib/prism/ripper_compat.rb +28 -10
  51. data/lib/prism/serialize.rb +86 -54
  52. data/lib/prism/visitor.rb +10 -3
  53. data/lib/prism.rb +20 -2
  54. data/prism.gemspec +4 -2
  55. data/rbi/prism.rbi +104 -60
  56. data/rbi/prism_static.rbi +16 -2
  57. data/sig/prism.rbs +72 -43
  58. data/sig/prism_static.rbs +14 -1
  59. data/src/diagnostic.c +56 -53
  60. data/src/enc/pm_big5.c +1 -0
  61. data/src/enc/pm_euc_jp.c +1 -0
  62. data/src/enc/pm_gbk.c +1 -0
  63. data/src/enc/pm_shift_jis.c +1 -0
  64. data/src/enc/pm_tables.c +316 -80
  65. data/src/enc/pm_unicode.c +53 -8
  66. data/src/enc/pm_windows_31j.c +1 -0
  67. data/src/node.c +334 -321
  68. data/src/options.c +170 -0
  69. data/src/prettyprint.c +74 -47
  70. data/src/prism.c +1642 -856
  71. data/src/regexp.c +151 -95
  72. data/src/serialize.c +44 -20
  73. data/src/token_type.c +3 -1
  74. data/src/util/pm_buffer.c +45 -15
  75. data/src/util/pm_char.c +103 -57
  76. data/src/util/pm_constant_pool.c +51 -21
  77. data/src/util/pm_list.c +12 -4
  78. data/src/util/pm_memchr.c +5 -3
  79. data/src/util/pm_newline_list.c +20 -12
  80. data/src/util/pm_state_stack.c +9 -3
  81. data/src/util/pm_string.c +95 -85
  82. data/src/util/pm_string_list.c +14 -15
  83. data/src/util/pm_strncasecmp.c +10 -3
  84. data/src/util/pm_strpbrk.c +25 -19
  85. metadata +5 -3
  86. data/docs/prism.png +0 -0
@@ -1,7 +1,7 @@
1
1
  #include "prism/extension.h"
2
2
 
3
- // NOTE: this file should contain only bindings.
4
- // All non-trivial logic should be in librubyparser so it can be shared its the various callers.
3
+ // NOTE: this file should contain only bindings. All non-trivial logic should be
4
+ // in librubyparser so it can be shared by its various callers.
5
5
 
6
6
  VALUE rb_cPrism;
7
7
  VALUE rb_cPrismNode;
@@ -10,18 +10,30 @@ VALUE rb_cPrismToken;
10
10
  VALUE rb_cPrismLocation;
11
11
 
12
12
  VALUE rb_cPrismComment;
13
+ VALUE rb_cPrismInlineComment;
14
+ VALUE rb_cPrismEmbDocComment;
15
+ VALUE rb_cPrismDATAComment;
13
16
  VALUE rb_cPrismMagicComment;
14
17
  VALUE rb_cPrismParseError;
15
18
  VALUE rb_cPrismParseWarning;
16
19
  VALUE rb_cPrismParseResult;
17
20
 
21
+ ID rb_option_id_filepath;
22
+ ID rb_option_id_encoding;
23
+ ID rb_option_id_line;
24
+ ID rb_option_id_frozen_string_literal;
25
+ ID rb_option_id_verbose;
26
+ ID rb_option_id_scopes;
27
+
18
28
  /******************************************************************************/
19
29
  /* IO of Ruby code */
20
30
  /******************************************************************************/
21
31
 
22
- // Check if the given VALUE is a string. If it's nil, then return NULL. If it's
23
- // not a string, then raise a type error. Otherwise return the VALUE as a C
24
- // string.
32
+ /**
33
+ * Check if the given VALUE is a string. If it's nil, then return NULL. If it's
34
+ * not a string, then raise a type error. Otherwise return the VALUE as a C
35
+ * string.
36
+ */
25
37
  static const char *
26
38
  check_string(VALUE value) {
27
39
  // If the value is nil, then we don't need to do anything.
@@ -38,7 +50,9 @@ check_string(VALUE value) {
38
50
  return RSTRING_PTR(value);
39
51
  }
40
52
 
41
- // Load the contents and size of the given string into the given pm_string_t.
53
+ /**
54
+ * Load the contents and size of the given string into the given pm_string_t.
55
+ */
42
56
  static void
43
57
  input_load_string(pm_string_t *input, VALUE string) {
44
58
  // Check if the string is a string. If it's not, then raise a type error.
@@ -49,20 +63,181 @@ input_load_string(pm_string_t *input, VALUE string) {
49
63
  pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
50
64
  }
51
65
 
66
+ /******************************************************************************/
67
+ /* Building C options from Ruby options */
68
+ /******************************************************************************/
69
+
70
+ /**
71
+ * Build the scopes associated with the provided Ruby keyword value.
72
+ */
73
+ static void
74
+ build_options_scopes(pm_options_t *options, VALUE scopes) {
75
+ // Check if the value is an array. If it's not, then raise a type error.
76
+ if (!RB_TYPE_P(scopes, T_ARRAY)) {
77
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
78
+ }
79
+
80
+ // Initialize the scopes array.
81
+ size_t scopes_count = RARRAY_LEN(scopes);
82
+ pm_options_scopes_init(options, scopes_count);
83
+
84
+ // Iterate over the scopes and add them to the options.
85
+ for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
86
+ VALUE scope = rb_ary_entry(scopes, scope_index);
87
+
88
+ // Check that the scope is an array. If it's not, then raise a type
89
+ // error.
90
+ if (!RB_TYPE_P(scope, T_ARRAY)) {
91
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
92
+ }
93
+
94
+ // Initialize the scope array.
95
+ size_t locals_count = RARRAY_LEN(scope);
96
+ pm_options_scope_t *options_scope = &options->scopes[scope_index];
97
+ pm_options_scope_init(options_scope, locals_count);
98
+
99
+ // Iterate over the locals and add them to the scope.
100
+ for (size_t local_index = 0; local_index < locals_count; local_index++) {
101
+ VALUE local = rb_ary_entry(scope, local_index);
102
+
103
+ // Check that the local is a symbol. If it's not, then raise a
104
+ // type error.
105
+ if (!RB_TYPE_P(local, T_SYMBOL)) {
106
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
107
+ }
108
+
109
+ // Add the local to the scope.
110
+ pm_string_t *scope_local = &options_scope->locals[local_index];
111
+ const char *name = rb_id2name(SYM2ID(local));
112
+ pm_string_constant_init(scope_local, name, strlen(name));
113
+ }
114
+ }
115
+ }
116
+
117
+ /**
118
+ * An iterator function that is called for each key-value in the keywords hash.
119
+ */
120
+ static int
121
+ build_options_i(VALUE key, VALUE value, VALUE argument) {
122
+ pm_options_t *options = (pm_options_t *) argument;
123
+ ID key_id = SYM2ID(key);
124
+
125
+ if (key_id == rb_option_id_filepath) {
126
+ if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
127
+ } else if (key_id == rb_option_id_encoding) {
128
+ if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
129
+ } else if (key_id == rb_option_id_line) {
130
+ if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
131
+ } else if (key_id == rb_option_id_frozen_string_literal) {
132
+ if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
133
+ } else if (key_id == rb_option_id_verbose) {
134
+ pm_options_suppress_warnings_set(options, value != Qtrue);
135
+ } else if (key_id == rb_option_id_scopes) {
136
+ if (!NIL_P(value)) build_options_scopes(options, value);
137
+ } else {
138
+ rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
139
+ }
140
+
141
+ return ST_CONTINUE;
142
+ }
143
+
144
+ /**
145
+ * We need a struct here to pass through rb_protect and it has to be a single
146
+ * value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this
147
+ * through as an opaque pointer and cast it on both sides.
148
+ */
149
+ struct build_options_data {
150
+ pm_options_t *options;
151
+ VALUE keywords;
152
+ };
153
+
154
+ /**
155
+ * Build the set of options from the given keywords. Note that this can raise a
156
+ * Ruby error if the options are not valid.
157
+ */
158
+ static VALUE
159
+ build_options(VALUE argument) {
160
+ struct build_options_data *data = (struct build_options_data *) argument;
161
+ rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
162
+ return Qnil;
163
+ }
164
+
165
+ /**
166
+ * Extract the options from the given keyword arguments.
167
+ */
168
+ static void
169
+ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
170
+ if (!NIL_P(keywords)) {
171
+ struct build_options_data data = { .options = options, .keywords = keywords };
172
+ struct build_options_data *argument = &data;
173
+
174
+ int state = 0;
175
+ rb_protect(build_options, (VALUE) argument, &state);
176
+
177
+ if (state != 0) {
178
+ pm_options_free(options);
179
+ rb_jump_tag(state);
180
+ }
181
+ }
182
+
183
+ if (!NIL_P(filepath)) {
184
+ if (!RB_TYPE_P(filepath, T_STRING)) {
185
+ pm_options_free(options);
186
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
187
+ }
188
+
189
+ pm_options_filepath_set(options, RSTRING_PTR(filepath));
190
+ }
191
+ }
192
+
193
+ /**
194
+ * Read options for methods that look like (source, **options).
195
+ */
196
+ static void
197
+ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
198
+ VALUE string;
199
+ VALUE keywords;
200
+ rb_scan_args(argc, argv, "1:", &string, &keywords);
201
+
202
+ extract_options(options, Qnil, keywords);
203
+ input_load_string(input, string);
204
+ }
205
+
206
+ /**
207
+ * Read options for methods that look like (filepath, **options).
208
+ */
209
+ static bool
210
+ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
211
+ VALUE filepath;
212
+ VALUE keywords;
213
+ rb_scan_args(argc, argv, "1:", &filepath, &keywords);
214
+
215
+ extract_options(options, filepath, keywords);
216
+
217
+ if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
218
+ pm_options_free(options);
219
+ return false;
220
+ }
221
+
222
+ return true;
223
+ }
224
+
52
225
  /******************************************************************************/
53
226
  /* Serializing the AST */
54
227
  /******************************************************************************/
55
228
 
56
- // Dump the AST corresponding to the given input to a string.
229
+ /**
230
+ * Dump the AST corresponding to the given input to a string.
231
+ */
57
232
  static VALUE
58
- dump_input(pm_string_t *input, const char *filepath) {
233
+ dump_input(pm_string_t *input, const pm_options_t *options) {
59
234
  pm_buffer_t buffer;
60
235
  if (!pm_buffer_init(&buffer)) {
61
236
  rb_raise(rb_eNoMemError, "failed to allocate memory");
62
237
  }
63
238
 
64
239
  pm_parser_t parser;
65
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
240
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
66
241
 
67
242
  pm_node_t *node = pm_parse(&parser);
68
243
  pm_serialize(&parser, node, &buffer);
@@ -75,15 +250,18 @@ dump_input(pm_string_t *input, const char *filepath) {
75
250
  return result;
76
251
  }
77
252
 
78
- // Dump the AST corresponding to the given string to a string.
253
+ /**
254
+ * call-seq:
255
+ * Prism::dump(source, **options) -> String
256
+ *
257
+ * Dump the AST corresponding to the given string to a string. For supported
258
+ * options, see Prism::parse.
259
+ */
79
260
  static VALUE
80
261
  dump(int argc, VALUE *argv, VALUE self) {
81
- VALUE string;
82
- VALUE filepath;
83
- rb_scan_args(argc, argv, "11", &string, &filepath);
84
-
85
262
  pm_string_t input;
86
- input_load_string(&input, string);
263
+ pm_options_t options = { 0 };
264
+ string_options(argc, argv, &input, &options);
87
265
 
88
266
  #ifdef PRISM_DEBUG_MODE_BUILD
89
267
  size_t length = pm_string_length(&input);
@@ -92,25 +270,34 @@ dump(int argc, VALUE *argv, VALUE self) {
92
270
  pm_string_constant_init(&input, dup, length);
93
271
  #endif
94
272
 
95
- VALUE value = dump_input(&input, check_string(filepath));
273
+ VALUE value = dump_input(&input, &options);
96
274
 
97
275
  #ifdef PRISM_DEBUG_MODE_BUILD
98
276
  free(dup);
99
277
  #endif
100
278
 
279
+ pm_string_free(&input);
280
+ pm_options_free(&options);
281
+
101
282
  return value;
102
283
  }
103
284
 
104
- // Dump the AST corresponding to the given file to a string.
285
+ /**
286
+ * call-seq:
287
+ * Prism::dump_file(filepath, **options) -> String
288
+ *
289
+ * Dump the AST corresponding to the given file to a string. For supported
290
+ * options, see Prism::parse.
291
+ */
105
292
  static VALUE
106
- dump_file(VALUE self, VALUE filepath) {
293
+ dump_file(int argc, VALUE *argv, VALUE self) {
107
294
  pm_string_t input;
295
+ pm_options_t options = { 0 };
296
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
108
297
 
109
- const char *checked = check_string(filepath);
110
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
111
-
112
- VALUE value = dump_input(&input, checked);
298
+ VALUE value = dump_input(&input, &options);
113
299
  pm_string_free(&input);
300
+ pm_options_free(&options);
114
301
 
115
302
  return value;
116
303
  }
@@ -119,7 +306,9 @@ dump_file(VALUE self, VALUE filepath) {
119
306
  /* Extracting values for the parse result */
120
307
  /******************************************************************************/
121
308
 
122
- // Extract the comments out of the parser into an array.
309
+ /**
310
+ * Extract the comments out of the parser into an array.
311
+ */
123
312
  static VALUE
124
313
  parser_comments(pm_parser_t *parser, VALUE source) {
125
314
  VALUE comments = rb_ary_new();
@@ -134,27 +323,26 @@ parser_comments(pm_parser_t *parser, VALUE source) {
134
323
  VALUE type;
135
324
  switch (comment->type) {
136
325
  case PM_COMMENT_INLINE:
137
- type = ID2SYM(rb_intern("inline"));
326
+ type = rb_cPrismInlineComment;
138
327
  break;
139
328
  case PM_COMMENT_EMBDOC:
140
- type = ID2SYM(rb_intern("embdoc"));
329
+ type = rb_cPrismEmbDocComment;
141
330
  break;
142
331
  case PM_COMMENT___END__:
143
- type = ID2SYM(rb_intern("__END__"));
144
- break;
145
- default:
146
- type = ID2SYM(rb_intern("inline"));
332
+ type = rb_cPrismDATAComment;
147
333
  break;
148
334
  }
149
335
 
150
- VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
151
- rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
336
+ VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
337
+ rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
152
338
  }
153
339
 
154
340
  return comments;
155
341
  }
156
342
 
157
- // Extract the magic comments out of the parser into an array.
343
+ /**
344
+ * Extract the magic comments out of the parser into an array.
345
+ */
158
346
  static VALUE
159
347
  parser_magic_comments(pm_parser_t *parser, VALUE source) {
160
348
  VALUE magic_comments = rb_ary_new();
@@ -183,7 +371,9 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
183
371
  return magic_comments;
184
372
  }
185
373
 
186
- // Extract the errors out of the parser into an array.
374
+ /**
375
+ * Extract the errors out of the parser into an array.
376
+ */
187
377
  static VALUE
188
378
  parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
189
379
  VALUE errors = rb_ary_new();
@@ -207,7 +397,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
207
397
  return errors;
208
398
  }
209
399
 
210
- // Extract the warnings out of the parser into an array.
400
+ /**
401
+ * Extract the warnings out of the parser into an array.
402
+ */
211
403
  static VALUE
212
404
  parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
213
405
  VALUE warnings = rb_ary_new();
@@ -235,18 +427,22 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
235
427
  /* Lexing Ruby code */
236
428
  /******************************************************************************/
237
429
 
238
- // This struct gets stored in the parser and passed in to the lex callback any
239
- // time a new token is found. We use it to store the necessary information to
240
- // initialize a Token instance.
430
+ /**
431
+ * This struct gets stored in the parser and passed in to the lex callback any
432
+ * time a new token is found. We use it to store the necessary information to
433
+ * initialize a Token instance.
434
+ */
241
435
  typedef struct {
242
436
  VALUE source;
243
437
  VALUE tokens;
244
438
  rb_encoding *encoding;
245
439
  } parse_lex_data_t;
246
440
 
247
- // This is passed as a callback to the parser. It gets called every time a new
248
- // token is found. Once found, we initialize a new instance of Token and push it
249
- // onto the tokens array.
441
+ /**
442
+ * This is passed as a callback to the parser. It gets called every time a new
443
+ * token is found. Once found, we initialize a new instance of Token and push it
444
+ * onto the tokens array.
445
+ */
250
446
  static void
251
447
  parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
252
448
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -258,9 +454,11 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
258
454
  rb_ary_push(parse_lex_data->tokens, yields);
259
455
  }
260
456
 
261
- // This is called whenever the encoding changes based on the magic comment at
262
- // the top of the file. We use it to update the encoding that we are using to
263
- // create tokens.
457
+ /**
458
+ * This is called whenever the encoding changes based on the magic comment at
459
+ * the top of the file. We use it to update the encoding that we are using to
460
+ * create tokens.
461
+ */
264
462
  static void
265
463
  parse_lex_encoding_changed_callback(pm_parser_t *parser) {
266
464
  parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
@@ -281,17 +479,19 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
281
479
  }
282
480
  }
283
481
 
284
- // Parse the given input and return a ParseResult containing just the tokens or
285
- // the nodes and tokens.
482
+ /**
483
+ * Parse the given input and return a ParseResult containing just the tokens or
484
+ * the nodes and tokens.
485
+ */
286
486
  static VALUE
287
- parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
487
+ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
288
488
  pm_parser_t parser;
289
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
489
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
290
490
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
291
491
 
292
492
  VALUE offsets = rb_ary_new();
293
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
294
- VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
493
+ VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
494
+ VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
295
495
 
296
496
  parse_lex_data_t parse_lex_data = {
297
497
  .source = source,
@@ -338,29 +538,42 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
338
538
  return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
339
539
  }
340
540
 
341
- // Return an array of tokens corresponding to the given string.
541
+ /**
542
+ * call-seq:
543
+ * Prism::lex(source, **options) -> Array
544
+ *
545
+ * Return an array of Token instances corresponding to the given string. For
546
+ * supported options, see Prism::parse.
547
+ */
342
548
  static VALUE
343
549
  lex(int argc, VALUE *argv, VALUE self) {
344
- VALUE string;
345
- VALUE filepath;
346
- rb_scan_args(argc, argv, "11", &string, &filepath);
347
-
348
550
  pm_string_t input;
349
- input_load_string(&input, string);
551
+ pm_options_t options = { 0 };
552
+ string_options(argc, argv, &input, &options);
350
553
 
351
- return parse_lex_input(&input, check_string(filepath), false);
554
+ VALUE result = parse_lex_input(&input, &options, false);
555
+ pm_string_free(&input);
556
+ pm_options_free(&options);
557
+
558
+ return result;
352
559
  }
353
560
 
354
- // Return an array of tokens corresponding to the given file.
561
+ /**
562
+ * call-seq:
563
+ * Prism::lex_file(filepath, **options) -> Array
564
+ *
565
+ * Return an array of Token instances corresponding to the given file. For
566
+ * supported options, see Prism::parse.
567
+ */
355
568
  static VALUE
356
- lex_file(VALUE self, VALUE filepath) {
569
+ lex_file(int argc, VALUE *argv, VALUE self) {
357
570
  pm_string_t input;
571
+ pm_options_t options = { 0 };
572
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
358
573
 
359
- const char *checked = check_string(filepath);
360
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
361
-
362
- VALUE value = parse_lex_input(&input, checked, false);
574
+ VALUE value = parse_lex_input(&input, &options, false);
363
575
  pm_string_free(&input);
576
+ pm_options_free(&options);
364
577
 
365
578
  return value;
366
579
  }
@@ -369,11 +582,13 @@ lex_file(VALUE self, VALUE filepath) {
369
582
  /* Parsing Ruby code */
370
583
  /******************************************************************************/
371
584
 
372
- // Parse the given input and return a ParseResult instance.
585
+ /**
586
+ * Parse the given input and return a ParseResult instance.
587
+ */
373
588
  static VALUE
374
- parse_input(pm_string_t *input, const char *filepath) {
589
+ parse_input(pm_string_t *input, const pm_options_t *options) {
375
590
  pm_parser_t parser;
376
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
591
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
377
592
 
378
593
  pm_node_t *node = pm_parse(&parser);
379
594
  rb_encoding *encoding = rb_enc_find(parser.encoding.name);
@@ -396,33 +611,31 @@ parse_input(pm_string_t *input, const char *filepath) {
396
611
  return result;
397
612
  }
398
613
 
399
- // Parse the given input and return an array of Comment objects.
400
- static VALUE
401
- parse_input_comments(pm_string_t *input, const char *filepath) {
402
- pm_parser_t parser;
403
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
404
-
405
- pm_node_t *node = pm_parse(&parser);
406
- rb_encoding *encoding = rb_enc_find(parser.encoding.name);
407
-
408
- VALUE source = pm_source_new(&parser, encoding);
409
- VALUE comments = parser_comments(&parser, source);
410
-
411
- pm_node_destroy(&parser, node);
412
- pm_parser_free(&parser);
413
-
414
- return comments;
415
- }
416
-
417
- // Parse the given string and return a ParseResult instance.
614
+ /**
615
+ * call-seq:
616
+ * Prism::parse(source, **options) -> ParseResult
617
+ *
618
+ * Parse the given string and return a ParseResult instance. The options that
619
+ * are supported are:
620
+ *
621
+ * * `filepath` - the filepath of the source being parsed. This should be a
622
+ * string or nil
623
+ * * `encoding` - the encoding of the source being parsed. This should be an
624
+ * encoding or nil
625
+ * * `line` - the line number that the parse starts on. This should be an
626
+ * integer or nil. Note that this is 1-indexed.
627
+ * * `frozen_string_literal` - whether or not the frozen string literal pragma
628
+ * has been set. This should be a boolean or nil.
629
+ * * `verbose` - the current level of verbosity. This controls whether or not
630
+ * the parser emits warnings. This should be a boolean or nil.
631
+ * * `scopes` - the locals that are in scope surrounding the code that is being
632
+ * parsed. This should be an array of arrays of symbols or nil.
633
+ */
418
634
  static VALUE
419
635
  parse(int argc, VALUE *argv, VALUE self) {
420
- VALUE string;
421
- VALUE filepath;
422
- rb_scan_args(argc, argv, "11", &string, &filepath);
423
-
424
636
  pm_string_t input;
425
- input_load_string(&input, string);
637
+ pm_options_t options = { 0 };
638
+ string_options(argc, argv, &input, &options);
426
639
 
427
640
  #ifdef PRISM_DEBUG_MODE_BUILD
428
641
  size_t length = pm_string_length(&input);
@@ -431,82 +644,147 @@ parse(int argc, VALUE *argv, VALUE self) {
431
644
  pm_string_constant_init(&input, dup, length);
432
645
  #endif
433
646
 
434
- VALUE value = parse_input(&input, check_string(filepath));
647
+ VALUE value = parse_input(&input, &options);
435
648
 
436
649
  #ifdef PRISM_DEBUG_MODE_BUILD
437
650
  free(dup);
438
651
  #endif
439
652
 
653
+ pm_string_free(&input);
654
+ pm_options_free(&options);
440
655
  return value;
441
656
  }
442
657
 
443
- // Parse the given file and return a ParseResult instance.
658
+ /**
659
+ * call-seq:
660
+ * Prism::parse_file(filepath, **options) -> ParseResult
661
+ *
662
+ * Parse the given file and return a ParseResult instance. For supported
663
+ * options, see Prism::parse.
664
+ */
444
665
  static VALUE
445
- parse_file(VALUE self, VALUE filepath) {
666
+ parse_file(int argc, VALUE *argv, VALUE self) {
446
667
  pm_string_t input;
668
+ pm_options_t options = { 0 };
669
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
447
670
 
448
- const char *checked = check_string(filepath);
449
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
450
-
451
- VALUE value = parse_input(&input, checked);
671
+ VALUE value = parse_input(&input, &options);
452
672
  pm_string_free(&input);
673
+ pm_options_free(&options);
453
674
 
454
675
  return value;
455
676
  }
456
677
 
457
- // Parse the given string and return an array of Comment objects.
678
+ /**
679
+ * Parse the given input and return an array of Comment objects.
680
+ */
458
681
  static VALUE
459
- parse_comments(int argc, VALUE *argv, VALUE self) {
460
- VALUE string;
461
- VALUE filepath;
462
- rb_scan_args(argc, argv, "11", &string, &filepath);
682
+ parse_input_comments(pm_string_t *input, const pm_options_t *options) {
683
+ pm_parser_t parser;
684
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
463
685
 
464
- pm_string_t input;
465
- input_load_string(&input, string);
686
+ pm_node_t *node = pm_parse(&parser);
687
+ rb_encoding *encoding = rb_enc_find(parser.encoding.name);
688
+
689
+ VALUE source = pm_source_new(&parser, encoding);
690
+ VALUE comments = parser_comments(&parser, source);
466
691
 
467
- return parse_input_comments(&input, check_string(filepath));
692
+ pm_node_destroy(&parser, node);
693
+ pm_parser_free(&parser);
694
+
695
+ return comments;
468
696
  }
469
697
 
470
- // Parse the given file and return an array of Comment objects.
698
+ /**
699
+ * call-seq:
700
+ * Prism::parse_comments(source, **options) -> Array
701
+ *
702
+ * Parse the given string and return an array of Comment objects. For supported
703
+ * options, see Prism::parse.
704
+ */
471
705
  static VALUE
472
- parse_file_comments(VALUE self, VALUE filepath) {
706
+ parse_comments(int argc, VALUE *argv, VALUE self) {
473
707
  pm_string_t input;
708
+ pm_options_t options = { 0 };
709
+ string_options(argc, argv, &input, &options);
474
710
 
475
- const char *checked = check_string(filepath);
476
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
711
+ VALUE result = parse_input_comments(&input, &options);
712
+ pm_string_free(&input);
713
+ pm_options_free(&options);
477
714
 
478
- VALUE value = parse_input_comments(&input, checked);
715
+ return result;
716
+ }
717
+
718
+ /**
719
+ * call-seq:
720
+ * Prism::parse_file_comments(filepath, **options) -> Array
721
+ *
722
+ * Parse the given file and return an array of Comment objects. For supported
723
+ * options, see Prism::parse.
724
+ */
725
+ static VALUE
726
+ parse_file_comments(int argc, VALUE *argv, VALUE self) {
727
+ pm_string_t input;
728
+ pm_options_t options = { 0 };
729
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
730
+
731
+ VALUE value = parse_input_comments(&input, &options);
479
732
  pm_string_free(&input);
733
+ pm_options_free(&options);
480
734
 
481
735
  return value;
482
736
  }
483
737
 
484
- // Parse the given string and return a ParseResult instance.
738
+ /**
739
+ * call-seq:
740
+ * Prism::parse_lex(source, **options) -> ParseResult
741
+ *
742
+ * Parse the given string and return a ParseResult instance that contains a
743
+ * 2-element array, where the first element is the AST and the second element is
744
+ * an array of Token instances.
745
+ *
746
+ * This API is only meant to be used in the case where you need both the AST and
747
+ * the tokens. If you only need one or the other, use either Prism::parse or
748
+ * Prism::lex.
749
+ *
750
+ * For supported options, see Prism::parse.
751
+ */
485
752
  static VALUE
486
753
  parse_lex(int argc, VALUE *argv, VALUE self) {
487
- VALUE string;
488
- VALUE filepath;
489
- rb_scan_args(argc, argv, "11", &string, &filepath);
490
-
491
754
  pm_string_t input;
492
- input_load_string(&input, string);
755
+ pm_options_t options = { 0 };
756
+ string_options(argc, argv, &input, &options);
493
757
 
494
- VALUE value = parse_lex_input(&input, check_string(filepath), true);
758
+ VALUE value = parse_lex_input(&input, &options, true);
495
759
  pm_string_free(&input);
760
+ pm_options_free(&options);
496
761
 
497
762
  return value;
498
763
  }
499
764
 
500
- // Parse and lex the given file and return a ParseResult instance.
765
+ /**
766
+ * call-seq:
767
+ * Prism::parse_lex_file(filepath, **options) -> ParseResult
768
+ *
769
+ * Parse the given file and return a ParseResult instance that contains a
770
+ * 2-element array, where the first element is the AST and the second element is
771
+ * an array of Token instances.
772
+ *
773
+ * This API is only meant to be used in the case where you need both the AST and
774
+ * the tokens. If you only need one or the other, use either Prism::parse_file
775
+ * or Prism::lex_file.
776
+ *
777
+ * For supported options, see Prism::parse.
778
+ */
501
779
  static VALUE
502
- parse_lex_file(VALUE self, VALUE filepath) {
780
+ parse_lex_file(int argc, VALUE *argv, VALUE self) {
503
781
  pm_string_t input;
782
+ pm_options_t options = { 0 };
783
+ if (!file_options(argc, argv, &input, &options)) return Qnil;
504
784
 
505
- const char *checked = check_string(filepath);
506
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
507
-
508
- VALUE value = parse_lex_input(&input, checked, true);
785
+ VALUE value = parse_lex_input(&input, &options, true);
509
786
  pm_string_free(&input);
787
+ pm_options_free(&options);
510
788
 
511
789
  return value;
512
790
  }
@@ -515,13 +793,17 @@ parse_lex_file(VALUE self, VALUE filepath) {
515
793
  /* Utility functions exposed to make testing easier */
516
794
  /******************************************************************************/
517
795
 
518
- // Returns an array of strings corresponding to the named capture groups in the
519
- // given source string. If prism was unable to parse the regular expression, this
520
- // function returns nil.
796
+ /**
797
+ * call-seq:
798
+ * Debug::named_captures(source) -> Array
799
+ *
800
+ * Returns an array of strings corresponding to the named capture groups in the
801
+ * given source string. If prism was unable to parse the regular expression,
802
+ * this function returns nil.
803
+ */
521
804
  static VALUE
522
805
  named_captures(VALUE self, VALUE source) {
523
- pm_string_list_t string_list;
524
- pm_string_list_init(&string_list);
806
+ pm_string_list_t string_list = { 0 };
525
807
 
526
808
  if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
527
809
  pm_string_list_free(&string_list);
@@ -538,7 +820,12 @@ named_captures(VALUE self, VALUE source) {
538
820
  return names;
539
821
  }
540
822
 
541
- // Return a hash of information about the given source string's memory usage.
823
+ /**
824
+ * call-seq:
825
+ * Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx }
826
+ *
827
+ * Return a hash of information about the given source string's memory usage.
828
+ */
542
829
  static VALUE
543
830
  memsize(VALUE self, VALUE string) {
544
831
  pm_parser_t parser;
@@ -559,8 +846,13 @@ memsize(VALUE self, VALUE string) {
559
846
  return result;
560
847
  }
561
848
 
562
- // Parse the file, but do nothing with the result. This is used to profile the
563
- // parser for memory and speed.
849
+ /**
850
+ * call-seq:
851
+ * Debug::profile_file(filepath) -> nil
852
+ *
853
+ * Parse the file, but do nothing with the result. This is used to profile the
854
+ * parser for memory and speed.
855
+ */
564
856
  static VALUE
565
857
  profile_file(VALUE self, VALUE filepath) {
566
858
  pm_string_t input;
@@ -568,39 +860,28 @@ profile_file(VALUE self, VALUE filepath) {
568
860
  const char *checked = check_string(filepath);
569
861
  if (!pm_string_mapped_init(&input, checked)) return Qnil;
570
862
 
863
+ pm_options_t options = { 0 };
864
+ pm_options_filepath_set(&options, checked);
865
+
571
866
  pm_parser_t parser;
572
- pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked);
867
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
573
868
 
574
869
  pm_node_t *node = pm_parse(&parser);
575
870
  pm_node_destroy(&parser, node);
576
871
  pm_parser_free(&parser);
577
-
872
+ pm_options_free(&options);
578
873
  pm_string_free(&input);
579
874
 
580
875
  return Qnil;
581
876
  }
582
877
 
583
- // Parse the file and serialize the result. This is mostly used to test this
584
- // path since it is used by client libraries.
585
- static VALUE
586
- parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
587
- pm_string_t input;
588
- pm_buffer_t buffer;
589
- pm_buffer_init(&buffer);
590
-
591
- const char *checked = check_string(filepath);
592
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
593
-
594
- pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
595
- VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
596
-
597
- pm_string_free(&input);
598
- pm_buffer_free(&buffer);
599
- return result;
600
- }
601
-
602
- // Inspect the AST that represents the given source using the prism pretty print
603
- // as opposed to the Ruby implementation.
878
+ /**
879
+ * call-seq:
880
+ * Debug::inspect_node(source) -> inspected
881
+ *
882
+ * Inspect the AST that represents the given source using the prism pretty print
883
+ * as opposed to the Ruby implementation.
884
+ */
604
885
  static VALUE
605
886
  inspect_node(VALUE self, VALUE source) {
606
887
  pm_string_t input;
@@ -628,6 +909,9 @@ inspect_node(VALUE self, VALUE source) {
628
909
  /* Initialization of the extension */
629
910
  /******************************************************************************/
630
911
 
912
+ /**
913
+ * The init function that Ruby calls when loading this extension.
914
+ */
631
915
  RUBY_FUNC_EXPORTED void
632
916
  Init_prism(void) {
633
917
  // Make sure that the prism library version matches the expected version.
@@ -649,27 +933,46 @@ Init_prism(void) {
649
933
  rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
650
934
  rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
651
935
  rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
936
+ rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
937
+ rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
938
+ rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
652
939
  rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
653
940
  rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
654
941
  rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
655
942
  rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
656
943
 
657
- // Define the version string here so that we can use the constants defined
658
- // in prism.h.
944
+ // Intern all of the options that we support so that we don't have to do it
945
+ // every time we parse.
946
+ rb_option_id_filepath = rb_intern_const("filepath");
947
+ rb_option_id_encoding = rb_intern_const("encoding");
948
+ rb_option_id_line = rb_intern_const("line");
949
+ rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
950
+ rb_option_id_verbose = rb_intern_const("verbose");
951
+ rb_option_id_scopes = rb_intern_const("scopes");
952
+
953
+ /**
954
+ * The version of the prism library.
955
+ */
659
956
  rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
660
- rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CExtension")));
957
+
958
+ /**
959
+ * The backend of the parser that prism is using to parse Ruby code. This
960
+ * can be either :CEXT or :FFI. On runtimes that support C extensions, we
961
+ * default to :CEXT. Otherwise we use :FFI.
962
+ */
963
+ rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
661
964
 
662
965
  // First, the functions that have to do with lexing and parsing.
663
966
  rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
664
- rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
967
+ rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
665
968
  rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
666
- rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
969
+ rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
667
970
  rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
668
- rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
971
+ rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
669
972
  rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
670
- rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, 1);
973
+ rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
671
974
  rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
672
- rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
975
+ rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
673
976
 
674
977
  // Next, the functions that will be called by the parser to perform various
675
978
  // internal tasks. We expose these to make them easier to test.
@@ -677,7 +980,6 @@ Init_prism(void) {
677
980
  rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
678
981
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
679
982
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
680
- rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
681
983
  rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
682
984
 
683
985
  // Next, initialize the other APIs.