prism 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +172 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +84 -0
  7. data/README.md +89 -0
  8. data/config.yml +2481 -0
  9. data/docs/build_system.md +74 -0
  10. data/docs/building.md +22 -0
  11. data/docs/configuration.md +60 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +117 -0
  14. data/docs/fuzzing.md +93 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/mapping.md +117 -0
  17. data/docs/ripper.md +36 -0
  18. data/docs/ruby_api.md +25 -0
  19. data/docs/serialization.md +181 -0
  20. data/docs/testing.md +55 -0
  21. data/ext/prism/api_node.c +4725 -0
  22. data/ext/prism/api_pack.c +256 -0
  23. data/ext/prism/extconf.rb +136 -0
  24. data/ext/prism/extension.c +626 -0
  25. data/ext/prism/extension.h +18 -0
  26. data/include/prism/ast.h +1932 -0
  27. data/include/prism/defines.h +45 -0
  28. data/include/prism/diagnostic.h +231 -0
  29. data/include/prism/enc/pm_encoding.h +95 -0
  30. data/include/prism/node.h +41 -0
  31. data/include/prism/pack.h +141 -0
  32. data/include/prism/parser.h +418 -0
  33. data/include/prism/regexp.h +19 -0
  34. data/include/prism/unescape.h +48 -0
  35. data/include/prism/util/pm_buffer.h +51 -0
  36. data/include/prism/util/pm_char.h +91 -0
  37. data/include/prism/util/pm_constant_pool.h +78 -0
  38. data/include/prism/util/pm_list.h +67 -0
  39. data/include/prism/util/pm_memchr.h +14 -0
  40. data/include/prism/util/pm_newline_list.h +61 -0
  41. data/include/prism/util/pm_state_stack.h +24 -0
  42. data/include/prism/util/pm_string.h +61 -0
  43. data/include/prism/util/pm_string_list.h +25 -0
  44. data/include/prism/util/pm_strpbrk.h +29 -0
  45. data/include/prism/version.h +4 -0
  46. data/include/prism.h +82 -0
  47. data/lib/prism/compiler.rb +465 -0
  48. data/lib/prism/debug.rb +157 -0
  49. data/lib/prism/desugar_compiler.rb +206 -0
  50. data/lib/prism/dispatcher.rb +2051 -0
  51. data/lib/prism/dsl.rb +750 -0
  52. data/lib/prism/ffi.rb +251 -0
  53. data/lib/prism/lex_compat.rb +838 -0
  54. data/lib/prism/mutation_compiler.rb +718 -0
  55. data/lib/prism/node.rb +14540 -0
  56. data/lib/prism/node_ext.rb +55 -0
  57. data/lib/prism/node_inspector.rb +68 -0
  58. data/lib/prism/pack.rb +185 -0
  59. data/lib/prism/parse_result/comments.rb +172 -0
  60. data/lib/prism/parse_result/newlines.rb +60 -0
  61. data/lib/prism/parse_result.rb +266 -0
  62. data/lib/prism/pattern.rb +239 -0
  63. data/lib/prism/ripper_compat.rb +174 -0
  64. data/lib/prism/serialize.rb +662 -0
  65. data/lib/prism/visitor.rb +470 -0
  66. data/lib/prism.rb +64 -0
  67. data/prism.gemspec +113 -0
  68. data/src/diagnostic.c +287 -0
  69. data/src/enc/pm_big5.c +52 -0
  70. data/src/enc/pm_euc_jp.c +58 -0
  71. data/src/enc/pm_gbk.c +61 -0
  72. data/src/enc/pm_shift_jis.c +56 -0
  73. data/src/enc/pm_tables.c +507 -0
  74. data/src/enc/pm_unicode.c +2324 -0
  75. data/src/enc/pm_windows_31j.c +56 -0
  76. data/src/node.c +2633 -0
  77. data/src/pack.c +493 -0
  78. data/src/prettyprint.c +2136 -0
  79. data/src/prism.c +14587 -0
  80. data/src/regexp.c +580 -0
  81. data/src/serialize.c +1899 -0
  82. data/src/token_type.c +349 -0
  83. data/src/unescape.c +637 -0
  84. data/src/util/pm_buffer.c +103 -0
  85. data/src/util/pm_char.c +272 -0
  86. data/src/util/pm_constant_pool.c +252 -0
  87. data/src/util/pm_list.c +41 -0
  88. data/src/util/pm_memchr.c +33 -0
  89. data/src/util/pm_newline_list.c +134 -0
  90. data/src/util/pm_state_stack.c +19 -0
  91. data/src/util/pm_string.c +200 -0
  92. data/src/util/pm_string_list.c +29 -0
  93. data/src/util/pm_strncasecmp.c +17 -0
  94. data/src/util/pm_strpbrk.c +66 -0
  95. metadata +138 -0
@@ -0,0 +1,626 @@
1
+ #include "prism/extension.h"
2
+
3
+ // NOTE: this file should contain only bindings.
4
+ // All non-trivial logic should be in librubyparser so it can be shared with its various callers.
5
+
6
+ VALUE rb_cPrism;
7
+ VALUE rb_cPrismNode;
8
+ VALUE rb_cPrismSource;
9
+ VALUE rb_cPrismToken;
10
+ VALUE rb_cPrismLocation;
11
+
12
+ VALUE rb_cPrismComment;
13
+ VALUE rb_cPrismParseError;
14
+ VALUE rb_cPrismParseWarning;
15
+ VALUE rb_cPrismParseResult;
16
+
17
+ /******************************************************************************/
18
+ /* IO of Ruby code */
19
+ /******************************************************************************/
20
+
21
+ // Check if the given VALUE is a string. If it's nil, then return NULL. If it's
22
+ // not a string, then raise a type error. Otherwise return the VALUE as a C
23
+ // string.
24
+ static const char *
25
+ check_string(VALUE value) {
26
+ // If the value is nil, then we don't need to do anything.
27
+ if (NIL_P(value)) {
28
+ return NULL;
29
+ }
30
+
31
+ // Check if the value is a string. If it's not, then raise a type error.
32
+ if (!RB_TYPE_P(value, T_STRING)) {
33
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(value));
34
+ }
35
+
36
+ // Otherwise, return the value as a C string.
37
+ return RSTRING_PTR(value);
38
+ }
39
+
40
+ // Load the contents and size of the given string into the given pm_string_t.
41
+ static void
42
+ input_load_string(pm_string_t *input, VALUE string) {
43
+ // Check if the string is a string. If it's not, then raise a type error.
44
+ if (!RB_TYPE_P(string, T_STRING)) {
45
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
46
+ }
47
+
48
+ pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
49
+ }
50
+
51
+ /******************************************************************************/
52
+ /* Serializing the AST */
53
+ /******************************************************************************/
54
+
55
+ // Dump the AST corresponding to the given input to a string.
56
+ static VALUE
57
+ dump_input(pm_string_t *input, const char *filepath) {
58
+ pm_buffer_t buffer;
59
+ if (!pm_buffer_init(&buffer)) {
60
+ rb_raise(rb_eNoMemError, "failed to allocate memory");
61
+ }
62
+
63
+ pm_parser_t parser;
64
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
65
+
66
+ pm_node_t *node = pm_parse(&parser);
67
+ pm_serialize(&parser, node, &buffer);
68
+
69
+ VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
70
+ pm_node_destroy(&parser, node);
71
+ pm_buffer_free(&buffer);
72
+ pm_parser_free(&parser);
73
+
74
+ return result;
75
+ }
76
+
77
+ // Dump the AST corresponding to the given string to a string.
78
+ static VALUE
79
+ dump(int argc, VALUE *argv, VALUE self) {
80
+ VALUE string;
81
+ VALUE filepath;
82
+ rb_scan_args(argc, argv, "11", &string, &filepath);
83
+
84
+ pm_string_t input;
85
+ input_load_string(&input, string);
86
+
87
+ #ifdef PRISM_DEBUG_MODE_BUILD
88
+ size_t length = pm_string_length(&input);
89
+ char* dup = malloc(length);
90
+ memcpy(dup, pm_string_source(&input), length);
91
+ pm_string_constant_init(&input, dup, length);
92
+ #endif
93
+
94
+ VALUE value = dump_input(&input, check_string(filepath));
95
+
96
+ #ifdef PRISM_DEBUG_MODE_BUILD
97
+ free(dup);
98
+ #endif
99
+
100
+ return value;
101
+ }
102
+
103
+ // Dump the AST corresponding to the given file to a string.
104
+ static VALUE
105
+ dump_file(VALUE self, VALUE filepath) {
106
+ pm_string_t input;
107
+
108
+ const char *checked = check_string(filepath);
109
+ if (!pm_string_mapped_init(&input, checked)) return Qnil;
110
+
111
+ VALUE value = dump_input(&input, checked);
112
+ pm_string_free(&input);
113
+
114
+ return value;
115
+ }
116
+
117
+ /******************************************************************************/
118
+ /* Extracting values for the parse result */
119
+ /******************************************************************************/
120
+
121
+ // Extract the comments out of the parser into an array.
122
+ static VALUE
123
+ parser_comments(pm_parser_t *parser, VALUE source) {
124
+ VALUE comments = rb_ary_new();
125
+
126
+ for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
127
+ VALUE location_argv[] = {
128
+ source,
129
+ LONG2FIX(comment->start - parser->start),
130
+ LONG2FIX(comment->end - comment->start)
131
+ };
132
+
133
+ VALUE type;
134
+ switch (comment->type) {
135
+ case PM_COMMENT_INLINE:
136
+ type = ID2SYM(rb_intern("inline"));
137
+ break;
138
+ case PM_COMMENT_EMBDOC:
139
+ type = ID2SYM(rb_intern("embdoc"));
140
+ break;
141
+ case PM_COMMENT___END__:
142
+ type = ID2SYM(rb_intern("__END__"));
143
+ break;
144
+ default:
145
+ type = ID2SYM(rb_intern("inline"));
146
+ break;
147
+ }
148
+
149
+ VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
150
+ rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
151
+ }
152
+
153
+ return comments;
154
+ }
155
+
156
+ // Extract the errors out of the parser into an array.
157
+ static VALUE
158
+ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
159
+ VALUE errors = rb_ary_new();
160
+ pm_diagnostic_t *error;
161
+
162
+ for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
163
+ VALUE location_argv[] = {
164
+ source,
165
+ LONG2FIX(error->start - parser->start),
166
+ LONG2FIX(error->end - error->start)
167
+ };
168
+
169
+ VALUE error_argv[] = {
170
+ rb_enc_str_new_cstr(error->message, encoding),
171
+ rb_class_new_instance(3, location_argv, rb_cPrismLocation)
172
+ };
173
+
174
+ rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cPrismParseError));
175
+ }
176
+
177
+ return errors;
178
+ }
179
+
180
+ // Extract the warnings out of the parser into an array.
181
+ static VALUE
182
+ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
183
+ VALUE warnings = rb_ary_new();
184
+ pm_diagnostic_t *warning;
185
+
186
+ for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
187
+ VALUE location_argv[] = {
188
+ source,
189
+ LONG2FIX(warning->start - parser->start),
190
+ LONG2FIX(warning->end - warning->start)
191
+ };
192
+
193
+ VALUE warning_argv[] = {
194
+ rb_enc_str_new_cstr(warning->message, encoding),
195
+ rb_class_new_instance(3, location_argv, rb_cPrismLocation)
196
+ };
197
+
198
+ rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cPrismParseWarning));
199
+ }
200
+
201
+ return warnings;
202
+ }
203
+
204
+ /******************************************************************************/
205
+ /* Lexing Ruby code */
206
+ /******************************************************************************/
207
+
208
+ // This struct gets stored in the parser and passed in to the lex callback any
209
+ // time a new token is found. We use it to store the necessary information to
210
+ // initialize a Token instance.
211
+ typedef struct {
212
+ VALUE source;
213
+ VALUE tokens;
214
+ rb_encoding *encoding;
215
+ } parse_lex_data_t;
216
+
217
+ // This is passed as a callback to the parser. It gets called every time a new
218
+ // token is found. Once found, we initialize a new instance of Token and push it
219
+ // onto the tokens array.
220
+ static void
221
+ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
222
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
223
+
224
+ VALUE yields = rb_ary_new_capa(2);
225
+ rb_ary_push(yields, pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
226
+ rb_ary_push(yields, INT2FIX(parser->lex_state));
227
+
228
+ rb_ary_push(parse_lex_data->tokens, yields);
229
+ }
230
+
231
+ // This is called whenever the encoding changes based on the magic comment at
232
+ // the top of the file. We use it to update the encoding that we are using to
233
+ // create tokens.
234
+ static void
235
+ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
236
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
237
+ parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
238
+
239
+ // Since the encoding changed, we need to go back and change the encoding of
240
+ // the tokens that were already lexed. This is only going to end up being
241
+ // one or two tokens, since the encoding can only change at the top of the
242
+ // file.
243
+ VALUE tokens = parse_lex_data->tokens;
244
+ for (long index = 0; index < RARRAY_LEN(tokens); index++) {
245
+ VALUE yields = rb_ary_entry(tokens, index);
246
+ VALUE token = rb_ary_entry(yields, 0);
247
+
248
+ VALUE value = rb_ivar_get(token, rb_intern("@value"));
249
+ rb_enc_associate(value, parse_lex_data->encoding);
250
+ ENC_CODERANGE_CLEAR(value);
251
+ }
252
+ }
253
+
254
+ // Parse the given input and return a ParseResult containing just the tokens or
255
+ // the nodes and tokens.
256
+ static VALUE
257
+ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
258
+ pm_parser_t parser;
259
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
260
+ pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
261
+
262
+ VALUE offsets = rb_ary_new();
263
+ VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
264
+ VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
265
+
266
+ parse_lex_data_t parse_lex_data = {
267
+ .source = source,
268
+ .tokens = rb_ary_new(),
269
+ .encoding = rb_utf8_encoding()
270
+ };
271
+
272
+ parse_lex_data_t *data = &parse_lex_data;
273
+ pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
274
+ .data = (void *) data,
275
+ .callback = parse_lex_token,
276
+ };
277
+
278
+ parser.lex_callback = &lex_callback;
279
+ pm_node_t *node = pm_parse(&parser);
280
+
281
+ // Here we need to update the source range to have the correct newline
282
+ // offsets. We do it here because we've already created the object and given
283
+ // it over to all of the tokens.
284
+ for (size_t index = 0; index < parser.newline_list.size; index++) {
285
+ rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
286
+ }
287
+
288
+ VALUE value;
289
+ if (return_nodes) {
290
+ value = rb_ary_new_capa(2);
291
+ rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
292
+ rb_ary_push(value, parse_lex_data.tokens);
293
+ } else {
294
+ value = parse_lex_data.tokens;
295
+ }
296
+
297
+ VALUE result_argv[] = {
298
+ value,
299
+ parser_comments(&parser, source),
300
+ parser_errors(&parser, parse_lex_data.encoding, source),
301
+ parser_warnings(&parser, parse_lex_data.encoding, source),
302
+ source
303
+ };
304
+
305
+ pm_node_destroy(&parser, node);
306
+ pm_parser_free(&parser);
307
+ return rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
308
+ }
309
+
310
+ // Return an array of tokens corresponding to the given string.
311
+ static VALUE
312
+ lex(int argc, VALUE *argv, VALUE self) {
313
+ VALUE string;
314
+ VALUE filepath;
315
+ rb_scan_args(argc, argv, "11", &string, &filepath);
316
+
317
+ pm_string_t input;
318
+ input_load_string(&input, string);
319
+
320
+ return parse_lex_input(&input, check_string(filepath), false);
321
+ }
322
+
323
+ // Return an array of tokens corresponding to the given file.
324
+ static VALUE
325
+ lex_file(VALUE self, VALUE filepath) {
326
+ pm_string_t input;
327
+
328
+ const char *checked = check_string(filepath);
329
+ if (!pm_string_mapped_init(&input, checked)) return Qnil;
330
+
331
+ VALUE value = parse_lex_input(&input, checked, false);
332
+ pm_string_free(&input);
333
+
334
+ return value;
335
+ }
336
+
337
+ /******************************************************************************/
338
+ /* Parsing Ruby code */
339
+ /******************************************************************************/
340
+
341
+ // Parse the given input and return a ParseResult instance.
342
+ static VALUE
343
+ parse_input(pm_string_t *input, const char *filepath) {
344
+ pm_parser_t parser;
345
+ pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
346
+
347
+ pm_node_t *node = pm_parse(&parser);
348
+ rb_encoding *encoding = rb_enc_find(parser.encoding.name);
349
+
350
+ VALUE source = pm_source_new(&parser, encoding);
351
+ VALUE result_argv[] = {
352
+ pm_ast_new(&parser, node, encoding),
353
+ parser_comments(&parser, source),
354
+ parser_errors(&parser, encoding, source),
355
+ parser_warnings(&parser, encoding, source),
356
+ source
357
+ };
358
+
359
+ VALUE result = rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
360
+
361
+ pm_node_destroy(&parser, node);
362
+ pm_parser_free(&parser);
363
+
364
+ return result;
365
+ }
366
+
367
+ // Parse the given string and return a ParseResult instance.
368
+ static VALUE
369
+ parse(int argc, VALUE *argv, VALUE self) {
370
+ VALUE string;
371
+ VALUE filepath;
372
+ rb_scan_args(argc, argv, "11", &string, &filepath);
373
+
374
+ pm_string_t input;
375
+ input_load_string(&input, string);
376
+
377
+ #ifdef PRISM_DEBUG_MODE_BUILD
378
+ size_t length = pm_string_length(&input);
379
+ char* dup = malloc(length);
380
+ memcpy(dup, pm_string_source(&input), length);
381
+ pm_string_constant_init(&input, dup, length);
382
+ #endif
383
+
384
+ VALUE value = parse_input(&input, check_string(filepath));
385
+
386
+ #ifdef PRISM_DEBUG_MODE_BUILD
387
+ free(dup);
388
+ #endif
389
+
390
+ return value;
391
+ }
392
+
393
+ // Parse the given file and return a ParseResult instance.
394
+ static VALUE
395
+ parse_file(VALUE self, VALUE filepath) {
396
+ pm_string_t input;
397
+
398
+ const char *checked = check_string(filepath);
399
+ if (!pm_string_mapped_init(&input, checked)) return Qnil;
400
+
401
+ VALUE value = parse_input(&input, checked);
402
+ pm_string_free(&input);
403
+
404
+ return value;
405
+ }
406
+
407
+ // Parse the given string and return a ParseResult instance.
408
+ static VALUE
409
+ parse_lex(int argc, VALUE *argv, VALUE self) {
410
+ VALUE string;
411
+ VALUE filepath;
412
+ rb_scan_args(argc, argv, "11", &string, &filepath);
413
+
414
+ pm_string_t input;
415
+ input_load_string(&input, string);
416
+
417
+ VALUE value = parse_lex_input(&input, check_string(filepath), true);
418
+ pm_string_free(&input);
419
+
420
+ return value;
421
+ }
422
+
423
+ // Parse and lex the given file and return a ParseResult instance.
424
+ static VALUE
425
+ parse_lex_file(VALUE self, VALUE filepath) {
426
+ pm_string_t input;
427
+
428
+ const char *checked = check_string(filepath);
429
+ if (!pm_string_mapped_init(&input, checked)) return Qnil;
430
+
431
+ VALUE value = parse_lex_input(&input, checked, true);
432
+ pm_string_free(&input);
433
+
434
+ return value;
435
+ }
436
+
437
+ /******************************************************************************/
438
+ /* Utility functions exposed to make testing easier */
439
+ /******************************************************************************/
440
+
441
+ // Returns an array of strings corresponding to the named capture groups in the
442
+ // given source string. If prism was unable to parse the regular expression, this
443
+ // function returns nil.
444
+ static VALUE
445
+ named_captures(VALUE self, VALUE source) {
446
+ pm_string_list_t string_list;
447
+ pm_string_list_init(&string_list);
448
+
449
+ if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
450
+ pm_string_list_free(&string_list);
451
+ return Qnil;
452
+ }
453
+
454
+ VALUE names = rb_ary_new();
455
+ for (size_t index = 0; index < string_list.length; index++) {
456
+ const pm_string_t *string = &string_list.strings[index];
457
+ rb_ary_push(names, rb_str_new((const char *) pm_string_source(string), pm_string_length(string)));
458
+ }
459
+
460
+ pm_string_list_free(&string_list);
461
+ return names;
462
+ }
463
+
464
+ // Accepts a source string and a type of unescaping and returns the unescaped
465
+ // version.
466
+ static VALUE
467
+ unescape(VALUE source, pm_unescape_type_t unescape_type) {
468
+ pm_string_t result;
469
+
470
+ if (pm_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
471
+ VALUE str = rb_str_new((const char *) pm_string_source(&result), pm_string_length(&result));
472
+ pm_string_free(&result);
473
+ return str;
474
+ } else {
475
+ pm_string_free(&result);
476
+ return Qnil;
477
+ }
478
+ }
479
+
480
+ // Do not unescape anything in the given string. This is here to provide a
481
+ // consistent API.
482
+ static VALUE
483
+ unescape_none(VALUE self, VALUE source) {
484
+ return unescape(source, PM_UNESCAPE_NONE);
485
+ }
486
+
487
+ // Minimally unescape the given string. This means effectively unescaping just
488
+ // the quotes of a string. Returns the unescaped string.
489
+ static VALUE
490
+ unescape_minimal(VALUE self, VALUE source) {
491
+ return unescape(source, PM_UNESCAPE_MINIMAL);
492
+ }
493
+
494
+ // Escape the given string minimally plus whitespace. Returns the unescaped string.
495
+ static VALUE
496
+ unescape_whitespace(VALUE self, VALUE source) {
497
+ return unescape(source, PM_UNESCAPE_WHITESPACE);
498
+ }
499
+
500
+ // Unescape everything in the given string. Return the unescaped string.
501
+ static VALUE
502
+ unescape_all(VALUE self, VALUE source) {
503
+ return unescape(source, PM_UNESCAPE_ALL);
504
+ }
505
+
506
+ // Return a hash of information about the given source string's memory usage.
507
+ static VALUE
508
+ memsize(VALUE self, VALUE string) {
509
+ pm_parser_t parser;
510
+ size_t length = RSTRING_LEN(string);
511
+ pm_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
512
+
513
+ pm_node_t *node = pm_parse(&parser);
514
+ pm_memsize_t memsize;
515
+ pm_node_memsize(node, &memsize);
516
+
517
+ pm_node_destroy(&parser, node);
518
+ pm_parser_free(&parser);
519
+
520
+ VALUE result = rb_hash_new();
521
+ rb_hash_aset(result, ID2SYM(rb_intern("length")), INT2FIX(length));
522
+ rb_hash_aset(result, ID2SYM(rb_intern("memsize")), INT2FIX(memsize.memsize));
523
+ rb_hash_aset(result, ID2SYM(rb_intern("node_count")), INT2FIX(memsize.node_count));
524
+ return result;
525
+ }
526
+
527
+ // Parse the file, but do nothing with the result. This is used to profile the
528
+ // parser for memory and speed.
529
+ static VALUE
530
+ profile_file(VALUE self, VALUE filepath) {
531
+ pm_string_t input;
532
+
533
+ const char *checked = check_string(filepath);
534
+ if (!pm_string_mapped_init(&input, checked)) return Qnil;
535
+
536
+ pm_parser_t parser;
537
+ pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked);
538
+
539
+ pm_node_t *node = pm_parse(&parser);
540
+ pm_node_destroy(&parser, node);
541
+ pm_parser_free(&parser);
542
+
543
+ pm_string_free(&input);
544
+
545
+ return Qnil;
546
+ }
547
+
548
+ // Parse the file and serialize the result. This is mostly used to test this
549
+ // path since it is used by client libraries.
550
+ static VALUE
551
+ parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
552
+ pm_string_t input;
553
+ pm_buffer_t buffer;
554
+ pm_buffer_init(&buffer);
555
+
556
+ const char *checked = check_string(filepath);
557
+ if (!pm_string_mapped_init(&input, checked)) return Qnil;
558
+
559
+ pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
560
+ VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
561
+
562
+ pm_string_free(&input);
563
+ pm_buffer_free(&buffer);
564
+ return result;
565
+ }
566
+
567
+ /******************************************************************************/
568
+ /* Initialization of the extension */
569
+ /******************************************************************************/
570
+
571
+ RUBY_FUNC_EXPORTED void
572
+ Init_prism(void) {
573
+ // Make sure that the prism library version matches the expected version.
574
+ // Otherwise something was compiled incorrectly.
575
+ if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) {
576
+ rb_raise(
577
+ rb_eRuntimeError,
578
+ "The prism library version (%s) does not match the expected version (%s)",
579
+ pm_version(),
580
+ EXPECTED_PRISM_VERSION
581
+ );
582
+ }
583
+
584
+ // Grab up references to all of the constants that we're going to need to
585
+ // reference throughout this extension.
586
+ rb_cPrism = rb_define_module("Prism");
587
+ rb_cPrismNode = rb_define_class_under(rb_cPrism, "Node", rb_cObject);
588
+ rb_cPrismSource = rb_define_class_under(rb_cPrism, "Source", rb_cObject);
589
+ rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
590
+ rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
591
+ rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
592
+ rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
593
+ rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
594
+ rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
595
+
596
+ // Define the version string here so that we can use the constants defined
597
+ // in prism.h.
598
+ rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
599
+ rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CExtension")));
600
+
601
+ // First, the functions that have to do with lexing and parsing.
602
+ rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
603
+ rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
604
+ rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
605
+ rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
606
+ rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
607
+ rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
608
+ rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
609
+ rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
610
+
611
+ // Next, the functions that will be called by the parser to perform various
612
+ // internal tasks. We expose these to make them easier to test.
613
+ VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
614
+ rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
615
+ rb_define_singleton_method(rb_cPrismDebug, "unescape_none", unescape_none, 1);
616
+ rb_define_singleton_method(rb_cPrismDebug, "unescape_minimal", unescape_minimal, 1);
617
+ rb_define_singleton_method(rb_cPrismDebug, "unescape_whitespace", unescape_whitespace, 1);
618
+ rb_define_singleton_method(rb_cPrismDebug, "unescape_all", unescape_all, 1);
619
+ rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
620
+ rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
621
+ rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
622
+
623
+ // Next, initialize the other APIs.
624
+ Init_prism_api_node();
625
+ Init_prism_pack();
626
+ }
@@ -0,0 +1,18 @@
1
+ #ifndef PRISM_EXT_NODE_H
2
+ #define PRISM_EXT_NODE_H
3
+
4
+ #define EXPECTED_PRISM_VERSION "0.13.0"
5
+
6
+ #include <ruby.h>
7
+ #include <ruby/encoding.h>
8
+ #include "prism.h"
9
+
10
+ VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
11
+ VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
12
+ VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
13
+
14
+ void Init_prism_api_node(void);
15
+ void Init_prism_pack(void);
16
+ PRISM_EXPORTED_FUNCTION void Init_prism(void);
17
+
18
+ #endif