yarp 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -8
- data/CONTRIBUTING.md +2 -2
- data/Makefile +5 -5
- data/README.md +11 -12
- data/config.yml +6 -2
- data/docs/build_system.md +21 -21
- data/docs/building.md +4 -4
- data/docs/configuration.md +25 -21
- data/docs/design.md +2 -2
- data/docs/encoding.md +17 -17
- data/docs/fuzzing.md +4 -4
- data/docs/heredocs.md +3 -3
- data/docs/mapping.md +94 -94
- data/docs/ripper.md +4 -4
- data/docs/ruby_api.md +11 -11
- data/docs/serialization.md +17 -16
- data/docs/testing.md +6 -6
- data/ext/prism/api_node.c +4725 -0
- data/ext/{yarp → prism}/api_pack.c +82 -82
- data/ext/{yarp → prism}/extconf.rb +13 -13
- data/ext/{yarp → prism}/extension.c +175 -168
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +1932 -0
- data/include/prism/defines.h +45 -0
- data/include/prism/diagnostic.h +231 -0
- data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
- data/include/prism/node.h +41 -0
- data/include/prism/pack.h +141 -0
- data/include/{yarp → prism}/parser.h +143 -142
- data/include/prism/regexp.h +19 -0
- data/include/prism/unescape.h +48 -0
- data/include/prism/util/pm_buffer.h +51 -0
- data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
- data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
- data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
- data/include/prism/util/pm_memchr.h +14 -0
- data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
- data/include/prism/util/pm_state_stack.h +24 -0
- data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
- data/include/prism/util/pm_string_list.h +25 -0
- data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
- data/include/prism/version.h +4 -0
- data/include/prism.h +82 -0
- data/lib/prism/compiler.rb +465 -0
- data/lib/prism/debug.rb +157 -0
- data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
- data/lib/prism/dispatcher.rb +2051 -0
- data/lib/prism/dsl.rb +750 -0
- data/lib/{yarp → prism}/ffi.rb +66 -67
- data/lib/{yarp → prism}/lex_compat.rb +40 -43
- data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
- data/lib/{yarp → prism}/node.rb +2012 -2593
- data/lib/prism/node_ext.rb +55 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/{yarp → prism}/pack.rb +1 -1
- data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
- data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
- data/lib/prism/parse_result.rb +266 -0
- data/lib/{yarp → prism}/pattern.rb +14 -14
- data/lib/{yarp → prism}/ripper_compat.rb +5 -5
- data/lib/{yarp → prism}/serialize.rb +12 -7
- data/lib/prism/visitor.rb +470 -0
- data/lib/prism.rb +64 -0
- data/lib/yarp.rb +2 -614
- data/src/diagnostic.c +213 -208
- data/src/enc/pm_big5.c +52 -0
- data/src/enc/pm_euc_jp.c +58 -0
- data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
- data/src/enc/pm_shift_jis.c +56 -0
- data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
- data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
- data/src/enc/pm_windows_31j.c +56 -0
- data/src/node.c +1293 -1233
- data/src/pack.c +247 -247
- data/src/prettyprint.c +1479 -1479
- data/src/{yarp.c → prism.c} +5205 -5083
- data/src/regexp.c +132 -132
- data/src/serialize.c +1121 -1121
- data/src/token_type.c +169 -167
- data/src/unescape.c +106 -87
- data/src/util/pm_buffer.c +103 -0
- data/src/util/{yp_char.c → pm_char.c} +72 -72
- data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
- data/src/util/{yp_list.c → pm_list.c} +10 -10
- data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
- data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
- data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
- data/src/util/{yp_string.c → pm_string.c} +38 -38
- data/src/util/pm_string_list.c +29 -0
- data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
- data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
- data/yarp.gemspec +68 -59
- metadata +70 -61
- data/ext/yarp/api_node.c +0 -4728
- data/ext/yarp/extension.h +0 -18
- data/include/yarp/ast.h +0 -1929
- data/include/yarp/defines.h +0 -45
- data/include/yarp/diagnostic.h +0 -226
- data/include/yarp/node.h +0 -42
- data/include/yarp/pack.h +0 -141
- data/include/yarp/regexp.h +0 -19
- data/include/yarp/unescape.h +0 -44
- data/include/yarp/util/yp_buffer.h +0 -51
- data/include/yarp/util/yp_memchr.h +0 -14
- data/include/yarp/util/yp_state_stack.h +0 -24
- data/include/yarp/util/yp_string_list.h +0 -25
- data/include/yarp/version.h +0 -4
- data/include/yarp.h +0 -82
- data/src/enc/yp_big5.c +0 -52
- data/src/enc/yp_euc_jp.c +0 -58
- data/src/enc/yp_shift_jis.c +0 -56
- data/src/enc/yp_windows_31j.c +0 -56
- data/src/util/yp_buffer.c +0 -101
- data/src/util/yp_string_list.c +0 -29
@@ -1,18 +1,18 @@
|
|
1
|
-
#include "
|
1
|
+
#include "prism/extension.h"
|
2
2
|
|
3
3
|
// NOTE: this file should contain only bindings.
|
4
4
|
// All non-trivial logic should be in librubyparser so it can be shared by its various callers.
|
5
5
|
|
6
|
-
VALUE
|
7
|
-
VALUE
|
8
|
-
VALUE
|
9
|
-
VALUE
|
10
|
-
VALUE
|
6
|
+
VALUE rb_cPrism;
|
7
|
+
VALUE rb_cPrismNode;
|
8
|
+
VALUE rb_cPrismSource;
|
9
|
+
VALUE rb_cPrismToken;
|
10
|
+
VALUE rb_cPrismLocation;
|
11
11
|
|
12
|
-
VALUE
|
13
|
-
VALUE
|
14
|
-
VALUE
|
15
|
-
VALUE
|
12
|
+
VALUE rb_cPrismComment;
|
13
|
+
VALUE rb_cPrismParseError;
|
14
|
+
VALUE rb_cPrismParseWarning;
|
15
|
+
VALUE rb_cPrismParseResult;
|
16
16
|
|
17
17
|
/******************************************************************************/
|
18
18
|
/* IO of Ruby code */
|
@@ -37,15 +37,15 @@ check_string(VALUE value) {
|
|
37
37
|
return RSTRING_PTR(value);
|
38
38
|
}
|
39
39
|
|
40
|
-
// Load the contents and size of the given string into the given
|
40
|
+
// Load the contents and size of the given string into the given pm_string_t.
|
41
41
|
static void
|
42
|
-
input_load_string(
|
42
|
+
input_load_string(pm_string_t *input, VALUE string) {
|
43
43
|
// Check if the string is a string. If it's not, then raise a type error.
|
44
44
|
if (!RB_TYPE_P(string, T_STRING)) {
|
45
45
|
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
|
46
46
|
}
|
47
47
|
|
48
|
-
|
48
|
+
pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
|
49
49
|
}
|
50
50
|
|
51
51
|
/******************************************************************************/
|
@@ -54,22 +54,22 @@ input_load_string(yp_string_t *input, VALUE string) {
|
|
54
54
|
|
55
55
|
// Dump the AST corresponding to the given input to a string.
|
56
56
|
static VALUE
|
57
|
-
dump_input(
|
58
|
-
|
59
|
-
if (!
|
57
|
+
dump_input(pm_string_t *input, const char *filepath) {
|
58
|
+
pm_buffer_t buffer;
|
59
|
+
if (!pm_buffer_init(&buffer)) {
|
60
60
|
rb_raise(rb_eNoMemError, "failed to allocate memory");
|
61
61
|
}
|
62
62
|
|
63
|
-
|
64
|
-
|
63
|
+
pm_parser_t parser;
|
64
|
+
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
|
65
65
|
|
66
|
-
|
67
|
-
|
66
|
+
pm_node_t *node = pm_parse(&parser);
|
67
|
+
pm_serialize(&parser, node, &buffer);
|
68
68
|
|
69
|
-
VALUE result = rb_str_new(
|
70
|
-
|
71
|
-
|
72
|
-
|
69
|
+
VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
|
70
|
+
pm_node_destroy(&parser, node);
|
71
|
+
pm_buffer_free(&buffer);
|
72
|
+
pm_parser_free(&parser);
|
73
73
|
|
74
74
|
return result;
|
75
75
|
}
|
@@ -81,19 +81,19 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|
81
81
|
VALUE filepath;
|
82
82
|
rb_scan_args(argc, argv, "11", &string, &filepath);
|
83
83
|
|
84
|
-
|
84
|
+
pm_string_t input;
|
85
85
|
input_load_string(&input, string);
|
86
86
|
|
87
|
-
#ifdef
|
88
|
-
size_t length =
|
87
|
+
#ifdef PRISM_DEBUG_MODE_BUILD
|
88
|
+
size_t length = pm_string_length(&input);
|
89
89
|
char* dup = malloc(length);
|
90
|
-
memcpy(dup,
|
91
|
-
|
90
|
+
memcpy(dup, pm_string_source(&input), length);
|
91
|
+
pm_string_constant_init(&input, dup, length);
|
92
92
|
#endif
|
93
93
|
|
94
94
|
VALUE value = dump_input(&input, check_string(filepath));
|
95
95
|
|
96
|
-
#ifdef
|
96
|
+
#ifdef PRISM_DEBUG_MODE_BUILD
|
97
97
|
free(dup);
|
98
98
|
#endif
|
99
99
|
|
@@ -103,13 +103,13 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|
103
103
|
// Dump the AST corresponding to the given file to a string.
|
104
104
|
static VALUE
|
105
105
|
dump_file(VALUE self, VALUE filepath) {
|
106
|
-
|
106
|
+
pm_string_t input;
|
107
107
|
|
108
108
|
const char *checked = check_string(filepath);
|
109
|
-
if (!
|
109
|
+
if (!pm_string_mapped_init(&input, checked)) return Qnil;
|
110
110
|
|
111
111
|
VALUE value = dump_input(&input, checked);
|
112
|
-
|
112
|
+
pm_string_free(&input);
|
113
113
|
|
114
114
|
return value;
|
115
115
|
}
|
@@ -120,10 +120,10 @@ dump_file(VALUE self, VALUE filepath) {
|
|
120
120
|
|
121
121
|
// Extract the comments out of the parser into an array.
|
122
122
|
static VALUE
|
123
|
-
parser_comments(
|
123
|
+
parser_comments(pm_parser_t *parser, VALUE source) {
|
124
124
|
VALUE comments = rb_ary_new();
|
125
125
|
|
126
|
-
for (
|
126
|
+
for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
|
127
127
|
VALUE location_argv[] = {
|
128
128
|
source,
|
129
129
|
LONG2FIX(comment->start - parser->start),
|
@@ -132,13 +132,13 @@ parser_comments(yp_parser_t *parser, VALUE source) {
|
|
132
132
|
|
133
133
|
VALUE type;
|
134
134
|
switch (comment->type) {
|
135
|
-
case
|
135
|
+
case PM_COMMENT_INLINE:
|
136
136
|
type = ID2SYM(rb_intern("inline"));
|
137
137
|
break;
|
138
|
-
case
|
138
|
+
case PM_COMMENT_EMBDOC:
|
139
139
|
type = ID2SYM(rb_intern("embdoc"));
|
140
140
|
break;
|
141
|
-
case
|
141
|
+
case PM_COMMENT___END__:
|
142
142
|
type = ID2SYM(rb_intern("__END__"));
|
143
143
|
break;
|
144
144
|
default:
|
@@ -146,8 +146,8 @@ parser_comments(yp_parser_t *parser, VALUE source) {
|
|
146
146
|
break;
|
147
147
|
}
|
148
148
|
|
149
|
-
VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv,
|
150
|
-
rb_ary_push(comments, rb_class_new_instance(2, comment_argv,
|
149
|
+
VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
|
150
|
+
rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cPrismComment));
|
151
151
|
}
|
152
152
|
|
153
153
|
return comments;
|
@@ -155,11 +155,11 @@ parser_comments(yp_parser_t *parser, VALUE source) {
|
|
155
155
|
|
156
156
|
// Extract the errors out of the parser into an array.
|
157
157
|
static VALUE
|
158
|
-
parser_errors(
|
158
|
+
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
159
159
|
VALUE errors = rb_ary_new();
|
160
|
-
|
160
|
+
pm_diagnostic_t *error;
|
161
161
|
|
162
|
-
for (error = (
|
162
|
+
for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
163
163
|
VALUE location_argv[] = {
|
164
164
|
source,
|
165
165
|
LONG2FIX(error->start - parser->start),
|
@@ -168,10 +168,10 @@ parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
|
168
168
|
|
169
169
|
VALUE error_argv[] = {
|
170
170
|
rb_enc_str_new_cstr(error->message, encoding),
|
171
|
-
rb_class_new_instance(3, location_argv,
|
171
|
+
rb_class_new_instance(3, location_argv, rb_cPrismLocation)
|
172
172
|
};
|
173
173
|
|
174
|
-
rb_ary_push(errors, rb_class_new_instance(2, error_argv,
|
174
|
+
rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cPrismParseError));
|
175
175
|
}
|
176
176
|
|
177
177
|
return errors;
|
@@ -179,11 +179,11 @@ parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
|
179
179
|
|
180
180
|
// Extract the warnings out of the parser into an array.
|
181
181
|
static VALUE
|
182
|
-
parser_warnings(
|
182
|
+
parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
183
183
|
VALUE warnings = rb_ary_new();
|
184
|
-
|
184
|
+
pm_diagnostic_t *warning;
|
185
185
|
|
186
|
-
for (warning = (
|
186
|
+
for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
|
187
187
|
VALUE location_argv[] = {
|
188
188
|
source,
|
189
189
|
LONG2FIX(warning->start - parser->start),
|
@@ -192,10 +192,10 @@ parser_warnings(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
|
192
192
|
|
193
193
|
VALUE warning_argv[] = {
|
194
194
|
rb_enc_str_new_cstr(warning->message, encoding),
|
195
|
-
rb_class_new_instance(3, location_argv,
|
195
|
+
rb_class_new_instance(3, location_argv, rb_cPrismLocation)
|
196
196
|
};
|
197
197
|
|
198
|
-
rb_ary_push(warnings, rb_class_new_instance(2, warning_argv,
|
198
|
+
rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cPrismParseWarning));
|
199
199
|
}
|
200
200
|
|
201
201
|
return warnings;
|
@@ -218,11 +218,11 @@ typedef struct {
|
|
218
218
|
// token is found. Once found, we initialize a new instance of Token and push it
|
219
219
|
// onto the tokens array.
|
220
220
|
static void
|
221
|
-
parse_lex_token(void *data,
|
221
|
+
parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
|
222
222
|
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
223
223
|
|
224
224
|
VALUE yields = rb_ary_new_capa(2);
|
225
|
-
rb_ary_push(yields,
|
225
|
+
rb_ary_push(yields, pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
|
226
226
|
rb_ary_push(yields, INT2FIX(parser->lex_state));
|
227
227
|
|
228
228
|
rb_ary_push(parse_lex_data->tokens, yields);
|
@@ -232,7 +232,7 @@ parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
|
|
232
232
|
// the top of the file. We use it to update the encoding that we are using to
|
233
233
|
// create tokens.
|
234
234
|
static void
|
235
|
-
parse_lex_encoding_changed_callback(
|
235
|
+
parse_lex_encoding_changed_callback(pm_parser_t *parser) {
|
236
236
|
parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
|
237
237
|
parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
|
238
238
|
|
@@ -254,14 +254,14 @@ parse_lex_encoding_changed_callback(yp_parser_t *parser) {
|
|
254
254
|
// Parse the given input and return a ParseResult containing just the tokens or
|
255
255
|
// the nodes and tokens.
|
256
256
|
static VALUE
|
257
|
-
parse_lex_input(
|
258
|
-
|
259
|
-
|
260
|
-
|
257
|
+
parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
|
258
|
+
pm_parser_t parser;
|
259
|
+
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
|
260
|
+
pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
|
261
261
|
|
262
262
|
VALUE offsets = rb_ary_new();
|
263
|
-
VALUE source_argv[] = { rb_str_new((const char *)
|
264
|
-
VALUE source = rb_class_new_instance(2, source_argv,
|
263
|
+
VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
|
264
|
+
VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
|
265
265
|
|
266
266
|
parse_lex_data_t parse_lex_data = {
|
267
267
|
.source = source,
|
@@ -270,13 +270,13 @@ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
|
|
270
270
|
};
|
271
271
|
|
272
272
|
parse_lex_data_t *data = &parse_lex_data;
|
273
|
-
|
273
|
+
pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
|
274
274
|
.data = (void *) data,
|
275
275
|
.callback = parse_lex_token,
|
276
276
|
};
|
277
277
|
|
278
278
|
parser.lex_callback = &lex_callback;
|
279
|
-
|
279
|
+
pm_node_t *node = pm_parse(&parser);
|
280
280
|
|
281
281
|
// Here we need to update the source range to have the correct newline
|
282
282
|
// offsets. We do it here because we've already created the object and given
|
@@ -288,7 +288,7 @@ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
|
|
288
288
|
VALUE value;
|
289
289
|
if (return_nodes) {
|
290
290
|
value = rb_ary_new_capa(2);
|
291
|
-
rb_ary_push(value,
|
291
|
+
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
|
292
292
|
rb_ary_push(value, parse_lex_data.tokens);
|
293
293
|
} else {
|
294
294
|
value = parse_lex_data.tokens;
|
@@ -302,9 +302,9 @@ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
|
|
302
302
|
source
|
303
303
|
};
|
304
304
|
|
305
|
-
|
306
|
-
|
307
|
-
return rb_class_new_instance(5, result_argv,
|
305
|
+
pm_node_destroy(&parser, node);
|
306
|
+
pm_parser_free(&parser);
|
307
|
+
return rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
|
308
308
|
}
|
309
309
|
|
310
310
|
// Return an array of tokens corresponding to the given string.
|
@@ -314,7 +314,7 @@ lex(int argc, VALUE *argv, VALUE self) {
|
|
314
314
|
VALUE filepath;
|
315
315
|
rb_scan_args(argc, argv, "11", &string, &filepath);
|
316
316
|
|
317
|
-
|
317
|
+
pm_string_t input;
|
318
318
|
input_load_string(&input, string);
|
319
319
|
|
320
320
|
return parse_lex_input(&input, check_string(filepath), false);
|
@@ -323,13 +323,13 @@ lex(int argc, VALUE *argv, VALUE self) {
|
|
323
323
|
// Return an array of tokens corresponding to the given file.
|
324
324
|
static VALUE
|
325
325
|
lex_file(VALUE self, VALUE filepath) {
|
326
|
-
|
326
|
+
pm_string_t input;
|
327
327
|
|
328
328
|
const char *checked = check_string(filepath);
|
329
|
-
if (!
|
329
|
+
if (!pm_string_mapped_init(&input, checked)) return Qnil;
|
330
330
|
|
331
331
|
VALUE value = parse_lex_input(&input, checked, false);
|
332
|
-
|
332
|
+
pm_string_free(&input);
|
333
333
|
|
334
334
|
return value;
|
335
335
|
}
|
@@ -340,26 +340,26 @@ lex_file(VALUE self, VALUE filepath) {
|
|
340
340
|
|
341
341
|
// Parse the given input and return a ParseResult instance.
|
342
342
|
static VALUE
|
343
|
-
parse_input(
|
344
|
-
|
345
|
-
|
343
|
+
parse_input(pm_string_t *input, const char *filepath) {
|
344
|
+
pm_parser_t parser;
|
345
|
+
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath);
|
346
346
|
|
347
|
-
|
347
|
+
pm_node_t *node = pm_parse(&parser);
|
348
348
|
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
|
349
349
|
|
350
|
-
VALUE source =
|
350
|
+
VALUE source = pm_source_new(&parser, encoding);
|
351
351
|
VALUE result_argv[] = {
|
352
|
-
|
352
|
+
pm_ast_new(&parser, node, encoding),
|
353
353
|
parser_comments(&parser, source),
|
354
354
|
parser_errors(&parser, encoding, source),
|
355
355
|
parser_warnings(&parser, encoding, source),
|
356
356
|
source
|
357
357
|
};
|
358
358
|
|
359
|
-
VALUE result = rb_class_new_instance(5, result_argv,
|
359
|
+
VALUE result = rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
|
360
360
|
|
361
|
-
|
362
|
-
|
361
|
+
pm_node_destroy(&parser, node);
|
362
|
+
pm_parser_free(&parser);
|
363
363
|
|
364
364
|
return result;
|
365
365
|
}
|
@@ -371,19 +371,19 @@ parse(int argc, VALUE *argv, VALUE self) {
|
|
371
371
|
VALUE filepath;
|
372
372
|
rb_scan_args(argc, argv, "11", &string, &filepath);
|
373
373
|
|
374
|
-
|
374
|
+
pm_string_t input;
|
375
375
|
input_load_string(&input, string);
|
376
376
|
|
377
|
-
#ifdef
|
378
|
-
size_t length =
|
377
|
+
#ifdef PRISM_DEBUG_MODE_BUILD
|
378
|
+
size_t length = pm_string_length(&input);
|
379
379
|
char* dup = malloc(length);
|
380
|
-
memcpy(dup,
|
381
|
-
|
380
|
+
memcpy(dup, pm_string_source(&input), length);
|
381
|
+
pm_string_constant_init(&input, dup, length);
|
382
382
|
#endif
|
383
383
|
|
384
384
|
VALUE value = parse_input(&input, check_string(filepath));
|
385
385
|
|
386
|
-
#ifdef
|
386
|
+
#ifdef PRISM_DEBUG_MODE_BUILD
|
387
387
|
free(dup);
|
388
388
|
#endif
|
389
389
|
|
@@ -393,13 +393,13 @@ parse(int argc, VALUE *argv, VALUE self) {
|
|
393
393
|
// Parse the given file and return a ParseResult instance.
|
394
394
|
static VALUE
|
395
395
|
parse_file(VALUE self, VALUE filepath) {
|
396
|
-
|
396
|
+
pm_string_t input;
|
397
397
|
|
398
398
|
const char *checked = check_string(filepath);
|
399
|
-
if (!
|
399
|
+
if (!pm_string_mapped_init(&input, checked)) return Qnil;
|
400
400
|
|
401
401
|
VALUE value = parse_input(&input, checked);
|
402
|
-
|
402
|
+
pm_string_free(&input);
|
403
403
|
|
404
404
|
return value;
|
405
405
|
}
|
@@ -411,11 +411,11 @@ parse_lex(int argc, VALUE *argv, VALUE self) {
|
|
411
411
|
VALUE filepath;
|
412
412
|
rb_scan_args(argc, argv, "11", &string, &filepath);
|
413
413
|
|
414
|
-
|
414
|
+
pm_string_t input;
|
415
415
|
input_load_string(&input, string);
|
416
416
|
|
417
417
|
VALUE value = parse_lex_input(&input, check_string(filepath), true);
|
418
|
-
|
418
|
+
pm_string_free(&input);
|
419
419
|
|
420
420
|
return value;
|
421
421
|
}
|
@@ -423,13 +423,13 @@ parse_lex(int argc, VALUE *argv, VALUE self) {
|
|
423
423
|
// Parse and lex the given file and return a ParseResult instance.
|
424
424
|
static VALUE
|
425
425
|
parse_lex_file(VALUE self, VALUE filepath) {
|
426
|
-
|
426
|
+
pm_string_t input;
|
427
427
|
|
428
428
|
const char *checked = check_string(filepath);
|
429
|
-
if (!
|
429
|
+
if (!pm_string_mapped_init(&input, checked)) return Qnil;
|
430
430
|
|
431
431
|
VALUE value = parse_lex_input(&input, checked, true);
|
432
|
-
|
432
|
+
pm_string_free(&input);
|
433
433
|
|
434
434
|
return value;
|
435
435
|
}
|
@@ -439,40 +439,40 @@ parse_lex_file(VALUE self, VALUE filepath) {
|
|
439
439
|
/******************************************************************************/
|
440
440
|
|
441
441
|
// Returns an array of strings corresponding to the named capture groups in the
|
442
|
-
// given source string. If
|
442
|
+
// given source string. If prism was unable to parse the regular expression, this
|
443
443
|
// function returns nil.
|
444
444
|
static VALUE
|
445
445
|
named_captures(VALUE self, VALUE source) {
|
446
|
-
|
447
|
-
|
446
|
+
pm_string_list_t string_list;
|
447
|
+
pm_string_list_init(&string_list);
|
448
448
|
|
449
|
-
if (!
|
450
|
-
|
449
|
+
if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &pm_encoding_utf_8)) {
|
450
|
+
pm_string_list_free(&string_list);
|
451
451
|
return Qnil;
|
452
452
|
}
|
453
453
|
|
454
454
|
VALUE names = rb_ary_new();
|
455
455
|
for (size_t index = 0; index < string_list.length; index++) {
|
456
|
-
const
|
457
|
-
rb_ary_push(names, rb_str_new((const char *)
|
456
|
+
const pm_string_t *string = &string_list.strings[index];
|
457
|
+
rb_ary_push(names, rb_str_new((const char *) pm_string_source(string), pm_string_length(string)));
|
458
458
|
}
|
459
459
|
|
460
|
-
|
460
|
+
pm_string_list_free(&string_list);
|
461
461
|
return names;
|
462
462
|
}
|
463
463
|
|
464
464
|
// Accepts a source string and a type of unescaping and returns the unescaped
|
465
465
|
// version.
|
466
466
|
static VALUE
|
467
|
-
unescape(VALUE source,
|
468
|
-
|
467
|
+
unescape(VALUE source, pm_unescape_type_t unescape_type) {
|
468
|
+
pm_string_t result;
|
469
469
|
|
470
|
-
if (
|
471
|
-
VALUE str = rb_str_new((const char *)
|
472
|
-
|
470
|
+
if (pm_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
|
471
|
+
VALUE str = rb_str_new((const char *) pm_string_source(&result), pm_string_length(&result));
|
472
|
+
pm_string_free(&result);
|
473
473
|
return str;
|
474
474
|
} else {
|
475
|
-
|
475
|
+
pm_string_free(&result);
|
476
476
|
return Qnil;
|
477
477
|
}
|
478
478
|
}
|
@@ -481,35 +481,41 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
|
|
481
481
|
// consistent API.
|
482
482
|
static VALUE
|
483
483
|
unescape_none(VALUE self, VALUE source) {
|
484
|
-
return unescape(source,
|
484
|
+
return unescape(source, PM_UNESCAPE_NONE);
|
485
485
|
}
|
486
486
|
|
487
487
|
// Minimally unescape the given string. This means effectively unescaping just
|
488
488
|
// the quotes of a string. Returns the unescaped string.
|
489
489
|
static VALUE
|
490
490
|
unescape_minimal(VALUE self, VALUE source) {
|
491
|
-
return unescape(source,
|
491
|
+
return unescape(source, PM_UNESCAPE_MINIMAL);
|
492
|
+
}
|
493
|
+
|
494
|
+
// Unescape the given string minimally plus whitespace. Returns the unescaped string.
|
495
|
+
static VALUE
|
496
|
+
unescape_whitespace(VALUE self, VALUE source) {
|
497
|
+
return unescape(source, PM_UNESCAPE_WHITESPACE);
|
492
498
|
}
|
493
499
|
|
494
500
|
// Unescape everything in the given string. Return the unescaped string.
|
495
501
|
static VALUE
|
496
502
|
unescape_all(VALUE self, VALUE source) {
|
497
|
-
return unescape(source,
|
503
|
+
return unescape(source, PM_UNESCAPE_ALL);
|
498
504
|
}
|
499
505
|
|
500
506
|
// Return a hash of information about the given source string's memory usage.
|
501
507
|
static VALUE
|
502
508
|
memsize(VALUE self, VALUE string) {
|
503
|
-
|
509
|
+
pm_parser_t parser;
|
504
510
|
size_t length = RSTRING_LEN(string);
|
505
|
-
|
511
|
+
pm_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
|
506
512
|
|
507
|
-
|
508
|
-
|
509
|
-
|
513
|
+
pm_node_t *node = pm_parse(&parser);
|
514
|
+
pm_memsize_t memsize;
|
515
|
+
pm_node_memsize(node, &memsize);
|
510
516
|
|
511
|
-
|
512
|
-
|
517
|
+
pm_node_destroy(&parser, node);
|
518
|
+
pm_parser_free(&parser);
|
513
519
|
|
514
520
|
VALUE result = rb_hash_new();
|
515
521
|
rb_hash_aset(result, ID2SYM(rb_intern("length")), INT2FIX(length));
|
@@ -522,19 +528,19 @@ memsize(VALUE self, VALUE string) {
|
|
522
528
|
// parser for memory and speed.
|
523
529
|
static VALUE
|
524
530
|
profile_file(VALUE self, VALUE filepath) {
|
525
|
-
|
531
|
+
pm_string_t input;
|
526
532
|
|
527
533
|
const char *checked = check_string(filepath);
|
528
|
-
if (!
|
534
|
+
if (!pm_string_mapped_init(&input, checked)) return Qnil;
|
529
535
|
|
530
|
-
|
531
|
-
|
536
|
+
pm_parser_t parser;
|
537
|
+
pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked);
|
532
538
|
|
533
|
-
|
534
|
-
|
535
|
-
|
539
|
+
pm_node_t *node = pm_parse(&parser);
|
540
|
+
pm_node_destroy(&parser, node);
|
541
|
+
pm_parser_free(&parser);
|
536
542
|
|
537
|
-
|
543
|
+
pm_string_free(&input);
|
538
544
|
|
539
545
|
return Qnil;
|
540
546
|
}
|
@@ -543,18 +549,18 @@ profile_file(VALUE self, VALUE filepath) {
|
|
543
549
|
// path since it is used by client libraries.
|
544
550
|
static VALUE
|
545
551
|
parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
|
546
|
-
|
547
|
-
|
548
|
-
|
552
|
+
pm_string_t input;
|
553
|
+
pm_buffer_t buffer;
|
554
|
+
pm_buffer_init(&buffer);
|
549
555
|
|
550
556
|
const char *checked = check_string(filepath);
|
551
|
-
if (!
|
557
|
+
if (!pm_string_mapped_init(&input, checked)) return Qnil;
|
552
558
|
|
553
|
-
|
554
|
-
VALUE result = rb_str_new(
|
559
|
+
pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata));
|
560
|
+
VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
|
555
561
|
|
556
|
-
|
557
|
-
|
562
|
+
pm_string_free(&input);
|
563
|
+
pm_buffer_free(&buffer);
|
558
564
|
return result;
|
559
565
|
}
|
560
566
|
|
@@ -563,57 +569,58 @@ parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
|
|
563
569
|
/******************************************************************************/
|
564
570
|
|
565
571
|
RUBY_FUNC_EXPORTED void
|
566
|
-
|
567
|
-
// Make sure that the
|
572
|
+
Init_prism(void) {
|
573
|
+
// Make sure that the prism library version matches the expected version.
|
568
574
|
// Otherwise something was compiled incorrectly.
|
569
|
-
if (strcmp(
|
575
|
+
if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) {
|
570
576
|
rb_raise(
|
571
577
|
rb_eRuntimeError,
|
572
|
-
"The
|
573
|
-
|
574
|
-
|
578
|
+
"The prism library version (%s) does not match the expected version (%s)",
|
579
|
+
pm_version(),
|
580
|
+
EXPECTED_PRISM_VERSION
|
575
581
|
);
|
576
582
|
}
|
577
583
|
|
578
584
|
// Grab up references to all of the constants that we're going to need to
|
579
585
|
// reference throughout this extension.
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
586
|
+
rb_cPrism = rb_define_module("Prism");
|
587
|
+
rb_cPrismNode = rb_define_class_under(rb_cPrism, "Node", rb_cObject);
|
588
|
+
rb_cPrismSource = rb_define_class_under(rb_cPrism, "Source", rb_cObject);
|
589
|
+
rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
|
590
|
+
rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
|
591
|
+
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
|
592
|
+
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
|
593
|
+
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
|
594
|
+
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
|
589
595
|
|
590
596
|
// Define the version string here so that we can use the constants defined
|
591
|
-
// in
|
592
|
-
rb_define_const(
|
593
|
-
rb_define_const(
|
597
|
+
// in prism.h.
|
598
|
+
rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
|
599
|
+
rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CExtension")));
|
594
600
|
|
595
601
|
// First, the functions that have to do with lexing and parsing.
|
596
|
-
rb_define_singleton_method(
|
597
|
-
rb_define_singleton_method(
|
598
|
-
rb_define_singleton_method(
|
599
|
-
rb_define_singleton_method(
|
600
|
-
rb_define_singleton_method(
|
601
|
-
rb_define_singleton_method(
|
602
|
-
rb_define_singleton_method(
|
603
|
-
rb_define_singleton_method(
|
602
|
+
rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
|
603
|
+
rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1);
|
604
|
+
rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
|
605
|
+
rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1);
|
606
|
+
rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
|
607
|
+
rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1);
|
608
|
+
rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
|
609
|
+
rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1);
|
604
610
|
|
605
611
|
// Next, the functions that will be called by the parser to perform various
|
606
612
|
// internal tasks. We expose these to make them easier to test.
|
607
|
-
VALUE
|
608
|
-
rb_define_singleton_method(
|
609
|
-
rb_define_singleton_method(
|
610
|
-
rb_define_singleton_method(
|
611
|
-
rb_define_singleton_method(
|
612
|
-
rb_define_singleton_method(
|
613
|
-
rb_define_singleton_method(
|
614
|
-
rb_define_singleton_method(
|
613
|
+
VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
|
614
|
+
rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
|
615
|
+
rb_define_singleton_method(rb_cPrismDebug, "unescape_none", unescape_none, 1);
|
616
|
+
rb_define_singleton_method(rb_cPrismDebug, "unescape_minimal", unescape_minimal, 1);
|
617
|
+
rb_define_singleton_method(rb_cPrismDebug, "unescape_whitespace", unescape_whitespace, 1);
|
618
|
+
rb_define_singleton_method(rb_cPrismDebug, "unescape_all", unescape_all, 1);
|
619
|
+
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
|
620
|
+
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
|
621
|
+
rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
|
615
622
|
|
616
623
|
// Next, initialize the other APIs.
|
617
|
-
|
618
|
-
|
624
|
+
Init_prism_api_node();
|
625
|
+
Init_prism_pack();
|
619
626
|
}
|