yarp 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +51 -0
- data/LICENSE.md +7 -0
- data/Makefile.in +79 -0
- data/README.md +86 -0
- data/config.h.in +25 -0
- data/config.yml +2147 -0
- data/configure +4487 -0
- data/docs/build_system.md +85 -0
- data/docs/building.md +26 -0
- data/docs/configuration.md +56 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +116 -0
- data/docs/extension.md +20 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/serialization.md +130 -0
- data/docs/testing.md +55 -0
- data/ext/yarp/api_node.c +3680 -0
- data/ext/yarp/api_pack.c +256 -0
- data/ext/yarp/extconf.rb +131 -0
- data/ext/yarp/extension.c +547 -0
- data/ext/yarp/extension.h +18 -0
- data/include/yarp/ast.h +1412 -0
- data/include/yarp/defines.h +54 -0
- data/include/yarp/diagnostic.h +24 -0
- data/include/yarp/enc/yp_encoding.h +94 -0
- data/include/yarp/node.h +36 -0
- data/include/yarp/pack.h +141 -0
- data/include/yarp/parser.h +389 -0
- data/include/yarp/regexp.h +19 -0
- data/include/yarp/unescape.h +42 -0
- data/include/yarp/util/yp_buffer.h +39 -0
- data/include/yarp/util/yp_char.h +75 -0
- data/include/yarp/util/yp_constant_pool.h +64 -0
- data/include/yarp/util/yp_list.h +67 -0
- data/include/yarp/util/yp_memchr.h +14 -0
- data/include/yarp/util/yp_newline_list.h +54 -0
- data/include/yarp/util/yp_state_stack.h +24 -0
- data/include/yarp/util/yp_string.h +57 -0
- data/include/yarp/util/yp_string_list.h +28 -0
- data/include/yarp/util/yp_strpbrk.h +29 -0
- data/include/yarp/version.h +5 -0
- data/include/yarp.h +69 -0
- data/lib/yarp/lex_compat.rb +759 -0
- data/lib/yarp/node.rb +7428 -0
- data/lib/yarp/pack.rb +185 -0
- data/lib/yarp/ripper_compat.rb +174 -0
- data/lib/yarp/serialize.rb +389 -0
- data/lib/yarp.rb +330 -0
- data/src/diagnostic.c +25 -0
- data/src/enc/yp_big5.c +79 -0
- data/src/enc/yp_euc_jp.c +85 -0
- data/src/enc/yp_gbk.c +88 -0
- data/src/enc/yp_shift_jis.c +83 -0
- data/src/enc/yp_tables.c +509 -0
- data/src/enc/yp_unicode.c +2320 -0
- data/src/enc/yp_windows_31j.c +83 -0
- data/src/node.c +2011 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +1782 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1576 -0
- data/src/token_type.c +347 -0
- data/src/unescape.c +576 -0
- data/src/util/yp_buffer.c +78 -0
- data/src/util/yp_char.c +229 -0
- data/src/util/yp_constant_pool.c +147 -0
- data/src/util/yp_list.c +50 -0
- data/src/util/yp_memchr.c +31 -0
- data/src/util/yp_newline_list.c +119 -0
- data/src/util/yp_state_stack.c +25 -0
- data/src/util/yp_string.c +207 -0
- data/src/util/yp_string_list.c +32 -0
- data/src/util/yp_strncasecmp.c +20 -0
- data/src/util/yp_strpbrk.c +66 -0
- data/src/yarp.c +13211 -0
- data/yarp.gemspec +100 -0
- metadata +125 -0
@@ -0,0 +1,547 @@
|
|
1
|
+
#include "yarp/extension.h"
|
2
|
+
|
3
|
+
// NOTE: this file should contain only bindings.
|
4
|
+
// All non-trivial logic should be in librubyparser so it can be shared by its various callers.
|
5
|
+
|
6
|
+
// Handles to the YARP module and the classes nested under it. They are all
// assigned in Init_yarp.
VALUE rb_cYARP;
VALUE rb_cYARPNode;
VALUE rb_cYARPSource;
VALUE rb_cYARPToken;
VALUE rb_cYARPLocation;

// Handles to the classes used to describe the outcome of a lex or parse.
VALUE rb_cYARPComment;
VALUE rb_cYARPParseError;
VALUE rb_cYARPParseWarning;
VALUE rb_cYARPParseResult;
|
16
|
+
|
17
|
+
/******************************************************************************/
|
18
|
+
/* IO of Ruby code */
|
19
|
+
/******************************************************************************/
|
20
|
+
|
21
|
+
// Check if the given VALUE is a string. If it's nil, then return NULL. If it's
|
22
|
+
// not a string, then raise a type error. Otherwise return the VALUE as a C
|
23
|
+
// string.
|
24
|
+
static const char *
|
25
|
+
check_string(VALUE value) {
|
26
|
+
// If the value is nil, then we don't need to do anything.
|
27
|
+
if (NIL_P(value)) {
|
28
|
+
return NULL;
|
29
|
+
}
|
30
|
+
|
31
|
+
// Check if the value is a string. If it's not, then raise a type error.
|
32
|
+
if (!RB_TYPE_P(value, T_STRING)) {
|
33
|
+
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(value));
|
34
|
+
}
|
35
|
+
|
36
|
+
// Otherwise, return the value as a C string.
|
37
|
+
return RSTRING_PTR(value);
|
38
|
+
}
|
39
|
+
|
40
|
+
// Load the contents and size of the given string into the given yp_string_t.
|
41
|
+
static void
|
42
|
+
input_load_string(yp_string_t *input, VALUE string) {
|
43
|
+
// Check if the string is a string. If it's not, then raise a type error.
|
44
|
+
if (!RB_TYPE_P(string, T_STRING)) {
|
45
|
+
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
|
46
|
+
}
|
47
|
+
|
48
|
+
yp_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
|
49
|
+
}
|
50
|
+
|
51
|
+
/******************************************************************************/
|
52
|
+
/* Serializing the AST */
|
53
|
+
/******************************************************************************/
|
54
|
+
|
55
|
+
// Dump the AST corresponding to the given input to a string.
|
56
|
+
static VALUE
|
57
|
+
dump_input(yp_string_t *input, const char *filepath) {
|
58
|
+
yp_buffer_t buffer;
|
59
|
+
if (!yp_buffer_init(&buffer)) {
|
60
|
+
rb_raise(rb_eNoMemError, "failed to allocate memory");
|
61
|
+
}
|
62
|
+
|
63
|
+
yp_parser_t parser;
|
64
|
+
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
|
65
|
+
|
66
|
+
yp_node_t *node = yp_parse(&parser);
|
67
|
+
yp_serialize(&parser, node, &buffer);
|
68
|
+
|
69
|
+
VALUE result = rb_str_new(buffer.value, buffer.length);
|
70
|
+
yp_node_destroy(&parser, node);
|
71
|
+
yp_buffer_free(&buffer);
|
72
|
+
yp_parser_free(&parser);
|
73
|
+
|
74
|
+
return result;
|
75
|
+
}
|
76
|
+
|
77
|
+
// Dump the AST corresponding to the given string to a string.
|
78
|
+
static VALUE
|
79
|
+
dump(int argc, VALUE *argv, VALUE self) {
|
80
|
+
VALUE string;
|
81
|
+
VALUE filepath;
|
82
|
+
rb_scan_args(argc, argv, "11", &string, &filepath);
|
83
|
+
|
84
|
+
yp_string_t input;
|
85
|
+
input_load_string(&input, string);
|
86
|
+
return dump_input(&input, check_string(filepath));
|
87
|
+
}
|
88
|
+
|
89
|
+
// Dump the AST corresponding to the given file to a string.
|
90
|
+
static VALUE
|
91
|
+
dump_file(VALUE self, VALUE filepath) {
|
92
|
+
yp_string_t input;
|
93
|
+
|
94
|
+
const char *checked = check_string(filepath);
|
95
|
+
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
96
|
+
|
97
|
+
VALUE value = dump_input(&input, checked);
|
98
|
+
yp_string_free(&input);
|
99
|
+
|
100
|
+
return value;
|
101
|
+
}
|
102
|
+
|
103
|
+
/******************************************************************************/
|
104
|
+
/* Extracting values for the parse result */
|
105
|
+
/******************************************************************************/
|
106
|
+
|
107
|
+
// Extract the comments out of the parser into an array.
|
108
|
+
static VALUE
|
109
|
+
parser_comments(yp_parser_t *parser, VALUE source) {
|
110
|
+
VALUE comments = rb_ary_new();
|
111
|
+
|
112
|
+
for (yp_comment_t *comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
|
113
|
+
VALUE location_argv[] = {
|
114
|
+
source,
|
115
|
+
LONG2FIX(comment->start - parser->start),
|
116
|
+
LONG2FIX(comment->end - comment->start)
|
117
|
+
};
|
118
|
+
|
119
|
+
VALUE type;
|
120
|
+
switch (comment->type) {
|
121
|
+
case YP_COMMENT_INLINE:
|
122
|
+
type = ID2SYM(rb_intern("inline"));
|
123
|
+
break;
|
124
|
+
case YP_COMMENT_EMBDOC:
|
125
|
+
type = ID2SYM(rb_intern("embdoc"));
|
126
|
+
break;
|
127
|
+
case YP_COMMENT___END__:
|
128
|
+
type = ID2SYM(rb_intern("__END__"));
|
129
|
+
break;
|
130
|
+
default:
|
131
|
+
type = ID2SYM(rb_intern("inline"));
|
132
|
+
break;
|
133
|
+
}
|
134
|
+
|
135
|
+
VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cYARPLocation) };
|
136
|
+
rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cYARPComment));
|
137
|
+
}
|
138
|
+
|
139
|
+
return comments;
|
140
|
+
}
|
141
|
+
|
142
|
+
// Extract the errors out of the parser into an array.
|
143
|
+
static VALUE
|
144
|
+
parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
145
|
+
VALUE errors = rb_ary_new();
|
146
|
+
yp_diagnostic_t *error;
|
147
|
+
|
148
|
+
for (error = (yp_diagnostic_t *) parser->error_list.head; error != NULL; error = (yp_diagnostic_t *) error->node.next) {
|
149
|
+
VALUE location_argv[] = {
|
150
|
+
source,
|
151
|
+
LONG2FIX(error->start - parser->start),
|
152
|
+
LONG2FIX(error->end - error->start)
|
153
|
+
};
|
154
|
+
|
155
|
+
VALUE error_argv[] = {
|
156
|
+
rb_enc_str_new_cstr(error->message, encoding),
|
157
|
+
rb_class_new_instance(3, location_argv, rb_cYARPLocation)
|
158
|
+
};
|
159
|
+
|
160
|
+
rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cYARPParseError));
|
161
|
+
}
|
162
|
+
|
163
|
+
return errors;
|
164
|
+
}
|
165
|
+
|
166
|
+
// Extract the warnings out of the parser into an array.
|
167
|
+
static VALUE
|
168
|
+
parser_warnings(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
169
|
+
VALUE warnings = rb_ary_new();
|
170
|
+
yp_diagnostic_t *warning;
|
171
|
+
|
172
|
+
for (warning = (yp_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (yp_diagnostic_t *) warning->node.next) {
|
173
|
+
VALUE location_argv[] = {
|
174
|
+
source,
|
175
|
+
LONG2FIX(warning->start - parser->start),
|
176
|
+
LONG2FIX(warning->end - warning->start)
|
177
|
+
};
|
178
|
+
|
179
|
+
VALUE warning_argv[] = {
|
180
|
+
rb_enc_str_new_cstr(warning->message, encoding),
|
181
|
+
rb_class_new_instance(3, location_argv, rb_cYARPLocation)
|
182
|
+
};
|
183
|
+
|
184
|
+
rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cYARPParseWarning));
|
185
|
+
}
|
186
|
+
|
187
|
+
return warnings;
|
188
|
+
}
|
189
|
+
|
190
|
+
/******************************************************************************/
|
191
|
+
/* Lexing Ruby code */
|
192
|
+
/******************************************************************************/
|
193
|
+
|
194
|
+
// This struct gets stored in the parser and passed in to the lex callback any
|
195
|
+
// time a new token is found. We use it to store the necessary information to
|
196
|
+
// initialize a Token instance.
|
197
|
+
typedef struct {
|
198
|
+
VALUE source;
|
199
|
+
VALUE tokens;
|
200
|
+
rb_encoding *encoding;
|
201
|
+
} lex_data_t;
|
202
|
+
|
203
|
+
// This is passed as a callback to the parser. It gets called every time a new
|
204
|
+
// token is found. Once found, we initialize a new instance of Token and push it
|
205
|
+
// onto the tokens array.
|
206
|
+
static void
|
207
|
+
lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
|
208
|
+
lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
|
209
|
+
|
210
|
+
VALUE yields = rb_ary_new_capa(2);
|
211
|
+
rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
|
212
|
+
rb_ary_push(yields, INT2FIX(parser->lex_state));
|
213
|
+
|
214
|
+
rb_ary_push(lex_data->tokens, yields);
|
215
|
+
}
|
216
|
+
|
217
|
+
// This is called whenever the encoding changes based on the magic comment at
|
218
|
+
// the top of the file. We use it to update the encoding that we are using to
|
219
|
+
// create tokens.
|
220
|
+
static void
|
221
|
+
lex_encoding_changed_callback(yp_parser_t *parser) {
|
222
|
+
lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
|
223
|
+
lex_data->encoding = rb_enc_find(parser->encoding.name);
|
224
|
+
}
|
225
|
+
|
226
|
+
// Return an array of tokens corresponding to the given source.
|
227
|
+
static VALUE
|
228
|
+
lex_input(yp_string_t *input, const char *filepath) {
|
229
|
+
yp_parser_t parser;
|
230
|
+
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
|
231
|
+
yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
|
232
|
+
|
233
|
+
VALUE offsets = rb_ary_new();
|
234
|
+
VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
|
235
|
+
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
|
236
|
+
|
237
|
+
lex_data_t lex_data = {
|
238
|
+
.source = source,
|
239
|
+
.tokens = rb_ary_new(),
|
240
|
+
.encoding = rb_utf8_encoding()
|
241
|
+
};
|
242
|
+
|
243
|
+
lex_data_t *data = &lex_data;
|
244
|
+
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
|
245
|
+
.data = (void *) data,
|
246
|
+
.callback = lex_token,
|
247
|
+
};
|
248
|
+
|
249
|
+
parser.lex_callback = &lex_callback;
|
250
|
+
yp_node_t *node = yp_parse(&parser);
|
251
|
+
|
252
|
+
// Here we need to update the source range to have the correct newline
|
253
|
+
// offsets. We do it here because we've already created the object and given
|
254
|
+
// it over to all of the tokens.
|
255
|
+
for (size_t index = 0; index < parser.newline_list.size; index++) {
|
256
|
+
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
|
257
|
+
}
|
258
|
+
|
259
|
+
VALUE result_argv[] = {
|
260
|
+
lex_data.tokens,
|
261
|
+
parser_comments(&parser, source),
|
262
|
+
parser_errors(&parser, lex_data.encoding, source),
|
263
|
+
parser_warnings(&parser, lex_data.encoding, source),
|
264
|
+
source
|
265
|
+
};
|
266
|
+
|
267
|
+
VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
268
|
+
|
269
|
+
yp_node_destroy(&parser, node);
|
270
|
+
yp_parser_free(&parser);
|
271
|
+
|
272
|
+
return result;
|
273
|
+
}
|
274
|
+
|
275
|
+
// Return an array of tokens corresponding to the given string.
|
276
|
+
static VALUE
|
277
|
+
lex(int argc, VALUE *argv, VALUE self) {
|
278
|
+
VALUE string;
|
279
|
+
VALUE filepath;
|
280
|
+
rb_scan_args(argc, argv, "11", &string, &filepath);
|
281
|
+
|
282
|
+
yp_string_t input;
|
283
|
+
input_load_string(&input, string);
|
284
|
+
return lex_input(&input, check_string(filepath));
|
285
|
+
}
|
286
|
+
|
287
|
+
// Return an array of tokens corresponding to the given file.
|
288
|
+
static VALUE
|
289
|
+
lex_file(VALUE self, VALUE filepath) {
|
290
|
+
yp_string_t input;
|
291
|
+
|
292
|
+
const char *checked = check_string(filepath);
|
293
|
+
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
294
|
+
|
295
|
+
VALUE value = lex_input(&input, checked);
|
296
|
+
yp_string_free(&input);
|
297
|
+
|
298
|
+
return value;
|
299
|
+
}
|
300
|
+
|
301
|
+
/******************************************************************************/
|
302
|
+
/* Parsing Ruby code */
|
303
|
+
/******************************************************************************/
|
304
|
+
|
305
|
+
// Parse the given input and return a ParseResult instance.
|
306
|
+
static VALUE
|
307
|
+
parse_input(yp_string_t *input, const char *filepath) {
|
308
|
+
yp_parser_t parser;
|
309
|
+
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
|
310
|
+
|
311
|
+
yp_node_t *node = yp_parse(&parser);
|
312
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
|
313
|
+
|
314
|
+
VALUE source = yp_source_new(&parser);
|
315
|
+
VALUE result_argv[] = {
|
316
|
+
yp_ast_new(&parser, node, encoding),
|
317
|
+
parser_comments(&parser, source),
|
318
|
+
parser_errors(&parser, encoding, source),
|
319
|
+
parser_warnings(&parser, encoding, source),
|
320
|
+
source
|
321
|
+
};
|
322
|
+
|
323
|
+
VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
|
324
|
+
|
325
|
+
yp_node_destroy(&parser, node);
|
326
|
+
yp_parser_free(&parser);
|
327
|
+
|
328
|
+
return result;
|
329
|
+
}
|
330
|
+
|
331
|
+
// Parse the given string and return a ParseResult instance.
|
332
|
+
static VALUE
|
333
|
+
parse(int argc, VALUE *argv, VALUE self) {
|
334
|
+
VALUE string;
|
335
|
+
VALUE filepath;
|
336
|
+
rb_scan_args(argc, argv, "11", &string, &filepath);
|
337
|
+
|
338
|
+
yp_string_t input;
|
339
|
+
input_load_string(&input, string);
|
340
|
+
|
341
|
+
#ifdef YARP_DEBUG_MODE_BUILD
|
342
|
+
size_t length = yp_string_length(&input);
|
343
|
+
char* dup = malloc(length);
|
344
|
+
memcpy(dup, yp_string_source(&input), length);
|
345
|
+
yp_string_constant_init(&input, dup, length);
|
346
|
+
#endif
|
347
|
+
|
348
|
+
VALUE value = parse_input(&input, check_string(filepath));
|
349
|
+
|
350
|
+
#ifdef YARP_DEBUG_MODE_BUILD
|
351
|
+
free(dup);
|
352
|
+
#endif
|
353
|
+
|
354
|
+
return value;
|
355
|
+
}
|
356
|
+
|
357
|
+
// Parse the given file and return a ParseResult instance.
|
358
|
+
static VALUE
|
359
|
+
parse_file(VALUE self, VALUE filepath) {
|
360
|
+
yp_string_t input;
|
361
|
+
|
362
|
+
const char *checked = check_string(filepath);
|
363
|
+
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
364
|
+
|
365
|
+
VALUE value = parse_input(&input, checked);
|
366
|
+
yp_string_free(&input);
|
367
|
+
|
368
|
+
return value;
|
369
|
+
}
|
370
|
+
|
371
|
+
/******************************************************************************/
|
372
|
+
/* Utility functions exposed to make testing easier */
|
373
|
+
/******************************************************************************/
|
374
|
+
|
375
|
+
// Returns an array of strings corresponding to the named capture groups in the
|
376
|
+
// given source string. If YARP was unable to parse the regular expression, this
|
377
|
+
// function returns nil.
|
378
|
+
static VALUE
|
379
|
+
named_captures(VALUE self, VALUE source) {
|
380
|
+
yp_string_list_t string_list;
|
381
|
+
yp_string_list_init(&string_list);
|
382
|
+
|
383
|
+
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
|
384
|
+
yp_string_list_free(&string_list);
|
385
|
+
return Qnil;
|
386
|
+
}
|
387
|
+
|
388
|
+
VALUE names = rb_ary_new();
|
389
|
+
for (size_t index = 0; index < string_list.length; index++) {
|
390
|
+
const yp_string_t *string = &string_list.strings[index];
|
391
|
+
rb_ary_push(names, rb_str_new(yp_string_source(string), yp_string_length(string)));
|
392
|
+
}
|
393
|
+
|
394
|
+
yp_string_list_free(&string_list);
|
395
|
+
return names;
|
396
|
+
}
|
397
|
+
|
398
|
+
// Accepts a source string and a type of unescaping and returns the unescaped
|
399
|
+
// version.
|
400
|
+
static VALUE
|
401
|
+
unescape(VALUE source, yp_unescape_type_t unescape_type) {
|
402
|
+
yp_string_t result;
|
403
|
+
|
404
|
+
if (yp_unescape_string(RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
|
405
|
+
VALUE str = rb_str_new(yp_string_source(&result), yp_string_length(&result));
|
406
|
+
yp_string_free(&result);
|
407
|
+
return str;
|
408
|
+
} else {
|
409
|
+
yp_string_free(&result);
|
410
|
+
return Qnil;
|
411
|
+
}
|
412
|
+
}
|
413
|
+
|
414
|
+
// Do not unescape anything in the given string. This is here to provide a
|
415
|
+
// consistent API.
|
416
|
+
static VALUE
|
417
|
+
unescape_none(VALUE self, VALUE source) {
|
418
|
+
return unescape(source, YP_UNESCAPE_NONE);
|
419
|
+
}
|
420
|
+
|
421
|
+
// Minimally unescape the given string. This means effectively unescaping just
|
422
|
+
// the quotes of a string. Returns the unescaped string.
|
423
|
+
static VALUE
|
424
|
+
unescape_minimal(VALUE self, VALUE source) {
|
425
|
+
return unescape(source, YP_UNESCAPE_MINIMAL);
|
426
|
+
}
|
427
|
+
|
428
|
+
// Unescape everything in the given string. Return the unescaped string.
|
429
|
+
static VALUE
|
430
|
+
unescape_all(VALUE self, VALUE source) {
|
431
|
+
return unescape(source, YP_UNESCAPE_ALL);
|
432
|
+
}
|
433
|
+
|
434
|
+
// Return a hash of information about the given source string's memory usage.
|
435
|
+
static VALUE
|
436
|
+
memsize(VALUE self, VALUE string) {
|
437
|
+
yp_parser_t parser;
|
438
|
+
size_t length = RSTRING_LEN(string);
|
439
|
+
yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
|
440
|
+
|
441
|
+
yp_node_t *node = yp_parse(&parser);
|
442
|
+
yp_memsize_t memsize;
|
443
|
+
yp_node_memsize(node, &memsize);
|
444
|
+
|
445
|
+
yp_node_destroy(&parser, node);
|
446
|
+
yp_parser_free(&parser);
|
447
|
+
|
448
|
+
VALUE result = rb_hash_new();
|
449
|
+
rb_hash_aset(result, ID2SYM(rb_intern("length")), INT2FIX(length));
|
450
|
+
rb_hash_aset(result, ID2SYM(rb_intern("memsize")), INT2FIX(memsize.memsize));
|
451
|
+
rb_hash_aset(result, ID2SYM(rb_intern("node_count")), INT2FIX(memsize.node_count));
|
452
|
+
return result;
|
453
|
+
}
|
454
|
+
|
455
|
+
// Parse the file, but do nothing with the result. This is used to profile the
|
456
|
+
// parser for memory and speed.
|
457
|
+
static VALUE
|
458
|
+
profile_file(VALUE self, VALUE filepath) {
|
459
|
+
yp_string_t input;
|
460
|
+
|
461
|
+
const char *checked = check_string(filepath);
|
462
|
+
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
463
|
+
|
464
|
+
yp_parser_t parser;
|
465
|
+
yp_parser_init(&parser, yp_string_source(&input), yp_string_length(&input), checked);
|
466
|
+
|
467
|
+
yp_node_t *node = yp_parse(&parser);
|
468
|
+
yp_node_destroy(&parser, node);
|
469
|
+
yp_parser_free(&parser);
|
470
|
+
|
471
|
+
return Qnil;
|
472
|
+
}
|
473
|
+
|
474
|
+
// Parse the file and serialize the result. This is mostly used to test this
|
475
|
+
// path since it is used by client libraries.
|
476
|
+
static VALUE
|
477
|
+
parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) {
|
478
|
+
yp_string_t input;
|
479
|
+
yp_buffer_t buffer;
|
480
|
+
yp_buffer_init(&buffer);
|
481
|
+
|
482
|
+
const char *checked = check_string(filepath);
|
483
|
+
if (!yp_string_mapped_init(&input, checked)) return Qnil;
|
484
|
+
|
485
|
+
yp_parse_serialize(yp_string_source(&input), yp_string_length(&input), &buffer, check_string(metadata));
|
486
|
+
VALUE result = rb_str_new(buffer.value, buffer.length);
|
487
|
+
|
488
|
+
yp_buffer_free(&buffer);
|
489
|
+
return result;
|
490
|
+
}
|
491
|
+
|
492
|
+
/******************************************************************************/
|
493
|
+
/* Initialization of the extension */
|
494
|
+
/******************************************************************************/
|
495
|
+
|
496
|
+
RUBY_FUNC_EXPORTED void
|
497
|
+
Init_yarp(void) {
|
498
|
+
// Make sure that the YARP library version matches the expected version.
|
499
|
+
// Otherwise something was compiled incorrectly.
|
500
|
+
if (strcmp(yp_version(), EXPECTED_YARP_VERSION) != 0) {
|
501
|
+
rb_raise(
|
502
|
+
rb_eRuntimeError,
|
503
|
+
"The YARP library version (%s) does not match the expected version (%s)",
|
504
|
+
yp_version(),
|
505
|
+
EXPECTED_YARP_VERSION
|
506
|
+
);
|
507
|
+
}
|
508
|
+
|
509
|
+
// Grab up references to all of the constants that we're going to need to
|
510
|
+
// reference throughout this extension.
|
511
|
+
rb_cYARP = rb_define_module("YARP");
|
512
|
+
rb_cYARPNode = rb_define_class_under(rb_cYARP, "Node", rb_cObject);
|
513
|
+
rb_cYARPSource = rb_define_class_under(rb_cYARP, "Source", rb_cObject);
|
514
|
+
rb_cYARPToken = rb_define_class_under(rb_cYARP, "Token", rb_cObject);
|
515
|
+
rb_cYARPLocation = rb_define_class_under(rb_cYARP, "Location", rb_cObject);
|
516
|
+
rb_cYARPComment = rb_define_class_under(rb_cYARP, "Comment", rb_cObject);
|
517
|
+
rb_cYARPParseError = rb_define_class_under(rb_cYARP, "ParseError", rb_cObject);
|
518
|
+
rb_cYARPParseWarning = rb_define_class_under(rb_cYARP, "ParseWarning", rb_cObject);
|
519
|
+
rb_cYARPParseResult = rb_define_class_under(rb_cYARP, "ParseResult", rb_cObject);
|
520
|
+
|
521
|
+
// Define the version string here so that we can use the constants defined
|
522
|
+
// in yarp.h.
|
523
|
+
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
|
524
|
+
|
525
|
+
// First, the functions that have to do with lexing and parsing.
|
526
|
+
rb_define_singleton_method(rb_cYARP, "dump", dump, -1);
|
527
|
+
rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
|
528
|
+
rb_define_singleton_method(rb_cYARP, "lex", lex, -1);
|
529
|
+
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
|
530
|
+
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
|
531
|
+
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
|
532
|
+
|
533
|
+
// Next, the functions that will be called by the parser to perform various
|
534
|
+
// internal tasks. We expose these to make them easier to test.
|
535
|
+
VALUE rb_cYARPDebug = rb_define_module_under(rb_cYARP, "Debug");
|
536
|
+
rb_define_singleton_method(rb_cYARPDebug, "named_captures", named_captures, 1);
|
537
|
+
rb_define_singleton_method(rb_cYARPDebug, "unescape_none", unescape_none, 1);
|
538
|
+
rb_define_singleton_method(rb_cYARPDebug, "unescape_minimal", unescape_minimal, 1);
|
539
|
+
rb_define_singleton_method(rb_cYARPDebug, "unescape_all", unescape_all, 1);
|
540
|
+
rb_define_singleton_method(rb_cYARPDebug, "memsize", memsize, 1);
|
541
|
+
rb_define_singleton_method(rb_cYARPDebug, "profile_file", profile_file, 1);
|
542
|
+
rb_define_singleton_method(rb_cYARPDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
|
543
|
+
|
544
|
+
// Next, initialize the other APIs.
|
545
|
+
Init_yarp_api_node();
|
546
|
+
Init_yarp_pack();
|
547
|
+
}
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#ifndef YARP_EXT_NODE_H
#define YARP_EXT_NODE_H

#include <ruby.h>
#include <ruby/encoding.h>
#include "yarp.h"

// The version of the YARP library that this extension expects to be linked
// against. Init_yarp compares it with yp_version() and raises on a mismatch.
#define EXPECTED_YARP_VERSION "0.6.0"

// Constructors for the Ruby objects that wrap parser data. They are declared
// here and called from the extension's lexing and parsing functions.
VALUE yp_source_new(yp_parser_t *parser);
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);

// Initializers. Init_yarp is the entry point of the extension and calls the
// other two.
void Init_yarp_api_node(void);
void Init_yarp_pack(void);
YP_EXPORTED_FUNCTION void Init_yarp(void);

#endif
|