yarp 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/yarp/extension.c CHANGED
@@ -198,52 +198,67 @@ typedef struct {
198
198
  VALUE source;
199
199
  VALUE tokens;
200
200
  rb_encoding *encoding;
201
- } lex_data_t;
201
+ } parse_lex_data_t;
202
202
 
203
203
  // This is passed as a callback to the parser. It gets called every time a new
204
204
  // token is found. Once found, we initialize a new instance of Token and push it
205
205
  // onto the tokens array.
206
206
  static void
207
- lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
208
- lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
207
+ parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
208
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
209
209
 
210
210
  VALUE yields = rb_ary_new_capa(2);
211
- rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
211
+ rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
212
212
  rb_ary_push(yields, INT2FIX(parser->lex_state));
213
213
 
214
- rb_ary_push(lex_data->tokens, yields);
214
+ rb_ary_push(parse_lex_data->tokens, yields);
215
215
  }
216
216
 
217
217
  // This is called whenever the encoding changes based on the magic comment at
218
218
  // the top of the file. We use it to update the encoding that we are using to
219
219
  // create tokens.
220
220
  static void
221
- lex_encoding_changed_callback(yp_parser_t *parser) {
222
- lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
223
- lex_data->encoding = rb_enc_find(parser->encoding.name);
221
+ parse_lex_encoding_changed_callback(yp_parser_t *parser) {
222
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
223
+ parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
224
+
225
+ // Since the encoding changed, we need to go back and change the encoding of
226
+ // the tokens that were already lexed. This is only going to end up being
227
+ // one or two tokens, since the encoding can only change at the top of the
228
+ // file.
229
+ VALUE tokens = parse_lex_data->tokens;
230
+ for (long index = 0; index < RARRAY_LEN(tokens); index++) {
231
+ VALUE yields = rb_ary_entry(tokens, index);
232
+ VALUE token = rb_ary_entry(yields, 0);
233
+
234
+ VALUE value = rb_ivar_get(token, rb_intern("@value"));
235
+ rb_enc_associate(value, parse_lex_data->encoding);
236
+ ENC_CODERANGE_CLEAR(value);
237
+ }
224
238
  }
225
239
 
226
- // Return an array of tokens corresponding to the given source.
240
+ // Parse the given input and return a ParseResult containing just the tokens or
241
+ // the nodes and tokens.
227
242
  static VALUE
228
- lex_input(yp_string_t *input, const char *filepath) {
243
+ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
229
244
  yp_parser_t parser;
230
245
  yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
231
- yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
246
+ yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
232
247
 
233
248
  VALUE offsets = rb_ary_new();
234
249
  VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
235
250
  VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
236
251
 
237
- lex_data_t lex_data = {
252
+ parse_lex_data_t parse_lex_data = {
238
253
  .source = source,
239
254
  .tokens = rb_ary_new(),
240
255
  .encoding = rb_utf8_encoding()
241
256
  };
242
257
 
243
- lex_data_t *data = &lex_data;
258
+ parse_lex_data_t *data = &parse_lex_data;
244
259
  yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
245
260
  .data = (void *) data,
246
- .callback = lex_token,
261
+ .callback = parse_lex_token,
247
262
  };
248
263
 
249
264
  parser.lex_callback = &lex_callback;
@@ -256,20 +271,26 @@ lex_input(yp_string_t *input, const char *filepath) {
256
271
  rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
257
272
  }
258
273
 
274
+ VALUE value;
275
+ if (return_nodes) {
276
+ value = rb_ary_new_capa(2);
277
+ rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
278
+ rb_ary_push(value, parse_lex_data.tokens);
279
+ } else {
280
+ value = parse_lex_data.tokens;
281
+ }
282
+
259
283
  VALUE result_argv[] = {
260
- lex_data.tokens,
284
+ value,
261
285
  parser_comments(&parser, source),
262
- parser_errors(&parser, lex_data.encoding, source),
263
- parser_warnings(&parser, lex_data.encoding, source),
286
+ parser_errors(&parser, parse_lex_data.encoding, source),
287
+ parser_warnings(&parser, parse_lex_data.encoding, source),
264
288
  source
265
289
  };
266
290
 
267
- VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
268
-
269
291
  yp_node_destroy(&parser, node);
270
292
  yp_parser_free(&parser);
271
-
272
- return result;
293
+ return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
273
294
  }
274
295
 
275
296
  // Lex the given string and return a ParseResult instance wrapping the array of tokens.
@@ -281,7 +302,8 @@ lex(int argc, VALUE *argv, VALUE self) {
281
302
 
282
303
  yp_string_t input;
283
304
  input_load_string(&input, string);
284
- return lex_input(&input, check_string(filepath));
305
+
306
+ return parse_lex_input(&input, check_string(filepath), false);
285
307
  }
286
308
 
287
309
  // Lex the given file and return a ParseResult instance wrapping the array of tokens.
@@ -292,7 +314,7 @@ lex_file(VALUE self, VALUE filepath) {
292
314
  const char *checked = check_string(filepath);
293
315
  if (!yp_string_mapped_init(&input, checked)) return Qnil;
294
316
 
295
- VALUE value = lex_input(&input, checked);
317
+ VALUE value = parse_lex_input(&input, checked, false);
296
318
  yp_string_free(&input);
297
319
 
298
320
  return value;
@@ -368,6 +390,32 @@ parse_file(VALUE self, VALUE filepath) {
368
390
  return value;
369
391
  }
370
392
 
393
+ // Parse and lex the given string and return a ParseResult instance.
394
+ static VALUE
395
+ parse_lex(int argc, VALUE *argv, VALUE self) {
396
+ VALUE string;
397
+ VALUE filepath;
398
+ rb_scan_args(argc, argv, "11", &string, &filepath);
399
+
400
+ yp_string_t input;
401
+ input_load_string(&input, string);
402
+ return parse_lex_input(&input, check_string(filepath), true);
403
+ }
404
+
405
+ // Parse and lex the given file and return a ParseResult instance.
406
+ static VALUE
407
+ parse_lex_file(VALUE self, VALUE filepath) {
408
+ yp_string_t input;
409
+
410
+ const char *checked = check_string(filepath);
411
+ if (!yp_string_mapped_init(&input, checked)) return Qnil;
412
+
413
+ VALUE value = parse_lex_input(&input, checked, true);
414
+ yp_string_free(&input);
415
+
416
+ return value;
417
+ }
418
+
371
419
  /******************************************************************************/
372
420
  /* Utility functions exposed to make testing easier */
373
421
  /******************************************************************************/
@@ -521,7 +569,6 @@ Init_yarp(void) {
521
569
  // Define the version string here so that we can use the constants defined
522
570
  // in yarp.h.
523
571
  rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
524
-
525
572
  rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
526
573
 
527
574
  // First, the functions that have to do with lexing and parsing.
@@ -531,6 +578,8 @@ Init_yarp(void) {
531
578
  rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
532
579
  rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
533
580
  rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
581
+ rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
582
+ rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
534
583
 
535
584
  // Next, the functions that will be called by the parser to perform various
536
585
  // internal tasks. We expose these to make them easier to test.
data/ext/yarp/extension.h CHANGED
@@ -1,12 +1,12 @@
1
1
  #ifndef YARP_EXT_NODE_H
2
2
  #define YARP_EXT_NODE_H
3
3
 
4
+ #define EXPECTED_YARP_VERSION "0.9.0"
5
+
4
6
  #include <ruby.h>
5
7
  #include <ruby/encoding.h>
6
8
  #include "yarp.h"
7
9
 
8
- #define EXPECTED_YARP_VERSION "0.8.0"
9
-
10
10
  VALUE yp_source_new(yp_parser_t *parser);
11
11
  VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
12
12
  VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);