yarp 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/yarp/extension.c CHANGED
@@ -198,52 +198,67 @@ typedef struct {
198
198
  VALUE source;
199
199
  VALUE tokens;
200
200
  rb_encoding *encoding;
201
- } lex_data_t;
201
+ } parse_lex_data_t;
202
202
 
203
203
  // This is passed as a callback to the parser. It gets called every time a new
204
204
  // token is found. Once found, we initialize a new instance of Token and push it
205
205
  // onto the tokens array.
206
206
  static void
207
- lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
208
- lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
207
+ parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
208
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
209
209
 
210
210
  VALUE yields = rb_ary_new_capa(2);
211
- rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
211
+ rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
212
212
  rb_ary_push(yields, INT2FIX(parser->lex_state));
213
213
 
214
- rb_ary_push(lex_data->tokens, yields);
214
+ rb_ary_push(parse_lex_data->tokens, yields);
215
215
  }
216
216
 
217
217
  // This is called whenever the encoding changes based on the magic comment at
218
218
  // the top of the file. We use it to update the encoding that we are using to
219
219
  // create tokens.
220
220
  static void
221
- lex_encoding_changed_callback(yp_parser_t *parser) {
222
- lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
223
- lex_data->encoding = rb_enc_find(parser->encoding.name);
221
+ parse_lex_encoding_changed_callback(yp_parser_t *parser) {
222
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
223
+ parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
224
+
225
+ // Since the encoding changed, we need to go back and change the encoding of
226
+ // the tokens that were already lexed. This is only going to end up being
227
+ // one or two tokens, since the encoding can only change at the top of the
228
+ // file.
229
+ VALUE tokens = parse_lex_data->tokens;
230
+ for (long index = 0; index < RARRAY_LEN(tokens); index++) {
231
+ VALUE yields = rb_ary_entry(tokens, index);
232
+ VALUE token = rb_ary_entry(yields, 0);
233
+
234
+ VALUE value = rb_ivar_get(token, rb_intern("@value"));
235
+ rb_enc_associate(value, parse_lex_data->encoding);
236
+ ENC_CODERANGE_CLEAR(value);
237
+ }
224
238
  }
225
239
 
226
- // Return an array of tokens corresponding to the given source.
240
+ // Parse the given input and return a ParseResult containing just the tokens or
241
+ // the nodes and tokens.
227
242
  static VALUE
228
- lex_input(yp_string_t *input, const char *filepath) {
243
+ parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
229
244
  yp_parser_t parser;
230
245
  yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
231
- yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
246
+ yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
232
247
 
233
248
  VALUE offsets = rb_ary_new();
234
249
  VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
235
250
  VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
236
251
 
237
- lex_data_t lex_data = {
252
+ parse_lex_data_t parse_lex_data = {
238
253
  .source = source,
239
254
  .tokens = rb_ary_new(),
240
255
  .encoding = rb_utf8_encoding()
241
256
  };
242
257
 
243
- lex_data_t *data = &lex_data;
258
+ parse_lex_data_t *data = &parse_lex_data;
244
259
  yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
245
260
  .data = (void *) data,
246
- .callback = lex_token,
261
+ .callback = parse_lex_token,
247
262
  };
248
263
 
249
264
  parser.lex_callback = &lex_callback;
@@ -256,20 +271,26 @@ lex_input(yp_string_t *input, const char *filepath) {
256
271
  rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
257
272
  }
258
273
 
274
+ VALUE value;
275
+ if (return_nodes) {
276
+ value = rb_ary_new_capa(2);
277
+ rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
278
+ rb_ary_push(value, parse_lex_data.tokens);
279
+ } else {
280
+ value = parse_lex_data.tokens;
281
+ }
282
+
259
283
  VALUE result_argv[] = {
260
- lex_data.tokens,
284
+ value,
261
285
  parser_comments(&parser, source),
262
- parser_errors(&parser, lex_data.encoding, source),
263
- parser_warnings(&parser, lex_data.encoding, source),
286
+ parser_errors(&parser, parse_lex_data.encoding, source),
287
+ parser_warnings(&parser, parse_lex_data.encoding, source),
264
288
  source
265
289
  };
266
290
 
267
- VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
268
-
269
291
  yp_node_destroy(&parser, node);
270
292
  yp_parser_free(&parser);
271
-
272
- return result;
293
+ return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
273
294
  }
274
295
 
275
296
  // Return a ParseResult whose value is the array of tokens corresponding to the given string.
@@ -281,7 +302,8 @@ lex(int argc, VALUE *argv, VALUE self) {
281
302
 
282
303
  yp_string_t input;
283
304
  input_load_string(&input, string);
284
- return lex_input(&input, check_string(filepath));
305
+
306
+ return parse_lex_input(&input, check_string(filepath), false);
285
307
  }
286
308
 
287
309
  // Return a ParseResult whose value is the array of tokens corresponding to the given file.
@@ -292,7 +314,7 @@ lex_file(VALUE self, VALUE filepath) {
292
314
  const char *checked = check_string(filepath);
293
315
  if (!yp_string_mapped_init(&input, checked)) return Qnil;
294
316
 
295
- VALUE value = lex_input(&input, checked);
317
+ VALUE value = parse_lex_input(&input, checked, false);
296
318
  yp_string_free(&input);
297
319
 
298
320
  return value;
@@ -368,6 +390,32 @@ parse_file(VALUE self, VALUE filepath) {
368
390
  return value;
369
391
  }
370
392
 
393
+ // Parse and lex the given string and return a ParseResult instance.
394
+ static VALUE
395
+ parse_lex(int argc, VALUE *argv, VALUE self) {
396
+ VALUE string;
397
+ VALUE filepath;
398
+ rb_scan_args(argc, argv, "11", &string, &filepath);
399
+
400
+ yp_string_t input;
401
+ input_load_string(&input, string);
402
+ return parse_lex_input(&input, check_string(filepath), true);
403
+ }
404
+
405
+ // Parse and lex the given file and return a ParseResult instance.
406
+ static VALUE
407
+ parse_lex_file(VALUE self, VALUE filepath) {
408
+ yp_string_t input;
409
+
410
+ const char *checked = check_string(filepath);
411
+ if (!yp_string_mapped_init(&input, checked)) return Qnil;
412
+
413
+ VALUE value = parse_lex_input(&input, checked, true);
414
+ yp_string_free(&input);
415
+
416
+ return value;
417
+ }
418
+
371
419
  /******************************************************************************/
372
420
  /* Utility functions exposed to make testing easier */
373
421
  /******************************************************************************/
@@ -521,7 +569,6 @@ Init_yarp(void) {
521
569
  // Define the version string here so that we can use the constants defined
522
570
  // in yarp.h.
523
571
  rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
524
-
525
572
  rb_define_const(rb_cYARP, "BACKEND", ID2SYM(rb_intern("CExtension")));
526
573
 
527
574
  // First, the functions that have to do with lexing and parsing.
@@ -531,6 +578,8 @@ Init_yarp(void) {
531
578
  rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
532
579
  rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
533
580
  rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
581
+ rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
582
+ rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
534
583
 
535
584
  // Next, the functions that will be called by the parser to perform various
536
585
  // internal tasks. We expose these to make them easier to test.
data/ext/yarp/extension.h CHANGED
@@ -1,12 +1,12 @@
1
1
  #ifndef YARP_EXT_NODE_H
2
2
  #define YARP_EXT_NODE_H
3
3
 
4
+ #define EXPECTED_YARP_VERSION "0.9.0"
5
+
4
6
  #include <ruby.h>
5
7
  #include <ruby/encoding.h>
6
8
  #include "yarp.h"
7
9
 
8
- #define EXPECTED_YARP_VERSION "0.8.0"
9
-
10
10
  VALUE yp_source_new(yp_parser_t *parser);
11
11
  VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
12
12
  VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);