yarp 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/include/yarp/ast.h CHANGED
@@ -259,95 +259,97 @@ enum yp_node_type {
259
259
  YP_NODE_ENSURE_NODE = 43,
260
260
  YP_NODE_FALSE_NODE = 44,
261
261
  YP_NODE_FIND_PATTERN_NODE = 45,
262
- YP_NODE_FLOAT_NODE = 46,
263
- YP_NODE_FOR_NODE = 47,
264
- YP_NODE_FORWARDING_ARGUMENTS_NODE = 48,
265
- YP_NODE_FORWARDING_PARAMETER_NODE = 49,
266
- YP_NODE_FORWARDING_SUPER_NODE = 50,
267
- YP_NODE_GLOBAL_VARIABLE_OPERATOR_AND_WRITE_NODE = 51,
268
- YP_NODE_GLOBAL_VARIABLE_OPERATOR_OR_WRITE_NODE = 52,
269
- YP_NODE_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE = 53,
270
- YP_NODE_GLOBAL_VARIABLE_READ_NODE = 54,
271
- YP_NODE_GLOBAL_VARIABLE_WRITE_NODE = 55,
272
- YP_NODE_HASH_NODE = 56,
273
- YP_NODE_HASH_PATTERN_NODE = 57,
274
- YP_NODE_IF_NODE = 58,
275
- YP_NODE_IMAGINARY_NODE = 59,
276
- YP_NODE_IN_NODE = 60,
277
- YP_NODE_INSTANCE_VARIABLE_OPERATOR_AND_WRITE_NODE = 61,
278
- YP_NODE_INSTANCE_VARIABLE_OPERATOR_OR_WRITE_NODE = 62,
279
- YP_NODE_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE = 63,
280
- YP_NODE_INSTANCE_VARIABLE_READ_NODE = 64,
281
- YP_NODE_INSTANCE_VARIABLE_WRITE_NODE = 65,
282
- YP_NODE_INTEGER_NODE = 66,
283
- YP_NODE_INTERPOLATED_REGULAR_EXPRESSION_NODE = 67,
284
- YP_NODE_INTERPOLATED_STRING_NODE = 68,
285
- YP_NODE_INTERPOLATED_SYMBOL_NODE = 69,
286
- YP_NODE_INTERPOLATED_X_STRING_NODE = 70,
287
- YP_NODE_KEYWORD_HASH_NODE = 71,
288
- YP_NODE_KEYWORD_PARAMETER_NODE = 72,
289
- YP_NODE_KEYWORD_REST_PARAMETER_NODE = 73,
290
- YP_NODE_LAMBDA_NODE = 74,
291
- YP_NODE_LOCAL_VARIABLE_OPERATOR_AND_WRITE_NODE = 75,
292
- YP_NODE_LOCAL_VARIABLE_OPERATOR_OR_WRITE_NODE = 76,
293
- YP_NODE_LOCAL_VARIABLE_OPERATOR_WRITE_NODE = 77,
294
- YP_NODE_LOCAL_VARIABLE_READ_NODE = 78,
295
- YP_NODE_LOCAL_VARIABLE_WRITE_NODE = 79,
296
- YP_NODE_MATCH_PREDICATE_NODE = 80,
297
- YP_NODE_MATCH_REQUIRED_NODE = 81,
298
- YP_NODE_MISSING_NODE = 82,
299
- YP_NODE_MODULE_NODE = 83,
300
- YP_NODE_MULTI_WRITE_NODE = 84,
301
- YP_NODE_NEXT_NODE = 85,
302
- YP_NODE_NIL_NODE = 86,
303
- YP_NODE_NO_KEYWORDS_PARAMETER_NODE = 87,
304
- YP_NODE_NUMBERED_REFERENCE_READ_NODE = 88,
305
- YP_NODE_OPTIONAL_PARAMETER_NODE = 89,
306
- YP_NODE_OR_NODE = 90,
307
- YP_NODE_PARAMETERS_NODE = 91,
308
- YP_NODE_PARENTHESES_NODE = 92,
309
- YP_NODE_PINNED_EXPRESSION_NODE = 93,
310
- YP_NODE_PINNED_VARIABLE_NODE = 94,
311
- YP_NODE_POST_EXECUTION_NODE = 95,
312
- YP_NODE_PRE_EXECUTION_NODE = 96,
313
- YP_NODE_PROGRAM_NODE = 97,
314
- YP_NODE_RANGE_NODE = 98,
315
- YP_NODE_RATIONAL_NODE = 99,
316
- YP_NODE_REDO_NODE = 100,
317
- YP_NODE_REGULAR_EXPRESSION_NODE = 101,
318
- YP_NODE_REQUIRED_DESTRUCTURED_PARAMETER_NODE = 102,
319
- YP_NODE_REQUIRED_PARAMETER_NODE = 103,
320
- YP_NODE_RESCUE_MODIFIER_NODE = 104,
321
- YP_NODE_RESCUE_NODE = 105,
322
- YP_NODE_REST_PARAMETER_NODE = 106,
323
- YP_NODE_RETRY_NODE = 107,
324
- YP_NODE_RETURN_NODE = 108,
325
- YP_NODE_SELF_NODE = 109,
326
- YP_NODE_SINGLETON_CLASS_NODE = 110,
327
- YP_NODE_SOURCE_ENCODING_NODE = 111,
328
- YP_NODE_SOURCE_FILE_NODE = 112,
329
- YP_NODE_SOURCE_LINE_NODE = 113,
330
- YP_NODE_SPLAT_NODE = 114,
331
- YP_NODE_STATEMENTS_NODE = 115,
332
- YP_NODE_STRING_CONCAT_NODE = 116,
333
- YP_NODE_STRING_NODE = 117,
334
- YP_NODE_SUPER_NODE = 118,
335
- YP_NODE_SYMBOL_NODE = 119,
336
- YP_NODE_TRUE_NODE = 120,
337
- YP_NODE_UNDEF_NODE = 121,
338
- YP_NODE_UNLESS_NODE = 122,
339
- YP_NODE_UNTIL_NODE = 123,
340
- YP_NODE_WHEN_NODE = 124,
341
- YP_NODE_WHILE_NODE = 125,
342
- YP_NODE_X_STRING_NODE = 126,
343
- YP_NODE_YIELD_NODE = 127,
262
+ YP_NODE_FLIP_FLOP_NODE = 46,
263
+ YP_NODE_FLOAT_NODE = 47,
264
+ YP_NODE_FOR_NODE = 48,
265
+ YP_NODE_FORWARDING_ARGUMENTS_NODE = 49,
266
+ YP_NODE_FORWARDING_PARAMETER_NODE = 50,
267
+ YP_NODE_FORWARDING_SUPER_NODE = 51,
268
+ YP_NODE_GLOBAL_VARIABLE_OPERATOR_AND_WRITE_NODE = 52,
269
+ YP_NODE_GLOBAL_VARIABLE_OPERATOR_OR_WRITE_NODE = 53,
270
+ YP_NODE_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE = 54,
271
+ YP_NODE_GLOBAL_VARIABLE_READ_NODE = 55,
272
+ YP_NODE_GLOBAL_VARIABLE_WRITE_NODE = 56,
273
+ YP_NODE_HASH_NODE = 57,
274
+ YP_NODE_HASH_PATTERN_NODE = 58,
275
+ YP_NODE_IF_NODE = 59,
276
+ YP_NODE_IMAGINARY_NODE = 60,
277
+ YP_NODE_IN_NODE = 61,
278
+ YP_NODE_INSTANCE_VARIABLE_OPERATOR_AND_WRITE_NODE = 62,
279
+ YP_NODE_INSTANCE_VARIABLE_OPERATOR_OR_WRITE_NODE = 63,
280
+ YP_NODE_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE = 64,
281
+ YP_NODE_INSTANCE_VARIABLE_READ_NODE = 65,
282
+ YP_NODE_INSTANCE_VARIABLE_WRITE_NODE = 66,
283
+ YP_NODE_INTEGER_NODE = 67,
284
+ YP_NODE_INTERPOLATED_REGULAR_EXPRESSION_NODE = 68,
285
+ YP_NODE_INTERPOLATED_STRING_NODE = 69,
286
+ YP_NODE_INTERPOLATED_SYMBOL_NODE = 70,
287
+ YP_NODE_INTERPOLATED_X_STRING_NODE = 71,
288
+ YP_NODE_KEYWORD_HASH_NODE = 72,
289
+ YP_NODE_KEYWORD_PARAMETER_NODE = 73,
290
+ YP_NODE_KEYWORD_REST_PARAMETER_NODE = 74,
291
+ YP_NODE_LAMBDA_NODE = 75,
292
+ YP_NODE_LOCAL_VARIABLE_OPERATOR_AND_WRITE_NODE = 76,
293
+ YP_NODE_LOCAL_VARIABLE_OPERATOR_OR_WRITE_NODE = 77,
294
+ YP_NODE_LOCAL_VARIABLE_OPERATOR_WRITE_NODE = 78,
295
+ YP_NODE_LOCAL_VARIABLE_READ_NODE = 79,
296
+ YP_NODE_LOCAL_VARIABLE_WRITE_NODE = 80,
297
+ YP_NODE_MATCH_PREDICATE_NODE = 81,
298
+ YP_NODE_MATCH_REQUIRED_NODE = 82,
299
+ YP_NODE_MISSING_NODE = 83,
300
+ YP_NODE_MODULE_NODE = 84,
301
+ YP_NODE_MULTI_WRITE_NODE = 85,
302
+ YP_NODE_NEXT_NODE = 86,
303
+ YP_NODE_NIL_NODE = 87,
304
+ YP_NODE_NO_KEYWORDS_PARAMETER_NODE = 88,
305
+ YP_NODE_NUMBERED_REFERENCE_READ_NODE = 89,
306
+ YP_NODE_OPTIONAL_PARAMETER_NODE = 90,
307
+ YP_NODE_OR_NODE = 91,
308
+ YP_NODE_PARAMETERS_NODE = 92,
309
+ YP_NODE_PARENTHESES_NODE = 93,
310
+ YP_NODE_PINNED_EXPRESSION_NODE = 94,
311
+ YP_NODE_PINNED_VARIABLE_NODE = 95,
312
+ YP_NODE_POST_EXECUTION_NODE = 96,
313
+ YP_NODE_PRE_EXECUTION_NODE = 97,
314
+ YP_NODE_PROGRAM_NODE = 98,
315
+ YP_NODE_RANGE_NODE = 99,
316
+ YP_NODE_RATIONAL_NODE = 100,
317
+ YP_NODE_REDO_NODE = 101,
318
+ YP_NODE_REGULAR_EXPRESSION_NODE = 102,
319
+ YP_NODE_REQUIRED_DESTRUCTURED_PARAMETER_NODE = 103,
320
+ YP_NODE_REQUIRED_PARAMETER_NODE = 104,
321
+ YP_NODE_RESCUE_MODIFIER_NODE = 105,
322
+ YP_NODE_RESCUE_NODE = 106,
323
+ YP_NODE_REST_PARAMETER_NODE = 107,
324
+ YP_NODE_RETRY_NODE = 108,
325
+ YP_NODE_RETURN_NODE = 109,
326
+ YP_NODE_SELF_NODE = 110,
327
+ YP_NODE_SINGLETON_CLASS_NODE = 111,
328
+ YP_NODE_SOURCE_ENCODING_NODE = 112,
329
+ YP_NODE_SOURCE_FILE_NODE = 113,
330
+ YP_NODE_SOURCE_LINE_NODE = 114,
331
+ YP_NODE_SPLAT_NODE = 115,
332
+ YP_NODE_STATEMENTS_NODE = 116,
333
+ YP_NODE_STRING_CONCAT_NODE = 117,
334
+ YP_NODE_STRING_NODE = 118,
335
+ YP_NODE_SUPER_NODE = 119,
336
+ YP_NODE_SYMBOL_NODE = 120,
337
+ YP_NODE_TRUE_NODE = 121,
338
+ YP_NODE_UNDEF_NODE = 122,
339
+ YP_NODE_UNLESS_NODE = 123,
340
+ YP_NODE_UNTIL_NODE = 124,
341
+ YP_NODE_WHEN_NODE = 125,
342
+ YP_NODE_WHILE_NODE = 126,
343
+ YP_NODE_X_STRING_NODE = 127,
344
+ YP_NODE_YIELD_NODE = 128,
344
345
  };
345
346
 
346
347
  typedef uint16_t yp_node_type_t;
347
348
  typedef uint16_t yp_node_flags_t;
348
349
 
349
- // We store the flags enum in every node in the tree
350
- static const uint16_t YP_NODE_FLAG_NEWLINE = 0x1;
350
+ // We store the flags enum in every node in the tree. Some flags are common to
351
+ // all nodes (the ones listed below). Others are specific to certain node types.
352
+ static const yp_node_flags_t YP_NODE_FLAG_NEWLINE = 0x1;
351
353
 
352
354
  // For easy access, we define some macros to check node type
353
355
  #define YP_NODE_TYPE(node) ((enum yp_node_type)node->type)
@@ -498,7 +500,6 @@ typedef struct yp_call_node {
498
500
  struct yp_arguments_node *arguments;
499
501
  yp_location_t closing_loc;
500
502
  struct yp_block_node *block;
501
- uint32_t flags;
502
503
  yp_string_t name;
503
504
  } yp_call_node_t;
504
505
 
@@ -746,6 +747,14 @@ typedef struct yp_find_pattern_node {
746
747
  yp_location_t closing_loc;
747
748
  } yp_find_pattern_node_t;
748
749
 
750
+ // FlipFlopNode
751
+ typedef struct yp_flip_flop_node {
752
+ yp_node_t base;
753
+ struct yp_node *left;
754
+ struct yp_node *right;
755
+ yp_location_t operator_loc;
756
+ } yp_flip_flop_node_t;
757
+
749
758
  // FloatNode
750
759
  typedef struct yp_float_node {
751
760
  yp_node_t base;
@@ -909,7 +918,6 @@ typedef struct yp_interpolated_regular_expression_node {
909
918
  yp_location_t opening_loc;
910
919
  struct yp_node_list parts;
911
920
  yp_location_t closing_loc;
912
- uint32_t flags;
913
921
  } yp_interpolated_regular_expression_node_t;
914
922
 
915
923
  // InterpolatedStringNode
@@ -1159,7 +1167,6 @@ typedef struct yp_range_node {
1159
1167
  struct yp_node *left;
1160
1168
  struct yp_node *right;
1161
1169
  yp_location_t operator_loc;
1162
- uint32_t flags;
1163
1170
  } yp_range_node_t;
1164
1171
 
1165
1172
  // RationalNode
@@ -1180,7 +1187,6 @@ typedef struct yp_regular_expression_node {
1180
1187
  yp_location_t content_loc;
1181
1188
  yp_location_t closing_loc;
1182
1189
  yp_string_t unescaped;
1183
- uint32_t flags;
1184
1190
  } yp_regular_expression_node_t;
1185
1191
 
1186
1192
  // RequiredDestructuredParameterNode
@@ -1343,7 +1349,6 @@ typedef struct yp_until_node {
1343
1349
  yp_location_t keyword_loc;
1344
1350
  struct yp_node *predicate;
1345
1351
  struct yp_statements_node *statements;
1346
- uint32_t flags;
1347
1352
  } yp_until_node_t;
1348
1353
 
1349
1354
  // WhenNode
@@ -1360,7 +1365,6 @@ typedef struct yp_while_node {
1360
1365
  yp_location_t keyword_loc;
1361
1366
  struct yp_node *predicate;
1362
1367
  struct yp_statements_node *statements;
1363
- uint32_t flags;
1364
1368
  } yp_while_node_t;
1365
1369
 
1366
1370
  // XStringNode
@@ -1383,30 +1387,30 @@ typedef struct yp_yield_node {
1383
1387
 
1384
1388
  // CallNodeFlags
1385
1389
  typedef enum {
1386
- YP_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 0,
1387
- YP_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 1,
1390
+ YP_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 1,
1391
+ YP_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 2,
1388
1392
  } yp_call_node_flags_t;
1389
1393
 
1390
1394
  // LoopFlags
1391
1395
  typedef enum {
1392
- YP_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 0,
1396
+ YP_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 1,
1393
1397
  } yp_loop_flags_t;
1394
1398
 
1395
- // RangeNodeFlags
1399
+ // RangeFlags
1396
1400
  typedef enum {
1397
- YP_RANGE_NODE_FLAGS_EXCLUDE_END = 1 << 0,
1398
- } yp_range_node_flags_t;
1401
+ YP_RANGE_FLAGS_EXCLUDE_END = 1 << 1,
1402
+ } yp_range_flags_t;
1399
1403
 
1400
1404
  // RegularExpressionFlags
1401
1405
  typedef enum {
1402
- YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 0,
1403
- YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 1,
1404
- YP_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 2,
1405
- YP_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 3,
1406
- YP_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 4,
1407
- YP_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 5,
1408
- YP_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 6,
1409
- YP_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 7,
1406
+ YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 1,
1407
+ YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 2,
1408
+ YP_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 3,
1409
+ YP_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 4,
1410
+ YP_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 5,
1411
+ YP_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 6,
1412
+ YP_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 7,
1413
+ YP_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 8,
1410
1414
  } yp_regular_expression_flags_t;
1411
1415
 
1412
1416
  #endif // YARP_AST_H
@@ -3,8 +3,6 @@
3
3
 
4
4
  // This file should be included first by any *.h or *.c in YARP
5
5
 
6
- #include "yarp/config.h"
7
-
8
6
  #include <ctype.h>
9
7
  #include <stdarg.h>
10
8
  #include <stddef.h>
@@ -38,17 +36,4 @@
38
36
 
39
37
  int yp_strncasecmp(const char *string1, const char *string2, size_t length);
40
38
 
41
- int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
42
-
43
- #if defined(HAVE_SNPRINTF)
44
- // We use snprintf if it's available
45
- # define yp_snprintf snprintf
46
-
47
- #else
48
- // In case snprintf isn't present on the system, we provide our own that simply
49
- // forwards to the less-safe sprintf.
50
- # define yp_snprintf(dest, size, ...) sprintf((dest), __VA_ARGS__)
51
-
52
- #endif
53
-
54
39
  #endif
@@ -87,6 +87,7 @@ extern yp_encoding_t yp_encoding_iso_8859_16;
87
87
  extern yp_encoding_t yp_encoding_koi8_r;
88
88
  extern yp_encoding_t yp_encoding_shift_jis;
89
89
  extern yp_encoding_t yp_encoding_utf_8;
90
+ extern yp_encoding_t yp_encoding_utf8_mac;
90
91
  extern yp_encoding_t yp_encoding_windows_31j;
91
92
  extern yp_encoding_t yp_encoding_windows_1251;
92
93
  extern yp_encoding_t yp_encoding_windows_1252;
@@ -12,6 +12,7 @@
12
12
  // A yp_buffer_t is a simple memory buffer that stores data in a contiguous
13
13
  // block of memory. It is used to store the serialized representation of a
14
14
  // YARP tree.
15
+ // NOTE: keep in sync with YARP::LibRubyParser::Buffer in lib/yarp.rb
15
16
  typedef struct {
16
17
  char *value;
17
18
  size_t length;
@@ -36,7 +36,7 @@ void yp_string_constant_init(yp_string_t *string, const char *source, size_t len
36
36
  // for large files). This means that if we're on windows we'll use
37
37
  // `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
38
38
  // `mmap`, and on other POSIX systems we'll use `read`.
39
- bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
39
+ YP_EXPORTED_FUNCTION bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
40
40
 
41
41
  // Returns the memory size associated with the string.
42
42
  size_t yp_string_memsize(const yp_string_t *string);
@@ -54,4 +54,8 @@ YP_EXPORTED_FUNCTION const char * yp_string_source(const yp_string_t *string);
54
54
  // Free the associated memory of the given string.
55
55
  YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
56
56
 
57
+ // Returns the size of the yp_string_t struct. This is necessary to allocate the
58
+ // correct amount of memory in the FFI backend.
59
+ YP_EXPORTED_FUNCTION size_t yp_string_sizeof(void);
60
+
57
61
  #endif // YARP_STRING_H
@@ -1,5 +1,4 @@
1
1
  #define YP_VERSION_MAJOR 0
2
- #define YP_VERSION_MINOR 6
2
+ #define YP_VERSION_MINOR 7
3
3
  #define YP_VERSION_PATCH 0
4
-
5
- #define YP_VERSION "0.6.0"
4
+ #define YP_VERSION "0.7.0"
data/include/yarp.h CHANGED
@@ -59,10 +59,12 @@ YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, y
59
59
  // Serialize the AST represented by the given node to the given buffer.
60
60
  YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
61
61
 
62
- // Parse and serialize the AST represented by the given source to the given
63
- // buffer.
62
+ // Parse the given source to the AST and serialize the AST to the given buffer.
64
63
  YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata);
65
64
 
65
+ // Lex the given source and serialize to the given buffer.
66
+ YP_EXPORTED_FUNCTION void yp_lex_serialize(const char *source, size_t size, const char *filepath, yp_buffer_t *buffer);
67
+
66
68
  // Returns a string representation of the given token type.
67
69
  YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
68
70
 
data/lib/yarp/ffi.rb ADDED
@@ -0,0 +1,211 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is responsible for mirroring the API provided by the C extension by
4
+ # using FFI to call into the shared library.
5
+
6
+ require "rbconfig"
7
+ require "ffi"
8
+
9
+ module YARP
10
+ BACKEND = :FFI
11
+
12
+ module LibRubyParser
13
+ extend FFI::Library
14
+
15
+ # Define the library that we will be pulling functions from. Note that this
16
+ # must align with the built shared library from make/rake.
17
+ ffi_lib File.expand_path("../../build/librubyparser.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
18
+
19
+ # Convert a native C type declaration into a symbol that FFI understands.
20
+ # For example:
21
+ #
22
+ # const char * -> :pointer
23
+ # bool -> :bool
24
+ # size_t -> :size_t
25
+ # void -> :void
26
+ #
27
+ def self.resolve_type(type)
28
+ type = type.strip.delete_prefix("const ")
29
+ type.end_with?("*") ? :pointer : type.to_sym
30
+ end
31
+
32
+ # Read through the given header file and find the declaration of each of the
33
+ # given functions. For each one, define a function with the same name and
34
+ # signature as the C function.
35
+ def self.load_exported_functions_from(header, *functions)
36
+ File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
37
+ # We only want to attempt to load exported functions.
38
+ next unless line.start_with?("YP_EXPORTED_FUNCTION ")
39
+
40
+ # We only want to load the functions that we are interested in.
41
+ next unless functions.any? { |function| line.include?(function) }
42
+
43
+ # Parse the function declaration.
44
+ unless /^YP_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
45
+ raise "Could not parse #{line}"
46
+ end
47
+
48
+ # Delete the function from the list of functions we are looking for to
49
+ # mark it as having been found.
50
+ functions.delete(name)
51
+
52
+ # Split up the argument types into an array, ensure we handle the case
53
+ # where there are no arguments (by explicit void).
54
+ arg_types = arg_types.split(",").map(&:strip)
55
+ arg_types = [] if arg_types == %w[void]
56
+
57
+ # Resolve the type of the argument by dropping the name of the argument
58
+ # first if it is present.
59
+ arg_types.map! { |type| resolve_type(type.sub(/\w+$/, "")) }
60
+
61
+ # Attach the function using the FFI library.
62
+ attach_function name, arg_types, resolve_type(return_type)
63
+ end
64
+
65
+ # If we didn't find all of the functions, raise an error.
66
+ raise "Could not find functions #{functions.inspect}" unless functions.empty?
67
+ end
68
+
69
+ load_exported_functions_from(
70
+ "yarp.h",
71
+ "yp_version",
72
+ "yp_parse_serialize",
73
+ "yp_lex_serialize"
74
+ )
75
+
76
+ load_exported_functions_from(
77
+ "yarp/util/yp_buffer.h",
78
+ "yp_buffer_init",
79
+ "yp_buffer_free"
80
+ )
81
+
82
+ load_exported_functions_from(
83
+ "yarp/util/yp_string.h",
84
+ "yp_string_mapped_init",
85
+ "yp_string_free",
86
+ "yp_string_source",
87
+ "yp_string_length",
88
+ "yp_string_sizeof"
89
+ )
90
+
91
+ # This object represents a yp_buffer_t. Its structure must be kept in sync
92
+ # with the C version.
93
+ class YPBuffer < FFI::Struct
94
+ layout value: :pointer, length: :size_t, capacity: :size_t
95
+
96
+ # Read the contents of the buffer into a String object and return it.
97
+ def to_ruby_string
98
+ self[:value].read_string(self[:length])
99
+ end
100
+ end
101
+
102
+ # Initialize a new buffer and yield it to the block. The buffer will be
103
+ # automatically freed when the block returns.
104
+ def self.with_buffer(&block)
105
+ buffer = YPBuffer.new
106
+
107
+ begin
108
+ raise unless yp_buffer_init(buffer)
109
+ yield buffer
110
+ ensure
111
+ yp_buffer_free(buffer)
112
+ buffer.pointer.free
113
+ end
114
+ end
115
+
116
+ # This object represents a yp_string_t. We only use it as an opaque pointer,
117
+ # so it doesn't have to be an FFI::Struct.
118
+ class YPString
119
+ attr_reader :pointer
120
+
121
+ def initialize(pointer)
122
+ @pointer = pointer
123
+ end
124
+
125
+ def source
126
+ LibRubyParser.yp_string_source(pointer)
127
+ end
128
+
129
+ def length
130
+ LibRubyParser.yp_string_length(pointer)
131
+ end
132
+
133
+ def read
134
+ source.read_string(length)
135
+ end
136
+ end
137
+
138
+ # This is the size of a yp_string_t. It is returned by the yp_string_sizeof
139
+ # function which we call once to ensure we have sufficient space for the
140
+ # yp_string_t FFI pointer.
141
+ SIZEOF_YP_STRING = yp_string_sizeof
142
+
143
+ # Yields a yp_string_t pointer to the given block.
144
+ def self.with_string(filepath, &block)
145
+ string = FFI::MemoryPointer.new(SIZEOF_YP_STRING)
146
+
147
+ begin
148
+ raise unless yp_string_mapped_init(string, filepath)
149
+ yield YPString.new(string)
150
+ ensure
151
+ yp_string_free(string)
152
+ string.free
153
+ end
154
+ end
155
+ end
156
+
157
+ # Mark the LibRubyParser module as private as it should only be called through
158
+ # the YARP module.
159
+ private_constant :LibRubyParser
160
+
161
+ # The version constant is set by reading the result of calling yp_version.
162
+ VERSION = LibRubyParser.yp_version.read_string
163
+
164
+ def self.dump_internal(source, source_size, filepath)
165
+ LibRubyParser.with_buffer do |buffer|
166
+ metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
167
+ LibRubyParser.yp_parse_serialize(source, source_size, buffer, metadata)
168
+ buffer.to_ruby_string
169
+ end
170
+ end
171
+ private_class_method :dump_internal
172
+
173
+ # Mirror the YARP.dump API by using the serialization API.
174
+ def self.dump(code, filepath = nil)
175
+ dump_internal(code, code.bytesize, filepath)
176
+ end
177
+
178
+ # Mirror the YARP.dump_file API by using the serialization API.
179
+ def self.dump_file(filepath)
180
+ LibRubyParser.with_string(filepath) do |string|
181
+ dump_internal(string.source, string.length, filepath)
182
+ end
183
+ end
184
+
185
+ # Mirror the YARP.lex API by using the serialization API.
186
+ def self.lex(code, filepath = nil)
187
+ LibRubyParser.with_buffer do |buffer|
188
+ LibRubyParser.yp_lex_serialize(code, code.bytesize, filepath, buffer)
189
+
190
+ source = Source.new(code)
191
+ Serialize.load_tokens(source, buffer.to_ruby_string).with_source(source)
192
+ end
193
+ end
194
+
195
+ # Mirror the YARP.lex_file API by using the serialization API.
196
+ def self.lex_file(filepath)
197
+ LibRubyParser.with_string(filepath) { |string| lex(string.read, filepath) }
198
+ end
199
+
200
+ # Mirror the YARP.parse API by using the serialization API.
201
+ def self.parse(code, filepath = nil)
202
+ YARP.load(code, dump(code, filepath)).with_source(Source.new(code))
203
+ end
204
+
205
+ # Mirror the YARP.parse_file API by using the serialization API. This uses
206
+ # native strings instead of Ruby strings because it allows us to use mmap when
207
+ # it is available.
208
+ def self.parse_file(filepath)
209
+ LibRubyParser.with_string(filepath) { |string| parse(string.read, filepath) }
210
+ end
211
+ end
@@ -647,19 +647,34 @@ module YARP
647
647
  # can shuffle around the token to match Ripper's output.
648
648
  case state
649
649
  when :default
650
+ # The default state is when there are no heredocs at all. In this
651
+ # state we can append the token to the list of tokens and move on.
650
652
  tokens << token
651
653
 
654
+ # If we get the declaration of a heredoc, then we open a new heredoc
655
+ # and move into the heredoc_opened state.
652
656
  if event == :on_heredoc_beg
653
657
  state = :heredoc_opened
654
658
  heredoc_stack.last << Heredoc.build(token)
655
659
  end
656
660
  when :heredoc_opened
661
+ # The heredoc_opened state is when we've seen the declaration of a
662
+ # heredoc and are now lexing the body of the heredoc. In this state we
663
+ # push tokens onto the most recently created heredoc.
657
664
  heredoc_stack.last.last << token
658
665
 
659
666
  case event
660
667
  when :on_heredoc_beg
668
+ # If we receive a heredoc declaration while lexing the body of a
669
+ # heredoc, this means we have nested heredocs. In this case we'll
670
+ # push a new heredoc onto the stack and stay in the heredoc_opened
671
+ # state since we're now lexing the body of the new heredoc.
661
672
  heredoc_stack << [Heredoc.build(token)]
662
673
  when :on_heredoc_end
674
+ # If we receive the end of a heredoc, then we're done lexing the
675
+ # body of the heredoc. In this case we now have a completed heredoc
676
+ # but need to wait for the next newline to push it into the token
677
+ # stream.
663
678
  state = :heredoc_closed
664
679
  end
665
680
  when :heredoc_closed
@@ -734,8 +749,7 @@ module YARP
734
749
  when :on_sp
735
750
  # skip
736
751
  when :on_tstring_content
737
- if previous[1] == :on_tstring_content &&
738
- (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
752
+ if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
739
753
  previous[2] << token[2]
740
754
  else
741
755
  results << token