yarp 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/CONTRIBUTING.md +4 -0
- data/{Makefile.in → Makefile} +3 -4
- data/README.md +1 -1
- data/config.yml +29 -7
- data/docs/build_system.md +4 -15
- data/docs/building.md +1 -5
- data/docs/encoding.md +1 -0
- data/docs/{extension.md → ruby_api.md} +6 -3
- data/docs/serialization.md +71 -24
- data/ext/yarp/api_node.c +38 -6
- data/ext/yarp/extconf.rb +15 -10
- data/ext/yarp/extension.c +2 -0
- data/ext/yarp/extension.h +1 -1
- data/include/yarp/ast.h +108 -104
- data/include/yarp/defines.h +0 -15
- data/include/yarp/enc/yp_encoding.h +1 -0
- data/include/yarp/util/yp_buffer.h +1 -0
- data/include/yarp/util/yp_string.h +5 -1
- data/include/yarp/version.h +2 -3
- data/include/yarp.h +4 -2
- data/lib/yarp/ffi.rb +211 -0
- data/lib/yarp/lex_compat.rb +16 -2
- data/lib/yarp/node.rb +169 -117
- data/lib/yarp/ripper_compat.rb +3 -3
- data/lib/yarp/serialize.rb +285 -92
- data/lib/yarp.rb +167 -2
- data/src/enc/yp_unicode.c +9 -0
- data/src/node.c +22 -0
- data/src/prettyprint.c +49 -30
- data/src/serialize.c +90 -17
- data/src/util/yp_string.c +8 -17
- data/src/yarp.c +181 -49
- data/yarp.gemspec +5 -5
- metadata +6 -6
- data/config.h.in +0 -25
- data/configure +0 -4487
data/include/yarp/ast.h
CHANGED
@@ -259,95 +259,97 @@ enum yp_node_type {
|
|
259
259
|
YP_NODE_ENSURE_NODE = 43,
|
260
260
|
YP_NODE_FALSE_NODE = 44,
|
261
261
|
YP_NODE_FIND_PATTERN_NODE = 45,
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
262
|
+
YP_NODE_FLIP_FLOP_NODE = 46,
|
263
|
+
YP_NODE_FLOAT_NODE = 47,
|
264
|
+
YP_NODE_FOR_NODE = 48,
|
265
|
+
YP_NODE_FORWARDING_ARGUMENTS_NODE = 49,
|
266
|
+
YP_NODE_FORWARDING_PARAMETER_NODE = 50,
|
267
|
+
YP_NODE_FORWARDING_SUPER_NODE = 51,
|
268
|
+
YP_NODE_GLOBAL_VARIABLE_OPERATOR_AND_WRITE_NODE = 52,
|
269
|
+
YP_NODE_GLOBAL_VARIABLE_OPERATOR_OR_WRITE_NODE = 53,
|
270
|
+
YP_NODE_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE = 54,
|
271
|
+
YP_NODE_GLOBAL_VARIABLE_READ_NODE = 55,
|
272
|
+
YP_NODE_GLOBAL_VARIABLE_WRITE_NODE = 56,
|
273
|
+
YP_NODE_HASH_NODE = 57,
|
274
|
+
YP_NODE_HASH_PATTERN_NODE = 58,
|
275
|
+
YP_NODE_IF_NODE = 59,
|
276
|
+
YP_NODE_IMAGINARY_NODE = 60,
|
277
|
+
YP_NODE_IN_NODE = 61,
|
278
|
+
YP_NODE_INSTANCE_VARIABLE_OPERATOR_AND_WRITE_NODE = 62,
|
279
|
+
YP_NODE_INSTANCE_VARIABLE_OPERATOR_OR_WRITE_NODE = 63,
|
280
|
+
YP_NODE_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE = 64,
|
281
|
+
YP_NODE_INSTANCE_VARIABLE_READ_NODE = 65,
|
282
|
+
YP_NODE_INSTANCE_VARIABLE_WRITE_NODE = 66,
|
283
|
+
YP_NODE_INTEGER_NODE = 67,
|
284
|
+
YP_NODE_INTERPOLATED_REGULAR_EXPRESSION_NODE = 68,
|
285
|
+
YP_NODE_INTERPOLATED_STRING_NODE = 69,
|
286
|
+
YP_NODE_INTERPOLATED_SYMBOL_NODE = 70,
|
287
|
+
YP_NODE_INTERPOLATED_X_STRING_NODE = 71,
|
288
|
+
YP_NODE_KEYWORD_HASH_NODE = 72,
|
289
|
+
YP_NODE_KEYWORD_PARAMETER_NODE = 73,
|
290
|
+
YP_NODE_KEYWORD_REST_PARAMETER_NODE = 74,
|
291
|
+
YP_NODE_LAMBDA_NODE = 75,
|
292
|
+
YP_NODE_LOCAL_VARIABLE_OPERATOR_AND_WRITE_NODE = 76,
|
293
|
+
YP_NODE_LOCAL_VARIABLE_OPERATOR_OR_WRITE_NODE = 77,
|
294
|
+
YP_NODE_LOCAL_VARIABLE_OPERATOR_WRITE_NODE = 78,
|
295
|
+
YP_NODE_LOCAL_VARIABLE_READ_NODE = 79,
|
296
|
+
YP_NODE_LOCAL_VARIABLE_WRITE_NODE = 80,
|
297
|
+
YP_NODE_MATCH_PREDICATE_NODE = 81,
|
298
|
+
YP_NODE_MATCH_REQUIRED_NODE = 82,
|
299
|
+
YP_NODE_MISSING_NODE = 83,
|
300
|
+
YP_NODE_MODULE_NODE = 84,
|
301
|
+
YP_NODE_MULTI_WRITE_NODE = 85,
|
302
|
+
YP_NODE_NEXT_NODE = 86,
|
303
|
+
YP_NODE_NIL_NODE = 87,
|
304
|
+
YP_NODE_NO_KEYWORDS_PARAMETER_NODE = 88,
|
305
|
+
YP_NODE_NUMBERED_REFERENCE_READ_NODE = 89,
|
306
|
+
YP_NODE_OPTIONAL_PARAMETER_NODE = 90,
|
307
|
+
YP_NODE_OR_NODE = 91,
|
308
|
+
YP_NODE_PARAMETERS_NODE = 92,
|
309
|
+
YP_NODE_PARENTHESES_NODE = 93,
|
310
|
+
YP_NODE_PINNED_EXPRESSION_NODE = 94,
|
311
|
+
YP_NODE_PINNED_VARIABLE_NODE = 95,
|
312
|
+
YP_NODE_POST_EXECUTION_NODE = 96,
|
313
|
+
YP_NODE_PRE_EXECUTION_NODE = 97,
|
314
|
+
YP_NODE_PROGRAM_NODE = 98,
|
315
|
+
YP_NODE_RANGE_NODE = 99,
|
316
|
+
YP_NODE_RATIONAL_NODE = 100,
|
317
|
+
YP_NODE_REDO_NODE = 101,
|
318
|
+
YP_NODE_REGULAR_EXPRESSION_NODE = 102,
|
319
|
+
YP_NODE_REQUIRED_DESTRUCTURED_PARAMETER_NODE = 103,
|
320
|
+
YP_NODE_REQUIRED_PARAMETER_NODE = 104,
|
321
|
+
YP_NODE_RESCUE_MODIFIER_NODE = 105,
|
322
|
+
YP_NODE_RESCUE_NODE = 106,
|
323
|
+
YP_NODE_REST_PARAMETER_NODE = 107,
|
324
|
+
YP_NODE_RETRY_NODE = 108,
|
325
|
+
YP_NODE_RETURN_NODE = 109,
|
326
|
+
YP_NODE_SELF_NODE = 110,
|
327
|
+
YP_NODE_SINGLETON_CLASS_NODE = 111,
|
328
|
+
YP_NODE_SOURCE_ENCODING_NODE = 112,
|
329
|
+
YP_NODE_SOURCE_FILE_NODE = 113,
|
330
|
+
YP_NODE_SOURCE_LINE_NODE = 114,
|
331
|
+
YP_NODE_SPLAT_NODE = 115,
|
332
|
+
YP_NODE_STATEMENTS_NODE = 116,
|
333
|
+
YP_NODE_STRING_CONCAT_NODE = 117,
|
334
|
+
YP_NODE_STRING_NODE = 118,
|
335
|
+
YP_NODE_SUPER_NODE = 119,
|
336
|
+
YP_NODE_SYMBOL_NODE = 120,
|
337
|
+
YP_NODE_TRUE_NODE = 121,
|
338
|
+
YP_NODE_UNDEF_NODE = 122,
|
339
|
+
YP_NODE_UNLESS_NODE = 123,
|
340
|
+
YP_NODE_UNTIL_NODE = 124,
|
341
|
+
YP_NODE_WHEN_NODE = 125,
|
342
|
+
YP_NODE_WHILE_NODE = 126,
|
343
|
+
YP_NODE_X_STRING_NODE = 127,
|
344
|
+
YP_NODE_YIELD_NODE = 128,
|
344
345
|
};
|
345
346
|
|
346
347
|
typedef uint16_t yp_node_type_t;
|
347
348
|
typedef uint16_t yp_node_flags_t;
|
348
349
|
|
349
|
-
// We store the flags enum in every node in the tree
|
350
|
-
|
350
|
+
// We store the flags enum in every node in the tree. Some flags are common to
|
351
|
+
// all nodes (the ones listed below). Others are specific to certain node types.
|
352
|
+
static const yp_node_flags_t YP_NODE_FLAG_NEWLINE = 0x1;
|
351
353
|
|
352
354
|
// For easy access, we define some macros to check node type
|
353
355
|
#define YP_NODE_TYPE(node) ((enum yp_node_type)node->type)
|
@@ -498,7 +500,6 @@ typedef struct yp_call_node {
|
|
498
500
|
struct yp_arguments_node *arguments;
|
499
501
|
yp_location_t closing_loc;
|
500
502
|
struct yp_block_node *block;
|
501
|
-
uint32_t flags;
|
502
503
|
yp_string_t name;
|
503
504
|
} yp_call_node_t;
|
504
505
|
|
@@ -746,6 +747,14 @@ typedef struct yp_find_pattern_node {
|
|
746
747
|
yp_location_t closing_loc;
|
747
748
|
} yp_find_pattern_node_t;
|
748
749
|
|
750
|
+
// FlipFlopNode
|
751
|
+
typedef struct yp_flip_flop_node {
|
752
|
+
yp_node_t base;
|
753
|
+
struct yp_node *left;
|
754
|
+
struct yp_node *right;
|
755
|
+
yp_location_t operator_loc;
|
756
|
+
} yp_flip_flop_node_t;
|
757
|
+
|
749
758
|
// FloatNode
|
750
759
|
typedef struct yp_float_node {
|
751
760
|
yp_node_t base;
|
@@ -909,7 +918,6 @@ typedef struct yp_interpolated_regular_expression_node {
|
|
909
918
|
yp_location_t opening_loc;
|
910
919
|
struct yp_node_list parts;
|
911
920
|
yp_location_t closing_loc;
|
912
|
-
uint32_t flags;
|
913
921
|
} yp_interpolated_regular_expression_node_t;
|
914
922
|
|
915
923
|
// InterpolatedStringNode
|
@@ -1159,7 +1167,6 @@ typedef struct yp_range_node {
|
|
1159
1167
|
struct yp_node *left;
|
1160
1168
|
struct yp_node *right;
|
1161
1169
|
yp_location_t operator_loc;
|
1162
|
-
uint32_t flags;
|
1163
1170
|
} yp_range_node_t;
|
1164
1171
|
|
1165
1172
|
// RationalNode
|
@@ -1180,7 +1187,6 @@ typedef struct yp_regular_expression_node {
|
|
1180
1187
|
yp_location_t content_loc;
|
1181
1188
|
yp_location_t closing_loc;
|
1182
1189
|
yp_string_t unescaped;
|
1183
|
-
uint32_t flags;
|
1184
1190
|
} yp_regular_expression_node_t;
|
1185
1191
|
|
1186
1192
|
// RequiredDestructuredParameterNode
|
@@ -1343,7 +1349,6 @@ typedef struct yp_until_node {
|
|
1343
1349
|
yp_location_t keyword_loc;
|
1344
1350
|
struct yp_node *predicate;
|
1345
1351
|
struct yp_statements_node *statements;
|
1346
|
-
uint32_t flags;
|
1347
1352
|
} yp_until_node_t;
|
1348
1353
|
|
1349
1354
|
// WhenNode
|
@@ -1360,7 +1365,6 @@ typedef struct yp_while_node {
|
|
1360
1365
|
yp_location_t keyword_loc;
|
1361
1366
|
struct yp_node *predicate;
|
1362
1367
|
struct yp_statements_node *statements;
|
1363
|
-
uint32_t flags;
|
1364
1368
|
} yp_while_node_t;
|
1365
1369
|
|
1366
1370
|
// XStringNode
|
@@ -1383,30 +1387,30 @@ typedef struct yp_yield_node {
|
|
1383
1387
|
|
1384
1388
|
// CallNodeFlags
|
1385
1389
|
typedef enum {
|
1386
|
-
YP_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 0,
|
1387
|
-
YP_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 1,
|
1390
|
+
YP_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 1,
|
1391
|
+
YP_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 2,
|
1388
1392
|
} yp_call_node_flags_t;
|
1389
1393
|
|
1390
1394
|
// LoopFlags
|
1391
1395
|
typedef enum {
|
1392
|
-
YP_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 0,
|
1396
|
+
YP_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 1,
|
1393
1397
|
} yp_loop_flags_t;
|
1394
1398
|
|
1395
|
-
//
|
1399
|
+
// RangeFlags
|
1396
1400
|
typedef enum {
|
1397
|
-
|
1398
|
-
}
|
1401
|
+
YP_RANGE_FLAGS_EXCLUDE_END = 1 << 1,
|
1402
|
+
} yp_range_flags_t;
|
1399
1403
|
|
1400
1404
|
// RegularExpressionFlags
|
1401
1405
|
typedef enum {
|
1402
|
-
YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 0,
|
1403
|
-
YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 1,
|
1404
|
-
YP_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 2,
|
1405
|
-
YP_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 3,
|
1406
|
-
YP_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 4,
|
1407
|
-
YP_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 5,
|
1408
|
-
YP_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 6,
|
1409
|
-
YP_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 7,
|
1406
|
+
YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 1,
|
1407
|
+
YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 2,
|
1408
|
+
YP_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 3,
|
1409
|
+
YP_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 4,
|
1410
|
+
YP_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 5,
|
1411
|
+
YP_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 6,
|
1412
|
+
YP_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 7,
|
1413
|
+
YP_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 8,
|
1410
1414
|
} yp_regular_expression_flags_t;
|
1411
1415
|
|
1412
1416
|
#endif // YARP_AST_H
|
data/include/yarp/defines.h
CHANGED
@@ -3,8 +3,6 @@
|
|
3
3
|
|
4
4
|
// This file should be included first by any *.h or *.c in YARP
|
5
5
|
|
6
|
-
#include "yarp/config.h"
|
7
|
-
|
8
6
|
#include <ctype.h>
|
9
7
|
#include <stdarg.h>
|
10
8
|
#include <stddef.h>
|
@@ -38,17 +36,4 @@
|
|
38
36
|
|
39
37
|
int yp_strncasecmp(const char *string1, const char *string2, size_t length);
|
40
38
|
|
41
|
-
int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
|
42
|
-
|
43
|
-
#if defined(HAVE_SNPRINTF)
|
44
|
-
// We use snprintf if it's available
|
45
|
-
# define yp_snprintf snprintf
|
46
|
-
|
47
|
-
#else
|
48
|
-
// In case snprintf isn't present on the system, we provide our own that simply
|
49
|
-
// forwards to the less-safe sprintf.
|
50
|
-
# define yp_snprintf(dest, size, ...) sprintf((dest), __VA_ARGS__)
|
51
|
-
|
52
|
-
#endif
|
53
|
-
|
54
39
|
#endif
|
data/include/yarp/enc/yp_encoding.h
CHANGED
@@ -87,6 +87,7 @@ extern yp_encoding_t yp_encoding_iso_8859_16;
|
|
87
87
|
extern yp_encoding_t yp_encoding_koi8_r;
|
88
88
|
extern yp_encoding_t yp_encoding_shift_jis;
|
89
89
|
extern yp_encoding_t yp_encoding_utf_8;
|
90
|
+
extern yp_encoding_t yp_encoding_utf8_mac;
|
90
91
|
extern yp_encoding_t yp_encoding_windows_31j;
|
91
92
|
extern yp_encoding_t yp_encoding_windows_1251;
|
92
93
|
extern yp_encoding_t yp_encoding_windows_1252;
|
data/include/yarp/util/yp_buffer.h
CHANGED
@@ -12,6 +12,7 @@
|
|
12
12
|
// A yp_buffer_t is a simple memory buffer that stores data in a contiguous
|
13
13
|
// block of memory. It is used to store the serialized representation of a
|
14
14
|
// YARP tree.
|
15
|
+
// NOTE: keep in sync with YARP::LibRubyParser::Buffer in lib/yarp.rb
|
15
16
|
typedef struct {
|
16
17
|
char *value;
|
17
18
|
size_t length;
|
data/include/yarp/util/yp_string.h
CHANGED
@@ -36,7 +36,7 @@ void yp_string_constant_init(yp_string_t *string, const char *source, size_t len
|
|
36
36
|
// for large files). This means that if we're on windows we'll use
|
37
37
|
// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
38
38
|
// `mmap`, and on other POSIX systems we'll use `read`.
|
39
|
-
bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
|
39
|
+
YP_EXPORTED_FUNCTION bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
|
40
40
|
|
41
41
|
// Returns the memory size associated with the string.
|
42
42
|
size_t yp_string_memsize(const yp_string_t *string);
|
@@ -54,4 +54,8 @@ YP_EXPORTED_FUNCTION const char * yp_string_source(const yp_string_t *string);
|
|
54
54
|
// Free the associated memory of the given string.
|
55
55
|
YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
|
56
56
|
|
57
|
+
// Returns the size of the yp_string_t struct. This is necessary to allocate the
|
58
|
+
// correct amount of memory in the FFI backend.
|
59
|
+
YP_EXPORTED_FUNCTION size_t yp_string_sizeof(void);
|
60
|
+
|
57
61
|
#endif // YARP_STRING_H
|
data/include/yarp/version.h
CHANGED
data/include/yarp.h
CHANGED
@@ -59,10 +59,12 @@ YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, y
|
|
59
59
|
// Serialize the AST represented by the given node to the given buffer.
|
60
60
|
YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
|
61
61
|
|
62
|
-
// Parse and serialize the AST represented by the given source to the given
|
63
|
-
// buffer.
|
62
|
+
// Parse the given source to the AST and serialize the AST to the given buffer.
|
64
63
|
YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata);
|
65
64
|
|
65
|
+
// Lex the given source and serialize to the given buffer.
|
66
|
+
YP_EXPORTED_FUNCTION void yp_lex_serialize(const char *source, size_t size, const char *filepath, yp_buffer_t *buffer);
|
67
|
+
|
66
68
|
// Returns a string representation of the given token type.
|
67
69
|
YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
|
68
70
|
|
data/lib/yarp/ffi.rb
ADDED
@@ -0,0 +1,211 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file is responsible for mirroring the API provided by the C extension by
|
4
|
+
# using FFI to call into the shared library.
|
5
|
+
|
6
|
+
require "rbconfig"
|
7
|
+
require "ffi"
|
8
|
+
|
9
|
+
module YARP
|
10
|
+
BACKEND = :FFI
|
11
|
+
|
12
|
+
module LibRubyParser
|
13
|
+
extend FFI::Library
|
14
|
+
|
15
|
+
# Define the library that we will be pulling functions from. Note that this
|
16
|
+
# must align with the build shared library from make/rake.
|
17
|
+
ffi_lib File.expand_path("../../build/librubyparser.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
|
18
|
+
|
19
|
+
# Convert a native C type declaration into a symbol that FFI understands.
|
20
|
+
# For example:
|
21
|
+
#
|
22
|
+
# const char * -> :pointer
|
23
|
+
# bool -> :bool
|
24
|
+
# size_t -> :size_t
|
25
|
+
# void -> :void
|
26
|
+
#
|
27
|
+
def self.resolve_type(type)
|
28
|
+
type = type.strip.delete_prefix("const ")
|
29
|
+
type.end_with?("*") ? :pointer : type.to_sym
|
30
|
+
end
|
31
|
+
|
32
|
+
# Read through the given header file and find the declaration of each of the
|
33
|
+
# given functions. For each one, define a function with the same name and
|
34
|
+
# signature as the C function.
|
35
|
+
def self.load_exported_functions_from(header, *functions)
|
36
|
+
File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
|
37
|
+
# We only want to attempt to load exported functions.
|
38
|
+
next unless line.start_with?("YP_EXPORTED_FUNCTION ")
|
39
|
+
|
40
|
+
# We only want to load the functions that we are interested in.
|
41
|
+
next unless functions.any? { |function| line.include?(function) }
|
42
|
+
|
43
|
+
# Parse the function declaration.
|
44
|
+
unless /^YP_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
|
45
|
+
raise "Could not parse #{line}"
|
46
|
+
end
|
47
|
+
|
48
|
+
# Delete the function from the list of functions we are looking for to
|
49
|
+
# mark it as having been found.
|
50
|
+
functions.delete(name)
|
51
|
+
|
52
|
+
# Split up the argument types into an array, ensure we handle the case
|
53
|
+
# where there are no arguments (by explicit void).
|
54
|
+
arg_types = arg_types.split(",").map(&:strip)
|
55
|
+
arg_types = [] if arg_types == %w[void]
|
56
|
+
|
57
|
+
# Resolve the type of the argument by dropping the name of the argument
|
58
|
+
# first if it is present.
|
59
|
+
arg_types.map! { |type| resolve_type(type.sub(/\w+$/, "")) }
|
60
|
+
|
61
|
+
# Attach the function using the FFI library.
|
62
|
+
attach_function name, arg_types, resolve_type(return_type)
|
63
|
+
end
|
64
|
+
|
65
|
+
# If we didn't find all of the functions, raise an error.
|
66
|
+
raise "Could not find functions #{functions.inspect}" unless functions.empty?
|
67
|
+
end
|
68
|
+
|
69
|
+
load_exported_functions_from(
|
70
|
+
"yarp.h",
|
71
|
+
"yp_version",
|
72
|
+
"yp_parse_serialize",
|
73
|
+
"yp_lex_serialize"
|
74
|
+
)
|
75
|
+
|
76
|
+
load_exported_functions_from(
|
77
|
+
"yarp/util/yp_buffer.h",
|
78
|
+
"yp_buffer_init",
|
79
|
+
"yp_buffer_free"
|
80
|
+
)
|
81
|
+
|
82
|
+
load_exported_functions_from(
|
83
|
+
"yarp/util/yp_string.h",
|
84
|
+
"yp_string_mapped_init",
|
85
|
+
"yp_string_free",
|
86
|
+
"yp_string_source",
|
87
|
+
"yp_string_length",
|
88
|
+
"yp_string_sizeof"
|
89
|
+
)
|
90
|
+
|
91
|
+
# This object represents a yp_buffer_t. Its structure must be kept in sync
|
92
|
+
# with the C version.
|
93
|
+
class YPBuffer < FFI::Struct
|
94
|
+
layout value: :pointer, length: :size_t, capacity: :size_t
|
95
|
+
|
96
|
+
# Read the contents of the buffer into a String object and return it.
|
97
|
+
def to_ruby_string
|
98
|
+
self[:value].read_string(self[:length])
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# Initialize a new buffer and yield it to the block. The buffer will be
|
103
|
+
# automatically freed when the block returns.
|
104
|
+
def self.with_buffer(&block)
|
105
|
+
buffer = YPBuffer.new
|
106
|
+
|
107
|
+
begin
|
108
|
+
raise unless yp_buffer_init(buffer)
|
109
|
+
yield buffer
|
110
|
+
ensure
|
111
|
+
yp_buffer_free(buffer)
|
112
|
+
buffer.pointer.free
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# This object represents a yp_string_t. We only use it as an opaque pointer,
|
117
|
+
# so it doesn't have to be an FFI::Struct.
|
118
|
+
class YPString
|
119
|
+
attr_reader :pointer
|
120
|
+
|
121
|
+
def initialize(pointer)
|
122
|
+
@pointer = pointer
|
123
|
+
end
|
124
|
+
|
125
|
+
def source
|
126
|
+
LibRubyParser.yp_string_source(pointer)
|
127
|
+
end
|
128
|
+
|
129
|
+
def length
|
130
|
+
LibRubyParser.yp_string_length(pointer)
|
131
|
+
end
|
132
|
+
|
133
|
+
def read
|
134
|
+
source.read_string(length)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
# This is the size of a yp_string_t. It is returned by the yp_string_sizeof
|
139
|
+
# function which we call once to ensure we have sufficient space for the
|
140
|
+
# yp_string_t FFI pointer.
|
141
|
+
SIZEOF_YP_STRING = yp_string_sizeof
|
142
|
+
|
143
|
+
# Yields a yp_string_t pointer to the given block.
|
144
|
+
def self.with_string(filepath, &block)
|
145
|
+
string = FFI::MemoryPointer.new(SIZEOF_YP_STRING)
|
146
|
+
|
147
|
+
begin
|
148
|
+
raise unless yp_string_mapped_init(string, filepath)
|
149
|
+
yield YPString.new(string)
|
150
|
+
ensure
|
151
|
+
yp_string_free(string)
|
152
|
+
string.free
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
# Mark the LibRubyParser module as private as it should only be called through
|
158
|
+
# the YARP module.
|
159
|
+
private_constant :LibRubyParser
|
160
|
+
|
161
|
+
# The version constant is set by reading the result of calling yp_version.
|
162
|
+
VERSION = LibRubyParser.yp_version.read_string
|
163
|
+
|
164
|
+
def self.dump_internal(source, source_size, filepath)
|
165
|
+
LibRubyParser.with_buffer do |buffer|
|
166
|
+
metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
|
167
|
+
LibRubyParser.yp_parse_serialize(source, source_size, buffer, metadata)
|
168
|
+
buffer.to_ruby_string
|
169
|
+
end
|
170
|
+
end
|
171
|
+
private_class_method :dump_internal
|
172
|
+
|
173
|
+
# Mirror the YARP.dump API by using the serialization API.
|
174
|
+
def self.dump(code, filepath = nil)
|
175
|
+
dump_internal(code, code.bytesize, filepath)
|
176
|
+
end
|
177
|
+
|
178
|
+
# Mirror the YARP.dump_file API by using the serialization API.
|
179
|
+
def self.dump_file(filepath)
|
180
|
+
LibRubyParser.with_string(filepath) do |string|
|
181
|
+
dump_internal(string.source, string.length, filepath)
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
# Mirror the YARP.lex API by using the serialization API.
|
186
|
+
def self.lex(code, filepath = nil)
|
187
|
+
LibRubyParser.with_buffer do |buffer|
|
188
|
+
LibRubyParser.yp_lex_serialize(code, code.bytesize, filepath, buffer)
|
189
|
+
|
190
|
+
source = Source.new(code)
|
191
|
+
Serialize.load_tokens(source, buffer.to_ruby_string).with_source(source)
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
# Mirror the YARP.lex_file API by using the serialization API.
|
196
|
+
def self.lex_file(filepath)
|
197
|
+
LibRubyParser.with_string(filepath) { |string| lex(string.read, filepath) }
|
198
|
+
end
|
199
|
+
|
200
|
+
# Mirror the YARP.parse API by using the serialization API.
|
201
|
+
def self.parse(code, filepath = nil)
|
202
|
+
YARP.load(code, dump(code, filepath)).with_source(Source.new(code))
|
203
|
+
end
|
204
|
+
|
205
|
+
# Mirror the YARP.parse_file API by using the serialization API. This uses
|
206
|
+
# native strings instead of Ruby strings because it allows us to use mmap when
|
207
|
+
# it is available.
|
208
|
+
def self.parse_file(filepath)
|
209
|
+
LibRubyParser.with_string(filepath) { |string| parse(string.read, filepath) }
|
210
|
+
end
|
211
|
+
end
|
data/lib/yarp/lex_compat.rb
CHANGED
@@ -647,19 +647,34 @@ module YARP
|
|
647
647
|
# can shuffle around the token to match Ripper's output.
|
648
648
|
case state
|
649
649
|
when :default
|
650
|
+
# The default state is when there are no heredocs at all. In this
|
651
|
+
# state we can append the token to the list of tokens and move on.
|
650
652
|
tokens << token
|
651
653
|
|
654
|
+
# If we get the declaration of a heredoc, then we open a new heredoc
|
655
|
+
# and move into the heredoc_opened state.
|
652
656
|
if event == :on_heredoc_beg
|
653
657
|
state = :heredoc_opened
|
654
658
|
heredoc_stack.last << Heredoc.build(token)
|
655
659
|
end
|
656
660
|
when :heredoc_opened
|
661
|
+
# The heredoc_opened state is when we've seen the declaration of a
|
662
|
+
# heredoc and are now lexing the body of the heredoc. In this state we
|
663
|
+
# push tokens onto the most recently created heredoc.
|
657
664
|
heredoc_stack.last.last << token
|
658
665
|
|
659
666
|
case event
|
660
667
|
when :on_heredoc_beg
|
668
|
+
# If we receive a heredoc declaration while lexing the body of a
|
669
|
+
# heredoc, this means we have nested heredocs. In this case we'll
|
670
|
+
# push a new heredoc onto the stack and stay in the heredoc_opened
|
671
|
+
# state since we're now lexing the body of the new heredoc.
|
661
672
|
heredoc_stack << [Heredoc.build(token)]
|
662
673
|
when :on_heredoc_end
|
674
|
+
# If we receive the end of a heredoc, then we're done lexing the
|
675
|
+
# body of the heredoc. In this case we now have a completed heredoc
|
676
|
+
# but need to wait for the next newline to push it into the token
|
677
|
+
# stream.
|
663
678
|
state = :heredoc_closed
|
664
679
|
end
|
665
680
|
when :heredoc_closed
|
@@ -734,8 +749,7 @@ module YARP
|
|
734
749
|
when :on_sp
|
735
750
|
# skip
|
736
751
|
when :on_tstring_content
|
737
|
-
if previous[1] == :on_tstring_content &&
|
738
|
-
(token[2].start_with?("\#$") || token[2].start_with?("\#@"))
|
752
|
+
if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
|
739
753
|
previous[2] << token[2]
|
740
754
|
else
|
741
755
|
results << token
|