yarp 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/CONTRIBUTING.md +4 -0
- data/{Makefile.in → Makefile} +5 -4
- data/README.md +6 -3
- data/config.yml +83 -274
- data/docs/build_system.md +4 -15
- data/docs/building.md +1 -5
- data/docs/encoding.md +1 -0
- data/docs/{extension.md → ruby_api.md} +6 -3
- data/docs/serialization.md +71 -24
- data/ext/yarp/api_node.c +173 -585
- data/ext/yarp/extconf.rb +15 -10
- data/ext/yarp/extension.c +4 -2
- data/ext/yarp/extension.h +1 -1
- data/include/yarp/ast.h +167 -306
- data/include/yarp/defines.h +5 -15
- data/include/yarp/enc/yp_encoding.h +1 -0
- data/include/yarp/unescape.h +1 -1
- data/include/yarp/util/yp_buffer.h +9 -0
- data/include/yarp/util/yp_constant_pool.h +3 -0
- data/include/yarp/util/yp_list.h +7 -7
- data/include/yarp/util/yp_newline_list.h +4 -0
- data/include/yarp/util/yp_state_stack.h +1 -1
- data/include/yarp/util/yp_string.h +5 -1
- data/include/yarp/version.h +2 -3
- data/include/yarp.h +4 -2
- data/lib/yarp/ffi.rb +226 -0
- data/lib/yarp/lex_compat.rb +16 -2
- data/lib/yarp/node.rb +594 -1437
- data/lib/yarp/ripper_compat.rb +3 -3
- data/lib/yarp/serialize.rb +312 -149
- data/lib/yarp.rb +167 -2
- data/src/enc/yp_unicode.c +9 -0
- data/src/node.c +92 -250
- data/src/prettyprint.c +81 -206
- data/src/serialize.c +124 -149
- data/src/unescape.c +29 -35
- data/src/util/yp_buffer.c +18 -0
- data/src/util/yp_list.c +7 -16
- data/src/util/yp_state_stack.c +0 -6
- data/src/util/yp_string.c +8 -17
- data/src/yarp.c +444 -717
- data/yarp.gemspec +5 -5
- metadata +6 -6
- data/config.h.in +0 -25
- data/configure +0 -4487
data/include/yarp/unescape.h
CHANGED
@@ -31,7 +31,7 @@ typedef enum {
|
|
31
31
|
|
32
32
|
// Unescape the contents of the given token into the given string using the
|
33
33
|
// given unescape mode.
|
34
|
-
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser,
|
34
|
+
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
|
35
35
|
|
36
36
|
// Accepts a source string and a type of unescaping and returns the unescaped version.
|
37
37
|
// The caller must yp_string_free(result); after calling this function.
|
data/include/yarp/util/yp_buffer.h
CHANGED
@@ -18,9 +18,18 @@ typedef struct {
|
|
18
18
|
size_t capacity;
|
19
19
|
} yp_buffer_t;
|
20
20
|
|
21
|
+
// Return the size of the yp_buffer_t struct.
|
22
|
+
YP_EXPORTED_FUNCTION size_t yp_buffer_sizeof(void);
|
23
|
+
|
21
24
|
// Initialize a yp_buffer_t with its default values.
|
22
25
|
YP_EXPORTED_FUNCTION bool yp_buffer_init(yp_buffer_t *buffer);
|
23
26
|
|
27
|
+
// Return the value of the buffer.
|
28
|
+
YP_EXPORTED_FUNCTION char * yp_buffer_value(yp_buffer_t *buffer);
|
29
|
+
|
30
|
+
// Return the length of the buffer.
|
31
|
+
YP_EXPORTED_FUNCTION size_t yp_buffer_length(yp_buffer_t *buffer);
|
32
|
+
|
24
33
|
// Append the given amount of space as zeroes to the buffer.
|
25
34
|
void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length);
|
26
35
|
|
data/include/yarp/util/yp_constant_pool.h
CHANGED
@@ -51,6 +51,9 @@ typedef struct {
|
|
51
51
|
size_t capacity;
|
52
52
|
} yp_constant_pool_t;
|
53
53
|
|
54
|
+
// Define an empty constant pool.
|
55
|
+
#define YP_CONSTANT_POOL_EMPTY ((yp_constant_pool_t) { .constants = NULL, .size = 0, .capacity = 0 })
|
56
|
+
|
54
57
|
// Initialize a new constant pool with a given capacity.
|
55
58
|
bool yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity);
|
56
59
|
|
data/include/yarp/util/yp_list.h
CHANGED
@@ -15,9 +15,7 @@
|
|
15
15
|
// int value;
|
16
16
|
// } yp_int_node_t;
|
17
17
|
//
|
18
|
-
// yp_list_t list;
|
19
|
-
// yp_list_init(&list);
|
20
|
-
//
|
18
|
+
// yp_list_t list = YP_LIST_EMPTY;
|
21
19
|
// yp_int_node_t *node = malloc(sizeof(yp_int_node_t));
|
22
20
|
// node->value = 5;
|
23
21
|
//
|
@@ -45,18 +43,20 @@ typedef struct yp_list_node {
|
|
45
43
|
// This represents the overall linked list. It keeps a pointer to the head and
|
46
44
|
// tail so that iteration is easy and pushing new nodes is easy.
|
47
45
|
typedef struct {
|
46
|
+
size_t size;
|
48
47
|
yp_list_node_t *head;
|
49
48
|
yp_list_node_t *tail;
|
50
49
|
} yp_list_t;
|
51
50
|
|
52
|
-
//
|
53
|
-
|
51
|
+
// This represents an empty list. It's used to initialize a stack-allocated list
|
52
|
+
// as opposed to a method call.
|
53
|
+
#define YP_LIST_EMPTY ((yp_list_t) { .size = 0, .head = NULL, .tail = NULL })
|
54
54
|
|
55
55
|
// Returns true if the given list is empty.
|
56
56
|
YP_EXPORTED_FUNCTION bool yp_list_empty_p(yp_list_t *list);
|
57
57
|
|
58
|
-
// Returns the size of the list
|
59
|
-
YP_EXPORTED_FUNCTION
|
58
|
+
// Returns the size of the list.
|
59
|
+
YP_EXPORTED_FUNCTION size_t yp_list_size(yp_list_t *list);
|
60
60
|
|
61
61
|
// Append a node to the given list.
|
62
62
|
void yp_list_append(yp_list_t *list, yp_list_node_t *node);
|
data/include/yarp/util/yp_newline_list.h
CHANGED
@@ -35,6 +35,10 @@ typedef struct {
|
|
35
35
|
size_t column;
|
36
36
|
} yp_line_column_t;
|
37
37
|
|
38
|
+
#define YP_NEWLINE_LIST_EMPTY ((yp_newline_list_t) { \
|
39
|
+
.start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \
|
40
|
+
})
|
41
|
+
|
38
42
|
// Initialize a new newline list with the given capacity. Returns true if the
|
39
43
|
// allocation of the offsets succeeds, otherwise returns false.
|
40
44
|
bool yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity);
|
data/include/yarp/util/yp_state_stack.h
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
typedef uint32_t yp_state_stack_t;
|
11
11
|
|
12
12
|
// Initializes the state stack to an empty stack.
|
13
|
-
|
13
|
+
#define YP_STATE_STACK_EMPTY ((yp_state_stack_t) 0)
|
14
14
|
|
15
15
|
// Pushes a value onto the stack.
|
16
16
|
void yp_state_stack_push(yp_state_stack_t *stack, bool value);
|
data/include/yarp/util/yp_string.h
CHANGED
@@ -36,7 +36,7 @@ void yp_string_constant_init(yp_string_t *string, const char *source, size_t len
|
|
36
36
|
// for large files). This means that if we're on windows we'll use
|
37
37
|
// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
38
38
|
// `mmap`, and on other POSIX systems we'll use `read`.
|
39
|
-
bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
|
39
|
+
YP_EXPORTED_FUNCTION bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
|
40
40
|
|
41
41
|
// Returns the memory size associated with the string.
|
42
42
|
size_t yp_string_memsize(const yp_string_t *string);
|
@@ -54,4 +54,8 @@ YP_EXPORTED_FUNCTION const char * yp_string_source(const yp_string_t *string);
|
|
54
54
|
// Free the associated memory of the given string.
|
55
55
|
YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
|
56
56
|
|
57
|
+
// Returns the size of the yp_string_t struct. This is necessary to allocate the
|
58
|
+
// correct amount of memory in the FFI backend.
|
59
|
+
YP_EXPORTED_FUNCTION size_t yp_string_sizeof(void);
|
60
|
+
|
57
61
|
#endif // YARP_STRING_H
|
data/include/yarp/version.h
CHANGED
data/include/yarp.h
CHANGED
@@ -59,10 +59,12 @@ YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, y
|
|
59
59
|
// Serialize the AST represented by the given node to the given buffer.
|
60
60
|
YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
|
61
61
|
|
62
|
-
// Parse
|
63
|
-
// buffer.
|
62
|
+
// Parse the given source to the AST and serialize the AST to the given buffer.
|
64
63
|
YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata);
|
65
64
|
|
65
|
+
// Lex the given source and serialize to the given buffer.
|
66
|
+
YP_EXPORTED_FUNCTION void yp_lex_serialize(const char *source, size_t size, const char *filepath, yp_buffer_t *buffer);
|
67
|
+
|
66
68
|
// Returns a string representation of the given token type.
|
67
69
|
YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
|
68
70
|
|
data/lib/yarp/ffi.rb
ADDED
@@ -0,0 +1,226 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file is responsible for mirroring the API provided by the C extension by
|
4
|
+
# using FFI to call into the shared library.
|
5
|
+
|
6
|
+
require "rbconfig"
|
7
|
+
require "ffi"
|
8
|
+
|
9
|
+
module YARP
|
10
|
+
BACKEND = :FFI
|
11
|
+
|
12
|
+
module LibRubyParser
|
13
|
+
extend FFI::Library
|
14
|
+
|
15
|
+
# Define the library that we will be pulling functions from. Note that this
|
16
|
+
# must align with the built shared library from make/rake.
|
17
|
+
ffi_lib File.expand_path("../../build/librubyparser.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
|
18
|
+
|
19
|
+
# Convert a native C type declaration into a symbol that FFI understands.
|
20
|
+
# For example:
|
21
|
+
#
|
22
|
+
# const char * -> :pointer
|
23
|
+
# bool -> :bool
|
24
|
+
# size_t -> :size_t
|
25
|
+
# void -> :void
|
26
|
+
#
|
27
|
+
def self.resolve_type(type)
|
28
|
+
type = type.strip.delete_prefix("const ")
|
29
|
+
type.end_with?("*") ? :pointer : type.to_sym
|
30
|
+
end
|
31
|
+
|
32
|
+
# Read through the given header file and find the declaration of each of the
|
33
|
+
# given functions. For each one, define a function with the same name and
|
34
|
+
# signature as the C function.
|
35
|
+
def self.load_exported_functions_from(header, *functions)
|
36
|
+
File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
|
37
|
+
# We only want to attempt to load exported functions.
|
38
|
+
next unless line.start_with?("YP_EXPORTED_FUNCTION ")
|
39
|
+
|
40
|
+
# We only want to load the functions that we are interested in.
|
41
|
+
next unless functions.any? { |function| line.include?(function) }
|
42
|
+
|
43
|
+
# Parse the function declaration.
|
44
|
+
unless /^YP_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
|
45
|
+
raise "Could not parse #{line}"
|
46
|
+
end
|
47
|
+
|
48
|
+
# Delete the function from the list of functions we are looking for to
|
49
|
+
# mark it as having been found.
|
50
|
+
functions.delete(name)
|
51
|
+
|
52
|
+
# Split up the argument types into an array, ensure we handle the case
|
53
|
+
# where there are no arguments (by explicit void).
|
54
|
+
arg_types = arg_types.split(",").map(&:strip)
|
55
|
+
arg_types = [] if arg_types == %w[void]
|
56
|
+
|
57
|
+
# Resolve the type of the argument by dropping the name of the argument
|
58
|
+
# first if it is present.
|
59
|
+
arg_types.map! { |type| resolve_type(type.sub(/\w+$/, "")) }
|
60
|
+
|
61
|
+
# Attach the function using the FFI library.
|
62
|
+
attach_function name, arg_types, resolve_type(return_type)
|
63
|
+
end
|
64
|
+
|
65
|
+
# If we didn't find all of the functions, raise an error.
|
66
|
+
raise "Could not find functions #{functions.inspect}" unless functions.empty?
|
67
|
+
end
|
68
|
+
|
69
|
+
load_exported_functions_from(
|
70
|
+
"yarp.h",
|
71
|
+
"yp_version",
|
72
|
+
"yp_parse_serialize",
|
73
|
+
"yp_lex_serialize"
|
74
|
+
)
|
75
|
+
|
76
|
+
load_exported_functions_from(
|
77
|
+
"yarp/util/yp_buffer.h",
|
78
|
+
"yp_buffer_sizeof",
|
79
|
+
"yp_buffer_init",
|
80
|
+
"yp_buffer_value",
|
81
|
+
"yp_buffer_length",
|
82
|
+
"yp_buffer_free"
|
83
|
+
)
|
84
|
+
|
85
|
+
load_exported_functions_from(
|
86
|
+
"yarp/util/yp_string.h",
|
87
|
+
"yp_string_mapped_init",
|
88
|
+
"yp_string_free",
|
89
|
+
"yp_string_source",
|
90
|
+
"yp_string_length",
|
91
|
+
"yp_string_sizeof"
|
92
|
+
)
|
93
|
+
|
94
|
+
# This object represents a yp_buffer_t. We only use it as an opaque pointer,
|
95
|
+
# so it doesn't need to know the fields of yp_buffer_t.
|
96
|
+
class YPBuffer
|
97
|
+
SIZEOF = LibRubyParser.yp_buffer_sizeof
|
98
|
+
|
99
|
+
attr_reader :pointer
|
100
|
+
|
101
|
+
def initialize(pointer)
|
102
|
+
@pointer = pointer
|
103
|
+
end
|
104
|
+
|
105
|
+
def value
|
106
|
+
LibRubyParser.yp_buffer_value(pointer)
|
107
|
+
end
|
108
|
+
|
109
|
+
def length
|
110
|
+
LibRubyParser.yp_buffer_length(pointer)
|
111
|
+
end
|
112
|
+
|
113
|
+
def read
|
114
|
+
value.read_string(length)
|
115
|
+
end
|
116
|
+
|
117
|
+
# Initialize a new buffer and yield it to the block. The buffer will be
|
118
|
+
# automatically freed when the block returns.
|
119
|
+
def self.with(&block)
|
120
|
+
pointer = FFI::MemoryPointer.new(SIZEOF)
|
121
|
+
|
122
|
+
begin
|
123
|
+
raise unless LibRubyParser.yp_buffer_init(pointer)
|
124
|
+
yield new(pointer)
|
125
|
+
ensure
|
126
|
+
LibRubyParser.yp_buffer_free(pointer)
|
127
|
+
pointer.free
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# This object represents a yp_string_t. We only use it as an opaque pointer,
|
133
|
+
# so it doesn't have to be an FFI::Struct.
|
134
|
+
class YPString
|
135
|
+
SIZEOF = LibRubyParser.yp_string_sizeof
|
136
|
+
|
137
|
+
attr_reader :pointer
|
138
|
+
|
139
|
+
def initialize(pointer)
|
140
|
+
@pointer = pointer
|
141
|
+
end
|
142
|
+
|
143
|
+
def source
|
144
|
+
LibRubyParser.yp_string_source(pointer)
|
145
|
+
end
|
146
|
+
|
147
|
+
def length
|
148
|
+
LibRubyParser.yp_string_length(pointer)
|
149
|
+
end
|
150
|
+
|
151
|
+
def read
|
152
|
+
source.read_string(length)
|
153
|
+
end
|
154
|
+
|
155
|
+
# Yields a yp_string_t pointer to the given block.
|
156
|
+
def self.with(filepath, &block)
|
157
|
+
pointer = FFI::MemoryPointer.new(SIZEOF)
|
158
|
+
|
159
|
+
begin
|
160
|
+
raise unless LibRubyParser.yp_string_mapped_init(pointer, filepath)
|
161
|
+
yield new(pointer)
|
162
|
+
ensure
|
163
|
+
LibRubyParser.yp_string_free(pointer)
|
164
|
+
pointer.free
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
# Mark the LibRubyParser module as private as it should only be called through
|
171
|
+
# the YARP module.
|
172
|
+
private_constant :LibRubyParser
|
173
|
+
|
174
|
+
# The version constant is set by reading the result of calling yp_version.
|
175
|
+
VERSION = LibRubyParser.yp_version.read_string
|
176
|
+
|
177
|
+
def self.dump_internal(source, source_size, filepath)
|
178
|
+
LibRubyParser::YPBuffer.with do |buffer|
|
179
|
+
metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
|
180
|
+
LibRubyParser.yp_parse_serialize(source, source_size, buffer.pointer, metadata)
|
181
|
+
buffer.read
|
182
|
+
end
|
183
|
+
end
|
184
|
+
private_class_method :dump_internal
|
185
|
+
|
186
|
+
# Mirror the YARP.dump API by using the serialization API.
|
187
|
+
def self.dump(code, filepath = nil)
|
188
|
+
dump_internal(code, code.bytesize, filepath)
|
189
|
+
end
|
190
|
+
|
191
|
+
# Mirror the YARP.dump_file API by using the serialization API.
|
192
|
+
def self.dump_file(filepath)
|
193
|
+
LibRubyParser::YPString.with(filepath) do |string|
|
194
|
+
dump_internal(string.source, string.length, filepath)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
# Mirror the YARP.lex API by using the serialization API.
|
199
|
+
def self.lex(code, filepath = nil)
|
200
|
+
LibRubyParser::YPBuffer.with do |buffer|
|
201
|
+
LibRubyParser.yp_lex_serialize(code, code.bytesize, filepath, buffer.pointer)
|
202
|
+
Serialize.load_tokens(Source.new(code), buffer.read)
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
# Mirror the YARP.lex_file API by using the serialization API.
|
207
|
+
def self.lex_file(filepath)
|
208
|
+
LibRubyParser::YPString.with(filepath) do |string|
|
209
|
+
lex(string.read, filepath)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
# Mirror the YARP.parse API by using the serialization API.
|
214
|
+
def self.parse(code, filepath = nil)
|
215
|
+
YARP.load(code, dump(code, filepath))
|
216
|
+
end
|
217
|
+
|
218
|
+
# Mirror the YARP.parse_file API by using the serialization API. This uses
|
219
|
+
# native strings instead of Ruby strings because it allows us to use mmap when
|
220
|
+
# it is available.
|
221
|
+
def self.parse_file(filepath)
|
222
|
+
LibRubyParser::YPString.with(filepath) do |string|
|
223
|
+
parse(string.read, filepath)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
data/lib/yarp/lex_compat.rb
CHANGED
@@ -647,19 +647,34 @@ module YARP
|
|
647
647
|
# can shuffle around the token to match Ripper's output.
|
648
648
|
case state
|
649
649
|
when :default
|
650
|
+
# The default state is when there are no heredocs at all. In this
|
651
|
+
# state we can append the token to the list of tokens and move on.
|
650
652
|
tokens << token
|
651
653
|
|
654
|
+
# If we get the declaration of a heredoc, then we open a new heredoc
|
655
|
+
# and move into the heredoc_opened state.
|
652
656
|
if event == :on_heredoc_beg
|
653
657
|
state = :heredoc_opened
|
654
658
|
heredoc_stack.last << Heredoc.build(token)
|
655
659
|
end
|
656
660
|
when :heredoc_opened
|
661
|
+
# The heredoc_opened state is when we've seen the declaration of a
|
662
|
+
# heredoc and are now lexing the body of the heredoc. In this state we
|
663
|
+
# push tokens onto the most recently created heredoc.
|
657
664
|
heredoc_stack.last.last << token
|
658
665
|
|
659
666
|
case event
|
660
667
|
when :on_heredoc_beg
|
668
|
+
# If we receive a heredoc declaration while lexing the body of a
|
669
|
+
# heredoc, this means we have nested heredocs. In this case we'll
|
670
|
+
# push a new heredoc onto the stack and stay in the heredoc_opened
|
671
|
+
# state since we're now lexing the body of the new heredoc.
|
661
672
|
heredoc_stack << [Heredoc.build(token)]
|
662
673
|
when :on_heredoc_end
|
674
|
+
# If we receive the end of a heredoc, then we're done lexing the
|
675
|
+
# body of the heredoc. In this case we now have a completed heredoc
|
676
|
+
# but need to wait for the next newline to push it into the token
|
677
|
+
# stream.
|
663
678
|
state = :heredoc_closed
|
664
679
|
end
|
665
680
|
when :heredoc_closed
|
@@ -734,8 +749,7 @@ module YARP
|
|
734
749
|
when :on_sp
|
735
750
|
# skip
|
736
751
|
when :on_tstring_content
|
737
|
-
if previous[1] == :on_tstring_content &&
|
738
|
-
(token[2].start_with?("\#$") || token[2].start_with?("\#@"))
|
752
|
+
if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
|
739
753
|
previous[2] << token[2]
|
740
754
|
else
|
741
755
|
results << token
|