yarp 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +55 -0
  3. data/CONTRIBUTING.md +4 -0
  4. data/{Makefile.in → Makefile} +5 -4
  5. data/README.md +6 -3
  6. data/config.yml +83 -274
  7. data/docs/build_system.md +4 -15
  8. data/docs/building.md +1 -5
  9. data/docs/encoding.md +1 -0
  10. data/docs/{extension.md → ruby_api.md} +6 -3
  11. data/docs/serialization.md +71 -24
  12. data/ext/yarp/api_node.c +173 -585
  13. data/ext/yarp/extconf.rb +15 -10
  14. data/ext/yarp/extension.c +4 -2
  15. data/ext/yarp/extension.h +1 -1
  16. data/include/yarp/ast.h +167 -306
  17. data/include/yarp/defines.h +5 -15
  18. data/include/yarp/enc/yp_encoding.h +1 -0
  19. data/include/yarp/unescape.h +1 -1
  20. data/include/yarp/util/yp_buffer.h +9 -0
  21. data/include/yarp/util/yp_constant_pool.h +3 -0
  22. data/include/yarp/util/yp_list.h +7 -7
  23. data/include/yarp/util/yp_newline_list.h +4 -0
  24. data/include/yarp/util/yp_state_stack.h +1 -1
  25. data/include/yarp/util/yp_string.h +5 -1
  26. data/include/yarp/version.h +2 -3
  27. data/include/yarp.h +4 -2
  28. data/lib/yarp/ffi.rb +226 -0
  29. data/lib/yarp/lex_compat.rb +16 -2
  30. data/lib/yarp/node.rb +594 -1437
  31. data/lib/yarp/ripper_compat.rb +3 -3
  32. data/lib/yarp/serialize.rb +312 -149
  33. data/lib/yarp.rb +167 -2
  34. data/src/enc/yp_unicode.c +9 -0
  35. data/src/node.c +92 -250
  36. data/src/prettyprint.c +81 -206
  37. data/src/serialize.c +124 -149
  38. data/src/unescape.c +29 -35
  39. data/src/util/yp_buffer.c +18 -0
  40. data/src/util/yp_list.c +7 -16
  41. data/src/util/yp_state_stack.c +0 -6
  42. data/src/util/yp_string.c +8 -17
  43. data/src/yarp.c +444 -717
  44. data/yarp.gemspec +5 -5
  45. metadata +6 -6
  46. data/config.h.in +0 -25
  47. data/configure +0 -4487
@@ -31,7 +31,7 @@ typedef enum {
31
31
 
32
32
  // Unescape the contents of the given token into the given string using the
33
33
  // given unescape mode.
34
- YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
34
+ YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
35
35
 
36
36
  // Accepts a source string and a type of unescaping and returns the unescaped version.
37
37
  // The caller must yp_string_free(result); after calling this function.
@@ -18,9 +18,18 @@ typedef struct {
18
18
  size_t capacity;
19
19
  } yp_buffer_t;
20
20
 
21
+ // Return the size of the yp_buffer_t struct.
22
+ YP_EXPORTED_FUNCTION size_t yp_buffer_sizeof(void);
23
+
21
24
  // Initialize a yp_buffer_t with its default values.
22
25
  YP_EXPORTED_FUNCTION bool yp_buffer_init(yp_buffer_t *buffer);
23
26
 
27
+ // Return the value of the buffer.
28
+ YP_EXPORTED_FUNCTION char * yp_buffer_value(yp_buffer_t *buffer);
29
+
30
+ // Return the length of the buffer.
31
+ YP_EXPORTED_FUNCTION size_t yp_buffer_length(yp_buffer_t *buffer);
32
+
24
33
  // Append the given amount of space as zeroes to the buffer.
25
34
  void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length);
26
35
 
@@ -51,6 +51,9 @@ typedef struct {
51
51
  size_t capacity;
52
52
  } yp_constant_pool_t;
53
53
 
54
+ // Define an empty constant pool.
55
+ #define YP_CONSTANT_POOL_EMPTY ((yp_constant_pool_t) { .constants = NULL, .size = 0, .capacity = 0 })
56
+
54
57
  // Initialize a new constant pool with a given capacity.
55
58
  bool yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity);
56
59
 
@@ -15,9 +15,7 @@
15
15
  // int value;
16
16
  // } yp_int_node_t;
17
17
  //
18
- // yp_list_t list;
19
- // yp_list_init(&list);
20
- //
18
+ // yp_list_t list = YP_LIST_EMPTY;
21
19
  // yp_int_node_t *node = malloc(sizeof(yp_int_node_t));
22
20
  // node->value = 5;
23
21
  //
@@ -45,18 +43,20 @@ typedef struct yp_list_node {
45
43
  // This represents the overall linked list. It keeps a pointer to the head and
46
44
  // tail so that iteration is easy and pushing new nodes is easy.
47
45
  typedef struct {
46
+ size_t size;
48
47
  yp_list_node_t *head;
49
48
  yp_list_node_t *tail;
50
49
  } yp_list_t;
51
50
 
52
- // Initializes a new list.
53
- YP_EXPORTED_FUNCTION void yp_list_init(yp_list_t *list);
51
+ // This represents an empty list. Use it to initialize a stack-allocated list
52
+ // directly, instead of calling an initialization function.
53
+ #define YP_LIST_EMPTY ((yp_list_t) { .size = 0, .head = NULL, .tail = NULL })
54
54
 
55
55
  // Returns true if the given list is empty.
56
56
  YP_EXPORTED_FUNCTION bool yp_list_empty_p(yp_list_t *list);
57
57
 
58
- // Returns the size of the list in O(n) time.
59
- YP_EXPORTED_FUNCTION uint32_t yp_list_size(yp_list_t *list);
58
+ // Returns the size of the list.
59
+ YP_EXPORTED_FUNCTION size_t yp_list_size(yp_list_t *list);
60
60
 
61
61
  // Append a node to the given list.
62
62
  void yp_list_append(yp_list_t *list, yp_list_node_t *node);
@@ -35,6 +35,10 @@ typedef struct {
35
35
  size_t column;
36
36
  } yp_line_column_t;
37
37
 
38
+ #define YP_NEWLINE_LIST_EMPTY ((yp_newline_list_t) { \
39
+ .start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \
40
+ })
41
+
38
42
  // Initialize a new newline list with the given capacity. Returns true if the
39
43
  // allocation of the offsets succeeds, otherwise returns false.
40
44
  bool yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity);
@@ -10,7 +10,7 @@
10
10
  typedef uint32_t yp_state_stack_t;
11
11
 
12
12
  // Represents an empty state stack; assign it to initialize a stack.
13
- void yp_state_stack_init(yp_state_stack_t *stack);
13
+ #define YP_STATE_STACK_EMPTY ((yp_state_stack_t) 0)
14
14
 
15
15
  // Pushes a value onto the stack.
16
16
  void yp_state_stack_push(yp_state_stack_t *stack, bool value);
@@ -36,7 +36,7 @@ void yp_string_constant_init(yp_string_t *string, const char *source, size_t len
36
36
  // for large files). This means that if we're on windows we'll use
37
37
  // `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
38
38
  // `mmap`, and on other POSIX systems we'll use `read`.
39
- bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
39
+ YP_EXPORTED_FUNCTION bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
40
40
 
41
41
  // Returns the memory size associated with the string.
42
42
  size_t yp_string_memsize(const yp_string_t *string);
@@ -54,4 +54,8 @@ YP_EXPORTED_FUNCTION const char * yp_string_source(const yp_string_t *string);
54
54
  // Free the associated memory of the given string.
55
55
  YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
56
56
 
57
+ // Returns the size of the yp_string_t struct. This is necessary to allocate the
58
+ // correct amount of memory in the FFI backend.
59
+ YP_EXPORTED_FUNCTION size_t yp_string_sizeof(void);
60
+
57
61
  #endif // YARP_STRING_H
@@ -1,5 +1,4 @@
1
1
  #define YP_VERSION_MAJOR 0
2
- #define YP_VERSION_MINOR 6
2
+ #define YP_VERSION_MINOR 8
3
3
  #define YP_VERSION_PATCH 0
4
-
5
- #define YP_VERSION "0.6.0"
4
+ #define YP_VERSION "0.8.0"
data/include/yarp.h CHANGED
@@ -59,10 +59,12 @@ YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, y
59
59
  // Serialize the AST represented by the given node to the given buffer.
60
60
  YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
61
61
 
62
- // Parse and serialize the AST represented by the given source to the given
63
- // buffer.
62
+ // Parse the given source to the AST and serialize the AST to the given buffer.
64
63
  YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata);
65
64
 
65
+ // Lex the given source and serialize to the given buffer.
66
+ YP_EXPORTED_FUNCTION void yp_lex_serialize(const char *source, size_t size, const char *filepath, yp_buffer_t *buffer);
67
+
66
68
  // Returns a string representation of the given token type.
67
69
  YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
68
70
 
data/lib/yarp/ffi.rb ADDED
@@ -0,0 +1,226 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is responsible for mirroring the API provided by the C extension by
4
+ # using FFI to call into the shared library.
5
+
6
+ require "rbconfig"
7
+ require "ffi"
8
+
9
+ module YARP
10
+ BACKEND = :FFI
11
+
12
+ module LibRubyParser
13
+ extend FFI::Library
14
+
15
+ # Define the library that we will be pulling functions from. Note that this
16
+ # must align with the built shared library from make/rake.
17
+ ffi_lib File.expand_path("../../build/librubyparser.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
18
+
19
+ # Convert a native C type declaration into a symbol that FFI understands.
20
+ # For example:
21
+ #
22
+ # const char * -> :pointer
23
+ # bool -> :bool
24
+ # size_t -> :size_t
25
+ # void -> :void
26
+ #
27
+ def self.resolve_type(type)
28
+ type = type.strip.delete_prefix("const ")
29
+ type.end_with?("*") ? :pointer : type.to_sym
30
+ end
31
+
32
+ # Read through the given header file and find the declaration of each of the
33
+ # given functions. For each one, define a function with the same name and
34
+ # signature as the C function.
35
+ def self.load_exported_functions_from(header, *functions)
36
+ File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
37
+ # We only want to attempt to load exported functions.
38
+ next unless line.start_with?("YP_EXPORTED_FUNCTION ")
39
+
40
+ # We only want to load the functions that we are interested in.
41
+ next unless functions.any? { |function| line.include?(function) }
42
+
43
+ # Parse the function declaration.
44
+ unless /^YP_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
45
+ raise "Could not parse #{line}"
46
+ end
47
+
48
+ # Delete the function from the list of functions we are looking for to
49
+ # mark it as having been found.
50
+ functions.delete(name)
51
+
52
+ # Split up the argument types into an array, ensure we handle the case
53
+ # where there are no arguments (by explicit void).
54
+ arg_types = arg_types.split(",").map(&:strip)
55
+ arg_types = [] if arg_types == %w[void]
56
+
57
+ # Resolve the type of the argument by dropping the name of the argument
58
+ # first if it is present.
59
+ arg_types.map! { |type| resolve_type(type.sub(/\w+$/, "")) }
60
+
61
+ # Attach the function using the FFI library.
62
+ attach_function name, arg_types, resolve_type(return_type)
63
+ end
64
+
65
+ # If we didn't find all of the functions, raise an error.
66
+ raise "Could not find functions #{functions.inspect}" unless functions.empty?
67
+ end
68
+
69
+ load_exported_functions_from(
70
+ "yarp.h",
71
+ "yp_version",
72
+ "yp_parse_serialize",
73
+ "yp_lex_serialize"
74
+ )
75
+
76
+ load_exported_functions_from(
77
+ "yarp/util/yp_buffer.h",
78
+ "yp_buffer_sizeof",
79
+ "yp_buffer_init",
80
+ "yp_buffer_value",
81
+ "yp_buffer_length",
82
+ "yp_buffer_free"
83
+ )
84
+
85
+ load_exported_functions_from(
86
+ "yarp/util/yp_string.h",
87
+ "yp_string_mapped_init",
88
+ "yp_string_free",
89
+ "yp_string_source",
90
+ "yp_string_length",
91
+ "yp_string_sizeof"
92
+ )
93
+
94
+ # This object represents a yp_buffer_t. We only use it as an opaque pointer,
95
+ # so it doesn't need to know the fields of yp_buffer_t.
96
+ class YPBuffer
97
+ SIZEOF = LibRubyParser.yp_buffer_sizeof
98
+
99
+ attr_reader :pointer
100
+
101
+ def initialize(pointer)
102
+ @pointer = pointer
103
+ end
104
+
105
+ def value
106
+ LibRubyParser.yp_buffer_value(pointer)
107
+ end
108
+
109
+ def length
110
+ LibRubyParser.yp_buffer_length(pointer)
111
+ end
112
+
113
+ def read
114
+ value.read_string(length)
115
+ end
116
+
117
+ # Initialize a new buffer and yield it to the block. The buffer will be
118
+ # automatically freed when the block returns.
119
+ def self.with(&block)
120
+ pointer = FFI::MemoryPointer.new(SIZEOF)
121
+
122
+ begin
123
+ raise unless LibRubyParser.yp_buffer_init(pointer)
124
+ yield new(pointer)
125
+ ensure
126
+ LibRubyParser.yp_buffer_free(pointer)
127
+ pointer.free
128
+ end
129
+ end
130
+ end
131
+
132
+ # This object represents a yp_string_t. We only use it as an opaque pointer,
133
+ # so it doesn't have to be an FFI::Struct.
134
+ class YPString
135
+ SIZEOF = LibRubyParser.yp_string_sizeof
136
+
137
+ attr_reader :pointer
138
+
139
+ def initialize(pointer)
140
+ @pointer = pointer
141
+ end
142
+
143
+ def source
144
+ LibRubyParser.yp_string_source(pointer)
145
+ end
146
+
147
+ def length
148
+ LibRubyParser.yp_string_length(pointer)
149
+ end
150
+
151
+ def read
152
+ source.read_string(length)
153
+ end
154
+
155
+ # Yields a yp_string_t pointer to the given block.
156
+ def self.with(filepath, &block)
157
+ pointer = FFI::MemoryPointer.new(SIZEOF)
158
+
159
+ begin
160
+ raise unless LibRubyParser.yp_string_mapped_init(pointer, filepath)
161
+ yield new(pointer)
162
+ ensure
163
+ LibRubyParser.yp_string_free(pointer)
164
+ pointer.free
165
+ end
166
+ end
167
+ end
168
+ end
169
+
170
+ # Mark the LibRubyParser module as private as it should only be called through
171
+ # the YARP module.
172
+ private_constant :LibRubyParser
173
+
174
+ # The version constant is set by reading the result of calling yp_version.
175
+ VERSION = LibRubyParser.yp_version.read_string
176
+
177
+ def self.dump_internal(source, source_size, filepath)
178
+ LibRubyParser::YPBuffer.with do |buffer|
179
+ metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
180
+ LibRubyParser.yp_parse_serialize(source, source_size, buffer.pointer, metadata)
181
+ buffer.read
182
+ end
183
+ end
184
+ private_class_method :dump_internal
185
+
186
+ # Mirror the YARP.dump API by using the serialization API.
187
+ def self.dump(code, filepath = nil)
188
+ dump_internal(code, code.bytesize, filepath)
189
+ end
190
+
191
+ # Mirror the YARP.dump_file API by using the serialization API.
192
+ def self.dump_file(filepath)
193
+ LibRubyParser::YPString.with(filepath) do |string|
194
+ dump_internal(string.source, string.length, filepath)
195
+ end
196
+ end
197
+
198
+ # Mirror the YARP.lex API by using the serialization API.
199
+ def self.lex(code, filepath = nil)
200
+ LibRubyParser::YPBuffer.with do |buffer|
201
+ LibRubyParser.yp_lex_serialize(code, code.bytesize, filepath, buffer.pointer)
202
+ Serialize.load_tokens(Source.new(code), buffer.read)
203
+ end
204
+ end
205
+
206
+ # Mirror the YARP.lex_file API by using the serialization API.
207
+ def self.lex_file(filepath)
208
+ LibRubyParser::YPString.with(filepath) do |string|
209
+ lex(string.read, filepath)
210
+ end
211
+ end
212
+
213
+ # Mirror the YARP.parse API by using the serialization API.
214
+ def self.parse(code, filepath = nil)
215
+ YARP.load(code, dump(code, filepath))
216
+ end
217
+
218
+ # Mirror the YARP.parse_file API by using the serialization API. This uses
219
+ # native strings instead of Ruby strings because it allows us to use mmap when
220
+ # it is available.
221
+ def self.parse_file(filepath)
222
+ LibRubyParser::YPString.with(filepath) do |string|
223
+ parse(string.read, filepath)
224
+ end
225
+ end
226
+ end
@@ -647,19 +647,34 @@ module YARP
647
647
  # can shuffle around the token to match Ripper's output.
648
648
  case state
649
649
  when :default
650
+ # The default state is when there are no heredocs at all. In this
651
+ # state we can append the token to the list of tokens and move on.
650
652
  tokens << token
651
653
 
654
+ # If we get the declaration of a heredoc, then we open a new heredoc
655
+ # and move into the heredoc_opened state.
652
656
  if event == :on_heredoc_beg
653
657
  state = :heredoc_opened
654
658
  heredoc_stack.last << Heredoc.build(token)
655
659
  end
656
660
  when :heredoc_opened
661
+ # The heredoc_opened state is when we've seen the declaration of a
662
+ # heredoc and are now lexing the body of the heredoc. In this state we
663
+ # push tokens onto the most recently created heredoc.
657
664
  heredoc_stack.last.last << token
658
665
 
659
666
  case event
660
667
  when :on_heredoc_beg
668
+ # If we receive a heredoc declaration while lexing the body of a
669
+ # heredoc, this means we have nested heredocs. In this case we'll
670
+ # push a new heredoc onto the stack and stay in the heredoc_opened
671
+ # state since we're now lexing the body of the new heredoc.
661
672
  heredoc_stack << [Heredoc.build(token)]
662
673
  when :on_heredoc_end
674
+ # If we receive the end of a heredoc, then we're done lexing the
675
+ # body of the heredoc. In this case we now have a completed heredoc
676
+ # but need to wait for the next newline to push it into the token
677
+ # stream.
663
678
  state = :heredoc_closed
664
679
  end
665
680
  when :heredoc_closed
@@ -734,8 +749,7 @@ module YARP
734
749
  when :on_sp
735
750
  # skip
736
751
  when :on_tstring_content
737
- if previous[1] == :on_tstring_content &&
738
- (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
752
+ if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
739
753
  previous[2] << token[2]
740
754
  else
741
755
  results << token