prism 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +172 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +84 -0
  7. data/README.md +89 -0
  8. data/config.yml +2481 -0
  9. data/docs/build_system.md +74 -0
  10. data/docs/building.md +22 -0
  11. data/docs/configuration.md +60 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +117 -0
  14. data/docs/fuzzing.md +93 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/mapping.md +117 -0
  17. data/docs/ripper.md +36 -0
  18. data/docs/ruby_api.md +25 -0
  19. data/docs/serialization.md +181 -0
  20. data/docs/testing.md +55 -0
  21. data/ext/prism/api_node.c +4725 -0
  22. data/ext/prism/api_pack.c +256 -0
  23. data/ext/prism/extconf.rb +136 -0
  24. data/ext/prism/extension.c +626 -0
  25. data/ext/prism/extension.h +18 -0
  26. data/include/prism/ast.h +1932 -0
  27. data/include/prism/defines.h +45 -0
  28. data/include/prism/diagnostic.h +231 -0
  29. data/include/prism/enc/pm_encoding.h +95 -0
  30. data/include/prism/node.h +41 -0
  31. data/include/prism/pack.h +141 -0
  32. data/include/prism/parser.h +418 -0
  33. data/include/prism/regexp.h +19 -0
  34. data/include/prism/unescape.h +48 -0
  35. data/include/prism/util/pm_buffer.h +51 -0
  36. data/include/prism/util/pm_char.h +91 -0
  37. data/include/prism/util/pm_constant_pool.h +78 -0
  38. data/include/prism/util/pm_list.h +67 -0
  39. data/include/prism/util/pm_memchr.h +14 -0
  40. data/include/prism/util/pm_newline_list.h +61 -0
  41. data/include/prism/util/pm_state_stack.h +24 -0
  42. data/include/prism/util/pm_string.h +61 -0
  43. data/include/prism/util/pm_string_list.h +25 -0
  44. data/include/prism/util/pm_strpbrk.h +29 -0
  45. data/include/prism/version.h +4 -0
  46. data/include/prism.h +82 -0
  47. data/lib/prism/compiler.rb +465 -0
  48. data/lib/prism/debug.rb +157 -0
  49. data/lib/prism/desugar_compiler.rb +206 -0
  50. data/lib/prism/dispatcher.rb +2051 -0
  51. data/lib/prism/dsl.rb +750 -0
  52. data/lib/prism/ffi.rb +251 -0
  53. data/lib/prism/lex_compat.rb +838 -0
  54. data/lib/prism/mutation_compiler.rb +718 -0
  55. data/lib/prism/node.rb +14540 -0
  56. data/lib/prism/node_ext.rb +55 -0
  57. data/lib/prism/node_inspector.rb +68 -0
  58. data/lib/prism/pack.rb +185 -0
  59. data/lib/prism/parse_result/comments.rb +172 -0
  60. data/lib/prism/parse_result/newlines.rb +60 -0
  61. data/lib/prism/parse_result.rb +266 -0
  62. data/lib/prism/pattern.rb +239 -0
  63. data/lib/prism/ripper_compat.rb +174 -0
  64. data/lib/prism/serialize.rb +662 -0
  65. data/lib/prism/visitor.rb +470 -0
  66. data/lib/prism.rb +64 -0
  67. data/prism.gemspec +113 -0
  68. data/src/diagnostic.c +287 -0
  69. data/src/enc/pm_big5.c +52 -0
  70. data/src/enc/pm_euc_jp.c +58 -0
  71. data/src/enc/pm_gbk.c +61 -0
  72. data/src/enc/pm_shift_jis.c +56 -0
  73. data/src/enc/pm_tables.c +507 -0
  74. data/src/enc/pm_unicode.c +2324 -0
  75. data/src/enc/pm_windows_31j.c +56 -0
  76. data/src/node.c +2633 -0
  77. data/src/pack.c +493 -0
  78. data/src/prettyprint.c +2136 -0
  79. data/src/prism.c +14587 -0
  80. data/src/regexp.c +580 -0
  81. data/src/serialize.c +1899 -0
  82. data/src/token_type.c +349 -0
  83. data/src/unescape.c +637 -0
  84. data/src/util/pm_buffer.c +103 -0
  85. data/src/util/pm_char.c +272 -0
  86. data/src/util/pm_constant_pool.c +252 -0
  87. data/src/util/pm_list.c +41 -0
  88. data/src/util/pm_memchr.c +33 -0
  89. data/src/util/pm_newline_list.c +134 -0
  90. data/src/util/pm_state_stack.c +19 -0
  91. data/src/util/pm_string.c +200 -0
  92. data/src/util/pm_string_list.c +29 -0
  93. data/src/util/pm_strncasecmp.c +17 -0
  94. data/src/util/pm_strpbrk.c +66 -0
  95. metadata +138 -0
data/lib/prism/ffi.rb ADDED
@@ -0,0 +1,251 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is responsible for mirroring the API provided by the C extension by
4
+ # using FFI to call into the shared library.
5
+
6
+ require "rbconfig"
7
+ require "ffi"
8
+
9
+ module Prism
10
+ BACKEND = :FFI
11
+
12
+ module LibRubyParser
13
+ extend FFI::Library
14
+
15
+ # Define the library that we will be pulling functions from. Note that this
16
+ # must align with the build shared library from make/rake.
17
+ ffi_lib File.expand_path("../../build/librubyparser.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
18
+
19
# Translate a C type declaration into the type symbol handed to FFI.
# Surrounding whitespace and a leading "const " qualifier are discarded;
# anything that still ends in "*" becomes :pointer, and every other
# declaration is symbolized as-is. For example:
#
#   const char * -> :pointer
#   bool         -> :bool
#   size_t       -> :size_t
#   void         -> :void
#
def self.resolve_type(type)
  declaration = type.strip.delete_prefix("const ")
  return :pointer if declaration.end_with?("*")

  declaration.to_sym
end
31
+
32
# Read through the given header file (resolved relative to the include
# directory two levels above this file) and find the declaration of each of
# the given functions. For each one, attach a function with the same name and
# signature as the C function.
#
# header    - path of the header, relative to the include directory.
# functions - names of the C functions that must be attached.
#
# Raises RuntimeError if a candidate declaration cannot be parsed, or if any
# of the requested functions is not declared in the header.
def self.load_exported_functions_from(header, *functions)
  File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
    # We only want to attempt to load exported functions.
    next unless line.start_with?("PRISM_EXPORTED_FUNCTION ")

    # Cheap pre-filter: skip lines that do not mention any function we are
    # still looking for.
    next unless functions.any? { |function| line.include?(function) }

    # Parse the function declaration.
    unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
      raise "Could not parse #{line}"
    end

    # The pre-filter is only a substring match, so a declaration such as
    # pm_version_extra would slip through when pm_version was requested.
    # Only proceed on an exact name match; deleting the name also marks the
    # function as having been found.
    next unless functions.delete(name)

    # Split up the argument types into an array, ensure we handle the case
    # where there are no arguments (by explicit void).
    arg_types = arg_types.split(",").map(&:strip)
    arg_types = [] if arg_types == %w[void]

    # Resolve the type of each argument by dropping the name of the argument
    # first if it is present. The trailing identifier is only treated as a
    # name when it follows whitespace or "*", so that an unnamed single-token
    # parameter (e.g. "size_t") keeps its type instead of becoming empty.
    arg_types.map! { |type| resolve_type(type.sub(/(?<=[\s*])\w+$/, "")) }

    # Attach the function using the FFI library.
    attach_function name, arg_types, resolve_type(return_type)
  end

  # If we didn't find all of the functions, raise an error.
  raise "Could not find functions #{functions.inspect}" unless functions.empty?
end
68
+
69
# Attach every native function this backend calls, grouped by the header that
# declares it. Headers are processed in the order listed here.
{
  "prism.h" => %w[
    pm_version
    pm_parse_serialize
    pm_lex_serialize
    pm_parse_lex_serialize
  ],
  "prism/util/pm_buffer.h" => %w[
    pm_buffer_sizeof
    pm_buffer_init
    pm_buffer_value
    pm_buffer_length
    pm_buffer_free
  ],
  "prism/util/pm_string.h" => %w[
    pm_string_mapped_init
    pm_string_free
    pm_string_source
    pm_string_length
    pm_string_sizeof
  ]
}.each do |header, function_names|
  load_exported_functions_from(header, *function_names)
end
94
+
95
# This object represents a pm_buffer_t. We only use it as an opaque pointer,
# so it doesn't need to know the fields of pm_buffer_t; every access goes
# through the native pm_buffer_* functions.
class PrismBuffer
  # Number of bytes to allocate for one pm_buffer_t, as reported by the
  # native library.
  SIZEOF = LibRubyParser.pm_buffer_sizeof

  # The pointer to the underlying pm_buffer_t.
  attr_reader :pointer

  def initialize(pointer)
    @pointer = pointer
  end

  # Pointer to the buffer's contents, as returned by pm_buffer_value.
  def value
    LibRubyParser.pm_buffer_value(pointer)
  end

  # Length of the buffer's contents, as returned by pm_buffer_length.
  def length
    LibRubyParser.pm_buffer_length(pointer)
  end

  # Copy the buffer's contents into a Ruby string.
  def read
    value.read_string(length)
  end

  # Initialize a new buffer and yield it to the block. The buffer will be
  # automatically freed when the block returns or raises. Returns the value
  # of the block.
  #
  # Raises RuntimeError if pm_buffer_init reports failure.
  def self.with
    pointer = FFI::MemoryPointer.new(SIZEOF)

    begin
      # A bare `raise` here would only say "unhandled exception", so name
      # the step that failed.
      raise "Failed to initialize a pm_buffer_t" unless LibRubyParser.pm_buffer_init(pointer)
      yield new(pointer)
    ensure
      LibRubyParser.pm_buffer_free(pointer)
      pointer.free
    end
  end
end
132
+
133
# This object represents a pm_string_t. We only use it as an opaque pointer,
# so it doesn't have to be an FFI::Struct; every access goes through the
# native pm_string_* functions.
class PrismString
  # Number of bytes to allocate for one pm_string_t, as reported by the
  # native library.
  SIZEOF = LibRubyParser.pm_string_sizeof

  # The pointer to the underlying pm_string_t.
  attr_reader :pointer

  def initialize(pointer)
    @pointer = pointer
  end

  # Pointer to the string's contents, as returned by pm_string_source.
  def source
    LibRubyParser.pm_string_source(pointer)
  end

  # Length of the string's contents, as returned by pm_string_length.
  def length
    LibRubyParser.pm_string_length(pointer)
  end

  # Copy the string's contents into a Ruby string.
  def read
    source.read_string(length)
  end

  # Initialize a pm_string_t from the file at the given path and yield it to
  # the block. The string is freed when the block returns or raises. Returns
  # the value of the block.
  #
  # Raises RuntimeError if pm_string_mapped_init reports failure.
  def self.with(filepath)
    pointer = FFI::MemoryPointer.new(SIZEOF)

    begin
      # A bare `raise` here would only say "unhandled exception", so name
      # the file that could not be loaded.
      unless LibRubyParser.pm_string_mapped_init(pointer, filepath)
        raise "Failed to initialize a pm_string_t from #{filepath}"
      end

      yield new(pointer)
    ensure
      LibRubyParser.pm_string_free(pointer)
      pointer.free
    end
  end
end
169
+
170
# Call pm_parse_serialize on the given source with a scratch buffer and
# return the buffer's contents as a Ruby string. When a filepath is given it
# is packed (byte size, raw bytes, then a zero) and passed as the metadata
# argument; otherwise the metadata argument is nil.
def self.dump_internal(source, source_size, filepath)
  PrismBuffer.with do |output|
    metadata = filepath ? [filepath.bytesize, filepath.b, 0].pack("LA*L") : nil
    pm_parse_serialize(source, source_size, output.pointer, metadata)
    output.read
  end
end
177
+ end
178
+
179
+ # Mark the LibRubyParser module as private as it should only be called through
180
+ # the prism module.
181
+ private_constant :LibRubyParser
182
+
183
+ # The version constant is set by reading the result of calling pm_version.
184
+ VERSION = LibRubyParser.pm_version.read_string
185
+
186
# Mirror the Prism.dump API: return the serialized form of the given source
# code, optionally associated with a filepath.
def self.dump(code, filepath = nil)
  byte_length = code.bytesize
  LibRubyParser.dump_internal(code, byte_length, filepath)
end
190
+
191
# Mirror the Prism.dump_file API: load the file into a native string and
# serialize it directly from the native source pointer and length.
def self.dump_file(filepath)
  LibRubyParser::PrismString.with(filepath) do |contents|
    LibRubyParser.dump_internal(contents.source, contents.length, filepath)
  end
end
197
+
198
# Mirror the Prism.lex API: call pm_lex_serialize with a scratch buffer, then
# load the tokens back out of that buffer's contents.
def self.lex(code, filepath = nil)
  LibRubyParser::PrismBuffer.with do |output|
    LibRubyParser.pm_lex_serialize(code, code.bytesize, filepath, output.pointer)

    source = Source.new(code)
    Serialize.load_tokens(source, output.read)
  end
end
205
+
206
# Mirror the Prism.lex_file API: read the file through a native string and
# lex its contents.
def self.lex_file(filepath)
  LibRubyParser::PrismString.with(filepath) do |contents|
    code = contents.read
    lex(code, filepath)
  end
end
212
+
213
# Mirror the Prism.parse API: serialize the source with dump, then load the
# result back through Prism.load.
def self.parse(code, filepath = nil)
  serialized = dump(code, filepath)
  Prism.load(code, serialized)
end
217
+
218
# Mirror the Prism.parse_file API by using the serialization API. The file is
# loaded through a native string rather than a Ruby string because that
# allows mmap to be used when it is available.
def self.parse_file(filepath)
  LibRubyParser::PrismString.with(filepath) do |contents|
    code = contents.read
    parse(code, filepath)
  end
end
226
+
227
# Mirror the Prism.parse_lex API: call pm_parse_lex_serialize with a scratch
# buffer, then load the tokens followed by the nodes from the buffer's
# contents. The result's value is the pair [node, tokens].
def self.parse_lex(code, filepath = nil)
  LibRubyParser::PrismBuffer.with do |output|
    # The metadata is only built when a filepath is given; otherwise nil is
    # passed through.
    metadata = filepath ? [filepath.bytesize, filepath.b, 0].pack("LA*L") : nil
    LibRubyParser.pm_parse_lex_serialize(code, code.bytesize, output.pointer, metadata)

    source = Source.new(code)
    loader = Serialize::Loader.new(source, output.read)

    # Tokens are loaded before the nodes.
    tokens = loader.load_tokens
    node, comments, errors, warnings = loader.load_nodes

    # Tag each token's value with the encoding reported by the loader. Each
    # entry is destructured so that only its first element (the token) is
    # used.
    tokens.each do |token,|
      token.value.force_encoding(loader.encoding)
    end

    ParseResult.new([node, tokens], comments, errors, warnings, source)
  end
end
244
+
245
# Mirror the Prism.parse_lex_file API: read the file through a native string
# and run parse_lex on its contents.
def self.parse_lex_file(filepath)
  LibRubyParser::PrismString.with(filepath) do |contents|
    code = contents.read
    parse_lex(code, filepath)
  end
end
251
+ end