jruby-prism-parser 0.24.0-java → 1.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +269 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +25 -18
- data/README.md +57 -6
- data/config.yml +1724 -140
- data/docs/build_system.md +39 -11
- data/docs/configuration.md +4 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/fuzzing.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/parsing_rules.md +4 -1
- data/docs/releasing.md +8 -10
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +72 -0
- data/docs/ruby_api.md +2 -1
- data/docs/serialization.md +29 -5
- data/ext/prism/api_node.c +3395 -1999
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +55 -34
- data/ext/prism/extension.c +597 -346
- data/ext/prism/extension.h +6 -5
- data/include/prism/ast.h +2612 -455
- data/include/prism/defines.h +160 -2
- data/include/prism/diagnostic.h +188 -76
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +89 -17
- data/include/prism/options.h +224 -12
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +267 -66
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +121 -0
- data/include/prism/util/pm_buffer.h +75 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +18 -9
- data/include/prism/util/pm_integer.h +126 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +19 -0
- data/include/prism/util/pm_string.h +48 -8
- data/include/prism/version.h +3 -3
- data/include/prism.h +99 -5
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +11 -1
- data/lib/prism/desugar_compiler.rb +113 -74
- data/lib/prism/dispatcher.rb +45 -1
- data/lib/prism/dot_visitor.rb +201 -77
- data/lib/prism/dsl.rb +673 -461
- data/lib/prism/ffi.rb +233 -45
- data/lib/prism/inspect_visitor.rb +2389 -0
- data/lib/prism/lex_compat.rb +35 -16
- data/lib/prism/mutation_compiler.rb +24 -8
- data/lib/prism/node.rb +7731 -8460
- data/lib/prism/node_ext.rb +328 -32
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -24
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +102 -12
- data/lib/prism/parse_result.rb +448 -44
- data/lib/prism/pattern.rb +28 -10
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +413 -0
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +1940 -1198
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/builder.rb +61 -0
- data/lib/prism/translation/parser/compiler.rb +569 -195
- data/lib/prism/translation/parser/lexer.rb +516 -39
- data/lib/prism/translation/parser.rb +177 -12
- data/lib/prism/translation/parser33.rb +1 -1
- data/lib/prism/translation/parser34.rb +1 -1
- data/lib/prism/translation/parser35.rb +12 -0
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3224 -462
- data/lib/prism/translation/ruby_parser.rb +194 -69
- data/lib/prism/translation.rb +4 -1
- data/lib/prism/version.rb +1 -1
- data/lib/prism/visitor.rb +13 -0
- data/lib/prism.rb +17 -27
- data/prism.gemspec +57 -17
- data/rbi/prism/compiler.rbi +12 -0
- data/rbi/prism/dsl.rbi +524 -0
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +8722 -0
- data/rbi/prism/node_ext.rbi +107 -0
- data/rbi/prism/parse_result.rbi +404 -0
- data/rbi/prism/reflection.rbi +58 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/translation/parser.rbi +11 -0
- data/rbi/prism/translation/parser33.rbi +6 -0
- data/rbi/prism/translation/parser34.rbi +6 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism/translation/ripper.rbi +15 -0
- data/rbi/prism/visitor.rbi +473 -0
- data/rbi/prism.rbi +44 -7745
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +351 -0
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/mutation_compiler.rbs +159 -0
- data/sig/prism/node.rbs +3614 -0
- data/sig/prism/node_ext.rbs +82 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +192 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/reflection.rbs +50 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/serialize.rbs +8 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +169 -0
- data/sig/prism.rbs +248 -4767
- data/src/diagnostic.c +672 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7541 -1653
- data/src/options.c +135 -20
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1543 -1485
- data/src/prism.c +7813 -3050
- data/src/regexp.c +225 -73
- data/src/serialize.c +101 -77
- data/src/static_literals.c +617 -0
- data/src/token_type.c +14 -13
- data/src/util/pm_buffer.c +187 -20
- data/src/util/pm_char.c +5 -5
- data/src/util/pm_constant_pool.c +39 -19
- data/src/util/pm_integer.c +670 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +43 -5
- data/src/util/pm_string.c +213 -33
- data/src/util/pm_strncasecmp.c +13 -1
- data/src/util/pm_strpbrk.c +32 -6
- metadata +55 -19
- data/docs/ripper.md +0 -36
- data/include/prism/util/pm_state_stack.h +0 -42
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -206
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/translation/parser/rubocop.rb +0 -45
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
- data/src/util/pm_state_stack.c +0 -25
- data/src/util/pm_string_list.c +0 -28
data/lib/prism/parse_result.rb
CHANGED
@@ -5,6 +5,33 @@ module Prism
|
|
5
5
|
# conjunction with locations to allow them to resolve line numbers and source
|
6
6
|
# ranges.
|
7
7
|
class Source
|
8
|
+
# Create a new source object with the given source code. This method should
|
9
|
+
# be used instead of `new` and it will return either a `Source` or a
|
10
|
+
# specialized and more performant `ASCIISource` if no multibyte characters
|
11
|
+
# are present in the source code.
|
12
|
+
def self.for(source, start_line = 1, offsets = [])
|
13
|
+
if source.ascii_only?
|
14
|
+
ASCIISource.new(source, start_line, offsets)
|
15
|
+
elsif source.encoding == Encoding::BINARY
|
16
|
+
source.force_encoding(Encoding::UTF_8)
|
17
|
+
|
18
|
+
if source.valid_encoding?
|
19
|
+
new(source, start_line, offsets)
|
20
|
+
else
|
21
|
+
# This is an extremely niche use case where the file is marked as
|
22
|
+
# binary, contains multi-byte characters, and those characters are not
|
23
|
+
# valid UTF-8. In this case we'll mark it as binary and fall back to
|
24
|
+
# treating everything as a single-byte character. This _may_ cause
|
25
|
+
# problems when asking for code units, but it appears to be the
|
26
|
+
# cleanest solution at the moment.
|
27
|
+
source.force_encoding(Encoding::BINARY)
|
28
|
+
ASCIISource.new(source, start_line, offsets)
|
29
|
+
end
|
30
|
+
else
|
31
|
+
new(source, start_line, offsets)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
8
35
|
# The source code that this source object represents.
|
9
36
|
attr_reader :source
|
10
37
|
|
@@ -21,10 +48,31 @@ module Prism
|
|
21
48
|
@offsets = offsets # set after parsing is done
|
22
49
|
end
|
23
50
|
|
51
|
+
# Replace the value of start_line with the given value.
|
52
|
+
def replace_start_line(start_line)
|
53
|
+
@start_line = start_line
|
54
|
+
end
|
55
|
+
|
56
|
+
# Replace the value of offsets with the given value.
|
57
|
+
def replace_offsets(offsets)
|
58
|
+
@offsets.replace(offsets)
|
59
|
+
end
|
60
|
+
|
61
|
+
# Returns the encoding of the source code, which is set by parameters to the
|
62
|
+
# parser or by the encoding magic comment.
|
63
|
+
def encoding
|
64
|
+
source.encoding
|
65
|
+
end
|
66
|
+
|
67
|
+
# Returns the lines of the source code as an array of strings.
|
68
|
+
def lines
|
69
|
+
source.lines
|
70
|
+
end
|
71
|
+
|
24
72
|
# Perform a byteslice on the source code using the given byte offset and
|
25
73
|
# byte length.
|
26
74
|
def slice(byte_offset, length)
|
27
|
-
source.byteslice(byte_offset, length)
|
75
|
+
source.byteslice(byte_offset, length) or raise
|
28
76
|
end
|
29
77
|
|
30
78
|
# Binary search through the offsets to find the line number for the given
|
@@ -39,6 +87,12 @@ module Prism
|
|
39
87
|
offsets[find_line(byte_offset)]
|
40
88
|
end
|
41
89
|
|
90
|
+
# Returns the byte offset of the end of the line corresponding to the given
|
91
|
+
# byte offset.
|
92
|
+
def line_end(byte_offset)
|
93
|
+
offsets[find_line(byte_offset) + 1] || source.bytesize
|
94
|
+
end
|
95
|
+
|
42
96
|
# Return the column number for the given byte offset.
|
43
97
|
def column(byte_offset)
|
44
98
|
byte_offset - line_start(byte_offset)
|
@@ -46,7 +100,7 @@ module Prism
|
|
46
100
|
|
47
101
|
# Return the character offset for the given byte offset.
|
48
102
|
def character_offset(byte_offset)
|
49
|
-
source.byteslice(0, byte_offset).length
|
103
|
+
(source.byteslice(0, byte_offset) or raise).length
|
50
104
|
end
|
51
105
|
|
52
106
|
# Return the column number in characters for the given byte offset.
|
@@ -60,9 +114,26 @@ module Prism
|
|
60
114
|
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
|
61
115
|
# concept of code units that differs from the number of characters in other
|
62
116
|
# encodings, it is not captured here.
|
117
|
+
#
|
118
|
+
# We purposefully replace invalid and undefined characters with replacement
|
119
|
+
# characters in this conversion. This happens for two reasons. First, it's
|
120
|
+
# possible that the given byte offset will not occur on a character
|
121
|
+
# boundary. Second, it's possible that the source code will contain a
|
122
|
+
# character that has no equivalent in the given encoding.
|
63
123
|
def code_units_offset(byte_offset, encoding)
|
64
|
-
byteslice = source.byteslice(0, byte_offset).encode(encoding)
|
65
|
-
|
124
|
+
byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
|
125
|
+
|
126
|
+
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
|
127
|
+
byteslice.bytesize / 2
|
128
|
+
else
|
129
|
+
byteslice.length
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
# Generate a cache that targets a specific encoding for calculating code
|
134
|
+
# unit offsets.
|
135
|
+
def code_units_cache(encoding)
|
136
|
+
CodeUnitsCache.new(source, encoding)
|
66
137
|
end
|
67
138
|
|
68
139
|
# Returns the column number in code units for the given encoding for the
|
@@ -71,6 +142,13 @@ module Prism
|
|
71
142
|
code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
|
72
143
|
end
|
73
144
|
|
145
|
+
# Freeze this object and the objects it contains.
|
146
|
+
def deep_freeze
|
147
|
+
source.freeze
|
148
|
+
offsets.freeze
|
149
|
+
freeze
|
150
|
+
end
|
151
|
+
|
74
152
|
private
|
75
153
|
|
76
154
|
# Binary search through the offsets to find the line number for the given
|
@@ -81,9 +159,9 @@ module Prism
|
|
81
159
|
|
82
160
|
while left <= right
|
83
161
|
mid = left + (right - left) / 2
|
84
|
-
return mid if offsets[mid] == byte_offset
|
162
|
+
return mid if (offset = offsets[mid]) == byte_offset
|
85
163
|
|
86
|
-
if
|
164
|
+
if offset < byte_offset
|
87
165
|
left = mid + 1
|
88
166
|
else
|
89
167
|
right = mid - 1
|
@@ -94,6 +172,120 @@ module Prism
|
|
94
172
|
end
|
95
173
|
end
|
96
174
|
|
175
|
+
# A cache that can be used to quickly compute code unit offsets from byte
|
176
|
+
# offsets. It purposefully provides only a single #[] method to access the
|
177
|
+
# cache in order to minimize surface area.
|
178
|
+
#
|
179
|
+
# Note that there are some known issues here that may or may not be addressed
|
180
|
+
# in the future:
|
181
|
+
#
|
182
|
+
# * The first is that there are issues when the cache computes values that are
|
183
|
+
# not on character boundaries. This can result in subsequent computations
|
184
|
+
# being off by one or more code units.
|
185
|
+
# * The second is that this cache is currently unbounded. In theory we could
|
186
|
+
# introduce some kind of LRU cache to limit the number of entries, but this
|
187
|
+
# has not yet been implemented.
|
188
|
+
#
|
189
|
+
class CodeUnitsCache
|
190
|
+
class UTF16Counter # :nodoc:
|
191
|
+
def initialize(source, encoding)
|
192
|
+
@source = source
|
193
|
+
@encoding = encoding
|
194
|
+
end
|
195
|
+
|
196
|
+
def count(byte_offset, byte_length)
|
197
|
+
@source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
class LengthCounter # :nodoc:
|
202
|
+
def initialize(source, encoding)
|
203
|
+
@source = source
|
204
|
+
@encoding = encoding
|
205
|
+
end
|
206
|
+
|
207
|
+
def count(byte_offset, byte_length)
|
208
|
+
@source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
private_constant :UTF16Counter, :LengthCounter
|
213
|
+
|
214
|
+
# Initialize a new cache with the given source and encoding.
|
215
|
+
def initialize(source, encoding)
|
216
|
+
@source = source
|
217
|
+
@counter =
|
218
|
+
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
|
219
|
+
UTF16Counter.new(source, encoding)
|
220
|
+
else
|
221
|
+
LengthCounter.new(source, encoding)
|
222
|
+
end
|
223
|
+
|
224
|
+
@cache = {} #: Hash[Integer, Integer]
|
225
|
+
@offsets = [] #: Array[Integer]
|
226
|
+
end
|
227
|
+
|
228
|
+
# Retrieve the code units offset from the given byte offset.
|
229
|
+
def [](byte_offset)
|
230
|
+
@cache[byte_offset] ||=
|
231
|
+
if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
|
232
|
+
@offsets << byte_offset
|
233
|
+
@counter.count(0, byte_offset)
|
234
|
+
elsif index == 0
|
235
|
+
@offsets.unshift(byte_offset)
|
236
|
+
@counter.count(0, byte_offset)
|
237
|
+
else
|
238
|
+
@offsets.insert(index, byte_offset)
|
239
|
+
offset = @offsets[index - 1]
|
240
|
+
@cache[offset] + @counter.count(offset, byte_offset - offset)
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
# Specialized version of Prism::Source for source code that includes ASCII
|
246
|
+
# characters only. This class is used to apply performance optimizations that
|
247
|
+
# cannot be applied to sources that include multibyte characters.
|
248
|
+
#
|
249
|
+
# In the extremely rare case that a source includes multi-byte characters but
|
250
|
+
# is marked as binary because of a magic encoding comment and it cannot be
|
251
|
+
# eagerly converted to UTF-8, this class will be used as well. This is because
|
252
|
+
# at that point we will treat everything as single-byte characters.
|
253
|
+
class ASCIISource < Source
|
254
|
+
# Return the character offset for the given byte offset.
|
255
|
+
def character_offset(byte_offset)
|
256
|
+
byte_offset
|
257
|
+
end
|
258
|
+
|
259
|
+
# Return the column number in characters for the given byte offset.
|
260
|
+
def character_column(byte_offset)
|
261
|
+
byte_offset - line_start(byte_offset)
|
262
|
+
end
|
263
|
+
|
264
|
+
# Returns the offset from the start of the file for the given byte offset
|
265
|
+
# counting in code units for the given encoding.
|
266
|
+
#
|
267
|
+
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
|
268
|
+
# concept of code units that differs from the number of characters in other
|
269
|
+
# encodings, it is not captured here.
|
270
|
+
def code_units_offset(byte_offset, encoding)
|
271
|
+
byte_offset
|
272
|
+
end
|
273
|
+
|
274
|
+
# Returns a cache that is the identity function in order to maintain the
|
275
|
+
# same interface. We can do this because code units are always equivalent to
|
276
|
+
# byte offsets for ASCII-only sources.
|
277
|
+
def code_units_cache(encoding)
|
278
|
+
->(byte_offset) { byte_offset }
|
279
|
+
end
|
280
|
+
|
281
|
+
# Specialized version of `code_units_column` that does not depend on
|
282
|
+
# `code_units_offset`, which is a more expensive operation. This is
|
283
|
+
# essentially the same as `Prism::Source#column`.
|
284
|
+
def code_units_column(byte_offset, encoding)
|
285
|
+
byte_offset - line_start(byte_offset)
|
286
|
+
end
|
287
|
+
end
|
288
|
+
|
97
289
|
# This represents a location in the source.
|
98
290
|
class Location
|
99
291
|
# A Source object that is used to determine more information from the given
|
@@ -108,25 +300,56 @@ module Prism
|
|
108
300
|
# The length of this location in bytes.
|
109
301
|
attr_reader :length
|
110
302
|
|
111
|
-
# The list of comments attached to this location
|
112
|
-
attr_reader :comments
|
113
|
-
|
114
303
|
# Create a new location object with the given source, start byte offset, and
|
115
304
|
# byte length.
|
116
305
|
def initialize(source, start_offset, length)
|
117
306
|
@source = source
|
118
307
|
@start_offset = start_offset
|
119
308
|
@length = length
|
120
|
-
|
309
|
+
|
310
|
+
# These are used to store comments that are associated with this location.
|
311
|
+
# They are initialized to `nil` to save on memory when there are no
|
312
|
+
# comments to be attached and/or the comment-related APIs are not used.
|
313
|
+
@leading_comments = nil
|
314
|
+
@trailing_comments = nil
|
315
|
+
end
|
316
|
+
|
317
|
+
# These are the comments that are associated with this location that exist
|
318
|
+
# before the start of this location.
|
319
|
+
def leading_comments
|
320
|
+
@leading_comments ||= []
|
321
|
+
end
|
322
|
+
|
323
|
+
# Attach a comment to the leading comments of this location.
|
324
|
+
def leading_comment(comment)
|
325
|
+
leading_comments << comment
|
326
|
+
end
|
327
|
+
|
328
|
+
# These are the comments that are associated with this location that exist
|
329
|
+
# after the end of this location.
|
330
|
+
def trailing_comments
|
331
|
+
@trailing_comments ||= []
|
332
|
+
end
|
333
|
+
|
334
|
+
# Attach a comment to the trailing comments of this location.
|
335
|
+
def trailing_comment(comment)
|
336
|
+
trailing_comments << comment
|
337
|
+
end
|
338
|
+
|
339
|
+
# Returns all comments that are associated with this location (both leading
|
340
|
+
# and trailing comments).
|
341
|
+
def comments
|
342
|
+
[*@leading_comments, *@trailing_comments]
|
121
343
|
end
|
122
344
|
|
123
345
|
# Create a new location object with the given options.
|
124
|
-
def copy(
|
125
|
-
Location.new(
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
346
|
+
def copy(source: self.source, start_offset: self.start_offset, length: self.length)
|
347
|
+
Location.new(source, start_offset, length)
|
348
|
+
end
|
349
|
+
|
350
|
+
# Returns a new location that is the result of chopping off the last byte.
|
351
|
+
def chop
|
352
|
+
copy(length: length == 0 ? length : length - 1)
|
130
353
|
end
|
131
354
|
|
132
355
|
# Returns a string representation of this location.
|
@@ -134,11 +357,25 @@ module Prism
|
|
134
357
|
"#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
|
135
358
|
end
|
136
359
|
|
360
|
+
# Returns all of the lines of the source code associated with this location.
|
361
|
+
def source_lines
|
362
|
+
source.lines
|
363
|
+
end
|
364
|
+
|
137
365
|
# The source code that this location represents.
|
138
366
|
def slice
|
139
367
|
source.slice(start_offset, length)
|
140
368
|
end
|
141
369
|
|
370
|
+
# The source code that this location represents starting from the beginning
|
371
|
+
# of the line that this location starts on to the end of the line that this
|
372
|
+
# location ends on.
|
373
|
+
def slice_lines
|
374
|
+
line_start = source.line_start(start_offset)
|
375
|
+
line_end = source.line_end(end_offset)
|
376
|
+
source.slice(line_start, line_end - line_start)
|
377
|
+
end
|
378
|
+
|
142
379
|
# The character offset from the beginning of the source where this location
|
143
380
|
# starts.
|
144
381
|
def start_character_offset
|
@@ -150,6 +387,12 @@ module Prism
|
|
150
387
|
source.code_units_offset(start_offset, encoding)
|
151
388
|
end
|
152
389
|
|
390
|
+
# The start offset from the start of the file in code units using the given
|
391
|
+
# cache to fetch or calculate the value.
|
392
|
+
def cached_start_code_units_offset(cache)
|
393
|
+
cache[start_offset]
|
394
|
+
end
|
395
|
+
|
153
396
|
# The byte offset from the beginning of the source where this location ends.
|
154
397
|
def end_offset
|
155
398
|
start_offset + length
|
@@ -166,6 +409,12 @@ module Prism
|
|
166
409
|
source.code_units_offset(end_offset, encoding)
|
167
410
|
end
|
168
411
|
|
412
|
+
# The end offset from the start of the file in code units using the given
|
413
|
+
# cache to fetch or calculate the value.
|
414
|
+
def cached_end_code_units_offset(cache)
|
415
|
+
cache[end_offset]
|
416
|
+
end
|
417
|
+
|
169
418
|
# The line number where this location starts.
|
170
419
|
def start_line
|
171
420
|
source.line(start_offset)
|
@@ -200,6 +449,12 @@ module Prism
|
|
200
449
|
source.code_units_column(start_offset, encoding)
|
201
450
|
end
|
202
451
|
|
452
|
+
# The start column in code units using the given cache to fetch or calculate
|
453
|
+
# the value.
|
454
|
+
def cached_start_code_units_column(cache)
|
455
|
+
cache[start_offset] - cache[source.line_start(start_offset)]
|
456
|
+
end
|
457
|
+
|
203
458
|
# The column number in bytes where this location ends from the start of the
|
204
459
|
# line.
|
205
460
|
def end_column
|
@@ -218,6 +473,12 @@ module Prism
|
|
218
473
|
source.code_units_column(end_offset, encoding)
|
219
474
|
end
|
220
475
|
|
476
|
+
# The end column in code units using the given cache to fetch or calculate
|
477
|
+
# the value.
|
478
|
+
def cached_end_code_units_column(cache)
|
479
|
+
cache[end_offset] - cache[source.line_start(end_offset)]
|
480
|
+
end
|
481
|
+
|
221
482
|
# Implement the hash pattern matching interface for Location.
|
222
483
|
def deconstruct_keys(keys)
|
223
484
|
{ start_offset: start_offset, end_offset: end_offset }
|
@@ -230,7 +491,7 @@ module Prism
|
|
230
491
|
|
231
492
|
# Returns true if the given other location is equal to this location.
|
232
493
|
def ==(other)
|
233
|
-
|
494
|
+
Location === other &&
|
234
495
|
other.start_offset == start_offset &&
|
235
496
|
other.end_offset == end_offset
|
236
497
|
end
|
@@ -245,11 +506,16 @@ module Prism
|
|
245
506
|
Location.new(source, start_offset, other.end_offset - start_offset)
|
246
507
|
end
|
247
508
|
|
248
|
-
#
|
249
|
-
#
|
250
|
-
#
|
251
|
-
def
|
252
|
-
|
509
|
+
# Join this location with the first occurrence of the string in the source
|
510
|
+
# that occurs after this location on the same line, and return the new
|
511
|
+
# location. This will raise an error if the string does not exist.
|
512
|
+
def adjoin(string)
|
513
|
+
line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset)
|
514
|
+
|
515
|
+
line_suffix_index = line_suffix.byteindex(string)
|
516
|
+
raise "Could not find #{string}" if line_suffix_index.nil?
|
517
|
+
|
518
|
+
Location.new(source, start_offset, length + line_suffix_index + string.bytesize)
|
253
519
|
end
|
254
520
|
end
|
255
521
|
|
@@ -268,6 +534,11 @@ module Prism
|
|
268
534
|
def deconstruct_keys(keys)
|
269
535
|
{ location: location }
|
270
536
|
end
|
537
|
+
|
538
|
+
# Returns the content of the comment by slicing it from the source code.
|
539
|
+
def slice
|
540
|
+
location.slice
|
541
|
+
end
|
271
542
|
end
|
272
543
|
|
273
544
|
# InlineComment objects are the most common. They correspond to comments in
|
@@ -336,6 +607,10 @@ module Prism
|
|
336
607
|
|
337
608
|
# This represents an error that was encountered during parsing.
|
338
609
|
class ParseError
|
610
|
+
# The type of error. This is an _internal_ symbol that is used for
|
611
|
+
# communicating with translation layers. It is not meant to be public API.
|
612
|
+
attr_reader :type
|
613
|
+
|
339
614
|
# The message associated with this error.
|
340
615
|
attr_reader :message
|
341
616
|
|
@@ -346,7 +621,8 @@ module Prism
|
|
346
621
|
attr_reader :level
|
347
622
|
|
348
623
|
# Create a new error object with the given message and location.
|
349
|
-
def initialize(message, location, level)
|
624
|
+
def initialize(type, message, location, level)
|
625
|
+
@type = type
|
350
626
|
@message = message
|
351
627
|
@location = location
|
352
628
|
@level = level
|
@@ -354,17 +630,21 @@ module Prism
|
|
354
630
|
|
355
631
|
# Implement the hash pattern matching interface for ParseError.
|
356
632
|
def deconstruct_keys(keys)
|
357
|
-
{ message: message, location: location, level: level }
|
633
|
+
{ type: type, message: message, location: location, level: level }
|
358
634
|
end
|
359
635
|
|
360
636
|
# Returns a string representation of this error.
|
361
637
|
def inspect
|
362
|
-
"#<Prism::ParseError @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
|
638
|
+
"#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
|
363
639
|
end
|
364
640
|
end
|
365
641
|
|
366
642
|
# This represents a warning that was encountered during parsing.
|
367
643
|
class ParseWarning
|
644
|
+
# The type of warning. This is an _internal_ symbol that is used for
|
645
|
+
# communicating with translation layers. It is not meant to be public API.
|
646
|
+
attr_reader :type
|
647
|
+
|
368
648
|
# The message associated with this warning.
|
369
649
|
attr_reader :message
|
370
650
|
|
@@ -375,7 +655,8 @@ module Prism
|
|
375
655
|
attr_reader :level
|
376
656
|
|
377
657
|
# Create a new warning object with the given message and location.
|
378
|
-
def initialize(message, location, level)
|
658
|
+
def initialize(type, message, location, level)
|
659
|
+
@type = type
|
379
660
|
@message = message
|
380
661
|
@location = location
|
381
662
|
@level = level
|
@@ -383,24 +664,19 @@ module Prism
|
|
383
664
|
|
384
665
|
# Implement the hash pattern matching interface for ParseWarning.
|
385
666
|
def deconstruct_keys(keys)
|
386
|
-
{ message: message, location: location, level: level }
|
667
|
+
{ type: type, message: message, location: location, level: level }
|
387
668
|
end
|
388
669
|
|
389
670
|
# Returns a string representation of this warning.
|
390
671
|
def inspect
|
391
|
-
"#<Prism::ParseWarning @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
|
672
|
+
"#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
|
392
673
|
end
|
393
674
|
end
|
394
675
|
|
395
676
|
# This represents the result of a call to ::parse or ::parse_file. It contains
|
396
|
-
# the
|
397
|
-
# encountered.
|
398
|
-
class
|
399
|
-
# The value that was generated by parsing. Normally this holds the AST, but
|
400
|
-
# it can sometimes how a list of tokens or other results passed back from
|
401
|
-
# the parser.
|
402
|
-
attr_reader :value
|
403
|
-
|
677
|
+
# the requested structure, any comments that were encountered, and any errors
|
678
|
+
# that were encountered.
|
679
|
+
class Result
|
404
680
|
# The list of comments that were encountered during parsing.
|
405
681
|
attr_reader :comments
|
406
682
|
|
@@ -421,9 +697,8 @@ module Prism
|
|
421
697
|
# A Source instance that represents the source code that was parsed.
|
422
698
|
attr_reader :source
|
423
699
|
|
424
|
-
# Create a new
|
425
|
-
def initialize(
|
426
|
-
@value = value
|
700
|
+
# Create a new result object with the given values.
|
701
|
+
def initialize(comments, magic_comments, data_loc, errors, warnings, source)
|
427
702
|
@comments = comments
|
428
703
|
@magic_comments = magic_comments
|
429
704
|
@data_loc = data_loc
|
@@ -432,9 +707,14 @@ module Prism
|
|
432
707
|
@source = source
|
433
708
|
end
|
434
709
|
|
435
|
-
# Implement the hash pattern matching interface for
|
710
|
+
# Implement the hash pattern matching interface for Result.
|
436
711
|
def deconstruct_keys(keys)
|
437
|
-
{
|
712
|
+
{ comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
|
713
|
+
end
|
714
|
+
|
715
|
+
# Returns the encoding of the source code that was parsed.
|
716
|
+
def encoding
|
717
|
+
source.encoding
|
438
718
|
end
|
439
719
|
|
440
720
|
# Returns true if there were no errors during parsing and false if there
|
@@ -448,6 +728,88 @@ module Prism
|
|
448
728
|
def failure?
|
449
729
|
!success?
|
450
730
|
end
|
731
|
+
|
732
|
+
# Create a code units cache for the given encoding.
|
733
|
+
def code_units_cache(encoding)
|
734
|
+
source.code_units_cache(encoding)
|
735
|
+
end
|
736
|
+
end
|
737
|
+
|
738
|
+
# This is a result specific to the `parse` and `parse_file` methods.
|
739
|
+
class ParseResult < Result
|
740
|
+
autoload :Comments, "prism/parse_result/comments"
|
741
|
+
autoload :Errors, "prism/parse_result/errors"
|
742
|
+
autoload :Newlines, "prism/parse_result/newlines"
|
743
|
+
|
744
|
+
private_constant :Comments
|
745
|
+
private_constant :Errors
|
746
|
+
private_constant :Newlines
|
747
|
+
|
748
|
+
# The syntax tree that was parsed from the source code.
|
749
|
+
attr_reader :value
|
750
|
+
|
751
|
+
# Create a new parse result object with the given values.
|
752
|
+
def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
|
753
|
+
@value = value
|
754
|
+
super(comments, magic_comments, data_loc, errors, warnings, source)
|
755
|
+
end
|
756
|
+
|
757
|
+
# Implement the hash pattern matching interface for ParseResult.
|
758
|
+
def deconstruct_keys(keys)
|
759
|
+
super.merge!(value: value)
|
760
|
+
end
|
761
|
+
|
762
|
+
# Attach the list of comments to their respective locations in the tree.
|
763
|
+
def attach_comments!
|
764
|
+
Comments.new(self).attach! # steep:ignore
|
765
|
+
end
|
766
|
+
|
767
|
+
# Walk the tree and mark nodes that are on a new line, loosely emulating
|
768
|
+
# the behavior of CRuby's `:line` tracepoint event.
|
769
|
+
def mark_newlines!
|
770
|
+
value.accept(Newlines.new(source.offsets.size)) # steep:ignore
|
771
|
+
end
|
772
|
+
|
773
|
+
# Returns a string representation of the syntax tree with the errors
|
774
|
+
# displayed inline.
|
775
|
+
def errors_format
|
776
|
+
Errors.new(self).format
|
777
|
+
end
|
778
|
+
end
|
779
|
+
|
780
|
+
# This is a result specific to the `lex` and `lex_file` methods.
|
781
|
+
class LexResult < Result
|
782
|
+
# The list of tokens that were parsed from the source code.
|
783
|
+
attr_reader :value
|
784
|
+
|
785
|
+
# Create a new lex result object with the given values.
|
786
|
+
def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
|
787
|
+
@value = value
|
788
|
+
super(comments, magic_comments, data_loc, errors, warnings, source)
|
789
|
+
end
|
790
|
+
|
791
|
+
# Implement the hash pattern matching interface for LexResult.
|
792
|
+
def deconstruct_keys(keys)
|
793
|
+
super.merge!(value: value)
|
794
|
+
end
|
795
|
+
end
|
796
|
+
|
797
|
+
# This is a result specific to the `parse_lex` and `parse_lex_file` methods.
|
798
|
+
class ParseLexResult < Result
|
799
|
+
# A tuple of the syntax tree and the list of tokens that were parsed from
|
800
|
+
# the source code.
|
801
|
+
attr_reader :value
|
802
|
+
|
803
|
+
# Create a new parse lex result object with the given values.
|
804
|
+
def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
|
805
|
+
@value = value
|
806
|
+
super(comments, magic_comments, data_loc, errors, warnings, source)
|
807
|
+
end
|
808
|
+
|
809
|
+
# Implement the hash pattern matching interface for ParseLexResult.
|
810
|
+
def deconstruct_keys(keys)
|
811
|
+
super.merge!(value: value)
|
812
|
+
end
|
451
813
|
end
|
452
814
|
|
453
815
|
# This represents a token from the Ruby source.
|
@@ -477,8 +839,9 @@ module Prism
|
|
477
839
|
|
478
840
|
# A Location object representing the location of this token in the source.
|
479
841
|
def location
|
480
|
-
|
481
|
-
|
842
|
+
location = @location
|
843
|
+
return location if location.is_a?(Location)
|
844
|
+
@location = Location.new(source, location >> 32, location & 0xFFFFFFFF)
|
482
845
|
end
|
483
846
|
|
484
847
|
# Implement the pretty print interface for Token.
|
@@ -498,9 +861,50 @@ module Prism
|
|
498
861
|
|
499
862
|
# Returns true if the given other token is equal to this token.
|
500
863
|
def ==(other)
|
501
|
-
|
864
|
+
Token === other &&
|
502
865
|
other.type == type &&
|
503
866
|
other.value == value
|
504
867
|
end
|
868
|
+
|
869
|
+
# Returns a string representation of this token.
|
870
|
+
def inspect
|
871
|
+
location
|
872
|
+
super
|
873
|
+
end
|
874
|
+
|
875
|
+
# Freeze this object and the objects it contains.
|
876
|
+
def deep_freeze
|
877
|
+
value.freeze
|
878
|
+
location.freeze
|
879
|
+
freeze
|
880
|
+
end
|
881
|
+
end
|
882
|
+
|
883
|
+
# This object is passed to the various Prism.* methods that accept the
|
884
|
+
# `scopes` option as an element of the list. It defines both the local
|
885
|
+
# variables visible at that scope as well as the forwarding parameters
|
886
|
+
# available at that scope.
|
887
|
+
class Scope
|
888
|
+
# The list of local variables that are defined in this scope. This should be
|
889
|
+
# defined as an array of symbols.
|
890
|
+
attr_reader :locals
|
891
|
+
|
892
|
+
# The list of local variables that are forwarded to the next scope. This
|
893
|
+
# should be defined as an array of symbols containing the specific values of
|
894
|
+
# :*, :**, :&, or :"...".
|
895
|
+
attr_reader :forwarding
|
896
|
+
|
897
|
+
# Create a new scope object with the given locals and forwarding.
|
898
|
+
def initialize(locals, forwarding)
|
899
|
+
@locals = locals
|
900
|
+
@forwarding = forwarding
|
901
|
+
end
|
902
|
+
end
|
903
|
+
|
904
|
+
# Create a new scope with the given locals and forwarding options that is
|
905
|
+
# suitable for passing into one of the Prism.* methods that accepts the
|
906
|
+
# `scopes` option.
|
907
|
+
def self.scope(locals: [], forwarding: [])
|
908
|
+
Scope.new(locals, forwarding)
|
505
909
|
end
|
506
910
|
end
|