prism 1.1.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -1
- data/Makefile +1 -1
- data/config.yml +422 -3
- data/docs/build_system.md +8 -11
- data/docs/relocation.md +34 -0
- data/ext/prism/api_node.c +18 -10
- data/ext/prism/extconf.rb +13 -36
- data/ext/prism/extension.c +68 -0
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +427 -3
- data/include/prism/defines.h +22 -7
- data/include/prism/diagnostic.h +1 -0
- data/include/prism/parser.h +25 -12
- data/include/prism/version.h +2 -2
- data/include/prism.h +47 -0
- data/lib/prism/dot_visitor.rb +10 -0
- data/lib/prism/dsl.rb +4 -4
- data/lib/prism/ffi.rb +49 -2
- data/lib/prism/inspect_visitor.rb +2 -0
- data/lib/prism/node.rb +1839 -96
- data/lib/prism/parse_result/errors.rb +1 -1
- data/lib/prism/parse_result.rb +140 -3
- data/lib/prism/reflection.rb +2 -2
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +17 -5
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/compiler.rb +36 -26
- data/lib/prism/translation/parser.rb +3 -3
- data/lib/prism/translation/ripper.rb +1 -5
- data/lib/prism/translation/ruby_parser.rb +14 -5
- data/lib/prism.rb +6 -4
- data/prism.gemspec +7 -1
- data/rbi/prism/dsl.rbi +4 -4
- data/rbi/prism/node.rbi +5118 -1030
- data/rbi/prism/parse_result.rbi +29 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism.rbi +34 -34
- data/sig/prism/dsl.rbs +2 -2
- data/sig/prism/node.rbs +13 -98
- data/sig/prism/parse_result.rbs +20 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/string_query.rbs +11 -0
- data/src/diagnostic.c +3 -1
- data/src/node.c +18 -0
- data/src/prettyprint.c +32 -0
- data/src/prism.c +586 -195
- data/src/regexp.c +7 -3
- data/src/serialize.c +12 -0
- data/src/static_literals.c +1 -1
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_string.c +1 -0
- metadata +9 -3
@@ -17,7 +17,7 @@ module Prism
|
|
17
17
|
|
18
18
|
# Formats the errors in a human-readable way and returns them as a string.
|
19
19
|
def format
|
20
|
-
error_lines = {}
|
20
|
+
error_lines = {} #: Hash[Integer, Array[ParseError]]
|
21
21
|
parse_result.errors.each do |error|
|
22
22
|
location = error.location
|
23
23
|
(location.start_line..location.end_line).each do |line|
|
data/lib/prism/parse_result.rb
CHANGED
@@ -12,6 +12,21 @@ module Prism
|
|
12
12
|
def self.for(source, start_line = 1, offsets = [])
|
13
13
|
if source.ascii_only?
|
14
14
|
ASCIISource.new(source, start_line, offsets)
|
15
|
+
elsif source.encoding == Encoding::BINARY
|
16
|
+
source.force_encoding(Encoding::UTF_8)
|
17
|
+
|
18
|
+
if source.valid_encoding?
|
19
|
+
new(source, start_line, offsets)
|
20
|
+
else
|
21
|
+
# This is an extremely niche use case where the file is marked as
|
22
|
+
# binary, contains multi-byte characters, and those characters are not
|
23
|
+
# valid UTF-8. In this case we'll mark it as binary and fall back to
|
24
|
+
# treating everything as a single-byte character. This _may_ cause
|
25
|
+
# problems when asking for code units, but it appears to be the
|
26
|
+
# cleanest solution at the moment.
|
27
|
+
source.force_encoding(Encoding::BINARY)
|
28
|
+
ASCIISource.new(source, start_line, offsets)
|
29
|
+
end
|
15
30
|
else
|
16
31
|
new(source, start_line, offsets)
|
17
32
|
end
|
@@ -89,8 +104,14 @@ module Prism
|
|
89
104
|
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
|
90
105
|
# concept of code units that differs from the number of characters in other
|
91
106
|
# encodings, it is not captured here.
|
107
|
+
#
|
108
|
+
# We purposefully replace invalid and undefined characters with replacement
|
109
|
+
# characters in this conversion. This happens for two reasons. First, it's
|
110
|
+
# possible that the given byte offset will not occur on a character
|
111
|
+
# boundary. Second, it's possible that the source code will contain a
|
112
|
+
# character that has no equivalent in the given encoding.
|
92
113
|
def code_units_offset(byte_offset, encoding)
|
93
|
-
byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
|
114
|
+
byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
|
94
115
|
|
95
116
|
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
|
96
117
|
byteslice.bytesize / 2
|
@@ -99,6 +120,12 @@ module Prism
|
|
99
120
|
end
|
100
121
|
end
|
101
122
|
|
123
|
+
# Generate a cache that targets a specific encoding for calculating code
|
124
|
+
# unit offsets.
|
125
|
+
def code_units_cache(encoding)
|
126
|
+
CodeUnitsCache.new(source, encoding)
|
127
|
+
end
|
128
|
+
|
102
129
|
# Returns the column number in code units for the given encoding for the
|
103
130
|
# given byte offset.
|
104
131
|
def code_units_column(byte_offset, encoding)
|
@@ -128,10 +155,84 @@ module Prism
|
|
128
155
|
end
|
129
156
|
end
|
130
157
|
|
158
|
+
# A cache that can be used to quickly compute code unit offsets from byte
|
159
|
+
# offsets. It purposefully provides only a single #[] method to access the
|
160
|
+
# cache in order to minimize surface area.
|
161
|
+
#
|
162
|
+
# Note that there are some known issues here that may or may not be addressed
|
163
|
+
# in the future:
|
164
|
+
#
|
165
|
+
# * The first is that there are issues when the cache computes values that are
|
166
|
+
# not on character boundaries. This can result in subsequent computations
|
167
|
+
# being off by one or more code units.
|
168
|
+
# * The second is that this cache is currently unbounded. In theory we could
|
169
|
+
# introduce some kind of LRU cache to limit the number of entries, but this
|
170
|
+
# has not yet been implemented.
|
171
|
+
#
|
172
|
+
class CodeUnitsCache
|
173
|
+
class UTF16Counter # :nodoc:
|
174
|
+
def initialize(source, encoding)
|
175
|
+
@source = source
|
176
|
+
@encoding = encoding
|
177
|
+
end
|
178
|
+
|
179
|
+
def count(byte_offset, byte_length)
|
180
|
+
@source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
class LengthCounter # :nodoc:
|
185
|
+
def initialize(source, encoding)
|
186
|
+
@source = source
|
187
|
+
@encoding = encoding
|
188
|
+
end
|
189
|
+
|
190
|
+
def count(byte_offset, byte_length)
|
191
|
+
@source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
private_constant :UTF16Counter, :LengthCounter
|
196
|
+
|
197
|
+
# Initialize a new cache with the given source and encoding.
|
198
|
+
def initialize(source, encoding)
|
199
|
+
@source = source
|
200
|
+
@counter =
|
201
|
+
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
|
202
|
+
UTF16Counter.new(source, encoding)
|
203
|
+
else
|
204
|
+
LengthCounter.new(source, encoding)
|
205
|
+
end
|
206
|
+
|
207
|
+
@cache = {} #: Hash[Integer, Integer]
|
208
|
+
@offsets = [] #: Array[Integer]
|
209
|
+
end
|
210
|
+
|
211
|
+
# Retrieve the code units offset from the given byte offset.
|
212
|
+
def [](byte_offset)
|
213
|
+
@cache[byte_offset] ||=
|
214
|
+
if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
|
215
|
+
@offsets << byte_offset
|
216
|
+
@counter.count(0, byte_offset)
|
217
|
+
elsif index == 0
|
218
|
+
@offsets.unshift(byte_offset)
|
219
|
+
@counter.count(0, byte_offset)
|
220
|
+
else
|
221
|
+
@offsets.insert(index, byte_offset)
|
222
|
+
offset = @offsets[index - 1]
|
223
|
+
@cache[offset] + @counter.count(offset, byte_offset - offset)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
131
228
|
# Specialized version of Prism::Source for source code that includes ASCII
|
132
229
|
# characters only. This class is used to apply performance optimizations that
|
133
|
-
# cannot be applied to sources that include multibyte characters.
|
134
|
-
#
|
230
|
+
# cannot be applied to sources that include multibyte characters.
|
231
|
+
#
|
232
|
+
# In the extremely rare case that a source includes multi-byte characters but
|
233
|
+
# is marked as binary because of a magic encoding comment and it cannot be
|
234
|
+
# eagerly converted to UTF-8, this class will be used as well. This is because
|
235
|
+
# at that point we will treat everything as single-byte characters.
|
135
236
|
class ASCIISource < Source
|
136
237
|
# Return the character offset for the given byte offset.
|
137
238
|
def character_offset(byte_offset)
|
@@ -153,6 +254,13 @@ module Prism
|
|
153
254
|
byte_offset
|
154
255
|
end
|
155
256
|
|
257
|
+
# Returns a cache that is the identity function in order to maintain the
|
258
|
+
# same interface. We can do this because code units are always equivalent to
|
259
|
+
# byte offsets for ASCII-only sources.
|
260
|
+
def code_units_cache(encoding)
|
261
|
+
->(byte_offset) { byte_offset }
|
262
|
+
end
|
263
|
+
|
156
264
|
# Specialized version of `code_units_column` that does not depend on
|
157
265
|
# `code_units_offset`, which is a more expensive operation. This is
|
158
266
|
# essentially the same as `Prism::Source#column`.
|
@@ -262,6 +370,12 @@ module Prism
|
|
262
370
|
source.code_units_offset(start_offset, encoding)
|
263
371
|
end
|
264
372
|
|
373
|
+
# The start offset from the start of the file in code units using the given
|
374
|
+
# cache to fetch or calculate the value.
|
375
|
+
def cached_start_code_units_offset(cache)
|
376
|
+
cache[start_offset]
|
377
|
+
end
|
378
|
+
|
265
379
|
# The byte offset from the beginning of the source where this location ends.
|
266
380
|
def end_offset
|
267
381
|
start_offset + length
|
@@ -278,6 +392,12 @@ module Prism
|
|
278
392
|
source.code_units_offset(end_offset, encoding)
|
279
393
|
end
|
280
394
|
|
395
|
+
# The end offset from the start of the file in code units using the given
|
396
|
+
# cache to fetch or calculate the value.
|
397
|
+
def cached_end_code_units_offset(cache)
|
398
|
+
cache[end_offset]
|
399
|
+
end
|
400
|
+
|
281
401
|
# The line number where this location starts.
|
282
402
|
def start_line
|
283
403
|
source.line(start_offset)
|
@@ -312,6 +432,12 @@ module Prism
|
|
312
432
|
source.code_units_column(start_offset, encoding)
|
313
433
|
end
|
314
434
|
|
435
|
+
# The start column in code units using the given cache to fetch or calculate
|
436
|
+
# the value.
|
437
|
+
def cached_start_code_units_column(cache)
|
438
|
+
cache[start_offset] - cache[source.line_start(start_offset)]
|
439
|
+
end
|
440
|
+
|
315
441
|
# The column number in bytes where this location ends from the start of the
|
316
442
|
# line.
|
317
443
|
def end_column
|
@@ -330,6 +456,12 @@ module Prism
|
|
330
456
|
source.code_units_column(end_offset, encoding)
|
331
457
|
end
|
332
458
|
|
459
|
+
# The end column in code units using the given cache to fetch or calculate
|
460
|
+
# the value.
|
461
|
+
def cached_end_code_units_column(cache)
|
462
|
+
cache[end_offset] - cache[source.line_start(end_offset)]
|
463
|
+
end
|
464
|
+
|
333
465
|
# Implement the hash pattern matching interface for Location.
|
334
466
|
def deconstruct_keys(keys)
|
335
467
|
{ start_offset: start_offset, end_offset: end_offset }
|
@@ -579,6 +711,11 @@ module Prism
|
|
579
711
|
def failure?
|
580
712
|
!success?
|
581
713
|
end
|
714
|
+
|
715
|
+
# Create a code units cache for the given encoding.
|
716
|
+
def code_units_cache(encoding)
|
717
|
+
source.code_units_cache(encoding)
|
718
|
+
end
|
582
719
|
end
|
583
720
|
|
584
721
|
# This is a result specific to the `parse` and `parse_file` methods.
|
data/lib/prism/reflection.rb
CHANGED
@@ -396,11 +396,11 @@ module Prism
|
|
396
396
|
when :unless_node
|
397
397
|
[LocationField.new(:keyword_loc), NodeField.new(:predicate), OptionalLocationField.new(:then_keyword_loc), OptionalNodeField.new(:statements), OptionalNodeField.new(:else_clause), OptionalLocationField.new(:end_keyword_loc)]
|
398
398
|
when :until_node
|
399
|
-
[FlagsField.new(:flags, [:begin_modifier?]), LocationField.new(:keyword_loc), OptionalLocationField.new(:closing_loc), NodeField.new(:predicate), OptionalNodeField.new(:statements)]
|
399
|
+
[FlagsField.new(:flags, [:begin_modifier?]), LocationField.new(:keyword_loc), OptionalLocationField.new(:do_keyword_loc), OptionalLocationField.new(:closing_loc), NodeField.new(:predicate), OptionalNodeField.new(:statements)]
|
400
400
|
when :when_node
|
401
401
|
[LocationField.new(:keyword_loc), NodeListField.new(:conditions), OptionalLocationField.new(:then_keyword_loc), OptionalNodeField.new(:statements)]
|
402
402
|
when :while_node
|
403
|
-
[FlagsField.new(:flags, [:begin_modifier?]), LocationField.new(:keyword_loc), OptionalLocationField.new(:closing_loc), NodeField.new(:predicate), OptionalNodeField.new(:statements)]
|
403
|
+
[FlagsField.new(:flags, [:begin_modifier?]), LocationField.new(:keyword_loc), OptionalLocationField.new(:do_keyword_loc), OptionalLocationField.new(:closing_loc), NodeField.new(:predicate), OptionalNodeField.new(:statements)]
|
404
404
|
when :x_string_node
|
405
405
|
[FlagsField.new(:flags, [:forced_utf8_encoding?, :forced_binary_encoding?]), LocationField.new(:opening_loc), LocationField.new(:content_loc), LocationField.new(:closing_loc), StringField.new(:unescaped)]
|
406
406
|
when :yield_node
|