jruby-prism-parser 0.24.0-java → 1.4.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +269 -1
  4. data/CONTRIBUTING.md +0 -4
  5. data/Makefile +25 -18
  6. data/README.md +57 -6
  7. data/config.yml +1724 -140
  8. data/docs/build_system.md +39 -11
  9. data/docs/configuration.md +4 -0
  10. data/docs/cruby_compilation.md +1 -1
  11. data/docs/fuzzing.md +1 -1
  12. data/docs/parser_translation.md +14 -9
  13. data/docs/parsing_rules.md +4 -1
  14. data/docs/releasing.md +8 -10
  15. data/docs/relocation.md +34 -0
  16. data/docs/ripper_translation.md +72 -0
  17. data/docs/ruby_api.md +2 -1
  18. data/docs/serialization.md +29 -5
  19. data/ext/prism/api_node.c +3395 -1999
  20. data/ext/prism/api_pack.c +9 -0
  21. data/ext/prism/extconf.rb +55 -34
  22. data/ext/prism/extension.c +597 -346
  23. data/ext/prism/extension.h +6 -5
  24. data/include/prism/ast.h +2612 -455
  25. data/include/prism/defines.h +160 -2
  26. data/include/prism/diagnostic.h +188 -76
  27. data/include/prism/encoding.h +22 -4
  28. data/include/prism/node.h +89 -17
  29. data/include/prism/options.h +224 -12
  30. data/include/prism/pack.h +11 -0
  31. data/include/prism/parser.h +267 -66
  32. data/include/prism/prettyprint.h +8 -0
  33. data/include/prism/regexp.h +18 -8
  34. data/include/prism/static_literals.h +121 -0
  35. data/include/prism/util/pm_buffer.h +75 -2
  36. data/include/prism/util/pm_char.h +1 -2
  37. data/include/prism/util/pm_constant_pool.h +18 -9
  38. data/include/prism/util/pm_integer.h +126 -0
  39. data/include/prism/util/pm_list.h +1 -1
  40. data/include/prism/util/pm_newline_list.h +19 -0
  41. data/include/prism/util/pm_string.h +48 -8
  42. data/include/prism/version.h +3 -3
  43. data/include/prism.h +99 -5
  44. data/jruby-prism.jar +0 -0
  45. data/lib/prism/compiler.rb +11 -1
  46. data/lib/prism/desugar_compiler.rb +113 -74
  47. data/lib/prism/dispatcher.rb +45 -1
  48. data/lib/prism/dot_visitor.rb +201 -77
  49. data/lib/prism/dsl.rb +673 -461
  50. data/lib/prism/ffi.rb +233 -45
  51. data/lib/prism/inspect_visitor.rb +2389 -0
  52. data/lib/prism/lex_compat.rb +35 -16
  53. data/lib/prism/mutation_compiler.rb +24 -8
  54. data/lib/prism/node.rb +7731 -8460
  55. data/lib/prism/node_ext.rb +328 -32
  56. data/lib/prism/pack.rb +4 -0
  57. data/lib/prism/parse_result/comments.rb +34 -24
  58. data/lib/prism/parse_result/errors.rb +65 -0
  59. data/lib/prism/parse_result/newlines.rb +102 -12
  60. data/lib/prism/parse_result.rb +448 -44
  61. data/lib/prism/pattern.rb +28 -10
  62. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  63. data/lib/prism/polyfill/byteindex.rb +13 -0
  64. data/lib/prism/polyfill/unpack1.rb +14 -0
  65. data/lib/prism/reflection.rb +413 -0
  66. data/lib/prism/relocation.rb +504 -0
  67. data/lib/prism/serialize.rb +1940 -1198
  68. data/lib/prism/string_query.rb +30 -0
  69. data/lib/prism/translation/parser/builder.rb +61 -0
  70. data/lib/prism/translation/parser/compiler.rb +569 -195
  71. data/lib/prism/translation/parser/lexer.rb +516 -39
  72. data/lib/prism/translation/parser.rb +177 -12
  73. data/lib/prism/translation/parser33.rb +1 -1
  74. data/lib/prism/translation/parser34.rb +1 -1
  75. data/lib/prism/translation/parser35.rb +12 -0
  76. data/lib/prism/translation/ripper/sexp.rb +125 -0
  77. data/lib/prism/translation/ripper/shim.rb +5 -0
  78. data/lib/prism/translation/ripper.rb +3224 -462
  79. data/lib/prism/translation/ruby_parser.rb +194 -69
  80. data/lib/prism/translation.rb +4 -1
  81. data/lib/prism/version.rb +1 -1
  82. data/lib/prism/visitor.rb +13 -0
  83. data/lib/prism.rb +17 -27
  84. data/prism.gemspec +57 -17
  85. data/rbi/prism/compiler.rbi +12 -0
  86. data/rbi/prism/dsl.rbi +524 -0
  87. data/rbi/prism/inspect_visitor.rbi +12 -0
  88. data/rbi/prism/node.rbi +8722 -0
  89. data/rbi/prism/node_ext.rbi +107 -0
  90. data/rbi/prism/parse_result.rbi +404 -0
  91. data/rbi/prism/reflection.rbi +58 -0
  92. data/rbi/prism/string_query.rbi +12 -0
  93. data/rbi/prism/translation/parser.rbi +11 -0
  94. data/rbi/prism/translation/parser33.rbi +6 -0
  95. data/rbi/prism/translation/parser34.rbi +6 -0
  96. data/rbi/prism/translation/parser35.rbi +6 -0
  97. data/rbi/prism/translation/ripper.rbi +15 -0
  98. data/rbi/prism/visitor.rbi +473 -0
  99. data/rbi/prism.rbi +44 -7745
  100. data/sig/prism/compiler.rbs +9 -0
  101. data/sig/prism/dispatcher.rbs +16 -0
  102. data/sig/prism/dot_visitor.rbs +6 -0
  103. data/sig/prism/dsl.rbs +351 -0
  104. data/sig/prism/inspect_visitor.rbs +22 -0
  105. data/sig/prism/lex_compat.rbs +10 -0
  106. data/sig/prism/mutation_compiler.rbs +159 -0
  107. data/sig/prism/node.rbs +3614 -0
  108. data/sig/prism/node_ext.rbs +82 -0
  109. data/sig/prism/pack.rbs +43 -0
  110. data/sig/prism/parse_result.rbs +192 -0
  111. data/sig/prism/pattern.rbs +13 -0
  112. data/sig/prism/reflection.rbs +50 -0
  113. data/sig/prism/relocation.rbs +185 -0
  114. data/sig/prism/serialize.rbs +8 -0
  115. data/sig/prism/string_query.rbs +11 -0
  116. data/sig/prism/visitor.rbs +169 -0
  117. data/sig/prism.rbs +248 -4767
  118. data/src/diagnostic.c +672 -230
  119. data/src/encoding.c +211 -108
  120. data/src/node.c +7541 -1653
  121. data/src/options.c +135 -20
  122. data/src/pack.c +33 -17
  123. data/src/prettyprint.c +1543 -1485
  124. data/src/prism.c +7813 -3050
  125. data/src/regexp.c +225 -73
  126. data/src/serialize.c +101 -77
  127. data/src/static_literals.c +617 -0
  128. data/src/token_type.c +14 -13
  129. data/src/util/pm_buffer.c +187 -20
  130. data/src/util/pm_char.c +5 -5
  131. data/src/util/pm_constant_pool.c +39 -19
  132. data/src/util/pm_integer.c +670 -0
  133. data/src/util/pm_list.c +1 -1
  134. data/src/util/pm_newline_list.c +43 -5
  135. data/src/util/pm_string.c +213 -33
  136. data/src/util/pm_strncasecmp.c +13 -1
  137. data/src/util/pm_strpbrk.c +32 -6
  138. metadata +55 -19
  139. data/docs/ripper.md +0 -36
  140. data/include/prism/util/pm_state_stack.h +0 -42
  141. data/include/prism/util/pm_string_list.h +0 -44
  142. data/lib/prism/debug.rb +0 -206
  143. data/lib/prism/node_inspector.rb +0 -68
  144. data/lib/prism/translation/parser/rubocop.rb +0 -45
  145. data/rbi/prism_static.rbi +0 -207
  146. data/sig/prism_static.rbs +0 -201
  147. data/src/util/pm_state_stack.c +0 -25
  148. data/src/util/pm_string_list.c +0 -28
@@ -5,6 +5,33 @@ module Prism
5
5
  # conjunction with locations to allow them to resolve line numbers and source
6
6
  # ranges.
7
7
  class Source
8
+ # Create a new source object with the given source code. This method should
9
+ # be used instead of `new` and it will return either a `Source` or a
10
+ # specialized and more performant `ASCIISource` if no multibyte characters
11
+ # are present in the source code.
12
+ def self.for(source, start_line = 1, offsets = [])
13
+ if source.ascii_only?
14
+ ASCIISource.new(source, start_line, offsets)
15
+ elsif source.encoding == Encoding::BINARY
16
+ source.force_encoding(Encoding::UTF_8)
17
+
18
+ if source.valid_encoding?
19
+ new(source, start_line, offsets)
20
+ else
21
+ # This is an extremely niche use case where the file is marked as
22
+ # binary, contains multi-byte characters, and those characters are not
23
+ # valid UTF-8. In this case we'll mark it as binary and fall back to
24
+ # treating everything as a single-byte character. This _may_ cause
25
+ # problems when asking for code units, but it appears to be the
26
+ # cleanest solution at the moment.
27
+ source.force_encoding(Encoding::BINARY)
28
+ ASCIISource.new(source, start_line, offsets)
29
+ end
30
+ else
31
+ new(source, start_line, offsets)
32
+ end
33
+ end
34
+
8
35
  # The source code that this source object represents.
9
36
  attr_reader :source
10
37
 
@@ -21,10 +48,31 @@ module Prism
21
48
  @offsets = offsets # set after parsing is done
22
49
  end
23
50
 
51
+ # Replace the value of start_line with the given value.
52
+ def replace_start_line(start_line)
53
+ @start_line = start_line
54
+ end
55
+
56
+ # Replace the value of offsets with the given value.
57
+ def replace_offsets(offsets)
58
+ @offsets.replace(offsets)
59
+ end
60
+
61
+ # Returns the encoding of the source code, which is set by parameters to the
62
+ # parser or by the encoding magic comment.
63
+ def encoding
64
+ source.encoding
65
+ end
66
+
67
+ # Returns the lines of the source code as an array of strings.
68
+ def lines
69
+ source.lines
70
+ end
71
+
24
72
  # Perform a byteslice on the source code using the given byte offset and
25
73
  # byte length.
26
74
  def slice(byte_offset, length)
27
- source.byteslice(byte_offset, length)
75
+ source.byteslice(byte_offset, length) or raise
28
76
  end
29
77
 
30
78
  # Binary search through the offsets to find the line number for the given
@@ -39,6 +87,12 @@ module Prism
39
87
  offsets[find_line(byte_offset)]
40
88
  end
41
89
 
90
+ # Returns the byte offset of the end of the line corresponding to the given
91
+ # byte offset.
92
+ def line_end(byte_offset)
93
+ offsets[find_line(byte_offset) + 1] || source.bytesize
94
+ end
95
+
42
96
  # Return the column number for the given byte offset.
43
97
  def column(byte_offset)
44
98
  byte_offset - line_start(byte_offset)
@@ -46,7 +100,7 @@ module Prism
46
100
 
47
101
  # Return the character offset for the given byte offset.
48
102
  def character_offset(byte_offset)
49
- source.byteslice(0, byte_offset).length
103
+ (source.byteslice(0, byte_offset) or raise).length
50
104
  end
51
105
 
52
106
  # Return the column number in characters for the given byte offset.
@@ -60,9 +114,26 @@ module Prism
60
114
  # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
61
115
  # concept of code units that differs from the number of characters in other
62
116
  # encodings, it is not captured here.
117
+ #
118
+ # We purposefully replace invalid and undefined characters with replacement
119
+ # characters in this conversion. This happens for two reasons. First, it's
120
+ # possible that the given byte offset will not occur on a character
121
+ # boundary. Second, it's possible that the source code will contain a
122
+ # character that has no equivalent in the given encoding.
63
123
  def code_units_offset(byte_offset, encoding)
64
- byteslice = source.byteslice(0, byte_offset).encode(encoding)
65
- (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
124
+ byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
125
+
126
+ if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
127
+ byteslice.bytesize / 2
128
+ else
129
+ byteslice.length
130
+ end
131
+ end
132
+
133
+ # Generate a cache that targets a specific encoding for calculating code
134
+ # unit offsets.
135
+ def code_units_cache(encoding)
136
+ CodeUnitsCache.new(source, encoding)
66
137
  end
67
138
 
68
139
  # Returns the column number in code units for the given encoding for the
@@ -71,6 +142,13 @@ module Prism
71
142
  code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
72
143
  end
73
144
 
145
+ # Freeze this object and the objects it contains.
146
+ def deep_freeze
147
+ source.freeze
148
+ offsets.freeze
149
+ freeze
150
+ end
151
+
74
152
  private
75
153
 
76
154
  # Binary search through the offsets to find the line number for the given
@@ -81,9 +159,9 @@ module Prism
81
159
 
82
160
  while left <= right
83
161
  mid = left + (right - left) / 2
84
- return mid if offsets[mid] == byte_offset
162
+ return mid if (offset = offsets[mid]) == byte_offset
85
163
 
86
- if offsets[mid] < byte_offset
164
+ if offset < byte_offset
87
165
  left = mid + 1
88
166
  else
89
167
  right = mid - 1
@@ -94,6 +172,120 @@ module Prism
94
172
  end
95
173
  end
96
174
 
175
+ # A cache that can be used to quickly compute code unit offsets from byte
176
+ # offsets. It purposefully provides only a single #[] method to access the
177
+ # cache in order to minimize surface area.
178
+ #
179
+ # Note that there are some known issues here that may or may not be addressed
180
+ # in the future:
181
+ #
182
+ # * The first is that there are issues when the cache computes values that are
183
+ # not on character boundaries. This can result in subsequent computations
184
+ # being off by one or more code units.
185
+ # * The second is that this cache is currently unbounded. In theory we could
186
+ # introduce some kind of LRU cache to limit the number of entries, but this
187
+ # has not yet been implemented.
188
+ #
189
+ class CodeUnitsCache
190
+ class UTF16Counter # :nodoc:
191
+ def initialize(source, encoding)
192
+ @source = source
193
+ @encoding = encoding
194
+ end
195
+
196
+ def count(byte_offset, byte_length)
197
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
198
+ end
199
+ end
200
+
201
+ class LengthCounter # :nodoc:
202
+ def initialize(source, encoding)
203
+ @source = source
204
+ @encoding = encoding
205
+ end
206
+
207
+ def count(byte_offset, byte_length)
208
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
209
+ end
210
+ end
211
+
212
+ private_constant :UTF16Counter, :LengthCounter
213
+
214
+ # Initialize a new cache with the given source and encoding.
215
+ def initialize(source, encoding)
216
+ @source = source
217
+ @counter =
218
+ if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
219
+ UTF16Counter.new(source, encoding)
220
+ else
221
+ LengthCounter.new(source, encoding)
222
+ end
223
+
224
+ @cache = {} #: Hash[Integer, Integer]
225
+ @offsets = [] #: Array[Integer]
226
+ end
227
+
228
+ # Retrieve the code units offset from the given byte offset.
229
+ def [](byte_offset)
230
+ @cache[byte_offset] ||=
231
+ if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
232
+ @offsets << byte_offset
233
+ @counter.count(0, byte_offset)
234
+ elsif index == 0
235
+ @offsets.unshift(byte_offset)
236
+ @counter.count(0, byte_offset)
237
+ else
238
+ @offsets.insert(index, byte_offset)
239
+ offset = @offsets[index - 1]
240
+ @cache[offset] + @counter.count(offset, byte_offset - offset)
241
+ end
242
+ end
243
+ end
244
+
245
+ # Specialized version of Prism::Source for source code that includes ASCII
246
+ # characters only. This class is used to apply performance optimizations that
247
+ # cannot be applied to sources that include multibyte characters.
248
+ #
249
+ # In the extremely rare case that a source includes multi-byte characters but
250
+ # is marked as binary because of a magic encoding comment and it cannot be
251
+ # eagerly converted to UTF-8, this class will be used as well. This is because
252
+ # at that point we will treat everything as single-byte characters.
253
+ class ASCIISource < Source
254
+ # Return the character offset for the given byte offset.
255
+ def character_offset(byte_offset)
256
+ byte_offset
257
+ end
258
+
259
+ # Return the column number in characters for the given byte offset.
260
+ def character_column(byte_offset)
261
+ byte_offset - line_start(byte_offset)
262
+ end
263
+
264
+ # Returns the offset from the start of the file for the given byte offset
265
+ # counting in code units for the given encoding.
266
+ #
267
+ # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
268
+ # concept of code units that differs from the number of characters in other
269
+ # encodings, it is not captured here.
270
+ def code_units_offset(byte_offset, encoding)
271
+ byte_offset
272
+ end
273
+
274
+ # Returns a cache that is the identity function in order to maintain the
275
+ # same interface. We can do this because code units are always equivalent to
276
+ # byte offsets for ASCII-only sources.
277
+ def code_units_cache(encoding)
278
+ ->(byte_offset) { byte_offset }
279
+ end
280
+
281
+ # Specialized version of `code_units_column` that does not depend on
282
+ # `code_units_offset`, which is a more expensive operation. This is
283
+ # essentially the same as `Prism::Source#column`.
284
+ def code_units_column(byte_offset, encoding)
285
+ byte_offset - line_start(byte_offset)
286
+ end
287
+ end
288
+
97
289
  # This represents a location in the source.
98
290
  class Location
99
291
  # A Source object that is used to determine more information from the given
@@ -108,25 +300,56 @@ module Prism
108
300
  # The length of this location in bytes.
109
301
  attr_reader :length
110
302
 
111
- # The list of comments attached to this location
112
- attr_reader :comments
113
-
114
303
  # Create a new location object with the given source, start byte offset, and
115
304
  # byte length.
116
305
  def initialize(source, start_offset, length)
117
306
  @source = source
118
307
  @start_offset = start_offset
119
308
  @length = length
120
- @comments = []
309
+
310
+ # These are used to store comments that are associated with this location.
311
+ # They are initialized to `nil` to save on memory when there are no
312
+ # comments to be attached and/or the comment-related APIs are not used.
313
+ @leading_comments = nil
314
+ @trailing_comments = nil
315
+ end
316
+
317
+ # These are the comments that are associated with this location that exist
318
+ # before the start of this location.
319
+ def leading_comments
320
+ @leading_comments ||= []
321
+ end
322
+
323
+ # Attach a comment to the leading comments of this location.
324
+ def leading_comment(comment)
325
+ leading_comments << comment
326
+ end
327
+
328
+ # These are the comments that are associated with this location that exist
329
+ # after the end of this location.
330
+ def trailing_comments
331
+ @trailing_comments ||= []
332
+ end
333
+
334
+ # Attach a comment to the trailing comments of this location.
335
+ def trailing_comment(comment)
336
+ trailing_comments << comment
337
+ end
338
+
339
+ # Returns all comments that are associated with this location (both leading
340
+ # and trailing comments).
341
+ def comments
342
+ [*@leading_comments, *@trailing_comments]
121
343
  end
122
344
 
123
345
  # Create a new location object with the given options.
124
- def copy(**options)
125
- Location.new(
126
- options.fetch(:source) { source },
127
- options.fetch(:start_offset) { start_offset },
128
- options.fetch(:length) { length }
129
- )
346
+ def copy(source: self.source, start_offset: self.start_offset, length: self.length)
347
+ Location.new(source, start_offset, length)
348
+ end
349
+
350
+ # Returns a new location that is the result of chopping off the last byte.
351
+ def chop
352
+ copy(length: length == 0 ? length : length - 1)
130
353
  end
131
354
 
132
355
  # Returns a string representation of this location.
@@ -134,11 +357,25 @@ module Prism
134
357
  "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
135
358
  end
136
359
 
360
+ # Returns all of the lines of the source code associated with this location.
361
+ def source_lines
362
+ source.lines
363
+ end
364
+
137
365
  # The source code that this location represents.
138
366
  def slice
139
367
  source.slice(start_offset, length)
140
368
  end
141
369
 
370
+ # The source code that this location represents starting from the beginning
371
+ # of the line that this location starts on to the end of the line that this
372
+ # location ends on.
373
+ def slice_lines
374
+ line_start = source.line_start(start_offset)
375
+ line_end = source.line_end(end_offset)
376
+ source.slice(line_start, line_end - line_start)
377
+ end
378
+
142
379
  # The character offset from the beginning of the source where this location
143
380
  # starts.
144
381
  def start_character_offset
@@ -150,6 +387,12 @@ module Prism
150
387
  source.code_units_offset(start_offset, encoding)
151
388
  end
152
389
 
390
+ # The start offset from the start of the file in code units using the given
391
+ # cache to fetch or calculate the value.
392
+ def cached_start_code_units_offset(cache)
393
+ cache[start_offset]
394
+ end
395
+
153
396
  # The byte offset from the beginning of the source where this location ends.
154
397
  def end_offset
155
398
  start_offset + length
@@ -166,6 +409,12 @@ module Prism
166
409
  source.code_units_offset(end_offset, encoding)
167
410
  end
168
411
 
412
+ # The end offset from the start of the file in code units using the given
413
+ # cache to fetch or calculate the value.
414
+ def cached_end_code_units_offset(cache)
415
+ cache[end_offset]
416
+ end
417
+
169
418
  # The line number where this location starts.
170
419
  def start_line
171
420
  source.line(start_offset)
@@ -200,6 +449,12 @@ module Prism
200
449
  source.code_units_column(start_offset, encoding)
201
450
  end
202
451
 
452
+ # The start column in code units using the given cache to fetch or calculate
453
+ # the value.
454
+ def cached_start_code_units_column(cache)
455
+ cache[start_offset] - cache[source.line_start(start_offset)]
456
+ end
457
+
203
458
  # The column number in bytes where this location ends from the start of the
204
459
  # line.
205
460
  def end_column
@@ -218,6 +473,12 @@ module Prism
218
473
  source.code_units_column(end_offset, encoding)
219
474
  end
220
475
 
476
+ # The end column in code units using the given cache to fetch or calculate
477
+ # the value.
478
+ def cached_end_code_units_column(cache)
479
+ cache[end_offset] - cache[source.line_start(end_offset)]
480
+ end
481
+
221
482
  # Implement the hash pattern matching interface for Location.
222
483
  def deconstruct_keys(keys)
223
484
  { start_offset: start_offset, end_offset: end_offset }
@@ -230,7 +491,7 @@ module Prism
230
491
 
231
492
  # Returns true if the given other location is equal to this location.
232
493
  def ==(other)
233
- other.is_a?(Location) &&
494
+ Location === other &&
234
495
  other.start_offset == start_offset &&
235
496
  other.end_offset == end_offset
236
497
  end
@@ -245,11 +506,16 @@ module Prism
245
506
  Location.new(source, start_offset, other.end_offset - start_offset)
246
507
  end
247
508
 
248
- # Returns a null location that does not correspond to a source and points to
249
- # the beginning of the file. Useful for when you want a location object but
250
- # do not care where it points.
251
- def self.null
252
- new(nil, 0, 0)
509
+ # Join this location with the first occurrence of the string in the source
510
+ # that occurs after this location on the same line, and return the new
511
+ # location. This will raise an error if the string does not exist.
512
+ def adjoin(string)
513
+ line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset)
514
+
515
+ line_suffix_index = line_suffix.byteindex(string)
516
+ raise "Could not find #{string}" if line_suffix_index.nil?
517
+
518
+ Location.new(source, start_offset, length + line_suffix_index + string.bytesize)
253
519
  end
254
520
  end
255
521
 
@@ -268,6 +534,11 @@ module Prism
268
534
  def deconstruct_keys(keys)
269
535
  { location: location }
270
536
  end
537
+
538
+ # Returns the content of the comment by slicing it from the source code.
539
+ def slice
540
+ location.slice
541
+ end
271
542
  end
272
543
 
273
544
  # InlineComment objects are the most common. They correspond to comments in
@@ -336,6 +607,10 @@ module Prism
336
607
 
337
608
  # This represents an error that was encountered during parsing.
338
609
  class ParseError
610
+ # The type of error. This is an _internal_ symbol that is used for
611
+ # communicating with translation layers. It is not meant to be public API.
612
+ attr_reader :type
613
+
339
614
  # The message associated with this error.
340
615
  attr_reader :message
341
616
 
@@ -346,7 +621,8 @@ module Prism
346
621
  attr_reader :level
347
622
 
348
623
  # Create a new error object with the given type, message, location, and level.
349
- def initialize(message, location, level)
624
+ def initialize(type, message, location, level)
625
+ @type = type
350
626
  @message = message
351
627
  @location = location
352
628
  @level = level
@@ -354,17 +630,21 @@ module Prism
354
630
 
355
631
  # Implement the hash pattern matching interface for ParseError.
356
632
  def deconstruct_keys(keys)
357
- { message: message, location: location, level: level }
633
+ { type: type, message: message, location: location, level: level }
358
634
  end
359
635
 
360
636
  # Returns a string representation of this error.
361
637
  def inspect
362
- "#<Prism::ParseError @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
638
+ "#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
363
639
  end
364
640
  end
365
641
 
366
642
  # This represents a warning that was encountered during parsing.
367
643
  class ParseWarning
644
+ # The type of warning. This is an _internal_ symbol that is used for
645
+ # communicating with translation layers. It is not meant to be public API.
646
+ attr_reader :type
647
+
368
648
  # The message associated with this warning.
369
649
  attr_reader :message
370
650
 
@@ -375,7 +655,8 @@ module Prism
375
655
  attr_reader :level
376
656
 
377
657
  # Create a new warning object with the given type, message, location, and level.
378
- def initialize(message, location, level)
658
+ def initialize(type, message, location, level)
659
+ @type = type
379
660
  @message = message
380
661
  @location = location
381
662
  @level = level
@@ -383,24 +664,19 @@ module Prism
383
664
 
384
665
  # Implement the hash pattern matching interface for ParseWarning.
385
666
  def deconstruct_keys(keys)
386
- { message: message, location: location, level: level }
667
+ { type: type, message: message, location: location, level: level }
387
668
  end
388
669
 
389
670
  # Returns a string representation of this warning.
390
671
  def inspect
391
- "#<Prism::ParseWarning @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
672
+ "#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
392
673
  end
393
674
  end
394
675
 
395
676
  # This represents the result of a call to ::parse or ::parse_file. It contains
396
- # the AST, any comments that were encounters, and any errors that were
397
- # encountered.
398
- class ParseResult
399
- # The value that was generated by parsing. Normally this holds the AST, but
400
- # it can sometimes how a list of tokens or other results passed back from
401
- # the parser.
402
- attr_reader :value
403
-
677
+ # the requested structure, any comments that were encountered, and any errors
678
+ # that were encountered.
679
+ class Result
404
680
  # The list of comments that were encountered during parsing.
405
681
  attr_reader :comments
406
682
 
@@ -421,9 +697,8 @@ module Prism
421
697
  # A Source instance that represents the source code that was parsed.
422
698
  attr_reader :source
423
699
 
424
- # Create a new parse result object with the given values.
425
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
426
- @value = value
700
+ # Create a new result object with the given values.
701
+ def initialize(comments, magic_comments, data_loc, errors, warnings, source)
427
702
  @comments = comments
428
703
  @magic_comments = magic_comments
429
704
  @data_loc = data_loc
@@ -432,9 +707,14 @@ module Prism
432
707
  @source = source
433
708
  end
434
709
 
435
- # Implement the hash pattern matching interface for ParseResult.
710
+ # Implement the hash pattern matching interface for Result.
436
711
  def deconstruct_keys(keys)
437
- { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
712
+ { comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
713
+ end
714
+
715
+ # Returns the encoding of the source code that was parsed.
716
+ def encoding
717
+ source.encoding
438
718
  end
439
719
 
440
720
  # Returns true if there were no errors during parsing and false if there
@@ -448,6 +728,88 @@ module Prism
448
728
  def failure?
449
729
  !success?
450
730
  end
731
+
732
+ # Create a code units cache for the given encoding.
733
+ def code_units_cache(encoding)
734
+ source.code_units_cache(encoding)
735
+ end
736
+ end
737
+
738
+ # This is a result specific to the `parse` and `parse_file` methods.
739
+ class ParseResult < Result
740
+ autoload :Comments, "prism/parse_result/comments"
741
+ autoload :Errors, "prism/parse_result/errors"
742
+ autoload :Newlines, "prism/parse_result/newlines"
743
+
744
+ private_constant :Comments
745
+ private_constant :Errors
746
+ private_constant :Newlines
747
+
748
+ # The syntax tree that was parsed from the source code.
749
+ attr_reader :value
750
+
751
+ # Create a new parse result object with the given values.
752
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
753
+ @value = value
754
+ super(comments, magic_comments, data_loc, errors, warnings, source)
755
+ end
756
+
757
+ # Implement the hash pattern matching interface for ParseResult.
758
+ def deconstruct_keys(keys)
759
+ super.merge!(value: value)
760
+ end
761
+
762
+ # Attach the list of comments to their respective locations in the tree.
763
+ def attach_comments!
764
+ Comments.new(self).attach! # steep:ignore
765
+ end
766
+
767
+ # Walk the tree and mark nodes that are on a new line, loosely emulating
768
+ # the behavior of CRuby's `:line` tracepoint event.
769
+ def mark_newlines!
770
+ value.accept(Newlines.new(source.offsets.size)) # steep:ignore
771
+ end
772
+
773
+ # Returns a string representation of the syntax tree with the errors
774
+ # displayed inline.
775
+ def errors_format
776
+ Errors.new(self).format
777
+ end
778
+ end
779
+
780
+ # This is a result specific to the `lex` and `lex_file` methods.
781
+ class LexResult < Result
782
+ # The list of tokens that were parsed from the source code.
783
+ attr_reader :value
784
+
785
+ # Create a new lex result object with the given values.
786
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
787
+ @value = value
788
+ super(comments, magic_comments, data_loc, errors, warnings, source)
789
+ end
790
+
791
+ # Implement the hash pattern matching interface for LexResult.
792
+ def deconstruct_keys(keys)
793
+ super.merge!(value: value)
794
+ end
795
+ end
796
+
797
+ # This is a result specific to the `parse_lex` and `parse_lex_file` methods.
798
+ class ParseLexResult < Result
799
+ # A tuple of the syntax tree and the list of tokens that were parsed from
800
+ # the source code.
801
+ attr_reader :value
802
+
803
+ # Create a new parse lex result object with the given values.
804
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
805
+ @value = value
806
+ super(comments, magic_comments, data_loc, errors, warnings, source)
807
+ end
808
+
809
+ # Implement the hash pattern matching interface for ParseLexResult.
810
+ def deconstruct_keys(keys)
811
+ super.merge!(value: value)
812
+ end
451
813
  end
452
814
 
453
815
  # This represents a token from the Ruby source.
@@ -477,8 +839,9 @@ module Prism
477
839
 
478
840
  # A Location object representing the location of this token in the source.
479
841
  def location
480
- return @location if @location.is_a?(Location)
481
- @location = Location.new(source, @location >> 32, @location & 0xFFFFFFFF)
842
+ location = @location
843
+ return location if location.is_a?(Location)
844
+ @location = Location.new(source, location >> 32, location & 0xFFFFFFFF)
482
845
  end
483
846
 
484
847
  # Implement the pretty print interface for Token.
@@ -498,9 +861,50 @@ module Prism
498
861
 
499
862
  # Returns true if the given other token is equal to this token.
500
863
  def ==(other)
501
- other.is_a?(Token) &&
864
+ Token === other &&
502
865
  other.type == type &&
503
866
  other.value == value
504
867
  end
868
+
869
+ # Returns a string representation of this token.
870
+ def inspect
871
+ location
872
+ super
873
+ end
874
+
875
+ # Freeze this object and the objects it contains.
876
+ def deep_freeze
877
+ value.freeze
878
+ location.freeze
879
+ freeze
880
+ end
881
+ end
882
+
883
+ # This object is passed to the various Prism.* methods that accept the
884
+ # `scopes` option as an element of the list. It defines both the local
885
+ # variables visible at that scope as well as the forwarding parameters
886
+ # available at that scope.
887
+ class Scope
888
+ # The list of local variables that are defined in this scope. This should be
889
+ # defined as an array of symbols.
890
+ attr_reader :locals
891
+
892
+ # The list of local variables that are forwarded to the next scope. This
893
+ # should be defined as an array of symbols containing the specific values of
894
+ # :*, :**, :&, or :"...".
895
+ attr_reader :forwarding
896
+
897
+ # Create a new scope object with the given locals and forwarding.
898
+ def initialize(locals, forwarding)
899
+ @locals = locals
900
+ @forwarding = forwarding
901
+ end
902
+ end
903
+
904
+ # Create a new scope with the given locals and forwarding options that is
905
+ # suitable for passing into one of the Prism.* methods that accepts the
906
+ # `scopes` option.
907
+ def self.scope(locals: [], forwarding: [])
908
+ Scope.new(locals, forwarding)
505
909
  end
506
910
  end