prism 0.29.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +115 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/Makefile +1 -1
  5. data/README.md +4 -0
  6. data/config.yml +920 -148
  7. data/docs/build_system.md +8 -11
  8. data/docs/fuzzing.md +1 -1
  9. data/docs/parsing_rules.md +4 -1
  10. data/docs/relocation.md +34 -0
  11. data/docs/ripper_translation.md +22 -0
  12. data/docs/serialization.md +3 -0
  13. data/ext/prism/api_node.c +2863 -2079
  14. data/ext/prism/extconf.rb +14 -37
  15. data/ext/prism/extension.c +241 -391
  16. data/ext/prism/extension.h +2 -2
  17. data/include/prism/ast.h +2156 -453
  18. data/include/prism/defines.h +58 -7
  19. data/include/prism/diagnostic.h +24 -6
  20. data/include/prism/node.h +0 -21
  21. data/include/prism/options.h +94 -3
  22. data/include/prism/parser.h +82 -40
  23. data/include/prism/regexp.h +18 -8
  24. data/include/prism/static_literals.h +3 -2
  25. data/include/prism/util/pm_char.h +1 -2
  26. data/include/prism/util/pm_constant_pool.h +0 -8
  27. data/include/prism/util/pm_integer.h +22 -15
  28. data/include/prism/util/pm_newline_list.h +11 -0
  29. data/include/prism/util/pm_string.h +28 -12
  30. data/include/prism/version.h +3 -3
  31. data/include/prism.h +47 -11
  32. data/lib/prism/compiler.rb +3 -0
  33. data/lib/prism/desugar_compiler.rb +111 -74
  34. data/lib/prism/dispatcher.rb +16 -1
  35. data/lib/prism/dot_visitor.rb +55 -34
  36. data/lib/prism/dsl.rb +660 -468
  37. data/lib/prism/ffi.rb +113 -8
  38. data/lib/prism/inspect_visitor.rb +296 -64
  39. data/lib/prism/lex_compat.rb +1 -1
  40. data/lib/prism/mutation_compiler.rb +11 -6
  41. data/lib/prism/node.rb +4262 -5023
  42. data/lib/prism/node_ext.rb +91 -14
  43. data/lib/prism/parse_result/comments.rb +0 -7
  44. data/lib/prism/parse_result/errors.rb +65 -0
  45. data/lib/prism/parse_result/newlines.rb +101 -11
  46. data/lib/prism/parse_result.rb +183 -6
  47. data/lib/prism/reflection.rb +12 -10
  48. data/lib/prism/relocation.rb +504 -0
  49. data/lib/prism/serialize.rb +496 -609
  50. data/lib/prism/string_query.rb +30 -0
  51. data/lib/prism/translation/parser/compiler.rb +185 -155
  52. data/lib/prism/translation/parser/lexer.rb +26 -4
  53. data/lib/prism/translation/parser.rb +9 -4
  54. data/lib/prism/translation/ripper.rb +23 -25
  55. data/lib/prism/translation/ruby_parser.rb +86 -17
  56. data/lib/prism/visitor.rb +3 -0
  57. data/lib/prism.rb +6 -8
  58. data/prism.gemspec +9 -5
  59. data/rbi/prism/dsl.rbi +521 -0
  60. data/rbi/prism/node.rbi +1115 -1120
  61. data/rbi/prism/parse_result.rbi +29 -0
  62. data/rbi/prism/string_query.rbi +12 -0
  63. data/rbi/prism/visitor.rbi +3 -0
  64. data/rbi/prism.rbi +36 -30
  65. data/sig/prism/dsl.rbs +190 -303
  66. data/sig/prism/mutation_compiler.rbs +1 -0
  67. data/sig/prism/node.rbs +678 -632
  68. data/sig/prism/parse_result.rbs +22 -0
  69. data/sig/prism/relocation.rbs +185 -0
  70. data/sig/prism/string_query.rbs +11 -0
  71. data/sig/prism/visitor.rbs +1 -0
  72. data/sig/prism.rbs +103 -64
  73. data/src/diagnostic.c +64 -28
  74. data/src/node.c +502 -1739
  75. data/src/options.c +76 -27
  76. data/src/prettyprint.c +188 -112
  77. data/src/prism.c +3376 -2293
  78. data/src/regexp.c +208 -71
  79. data/src/serialize.c +182 -50
  80. data/src/static_literals.c +64 -85
  81. data/src/token_type.c +4 -4
  82. data/src/util/pm_char.c +1 -1
  83. data/src/util/pm_constant_pool.c +0 -8
  84. data/src/util/pm_integer.c +53 -25
  85. data/src/util/pm_newline_list.c +29 -0
  86. data/src/util/pm_string.c +131 -80
  87. data/src/util/pm_strpbrk.c +32 -6
  88. metadata +11 -7
  89. data/include/prism/util/pm_string_list.h +0 -44
  90. data/lib/prism/debug.rb +0 -249
  91. data/lib/prism/translation/parser/rubocop.rb +0 -73
  92. data/src/util/pm_string_list.c +0 -28
@@ -5,10 +5,13 @@
5
5
  module Prism
6
6
  class Node
7
7
  def deprecated(*replacements) # :nodoc:
8
+ location = caller_locations(1, 1)
9
+ location = location[0].label if location
8
10
  suggest = replacements.map { |replacement| "#{self.class}##{replacement}" }
11
+
9
12
  warn(<<~MSG, category: :deprecated)
10
- [deprecation]: #{self.class}##{caller_locations(1, 1)[0].label} is deprecated \
11
- and will be removed in the next major version. Use #{suggest.join("/")} instead.
13
+ [deprecation]: #{self.class}##{location} is deprecated and will be \
14
+ removed in the next major version. Use #{suggest.join("/")} instead.
12
15
  #{(caller(1, 3) || []).join("\n")}
13
16
  MSG
14
17
  end
@@ -18,7 +21,10 @@ module Prism
18
21
  # Returns a numeric value that represents the flags that were used to create
19
22
  # the regular expression.
20
23
  def options
21
- o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE)
24
+ o = 0
25
+ o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE)
26
+ o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED)
27
+ o |= Regexp::MULTILINE if flags.anybits?(RegularExpressionFlags::MULTI_LINE)
22
28
  o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8)
23
29
  o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT)
24
30
  o
@@ -66,11 +72,12 @@ module Prism
66
72
  def to_interpolated
67
73
  InterpolatedStringNode.new(
68
74
  source,
75
+ -1,
76
+ location,
69
77
  frozen? ? InterpolatedStringNodeFlags::FROZEN : 0,
70
78
  opening_loc,
71
- [copy(opening_loc: nil, closing_loc: nil, location: content_loc)],
72
- closing_loc,
73
- location
79
+ [copy(location: content_loc, opening_loc: nil, closing_loc: nil)],
80
+ closing_loc
74
81
  )
75
82
  end
76
83
  end
@@ -83,10 +90,12 @@ module Prism
83
90
  def to_interpolated
84
91
  InterpolatedXStringNode.new(
85
92
  source,
93
+ -1,
94
+ location,
95
+ flags,
86
96
  opening_loc,
87
- [StringNode.new(source, 0, nil, content_loc, nil, unescaped, content_loc)],
88
- closing_loc,
89
- location
97
+ [StringNode.new(source, node_id, content_loc, 0, nil, content_loc, nil, unescaped)],
98
+ closing_loc
90
99
  )
91
100
  end
92
101
  end
@@ -103,7 +112,19 @@ module Prism
103
112
  class RationalNode < Node
104
113
  # Returns the value of the node as a Ruby Rational.
105
114
  def value
106
- Rational(numeric.is_a?(IntegerNode) ? numeric.value : slice.chomp("r"))
115
+ Rational(numerator, denominator)
116
+ end
117
+
118
+ # Returns the value of the node as an IntegerNode or a FloatNode. This
119
+ # method is deprecated in favor of #value or #numerator/#denominator.
120
+ def numeric
121
+ deprecated("value", "numerator", "denominator")
122
+
123
+ if denominator == 1
124
+ IntegerNode.new(source, -1, location.chop, flags, numerator)
125
+ else
126
+ FloatNode.new(source, -1, location.chop, 0, numerator.to_f / denominator)
127
+ end
107
128
  end
108
129
  end
109
130
 
@@ -180,7 +201,12 @@ module Prism
180
201
  # continue to supply that API.
181
202
  def child
182
203
  deprecated("name", "name_loc")
183
- name ? ConstantReadNode.new(source, name, name_loc) : MissingNode.new(source, location)
204
+
205
+ if name
206
+ ConstantReadNode.new(source, -1, name_loc, 0, name)
207
+ else
208
+ MissingNode.new(source, -1, location, 0)
209
+ end
184
210
  end
185
211
  end
186
212
 
@@ -216,7 +242,12 @@ module Prism
216
242
  # continue to supply that API.
217
243
  def child
218
244
  deprecated("name", "name_loc")
219
- name ? ConstantReadNode.new(source, name, name_loc) : MissingNode.new(source, location)
245
+
246
+ if name
247
+ ConstantReadNode.new(source, -1, name_loc, 0, name)
248
+ else
249
+ MissingNode.new(source, -1, location, 0)
250
+ end
220
251
  end
221
252
  end
222
253
 
@@ -249,9 +280,10 @@ module Prism
249
280
  end
250
281
 
251
282
  posts.each do |param|
252
- if param.is_a?(MultiTargetNode)
283
+ case param
284
+ when MultiTargetNode
253
285
  names << [:req]
254
- elsif param.is_a?(NoKeywordsParameterNode)
286
+ when NoKeywordsParameterNode, KeywordRestParameterNode, ForwardingParameterNode
255
287
  # Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts
256
288
  raise "Invalid syntax"
257
289
  else
@@ -428,4 +460,49 @@ module Prism
428
460
  binary_operator_loc
429
461
  end
430
462
  end
463
+
464
+ class CaseMatchNode < Node
465
+ # Returns the else clause of the case match node. This method is deprecated
466
+ # in favor of #else_clause.
467
+ def consequent
468
+ deprecated("else_clause")
469
+ else_clause
470
+ end
471
+ end
472
+
473
+ class CaseNode < Node
474
+ # Returns the else clause of the case node. This method is deprecated in
475
+ # favor of #else_clause.
476
+ def consequent
477
+ deprecated("else_clause")
478
+ else_clause
479
+ end
480
+ end
481
+
482
+ class IfNode < Node
483
+ # Returns the subsequent if/elsif/else clause of the if node. This method is
484
+ # deprecated in favor of #subsequent.
485
+ def consequent
486
+ deprecated("subsequent")
487
+ subsequent
488
+ end
489
+ end
490
+
491
+ class RescueNode < Node
492
+ # Returns the subsequent rescue clause of the rescue node. This method is
493
+ # deprecated in favor of #subsequent.
494
+ def consequent
495
+ deprecated("subsequent")
496
+ subsequent
497
+ end
498
+ end
499
+
500
+ class UnlessNode < Node
501
+ # Returns the else clause of the unless node. This method is deprecated in
502
+ # favor of #else_clause.
503
+ def consequent
504
+ deprecated("else_clause")
505
+ else_clause
506
+ end
507
+ end
431
508
  end
@@ -183,12 +183,5 @@ module Prism
183
183
  [preceding, NodeTarget.new(node), following]
184
184
  end
185
185
  end
186
-
187
- private_constant :Comments
188
-
189
- # Attach the list of comments to their respective locations in the tree.
190
- def attach_comments!
191
- Comments.new(self).attach! # steep:ignore
192
- end
193
186
  end
194
187
  end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+
5
+ module Prism
6
+ class ParseResult < Result
7
+ # An object to represent the set of errors on a parse result. This object
8
+ # can be used to format the errors in a human-readable way.
9
+ class Errors
10
+ # The parse result that contains the errors.
11
+ attr_reader :parse_result
12
+
13
+ # Initialize a new set of errors from the given parse result.
14
+ def initialize(parse_result)
15
+ @parse_result = parse_result
16
+ end
17
+
18
+ # Formats the errors in a human-readable way and return them as a string.
19
+ def format
20
+ error_lines = {} #: Hash[Integer, Array[ParseError]]
21
+ parse_result.errors.each do |error|
22
+ location = error.location
23
+ (location.start_line..location.end_line).each do |line|
24
+ error_lines[line] ||= []
25
+ error_lines[line] << error
26
+ end
27
+ end
28
+
29
+ source_lines = parse_result.source.source.lines
30
+ source_lines << "" if error_lines.key?(source_lines.size + 1)
31
+
32
+ io = StringIO.new
33
+ source_lines.each.with_index(1) do |line, line_number|
34
+ io.puts(line)
35
+
36
+ (error_lines.delete(line_number) || []).each do |error|
37
+ location = error.location
38
+
39
+ case line_number
40
+ when location.start_line
41
+ io.print(" " * location.start_column + "^")
42
+
43
+ if location.start_line == location.end_line
44
+ if location.start_column != location.end_column
45
+ io.print("~" * (location.end_column - location.start_column - 1))
46
+ end
47
+
48
+ io.puts(" " + error.message)
49
+ else
50
+ io.puts("~" * (line.bytesize - location.start_column))
51
+ end
52
+ when location.end_line
53
+ io.puts("~" * location.end_column + " " + error.message)
54
+ else
55
+ io.puts("~" * line.bytesize)
56
+ end
57
+ end
58
+ end
59
+
60
+ io.puts
61
+ io.string
62
+ end
63
+ end
64
+ end
65
+ end
@@ -17,21 +17,27 @@ module Prism
17
17
  # Note that the logic in this file should be kept in sync with the Java
18
18
  # MarkNewlinesVisitor, since that visitor is responsible for marking the
19
19
  # newlines for JRuby/TruffleRuby.
20
+ #
21
+ # This file is autoloaded only when `mark_newlines!` is called, so the
22
+ # re-opening of the various nodes in this file will only be performed in
23
+ # that case. We do that to avoid storing the extra `@newline` instance
24
+ # variable on every node if we don't need it.
20
25
  class Newlines < Visitor
21
26
  # Create a new Newlines visitor with the given newline offsets.
22
- def initialize(newline_marked)
23
- @newline_marked = newline_marked
27
+ def initialize(lines)
28
+ # @type var lines: Integer
29
+ @lines = Array.new(1 + lines, false)
24
30
  end
25
31
 
26
32
  # Permit block/lambda nodes to mark newlines within themselves.
27
33
  def visit_block_node(node)
28
- old_newline_marked = @newline_marked
29
- @newline_marked = Array.new(old_newline_marked.size, false)
34
+ old_lines = @lines
35
+ @lines = Array.new(old_lines.size, false)
30
36
 
31
37
  begin
32
38
  super(node)
33
39
  ensure
34
- @newline_marked = old_newline_marked
40
+ @lines = old_lines
35
41
  end
36
42
  end
37
43
 
@@ -39,7 +45,7 @@ module Prism
39
45
 
40
46
  # Mark if/unless nodes as newlines.
41
47
  def visit_if_node(node)
42
- node.set_newline_flag(@newline_marked)
48
+ node.newline_flag!(@lines)
43
49
  super(node)
44
50
  end
45
51
 
@@ -48,17 +54,101 @@ module Prism
48
54
  # Permit statements lists to mark newlines within themselves.
49
55
  def visit_statements_node(node)
50
56
  node.body.each do |child|
51
- child.set_newline_flag(@newline_marked)
57
+ child.newline_flag!(@lines)
52
58
  end
53
59
  super(node)
54
60
  end
55
61
  end
62
+ end
63
+
64
+ class Node
65
+ def newline_flag? # :nodoc:
66
+ @newline_flag ? true : false
67
+ end
68
+
69
+ def newline_flag!(lines) # :nodoc:
70
+ line = location.start_line
71
+ unless lines[line]
72
+ lines[line] = true
73
+ @newline_flag = true
74
+ end
75
+ end
76
+ end
77
+
78
+ class BeginNode < Node
79
+ def newline_flag!(lines) # :nodoc:
80
+ # Never mark BeginNode with a newline flag, mark children instead.
81
+ end
82
+ end
83
+
84
+ class ParenthesesNode < Node
85
+ def newline_flag!(lines) # :nodoc:
86
+ # Never mark ParenthesesNode with a newline flag, mark children instead.
87
+ end
88
+ end
89
+
90
+ class IfNode < Node
91
+ def newline_flag!(lines) # :nodoc:
92
+ predicate.newline_flag!(lines)
93
+ end
94
+ end
95
+
96
+ class UnlessNode < Node
97
+ def newline_flag!(lines) # :nodoc:
98
+ predicate.newline_flag!(lines)
99
+ end
100
+ end
101
+
102
+ class UntilNode < Node
103
+ def newline_flag!(lines) # :nodoc:
104
+ predicate.newline_flag!(lines)
105
+ end
106
+ end
107
+
108
+ class WhileNode < Node
109
+ def newline_flag!(lines) # :nodoc:
110
+ predicate.newline_flag!(lines)
111
+ end
112
+ end
113
+
114
+ class RescueModifierNode < Node
115
+ def newline_flag!(lines) # :nodoc:
116
+ expression.newline_flag!(lines)
117
+ end
118
+ end
119
+
120
+ class InterpolatedMatchLastLineNode < Node
121
+ def newline_flag!(lines) # :nodoc:
122
+ first = parts.first
123
+ first.newline_flag!(lines) if first
124
+ end
125
+ end
126
+
127
+ class InterpolatedRegularExpressionNode < Node
128
+ def newline_flag!(lines) # :nodoc:
129
+ first = parts.first
130
+ first.newline_flag!(lines) if first
131
+ end
132
+ end
133
+
134
+ class InterpolatedStringNode < Node
135
+ def newline_flag!(lines) # :nodoc:
136
+ first = parts.first
137
+ first.newline_flag!(lines) if first
138
+ end
139
+ end
56
140
 
57
- private_constant :Newlines
141
+ class InterpolatedSymbolNode < Node
142
+ def newline_flag!(lines) # :nodoc:
143
+ first = parts.first
144
+ first.newline_flag!(lines) if first
145
+ end
146
+ end
58
147
 
59
- # Walk the tree and mark nodes that are on a new line.
60
- def mark_newlines!
61
- value.accept(Newlines.new(Array.new(1 + source.offsets.size, false))) # steep:ignore
148
+ class InterpolatedXStringNode < Node
149
+ def newline_flag!(lines) # :nodoc:
150
+ first = parts.first
151
+ first.newline_flag!(lines) if first
62
152
  end
63
153
  end
64
154
  end
@@ -10,7 +10,26 @@ module Prism
10
10
  # specialized and more performant `ASCIISource` if no multibyte characters
11
11
  # are present in the source code.
12
12
  def self.for(source, start_line = 1, offsets = [])
13
- source.ascii_only? ? ASCIISource.new(source, start_line, offsets): new(source, start_line, offsets)
13
+ if source.ascii_only?
14
+ ASCIISource.new(source, start_line, offsets)
15
+ elsif source.encoding == Encoding::BINARY
16
+ source.force_encoding(Encoding::UTF_8)
17
+
18
+ if source.valid_encoding?
19
+ new(source, start_line, offsets)
20
+ else
21
+ # This is an extremely niche use case where the file is marked as
22
+ # binary, contains multi-byte characters, and those characters are not
23
+ # valid UTF-8. In this case we'll mark it as binary and fall back to
24
+ # treating everything as a single-byte character. This _may_ cause
25
+ # problems when asking for code units, but it appears to be the
26
+ # cleanest solution at the moment.
27
+ source.force_encoding(Encoding::BINARY)
28
+ ASCIISource.new(source, start_line, offsets)
29
+ end
30
+ else
31
+ new(source, start_line, offsets)
32
+ end
14
33
  end
15
34
 
16
35
  # The source code that this source object represents.
@@ -85,9 +104,26 @@ module Prism
85
104
  # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
86
105
  # concept of code units that differs from the number of characters in other
87
106
  # encodings, it is not captured here.
107
+ #
108
+ # We purposefully replace invalid and undefined characters with replacement
109
+ # characters in this conversion. This happens for two reasons. First, it's
110
+ # possible that the given byte offset will not occur on a character
111
+ # boundary. Second, it's possible that the source code will contain a
112
+ # character that has no equivalent in the given encoding.
88
113
  def code_units_offset(byte_offset, encoding)
89
- byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
90
- (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
114
+ byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
115
+
116
+ if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
117
+ byteslice.bytesize / 2
118
+ else
119
+ byteslice.length
120
+ end
121
+ end
122
+
123
+ # Generate a cache that targets a specific encoding for calculating code
124
+ # unit offsets.
125
+ def code_units_cache(encoding)
126
+ CodeUnitsCache.new(source, encoding)
91
127
  end
92
128
 
93
129
  # Returns the column number in code units for the given encoding for the
@@ -119,10 +155,84 @@ module Prism
119
155
  end
120
156
  end
121
157
 
158
+ # A cache that can be used to quickly compute code unit offsets from byte
159
+ # offsets. It purposefully provides only a single #[] method to access the
160
+ # cache in order to minimize surface area.
161
+ #
162
+ # Note that there are some known issues here that may or may not be addressed
163
+ # in the future:
164
+ #
165
+ # * The first is that there are issues when the cache computes values that are
166
+ # not on character boundaries. This can result in subsequent computations
167
+ # being off by one or more code units.
168
+ # * The second is that this cache is currently unbounded. In theory we could
169
+ # introduce some kind of LRU cache to limit the number of entries, but this
170
+ # has not yet been implemented.
171
+ #
172
+ class CodeUnitsCache
173
+ class UTF16Counter # :nodoc:
174
+ def initialize(source, encoding)
175
+ @source = source
176
+ @encoding = encoding
177
+ end
178
+
179
+ def count(byte_offset, byte_length)
180
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
181
+ end
182
+ end
183
+
184
+ class LengthCounter # :nodoc:
185
+ def initialize(source, encoding)
186
+ @source = source
187
+ @encoding = encoding
188
+ end
189
+
190
+ def count(byte_offset, byte_length)
191
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
192
+ end
193
+ end
194
+
195
+ private_constant :UTF16Counter, :LengthCounter
196
+
197
+ # Initialize a new cache with the given source and encoding.
198
+ def initialize(source, encoding)
199
+ @source = source
200
+ @counter =
201
+ if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
202
+ UTF16Counter.new(source, encoding)
203
+ else
204
+ LengthCounter.new(source, encoding)
205
+ end
206
+
207
+ @cache = {} #: Hash[Integer, Integer]
208
+ @offsets = [] #: Array[Integer]
209
+ end
210
+
211
+ # Retrieve the code units offset from the given byte offset.
212
+ def [](byte_offset)
213
+ @cache[byte_offset] ||=
214
+ if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
215
+ @offsets << byte_offset
216
+ @counter.count(0, byte_offset)
217
+ elsif index == 0
218
+ @offsets.unshift(byte_offset)
219
+ @counter.count(0, byte_offset)
220
+ else
221
+ @offsets.insert(index, byte_offset)
222
+ offset = @offsets[index - 1]
223
+ @cache[offset] + @counter.count(offset, byte_offset - offset)
224
+ end
225
+ end
226
+ end
227
+
122
228
  # Specialized version of Prism::Source for source code that includes ASCII
123
229
  # characters only. This class is used to apply performance optimizations that
124
- # cannot be applied to sources that include multibyte characters. Sources that
125
- # include multibyte characters are represented by the Prism::Source class.
230
+ # cannot be applied to sources that include multibyte characters.
231
+ #
232
+ # In the extremely rare case that a source includes multi-byte characters but
233
+ # is marked as binary because of a magic encoding comment and it cannot be
234
+ # eagerly converted to UTF-8, this class will be used as well. This is because
235
+ # at that point we will treat everything as single-byte characters.
126
236
  class ASCIISource < Source
127
237
  # Return the character offset for the given byte offset.
128
238
  def character_offset(byte_offset)
@@ -144,9 +254,16 @@ module Prism
144
254
  byte_offset
145
255
  end
146
256
 
257
+ # Returns a cache that is the identity function in order to maintain the
258
+ # same interface. We can do this because code units are always equivalent to
259
+ # byte offsets for ASCII-only sources.
260
+ def code_units_cache(encoding)
261
+ ->(byte_offset) { byte_offset }
262
+ end
263
+
147
264
  # Specialized version of `code_units_column` that does not depend on
148
265
  # `code_units_offset`, which is a more expensive operation. This is
149
- # essentialy the same as `Prism::Source#column`.
266
+ # essentially the same as `Prism::Source#column`.
150
267
  def code_units_column(byte_offset, encoding)
151
268
  byte_offset - line_start(byte_offset)
152
269
  end
@@ -253,6 +370,12 @@ module Prism
253
370
  source.code_units_offset(start_offset, encoding)
254
371
  end
255
372
 
373
+ # The start offset from the start of the file in code units using the given
374
+ # cache to fetch or calculate the value.
375
+ def cached_start_code_units_offset(cache)
376
+ cache[start_offset]
377
+ end
378
+
256
379
  # The byte offset from the beginning of the source where this location ends.
257
380
  def end_offset
258
381
  start_offset + length
@@ -269,6 +392,12 @@ module Prism
269
392
  source.code_units_offset(end_offset, encoding)
270
393
  end
271
394
 
395
+ # The end offset from the start of the file in code units using the given
396
+ # cache to fetch or calculate the value.
397
+ def cached_end_code_units_offset(cache)
398
+ cache[end_offset]
399
+ end
400
+
272
401
  # The line number where this location starts.
273
402
  def start_line
274
403
  source.line(start_offset)
@@ -303,6 +432,12 @@ module Prism
303
432
  source.code_units_column(start_offset, encoding)
304
433
  end
305
434
 
435
+ # The start column in code units using the given cache to fetch or calculate
436
+ # the value.
437
+ def cached_start_code_units_column(cache)
438
+ cache[start_offset] - cache[source.line_start(start_offset)]
439
+ end
440
+
306
441
  # The column number in bytes where this location ends from the start of the
307
442
  # line.
308
443
  def end_column
@@ -321,6 +456,12 @@ module Prism
321
456
  source.code_units_column(end_offset, encoding)
322
457
  end
323
458
 
459
+ # The end column in code units using the given cache to fetch or calculate
460
+ # the value.
461
+ def cached_end_code_units_column(cache)
462
+ cache[end_offset] - cache[source.line_start(end_offset)]
463
+ end
464
+
324
465
  # Implement the hash pattern matching interface for Location.
325
466
  def deconstruct_keys(keys)
326
467
  { start_offset: start_offset, end_offset: end_offset }
@@ -570,10 +711,23 @@ module Prism
570
711
  def failure?
571
712
  !success?
572
713
  end
714
+
715
+ # Create a code units cache for the given encoding.
716
+ def code_units_cache(encoding)
717
+ source.code_units_cache(encoding)
718
+ end
573
719
  end
574
720
 
575
721
  # This is a result specific to the `parse` and `parse_file` methods.
576
722
  class ParseResult < Result
723
+ autoload :Comments, "prism/parse_result/comments"
724
+ autoload :Errors, "prism/parse_result/errors"
725
+ autoload :Newlines, "prism/parse_result/newlines"
726
+
727
+ private_constant :Comments
728
+ private_constant :Errors
729
+ private_constant :Newlines
730
+
577
731
  # The syntax tree that was parsed from the source code.
578
732
  attr_reader :value
579
733
 
@@ -587,6 +741,23 @@ module Prism
587
741
  def deconstruct_keys(keys)
588
742
  super.merge!(value: value)
589
743
  end
744
+
745
+ # Attach the list of comments to their respective locations in the tree.
746
+ def attach_comments!
747
+ Comments.new(self).attach! # steep:ignore
748
+ end
749
+
750
+ # Walk the tree and mark nodes that are on a new line, loosely emulating
751
+ # the behavior of CRuby's `:line` tracepoint event.
752
+ def mark_newlines!
753
+ value.accept(Newlines.new(source.offsets.size)) # steep:ignore
754
+ end
755
+
756
+ # Returns a string representation of the syntax tree with the errors
757
+ # displayed inline.
758
+ def errors_format
759
+ Errors.new(self).format
760
+ end
590
761
  end
591
762
 
592
763
  # This is a result specific to the `lex` and `lex_file` methods.
@@ -677,5 +848,11 @@ module Prism
677
848
  other.type == type &&
678
849
  other.value == value
679
850
  end
851
+
852
+ # Returns a string representation of this token.
853
+ def inspect
854
+ location
855
+ super
856
+ end
680
857
  end
681
858
  end