prism 0.29.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +115 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/Makefile +1 -1
  5. data/README.md +4 -0
  6. data/config.yml +920 -148
  7. data/docs/build_system.md +8 -11
  8. data/docs/fuzzing.md +1 -1
  9. data/docs/parsing_rules.md +4 -1
  10. data/docs/relocation.md +34 -0
  11. data/docs/ripper_translation.md +22 -0
  12. data/docs/serialization.md +3 -0
  13. data/ext/prism/api_node.c +2863 -2079
  14. data/ext/prism/extconf.rb +14 -37
  15. data/ext/prism/extension.c +241 -391
  16. data/ext/prism/extension.h +2 -2
  17. data/include/prism/ast.h +2156 -453
  18. data/include/prism/defines.h +58 -7
  19. data/include/prism/diagnostic.h +24 -6
  20. data/include/prism/node.h +0 -21
  21. data/include/prism/options.h +94 -3
  22. data/include/prism/parser.h +82 -40
  23. data/include/prism/regexp.h +18 -8
  24. data/include/prism/static_literals.h +3 -2
  25. data/include/prism/util/pm_char.h +1 -2
  26. data/include/prism/util/pm_constant_pool.h +0 -8
  27. data/include/prism/util/pm_integer.h +22 -15
  28. data/include/prism/util/pm_newline_list.h +11 -0
  29. data/include/prism/util/pm_string.h +28 -12
  30. data/include/prism/version.h +3 -3
  31. data/include/prism.h +47 -11
  32. data/lib/prism/compiler.rb +3 -0
  33. data/lib/prism/desugar_compiler.rb +111 -74
  34. data/lib/prism/dispatcher.rb +16 -1
  35. data/lib/prism/dot_visitor.rb +55 -34
  36. data/lib/prism/dsl.rb +660 -468
  37. data/lib/prism/ffi.rb +113 -8
  38. data/lib/prism/inspect_visitor.rb +296 -64
  39. data/lib/prism/lex_compat.rb +1 -1
  40. data/lib/prism/mutation_compiler.rb +11 -6
  41. data/lib/prism/node.rb +4262 -5023
  42. data/lib/prism/node_ext.rb +91 -14
  43. data/lib/prism/parse_result/comments.rb +0 -7
  44. data/lib/prism/parse_result/errors.rb +65 -0
  45. data/lib/prism/parse_result/newlines.rb +101 -11
  46. data/lib/prism/parse_result.rb +183 -6
  47. data/lib/prism/reflection.rb +12 -10
  48. data/lib/prism/relocation.rb +504 -0
  49. data/lib/prism/serialize.rb +496 -609
  50. data/lib/prism/string_query.rb +30 -0
  51. data/lib/prism/translation/parser/compiler.rb +185 -155
  52. data/lib/prism/translation/parser/lexer.rb +26 -4
  53. data/lib/prism/translation/parser.rb +9 -4
  54. data/lib/prism/translation/ripper.rb +23 -25
  55. data/lib/prism/translation/ruby_parser.rb +86 -17
  56. data/lib/prism/visitor.rb +3 -0
  57. data/lib/prism.rb +6 -8
  58. data/prism.gemspec +9 -5
  59. data/rbi/prism/dsl.rbi +521 -0
  60. data/rbi/prism/node.rbi +1115 -1120
  61. data/rbi/prism/parse_result.rbi +29 -0
  62. data/rbi/prism/string_query.rbi +12 -0
  63. data/rbi/prism/visitor.rbi +3 -0
  64. data/rbi/prism.rbi +36 -30
  65. data/sig/prism/dsl.rbs +190 -303
  66. data/sig/prism/mutation_compiler.rbs +1 -0
  67. data/sig/prism/node.rbs +678 -632
  68. data/sig/prism/parse_result.rbs +22 -0
  69. data/sig/prism/relocation.rbs +185 -0
  70. data/sig/prism/string_query.rbs +11 -0
  71. data/sig/prism/visitor.rbs +1 -0
  72. data/sig/prism.rbs +103 -64
  73. data/src/diagnostic.c +64 -28
  74. data/src/node.c +502 -1739
  75. data/src/options.c +76 -27
  76. data/src/prettyprint.c +188 -112
  77. data/src/prism.c +3376 -2293
  78. data/src/regexp.c +208 -71
  79. data/src/serialize.c +182 -50
  80. data/src/static_literals.c +64 -85
  81. data/src/token_type.c +4 -4
  82. data/src/util/pm_char.c +1 -1
  83. data/src/util/pm_constant_pool.c +0 -8
  84. data/src/util/pm_integer.c +53 -25
  85. data/src/util/pm_newline_list.c +29 -0
  86. data/src/util/pm_string.c +131 -80
  87. data/src/util/pm_strpbrk.c +32 -6
  88. metadata +11 -7
  89. data/include/prism/util/pm_string_list.h +0 -44
  90. data/lib/prism/debug.rb +0 -249
  91. data/lib/prism/translation/parser/rubocop.rb +0 -73
  92. data/src/util/pm_string_list.c +0 -28
@@ -5,10 +5,13 @@
5
5
  module Prism
6
6
  class Node
7
7
  def deprecated(*replacements) # :nodoc:
8
+ location = caller_locations(1, 1)
9
+ location = location[0].label if location
8
10
  suggest = replacements.map { |replacement| "#{self.class}##{replacement}" }
11
+
9
12
  warn(<<~MSG, category: :deprecated)
10
- [deprecation]: #{self.class}##{caller_locations(1, 1)[0].label} is deprecated \
11
- and will be removed in the next major version. Use #{suggest.join("/")} instead.
13
+ [deprecation]: #{self.class}##{location} is deprecated and will be \
14
+ removed in the next major version. Use #{suggest.join("/")} instead.
12
15
  #{(caller(1, 3) || []).join("\n")}
13
16
  MSG
14
17
  end
@@ -18,7 +21,10 @@ module Prism
18
21
  # Returns a numeric value that represents the flags that were used to create
19
22
  # the regular expression.
20
23
  def options
21
- o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE)
24
+ o = 0
25
+ o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE)
26
+ o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED)
27
+ o |= Regexp::MULTILINE if flags.anybits?(RegularExpressionFlags::MULTI_LINE)
22
28
  o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8)
23
29
  o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT)
24
30
  o
@@ -66,11 +72,12 @@ module Prism
66
72
  def to_interpolated
67
73
  InterpolatedStringNode.new(
68
74
  source,
75
+ -1,
76
+ location,
69
77
  frozen? ? InterpolatedStringNodeFlags::FROZEN : 0,
70
78
  opening_loc,
71
- [copy(opening_loc: nil, closing_loc: nil, location: content_loc)],
72
- closing_loc,
73
- location
79
+ [copy(location: content_loc, opening_loc: nil, closing_loc: nil)],
80
+ closing_loc
74
81
  )
75
82
  end
76
83
  end
@@ -83,10 +90,12 @@ module Prism
83
90
  def to_interpolated
84
91
  InterpolatedXStringNode.new(
85
92
  source,
93
+ -1,
94
+ location,
95
+ flags,
86
96
  opening_loc,
87
- [StringNode.new(source, 0, nil, content_loc, nil, unescaped, content_loc)],
88
- closing_loc,
89
- location
97
+ [StringNode.new(source, node_id, content_loc, 0, nil, content_loc, nil, unescaped)],
98
+ closing_loc
90
99
  )
91
100
  end
92
101
  end
@@ -103,7 +112,19 @@ module Prism
103
112
  class RationalNode < Node
104
113
  # Returns the value of the node as a Ruby Rational.
105
114
  def value
106
- Rational(numeric.is_a?(IntegerNode) ? numeric.value : slice.chomp("r"))
115
+ Rational(numerator, denominator)
116
+ end
117
+
118
+ # Returns the value of the node as an IntegerNode or a FloatNode. This
119
+ # method is deprecated in favor of #value or #numerator/#denominator.
120
+ def numeric
121
+ deprecated("value", "numerator", "denominator")
122
+
123
+ if denominator == 1
124
+ IntegerNode.new(source, -1, location.chop, flags, numerator)
125
+ else
126
+ FloatNode.new(source, -1, location.chop, 0, numerator.to_f / denominator)
127
+ end
107
128
  end
108
129
  end
109
130
 
@@ -180,7 +201,12 @@ module Prism
180
201
  # continue to supply that API.
181
202
  def child
182
203
  deprecated("name", "name_loc")
183
- name ? ConstantReadNode.new(source, name, name_loc) : MissingNode.new(source, location)
204
+
205
+ if name
206
+ ConstantReadNode.new(source, -1, name_loc, 0, name)
207
+ else
208
+ MissingNode.new(source, -1, location, 0)
209
+ end
184
210
  end
185
211
  end
186
212
 
@@ -216,7 +242,12 @@ module Prism
216
242
  # continue to supply that API.
217
243
  def child
218
244
  deprecated("name", "name_loc")
219
- name ? ConstantReadNode.new(source, name, name_loc) : MissingNode.new(source, location)
245
+
246
+ if name
247
+ ConstantReadNode.new(source, -1, name_loc, 0, name)
248
+ else
249
+ MissingNode.new(source, -1, location, 0)
250
+ end
220
251
  end
221
252
  end
222
253
 
@@ -249,9 +280,10 @@ module Prism
249
280
  end
250
281
 
251
282
  posts.each do |param|
252
- if param.is_a?(MultiTargetNode)
283
+ case param
284
+ when MultiTargetNode
253
285
  names << [:req]
254
- elsif param.is_a?(NoKeywordsParameterNode)
286
+ when NoKeywordsParameterNode, KeywordRestParameterNode, ForwardingParameterNode
255
287
  # Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts
256
288
  raise "Invalid syntax"
257
289
  else
@@ -428,4 +460,49 @@ module Prism
428
460
  binary_operator_loc
429
461
  end
430
462
  end
463
+
464
+ class CaseMatchNode < Node
465
+ # Returns the else clause of the case match node. This method is deprecated
466
+ # in favor of #else_clause.
467
+ def consequent
468
+ deprecated("else_clause")
469
+ else_clause
470
+ end
471
+ end
472
+
473
+ class CaseNode < Node
474
+ # Returns the else clause of the case node. This method is deprecated in
475
+ # favor of #else_clause.
476
+ def consequent
477
+ deprecated("else_clause")
478
+ else_clause
479
+ end
480
+ end
481
+
482
+ class IfNode < Node
483
+ # Returns the subsequent if/elsif/else clause of the if node. This method is
484
+ # deprecated in favor of #subsequent.
485
+ def consequent
486
+ deprecated("subsequent")
487
+ subsequent
488
+ end
489
+ end
490
+
491
+ class RescueNode < Node
492
+ # Returns the subsequent rescue clause of the rescue node. This method is
493
+ # deprecated in favor of #subsequent.
494
+ def consequent
495
+ deprecated("subsequent")
496
+ subsequent
497
+ end
498
+ end
499
+
500
+ class UnlessNode < Node
501
+ # Returns the else clause of the unless node. This method is deprecated in
502
+ # favor of #else_clause.
503
+ def consequent
504
+ deprecated("else_clause")
505
+ else_clause
506
+ end
507
+ end
431
508
  end
@@ -183,12 +183,5 @@ module Prism
183
183
  [preceding, NodeTarget.new(node), following]
184
184
  end
185
185
  end
186
-
187
- private_constant :Comments
188
-
189
- # Attach the list of comments to their respective locations in the tree.
190
- def attach_comments!
191
- Comments.new(self).attach! # steep:ignore
192
- end
193
186
  end
194
187
  end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+
5
+ module Prism
6
+ class ParseResult < Result
7
+ # An object to represent the set of errors on a parse result. This object
8
+ # can be used to format the errors in a human-readable way.
9
+ class Errors
10
+ # The parse result that contains the errors.
11
+ attr_reader :parse_result
12
+
13
+ # Initialize a new set of errors from the given parse result.
14
+ def initialize(parse_result)
15
+ @parse_result = parse_result
16
+ end
17
+
18
+ # Formats the errors in a human-readable way and return them as a string.
19
+ def format
20
+ error_lines = {} #: Hash[Integer, Array[ParseError]]
21
+ parse_result.errors.each do |error|
22
+ location = error.location
23
+ (location.start_line..location.end_line).each do |line|
24
+ error_lines[line] ||= []
25
+ error_lines[line] << error
26
+ end
27
+ end
28
+
29
+ source_lines = parse_result.source.source.lines
30
+ source_lines << "" if error_lines.key?(source_lines.size + 1)
31
+
32
+ io = StringIO.new
33
+ source_lines.each.with_index(1) do |line, line_number|
34
+ io.puts(line)
35
+
36
+ (error_lines.delete(line_number) || []).each do |error|
37
+ location = error.location
38
+
39
+ case line_number
40
+ when location.start_line
41
+ io.print(" " * location.start_column + "^")
42
+
43
+ if location.start_line == location.end_line
44
+ if location.start_column != location.end_column
45
+ io.print("~" * (location.end_column - location.start_column - 1))
46
+ end
47
+
48
+ io.puts(" " + error.message)
49
+ else
50
+ io.puts("~" * (line.bytesize - location.start_column))
51
+ end
52
+ when location.end_line
53
+ io.puts("~" * location.end_column + " " + error.message)
54
+ else
55
+ io.puts("~" * line.bytesize)
56
+ end
57
+ end
58
+ end
59
+
60
+ io.puts
61
+ io.string
62
+ end
63
+ end
64
+ end
65
+ end
@@ -17,21 +17,27 @@ module Prism
17
17
  # Note that the logic in this file should be kept in sync with the Java
18
18
  # MarkNewlinesVisitor, since that visitor is responsible for marking the
19
19
  # newlines for JRuby/TruffleRuby.
20
+ #
21
+ # This file is autoloaded only when `mark_newlines!` is called, so the
22
+ # re-opening of the various nodes in this file will only be performed in
23
+ # that case. We do that to avoid storing the extra `@newline` instance
24
+ # variable on every node if we don't need it.
20
25
  class Newlines < Visitor
21
26
  # Create a new Newlines visitor with the given newline offsets.
22
- def initialize(newline_marked)
23
- @newline_marked = newline_marked
27
+ def initialize(lines)
28
+ # @type var lines: Integer
29
+ @lines = Array.new(1 + lines, false)
24
30
  end
25
31
 
26
32
  # Permit block/lambda nodes to mark newlines within themselves.
27
33
  def visit_block_node(node)
28
- old_newline_marked = @newline_marked
29
- @newline_marked = Array.new(old_newline_marked.size, false)
34
+ old_lines = @lines
35
+ @lines = Array.new(old_lines.size, false)
30
36
 
31
37
  begin
32
38
  super(node)
33
39
  ensure
34
- @newline_marked = old_newline_marked
40
+ @lines = old_lines
35
41
  end
36
42
  end
37
43
 
@@ -39,7 +45,7 @@ module Prism
39
45
 
40
46
  # Mark if/unless nodes as newlines.
41
47
  def visit_if_node(node)
42
- node.set_newline_flag(@newline_marked)
48
+ node.newline_flag!(@lines)
43
49
  super(node)
44
50
  end
45
51
 
@@ -48,17 +54,101 @@ module Prism
48
54
  # Permit statements lists to mark newlines within themselves.
49
55
  def visit_statements_node(node)
50
56
  node.body.each do |child|
51
- child.set_newline_flag(@newline_marked)
57
+ child.newline_flag!(@lines)
52
58
  end
53
59
  super(node)
54
60
  end
55
61
  end
62
+ end
63
+
64
+ class Node
65
+ def newline_flag? # :nodoc:
66
+ @newline_flag ? true : false
67
+ end
68
+
69
+ def newline_flag!(lines) # :nodoc:
70
+ line = location.start_line
71
+ unless lines[line]
72
+ lines[line] = true
73
+ @newline_flag = true
74
+ end
75
+ end
76
+ end
77
+
78
+ class BeginNode < Node
79
+ def newline_flag!(lines) # :nodoc:
80
+ # Never mark BeginNode with a newline flag, mark children instead.
81
+ end
82
+ end
83
+
84
+ class ParenthesesNode < Node
85
+ def newline_flag!(lines) # :nodoc:
86
+ # Never mark ParenthesesNode with a newline flag, mark children instead.
87
+ end
88
+ end
89
+
90
+ class IfNode < Node
91
+ def newline_flag!(lines) # :nodoc:
92
+ predicate.newline_flag!(lines)
93
+ end
94
+ end
95
+
96
+ class UnlessNode < Node
97
+ def newline_flag!(lines) # :nodoc:
98
+ predicate.newline_flag!(lines)
99
+ end
100
+ end
101
+
102
+ class UntilNode < Node
103
+ def newline_flag!(lines) # :nodoc:
104
+ predicate.newline_flag!(lines)
105
+ end
106
+ end
107
+
108
+ class WhileNode < Node
109
+ def newline_flag!(lines) # :nodoc:
110
+ predicate.newline_flag!(lines)
111
+ end
112
+ end
113
+
114
+ class RescueModifierNode < Node
115
+ def newline_flag!(lines) # :nodoc:
116
+ expression.newline_flag!(lines)
117
+ end
118
+ end
119
+
120
+ class InterpolatedMatchLastLineNode < Node
121
+ def newline_flag!(lines) # :nodoc:
122
+ first = parts.first
123
+ first.newline_flag!(lines) if first
124
+ end
125
+ end
126
+
127
+ class InterpolatedRegularExpressionNode < Node
128
+ def newline_flag!(lines) # :nodoc:
129
+ first = parts.first
130
+ first.newline_flag!(lines) if first
131
+ end
132
+ end
133
+
134
+ class InterpolatedStringNode < Node
135
+ def newline_flag!(lines) # :nodoc:
136
+ first = parts.first
137
+ first.newline_flag!(lines) if first
138
+ end
139
+ end
56
140
 
57
- private_constant :Newlines
141
+ class InterpolatedSymbolNode < Node
142
+ def newline_flag!(lines) # :nodoc:
143
+ first = parts.first
144
+ first.newline_flag!(lines) if first
145
+ end
146
+ end
58
147
 
59
- # Walk the tree and mark nodes that are on a new line.
60
- def mark_newlines!
61
- value.accept(Newlines.new(Array.new(1 + source.offsets.size, false))) # steep:ignore
148
+ class InterpolatedXStringNode < Node
149
+ def newline_flag!(lines) # :nodoc:
150
+ first = parts.first
151
+ first.newline_flag!(lines) if first
62
152
  end
63
153
  end
64
154
  end
@@ -10,7 +10,26 @@ module Prism
10
10
  # specialized and more performant `ASCIISource` if no multibyte characters
11
11
  # are present in the source code.
12
12
  def self.for(source, start_line = 1, offsets = [])
13
- source.ascii_only? ? ASCIISource.new(source, start_line, offsets): new(source, start_line, offsets)
13
+ if source.ascii_only?
14
+ ASCIISource.new(source, start_line, offsets)
15
+ elsif source.encoding == Encoding::BINARY
16
+ source.force_encoding(Encoding::UTF_8)
17
+
18
+ if source.valid_encoding?
19
+ new(source, start_line, offsets)
20
+ else
21
+ # This is an extremely niche use case where the file is marked as
22
+ # binary, contains multi-byte characters, and those characters are not
23
+ # valid UTF-8. In this case we'll mark it as binary and fall back to
24
+ # treating everything as a single-byte character. This _may_ cause
25
+ # problems when asking for code units, but it appears to be the
26
+ # cleanest solution at the moment.
27
+ source.force_encoding(Encoding::BINARY)
28
+ ASCIISource.new(source, start_line, offsets)
29
+ end
30
+ else
31
+ new(source, start_line, offsets)
32
+ end
14
33
  end
15
34
 
16
35
  # The source code that this source object represents.
@@ -85,9 +104,26 @@ module Prism
85
104
  # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
86
105
  # concept of code units that differs from the number of characters in other
87
106
  # encodings, it is not captured here.
107
+ #
108
+ # We purposefully replace invalid and undefined characters with replacement
109
+ # characters in this conversion. This happens for two reasons. First, it's
110
+ # possible that the given byte offset will not occur on a character
111
+ # boundary. Second, it's possible that the source code will contain a
112
+ # character that has no equivalent in the given encoding.
88
113
  def code_units_offset(byte_offset, encoding)
89
- byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
90
- (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
114
+ byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
115
+
116
+ if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
117
+ byteslice.bytesize / 2
118
+ else
119
+ byteslice.length
120
+ end
121
+ end
122
+
123
+ # Generate a cache that targets a specific encoding for calculating code
124
+ # unit offsets.
125
+ def code_units_cache(encoding)
126
+ CodeUnitsCache.new(source, encoding)
91
127
  end
92
128
 
93
129
  # Returns the column number in code units for the given encoding for the
@@ -119,10 +155,84 @@ module Prism
119
155
  end
120
156
  end
121
157
 
158
+ # A cache that can be used to quickly compute code unit offsets from byte
159
+ # offsets. It purposefully provides only a single #[] method to access the
160
+ # cache in order to minimize surface area.
161
+ #
162
+ # Note that there are some known issues here that may or may not be addressed
163
+ # in the future:
164
+ #
165
+ # * The first is that there are issues when the cache computes values that are
166
+ # not on character boundaries. This can result in subsequent computations
167
+ # being off by one or more code units.
168
+ # * The second is that this cache is currently unbounded. In theory we could
169
+ # introduce some kind of LRU cache to limit the number of entries, but this
170
+ # has not yet been implemented.
171
+ #
172
+ class CodeUnitsCache
173
+ class UTF16Counter # :nodoc:
174
+ def initialize(source, encoding)
175
+ @source = source
176
+ @encoding = encoding
177
+ end
178
+
179
+ def count(byte_offset, byte_length)
180
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
181
+ end
182
+ end
183
+
184
+ class LengthCounter # :nodoc:
185
+ def initialize(source, encoding)
186
+ @source = source
187
+ @encoding = encoding
188
+ end
189
+
190
+ def count(byte_offset, byte_length)
191
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
192
+ end
193
+ end
194
+
195
+ private_constant :UTF16Counter, :LengthCounter
196
+
197
+ # Initialize a new cache with the given source and encoding.
198
+ def initialize(source, encoding)
199
+ @source = source
200
+ @counter =
201
+ if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
202
+ UTF16Counter.new(source, encoding)
203
+ else
204
+ LengthCounter.new(source, encoding)
205
+ end
206
+
207
+ @cache = {} #: Hash[Integer, Integer]
208
+ @offsets = [] #: Array[Integer]
209
+ end
210
+
211
+ # Retrieve the code units offset from the given byte offset.
212
+ def [](byte_offset)
213
+ @cache[byte_offset] ||=
214
+ if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
215
+ @offsets << byte_offset
216
+ @counter.count(0, byte_offset)
217
+ elsif index == 0
218
+ @offsets.unshift(byte_offset)
219
+ @counter.count(0, byte_offset)
220
+ else
221
+ @offsets.insert(index, byte_offset)
222
+ offset = @offsets[index - 1]
223
+ @cache[offset] + @counter.count(offset, byte_offset - offset)
224
+ end
225
+ end
226
+ end
227
+
122
228
  # Specialized version of Prism::Source for source code that includes ASCII
123
229
  # characters only. This class is used to apply performance optimizations that
124
- # cannot be applied to sources that include multibyte characters. Sources that
125
- # include multibyte characters are represented by the Prism::Source class.
230
+ # cannot be applied to sources that include multibyte characters.
231
+ #
232
+ # In the extremely rare case that a source includes multi-byte characters but
233
+ # is marked as binary because of a magic encoding comment and it cannot be
234
+ # eagerly converted to UTF-8, this class will be used as well. This is because
235
+ # at that point we will treat everything as single-byte characters.
126
236
  class ASCIISource < Source
127
237
  # Return the character offset for the given byte offset.
128
238
  def character_offset(byte_offset)
@@ -144,9 +254,16 @@ module Prism
144
254
  byte_offset
145
255
  end
146
256
 
257
+ # Returns a cache that is the identity function in order to maintain the
258
+ # same interface. We can do this because code units are always equivalent to
259
+ # byte offsets for ASCII-only sources.
260
+ def code_units_cache(encoding)
261
+ ->(byte_offset) { byte_offset }
262
+ end
263
+
147
264
  # Specialized version of `code_units_column` that does not depend on
148
265
  # `code_units_offset`, which is a more expensive operation. This is
149
- # essentialy the same as `Prism::Source#column`.
266
+ # essentially the same as `Prism::Source#column`.
150
267
  def code_units_column(byte_offset, encoding)
151
268
  byte_offset - line_start(byte_offset)
152
269
  end
@@ -253,6 +370,12 @@ module Prism
253
370
  source.code_units_offset(start_offset, encoding)
254
371
  end
255
372
 
373
+ # The start offset from the start of the file in code units using the given
374
+ # cache to fetch or calculate the value.
375
+ def cached_start_code_units_offset(cache)
376
+ cache[start_offset]
377
+ end
378
+
256
379
  # The byte offset from the beginning of the source where this location ends.
257
380
  def end_offset
258
381
  start_offset + length
@@ -269,6 +392,12 @@ module Prism
269
392
  source.code_units_offset(end_offset, encoding)
270
393
  end
271
394
 
395
+ # The end offset from the start of the file in code units using the given
396
+ # cache to fetch or calculate the value.
397
+ def cached_end_code_units_offset(cache)
398
+ cache[end_offset]
399
+ end
400
+
272
401
  # The line number where this location starts.
273
402
  def start_line
274
403
  source.line(start_offset)
@@ -303,6 +432,12 @@ module Prism
303
432
  source.code_units_column(start_offset, encoding)
304
433
  end
305
434
 
435
+ # The start column in code units using the given cache to fetch or calculate
436
+ # the value.
437
+ def cached_start_code_units_column(cache)
438
+ cache[start_offset] - cache[source.line_start(start_offset)]
439
+ end
440
+
306
441
  # The column number in bytes where this location ends from the start of the
307
442
  # line.
308
443
  def end_column
@@ -321,6 +456,12 @@ module Prism
321
456
  source.code_units_column(end_offset, encoding)
322
457
  end
323
458
 
459
+ # The end column in code units using the given cache to fetch or calculate
460
+ # the value.
461
+ def cached_end_code_units_column(cache)
462
+ cache[end_offset] - cache[source.line_start(end_offset)]
463
+ end
464
+
324
465
  # Implement the hash pattern matching interface for Location.
325
466
  def deconstruct_keys(keys)
326
467
  { start_offset: start_offset, end_offset: end_offset }
@@ -570,10 +711,23 @@ module Prism
570
711
  def failure?
571
712
  !success?
572
713
  end
714
+
715
+ # Create a code units cache for the given encoding.
716
+ def code_units_cache(encoding)
717
+ source.code_units_cache(encoding)
718
+ end
573
719
  end
574
720
 
575
721
  # This is a result specific to the `parse` and `parse_file` methods.
576
722
  class ParseResult < Result
723
+ autoload :Comments, "prism/parse_result/comments"
724
+ autoload :Errors, "prism/parse_result/errors"
725
+ autoload :Newlines, "prism/parse_result/newlines"
726
+
727
+ private_constant :Comments
728
+ private_constant :Errors
729
+ private_constant :Newlines
730
+
577
731
  # The syntax tree that was parsed from the source code.
578
732
  attr_reader :value
579
733
 
@@ -587,6 +741,23 @@ module Prism
587
741
  def deconstruct_keys(keys)
588
742
  super.merge!(value: value)
589
743
  end
744
+
745
+ # Attach the list of comments to their respective locations in the tree.
746
+ def attach_comments!
747
+ Comments.new(self).attach! # steep:ignore
748
+ end
749
+
750
+ # Walk the tree and mark nodes that are on a new line, loosely emulating
751
+ # the behavior of CRuby's `:line` tracepoint event.
752
+ def mark_newlines!
753
+ value.accept(Newlines.new(source.offsets.size)) # steep:ignore
754
+ end
755
+
756
+ # Returns a string representation of the syntax tree with the errors
757
+ # displayed inline.
758
+ def errors_format
759
+ Errors.new(self).format
760
+ end
590
761
  end
591
762
 
592
763
  # This is a result specific to the `lex` and `lex_file` methods.
@@ -677,5 +848,11 @@ module Prism
677
848
  other.type == type &&
678
849
  other.value == value
679
850
  end
851
+
852
+ # Returns a string representation of this token.
853
+ def inspect
854
+ location
855
+ super
856
+ end
680
857
  end
681
858
  end