prism 0.29.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +115 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +1 -1
- data/README.md +4 -0
- data/config.yml +920 -148
- data/docs/build_system.md +8 -11
- data/docs/fuzzing.md +1 -1
- data/docs/parsing_rules.md +4 -1
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +22 -0
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2863 -2079
- data/ext/prism/extconf.rb +14 -37
- data/ext/prism/extension.c +241 -391
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +2156 -453
- data/include/prism/defines.h +58 -7
- data/include/prism/diagnostic.h +24 -6
- data/include/prism/node.h +0 -21
- data/include/prism/options.h +94 -3
- data/include/prism/parser.h +82 -40
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +22 -15
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +28 -12
- data/include/prism/version.h +3 -3
- data/include/prism.h +47 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +16 -1
- data/lib/prism/dot_visitor.rb +55 -34
- data/lib/prism/dsl.rb +660 -468
- data/lib/prism/ffi.rb +113 -8
- data/lib/prism/inspect_visitor.rb +296 -64
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +11 -6
- data/lib/prism/node.rb +4262 -5023
- data/lib/prism/node_ext.rb +91 -14
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +183 -6
- data/lib/prism/reflection.rb +12 -10
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +496 -609
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/compiler.rb +185 -155
- data/lib/prism/translation/parser/lexer.rb +26 -4
- data/lib/prism/translation/parser.rb +9 -4
- data/lib/prism/translation/ripper.rb +23 -25
- data/lib/prism/translation/ruby_parser.rb +86 -17
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +6 -8
- data/prism.gemspec +9 -5
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +1115 -1120
- data/rbi/prism/parse_result.rbi +29 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +36 -30
- data/sig/prism/dsl.rbs +190 -303
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +678 -632
- data/sig/prism/parse_result.rbs +22 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +103 -64
- data/src/diagnostic.c +64 -28
- data/src/node.c +502 -1739
- data/src/options.c +76 -27
- data/src/prettyprint.c +188 -112
- data/src/prism.c +3376 -2293
- data/src/regexp.c +208 -71
- data/src/serialize.c +182 -50
- data/src/static_literals.c +64 -85
- data/src/token_type.c +4 -4
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +53 -25
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +131 -80
- data/src/util/pm_strpbrk.c +32 -6
- metadata +11 -7
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/lib/prism/translation/parser/rubocop.rb +0 -73
- data/src/util/pm_string_list.c +0 -28
data/lib/prism/node_ext.rb
CHANGED
@@ -5,10 +5,13 @@
|
|
5
5
|
module Prism
|
6
6
|
class Node
|
7
7
|
def deprecated(*replacements) # :nodoc:
|
8
|
+
location = caller_locations(1, 1)
|
9
|
+
location = location[0].label if location
|
8
10
|
suggest = replacements.map { |replacement| "#{self.class}##{replacement}" }
|
11
|
+
|
9
12
|
warn(<<~MSG, category: :deprecated)
|
10
|
-
[deprecation]: #{self.class}##{
|
11
|
-
|
13
|
+
[deprecation]: #{self.class}##{location} is deprecated and will be \
|
14
|
+
removed in the next major version. Use #{suggest.join("/")} instead.
|
12
15
|
#{(caller(1, 3) || []).join("\n")}
|
13
16
|
MSG
|
14
17
|
end
|
@@ -18,7 +21,10 @@ module Prism
|
|
18
21
|
# Returns a numeric value that represents the flags that were used to create
|
19
22
|
# the regular expression.
|
20
23
|
def options
|
21
|
-
o =
|
24
|
+
o = 0
|
25
|
+
o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE)
|
26
|
+
o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED)
|
27
|
+
o |= Regexp::MULTILINE if flags.anybits?(RegularExpressionFlags::MULTI_LINE)
|
22
28
|
o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8)
|
23
29
|
o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT)
|
24
30
|
o
|
@@ -66,11 +72,12 @@ module Prism
|
|
66
72
|
def to_interpolated
|
67
73
|
InterpolatedStringNode.new(
|
68
74
|
source,
|
75
|
+
-1,
|
76
|
+
location,
|
69
77
|
frozen? ? InterpolatedStringNodeFlags::FROZEN : 0,
|
70
78
|
opening_loc,
|
71
|
-
[copy(
|
72
|
-
closing_loc
|
73
|
-
location
|
79
|
+
[copy(location: content_loc, opening_loc: nil, closing_loc: nil)],
|
80
|
+
closing_loc
|
74
81
|
)
|
75
82
|
end
|
76
83
|
end
|
@@ -83,10 +90,12 @@ module Prism
|
|
83
90
|
def to_interpolated
|
84
91
|
InterpolatedXStringNode.new(
|
85
92
|
source,
|
93
|
+
-1,
|
94
|
+
location,
|
95
|
+
flags,
|
86
96
|
opening_loc,
|
87
|
-
[StringNode.new(source, 0, nil, content_loc, nil, unescaped
|
88
|
-
closing_loc
|
89
|
-
location
|
97
|
+
[StringNode.new(source, node_id, content_loc, 0, nil, content_loc, nil, unescaped)],
|
98
|
+
closing_loc
|
90
99
|
)
|
91
100
|
end
|
92
101
|
end
|
@@ -103,7 +112,19 @@ module Prism
|
|
103
112
|
class RationalNode < Node
|
104
113
|
# Returns the value of the node as a Ruby Rational.
|
105
114
|
def value
|
106
|
-
Rational(
|
115
|
+
Rational(numerator, denominator)
|
116
|
+
end
|
117
|
+
|
118
|
+
# Returns the value of the node as an IntegerNode or a FloatNode. This
|
119
|
+
# method is deprecated in favor of #value or #numerator/#denominator.
|
120
|
+
def numeric
|
121
|
+
deprecated("value", "numerator", "denominator")
|
122
|
+
|
123
|
+
if denominator == 1
|
124
|
+
IntegerNode.new(source, -1, location.chop, flags, numerator)
|
125
|
+
else
|
126
|
+
FloatNode.new(source, -1, location.chop, 0, numerator.to_f / denominator)
|
127
|
+
end
|
107
128
|
end
|
108
129
|
end
|
109
130
|
|
@@ -180,7 +201,12 @@ module Prism
|
|
180
201
|
# continue to supply that API.
|
181
202
|
def child
|
182
203
|
deprecated("name", "name_loc")
|
183
|
-
|
204
|
+
|
205
|
+
if name
|
206
|
+
ConstantReadNode.new(source, -1, name_loc, 0, name)
|
207
|
+
else
|
208
|
+
MissingNode.new(source, -1, location, 0)
|
209
|
+
end
|
184
210
|
end
|
185
211
|
end
|
186
212
|
|
@@ -216,7 +242,12 @@ module Prism
|
|
216
242
|
# continue to supply that API.
|
217
243
|
def child
|
218
244
|
deprecated("name", "name_loc")
|
219
|
-
|
245
|
+
|
246
|
+
if name
|
247
|
+
ConstantReadNode.new(source, -1, name_loc, 0, name)
|
248
|
+
else
|
249
|
+
MissingNode.new(source, -1, location, 0)
|
250
|
+
end
|
220
251
|
end
|
221
252
|
end
|
222
253
|
|
@@ -249,9 +280,10 @@ module Prism
|
|
249
280
|
end
|
250
281
|
|
251
282
|
posts.each do |param|
|
252
|
-
|
283
|
+
case param
|
284
|
+
when MultiTargetNode
|
253
285
|
names << [:req]
|
254
|
-
|
286
|
+
when NoKeywordsParameterNode, KeywordRestParameterNode, ForwardingParameterNode
|
255
287
|
# Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts
|
256
288
|
raise "Invalid syntax"
|
257
289
|
else
|
@@ -428,4 +460,49 @@ module Prism
|
|
428
460
|
binary_operator_loc
|
429
461
|
end
|
430
462
|
end
|
463
|
+
|
464
|
+
class CaseMatchNode < Node
|
465
|
+
# Returns the else clause of the case match node. This method is deprecated
|
466
|
+
# in favor of #else_clause.
|
467
|
+
def consequent
|
468
|
+
deprecated("else_clause")
|
469
|
+
else_clause
|
470
|
+
end
|
471
|
+
end
|
472
|
+
|
473
|
+
class CaseNode < Node
|
474
|
+
# Returns the else clause of the case node. This method is deprecated in
|
475
|
+
# favor of #else_clause.
|
476
|
+
def consequent
|
477
|
+
deprecated("else_clause")
|
478
|
+
else_clause
|
479
|
+
end
|
480
|
+
end
|
481
|
+
|
482
|
+
class IfNode < Node
|
483
|
+
# Returns the subsequent if/elsif/else clause of the if node. This method is
|
484
|
+
# deprecated in favor of #subsequent.
|
485
|
+
def consequent
|
486
|
+
deprecated("subsequent")
|
487
|
+
subsequent
|
488
|
+
end
|
489
|
+
end
|
490
|
+
|
491
|
+
class RescueNode < Node
|
492
|
+
# Returns the subsequent rescue clause of the rescue node. This method is
|
493
|
+
# deprecated in favor of #subsequent.
|
494
|
+
def consequent
|
495
|
+
deprecated("subsequent")
|
496
|
+
subsequent
|
497
|
+
end
|
498
|
+
end
|
499
|
+
|
500
|
+
class UnlessNode < Node
|
501
|
+
# Returns the else clause of the unless node. This method is deprecated in
|
502
|
+
# favor of #else_clause.
|
503
|
+
def consequent
|
504
|
+
deprecated("else_clause")
|
505
|
+
else_clause
|
506
|
+
end
|
507
|
+
end
|
431
508
|
end
|
@@ -183,12 +183,5 @@ module Prism
|
|
183
183
|
[preceding, NodeTarget.new(node), following]
|
184
184
|
end
|
185
185
|
end
|
186
|
-
|
187
|
-
private_constant :Comments
|
188
|
-
|
189
|
-
# Attach the list of comments to their respective locations in the tree.
|
190
|
-
def attach_comments!
|
191
|
-
Comments.new(self).attach! # steep:ignore
|
192
|
-
end
|
193
186
|
end
|
194
187
|
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "stringio"
|
4
|
+
|
5
|
+
module Prism
|
6
|
+
class ParseResult < Result
|
7
|
+
# An object to represent the set of errors on a parse result. This object
|
8
|
+
# can be used to format the errors in a human-readable way.
|
9
|
+
class Errors
|
10
|
+
# The parse result that contains the errors.
|
11
|
+
attr_reader :parse_result
|
12
|
+
|
13
|
+
# Initialize a new set of errors from the given parse result.
|
14
|
+
def initialize(parse_result)
|
15
|
+
@parse_result = parse_result
|
16
|
+
end
|
17
|
+
|
18
|
+
# Formats the errors in a human-readable way and returns them as a string.
|
19
|
+
def format
|
20
|
+
error_lines = {} #: Hash[Integer, Array[ParseError]]
|
21
|
+
parse_result.errors.each do |error|
|
22
|
+
location = error.location
|
23
|
+
(location.start_line..location.end_line).each do |line|
|
24
|
+
error_lines[line] ||= []
|
25
|
+
error_lines[line] << error
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
source_lines = parse_result.source.source.lines
|
30
|
+
source_lines << "" if error_lines.key?(source_lines.size + 1)
|
31
|
+
|
32
|
+
io = StringIO.new
|
33
|
+
source_lines.each.with_index(1) do |line, line_number|
|
34
|
+
io.puts(line)
|
35
|
+
|
36
|
+
(error_lines.delete(line_number) || []).each do |error|
|
37
|
+
location = error.location
|
38
|
+
|
39
|
+
case line_number
|
40
|
+
when location.start_line
|
41
|
+
io.print(" " * location.start_column + "^")
|
42
|
+
|
43
|
+
if location.start_line == location.end_line
|
44
|
+
if location.start_column != location.end_column
|
45
|
+
io.print("~" * (location.end_column - location.start_column - 1))
|
46
|
+
end
|
47
|
+
|
48
|
+
io.puts(" " + error.message)
|
49
|
+
else
|
50
|
+
io.puts("~" * (line.bytesize - location.start_column))
|
51
|
+
end
|
52
|
+
when location.end_line
|
53
|
+
io.puts("~" * location.end_column + " " + error.message)
|
54
|
+
else
|
55
|
+
io.puts("~" * line.bytesize)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
io.puts
|
61
|
+
io.string
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -17,21 +17,27 @@ module Prism
|
|
17
17
|
# Note that the logic in this file should be kept in sync with the Java
|
18
18
|
# MarkNewlinesVisitor, since that visitor is responsible for marking the
|
19
19
|
# newlines for JRuby/TruffleRuby.
|
20
|
+
#
|
21
|
+
# This file is autoloaded only when `mark_newlines!` is called, so the
|
22
|
+
# re-opening of the various nodes in this file will only be performed in
|
23
|
+
# that case. We do that to avoid storing the extra `@newline_flag` instance
|
24
|
+
# variable on every node if we don't need it.
|
20
25
|
class Newlines < Visitor
|
21
26
|
# Create a new Newlines visitor for a source with the given number of lines.
|
22
|
-
def initialize(
|
23
|
-
@
|
27
|
+
def initialize(lines)
|
28
|
+
# @type var lines: Integer
|
29
|
+
@lines = Array.new(1 + lines, false)
|
24
30
|
end
|
25
31
|
|
26
32
|
# Permit block/lambda nodes to mark newlines within themselves.
|
27
33
|
def visit_block_node(node)
|
28
|
-
|
29
|
-
@
|
34
|
+
old_lines = @lines
|
35
|
+
@lines = Array.new(old_lines.size, false)
|
30
36
|
|
31
37
|
begin
|
32
38
|
super(node)
|
33
39
|
ensure
|
34
|
-
@
|
40
|
+
@lines = old_lines
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
@@ -39,7 +45,7 @@ module Prism
|
|
39
45
|
|
40
46
|
# Mark if/unless nodes as newlines.
|
41
47
|
def visit_if_node(node)
|
42
|
-
node.
|
48
|
+
node.newline_flag!(@lines)
|
43
49
|
super(node)
|
44
50
|
end
|
45
51
|
|
@@ -48,17 +54,101 @@ module Prism
|
|
48
54
|
# Permit statements lists to mark newlines within themselves.
|
49
55
|
def visit_statements_node(node)
|
50
56
|
node.body.each do |child|
|
51
|
-
child.
|
57
|
+
child.newline_flag!(@lines)
|
52
58
|
end
|
53
59
|
super(node)
|
54
60
|
end
|
55
61
|
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class Node
|
65
|
+
def newline_flag? # :nodoc:
|
66
|
+
@newline_flag ? true : false
|
67
|
+
end
|
68
|
+
|
69
|
+
def newline_flag!(lines) # :nodoc:
|
70
|
+
line = location.start_line
|
71
|
+
unless lines[line]
|
72
|
+
lines[line] = true
|
73
|
+
@newline_flag = true
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
class BeginNode < Node
|
79
|
+
def newline_flag!(lines) # :nodoc:
|
80
|
+
# Never mark BeginNode with a newline flag, mark children instead.
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
class ParenthesesNode < Node
|
85
|
+
def newline_flag!(lines) # :nodoc:
|
86
|
+
# Never mark ParenthesesNode with a newline flag, mark children instead.
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
class IfNode < Node
|
91
|
+
def newline_flag!(lines) # :nodoc:
|
92
|
+
predicate.newline_flag!(lines)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
class UnlessNode < Node
|
97
|
+
def newline_flag!(lines) # :nodoc:
|
98
|
+
predicate.newline_flag!(lines)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
class UntilNode < Node
|
103
|
+
def newline_flag!(lines) # :nodoc:
|
104
|
+
predicate.newline_flag!(lines)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
class WhileNode < Node
|
109
|
+
def newline_flag!(lines) # :nodoc:
|
110
|
+
predicate.newline_flag!(lines)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
class RescueModifierNode < Node
|
115
|
+
def newline_flag!(lines) # :nodoc:
|
116
|
+
expression.newline_flag!(lines)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
class InterpolatedMatchLastLineNode < Node
|
121
|
+
def newline_flag!(lines) # :nodoc:
|
122
|
+
first = parts.first
|
123
|
+
first.newline_flag!(lines) if first
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
class InterpolatedRegularExpressionNode < Node
|
128
|
+
def newline_flag!(lines) # :nodoc:
|
129
|
+
first = parts.first
|
130
|
+
first.newline_flag!(lines) if first
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
class InterpolatedStringNode < Node
|
135
|
+
def newline_flag!(lines) # :nodoc:
|
136
|
+
first = parts.first
|
137
|
+
first.newline_flag!(lines) if first
|
138
|
+
end
|
139
|
+
end
|
56
140
|
|
57
|
-
|
141
|
+
class InterpolatedSymbolNode < Node
|
142
|
+
def newline_flag!(lines) # :nodoc:
|
143
|
+
first = parts.first
|
144
|
+
first.newline_flag!(lines) if first
|
145
|
+
end
|
146
|
+
end
|
58
147
|
|
59
|
-
|
60
|
-
def
|
61
|
-
|
148
|
+
class InterpolatedXStringNode < Node
|
149
|
+
def newline_flag!(lines) # :nodoc:
|
150
|
+
first = parts.first
|
151
|
+
first.newline_flag!(lines) if first
|
62
152
|
end
|
63
153
|
end
|
64
154
|
end
|
data/lib/prism/parse_result.rb
CHANGED
@@ -10,7 +10,26 @@ module Prism
|
|
10
10
|
# specialized and more performant `ASCIISource` if no multibyte characters
|
11
11
|
# are present in the source code.
|
12
12
|
def self.for(source, start_line = 1, offsets = [])
|
13
|
-
source.ascii_only?
|
13
|
+
if source.ascii_only?
|
14
|
+
ASCIISource.new(source, start_line, offsets)
|
15
|
+
elsif source.encoding == Encoding::BINARY
|
16
|
+
source.force_encoding(Encoding::UTF_8)
|
17
|
+
|
18
|
+
if source.valid_encoding?
|
19
|
+
new(source, start_line, offsets)
|
20
|
+
else
|
21
|
+
# This is an extremely niche use case where the file is marked as
|
22
|
+
# binary, contains multi-byte characters, and those characters are not
|
23
|
+
# valid UTF-8. In this case we'll mark it as binary and fall back to
|
24
|
+
# treating everything as a single-byte character. This _may_ cause
|
25
|
+
# problems when asking for code units, but it appears to be the
|
26
|
+
# cleanest solution at the moment.
|
27
|
+
source.force_encoding(Encoding::BINARY)
|
28
|
+
ASCIISource.new(source, start_line, offsets)
|
29
|
+
end
|
30
|
+
else
|
31
|
+
new(source, start_line, offsets)
|
32
|
+
end
|
14
33
|
end
|
15
34
|
|
16
35
|
# The source code that this source object represents.
|
@@ -85,9 +104,26 @@ module Prism
|
|
85
104
|
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
|
86
105
|
# concept of code units that differs from the number of characters in other
|
87
106
|
# encodings, it is not captured here.
|
107
|
+
#
|
108
|
+
# We purposefully replace invalid and undefined characters with replacement
|
109
|
+
# characters in this conversion. This happens for two reasons. First, it's
|
110
|
+
# possible that the given byte offset will not occur on a character
|
111
|
+
# boundary. Second, it's possible that the source code will contain a
|
112
|
+
# character that has no equivalent in the given encoding.
|
88
113
|
def code_units_offset(byte_offset, encoding)
|
89
|
-
byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
|
90
|
-
|
114
|
+
byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
|
115
|
+
|
116
|
+
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
|
117
|
+
byteslice.bytesize / 2
|
118
|
+
else
|
119
|
+
byteslice.length
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# Generate a cache that targets a specific encoding for calculating code
|
124
|
+
# unit offsets.
|
125
|
+
def code_units_cache(encoding)
|
126
|
+
CodeUnitsCache.new(source, encoding)
|
91
127
|
end
|
92
128
|
|
93
129
|
# Returns the column number in code units for the given encoding for the
|
@@ -119,10 +155,84 @@ module Prism
|
|
119
155
|
end
|
120
156
|
end
|
121
157
|
|
158
|
+
# A cache that can be used to quickly compute code unit offsets from byte
|
159
|
+
# offsets. It purposefully provides only a single #[] method to access the
|
160
|
+
# cache in order to minimize surface area.
|
161
|
+
#
|
162
|
+
# Note that there are some known issues here that may or may not be addressed
|
163
|
+
# in the future:
|
164
|
+
#
|
165
|
+
# * The first is that there are issues when the cache computes values that are
|
166
|
+
# not on character boundaries. This can result in subsequent computations
|
167
|
+
# being off by one or more code units.
|
168
|
+
# * The second is that this cache is currently unbounded. In theory we could
|
169
|
+
# introduce some kind of LRU cache to limit the number of entries, but this
|
170
|
+
# has not yet been implemented.
|
171
|
+
#
|
172
|
+
class CodeUnitsCache
|
173
|
+
class UTF16Counter # :nodoc:
|
174
|
+
def initialize(source, encoding)
|
175
|
+
@source = source
|
176
|
+
@encoding = encoding
|
177
|
+
end
|
178
|
+
|
179
|
+
def count(byte_offset, byte_length)
|
180
|
+
@source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
class LengthCounter # :nodoc:
|
185
|
+
def initialize(source, encoding)
|
186
|
+
@source = source
|
187
|
+
@encoding = encoding
|
188
|
+
end
|
189
|
+
|
190
|
+
def count(byte_offset, byte_length)
|
191
|
+
@source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
private_constant :UTF16Counter, :LengthCounter
|
196
|
+
|
197
|
+
# Initialize a new cache with the given source and encoding.
|
198
|
+
def initialize(source, encoding)
|
199
|
+
@source = source
|
200
|
+
@counter =
|
201
|
+
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
|
202
|
+
UTF16Counter.new(source, encoding)
|
203
|
+
else
|
204
|
+
LengthCounter.new(source, encoding)
|
205
|
+
end
|
206
|
+
|
207
|
+
@cache = {} #: Hash[Integer, Integer]
|
208
|
+
@offsets = [] #: Array[Integer]
|
209
|
+
end
|
210
|
+
|
211
|
+
# Retrieve the code units offset from the given byte offset.
|
212
|
+
def [](byte_offset)
|
213
|
+
@cache[byte_offset] ||=
|
214
|
+
if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
|
215
|
+
@offsets << byte_offset
|
216
|
+
@counter.count(0, byte_offset)
|
217
|
+
elsif index == 0
|
218
|
+
@offsets.unshift(byte_offset)
|
219
|
+
@counter.count(0, byte_offset)
|
220
|
+
else
|
221
|
+
@offsets.insert(index, byte_offset)
|
222
|
+
offset = @offsets[index - 1]
|
223
|
+
@cache[offset] + @counter.count(offset, byte_offset - offset)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
122
228
|
# Specialized version of Prism::Source for source code that includes ASCII
|
123
229
|
# characters only. This class is used to apply performance optimizations that
|
124
|
-
# cannot be applied to sources that include multibyte characters.
|
125
|
-
#
|
230
|
+
# cannot be applied to sources that include multibyte characters.
|
231
|
+
#
|
232
|
+
# In the extremely rare case that a source includes multi-byte characters but
|
233
|
+
# is marked as binary because of a magic encoding comment and it cannot be
|
234
|
+
# eagerly converted to UTF-8, this class will be used as well. This is because
|
235
|
+
# at that point we will treat everything as single-byte characters.
|
126
236
|
class ASCIISource < Source
|
127
237
|
# Return the character offset for the given byte offset.
|
128
238
|
def character_offset(byte_offset)
|
@@ -144,9 +254,16 @@ module Prism
|
|
144
254
|
byte_offset
|
145
255
|
end
|
146
256
|
|
257
|
+
# Returns a cache that is the identity function in order to maintain the
|
258
|
+
# same interface. We can do this because code units are always equivalent to
|
259
|
+
# byte offsets for ASCII-only sources.
|
260
|
+
def code_units_cache(encoding)
|
261
|
+
->(byte_offset) { byte_offset }
|
262
|
+
end
|
263
|
+
|
147
264
|
# Specialized version of `code_units_column` that does not depend on
|
148
265
|
# `code_units_offset`, which is a more expensive operation. This is
|
149
|
-
#
|
266
|
+
# essentially the same as `Prism::Source#column`.
|
150
267
|
def code_units_column(byte_offset, encoding)
|
151
268
|
byte_offset - line_start(byte_offset)
|
152
269
|
end
|
@@ -253,6 +370,12 @@ module Prism
|
|
253
370
|
source.code_units_offset(start_offset, encoding)
|
254
371
|
end
|
255
372
|
|
373
|
+
# The start offset from the start of the file in code units using the given
|
374
|
+
# cache to fetch or calculate the value.
|
375
|
+
def cached_start_code_units_offset(cache)
|
376
|
+
cache[start_offset]
|
377
|
+
end
|
378
|
+
|
256
379
|
# The byte offset from the beginning of the source where this location ends.
|
257
380
|
def end_offset
|
258
381
|
start_offset + length
|
@@ -269,6 +392,12 @@ module Prism
|
|
269
392
|
source.code_units_offset(end_offset, encoding)
|
270
393
|
end
|
271
394
|
|
395
|
+
# The end offset from the start of the file in code units using the given
|
396
|
+
# cache to fetch or calculate the value.
|
397
|
+
def cached_end_code_units_offset(cache)
|
398
|
+
cache[end_offset]
|
399
|
+
end
|
400
|
+
|
272
401
|
# The line number where this location starts.
|
273
402
|
def start_line
|
274
403
|
source.line(start_offset)
|
@@ -303,6 +432,12 @@ module Prism
|
|
303
432
|
source.code_units_column(start_offset, encoding)
|
304
433
|
end
|
305
434
|
|
435
|
+
# The start column in code units using the given cache to fetch or calculate
|
436
|
+
# the value.
|
437
|
+
def cached_start_code_units_column(cache)
|
438
|
+
cache[start_offset] - cache[source.line_start(start_offset)]
|
439
|
+
end
|
440
|
+
|
306
441
|
# The column number in bytes where this location ends from the start of the
|
307
442
|
# line.
|
308
443
|
def end_column
|
@@ -321,6 +456,12 @@ module Prism
|
|
321
456
|
source.code_units_column(end_offset, encoding)
|
322
457
|
end
|
323
458
|
|
459
|
+
# The end column in code units using the given cache to fetch or calculate
|
460
|
+
# the value.
|
461
|
+
def cached_end_code_units_column(cache)
|
462
|
+
cache[end_offset] - cache[source.line_start(end_offset)]
|
463
|
+
end
|
464
|
+
|
324
465
|
# Implement the hash pattern matching interface for Location.
|
325
466
|
def deconstruct_keys(keys)
|
326
467
|
{ start_offset: start_offset, end_offset: end_offset }
|
@@ -570,10 +711,23 @@ module Prism
|
|
570
711
|
def failure?
|
571
712
|
!success?
|
572
713
|
end
|
714
|
+
|
715
|
+
# Create a code units cache for the given encoding.
|
716
|
+
def code_units_cache(encoding)
|
717
|
+
source.code_units_cache(encoding)
|
718
|
+
end
|
573
719
|
end
|
574
720
|
|
575
721
|
# This is a result specific to the `parse` and `parse_file` methods.
|
576
722
|
class ParseResult < Result
|
723
|
+
autoload :Comments, "prism/parse_result/comments"
|
724
|
+
autoload :Errors, "prism/parse_result/errors"
|
725
|
+
autoload :Newlines, "prism/parse_result/newlines"
|
726
|
+
|
727
|
+
private_constant :Comments
|
728
|
+
private_constant :Errors
|
729
|
+
private_constant :Newlines
|
730
|
+
|
577
731
|
# The syntax tree that was parsed from the source code.
|
578
732
|
attr_reader :value
|
579
733
|
|
@@ -587,6 +741,23 @@ module Prism
|
|
587
741
|
def deconstruct_keys(keys)
|
588
742
|
super.merge!(value: value)
|
589
743
|
end
|
744
|
+
|
745
|
+
# Attach the list of comments to their respective locations in the tree.
|
746
|
+
def attach_comments!
|
747
|
+
Comments.new(self).attach! # steep:ignore
|
748
|
+
end
|
749
|
+
|
750
|
+
# Walk the tree and mark nodes that are on a new line, loosely emulating
|
751
|
+
# the behavior of CRuby's `:line` tracepoint event.
|
752
|
+
def mark_newlines!
|
753
|
+
value.accept(Newlines.new(source.offsets.size)) # steep:ignore
|
754
|
+
end
|
755
|
+
|
756
|
+
# Returns a string representation of the source code with the errors
|
757
|
+
# displayed inline.
|
758
|
+
def errors_format
|
759
|
+
Errors.new(self).format
|
760
|
+
end
|
590
761
|
end
|
591
762
|
|
592
763
|
# This is a result specific to the `lex` and `lex_file` methods.
|
@@ -677,5 +848,11 @@ module Prism
|
|
677
848
|
other.type == type &&
|
678
849
|
other.value == value
|
679
850
|
end
|
851
|
+
|
852
|
+
# Returns a string representation of this token.
|
853
|
+
def inspect
|
854
|
+
location
|
855
|
+
super
|
856
|
+
end
|
680
857
|
end
|
681
858
|
end
|