prism 0.29.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +115 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +1 -1
- data/README.md +4 -0
- data/config.yml +920 -148
- data/docs/build_system.md +8 -11
- data/docs/fuzzing.md +1 -1
- data/docs/parsing_rules.md +4 -1
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +22 -0
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2863 -2079
- data/ext/prism/extconf.rb +14 -37
- data/ext/prism/extension.c +241 -391
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +2156 -453
- data/include/prism/defines.h +58 -7
- data/include/prism/diagnostic.h +24 -6
- data/include/prism/node.h +0 -21
- data/include/prism/options.h +94 -3
- data/include/prism/parser.h +82 -40
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +22 -15
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +28 -12
- data/include/prism/version.h +3 -3
- data/include/prism.h +47 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +16 -1
- data/lib/prism/dot_visitor.rb +55 -34
- data/lib/prism/dsl.rb +660 -468
- data/lib/prism/ffi.rb +113 -8
- data/lib/prism/inspect_visitor.rb +296 -64
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +11 -6
- data/lib/prism/node.rb +4262 -5023
- data/lib/prism/node_ext.rb +91 -14
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +183 -6
- data/lib/prism/reflection.rb +12 -10
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +496 -609
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/compiler.rb +185 -155
- data/lib/prism/translation/parser/lexer.rb +26 -4
- data/lib/prism/translation/parser.rb +9 -4
- data/lib/prism/translation/ripper.rb +23 -25
- data/lib/prism/translation/ruby_parser.rb +86 -17
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +6 -8
- data/prism.gemspec +9 -5
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +1115 -1120
- data/rbi/prism/parse_result.rbi +29 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +36 -30
- data/sig/prism/dsl.rbs +190 -303
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +678 -632
- data/sig/prism/parse_result.rbs +22 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +103 -64
- data/src/diagnostic.c +64 -28
- data/src/node.c +502 -1739
- data/src/options.c +76 -27
- data/src/prettyprint.c +188 -112
- data/src/prism.c +3376 -2293
- data/src/regexp.c +208 -71
- data/src/serialize.c +182 -50
- data/src/static_literals.c +64 -85
- data/src/token_type.c +4 -4
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +53 -25
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +131 -80
- data/src/util/pm_strpbrk.c +32 -6
- metadata +11 -7
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/lib/prism/translation/parser/rubocop.rb +0 -73
- data/src/util/pm_string_list.c +0 -28
data/lib/prism/node_ext.rb
CHANGED
@@ -5,10 +5,13 @@
|
|
5
5
|
module Prism
|
6
6
|
class Node
|
7
7
|
def deprecated(*replacements) # :nodoc:
|
8
|
+
location = caller_locations(1, 1)
|
9
|
+
location = location[0].label if location
|
8
10
|
suggest = replacements.map { |replacement| "#{self.class}##{replacement}" }
|
11
|
+
|
9
12
|
warn(<<~MSG, category: :deprecated)
|
10
|
-
[deprecation]: #{self.class}##{
|
11
|
-
|
13
|
+
[deprecation]: #{self.class}##{location} is deprecated and will be \
|
14
|
+
removed in the next major version. Use #{suggest.join("/")} instead.
|
12
15
|
#{(caller(1, 3) || []).join("\n")}
|
13
16
|
MSG
|
14
17
|
end
|
@@ -18,7 +21,10 @@ module Prism
|
|
18
21
|
# Returns a numeric value that represents the flags that were used to create
|
19
22
|
# the regular expression.
|
20
23
|
def options
|
21
|
-
o =
|
24
|
+
o = 0
|
25
|
+
o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE)
|
26
|
+
o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED)
|
27
|
+
o |= Regexp::MULTILINE if flags.anybits?(RegularExpressionFlags::MULTI_LINE)
|
22
28
|
o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8)
|
23
29
|
o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT)
|
24
30
|
o
|
@@ -66,11 +72,12 @@ module Prism
|
|
66
72
|
def to_interpolated
|
67
73
|
InterpolatedStringNode.new(
|
68
74
|
source,
|
75
|
+
-1,
|
76
|
+
location,
|
69
77
|
frozen? ? InterpolatedStringNodeFlags::FROZEN : 0,
|
70
78
|
opening_loc,
|
71
|
-
[copy(
|
72
|
-
closing_loc
|
73
|
-
location
|
79
|
+
[copy(location: content_loc, opening_loc: nil, closing_loc: nil)],
|
80
|
+
closing_loc
|
74
81
|
)
|
75
82
|
end
|
76
83
|
end
|
@@ -83,10 +90,12 @@ module Prism
|
|
83
90
|
def to_interpolated
|
84
91
|
InterpolatedXStringNode.new(
|
85
92
|
source,
|
93
|
+
-1,
|
94
|
+
location,
|
95
|
+
flags,
|
86
96
|
opening_loc,
|
87
|
-
[StringNode.new(source, 0, nil, content_loc, nil, unescaped
|
88
|
-
closing_loc
|
89
|
-
location
|
97
|
+
[StringNode.new(source, node_id, content_loc, 0, nil, content_loc, nil, unescaped)],
|
98
|
+
closing_loc
|
90
99
|
)
|
91
100
|
end
|
92
101
|
end
|
@@ -103,7 +112,19 @@ module Prism
|
|
103
112
|
class RationalNode < Node
|
104
113
|
# Returns the value of the node as a Ruby Rational.
|
105
114
|
def value
|
106
|
-
Rational(
|
115
|
+
Rational(numerator, denominator)
|
116
|
+
end
|
117
|
+
|
118
|
+
# Returns the value of the node as an IntegerNode or a FloatNode. This
|
119
|
+
# method is deprecated in favor of #value or #numerator/#denominator.
|
120
|
+
def numeric
|
121
|
+
deprecated("value", "numerator", "denominator")
|
122
|
+
|
123
|
+
if denominator == 1
|
124
|
+
IntegerNode.new(source, -1, location.chop, flags, numerator)
|
125
|
+
else
|
126
|
+
FloatNode.new(source, -1, location.chop, 0, numerator.to_f / denominator)
|
127
|
+
end
|
107
128
|
end
|
108
129
|
end
|
109
130
|
|
@@ -180,7 +201,12 @@ module Prism
|
|
180
201
|
# continue to supply that API.
|
181
202
|
def child
|
182
203
|
deprecated("name", "name_loc")
|
183
|
-
|
204
|
+
|
205
|
+
if name
|
206
|
+
ConstantReadNode.new(source, -1, name_loc, 0, name)
|
207
|
+
else
|
208
|
+
MissingNode.new(source, -1, location, 0)
|
209
|
+
end
|
184
210
|
end
|
185
211
|
end
|
186
212
|
|
@@ -216,7 +242,12 @@ module Prism
|
|
216
242
|
# continue to supply that API.
|
217
243
|
def child
|
218
244
|
deprecated("name", "name_loc")
|
219
|
-
|
245
|
+
|
246
|
+
if name
|
247
|
+
ConstantReadNode.new(source, -1, name_loc, 0, name)
|
248
|
+
else
|
249
|
+
MissingNode.new(source, -1, location, 0)
|
250
|
+
end
|
220
251
|
end
|
221
252
|
end
|
222
253
|
|
@@ -249,9 +280,10 @@ module Prism
|
|
249
280
|
end
|
250
281
|
|
251
282
|
posts.each do |param|
|
252
|
-
|
283
|
+
case param
|
284
|
+
when MultiTargetNode
|
253
285
|
names << [:req]
|
254
|
-
|
286
|
+
when NoKeywordsParameterNode, KeywordRestParameterNode, ForwardingParameterNode
|
255
287
|
# Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts
|
256
288
|
raise "Invalid syntax"
|
257
289
|
else
|
@@ -428,4 +460,49 @@ module Prism
|
|
428
460
|
binary_operator_loc
|
429
461
|
end
|
430
462
|
end
|
463
|
+
|
464
|
+
class CaseMatchNode < Node
|
465
|
+
# Returns the else clause of the case match node. This method is deprecated
|
466
|
+
# in favor of #else_clause.
|
467
|
+
def consequent
|
468
|
+
deprecated("else_clause")
|
469
|
+
else_clause
|
470
|
+
end
|
471
|
+
end
|
472
|
+
|
473
|
+
class CaseNode < Node
|
474
|
+
# Returns the else clause of the case node. This method is deprecated in
|
475
|
+
# favor of #else_clause.
|
476
|
+
def consequent
|
477
|
+
deprecated("else_clause")
|
478
|
+
else_clause
|
479
|
+
end
|
480
|
+
end
|
481
|
+
|
482
|
+
class IfNode < Node
|
483
|
+
# Returns the subsequent if/elsif/else clause of the if node. This method is
|
484
|
+
# deprecated in favor of #subsequent.
|
485
|
+
def consequent
|
486
|
+
deprecated("subsequent")
|
487
|
+
subsequent
|
488
|
+
end
|
489
|
+
end
|
490
|
+
|
491
|
+
class RescueNode < Node
|
492
|
+
# Returns the subsequent rescue clause of the rescue node. This method is
|
493
|
+
# deprecated in favor of #subsequent.
|
494
|
+
def consequent
|
495
|
+
deprecated("subsequent")
|
496
|
+
subsequent
|
497
|
+
end
|
498
|
+
end
|
499
|
+
|
500
|
+
class UnlessNode < Node
|
501
|
+
# Returns the else clause of the unless node. This method is deprecated in
|
502
|
+
# favor of #else_clause.
|
503
|
+
def consequent
|
504
|
+
deprecated("else_clause")
|
505
|
+
else_clause
|
506
|
+
end
|
507
|
+
end
|
431
508
|
end
|
@@ -183,12 +183,5 @@ module Prism
|
|
183
183
|
[preceding, NodeTarget.new(node), following]
|
184
184
|
end
|
185
185
|
end
|
186
|
-
|
187
|
-
private_constant :Comments
|
188
|
-
|
189
|
-
# Attach the list of comments to their respective locations in the tree.
|
190
|
-
def attach_comments!
|
191
|
-
Comments.new(self).attach! # steep:ignore
|
192
|
-
end
|
193
186
|
end
|
194
187
|
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "stringio"
|
4
|
+
|
5
|
+
module Prism
|
6
|
+
class ParseResult < Result
|
7
|
+
# An object to represent the set of errors on a parse result. This object
|
8
|
+
# can be used to format the errors in a human-readable way.
|
9
|
+
class Errors
|
10
|
+
# The parse result that contains the errors.
|
11
|
+
attr_reader :parse_result
|
12
|
+
|
13
|
+
# Initialize a new set of errors from the given parse result.
|
14
|
+
def initialize(parse_result)
|
15
|
+
@parse_result = parse_result
|
16
|
+
end
|
17
|
+
|
18
|
+
# Formats the errors in a human-readable way and returns them as a string.
|
19
|
+
def format
|
20
|
+
error_lines = {} #: Hash[Integer, Array[ParseError]]
|
21
|
+
parse_result.errors.each do |error|
|
22
|
+
location = error.location
|
23
|
+
(location.start_line..location.end_line).each do |line|
|
24
|
+
error_lines[line] ||= []
|
25
|
+
error_lines[line] << error
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
source_lines = parse_result.source.source.lines
|
30
|
+
source_lines << "" if error_lines.key?(source_lines.size + 1)
|
31
|
+
|
32
|
+
io = StringIO.new
|
33
|
+
source_lines.each.with_index(1) do |line, line_number|
|
34
|
+
io.puts(line)
|
35
|
+
|
36
|
+
(error_lines.delete(line_number) || []).each do |error|
|
37
|
+
location = error.location
|
38
|
+
|
39
|
+
case line_number
|
40
|
+
when location.start_line
|
41
|
+
io.print(" " * location.start_column + "^")
|
42
|
+
|
43
|
+
if location.start_line == location.end_line
|
44
|
+
if location.start_column != location.end_column
|
45
|
+
io.print("~" * (location.end_column - location.start_column - 1))
|
46
|
+
end
|
47
|
+
|
48
|
+
io.puts(" " + error.message)
|
49
|
+
else
|
50
|
+
io.puts("~" * (line.bytesize - location.start_column))
|
51
|
+
end
|
52
|
+
when location.end_line
|
53
|
+
io.puts("~" * location.end_column + " " + error.message)
|
54
|
+
else
|
55
|
+
io.puts("~" * line.bytesize)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
io.puts
|
61
|
+
io.string
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -17,21 +17,27 @@ module Prism
|
|
17
17
|
# Note that the logic in this file should be kept in sync with the Java
|
18
18
|
# MarkNewlinesVisitor, since that visitor is responsible for marking the
|
19
19
|
# newlines for JRuby/TruffleRuby.
|
20
|
+
#
|
21
|
+
# This file is autoloaded only when `mark_newlines!` is called, so the
|
22
|
+
# re-opening of the various nodes in this file will only be performed in
|
23
|
+
# that case. We do that to avoid storing the extra `@newline` instance
|
24
|
+
# variable on every node if we don't need it.
|
20
25
|
class Newlines < Visitor
|
21
26
|
# Create a new Newlines visitor for a source with the given number of lines.
|
22
|
-
def initialize(
|
23
|
-
@
|
27
|
+
def initialize(lines)
|
28
|
+
# @type var lines: Integer
|
29
|
+
@lines = Array.new(1 + lines, false)
|
24
30
|
end
|
25
31
|
|
26
32
|
# Permit block/lambda nodes to mark newlines within themselves.
|
27
33
|
def visit_block_node(node)
|
28
|
-
|
29
|
-
@
|
34
|
+
old_lines = @lines
|
35
|
+
@lines = Array.new(old_lines.size, false)
|
30
36
|
|
31
37
|
begin
|
32
38
|
super(node)
|
33
39
|
ensure
|
34
|
-
@
|
40
|
+
@lines = old_lines
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
@@ -39,7 +45,7 @@ module Prism
|
|
39
45
|
|
40
46
|
# Mark if/unless nodes as newlines.
|
41
47
|
def visit_if_node(node)
|
42
|
-
node.
|
48
|
+
node.newline_flag!(@lines)
|
43
49
|
super(node)
|
44
50
|
end
|
45
51
|
|
@@ -48,17 +54,101 @@ module Prism
|
|
48
54
|
# Permit statements lists to mark newlines within themselves.
|
49
55
|
def visit_statements_node(node)
|
50
56
|
node.body.each do |child|
|
51
|
-
child.
|
57
|
+
child.newline_flag!(@lines)
|
52
58
|
end
|
53
59
|
super(node)
|
54
60
|
end
|
55
61
|
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class Node
|
65
|
+
def newline_flag? # :nodoc:
|
66
|
+
@newline_flag ? true : false
|
67
|
+
end
|
68
|
+
|
69
|
+
def newline_flag!(lines) # :nodoc:
|
70
|
+
line = location.start_line
|
71
|
+
unless lines[line]
|
72
|
+
lines[line] = true
|
73
|
+
@newline_flag = true
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
class BeginNode < Node
|
79
|
+
def newline_flag!(lines) # :nodoc:
|
80
|
+
# Never mark BeginNode with a newline flag, mark children instead.
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
class ParenthesesNode < Node
|
85
|
+
def newline_flag!(lines) # :nodoc:
|
86
|
+
# Never mark ParenthesesNode with a newline flag, mark children instead.
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
class IfNode < Node
|
91
|
+
def newline_flag!(lines) # :nodoc:
|
92
|
+
predicate.newline_flag!(lines)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
class UnlessNode < Node
|
97
|
+
def newline_flag!(lines) # :nodoc:
|
98
|
+
predicate.newline_flag!(lines)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
class UntilNode < Node
|
103
|
+
def newline_flag!(lines) # :nodoc:
|
104
|
+
predicate.newline_flag!(lines)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
class WhileNode < Node
|
109
|
+
def newline_flag!(lines) # :nodoc:
|
110
|
+
predicate.newline_flag!(lines)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
class RescueModifierNode < Node
|
115
|
+
def newline_flag!(lines) # :nodoc:
|
116
|
+
expression.newline_flag!(lines)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
class InterpolatedMatchLastLineNode < Node
|
121
|
+
def newline_flag!(lines) # :nodoc:
|
122
|
+
first = parts.first
|
123
|
+
first.newline_flag!(lines) if first
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
class InterpolatedRegularExpressionNode < Node
|
128
|
+
def newline_flag!(lines) # :nodoc:
|
129
|
+
first = parts.first
|
130
|
+
first.newline_flag!(lines) if first
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
class InterpolatedStringNode < Node
|
135
|
+
def newline_flag!(lines) # :nodoc:
|
136
|
+
first = parts.first
|
137
|
+
first.newline_flag!(lines) if first
|
138
|
+
end
|
139
|
+
end
|
56
140
|
|
57
|
-
|
141
|
+
class InterpolatedSymbolNode < Node
|
142
|
+
def newline_flag!(lines) # :nodoc:
|
143
|
+
first = parts.first
|
144
|
+
first.newline_flag!(lines) if first
|
145
|
+
end
|
146
|
+
end
|
58
147
|
|
59
|
-
|
60
|
-
def
|
61
|
-
|
148
|
+
class InterpolatedXStringNode < Node
|
149
|
+
def newline_flag!(lines) # :nodoc:
|
150
|
+
first = parts.first
|
151
|
+
first.newline_flag!(lines) if first
|
62
152
|
end
|
63
153
|
end
|
64
154
|
end
|
data/lib/prism/parse_result.rb
CHANGED
@@ -10,7 +10,26 @@ module Prism
|
|
10
10
|
# specialized and more performant `ASCIISource` if no multibyte characters
|
11
11
|
# are present in the source code.
|
12
12
|
def self.for(source, start_line = 1, offsets = [])
|
13
|
-
source.ascii_only?
|
13
|
+
if source.ascii_only?
|
14
|
+
ASCIISource.new(source, start_line, offsets)
|
15
|
+
elsif source.encoding == Encoding::BINARY
|
16
|
+
source.force_encoding(Encoding::UTF_8)
|
17
|
+
|
18
|
+
if source.valid_encoding?
|
19
|
+
new(source, start_line, offsets)
|
20
|
+
else
|
21
|
+
# This is an extremely niche use case where the file is marked as
|
22
|
+
# binary, contains multi-byte characters, and those characters are not
|
23
|
+
# valid UTF-8. In this case we'll mark it as binary and fall back to
|
24
|
+
# treating everything as a single-byte character. This _may_ cause
|
25
|
+
# problems when asking for code units, but it appears to be the
|
26
|
+
# cleanest solution at the moment.
|
27
|
+
source.force_encoding(Encoding::BINARY)
|
28
|
+
ASCIISource.new(source, start_line, offsets)
|
29
|
+
end
|
30
|
+
else
|
31
|
+
new(source, start_line, offsets)
|
32
|
+
end
|
14
33
|
end
|
15
34
|
|
16
35
|
# The source code that this source object represents.
|
@@ -85,9 +104,26 @@ module Prism
|
|
85
104
|
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
|
86
105
|
# concept of code units that differs from the number of characters in other
|
87
106
|
# encodings, it is not captured here.
|
107
|
+
#
|
108
|
+
# We purposefully replace invalid and undefined characters with replacement
|
109
|
+
# characters in this conversion. This happens for two reasons. First, it's
|
110
|
+
# possible that the given byte offset will not occur on a character
|
111
|
+
# boundary. Second, it's possible that the source code will contain a
|
112
|
+
# character that has no equivalent in the given encoding.
|
88
113
|
def code_units_offset(byte_offset, encoding)
|
89
|
-
byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
|
90
|
-
|
114
|
+
byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
|
115
|
+
|
116
|
+
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
|
117
|
+
byteslice.bytesize / 2
|
118
|
+
else
|
119
|
+
byteslice.length
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# Generate a cache that targets a specific encoding for calculating code
|
124
|
+
# unit offsets.
|
125
|
+
def code_units_cache(encoding)
|
126
|
+
CodeUnitsCache.new(source, encoding)
|
91
127
|
end
|
92
128
|
|
93
129
|
# Returns the column number in code units for the given encoding for the
|
@@ -119,10 +155,84 @@ module Prism
|
|
119
155
|
end
|
120
156
|
end
|
121
157
|
|
158
|
+
# A cache that can be used to quickly compute code unit offsets from byte
|
159
|
+
# offsets. It purposefully provides only a single #[] method to access the
|
160
|
+
# cache in order to minimize surface area.
|
161
|
+
#
|
162
|
+
# Note that there are some known issues here that may or may not be addressed
|
163
|
+
# in the future:
|
164
|
+
#
|
165
|
+
# * The first is that there are issues when the cache computes values that are
|
166
|
+
# not on character boundaries. This can result in subsequent computations
|
167
|
+
# being off by one or more code units.
|
168
|
+
# * The second is that this cache is currently unbounded. In theory we could
|
169
|
+
# introduce some kind of LRU cache to limit the number of entries, but this
|
170
|
+
# has not yet been implemented.
|
171
|
+
#
|
172
|
+
class CodeUnitsCache
|
173
|
+
class UTF16Counter # :nodoc:
|
174
|
+
def initialize(source, encoding)
|
175
|
+
@source = source
|
176
|
+
@encoding = encoding
|
177
|
+
end
|
178
|
+
|
179
|
+
def count(byte_offset, byte_length)
|
180
|
+
@source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
class LengthCounter # :nodoc:
|
185
|
+
def initialize(source, encoding)
|
186
|
+
@source = source
|
187
|
+
@encoding = encoding
|
188
|
+
end
|
189
|
+
|
190
|
+
def count(byte_offset, byte_length)
|
191
|
+
@source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
private_constant :UTF16Counter, :LengthCounter
|
196
|
+
|
197
|
+
# Initialize a new cache with the given source and encoding.
|
198
|
+
def initialize(source, encoding)
|
199
|
+
@source = source
|
200
|
+
@counter =
|
201
|
+
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
|
202
|
+
UTF16Counter.new(source, encoding)
|
203
|
+
else
|
204
|
+
LengthCounter.new(source, encoding)
|
205
|
+
end
|
206
|
+
|
207
|
+
@cache = {} #: Hash[Integer, Integer]
|
208
|
+
@offsets = [] #: Array[Integer]
|
209
|
+
end
|
210
|
+
|
211
|
+
# Retrieve the code units offset from the given byte offset.
|
212
|
+
def [](byte_offset)
|
213
|
+
@cache[byte_offset] ||=
|
214
|
+
if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
|
215
|
+
@offsets << byte_offset
|
216
|
+
@counter.count(0, byte_offset)
|
217
|
+
elsif index == 0
|
218
|
+
@offsets.unshift(byte_offset)
|
219
|
+
@counter.count(0, byte_offset)
|
220
|
+
else
|
221
|
+
@offsets.insert(index, byte_offset)
|
222
|
+
offset = @offsets[index - 1]
|
223
|
+
@cache[offset] + @counter.count(offset, byte_offset - offset)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
122
228
|
# Specialized version of Prism::Source for source code that includes ASCII
|
123
229
|
# characters only. This class is used to apply performance optimizations that
|
124
|
-
# cannot be applied to sources that include multibyte characters.
|
125
|
-
#
|
230
|
+
# cannot be applied to sources that include multibyte characters.
|
231
|
+
#
|
232
|
+
# In the extremely rare case that a source includes multi-byte characters but
|
233
|
+
# is marked as binary because of a magic encoding comment and it cannot be
|
234
|
+
# eagerly converted to UTF-8, this class will be used as well. This is because
|
235
|
+
# at that point we will treat everything as single-byte characters.
|
126
236
|
class ASCIISource < Source
|
127
237
|
# Return the character offset for the given byte offset.
|
128
238
|
def character_offset(byte_offset)
|
@@ -144,9 +254,16 @@ module Prism
|
|
144
254
|
byte_offset
|
145
255
|
end
|
146
256
|
|
257
|
+
# Returns a cache that is the identity function in order to maintain the
|
258
|
+
# same interface. We can do this because code units are always equivalent to
|
259
|
+
# byte offsets for ASCII-only sources.
|
260
|
+
def code_units_cache(encoding)
|
261
|
+
->(byte_offset) { byte_offset }
|
262
|
+
end
|
263
|
+
|
147
264
|
# Specialized version of `code_units_column` that does not depend on
|
148
265
|
# `code_units_offset`, which is a more expensive operation. This is
|
149
|
-
#
|
266
|
+
# essentially the same as `Prism::Source#column`.
|
150
267
|
def code_units_column(byte_offset, encoding)
|
151
268
|
byte_offset - line_start(byte_offset)
|
152
269
|
end
|
@@ -253,6 +370,12 @@ module Prism
|
|
253
370
|
source.code_units_offset(start_offset, encoding)
|
254
371
|
end
|
255
372
|
|
373
|
+
# The start offset from the start of the file in code units using the given
|
374
|
+
# cache to fetch or calculate the value.
|
375
|
+
def cached_start_code_units_offset(cache)
|
376
|
+
cache[start_offset]
|
377
|
+
end
|
378
|
+
|
256
379
|
# The byte offset from the beginning of the source where this location ends.
|
257
380
|
def end_offset
|
258
381
|
start_offset + length
|
@@ -269,6 +392,12 @@ module Prism
|
|
269
392
|
source.code_units_offset(end_offset, encoding)
|
270
393
|
end
|
271
394
|
|
395
|
+
# The end offset from the start of the file in code units using the given
|
396
|
+
# cache to fetch or calculate the value.
|
397
|
+
def cached_end_code_units_offset(cache)
|
398
|
+
cache[end_offset]
|
399
|
+
end
|
400
|
+
|
272
401
|
# The line number where this location starts.
|
273
402
|
def start_line
|
274
403
|
source.line(start_offset)
|
@@ -303,6 +432,12 @@ module Prism
|
|
303
432
|
source.code_units_column(start_offset, encoding)
|
304
433
|
end
|
305
434
|
|
435
|
+
# The start column in code units using the given cache to fetch or calculate
|
436
|
+
# the value.
|
437
|
+
def cached_start_code_units_column(cache)
|
438
|
+
cache[start_offset] - cache[source.line_start(start_offset)]
|
439
|
+
end
|
440
|
+
|
306
441
|
# The column number in bytes where this location ends from the start of the
|
307
442
|
# line.
|
308
443
|
def end_column
|
@@ -321,6 +456,12 @@ module Prism
|
|
321
456
|
source.code_units_column(end_offset, encoding)
|
322
457
|
end
|
323
458
|
|
459
|
+
# The end column in code units using the given cache to fetch or calculate
|
460
|
+
# the value.
|
461
|
+
def cached_end_code_units_column(cache)
|
462
|
+
cache[end_offset] - cache[source.line_start(end_offset)]
|
463
|
+
end
|
464
|
+
|
324
465
|
# Implement the hash pattern matching interface for Location.
|
325
466
|
def deconstruct_keys(keys)
|
326
467
|
{ start_offset: start_offset, end_offset: end_offset }
|
@@ -570,10 +711,23 @@ module Prism
|
|
570
711
|
def failure?
|
571
712
|
!success?
|
572
713
|
end
|
714
|
+
|
715
|
+
# Create a code units cache for the given encoding.
|
716
|
+
def code_units_cache(encoding)
|
717
|
+
source.code_units_cache(encoding)
|
718
|
+
end
|
573
719
|
end
|
574
720
|
|
575
721
|
# This is a result specific to the `parse` and `parse_file` methods.
|
576
722
|
class ParseResult < Result
|
723
|
+
autoload :Comments, "prism/parse_result/comments"
|
724
|
+
autoload :Errors, "prism/parse_result/errors"
|
725
|
+
autoload :Newlines, "prism/parse_result/newlines"
|
726
|
+
|
727
|
+
private_constant :Comments
|
728
|
+
private_constant :Errors
|
729
|
+
private_constant :Newlines
|
730
|
+
|
577
731
|
# The syntax tree that was parsed from the source code.
|
578
732
|
attr_reader :value
|
579
733
|
|
@@ -587,6 +741,23 @@ module Prism
|
|
587
741
|
def deconstruct_keys(keys)
|
588
742
|
super.merge!(value: value)
|
589
743
|
end
|
744
|
+
|
745
|
+
# Attach the list of comments to their respective locations in the tree.
|
746
|
+
def attach_comments!
|
747
|
+
Comments.new(self).attach! # steep:ignore
|
748
|
+
end
|
749
|
+
|
750
|
+
# Walk the tree and mark nodes that are on a new line, loosely emulating
|
751
|
+
# the behavior of CRuby's `:line` tracepoint event.
|
752
|
+
def mark_newlines!
|
753
|
+
value.accept(Newlines.new(source.offsets.size)) # steep:ignore
|
754
|
+
end
|
755
|
+
|
756
|
+
# Returns a string representation of the source code with the errors
|
757
|
+
# displayed inline.
|
758
|
+
def errors_format
|
759
|
+
Errors.new(self).format
|
760
|
+
end
|
590
761
|
end
|
591
762
|
|
592
763
|
# This is a result specific to the `lex` and `lex_file` methods.
|
@@ -677,5 +848,11 @@ module Prism
|
|
677
848
|
other.type == type &&
|
678
849
|
other.value == value
|
679
850
|
end
|
851
|
+
|
852
|
+
# Returns a string representation of this token.
|
853
|
+
def inspect
|
854
|
+
location
|
855
|
+
super
|
856
|
+
end
|
680
857
|
end
|
681
858
|
end
|