prism 0.21.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -1
- data/README.md +2 -1
- data/docs/releasing.md +84 -16
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +784 -785
- data/ext/prism/extension.c +56 -19
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +11 -6
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/ffi.rb +8 -3
- data/lib/prism/lex_compat.rb +17 -1
- data/lib/prism/node.rb +212 -32
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +46 -16
- data/lib/prism/serialize.rb +14 -6
- data/lib/prism/translation/parser/compiler.rb +16 -6
- data/lib/prism/translation/parser.rb +19 -12
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +5 -3
- data/src/diagnostic.c +20 -15
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prism.c +145 -90
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_string.c +0 -7
- data/src/util/pm_strpbrk.c +122 -14
- metadata +6 -4
- data/lib/prism/ripper_compat.rb +0 -207
data/lib/prism/node_ext.rb
CHANGED
@@ -94,7 +94,7 @@ module Prism
|
|
94
94
|
|
95
95
|
# Returns the full name of this constant. For example: "Foo"
|
96
96
|
def full_name
|
97
|
-
name.
|
97
|
+
name.to_s
|
98
98
|
end
|
99
99
|
end
|
100
100
|
|
@@ -135,7 +135,17 @@ module Prism
|
|
135
135
|
# Returns the list of parts for the full name of this constant path.
|
136
136
|
# For example: [:Foo, :Bar]
|
137
137
|
def full_name_parts
|
138
|
-
|
138
|
+
parts = case parent
|
139
|
+
when ConstantPathNode, ConstantReadNode
|
140
|
+
parent.full_name_parts
|
141
|
+
when nil
|
142
|
+
[:""]
|
143
|
+
else
|
144
|
+
raise ConstantPathNode::DynamicPartsInConstantPathError,
|
145
|
+
"Constant path target contains dynamic parts. Cannot compute full name"
|
146
|
+
end
|
147
|
+
|
148
|
+
parts.push(child.name)
|
139
149
|
end
|
140
150
|
|
141
151
|
# Returns the full name of this constant path. For example: "Foo::Bar"
|
@@ -144,6 +154,19 @@ module Prism
|
|
144
154
|
end
|
145
155
|
end
|
146
156
|
|
157
|
+
class ConstantTargetNode < Node
|
158
|
+
# Returns the list of parts for the full name of this constant.
|
159
|
+
# For example: [:Foo]
|
160
|
+
def full_name_parts
|
161
|
+
[name]
|
162
|
+
end
|
163
|
+
|
164
|
+
# Returns the full name of this constant. For example: "Foo"
|
165
|
+
def full_name
|
166
|
+
name.to_s
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
147
170
|
class ParametersNode < Node
|
148
171
|
# Mirrors the Method#parameters method.
|
149
172
|
def signature
|
data/lib/prism/parse_result.rb
CHANGED
@@ -9,18 +9,16 @@ module Prism
|
|
9
9
|
attr_reader :source
|
10
10
|
|
11
11
|
# The line number where this source starts.
|
12
|
-
|
12
|
+
attr_reader :start_line
|
13
13
|
|
14
14
|
# The list of newline byte offsets in the source code.
|
15
15
|
attr_reader :offsets
|
16
16
|
|
17
|
-
# Create a new source object with the given source code
|
18
|
-
|
19
|
-
# the source code.
|
20
|
-
def initialize(source, start_line = 1, offsets = compute_offsets(source))
|
17
|
+
# Create a new source object with the given source code.
|
18
|
+
def initialize(source, start_line = 1, offsets = [])
|
21
19
|
@source = source
|
22
|
-
@start_line = start_line
|
23
|
-
@offsets = offsets
|
20
|
+
@start_line = start_line # set after parsing is done
|
21
|
+
@offsets = offsets # set after parsing is done
|
24
22
|
end
|
25
23
|
|
26
24
|
# Perform a byteslice on the source code using the given byte offset and
|
@@ -56,6 +54,23 @@ module Prism
|
|
56
54
|
character_offset(byte_offset) - character_offset(line_start(byte_offset))
|
57
55
|
end
|
58
56
|
|
57
|
+
# Returns the offset from the start of the file for the given byte offset
|
58
|
+
# counting in code units for the given encoding.
|
59
|
+
#
|
60
|
+
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
|
61
|
+
# concept of code units that differs from the number of characters in other
|
62
|
+
# encodings, it is not captured here.
|
63
|
+
def code_units_offset(byte_offset, encoding)
|
64
|
+
byteslice = source.byteslice(0, byte_offset).encode(encoding)
|
65
|
+
(encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
|
66
|
+
end
|
67
|
+
|
68
|
+
# Returns the column number in code units for the given encoding for the
|
69
|
+
# given byte offset.
|
70
|
+
def code_units_column(byte_offset, encoding)
|
71
|
+
code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
|
72
|
+
end
|
73
|
+
|
59
74
|
private
|
60
75
|
|
61
76
|
# Binary search through the offsets to find the line number for the given
|
@@ -77,21 +92,14 @@ module Prism
|
|
77
92
|
|
78
93
|
left - 1
|
79
94
|
end
|
80
|
-
|
81
|
-
# Find all of the newlines in the source code and return their byte offsets
|
82
|
-
# from the start of the string an array.
|
83
|
-
def compute_offsets(code)
|
84
|
-
offsets = [0]
|
85
|
-
code.b.scan("\n") { offsets << $~.end(0) }
|
86
|
-
offsets
|
87
|
-
end
|
88
95
|
end
|
89
96
|
|
90
97
|
# This represents a location in the source.
|
91
98
|
class Location
|
92
99
|
# A Source object that is used to determine more information from the given
|
93
100
|
# offset and length.
|
94
|
-
|
101
|
+
attr_reader :source
|
102
|
+
protected :source
|
95
103
|
|
96
104
|
# The byte offset from the beginning of the source where this location
|
97
105
|
# starts.
|
@@ -137,6 +145,11 @@ module Prism
|
|
137
145
|
source.character_offset(start_offset)
|
138
146
|
end
|
139
147
|
|
148
|
+
# The offset from the start of the file in code units of the given encoding.
|
149
|
+
def start_code_units_offset(encoding = Encoding::UTF_16LE)
|
150
|
+
source.code_units_offset(start_offset, encoding)
|
151
|
+
end
|
152
|
+
|
140
153
|
# The byte offset from the beginning of the source where this location ends.
|
141
154
|
def end_offset
|
142
155
|
start_offset + length
|
@@ -148,6 +161,11 @@ module Prism
|
|
148
161
|
source.character_offset(end_offset)
|
149
162
|
end
|
150
163
|
|
164
|
+
# The offset from the start of the file in code units of the given encoding.
|
165
|
+
def end_code_units_offset(encoding = Encoding::UTF_16LE)
|
166
|
+
source.code_units_offset(end_offset, encoding)
|
167
|
+
end
|
168
|
+
|
151
169
|
# The line number where this location starts.
|
152
170
|
def start_line
|
153
171
|
source.line(start_offset)
|
@@ -176,6 +194,12 @@ module Prism
|
|
176
194
|
source.character_column(start_offset)
|
177
195
|
end
|
178
196
|
|
197
|
+
# The column number in code units of the given encoding where this location
|
198
|
+
# starts from the start of the line.
|
199
|
+
def start_code_units_column(encoding = Encoding::UTF_16LE)
|
200
|
+
source.code_units_column(start_offset, encoding)
|
201
|
+
end
|
202
|
+
|
179
203
|
# The column number in bytes where this location ends from the start of the
|
180
204
|
# line.
|
181
205
|
def end_column
|
@@ -188,6 +212,12 @@ module Prism
|
|
188
212
|
source.character_column(end_offset)
|
189
213
|
end
|
190
214
|
|
215
|
+
# The column number in code units of the given encoding where this location
|
216
|
+
# ends from the start of the line.
|
217
|
+
def end_code_units_column(encoding = Encoding::UTF_16LE)
|
218
|
+
source.code_units_column(end_offset, encoding)
|
219
|
+
end
|
220
|
+
|
191
221
|
# Implement the hash pattern matching interface for Location.
|
192
222
|
def deconstruct_keys(keys)
|
193
223
|
{ start_offset: start_offset, end_offset: end_offset }
|
data/lib/prism/serialize.rb
CHANGED
@@ -27,7 +27,7 @@ module Prism
|
|
27
27
|
|
28
28
|
# The minor version of prism that we are expecting to find in the serialized
|
29
29
|
# strings.
|
30
|
-
MINOR_VERSION =
|
30
|
+
MINOR_VERSION = 23
|
31
31
|
|
32
32
|
# The patch version of prism that we are expecting to find in the serialized
|
33
33
|
# strings.
|
@@ -86,11 +86,15 @@ module Prism
|
|
86
86
|
end
|
87
87
|
|
88
88
|
def load_start_line
|
89
|
-
source.start_line
|
89
|
+
source.instance_variable_set :@start_line, load_varsint
|
90
|
+
end
|
91
|
+
|
92
|
+
def load_line_offsets
|
93
|
+
source.instance_variable_set :@offsets, Array.new(load_varuint) { load_varuint }
|
90
94
|
end
|
91
95
|
|
92
96
|
def load_comments
|
93
|
-
load_varuint
|
97
|
+
Array.new(load_varuint) do
|
94
98
|
case load_varuint
|
95
99
|
when 0 then InlineComment.new(load_location)
|
96
100
|
when 1 then EmbDocComment.new(load_location)
|
@@ -101,10 +105,10 @@ module Prism
|
|
101
105
|
|
102
106
|
def load_metadata
|
103
107
|
comments = load_comments
|
104
|
-
magic_comments = load_varuint
|
108
|
+
magic_comments = Array.new(load_varuint) { MagicComment.new(load_location, load_location) }
|
105
109
|
data_loc = load_optional_location
|
106
|
-
errors = load_varuint
|
107
|
-
warnings = load_varuint
|
110
|
+
errors = Array.new(load_varuint) { ParseError.new(load_embedded_string, load_location, load_error_level) }
|
111
|
+
warnings = Array.new(load_varuint) { ParseWarning.new(load_embedded_string, load_location, load_warning_level) }
|
108
112
|
[comments, magic_comments, data_loc, errors, warnings]
|
109
113
|
end
|
110
114
|
|
@@ -125,6 +129,7 @@ module Prism
|
|
125
129
|
tokens = load_tokens
|
126
130
|
encoding = load_encoding
|
127
131
|
load_start_line
|
132
|
+
load_line_offsets
|
128
133
|
comments, magic_comments, data_loc, errors, warnings = load_metadata
|
129
134
|
tokens.each { |token,| token.value.force_encoding(encoding) }
|
130
135
|
|
@@ -136,6 +141,7 @@ module Prism
|
|
136
141
|
load_header
|
137
142
|
load_encoding
|
138
143
|
load_start_line
|
144
|
+
load_line_offsets
|
139
145
|
|
140
146
|
comments, magic_comments, data_loc, errors, warnings = load_metadata
|
141
147
|
|
@@ -244,6 +250,8 @@ module Prism
|
|
244
250
|
case level
|
245
251
|
when 0
|
246
252
|
:fatal
|
253
|
+
when 1
|
254
|
+
:argument
|
247
255
|
else
|
248
256
|
raise "Unknown level: #{level}"
|
249
257
|
end
|
@@ -1062,12 +1062,22 @@ module Prism
|
|
1062
1062
|
|
1063
1063
|
# foo in bar
|
1064
1064
|
# ^^^^^^^^^^
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1065
|
+
if RUBY_VERSION >= "3.0"
|
1066
|
+
def visit_match_predicate_node(node)
|
1067
|
+
builder.match_pattern_p(
|
1068
|
+
visit(node.value),
|
1069
|
+
token(node.operator_loc),
|
1070
|
+
within_pattern { |compiler| node.pattern.accept(compiler) }
|
1071
|
+
)
|
1072
|
+
end
|
1073
|
+
else
|
1074
|
+
def visit_match_predicate_node(node)
|
1075
|
+
builder.match_pattern(
|
1076
|
+
visit(node.value),
|
1077
|
+
token(node.operator_loc),
|
1078
|
+
within_pattern { |compiler| node.pattern.accept(compiler) }
|
1079
|
+
)
|
1080
|
+
end
|
1071
1081
|
end
|
1072
1082
|
|
1073
1083
|
# foo => bar
|
@@ -68,17 +68,23 @@ module Prism
|
|
68
68
|
|
69
69
|
# Parses a source buffer and returns the AST, the source code comments,
|
70
70
|
# and the tokens emitted by the lexer.
|
71
|
-
def tokenize(source_buffer,
|
71
|
+
def tokenize(source_buffer, recover = false)
|
72
72
|
@source_buffer = source_buffer
|
73
73
|
source = source_buffer.source
|
74
74
|
|
75
75
|
offset_cache = build_offset_cache(source)
|
76
|
-
result =
|
76
|
+
result =
|
77
|
+
begin
|
78
|
+
unwrap(Prism.parse_lex(source, filepath: source_buffer.name), offset_cache)
|
79
|
+
rescue ::Parser::SyntaxError
|
80
|
+
raise if !recover
|
81
|
+
end
|
77
82
|
|
78
83
|
program, tokens = result.value
|
84
|
+
ast = build_ast(program, offset_cache) if result.success?
|
79
85
|
|
80
86
|
[
|
81
|
-
|
87
|
+
ast,
|
82
88
|
build_comments(result.comments, offset_cache),
|
83
89
|
build_tokens(tokens, offset_cache)
|
84
90
|
]
|
@@ -118,20 +124,21 @@ module Prism
|
|
118
124
|
# build the parser gem AST.
|
119
125
|
#
|
120
126
|
# If the bytesize of the source is the same as the length, then we can
|
121
|
-
# just use the offset directly. Otherwise, we build
|
122
|
-
#
|
123
|
-
#
|
124
|
-
# This is a good opportunity for some optimizations. If the source file
|
125
|
-
# has any multi-byte characters, this can tank the performance of the
|
126
|
-
# translator. We could make this significantly faster by using a
|
127
|
-
# different data structure for the cache.
|
127
|
+
# just use the offset directly. Otherwise, we build an array where the
|
128
|
+
# index is the byte offset and the value is the character offset.
|
128
129
|
def build_offset_cache(source)
|
129
130
|
if source.bytesize == source.length
|
130
131
|
-> (offset) { offset }
|
131
132
|
else
|
132
|
-
|
133
|
-
|
133
|
+
offset_cache = []
|
134
|
+
offset = 0
|
135
|
+
|
136
|
+
source.each_char do |char|
|
137
|
+
char.bytesize.times { offset_cache << offset }
|
138
|
+
offset += 1
|
134
139
|
end
|
140
|
+
|
141
|
+
offset_cache << offset
|
135
142
|
end
|
136
143
|
end
|
137
144
|
|