prism 0.21.0 → 0.23.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -1
- data/README.md +2 -1
- data/docs/releasing.md +84 -16
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +784 -785
- data/ext/prism/extension.c +56 -19
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +11 -6
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/ffi.rb +8 -3
- data/lib/prism/lex_compat.rb +17 -1
- data/lib/prism/node.rb +212 -32
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +46 -16
- data/lib/prism/serialize.rb +14 -6
- data/lib/prism/translation/parser/compiler.rb +16 -6
- data/lib/prism/translation/parser.rb +19 -12
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +5 -3
- data/src/diagnostic.c +20 -15
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prism.c +145 -90
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_string.c +0 -7
- data/src/util/pm_strpbrk.c +122 -14
- metadata +6 -4
- data/lib/prism/ripper_compat.rb +0 -207
data/lib/prism/node_ext.rb
CHANGED
@@ -94,7 +94,7 @@ module Prism
|
|
94
94
|
|
95
95
|
# Returns the full name of this constant. For example: "Foo"
|
96
96
|
def full_name
|
97
|
-
name.
|
97
|
+
name.to_s
|
98
98
|
end
|
99
99
|
end
|
100
100
|
|
@@ -135,7 +135,17 @@ module Prism
|
|
135
135
|
# Returns the list of parts for the full name of this constant path.
|
136
136
|
# For example: [:Foo, :Bar]
|
137
137
|
def full_name_parts
|
138
|
-
|
138
|
+
parts = case parent
|
139
|
+
when ConstantPathNode, ConstantReadNode
|
140
|
+
parent.full_name_parts
|
141
|
+
when nil
|
142
|
+
[:""]
|
143
|
+
else
|
144
|
+
raise ConstantPathNode::DynamicPartsInConstantPathError,
|
145
|
+
"Constant path target contains dynamic parts. Cannot compute full name"
|
146
|
+
end
|
147
|
+
|
148
|
+
parts.push(child.name)
|
139
149
|
end
|
140
150
|
|
141
151
|
# Returns the full name of this constant path. For example: "Foo::Bar"
|
@@ -144,6 +154,19 @@ module Prism
|
|
144
154
|
end
|
145
155
|
end
|
146
156
|
|
157
|
+
class ConstantTargetNode < Node
|
158
|
+
# Returns the list of parts for the full name of this constant.
|
159
|
+
# For example: [:Foo]
|
160
|
+
def full_name_parts
|
161
|
+
[name]
|
162
|
+
end
|
163
|
+
|
164
|
+
# Returns the full name of this constant. For example: "Foo"
|
165
|
+
def full_name
|
166
|
+
name.to_s
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
147
170
|
class ParametersNode < Node
|
148
171
|
# Mirrors the Method#parameters method.
|
149
172
|
def signature
|
data/lib/prism/parse_result.rb
CHANGED
@@ -9,18 +9,16 @@ module Prism
|
|
9
9
|
attr_reader :source
|
10
10
|
|
11
11
|
# The line number where this source starts.
|
12
|
-
|
12
|
+
attr_reader :start_line
|
13
13
|
|
14
14
|
# The list of newline byte offsets in the source code.
|
15
15
|
attr_reader :offsets
|
16
16
|
|
17
|
-
# Create a new source object with the given source code
|
18
|
-
|
19
|
-
# the source code.
|
20
|
-
def initialize(source, start_line = 1, offsets = compute_offsets(source))
|
17
|
+
# Create a new source object with the given source code.
|
18
|
+
def initialize(source, start_line = 1, offsets = [])
|
21
19
|
@source = source
|
22
|
-
@start_line = start_line
|
23
|
-
@offsets = offsets
|
20
|
+
@start_line = start_line # set after parsing is done
|
21
|
+
@offsets = offsets # set after parsing is done
|
24
22
|
end
|
25
23
|
|
26
24
|
# Perform a byteslice on the source code using the given byte offset and
|
@@ -56,6 +54,23 @@ module Prism
|
|
56
54
|
character_offset(byte_offset) - character_offset(line_start(byte_offset))
|
57
55
|
end
|
58
56
|
|
57
|
+
# Returns the offset from the start of the file for the given byte offset
|
58
|
+
# counting in code units for the given encoding.
|
59
|
+
#
|
60
|
+
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
|
61
|
+
# concept of code units that differs from the number of characters in other
|
62
|
+
# encodings, it is not captured here.
|
63
|
+
def code_units_offset(byte_offset, encoding)
|
64
|
+
byteslice = source.byteslice(0, byte_offset).encode(encoding)
|
65
|
+
(encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
|
66
|
+
end
|
67
|
+
|
68
|
+
# Returns the column number in code units for the given encoding for the
|
69
|
+
# given byte offset.
|
70
|
+
def code_units_column(byte_offset, encoding)
|
71
|
+
code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
|
72
|
+
end
|
73
|
+
|
59
74
|
private
|
60
75
|
|
61
76
|
# Binary search through the offsets to find the line number for the given
|
@@ -77,21 +92,14 @@ module Prism
|
|
77
92
|
|
78
93
|
left - 1
|
79
94
|
end
|
80
|
-
|
81
|
-
# Find all of the newlines in the source code and return their byte offsets
|
82
|
-
# from the start of the string an array.
|
83
|
-
def compute_offsets(code)
|
84
|
-
offsets = [0]
|
85
|
-
code.b.scan("\n") { offsets << $~.end(0) }
|
86
|
-
offsets
|
87
|
-
end
|
88
95
|
end
|
89
96
|
|
90
97
|
# This represents a location in the source.
|
91
98
|
class Location
|
92
99
|
# A Source object that is used to determine more information from the given
|
93
100
|
# offset and length.
|
94
|
-
|
101
|
+
attr_reader :source
|
102
|
+
protected :source
|
95
103
|
|
96
104
|
# The byte offset from the beginning of the source where this location
|
97
105
|
# starts.
|
@@ -137,6 +145,11 @@ module Prism
|
|
137
145
|
source.character_offset(start_offset)
|
138
146
|
end
|
139
147
|
|
148
|
+
# The offset from the start of the file in code units of the given encoding.
|
149
|
+
def start_code_units_offset(encoding = Encoding::UTF_16LE)
|
150
|
+
source.code_units_offset(start_offset, encoding)
|
151
|
+
end
|
152
|
+
|
140
153
|
# The byte offset from the beginning of the source where this location ends.
|
141
154
|
def end_offset
|
142
155
|
start_offset + length
|
@@ -148,6 +161,11 @@ module Prism
|
|
148
161
|
source.character_offset(end_offset)
|
149
162
|
end
|
150
163
|
|
164
|
+
# The offset from the start of the file in code units of the given encoding.
|
165
|
+
def end_code_units_offset(encoding = Encoding::UTF_16LE)
|
166
|
+
source.code_units_offset(end_offset, encoding)
|
167
|
+
end
|
168
|
+
|
151
169
|
# The line number where this location starts.
|
152
170
|
def start_line
|
153
171
|
source.line(start_offset)
|
@@ -176,6 +194,12 @@ module Prism
|
|
176
194
|
source.character_column(start_offset)
|
177
195
|
end
|
178
196
|
|
197
|
+
# The column number in code units of the given encoding where this location
|
198
|
+
# starts from the start of the line.
|
199
|
+
def start_code_units_column(encoding = Encoding::UTF_16LE)
|
200
|
+
source.code_units_column(start_offset, encoding)
|
201
|
+
end
|
202
|
+
|
179
203
|
# The column number in bytes where this location ends from the start of the
|
180
204
|
# line.
|
181
205
|
def end_column
|
@@ -188,6 +212,12 @@ module Prism
|
|
188
212
|
source.character_column(end_offset)
|
189
213
|
end
|
190
214
|
|
215
|
+
# The column number in code units of the given encoding where this location
|
216
|
+
# ends from the start of the line.
|
217
|
+
def end_code_units_column(encoding = Encoding::UTF_16LE)
|
218
|
+
source.code_units_column(end_offset, encoding)
|
219
|
+
end
|
220
|
+
|
191
221
|
# Implement the hash pattern matching interface for Location.
|
192
222
|
def deconstruct_keys(keys)
|
193
223
|
{ start_offset: start_offset, end_offset: end_offset }
|
data/lib/prism/serialize.rb
CHANGED
@@ -27,7 +27,7 @@ module Prism
|
|
27
27
|
|
28
28
|
# The minor version of prism that we are expecting to find in the serialized
|
29
29
|
# strings.
|
30
|
-
MINOR_VERSION =
|
30
|
+
MINOR_VERSION = 23
|
31
31
|
|
32
32
|
# The patch version of prism that we are expecting to find in the serialized
|
33
33
|
# strings.
|
@@ -86,11 +86,15 @@ module Prism
|
|
86
86
|
end
|
87
87
|
|
88
88
|
def load_start_line
|
89
|
-
source.start_line
|
89
|
+
source.instance_variable_set :@start_line, load_varsint
|
90
|
+
end
|
91
|
+
|
92
|
+
def load_line_offsets
|
93
|
+
source.instance_variable_set :@offsets, Array.new(load_varuint) { load_varuint }
|
90
94
|
end
|
91
95
|
|
92
96
|
def load_comments
|
93
|
-
load_varuint
|
97
|
+
Array.new(load_varuint) do
|
94
98
|
case load_varuint
|
95
99
|
when 0 then InlineComment.new(load_location)
|
96
100
|
when 1 then EmbDocComment.new(load_location)
|
@@ -101,10 +105,10 @@ module Prism
|
|
101
105
|
|
102
106
|
def load_metadata
|
103
107
|
comments = load_comments
|
104
|
-
magic_comments = load_varuint
|
108
|
+
magic_comments = Array.new(load_varuint) { MagicComment.new(load_location, load_location) }
|
105
109
|
data_loc = load_optional_location
|
106
|
-
errors = load_varuint
|
107
|
-
warnings = load_varuint
|
110
|
+
errors = Array.new(load_varuint) { ParseError.new(load_embedded_string, load_location, load_error_level) }
|
111
|
+
warnings = Array.new(load_varuint) { ParseWarning.new(load_embedded_string, load_location, load_warning_level) }
|
108
112
|
[comments, magic_comments, data_loc, errors, warnings]
|
109
113
|
end
|
110
114
|
|
@@ -125,6 +129,7 @@ module Prism
|
|
125
129
|
tokens = load_tokens
|
126
130
|
encoding = load_encoding
|
127
131
|
load_start_line
|
132
|
+
load_line_offsets
|
128
133
|
comments, magic_comments, data_loc, errors, warnings = load_metadata
|
129
134
|
tokens.each { |token,| token.value.force_encoding(encoding) }
|
130
135
|
|
@@ -136,6 +141,7 @@ module Prism
|
|
136
141
|
load_header
|
137
142
|
load_encoding
|
138
143
|
load_start_line
|
144
|
+
load_line_offsets
|
139
145
|
|
140
146
|
comments, magic_comments, data_loc, errors, warnings = load_metadata
|
141
147
|
|
@@ -244,6 +250,8 @@ module Prism
|
|
244
250
|
case level
|
245
251
|
when 0
|
246
252
|
:fatal
|
253
|
+
when 1
|
254
|
+
:argument
|
247
255
|
else
|
248
256
|
raise "Unknown level: #{level}"
|
249
257
|
end
|
@@ -1062,12 +1062,22 @@ module Prism
|
|
1062
1062
|
|
1063
1063
|
# foo in bar
|
1064
1064
|
# ^^^^^^^^^^
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1065
|
+
if RUBY_VERSION >= "3.0"
|
1066
|
+
def visit_match_predicate_node(node)
|
1067
|
+
builder.match_pattern_p(
|
1068
|
+
visit(node.value),
|
1069
|
+
token(node.operator_loc),
|
1070
|
+
within_pattern { |compiler| node.pattern.accept(compiler) }
|
1071
|
+
)
|
1072
|
+
end
|
1073
|
+
else
|
1074
|
+
def visit_match_predicate_node(node)
|
1075
|
+
builder.match_pattern(
|
1076
|
+
visit(node.value),
|
1077
|
+
token(node.operator_loc),
|
1078
|
+
within_pattern { |compiler| node.pattern.accept(compiler) }
|
1079
|
+
)
|
1080
|
+
end
|
1071
1081
|
end
|
1072
1082
|
|
1073
1083
|
# foo => bar
|
@@ -68,17 +68,23 @@ module Prism
|
|
68
68
|
|
69
69
|
# Parses a source buffer and returns the AST, the source code comments,
|
70
70
|
# and the tokens emitted by the lexer.
|
71
|
-
def tokenize(source_buffer,
|
71
|
+
def tokenize(source_buffer, recover = false)
|
72
72
|
@source_buffer = source_buffer
|
73
73
|
source = source_buffer.source
|
74
74
|
|
75
75
|
offset_cache = build_offset_cache(source)
|
76
|
-
result =
|
76
|
+
result =
|
77
|
+
begin
|
78
|
+
unwrap(Prism.parse_lex(source, filepath: source_buffer.name), offset_cache)
|
79
|
+
rescue ::Parser::SyntaxError
|
80
|
+
raise if !recover
|
81
|
+
end
|
77
82
|
|
78
83
|
program, tokens = result.value
|
84
|
+
ast = build_ast(program, offset_cache) if result.success?
|
79
85
|
|
80
86
|
[
|
81
|
-
|
87
|
+
ast,
|
82
88
|
build_comments(result.comments, offset_cache),
|
83
89
|
build_tokens(tokens, offset_cache)
|
84
90
|
]
|
@@ -118,20 +124,21 @@ module Prism
|
|
118
124
|
# build the parser gem AST.
|
119
125
|
#
|
120
126
|
# If the bytesize of the source is the same as the length, then we can
|
121
|
-
# just use the offset directly. Otherwise, we build
|
122
|
-
#
|
123
|
-
#
|
124
|
-
# This is a good opportunity for some optimizations. If the source file
|
125
|
-
# has any multi-byte characters, this can tank the performance of the
|
126
|
-
# translator. We could make this significantly faster by using a
|
127
|
-
# different data structure for the cache.
|
127
|
+
# just use the offset directly. Otherwise, we build an array where the
|
128
|
+
# index is the byte offset and the value is the character offset.
|
128
129
|
def build_offset_cache(source)
|
129
130
|
if source.bytesize == source.length
|
130
131
|
-> (offset) { offset }
|
131
132
|
else
|
132
|
-
|
133
|
-
|
133
|
+
offset_cache = []
|
134
|
+
offset = 0
|
135
|
+
|
136
|
+
source.each_char do |char|
|
137
|
+
char.bytesize.times { offset_cache << offset }
|
138
|
+
offset += 1
|
134
139
|
end
|
140
|
+
|
141
|
+
offset_cache << offset
|
135
142
|
end
|
136
143
|
end
|
137
144
|
|