prism 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -1
- data/README.md +2 -1
- data/docs/releasing.md +67 -17
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +1982 -1538
- data/ext/prism/extension.c +12 -7
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +3 -4
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_newline_list.h +4 -3
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dsl.rb +302 -299
- data/lib/prism/ffi.rb +103 -77
- data/lib/prism/lex_compat.rb +1 -0
- data/lib/prism/node.rb +3624 -2114
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +56 -19
- data/lib/prism/serialize.rb +605 -303
- data/lib/prism/translation/parser/compiler.rb +1 -1
- data/lib/prism/translation/parser/rubocop.rb +11 -3
- data/lib/prism/translation/parser.rb +25 -12
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper.rb +696 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +6 -2
- data/src/diagnostic.c +10 -11
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prettyprint.c +3 -3
- data/src/prism.c +172 -97
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_newline_list.c +6 -3
- data/src/util/pm_strpbrk.c +122 -14
- metadata +8 -4
- data/lib/prism/ripper_compat.rb +0 -285
data/lib/prism/translation.rb
CHANGED
@@ -2,10 +2,10 @@
|
|
2
2
|
|
3
3
|
module Prism
|
4
4
|
# This module is responsible for converting the prism syntax tree into other
|
5
|
-
# syntax trees.
|
6
|
-
# whitequark/parser gem's syntax tree, but support is planned for the
|
7
|
-
# seattlerb/ruby_parser gem's syntax tree as well.
|
5
|
+
# syntax trees.
|
8
6
|
module Translation
|
9
7
|
autoload :Parser, "prism/translation/parser"
|
8
|
+
autoload :Ripper, "prism/translation/ripper"
|
9
|
+
autoload :RubyParser, "prism/translation/ruby_parser"
|
10
10
|
end
|
11
11
|
end
|
data/lib/prism.rb
CHANGED
@@ -22,7 +22,6 @@ module Prism
|
|
22
22
|
autoload :LexRipper, "prism/lex_compat"
|
23
23
|
autoload :MutationCompiler, "prism/mutation_compiler"
|
24
24
|
autoload :NodeInspector, "prism/node_inspector"
|
25
|
-
autoload :RipperCompat, "prism/ripper_compat"
|
26
25
|
autoload :Pack, "prism/pack"
|
27
26
|
autoload :Pattern, "prism/pattern"
|
28
27
|
autoload :Serialize, "prism/serialize"
|
data/prism.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |spec|
|
4
4
|
spec.name = "prism"
|
5
|
-
spec.version = "0.22.0"
|
5
|
+
spec.version = "0.24.0"
|
6
6
|
spec.authors = ["Shopify"]
|
7
7
|
spec.email = ["ruby@shopify.com"]
|
8
8
|
|
@@ -36,6 +36,7 @@ Gem::Specification.new do |spec|
|
|
36
36
|
"docs/releasing.md",
|
37
37
|
"docs/ripper.md",
|
38
38
|
"docs/ruby_api.md",
|
39
|
+
"docs/ruby_parser_translation.md",
|
39
40
|
"docs/serialization.md",
|
40
41
|
"docs/testing.md",
|
41
42
|
"ext/prism/api_node.c",
|
@@ -83,13 +84,16 @@ Gem::Specification.new do |spec|
|
|
83
84
|
"lib/prism/parse_result/comments.rb",
|
84
85
|
"lib/prism/parse_result/newlines.rb",
|
85
86
|
"lib/prism/pattern.rb",
|
86
|
-
"lib/prism/ripper_compat.rb",
|
87
87
|
"lib/prism/serialize.rb",
|
88
88
|
"lib/prism/translation.rb",
|
89
89
|
"lib/prism/translation/parser.rb",
|
90
|
+
"lib/prism/translation/parser33.rb",
|
91
|
+
"lib/prism/translation/parser34.rb",
|
90
92
|
"lib/prism/translation/parser/compiler.rb",
|
91
93
|
"lib/prism/translation/parser/lexer.rb",
|
92
94
|
"lib/prism/translation/parser/rubocop.rb",
|
95
|
+
"lib/prism/translation/ripper.rb",
|
96
|
+
"lib/prism/translation/ruby_parser.rb",
|
93
97
|
"lib/prism/visitor.rb",
|
94
98
|
"src/diagnostic.c",
|
95
99
|
"src/encoding.c",
|
data/src/diagnostic.c
CHANGED
@@ -159,7 +159,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
|
|
159
159
|
[PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_FATAL },
|
160
160
|
[PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "invalid Unicode escape sequence; needs closing `}`", PM_ERROR_LEVEL_FATAL },
|
161
161
|
[PM_ERR_EXPECT_ARGUMENT] = { "expected an argument", PM_ERROR_LEVEL_FATAL },
|
162
|
-
[PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "
|
162
|
+
[PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_FATAL },
|
163
163
|
[PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = { "expected an expression after `&&=`", PM_ERROR_LEVEL_FATAL },
|
164
164
|
[PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = { "expected an expression after `||=`", PM_ERROR_LEVEL_FATAL },
|
165
165
|
[PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA] = { "expected an expression after `,`", PM_ERROR_LEVEL_FATAL },
|
@@ -184,14 +184,14 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
|
|
184
184
|
[PM_ERR_FOR_IN] = { "expected an `in` after the index in a `for` statement", PM_ERROR_LEVEL_FATAL },
|
185
185
|
[PM_ERR_FOR_TERM] = { "expected an `end` to close the `for` loop", PM_ERROR_LEVEL_FATAL },
|
186
186
|
[PM_ERR_HASH_EXPRESSION_AFTER_LABEL] = { "expected an expression after the label in a hash", PM_ERROR_LEVEL_FATAL },
|
187
|
-
[PM_ERR_HASH_KEY] = { "
|
187
|
+
[PM_ERR_HASH_KEY] = { "unexpected %s, expecting '}' or a key in the hash literal", PM_ERROR_LEVEL_FATAL },
|
188
188
|
[PM_ERR_HASH_ROCKET] = { "expected a `=>` between the hash key and value", PM_ERROR_LEVEL_FATAL },
|
189
189
|
[PM_ERR_HASH_TERM] = { "expected a `}` to close the hash literal", PM_ERROR_LEVEL_FATAL },
|
190
190
|
[PM_ERR_HASH_VALUE] = { "expected a value in the hash literal", PM_ERROR_LEVEL_FATAL },
|
191
191
|
[PM_ERR_HEREDOC_TERM] = { "could not find a terminator for the heredoc", PM_ERROR_LEVEL_FATAL },
|
192
192
|
[PM_ERR_INCOMPLETE_QUESTION_MARK] = { "incomplete expression at `?`", PM_ERROR_LEVEL_FATAL },
|
193
|
-
[PM_ERR_INCOMPLETE_VARIABLE_CLASS] = { "
|
194
|
-
[PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = { "
|
193
|
+
[PM_ERR_INCOMPLETE_VARIABLE_CLASS] = { "`%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_FATAL },
|
194
|
+
[PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = { "`%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_FATAL },
|
195
195
|
[PM_ERR_INVALID_FLOAT_EXPONENT] = { "invalid exponent", PM_ERROR_LEVEL_FATAL },
|
196
196
|
[PM_ERR_INVALID_NUMBER_BINARY] = { "invalid binary number", PM_ERROR_LEVEL_FATAL },
|
197
197
|
[PM_ERR_INVALID_NUMBER_DECIMAL] = { "invalid decimal number", PM_ERROR_LEVEL_FATAL },
|
@@ -202,7 +202,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
|
|
202
202
|
[PM_ERR_INVALID_MULTIBYTE_CHARACTER] = { "invalid multibyte character 0x%X", PM_ERROR_LEVEL_FATAL },
|
203
203
|
[PM_ERR_INVALID_PRINTABLE_CHARACTER] = { "invalid character `%c`", PM_ERROR_LEVEL_FATAL },
|
204
204
|
[PM_ERR_INVALID_PERCENT] = { "invalid `%` token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT?
|
205
|
-
[PM_ERR_INVALID_VARIABLE_GLOBAL] = { "
|
205
|
+
[PM_ERR_INVALID_VARIABLE_GLOBAL] = { "`%.*s' is not allowed as a global variable name", PM_ERROR_LEVEL_FATAL },
|
206
206
|
[PM_ERR_IT_NOT_ALLOWED] = { "`it` is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_FATAL },
|
207
207
|
[PM_ERR_LAMBDA_OPEN] = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_FATAL },
|
208
208
|
[PM_ERR_LAMBDA_TERM_BRACE] = { "expected a lambda block beginning with `{` to end with `}`", PM_ERROR_LEVEL_FATAL },
|
@@ -221,6 +221,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
|
|
221
221
|
[PM_ERR_MODULE_NAME] = { "expected a constant name after `module`", PM_ERROR_LEVEL_FATAL },
|
222
222
|
[PM_ERR_MODULE_TERM] = { "expected an `end` to close the `module` statement", PM_ERROR_LEVEL_FATAL },
|
223
223
|
[PM_ERR_MULTI_ASSIGN_MULTI_SPLATS] = { "multiple splats in multiple assignment", PM_ERROR_LEVEL_FATAL },
|
224
|
+
[PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST] = { "unexpected '%.*s' resulting in multiple splats in multiple assignment", PM_ERROR_LEVEL_FATAL },
|
224
225
|
[PM_ERR_NOT_EXPRESSION] = { "expected an expression after `not`", PM_ERROR_LEVEL_FATAL },
|
225
226
|
[PM_ERR_NO_LOCAL_VARIABLE] = { "%.*s: no such local variable", PM_ERROR_LEVEL_FATAL },
|
226
227
|
[PM_ERR_NUMBER_LITERAL_UNDERSCORE] = { "number literal ending with a `_`", PM_ERROR_LEVEL_FATAL },
|
@@ -274,7 +275,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
|
|
274
275
|
[PM_ERR_STATEMENT_UNDEF] = { "unexpected an `undef` at a non-statement position", PM_ERROR_LEVEL_FATAL },
|
275
276
|
[PM_ERR_STRING_CONCATENATION] = { "expected a string for concatenation", PM_ERROR_LEVEL_FATAL },
|
276
277
|
[PM_ERR_STRING_INTERPOLATED_TERM] = { "expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_FATAL },
|
277
|
-
[
|
278
|
+
[PM_ERR_STRING_LITERAL_EOF] = { "unterminated string meets end of file", PM_ERROR_LEVEL_FATAL },
|
279
|
+
[PM_ERR_STRING_LITERAL_TERM] = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_FATAL },
|
278
280
|
[PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_FATAL }, // TODO expected symbol? prism.c ~9719
|
279
281
|
[PM_ERR_SYMBOL_TERM_DYNAMIC] = { "expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_FATAL },
|
280
282
|
[PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_FATAL },
|
@@ -282,17 +284,14 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
|
|
282
284
|
[PM_ERR_TERNARY_EXPRESSION_FALSE] = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_FATAL },
|
283
285
|
[PM_ERR_TERNARY_EXPRESSION_TRUE] = { "expected an expression after `?` in the ternary operator", PM_ERROR_LEVEL_FATAL },
|
284
286
|
[PM_ERR_UNDEF_ARGUMENT] = { "invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument", PM_ERROR_LEVEL_FATAL },
|
285
|
-
[
|
286
|
-
[PM_ERR_UNARY_RECEIVER_MINUS] = { "expected a receiver for unary `-`", PM_ERROR_LEVEL_FATAL },
|
287
|
-
[PM_ERR_UNARY_RECEIVER_PLUS] = { "expected a receiver for unary `+`", PM_ERROR_LEVEL_FATAL },
|
287
|
+
[PM_ERR_UNARY_RECEIVER] = { "unexpected %s, expected a receiver for unary `%c`", PM_ERROR_LEVEL_FATAL },
|
288
288
|
[PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT] = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_FATAL },
|
289
289
|
[PM_ERR_UNEXPECTED_TOKEN_IGNORE] = { "unexpected %s, ignoring it", PM_ERROR_LEVEL_FATAL },
|
290
|
-
[PM_ERR_UNARY_RECEIVER_TILDE] = { "expected a receiver for unary `~`", PM_ERROR_LEVEL_FATAL },
|
291
290
|
[PM_ERR_UNTIL_TERM] = { "expected an `end` to close the `until` statement", PM_ERROR_LEVEL_FATAL },
|
292
291
|
[PM_ERR_VOID_EXPRESSION] = { "unexpected void value expression", PM_ERROR_LEVEL_FATAL },
|
293
292
|
[PM_ERR_WHILE_TERM] = { "expected an `end` to close the `while` statement", PM_ERROR_LEVEL_FATAL },
|
294
293
|
[PM_ERR_WRITE_TARGET_IN_METHOD] = { "dynamic constant assignment", PM_ERROR_LEVEL_FATAL },
|
295
|
-
[PM_ERR_WRITE_TARGET_READONLY] = { "
|
294
|
+
[PM_ERR_WRITE_TARGET_READONLY] = { "Can't set variable %.*s", PM_ERROR_LEVEL_FATAL },
|
296
295
|
[PM_ERR_WRITE_TARGET_UNEXPECTED] = { "unexpected write target", PM_ERROR_LEVEL_FATAL },
|
297
296
|
[PM_ERR_XSTRING_TERM] = { "expected a closing delimiter for the `%x` or backtick string", PM_ERROR_LEVEL_FATAL },
|
298
297
|
|
data/src/encoding.c
CHANGED
@@ -2253,12 +2253,12 @@ static const uint8_t pm_utf_8_dfa[] = {
|
|
2253
2253
|
static pm_unicode_codepoint_t
|
2254
2254
|
pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2255
2255
|
assert(n >= 0);
|
2256
|
-
size_t maximum = (size_t) n;
|
2257
2256
|
|
2257
|
+
size_t maximum = (n > 4) ? 4 : ((size_t) n);
|
2258
2258
|
uint32_t codepoint;
|
2259
2259
|
uint32_t state = 0;
|
2260
2260
|
|
2261
|
-
for (size_t index = 0; index <
|
2261
|
+
for (size_t index = 0; index < maximum; index++) {
|
2262
2262
|
uint32_t byte = b[index];
|
2263
2263
|
uint32_t type = pm_utf_8_dfa[byte];
|
2264
2264
|
|
@@ -2267,7 +2267,7 @@ pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
|
2267
2267
|
(0xffu >> type) & (byte);
|
2268
2268
|
|
2269
2269
|
state = pm_utf_8_dfa[256 + (state * 16) + type];
|
2270
|
-
if (
|
2270
|
+
if (state == 0) {
|
2271
2271
|
*width = index + 1;
|
2272
2272
|
return (pm_unicode_codepoint_t) codepoint;
|
2273
2273
|
}
|
@@ -2282,9 +2282,17 @@ pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
|
2282
2282
|
*/
|
2283
2283
|
size_t
|
2284
2284
|
pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
|
2285
|
-
|
2286
|
-
|
2287
|
-
|
2285
|
+
assert(n >= 0);
|
2286
|
+
|
2287
|
+
size_t maximum = (n > 4) ? 4 : ((size_t) n);
|
2288
|
+
uint32_t state = 0;
|
2289
|
+
|
2290
|
+
for (size_t index = 0; index < maximum; index++) {
|
2291
|
+
state = pm_utf_8_dfa[256 + (state * 16) + pm_utf_8_dfa[b[index]]];
|
2292
|
+
if (state == 0) return index + 1;
|
2293
|
+
}
|
2294
|
+
|
2295
|
+
return 0;
|
2288
2296
|
}
|
2289
2297
|
|
2290
2298
|
/**
|
@@ -4186,15 +4194,6 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
4186
4194
|
return 0;
|
4187
4195
|
}
|
4188
4196
|
|
4189
|
-
/**
|
4190
|
-
* Returns the size of the next character in the KOI-8 encoding. This means
|
4191
|
-
* checking if it's a valid codepoint in KOI-8 and if it is returning 1.
|
4192
|
-
*/
|
4193
|
-
static size_t
|
4194
|
-
pm_encoding_koi8_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
4195
|
-
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
|
4196
|
-
}
|
4197
|
-
|
4198
4197
|
/**
|
4199
4198
|
* Returns the size of the next character in the Shift_JIS encoding, or 0 if a
|
4200
4199
|
* character cannot be decoded from the given bytes.
|
@@ -4652,7 +4651,7 @@ const pm_encoding_t pm_encodings[] = {
|
|
4652
4651
|
},
|
4653
4652
|
[PM_ENCODING_KOI8_R] = {
|
4654
4653
|
.name = "KOI8-R",
|
4655
|
-
.char_width =
|
4654
|
+
.char_width = pm_encoding_single_char_width,
|
4656
4655
|
.alnum_char = pm_encoding_koi8_r_alnum_char,
|
4657
4656
|
.alpha_char = pm_encoding_koi8_r_alpha_char,
|
4658
4657
|
.isupper_char = pm_encoding_koi8_r_isupper_char,
|
@@ -4660,7 +4659,7 @@ const pm_encoding_t pm_encodings[] = {
|
|
4660
4659
|
},
|
4661
4660
|
[PM_ENCODING_KOI8_U] = {
|
4662
4661
|
.name = "KOI8-U",
|
4663
|
-
.char_width =
|
4662
|
+
.char_width = pm_encoding_single_char_width,
|
4664
4663
|
.alnum_char = pm_encoding_koi8_u_alnum_char,
|
4665
4664
|
.alpha_char = pm_encoding_koi8_u_alpha_char,
|
4666
4665
|
.isupper_char = pm_encoding_koi8_u_isupper_char,
|
data/src/options.c
CHANGED
@@ -45,17 +45,22 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length
|
|
45
45
|
}
|
46
46
|
|
47
47
|
if (length == 5) {
|
48
|
-
if (strncmp(version, "3.3.0",
|
48
|
+
if (strncmp(version, "3.3.0", length) == 0) {
|
49
49
|
options->version = PM_OPTIONS_VERSION_CRUBY_3_3_0;
|
50
50
|
return true;
|
51
51
|
}
|
52
52
|
|
53
|
-
if (strncmp(version, "
|
53
|
+
if (strncmp(version, "3.4.0", length) == 0) {
|
54
54
|
options->version = PM_OPTIONS_VERSION_LATEST;
|
55
55
|
return true;
|
56
56
|
}
|
57
57
|
}
|
58
58
|
|
59
|
+
if (length == 6 && strncmp(version, "latest", length) == 0) {
|
60
|
+
options->version = PM_OPTIONS_VERSION_LATEST;
|
61
|
+
return true;
|
62
|
+
}
|
63
|
+
|
59
64
|
return false;
|
60
65
|
}
|
61
66
|
|
data/src/prettyprint.c
CHANGED
@@ -44,9 +44,9 @@ prettyprint_source(pm_buffer_t *output_buffer, const uint8_t *source, size_t len
|
|
44
44
|
|
45
45
|
static inline void
|
46
46
|
prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) {
|
47
|
-
pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start);
|
48
|
-
pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->end);
|
49
|
-
pm_buffer_append_format(output_buffer, "(%
|
47
|
+
pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line);
|
48
|
+
pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->end, parser->start_line);
|
49
|
+
pm_buffer_append_format(output_buffer, "(%" PRIi32 ",%" PRIu32 ")-(%" PRIi32 ",%" PRIu32 ")", start.line, start.column, end.line, end.column);
|
50
50
|
}
|
51
51
|
|
52
52
|
static inline void
|