prism 0.21.0 → 0.23.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,10 +2,10 @@
2
2
 
3
3
  module Prism
4
4
  # This module is responsible for converting the prism syntax tree into other
5
- # syntax trees. At the moment it only supports converting to the
6
- # whitequark/parser gem's syntax tree, but support is planned for the
7
- # seattlerb/ruby_parser gem's syntax tree as well.
5
+ # syntax trees.
8
6
  module Translation
9
7
  autoload :Parser, "prism/translation/parser"
8
+ autoload :Ripper, "prism/translation/ripper"
9
+ autoload :RubyParser, "prism/translation/ruby_parser"
10
10
  end
11
11
  end
data/lib/prism.rb CHANGED
@@ -22,7 +22,6 @@ module Prism
22
22
  autoload :LexRipper, "prism/lex_compat"
23
23
  autoload :MutationCompiler, "prism/mutation_compiler"
24
24
  autoload :NodeInspector, "prism/node_inspector"
25
- autoload :RipperCompat, "prism/ripper_compat"
26
25
  autoload :Pack, "prism/pack"
27
26
  autoload :Pattern, "prism/pattern"
28
27
  autoload :Serialize, "prism/serialize"
data/prism.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "prism"
5
- spec.version = "0.21.0"
5
+ spec.version = "0.23.0"
6
6
  spec.authors = ["Shopify"]
7
7
  spec.email = ["ruby@shopify.com"]
8
8
 
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.homepage = "https://github.com/ruby/prism"
11
11
  spec.license = "MIT"
12
12
 
13
- spec.required_ruby_version = ">= 3.0.0"
13
+ spec.required_ruby_version = ">= 2.7.0"
14
14
 
15
15
  spec.require_paths = ["lib"]
16
16
  spec.files = [
@@ -36,6 +36,7 @@ Gem::Specification.new do |spec|
36
36
  "docs/releasing.md",
37
37
  "docs/ripper.md",
38
38
  "docs/ruby_api.md",
39
+ "docs/ruby_parser_translation.md",
39
40
  "docs/serialization.md",
40
41
  "docs/testing.md",
41
42
  "ext/prism/api_node.c",
@@ -83,13 +84,14 @@ Gem::Specification.new do |spec|
83
84
  "lib/prism/parse_result/comments.rb",
84
85
  "lib/prism/parse_result/newlines.rb",
85
86
  "lib/prism/pattern.rb",
86
- "lib/prism/ripper_compat.rb",
87
87
  "lib/prism/serialize.rb",
88
88
  "lib/prism/translation.rb",
89
89
  "lib/prism/translation/parser.rb",
90
90
  "lib/prism/translation/parser/compiler.rb",
91
91
  "lib/prism/translation/parser/lexer.rb",
92
92
  "lib/prism/translation/parser/rubocop.rb",
93
+ "lib/prism/translation/ripper.rb",
94
+ "lib/prism/translation/ruby_parser.rb",
93
95
  "lib/prism/visitor.rb",
94
96
  "src/diagnostic.c",
95
97
  "src/encoding.c",
data/src/diagnostic.c CHANGED
@@ -63,7 +63,8 @@ typedef struct {
63
63
  *
64
64
  * For errors, they are:
65
65
  *
66
- * * `PM_ERROR_LEVEL_FATAL` - The level for all errors.
66
+ * * `PM_ERROR_LEVEL_FATAL` - The default level for errors.
67
+ * * `PM_ERROR_LEVEL_ARGUMENT` - Errors that should raise ArgumentError.
67
68
  *
68
69
  * For warnings, they are:
69
70
  *
@@ -71,9 +72,13 @@ typedef struct {
71
72
  * * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`.
72
73
  */
73
74
  static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
75
+ // Special error that can be replaced
74
76
  [PM_ERR_CANNOT_PARSE_EXPRESSION] = { "cannot parse the expression", PM_ERROR_LEVEL_FATAL },
75
77
 
76
- // Errors
78
+ // Errors that should raise argument errors
79
+ [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_ARGUMENT },
80
+
81
+ // Errors that should raise syntax errors
77
82
  [PM_ERR_ALIAS_ARGUMENT] = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_FATAL },
78
83
  [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_FATAL },
79
84
  [PM_ERR_ARGUMENT_AFTER_BLOCK] = { "unexpected argument after a block argument", PM_ERROR_LEVEL_FATAL },
@@ -154,7 +159,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
154
159
  [PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_FATAL },
155
160
  [PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "invalid Unicode escape sequence; needs closing `}`", PM_ERROR_LEVEL_FATAL },
156
161
  [PM_ERR_EXPECT_ARGUMENT] = { "expected an argument", PM_ERROR_LEVEL_FATAL },
157
- [PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "expected a newline or semicolon after the statement", PM_ERROR_LEVEL_FATAL },
162
+ [PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_FATAL },
158
163
  [PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = { "expected an expression after `&&=`", PM_ERROR_LEVEL_FATAL },
159
164
  [PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = { "expected an expression after `||=`", PM_ERROR_LEVEL_FATAL },
160
165
  [PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA] = { "expected an expression after `,`", PM_ERROR_LEVEL_FATAL },
@@ -179,24 +184,25 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
179
184
  [PM_ERR_FOR_IN] = { "expected an `in` after the index in a `for` statement", PM_ERROR_LEVEL_FATAL },
180
185
  [PM_ERR_FOR_TERM] = { "expected an `end` to close the `for` loop", PM_ERROR_LEVEL_FATAL },
181
186
  [PM_ERR_HASH_EXPRESSION_AFTER_LABEL] = { "expected an expression after the label in a hash", PM_ERROR_LEVEL_FATAL },
182
- [PM_ERR_HASH_KEY] = { "expected a key in the hash literal", PM_ERROR_LEVEL_FATAL },
187
+ [PM_ERR_HASH_KEY] = { "unexpected %s, expecting '}' or a key in the hash literal", PM_ERROR_LEVEL_FATAL },
183
188
  [PM_ERR_HASH_ROCKET] = { "expected a `=>` between the hash key and value", PM_ERROR_LEVEL_FATAL },
184
189
  [PM_ERR_HASH_TERM] = { "expected a `}` to close the hash literal", PM_ERROR_LEVEL_FATAL },
185
190
  [PM_ERR_HASH_VALUE] = { "expected a value in the hash literal", PM_ERROR_LEVEL_FATAL },
186
191
  [PM_ERR_HEREDOC_TERM] = { "could not find a terminator for the heredoc", PM_ERROR_LEVEL_FATAL },
187
192
  [PM_ERR_INCOMPLETE_QUESTION_MARK] = { "incomplete expression at `?`", PM_ERROR_LEVEL_FATAL },
188
- [PM_ERR_INCOMPLETE_VARIABLE_CLASS] = { "incomplete class variable", PM_ERROR_LEVEL_FATAL },
189
- [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = { "incomplete instance variable", PM_ERROR_LEVEL_FATAL },
190
- [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_FATAL },
193
+ [PM_ERR_INCOMPLETE_VARIABLE_CLASS] = { "`%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_FATAL },
194
+ [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = { "`%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_FATAL },
191
195
  [PM_ERR_INVALID_FLOAT_EXPONENT] = { "invalid exponent", PM_ERROR_LEVEL_FATAL },
192
196
  [PM_ERR_INVALID_NUMBER_BINARY] = { "invalid binary number", PM_ERROR_LEVEL_FATAL },
193
197
  [PM_ERR_INVALID_NUMBER_DECIMAL] = { "invalid decimal number", PM_ERROR_LEVEL_FATAL },
194
198
  [PM_ERR_INVALID_NUMBER_HEXADECIMAL] = { "invalid hexadecimal number", PM_ERROR_LEVEL_FATAL },
195
199
  [PM_ERR_INVALID_NUMBER_OCTAL] = { "invalid octal number", PM_ERROR_LEVEL_FATAL },
196
200
  [PM_ERR_INVALID_NUMBER_UNDERSCORE] = { "invalid underscore placement in number", PM_ERROR_LEVEL_FATAL },
201
+ [PM_ERR_INVALID_CHARACTER] = { "invalid character 0x%X", PM_ERROR_LEVEL_FATAL },
202
+ [PM_ERR_INVALID_MULTIBYTE_CHARACTER] = { "invalid multibyte character 0x%X", PM_ERROR_LEVEL_FATAL },
203
+ [PM_ERR_INVALID_PRINTABLE_CHARACTER] = { "invalid character `%c`", PM_ERROR_LEVEL_FATAL },
197
204
  [PM_ERR_INVALID_PERCENT] = { "invalid `%` token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT?
198
- [PM_ERR_INVALID_TOKEN] = { "invalid token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT?
199
- [PM_ERR_INVALID_VARIABLE_GLOBAL] = { "invalid global variable", PM_ERROR_LEVEL_FATAL },
205
+ [PM_ERR_INVALID_VARIABLE_GLOBAL] = { "`%.*s' is not allowed as a global variable name", PM_ERROR_LEVEL_FATAL },
200
206
  [PM_ERR_IT_NOT_ALLOWED] = { "`it` is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_FATAL },
201
207
  [PM_ERR_LAMBDA_OPEN] = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_FATAL },
202
208
  [PM_ERR_LAMBDA_TERM_BRACE] = { "expected a lambda block beginning with `{` to end with `}`", PM_ERROR_LEVEL_FATAL },
@@ -215,6 +221,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
215
221
  [PM_ERR_MODULE_NAME] = { "expected a constant name after `module`", PM_ERROR_LEVEL_FATAL },
216
222
  [PM_ERR_MODULE_TERM] = { "expected an `end` to close the `module` statement", PM_ERROR_LEVEL_FATAL },
217
223
  [PM_ERR_MULTI_ASSIGN_MULTI_SPLATS] = { "multiple splats in multiple assignment", PM_ERROR_LEVEL_FATAL },
224
+ [PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST] = { "unexpected '%.*s' resulting in multiple splats in multiple assignment", PM_ERROR_LEVEL_FATAL },
218
225
  [PM_ERR_NOT_EXPRESSION] = { "expected an expression after `not`", PM_ERROR_LEVEL_FATAL },
219
226
  [PM_ERR_NO_LOCAL_VARIABLE] = { "%.*s: no such local variable", PM_ERROR_LEVEL_FATAL },
220
227
  [PM_ERR_NUMBER_LITERAL_UNDERSCORE] = { "number literal ending with a `_`", PM_ERROR_LEVEL_FATAL },
@@ -268,7 +275,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
268
275
  [PM_ERR_STATEMENT_UNDEF] = { "unexpected an `undef` at a non-statement position", PM_ERROR_LEVEL_FATAL },
269
276
  [PM_ERR_STRING_CONCATENATION] = { "expected a string for concatenation", PM_ERROR_LEVEL_FATAL },
270
277
  [PM_ERR_STRING_INTERPOLATED_TERM] = { "expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_FATAL },
271
- [PM_ERR_STRING_LITERAL_TERM] = { "expected a closing delimiter for the string literal", PM_ERROR_LEVEL_FATAL },
278
+ [PM_ERR_STRING_LITERAL_EOF] = { "unterminated string meets end of file", PM_ERROR_LEVEL_FATAL },
279
+ [PM_ERR_STRING_LITERAL_TERM] = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_FATAL },
272
280
  [PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_FATAL }, // TODO expected symbol? prism.c ~9719
273
281
  [PM_ERR_SYMBOL_TERM_DYNAMIC] = { "expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_FATAL },
274
282
  [PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_FATAL },
@@ -276,17 +284,14 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
276
284
  [PM_ERR_TERNARY_EXPRESSION_FALSE] = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_FATAL },
277
285
  [PM_ERR_TERNARY_EXPRESSION_TRUE] = { "expected an expression after `?` in the ternary operator", PM_ERROR_LEVEL_FATAL },
278
286
  [PM_ERR_UNDEF_ARGUMENT] = { "invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument", PM_ERROR_LEVEL_FATAL },
279
- [PM_ERR_UNARY_RECEIVER_BANG] = { "expected a receiver for unary `!`", PM_ERROR_LEVEL_FATAL },
280
- [PM_ERR_UNARY_RECEIVER_MINUS] = { "expected a receiver for unary `-`", PM_ERROR_LEVEL_FATAL },
281
- [PM_ERR_UNARY_RECEIVER_PLUS] = { "expected a receiver for unary `+`", PM_ERROR_LEVEL_FATAL },
287
+ [PM_ERR_UNARY_RECEIVER] = { "unexpected %s, expected a receiver for unary `%c`", PM_ERROR_LEVEL_FATAL },
282
288
  [PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT] = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_FATAL },
283
289
  [PM_ERR_UNEXPECTED_TOKEN_IGNORE] = { "unexpected %s, ignoring it", PM_ERROR_LEVEL_FATAL },
284
- [PM_ERR_UNARY_RECEIVER_TILDE] = { "expected a receiver for unary `~`", PM_ERROR_LEVEL_FATAL },
285
290
  [PM_ERR_UNTIL_TERM] = { "expected an `end` to close the `until` statement", PM_ERROR_LEVEL_FATAL },
286
291
  [PM_ERR_VOID_EXPRESSION] = { "unexpected void value expression", PM_ERROR_LEVEL_FATAL },
287
292
  [PM_ERR_WHILE_TERM] = { "expected an `end` to close the `while` statement", PM_ERROR_LEVEL_FATAL },
288
293
  [PM_ERR_WRITE_TARGET_IN_METHOD] = { "dynamic constant assignment", PM_ERROR_LEVEL_FATAL },
289
- [PM_ERR_WRITE_TARGET_READONLY] = { "immutable variable as a write target", PM_ERROR_LEVEL_FATAL },
294
+ [PM_ERR_WRITE_TARGET_READONLY] = { "Can't set variable %.*s", PM_ERROR_LEVEL_FATAL },
290
295
  [PM_ERR_WRITE_TARGET_UNEXPECTED] = { "unexpected write target", PM_ERROR_LEVEL_FATAL },
291
296
  [PM_ERR_XSTRING_TERM] = { "expected a closing delimiter for the `%x` or backtick string", PM_ERROR_LEVEL_FATAL },
292
297
 
data/src/encoding.c CHANGED
@@ -2253,12 +2253,12 @@ static const uint8_t pm_utf_8_dfa[] = {
2253
2253
  static pm_unicode_codepoint_t
2254
2254
  pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2255
2255
  assert(n >= 0);
2256
- size_t maximum = (size_t) n;
2257
2256
 
2257
+ size_t maximum = (n > 4) ? 4 : ((size_t) n);
2258
2258
  uint32_t codepoint;
2259
2259
  uint32_t state = 0;
2260
2260
 
2261
- for (size_t index = 0; index < 4 && index < maximum; index++) {
2261
+ for (size_t index = 0; index < maximum; index++) {
2262
2262
  uint32_t byte = b[index];
2263
2263
  uint32_t type = pm_utf_8_dfa[byte];
2264
2264
 
@@ -2267,7 +2267,7 @@ pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2267
2267
  (0xffu >> type) & (byte);
2268
2268
 
2269
2269
  state = pm_utf_8_dfa[256 + (state * 16) + type];
2270
- if (!state) {
2270
+ if (state == 0) {
2271
2271
  *width = index + 1;
2272
2272
  return (pm_unicode_codepoint_t) codepoint;
2273
2273
  }
@@ -2282,9 +2282,17 @@ pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2282
2282
  */
2283
2283
  size_t
2284
2284
  pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
2285
- size_t width;
2286
- pm_utf_8_codepoint(b, n, &width);
2287
- return width;
2285
+ assert(n >= 0);
2286
+
2287
+ size_t maximum = (n > 4) ? 4 : ((size_t) n);
2288
+ uint32_t state = 0;
2289
+
2290
+ for (size_t index = 0; index < maximum; index++) {
2291
+ state = pm_utf_8_dfa[256 + (state * 16) + pm_utf_8_dfa[b[index]]];
2292
+ if (state == 0) return index + 1;
2293
+ }
2294
+
2295
+ return 0;
2288
2296
  }
2289
2297
 
2290
2298
  /**
@@ -4186,15 +4194,6 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
4186
4194
  return 0;
4187
4195
  }
4188
4196
 
4189
- /**
4190
- * Returns the size of the next character in the KOI-8 encoding. This means
4191
- * checking if it's a valid codepoint in KOI-8 and if it is returning 1.
4192
- */
4193
- static size_t
4194
- pm_encoding_koi8_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
4195
- return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
4196
- }
4197
-
4198
4197
  /**
4199
4198
  * Returns the size of the next character in the Shift_JIS encoding, or 0 if a
4200
4199
  * character cannot be decoded from the given bytes.
@@ -4652,7 +4651,7 @@ const pm_encoding_t pm_encodings[] = {
4652
4651
  },
4653
4652
  [PM_ENCODING_KOI8_R] = {
4654
4653
  .name = "KOI8-R",
4655
- .char_width = pm_encoding_koi8_char_width,
4654
+ .char_width = pm_encoding_single_char_width,
4656
4655
  .alnum_char = pm_encoding_koi8_r_alnum_char,
4657
4656
  .alpha_char = pm_encoding_koi8_r_alpha_char,
4658
4657
  .isupper_char = pm_encoding_koi8_r_isupper_char,
@@ -4660,7 +4659,7 @@ const pm_encoding_t pm_encodings[] = {
4660
4659
  },
4661
4660
  [PM_ENCODING_KOI8_U] = {
4662
4661
  .name = "KOI8-U",
4663
- .char_width = pm_encoding_koi8_char_width,
4662
+ .char_width = pm_encoding_single_char_width,
4664
4663
  .alnum_char = pm_encoding_koi8_u_alnum_char,
4665
4664
  .alpha_char = pm_encoding_koi8_u_alpha_char,
4666
4665
  .isupper_char = pm_encoding_koi8_u_isupper_char,
data/src/options.c CHANGED
@@ -45,17 +45,22 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length
45
45
  }
46
46
 
47
47
  if (length == 5) {
48
- if (strncmp(version, "3.3.0", 5) == 0) {
48
+ if (strncmp(version, "3.3.0", length) == 0) {
49
49
  options->version = PM_OPTIONS_VERSION_CRUBY_3_3_0;
50
50
  return true;
51
51
  }
52
52
 
53
- if (strncmp(version, "latest", 6) == 0) {
53
+ if (strncmp(version, "3.4.0", length) == 0) {
54
54
  options->version = PM_OPTIONS_VERSION_LATEST;
55
55
  return true;
56
56
  }
57
57
  }
58
58
 
59
+ if (length == 6 && strncmp(version, "latest", length) == 0) {
60
+ options->version = PM_OPTIONS_VERSION_LATEST;
61
+ return true;
62
+ }
63
+
59
64
  return false;
60
65
  }
61
66