prism 0.29.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +77 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/README.md +4 -0
  5. data/config.yml +498 -145
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/parsing_rules.md +4 -1
  8. data/docs/ripper_translation.md +22 -0
  9. data/docs/serialization.md +3 -0
  10. data/ext/prism/api_node.c +2858 -2082
  11. data/ext/prism/extconf.rb +1 -1
  12. data/ext/prism/extension.c +203 -421
  13. data/ext/prism/extension.h +2 -2
  14. data/include/prism/ast.h +1732 -453
  15. data/include/prism/defines.h +36 -0
  16. data/include/prism/diagnostic.h +23 -6
  17. data/include/prism/node.h +0 -21
  18. data/include/prism/options.h +94 -3
  19. data/include/prism/parser.h +57 -28
  20. data/include/prism/regexp.h +18 -8
  21. data/include/prism/static_literals.h +3 -2
  22. data/include/prism/util/pm_char.h +1 -2
  23. data/include/prism/util/pm_constant_pool.h +0 -8
  24. data/include/prism/util/pm_integer.h +22 -15
  25. data/include/prism/util/pm_newline_list.h +11 -0
  26. data/include/prism/util/pm_string.h +28 -12
  27. data/include/prism/version.h +3 -3
  28. data/include/prism.h +0 -11
  29. data/lib/prism/compiler.rb +3 -0
  30. data/lib/prism/desugar_compiler.rb +111 -74
  31. data/lib/prism/dispatcher.rb +16 -1
  32. data/lib/prism/dot_visitor.rb +45 -34
  33. data/lib/prism/dsl.rb +660 -468
  34. data/lib/prism/ffi.rb +64 -6
  35. data/lib/prism/inspect_visitor.rb +294 -64
  36. data/lib/prism/lex_compat.rb +1 -1
  37. data/lib/prism/mutation_compiler.rb +11 -6
  38. data/lib/prism/node.rb +2469 -4973
  39. data/lib/prism/node_ext.rb +91 -14
  40. data/lib/prism/parse_result/comments.rb +0 -7
  41. data/lib/prism/parse_result/errors.rb +65 -0
  42. data/lib/prism/parse_result/newlines.rb +101 -11
  43. data/lib/prism/parse_result.rb +43 -3
  44. data/lib/prism/reflection.rb +10 -8
  45. data/lib/prism/serialize.rb +484 -609
  46. data/lib/prism/translation/parser/compiler.rb +152 -132
  47. data/lib/prism/translation/parser/lexer.rb +26 -4
  48. data/lib/prism/translation/parser.rb +9 -4
  49. data/lib/prism/translation/ripper.rb +22 -20
  50. data/lib/prism/translation/ruby_parser.rb +73 -13
  51. data/lib/prism/visitor.rb +3 -0
  52. data/lib/prism.rb +0 -4
  53. data/prism.gemspec +3 -5
  54. data/rbi/prism/dsl.rbi +521 -0
  55. data/rbi/prism/node.rbi +744 -4837
  56. data/rbi/prism/visitor.rbi +3 -0
  57. data/rbi/prism.rbi +36 -30
  58. data/sig/prism/dsl.rbs +190 -303
  59. data/sig/prism/mutation_compiler.rbs +1 -0
  60. data/sig/prism/node.rbs +759 -628
  61. data/sig/prism/parse_result.rbs +2 -0
  62. data/sig/prism/visitor.rbs +1 -0
  63. data/sig/prism.rbs +103 -64
  64. data/src/diagnostic.c +62 -28
  65. data/src/node.c +499 -1754
  66. data/src/options.c +76 -27
  67. data/src/prettyprint.c +156 -112
  68. data/src/prism.c +2773 -2081
  69. data/src/regexp.c +202 -69
  70. data/src/serialize.c +170 -50
  71. data/src/static_literals.c +63 -84
  72. data/src/token_type.c +4 -4
  73. data/src/util/pm_constant_pool.c +0 -8
  74. data/src/util/pm_integer.c +53 -25
  75. data/src/util/pm_newline_list.c +29 -0
  76. data/src/util/pm_string.c +130 -80
  77. data/src/util/pm_strpbrk.c +32 -6
  78. metadata +4 -6
  79. data/include/prism/util/pm_string_list.h +0 -44
  80. data/lib/prism/debug.rb +0 -249
  81. data/lib/prism/translation/parser/rubocop.rb +0 -73
  82. data/src/util/pm_string_list.c +0 -28
@@ -25,6 +25,15 @@
25
25
  #define __STDC_FORMAT_MACROS
26
26
  #include <inttypes.h>
27
27
 
28
+ /**
29
+ * When we are parsing using recursive descent, we want to protect against
30
+ * malicious payloads that could attempt to crash our parser. We do this by
31
+ * specifying a maximum depth to which we are allowed to recurse.
32
+ */
33
+ #ifndef PRISM_DEPTH_MAXIMUM
34
+ #define PRISM_DEPTH_MAXIMUM 1000
35
+ #endif
36
+
28
37
  /**
29
38
  * By default, we compile with -fvisibility=hidden. When this is enabled, we
30
39
  * need to mark certain functions as being publicly-visible. This macro does
@@ -118,6 +127,15 @@
118
127
  # endif
119
128
  #endif
120
129
 
130
+ /**
131
+ * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
132
+ * related code from the library. All filesystem related code should be guarded
133
+ * by PRISM_HAS_FILESYSTEM.
134
+ */
135
+ #ifndef PRISM_HAS_NO_FILESYSTEM
136
+ # define PRISM_HAS_FILESYSTEM
137
+ #endif
138
+
121
139
  /**
122
140
  * isinf on Windows is defined as accepting a float, but on POSIX systems it
123
141
  * accepts a float, a double, or a long double. We want to mirror this behavior
@@ -203,4 +221,22 @@
203
221
  #define PRISM_ENCODING_EXCLUDE_FULL
204
222
  #endif
205
223
 
224
+ /**
225
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
226
+ * branch prediction.
227
+ */
228
+ #if defined(__GNUC__) || defined(__clang__)
229
+ /** The compiler should predict that this branch will be taken. */
230
+ #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
231
+
232
+ /** The compiler should predict that this branch will not be taken. */
233
+ #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
234
+ #else
235
+ /** No-op because this platform does not support branch prediction hints. */
236
+ #define PRISM_LIKELY(x) (x)
237
+
238
+ /** No-op because this platform does not support branch prediction hints. */
239
+ #define PRISM_UNLIKELY(x) (x)
240
+ #endif
241
+
206
242
  #endif
@@ -1,10 +1,10 @@
1
- /******************************************************************************/
1
+ /*----------------------------------------------------------------------------*/
2
2
  /* This file is generated by the templates/template.rb script and should not */
3
3
  /* be modified manually. See */
4
4
  /* templates/include/prism/diagnostic.h.erb */
5
5
  /* if you are looking to modify the */
6
6
  /* template */
7
- /******************************************************************************/
7
+ /*----------------------------------------------------------------------------*/
8
8
 
9
9
  /**
10
10
  * @file diagnostic.h
@@ -44,7 +44,6 @@ typedef enum {
44
44
  PM_ERR_ARGUMENT_FORMAL_GLOBAL,
45
45
  PM_ERR_ARGUMENT_FORMAL_IVAR,
46
46
  PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
47
- PM_ERR_ARGUMENT_IN,
48
47
  PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND,
49
48
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
50
49
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
@@ -110,8 +109,10 @@ typedef enum {
110
109
  PM_ERR_ESCAPE_INVALID_META_REPEAT,
111
110
  PM_ERR_ESCAPE_INVALID_UNICODE,
112
111
  PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
112
+ PM_ERR_ESCAPE_INVALID_UNICODE_LIST,
113
113
  PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
114
114
  PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
115
+ PM_ERR_ESCAPE_INVALID_UNICODE_SHORT,
115
116
  PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
116
117
  PM_ERR_EXPECT_ARGUMENT,
117
118
  PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
@@ -126,6 +127,7 @@ typedef enum {
126
127
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
127
128
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
128
129
  PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
130
+ PM_ERR_EXPECT_FOR_DELIMITER,
129
131
  PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
130
132
  PM_ERR_EXPECT_IN_DELIMITER,
131
133
  PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
@@ -134,6 +136,7 @@ typedef enum {
134
136
  PM_ERR_EXPECT_RPAREN,
135
137
  PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
136
138
  PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
139
+ PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER,
137
140
  PM_ERR_EXPECT_STRING_CONTENT,
138
141
  PM_ERR_EXPECT_WHEN_DELIMITER,
139
142
  PM_ERR_EXPRESSION_BARE_HASH,
@@ -143,6 +146,7 @@ typedef enum {
143
146
  PM_ERR_EXPRESSION_NOT_WRITABLE_FILE,
144
147
  PM_ERR_EXPRESSION_NOT_WRITABLE_LINE,
145
148
  PM_ERR_EXPRESSION_NOT_WRITABLE_NIL,
149
+ PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED,
146
150
  PM_ERR_EXPRESSION_NOT_WRITABLE_SELF,
147
151
  PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE,
148
152
  PM_ERR_FLOAT_PARSE,
@@ -182,6 +186,7 @@ typedef enum {
182
186
  PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER,
183
187
  PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING,
184
188
  PM_ERR_INVALID_PERCENT,
189
+ PM_ERR_INVALID_PERCENT_EOF,
185
190
  PM_ERR_INVALID_PRINTABLE_CHARACTER,
186
191
  PM_ERR_INVALID_RETRY_AFTER_ELSE,
187
192
  PM_ERR_INVALID_RETRY_AFTER_ENSURE,
@@ -210,12 +215,15 @@ typedef enum {
210
215
  PM_ERR_MODULE_TERM,
211
216
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
212
217
  PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
218
+ PM_ERR_NESTING_TOO_DEEP,
213
219
  PM_ERR_NO_LOCAL_VARIABLE,
220
+ PM_ERR_NON_ASSOCIATIVE_OPERATOR,
214
221
  PM_ERR_NOT_EXPRESSION,
215
222
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
223
+ PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK,
216
224
  PM_ERR_NUMBERED_PARAMETER_IT,
217
225
  PM_ERR_NUMBERED_PARAMETER_ORDINARY,
218
- PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
226
+ PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK,
219
227
  PM_ERR_OPERATOR_MULTI_ASSIGN,
220
228
  PM_ERR_OPERATOR_WRITE_ARGUMENTS,
221
229
  PM_ERR_OPERATOR_WRITE_BLOCK,
@@ -232,8 +240,9 @@ typedef enum {
232
240
  PM_ERR_PARAMETER_SPLAT_MULTI,
233
241
  PM_ERR_PARAMETER_STAR,
234
242
  PM_ERR_PARAMETER_UNEXPECTED_FWD,
235
- PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
236
243
  PM_ERR_PARAMETER_UNEXPECTED_NO_KW,
244
+ PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
245
+ PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS,
237
246
  PM_ERR_PATTERN_CAPTURE_DUPLICATE,
238
247
  PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
239
248
  PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
@@ -245,6 +254,7 @@ typedef enum {
245
254
  PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
246
255
  PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
247
256
  PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
257
+ PM_ERR_PATTERN_FIND_MISSING_INNER,
248
258
  PM_ERR_PATTERN_HASH_IMPLICIT,
249
259
  PM_ERR_PATTERN_HASH_KEY,
250
260
  PM_ERR_PATTERN_HASH_KEY_DUPLICATE,
@@ -262,6 +272,7 @@ typedef enum {
262
272
  PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING,
263
273
  PM_ERR_REGEXP_INVALID_UNICODE_RANGE,
264
274
  PM_ERR_REGEXP_NON_ESCAPED_MBC,
275
+ PM_ERR_REGEXP_PARSE_ERROR,
265
276
  PM_ERR_REGEXP_TERM,
266
277
  PM_ERR_REGEXP_UNKNOWN_OPTIONS,
267
278
  PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP,
@@ -286,11 +297,15 @@ typedef enum {
286
297
  PM_ERR_TERNARY_COLON,
287
298
  PM_ERR_TERNARY_EXPRESSION_FALSE,
288
299
  PM_ERR_TERNARY_EXPRESSION_TRUE,
300
+ PM_ERR_UNARY_DISALLOWED,
289
301
  PM_ERR_UNARY_RECEIVER,
290
302
  PM_ERR_UNDEF_ARGUMENT,
291
303
  PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
292
304
  PM_ERR_UNEXPECTED_INDEX_BLOCK,
293
305
  PM_ERR_UNEXPECTED_INDEX_KEYWORDS,
306
+ PM_ERR_UNEXPECTED_LABEL,
307
+ PM_ERR_UNEXPECTED_MULTI_WRITE,
308
+ PM_ERR_UNEXPECTED_RANGE_OPERATOR,
294
309
  PM_ERR_UNEXPECTED_SAFE_NAVIGATION,
295
310
  PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
296
311
  PM_ERR_UNEXPECTED_TOKEN_IGNORE,
@@ -303,6 +318,7 @@ typedef enum {
303
318
  PM_ERR_XSTRING_TERM,
304
319
 
305
320
  // These are the warning diagnostics.
321
+ PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
306
322
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
307
323
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
308
324
  PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND,
@@ -318,10 +334,11 @@ typedef enum {
318
334
  PM_WARN_DUPLICATED_WHEN_CLAUSE,
319
335
  PM_WARN_FLOAT_OUT_OF_RANGE,
320
336
  PM_WARN_IGNORED_FROZEN_STRING_LITERAL,
337
+ PM_WARN_INDENTATION_MISMATCH,
321
338
  PM_WARN_INTEGER_IN_FLIP_FLOP,
322
339
  PM_WARN_INVALID_CHARACTER,
340
+ PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
323
341
  PM_WARN_INVALID_NUMBERED_REFERENCE,
324
- PM_WARN_INVALID_SHAREABLE_CONSTANT_VALUE,
325
342
  PM_WARN_KEYWORD_EOL,
326
343
  PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
327
344
  PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
data/include/prism/node.h CHANGED
@@ -56,27 +56,6 @@ void pm_node_list_free(pm_node_list_t *list);
56
56
  */
57
57
  PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
58
58
 
59
- /**
60
- * This struct stores the information gathered by the pm_node_memsize function.
61
- * It contains both the memory footprint and additionally metadata about the
62
- * shape of the tree.
63
- */
64
- typedef struct {
65
- /** The total memory footprint of the node and all of its children. */
66
- size_t memsize;
67
-
68
- /** The number of children the node has. */
69
- size_t node_count;
70
- } pm_memsize_t;
71
-
72
- /**
73
- * Calculates the memory footprint of a given node.
74
- *
75
- * @param node The node to calculate the memory footprint of.
76
- * @param memsize The memory footprint of the node and all of its children.
77
- */
78
- PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
79
-
80
59
  /**
81
60
  * Returns a string representation of the given node type.
82
61
  *
@@ -7,6 +7,7 @@
7
7
  #define PRISM_OPTIONS_H
8
8
 
9
9
  #include "prism/defines.h"
10
+ #include "prism/util/pm_char.h"
10
11
  #include "prism/util/pm_string.h"
11
12
 
12
13
  #include <stdbool.h>
@@ -40,6 +41,23 @@ typedef struct pm_options_scope {
40
41
  pm_string_t *locals;
41
42
  } pm_options_scope_t;
42
43
 
44
+ // Forward declaration needed by the callback typedef.
45
+ struct pm_options;
46
+
47
+ /**
48
+ * The callback called when additional switches are found in a shebang comment
49
+ * that need to be processed by the runtime.
50
+ *
51
+ * @param options The options struct that may be updated by this callback.
52
+ * Certain fields will be checked for changes, specifically encoding,
53
+ * command_line, and frozen_string_literal.
54
+ * @param source The source of the shebang comment.
55
+ * @param length The length of the source.
56
+ * @param shebang_callback_data Any additional data that should be passed along
57
+ * to the callback.
58
+ */
59
+ typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
60
+
43
61
  /**
44
62
  * The version of Ruby syntax that we should be parsing with. This is used to
45
63
  * allow consumers to specify which behavior they want in case they need to
@@ -56,7 +74,19 @@ typedef enum {
56
74
  /**
57
75
  * The options that can be passed to the parser.
58
76
  */
59
- typedef struct {
77
+ typedef struct pm_options {
78
+ /**
79
+ * The callback to call when additional switches are found in a shebang
80
+ * comment.
81
+ */
82
+ pm_options_shebang_callback_t shebang_callback;
83
+
84
+ /**
85
+ * Any additional data that should be passed along to the shebang callback
86
+ * if one was set.
87
+ */
88
+ void *shebang_callback_data;
89
+
60
90
  /** The name of the file that is currently being parsed. */
61
91
  pm_string_t filepath;
62
92
 
@@ -103,6 +133,30 @@ typedef struct {
103
133
  * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
104
134
  */
105
135
  int8_t frozen_string_literal;
136
+
137
+ /**
138
+ * Whether or not the encoding magic comments should be respected. This is a
139
+ * niche use-case where you want to parse a file with a specific encoding
140
+ * but ignore any encoding magic comments at the top of the file.
141
+ */
142
+ bool encoding_locked;
143
+
144
+ /**
145
+ * When the file being parsed is the main script, the shebang will be
146
+ * considered for command-line flags (or for implicit -x). The caller needs
147
+ * to pass this information to the parser so that it can behave correctly.
148
+ */
149
+ bool main_script;
150
+
151
+ /**
152
+ * When the file being parsed is considered a "partial" script, jumps will
153
+ * not be marked as errors if they are not contained within loops/blocks.
154
+ * This is used in the case that you're parsing a script that you know will
155
+ * be embedded inside another script later, but you do not have that context
156
+ * yet. For example, when parsing an ERB template that will be evaluated
157
+ * inside another script.
158
+ */
159
+ bool partial_script;
106
160
  } pm_options_t;
107
161
 
108
162
  /**
@@ -142,6 +196,16 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
142
196
  */
143
197
  static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
144
198
 
199
+ /**
200
+ * Set the shebang callback option on the given options struct.
201
+ *
202
+ * @param options The options struct to set the shebang callback on.
203
+ * @param shebang_callback The shebang callback to set.
204
+ * @param shebang_callback_data Any additional data that should be passed along
205
+ * to the callback.
206
+ */
207
+ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
208
+
145
209
  /**
146
210
  * Set the filepath option on the given options struct.
147
211
  *
@@ -166,6 +230,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
166
230
  */
167
231
  PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
168
232
 
233
+ /**
234
+ * Set the encoding_locked option on the given options struct.
235
+ *
236
+ * @param options The options struct to set the encoding_locked value on.
237
+ * @param encoding_locked The encoding_locked value to set.
238
+ */
239
+ PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
240
+
169
241
  /**
170
242
  * Set the frozen string literal option on the given options struct.
171
243
  *
@@ -194,6 +266,22 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
194
266
  */
195
267
  PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
196
268
 
269
+ /**
270
+ * Set the main script option on the given options struct.
271
+ *
272
+ * @param options The options struct to set the main script value on.
273
+ * @param main_script The main script value to set.
274
+ */
275
+ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
276
+
277
+ /**
278
+ * Set the partial script option on the given options struct.
279
+ *
280
+ * @param options The options struct to set the partial script value on.
281
+ * @param partial_script The partial script value to set.
282
+ */
283
+ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
284
+
197
285
  /**
198
286
  * Allocate and zero out the scopes array on the given options struct.
199
287
  *
@@ -261,6 +349,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
261
349
  * | `1` | -l command line option |
262
350
  * | `1` | -a command line option |
263
351
  * | `1` | the version |
352
+ * | `1` | encoding locked |
353
+ * | `1` | main script |
354
+ * | `1` | partial script |
264
355
  * | `4` | the number of scopes |
265
356
  * | ... | the scopes |
266
357
  *
@@ -293,8 +384,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
293
384
  * * The encoding can have a length of 0, in which case we'll use the default
294
385
  * encoding (UTF-8). If it's not 0, it should correspond to a name of an
295
386
  * encoding that can be passed to `Encoding.find` in Ruby.
296
- * * The frozen string literal and suppress warnings fields are booleans, so
297
- * their values should be either 0 or 1.
387
+ * * The frozen string literal, encoding locked, main script, and partial script
388
+ * fields are booleans, so their values should be either 0 or 1.
298
389
  * * The number of scopes can be 0.
299
390
  *
300
391
  * @param options The options struct to deserialize into.
@@ -364,6 +364,9 @@ typedef enum {
364
364
  /** a rescue statement within a lambda expression */
365
365
  PM_CONTEXT_LAMBDA_RESCUE,
366
366
 
367
+ /** the predicate clause of a loop statement */
368
+ PM_CONTEXT_LOOP_PREDICATE,
369
+
367
370
  /** the top level context */
368
371
  PM_CONTEXT_MAIN,
369
372
 
@@ -505,9 +508,9 @@ typedef struct {
505
508
  /** The type of shareable constant value that can be set. */
506
509
  typedef uint8_t pm_shareable_constant_value_t;
507
510
  static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
508
- static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = 0x1;
509
- static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = 0x2;
510
- static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = 0x4;
511
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
512
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
513
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
511
514
 
512
515
  /**
513
516
  * This tracks an individual local variable in a certain lexical context, as
@@ -546,6 +549,17 @@ typedef struct pm_locals {
546
549
  pm_local_t *locals;
547
550
  } pm_locals_t;
548
551
 
552
+ /** The flags about scope parameters that can be set. */
553
+ typedef uint8_t pm_scope_parameters_t;
554
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
555
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
556
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
557
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
558
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
559
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
560
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
561
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
562
+
549
563
  /**
550
564
  * This struct represents a node in a linked list of scopes. Some scopes can see
551
565
  * into their parent scopes, while others cannot.
@@ -557,10 +571,19 @@ typedef struct pm_scope {
557
571
  /** The IDs of the locals in the given scope. */
558
572
  pm_locals_t locals;
559
573
 
574
+ /**
575
+ * This is a list of the implicit parameters contained within the block.
576
+ * These will be processed after the block is parsed to determine the kind
577
+ * of parameters node that should be used and to check if any errors need to
578
+ * be added.
579
+ */
580
+ pm_node_list_t implicit_parameters;
581
+
560
582
  /**
561
583
  * This is a bitfield that indicates the parameters that are being used in
562
- * this scope. It is a combination of the PM_SCOPE_PARAMS_* constants. There
563
- * are three different kinds of parameters that can be used in a scope:
584
+ * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
585
+ * There are three different kinds of parameters that can be used in a
586
+ * scope:
564
587
  *
565
588
  * - Ordinary parameters (e.g., def foo(bar); end)
566
589
  * - Numbered parameters (e.g., def foo; _1; end)
@@ -575,15 +598,7 @@ typedef struct pm_scope {
575
598
  * - def foo(&); end
576
599
  * - def foo(...); end
577
600
  */
578
- uint8_t parameters;
579
-
580
- /**
581
- * An integer indicating the number of numbered parameters on this scope.
582
- * This is necessary to determine if child blocks are allowed to use
583
- * numbered parameters, and to pass information to consumers of the AST
584
- * about how many numbered parameters exist.
585
- */
586
- int8_t numbered_parameters;
601
+ pm_scope_parameters_t parameters;
587
602
 
588
603
  /**
589
604
  * The current state of constant shareability for this scope. This is
@@ -598,20 +613,6 @@ typedef struct pm_scope {
598
613
  bool closed;
599
614
  } pm_scope_t;
600
615
 
601
- static const uint8_t PM_SCOPE_PARAMETERS_NONE = 0x0;
602
- static const uint8_t PM_SCOPE_PARAMETERS_ORDINARY = 0x1;
603
- static const uint8_t PM_SCOPE_PARAMETERS_NUMBERED = 0x2;
604
- static const uint8_t PM_SCOPE_PARAMETERS_IT = 0x4;
605
- static const uint8_t PM_SCOPE_PARAMETERS_TYPE_MASK = 0x7;
606
-
607
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x8;
608
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x10;
609
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x20;
610
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x40;
611
-
612
- static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED = -1;
613
- static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_NONE = 0;
614
-
615
616
  /**
616
617
  * A struct that represents a stack of boolean values.
617
618
  */
@@ -624,6 +625,13 @@ typedef uint32_t pm_state_stack_t;
624
625
  * it's considering.
625
626
  */
626
627
  struct pm_parser {
628
+ /**
629
+ * The next node identifier that will be assigned. This is a unique
630
+ * identifier used to track nodes such that the syntax tree can be dropped
631
+ * but the node can be found through another parse.
632
+ */
633
+ uint32_t node_id;
634
+
627
635
  /** The current state of the lexer. */
628
636
  pm_lex_state_t lex_state;
629
637
 
@@ -853,12 +861,27 @@ struct pm_parser {
853
861
  */
854
862
  bool parsing_eval;
855
863
 
864
+ /**
865
+ * Whether or not we are parsing a "partial" script, which is a script that
866
+ * will be evaluated in the context of another script, so we should not
867
+ * check jumps (next/break/etc.) for validity.
868
+ */
869
+ bool partial_script;
870
+
856
871
  /** Whether or not we're at the beginning of a command. */
857
872
  bool command_start;
858
873
 
859
874
  /** Whether or not we're currently recovering from a syntax error. */
860
875
  bool recovering;
861
876
 
877
+ /**
878
+ * This is very specialized behavior for when you want to parse in a context
879
+ * that does not respect encoding comments. Its main use case is translating
880
+ * into the whitequark/parser AST which re-encodes source files in UTF-8
881
+ * before they are parsed and ignores encoding comments.
882
+ */
883
+ bool encoding_locked;
884
+
862
885
  /**
863
886
  * Whether or not the encoding has been changed by a magic comment. We use
864
887
  * this to provide a fast path for the lexer instead of going through the
@@ -886,6 +909,12 @@ struct pm_parser {
886
909
  * characters.
887
910
  */
888
911
  bool current_regular_expression_ascii_only;
912
+
913
+ /**
914
+ * By default, Ruby always warns about mismatched indentation. This can be
915
+ * toggled with a magic comment.
916
+ */
917
+ bool warn_mismatched_indentation;
889
918
  };
890
919
 
891
920
  #endif
@@ -10,7 +10,6 @@
10
10
  #include "prism/parser.h"
11
11
  #include "prism/encoding.h"
12
12
  #include "prism/util/pm_memchr.h"
13
- #include "prism/util/pm_string_list.h"
14
13
  #include "prism/util/pm_string.h"
15
14
 
16
15
  #include <stdbool.h>
@@ -18,16 +17,27 @@
18
17
  #include <string.h>
19
18
 
20
19
  /**
21
- * Parse a regular expression and extract the names of all of the named capture
22
- * groups.
20
+ * This callback is called when a named capture group is found.
21
+ */
22
+ typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
23
+
24
+ /**
25
+ * This callback is called when a parse error is found.
26
+ */
27
+ typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
28
+
29
+ /**
30
+ * Parse a regular expression.
23
31
  *
32
+ * @param parser The parser that is currently being used.
24
33
  * @param source The source code to parse.
25
34
  * @param size The size of the source code.
26
- * @param named_captures The list to add the names of the named capture groups.
27
- * @param encoding_changed Whether or not the encoding changed from the default.
28
- * @param encoding The encoding of the source code.
29
- * @return Whether or not the parsing was successful.
35
+ * @param extended_mode Whether to parse the regular expression in extended mode.
36
+ * @param name_callback The optional callback to call when a named capture group is found.
37
+ * @param name_data The optional data to pass to the name callback.
38
+ * @param error_callback The callback to call when a parse error is found.
39
+ * @param error_data The data to pass to the error callback.
30
40
  */
31
- PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding);
41
+ PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
32
42
 
33
43
  #endif
@@ -95,9 +95,10 @@ typedef struct {
95
95
  * @param start_line The line number that the parser starts on.
96
96
  * @param literals The set of static literals to add the node to.
97
97
  * @param node The node to add to the set.
98
+ * @param replace Whether to replace the previous node if one already exists.
98
99
  * @return A pointer to the node that is being overwritten, if there is one.
99
100
  */
100
- pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node);
101
+ pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
101
102
 
102
103
  /**
103
104
  * Free the internal memory associated with the given static literals set.
@@ -115,6 +116,6 @@ void pm_static_literals_free(pm_static_literals_t *literals);
115
116
  * @param encoding_name The name of the encoding of the source being parsed.
116
117
  * @param node The node to create a string representation of.
117
118
  */
118
- PRISM_EXPORTED_FUNCTION void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
119
+ void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
119
120
 
120
121
  #endif
@@ -34,8 +34,7 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
34
34
  * @return The number of characters at the start of the string that are
35
35
  * whitespace.
36
36
  */
37
- size_t
38
- pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
37
+ size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
39
38
 
40
39
  /**
41
40
  * Returns the number of characters at the start of the string that are inline
@@ -87,14 +87,6 @@ void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_co
87
87
  */
88
88
  bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
89
89
 
90
- /**
91
- * Get the memory size of a list of constant ids.
92
- *
93
- * @param list The list to get the memory size of.
94
- * @return The memory size of the list.
95
- */
96
- size_t pm_constant_id_list_memsize(pm_constant_id_list_t *list);
97
-
98
90
  /**
99
91
  * Free the memory associated with a list of constant ids.
100
92
  *