prism 0.29.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +115 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/Makefile +1 -1
  5. data/README.md +4 -0
  6. data/config.yml +920 -148
  7. data/docs/build_system.md +8 -11
  8. data/docs/fuzzing.md +1 -1
  9. data/docs/parsing_rules.md +4 -1
  10. data/docs/relocation.md +34 -0
  11. data/docs/ripper_translation.md +22 -0
  12. data/docs/serialization.md +3 -0
  13. data/ext/prism/api_node.c +2863 -2079
  14. data/ext/prism/extconf.rb +14 -37
  15. data/ext/prism/extension.c +241 -391
  16. data/ext/prism/extension.h +2 -2
  17. data/include/prism/ast.h +2156 -453
  18. data/include/prism/defines.h +58 -7
  19. data/include/prism/diagnostic.h +24 -6
  20. data/include/prism/node.h +0 -21
  21. data/include/prism/options.h +94 -3
  22. data/include/prism/parser.h +82 -40
  23. data/include/prism/regexp.h +18 -8
  24. data/include/prism/static_literals.h +3 -2
  25. data/include/prism/util/pm_char.h +1 -2
  26. data/include/prism/util/pm_constant_pool.h +0 -8
  27. data/include/prism/util/pm_integer.h +22 -15
  28. data/include/prism/util/pm_newline_list.h +11 -0
  29. data/include/prism/util/pm_string.h +28 -12
  30. data/include/prism/version.h +3 -3
  31. data/include/prism.h +47 -11
  32. data/lib/prism/compiler.rb +3 -0
  33. data/lib/prism/desugar_compiler.rb +111 -74
  34. data/lib/prism/dispatcher.rb +16 -1
  35. data/lib/prism/dot_visitor.rb +55 -34
  36. data/lib/prism/dsl.rb +660 -468
  37. data/lib/prism/ffi.rb +113 -8
  38. data/lib/prism/inspect_visitor.rb +296 -64
  39. data/lib/prism/lex_compat.rb +1 -1
  40. data/lib/prism/mutation_compiler.rb +11 -6
  41. data/lib/prism/node.rb +4262 -5023
  42. data/lib/prism/node_ext.rb +91 -14
  43. data/lib/prism/parse_result/comments.rb +0 -7
  44. data/lib/prism/parse_result/errors.rb +65 -0
  45. data/lib/prism/parse_result/newlines.rb +101 -11
  46. data/lib/prism/parse_result.rb +183 -6
  47. data/lib/prism/reflection.rb +12 -10
  48. data/lib/prism/relocation.rb +504 -0
  49. data/lib/prism/serialize.rb +496 -609
  50. data/lib/prism/string_query.rb +30 -0
  51. data/lib/prism/translation/parser/compiler.rb +185 -155
  52. data/lib/prism/translation/parser/lexer.rb +26 -4
  53. data/lib/prism/translation/parser.rb +9 -4
  54. data/lib/prism/translation/ripper.rb +23 -25
  55. data/lib/prism/translation/ruby_parser.rb +86 -17
  56. data/lib/prism/visitor.rb +3 -0
  57. data/lib/prism.rb +6 -8
  58. data/prism.gemspec +9 -5
  59. data/rbi/prism/dsl.rbi +521 -0
  60. data/rbi/prism/node.rbi +1115 -1120
  61. data/rbi/prism/parse_result.rbi +29 -0
  62. data/rbi/prism/string_query.rbi +12 -0
  63. data/rbi/prism/visitor.rbi +3 -0
  64. data/rbi/prism.rbi +36 -30
  65. data/sig/prism/dsl.rbs +190 -303
  66. data/sig/prism/mutation_compiler.rbs +1 -0
  67. data/sig/prism/node.rbs +678 -632
  68. data/sig/prism/parse_result.rbs +22 -0
  69. data/sig/prism/relocation.rbs +185 -0
  70. data/sig/prism/string_query.rbs +11 -0
  71. data/sig/prism/visitor.rbs +1 -0
  72. data/sig/prism.rbs +103 -64
  73. data/src/diagnostic.c +64 -28
  74. data/src/node.c +502 -1739
  75. data/src/options.c +76 -27
  76. data/src/prettyprint.c +188 -112
  77. data/src/prism.c +3376 -2293
  78. data/src/regexp.c +208 -71
  79. data/src/serialize.c +182 -50
  80. data/src/static_literals.c +64 -85
  81. data/src/token_type.c +4 -4
  82. data/src/util/pm_char.c +1 -1
  83. data/src/util/pm_constant_pool.c +0 -8
  84. data/src/util/pm_integer.c +53 -25
  85. data/src/util/pm_newline_list.c +29 -0
  86. data/src/util/pm_string.c +131 -80
  87. data/src/util/pm_strpbrk.c +32 -6
  88. metadata +11 -7
  89. data/include/prism/util/pm_string_list.h +0 -44
  90. data/lib/prism/debug.rb +0 -249
  91. data/lib/prism/translation/parser/rubocop.rb +0 -73
  92. data/src/util/pm_string_list.c +0 -28
@@ -25,6 +25,15 @@
25
25
  #define __STDC_FORMAT_MACROS
26
26
  #include <inttypes.h>
27
27
 
28
+ /**
29
+ * When we are parsing using recursive descent, we want to protect against
30
+ * malicious payloads that could attempt to crash our parser. We do this by
31
+ * specifying a maximum depth to which we are allowed to recurse.
32
+ */
33
+ #ifndef PRISM_DEPTH_MAXIMUM
34
+ #define PRISM_DEPTH_MAXIMUM 1000
35
+ #endif
36
+
28
37
  /**
29
38
  * By default, we compile with -fvisibility=hidden. When this is enabled, we
30
39
  * need to mark certain functions as being publicly-visible. This macro does
@@ -119,14 +128,24 @@
119
128
  #endif
120
129
 
121
130
  /**
122
- * isinf on Windows is defined as accepting a float, but on POSIX systems it
123
- * accepts a float, a double, or a long double. We want to mirror this behavior
124
- * on windows.
131
+ * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
132
+ * related code from the library. All filesystem related code should be guarded
133
+ * by PRISM_HAS_FILESYSTEM.
125
134
  */
126
- #ifdef _WIN32
127
- # include <float.h>
128
- # undef isinf
129
- # define isinf(x) (sizeof(x) == sizeof(float) ? !_finitef(x) : !_finite(x))
135
+ #ifndef PRISM_HAS_NO_FILESYSTEM
136
+ # define PRISM_HAS_FILESYSTEM
137
+ #endif
138
+
139
+ /**
140
+ * On POSIX systems, isinf accepts a float, a double, or a long double.
141
+ * But mingw didn't provide an isinf macro, only an isinf function that only
142
+ * accepts floats, so we need to use _finite instead.
143
+ */
144
+ #ifdef __MINGW64__
145
+ #include <float.h>
146
+ #define PRISM_ISINF(x) (!_finite(x))
147
+ #else
148
+ #define PRISM_ISINF(x) isinf(x)
130
149
  #endif
131
150
 
132
151
  /**
@@ -203,4 +222,36 @@
203
222
  #define PRISM_ENCODING_EXCLUDE_FULL
204
223
  #endif
205
224
 
225
+ /**
226
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
227
+ * branch prediction.
228
+ */
229
+ #if defined(__GNUC__) || defined(__clang__)
230
+ /** The compiler should predict that this branch will be taken. */
231
+ #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
232
+
233
+ /** The compiler should predict that this branch will not be taken. */
234
+ #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
235
+ #else
236
+ /** Void because this platform does not support branch prediction hints. */
237
+ #define PRISM_LIKELY(x) (x)
238
+
239
+ /** Void because this platform does not support branch prediction hints. */
240
+ #define PRISM_UNLIKELY(x) (x)
241
+ #endif
242
+
243
+ /**
244
+ * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
245
+ * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
246
+ */
247
+ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later
248
+ #define PRISM_FALLTHROUGH [[fallthrough]];
249
+ #elif defined(__GNUC__) || defined(__clang__)
250
+ #define PRISM_FALLTHROUGH __attribute__((fallthrough));
251
+ #elif defined(_MSC_VER)
252
+ #define PRISM_FALLTHROUGH __fallthrough;
253
+ #else
254
+ #define PRISM_FALLTHROUGH
255
+ #endif
256
+
206
257
  #endif
@@ -1,10 +1,10 @@
1
- /******************************************************************************/
1
+ /*----------------------------------------------------------------------------*/
2
2
  /* This file is generated by the templates/template.rb script and should not */
3
3
  /* be modified manually. See */
4
4
  /* templates/include/prism/diagnostic.h.erb */
5
5
  /* if you are looking to modify the */
6
6
  /* template */
7
- /******************************************************************************/
7
+ /*----------------------------------------------------------------------------*/
8
8
 
9
9
  /**
10
10
  * @file diagnostic.h
@@ -44,7 +44,6 @@ typedef enum {
44
44
  PM_ERR_ARGUMENT_FORMAL_GLOBAL,
45
45
  PM_ERR_ARGUMENT_FORMAL_IVAR,
46
46
  PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
47
- PM_ERR_ARGUMENT_IN,
48
47
  PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND,
49
48
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
50
49
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
@@ -110,8 +109,10 @@ typedef enum {
110
109
  PM_ERR_ESCAPE_INVALID_META_REPEAT,
111
110
  PM_ERR_ESCAPE_INVALID_UNICODE,
112
111
  PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
112
+ PM_ERR_ESCAPE_INVALID_UNICODE_LIST,
113
113
  PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
114
114
  PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
115
+ PM_ERR_ESCAPE_INVALID_UNICODE_SHORT,
115
116
  PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
116
117
  PM_ERR_EXPECT_ARGUMENT,
117
118
  PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
@@ -126,6 +127,7 @@ typedef enum {
126
127
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
127
128
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
128
129
  PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
130
+ PM_ERR_EXPECT_FOR_DELIMITER,
129
131
  PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
130
132
  PM_ERR_EXPECT_IN_DELIMITER,
131
133
  PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
@@ -134,6 +136,7 @@ typedef enum {
134
136
  PM_ERR_EXPECT_RPAREN,
135
137
  PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
136
138
  PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
139
+ PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER,
137
140
  PM_ERR_EXPECT_STRING_CONTENT,
138
141
  PM_ERR_EXPECT_WHEN_DELIMITER,
139
142
  PM_ERR_EXPRESSION_BARE_HASH,
@@ -143,6 +146,7 @@ typedef enum {
143
146
  PM_ERR_EXPRESSION_NOT_WRITABLE_FILE,
144
147
  PM_ERR_EXPRESSION_NOT_WRITABLE_LINE,
145
148
  PM_ERR_EXPRESSION_NOT_WRITABLE_NIL,
149
+ PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED,
146
150
  PM_ERR_EXPRESSION_NOT_WRITABLE_SELF,
147
151
  PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE,
148
152
  PM_ERR_FLOAT_PARSE,
@@ -166,6 +170,7 @@ typedef enum {
166
170
  PM_ERR_INSTANCE_VARIABLE_BARE,
167
171
  PM_ERR_INVALID_BLOCK_EXIT,
168
172
  PM_ERR_INVALID_CHARACTER,
173
+ PM_ERR_INVALID_COMMA,
169
174
  PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
170
175
  PM_ERR_INVALID_ESCAPE_CHARACTER,
171
176
  PM_ERR_INVALID_FLOAT_EXPONENT,
@@ -182,6 +187,7 @@ typedef enum {
182
187
  PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER,
183
188
  PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING,
184
189
  PM_ERR_INVALID_PERCENT,
190
+ PM_ERR_INVALID_PERCENT_EOF,
185
191
  PM_ERR_INVALID_PRINTABLE_CHARACTER,
186
192
  PM_ERR_INVALID_RETRY_AFTER_ELSE,
187
193
  PM_ERR_INVALID_RETRY_AFTER_ENSURE,
@@ -210,12 +216,15 @@ typedef enum {
210
216
  PM_ERR_MODULE_TERM,
211
217
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
212
218
  PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
219
+ PM_ERR_NESTING_TOO_DEEP,
213
220
  PM_ERR_NO_LOCAL_VARIABLE,
221
+ PM_ERR_NON_ASSOCIATIVE_OPERATOR,
214
222
  PM_ERR_NOT_EXPRESSION,
215
223
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
224
+ PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK,
216
225
  PM_ERR_NUMBERED_PARAMETER_IT,
217
226
  PM_ERR_NUMBERED_PARAMETER_ORDINARY,
218
- PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
227
+ PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK,
219
228
  PM_ERR_OPERATOR_MULTI_ASSIGN,
220
229
  PM_ERR_OPERATOR_WRITE_ARGUMENTS,
221
230
  PM_ERR_OPERATOR_WRITE_BLOCK,
@@ -232,8 +241,9 @@ typedef enum {
232
241
  PM_ERR_PARAMETER_SPLAT_MULTI,
233
242
  PM_ERR_PARAMETER_STAR,
234
243
  PM_ERR_PARAMETER_UNEXPECTED_FWD,
235
- PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
236
244
  PM_ERR_PARAMETER_UNEXPECTED_NO_KW,
245
+ PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
246
+ PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS,
237
247
  PM_ERR_PATTERN_CAPTURE_DUPLICATE,
238
248
  PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
239
249
  PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
@@ -245,6 +255,7 @@ typedef enum {
245
255
  PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
246
256
  PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
247
257
  PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
258
+ PM_ERR_PATTERN_FIND_MISSING_INNER,
248
259
  PM_ERR_PATTERN_HASH_IMPLICIT,
249
260
  PM_ERR_PATTERN_HASH_KEY,
250
261
  PM_ERR_PATTERN_HASH_KEY_DUPLICATE,
@@ -262,6 +273,7 @@ typedef enum {
262
273
  PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING,
263
274
  PM_ERR_REGEXP_INVALID_UNICODE_RANGE,
264
275
  PM_ERR_REGEXP_NON_ESCAPED_MBC,
276
+ PM_ERR_REGEXP_PARSE_ERROR,
265
277
  PM_ERR_REGEXP_TERM,
266
278
  PM_ERR_REGEXP_UNKNOWN_OPTIONS,
267
279
  PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP,
@@ -286,11 +298,15 @@ typedef enum {
286
298
  PM_ERR_TERNARY_COLON,
287
299
  PM_ERR_TERNARY_EXPRESSION_FALSE,
288
300
  PM_ERR_TERNARY_EXPRESSION_TRUE,
301
+ PM_ERR_UNARY_DISALLOWED,
289
302
  PM_ERR_UNARY_RECEIVER,
290
303
  PM_ERR_UNDEF_ARGUMENT,
291
304
  PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
292
305
  PM_ERR_UNEXPECTED_INDEX_BLOCK,
293
306
  PM_ERR_UNEXPECTED_INDEX_KEYWORDS,
307
+ PM_ERR_UNEXPECTED_LABEL,
308
+ PM_ERR_UNEXPECTED_MULTI_WRITE,
309
+ PM_ERR_UNEXPECTED_RANGE_OPERATOR,
294
310
  PM_ERR_UNEXPECTED_SAFE_NAVIGATION,
295
311
  PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
296
312
  PM_ERR_UNEXPECTED_TOKEN_IGNORE,
@@ -303,6 +319,7 @@ typedef enum {
303
319
  PM_ERR_XSTRING_TERM,
304
320
 
305
321
  // These are the warning diagnostics.
322
+ PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
306
323
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
307
324
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
308
325
  PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND,
@@ -318,10 +335,11 @@ typedef enum {
318
335
  PM_WARN_DUPLICATED_WHEN_CLAUSE,
319
336
  PM_WARN_FLOAT_OUT_OF_RANGE,
320
337
  PM_WARN_IGNORED_FROZEN_STRING_LITERAL,
338
+ PM_WARN_INDENTATION_MISMATCH,
321
339
  PM_WARN_INTEGER_IN_FLIP_FLOP,
322
340
  PM_WARN_INVALID_CHARACTER,
341
+ PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
323
342
  PM_WARN_INVALID_NUMBERED_REFERENCE,
324
- PM_WARN_INVALID_SHAREABLE_CONSTANT_VALUE,
325
343
  PM_WARN_KEYWORD_EOL,
326
344
  PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
327
345
  PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
data/include/prism/node.h CHANGED
@@ -56,27 +56,6 @@ void pm_node_list_free(pm_node_list_t *list);
56
56
  */
57
57
  PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
58
58
 
59
- /**
60
- * This struct stores the information gathered by the pm_node_memsize function.
61
- * It contains both the memory footprint and additionally metadata about the
62
- * shape of the tree.
63
- */
64
- typedef struct {
65
- /** The total memory footprint of the node and all of its children. */
66
- size_t memsize;
67
-
68
- /** The number of children the node has. */
69
- size_t node_count;
70
- } pm_memsize_t;
71
-
72
- /**
73
- * Calculates the memory footprint of a given node.
74
- *
75
- * @param node The node to calculate the memory footprint of.
76
- * @param memsize The memory footprint of the node and all of its children.
77
- */
78
- PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
79
-
80
59
  /**
81
60
  * Returns a string representation of the given node type.
82
61
  *
@@ -7,6 +7,7 @@
7
7
  #define PRISM_OPTIONS_H
8
8
 
9
9
  #include "prism/defines.h"
10
+ #include "prism/util/pm_char.h"
10
11
  #include "prism/util/pm_string.h"
11
12
 
12
13
  #include <stdbool.h>
@@ -40,6 +41,23 @@ typedef struct pm_options_scope {
40
41
  pm_string_t *locals;
41
42
  } pm_options_scope_t;
42
43
 
44
+ // Forward declaration needed by the callback typedef.
45
+ struct pm_options;
46
+
47
+ /**
48
+ * The callback called when additional switches are found in a shebang comment
49
+ * that need to be processed by the runtime.
50
+ *
51
+ * @param options The options struct that may be updated by this callback.
52
+ * Certain fields will be checked for changes, specifically encoding,
53
+ * command_line, and frozen_string_literal.
54
+ * @param source The source of the shebang comment.
55
+ * @param length The length of the source.
56
+ * @param shebang_callback_data Any additional data that should be passed along
57
+ * to the callback.
58
+ */
59
+ typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
60
+
43
61
  /**
44
62
  * The version of Ruby syntax that we should be parsing with. This is used to
45
63
  * allow consumers to specify which behavior they want in case they need to
@@ -56,7 +74,19 @@ typedef enum {
56
74
  /**
57
75
  * The options that can be passed to the parser.
58
76
  */
59
- typedef struct {
77
+ typedef struct pm_options {
78
+ /**
79
+ * The callback to call when additional switches are found in a shebang
80
+ * comment.
81
+ */
82
+ pm_options_shebang_callback_t shebang_callback;
83
+
84
+ /**
85
+ * Any additional data that should be passed along to the shebang callback
86
+ * if one was set.
87
+ */
88
+ void *shebang_callback_data;
89
+
60
90
  /** The name of the file that is currently being parsed. */
61
91
  pm_string_t filepath;
62
92
 
@@ -103,6 +133,30 @@ typedef struct {
103
133
  * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
104
134
  */
105
135
  int8_t frozen_string_literal;
136
+
137
+ /**
138
+ * Whether or not the encoding magic comments should be respected. This is a
139
+ * niche use-case where you want to parse a file with a specific encoding
140
+ * but ignore any encoding magic comments at the top of the file.
141
+ */
142
+ bool encoding_locked;
143
+
144
+ /**
145
+ * When the file being parsed is the main script, the shebang will be
146
+ * considered for command-line flags (or for implicit -x). The caller needs
147
+ * to pass this information to the parser so that it can behave correctly.
148
+ */
149
+ bool main_script;
150
+
151
+ /**
152
+ * When the file being parsed is considered a "partial" script, jumps will
153
+ * not be marked as errors if they are not contained within loops/blocks.
154
+ * This is used in the case that you're parsing a script that you know will
155
+ * be embedded inside another script later, but you do not have that context
156
+ * yet. For example, when parsing an ERB template that will be evaluated
157
+ * inside another script.
158
+ */
159
+ bool partial_script;
106
160
  } pm_options_t;
107
161
 
108
162
  /**
@@ -142,6 +196,16 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
142
196
  */
143
197
  static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
144
198
 
199
+ /**
200
+ * Set the shebang callback option on the given options struct.
201
+ *
202
+ * @param options The options struct to set the shebang callback on.
203
+ * @param shebang_callback The shebang callback to set.
204
+ * @param shebang_callback_data Any additional data that should be passed along
205
+ * to the callback.
206
+ */
207
+ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
208
+
145
209
  /**
146
210
  * Set the filepath option on the given options struct.
147
211
  *
@@ -166,6 +230,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
166
230
  */
167
231
  PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
168
232
 
233
+ /**
234
+ * Set the encoding_locked option on the given options struct.
235
+ *
236
+ * @param options The options struct to set the encoding_locked value on.
237
+ * @param encoding_locked The encoding_locked value to set.
238
+ */
239
+ PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
240
+
169
241
  /**
170
242
  * Set the frozen string literal option on the given options struct.
171
243
  *
@@ -194,6 +266,22 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
194
266
  */
195
267
  PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
196
268
 
269
+ /**
270
+ * Set the main script option on the given options struct.
271
+ *
272
+ * @param options The options struct to set the main script value on.
273
+ * @param main_script The main script value to set.
274
+ */
275
+ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
276
+
277
+ /**
278
+ * Set the partial script option on the given options struct.
279
+ *
280
+ * @param options The options struct to set the partial script value on.
281
+ * @param partial_script The partial script value to set.
282
+ */
283
+ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
284
+
197
285
  /**
198
286
  * Allocate and zero out the scopes array on the given options struct.
199
287
  *
@@ -261,6 +349,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
261
349
  * | `1` | -l command line option |
262
350
  * | `1` | -a command line option |
263
351
  * | `1` | the version |
352
+ * | `1` | encoding locked |
353
+ * | `1` | main script |
354
+ * | `1` | partial script |
264
355
  * | `4` | the number of scopes |
265
356
  * | ... | the scopes |
266
357
  *
@@ -293,8 +384,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
293
384
  * * The encoding can have a length of 0, in which case we'll use the default
294
385
  * encoding (UTF-8). If it's not 0, it should correspond to a name of an
295
386
  * encoding that can be passed to `Encoding.find` in Ruby.
296
- * * The frozen string literal and suppress warnings fields are booleans, so
297
- * their values should be either 0 or 1.
387
+ * * The frozen string literal, encoding locked, main script, and partial script
388
+ * fields are booleans, so their values should be either 0 or 1.
298
389
  * * The number of scopes can be 0.
299
390
  *
300
391
  * @param options The options struct to deserialize into.
@@ -82,6 +82,23 @@ typedef enum {
82
82
  PM_HEREDOC_INDENT_TILDE,
83
83
  } pm_heredoc_indent_t;
84
84
 
85
+ /**
86
+ * All of the information that needs to be stored in order to lex a heredoc.
87
+ */
88
+ typedef struct {
89
+ /** A pointer to the start of the heredoc identifier. */
90
+ const uint8_t *ident_start;
91
+
92
+ /** The length of the heredoc identifier. */
93
+ size_t ident_length;
94
+
95
+ /** The type of quote that the heredoc uses. */
96
+ pm_heredoc_quote_t quote;
97
+
98
+ /** The type of indentation that the heredoc uses. */
99
+ pm_heredoc_indent_t indent;
100
+ } pm_heredoc_lex_mode_t;
101
+
85
102
  /**
86
103
  * When lexing Ruby source, the lexer has a small amount of state to tell which
87
104
  * kind of token it is currently lexing. For example, when we find the start of
@@ -210,17 +227,10 @@ typedef struct pm_lex_mode {
210
227
  } string;
211
228
 
212
229
  struct {
213
- /** A pointer to the start of the heredoc identifier. */
214
- const uint8_t *ident_start;
215
-
216
- /** The length of the heredoc identifier. */
217
- size_t ident_length;
218
-
219
- /** The type of quote that the heredoc uses. */
220
- pm_heredoc_quote_t quote;
221
-
222
- /** The type of indentation that the heredoc uses. */
223
- pm_heredoc_indent_t indent;
230
+ /**
231
+ * All of the data necessary to lex a heredoc.
232
+ */
233
+ pm_heredoc_lex_mode_t base;
224
234
 
225
235
  /**
226
236
  * This is the pointer to the character where lexing should resume
@@ -233,7 +243,7 @@ typedef struct pm_lex_mode {
233
243
  * line so that we know how much to dedent each line in the case of
234
244
  * a tilde heredoc.
235
245
  */
236
- size_t common_whitespace;
246
+ size_t *common_whitespace;
237
247
 
238
248
  /** True if the previous token ended with a line continuation. */
239
249
  bool line_continuation;
@@ -364,6 +374,9 @@ typedef enum {
364
374
  /** a rescue statement within a lambda expression */
365
375
  PM_CONTEXT_LAMBDA_RESCUE,
366
376
 
377
+ /** the predicate clause of a loop statement */
378
+ PM_CONTEXT_LOOP_PREDICATE,
379
+
367
380
  /** the top level context */
368
381
  PM_CONTEXT_MAIN,
369
382
 
@@ -379,6 +392,9 @@ typedef enum {
379
392
  /** a rescue statement within a module statement */
380
393
  PM_CONTEXT_MODULE_RESCUE,
381
394
 
395
+ /** a multiple target expression */
396
+ PM_CONTEXT_MULTI_TARGET,
397
+
382
398
  /** a parenthesized expression */
383
399
  PM_CONTEXT_PARENS,
384
400
 
@@ -505,9 +521,9 @@ typedef struct {
505
521
  /** The type of shareable constant value that can be set. */
506
522
  typedef uint8_t pm_shareable_constant_value_t;
507
523
  static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
508
- static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = 0x1;
509
- static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = 0x2;
510
- static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = 0x4;
524
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
525
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
526
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
511
527
 
512
528
  /**
513
529
  * This tracks an individual local variable in a certain lexical context, as
@@ -546,6 +562,17 @@ typedef struct pm_locals {
546
562
  pm_local_t *locals;
547
563
  } pm_locals_t;
548
564
 
565
+ /** The flags about scope parameters that can be set. */
566
+ typedef uint8_t pm_scope_parameters_t;
567
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
568
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
569
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
570
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
571
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
572
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
573
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
574
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
575
+
549
576
  /**
550
577
  * This struct represents a node in a linked list of scopes. Some scopes can see
551
578
  * into their parent scopes, while others cannot.
@@ -557,10 +584,19 @@ typedef struct pm_scope {
557
584
  /** The IDs of the locals in the given scope. */
558
585
  pm_locals_t locals;
559
586
 
587
+ /**
588
+ * This is a list of the implicit parameters contained within the block.
589
+ * These will be processed after the block is parsed to determine the kind
590
+ * of parameters node that should be used and to check if any errors need to
591
+ * be added.
592
+ */
593
+ pm_node_list_t implicit_parameters;
594
+
560
595
  /**
561
596
  * This is a bitfield that indicates the parameters that are being used in
562
- * this scope. It is a combination of the PM_SCOPE_PARAMS_* constants. There
563
- * are three different kinds of parameters that can be used in a scope:
597
+ * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
598
+ * There are three different kinds of parameters that can be used in a
599
+ * scope:
564
600
  *
565
601
  * - Ordinary parameters (e.g., def foo(bar); end)
566
602
  * - Numbered parameters (e.g., def foo; _1; end)
@@ -575,15 +611,7 @@ typedef struct pm_scope {
575
611
  * - def foo(&); end
576
612
  * - def foo(...); end
577
613
  */
578
- uint8_t parameters;
579
-
580
- /**
581
- * An integer indicating the number of numbered parameters on this scope.
582
- * This is necessary to determine if child blocks are allowed to use
583
- * numbered parameters, and to pass information to consumers of the AST
584
- * about how many numbered parameters exist.
585
- */
586
- int8_t numbered_parameters;
614
+ pm_scope_parameters_t parameters;
587
615
 
588
616
  /**
589
617
  * The current state of constant shareability for this scope. This is
@@ -598,20 +626,6 @@ typedef struct pm_scope {
598
626
  bool closed;
599
627
  } pm_scope_t;
600
628
 
601
- static const uint8_t PM_SCOPE_PARAMETERS_NONE = 0x0;
602
- static const uint8_t PM_SCOPE_PARAMETERS_ORDINARY = 0x1;
603
- static const uint8_t PM_SCOPE_PARAMETERS_NUMBERED = 0x2;
604
- static const uint8_t PM_SCOPE_PARAMETERS_IT = 0x4;
605
- static const uint8_t PM_SCOPE_PARAMETERS_TYPE_MASK = 0x7;
606
-
607
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x8;
608
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x10;
609
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x20;
610
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x40;
611
-
612
- static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED = -1;
613
- static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_NONE = 0;
614
-
615
629
  /**
616
630
  * A struct that represents a stack of boolean values.
617
631
  */
@@ -624,6 +638,13 @@ typedef uint32_t pm_state_stack_t;
624
638
  * it's considering.
625
639
  */
626
640
  struct pm_parser {
641
+ /**
642
+ * The next node identifier that will be assigned. This is a unique
643
+ * identifier used to track nodes such that the syntax tree can be dropped
644
+ * but the node can be found through another parse.
645
+ */
646
+ uint32_t node_id;
647
+
627
648
  /** The current state of the lexer. */
628
649
  pm_lex_state_t lex_state;
629
650
 
@@ -853,12 +874,27 @@ struct pm_parser {
853
874
  */
854
875
  bool parsing_eval;
855
876
 
877
+ /**
878
+ * Whether or not we are parsing a "partial" script, which is a script that
879
+ * will be evaluated in the context of another script, so we should not
880
+ * check jumps (next/break/etc.) for validity.
881
+ */
882
+ bool partial_script;
883
+
856
884
  /** Whether or not we're at the beginning of a command. */
857
885
  bool command_start;
858
886
 
859
887
  /** Whether or not we're currently recovering from a syntax error. */
860
888
  bool recovering;
861
889
 
890
+ /**
891
+ * This is very specialized behavior for when you want to parse in a context
892
+ * that does not respect encoding comments. Its main use case is translating
893
+ * into the whitequark/parser AST which re-encodes source files in UTF-8
894
+ * before they are parsed and ignores encoding comments.
895
+ */
896
+ bool encoding_locked;
897
+
862
898
  /**
863
899
  * Whether or not the encoding has been changed by a magic comment. We use
864
900
  * this to provide a fast path for the lexer instead of going through the
@@ -886,6 +922,12 @@ struct pm_parser {
886
922
  * characters.
887
923
  */
888
924
  bool current_regular_expression_ascii_only;
925
+
926
+ /**
927
+ * By default, Ruby always warns about mismatched indentation. This can be
928
+ * toggled with a magic comment.
929
+ */
930
+ bool warn_mismatched_indentation;
889
931
  };
890
932
 
891
933
  #endif
@@ -10,7 +10,6 @@
10
10
  #include "prism/parser.h"
11
11
  #include "prism/encoding.h"
12
12
  #include "prism/util/pm_memchr.h"
13
- #include "prism/util/pm_string_list.h"
14
13
  #include "prism/util/pm_string.h"
15
14
 
16
15
  #include <stdbool.h>
@@ -18,16 +17,27 @@
18
17
  #include <string.h>
19
18
 
20
19
  /**
21
- * Parse a regular expression and extract the names of all of the named capture
22
- * groups.
20
+ * This callback is called when a named capture group is found.
21
+ */
22
+ typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
23
+
24
+ /**
25
+ * This callback is called when a parse error is found.
26
+ */
27
+ typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
28
+
29
+ /**
30
+ * Parse a regular expression.
23
31
  *
32
+ * @param parser The parser that is currently being used.
24
33
  * @param source The source code to parse.
25
34
  * @param size The size of the source code.
26
- * @param named_captures The list to add the names of the named capture groups.
27
- * @param encoding_changed Whether or not the encoding changed from the default.
28
- * @param encoding The encoding of the source code.
29
- * @return Whether or not the parsing was successful.
35
+ * @param extended_mode Whether to parse the regular expression in extended mode.
36
+ * @param name_callback The optional callback to call when a named capture group is found.
37
+ * @param name_data The optional data to pass to the name callback.
38
+ * @param error_callback The callback to call when a parse error is found.
39
+ * @param error_data The data to pass to the error callback.
30
40
  */
31
- PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding);
41
+ PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
32
42
 
33
43
  #endif