prism 0.29.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +115 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/Makefile +1 -1
  5. data/README.md +4 -0
  6. data/config.yml +920 -148
  7. data/docs/build_system.md +8 -11
  8. data/docs/fuzzing.md +1 -1
  9. data/docs/parsing_rules.md +4 -1
  10. data/docs/relocation.md +34 -0
  11. data/docs/ripper_translation.md +22 -0
  12. data/docs/serialization.md +3 -0
  13. data/ext/prism/api_node.c +2863 -2079
  14. data/ext/prism/extconf.rb +14 -37
  15. data/ext/prism/extension.c +241 -391
  16. data/ext/prism/extension.h +2 -2
  17. data/include/prism/ast.h +2156 -453
  18. data/include/prism/defines.h +58 -7
  19. data/include/prism/diagnostic.h +24 -6
  20. data/include/prism/node.h +0 -21
  21. data/include/prism/options.h +94 -3
  22. data/include/prism/parser.h +82 -40
  23. data/include/prism/regexp.h +18 -8
  24. data/include/prism/static_literals.h +3 -2
  25. data/include/prism/util/pm_char.h +1 -2
  26. data/include/prism/util/pm_constant_pool.h +0 -8
  27. data/include/prism/util/pm_integer.h +22 -15
  28. data/include/prism/util/pm_newline_list.h +11 -0
  29. data/include/prism/util/pm_string.h +28 -12
  30. data/include/prism/version.h +3 -3
  31. data/include/prism.h +47 -11
  32. data/lib/prism/compiler.rb +3 -0
  33. data/lib/prism/desugar_compiler.rb +111 -74
  34. data/lib/prism/dispatcher.rb +16 -1
  35. data/lib/prism/dot_visitor.rb +55 -34
  36. data/lib/prism/dsl.rb +660 -468
  37. data/lib/prism/ffi.rb +113 -8
  38. data/lib/prism/inspect_visitor.rb +296 -64
  39. data/lib/prism/lex_compat.rb +1 -1
  40. data/lib/prism/mutation_compiler.rb +11 -6
  41. data/lib/prism/node.rb +4262 -5023
  42. data/lib/prism/node_ext.rb +91 -14
  43. data/lib/prism/parse_result/comments.rb +0 -7
  44. data/lib/prism/parse_result/errors.rb +65 -0
  45. data/lib/prism/parse_result/newlines.rb +101 -11
  46. data/lib/prism/parse_result.rb +183 -6
  47. data/lib/prism/reflection.rb +12 -10
  48. data/lib/prism/relocation.rb +504 -0
  49. data/lib/prism/serialize.rb +496 -609
  50. data/lib/prism/string_query.rb +30 -0
  51. data/lib/prism/translation/parser/compiler.rb +185 -155
  52. data/lib/prism/translation/parser/lexer.rb +26 -4
  53. data/lib/prism/translation/parser.rb +9 -4
  54. data/lib/prism/translation/ripper.rb +23 -25
  55. data/lib/prism/translation/ruby_parser.rb +86 -17
  56. data/lib/prism/visitor.rb +3 -0
  57. data/lib/prism.rb +6 -8
  58. data/prism.gemspec +9 -5
  59. data/rbi/prism/dsl.rbi +521 -0
  60. data/rbi/prism/node.rbi +1115 -1120
  61. data/rbi/prism/parse_result.rbi +29 -0
  62. data/rbi/prism/string_query.rbi +12 -0
  63. data/rbi/prism/visitor.rbi +3 -0
  64. data/rbi/prism.rbi +36 -30
  65. data/sig/prism/dsl.rbs +190 -303
  66. data/sig/prism/mutation_compiler.rbs +1 -0
  67. data/sig/prism/node.rbs +678 -632
  68. data/sig/prism/parse_result.rbs +22 -0
  69. data/sig/prism/relocation.rbs +185 -0
  70. data/sig/prism/string_query.rbs +11 -0
  71. data/sig/prism/visitor.rbs +1 -0
  72. data/sig/prism.rbs +103 -64
  73. data/src/diagnostic.c +64 -28
  74. data/src/node.c +502 -1739
  75. data/src/options.c +76 -27
  76. data/src/prettyprint.c +188 -112
  77. data/src/prism.c +3376 -2293
  78. data/src/regexp.c +208 -71
  79. data/src/serialize.c +182 -50
  80. data/src/static_literals.c +64 -85
  81. data/src/token_type.c +4 -4
  82. data/src/util/pm_char.c +1 -1
  83. data/src/util/pm_constant_pool.c +0 -8
  84. data/src/util/pm_integer.c +53 -25
  85. data/src/util/pm_newline_list.c +29 -0
  86. data/src/util/pm_string.c +131 -80
  87. data/src/util/pm_strpbrk.c +32 -6
  88. metadata +11 -7
  89. data/include/prism/util/pm_string_list.h +0 -44
  90. data/lib/prism/debug.rb +0 -249
  91. data/lib/prism/translation/parser/rubocop.rb +0 -73
  92. data/src/util/pm_string_list.c +0 -28
@@ -25,6 +25,15 @@
25
25
  #define __STDC_FORMAT_MACROS
26
26
  #include <inttypes.h>
27
27
 
28
+ /**
29
+ * When we are parsing using recursive descent, we want to protect against
30
+ * malicious payloads that could attempt to crash our parser. We do this by
31
+ * specifying a maximum depth to which we are allowed to recurse.
32
+ */
33
+ #ifndef PRISM_DEPTH_MAXIMUM
34
+ #define PRISM_DEPTH_MAXIMUM 1000
35
+ #endif
36
+
28
37
  /**
29
38
  * By default, we compile with -fvisibility=hidden. When this is enabled, we
30
39
  * need to mark certain functions as being publicly-visible. This macro does
@@ -119,14 +128,24 @@
119
128
  #endif
120
129
 
121
130
  /**
122
- * isinf on Windows is defined as accepting a float, but on POSIX systems it
123
- * accepts a float, a double, or a long double. We want to mirror this behavior
124
- * on windows.
131
+ * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
132
+ * related code from the library. All filesystem related code should be guarded
133
+ * by PRISM_HAS_FILESYSTEM.
125
134
  */
126
- #ifdef _WIN32
127
- # include <float.h>
128
- # undef isinf
129
- # define isinf(x) (sizeof(x) == sizeof(float) ? !_finitef(x) : !_finite(x))
135
+ #ifndef PRISM_HAS_NO_FILESYSTEM
136
+ # define PRISM_HAS_FILESYSTEM
137
+ #endif
138
+
139
+ /**
140
+ * isinf on POSIX systems accepts a float, a double, or a long double.
141
+ * But mingw didn't provide an isinf macro, only an isinf function that only
142
+ * accepts floats, so we need to use _finite instead.
143
+ */
144
+ #ifdef __MINGW64__
145
+ #include <float.h>
146
+ #define PRISM_ISINF(x) (!_finite(x))
147
+ #else
148
+ #define PRISM_ISINF(x) isinf(x)
130
149
  #endif
131
150
 
132
151
  /**
@@ -203,4 +222,36 @@
203
222
  #define PRISM_ENCODING_EXCLUDE_FULL
204
223
  #endif
205
224
 
225
+ /**
226
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
227
+ * branch prediction.
228
+ */
229
+ #if defined(__GNUC__) || defined(__clang__)
230
+ /** The compiler should predict that this branch will be taken. */
231
+ #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
232
+
233
+ /** The compiler should predict that this branch will not be taken. */
234
+ #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
235
+ #else
236
+ /** Void because this platform does not support branch prediction hints. */
237
+ #define PRISM_LIKELY(x) (x)
238
+
239
+ /** Void because this platform does not support branch prediction hints. */
240
+ #define PRISM_UNLIKELY(x) (x)
241
+ #endif
242
+
243
+ /**
244
+ * We use -Wimplicit-fallthrough to guard against potentially unintended fall-through between cases of a switch.
245
+ * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
246
+ */
247
+ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later
248
+ #define PRISM_FALLTHROUGH [[fallthrough]];
249
+ #elif defined(__GNUC__) || defined(__clang__)
250
+ #define PRISM_FALLTHROUGH __attribute__((fallthrough));
251
+ #elif defined(_MSC_VER)
252
+ #define PRISM_FALLTHROUGH __fallthrough;
253
+ #else
254
+ #define PRISM_FALLTHROUGH
255
+ #endif
256
+
206
257
  #endif
@@ -1,10 +1,10 @@
1
- /******************************************************************************/
1
+ /*----------------------------------------------------------------------------*/
2
2
  /* This file is generated by the templates/template.rb script and should not */
3
3
  /* be modified manually. See */
4
4
  /* templates/include/prism/diagnostic.h.erb */
5
5
  /* if you are looking to modify the */
6
6
  /* template */
7
- /******************************************************************************/
7
+ /*----------------------------------------------------------------------------*/
8
8
 
9
9
  /**
10
10
  * @file diagnostic.h
@@ -44,7 +44,6 @@ typedef enum {
44
44
  PM_ERR_ARGUMENT_FORMAL_GLOBAL,
45
45
  PM_ERR_ARGUMENT_FORMAL_IVAR,
46
46
  PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
47
- PM_ERR_ARGUMENT_IN,
48
47
  PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND,
49
48
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
50
49
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
@@ -110,8 +109,10 @@ typedef enum {
110
109
  PM_ERR_ESCAPE_INVALID_META_REPEAT,
111
110
  PM_ERR_ESCAPE_INVALID_UNICODE,
112
111
  PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
112
+ PM_ERR_ESCAPE_INVALID_UNICODE_LIST,
113
113
  PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
114
114
  PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
115
+ PM_ERR_ESCAPE_INVALID_UNICODE_SHORT,
115
116
  PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
116
117
  PM_ERR_EXPECT_ARGUMENT,
117
118
  PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
@@ -126,6 +127,7 @@ typedef enum {
126
127
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
127
128
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
128
129
  PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
130
+ PM_ERR_EXPECT_FOR_DELIMITER,
129
131
  PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
130
132
  PM_ERR_EXPECT_IN_DELIMITER,
131
133
  PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
@@ -134,6 +136,7 @@ typedef enum {
134
136
  PM_ERR_EXPECT_RPAREN,
135
137
  PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
136
138
  PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
139
+ PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER,
137
140
  PM_ERR_EXPECT_STRING_CONTENT,
138
141
  PM_ERR_EXPECT_WHEN_DELIMITER,
139
142
  PM_ERR_EXPRESSION_BARE_HASH,
@@ -143,6 +146,7 @@ typedef enum {
143
146
  PM_ERR_EXPRESSION_NOT_WRITABLE_FILE,
144
147
  PM_ERR_EXPRESSION_NOT_WRITABLE_LINE,
145
148
  PM_ERR_EXPRESSION_NOT_WRITABLE_NIL,
149
+ PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED,
146
150
  PM_ERR_EXPRESSION_NOT_WRITABLE_SELF,
147
151
  PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE,
148
152
  PM_ERR_FLOAT_PARSE,
@@ -166,6 +170,7 @@ typedef enum {
166
170
  PM_ERR_INSTANCE_VARIABLE_BARE,
167
171
  PM_ERR_INVALID_BLOCK_EXIT,
168
172
  PM_ERR_INVALID_CHARACTER,
173
+ PM_ERR_INVALID_COMMA,
169
174
  PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
170
175
  PM_ERR_INVALID_ESCAPE_CHARACTER,
171
176
  PM_ERR_INVALID_FLOAT_EXPONENT,
@@ -182,6 +187,7 @@ typedef enum {
182
187
  PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER,
183
188
  PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING,
184
189
  PM_ERR_INVALID_PERCENT,
190
+ PM_ERR_INVALID_PERCENT_EOF,
185
191
  PM_ERR_INVALID_PRINTABLE_CHARACTER,
186
192
  PM_ERR_INVALID_RETRY_AFTER_ELSE,
187
193
  PM_ERR_INVALID_RETRY_AFTER_ENSURE,
@@ -210,12 +216,15 @@ typedef enum {
210
216
  PM_ERR_MODULE_TERM,
211
217
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
212
218
  PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
219
+ PM_ERR_NESTING_TOO_DEEP,
213
220
  PM_ERR_NO_LOCAL_VARIABLE,
221
+ PM_ERR_NON_ASSOCIATIVE_OPERATOR,
214
222
  PM_ERR_NOT_EXPRESSION,
215
223
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
224
+ PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK,
216
225
  PM_ERR_NUMBERED_PARAMETER_IT,
217
226
  PM_ERR_NUMBERED_PARAMETER_ORDINARY,
218
- PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
227
+ PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK,
219
228
  PM_ERR_OPERATOR_MULTI_ASSIGN,
220
229
  PM_ERR_OPERATOR_WRITE_ARGUMENTS,
221
230
  PM_ERR_OPERATOR_WRITE_BLOCK,
@@ -232,8 +241,9 @@ typedef enum {
232
241
  PM_ERR_PARAMETER_SPLAT_MULTI,
233
242
  PM_ERR_PARAMETER_STAR,
234
243
  PM_ERR_PARAMETER_UNEXPECTED_FWD,
235
- PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
236
244
  PM_ERR_PARAMETER_UNEXPECTED_NO_KW,
245
+ PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
246
+ PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS,
237
247
  PM_ERR_PATTERN_CAPTURE_DUPLICATE,
238
248
  PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
239
249
  PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
@@ -245,6 +255,7 @@ typedef enum {
245
255
  PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
246
256
  PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
247
257
  PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
258
+ PM_ERR_PATTERN_FIND_MISSING_INNER,
248
259
  PM_ERR_PATTERN_HASH_IMPLICIT,
249
260
  PM_ERR_PATTERN_HASH_KEY,
250
261
  PM_ERR_PATTERN_HASH_KEY_DUPLICATE,
@@ -262,6 +273,7 @@ typedef enum {
262
273
  PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING,
263
274
  PM_ERR_REGEXP_INVALID_UNICODE_RANGE,
264
275
  PM_ERR_REGEXP_NON_ESCAPED_MBC,
276
+ PM_ERR_REGEXP_PARSE_ERROR,
265
277
  PM_ERR_REGEXP_TERM,
266
278
  PM_ERR_REGEXP_UNKNOWN_OPTIONS,
267
279
  PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP,
@@ -286,11 +298,15 @@ typedef enum {
286
298
  PM_ERR_TERNARY_COLON,
287
299
  PM_ERR_TERNARY_EXPRESSION_FALSE,
288
300
  PM_ERR_TERNARY_EXPRESSION_TRUE,
301
+ PM_ERR_UNARY_DISALLOWED,
289
302
  PM_ERR_UNARY_RECEIVER,
290
303
  PM_ERR_UNDEF_ARGUMENT,
291
304
  PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
292
305
  PM_ERR_UNEXPECTED_INDEX_BLOCK,
293
306
  PM_ERR_UNEXPECTED_INDEX_KEYWORDS,
307
+ PM_ERR_UNEXPECTED_LABEL,
308
+ PM_ERR_UNEXPECTED_MULTI_WRITE,
309
+ PM_ERR_UNEXPECTED_RANGE_OPERATOR,
294
310
  PM_ERR_UNEXPECTED_SAFE_NAVIGATION,
295
311
  PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
296
312
  PM_ERR_UNEXPECTED_TOKEN_IGNORE,
@@ -303,6 +319,7 @@ typedef enum {
303
319
  PM_ERR_XSTRING_TERM,
304
320
 
305
321
  // These are the warning diagnostics.
322
+ PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
306
323
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
307
324
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
308
325
  PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND,
@@ -318,10 +335,11 @@ typedef enum {
318
335
  PM_WARN_DUPLICATED_WHEN_CLAUSE,
319
336
  PM_WARN_FLOAT_OUT_OF_RANGE,
320
337
  PM_WARN_IGNORED_FROZEN_STRING_LITERAL,
338
+ PM_WARN_INDENTATION_MISMATCH,
321
339
  PM_WARN_INTEGER_IN_FLIP_FLOP,
322
340
  PM_WARN_INVALID_CHARACTER,
341
+ PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
323
342
  PM_WARN_INVALID_NUMBERED_REFERENCE,
324
- PM_WARN_INVALID_SHAREABLE_CONSTANT_VALUE,
325
343
  PM_WARN_KEYWORD_EOL,
326
344
  PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
327
345
  PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
data/include/prism/node.h CHANGED
@@ -56,27 +56,6 @@ void pm_node_list_free(pm_node_list_t *list);
56
56
  */
57
57
  PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
58
58
 
59
- /**
60
- * This struct stores the information gathered by the pm_node_memsize function.
61
- * It contains both the memory footprint and additionally metadata about the
62
- * shape of the tree.
63
- */
64
- typedef struct {
65
- /** The total memory footprint of the node and all of its children. */
66
- size_t memsize;
67
-
68
- /** The number of children the node has. */
69
- size_t node_count;
70
- } pm_memsize_t;
71
-
72
- /**
73
- * Calculates the memory footprint of a given node.
74
- *
75
- * @param node The node to calculate the memory footprint of.
76
- * @param memsize The memory footprint of the node and all of its children.
77
- */
78
- PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
79
-
80
59
  /**
81
60
  * Returns a string representation of the given node type.
82
61
  *
@@ -7,6 +7,7 @@
7
7
  #define PRISM_OPTIONS_H
8
8
 
9
9
  #include "prism/defines.h"
10
+ #include "prism/util/pm_char.h"
10
11
  #include "prism/util/pm_string.h"
11
12
 
12
13
  #include <stdbool.h>
@@ -40,6 +41,23 @@ typedef struct pm_options_scope {
40
41
  pm_string_t *locals;
41
42
  } pm_options_scope_t;
42
43
 
44
+ // Forward declaration needed by the callback typedef.
45
+ struct pm_options;
46
+
47
+ /**
48
+ * The callback called when additional switches are found in a shebang comment
49
+ * that need to be processed by the runtime.
50
+ *
51
+ * @param options The options struct that may be updated by this callback.
52
+ * Certain fields will be checked for changes, specifically encoding,
53
+ * command_line, and frozen_string_literal.
54
+ * @param source The source of the shebang comment.
55
+ * @param length The length of the source.
56
+ * @param shebang_callback_data Any additional data that should be passed along
57
+ * to the callback.
58
+ */
59
+ typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
60
+
43
61
  /**
44
62
  * The version of Ruby syntax that we should be parsing with. This is used to
45
63
  * allow consumers to specify which behavior they want in case they need to
@@ -56,7 +74,19 @@ typedef enum {
56
74
  /**
57
75
  * The options that can be passed to the parser.
58
76
  */
59
- typedef struct {
77
+ typedef struct pm_options {
78
+ /**
79
+ * The callback to call when additional switches are found in a shebang
80
+ * comment.
81
+ */
82
+ pm_options_shebang_callback_t shebang_callback;
83
+
84
+ /**
85
+ * Any additional data that should be passed along to the shebang callback
86
+ * if one was set.
87
+ */
88
+ void *shebang_callback_data;
89
+
60
90
  /** The name of the file that is currently being parsed. */
61
91
  pm_string_t filepath;
62
92
 
@@ -103,6 +133,30 @@ typedef struct {
103
133
  * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
104
134
  */
105
135
  int8_t frozen_string_literal;
136
+
137
+ /**
138
+ * Whether or not the encoding magic comments should be respected. This is a
139
+ * niche use-case where you want to parse a file with a specific encoding
140
+ * but ignore any encoding magic comments at the top of the file.
141
+ */
142
+ bool encoding_locked;
143
+
144
+ /**
145
+ * When the file being parsed is the main script, the shebang will be
146
+ * considered for command-line flags (or for implicit -x). The caller needs
147
+ * to pass this information to the parser so that it can behave correctly.
148
+ */
149
+ bool main_script;
150
+
151
+ /**
152
+ * When the file being parsed is considered a "partial" script, jumps will
153
+ * not be marked as errors if they are not contained within loops/blocks.
154
+ * This is used in the case that you're parsing a script that you know will
155
+ * be embedded inside another script later, but you do not have that context
156
+ * yet. For example, when parsing an ERB template that will be evaluated
157
+ * inside another script.
158
+ */
159
+ bool partial_script;
106
160
  } pm_options_t;
107
161
 
108
162
  /**
@@ -142,6 +196,16 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
142
196
  */
143
197
  static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
144
198
 
199
+ /**
200
+ * Set the shebang callback option on the given options struct.
201
+ *
202
+ * @param options The options struct to set the shebang callback on.
203
+ * @param shebang_callback The shebang callback to set.
204
+ * @param shebang_callback_data Any additional data that should be passed along
205
+ * to the callback.
206
+ */
207
+ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
208
+
145
209
  /**
146
210
  * Set the filepath option on the given options struct.
147
211
  *
@@ -166,6 +230,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
166
230
  */
167
231
  PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
168
232
 
233
+ /**
234
+ * Set the encoding_locked option on the given options struct.
235
+ *
236
+ * @param options The options struct to set the encoding_locked value on.
237
+ * @param encoding_locked The encoding_locked value to set.
238
+ */
239
+ PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
240
+
169
241
  /**
170
242
  * Set the frozen string literal option on the given options struct.
171
243
  *
@@ -194,6 +266,22 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
194
266
  */
195
267
  PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
196
268
 
269
+ /**
270
+ * Set the main script option on the given options struct.
271
+ *
272
+ * @param options The options struct to set the main script value on.
273
+ * @param main_script The main script value to set.
274
+ */
275
+ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
276
+
277
+ /**
278
+ * Set the partial script option on the given options struct.
279
+ *
280
+ * @param options The options struct to set the partial script value on.
281
+ * @param partial_script The partial script value to set.
282
+ */
283
+ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
284
+
197
285
  /**
198
286
  * Allocate and zero out the scopes array on the given options struct.
199
287
  *
@@ -261,6 +349,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
261
349
  * | `1` | -l command line option |
262
350
  * | `1` | -a command line option |
263
351
  * | `1` | the version |
352
+ * | `1` | encoding locked |
353
+ * | `1` | main script |
354
+ * | `1` | partial script |
264
355
  * | `4` | the number of scopes |
265
356
  * | ... | the scopes |
266
357
  *
@@ -293,8 +384,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
293
384
  * * The encoding can have a length of 0, in which case we'll use the default
294
385
  * encoding (UTF-8). If it's not 0, it should correspond to a name of an
295
386
  * encoding that can be passed to `Encoding.find` in Ruby.
296
- * * The frozen string literal and suppress warnings fields are booleans, so
297
- * their values should be either 0 or 1.
387
+ * * The frozen string literal, encoding locked, main script, and partial script
388
+ * fields are booleans, so their values should be either 0 or 1.
298
389
  * * The number of scopes can be 0.
299
390
  *
300
391
  * @param options The options struct to deserialize into.
@@ -82,6 +82,23 @@ typedef enum {
82
82
  PM_HEREDOC_INDENT_TILDE,
83
83
  } pm_heredoc_indent_t;
84
84
 
85
+ /**
86
+ * All of the information that is necessary to store in order to lex a heredoc.
87
+ */
88
+ typedef struct {
89
+ /** A pointer to the start of the heredoc identifier. */
90
+ const uint8_t *ident_start;
91
+
92
+ /** The length of the heredoc identifier. */
93
+ size_t ident_length;
94
+
95
+ /** The type of quote that the heredoc uses. */
96
+ pm_heredoc_quote_t quote;
97
+
98
+ /** The type of indentation that the heredoc uses. */
99
+ pm_heredoc_indent_t indent;
100
+ } pm_heredoc_lex_mode_t;
101
+
85
102
  /**
86
103
  * When lexing Ruby source, the lexer has a small amount of state to tell which
87
104
  * kind of token it is currently lexing. For example, when we find the start of
@@ -210,17 +227,10 @@ typedef struct pm_lex_mode {
210
227
  } string;
211
228
 
212
229
  struct {
213
- /** A pointer to the start of the heredoc identifier. */
214
- const uint8_t *ident_start;
215
-
216
- /** The length of the heredoc identifier. */
217
- size_t ident_length;
218
-
219
- /** The type of quote that the heredoc uses. */
220
- pm_heredoc_quote_t quote;
221
-
222
- /** The type of indentation that the heredoc uses. */
223
- pm_heredoc_indent_t indent;
230
+ /**
231
+ * All of the data necessary to lex a heredoc.
232
+ */
233
+ pm_heredoc_lex_mode_t base;
224
234
 
225
235
  /**
226
236
  * This is the pointer to the character where lexing should resume
@@ -233,7 +243,7 @@ typedef struct pm_lex_mode {
233
243
  * line so that we know how much to dedent each line in the case of
234
244
  * a tilde heredoc.
235
245
  */
236
- size_t common_whitespace;
246
+ size_t *common_whitespace;
237
247
 
238
248
  /** True if the previous token ended with a line continuation. */
239
249
  bool line_continuation;
@@ -364,6 +374,9 @@ typedef enum {
364
374
  /** a rescue statement within a lambda expression */
365
375
  PM_CONTEXT_LAMBDA_RESCUE,
366
376
 
377
+ /** the predicate clause of a loop statement */
378
+ PM_CONTEXT_LOOP_PREDICATE,
379
+
367
380
  /** the top level context */
368
381
  PM_CONTEXT_MAIN,
369
382
 
@@ -379,6 +392,9 @@ typedef enum {
379
392
  /** a rescue statement within a module statement */
380
393
  PM_CONTEXT_MODULE_RESCUE,
381
394
 
395
+ /** a multiple target expression */
396
+ PM_CONTEXT_MULTI_TARGET,
397
+
382
398
  /** a parenthesized expression */
383
399
  PM_CONTEXT_PARENS,
384
400
 
@@ -505,9 +521,9 @@ typedef struct {
505
521
  /** The type of shareable constant value that can be set. */
506
522
  typedef uint8_t pm_shareable_constant_value_t;
507
523
  static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
508
- static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = 0x1;
509
- static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = 0x2;
510
- static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = 0x4;
524
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
525
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
526
+ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
511
527
 
512
528
  /**
513
529
  * This tracks an individual local variable in a certain lexical context, as
@@ -546,6 +562,17 @@ typedef struct pm_locals {
546
562
  pm_local_t *locals;
547
563
  } pm_locals_t;
548
564
 
565
+ /** The flags about scope parameters that can be set. */
566
+ typedef uint8_t pm_scope_parameters_t;
567
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
568
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
569
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
570
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
571
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
572
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
573
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
574
+ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
575
+
549
576
  /**
550
577
  * This struct represents a node in a linked list of scopes. Some scopes can see
551
578
  * into their parent scopes, while others cannot.
@@ -557,10 +584,19 @@ typedef struct pm_scope {
557
584
  /** The IDs of the locals in the given scope. */
558
585
  pm_locals_t locals;
559
586
 
587
+ /**
588
+ * This is a list of the implicit parameters contained within the block.
589
+ * These will be processed after the block is parsed to determine the kind
590
+ * of parameters node that should be used and to check if any errors need to
591
+ * be added.
592
+ */
593
+ pm_node_list_t implicit_parameters;
594
+
560
595
  /**
561
596
  * This is a bitfield that indicates the parameters that are being used in
562
- * this scope. It is a combination of the PM_SCOPE_PARAMS_* constants. There
563
- * are three different kinds of parameters that can be used in a scope:
597
+ * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
598
+ * There are three different kinds of parameters that can be used in a
599
+ * scope:
564
600
  *
565
601
  * - Ordinary parameters (e.g., def foo(bar); end)
566
602
  * - Numbered parameters (e.g., def foo; _1; end)
@@ -575,15 +611,7 @@ typedef struct pm_scope {
575
611
  * - def foo(&); end
576
612
  * - def foo(...); end
577
613
  */
578
- uint8_t parameters;
579
-
580
- /**
581
- * An integer indicating the number of numbered parameters on this scope.
582
- * This is necessary to determine if child blocks are allowed to use
583
- * numbered parameters, and to pass information to consumers of the AST
584
- * about how many numbered parameters exist.
585
- */
586
- int8_t numbered_parameters;
614
+ pm_scope_parameters_t parameters;
587
615
 
588
616
  /**
589
617
  * The current state of constant shareability for this scope. This is
@@ -598,20 +626,6 @@ typedef struct pm_scope {
598
626
  bool closed;
599
627
  } pm_scope_t;
600
628
 
601
- static const uint8_t PM_SCOPE_PARAMETERS_NONE = 0x0;
602
- static const uint8_t PM_SCOPE_PARAMETERS_ORDINARY = 0x1;
603
- static const uint8_t PM_SCOPE_PARAMETERS_NUMBERED = 0x2;
604
- static const uint8_t PM_SCOPE_PARAMETERS_IT = 0x4;
605
- static const uint8_t PM_SCOPE_PARAMETERS_TYPE_MASK = 0x7;
606
-
607
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x8;
608
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x10;
609
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x20;
610
- static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x40;
611
-
612
- static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED = -1;
613
- static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_NONE = 0;
614
-
615
629
  /**
616
630
  * A struct that represents a stack of boolean values.
617
631
  */
@@ -624,6 +638,13 @@ typedef uint32_t pm_state_stack_t;
624
638
  * it's considering.
625
639
  */
626
640
  struct pm_parser {
641
+ /**
642
+ * The next node identifier that will be assigned. This is a unique
643
+ * identifier used to track nodes such that the syntax tree can be dropped
644
+ * but the node can be found through another parse.
645
+ */
646
+ uint32_t node_id;
647
+
627
648
  /** The current state of the lexer. */
628
649
  pm_lex_state_t lex_state;
629
650
 
@@ -853,12 +874,27 @@ struct pm_parser {
853
874
  */
854
875
  bool parsing_eval;
855
876
 
877
+ /**
878
+ * Whether or not we are parsing a "partial" script, which is a script that
879
+ * will be evaluated in the context of another script, so we should not
880
+ * check jumps (next/break/etc.) for validity.
881
+ */
882
+ bool partial_script;
883
+
856
884
  /** Whether or not we're at the beginning of a command. */
857
885
  bool command_start;
858
886
 
859
887
  /** Whether or not we're currently recovering from a syntax error. */
860
888
  bool recovering;
861
889
 
890
+ /**
891
+ * This is very specialized behavior for when you want to parse in a context
892
+ * that does not respect encoding comments. Its main use case is translating
893
+ * into the whitequark/parser AST which re-encodes source files in UTF-8
894
+ * before they are parsed and ignores encoding comments.
895
+ */
896
+ bool encoding_locked;
897
+
862
898
  /**
863
899
  * Whether or not the encoding has been changed by a magic comment. We use
864
900
  * this to provide a fast path for the lexer instead of going through the
@@ -886,6 +922,12 @@ struct pm_parser {
886
922
  * characters.
887
923
  */
888
924
  bool current_regular_expression_ascii_only;
925
+
926
+ /**
927
+ * By default, Ruby always warns about mismatched indentation. This can be
928
+ * toggled with a magic comment.
929
+ */
930
+ bool warn_mismatched_indentation;
889
931
  };
890
932
 
891
933
  #endif
@@ -10,7 +10,6 @@
10
10
  #include "prism/parser.h"
11
11
  #include "prism/encoding.h"
12
12
  #include "prism/util/pm_memchr.h"
13
- #include "prism/util/pm_string_list.h"
14
13
  #include "prism/util/pm_string.h"
15
14
 
16
15
  #include <stdbool.h>
@@ -18,16 +17,27 @@
18
17
  #include <string.h>
19
18
 
20
19
  /**
21
- * Parse a regular expression and extract the names of all of the named capture
22
- * groups.
20
+ * This callback is called when a named capture group is found.
21
+ */
22
+ typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
23
+
24
+ /**
25
+ * This callback is called when a parse error is found.
26
+ */
27
+ typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
28
+
29
+ /**
30
+ * Parse a regular expression.
23
31
  *
32
+ * @param parser The parser that is currently being used.
24
33
  * @param source The source code to parse.
25
34
  * @param size The size of the source code.
26
- * @param named_captures The list to add the names of the named capture groups.
27
- * @param encoding_changed Whether or not the encoding changed from the default.
28
- * @param encoding The encoding of the source code.
29
- * @return Whether or not the parsing was successful.
35
+ * @param extended_mode Whether to parse the regular expression in extended mode.
36
+ * @param name_callback The optional callback to call when a named capture group is found.
37
+ * @param name_data The optional data to pass to the name callback.
38
+ * @param error_callback The callback to call when a parse error is found.
39
+ * @param error_data The data to pass to the error callback.
30
40
  */
31
- PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding);
41
+ PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
32
42
 
33
43
  #endif