prism 0.29.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +115 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +1 -1
- data/README.md +4 -0
- data/config.yml +920 -148
- data/docs/build_system.md +8 -11
- data/docs/fuzzing.md +1 -1
- data/docs/parsing_rules.md +4 -1
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +22 -0
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2863 -2079
- data/ext/prism/extconf.rb +14 -37
- data/ext/prism/extension.c +241 -391
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +2156 -453
- data/include/prism/defines.h +58 -7
- data/include/prism/diagnostic.h +24 -6
- data/include/prism/node.h +0 -21
- data/include/prism/options.h +94 -3
- data/include/prism/parser.h +82 -40
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +22 -15
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +28 -12
- data/include/prism/version.h +3 -3
- data/include/prism.h +47 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +16 -1
- data/lib/prism/dot_visitor.rb +55 -34
- data/lib/prism/dsl.rb +660 -468
- data/lib/prism/ffi.rb +113 -8
- data/lib/prism/inspect_visitor.rb +296 -64
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +11 -6
- data/lib/prism/node.rb +4262 -5023
- data/lib/prism/node_ext.rb +91 -14
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +183 -6
- data/lib/prism/reflection.rb +12 -10
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +496 -609
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/compiler.rb +185 -155
- data/lib/prism/translation/parser/lexer.rb +26 -4
- data/lib/prism/translation/parser.rb +9 -4
- data/lib/prism/translation/ripper.rb +23 -25
- data/lib/prism/translation/ruby_parser.rb +86 -17
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +6 -8
- data/prism.gemspec +9 -5
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +1115 -1120
- data/rbi/prism/parse_result.rbi +29 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +36 -30
- data/sig/prism/dsl.rbs +190 -303
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +678 -632
- data/sig/prism/parse_result.rbs +22 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +103 -64
- data/src/diagnostic.c +64 -28
- data/src/node.c +502 -1739
- data/src/options.c +76 -27
- data/src/prettyprint.c +188 -112
- data/src/prism.c +3376 -2293
- data/src/regexp.c +208 -71
- data/src/serialize.c +182 -50
- data/src/static_literals.c +64 -85
- data/src/token_type.c +4 -4
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +53 -25
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +131 -80
- data/src/util/pm_strpbrk.c +32 -6
- metadata +11 -7
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/lib/prism/translation/parser/rubocop.rb +0 -73
- data/src/util/pm_string_list.c +0 -28
data/include/prism/defines.h
CHANGED
@@ -25,6 +25,15 @@
|
|
25
25
|
#define __STDC_FORMAT_MACROS
|
26
26
|
#include <inttypes.h>
|
27
27
|
|
28
|
+
/**
|
29
|
+
* When we are parsing using recursive descent, we want to protect against
|
30
|
+
* malicious payloads that could attempt to crash our parser. We do this by
|
31
|
+
* specifying a maximum depth to which we are allowed to recurse.
|
32
|
+
*/
|
33
|
+
#ifndef PRISM_DEPTH_MAXIMUM
|
34
|
+
#define PRISM_DEPTH_MAXIMUM 1000
|
35
|
+
#endif
|
36
|
+
|
28
37
|
/**
|
29
38
|
* By default, we compile with -fvisibility=hidden. When this is enabled, we
|
30
39
|
* need to mark certain functions as being publicly-visible. This macro does
|
@@ -119,14 +128,24 @@
|
|
119
128
|
#endif
|
120
129
|
|
121
130
|
/**
|
122
|
-
*
|
123
|
-
*
|
124
|
-
*
|
131
|
+
* If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
|
132
|
+
* related code from the library. All filesystem related code should be guarded
|
133
|
+
* by PRISM_HAS_FILESYSTEM.
|
125
134
|
*/
|
126
|
-
#
|
127
|
-
#
|
128
|
-
#
|
129
|
-
|
135
|
+
#ifndef PRISM_HAS_NO_FILESYSTEM
|
136
|
+
# define PRISM_HAS_FILESYSTEM
|
137
|
+
#endif
|
138
|
+
|
139
|
+
/**
|
140
|
+
* On POSIX systems, isinf accepts a float, a double, or a long double.
|
141
|
+
* But mingw didn't provide an isinf macro, only an isinf function that only
|
142
|
+
* accepts floats, so we need to use _finite instead.
|
143
|
+
*/
|
144
|
+
#ifdef __MINGW64__
|
145
|
+
#include <float.h>
|
146
|
+
#define PRISM_ISINF(x) (!_finite(x))
|
147
|
+
#else
|
148
|
+
#define PRISM_ISINF(x) isinf(x)
|
130
149
|
#endif
|
131
150
|
|
132
151
|
/**
|
@@ -203,4 +222,36 @@
|
|
203
222
|
#define PRISM_ENCODING_EXCLUDE_FULL
|
204
223
|
#endif
|
205
224
|
|
225
|
+
/**
|
226
|
+
* Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
|
227
|
+
* branch prediction.
|
228
|
+
*/
|
229
|
+
#if defined(__GNUC__) || defined(__clang__)
|
230
|
+
/** The compiler should predict that this branch will be taken. */
|
231
|
+
#define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
|
232
|
+
|
233
|
+
/** The compiler should predict that this branch will not be taken. */
|
234
|
+
#define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
|
235
|
+
#else
|
236
|
+
/** Void because this platform does not support branch prediction hints. */
|
237
|
+
#define PRISM_LIKELY(x) (x)
|
238
|
+
|
239
|
+
/** Void because this platform does not support branch prediction hints. */
|
240
|
+
#define PRISM_UNLIKELY(x) (x)
|
241
|
+
#endif
|
242
|
+
|
243
|
+
/**
|
244
|
+
* We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
|
245
|
+
* Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
|
246
|
+
*/
|
247
|
+
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later
|
248
|
+
#define PRISM_FALLTHROUGH [[fallthrough]];
|
249
|
+
#elif defined(__GNUC__) || defined(__clang__)
|
250
|
+
#define PRISM_FALLTHROUGH __attribute__((fallthrough));
|
251
|
+
#elif defined(_MSC_VER)
|
252
|
+
#define PRISM_FALLTHROUGH __fallthrough;
|
253
|
+
#else
|
254
|
+
#define PRISM_FALLTHROUGH
|
255
|
+
#endif
|
256
|
+
|
206
257
|
#endif
|
data/include/prism/diagnostic.h
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
|
1
|
+
/*----------------------------------------------------------------------------*/
|
2
2
|
/* This file is generated by the templates/template.rb script and should not */
|
3
3
|
/* be modified manually. See */
|
4
4
|
/* templates/include/prism/diagnostic.h.erb */
|
5
5
|
/* if you are looking to modify the */
|
6
6
|
/* template */
|
7
|
-
|
7
|
+
/*----------------------------------------------------------------------------*/
|
8
8
|
|
9
9
|
/**
|
10
10
|
* @file diagnostic.h
|
@@ -44,7 +44,6 @@ typedef enum {
|
|
44
44
|
PM_ERR_ARGUMENT_FORMAL_GLOBAL,
|
45
45
|
PM_ERR_ARGUMENT_FORMAL_IVAR,
|
46
46
|
PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
|
47
|
-
PM_ERR_ARGUMENT_IN,
|
48
47
|
PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND,
|
49
48
|
PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
|
50
49
|
PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
|
@@ -110,8 +109,10 @@ typedef enum {
|
|
110
109
|
PM_ERR_ESCAPE_INVALID_META_REPEAT,
|
111
110
|
PM_ERR_ESCAPE_INVALID_UNICODE,
|
112
111
|
PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
|
112
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_LIST,
|
113
113
|
PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
|
114
114
|
PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
|
115
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_SHORT,
|
115
116
|
PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
|
116
117
|
PM_ERR_EXPECT_ARGUMENT,
|
117
118
|
PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
|
@@ -126,6 +127,7 @@ typedef enum {
|
|
126
127
|
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
|
127
128
|
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
|
128
129
|
PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
|
130
|
+
PM_ERR_EXPECT_FOR_DELIMITER,
|
129
131
|
PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
|
130
132
|
PM_ERR_EXPECT_IN_DELIMITER,
|
131
133
|
PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
|
@@ -134,6 +136,7 @@ typedef enum {
|
|
134
136
|
PM_ERR_EXPECT_RPAREN,
|
135
137
|
PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
|
136
138
|
PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
|
139
|
+
PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER,
|
137
140
|
PM_ERR_EXPECT_STRING_CONTENT,
|
138
141
|
PM_ERR_EXPECT_WHEN_DELIMITER,
|
139
142
|
PM_ERR_EXPRESSION_BARE_HASH,
|
@@ -143,6 +146,7 @@ typedef enum {
|
|
143
146
|
PM_ERR_EXPRESSION_NOT_WRITABLE_FILE,
|
144
147
|
PM_ERR_EXPRESSION_NOT_WRITABLE_LINE,
|
145
148
|
PM_ERR_EXPRESSION_NOT_WRITABLE_NIL,
|
149
|
+
PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED,
|
146
150
|
PM_ERR_EXPRESSION_NOT_WRITABLE_SELF,
|
147
151
|
PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE,
|
148
152
|
PM_ERR_FLOAT_PARSE,
|
@@ -166,6 +170,7 @@ typedef enum {
|
|
166
170
|
PM_ERR_INSTANCE_VARIABLE_BARE,
|
167
171
|
PM_ERR_INVALID_BLOCK_EXIT,
|
168
172
|
PM_ERR_INVALID_CHARACTER,
|
173
|
+
PM_ERR_INVALID_COMMA,
|
169
174
|
PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
|
170
175
|
PM_ERR_INVALID_ESCAPE_CHARACTER,
|
171
176
|
PM_ERR_INVALID_FLOAT_EXPONENT,
|
@@ -182,6 +187,7 @@ typedef enum {
|
|
182
187
|
PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER,
|
183
188
|
PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING,
|
184
189
|
PM_ERR_INVALID_PERCENT,
|
190
|
+
PM_ERR_INVALID_PERCENT_EOF,
|
185
191
|
PM_ERR_INVALID_PRINTABLE_CHARACTER,
|
186
192
|
PM_ERR_INVALID_RETRY_AFTER_ELSE,
|
187
193
|
PM_ERR_INVALID_RETRY_AFTER_ENSURE,
|
@@ -210,12 +216,15 @@ typedef enum {
|
|
210
216
|
PM_ERR_MODULE_TERM,
|
211
217
|
PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
|
212
218
|
PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
|
219
|
+
PM_ERR_NESTING_TOO_DEEP,
|
213
220
|
PM_ERR_NO_LOCAL_VARIABLE,
|
221
|
+
PM_ERR_NON_ASSOCIATIVE_OPERATOR,
|
214
222
|
PM_ERR_NOT_EXPRESSION,
|
215
223
|
PM_ERR_NUMBER_LITERAL_UNDERSCORE,
|
224
|
+
PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK,
|
216
225
|
PM_ERR_NUMBERED_PARAMETER_IT,
|
217
226
|
PM_ERR_NUMBERED_PARAMETER_ORDINARY,
|
218
|
-
|
227
|
+
PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK,
|
219
228
|
PM_ERR_OPERATOR_MULTI_ASSIGN,
|
220
229
|
PM_ERR_OPERATOR_WRITE_ARGUMENTS,
|
221
230
|
PM_ERR_OPERATOR_WRITE_BLOCK,
|
@@ -232,8 +241,9 @@ typedef enum {
|
|
232
241
|
PM_ERR_PARAMETER_SPLAT_MULTI,
|
233
242
|
PM_ERR_PARAMETER_STAR,
|
234
243
|
PM_ERR_PARAMETER_UNEXPECTED_FWD,
|
235
|
-
PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
|
236
244
|
PM_ERR_PARAMETER_UNEXPECTED_NO_KW,
|
245
|
+
PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
|
246
|
+
PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS,
|
237
247
|
PM_ERR_PATTERN_CAPTURE_DUPLICATE,
|
238
248
|
PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
|
239
249
|
PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
|
@@ -245,6 +255,7 @@ typedef enum {
|
|
245
255
|
PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
|
246
256
|
PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
|
247
257
|
PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
|
258
|
+
PM_ERR_PATTERN_FIND_MISSING_INNER,
|
248
259
|
PM_ERR_PATTERN_HASH_IMPLICIT,
|
249
260
|
PM_ERR_PATTERN_HASH_KEY,
|
250
261
|
PM_ERR_PATTERN_HASH_KEY_DUPLICATE,
|
@@ -262,6 +273,7 @@ typedef enum {
|
|
262
273
|
PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING,
|
263
274
|
PM_ERR_REGEXP_INVALID_UNICODE_RANGE,
|
264
275
|
PM_ERR_REGEXP_NON_ESCAPED_MBC,
|
276
|
+
PM_ERR_REGEXP_PARSE_ERROR,
|
265
277
|
PM_ERR_REGEXP_TERM,
|
266
278
|
PM_ERR_REGEXP_UNKNOWN_OPTIONS,
|
267
279
|
PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP,
|
@@ -286,11 +298,15 @@ typedef enum {
|
|
286
298
|
PM_ERR_TERNARY_COLON,
|
287
299
|
PM_ERR_TERNARY_EXPRESSION_FALSE,
|
288
300
|
PM_ERR_TERNARY_EXPRESSION_TRUE,
|
301
|
+
PM_ERR_UNARY_DISALLOWED,
|
289
302
|
PM_ERR_UNARY_RECEIVER,
|
290
303
|
PM_ERR_UNDEF_ARGUMENT,
|
291
304
|
PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
|
292
305
|
PM_ERR_UNEXPECTED_INDEX_BLOCK,
|
293
306
|
PM_ERR_UNEXPECTED_INDEX_KEYWORDS,
|
307
|
+
PM_ERR_UNEXPECTED_LABEL,
|
308
|
+
PM_ERR_UNEXPECTED_MULTI_WRITE,
|
309
|
+
PM_ERR_UNEXPECTED_RANGE_OPERATOR,
|
294
310
|
PM_ERR_UNEXPECTED_SAFE_NAVIGATION,
|
295
311
|
PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
|
296
312
|
PM_ERR_UNEXPECTED_TOKEN_IGNORE,
|
@@ -303,6 +319,7 @@ typedef enum {
|
|
303
319
|
PM_ERR_XSTRING_TERM,
|
304
320
|
|
305
321
|
// These are the warning diagnostics.
|
322
|
+
PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
|
306
323
|
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
|
307
324
|
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
|
308
325
|
PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND,
|
@@ -318,10 +335,11 @@ typedef enum {
|
|
318
335
|
PM_WARN_DUPLICATED_WHEN_CLAUSE,
|
319
336
|
PM_WARN_FLOAT_OUT_OF_RANGE,
|
320
337
|
PM_WARN_IGNORED_FROZEN_STRING_LITERAL,
|
338
|
+
PM_WARN_INDENTATION_MISMATCH,
|
321
339
|
PM_WARN_INTEGER_IN_FLIP_FLOP,
|
322
340
|
PM_WARN_INVALID_CHARACTER,
|
341
|
+
PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
|
323
342
|
PM_WARN_INVALID_NUMBERED_REFERENCE,
|
324
|
-
PM_WARN_INVALID_SHAREABLE_CONSTANT_VALUE,
|
325
343
|
PM_WARN_KEYWORD_EOL,
|
326
344
|
PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
|
327
345
|
PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
|
data/include/prism/node.h
CHANGED
@@ -56,27 +56,6 @@ void pm_node_list_free(pm_node_list_t *list);
|
|
56
56
|
*/
|
57
57
|
PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
|
58
58
|
|
59
|
-
/**
|
60
|
-
* This struct stores the information gathered by the pm_node_memsize function.
|
61
|
-
* It contains both the memory footprint and additionally metadata about the
|
62
|
-
* shape of the tree.
|
63
|
-
*/
|
64
|
-
typedef struct {
|
65
|
-
/** The total memory footprint of the node and all of its children. */
|
66
|
-
size_t memsize;
|
67
|
-
|
68
|
-
/** The number of children the node has. */
|
69
|
-
size_t node_count;
|
70
|
-
} pm_memsize_t;
|
71
|
-
|
72
|
-
/**
|
73
|
-
* Calculates the memory footprint of a given node.
|
74
|
-
*
|
75
|
-
* @param node The node to calculate the memory footprint of.
|
76
|
-
* @param memsize The memory footprint of the node and all of its children.
|
77
|
-
*/
|
78
|
-
PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
|
79
|
-
|
80
59
|
/**
|
81
60
|
* Returns a string representation of the given node type.
|
82
61
|
*
|
data/include/prism/options.h
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
#define PRISM_OPTIONS_H
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
|
+
#include "prism/util/pm_char.h"
|
10
11
|
#include "prism/util/pm_string.h"
|
11
12
|
|
12
13
|
#include <stdbool.h>
|
@@ -40,6 +41,23 @@ typedef struct pm_options_scope {
|
|
40
41
|
pm_string_t *locals;
|
41
42
|
} pm_options_scope_t;
|
42
43
|
|
44
|
+
// Forward declaration needed by the callback typedef.
|
45
|
+
struct pm_options;
|
46
|
+
|
47
|
+
/**
|
48
|
+
* The callback called when additional switches are found in a shebang comment
|
49
|
+
* that need to be processed by the runtime.
|
50
|
+
*
|
51
|
+
* @param options The options struct that may be updated by this callback.
|
52
|
+
* Certain fields will be checked for changes, specifically encoding,
|
53
|
+
* command_line, and frozen_string_literal.
|
54
|
+
* @param source The source of the shebang comment.
|
55
|
+
* @param length The length of the source.
|
56
|
+
* @param shebang_callback_data Any additional data that should be passed along
|
57
|
+
* to the callback.
|
58
|
+
*/
|
59
|
+
typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
|
60
|
+
|
43
61
|
/**
|
44
62
|
* The version of Ruby syntax that we should be parsing with. This is used to
|
45
63
|
* allow consumers to specify which behavior they want in case they need to
|
@@ -56,7 +74,19 @@ typedef enum {
|
|
56
74
|
/**
|
57
75
|
* The options that can be passed to the parser.
|
58
76
|
*/
|
59
|
-
typedef struct {
|
77
|
+
typedef struct pm_options {
|
78
|
+
/**
|
79
|
+
* The callback to call when additional switches are found in a shebang
|
80
|
+
* comment.
|
81
|
+
*/
|
82
|
+
pm_options_shebang_callback_t shebang_callback;
|
83
|
+
|
84
|
+
/**
|
85
|
+
* Any additional data that should be passed along to the shebang callback
|
86
|
+
* if one was set.
|
87
|
+
*/
|
88
|
+
void *shebang_callback_data;
|
89
|
+
|
60
90
|
/** The name of the file that is currently being parsed. */
|
61
91
|
pm_string_t filepath;
|
62
92
|
|
@@ -103,6 +133,30 @@ typedef struct {
|
|
103
133
|
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
|
104
134
|
*/
|
105
135
|
int8_t frozen_string_literal;
|
136
|
+
|
137
|
+
/**
|
138
|
+
* Whether or not the encoding magic comments should be respected. This is a
|
139
|
+
* niche use-case where you want to parse a file with a specific encoding
|
140
|
+
* but ignore any encoding magic comments at the top of the file.
|
141
|
+
*/
|
142
|
+
bool encoding_locked;
|
143
|
+
|
144
|
+
/**
|
145
|
+
* When the file being parsed is the main script, the shebang will be
|
146
|
+
* considered for command-line flags (or for implicit -x). The caller needs
|
147
|
+
* to pass this information to the parser so that it can behave correctly.
|
148
|
+
*/
|
149
|
+
bool main_script;
|
150
|
+
|
151
|
+
/**
|
152
|
+
* When the file being parsed is considered a "partial" script, jumps will
|
153
|
+
* not be marked as errors if they are not contained within loops/blocks.
|
154
|
+
* This is used in the case that you're parsing a script that you know will
|
155
|
+
* be embedded inside another script later, but you do not have that context
|
156
|
+
* yet. For example, when parsing an ERB template that will be evaluated
|
157
|
+
* inside another script.
|
158
|
+
*/
|
159
|
+
bool partial_script;
|
106
160
|
} pm_options_t;
|
107
161
|
|
108
162
|
/**
|
@@ -142,6 +196,16 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
|
|
142
196
|
*/
|
143
197
|
static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
|
144
198
|
|
199
|
+
/**
|
200
|
+
* Set the shebang callback option on the given options struct.
|
201
|
+
*
|
202
|
+
* @param options The options struct to set the shebang callback on.
|
203
|
+
* @param shebang_callback The shebang callback to set.
|
204
|
+
* @param shebang_callback_data Any additional data that should be passed along
|
205
|
+
* to the callback.
|
206
|
+
*/
|
207
|
+
PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
|
208
|
+
|
145
209
|
/**
|
146
210
|
* Set the filepath option on the given options struct.
|
147
211
|
*
|
@@ -166,6 +230,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
|
|
166
230
|
*/
|
167
231
|
PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
|
168
232
|
|
233
|
+
/**
|
234
|
+
* Set the encoding_locked option on the given options struct.
|
235
|
+
*
|
236
|
+
* @param options The options struct to set the encoding_locked value on.
|
237
|
+
* @param encoding_locked The encoding_locked value to set.
|
238
|
+
*/
|
239
|
+
PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
|
240
|
+
|
169
241
|
/**
|
170
242
|
* Set the frozen string literal option on the given options struct.
|
171
243
|
*
|
@@ -194,6 +266,22 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
|
|
194
266
|
*/
|
195
267
|
PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
|
196
268
|
|
269
|
+
/**
|
270
|
+
* Set the main script option on the given options struct.
|
271
|
+
*
|
272
|
+
* @param options The options struct to set the main script value on.
|
273
|
+
* @param main_script The main script value to set.
|
274
|
+
*/
|
275
|
+
PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
|
276
|
+
|
277
|
+
/**
|
278
|
+
* Set the partial script option on the given options struct.
|
279
|
+
*
|
280
|
+
* @param options The options struct to set the partial script value on.
|
281
|
+
* @param partial_script The partial script value to set.
|
282
|
+
*/
|
283
|
+
PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
|
284
|
+
|
197
285
|
/**
|
198
286
|
* Allocate and zero out the scopes array on the given options struct.
|
199
287
|
*
|
@@ -261,6 +349,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
|
|
261
349
|
* | `1` | -l command line option |
|
262
350
|
* | `1` | -a command line option |
|
263
351
|
* | `1` | the version |
|
352
|
+
* | `1` | encoding locked |
|
353
|
+
* | `1` | main script |
|
354
|
+
* | `1` | partial script |
|
264
355
|
* | `4` | the number of scopes |
|
265
356
|
* | ... | the scopes |
|
266
357
|
*
|
@@ -293,8 +384,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
|
|
293
384
|
* * The encoding can have a length of 0, in which case we'll use the default
|
294
385
|
* encoding (UTF-8). If it's not 0, it should correspond to a name of an
|
295
386
|
* encoding that can be passed to `Encoding.find` in Ruby.
|
296
|
-
* * The frozen string literal
|
297
|
-
* their values should be either 0 or 1.
|
387
|
+
* * The frozen string literal, encoding locked, main script, and partial script
|
388
|
+
* fields are booleans, so their values should be either 0 or 1.
|
298
389
|
* * The number of scopes can be 0.
|
299
390
|
*
|
300
391
|
* @param options The options struct to deserialize into.
|
data/include/prism/parser.h
CHANGED
@@ -82,6 +82,23 @@ typedef enum {
|
|
82
82
|
PM_HEREDOC_INDENT_TILDE,
|
83
83
|
} pm_heredoc_indent_t;
|
84
84
|
|
85
|
+
/**
|
86
|
+
* All of the information that must be stored in order to lex a heredoc.
|
87
|
+
*/
|
88
|
+
typedef struct {
|
89
|
+
/** A pointer to the start of the heredoc identifier. */
|
90
|
+
const uint8_t *ident_start;
|
91
|
+
|
92
|
+
/** The length of the heredoc identifier. */
|
93
|
+
size_t ident_length;
|
94
|
+
|
95
|
+
/** The type of quote that the heredoc uses. */
|
96
|
+
pm_heredoc_quote_t quote;
|
97
|
+
|
98
|
+
/** The type of indentation that the heredoc uses. */
|
99
|
+
pm_heredoc_indent_t indent;
|
100
|
+
} pm_heredoc_lex_mode_t;
|
101
|
+
|
85
102
|
/**
|
86
103
|
* When lexing Ruby source, the lexer has a small amount of state to tell which
|
87
104
|
* kind of token it is currently lexing. For example, when we find the start of
|
@@ -210,17 +227,10 @@ typedef struct pm_lex_mode {
|
|
210
227
|
} string;
|
211
228
|
|
212
229
|
struct {
|
213
|
-
/**
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
size_t ident_length;
|
218
|
-
|
219
|
-
/** The type of quote that the heredoc uses. */
|
220
|
-
pm_heredoc_quote_t quote;
|
221
|
-
|
222
|
-
/** The type of indentation that the heredoc uses. */
|
223
|
-
pm_heredoc_indent_t indent;
|
230
|
+
/**
|
231
|
+
* All of the data necessary to lex a heredoc.
|
232
|
+
*/
|
233
|
+
pm_heredoc_lex_mode_t base;
|
224
234
|
|
225
235
|
/**
|
226
236
|
* This is the pointer to the character where lexing should resume
|
@@ -233,7 +243,7 @@ typedef struct pm_lex_mode {
|
|
233
243
|
* line so that we know how much to dedent each line in the case of
|
234
244
|
* a tilde heredoc.
|
235
245
|
*/
|
236
|
-
size_t common_whitespace;
|
246
|
+
size_t *common_whitespace;
|
237
247
|
|
238
248
|
/** True if the previous token ended with a line continuation. */
|
239
249
|
bool line_continuation;
|
@@ -364,6 +374,9 @@ typedef enum {
|
|
364
374
|
/** a rescue statement within a lambda expression */
|
365
375
|
PM_CONTEXT_LAMBDA_RESCUE,
|
366
376
|
|
377
|
+
/** the predicate clause of a loop statement */
|
378
|
+
PM_CONTEXT_LOOP_PREDICATE,
|
379
|
+
|
367
380
|
/** the top level context */
|
368
381
|
PM_CONTEXT_MAIN,
|
369
382
|
|
@@ -379,6 +392,9 @@ typedef enum {
|
|
379
392
|
/** a rescue statement within a module statement */
|
380
393
|
PM_CONTEXT_MODULE_RESCUE,
|
381
394
|
|
395
|
+
/** a multiple target expression */
|
396
|
+
PM_CONTEXT_MULTI_TARGET,
|
397
|
+
|
382
398
|
/** a parenthesized expression */
|
383
399
|
PM_CONTEXT_PARENS,
|
384
400
|
|
@@ -505,9 +521,9 @@ typedef struct {
|
|
505
521
|
/** The type of shareable constant value that can be set. */
|
506
522
|
typedef uint8_t pm_shareable_constant_value_t;
|
507
523
|
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
|
508
|
-
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL =
|
509
|
-
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING =
|
510
|
-
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY =
|
524
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
|
525
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
|
526
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
|
511
527
|
|
512
528
|
/**
|
513
529
|
* This tracks an individual local variable in a certain lexical context, as
|
@@ -546,6 +562,17 @@ typedef struct pm_locals {
|
|
546
562
|
pm_local_t *locals;
|
547
563
|
} pm_locals_t;
|
548
564
|
|
565
|
+
/** The flags about scope parameters that can be set. */
|
566
|
+
typedef uint8_t pm_scope_parameters_t;
|
567
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
|
568
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
|
569
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
|
570
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
|
571
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
|
572
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
|
573
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
|
574
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
|
575
|
+
|
549
576
|
/**
|
550
577
|
* This struct represents a node in a linked list of scopes. Some scopes can see
|
551
578
|
* into their parent scopes, while others cannot.
|
@@ -557,10 +584,19 @@ typedef struct pm_scope {
|
|
557
584
|
/** The IDs of the locals in the given scope. */
|
558
585
|
pm_locals_t locals;
|
559
586
|
|
587
|
+
/**
|
588
|
+
* This is a list of the implicit parameters contained within the block.
|
589
|
+
* These will be processed after the block is parsed to determine the kind
|
590
|
+
* of parameters node that should be used and to check if any errors need to
|
591
|
+
* be added.
|
592
|
+
*/
|
593
|
+
pm_node_list_t implicit_parameters;
|
594
|
+
|
560
595
|
/**
|
561
596
|
* This is a bitfield that indicates the parameters that are being used in
|
562
|
-
* this scope. It is a combination of the
|
563
|
-
* are three different kinds of parameters that can be used in a
|
597
|
+
* this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
|
598
|
+
* There are three different kinds of parameters that can be used in a
|
599
|
+
* scope:
|
564
600
|
*
|
565
601
|
* - Ordinary parameters (e.g., def foo(bar); end)
|
566
602
|
* - Numbered parameters (e.g., def foo; _1; end)
|
@@ -575,15 +611,7 @@ typedef struct pm_scope {
|
|
575
611
|
* - def foo(&); end
|
576
612
|
* - def foo(...); end
|
577
613
|
*/
|
578
|
-
|
579
|
-
|
580
|
-
/**
|
581
|
-
* An integer indicating the number of numbered parameters on this scope.
|
582
|
-
* This is necessary to determine if child blocks are allowed to use
|
583
|
-
* numbered parameters, and to pass information to consumers of the AST
|
584
|
-
* about how many numbered parameters exist.
|
585
|
-
*/
|
586
|
-
int8_t numbered_parameters;
|
614
|
+
pm_scope_parameters_t parameters;
|
587
615
|
|
588
616
|
/**
|
589
617
|
* The current state of constant shareability for this scope. This is
|
@@ -598,20 +626,6 @@ typedef struct pm_scope {
|
|
598
626
|
bool closed;
|
599
627
|
} pm_scope_t;
|
600
628
|
|
601
|
-
static const uint8_t PM_SCOPE_PARAMETERS_NONE = 0x0;
|
602
|
-
static const uint8_t PM_SCOPE_PARAMETERS_ORDINARY = 0x1;
|
603
|
-
static const uint8_t PM_SCOPE_PARAMETERS_NUMBERED = 0x2;
|
604
|
-
static const uint8_t PM_SCOPE_PARAMETERS_IT = 0x4;
|
605
|
-
static const uint8_t PM_SCOPE_PARAMETERS_TYPE_MASK = 0x7;
|
606
|
-
|
607
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x8;
|
608
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x10;
|
609
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x20;
|
610
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x40;
|
611
|
-
|
612
|
-
static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED = -1;
|
613
|
-
static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_NONE = 0;
|
614
|
-
|
615
629
|
/**
|
616
630
|
* A struct that represents a stack of boolean values.
|
617
631
|
*/
|
@@ -624,6 +638,13 @@ typedef uint32_t pm_state_stack_t;
|
|
624
638
|
* it's considering.
|
625
639
|
*/
|
626
640
|
struct pm_parser {
|
641
|
+
/**
|
642
|
+
* The next node identifier that will be assigned. This is a unique
|
643
|
+
* identifier used to track nodes such that the syntax tree can be dropped
|
644
|
+
* but the node can be found through another parse.
|
645
|
+
*/
|
646
|
+
uint32_t node_id;
|
647
|
+
|
627
648
|
/** The current state of the lexer. */
|
628
649
|
pm_lex_state_t lex_state;
|
629
650
|
|
@@ -853,12 +874,27 @@ struct pm_parser {
|
|
853
874
|
*/
|
854
875
|
bool parsing_eval;
|
855
876
|
|
877
|
+
/**
|
878
|
+
* Whether or not we are parsing a "partial" script, which is a script that
|
879
|
+
* will be evaluated in the context of another script, so we should not
|
880
|
+
* check jumps (next/break/etc.) for validity.
|
881
|
+
*/
|
882
|
+
bool partial_script;
|
883
|
+
|
856
884
|
/** Whether or not we're at the beginning of a command. */
|
857
885
|
bool command_start;
|
858
886
|
|
859
887
|
/** Whether or not we're currently recovering from a syntax error. */
|
860
888
|
bool recovering;
|
861
889
|
|
890
|
+
/**
|
891
|
+
* This is very specialized behavior for when you want to parse in a context
|
892
|
+
* that does not respect encoding comments. Its main use case is translating
|
893
|
+
* into the whitequark/parser AST which re-encodes source files in UTF-8
|
894
|
+
* before they are parsed and ignores encoding comments.
|
895
|
+
*/
|
896
|
+
bool encoding_locked;
|
897
|
+
|
862
898
|
/**
|
863
899
|
* Whether or not the encoding has been changed by a magic comment. We use
|
864
900
|
* this to provide a fast path for the lexer instead of going through the
|
@@ -886,6 +922,12 @@ struct pm_parser {
|
|
886
922
|
* characters.
|
887
923
|
*/
|
888
924
|
bool current_regular_expression_ascii_only;
|
925
|
+
|
926
|
+
/**
|
927
|
+
* By default, Ruby always warns about mismatched indentation. This can be
|
928
|
+
* toggled with a magic comment.
|
929
|
+
*/
|
930
|
+
bool warn_mismatched_indentation;
|
889
931
|
};
|
890
932
|
|
891
933
|
#endif
|
data/include/prism/regexp.h
CHANGED
@@ -10,7 +10,6 @@
|
|
10
10
|
#include "prism/parser.h"
|
11
11
|
#include "prism/encoding.h"
|
12
12
|
#include "prism/util/pm_memchr.h"
|
13
|
-
#include "prism/util/pm_string_list.h"
|
14
13
|
#include "prism/util/pm_string.h"
|
15
14
|
|
16
15
|
#include <stdbool.h>
|
@@ -18,16 +17,27 @@
|
|
18
17
|
#include <string.h>
|
19
18
|
|
20
19
|
/**
|
21
|
-
*
|
22
|
-
|
20
|
+
* This callback is called when a named capture group is found.
|
21
|
+
*/
|
22
|
+
typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
|
23
|
+
|
24
|
+
/**
|
25
|
+
* This callback is called when a parse error is found.
|
26
|
+
*/
|
27
|
+
typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
|
28
|
+
|
29
|
+
/**
|
30
|
+
* Parse a regular expression.
|
23
31
|
*
|
32
|
+
* @param parser The parser that is currently being used.
|
24
33
|
* @param source The source code to parse.
|
25
34
|
* @param size The size of the source code.
|
26
|
-
* @param
|
27
|
-
* @param
|
28
|
-
* @param
|
29
|
-
* @
|
35
|
+
* @param extended_mode Whether to parse the regular expression in extended mode.
|
36
|
+
* @param name_callback The optional callback to call when a named capture group is found.
|
37
|
+
* @param name_data The optional data to pass to the name callback.
|
38
|
+
* @param error_callback The callback to call when a parse error is found.
|
39
|
+
* @param error_data The data to pass to the error callback.
|
30
40
|
*/
|
31
|
-
PRISM_EXPORTED_FUNCTION
|
41
|
+
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
|
32
42
|
|
33
43
|
#endif
|