prism 0.29.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +115 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +1 -1
- data/README.md +4 -0
- data/config.yml +920 -148
- data/docs/build_system.md +8 -11
- data/docs/fuzzing.md +1 -1
- data/docs/parsing_rules.md +4 -1
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +22 -0
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2863 -2079
- data/ext/prism/extconf.rb +14 -37
- data/ext/prism/extension.c +241 -391
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +2156 -453
- data/include/prism/defines.h +58 -7
- data/include/prism/diagnostic.h +24 -6
- data/include/prism/node.h +0 -21
- data/include/prism/options.h +94 -3
- data/include/prism/parser.h +82 -40
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +22 -15
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +28 -12
- data/include/prism/version.h +3 -3
- data/include/prism.h +47 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +16 -1
- data/lib/prism/dot_visitor.rb +55 -34
- data/lib/prism/dsl.rb +660 -468
- data/lib/prism/ffi.rb +113 -8
- data/lib/prism/inspect_visitor.rb +296 -64
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +11 -6
- data/lib/prism/node.rb +4262 -5023
- data/lib/prism/node_ext.rb +91 -14
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +183 -6
- data/lib/prism/reflection.rb +12 -10
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +496 -609
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/compiler.rb +185 -155
- data/lib/prism/translation/parser/lexer.rb +26 -4
- data/lib/prism/translation/parser.rb +9 -4
- data/lib/prism/translation/ripper.rb +23 -25
- data/lib/prism/translation/ruby_parser.rb +86 -17
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +6 -8
- data/prism.gemspec +9 -5
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +1115 -1120
- data/rbi/prism/parse_result.rbi +29 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +36 -30
- data/sig/prism/dsl.rbs +190 -303
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +678 -632
- data/sig/prism/parse_result.rbs +22 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +103 -64
- data/src/diagnostic.c +64 -28
- data/src/node.c +502 -1739
- data/src/options.c +76 -27
- data/src/prettyprint.c +188 -112
- data/src/prism.c +3376 -2293
- data/src/regexp.c +208 -71
- data/src/serialize.c +182 -50
- data/src/static_literals.c +64 -85
- data/src/token_type.c +4 -4
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +53 -25
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +131 -80
- data/src/util/pm_strpbrk.c +32 -6
- metadata +11 -7
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/lib/prism/translation/parser/rubocop.rb +0 -73
- data/src/util/pm_string_list.c +0 -28
data/include/prism/defines.h
CHANGED
@@ -25,6 +25,15 @@
|
|
25
25
|
#define __STDC_FORMAT_MACROS
|
26
26
|
#include <inttypes.h>
|
27
27
|
|
28
|
+
/**
|
29
|
+
* When we are parsing using recursive descent, we want to protect against
|
30
|
+
* malicious payloads that could attempt to crash our parser. We do this by
|
31
|
+
* specifying a maximum depth to which we are allowed to recurse.
|
32
|
+
*/
|
33
|
+
#ifndef PRISM_DEPTH_MAXIMUM
|
34
|
+
#define PRISM_DEPTH_MAXIMUM 1000
|
35
|
+
#endif
|
36
|
+
|
28
37
|
/**
|
29
38
|
* By default, we compile with -fvisibility=hidden. When this is enabled, we
|
30
39
|
* need to mark certain functions as being publicly-visible. This macro does
|
@@ -119,14 +128,24 @@
|
|
119
128
|
#endif
|
120
129
|
|
121
130
|
/**
|
122
|
-
*
|
123
|
-
*
|
124
|
-
*
|
131
|
+
* If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
|
132
|
+
* related code from the library. All filesystem related code should be guarded
|
133
|
+
* by PRISM_HAS_FILESYSTEM.
|
125
134
|
*/
|
126
|
-
#
|
127
|
-
#
|
128
|
-
#
|
129
|
-
|
135
|
+
#ifndef PRISM_HAS_NO_FILESYSTEM
|
136
|
+
# define PRISM_HAS_FILESYSTEM
|
137
|
+
#endif
|
138
|
+
|
139
|
+
/**
|
140
|
+
* On POSIX systems, isinf accepts a float, a double, or a long double.
|
141
|
+
* But mingw didn't provide an isinf macro, only an isinf function that only
|
142
|
+
* accepts floats, so we need to use _finite instead.
|
143
|
+
*/
|
144
|
+
#ifdef __MINGW64__
|
145
|
+
#include <float.h>
|
146
|
+
#define PRISM_ISINF(x) (!_finite(x))
|
147
|
+
#else
|
148
|
+
#define PRISM_ISINF(x) isinf(x)
|
130
149
|
#endif
|
131
150
|
|
132
151
|
/**
|
@@ -203,4 +222,36 @@
|
|
203
222
|
#define PRISM_ENCODING_EXCLUDE_FULL
|
204
223
|
#endif
|
205
224
|
|
225
|
+
/**
|
226
|
+
* Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
|
227
|
+
* branch prediction.
|
228
|
+
*/
|
229
|
+
#if defined(__GNUC__) || defined(__clang__)
|
230
|
+
/** The compiler should predict that this branch will be taken. */
|
231
|
+
#define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
|
232
|
+
|
233
|
+
/** The compiler should predict that this branch will not be taken. */
|
234
|
+
#define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
|
235
|
+
#else
|
236
|
+
/** Void because this platform does not support branch prediction hints. */
|
237
|
+
#define PRISM_LIKELY(x) (x)
|
238
|
+
|
239
|
+
/** Void because this platform does not support branch prediction hints. */
|
240
|
+
#define PRISM_UNLIKELY(x) (x)
|
241
|
+
#endif
|
242
|
+
|
243
|
+
/**
|
244
|
+
* We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
|
245
|
+
* Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
|
246
|
+
*/
|
247
|
+
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later
|
248
|
+
#define PRISM_FALLTHROUGH [[fallthrough]];
|
249
|
+
#elif defined(__GNUC__) || defined(__clang__)
|
250
|
+
#define PRISM_FALLTHROUGH __attribute__((fallthrough));
|
251
|
+
#elif defined(_MSC_VER)
|
252
|
+
#define PRISM_FALLTHROUGH __fallthrough;
|
253
|
+
#else
|
254
|
+
#define PRISM_FALLTHROUGH
|
255
|
+
#endif
|
256
|
+
|
206
257
|
#endif
|
data/include/prism/diagnostic.h
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
|
1
|
+
/*----------------------------------------------------------------------------*/
|
2
2
|
/* This file is generated by the templates/template.rb script and should not */
|
3
3
|
/* be modified manually. See */
|
4
4
|
/* templates/include/prism/diagnostic.h.erb */
|
5
5
|
/* if you are looking to modify the */
|
6
6
|
/* template */
|
7
|
-
|
7
|
+
/*----------------------------------------------------------------------------*/
|
8
8
|
|
9
9
|
/**
|
10
10
|
* @file diagnostic.h
|
@@ -44,7 +44,6 @@ typedef enum {
|
|
44
44
|
PM_ERR_ARGUMENT_FORMAL_GLOBAL,
|
45
45
|
PM_ERR_ARGUMENT_FORMAL_IVAR,
|
46
46
|
PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
|
47
|
-
PM_ERR_ARGUMENT_IN,
|
48
47
|
PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND,
|
49
48
|
PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
|
50
49
|
PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
|
@@ -110,8 +109,10 @@ typedef enum {
|
|
110
109
|
PM_ERR_ESCAPE_INVALID_META_REPEAT,
|
111
110
|
PM_ERR_ESCAPE_INVALID_UNICODE,
|
112
111
|
PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
|
112
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_LIST,
|
113
113
|
PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
|
114
114
|
PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
|
115
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_SHORT,
|
115
116
|
PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
|
116
117
|
PM_ERR_EXPECT_ARGUMENT,
|
117
118
|
PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
|
@@ -126,6 +127,7 @@ typedef enum {
|
|
126
127
|
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
|
127
128
|
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
|
128
129
|
PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
|
130
|
+
PM_ERR_EXPECT_FOR_DELIMITER,
|
129
131
|
PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
|
130
132
|
PM_ERR_EXPECT_IN_DELIMITER,
|
131
133
|
PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
|
@@ -134,6 +136,7 @@ typedef enum {
|
|
134
136
|
PM_ERR_EXPECT_RPAREN,
|
135
137
|
PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
|
136
138
|
PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
|
139
|
+
PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER,
|
137
140
|
PM_ERR_EXPECT_STRING_CONTENT,
|
138
141
|
PM_ERR_EXPECT_WHEN_DELIMITER,
|
139
142
|
PM_ERR_EXPRESSION_BARE_HASH,
|
@@ -143,6 +146,7 @@ typedef enum {
|
|
143
146
|
PM_ERR_EXPRESSION_NOT_WRITABLE_FILE,
|
144
147
|
PM_ERR_EXPRESSION_NOT_WRITABLE_LINE,
|
145
148
|
PM_ERR_EXPRESSION_NOT_WRITABLE_NIL,
|
149
|
+
PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED,
|
146
150
|
PM_ERR_EXPRESSION_NOT_WRITABLE_SELF,
|
147
151
|
PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE,
|
148
152
|
PM_ERR_FLOAT_PARSE,
|
@@ -166,6 +170,7 @@ typedef enum {
|
|
166
170
|
PM_ERR_INSTANCE_VARIABLE_BARE,
|
167
171
|
PM_ERR_INVALID_BLOCK_EXIT,
|
168
172
|
PM_ERR_INVALID_CHARACTER,
|
173
|
+
PM_ERR_INVALID_COMMA,
|
169
174
|
PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
|
170
175
|
PM_ERR_INVALID_ESCAPE_CHARACTER,
|
171
176
|
PM_ERR_INVALID_FLOAT_EXPONENT,
|
@@ -182,6 +187,7 @@ typedef enum {
|
|
182
187
|
PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER,
|
183
188
|
PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING,
|
184
189
|
PM_ERR_INVALID_PERCENT,
|
190
|
+
PM_ERR_INVALID_PERCENT_EOF,
|
185
191
|
PM_ERR_INVALID_PRINTABLE_CHARACTER,
|
186
192
|
PM_ERR_INVALID_RETRY_AFTER_ELSE,
|
187
193
|
PM_ERR_INVALID_RETRY_AFTER_ENSURE,
|
@@ -210,12 +216,15 @@ typedef enum {
|
|
210
216
|
PM_ERR_MODULE_TERM,
|
211
217
|
PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
|
212
218
|
PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
|
219
|
+
PM_ERR_NESTING_TOO_DEEP,
|
213
220
|
PM_ERR_NO_LOCAL_VARIABLE,
|
221
|
+
PM_ERR_NON_ASSOCIATIVE_OPERATOR,
|
214
222
|
PM_ERR_NOT_EXPRESSION,
|
215
223
|
PM_ERR_NUMBER_LITERAL_UNDERSCORE,
|
224
|
+
PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK,
|
216
225
|
PM_ERR_NUMBERED_PARAMETER_IT,
|
217
226
|
PM_ERR_NUMBERED_PARAMETER_ORDINARY,
|
218
|
-
|
227
|
+
PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK,
|
219
228
|
PM_ERR_OPERATOR_MULTI_ASSIGN,
|
220
229
|
PM_ERR_OPERATOR_WRITE_ARGUMENTS,
|
221
230
|
PM_ERR_OPERATOR_WRITE_BLOCK,
|
@@ -232,8 +241,9 @@ typedef enum {
|
|
232
241
|
PM_ERR_PARAMETER_SPLAT_MULTI,
|
233
242
|
PM_ERR_PARAMETER_STAR,
|
234
243
|
PM_ERR_PARAMETER_UNEXPECTED_FWD,
|
235
|
-
PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
|
236
244
|
PM_ERR_PARAMETER_UNEXPECTED_NO_KW,
|
245
|
+
PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
|
246
|
+
PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS,
|
237
247
|
PM_ERR_PATTERN_CAPTURE_DUPLICATE,
|
238
248
|
PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
|
239
249
|
PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
|
@@ -245,6 +255,7 @@ typedef enum {
|
|
245
255
|
PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
|
246
256
|
PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
|
247
257
|
PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
|
258
|
+
PM_ERR_PATTERN_FIND_MISSING_INNER,
|
248
259
|
PM_ERR_PATTERN_HASH_IMPLICIT,
|
249
260
|
PM_ERR_PATTERN_HASH_KEY,
|
250
261
|
PM_ERR_PATTERN_HASH_KEY_DUPLICATE,
|
@@ -262,6 +273,7 @@ typedef enum {
|
|
262
273
|
PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING,
|
263
274
|
PM_ERR_REGEXP_INVALID_UNICODE_RANGE,
|
264
275
|
PM_ERR_REGEXP_NON_ESCAPED_MBC,
|
276
|
+
PM_ERR_REGEXP_PARSE_ERROR,
|
265
277
|
PM_ERR_REGEXP_TERM,
|
266
278
|
PM_ERR_REGEXP_UNKNOWN_OPTIONS,
|
267
279
|
PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP,
|
@@ -286,11 +298,15 @@ typedef enum {
|
|
286
298
|
PM_ERR_TERNARY_COLON,
|
287
299
|
PM_ERR_TERNARY_EXPRESSION_FALSE,
|
288
300
|
PM_ERR_TERNARY_EXPRESSION_TRUE,
|
301
|
+
PM_ERR_UNARY_DISALLOWED,
|
289
302
|
PM_ERR_UNARY_RECEIVER,
|
290
303
|
PM_ERR_UNDEF_ARGUMENT,
|
291
304
|
PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
|
292
305
|
PM_ERR_UNEXPECTED_INDEX_BLOCK,
|
293
306
|
PM_ERR_UNEXPECTED_INDEX_KEYWORDS,
|
307
|
+
PM_ERR_UNEXPECTED_LABEL,
|
308
|
+
PM_ERR_UNEXPECTED_MULTI_WRITE,
|
309
|
+
PM_ERR_UNEXPECTED_RANGE_OPERATOR,
|
294
310
|
PM_ERR_UNEXPECTED_SAFE_NAVIGATION,
|
295
311
|
PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
|
296
312
|
PM_ERR_UNEXPECTED_TOKEN_IGNORE,
|
@@ -303,6 +319,7 @@ typedef enum {
|
|
303
319
|
PM_ERR_XSTRING_TERM,
|
304
320
|
|
305
321
|
// These are the warning diagnostics.
|
322
|
+
PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
|
306
323
|
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
|
307
324
|
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
|
308
325
|
PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND,
|
@@ -318,10 +335,11 @@ typedef enum {
|
|
318
335
|
PM_WARN_DUPLICATED_WHEN_CLAUSE,
|
319
336
|
PM_WARN_FLOAT_OUT_OF_RANGE,
|
320
337
|
PM_WARN_IGNORED_FROZEN_STRING_LITERAL,
|
338
|
+
PM_WARN_INDENTATION_MISMATCH,
|
321
339
|
PM_WARN_INTEGER_IN_FLIP_FLOP,
|
322
340
|
PM_WARN_INVALID_CHARACTER,
|
341
|
+
PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
|
323
342
|
PM_WARN_INVALID_NUMBERED_REFERENCE,
|
324
|
-
PM_WARN_INVALID_SHAREABLE_CONSTANT_VALUE,
|
325
343
|
PM_WARN_KEYWORD_EOL,
|
326
344
|
PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
|
327
345
|
PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
|
data/include/prism/node.h
CHANGED
@@ -56,27 +56,6 @@ void pm_node_list_free(pm_node_list_t *list);
|
|
56
56
|
*/
|
57
57
|
PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
|
58
58
|
|
59
|
-
/**
|
60
|
-
* This struct stores the information gathered by the pm_node_memsize function.
|
61
|
-
* It contains both the memory footprint and additionally metadata about the
|
62
|
-
* shape of the tree.
|
63
|
-
*/
|
64
|
-
typedef struct {
|
65
|
-
/** The total memory footprint of the node and all of its children. */
|
66
|
-
size_t memsize;
|
67
|
-
|
68
|
-
/** The number of children the node has. */
|
69
|
-
size_t node_count;
|
70
|
-
} pm_memsize_t;
|
71
|
-
|
72
|
-
/**
|
73
|
-
* Calculates the memory footprint of a given node.
|
74
|
-
*
|
75
|
-
* @param node The node to calculate the memory footprint of.
|
76
|
-
* @param memsize The memory footprint of the node and all of its children.
|
77
|
-
*/
|
78
|
-
PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
|
79
|
-
|
80
59
|
/**
|
81
60
|
* Returns a string representation of the given node type.
|
82
61
|
*
|
data/include/prism/options.h
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
#define PRISM_OPTIONS_H
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
|
+
#include "prism/util/pm_char.h"
|
10
11
|
#include "prism/util/pm_string.h"
|
11
12
|
|
12
13
|
#include <stdbool.h>
|
@@ -40,6 +41,23 @@ typedef struct pm_options_scope {
|
|
40
41
|
pm_string_t *locals;
|
41
42
|
} pm_options_scope_t;
|
42
43
|
|
44
|
+
// Forward declaration needed by the callback typedef.
|
45
|
+
struct pm_options;
|
46
|
+
|
47
|
+
/**
|
48
|
+
* The callback called when additional switches are found in a shebang comment
|
49
|
+
* that need to be processed by the runtime.
|
50
|
+
*
|
51
|
+
* @param options The options struct that may be updated by this callback.
|
52
|
+
* Certain fields will be checked for changes, specifically encoding,
|
53
|
+
* command_line, and frozen_string_literal.
|
54
|
+
* @param source The source of the shebang comment.
|
55
|
+
* @param length The length of the source.
|
56
|
+
* @param shebang_callback_data Any additional data that should be passed along
|
57
|
+
* to the callback.
|
58
|
+
*/
|
59
|
+
typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
|
60
|
+
|
43
61
|
/**
|
44
62
|
* The version of Ruby syntax that we should be parsing with. This is used to
|
45
63
|
* allow consumers to specify which behavior they want in case they need to
|
@@ -56,7 +74,19 @@ typedef enum {
|
|
56
74
|
/**
|
57
75
|
* The options that can be passed to the parser.
|
58
76
|
*/
|
59
|
-
typedef struct {
|
77
|
+
typedef struct pm_options {
|
78
|
+
/**
|
79
|
+
* The callback to call when additional switches are found in a shebang
|
80
|
+
* comment.
|
81
|
+
*/
|
82
|
+
pm_options_shebang_callback_t shebang_callback;
|
83
|
+
|
84
|
+
/**
|
85
|
+
* Any additional data that should be passed along to the shebang callback
|
86
|
+
* if one was set.
|
87
|
+
*/
|
88
|
+
void *shebang_callback_data;
|
89
|
+
|
60
90
|
/** The name of the file that is currently being parsed. */
|
61
91
|
pm_string_t filepath;
|
62
92
|
|
@@ -103,6 +133,30 @@ typedef struct {
|
|
103
133
|
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
|
104
134
|
*/
|
105
135
|
int8_t frozen_string_literal;
|
136
|
+
|
137
|
+
/**
|
138
|
+
* Whether or not the encoding magic comments should be respected. This is a
|
139
|
+
* niche use-case where you want to parse a file with a specific encoding
|
140
|
+
* but ignore any encoding magic comments at the top of the file.
|
141
|
+
*/
|
142
|
+
bool encoding_locked;
|
143
|
+
|
144
|
+
/**
|
145
|
+
* When the file being parsed is the main script, the shebang will be
|
146
|
+
* considered for command-line flags (or for implicit -x). The caller needs
|
147
|
+
* to pass this information to the parser so that it can behave correctly.
|
148
|
+
*/
|
149
|
+
bool main_script;
|
150
|
+
|
151
|
+
/**
|
152
|
+
* When the file being parsed is considered a "partial" script, jumps will
|
153
|
+
* not be marked as errors if they are not contained within loops/blocks.
|
154
|
+
* This is used in the case that you're parsing a script that you know will
|
155
|
+
* be embedded inside another script later, but you do not have that context
|
156
|
+
* yet. For example, when parsing an ERB template that will be evaluated
|
157
|
+
* inside another script.
|
158
|
+
*/
|
159
|
+
bool partial_script;
|
106
160
|
} pm_options_t;
|
107
161
|
|
108
162
|
/**
|
@@ -142,6 +196,16 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
|
|
142
196
|
*/
|
143
197
|
static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
|
144
198
|
|
199
|
+
/**
|
200
|
+
* Set the shebang callback option on the given options struct.
|
201
|
+
*
|
202
|
+
* @param options The options struct to set the shebang callback on.
|
203
|
+
* @param shebang_callback The shebang callback to set.
|
204
|
+
* @param shebang_callback_data Any additional data that should be passed along
|
205
|
+
* to the callback.
|
206
|
+
*/
|
207
|
+
PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
|
208
|
+
|
145
209
|
/**
|
146
210
|
* Set the filepath option on the given options struct.
|
147
211
|
*
|
@@ -166,6 +230,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
|
|
166
230
|
*/
|
167
231
|
PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
|
168
232
|
|
233
|
+
/**
|
234
|
+
* Set the encoding_locked option on the given options struct.
|
235
|
+
*
|
236
|
+
* @param options The options struct to set the encoding_locked value on.
|
237
|
+
* @param encoding_locked The encoding_locked value to set.
|
238
|
+
*/
|
239
|
+
PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
|
240
|
+
|
169
241
|
/**
|
170
242
|
* Set the frozen string literal option on the given options struct.
|
171
243
|
*
|
@@ -194,6 +266,22 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
|
|
194
266
|
*/
|
195
267
|
PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
|
196
268
|
|
269
|
+
/**
|
270
|
+
* Set the main script option on the given options struct.
|
271
|
+
*
|
272
|
+
* @param options The options struct to set the main script value on.
|
273
|
+
* @param main_script The main script value to set.
|
274
|
+
*/
|
275
|
+
PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
|
276
|
+
|
277
|
+
/**
|
278
|
+
* Set the partial script option on the given options struct.
|
279
|
+
*
|
280
|
+
* @param options The options struct to set the partial script value on.
|
281
|
+
* @param partial_script The partial script value to set.
|
282
|
+
*/
|
283
|
+
PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
|
284
|
+
|
197
285
|
/**
|
198
286
|
* Allocate and zero out the scopes array on the given options struct.
|
199
287
|
*
|
@@ -261,6 +349,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
|
|
261
349
|
* | `1` | -l command line option |
|
262
350
|
* | `1` | -a command line option |
|
263
351
|
* | `1` | the version |
|
352
|
+
* | `1` | encoding locked |
|
353
|
+
* | `1` | main script |
|
354
|
+
* | `1` | partial script |
|
264
355
|
* | `4` | the number of scopes |
|
265
356
|
* | ... | the scopes |
|
266
357
|
*
|
@@ -293,8 +384,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
|
|
293
384
|
* * The encoding can have a length of 0, in which case we'll use the default
|
294
385
|
* encoding (UTF-8). If it's not 0, it should correspond to a name of an
|
295
386
|
* encoding that can be passed to `Encoding.find` in Ruby.
|
296
|
-
* * The frozen string literal
|
297
|
-
* their values should be either 0 or 1.
|
387
|
+
* * The frozen string literal, encoding locked, main script, and partial script
|
388
|
+
* fields are booleans, so their values should be either 0 or 1.
|
298
389
|
* * The number of scopes can be 0.
|
299
390
|
*
|
300
391
|
* @param options The options struct to deserialize into.
|
data/include/prism/parser.h
CHANGED
@@ -82,6 +82,23 @@ typedef enum {
|
|
82
82
|
PM_HEREDOC_INDENT_TILDE,
|
83
83
|
} pm_heredoc_indent_t;
|
84
84
|
|
85
|
+
/**
|
86
|
+
* All of the information that must be stored in order to lex a heredoc.
|
87
|
+
*/
|
88
|
+
typedef struct {
|
89
|
+
/** A pointer to the start of the heredoc identifier. */
|
90
|
+
const uint8_t *ident_start;
|
91
|
+
|
92
|
+
/** The length of the heredoc identifier. */
|
93
|
+
size_t ident_length;
|
94
|
+
|
95
|
+
/** The type of quote that the heredoc uses. */
|
96
|
+
pm_heredoc_quote_t quote;
|
97
|
+
|
98
|
+
/** The type of indentation that the heredoc uses. */
|
99
|
+
pm_heredoc_indent_t indent;
|
100
|
+
} pm_heredoc_lex_mode_t;
|
101
|
+
|
85
102
|
/**
|
86
103
|
* When lexing Ruby source, the lexer has a small amount of state to tell which
|
87
104
|
* kind of token it is currently lexing. For example, when we find the start of
|
@@ -210,17 +227,10 @@ typedef struct pm_lex_mode {
|
|
210
227
|
} string;
|
211
228
|
|
212
229
|
struct {
|
213
|
-
/**
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
size_t ident_length;
|
218
|
-
|
219
|
-
/** The type of quote that the heredoc uses. */
|
220
|
-
pm_heredoc_quote_t quote;
|
221
|
-
|
222
|
-
/** The type of indentation that the heredoc uses. */
|
223
|
-
pm_heredoc_indent_t indent;
|
230
|
+
/**
|
231
|
+
* All of the data necessary to lex a heredoc.
|
232
|
+
*/
|
233
|
+
pm_heredoc_lex_mode_t base;
|
224
234
|
|
225
235
|
/**
|
226
236
|
* This is the pointer to the character where lexing should resume
|
@@ -233,7 +243,7 @@ typedef struct pm_lex_mode {
|
|
233
243
|
* line so that we know how much to dedent each line in the case of
|
234
244
|
* a tilde heredoc.
|
235
245
|
*/
|
236
|
-
size_t common_whitespace;
|
246
|
+
size_t *common_whitespace;
|
237
247
|
|
238
248
|
/** True if the previous token ended with a line continuation. */
|
239
249
|
bool line_continuation;
|
@@ -364,6 +374,9 @@ typedef enum {
|
|
364
374
|
/** a rescue statement within a lambda expression */
|
365
375
|
PM_CONTEXT_LAMBDA_RESCUE,
|
366
376
|
|
377
|
+
/** the predicate clause of a loop statement */
|
378
|
+
PM_CONTEXT_LOOP_PREDICATE,
|
379
|
+
|
367
380
|
/** the top level context */
|
368
381
|
PM_CONTEXT_MAIN,
|
369
382
|
|
@@ -379,6 +392,9 @@ typedef enum {
|
|
379
392
|
/** a rescue statement within a module statement */
|
380
393
|
PM_CONTEXT_MODULE_RESCUE,
|
381
394
|
|
395
|
+
/** a multiple target expression */
|
396
|
+
PM_CONTEXT_MULTI_TARGET,
|
397
|
+
|
382
398
|
/** a parenthesized expression */
|
383
399
|
PM_CONTEXT_PARENS,
|
384
400
|
|
@@ -505,9 +521,9 @@ typedef struct {
|
|
505
521
|
/** The type of shareable constant value that can be set. */
|
506
522
|
typedef uint8_t pm_shareable_constant_value_t;
|
507
523
|
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
|
508
|
-
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL =
|
509
|
-
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING =
|
510
|
-
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY =
|
524
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
|
525
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
|
526
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
|
511
527
|
|
512
528
|
/**
|
513
529
|
* This tracks an individual local variable in a certain lexical context, as
|
@@ -546,6 +562,17 @@ typedef struct pm_locals {
|
|
546
562
|
pm_local_t *locals;
|
547
563
|
} pm_locals_t;
|
548
564
|
|
565
|
+
/** The flags about scope parameters that can be set. */
|
566
|
+
typedef uint8_t pm_scope_parameters_t;
|
567
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
|
568
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
|
569
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
|
570
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
|
571
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
|
572
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
|
573
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
|
574
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
|
575
|
+
|
549
576
|
/**
|
550
577
|
* This struct represents a node in a linked list of scopes. Some scopes can see
|
551
578
|
* into their parent scopes, while others cannot.
|
@@ -557,10 +584,19 @@ typedef struct pm_scope {
|
|
557
584
|
/** The IDs of the locals in the given scope. */
|
558
585
|
pm_locals_t locals;
|
559
586
|
|
587
|
+
/**
|
588
|
+
* This is a list of the implicit parameters contained within the block.
|
589
|
+
* These will be processed after the block is parsed to determine the kind
|
590
|
+
* of parameters node that should be used and to check if any errors need to
|
591
|
+
* be added.
|
592
|
+
*/
|
593
|
+
pm_node_list_t implicit_parameters;
|
594
|
+
|
560
595
|
/**
|
561
596
|
* This is a bitfield that indicates the parameters that are being used in
|
562
|
-
* this scope. It is a combination of the
|
563
|
-
* are three different kinds of parameters that can be used in a
|
597
|
+
* this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
|
598
|
+
* There are three different kinds of parameters that can be used in a
|
599
|
+
* scope:
|
564
600
|
*
|
565
601
|
* - Ordinary parameters (e.g., def foo(bar); end)
|
566
602
|
* - Numbered parameters (e.g., def foo; _1; end)
|
@@ -575,15 +611,7 @@ typedef struct pm_scope {
|
|
575
611
|
* - def foo(&); end
|
576
612
|
* - def foo(...); end
|
577
613
|
*/
|
578
|
-
|
579
|
-
|
580
|
-
/**
|
581
|
-
* An integer indicating the number of numbered parameters on this scope.
|
582
|
-
* This is necessary to determine if child blocks are allowed to use
|
583
|
-
* numbered parameters, and to pass information to consumers of the AST
|
584
|
-
* about how many numbered parameters exist.
|
585
|
-
*/
|
586
|
-
int8_t numbered_parameters;
|
614
|
+
pm_scope_parameters_t parameters;
|
587
615
|
|
588
616
|
/**
|
589
617
|
* The current state of constant shareability for this scope. This is
|
@@ -598,20 +626,6 @@ typedef struct pm_scope {
|
|
598
626
|
bool closed;
|
599
627
|
} pm_scope_t;
|
600
628
|
|
601
|
-
static const uint8_t PM_SCOPE_PARAMETERS_NONE = 0x0;
|
602
|
-
static const uint8_t PM_SCOPE_PARAMETERS_ORDINARY = 0x1;
|
603
|
-
static const uint8_t PM_SCOPE_PARAMETERS_NUMBERED = 0x2;
|
604
|
-
static const uint8_t PM_SCOPE_PARAMETERS_IT = 0x4;
|
605
|
-
static const uint8_t PM_SCOPE_PARAMETERS_TYPE_MASK = 0x7;
|
606
|
-
|
607
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x8;
|
608
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x10;
|
609
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x20;
|
610
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x40;
|
611
|
-
|
612
|
-
static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED = -1;
|
613
|
-
static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_NONE = 0;
|
614
|
-
|
615
629
|
/**
|
616
630
|
* A struct that represents a stack of boolean values.
|
617
631
|
*/
|
@@ -624,6 +638,13 @@ typedef uint32_t pm_state_stack_t;
|
|
624
638
|
* it's considering.
|
625
639
|
*/
|
626
640
|
struct pm_parser {
|
641
|
+
/**
|
642
|
+
* The next node identifier that will be assigned. This is a unique
|
643
|
+
* identifier used to track nodes such that the syntax tree can be dropped
|
644
|
+
* but the node can be found through another parse.
|
645
|
+
*/
|
646
|
+
uint32_t node_id;
|
647
|
+
|
627
648
|
/** The current state of the lexer. */
|
628
649
|
pm_lex_state_t lex_state;
|
629
650
|
|
@@ -853,12 +874,27 @@ struct pm_parser {
|
|
853
874
|
*/
|
854
875
|
bool parsing_eval;
|
855
876
|
|
877
|
+
/**
|
878
|
+
* Whether or not we are parsing a "partial" script, which is a script that
|
879
|
+
* will be evaluated in the context of another script, so we should not
|
880
|
+
* check jumps (next/break/etc.) for validity.
|
881
|
+
*/
|
882
|
+
bool partial_script;
|
883
|
+
|
856
884
|
/** Whether or not we're at the beginning of a command. */
|
857
885
|
bool command_start;
|
858
886
|
|
859
887
|
/** Whether or not we're currently recovering from a syntax error. */
|
860
888
|
bool recovering;
|
861
889
|
|
890
|
+
/**
|
891
|
+
* This is very specialized behavior for when you want to parse in a context
|
892
|
+
* that does not respect encoding comments. Its main use case is translating
|
893
|
+
* into the whitequark/parser AST which re-encodes source files in UTF-8
|
894
|
+
* before they are parsed and ignores encoding comments.
|
895
|
+
*/
|
896
|
+
bool encoding_locked;
|
897
|
+
|
862
898
|
/**
|
863
899
|
* Whether or not the encoding has been changed by a magic comment. We use
|
864
900
|
* this to provide a fast path for the lexer instead of going through the
|
@@ -886,6 +922,12 @@ struct pm_parser {
|
|
886
922
|
* characters.
|
887
923
|
*/
|
888
924
|
bool current_regular_expression_ascii_only;
|
925
|
+
|
926
|
+
/**
|
927
|
+
* By default, Ruby always warns about mismatched indentation. This can be
|
928
|
+
* toggled with a magic comment.
|
929
|
+
*/
|
930
|
+
bool warn_mismatched_indentation;
|
889
931
|
};
|
890
932
|
|
891
933
|
#endif
|
data/include/prism/regexp.h
CHANGED
@@ -10,7 +10,6 @@
|
|
10
10
|
#include "prism/parser.h"
|
11
11
|
#include "prism/encoding.h"
|
12
12
|
#include "prism/util/pm_memchr.h"
|
13
|
-
#include "prism/util/pm_string_list.h"
|
14
13
|
#include "prism/util/pm_string.h"
|
15
14
|
|
16
15
|
#include <stdbool.h>
|
@@ -18,16 +17,27 @@
|
|
18
17
|
#include <string.h>
|
19
18
|
|
20
19
|
/**
|
21
|
-
*
|
22
|
-
|
20
|
+
* This callback is called when a named capture group is found.
|
21
|
+
*/
|
22
|
+
typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
|
23
|
+
|
24
|
+
/**
|
25
|
+
* This callback is called when a parse error is found.
|
26
|
+
*/
|
27
|
+
typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
|
28
|
+
|
29
|
+
/**
|
30
|
+
* Parse a regular expression.
|
23
31
|
*
|
32
|
+
* @param parser The parser that is currently being used.
|
24
33
|
* @param source The source code to parse.
|
25
34
|
* @param size The size of the source code.
|
26
|
-
* @param
|
27
|
-
* @param
|
28
|
-
* @param
|
29
|
-
* @
|
35
|
+
* @param extended_mode Whether to parse the regular expression in extended mode.
|
36
|
+
* @param name_callback The optional callback to call when a named capture group is found.
|
37
|
+
* @param name_data The optional data to pass to the name callback.
|
38
|
+
* @param error_callback The callback to call when a parse error is found.
|
39
|
+
* @param error_data The data to pass to the error callback.
|
30
40
|
*/
|
31
|
-
PRISM_EXPORTED_FUNCTION
|
41
|
+
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
|
32
42
|
|
33
43
|
#endif
|