prism 0.29.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -1
- data/CONTRIBUTING.md +0 -4
- data/README.md +4 -0
- data/config.yml +498 -145
- data/docs/fuzzing.md +1 -1
- data/docs/parsing_rules.md +4 -1
- data/docs/ripper_translation.md +22 -0
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2858 -2082
- data/ext/prism/extconf.rb +1 -1
- data/ext/prism/extension.c +203 -421
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +1732 -453
- data/include/prism/defines.h +36 -0
- data/include/prism/diagnostic.h +23 -6
- data/include/prism/node.h +0 -21
- data/include/prism/options.h +94 -3
- data/include/prism/parser.h +57 -28
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +22 -15
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +28 -12
- data/include/prism/version.h +3 -3
- data/include/prism.h +0 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +16 -1
- data/lib/prism/dot_visitor.rb +45 -34
- data/lib/prism/dsl.rb +660 -468
- data/lib/prism/ffi.rb +64 -6
- data/lib/prism/inspect_visitor.rb +294 -64
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +11 -6
- data/lib/prism/node.rb +2469 -4973
- data/lib/prism/node_ext.rb +91 -14
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +43 -3
- data/lib/prism/reflection.rb +10 -8
- data/lib/prism/serialize.rb +484 -609
- data/lib/prism/translation/parser/compiler.rb +152 -132
- data/lib/prism/translation/parser/lexer.rb +26 -4
- data/lib/prism/translation/parser.rb +9 -4
- data/lib/prism/translation/ripper.rb +22 -20
- data/lib/prism/translation/ruby_parser.rb +73 -13
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +0 -4
- data/prism.gemspec +3 -5
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +744 -4837
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +36 -30
- data/sig/prism/dsl.rbs +190 -303
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +759 -628
- data/sig/prism/parse_result.rbs +2 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +103 -64
- data/src/diagnostic.c +62 -28
- data/src/node.c +499 -1754
- data/src/options.c +76 -27
- data/src/prettyprint.c +156 -112
- data/src/prism.c +2773 -2081
- data/src/regexp.c +202 -69
- data/src/serialize.c +170 -50
- data/src/static_literals.c +63 -84
- data/src/token_type.c +4 -4
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +53 -25
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +130 -80
- data/src/util/pm_strpbrk.c +32 -6
- metadata +4 -6
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/lib/prism/translation/parser/rubocop.rb +0 -73
- data/src/util/pm_string_list.c +0 -28
data/include/prism/defines.h
CHANGED
@@ -25,6 +25,15 @@
|
|
25
25
|
#define __STDC_FORMAT_MACROS
|
26
26
|
#include <inttypes.h>
|
27
27
|
|
28
|
+
/**
|
29
|
+
* When we are parsing using recursive descent, we want to protect against
|
30
|
+
* malicious payloads that could attempt to crash our parser. We do this by
|
31
|
+
* specifying a maximum depth to which we are allowed to recurse.
|
32
|
+
*/
|
33
|
+
#ifndef PRISM_DEPTH_MAXIMUM
|
34
|
+
#define PRISM_DEPTH_MAXIMUM 1000
|
35
|
+
#endif
|
36
|
+
|
28
37
|
/**
|
29
38
|
* By default, we compile with -fvisibility=hidden. When this is enabled, we
|
30
39
|
* need to mark certain functions as being publicly-visible. This macro does
|
@@ -118,6 +127,15 @@
|
|
118
127
|
# endif
|
119
128
|
#endif
|
120
129
|
|
130
|
+
/**
|
131
|
+
* If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
|
132
|
+
* related code from the library. All filesystem related code should be guarded
|
133
|
+
* by PRISM_HAS_FILESYSTEM.
|
134
|
+
*/
|
135
|
+
#ifndef PRISM_HAS_NO_FILESYSTEM
|
136
|
+
# define PRISM_HAS_FILESYSTEM
|
137
|
+
#endif
|
138
|
+
|
121
139
|
/**
|
122
140
|
* isinf on Windows is defined as accepting a float, but on POSIX systems it
|
123
141
|
* accepts a float, a double, or a long double. We want to mirror this behavior
|
@@ -203,4 +221,22 @@
|
|
203
221
|
#define PRISM_ENCODING_EXCLUDE_FULL
|
204
222
|
#endif
|
205
223
|
|
224
|
+
/**
|
225
|
+
* Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
|
226
|
+
* branch prediction.
|
227
|
+
*/
|
228
|
+
#if defined(__GNUC__) || defined(__clang__)
|
229
|
+
/** The compiler should predict that this branch will be taken. */
|
230
|
+
#define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
|
231
|
+
|
232
|
+
/** The compiler should predict that this branch will not be taken. */
|
233
|
+
#define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
|
234
|
+
#else
|
235
|
+
/** Void because this platform does not support branch prediction hints. */
|
236
|
+
#define PRISM_LIKELY(x) (x)
|
237
|
+
|
238
|
+
/** Void because this platform does not support branch prediction hints. */
|
239
|
+
#define PRISM_UNLIKELY(x) (x)
|
240
|
+
#endif
|
241
|
+
|
206
242
|
#endif
|
data/include/prism/diagnostic.h
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
|
1
|
+
/*----------------------------------------------------------------------------*/
|
2
2
|
/* This file is generated by the templates/template.rb script and should not */
|
3
3
|
/* be modified manually. See */
|
4
4
|
/* templates/include/prism/diagnostic.h.erb */
|
5
5
|
/* if you are looking to modify the */
|
6
6
|
/* template */
|
7
|
-
|
7
|
+
/*----------------------------------------------------------------------------*/
|
8
8
|
|
9
9
|
/**
|
10
10
|
* @file diagnostic.h
|
@@ -44,7 +44,6 @@ typedef enum {
|
|
44
44
|
PM_ERR_ARGUMENT_FORMAL_GLOBAL,
|
45
45
|
PM_ERR_ARGUMENT_FORMAL_IVAR,
|
46
46
|
PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
|
47
|
-
PM_ERR_ARGUMENT_IN,
|
48
47
|
PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND,
|
49
48
|
PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
|
50
49
|
PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
|
@@ -110,8 +109,10 @@ typedef enum {
|
|
110
109
|
PM_ERR_ESCAPE_INVALID_META_REPEAT,
|
111
110
|
PM_ERR_ESCAPE_INVALID_UNICODE,
|
112
111
|
PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
|
112
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_LIST,
|
113
113
|
PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
|
114
114
|
PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
|
115
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_SHORT,
|
115
116
|
PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
|
116
117
|
PM_ERR_EXPECT_ARGUMENT,
|
117
118
|
PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
|
@@ -126,6 +127,7 @@ typedef enum {
|
|
126
127
|
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
|
127
128
|
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
|
128
129
|
PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
|
130
|
+
PM_ERR_EXPECT_FOR_DELIMITER,
|
129
131
|
PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
|
130
132
|
PM_ERR_EXPECT_IN_DELIMITER,
|
131
133
|
PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
|
@@ -134,6 +136,7 @@ typedef enum {
|
|
134
136
|
PM_ERR_EXPECT_RPAREN,
|
135
137
|
PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
|
136
138
|
PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
|
139
|
+
PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER,
|
137
140
|
PM_ERR_EXPECT_STRING_CONTENT,
|
138
141
|
PM_ERR_EXPECT_WHEN_DELIMITER,
|
139
142
|
PM_ERR_EXPRESSION_BARE_HASH,
|
@@ -143,6 +146,7 @@ typedef enum {
|
|
143
146
|
PM_ERR_EXPRESSION_NOT_WRITABLE_FILE,
|
144
147
|
PM_ERR_EXPRESSION_NOT_WRITABLE_LINE,
|
145
148
|
PM_ERR_EXPRESSION_NOT_WRITABLE_NIL,
|
149
|
+
PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED,
|
146
150
|
PM_ERR_EXPRESSION_NOT_WRITABLE_SELF,
|
147
151
|
PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE,
|
148
152
|
PM_ERR_FLOAT_PARSE,
|
@@ -182,6 +186,7 @@ typedef enum {
|
|
182
186
|
PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER,
|
183
187
|
PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING,
|
184
188
|
PM_ERR_INVALID_PERCENT,
|
189
|
+
PM_ERR_INVALID_PERCENT_EOF,
|
185
190
|
PM_ERR_INVALID_PRINTABLE_CHARACTER,
|
186
191
|
PM_ERR_INVALID_RETRY_AFTER_ELSE,
|
187
192
|
PM_ERR_INVALID_RETRY_AFTER_ENSURE,
|
@@ -210,12 +215,15 @@ typedef enum {
|
|
210
215
|
PM_ERR_MODULE_TERM,
|
211
216
|
PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
|
212
217
|
PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
|
218
|
+
PM_ERR_NESTING_TOO_DEEP,
|
213
219
|
PM_ERR_NO_LOCAL_VARIABLE,
|
220
|
+
PM_ERR_NON_ASSOCIATIVE_OPERATOR,
|
214
221
|
PM_ERR_NOT_EXPRESSION,
|
215
222
|
PM_ERR_NUMBER_LITERAL_UNDERSCORE,
|
223
|
+
PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK,
|
216
224
|
PM_ERR_NUMBERED_PARAMETER_IT,
|
217
225
|
PM_ERR_NUMBERED_PARAMETER_ORDINARY,
|
218
|
-
|
226
|
+
PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK,
|
219
227
|
PM_ERR_OPERATOR_MULTI_ASSIGN,
|
220
228
|
PM_ERR_OPERATOR_WRITE_ARGUMENTS,
|
221
229
|
PM_ERR_OPERATOR_WRITE_BLOCK,
|
@@ -232,8 +240,9 @@ typedef enum {
|
|
232
240
|
PM_ERR_PARAMETER_SPLAT_MULTI,
|
233
241
|
PM_ERR_PARAMETER_STAR,
|
234
242
|
PM_ERR_PARAMETER_UNEXPECTED_FWD,
|
235
|
-
PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
|
236
243
|
PM_ERR_PARAMETER_UNEXPECTED_NO_KW,
|
244
|
+
PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
|
245
|
+
PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS,
|
237
246
|
PM_ERR_PATTERN_CAPTURE_DUPLICATE,
|
238
247
|
PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
|
239
248
|
PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
|
@@ -245,6 +254,7 @@ typedef enum {
|
|
245
254
|
PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
|
246
255
|
PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
|
247
256
|
PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
|
257
|
+
PM_ERR_PATTERN_FIND_MISSING_INNER,
|
248
258
|
PM_ERR_PATTERN_HASH_IMPLICIT,
|
249
259
|
PM_ERR_PATTERN_HASH_KEY,
|
250
260
|
PM_ERR_PATTERN_HASH_KEY_DUPLICATE,
|
@@ -262,6 +272,7 @@ typedef enum {
|
|
262
272
|
PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING,
|
263
273
|
PM_ERR_REGEXP_INVALID_UNICODE_RANGE,
|
264
274
|
PM_ERR_REGEXP_NON_ESCAPED_MBC,
|
275
|
+
PM_ERR_REGEXP_PARSE_ERROR,
|
265
276
|
PM_ERR_REGEXP_TERM,
|
266
277
|
PM_ERR_REGEXP_UNKNOWN_OPTIONS,
|
267
278
|
PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP,
|
@@ -286,11 +297,15 @@ typedef enum {
|
|
286
297
|
PM_ERR_TERNARY_COLON,
|
287
298
|
PM_ERR_TERNARY_EXPRESSION_FALSE,
|
288
299
|
PM_ERR_TERNARY_EXPRESSION_TRUE,
|
300
|
+
PM_ERR_UNARY_DISALLOWED,
|
289
301
|
PM_ERR_UNARY_RECEIVER,
|
290
302
|
PM_ERR_UNDEF_ARGUMENT,
|
291
303
|
PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
|
292
304
|
PM_ERR_UNEXPECTED_INDEX_BLOCK,
|
293
305
|
PM_ERR_UNEXPECTED_INDEX_KEYWORDS,
|
306
|
+
PM_ERR_UNEXPECTED_LABEL,
|
307
|
+
PM_ERR_UNEXPECTED_MULTI_WRITE,
|
308
|
+
PM_ERR_UNEXPECTED_RANGE_OPERATOR,
|
294
309
|
PM_ERR_UNEXPECTED_SAFE_NAVIGATION,
|
295
310
|
PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
|
296
311
|
PM_ERR_UNEXPECTED_TOKEN_IGNORE,
|
@@ -303,6 +318,7 @@ typedef enum {
|
|
303
318
|
PM_ERR_XSTRING_TERM,
|
304
319
|
|
305
320
|
// These are the warning diagnostics.
|
321
|
+
PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
|
306
322
|
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
|
307
323
|
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
|
308
324
|
PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND,
|
@@ -318,10 +334,11 @@ typedef enum {
|
|
318
334
|
PM_WARN_DUPLICATED_WHEN_CLAUSE,
|
319
335
|
PM_WARN_FLOAT_OUT_OF_RANGE,
|
320
336
|
PM_WARN_IGNORED_FROZEN_STRING_LITERAL,
|
337
|
+
PM_WARN_INDENTATION_MISMATCH,
|
321
338
|
PM_WARN_INTEGER_IN_FLIP_FLOP,
|
322
339
|
PM_WARN_INVALID_CHARACTER,
|
340
|
+
PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
|
323
341
|
PM_WARN_INVALID_NUMBERED_REFERENCE,
|
324
|
-
PM_WARN_INVALID_SHAREABLE_CONSTANT_VALUE,
|
325
342
|
PM_WARN_KEYWORD_EOL,
|
326
343
|
PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
|
327
344
|
PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
|
data/include/prism/node.h
CHANGED
@@ -56,27 +56,6 @@ void pm_node_list_free(pm_node_list_t *list);
|
|
56
56
|
*/
|
57
57
|
PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
|
58
58
|
|
59
|
-
/**
|
60
|
-
* This struct stores the information gathered by the pm_node_memsize function.
|
61
|
-
* It contains both the memory footprint and additionally metadata about the
|
62
|
-
* shape of the tree.
|
63
|
-
*/
|
64
|
-
typedef struct {
|
65
|
-
/** The total memory footprint of the node and all of its children. */
|
66
|
-
size_t memsize;
|
67
|
-
|
68
|
-
/** The number of children the node has. */
|
69
|
-
size_t node_count;
|
70
|
-
} pm_memsize_t;
|
71
|
-
|
72
|
-
/**
|
73
|
-
* Calculates the memory footprint of a given node.
|
74
|
-
*
|
75
|
-
* @param node The node to calculate the memory footprint of.
|
76
|
-
* @param memsize The memory footprint of the node and all of its children.
|
77
|
-
*/
|
78
|
-
PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
|
79
|
-
|
80
59
|
/**
|
81
60
|
* Returns a string representation of the given node type.
|
82
61
|
*
|
data/include/prism/options.h
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
#define PRISM_OPTIONS_H
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
|
+
#include "prism/util/pm_char.h"
|
10
11
|
#include "prism/util/pm_string.h"
|
11
12
|
|
12
13
|
#include <stdbool.h>
|
@@ -40,6 +41,23 @@ typedef struct pm_options_scope {
|
|
40
41
|
pm_string_t *locals;
|
41
42
|
} pm_options_scope_t;
|
42
43
|
|
44
|
+
// Forward declaration needed by the callback typedef.
|
45
|
+
struct pm_options;
|
46
|
+
|
47
|
+
/**
|
48
|
+
* The callback called when additional switches are found in a shebang comment
|
49
|
+
* that need to be processed by the runtime.
|
50
|
+
*
|
51
|
+
* @param options The options struct that may be updated by this callback.
|
52
|
+
* Certain fields will be checked for changes, specifically encoding,
|
53
|
+
* command_line, and frozen_string_literal.
|
54
|
+
* @param source The source of the shebang comment.
|
55
|
+
* @param length The length of the source.
|
56
|
+
* @param shebang_callback_data Any additional data that should be passed along
|
57
|
+
* to the callback.
|
58
|
+
*/
|
59
|
+
typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
|
60
|
+
|
43
61
|
/**
|
44
62
|
* The version of Ruby syntax that we should be parsing with. This is used to
|
45
63
|
* allow consumers to specify which behavior they want in case they need to
|
@@ -56,7 +74,19 @@ typedef enum {
|
|
56
74
|
/**
|
57
75
|
* The options that can be passed to the parser.
|
58
76
|
*/
|
59
|
-
typedef struct {
|
77
|
+
typedef struct pm_options {
|
78
|
+
/**
|
79
|
+
* The callback to call when additional switches are found in a shebang
|
80
|
+
* comment.
|
81
|
+
*/
|
82
|
+
pm_options_shebang_callback_t shebang_callback;
|
83
|
+
|
84
|
+
/**
|
85
|
+
* Any additional data that should be passed along to the shebang callback
|
86
|
+
* if one was set.
|
87
|
+
*/
|
88
|
+
void *shebang_callback_data;
|
89
|
+
|
60
90
|
/** The name of the file that is currently being parsed. */
|
61
91
|
pm_string_t filepath;
|
62
92
|
|
@@ -103,6 +133,30 @@ typedef struct {
|
|
103
133
|
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
|
104
134
|
*/
|
105
135
|
int8_t frozen_string_literal;
|
136
|
+
|
137
|
+
/**
|
138
|
+
* Whether or not the encoding magic comments should be respected. This is a
|
139
|
+
* niche use-case where you want to parse a file with a specific encoding
|
140
|
+
* but ignore any encoding magic comments at the top of the file.
|
141
|
+
*/
|
142
|
+
bool encoding_locked;
|
143
|
+
|
144
|
+
/**
|
145
|
+
* When the file being parsed is the main script, the shebang will be
|
146
|
+
* considered for command-line flags (or for implicit -x). The caller needs
|
147
|
+
* to pass this information to the parser so that it can behave correctly.
|
148
|
+
*/
|
149
|
+
bool main_script;
|
150
|
+
|
151
|
+
/**
|
152
|
+
* When the file being parsed is considered a "partial" script, jumps will
|
153
|
+
* not be marked as errors if they are not contained within loops/blocks.
|
154
|
+
* This is used in the case that you're parsing a script that you know will
|
155
|
+
* be embedded inside another script later, but you do not have that context
|
156
|
+
* yet. For example, when parsing an ERB template that will be evaluated
|
157
|
+
* inside another script.
|
158
|
+
*/
|
159
|
+
bool partial_script;
|
106
160
|
} pm_options_t;
|
107
161
|
|
108
162
|
/**
|
@@ -142,6 +196,16 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
|
|
142
196
|
*/
|
143
197
|
static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
|
144
198
|
|
199
|
+
/**
|
200
|
+
* Set the shebang callback option on the given options struct.
|
201
|
+
*
|
202
|
+
* @param options The options struct to set the shebang callback on.
|
203
|
+
* @param shebang_callback The shebang callback to set.
|
204
|
+
* @param shebang_callback_data Any additional data that should be passed along
|
205
|
+
* to the callback.
|
206
|
+
*/
|
207
|
+
PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
|
208
|
+
|
145
209
|
/**
|
146
210
|
* Set the filepath option on the given options struct.
|
147
211
|
*
|
@@ -166,6 +230,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
|
|
166
230
|
*/
|
167
231
|
PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
|
168
232
|
|
233
|
+
/**
|
234
|
+
* Set the encoding_locked option on the given options struct.
|
235
|
+
*
|
236
|
+
* @param options The options struct to set the encoding_locked value on.
|
237
|
+
* @param encoding_locked The encoding_locked value to set.
|
238
|
+
*/
|
239
|
+
PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
|
240
|
+
|
169
241
|
/**
|
170
242
|
* Set the frozen string literal option on the given options struct.
|
171
243
|
*
|
@@ -194,6 +266,22 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
|
|
194
266
|
*/
|
195
267
|
PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
|
196
268
|
|
269
|
+
/**
|
270
|
+
* Set the main script option on the given options struct.
|
271
|
+
*
|
272
|
+
* @param options The options struct to set the main script value on.
|
273
|
+
* @param main_script The main script value to set.
|
274
|
+
*/
|
275
|
+
PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
|
276
|
+
|
277
|
+
/**
|
278
|
+
* Set the partial script option on the given options struct.
|
279
|
+
*
|
280
|
+
* @param options The options struct to set the partial script value on.
|
281
|
+
* @param partial_script The partial script value to set.
|
282
|
+
*/
|
283
|
+
PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
|
284
|
+
|
197
285
|
/**
|
198
286
|
* Allocate and zero out the scopes array on the given options struct.
|
199
287
|
*
|
@@ -261,6 +349,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
|
|
261
349
|
* | `1` | -l command line option |
|
262
350
|
* | `1` | -a command line option |
|
263
351
|
* | `1` | the version |
|
352
|
+
* | `1` | encoding locked |
|
353
|
+
* | `1` | main script |
|
354
|
+
* | `1` | partial script |
|
264
355
|
* | `4` | the number of scopes |
|
265
356
|
* | ... | the scopes |
|
266
357
|
*
|
@@ -293,8 +384,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
|
|
293
384
|
* * The encoding can have a length of 0, in which case we'll use the default
|
294
385
|
* encoding (UTF-8). If it's not 0, it should correspond to a name of an
|
295
386
|
* encoding that can be passed to `Encoding.find` in Ruby.
|
296
|
-
* * The frozen string literal
|
297
|
-
* their values should be either 0 or 1.
|
387
|
+
* * The frozen string literal, encoding locked, main script, and partial script
|
388
|
+
* fields are booleans, so their values should be either 0 or 1.
|
298
389
|
* * The number of scopes can be 0.
|
299
390
|
*
|
300
391
|
* @param options The options struct to deserialize into.
|
data/include/prism/parser.h
CHANGED
@@ -364,6 +364,9 @@ typedef enum {
|
|
364
364
|
/** a rescue statement within a lambda expression */
|
365
365
|
PM_CONTEXT_LAMBDA_RESCUE,
|
366
366
|
|
367
|
+
/** the predicate clause of a loop statement */
|
368
|
+
PM_CONTEXT_LOOP_PREDICATE,
|
369
|
+
|
367
370
|
/** the top level context */
|
368
371
|
PM_CONTEXT_MAIN,
|
369
372
|
|
@@ -505,9 +508,9 @@ typedef struct {
|
|
505
508
|
/** The type of shareable constant value that can be set. */
|
506
509
|
typedef uint8_t pm_shareable_constant_value_t;
|
507
510
|
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
|
508
|
-
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL =
|
509
|
-
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING =
|
510
|
-
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY =
|
511
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
|
512
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
|
513
|
+
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
|
511
514
|
|
512
515
|
/**
|
513
516
|
* This tracks an individual local variable in a certain lexical context, as
|
@@ -546,6 +549,17 @@ typedef struct pm_locals {
|
|
546
549
|
pm_local_t *locals;
|
547
550
|
} pm_locals_t;
|
548
551
|
|
552
|
+
/** The flags about scope parameters that can be set. */
|
553
|
+
typedef uint8_t pm_scope_parameters_t;
|
554
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
|
555
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
|
556
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
|
557
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
|
558
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
|
559
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
|
560
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
|
561
|
+
static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
|
562
|
+
|
549
563
|
/**
|
550
564
|
* This struct represents a node in a linked list of scopes. Some scopes can see
|
551
565
|
* into their parent scopes, while others cannot.
|
@@ -557,10 +571,19 @@ typedef struct pm_scope {
|
|
557
571
|
/** The IDs of the locals in the given scope. */
|
558
572
|
pm_locals_t locals;
|
559
573
|
|
574
|
+
/**
|
575
|
+
* This is a list of the implicit parameters contained within the block.
|
576
|
+
* These will be processed after the block is parsed to determine the kind
|
577
|
+
* of parameters node that should be used and to check if any errors need to
|
578
|
+
* be added.
|
579
|
+
*/
|
580
|
+
pm_node_list_t implicit_parameters;
|
581
|
+
|
560
582
|
/**
|
561
583
|
* This is a bitfield that indicates the parameters that are being used in
|
562
|
-
* this scope. It is a combination of the
|
563
|
-
* are three different kinds of parameters that can be used in a
|
584
|
+
* this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
|
585
|
+
* There are three different kinds of parameters that can be used in a
|
586
|
+
* scope:
|
564
587
|
*
|
565
588
|
* - Ordinary parameters (e.g., def foo(bar); end)
|
566
589
|
* - Numbered parameters (e.g., def foo; _1; end)
|
@@ -575,15 +598,7 @@ typedef struct pm_scope {
|
|
575
598
|
* - def foo(&); end
|
576
599
|
* - def foo(...); end
|
577
600
|
*/
|
578
|
-
|
579
|
-
|
580
|
-
/**
|
581
|
-
* An integer indicating the number of numbered parameters on this scope.
|
582
|
-
* This is necessary to determine if child blocks are allowed to use
|
583
|
-
* numbered parameters, and to pass information to consumers of the AST
|
584
|
-
* about how many numbered parameters exist.
|
585
|
-
*/
|
586
|
-
int8_t numbered_parameters;
|
601
|
+
pm_scope_parameters_t parameters;
|
587
602
|
|
588
603
|
/**
|
589
604
|
* The current state of constant shareability for this scope. This is
|
@@ -598,20 +613,6 @@ typedef struct pm_scope {
|
|
598
613
|
bool closed;
|
599
614
|
} pm_scope_t;
|
600
615
|
|
601
|
-
static const uint8_t PM_SCOPE_PARAMETERS_NONE = 0x0;
|
602
|
-
static const uint8_t PM_SCOPE_PARAMETERS_ORDINARY = 0x1;
|
603
|
-
static const uint8_t PM_SCOPE_PARAMETERS_NUMBERED = 0x2;
|
604
|
-
static const uint8_t PM_SCOPE_PARAMETERS_IT = 0x4;
|
605
|
-
static const uint8_t PM_SCOPE_PARAMETERS_TYPE_MASK = 0x7;
|
606
|
-
|
607
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x8;
|
608
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x10;
|
609
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x20;
|
610
|
-
static const uint8_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x40;
|
611
|
-
|
612
|
-
static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED = -1;
|
613
|
-
static const int8_t PM_SCOPE_NUMBERED_PARAMETERS_NONE = 0;
|
614
|
-
|
615
616
|
/**
|
616
617
|
* A struct that represents a stack of boolean values.
|
617
618
|
*/
|
@@ -624,6 +625,13 @@ typedef uint32_t pm_state_stack_t;
|
|
624
625
|
* it's considering.
|
625
626
|
*/
|
626
627
|
struct pm_parser {
|
628
|
+
/**
|
629
|
+
* The next node identifier that will be assigned. This is a unique
|
630
|
+
* identifier used to track nodes such that the syntax tree can be dropped
|
631
|
+
* but the node can be found through another parse.
|
632
|
+
*/
|
633
|
+
uint32_t node_id;
|
634
|
+
|
627
635
|
/** The current state of the lexer. */
|
628
636
|
pm_lex_state_t lex_state;
|
629
637
|
|
@@ -853,12 +861,27 @@ struct pm_parser {
|
|
853
861
|
*/
|
854
862
|
bool parsing_eval;
|
855
863
|
|
864
|
+
/**
|
865
|
+
* Whether or not we are parsing a "partial" script, which is a script that
|
866
|
+
* will be evaluated in the context of another script, so we should not
|
867
|
+
* check jumps (next/break/etc.) for validity.
|
868
|
+
*/
|
869
|
+
bool partial_script;
|
870
|
+
|
856
871
|
/** Whether or not we're at the beginning of a command. */
|
857
872
|
bool command_start;
|
858
873
|
|
859
874
|
/** Whether or not we're currently recovering from a syntax error. */
|
860
875
|
bool recovering;
|
861
876
|
|
877
|
+
/**
|
878
|
+
* This is very specialized behavior for when you want to parse in a context
|
879
|
+
* that does not respect encoding comments. Its main use case is translating
|
880
|
+
* into the whitequark/parser AST which re-encodes source files in UTF-8
|
881
|
+
* before they are parsed and ignores encoding comments.
|
882
|
+
*/
|
883
|
+
bool encoding_locked;
|
884
|
+
|
862
885
|
/**
|
863
886
|
* Whether or not the encoding has been changed by a magic comment. We use
|
864
887
|
* this to provide a fast path for the lexer instead of going through the
|
@@ -886,6 +909,12 @@ struct pm_parser {
|
|
886
909
|
* characters.
|
887
910
|
*/
|
888
911
|
bool current_regular_expression_ascii_only;
|
912
|
+
|
913
|
+
/**
|
914
|
+
* By default, Ruby always warns about mismatched indentation. This can be
|
915
|
+
* toggled with a magic comment.
|
916
|
+
*/
|
917
|
+
bool warn_mismatched_indentation;
|
889
918
|
};
|
890
919
|
|
891
920
|
#endif
|
data/include/prism/regexp.h
CHANGED
@@ -10,7 +10,6 @@
|
|
10
10
|
#include "prism/parser.h"
|
11
11
|
#include "prism/encoding.h"
|
12
12
|
#include "prism/util/pm_memchr.h"
|
13
|
-
#include "prism/util/pm_string_list.h"
|
14
13
|
#include "prism/util/pm_string.h"
|
15
14
|
|
16
15
|
#include <stdbool.h>
|
@@ -18,16 +17,27 @@
|
|
18
17
|
#include <string.h>
|
19
18
|
|
20
19
|
/**
|
21
|
-
*
|
22
|
-
|
20
|
+
* This callback is called when a named capture group is found.
|
21
|
+
*/
|
22
|
+
typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
|
23
|
+
|
24
|
+
/**
|
25
|
+
* This callback is called when a parse error is found.
|
26
|
+
*/
|
27
|
+
typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
|
28
|
+
|
29
|
+
/**
|
30
|
+
* Parse a regular expression.
|
23
31
|
*
|
32
|
+
* @param parser The parser that is currently being used.
|
24
33
|
* @param source The source code to parse.
|
25
34
|
* @param size The size of the source code.
|
26
|
-
* @param
|
27
|
-
* @param
|
28
|
-
* @param
|
29
|
-
* @
|
35
|
+
* @param extended_mode Whether to parse the regular expression in extended mode.
|
36
|
+
* @param name_callback The optional callback to call when a named capture group is found.
|
37
|
+
* @param name_data The optional data to pass to the name callback.
|
38
|
+
* @param error_callback The callback to call when a parse error is found.
|
39
|
+
* @param error_data The data to pass to the error callback.
|
30
40
|
*/
|
31
|
-
PRISM_EXPORTED_FUNCTION
|
41
|
+
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
|
32
42
|
|
33
43
|
#endif
|
@@ -95,9 +95,10 @@ typedef struct {
|
|
95
95
|
* @param start_line The line number that the parser starts on.
|
96
96
|
* @param literals The set of static literals to add the node to.
|
97
97
|
* @param node The node to add to the set.
|
98
|
+
* @param replace Whether to replace the previous node if one already exists.
|
98
99
|
* @return A pointer to the node that is being overwritten, if there is one.
|
99
100
|
*/
|
100
|
-
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node);
|
101
|
+
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
|
101
102
|
|
102
103
|
/**
|
103
104
|
* Free the internal memory associated with the given static literals set.
|
@@ -115,6 +116,6 @@ void pm_static_literals_free(pm_static_literals_t *literals);
|
|
115
116
|
* @param encoding_name The name of the encoding of the source being parsed.
|
116
117
|
* @param node The node to create a string representation of.
|
117
118
|
*/
|
118
|
-
|
119
|
+
void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
|
119
120
|
|
120
121
|
#endif
|
@@ -34,8 +34,7 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
|
|
34
34
|
* @return The number of characters at the start of the string that are
|
35
35
|
* whitespace.
|
36
36
|
*/
|
37
|
-
size_t
|
38
|
-
pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
|
37
|
+
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
|
39
38
|
|
40
39
|
/**
|
41
40
|
* Returns the number of characters at the start of the string that are inline
|
@@ -87,14 +87,6 @@ void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_co
|
|
87
87
|
*/
|
88
88
|
bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
|
89
89
|
|
90
|
-
/**
|
91
|
-
* Get the memory size of a list of constant ids.
|
92
|
-
*
|
93
|
-
* @param list The list to get the memory size of.
|
94
|
-
* @return The memory size of the list.
|
95
|
-
*/
|
96
|
-
size_t pm_constant_id_list_memsize(pm_constant_id_list_t *list);
|
97
|
-
|
98
90
|
/**
|
99
91
|
* Free the memory associated with a list of constant ids.
|
100
92
|
*
|