yarp 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -8
- data/CONTRIBUTING.md +2 -2
- data/Makefile +5 -5
- data/README.md +11 -12
- data/config.yml +6 -2
- data/docs/build_system.md +21 -21
- data/docs/building.md +4 -4
- data/docs/configuration.md +25 -21
- data/docs/design.md +2 -2
- data/docs/encoding.md +17 -17
- data/docs/fuzzing.md +4 -4
- data/docs/heredocs.md +3 -3
- data/docs/mapping.md +94 -94
- data/docs/ripper.md +4 -4
- data/docs/ruby_api.md +11 -11
- data/docs/serialization.md +17 -16
- data/docs/testing.md +6 -6
- data/ext/prism/api_node.c +4725 -0
- data/ext/{yarp → prism}/api_pack.c +82 -82
- data/ext/{yarp → prism}/extconf.rb +13 -13
- data/ext/{yarp → prism}/extension.c +175 -168
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +1932 -0
- data/include/prism/defines.h +45 -0
- data/include/prism/diagnostic.h +231 -0
- data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
- data/include/prism/node.h +41 -0
- data/include/prism/pack.h +141 -0
- data/include/{yarp → prism}/parser.h +143 -142
- data/include/prism/regexp.h +19 -0
- data/include/prism/unescape.h +48 -0
- data/include/prism/util/pm_buffer.h +51 -0
- data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
- data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
- data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
- data/include/prism/util/pm_memchr.h +14 -0
- data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
- data/include/prism/util/pm_state_stack.h +24 -0
- data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
- data/include/prism/util/pm_string_list.h +25 -0
- data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
- data/include/prism/version.h +4 -0
- data/include/prism.h +82 -0
- data/lib/prism/compiler.rb +465 -0
- data/lib/prism/debug.rb +157 -0
- data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
- data/lib/prism/dispatcher.rb +2051 -0
- data/lib/prism/dsl.rb +750 -0
- data/lib/{yarp → prism}/ffi.rb +66 -67
- data/lib/{yarp → prism}/lex_compat.rb +40 -43
- data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
- data/lib/{yarp → prism}/node.rb +2012 -2593
- data/lib/prism/node_ext.rb +55 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/{yarp → prism}/pack.rb +1 -1
- data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
- data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
- data/lib/prism/parse_result.rb +266 -0
- data/lib/{yarp → prism}/pattern.rb +14 -14
- data/lib/{yarp → prism}/ripper_compat.rb +5 -5
- data/lib/{yarp → prism}/serialize.rb +12 -7
- data/lib/prism/visitor.rb +470 -0
- data/lib/prism.rb +64 -0
- data/lib/yarp.rb +2 -614
- data/src/diagnostic.c +213 -208
- data/src/enc/pm_big5.c +52 -0
- data/src/enc/pm_euc_jp.c +58 -0
- data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
- data/src/enc/pm_shift_jis.c +56 -0
- data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
- data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
- data/src/enc/pm_windows_31j.c +56 -0
- data/src/node.c +1293 -1233
- data/src/pack.c +247 -247
- data/src/prettyprint.c +1479 -1479
- data/src/{yarp.c → prism.c} +5205 -5083
- data/src/regexp.c +132 -132
- data/src/serialize.c +1121 -1121
- data/src/token_type.c +169 -167
- data/src/unescape.c +106 -87
- data/src/util/pm_buffer.c +103 -0
- data/src/util/{yp_char.c → pm_char.c} +72 -72
- data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
- data/src/util/{yp_list.c → pm_list.c} +10 -10
- data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
- data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
- data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
- data/src/util/{yp_string.c → pm_string.c} +38 -38
- data/src/util/pm_string_list.c +29 -0
- data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
- data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
- data/yarp.gemspec +68 -59
- metadata +70 -61
- data/ext/yarp/api_node.c +0 -4728
- data/ext/yarp/extension.h +0 -18
- data/include/yarp/ast.h +0 -1929
- data/include/yarp/defines.h +0 -45
- data/include/yarp/diagnostic.h +0 -226
- data/include/yarp/node.h +0 -42
- data/include/yarp/pack.h +0 -141
- data/include/yarp/regexp.h +0 -19
- data/include/yarp/unescape.h +0 -44
- data/include/yarp/util/yp_buffer.h +0 -51
- data/include/yarp/util/yp_memchr.h +0 -14
- data/include/yarp/util/yp_state_stack.h +0 -24
- data/include/yarp/util/yp_string_list.h +0 -25
- data/include/yarp/version.h +0 -4
- data/include/yarp.h +0 -82
- data/src/enc/yp_big5.c +0 -52
- data/src/enc/yp_euc_jp.c +0 -58
- data/src/enc/yp_shift_jis.c +0 -56
- data/src/enc/yp_windows_31j.c +0 -56
- data/src/util/yp_buffer.c +0 -101
- data/src/util/yp_string_list.c +0 -29
@@ -1,13 +1,13 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef PRISM_PARSER_H
|
2
|
+
#define PRISM_PARSER_H
|
3
3
|
|
4
|
-
#include "
|
5
|
-
#include "
|
6
|
-
#include "
|
7
|
-
#include "
|
8
|
-
#include "
|
9
|
-
#include "
|
10
|
-
#include "
|
4
|
+
#include "prism/ast.h"
|
5
|
+
#include "prism/defines.h"
|
6
|
+
#include "prism/enc/pm_encoding.h"
|
7
|
+
#include "prism/util/pm_constant_pool.h"
|
8
|
+
#include "prism/util/pm_list.h"
|
9
|
+
#include "prism/util/pm_newline_list.h"
|
10
|
+
#include "prism/util/pm_state_stack.h"
|
11
11
|
|
12
12
|
#include <stdbool.h>
|
13
13
|
|
@@ -15,88 +15,88 @@
|
|
15
15
|
// the lexer can track. This is used to determine which kind of token to return
|
16
16
|
// based on the context of the parser.
|
17
17
|
typedef enum {
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
}
|
18
|
+
PM_LEX_STATE_BIT_BEG,
|
19
|
+
PM_LEX_STATE_BIT_END,
|
20
|
+
PM_LEX_STATE_BIT_ENDARG,
|
21
|
+
PM_LEX_STATE_BIT_ENDFN,
|
22
|
+
PM_LEX_STATE_BIT_ARG,
|
23
|
+
PM_LEX_STATE_BIT_CMDARG,
|
24
|
+
PM_LEX_STATE_BIT_MID,
|
25
|
+
PM_LEX_STATE_BIT_FNAME,
|
26
|
+
PM_LEX_STATE_BIT_DOT,
|
27
|
+
PM_LEX_STATE_BIT_CLASS,
|
28
|
+
PM_LEX_STATE_BIT_LABEL,
|
29
|
+
PM_LEX_STATE_BIT_LABELED,
|
30
|
+
PM_LEX_STATE_BIT_FITEM
|
31
|
+
} pm_lex_state_bit_t;
|
32
32
|
|
33
33
|
// This enum combines the various bits from the above enum into individual
|
34
34
|
// values that represent the various states of the lexer.
|
35
35
|
typedef enum {
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
}
|
36
|
+
PM_LEX_STATE_NONE = 0,
|
37
|
+
PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
|
38
|
+
PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
|
39
|
+
PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
|
40
|
+
PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
|
41
|
+
PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
|
42
|
+
PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
|
43
|
+
PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
|
44
|
+
PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
|
45
|
+
PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
|
46
|
+
PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
|
47
|
+
PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
|
48
|
+
PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
|
49
|
+
PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
|
50
|
+
PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
|
51
|
+
PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
|
52
|
+
PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
|
53
|
+
} pm_lex_state_t;
|
54
54
|
|
55
55
|
typedef enum {
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
}
|
56
|
+
PM_HEREDOC_QUOTE_NONE,
|
57
|
+
PM_HEREDOC_QUOTE_SINGLE = '\'',
|
58
|
+
PM_HEREDOC_QUOTE_DOUBLE = '"',
|
59
|
+
PM_HEREDOC_QUOTE_BACKTICK = '`',
|
60
|
+
} pm_heredoc_quote_t;
|
61
61
|
|
62
62
|
typedef enum {
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
}
|
63
|
+
PM_HEREDOC_INDENT_NONE,
|
64
|
+
PM_HEREDOC_INDENT_DASH,
|
65
|
+
PM_HEREDOC_INDENT_TILDE,
|
66
|
+
} pm_heredoc_indent_t;
|
67
67
|
|
68
68
|
// When lexing Ruby source, the lexer has a small amount of state to tell which
|
69
69
|
// kind of token it is currently lexing. For example, when we find the start of
|
70
70
|
// a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
|
71
|
-
// that the lexer is now in the
|
71
|
+
// that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
|
72
72
|
// are found as part of a string.
|
73
|
-
typedef struct
|
73
|
+
typedef struct pm_lex_mode {
|
74
74
|
enum {
|
75
75
|
// This state is used when any given token is being lexed.
|
76
|
-
|
76
|
+
PM_LEX_DEFAULT,
|
77
77
|
|
78
78
|
// This state is used when we're lexing as normal but inside an embedded
|
79
79
|
// expression of a string.
|
80
|
-
|
80
|
+
PM_LEX_EMBEXPR,
|
81
81
|
|
82
82
|
// This state is used when we're lexing a variable that is embedded
|
83
83
|
// directly inside of a string with the # shorthand.
|
84
|
-
|
84
|
+
PM_LEX_EMBVAR,
|
85
85
|
|
86
86
|
// This state is used when you are inside the content of a heredoc.
|
87
|
-
|
87
|
+
PM_LEX_HEREDOC,
|
88
88
|
|
89
89
|
// This state is used when we are lexing a list of tokens, as in a %w
|
90
90
|
// word list literal or a %i symbol list literal.
|
91
|
-
|
91
|
+
PM_LEX_LIST,
|
92
92
|
|
93
93
|
// This state is used when a regular expression has been begun and we
|
94
94
|
// are looking for the terminator.
|
95
|
-
|
95
|
+
PM_LEX_REGEXP,
|
96
96
|
|
97
97
|
// This state is used when we are lexing a string or a string-like
|
98
98
|
// token, as in string content with either quote or an xstring.
|
99
|
-
|
99
|
+
PM_LEX_STRING
|
100
100
|
} mode;
|
101
101
|
|
102
102
|
union {
|
@@ -166,8 +166,8 @@ typedef struct yp_lex_mode {
|
|
166
166
|
const uint8_t *ident_start;
|
167
167
|
size_t ident_length;
|
168
168
|
|
169
|
-
|
170
|
-
|
169
|
+
pm_heredoc_quote_t quote;
|
170
|
+
pm_heredoc_indent_t indent;
|
171
171
|
|
172
172
|
// This is the pointer to the character where lexing should resume
|
173
173
|
// once the heredoc has been completely processed.
|
@@ -176,83 +176,83 @@ typedef struct yp_lex_mode {
|
|
176
176
|
} as;
|
177
177
|
|
178
178
|
// The previous lex state so that it knows how to pop.
|
179
|
-
struct
|
180
|
-
}
|
179
|
+
struct pm_lex_mode *prev;
|
180
|
+
} pm_lex_mode_t;
|
181
181
|
|
182
182
|
// We pre-allocate a certain number of lex states in order to avoid having to
|
183
183
|
// call malloc too many times while parsing. You really shouldn't need more than
|
184
184
|
// this because you only really nest deeply when doing string interpolation.
|
185
|
-
#define
|
185
|
+
#define PM_LEX_STACK_SIZE 4
|
186
186
|
|
187
187
|
// A forward declaration since our error handler struct accepts a parser for
|
188
188
|
// each of its function calls.
|
189
|
-
typedef struct
|
189
|
+
typedef struct pm_parser pm_parser_t;
|
190
190
|
|
191
191
|
// While parsing, we keep track of a stack of contexts. This is helpful for
|
192
192
|
// error recovery so that we can pop back to a previous context when we hit a
|
193
193
|
// token that is understood by a parent context but not by the current context.
|
194
194
|
typedef enum {
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
}
|
195
|
+
PM_CONTEXT_BEGIN, // a begin statement
|
196
|
+
PM_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
|
197
|
+
PM_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
|
198
|
+
PM_CONTEXT_CASE_WHEN, // a case when statement
|
199
|
+
PM_CONTEXT_CASE_IN, // a case in statement
|
200
|
+
PM_CONTEXT_CLASS, // a class declaration
|
201
|
+
PM_CONTEXT_DEF, // a method definition
|
202
|
+
PM_CONTEXT_DEF_PARAMS, // a method definition's parameters
|
203
|
+
PM_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
|
204
|
+
PM_CONTEXT_ELSE, // an else clause
|
205
|
+
PM_CONTEXT_ELSIF, // an elsif clause
|
206
|
+
PM_CONTEXT_EMBEXPR, // an interpolated expression
|
207
|
+
PM_CONTEXT_ENSURE, // an ensure statement
|
208
|
+
PM_CONTEXT_FOR, // a for loop
|
209
|
+
PM_CONTEXT_IF, // an if statement
|
210
|
+
PM_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
|
211
|
+
PM_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
|
212
|
+
PM_CONTEXT_MAIN, // the top level context
|
213
|
+
PM_CONTEXT_MODULE, // a module declaration
|
214
|
+
PM_CONTEXT_PARENS, // a parenthesized expression
|
215
|
+
PM_CONTEXT_POSTEXE, // an END block
|
216
|
+
PM_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
|
217
|
+
PM_CONTEXT_PREEXE, // a BEGIN block
|
218
|
+
PM_CONTEXT_RESCUE_ELSE, // a rescue else statement
|
219
|
+
PM_CONTEXT_RESCUE, // a rescue statement
|
220
|
+
PM_CONTEXT_SCLASS, // a singleton class definition
|
221
|
+
PM_CONTEXT_UNLESS, // an unless statement
|
222
|
+
PM_CONTEXT_UNTIL, // an until statement
|
223
|
+
PM_CONTEXT_WHILE, // a while statement
|
224
|
+
} pm_context_t;
|
225
225
|
|
226
226
|
// This is a node in a linked list of contexts.
|
227
|
-
typedef struct
|
228
|
-
|
229
|
-
struct
|
230
|
-
}
|
227
|
+
typedef struct pm_context_node {
|
228
|
+
pm_context_t context;
|
229
|
+
struct pm_context_node *prev;
|
230
|
+
} pm_context_node_t;
|
231
231
|
|
232
232
|
// This is the type of a comment that we've found while parsing.
|
233
233
|
typedef enum {
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
}
|
234
|
+
PM_COMMENT_INLINE,
|
235
|
+
PM_COMMENT_EMBDOC,
|
236
|
+
PM_COMMENT___END__
|
237
|
+
} pm_comment_type_t;
|
238
238
|
|
239
239
|
// This is a node in the linked list of comments that we've found while parsing.
|
240
|
-
typedef struct
|
241
|
-
|
240
|
+
typedef struct pm_comment {
|
241
|
+
pm_list_node_t node;
|
242
242
|
const uint8_t *start;
|
243
243
|
const uint8_t *end;
|
244
|
-
|
245
|
-
}
|
244
|
+
pm_comment_type_t type;
|
245
|
+
} pm_comment_t;
|
246
246
|
|
247
|
-
// When the encoding that is being used to parse the source is changed by
|
247
|
+
// When the encoding that is being used to parse the source is changed by prism,
|
248
248
|
// we provide the ability here to call out to a user-defined function.
|
249
|
-
typedef void (*
|
249
|
+
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
|
250
250
|
|
251
|
-
// When an encoding is encountered that isn't understood by
|
251
|
+
// When an encoding is encountered that isn't understood by prism, we provide
|
252
252
|
// the ability here to call out to a user-defined function to get an encoding
|
253
253
|
// struct. If the function returns something that isn't NULL, we set that to
|
254
254
|
// our encoding and use it to parse identifiers.
|
255
|
-
typedef
|
255
|
+
typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
|
256
256
|
|
257
257
|
// When you are lexing through a file, the lexer needs all of the information
|
258
258
|
// that the parser additionally provides (for example, the local table). So if
|
@@ -268,17 +268,17 @@ typedef struct {
|
|
268
268
|
|
269
269
|
// This is the callback that is called when a token is lexed. It is passed
|
270
270
|
// the opaque data pointer, the parser, and the token that was lexed.
|
271
|
-
void (*callback)(void *data,
|
272
|
-
}
|
271
|
+
void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
|
272
|
+
} pm_lex_callback_t;
|
273
273
|
|
274
274
|
// This struct represents a node in a linked list of scopes. Some scopes can see
|
275
275
|
// into their parent scopes, while others cannot.
|
276
|
-
typedef struct
|
276
|
+
typedef struct pm_scope {
|
277
277
|
// The IDs of the locals in the given scope.
|
278
|
-
|
278
|
+
pm_constant_id_list_t locals;
|
279
279
|
|
280
280
|
// A pointer to the previous scope in the linked list.
|
281
|
-
struct
|
281
|
+
struct pm_scope *previous;
|
282
282
|
|
283
283
|
// A boolean indicating whether or not this scope can see into its parent.
|
284
284
|
// If closed is true, then the scope cannot see into its parent.
|
@@ -293,14 +293,14 @@ typedef struct yp_scope {
|
|
293
293
|
// This is necessary to determine if child blocks are allowed to use
|
294
294
|
// numbered parameters.
|
295
295
|
bool numbered_params;
|
296
|
-
}
|
296
|
+
} pm_scope_t;
|
297
297
|
|
298
298
|
// This struct represents the overall parser. It contains a reference to the
|
299
299
|
// source file, as well as pointers that indicate where in the source it's
|
300
300
|
// currently parsing. It also contains the most recent and current token that
|
301
301
|
// it's considering.
|
302
|
-
struct
|
303
|
-
|
302
|
+
struct pm_parser {
|
303
|
+
pm_lex_state_t lex_state; // the current state of the lexer
|
304
304
|
int enclosure_nesting; // tracks the current nesting of (), [], and {}
|
305
305
|
|
306
306
|
// Used to temporarily track the nesting of enclosures to determine if a {
|
@@ -313,22 +313,22 @@ struct yp_parser {
|
|
313
313
|
|
314
314
|
// the stack used to determine if a do keyword belongs to the predicate of a
|
315
315
|
// while, until, or for loop
|
316
|
-
|
316
|
+
pm_state_stack_t do_loop_stack;
|
317
317
|
|
318
318
|
// the stack used to determine if a do keyword belongs to the beginning of a
|
319
319
|
// block
|
320
|
-
|
320
|
+
pm_state_stack_t accepts_block_stack;
|
321
321
|
|
322
322
|
struct {
|
323
|
-
|
324
|
-
|
323
|
+
pm_lex_mode_t *current; // the current mode of the lexer
|
324
|
+
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; // the stack of lexer modes
|
325
325
|
size_t index; // the current index into the lexer mode stack
|
326
326
|
} lex_modes;
|
327
327
|
|
328
328
|
const uint8_t *start; // the pointer to the start of the source
|
329
329
|
const uint8_t *end; // the pointer to the end of the source
|
330
|
-
|
331
|
-
|
330
|
+
pm_token_t previous; // the previous token we were considering
|
331
|
+
pm_token_t current; // the current token we're considering
|
332
332
|
|
333
333
|
// This is a special field set on the parser when we need the parser to jump
|
334
334
|
// to a specific location when lexing the next token, as opposed to just
|
@@ -341,26 +341,27 @@ struct yp_parser {
|
|
341
341
|
// found on a line then this is NULL.
|
342
342
|
const uint8_t *heredoc_end;
|
343
343
|
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
344
|
+
pm_list_t comment_list; // the list of comments that have been found while parsing
|
345
|
+
pm_list_t warning_list; // the list of warnings that have been found while parsing
|
346
|
+
pm_list_t error_list; // the list of errors that have been found while parsing
|
347
|
+
pm_scope_t *current_scope; // the current local scope
|
348
348
|
|
349
|
-
|
349
|
+
pm_context_node_t *current_context; // the current parsing context
|
350
350
|
|
351
351
|
// The encoding functions for the current file are attached to the parser as
|
352
352
|
// it's parsing so that it can change with a magic comment.
|
353
|
-
|
353
|
+
pm_encoding_t encoding;
|
354
354
|
|
355
355
|
// When the encoding that is being used to parse the source is changed by
|
356
|
-
//
|
357
|
-
|
356
|
+
// prism, we provide the ability here to call out to a user-defined
|
357
|
+
// function.
|
358
|
+
pm_encoding_changed_callback_t encoding_changed_callback;
|
358
359
|
|
359
|
-
// When an encoding is encountered that isn't understood by
|
360
|
-
// the ability here to call out to a user-defined function to get an
|
360
|
+
// When an encoding is encountered that isn't understood by prism, we
|
361
|
+
// provide the ability here to call out to a user-defined function to get an
|
361
362
|
// encoding struct. If the function returns something that isn't NULL, we
|
362
363
|
// set that to our encoding and use it to parse identifiers.
|
363
|
-
|
364
|
+
pm_encoding_decode_callback_t encoding_decode_callback;
|
364
365
|
|
365
366
|
// This pointer indicates where a comment must start if it is to be
|
366
367
|
// considered an encoding comment.
|
@@ -368,24 +369,24 @@ struct yp_parser {
|
|
368
369
|
|
369
370
|
// This is an optional callback that can be attached to the parser that will
|
370
371
|
// be called whenever a new token is lexed by the parser.
|
371
|
-
|
372
|
+
pm_lex_callback_t *lex_callback;
|
372
373
|
|
373
374
|
// This is the path of the file being parsed
|
374
375
|
// We use the filepath when constructing SourceFileNodes
|
375
|
-
|
376
|
+
pm_string_t filepath_string;
|
376
377
|
|
377
378
|
// This constant pool keeps all of the constants defined throughout the file
|
378
379
|
// so that we can reference them later.
|
379
|
-
|
380
|
+
pm_constant_pool_t constant_pool;
|
380
381
|
|
381
382
|
// This is the list of newline offsets in the source file.
|
382
|
-
|
383
|
+
pm_newline_list_t newline_list;
|
383
384
|
|
384
385
|
// We want to add a flag to integer nodes that indicates their base. We only
|
385
386
|
// want to parse these once, but we don't have space on the token itself to
|
386
387
|
// communicate this information. So we store it here and pass it through
|
387
388
|
// when we find tokens that we need it for.
|
388
|
-
|
389
|
+
pm_node_flags_t integer_base;
|
389
390
|
|
390
391
|
// Whether or not we're at the beginning of a command
|
391
392
|
bool command_start;
|
@@ -414,4 +415,4 @@ struct yp_parser {
|
|
414
415
|
bool frozen_string_literal;
|
415
416
|
};
|
416
417
|
|
417
|
-
#endif //
|
418
|
+
#endif // PRISM_PARSER_H
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#ifndef PRISM_REGEXP_H
|
2
|
+
#define PRISM_REGEXP_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
#include "prism/parser.h"
|
6
|
+
#include "prism/enc/pm_encoding.h"
|
7
|
+
#include "prism/util/pm_memchr.h"
|
8
|
+
#include "prism/util/pm_string_list.h"
|
9
|
+
#include "prism/util/pm_string.h"
|
10
|
+
|
11
|
+
#include <stdbool.h>
|
12
|
+
#include <stddef.h>
|
13
|
+
#include <string.h>
|
14
|
+
|
15
|
+
// Parse a regular expression and extract the names of all of the named capture
|
16
|
+
// groups.
|
17
|
+
PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
|
18
|
+
|
19
|
+
#endif
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#ifndef PRISM_UNESCAPE_H
|
2
|
+
#define PRISM_UNESCAPE_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
#include "prism/diagnostic.h"
|
6
|
+
#include "prism/parser.h"
|
7
|
+
#include "prism/util/pm_char.h"
|
8
|
+
#include "prism/util/pm_list.h"
|
9
|
+
#include "prism/util/pm_memchr.h"
|
10
|
+
#include "prism/util/pm_string.h"
|
11
|
+
|
12
|
+
#include <assert.h>
|
13
|
+
#include <stdbool.h>
|
14
|
+
#include <stdint.h>
|
15
|
+
#include <string.h>
|
16
|
+
|
17
|
+
// The type of unescape we are performing.
|
18
|
+
typedef enum {
|
19
|
+
// When we're creating a string inside of a list literal like %w, we
|
20
|
+
// shouldn't escape anything.
|
21
|
+
PM_UNESCAPE_NONE,
|
22
|
+
|
23
|
+
// When we're unescaping a single-quoted string, we only need to unescape
|
24
|
+
// single quotes and backslashes.
|
25
|
+
PM_UNESCAPE_MINIMAL,
|
26
|
+
|
27
|
+
// When we're unescaping a string list, in addition to MINIMAL, we need to
|
28
|
+
// unescape whitespace.
|
29
|
+
PM_UNESCAPE_WHITESPACE,
|
30
|
+
|
31
|
+
// When we're unescaping a double-quoted string, we need to unescape all
|
32
|
+
// escapes.
|
33
|
+
PM_UNESCAPE_ALL,
|
34
|
+
} pm_unescape_type_t;
|
35
|
+
|
36
|
+
// Unescape the contents of the given token into the given string using the given unescape mode.
|
37
|
+
PRISM_EXPORTED_FUNCTION void pm_unescape_manipulate_string(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
|
38
|
+
void pm_unescape_manipulate_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
|
39
|
+
|
40
|
+
// Accepts a source string and a type of unescaping and returns the unescaped version.
|
41
|
+
// The caller must pm_string_free(result); after calling this function.
|
42
|
+
PRISM_EXPORTED_FUNCTION bool pm_unescape_string(const uint8_t *start, size_t length, pm_unescape_type_t unescape_type, pm_string_t *result);
|
43
|
+
|
44
|
+
// Returns the number of bytes that encompass the first escape sequence in the
|
45
|
+
// given string.
|
46
|
+
size_t pm_unescape_calculate_difference(pm_parser_t *parser, const uint8_t *value, pm_unescape_type_t unescape_type, bool expect_single_codepoint);
|
47
|
+
|
48
|
+
#endif
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#ifndef PRISM_BUFFER_H
|
2
|
+
#define PRISM_BUFFER_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
|
6
|
+
#include <assert.h>
|
7
|
+
#include <stdbool.h>
|
8
|
+
#include <stdint.h>
|
9
|
+
#include <stdlib.h>
|
10
|
+
#include <string.h>
|
11
|
+
|
12
|
+
// A pm_buffer_t is a simple memory buffer that stores data in a contiguous
|
13
|
+
// block of memory. It is used to store the serialized representation of a
|
14
|
+
// prism tree.
|
15
|
+
typedef struct {
|
16
|
+
char *value;
|
17
|
+
size_t length;
|
18
|
+
size_t capacity;
|
19
|
+
} pm_buffer_t;
|
20
|
+
|
21
|
+
// Return the size of the pm_buffer_t struct.
|
22
|
+
PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
|
23
|
+
|
24
|
+
// Initialize a pm_buffer_t with its default values.
|
25
|
+
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
|
26
|
+
|
27
|
+
// Return the value of the buffer.
|
28
|
+
PRISM_EXPORTED_FUNCTION char * pm_buffer_value(pm_buffer_t *buffer);
|
29
|
+
|
30
|
+
// Return the length of the buffer.
|
31
|
+
PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(pm_buffer_t *buffer);
|
32
|
+
|
33
|
+
// Append the given amount of space as zeroes to the buffer.
|
34
|
+
void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
|
35
|
+
|
36
|
+
// Append a string to the buffer.
|
37
|
+
void pm_buffer_append_str(pm_buffer_t *buffer, const char *value, size_t length);
|
38
|
+
|
39
|
+
// Append a list of bytes to the buffer.
|
40
|
+
void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
|
41
|
+
|
42
|
+
// Append a single byte to the buffer.
|
43
|
+
void pm_buffer_append_u8(pm_buffer_t *buffer, uint8_t value);
|
44
|
+
|
45
|
+
// Append a 32-bit unsigned integer to the buffer.
|
46
|
+
void pm_buffer_append_u32(pm_buffer_t *buffer, uint32_t value);
|
47
|
+
|
48
|
+
// Free the memory associated with the buffer.
|
49
|
+
PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
|
50
|
+
|
51
|
+
#endif
|