prism 0.18.0 → 0.19.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -1
- data/README.md +2 -1
- data/config.yml +188 -55
- data/docs/building.md +9 -2
- data/docs/configuration.md +10 -9
- data/docs/encoding.md +24 -56
- data/docs/local_variable_depth.md +229 -0
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +18 -13
- data/ext/prism/api_node.c +337 -195
- data/ext/prism/extconf.rb +13 -7
- data/ext/prism/extension.c +96 -32
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +340 -137
- data/include/prism/defines.h +17 -0
- data/include/prism/diagnostic.h +11 -5
- data/include/prism/encoding.h +248 -0
- data/include/prism/options.h +2 -2
- data/include/prism/parser.h +62 -42
- data/include/prism/regexp.h +2 -2
- data/include/prism/util/pm_buffer.h +9 -1
- data/include/prism/util/pm_memchr.h +2 -2
- data/include/prism/util/pm_strpbrk.h +3 -3
- data/include/prism/version.h +2 -2
- data/include/prism.h +13 -15
- data/lib/prism/compiler.rb +12 -0
- data/lib/prism/debug.rb +9 -4
- data/lib/prism/desugar_compiler.rb +3 -3
- data/lib/prism/dispatcher.rb +56 -0
- data/lib/prism/dot_visitor.rb +476 -198
- data/lib/prism/dsl.rb +66 -46
- data/lib/prism/ffi.rb +16 -3
- data/lib/prism/lex_compat.rb +19 -9
- data/lib/prism/mutation_compiler.rb +20 -0
- data/lib/prism/node.rb +1173 -450
- data/lib/prism/node_ext.rb +41 -16
- data/lib/prism/parse_result.rb +12 -15
- data/lib/prism/ripper_compat.rb +49 -34
- data/lib/prism/serialize.rb +242 -212
- data/lib/prism/visitor.rb +12 -0
- data/lib/prism.rb +20 -4
- data/prism.gemspec +4 -10
- data/rbi/prism.rbi +605 -230
- data/rbi/prism_static.rbi +3 -0
- data/sig/prism.rbs +379 -124
- data/sig/prism_static.rbs +1 -0
- data/src/diagnostic.c +228 -222
- data/src/encoding.c +5137 -0
- data/src/node.c +66 -0
- data/src/options.c +21 -2
- data/src/prettyprint.c +806 -406
- data/src/prism.c +1092 -700
- data/src/regexp.c +3 -3
- data/src/serialize.c +227 -157
- data/src/util/pm_buffer.c +10 -1
- data/src/util/pm_memchr.c +1 -1
- data/src/util/pm_strpbrk.c +4 -4
- metadata +5 -11
- data/include/prism/enc/pm_encoding.h +0 -227
- data/src/enc/pm_big5.c +0 -116
- data/src/enc/pm_cp51932.c +0 -57
- data/src/enc/pm_euc_jp.c +0 -69
- data/src/enc/pm_gbk.c +0 -65
- data/src/enc/pm_shift_jis.c +0 -57
- data/src/enc/pm_tables.c +0 -2073
- data/src/enc/pm_unicode.c +0 -2369
- data/src/enc/pm_windows_31j.c +0 -57
data/include/prism/defines.h
CHANGED
@@ -74,4 +74,21 @@
|
|
74
74
|
# define snprintf _snprintf
|
75
75
|
#endif
|
76
76
|
|
77
|
+
/**
|
78
|
+
* A simple utility macro to concatenate two tokens together, necessary when one
|
79
|
+
* of the tokens is itself a macro.
|
80
|
+
*/
|
81
|
+
#define PM_CONCATENATE(left, right) left ## right
|
82
|
+
|
83
|
+
/**
|
84
|
+
* We want to be able to use static assertions, but they weren't standardized
|
85
|
+
* until C11. As such, we polyfill it here by making a hacky typedef that will
|
86
|
+
* fail to compile due to a negative array size if the condition is false.
|
87
|
+
*/
|
88
|
+
// _Static_assert is a keyword in C11, not a macro, so defined() alone never
// detects it on a conforming compiler. Check the language version as well, and
// keep the defined() test for toolchains that supply it as a macro. When
// neither holds, fall back to a typedef whose array size is negative if the
// condition is false (the message is unused in that case).
#if defined(_Static_assert) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))
# define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
#else
# define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
#endif
|
93
|
+
|
77
94
|
#endif
|
data/include/prism/diagnostic.h
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#ifndef PRISM_DIAGNOSTIC_H
|
7
7
|
#define PRISM_DIAGNOSTIC_H
|
8
8
|
|
9
|
+
#include "prism/ast.h"
|
9
10
|
#include "prism/defines.h"
|
10
11
|
#include "prism/util/pm_list.h"
|
11
12
|
|
@@ -22,11 +23,8 @@ typedef struct {
|
|
22
23
|
/** The embedded base node. */
|
23
24
|
pm_list_node_t node;
|
24
25
|
|
25
|
-
/**
|
26
|
-
|
27
|
-
|
28
|
-
/** A pointer to the end of the source that generated the diagnostic. */
|
29
|
-
const uint8_t *end;
|
26
|
+
/** The location of the diagnostic in the source. */
|
27
|
+
pm_location_t location;
|
30
28
|
|
31
29
|
/** The message associated with the diagnostic. */
|
32
30
|
const char *message;
|
@@ -179,6 +177,7 @@ typedef enum {
|
|
179
177
|
PM_ERR_LIST_W_UPPER_ELEMENT,
|
180
178
|
PM_ERR_LIST_W_UPPER_TERM,
|
181
179
|
PM_ERR_MALLOC_FAILED,
|
180
|
+
PM_ERR_MIXED_ENCODING,
|
182
181
|
PM_ERR_MODULE_IN_METHOD,
|
183
182
|
PM_ERR_MODULE_NAME,
|
184
183
|
PM_ERR_MODULE_TERM,
|
@@ -192,6 +191,7 @@ typedef enum {
|
|
192
191
|
PM_ERR_OPERATOR_WRITE_BLOCK,
|
193
192
|
PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
|
194
193
|
PM_ERR_PARAMETER_BLOCK_MULTI,
|
194
|
+
PM_ERR_PARAMETER_CIRCULAR,
|
195
195
|
PM_ERR_PARAMETER_METHOD_NAME,
|
196
196
|
PM_ERR_PARAMETER_NAME_REPEAT,
|
197
197
|
PM_ERR_PARAMETER_NO_DEFAULT,
|
@@ -211,6 +211,7 @@ typedef enum {
|
|
211
211
|
PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
|
212
212
|
PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
|
213
213
|
PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
|
214
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
|
214
215
|
PM_ERR_PATTERN_HASH_KEY,
|
215
216
|
PM_ERR_PATTERN_HASH_KEY_LABEL,
|
216
217
|
PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
|
@@ -226,6 +227,10 @@ typedef enum {
|
|
226
227
|
PM_ERR_RESCUE_TERM,
|
227
228
|
PM_ERR_RESCUE_VARIABLE,
|
228
229
|
PM_ERR_RETURN_INVALID,
|
230
|
+
PM_ERR_STATEMENT_ALIAS,
|
231
|
+
PM_ERR_STATEMENT_POSTEXE_END,
|
232
|
+
PM_ERR_STATEMENT_PREEXE_BEGIN,
|
233
|
+
PM_ERR_STATEMENT_UNDEF,
|
229
234
|
PM_ERR_STRING_CONCATENATION,
|
230
235
|
PM_ERR_STRING_INTERPOLATED_TERM,
|
231
236
|
PM_ERR_STRING_LITERAL_TERM,
|
@@ -243,6 +248,7 @@ typedef enum {
|
|
243
248
|
PM_ERR_UNTIL_TERM,
|
244
249
|
PM_ERR_VOID_EXPRESSION,
|
245
250
|
PM_ERR_WHILE_TERM,
|
251
|
+
PM_ERR_WRITE_TARGET_IN_METHOD,
|
246
252
|
PM_ERR_WRITE_TARGET_READONLY,
|
247
253
|
PM_ERR_WRITE_TARGET_UNEXPECTED,
|
248
254
|
PM_ERR_XSTRING_TERM,
|
@@ -0,0 +1,248 @@
|
|
1
|
+
/**
|
2
|
+
* @file encoding.h
|
3
|
+
*
|
4
|
+
* The encoding interface and implementations used by the parser.
|
5
|
+
*/
|
6
|
+
#ifndef PRISM_ENCODING_H
|
7
|
+
#define PRISM_ENCODING_H
|
8
|
+
|
9
|
+
#include "prism/defines.h"
|
10
|
+
#include "prism/util/pm_strncasecmp.h"
|
11
|
+
|
12
|
+
#include <assert.h>
|
13
|
+
#include <stdbool.h>
|
14
|
+
#include <stddef.h>
|
15
|
+
#include <stdint.h>
|
16
|
+
|
17
|
+
/**
|
18
|
+
* This struct defines the functions necessary to implement the encoding
|
19
|
+
* interface so we can determine how many bytes the subsequent character takes.
|
20
|
+
* Each callback should return the number of bytes, or 0 if the next bytes are
|
21
|
+
* invalid for the encoding and type.
|
22
|
+
*/
|
23
|
+
typedef struct {
|
24
|
+
/**
|
25
|
+
* Return the number of bytes that the next character takes if it is valid
|
26
|
+
* in the encoding. Does not read more than n bytes. It is assumed that n is
|
27
|
+
* at least 1.
|
28
|
+
*/
|
29
|
+
size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
|
30
|
+
|
31
|
+
/**
|
32
|
+
* Return the number of bytes that the next character takes if it is valid
|
33
|
+
* in the encoding and is alphabetical. Does not read more than n bytes. It
|
34
|
+
* is assumed that n is at least 1.
|
35
|
+
*/
|
36
|
+
size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
|
37
|
+
|
38
|
+
/**
|
39
|
+
* Return the number of bytes that the next character takes if it is valid
|
40
|
+
* in the encoding and is alphanumeric. Does not read more than n bytes. It
|
41
|
+
* is assumed that n is at least 1.
|
42
|
+
*/
|
43
|
+
size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
|
44
|
+
|
45
|
+
/**
|
46
|
+
* Return true if the next character is valid in the encoding and is an
|
47
|
+
* uppercase character. Does not read more than n bytes. It is assumed that
|
48
|
+
* n is at least 1.
|
49
|
+
*/
|
50
|
+
bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
|
51
|
+
|
52
|
+
/**
|
53
|
+
* The name of the encoding. This should correspond to a value that can be
|
54
|
+
* passed to Encoding.find in Ruby.
|
55
|
+
*/
|
56
|
+
const char *name;
|
57
|
+
|
58
|
+
/**
|
59
|
+
* True if the encoding is a multibyte encoding.
|
60
|
+
*/
|
61
|
+
bool multibyte;
|
62
|
+
} pm_encoding_t;
|
63
|
+
|
64
|
+
/**
|
65
|
+
* All of the lookup tables use the first bit of each embedded byte to indicate
|
66
|
+
* whether the codepoint is alphabetical.
|
67
|
+
*/
|
68
|
+
// Parenthesized so the mask keeps its value when used inside larger expressions.
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)
|
69
|
+
|
70
|
+
/**
|
71
|
+
* All of the lookup tables use the second bit of each embedded byte to indicate
|
72
|
+
* whether the codepoint is alphanumeric.
|
73
|
+
*/
|
74
|
+
// Parenthesized so the mask keeps its value when used inside larger expressions.
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)
|
75
|
+
|
76
|
+
/**
|
77
|
+
* All of the lookup tables use the third bit of each embedded byte to indicate
|
78
|
+
* whether the codepoint is uppercase.
|
79
|
+
*/
|
80
|
+
// Parenthesized so the mask keeps its value when used inside larger expressions.
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
|
81
|
+
|
82
|
+
/**
|
83
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
84
|
+
* alphabetical character.
|
85
|
+
*
|
86
|
+
* @param b The bytes to read.
|
87
|
+
* @param n The number of bytes that can be read.
|
88
|
+
* @returns The number of bytes that the next character takes if it is valid in
|
89
|
+
* the encoding, or 0 if it is not.
|
90
|
+
*/
|
91
|
+
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
|
92
|
+
|
93
|
+
/**
|
94
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
95
|
+
* alphanumeric character.
|
96
|
+
*
|
97
|
+
* @param b The bytes to read.
|
98
|
+
* @param n The number of bytes that can be read.
|
99
|
+
* @returns The number of bytes that the next character takes if it is valid in
|
100
|
+
* the encoding, or 0 if it is not.
|
101
|
+
*/
|
102
|
+
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
|
103
|
+
|
104
|
+
/**
|
105
|
+
* Return true if the next character in the UTF-8 encoding is an uppercase
|
106
|
+
* character.
|
107
|
+
*
|
108
|
+
* @param b The bytes to read.
|
109
|
+
* @param n The number of bytes that can be read.
|
110
|
+
* @returns True if the next character is valid in the encoding and is an
|
111
|
+
* uppercase character, or false if it is not.
|
112
|
+
*/
|
113
|
+
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
|
114
|
+
|
115
|
+
/**
|
116
|
+
* This lookup table is referenced in both the UTF-8 encoding file and the
|
117
|
+
* parser directly in order to speed up the default encoding processing. It is
|
118
|
+
* used to indicate whether a character is alphabetical, alphanumeric, or
|
119
|
+
* uppercase in unicode mappings.
|
120
|
+
*/
|
121
|
+
extern const uint8_t pm_encoding_unicode_table[256];
|
122
|
+
|
123
|
+
/**
|
124
|
+
* These are all of the encodings that prism supports.
|
125
|
+
*/
|
126
|
+
typedef enum {
|
127
|
+
PM_ENCODING_UTF_8 = 0,
|
128
|
+
PM_ENCODING_ASCII_8BIT,
|
129
|
+
PM_ENCODING_BIG5,
|
130
|
+
PM_ENCODING_BIG5_HKSCS,
|
131
|
+
PM_ENCODING_BIG5_UAO,
|
132
|
+
PM_ENCODING_CESU_8,
|
133
|
+
PM_ENCODING_CP51932,
|
134
|
+
PM_ENCODING_CP850,
|
135
|
+
PM_ENCODING_CP852,
|
136
|
+
PM_ENCODING_CP855,
|
137
|
+
PM_ENCODING_CP949,
|
138
|
+
PM_ENCODING_CP950,
|
139
|
+
PM_ENCODING_CP951,
|
140
|
+
PM_ENCODING_EMACS_MULE,
|
141
|
+
PM_ENCODING_EUC_JP,
|
142
|
+
PM_ENCODING_EUC_JP_MS,
|
143
|
+
PM_ENCODING_EUC_JIS_2004,
|
144
|
+
PM_ENCODING_EUC_KR,
|
145
|
+
PM_ENCODING_EUC_TW,
|
146
|
+
PM_ENCODING_GB12345,
|
147
|
+
PM_ENCODING_GB18030,
|
148
|
+
PM_ENCODING_GB1988,
|
149
|
+
PM_ENCODING_GB2312,
|
150
|
+
PM_ENCODING_GBK,
|
151
|
+
PM_ENCODING_IBM437,
|
152
|
+
PM_ENCODING_IBM720,
|
153
|
+
PM_ENCODING_IBM737,
|
154
|
+
PM_ENCODING_IBM775,
|
155
|
+
PM_ENCODING_IBM852,
|
156
|
+
PM_ENCODING_IBM855,
|
157
|
+
PM_ENCODING_IBM857,
|
158
|
+
PM_ENCODING_IBM860,
|
159
|
+
PM_ENCODING_IBM861,
|
160
|
+
PM_ENCODING_IBM862,
|
161
|
+
PM_ENCODING_IBM863,
|
162
|
+
PM_ENCODING_IBM864,
|
163
|
+
PM_ENCODING_IBM865,
|
164
|
+
PM_ENCODING_IBM866,
|
165
|
+
PM_ENCODING_IBM869,
|
166
|
+
PM_ENCODING_ISO_8859_1,
|
167
|
+
PM_ENCODING_ISO_8859_2,
|
168
|
+
PM_ENCODING_ISO_8859_3,
|
169
|
+
PM_ENCODING_ISO_8859_4,
|
170
|
+
PM_ENCODING_ISO_8859_5,
|
171
|
+
PM_ENCODING_ISO_8859_6,
|
172
|
+
PM_ENCODING_ISO_8859_7,
|
173
|
+
PM_ENCODING_ISO_8859_8,
|
174
|
+
PM_ENCODING_ISO_8859_9,
|
175
|
+
PM_ENCODING_ISO_8859_10,
|
176
|
+
PM_ENCODING_ISO_8859_11,
|
177
|
+
PM_ENCODING_ISO_8859_13,
|
178
|
+
PM_ENCODING_ISO_8859_14,
|
179
|
+
PM_ENCODING_ISO_8859_15,
|
180
|
+
PM_ENCODING_ISO_8859_16,
|
181
|
+
PM_ENCODING_KOI8_R,
|
182
|
+
PM_ENCODING_KOI8_U,
|
183
|
+
PM_ENCODING_MAC_CENT_EURO,
|
184
|
+
PM_ENCODING_MAC_CROATIAN,
|
185
|
+
PM_ENCODING_MAC_CYRILLIC,
|
186
|
+
PM_ENCODING_MAC_GREEK,
|
187
|
+
PM_ENCODING_MAC_ICELAND,
|
188
|
+
PM_ENCODING_MAC_JAPANESE,
|
189
|
+
PM_ENCODING_MAC_ROMAN,
|
190
|
+
PM_ENCODING_MAC_ROMANIA,
|
191
|
+
PM_ENCODING_MAC_THAI,
|
192
|
+
PM_ENCODING_MAC_TURKISH,
|
193
|
+
PM_ENCODING_MAC_UKRAINE,
|
194
|
+
PM_ENCODING_SHIFT_JIS,
|
195
|
+
PM_ENCODING_SJIS_DOCOMO,
|
196
|
+
PM_ENCODING_SJIS_KDDI,
|
197
|
+
PM_ENCODING_SJIS_SOFTBANK,
|
198
|
+
PM_ENCODING_STATELESS_ISO_2022_JP,
|
199
|
+
PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
|
200
|
+
PM_ENCODING_TIS_620,
|
201
|
+
PM_ENCODING_US_ASCII,
|
202
|
+
PM_ENCODING_UTF8_MAC,
|
203
|
+
PM_ENCODING_UTF8_DOCOMO,
|
204
|
+
PM_ENCODING_UTF8_KDDI,
|
205
|
+
PM_ENCODING_UTF8_SOFTBANK,
|
206
|
+
PM_ENCODING_WINDOWS_1250,
|
207
|
+
PM_ENCODING_WINDOWS_1251,
|
208
|
+
PM_ENCODING_WINDOWS_1252,
|
209
|
+
PM_ENCODING_WINDOWS_1253,
|
210
|
+
PM_ENCODING_WINDOWS_1254,
|
211
|
+
PM_ENCODING_WINDOWS_1255,
|
212
|
+
PM_ENCODING_WINDOWS_1256,
|
213
|
+
PM_ENCODING_WINDOWS_1257,
|
214
|
+
PM_ENCODING_WINDOWS_1258,
|
215
|
+
PM_ENCODING_WINDOWS_31J,
|
216
|
+
PM_ENCODING_WINDOWS_874,
|
217
|
+
PM_ENCODING_MAXIMUM
|
218
|
+
} pm_encoding_type_t;
|
219
|
+
|
220
|
+
/**
|
221
|
+
* This is the table of all of the encodings that prism supports.
|
222
|
+
*/
|
223
|
+
extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
|
224
|
+
|
225
|
+
/**
|
226
|
+
* This is the default UTF-8 encoding. We need a reference to it to quickly
|
227
|
+
* create parsers.
|
228
|
+
*/
|
229
|
+
#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
|
230
|
+
|
231
|
+
/**
|
232
|
+
* This is the US-ASCII encoding. We need a reference to it to be able to
|
233
|
+
* compare against it when a string is being created because it could possibly
|
234
|
+
* need to fall back to ASCII-8BIT.
|
235
|
+
*/
|
236
|
+
#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
|
237
|
+
|
238
|
+
/**
|
239
|
+
* Parse the given name of an encoding and return a pointer to the corresponding
|
240
|
+
* encoding struct if one can be found, otherwise return NULL.
|
241
|
+
*
|
242
|
+
* @param start A pointer to the first byte of the name.
|
243
|
+
* @param end A pointer to the last byte of the name.
|
244
|
+
* @returns A pointer to the encoding struct if one is found, otherwise NULL.
|
245
|
+
*/
|
246
|
+
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
|
247
|
+
|
248
|
+
#endif
|
data/include/prism/options.h
CHANGED
@@ -35,7 +35,7 @@ typedef struct {
|
|
35
35
|
* The line within the file that the parse starts on. This value is
|
36
36
|
* 0-indexed.
|
37
37
|
*/
|
38
|
-
|
38
|
+
int32_t line;
|
39
39
|
|
40
40
|
/**
|
41
41
|
* The name of the encoding that the source file is in. Note that this must
|
@@ -80,7 +80,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons
|
|
80
80
|
* @param options The options struct to set the line on.
|
81
81
|
* @param line The line to set.
|
82
82
|
*/
|
83
|
-
PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options,
|
83
|
+
PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
|
84
84
|
|
85
85
|
/**
|
86
86
|
* Set the encoding option on the given options struct.
|
data/include/prism/parser.h
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
|
9
9
|
#include "prism/ast.h"
|
10
10
|
#include "prism/defines.h"
|
11
|
-
#include "prism/
|
11
|
+
#include "prism/encoding.h"
|
12
12
|
#include "prism/util/pm_constant_pool.h"
|
13
13
|
#include "prism/util/pm_list.h"
|
14
14
|
#include "prism/util/pm_newline_list.h"
|
@@ -17,6 +17,12 @@
|
|
17
17
|
|
18
18
|
#include <stdbool.h>
|
19
19
|
|
20
|
+
// TODO: remove this by renaming the original flag
|
21
|
+
/**
|
22
|
+
* Temporary alias for the PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS flag.
|
23
|
+
*/
|
24
|
+
#define PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS
|
25
|
+
|
20
26
|
/**
|
21
27
|
* This enum provides various bits that represent different kinds of states that
|
22
28
|
* the lexer can track. This is used to determine which kind of token to return
|
@@ -297,6 +303,9 @@ typedef enum {
|
|
297
303
|
/** an ensure statement */
|
298
304
|
PM_CONTEXT_ENSURE,
|
299
305
|
|
306
|
+
/** an ensure statement within a method definition */
|
307
|
+
PM_CONTEXT_ENSURE_DEF,
|
308
|
+
|
300
309
|
/** a for loop */
|
301
310
|
PM_CONTEXT_FOR,
|
302
311
|
|
@@ -333,9 +342,15 @@ typedef enum {
|
|
333
342
|
/** a rescue else statement */
|
334
343
|
PM_CONTEXT_RESCUE_ELSE,
|
335
344
|
|
345
|
+
/** a rescue else statement within a method definition */
|
346
|
+
PM_CONTEXT_RESCUE_ELSE_DEF,
|
347
|
+
|
336
348
|
/** a rescue statement */
|
337
349
|
PM_CONTEXT_RESCUE,
|
338
350
|
|
351
|
+
/** a rescue statement within a method definition */
|
352
|
+
PM_CONTEXT_RESCUE_DEF,
|
353
|
+
|
339
354
|
/** a singleton class definition */
|
340
355
|
PM_CONTEXT_SCLASS,
|
341
356
|
|
@@ -361,8 +376,7 @@ typedef struct pm_context_node {
|
|
361
376
|
/** This is the type of a comment that we've found while parsing. */
|
362
377
|
typedef enum {
|
363
378
|
PM_COMMENT_INLINE,
|
364
|
-
PM_COMMENT_EMBDOC
|
365
|
-
PM_COMMENT___END__
|
379
|
+
PM_COMMENT_EMBDOC
|
366
380
|
} pm_comment_type_t;
|
367
381
|
|
368
382
|
/**
|
@@ -374,11 +388,8 @@ typedef struct pm_comment {
|
|
374
388
|
/** The embedded base node. */
|
375
389
|
pm_list_node_t node;
|
376
390
|
|
377
|
-
/**
|
378
|
-
|
379
|
-
|
380
|
-
/** A pointer to the end of the comment in the source. */
|
381
|
-
const uint8_t *end;
|
391
|
+
/** The location of the comment in the source. */
|
392
|
+
pm_location_t location;
|
382
393
|
|
383
394
|
/** The type of comment that we've found. */
|
384
395
|
pm_comment_type_t type;
|
@@ -413,14 +424,6 @@ typedef struct {
|
|
413
424
|
*/
|
414
425
|
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
|
415
426
|
|
416
|
-
/**
|
417
|
-
* When an encoding is encountered that isn't understood by prism, we provide
|
418
|
-
* the ability here to call out to a user-defined function to get an encoding
|
419
|
-
* struct. If the function returns something that isn't NULL, we set that to
|
420
|
-
* our encoding and use it to parse identifiers.
|
421
|
-
*/
|
422
|
-
typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
|
423
|
-
|
424
427
|
/**
|
425
428
|
* When you are lexing through a file, the lexer needs all of the information
|
426
429
|
* that the parser additionally provides (for example, the local table). So if
|
@@ -469,18 +472,12 @@ typedef struct pm_scope {
|
|
469
472
|
bool explicit_params;
|
470
473
|
|
471
474
|
/**
|
472
|
-
*
|
475
|
+
* An integer indicating the number of numbered parameters on this scope.
|
473
476
|
* This is necessary to determine if child blocks are allowed to use
|
474
|
-
* numbered parameters
|
475
|
-
|
476
|
-
bool numbered_params;
|
477
|
-
|
478
|
-
/**
|
479
|
-
* A transparent scope is a scope that cannot have locals set on itself.
|
480
|
-
* When a local is set on this scope, it will instead be set on the parent
|
481
|
-
* scope's local table.
|
477
|
+
* numbered parameters, and to pass information to consumers of the AST
|
478
|
+
* about how many numbered parameters exist.
|
482
479
|
*/
|
483
|
-
|
480
|
+
uint8_t numbered_parameters;
|
484
481
|
} pm_scope_t;
|
485
482
|
|
486
483
|
/**
|
@@ -532,12 +529,6 @@ struct pm_parser {
|
|
532
529
|
size_t index;
|
533
530
|
} lex_modes;
|
534
531
|
|
535
|
-
/**
|
536
|
-
* The common_whitespace value from the most-recently-popped heredoc mode of the lexer, so we
|
537
|
-
* can dedent the heredoc after popping the lex mode.
|
538
|
-
*/
|
539
|
-
size_t current_string_common_whitespace;
|
540
|
-
|
541
532
|
/** The pointer to the start of the source. */
|
542
533
|
const uint8_t *start;
|
543
534
|
|
@@ -571,6 +562,9 @@ struct pm_parser {
|
|
571
562
|
/** The list of magic comments that have been found while parsing. */
|
572
563
|
pm_list_t magic_comment_list;
|
573
564
|
|
565
|
+
/** The optional location of the __END__ keyword and its contents. */
|
566
|
+
pm_location_t data_loc;
|
567
|
+
|
574
568
|
/** The list of warnings that have been found while parsing. */
|
575
569
|
pm_list_t warning_list;
|
576
570
|
|
@@ -587,7 +581,7 @@ struct pm_parser {
|
|
587
581
|
* The encoding functions for the current file are attached to the parser as
|
588
582
|
* it's parsing so that it can change with a magic comment.
|
589
583
|
*/
|
590
|
-
pm_encoding_t encoding;
|
584
|
+
const pm_encoding_t *encoding;
|
591
585
|
|
592
586
|
/**
|
593
587
|
* When the encoding that is being used to parse the source is changed by
|
@@ -596,14 +590,6 @@ struct pm_parser {
|
|
596
590
|
*/
|
597
591
|
pm_encoding_changed_callback_t encoding_changed_callback;
|
598
592
|
|
599
|
-
/**
|
600
|
-
* When an encoding is encountered that isn't understood by prism, we
|
601
|
-
* provide the ability here to call out to a user-defined function to get an
|
602
|
-
* encoding struct. If the function returns something that isn't NULL, we
|
603
|
-
* set that to our encoding and use it to parse identifiers.
|
604
|
-
*/
|
605
|
-
pm_encoding_decode_callback_t encoding_decode_callback;
|
606
|
-
|
607
593
|
/**
|
608
594
|
* This pointer indicates where a comment must start if it is to be
|
609
595
|
* considered an encoding comment.
|
@@ -649,7 +635,38 @@ struct pm_parser {
|
|
649
635
|
* The line number at the start of the parse. This will be used to offset
|
650
636
|
* the line numbers of all of the locations.
|
651
637
|
*/
|
652
|
-
|
638
|
+
int32_t start_line;
|
639
|
+
|
640
|
+
/**
|
641
|
+
* When a string-like expression is being lexed, any byte or escape sequence
|
642
|
+
* that resolves to a value whose top bit is set (i.e., >= 0x80) will
|
643
|
+
* explicitly set the encoding to the same encoding as the source.
|
644
|
+
* Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
|
645
|
+
* resolves to a value whose top bit is set, then the encoding will be
|
646
|
+
* explicitly set to UTF-8.
|
647
|
+
*
|
648
|
+
* The _next_ time this happens, if the encoding that is about to become the
|
649
|
+
* explicitly set encoding does not match the previously set explicit
|
650
|
+
* encoding, a mixed encoding error will be emitted.
|
651
|
+
*
|
652
|
+
* When the expression is finished being lexed, the explicit encoding
|
653
|
+
* controls the encoding of the expression. For the most part this means
|
654
|
+
* that the expression will either be encoded in the source encoding or
|
655
|
+
* UTF-8. This holds for all encodings except US-ASCII. If the source is
|
656
|
+
* US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
|
657
|
+
* expression will be encoded as ASCII-8BIT.
|
658
|
+
*
|
659
|
+
* Note that if the expression is a list, different elements within the same
|
660
|
+
* list can have different encodings, so this will get reset between each
|
661
|
+
* element. Furthermore all of this only applies to lists that support
|
662
|
+
* interpolation, because otherwise escapes that could change the encoding
|
663
|
+
* are ignored.
|
664
|
+
*
|
665
|
+
* At first glance, it may make more sense for this to live on the lexer
|
666
|
+
* mode, but we need it here to communicate back to the parser for character
|
667
|
+
* literals that do not push a new lexer mode.
|
668
|
+
*/
|
669
|
+
const pm_encoding_t *explicit_encoding;
|
653
670
|
|
654
671
|
/** Whether or not we're at the beginning of a command. */
|
655
672
|
bool command_start;
|
@@ -673,6 +690,9 @@ struct pm_parser {
|
|
673
690
|
/** This flag indicates that we are currently parsing a keyword argument. */
|
674
691
|
bool in_keyword_arg;
|
675
692
|
|
693
|
+
/** The name id of the parameter whose default value is currently being parsed. */
|
694
|
+
pm_constant_id_t current_param_name;
|
695
|
+
|
676
696
|
/**
|
677
697
|
* Whether or not the parser has seen a token that has semantic meaning
|
678
698
|
* (i.e., a token that is not a comment or whitespace).
|
data/include/prism/regexp.h
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
10
|
#include "prism/parser.h"
|
11
|
-
#include "prism/
|
11
|
+
#include "prism/encoding.h"
|
12
12
|
#include "prism/util/pm_memchr.h"
|
13
13
|
#include "prism/util/pm_string_list.h"
|
14
14
|
#include "prism/util/pm_string.h"
|
@@ -28,6 +28,6 @@
|
|
28
28
|
* @param encoding The encoding of the source code.
|
29
29
|
* @return Whether or not the parsing was successful.
|
30
30
|
*/
|
31
|
-
PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
|
31
|
+
PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding);
|
32
32
|
|
33
33
|
#endif
|
@@ -118,7 +118,15 @@ void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
|
|
118
118
|
* @param buffer The buffer to append to.
|
119
119
|
* @param value The integer to append.
|
120
120
|
*/
|
121
|
-
void
|
121
|
+
void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
|
122
|
+
|
123
|
+
/**
|
124
|
+
* Append a 32-bit signed integer to the buffer as a variable-length integer.
|
125
|
+
*
|
126
|
+
* @param buffer The buffer to append to.
|
127
|
+
* @param value The integer to append.
|
128
|
+
*/
|
129
|
+
void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
|
122
130
|
|
123
131
|
/**
|
124
132
|
* Concatenate one buffer onto another.
|
@@ -7,7 +7,7 @@
|
|
7
7
|
#define PRISM_MEMCHR_H
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
|
-
#include "prism/
|
10
|
+
#include "prism/encoding.h"
|
11
11
|
|
12
12
|
#include <stddef.h>
|
13
13
|
|
@@ -24,6 +24,6 @@
|
|
24
24
|
* @return A pointer to the first occurrence of the character in the source
|
25
25
|
* string, or NULL if no such character exists.
|
26
26
|
*/
|
27
|
-
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding);
|
27
|
+
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
|
28
28
|
|
29
29
|
#endif
|
@@ -32,12 +32,12 @@
|
|
32
32
|
* need to take a slower path and iterate one multi-byte character at a time.
|
33
33
|
*
|
34
34
|
* @param parser The parser.
|
35
|
-
* @param source The source
|
35
|
+
* @param source The source to search.
|
36
36
|
* @param charset The charset to search for.
|
37
|
-
* @param length The maximum
|
37
|
+
* @param length The maximum number of bytes to search.
|
38
38
|
* @return A pointer to the first character in the source string that is in the
|
39
39
|
* charset, or NULL if no such character exists.
|
40
40
|
*/
|
41
|
-
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
|
41
|
+
const uint8_t * pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
|
42
42
|
|
43
43
|
#endif
|
data/include/prism/version.h
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
/**
|
15
15
|
* The minor version of the Prism library as an int.
|
16
16
|
*/
|
17
|
-
#define PRISM_VERSION_MINOR
|
17
|
+
#define PRISM_VERSION_MINOR 19
|
18
18
|
|
19
19
|
/**
|
20
20
|
* The patch version of the Prism library as an int.
|
@@ -24,6 +24,6 @@
|
|
24
24
|
/**
|
25
25
|
* The version of the Prism library as a constant string.
|
26
26
|
*/
|
27
|
-
#define PRISM_VERSION "0.
|
27
|
+
#define PRISM_VERSION "0.19.0"
|
28
28
|
|
29
29
|
#endif
|