prism 0.17.1 → 0.19.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -1
- data/Makefile +5 -5
- data/README.md +4 -3
- data/config.yml +214 -68
- data/docs/build_system.md +6 -6
- data/docs/building.md +10 -3
- data/docs/configuration.md +11 -9
- data/docs/encoding.md +92 -88
- data/docs/heredocs.md +1 -1
- data/docs/javascript.md +29 -1
- data/docs/local_variable_depth.md +229 -0
- data/docs/ruby_api.md +16 -0
- data/docs/serialization.md +18 -13
- data/ext/prism/api_node.c +411 -240
- data/ext/prism/extconf.rb +97 -127
- data/ext/prism/extension.c +97 -33
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +377 -159
- data/include/prism/defines.h +17 -0
- data/include/prism/diagnostic.h +38 -6
- data/include/prism/{enc/pm_encoding.h → encoding.h} +126 -64
- data/include/prism/options.h +2 -2
- data/include/prism/parser.h +62 -36
- data/include/prism/regexp.h +2 -2
- data/include/prism/util/pm_buffer.h +9 -1
- data/include/prism/util/pm_memchr.h +2 -2
- data/include/prism/util/pm_strpbrk.h +3 -3
- data/include/prism/version.h +3 -3
- data/include/prism.h +13 -15
- data/lib/prism/compiler.rb +15 -3
- data/lib/prism/debug.rb +13 -4
- data/lib/prism/desugar_compiler.rb +4 -3
- data/lib/prism/dispatcher.rb +70 -14
- data/lib/prism/dot_visitor.rb +4612 -0
- data/lib/prism/dsl.rb +77 -57
- data/lib/prism/ffi.rb +19 -6
- data/lib/prism/lex_compat.rb +19 -9
- data/lib/prism/mutation_compiler.rb +26 -6
- data/lib/prism/node.rb +1314 -522
- data/lib/prism/node_ext.rb +102 -19
- data/lib/prism/parse_result.rb +58 -27
- data/lib/prism/ripper_compat.rb +49 -34
- data/lib/prism/serialize.rb +251 -227
- data/lib/prism/visitor.rb +15 -3
- data/lib/prism.rb +21 -4
- data/prism.gemspec +7 -9
- data/rbi/prism.rbi +688 -284
- data/rbi/prism_static.rbi +3 -0
- data/sig/prism.rbs +426 -156
- data/sig/prism_static.rbs +1 -0
- data/src/diagnostic.c +280 -216
- data/src/encoding.c +5137 -0
- data/src/node.c +99 -21
- data/src/options.c +21 -2
- data/src/prettyprint.c +1743 -1241
- data/src/prism.c +1774 -831
- data/src/regexp.c +15 -15
- data/src/serialize.c +261 -164
- data/src/util/pm_buffer.c +10 -1
- data/src/util/pm_memchr.c +1 -1
- data/src/util/pm_strpbrk.c +4 -4
- metadata +8 -10
- data/src/enc/pm_big5.c +0 -53
- data/src/enc/pm_euc_jp.c +0 -59
- data/src/enc/pm_gbk.c +0 -62
- data/src/enc/pm_shift_jis.c +0 -57
- data/src/enc/pm_tables.c +0 -743
- data/src/enc/pm_unicode.c +0 -2369
- data/src/enc/pm_windows_31j.c +0 -57
data/include/prism/defines.h
CHANGED
@@ -74,4 +74,21 @@
|
|
74
74
|
# define snprintf _snprintf
|
75
75
|
#endif
|
76
76
|
|
77
|
+
/**
|
78
|
+
* A simple utility macro to concatenate two tokens together, necessary when one
|
79
|
+
* of the tokens is itself a macro.
|
80
|
+
*/
|
81
|
+
#define PM_CONCATENATE(left, right) left ## right
|
82
|
+
|
83
|
+
/**
|
84
|
+
* We want to be able to use static assertions, but they weren't standardized
|
85
|
+
* until C11. As such, we polyfill it here by making a hacky typedef that will
|
86
|
+
* fail to compile due to a negative array size if the condition is false.
|
87
|
+
*/
|
88
|
+
#if defined(_Static_assert)
|
89
|
+
# define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
|
90
|
+
#else
|
91
|
+
# define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
|
92
|
+
#endif
|
93
|
+
|
77
94
|
#endif
|
data/include/prism/diagnostic.h
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#ifndef PRISM_DIAGNOSTIC_H
|
7
7
|
#define PRISM_DIAGNOSTIC_H
|
8
8
|
|
9
|
+
#include "prism/ast.h"
|
9
10
|
#include "prism/defines.h"
|
10
11
|
#include "prism/util/pm_list.h"
|
11
12
|
|
@@ -22,14 +23,18 @@ typedef struct {
|
|
22
23
|
/** The embedded base node. */
|
23
24
|
pm_list_node_t node;
|
24
25
|
|
25
|
-
/**
|
26
|
-
|
27
|
-
|
28
|
-
/** A pointer to the end of the source that generated the diagnostic. */
|
29
|
-
const uint8_t *end;
|
26
|
+
/** The location of the diagnostic in the source. */
|
27
|
+
pm_location_t location;
|
30
28
|
|
31
29
|
/** The message associated with the diagnostic. */
|
32
30
|
const char *message;
|
31
|
+
|
32
|
+
/**
|
33
|
+
* Whether or not the memory related to the message of this diagnostic is
|
34
|
+
* owned by this diagnostic. If it is, it needs to be freed when the
|
35
|
+
* diagnostic is freed.
|
36
|
+
*/
|
37
|
+
bool owned;
|
33
38
|
} pm_diagnostic_t;
|
34
39
|
|
35
40
|
/**
|
@@ -40,12 +45,14 @@ typedef enum {
|
|
40
45
|
PM_ERR_ALIAS_ARGUMENT,
|
41
46
|
PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
|
42
47
|
PM_ERR_ARGUMENT_AFTER_BLOCK,
|
48
|
+
PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
|
43
49
|
PM_ERR_ARGUMENT_BARE_HASH,
|
44
50
|
PM_ERR_ARGUMENT_BLOCK_MULTI,
|
45
51
|
PM_ERR_ARGUMENT_FORMAL_CLASS,
|
46
52
|
PM_ERR_ARGUMENT_FORMAL_CONSTANT,
|
47
53
|
PM_ERR_ARGUMENT_FORMAL_GLOBAL,
|
48
54
|
PM_ERR_ARGUMENT_FORMAL_IVAR,
|
55
|
+
PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
|
49
56
|
PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
|
50
57
|
PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
|
51
58
|
PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
|
@@ -71,6 +78,7 @@ typedef enum {
|
|
71
78
|
PM_ERR_CANNOT_PARSE_STRING_PART,
|
72
79
|
PM_ERR_CASE_EXPRESSION_AFTER_CASE,
|
73
80
|
PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
|
81
|
+
PM_ERR_CASE_MATCH_MISSING_PREDICATE,
|
74
82
|
PM_ERR_CASE_MISSING_CONDITIONS,
|
75
83
|
PM_ERR_CASE_TERM,
|
76
84
|
PM_ERR_CLASS_IN_METHOD,
|
@@ -169,6 +177,7 @@ typedef enum {
|
|
169
177
|
PM_ERR_LIST_W_UPPER_ELEMENT,
|
170
178
|
PM_ERR_LIST_W_UPPER_TERM,
|
171
179
|
PM_ERR_MALLOC_FAILED,
|
180
|
+
PM_ERR_MIXED_ENCODING,
|
172
181
|
PM_ERR_MODULE_IN_METHOD,
|
173
182
|
PM_ERR_MODULE_NAME,
|
174
183
|
PM_ERR_MODULE_TERM,
|
@@ -182,6 +191,7 @@ typedef enum {
|
|
182
191
|
PM_ERR_OPERATOR_WRITE_BLOCK,
|
183
192
|
PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
|
184
193
|
PM_ERR_PARAMETER_BLOCK_MULTI,
|
194
|
+
PM_ERR_PARAMETER_CIRCULAR,
|
185
195
|
PM_ERR_PARAMETER_METHOD_NAME,
|
186
196
|
PM_ERR_PARAMETER_NAME_REPEAT,
|
187
197
|
PM_ERR_PARAMETER_NO_DEFAULT,
|
@@ -201,6 +211,7 @@ typedef enum {
|
|
201
211
|
PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
|
202
212
|
PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
|
203
213
|
PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
|
214
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
|
204
215
|
PM_ERR_PATTERN_HASH_KEY,
|
205
216
|
PM_ERR_PATTERN_HASH_KEY_LABEL,
|
206
217
|
PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
|
@@ -216,6 +227,10 @@ typedef enum {
|
|
216
227
|
PM_ERR_RESCUE_TERM,
|
217
228
|
PM_ERR_RESCUE_VARIABLE,
|
218
229
|
PM_ERR_RETURN_INVALID,
|
230
|
+
PM_ERR_STATEMENT_ALIAS,
|
231
|
+
PM_ERR_STATEMENT_POSTEXE_END,
|
232
|
+
PM_ERR_STATEMENT_PREEXE_BEGIN,
|
233
|
+
PM_ERR_STATEMENT_UNDEF,
|
219
234
|
PM_ERR_STRING_CONCATENATION,
|
220
235
|
PM_ERR_STRING_INTERPOLATED_TERM,
|
221
236
|
PM_ERR_STRING_LITERAL_TERM,
|
@@ -231,7 +246,9 @@ typedef enum {
|
|
231
246
|
PM_ERR_UNARY_RECEIVER_TILDE,
|
232
247
|
PM_ERR_UNDEF_ARGUMENT,
|
233
248
|
PM_ERR_UNTIL_TERM,
|
249
|
+
PM_ERR_VOID_EXPRESSION,
|
234
250
|
PM_ERR_WHILE_TERM,
|
251
|
+
PM_ERR_WRITE_TARGET_IN_METHOD,
|
235
252
|
PM_ERR_WRITE_TARGET_READONLY,
|
236
253
|
PM_ERR_WRITE_TARGET_UNEXPECTED,
|
237
254
|
PM_ERR_XSTRING_TERM,
|
@@ -239,13 +256,15 @@ typedef enum {
|
|
239
256
|
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
|
240
257
|
PM_WARN_AMBIGUOUS_PREFIX_STAR,
|
241
258
|
PM_WARN_AMBIGUOUS_SLASH,
|
259
|
+
PM_WARN_END_IN_METHOD,
|
242
260
|
|
243
261
|
/* This must be the last member. */
|
244
262
|
PM_DIAGNOSTIC_ID_LEN,
|
245
263
|
} pm_diagnostic_id_t;
|
246
264
|
|
247
265
|
/**
|
248
|
-
* Append a diagnostic to the given list of diagnostics
|
266
|
+
* Append a diagnostic to the given list of diagnostics that is using shared
|
267
|
+
* memory for its message.
|
249
268
|
*
|
250
269
|
* @param list The list to append to.
|
251
270
|
* @param start The start of the diagnostic.
|
@@ -255,6 +274,19 @@ typedef enum {
|
|
255
274
|
*/
|
256
275
|
bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
|
257
276
|
|
277
|
+
/**
|
278
|
+
* Append a diagnostic to the given list of diagnostics that is using a format
|
279
|
+
* string for its message.
|
280
|
+
*
|
281
|
+
* @param list The list to append to.
|
282
|
+
* @param start The start of the diagnostic.
|
283
|
+
* @param end The end of the diagnostic.
|
284
|
+
* @param diag_id The diagnostic ID.
|
285
|
+
* @param ... The arguments to the format string for the message.
|
286
|
+
* @return Whether the diagnostic was successfully appended.
|
287
|
+
*/
|
288
|
+
bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
|
289
|
+
|
258
290
|
/**
|
259
291
|
* Deallocate the internal state of the given diagnostic list.
|
260
292
|
*
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/**
|
2
|
-
* @file
|
2
|
+
* @file encoding.h
|
3
3
|
*
|
4
4
|
* The encoding interface and implementations used by the parser.
|
5
5
|
*/
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#define PRISM_ENCODING_H
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
|
+
#include "prism/util/pm_strncasecmp.h"
|
10
11
|
|
11
12
|
#include <assert.h>
|
12
13
|
#include <stdbool.h>
|
@@ -78,39 +79,6 @@ typedef struct {
|
|
78
79
|
*/
|
79
80
|
#define PRISM_ENCODING_UPPERCASE_BIT 1 << 2
|
80
81
|
|
81
|
-
/**
|
82
|
-
* Return the size of the next character in the ASCII encoding if it is an
|
83
|
-
* alphabetical character.
|
84
|
-
*
|
85
|
-
* @param b The bytes to read.
|
86
|
-
* @param n The number of bytes that can be read.
|
87
|
-
* @returns The number of bytes that the next character takes if it is valid in
|
88
|
-
* the encoding, or 0 if it is not.
|
89
|
-
*/
|
90
|
-
size_t pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
|
91
|
-
|
92
|
-
/**
|
93
|
-
* Return the size of the next character in the ASCII encoding if it is an
|
94
|
-
* alphanumeric character.
|
95
|
-
*
|
96
|
-
* @param b The bytes to read.
|
97
|
-
* @param n The number of bytes that can be read.
|
98
|
-
* @returns The number of bytes that the next character takes if it is valid in
|
99
|
-
* the encoding, or 0 if it is not.
|
100
|
-
*/
|
101
|
-
size_t pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
|
102
|
-
|
103
|
-
/**
|
104
|
-
 * Return true if the next character in the ASCII encoding is an uppercase
|
105
|
-
* character.
|
106
|
-
*
|
107
|
-
* @param b The bytes to read.
|
108
|
-
* @param n The number of bytes that can be read.
|
109
|
-
* @returns True if the next character is valid in the encoding and is an
|
110
|
-
* uppercase character, or false if it is not.
|
111
|
-
*/
|
112
|
-
bool pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
|
113
|
-
|
114
82
|
/**
|
115
83
|
* Return the size of the next character in the UTF-8 encoding if it is an
|
116
84
|
* alphabetical character.
|
@@ -152,35 +120,129 @@ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
|
|
152
120
|
*/
|
153
121
|
extern const uint8_t pm_encoding_unicode_table[256];
|
154
122
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
123
|
+
/**
|
124
|
+
* These are all of the encodings that prism supports.
|
125
|
+
*/
|
126
|
+
typedef enum {
|
127
|
+
PM_ENCODING_UTF_8 = 0,
|
128
|
+
PM_ENCODING_ASCII_8BIT,
|
129
|
+
PM_ENCODING_BIG5,
|
130
|
+
PM_ENCODING_BIG5_HKSCS,
|
131
|
+
PM_ENCODING_BIG5_UAO,
|
132
|
+
PM_ENCODING_CESU_8,
|
133
|
+
PM_ENCODING_CP51932,
|
134
|
+
PM_ENCODING_CP850,
|
135
|
+
PM_ENCODING_CP852,
|
136
|
+
PM_ENCODING_CP855,
|
137
|
+
PM_ENCODING_CP949,
|
138
|
+
PM_ENCODING_CP950,
|
139
|
+
PM_ENCODING_CP951,
|
140
|
+
PM_ENCODING_EMACS_MULE,
|
141
|
+
PM_ENCODING_EUC_JP,
|
142
|
+
PM_ENCODING_EUC_JP_MS,
|
143
|
+
PM_ENCODING_EUC_JIS_2004,
|
144
|
+
PM_ENCODING_EUC_KR,
|
145
|
+
PM_ENCODING_EUC_TW,
|
146
|
+
PM_ENCODING_GB12345,
|
147
|
+
PM_ENCODING_GB18030,
|
148
|
+
PM_ENCODING_GB1988,
|
149
|
+
PM_ENCODING_GB2312,
|
150
|
+
PM_ENCODING_GBK,
|
151
|
+
PM_ENCODING_IBM437,
|
152
|
+
PM_ENCODING_IBM720,
|
153
|
+
PM_ENCODING_IBM737,
|
154
|
+
PM_ENCODING_IBM775,
|
155
|
+
PM_ENCODING_IBM852,
|
156
|
+
PM_ENCODING_IBM855,
|
157
|
+
PM_ENCODING_IBM857,
|
158
|
+
PM_ENCODING_IBM860,
|
159
|
+
PM_ENCODING_IBM861,
|
160
|
+
PM_ENCODING_IBM862,
|
161
|
+
PM_ENCODING_IBM863,
|
162
|
+
PM_ENCODING_IBM864,
|
163
|
+
PM_ENCODING_IBM865,
|
164
|
+
PM_ENCODING_IBM866,
|
165
|
+
PM_ENCODING_IBM869,
|
166
|
+
PM_ENCODING_ISO_8859_1,
|
167
|
+
PM_ENCODING_ISO_8859_2,
|
168
|
+
PM_ENCODING_ISO_8859_3,
|
169
|
+
PM_ENCODING_ISO_8859_4,
|
170
|
+
PM_ENCODING_ISO_8859_5,
|
171
|
+
PM_ENCODING_ISO_8859_6,
|
172
|
+
PM_ENCODING_ISO_8859_7,
|
173
|
+
PM_ENCODING_ISO_8859_8,
|
174
|
+
PM_ENCODING_ISO_8859_9,
|
175
|
+
PM_ENCODING_ISO_8859_10,
|
176
|
+
PM_ENCODING_ISO_8859_11,
|
177
|
+
PM_ENCODING_ISO_8859_13,
|
178
|
+
PM_ENCODING_ISO_8859_14,
|
179
|
+
PM_ENCODING_ISO_8859_15,
|
180
|
+
PM_ENCODING_ISO_8859_16,
|
181
|
+
PM_ENCODING_KOI8_R,
|
182
|
+
PM_ENCODING_KOI8_U,
|
183
|
+
PM_ENCODING_MAC_CENT_EURO,
|
184
|
+
PM_ENCODING_MAC_CROATIAN,
|
185
|
+
PM_ENCODING_MAC_CYRILLIC,
|
186
|
+
PM_ENCODING_MAC_GREEK,
|
187
|
+
PM_ENCODING_MAC_ICELAND,
|
188
|
+
PM_ENCODING_MAC_JAPANESE,
|
189
|
+
PM_ENCODING_MAC_ROMAN,
|
190
|
+
PM_ENCODING_MAC_ROMANIA,
|
191
|
+
PM_ENCODING_MAC_THAI,
|
192
|
+
PM_ENCODING_MAC_TURKISH,
|
193
|
+
PM_ENCODING_MAC_UKRAINE,
|
194
|
+
PM_ENCODING_SHIFT_JIS,
|
195
|
+
PM_ENCODING_SJIS_DOCOMO,
|
196
|
+
PM_ENCODING_SJIS_KDDI,
|
197
|
+
PM_ENCODING_SJIS_SOFTBANK,
|
198
|
+
PM_ENCODING_STATELESS_ISO_2022_JP,
|
199
|
+
PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
|
200
|
+
PM_ENCODING_TIS_620,
|
201
|
+
PM_ENCODING_US_ASCII,
|
202
|
+
PM_ENCODING_UTF8_MAC,
|
203
|
+
PM_ENCODING_UTF8_DOCOMO,
|
204
|
+
PM_ENCODING_UTF8_KDDI,
|
205
|
+
PM_ENCODING_UTF8_SOFTBANK,
|
206
|
+
PM_ENCODING_WINDOWS_1250,
|
207
|
+
PM_ENCODING_WINDOWS_1251,
|
208
|
+
PM_ENCODING_WINDOWS_1252,
|
209
|
+
PM_ENCODING_WINDOWS_1253,
|
210
|
+
PM_ENCODING_WINDOWS_1254,
|
211
|
+
PM_ENCODING_WINDOWS_1255,
|
212
|
+
PM_ENCODING_WINDOWS_1256,
|
213
|
+
PM_ENCODING_WINDOWS_1257,
|
214
|
+
PM_ENCODING_WINDOWS_1258,
|
215
|
+
PM_ENCODING_WINDOWS_31J,
|
216
|
+
PM_ENCODING_WINDOWS_874,
|
217
|
+
PM_ENCODING_MAXIMUM
|
218
|
+
} pm_encoding_type_t;
|
219
|
+
|
220
|
+
/**
|
221
|
+
* This is the table of all of the encodings that prism supports.
|
222
|
+
*/
|
223
|
+
extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
|
224
|
+
|
225
|
+
/**
|
226
|
+
* This is the default UTF-8 encoding. We need a reference to it to quickly
|
227
|
+
* create parsers.
|
228
|
+
*/
|
229
|
+
#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
|
230
|
+
|
231
|
+
/**
|
232
|
+
* This is the US-ASCII encoding. We need a reference to it to be able to
|
233
|
+
* compare against it when a string is being created because it could possibly
|
234
|
+
* need to fall back to ASCII-8BIT.
|
235
|
+
*/
|
236
|
+
#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
|
237
|
+
|
238
|
+
/**
|
239
|
+
* Parse the given name of an encoding and return a pointer to the corresponding
|
240
|
+
* encoding struct if one can be found, otherwise return NULL.
|
241
|
+
*
|
242
|
+
* @param start A pointer to the first byte of the name.
|
243
|
+
* @param end A pointer to the last byte of the name.
|
244
|
+
* @returns A pointer to the encoding struct if one is found, otherwise NULL.
|
245
|
+
*/
|
246
|
+
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
|
185
247
|
|
186
248
|
#endif
|
data/include/prism/options.h
CHANGED
@@ -35,7 +35,7 @@ typedef struct {
|
|
35
35
|
* The line within the file that the parse starts on. This value is
|
36
36
|
* 0-indexed.
|
37
37
|
*/
|
38
|
-
|
38
|
+
int32_t line;
|
39
39
|
|
40
40
|
/**
|
41
41
|
* The name of the encoding that the source file is in. Note that this must
|
@@ -80,7 +80,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons
|
|
80
80
|
* @param options The options struct to set the line on.
|
81
81
|
* @param line The line to set.
|
82
82
|
*/
|
83
|
-
PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options,
|
83
|
+
PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
|
84
84
|
|
85
85
|
/**
|
86
86
|
* Set the encoding option on the given options struct.
|
data/include/prism/parser.h
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
|
9
9
|
#include "prism/ast.h"
|
10
10
|
#include "prism/defines.h"
|
11
|
-
#include "prism/enc/pm_encoding.h"
|
11
|
+
#include "prism/encoding.h"
|
12
12
|
#include "prism/util/pm_constant_pool.h"
|
13
13
|
#include "prism/util/pm_list.h"
|
14
14
|
#include "prism/util/pm_newline_list.h"
|
@@ -17,6 +17,12 @@
|
|
17
17
|
|
18
18
|
#include <stdbool.h>
|
19
19
|
|
20
|
+
// TODO: remove this by renaming the original flag
|
21
|
+
/**
|
22
|
+
 * Temporary alias for the PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS flag.
|
23
|
+
*/
|
24
|
+
#define PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS
|
25
|
+
|
20
26
|
/**
|
21
27
|
* This enum provides various bits that represent different kinds of states that
|
22
28
|
* the lexer can track. This is used to determine which kind of token to return
|
@@ -297,6 +303,9 @@ typedef enum {
|
|
297
303
|
/** an ensure statement */
|
298
304
|
PM_CONTEXT_ENSURE,
|
299
305
|
|
306
|
+
/** an ensure statement within a method definition */
|
307
|
+
PM_CONTEXT_ENSURE_DEF,
|
308
|
+
|
300
309
|
/** a for loop */
|
301
310
|
PM_CONTEXT_FOR,
|
302
311
|
|
@@ -333,9 +342,15 @@ typedef enum {
|
|
333
342
|
/** a rescue else statement */
|
334
343
|
PM_CONTEXT_RESCUE_ELSE,
|
335
344
|
|
345
|
+
/** a rescue else statement within a method definition */
|
346
|
+
PM_CONTEXT_RESCUE_ELSE_DEF,
|
347
|
+
|
336
348
|
/** a rescue statement */
|
337
349
|
PM_CONTEXT_RESCUE,
|
338
350
|
|
351
|
+
/** a rescue statement within a method definition */
|
352
|
+
PM_CONTEXT_RESCUE_DEF,
|
353
|
+
|
339
354
|
/** a singleton class definition */
|
340
355
|
PM_CONTEXT_SCLASS,
|
341
356
|
|
@@ -361,8 +376,7 @@ typedef struct pm_context_node {
|
|
361
376
|
/** This is the type of a comment that we've found while parsing. */
|
362
377
|
typedef enum {
|
363
378
|
PM_COMMENT_INLINE,
|
364
|
-
PM_COMMENT_EMBDOC
|
365
|
-
PM_COMMENT___END__
|
379
|
+
PM_COMMENT_EMBDOC
|
366
380
|
} pm_comment_type_t;
|
367
381
|
|
368
382
|
/**
|
@@ -374,11 +388,8 @@ typedef struct pm_comment {
|
|
374
388
|
/** The embedded base node. */
|
375
389
|
pm_list_node_t node;
|
376
390
|
|
377
|
-
/**
|
378
|
-
|
379
|
-
|
380
|
-
/** A pointer to the end of the comment in the source. */
|
381
|
-
const uint8_t *end;
|
391
|
+
/** The location of the comment in the source. */
|
392
|
+
pm_location_t location;
|
382
393
|
|
383
394
|
/** The type of comment that we've found. */
|
384
395
|
pm_comment_type_t type;
|
@@ -413,14 +424,6 @@ typedef struct {
|
|
413
424
|
*/
|
414
425
|
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
|
415
426
|
|
416
|
-
/**
|
417
|
-
* When an encoding is encountered that isn't understood by prism, we provide
|
418
|
-
* the ability here to call out to a user-defined function to get an encoding
|
419
|
-
* struct. If the function returns something that isn't NULL, we set that to
|
420
|
-
* our encoding and use it to parse identifiers.
|
421
|
-
*/
|
422
|
-
typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
|
423
|
-
|
424
427
|
/**
|
425
428
|
* When you are lexing through a file, the lexer needs all of the information
|
426
429
|
* that the parser additionally provides (for example, the local table). So if
|
@@ -469,18 +472,12 @@ typedef struct pm_scope {
|
|
469
472
|
bool explicit_params;
|
470
473
|
|
471
474
|
/**
|
472
|
-
*
|
475
|
+
* An integer indicating the number of numbered parameters on this scope.
|
473
476
|
* This is necessary to determine if child blocks are allowed to use
|
474
|
-
* numbered parameters
|
477
|
+
* numbered parameters, and to pass information to consumers of the AST
|
478
|
+
* about how many numbered parameters exist.
|
475
479
|
*/
|
476
|
-
|
477
|
-
|
478
|
-
/**
|
479
|
-
* A transparent scope is a scope that cannot have locals set on itself.
|
480
|
-
* When a local is set on this scope, it will instead be set on the parent
|
481
|
-
* scope's local table.
|
482
|
-
*/
|
483
|
-
bool transparent;
|
480
|
+
uint8_t numbered_parameters;
|
484
481
|
} pm_scope_t;
|
485
482
|
|
486
483
|
/**
|
@@ -565,6 +562,9 @@ struct pm_parser {
|
|
565
562
|
/** The list of magic comments that have been found while parsing. */
|
566
563
|
pm_list_t magic_comment_list;
|
567
564
|
|
565
|
+
/** The optional location of the __END__ keyword and its contents. */
|
566
|
+
pm_location_t data_loc;
|
567
|
+
|
568
568
|
/** The list of warnings that have been found while parsing. */
|
569
569
|
pm_list_t warning_list;
|
570
570
|
|
@@ -581,7 +581,7 @@ struct pm_parser {
|
|
581
581
|
 * The encoding functions for the current file are attached to the parser as
|
582
582
|
* it's parsing so that it can change with a magic comment.
|
583
583
|
*/
|
584
|
-
pm_encoding_t encoding;
|
584
|
+
const pm_encoding_t *encoding;
|
585
585
|
|
586
586
|
/**
|
587
587
|
* When the encoding that is being used to parse the source is changed by
|
@@ -590,14 +590,6 @@ struct pm_parser {
|
|
590
590
|
*/
|
591
591
|
pm_encoding_changed_callback_t encoding_changed_callback;
|
592
592
|
|
593
|
-
/**
|
594
|
-
* When an encoding is encountered that isn't understood by prism, we
|
595
|
-
* provide the ability here to call out to a user-defined function to get an
|
596
|
-
* encoding struct. If the function returns something that isn't NULL, we
|
597
|
-
* set that to our encoding and use it to parse identifiers.
|
598
|
-
*/
|
599
|
-
pm_encoding_decode_callback_t encoding_decode_callback;
|
600
|
-
|
601
593
|
/**
|
602
594
|
* This pointer indicates where a comment must start if it is to be
|
603
595
|
* considered an encoding comment.
|
@@ -643,7 +635,38 @@ struct pm_parser {
|
|
643
635
|
* The line number at the start of the parse. This will be used to offset
|
644
636
|
* the line numbers of all of the locations.
|
645
637
|
*/
|
646
|
-
|
638
|
+
int32_t start_line;
|
639
|
+
|
640
|
+
/**
|
641
|
+
* When a string-like expression is being lexed, any byte or escape sequence
|
642
|
+
* that resolves to a value whose top bit is set (i.e., >= 0x80) will
|
643
|
+
* explicitly set the encoding to the same encoding as the source.
|
644
|
+
* Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
|
645
|
+
* resolves to a value whose top bit is set, then the encoding will be
|
646
|
+
* explicitly set to UTF-8.
|
647
|
+
*
|
648
|
+
* The _next_ time this happens, if the encoding that is about to become the
|
649
|
+
* explicitly set encoding does not match the previously set explicit
|
650
|
+
* encoding, a mixed encoding error will be emitted.
|
651
|
+
*
|
652
|
+
* When the expression is finished being lexed, the explicit encoding
|
653
|
+
* controls the encoding of the expression. For the most part this means
|
654
|
+
* that the expression will either be encoded in the source encoding or
|
655
|
+
* UTF-8. This holds for all encodings except US-ASCII. If the source is
|
656
|
+
* US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
|
657
|
+
* expression will be encoded as ASCII-8BIT.
|
658
|
+
*
|
659
|
+
* Note that if the expression is a list, different elements within the same
|
660
|
+
* list can have different encodings, so this will get reset between each
|
661
|
+
* element. Furthermore all of this only applies to lists that support
|
662
|
+
* interpolation, because otherwise escapes that could change the encoding
|
663
|
+
* are ignored.
|
664
|
+
*
|
665
|
+
* At first glance, it may make more sense for this to live on the lexer
|
666
|
+
* mode, but we need it here to communicate back to the parser for character
|
667
|
+
* literals that do not push a new lexer mode.
|
668
|
+
*/
|
669
|
+
const pm_encoding_t *explicit_encoding;
|
647
670
|
|
648
671
|
/** Whether or not we're at the beginning of a command. */
|
649
672
|
bool command_start;
|
@@ -667,6 +690,9 @@ struct pm_parser {
|
|
667
690
|
/** This flag indicates that we are currently parsing a keyword argument. */
|
668
691
|
bool in_keyword_arg;
|
669
692
|
|
693
|
+
/** The name ID of the parameter whose default value is currently being parsed. */
|
694
|
+
pm_constant_id_t current_param_name;
|
695
|
+
|
670
696
|
/**
|
671
697
|
* Whether or not the parser has seen a token that has semantic meaning
|
672
698
|
* (i.e., a token that is not a comment or whitespace).
|
data/include/prism/regexp.h
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
10
|
#include "prism/parser.h"
|
11
|
-
#include "prism/enc/pm_encoding.h"
|
11
|
+
#include "prism/encoding.h"
|
12
12
|
#include "prism/util/pm_memchr.h"
|
13
13
|
#include "prism/util/pm_string_list.h"
|
14
14
|
#include "prism/util/pm_string.h"
|
@@ -28,6 +28,6 @@
|
|
28
28
|
* @param encoding The encoding of the source code.
|
29
29
|
* @return Whether or not the parsing was successful.
|
30
30
|
*/
|
31
|
-
PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
|
31
|
+
PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding);
|
32
32
|
|
33
33
|
#endif
|
@@ -118,7 +118,15 @@ void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
|
|
118
118
|
* @param buffer The buffer to append to.
|
119
119
|
* @param value The integer to append.
|
120
120
|
*/
|
121
|
-
void
|
121
|
+
void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
|
122
|
+
|
123
|
+
/**
|
124
|
+
* Append a 32-bit signed integer to the buffer as a variable-length integer.
|
125
|
+
*
|
126
|
+
* @param buffer The buffer to append to.
|
127
|
+
* @param value The integer to append.
|
128
|
+
*/
|
129
|
+
void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
|
122
130
|
|
123
131
|
/**
|
124
132
|
* Concatenate one buffer onto another.
|
@@ -7,7 +7,7 @@
|
|
7
7
|
#define PRISM_MEMCHR_H
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
|
-
#include "prism/enc/pm_encoding.h"
|
10
|
+
#include "prism/encoding.h"
|
11
11
|
|
12
12
|
#include <stddef.h>
|
13
13
|
|
@@ -24,6 +24,6 @@
|
|
24
24
|
* @return A pointer to the first occurrence of the character in the source
|
25
25
|
* string, or NULL if no such character exists.
|
26
26
|
*/
|
27
|
-
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding);
|
27
|
+
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
|
28
28
|
|
29
29
|
#endif
|
@@ -32,12 +32,12 @@
|
|
32
32
|
* need to take a slower path and iterate one multi-byte character at a time.
|
33
33
|
*
|
34
34
|
* @param parser The parser.
|
35
|
-
* @param source The source
|
35
|
+
* @param source The source to search.
|
36
36
|
* @param charset The charset to search for.
|
37
|
-
* @param length The maximum
|
37
|
+
* @param length The maximum number of bytes to search.
|
38
38
|
* @return A pointer to the first character in the source string that is in the
|
39
39
|
* charset, or NULL if no such character exists.
|
40
40
|
*/
|
41
|
-
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
|
41
|
+
const uint8_t * pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
|
42
42
|
|
43
43
|
#endif
|
data/include/prism/version.h
CHANGED
@@ -14,16 +14,16 @@
|
|
14
14
|
/**
|
15
15
|
* The minor version of the Prism library as an int.
|
16
16
|
*/
|
17
|
-
#define PRISM_VERSION_MINOR 17
|
17
|
+
#define PRISM_VERSION_MINOR 19
|
18
18
|
|
19
19
|
/**
|
20
20
|
* The patch version of the Prism library as an int.
|
21
21
|
*/
|
22
|
-
#define PRISM_VERSION_PATCH 1
|
22
|
+
#define PRISM_VERSION_PATCH 0
|
23
23
|
|
24
24
|
/**
|
25
25
|
* The version of the Prism library as a constant string.
|
26
26
|
*/
|
27
|
-
#define PRISM_VERSION "0.17.1"
|
27
|
+
#define PRISM_VERSION "0.19.0"
|
28
28
|
|
29
29
|
#endif
|