prism 0.17.1 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -1
  3. data/Makefile +5 -5
  4. data/README.md +4 -3
  5. data/config.yml +214 -68
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +10 -3
  8. data/docs/configuration.md +11 -9
  9. data/docs/encoding.md +92 -88
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/local_variable_depth.md +229 -0
  13. data/docs/ruby_api.md +16 -0
  14. data/docs/serialization.md +18 -13
  15. data/ext/prism/api_node.c +411 -240
  16. data/ext/prism/extconf.rb +97 -127
  17. data/ext/prism/extension.c +97 -33
  18. data/ext/prism/extension.h +1 -1
  19. data/include/prism/ast.h +377 -159
  20. data/include/prism/defines.h +17 -0
  21. data/include/prism/diagnostic.h +38 -6
  22. data/include/prism/{enc/pm_encoding.h → encoding.h} +126 -64
  23. data/include/prism/options.h +2 -2
  24. data/include/prism/parser.h +62 -36
  25. data/include/prism/regexp.h +2 -2
  26. data/include/prism/util/pm_buffer.h +9 -1
  27. data/include/prism/util/pm_memchr.h +2 -2
  28. data/include/prism/util/pm_strpbrk.h +3 -3
  29. data/include/prism/version.h +3 -3
  30. data/include/prism.h +13 -15
  31. data/lib/prism/compiler.rb +15 -3
  32. data/lib/prism/debug.rb +13 -4
  33. data/lib/prism/desugar_compiler.rb +4 -3
  34. data/lib/prism/dispatcher.rb +70 -14
  35. data/lib/prism/dot_visitor.rb +4612 -0
  36. data/lib/prism/dsl.rb +77 -57
  37. data/lib/prism/ffi.rb +19 -6
  38. data/lib/prism/lex_compat.rb +19 -9
  39. data/lib/prism/mutation_compiler.rb +26 -6
  40. data/lib/prism/node.rb +1314 -522
  41. data/lib/prism/node_ext.rb +102 -19
  42. data/lib/prism/parse_result.rb +58 -27
  43. data/lib/prism/ripper_compat.rb +49 -34
  44. data/lib/prism/serialize.rb +251 -227
  45. data/lib/prism/visitor.rb +15 -3
  46. data/lib/prism.rb +21 -4
  47. data/prism.gemspec +7 -9
  48. data/rbi/prism.rbi +688 -284
  49. data/rbi/prism_static.rbi +3 -0
  50. data/sig/prism.rbs +426 -156
  51. data/sig/prism_static.rbs +1 -0
  52. data/src/diagnostic.c +280 -216
  53. data/src/encoding.c +5137 -0
  54. data/src/node.c +99 -21
  55. data/src/options.c +21 -2
  56. data/src/prettyprint.c +1743 -1241
  57. data/src/prism.c +1774 -831
  58. data/src/regexp.c +15 -15
  59. data/src/serialize.c +261 -164
  60. data/src/util/pm_buffer.c +10 -1
  61. data/src/util/pm_memchr.c +1 -1
  62. data/src/util/pm_strpbrk.c +4 -4
  63. metadata +8 -10
  64. data/src/enc/pm_big5.c +0 -53
  65. data/src/enc/pm_euc_jp.c +0 -59
  66. data/src/enc/pm_gbk.c +0 -62
  67. data/src/enc/pm_shift_jis.c +0 -57
  68. data/src/enc/pm_tables.c +0 -743
  69. data/src/enc/pm_unicode.c +0 -2369
  70. data/src/enc/pm_windows_31j.c +0 -57
@@ -74,4 +74,21 @@
74
74
  # define snprintf _snprintf
75
75
  #endif
76
76
 
77
+ /**
78
+ * A simple utility macro to concatenate two tokens together, necessary when one
79
+ * of the tokens is itself a macro.
80
+ */
81
+ #define PM_CONCATENATE(left, right) left ## right
82
+
83
+ /**
84
+ * We want to be able to use static assertions, but they weren't standardized
85
+ * until C11. As such, we polyfill it here by making a hacky typedef that will
86
+ * fail to compile due to a negative array size if the condition is false.
87
+ */
88
+ #if defined(_Static_assert)
89
+ # define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
90
+ #else
91
+ # define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
92
+ #endif
93
+
77
94
  #endif
@@ -6,6 +6,7 @@
6
6
  #ifndef PRISM_DIAGNOSTIC_H
7
7
  #define PRISM_DIAGNOSTIC_H
8
8
 
9
+ #include "prism/ast.h"
9
10
  #include "prism/defines.h"
10
11
  #include "prism/util/pm_list.h"
11
12
 
@@ -22,14 +23,18 @@ typedef struct {
22
23
  /** The embedded base node. */
23
24
  pm_list_node_t node;
24
25
 
25
- /** A pointer to the start of the source that generated the diagnostic. */
26
- const uint8_t *start;
27
-
28
- /** A pointer to the end of the source that generated the diagnostic. */
29
- const uint8_t *end;
26
+ /** The location of the diagnostic in the source. */
27
+ pm_location_t location;
30
28
 
31
29
  /** The message associated with the diagnostic. */
32
30
  const char *message;
31
+
32
+ /**
33
+ * Whether or not the memory related to the message of this diagnostic is
34
+ * owned by this diagnostic. If it is, it needs to be freed when the
35
+ * diagnostic is freed.
36
+ */
37
+ bool owned;
33
38
  } pm_diagnostic_t;
34
39
 
35
40
  /**
@@ -40,12 +45,14 @@ typedef enum {
40
45
  PM_ERR_ALIAS_ARGUMENT,
41
46
  PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
42
47
  PM_ERR_ARGUMENT_AFTER_BLOCK,
48
+ PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
43
49
  PM_ERR_ARGUMENT_BARE_HASH,
44
50
  PM_ERR_ARGUMENT_BLOCK_MULTI,
45
51
  PM_ERR_ARGUMENT_FORMAL_CLASS,
46
52
  PM_ERR_ARGUMENT_FORMAL_CONSTANT,
47
53
  PM_ERR_ARGUMENT_FORMAL_GLOBAL,
48
54
  PM_ERR_ARGUMENT_FORMAL_IVAR,
55
+ PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
49
56
  PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
50
57
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
51
58
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
@@ -71,6 +78,7 @@ typedef enum {
71
78
  PM_ERR_CANNOT_PARSE_STRING_PART,
72
79
  PM_ERR_CASE_EXPRESSION_AFTER_CASE,
73
80
  PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
81
+ PM_ERR_CASE_MATCH_MISSING_PREDICATE,
74
82
  PM_ERR_CASE_MISSING_CONDITIONS,
75
83
  PM_ERR_CASE_TERM,
76
84
  PM_ERR_CLASS_IN_METHOD,
@@ -169,6 +177,7 @@ typedef enum {
169
177
  PM_ERR_LIST_W_UPPER_ELEMENT,
170
178
  PM_ERR_LIST_W_UPPER_TERM,
171
179
  PM_ERR_MALLOC_FAILED,
180
+ PM_ERR_MIXED_ENCODING,
172
181
  PM_ERR_MODULE_IN_METHOD,
173
182
  PM_ERR_MODULE_NAME,
174
183
  PM_ERR_MODULE_TERM,
@@ -182,6 +191,7 @@ typedef enum {
182
191
  PM_ERR_OPERATOR_WRITE_BLOCK,
183
192
  PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
184
193
  PM_ERR_PARAMETER_BLOCK_MULTI,
194
+ PM_ERR_PARAMETER_CIRCULAR,
185
195
  PM_ERR_PARAMETER_METHOD_NAME,
186
196
  PM_ERR_PARAMETER_NAME_REPEAT,
187
197
  PM_ERR_PARAMETER_NO_DEFAULT,
@@ -201,6 +211,7 @@ typedef enum {
201
211
  PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
202
212
  PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
203
213
  PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
214
+ PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
204
215
  PM_ERR_PATTERN_HASH_KEY,
205
216
  PM_ERR_PATTERN_HASH_KEY_LABEL,
206
217
  PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
@@ -216,6 +227,10 @@ typedef enum {
216
227
  PM_ERR_RESCUE_TERM,
217
228
  PM_ERR_RESCUE_VARIABLE,
218
229
  PM_ERR_RETURN_INVALID,
230
+ PM_ERR_STATEMENT_ALIAS,
231
+ PM_ERR_STATEMENT_POSTEXE_END,
232
+ PM_ERR_STATEMENT_PREEXE_BEGIN,
233
+ PM_ERR_STATEMENT_UNDEF,
219
234
  PM_ERR_STRING_CONCATENATION,
220
235
  PM_ERR_STRING_INTERPOLATED_TERM,
221
236
  PM_ERR_STRING_LITERAL_TERM,
@@ -231,7 +246,9 @@ typedef enum {
231
246
  PM_ERR_UNARY_RECEIVER_TILDE,
232
247
  PM_ERR_UNDEF_ARGUMENT,
233
248
  PM_ERR_UNTIL_TERM,
249
+ PM_ERR_VOID_EXPRESSION,
234
250
  PM_ERR_WHILE_TERM,
251
+ PM_ERR_WRITE_TARGET_IN_METHOD,
235
252
  PM_ERR_WRITE_TARGET_READONLY,
236
253
  PM_ERR_WRITE_TARGET_UNEXPECTED,
237
254
  PM_ERR_XSTRING_TERM,
@@ -239,13 +256,15 @@ typedef enum {
239
256
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
240
257
  PM_WARN_AMBIGUOUS_PREFIX_STAR,
241
258
  PM_WARN_AMBIGUOUS_SLASH,
259
+ PM_WARN_END_IN_METHOD,
242
260
 
243
261
  /* This must be the last member. */
244
262
  PM_DIAGNOSTIC_ID_LEN,
245
263
  } pm_diagnostic_id_t;
246
264
 
247
265
  /**
248
- * Append a diagnostic to the given list of diagnostics.
266
+ * Append a diagnostic to the given list of diagnostics that is using shared
267
+ * memory for its message.
249
268
  *
250
269
  * @param list The list to append to.
251
270
  * @param start The start of the diagnostic.
@@ -255,6 +274,19 @@ typedef enum {
255
274
  */
256
275
  bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
257
276
 
277
+ /**
278
+ * Append a diagnostic to the given list of diagnostics that is using a format
279
+ * string for its message.
280
+ *
281
+ * @param list The list to append to.
282
+ * @param start The start of the diagnostic.
283
+ * @param end The end of the diagnostic.
284
+ * @param diag_id The diagnostic ID.
285
+ * @param ... The arguments to the format string for the message.
286
+ * @return Whether the diagnostic was successfully appended.
287
+ */
288
+ bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
289
+
258
290
  /**
259
291
  * Deallocate the internal state of the given diagnostic list.
260
292
  *
@@ -1,5 +1,5 @@
1
1
  /**
2
- * @file pm_encoding.h
2
+ * @file encoding.h
3
3
  *
4
4
  * The encoding interface and implementations used by the parser.
5
5
  */
@@ -7,6 +7,7 @@
7
7
  #define PRISM_ENCODING_H
8
8
 
9
9
  #include "prism/defines.h"
10
+ #include "prism/util/pm_strncasecmp.h"
10
11
 
11
12
  #include <assert.h>
12
13
  #include <stdbool.h>
@@ -78,39 +79,6 @@ typedef struct {
78
79
  */
79
80
  #define PRISM_ENCODING_UPPERCASE_BIT 1 << 2
80
81
 
81
- /**
82
- * Return the size of the next character in the ASCII encoding if it is an
83
- * alphabetical character.
84
- *
85
- * @param b The bytes to read.
86
- * @param n The number of bytes that can be read.
87
- * @returns The number of bytes that the next character takes if it is valid in
88
- * the encoding, or 0 if it is not.
89
- */
90
- size_t pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
91
-
92
- /**
93
- * Return the size of the next character in the ASCII encoding if it is an
94
- * alphanumeric character.
95
- *
96
- * @param b The bytes to read.
97
- * @param n The number of bytes that can be read.
98
- * @returns The number of bytes that the next character takes if it is valid in
99
- * the encoding, or 0 if it is not.
100
- */
101
- size_t pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
102
-
103
- /**
104
- * Return true if the next character in the ASCII encoding if it is an uppercase
105
- * character.
106
- *
107
- * @param b The bytes to read.
108
- * @param n The number of bytes that can be read.
109
- * @returns True if the next character is valid in the encoding and is an
110
- * uppercase character, or false if it is not.
111
- */
112
- bool pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
113
-
114
82
  /**
115
83
  * Return the size of the next character in the UTF-8 encoding if it is an
116
84
  * alphabetical character.
@@ -152,35 +120,129 @@ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
152
120
  */
153
121
  extern const uint8_t pm_encoding_unicode_table[256];
154
122
 
155
- // Below are the encodings that are supported by the parser. They are defined in
156
- // their own files in the src/enc directory.
157
-
158
- extern pm_encoding_t pm_encoding_ascii;
159
- extern pm_encoding_t pm_encoding_ascii_8bit;
160
- extern pm_encoding_t pm_encoding_big5;
161
- extern pm_encoding_t pm_encoding_euc_jp;
162
- extern pm_encoding_t pm_encoding_gbk;
163
- extern pm_encoding_t pm_encoding_iso_8859_1;
164
- extern pm_encoding_t pm_encoding_iso_8859_2;
165
- extern pm_encoding_t pm_encoding_iso_8859_3;
166
- extern pm_encoding_t pm_encoding_iso_8859_4;
167
- extern pm_encoding_t pm_encoding_iso_8859_5;
168
- extern pm_encoding_t pm_encoding_iso_8859_6;
169
- extern pm_encoding_t pm_encoding_iso_8859_7;
170
- extern pm_encoding_t pm_encoding_iso_8859_8;
171
- extern pm_encoding_t pm_encoding_iso_8859_9;
172
- extern pm_encoding_t pm_encoding_iso_8859_10;
173
- extern pm_encoding_t pm_encoding_iso_8859_11;
174
- extern pm_encoding_t pm_encoding_iso_8859_13;
175
- extern pm_encoding_t pm_encoding_iso_8859_14;
176
- extern pm_encoding_t pm_encoding_iso_8859_15;
177
- extern pm_encoding_t pm_encoding_iso_8859_16;
178
- extern pm_encoding_t pm_encoding_koi8_r;
179
- extern pm_encoding_t pm_encoding_shift_jis;
180
- extern pm_encoding_t pm_encoding_utf_8;
181
- extern pm_encoding_t pm_encoding_utf8_mac;
182
- extern pm_encoding_t pm_encoding_windows_31j;
183
- extern pm_encoding_t pm_encoding_windows_1251;
184
- extern pm_encoding_t pm_encoding_windows_1252;
123
+ /**
124
+ * These are all of the encodings that prism supports.
125
+ */
126
+ typedef enum {
127
+ PM_ENCODING_UTF_8 = 0,
128
+ PM_ENCODING_ASCII_8BIT,
129
+ PM_ENCODING_BIG5,
130
+ PM_ENCODING_BIG5_HKSCS,
131
+ PM_ENCODING_BIG5_UAO,
132
+ PM_ENCODING_CESU_8,
133
+ PM_ENCODING_CP51932,
134
+ PM_ENCODING_CP850,
135
+ PM_ENCODING_CP852,
136
+ PM_ENCODING_CP855,
137
+ PM_ENCODING_CP949,
138
+ PM_ENCODING_CP950,
139
+ PM_ENCODING_CP951,
140
+ PM_ENCODING_EMACS_MULE,
141
+ PM_ENCODING_EUC_JP,
142
+ PM_ENCODING_EUC_JP_MS,
143
+ PM_ENCODING_EUC_JIS_2004,
144
+ PM_ENCODING_EUC_KR,
145
+ PM_ENCODING_EUC_TW,
146
+ PM_ENCODING_GB12345,
147
+ PM_ENCODING_GB18030,
148
+ PM_ENCODING_GB1988,
149
+ PM_ENCODING_GB2312,
150
+ PM_ENCODING_GBK,
151
+ PM_ENCODING_IBM437,
152
+ PM_ENCODING_IBM720,
153
+ PM_ENCODING_IBM737,
154
+ PM_ENCODING_IBM775,
155
+ PM_ENCODING_IBM852,
156
+ PM_ENCODING_IBM855,
157
+ PM_ENCODING_IBM857,
158
+ PM_ENCODING_IBM860,
159
+ PM_ENCODING_IBM861,
160
+ PM_ENCODING_IBM862,
161
+ PM_ENCODING_IBM863,
162
+ PM_ENCODING_IBM864,
163
+ PM_ENCODING_IBM865,
164
+ PM_ENCODING_IBM866,
165
+ PM_ENCODING_IBM869,
166
+ PM_ENCODING_ISO_8859_1,
167
+ PM_ENCODING_ISO_8859_2,
168
+ PM_ENCODING_ISO_8859_3,
169
+ PM_ENCODING_ISO_8859_4,
170
+ PM_ENCODING_ISO_8859_5,
171
+ PM_ENCODING_ISO_8859_6,
172
+ PM_ENCODING_ISO_8859_7,
173
+ PM_ENCODING_ISO_8859_8,
174
+ PM_ENCODING_ISO_8859_9,
175
+ PM_ENCODING_ISO_8859_10,
176
+ PM_ENCODING_ISO_8859_11,
177
+ PM_ENCODING_ISO_8859_13,
178
+ PM_ENCODING_ISO_8859_14,
179
+ PM_ENCODING_ISO_8859_15,
180
+ PM_ENCODING_ISO_8859_16,
181
+ PM_ENCODING_KOI8_R,
182
+ PM_ENCODING_KOI8_U,
183
+ PM_ENCODING_MAC_CENT_EURO,
184
+ PM_ENCODING_MAC_CROATIAN,
185
+ PM_ENCODING_MAC_CYRILLIC,
186
+ PM_ENCODING_MAC_GREEK,
187
+ PM_ENCODING_MAC_ICELAND,
188
+ PM_ENCODING_MAC_JAPANESE,
189
+ PM_ENCODING_MAC_ROMAN,
190
+ PM_ENCODING_MAC_ROMANIA,
191
+ PM_ENCODING_MAC_THAI,
192
+ PM_ENCODING_MAC_TURKISH,
193
+ PM_ENCODING_MAC_UKRAINE,
194
+ PM_ENCODING_SHIFT_JIS,
195
+ PM_ENCODING_SJIS_DOCOMO,
196
+ PM_ENCODING_SJIS_KDDI,
197
+ PM_ENCODING_SJIS_SOFTBANK,
198
+ PM_ENCODING_STATELESS_ISO_2022_JP,
199
+ PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
200
+ PM_ENCODING_TIS_620,
201
+ PM_ENCODING_US_ASCII,
202
+ PM_ENCODING_UTF8_MAC,
203
+ PM_ENCODING_UTF8_DOCOMO,
204
+ PM_ENCODING_UTF8_KDDI,
205
+ PM_ENCODING_UTF8_SOFTBANK,
206
+ PM_ENCODING_WINDOWS_1250,
207
+ PM_ENCODING_WINDOWS_1251,
208
+ PM_ENCODING_WINDOWS_1252,
209
+ PM_ENCODING_WINDOWS_1253,
210
+ PM_ENCODING_WINDOWS_1254,
211
+ PM_ENCODING_WINDOWS_1255,
212
+ PM_ENCODING_WINDOWS_1256,
213
+ PM_ENCODING_WINDOWS_1257,
214
+ PM_ENCODING_WINDOWS_1258,
215
+ PM_ENCODING_WINDOWS_31J,
216
+ PM_ENCODING_WINDOWS_874,
217
+ PM_ENCODING_MAXIMUM
218
+ } pm_encoding_type_t;
219
+
220
+ /**
221
+ * This is the table of all of the encodings that prism supports.
222
+ */
223
+ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
224
+
225
+ /**
226
+ * This is the default UTF-8 encoding. We need a reference to it to quickly
227
+ * create parsers.
228
+ */
229
+ #define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
230
+
231
+ /**
232
+ * This is the US-ASCII encoding. We need a reference to it to be able to
233
+ * compare against it when a string is being created because it could possibly
234
+ * need to fall back to ASCII-8BIT.
235
+ */
236
+ #define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
237
+
238
+ /**
239
+ * Parse the given name of an encoding and return a pointer to the corresponding
240
+ * encoding struct if one can be found, otherwise return NULL.
241
+ *
242
+ * @param start A pointer to the first byte of the name.
243
+ * @param end A pointer to the last byte of the name.
244
+ * @returns A pointer to the encoding struct if one is found, otherwise NULL.
245
+ */
246
+ const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
185
247
 
186
248
  #endif
@@ -35,7 +35,7 @@ typedef struct {
35
35
  * The line within the file that the parse starts on. This value is
36
36
  * 0-indexed.
37
37
  */
38
- uint32_t line;
38
+ int32_t line;
39
39
 
40
40
  /**
41
41
  * The name of the encoding that the source file is in. Note that this must
@@ -80,7 +80,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons
80
80
  * @param options The options struct to set the line on.
81
81
  * @param line The line to set.
82
82
  */
83
- PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, uint32_t line);
83
+ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
84
84
 
85
85
  /**
86
86
  * Set the encoding option on the given options struct.
@@ -8,7 +8,7 @@
8
8
 
9
9
  #include "prism/ast.h"
10
10
  #include "prism/defines.h"
11
- #include "prism/enc/pm_encoding.h"
11
+ #include "prism/encoding.h"
12
12
  #include "prism/util/pm_constant_pool.h"
13
13
  #include "prism/util/pm_list.h"
14
14
  #include "prism/util/pm_newline_list.h"
@@ -17,6 +17,12 @@
17
17
 
18
18
  #include <stdbool.h>
19
19
 
20
+ // TODO: remove this by renaming the original flag
21
+ /**
22
+ * Temporary alias for the PM_NODE_FLAG_STATIC_KEYS flag.
23
+ */
24
+ #define PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS
25
+
20
26
  /**
21
27
  * This enum provides various bits that represent different kinds of states that
22
28
  * the lexer can track. This is used to determine which kind of token to return
@@ -297,6 +303,9 @@ typedef enum {
297
303
  /** an ensure statement */
298
304
  PM_CONTEXT_ENSURE,
299
305
 
306
+ /** an ensure statement within a method definition */
307
+ PM_CONTEXT_ENSURE_DEF,
308
+
300
309
  /** a for loop */
301
310
  PM_CONTEXT_FOR,
302
311
 
@@ -333,9 +342,15 @@ typedef enum {
333
342
  /** a rescue else statement */
334
343
  PM_CONTEXT_RESCUE_ELSE,
335
344
 
345
+ /** a rescue else statement within a method definition */
346
+ PM_CONTEXT_RESCUE_ELSE_DEF,
347
+
336
348
  /** a rescue statement */
337
349
  PM_CONTEXT_RESCUE,
338
350
 
351
+ /** a rescue statement within a method definition */
352
+ PM_CONTEXT_RESCUE_DEF,
353
+
339
354
  /** a singleton class definition */
340
355
  PM_CONTEXT_SCLASS,
341
356
 
@@ -361,8 +376,7 @@ typedef struct pm_context_node {
361
376
  /** This is the type of a comment that we've found while parsing. */
362
377
  typedef enum {
363
378
  PM_COMMENT_INLINE,
364
- PM_COMMENT_EMBDOC,
365
- PM_COMMENT___END__
379
+ PM_COMMENT_EMBDOC
366
380
  } pm_comment_type_t;
367
381
 
368
382
  /**
@@ -374,11 +388,8 @@ typedef struct pm_comment {
374
388
  /** The embedded base node. */
375
389
  pm_list_node_t node;
376
390
 
377
- /** A pointer to the start of the comment in the source. */
378
- const uint8_t *start;
379
-
380
- /** A pointer to the end of the comment in the source. */
381
- const uint8_t *end;
391
+ /** The location of the comment in the source. */
392
+ pm_location_t location;
382
393
 
383
394
  /** The type of comment that we've found. */
384
395
  pm_comment_type_t type;
@@ -413,14 +424,6 @@ typedef struct {
413
424
  */
414
425
  typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
415
426
 
416
- /**
417
- * When an encoding is encountered that isn't understood by prism, we provide
418
- * the ability here to call out to a user-defined function to get an encoding
419
- * struct. If the function returns something that isn't NULL, we set that to
420
- * our encoding and use it to parse identifiers.
421
- */
422
- typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
423
-
424
427
  /**
425
428
  * When you are lexing through a file, the lexer needs all of the information
426
429
  * that the parser additionally provides (for example, the local table). So if
@@ -469,18 +472,12 @@ typedef struct pm_scope {
469
472
  bool explicit_params;
470
473
 
471
474
  /**
472
- * A boolean indicating whether or not this scope has numbered parameters.
475
+ * An integer indicating the number of numbered parameters on this scope.
473
476
  * This is necessary to determine if child blocks are allowed to use
474
- * numbered parameters.
477
+ * numbered parameters, and to pass information to consumers of the AST
478
+ * about how many numbered parameters exist.
475
479
  */
476
- bool numbered_params;
477
-
478
- /**
479
- * A transparent scope is a scope that cannot have locals set on itself.
480
- * When a local is set on this scope, it will instead be set on the parent
481
- * scope's local table.
482
- */
483
- bool transparent;
480
+ uint8_t numbered_parameters;
484
481
  } pm_scope_t;
485
482
 
486
483
  /**
@@ -565,6 +562,9 @@ struct pm_parser {
565
562
  /** The list of magic comments that have been found while parsing. */
566
563
  pm_list_t magic_comment_list;
567
564
 
565
+ /** The optional location of the __END__ keyword and its contents. */
566
+ pm_location_t data_loc;
567
+
568
568
  /** The list of warnings that have been found while parsing. */
569
569
  pm_list_t warning_list;
570
570
 
@@ -581,7 +581,7 @@ struct pm_parser {
581
581
  * The encoding functions for the current file is attached to the parser as
582
582
  * it's parsing so that it can change with a magic comment.
583
583
  */
584
- pm_encoding_t encoding;
584
+ const pm_encoding_t *encoding;
585
585
 
586
586
  /**
587
587
  * When the encoding that is being used to parse the source is changed by
@@ -590,14 +590,6 @@ struct pm_parser {
590
590
  */
591
591
  pm_encoding_changed_callback_t encoding_changed_callback;
592
592
 
593
- /**
594
- * When an encoding is encountered that isn't understood by prism, we
595
- * provide the ability here to call out to a user-defined function to get an
596
- * encoding struct. If the function returns something that isn't NULL, we
597
- * set that to our encoding and use it to parse identifiers.
598
- */
599
- pm_encoding_decode_callback_t encoding_decode_callback;
600
-
601
593
  /**
602
594
  * This pointer indicates where a comment must start if it is to be
603
595
  * considered an encoding comment.
@@ -643,7 +635,38 @@ struct pm_parser {
643
635
  * The line number at the start of the parse. This will be used to offset
644
636
  * the line numbers of all of the locations.
645
637
  */
646
- uint32_t start_line;
638
+ int32_t start_line;
639
+
640
+ /**
641
+ * When a string-like expression is being lexed, any byte or escape sequence
642
+ * that resolves to a value whose top bit is set (i.e., >= 0x80) will
643
+ * explicitly set the encoding to the same encoding as the source.
644
+ * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
645
+ * resolves to a value whose top bit is set, then the encoding will be
646
+ * explicitly set to UTF-8.
647
+ *
648
+ * The _next_ time this happens, if the encoding that is about to become the
649
+ * explicitly set encoding does not match the previously set explicit
650
+ * encoding, a mixed encoding error will be emitted.
651
+ *
652
+ * When the expression is finished being lexed, the explicit encoding
653
+ * controls the encoding of the expression. For the most part this means
654
+ * that the expression will either be encoded in the source encoding or
655
+ * UTF-8. This holds for all encodings except US-ASCII. If the source is
656
+ * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
657
+ * expression will be encoded as ASCII-8BIT.
658
+ *
659
+ * Note that if the expression is a list, different elements within the same
660
+ * list can have different encodings, so this will get reset between each
661
+ * element. Furthermore all of this only applies to lists that support
662
+ * interpolation, because otherwise escapes that could change the encoding
663
+ * are ignored.
664
+ *
665
+ * At first glance, it may make more sense for this to live on the lexer
666
+ * mode, but we need it here to communicate back to the parser for character
667
+ * literals that do not push a new lexer mode.
668
+ */
669
+ const pm_encoding_t *explicit_encoding;
647
670
 
648
671
  /** Whether or not we're at the beginning of a command. */
649
672
  bool command_start;
@@ -667,6 +690,9 @@ struct pm_parser {
667
690
  /** This flag indicates that we are currently parsing a keyword argument. */
668
691
  bool in_keyword_arg;
669
692
 
693
+ /** The current parameter name id on parsing its default value. */
694
+ pm_constant_id_t current_param_name;
695
+
670
696
  /**
671
697
  * Whether or not the parser has seen a token that has semantic meaning
672
698
  * (i.e., a token that is not a comment or whitespace).
@@ -8,7 +8,7 @@
8
8
 
9
9
  #include "prism/defines.h"
10
10
  #include "prism/parser.h"
11
- #include "prism/enc/pm_encoding.h"
11
+ #include "prism/encoding.h"
12
12
  #include "prism/util/pm_memchr.h"
13
13
  #include "prism/util/pm_string_list.h"
14
14
  #include "prism/util/pm_string.h"
@@ -28,6 +28,6 @@
28
28
  * @param encoding The encoding of the source code.
29
29
  * @return Whether or not the parsing was successful.
30
30
  */
31
- PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
31
+ PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding);
32
32
 
33
33
  #endif
@@ -118,7 +118,15 @@ void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
118
118
  * @param buffer The buffer to append to.
119
119
  * @param value The integer to append.
120
120
  */
121
- void pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value);
121
+ void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
122
+
123
+ /**
124
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
125
+ *
126
+ * @param buffer The buffer to append to.
127
+ * @param value The integer to append.
128
+ */
129
+ void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
122
130
 
123
131
  /**
124
132
  * Concatenate one buffer onto another.
@@ -7,7 +7,7 @@
7
7
  #define PRISM_MEMCHR_H
8
8
 
9
9
  #include "prism/defines.h"
10
- #include "prism/enc/pm_encoding.h"
10
+ #include "prism/encoding.h"
11
11
 
12
12
  #include <stddef.h>
13
13
 
@@ -24,6 +24,6 @@
24
24
  * @return A pointer to the first occurrence of the character in the source
25
25
  * string, or NULL if no such character exists.
26
26
  */
27
- void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding);
27
+ void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
28
28
 
29
29
  #endif
@@ -32,12 +32,12 @@
32
32
  * need to take a slower path and iterate one multi-byte character at a time.
33
33
  *
34
34
  * @param parser The parser.
35
- * @param source The source string.
35
+ * @param source The source to search.
36
36
  * @param charset The charset to search for.
37
- * @param length The maximum length to search.
37
+ * @param length The maximum number of bytes to search.
38
38
  * @return A pointer to the first character in the source string that is in the
39
39
  * charset, or NULL if no such character exists.
40
40
  */
41
- const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
41
+ const uint8_t * pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
42
42
 
43
43
  #endif
@@ -14,16 +14,16 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 17
17
+ #define PRISM_VERSION_MINOR 19
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
21
21
  */
22
- #define PRISM_VERSION_PATCH 1
22
+ #define PRISM_VERSION_PATCH 0
23
23
 
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "0.17.1"
27
+ #define PRISM_VERSION "0.19.0"
28
28
 
29
29
  #endif