prism 0.17.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -1
  3. data/Makefile +5 -5
  4. data/README.md +4 -3
  5. data/config.yml +214 -68
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +10 -3
  8. data/docs/configuration.md +11 -9
  9. data/docs/encoding.md +92 -88
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/local_variable_depth.md +229 -0
  13. data/docs/ruby_api.md +16 -0
  14. data/docs/serialization.md +18 -13
  15. data/ext/prism/api_node.c +411 -240
  16. data/ext/prism/extconf.rb +97 -127
  17. data/ext/prism/extension.c +97 -33
  18. data/ext/prism/extension.h +1 -1
  19. data/include/prism/ast.h +377 -159
  20. data/include/prism/defines.h +17 -0
  21. data/include/prism/diagnostic.h +38 -6
  22. data/include/prism/{enc/pm_encoding.h → encoding.h} +126 -64
  23. data/include/prism/options.h +2 -2
  24. data/include/prism/parser.h +62 -36
  25. data/include/prism/regexp.h +2 -2
  26. data/include/prism/util/pm_buffer.h +9 -1
  27. data/include/prism/util/pm_memchr.h +2 -2
  28. data/include/prism/util/pm_strpbrk.h +3 -3
  29. data/include/prism/version.h +3 -3
  30. data/include/prism.h +13 -15
  31. data/lib/prism/compiler.rb +15 -3
  32. data/lib/prism/debug.rb +13 -4
  33. data/lib/prism/desugar_compiler.rb +4 -3
  34. data/lib/prism/dispatcher.rb +70 -14
  35. data/lib/prism/dot_visitor.rb +4612 -0
  36. data/lib/prism/dsl.rb +77 -57
  37. data/lib/prism/ffi.rb +19 -6
  38. data/lib/prism/lex_compat.rb +19 -9
  39. data/lib/prism/mutation_compiler.rb +26 -6
  40. data/lib/prism/node.rb +1314 -522
  41. data/lib/prism/node_ext.rb +102 -19
  42. data/lib/prism/parse_result.rb +58 -27
  43. data/lib/prism/ripper_compat.rb +49 -34
  44. data/lib/prism/serialize.rb +251 -227
  45. data/lib/prism/visitor.rb +15 -3
  46. data/lib/prism.rb +21 -4
  47. data/prism.gemspec +7 -9
  48. data/rbi/prism.rbi +688 -284
  49. data/rbi/prism_static.rbi +3 -0
  50. data/sig/prism.rbs +426 -156
  51. data/sig/prism_static.rbs +1 -0
  52. data/src/diagnostic.c +280 -216
  53. data/src/encoding.c +5137 -0
  54. data/src/node.c +99 -21
  55. data/src/options.c +21 -2
  56. data/src/prettyprint.c +1743 -1241
  57. data/src/prism.c +1774 -831
  58. data/src/regexp.c +15 -15
  59. data/src/serialize.c +261 -164
  60. data/src/util/pm_buffer.c +10 -1
  61. data/src/util/pm_memchr.c +1 -1
  62. data/src/util/pm_strpbrk.c +4 -4
  63. metadata +8 -10
  64. data/src/enc/pm_big5.c +0 -53
  65. data/src/enc/pm_euc_jp.c +0 -59
  66. data/src/enc/pm_gbk.c +0 -62
  67. data/src/enc/pm_shift_jis.c +0 -57
  68. data/src/enc/pm_tables.c +0 -743
  69. data/src/enc/pm_unicode.c +0 -2369
  70. data/src/enc/pm_windows_31j.c +0 -57
@@ -74,4 +74,21 @@
74
74
  # define snprintf _snprintf
75
75
  #endif
76
76
 
77
+ /**
78
+ * A simple utility macro to concatenate two tokens together, necessary when one
79
+ * of the tokens is itself a macro.
80
+ */
81
+ #define PM_CONCATENATE(left, right) left ## right
82
+
83
+ /**
84
+ * Static assertions weren't standardized until C11. On C11 and later (or when
85
+ * _Static_assert is provided as a macro) we use it directly; otherwise we use a
86
+ * typedef whose array size is negative when the condition is false.
87
+ */
88
+ #if defined(_Static_assert) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
89
+ # define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
90
+ #else
91
+ # define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
92
+ #endif
93
+
77
94
  #endif
@@ -6,6 +6,7 @@
6
6
  #ifndef PRISM_DIAGNOSTIC_H
7
7
  #define PRISM_DIAGNOSTIC_H
8
8
 
9
+ #include "prism/ast.h"
9
10
  #include "prism/defines.h"
10
11
  #include "prism/util/pm_list.h"
11
12
 
@@ -22,14 +23,18 @@ typedef struct {
22
23
  /** The embedded base node. */
23
24
  pm_list_node_t node;
24
25
 
25
- /** A pointer to the start of the source that generated the diagnostic. */
26
- const uint8_t *start;
27
-
28
- /** A pointer to the end of the source that generated the diagnostic. */
29
- const uint8_t *end;
26
+ /** The location of the diagnostic in the source. */
27
+ pm_location_t location;
30
28
 
31
29
  /** The message associated with the diagnostic. */
32
30
  const char *message;
31
+
32
+ /**
33
+ * Whether or not the memory related to the message of this diagnostic is
34
+ * owned by this diagnostic. If it is, it needs to be freed when the
35
+ * diagnostic is freed.
36
+ */
37
+ bool owned;
33
38
  } pm_diagnostic_t;
34
39
 
35
40
  /**
@@ -40,12 +45,14 @@ typedef enum {
40
45
  PM_ERR_ALIAS_ARGUMENT,
41
46
  PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
42
47
  PM_ERR_ARGUMENT_AFTER_BLOCK,
48
+ PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
43
49
  PM_ERR_ARGUMENT_BARE_HASH,
44
50
  PM_ERR_ARGUMENT_BLOCK_MULTI,
45
51
  PM_ERR_ARGUMENT_FORMAL_CLASS,
46
52
  PM_ERR_ARGUMENT_FORMAL_CONSTANT,
47
53
  PM_ERR_ARGUMENT_FORMAL_GLOBAL,
48
54
  PM_ERR_ARGUMENT_FORMAL_IVAR,
55
+ PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
49
56
  PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
50
57
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
51
58
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
@@ -71,6 +78,7 @@ typedef enum {
71
78
  PM_ERR_CANNOT_PARSE_STRING_PART,
72
79
  PM_ERR_CASE_EXPRESSION_AFTER_CASE,
73
80
  PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
81
+ PM_ERR_CASE_MATCH_MISSING_PREDICATE,
74
82
  PM_ERR_CASE_MISSING_CONDITIONS,
75
83
  PM_ERR_CASE_TERM,
76
84
  PM_ERR_CLASS_IN_METHOD,
@@ -169,6 +177,7 @@ typedef enum {
169
177
  PM_ERR_LIST_W_UPPER_ELEMENT,
170
178
  PM_ERR_LIST_W_UPPER_TERM,
171
179
  PM_ERR_MALLOC_FAILED,
180
+ PM_ERR_MIXED_ENCODING,
172
181
  PM_ERR_MODULE_IN_METHOD,
173
182
  PM_ERR_MODULE_NAME,
174
183
  PM_ERR_MODULE_TERM,
@@ -182,6 +191,7 @@ typedef enum {
182
191
  PM_ERR_OPERATOR_WRITE_BLOCK,
183
192
  PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
184
193
  PM_ERR_PARAMETER_BLOCK_MULTI,
194
+ PM_ERR_PARAMETER_CIRCULAR,
185
195
  PM_ERR_PARAMETER_METHOD_NAME,
186
196
  PM_ERR_PARAMETER_NAME_REPEAT,
187
197
  PM_ERR_PARAMETER_NO_DEFAULT,
@@ -201,6 +211,7 @@ typedef enum {
201
211
  PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
202
212
  PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
203
213
  PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
214
+ PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
204
215
  PM_ERR_PATTERN_HASH_KEY,
205
216
  PM_ERR_PATTERN_HASH_KEY_LABEL,
206
217
  PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
@@ -216,6 +227,10 @@ typedef enum {
216
227
  PM_ERR_RESCUE_TERM,
217
228
  PM_ERR_RESCUE_VARIABLE,
218
229
  PM_ERR_RETURN_INVALID,
230
+ PM_ERR_STATEMENT_ALIAS,
231
+ PM_ERR_STATEMENT_POSTEXE_END,
232
+ PM_ERR_STATEMENT_PREEXE_BEGIN,
233
+ PM_ERR_STATEMENT_UNDEF,
219
234
  PM_ERR_STRING_CONCATENATION,
220
235
  PM_ERR_STRING_INTERPOLATED_TERM,
221
236
  PM_ERR_STRING_LITERAL_TERM,
@@ -231,7 +246,9 @@ typedef enum {
231
246
  PM_ERR_UNARY_RECEIVER_TILDE,
232
247
  PM_ERR_UNDEF_ARGUMENT,
233
248
  PM_ERR_UNTIL_TERM,
249
+ PM_ERR_VOID_EXPRESSION,
234
250
  PM_ERR_WHILE_TERM,
251
+ PM_ERR_WRITE_TARGET_IN_METHOD,
235
252
  PM_ERR_WRITE_TARGET_READONLY,
236
253
  PM_ERR_WRITE_TARGET_UNEXPECTED,
237
254
  PM_ERR_XSTRING_TERM,
@@ -239,13 +256,15 @@ typedef enum {
239
256
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
240
257
  PM_WARN_AMBIGUOUS_PREFIX_STAR,
241
258
  PM_WARN_AMBIGUOUS_SLASH,
259
+ PM_WARN_END_IN_METHOD,
242
260
 
243
261
  /* This must be the last member. */
244
262
  PM_DIAGNOSTIC_ID_LEN,
245
263
  } pm_diagnostic_id_t;
246
264
 
247
265
  /**
248
- * Append a diagnostic to the given list of diagnostics.
266
+ * Append a diagnostic to the given list of diagnostics that is using shared
267
+ * memory for its message.
249
268
  *
250
269
  * @param list The list to append to.
251
270
  * @param start The start of the diagnostic.
@@ -255,6 +274,19 @@ typedef enum {
255
274
  */
256
275
  bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
257
276
 
277
+ /**
278
+ * Append a diagnostic to the given list of diagnostics that is using a format
279
+ * string for its message.
280
+ *
281
+ * @param list The list to append to.
282
+ * @param start The start of the diagnostic.
283
+ * @param end The end of the diagnostic.
284
+ * @param diag_id The diagnostic ID.
285
+ * @param ... The arguments to the format string for the message.
286
+ * @return Whether the diagnostic was successfully appended.
287
+ */
288
+ bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
289
+
258
290
  /**
259
291
  * Deallocate the internal state of the given diagnostic list.
260
292
  *
@@ -1,5 +1,5 @@
1
1
  /**
2
- * @file pm_encoding.h
2
+ * @file encoding.h
3
3
  *
4
4
  * The encoding interface and implementations used by the parser.
5
5
  */
@@ -7,6 +7,7 @@
7
7
  #define PRISM_ENCODING_H
8
8
 
9
9
  #include "prism/defines.h"
10
+ #include "prism/util/pm_strncasecmp.h"
10
11
 
11
12
  #include <assert.h>
12
13
  #include <stdbool.h>
@@ -78,39 +79,6 @@ typedef struct {
78
79
  */
79
80
  #define PRISM_ENCODING_UPPERCASE_BIT 1 << 2
80
81
 
81
- /**
82
- * Return the size of the next character in the ASCII encoding if it is an
83
- * alphabetical character.
84
- *
85
- * @param b The bytes to read.
86
- * @param n The number of bytes that can be read.
87
- * @returns The number of bytes that the next character takes if it is valid in
88
- * the encoding, or 0 if it is not.
89
- */
90
- size_t pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
91
-
92
- /**
93
- * Return the size of the next character in the ASCII encoding if it is an
94
- * alphanumeric character.
95
- *
96
- * @param b The bytes to read.
97
- * @param n The number of bytes that can be read.
98
- * @returns The number of bytes that the next character takes if it is valid in
99
- * the encoding, or 0 if it is not.
100
- */
101
- size_t pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
102
-
103
- /**
104
- * Return true if the next character in the ASCII encoding if it is an uppercase
105
- * character.
106
- *
107
- * @param b The bytes to read.
108
- * @param n The number of bytes that can be read.
109
- * @returns True if the next character is valid in the encoding and is an
110
- * uppercase character, or false if it is not.
111
- */
112
- bool pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
113
-
114
82
  /**
115
83
  * Return the size of the next character in the UTF-8 encoding if it is an
116
84
  * alphabetical character.
@@ -152,35 +120,129 @@ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
152
120
  */
153
121
  extern const uint8_t pm_encoding_unicode_table[256];
154
122
 
155
- // Below are the encodings that are supported by the parser. They are defined in
156
- // their own files in the src/enc directory.
157
-
158
- extern pm_encoding_t pm_encoding_ascii;
159
- extern pm_encoding_t pm_encoding_ascii_8bit;
160
- extern pm_encoding_t pm_encoding_big5;
161
- extern pm_encoding_t pm_encoding_euc_jp;
162
- extern pm_encoding_t pm_encoding_gbk;
163
- extern pm_encoding_t pm_encoding_iso_8859_1;
164
- extern pm_encoding_t pm_encoding_iso_8859_2;
165
- extern pm_encoding_t pm_encoding_iso_8859_3;
166
- extern pm_encoding_t pm_encoding_iso_8859_4;
167
- extern pm_encoding_t pm_encoding_iso_8859_5;
168
- extern pm_encoding_t pm_encoding_iso_8859_6;
169
- extern pm_encoding_t pm_encoding_iso_8859_7;
170
- extern pm_encoding_t pm_encoding_iso_8859_8;
171
- extern pm_encoding_t pm_encoding_iso_8859_9;
172
- extern pm_encoding_t pm_encoding_iso_8859_10;
173
- extern pm_encoding_t pm_encoding_iso_8859_11;
174
- extern pm_encoding_t pm_encoding_iso_8859_13;
175
- extern pm_encoding_t pm_encoding_iso_8859_14;
176
- extern pm_encoding_t pm_encoding_iso_8859_15;
177
- extern pm_encoding_t pm_encoding_iso_8859_16;
178
- extern pm_encoding_t pm_encoding_koi8_r;
179
- extern pm_encoding_t pm_encoding_shift_jis;
180
- extern pm_encoding_t pm_encoding_utf_8;
181
- extern pm_encoding_t pm_encoding_utf8_mac;
182
- extern pm_encoding_t pm_encoding_windows_31j;
183
- extern pm_encoding_t pm_encoding_windows_1251;
184
- extern pm_encoding_t pm_encoding_windows_1252;
123
+ /**
124
+ * These are all of the encodings that prism supports.
125
+ */
126
+ typedef enum {
127
+ PM_ENCODING_UTF_8 = 0,
128
+ PM_ENCODING_ASCII_8BIT,
129
+ PM_ENCODING_BIG5,
130
+ PM_ENCODING_BIG5_HKSCS,
131
+ PM_ENCODING_BIG5_UAO,
132
+ PM_ENCODING_CESU_8,
133
+ PM_ENCODING_CP51932,
134
+ PM_ENCODING_CP850,
135
+ PM_ENCODING_CP852,
136
+ PM_ENCODING_CP855,
137
+ PM_ENCODING_CP949,
138
+ PM_ENCODING_CP950,
139
+ PM_ENCODING_CP951,
140
+ PM_ENCODING_EMACS_MULE,
141
+ PM_ENCODING_EUC_JP,
142
+ PM_ENCODING_EUC_JP_MS,
143
+ PM_ENCODING_EUC_JIS_2004,
144
+ PM_ENCODING_EUC_KR,
145
+ PM_ENCODING_EUC_TW,
146
+ PM_ENCODING_GB12345,
147
+ PM_ENCODING_GB18030,
148
+ PM_ENCODING_GB1988,
149
+ PM_ENCODING_GB2312,
150
+ PM_ENCODING_GBK,
151
+ PM_ENCODING_IBM437,
152
+ PM_ENCODING_IBM720,
153
+ PM_ENCODING_IBM737,
154
+ PM_ENCODING_IBM775,
155
+ PM_ENCODING_IBM852,
156
+ PM_ENCODING_IBM855,
157
+ PM_ENCODING_IBM857,
158
+ PM_ENCODING_IBM860,
159
+ PM_ENCODING_IBM861,
160
+ PM_ENCODING_IBM862,
161
+ PM_ENCODING_IBM863,
162
+ PM_ENCODING_IBM864,
163
+ PM_ENCODING_IBM865,
164
+ PM_ENCODING_IBM866,
165
+ PM_ENCODING_IBM869,
166
+ PM_ENCODING_ISO_8859_1,
167
+ PM_ENCODING_ISO_8859_2,
168
+ PM_ENCODING_ISO_8859_3,
169
+ PM_ENCODING_ISO_8859_4,
170
+ PM_ENCODING_ISO_8859_5,
171
+ PM_ENCODING_ISO_8859_6,
172
+ PM_ENCODING_ISO_8859_7,
173
+ PM_ENCODING_ISO_8859_8,
174
+ PM_ENCODING_ISO_8859_9,
175
+ PM_ENCODING_ISO_8859_10,
176
+ PM_ENCODING_ISO_8859_11,
177
+ PM_ENCODING_ISO_8859_13,
178
+ PM_ENCODING_ISO_8859_14,
179
+ PM_ENCODING_ISO_8859_15,
180
+ PM_ENCODING_ISO_8859_16,
181
+ PM_ENCODING_KOI8_R,
182
+ PM_ENCODING_KOI8_U,
183
+ PM_ENCODING_MAC_CENT_EURO,
184
+ PM_ENCODING_MAC_CROATIAN,
185
+ PM_ENCODING_MAC_CYRILLIC,
186
+ PM_ENCODING_MAC_GREEK,
187
+ PM_ENCODING_MAC_ICELAND,
188
+ PM_ENCODING_MAC_JAPANESE,
189
+ PM_ENCODING_MAC_ROMAN,
190
+ PM_ENCODING_MAC_ROMANIA,
191
+ PM_ENCODING_MAC_THAI,
192
+ PM_ENCODING_MAC_TURKISH,
193
+ PM_ENCODING_MAC_UKRAINE,
194
+ PM_ENCODING_SHIFT_JIS,
195
+ PM_ENCODING_SJIS_DOCOMO,
196
+ PM_ENCODING_SJIS_KDDI,
197
+ PM_ENCODING_SJIS_SOFTBANK,
198
+ PM_ENCODING_STATELESS_ISO_2022_JP,
199
+ PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
200
+ PM_ENCODING_TIS_620,
201
+ PM_ENCODING_US_ASCII,
202
+ PM_ENCODING_UTF8_MAC,
203
+ PM_ENCODING_UTF8_DOCOMO,
204
+ PM_ENCODING_UTF8_KDDI,
205
+ PM_ENCODING_UTF8_SOFTBANK,
206
+ PM_ENCODING_WINDOWS_1250,
207
+ PM_ENCODING_WINDOWS_1251,
208
+ PM_ENCODING_WINDOWS_1252,
209
+ PM_ENCODING_WINDOWS_1253,
210
+ PM_ENCODING_WINDOWS_1254,
211
+ PM_ENCODING_WINDOWS_1255,
212
+ PM_ENCODING_WINDOWS_1256,
213
+ PM_ENCODING_WINDOWS_1257,
214
+ PM_ENCODING_WINDOWS_1258,
215
+ PM_ENCODING_WINDOWS_31J,
216
+ PM_ENCODING_WINDOWS_874,
217
+ PM_ENCODING_MAXIMUM
218
+ } pm_encoding_type_t;
219
+
220
+ /**
221
+ * This is the table of all of the encodings that prism supports.
222
+ */
223
+ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
224
+
225
+ /**
226
+ * This is the default UTF-8 encoding. We need a reference to it to quickly
227
+ * create parsers.
228
+ */
229
+ #define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
230
+
231
+ /**
232
+ * This is the US-ASCII encoding. We need a reference to it to be able to
233
+ * compare against it when a string is being created because it could possibly
234
+ * need to fall back to ASCII-8BIT.
235
+ */
236
+ #define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
237
+
238
+ /**
239
+ * Parse the given name of an encoding and return a pointer to the corresponding
240
+ * encoding struct if one can be found, otherwise return NULL.
241
+ *
242
+ * @param start A pointer to the first byte of the name.
243
+ * @param end A pointer to the last byte of the name.
244
+ * @returns A pointer to the encoding struct if one is found, otherwise NULL.
245
+ */
246
+ const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
185
247
 
186
248
  #endif
@@ -35,7 +35,7 @@ typedef struct {
35
35
  * The line within the file that the parse starts on. This value is
36
36
  * 0-indexed.
37
37
  */
38
- uint32_t line;
38
+ int32_t line;
39
39
 
40
40
  /**
41
41
  * The name of the encoding that the source file is in. Note that this must
@@ -80,7 +80,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons
80
80
  * @param options The options struct to set the line on.
81
81
  * @param line The line to set.
82
82
  */
83
- PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, uint32_t line);
83
+ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
84
84
 
85
85
  /**
86
86
  * Set the encoding option on the given options struct.
@@ -8,7 +8,7 @@
8
8
 
9
9
  #include "prism/ast.h"
10
10
  #include "prism/defines.h"
11
- #include "prism/enc/pm_encoding.h"
11
+ #include "prism/encoding.h"
12
12
  #include "prism/util/pm_constant_pool.h"
13
13
  #include "prism/util/pm_list.h"
14
14
  #include "prism/util/pm_newline_list.h"
@@ -17,6 +17,12 @@
17
17
 
18
18
  #include <stdbool.h>
19
19
 
20
+ // TODO: remove this by renaming the original flag
21
+ /**
22
+ * Temporary alias for the PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS flag.
23
+ */
24
+ #define PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS
25
+
20
26
  /**
21
27
  * This enum provides various bits that represent different kinds of states that
22
28
  * the lexer can track. This is used to determine which kind of token to return
@@ -297,6 +303,9 @@ typedef enum {
297
303
  /** an ensure statement */
298
304
  PM_CONTEXT_ENSURE,
299
305
 
306
+ /** an ensure statement within a method definition */
307
+ PM_CONTEXT_ENSURE_DEF,
308
+
300
309
  /** a for loop */
301
310
  PM_CONTEXT_FOR,
302
311
 
@@ -333,9 +342,15 @@ typedef enum {
333
342
  /** a rescue else statement */
334
343
  PM_CONTEXT_RESCUE_ELSE,
335
344
 
345
+ /** a rescue else statement within a method definition */
346
+ PM_CONTEXT_RESCUE_ELSE_DEF,
347
+
336
348
  /** a rescue statement */
337
349
  PM_CONTEXT_RESCUE,
338
350
 
351
+ /** a rescue statement within a method definition */
352
+ PM_CONTEXT_RESCUE_DEF,
353
+
339
354
  /** a singleton class definition */
340
355
  PM_CONTEXT_SCLASS,
341
356
 
@@ -361,8 +376,7 @@ typedef struct pm_context_node {
361
376
  /** This is the type of a comment that we've found while parsing. */
362
377
  typedef enum {
363
378
  PM_COMMENT_INLINE,
364
- PM_COMMENT_EMBDOC,
365
- PM_COMMENT___END__
379
+ PM_COMMENT_EMBDOC
366
380
  } pm_comment_type_t;
367
381
 
368
382
  /**
@@ -374,11 +388,8 @@ typedef struct pm_comment {
374
388
  /** The embedded base node. */
375
389
  pm_list_node_t node;
376
390
 
377
- /** A pointer to the start of the comment in the source. */
378
- const uint8_t *start;
379
-
380
- /** A pointer to the end of the comment in the source. */
381
- const uint8_t *end;
391
+ /** The location of the comment in the source. */
392
+ pm_location_t location;
382
393
 
383
394
  /** The type of comment that we've found. */
384
395
  pm_comment_type_t type;
@@ -413,14 +424,6 @@ typedef struct {
413
424
  */
414
425
  typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
415
426
 
416
- /**
417
- * When an encoding is encountered that isn't understood by prism, we provide
418
- * the ability here to call out to a user-defined function to get an encoding
419
- * struct. If the function returns something that isn't NULL, we set that to
420
- * our encoding and use it to parse identifiers.
421
- */
422
- typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
423
-
424
427
  /**
425
428
  * When you are lexing through a file, the lexer needs all of the information
426
429
  * that the parser additionally provides (for example, the local table). So if
@@ -469,18 +472,12 @@ typedef struct pm_scope {
469
472
  bool explicit_params;
470
473
 
471
474
  /**
472
- * A boolean indicating whether or not this scope has numbered parameters.
475
+ * An integer indicating the number of numbered parameters on this scope.
473
476
  * This is necessary to determine if child blocks are allowed to use
474
- * numbered parameters.
477
+ * numbered parameters, and to pass information to consumers of the AST
478
+ * about how many numbered parameters exist.
475
479
  */
476
- bool numbered_params;
477
-
478
- /**
479
- * A transparent scope is a scope that cannot have locals set on itself.
480
- * When a local is set on this scope, it will instead be set on the parent
481
- * scope's local table.
482
- */
483
- bool transparent;
480
+ uint8_t numbered_parameters;
484
481
  } pm_scope_t;
485
482
 
486
483
  /**
@@ -565,6 +562,9 @@ struct pm_parser {
565
562
  /** The list of magic comments that have been found while parsing. */
566
563
  pm_list_t magic_comment_list;
567
564
 
565
+ /** The optional location of the __END__ keyword and its contents. */
566
+ pm_location_t data_loc;
567
+
568
568
  /** The list of warnings that have been found while parsing. */
569
569
  pm_list_t warning_list;
570
570
 
@@ -581,7 +581,7 @@ struct pm_parser {
581
581
  * The encoding functions for the current file are attached to the parser as
582
582
  * it's parsing so that it can change with a magic comment.
583
583
  */
584
- pm_encoding_t encoding;
584
+ const pm_encoding_t *encoding;
585
585
 
586
586
  /**
587
587
  * When the encoding that is being used to parse the source is changed by
@@ -590,14 +590,6 @@ struct pm_parser {
590
590
  */
591
591
  pm_encoding_changed_callback_t encoding_changed_callback;
592
592
 
593
- /**
594
- * When an encoding is encountered that isn't understood by prism, we
595
- * provide the ability here to call out to a user-defined function to get an
596
- * encoding struct. If the function returns something that isn't NULL, we
597
- * set that to our encoding and use it to parse identifiers.
598
- */
599
- pm_encoding_decode_callback_t encoding_decode_callback;
600
-
601
593
  /**
602
594
  * This pointer indicates where a comment must start if it is to be
603
595
  * considered an encoding comment.
@@ -643,7 +635,38 @@ struct pm_parser {
643
635
  * The line number at the start of the parse. This will be used to offset
644
636
  * the line numbers of all of the locations.
645
637
  */
646
- uint32_t start_line;
638
+ int32_t start_line;
639
+
640
+ /**
641
+ * When a string-like expression is being lexed, any byte or escape sequence
642
+ * that resolves to a value whose top bit is set (i.e., >= 0x80) will
643
+ * explicitly set the encoding to the same encoding as the source.
644
+ * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
645
+ * resolves to a value whose top bit is set, then the encoding will be
646
+ * explicitly set to UTF-8.
647
+ *
648
+ * The _next_ time this happens, if the encoding that is about to become the
649
+ * explicitly set encoding does not match the previously set explicit
650
+ * encoding, a mixed encoding error will be emitted.
651
+ *
652
+ * When the expression is finished being lexed, the explicit encoding
653
+ * controls the encoding of the expression. For the most part this means
654
+ * that the expression will either be encoded in the source encoding or
655
+ * UTF-8. This holds for all encodings except US-ASCII. If the source is
656
+ * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
657
+ * expression will be encoded as ASCII-8BIT.
658
+ *
659
+ * Note that if the expression is a list, different elements within the same
660
+ * list can have different encodings, so this will get reset between each
661
+ * element. Furthermore all of this only applies to lists that support
662
+ * interpolation, because otherwise escapes that could change the encoding
663
+ * are ignored.
664
+ *
665
+ * At first glance, it may make more sense for this to live on the lexer
666
+ * mode, but we need it here to communicate back to the parser for character
667
+ * literals that do not push a new lexer mode.
668
+ */
669
+ const pm_encoding_t *explicit_encoding;
647
670
 
648
671
  /** Whether or not we're at the beginning of a command. */
649
672
  bool command_start;
@@ -667,6 +690,9 @@ struct pm_parser {
667
690
  /** This flag indicates that we are currently parsing a keyword argument. */
668
691
  bool in_keyword_arg;
669
692
 
693
+ /** The constant id of the parameter whose default value is currently being parsed. */
694
+ pm_constant_id_t current_param_name;
695
+
670
696
  /**
671
697
  * Whether or not the parser has seen a token that has semantic meaning
672
698
  * (i.e., a token that is not a comment or whitespace).
@@ -8,7 +8,7 @@
8
8
 
9
9
  #include "prism/defines.h"
10
10
  #include "prism/parser.h"
11
- #include "prism/enc/pm_encoding.h"
11
+ #include "prism/encoding.h"
12
12
  #include "prism/util/pm_memchr.h"
13
13
  #include "prism/util/pm_string_list.h"
14
14
  #include "prism/util/pm_string.h"
@@ -28,6 +28,6 @@
28
28
  * @param encoding The encoding of the source code.
29
29
  * @return Whether or not the parsing was successful.
30
30
  */
31
- PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
31
+ PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, const pm_encoding_t *encoding);
32
32
 
33
33
  #endif
@@ -118,7 +118,15 @@ void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
118
118
  * @param buffer The buffer to append to.
119
119
  * @param value The integer to append.
120
120
  */
121
- void pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value);
121
+ void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
122
+
123
+ /**
124
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
125
+ *
126
+ * @param buffer The buffer to append to.
127
+ * @param value The integer to append.
128
+ */
129
+ void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
122
130
 
123
131
  /**
124
132
  * Concatenate one buffer onto another.
@@ -7,7 +7,7 @@
7
7
  #define PRISM_MEMCHR_H
8
8
 
9
9
  #include "prism/defines.h"
10
- #include "prism/enc/pm_encoding.h"
10
+ #include "prism/encoding.h"
11
11
 
12
12
  #include <stddef.h>
13
13
 
@@ -24,6 +24,6 @@
24
24
  * @return A pointer to the first occurrence of the character in the source
25
25
  * string, or NULL if no such character exists.
26
26
  */
27
- void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding);
27
+ void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
28
28
 
29
29
  #endif
@@ -32,12 +32,12 @@
32
32
  * need to take a slower path and iterate one multi-byte character at a time.
33
33
  *
34
34
  * @param parser The parser.
35
- * @param source The source string.
35
+ * @param source The source to search.
36
36
  * @param charset The charset to search for.
37
- * @param length The maximum length to search.
37
+ * @param length The maximum number of bytes to search.
38
38
  * @return A pointer to the first character in the source string that is in the
39
39
  * charset, or NULL if no such character exists.
40
40
  */
41
- const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
41
+ const uint8_t * pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
42
42
 
43
43
  #endif
@@ -14,16 +14,16 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 17
17
+ #define PRISM_VERSION_MINOR 19
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
21
21
  */
22
- #define PRISM_VERSION_PATCH 1
22
+ #define PRISM_VERSION_PATCH 0
23
23
 
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "0.17.1"
27
+ #define PRISM_VERSION "0.19.0"
28
28
 
29
29
  #endif