prism 0.23.0 → 0.25.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +65 -1
  4. data/Makefile +5 -2
  5. data/README.md +45 -6
  6. data/config.yml +499 -4
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +2 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +3 -3
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +2342 -1801
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +27 -11
  18. data/ext/prism/extension.c +313 -66
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +213 -64
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +134 -71
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +82 -7
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +198 -53
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +118 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +12 -3
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +225 -80
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +87 -16
  43. data/lib/prism/dsl.rb +315 -300
  44. data/lib/prism/ffi.rb +165 -84
  45. data/lib/prism/lex_compat.rb +17 -15
  46. data/lib/prism/mutation_compiler.rb +11 -0
  47. data/lib/prism/node.rb +4857 -3750
  48. data/lib/prism/node_ext.rb +77 -29
  49. data/lib/prism/pack.rb +4 -0
  50. data/lib/prism/parse_result/comments.rb +34 -17
  51. data/lib/prism/parse_result/newlines.rb +3 -1
  52. data/lib/prism/parse_result.rb +88 -34
  53. data/lib/prism/pattern.rb +16 -4
  54. data/lib/prism/polyfill/string.rb +12 -0
  55. data/lib/prism/serialize.rb +960 -327
  56. data/lib/prism/translation/parser/compiler.rb +152 -50
  57. data/lib/prism/translation/parser/lexer.rb +103 -22
  58. data/lib/prism/translation/parser/rubocop.rb +47 -11
  59. data/lib/prism/translation/parser.rb +134 -10
  60. data/lib/prism/translation/parser33.rb +12 -0
  61. data/lib/prism/translation/parser34.rb +12 -0
  62. data/lib/prism/translation/ripper/sexp.rb +125 -0
  63. data/lib/prism/translation/ripper/shim.rb +5 -0
  64. data/lib/prism/translation/ripper.rb +3248 -379
  65. data/lib/prism/translation/ruby_parser.rb +35 -18
  66. data/lib/prism/translation.rb +3 -1
  67. data/lib/prism/visitor.rb +10 -0
  68. data/lib/prism.rb +8 -2
  69. data/prism.gemspec +35 -4
  70. data/rbi/prism/compiler.rbi +14 -0
  71. data/rbi/prism/desugar_compiler.rbi +5 -0
  72. data/rbi/prism/mutation_compiler.rbi +5 -0
  73. data/rbi/prism/node.rbi +8221 -0
  74. data/rbi/prism/node_ext.rbi +102 -0
  75. data/rbi/prism/parse_result.rbi +304 -0
  76. data/rbi/prism/translation/parser/compiler.rbi +13 -0
  77. data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
  78. data/rbi/prism/translation/ripper.rbi +25 -0
  79. data/rbi/prism/translation/ruby_parser.rbi +11 -0
  80. data/rbi/prism/visitor.rbi +470 -0
  81. data/rbi/prism.rbi +39 -7749
  82. data/sig/prism/compiler.rbs +9 -0
  83. data/sig/prism/dispatcher.rbs +16 -0
  84. data/sig/prism/dot_visitor.rbs +6 -0
  85. data/sig/prism/dsl.rbs +462 -0
  86. data/sig/prism/mutation_compiler.rbs +158 -0
  87. data/sig/prism/node.rbs +3529 -0
  88. data/sig/prism/node_ext.rbs +78 -0
  89. data/sig/prism/pack.rbs +43 -0
  90. data/sig/prism/parse_result.rbs +127 -0
  91. data/sig/prism/pattern.rbs +13 -0
  92. data/sig/prism/serialize.rbs +7 -0
  93. data/sig/prism/visitor.rbs +168 -0
  94. data/sig/prism.rbs +188 -4767
  95. data/src/diagnostic.c +575 -230
  96. data/src/encoding.c +211 -108
  97. data/src/node.c +7526 -447
  98. data/src/options.c +36 -12
  99. data/src/pack.c +33 -17
  100. data/src/prettyprint.c +1297 -1388
  101. data/src/prism.c +3665 -1121
  102. data/src/regexp.c +17 -2
  103. data/src/serialize.c +47 -28
  104. data/src/static_literals.c +552 -0
  105. data/src/token_type.c +1 -0
  106. data/src/util/pm_buffer.c +147 -20
  107. data/src/util/pm_char.c +4 -4
  108. data/src/util/pm_constant_pool.c +35 -11
  109. data/src/util/pm_integer.c +629 -0
  110. data/src/util/pm_list.c +1 -1
  111. data/src/util/pm_newline_list.c +20 -8
  112. data/src/util/pm_string.c +134 -5
  113. data/src/util/pm_string_list.c +2 -2
  114. metadata +37 -6
  115. data/docs/ripper.md +0 -36
  116. data/rbi/prism_static.rbi +0 -207
  117. data/sig/prism_static.rbs +0 -201
@@ -1,3 +1,11 @@
1
+ /******************************************************************************/
2
+ /* This file is generated by the templates/template.rb script and should not */
3
+ /* be modified manually. See */
4
+ /* templates/include/prism/diagnostic.h.erb */
5
+ /* if you are looking to modify the */
6
+ /* template */
7
+ /******************************************************************************/
8
+
1
9
  /**
2
10
  * @file diagnostic.h
3
11
  *
@@ -14,68 +22,14 @@
14
22
  #include <stdlib.h>
15
23
  #include <assert.h>
16
24
 
17
- /**
18
- * The levels of errors generated during parsing.
19
- */
20
- typedef enum {
21
- /** For errors that cannot be recovered from. */
22
- PM_ERROR_LEVEL_FATAL = 0,
23
-
24
- /** For errors that should raise an argument error. */
25
- PM_ERROR_LEVEL_ARGUMENT = 1
26
- } pm_error_level_t;
27
-
28
- /**
29
- * The levels of warnings generated during parsing.
30
- */
31
- typedef enum {
32
- /** For warnings which should be emitted if $VERBOSE != nil. */
33
- PM_WARNING_LEVEL_DEFAULT = 0,
34
-
35
- /** For warnings which should be emitted if $VERBOSE == true. */
36
- PM_WARNING_LEVEL_VERBOSE = 1
37
- } pm_warning_level_t;
38
-
39
- /**
40
- * This struct represents a diagnostic generated during parsing.
41
- *
42
- * @extends pm_list_node_t
43
- */
44
- typedef struct {
45
- /** The embedded base node. */
46
- pm_list_node_t node;
47
-
48
- /** The location of the diagnostic in the source. */
49
- pm_location_t location;
50
-
51
- /** The message associated with the diagnostic. */
52
- const char *message;
53
-
54
- /**
55
- * Whether or not the memory related to the message of this diagnostic is
56
- * owned by this diagnostic. If it is, it needs to be freed when the
57
- * diagnostic is freed.
58
- */
59
- bool owned;
60
-
61
- /**
62
- * The level of the diagnostic, see `pm_error_level_t` and
63
- * `pm_warning_level_t` for possible values.
64
- */
65
- uint8_t level;
66
- } pm_diagnostic_t;
67
-
68
25
  /**
69
26
  * The diagnostic IDs of all of the diagnostics, used to communicate the types
70
27
  * of errors between the parser and the user.
71
28
  */
72
29
  typedef enum {
73
- // This is a special error that we can potentially replace by others. For
74
- // an example of how this is used, see parse_expression_prefix.
75
- PM_ERR_CANNOT_PARSE_EXPRESSION,
76
-
77
- // These are the error codes.
30
+ // These are the error diagnostics.
78
31
  PM_ERR_ALIAS_ARGUMENT,
32
+ PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE,
79
33
  PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
80
34
  PM_ERR_ARGUMENT_AFTER_BLOCK,
81
35
  PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
@@ -91,6 +45,7 @@ typedef enum {
91
45
  PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
92
46
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
93
47
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
48
+ PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR,
94
49
  PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
95
50
  PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
96
51
  PM_ERR_ARGUMENT_TERM_PAREN,
@@ -109,6 +64,7 @@ typedef enum {
109
64
  PM_ERR_BLOCK_PARAM_PIPE_TERM,
110
65
  PM_ERR_BLOCK_TERM_BRACE,
111
66
  PM_ERR_BLOCK_TERM_END,
67
+ PM_ERR_CANNOT_PARSE_EXPRESSION,
112
68
  PM_ERR_CANNOT_PARSE_STRING_PART,
113
69
  PM_ERR_CASE_EXPRESSION_AFTER_CASE,
114
70
  PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
@@ -120,6 +76,7 @@ typedef enum {
120
76
  PM_ERR_CLASS_SUPERCLASS,
121
77
  PM_ERR_CLASS_TERM,
122
78
  PM_ERR_CLASS_UNEXPECTED_END,
79
+ PM_ERR_CLASS_VARIABLE_BARE,
123
80
  PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
124
81
  PM_ERR_CONDITIONAL_IF_PREDICATE,
125
82
  PM_ERR_CONDITIONAL_PREDICATE_TERM,
@@ -157,13 +114,13 @@ typedef enum {
157
114
  PM_ERR_EXPECT_ARGUMENT,
158
115
  PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
159
116
  PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
160
- PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
161
117
  PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
162
118
  PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
163
119
  PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
164
120
  PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
165
- PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
166
121
  PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
122
+ PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
123
+ PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
167
124
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
168
125
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
169
126
  PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
@@ -176,10 +133,12 @@ typedef enum {
176
133
  PM_ERR_EXPECT_STRING_CONTENT,
177
134
  PM_ERR_EXPECT_WHEN_DELIMITER,
178
135
  PM_ERR_EXPRESSION_BARE_HASH,
136
+ PM_ERR_FLOAT_PARSE,
179
137
  PM_ERR_FOR_COLLECTION,
180
138
  PM_ERR_FOR_IN,
181
139
  PM_ERR_FOR_INDEX,
182
140
  PM_ERR_FOR_TERM,
141
+ PM_ERR_GLOBAL_VARIABLE_BARE,
183
142
  PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
184
143
  PM_ERR_HASH_KEY,
185
144
  PM_ERR_HASH_ROCKET,
@@ -188,20 +147,32 @@ typedef enum {
188
147
  PM_ERR_HEREDOC_TERM,
189
148
  PM_ERR_INCOMPLETE_QUESTION_MARK,
190
149
  PM_ERR_INCOMPLETE_VARIABLE_CLASS,
150
+ PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3_0,
191
151
  PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
152
+ PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3_0,
153
+ PM_ERR_INSTANCE_VARIABLE_BARE,
154
+ PM_ERR_INVALID_BLOCK_EXIT,
155
+ PM_ERR_INVALID_CHARACTER,
192
156
  PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
193
157
  PM_ERR_INVALID_FLOAT_EXPONENT,
158
+ PM_ERR_INVALID_MULTIBYTE_CHAR,
159
+ PM_ERR_INVALID_MULTIBYTE_CHARACTER,
160
+ PM_ERR_INVALID_MULTIBYTE_ESCAPE,
194
161
  PM_ERR_INVALID_NUMBER_BINARY,
195
162
  PM_ERR_INVALID_NUMBER_DECIMAL,
196
163
  PM_ERR_INVALID_NUMBER_HEXADECIMAL,
197
164
  PM_ERR_INVALID_NUMBER_OCTAL,
198
165
  PM_ERR_INVALID_NUMBER_UNDERSCORE,
199
- PM_ERR_INVALID_CHARACTER,
200
- PM_ERR_INVALID_MULTIBYTE_CHARACTER,
201
- PM_ERR_INVALID_PRINTABLE_CHARACTER,
202
166
  PM_ERR_INVALID_PERCENT,
167
+ PM_ERR_INVALID_PRINTABLE_CHARACTER,
168
+ PM_ERR_INVALID_RETRY_AFTER_ELSE,
169
+ PM_ERR_INVALID_RETRY_AFTER_ENSURE,
170
+ PM_ERR_INVALID_RETRY_WITHOUT_RESCUE,
203
171
  PM_ERR_INVALID_VARIABLE_GLOBAL,
204
- PM_ERR_IT_NOT_ALLOWED,
172
+ PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0,
173
+ PM_ERR_INVALID_YIELD,
174
+ PM_ERR_IT_NOT_ALLOWED_NUMBERED,
175
+ PM_ERR_IT_NOT_ALLOWED_ORDINARY,
205
176
  PM_ERR_LAMBDA_OPEN,
206
177
  PM_ERR_LAMBDA_TERM_BRACE,
207
178
  PM_ERR_LAMBDA_TERM_END,
@@ -220,10 +191,11 @@ typedef enum {
220
191
  PM_ERR_MODULE_TERM,
221
192
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
222
193
  PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
223
- PM_ERR_NOT_EXPRESSION,
224
194
  PM_ERR_NO_LOCAL_VARIABLE,
195
+ PM_ERR_NOT_EXPRESSION,
225
196
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
226
- PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
197
+ PM_ERR_NUMBERED_PARAMETER_IT,
198
+ PM_ERR_NUMBERED_PARAMETER_ORDINARY,
227
199
  PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
228
200
  PM_ERR_OPERATOR_MULTI_ASSIGN,
229
201
  PM_ERR_OPERATOR_WRITE_ARGUMENTS,
@@ -232,7 +204,7 @@ typedef enum {
232
204
  PM_ERR_PARAMETER_BLOCK_MULTI,
233
205
  PM_ERR_PARAMETER_CIRCULAR,
234
206
  PM_ERR_PARAMETER_METHOD_NAME,
235
- PM_ERR_PARAMETER_NAME_REPEAT,
207
+ PM_ERR_PARAMETER_NAME_DUPLICATED,
236
208
  PM_ERR_PARAMETER_NO_DEFAULT,
237
209
  PM_ERR_PARAMETER_NO_DEFAULT_KW,
238
210
  PM_ERR_PARAMETER_NUMBERED_RESERVED,
@@ -241,9 +213,10 @@ typedef enum {
241
213
  PM_ERR_PARAMETER_STAR,
242
214
  PM_ERR_PARAMETER_UNEXPECTED_FWD,
243
215
  PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
216
+ PM_ERR_PATTERN_CAPTURE_DUPLICATE,
244
217
  PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
245
- PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
246
218
  PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
219
+ PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
247
220
  PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
248
221
  PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
249
222
  PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
@@ -252,6 +225,7 @@ typedef enum {
252
225
  PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
253
226
  PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
254
227
  PM_ERR_PATTERN_HASH_KEY,
228
+ PM_ERR_PATTERN_HASH_KEY_DUPLICATE,
255
229
  PM_ERR_PATTERN_HASH_KEY_LABEL,
256
230
  PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
257
231
  PM_ERR_PATTERN_LABEL_AFTER_COMMA,
@@ -260,12 +234,19 @@ typedef enum {
260
234
  PM_ERR_PATTERN_TERM_BRACKET,
261
235
  PM_ERR_PATTERN_TERM_PAREN,
262
236
  PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
237
+ PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH,
238
+ PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING,
239
+ PM_ERR_REGEXP_INVALID_UNICODE_RANGE,
240
+ PM_ERR_REGEXP_NON_ESCAPED_MBC,
263
241
  PM_ERR_REGEXP_TERM,
242
+ PM_ERR_REGEXP_UNKNOWN_OPTIONS,
243
+ PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP,
264
244
  PM_ERR_RESCUE_EXPRESSION,
265
245
  PM_ERR_RESCUE_MODIFIER_VALUE,
266
246
  PM_ERR_RESCUE_TERM,
267
247
  PM_ERR_RESCUE_VARIABLE,
268
248
  PM_ERR_RETURN_INVALID,
249
+ PM_ERR_SCRIPT_NOT_FOUND,
269
250
  PM_ERR_SINGLETON_FOR_LITERALS,
270
251
  PM_ERR_STATEMENT_ALIAS,
271
252
  PM_ERR_STATEMENT_POSTEXE_END,
@@ -282,9 +263,9 @@ typedef enum {
282
263
  PM_ERR_TERNARY_EXPRESSION_FALSE,
283
264
  PM_ERR_TERNARY_EXPRESSION_TRUE,
284
265
  PM_ERR_UNARY_RECEIVER,
266
+ PM_ERR_UNDEF_ARGUMENT,
285
267
  PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
286
268
  PM_ERR_UNEXPECTED_TOKEN_IGNORE,
287
- PM_ERR_UNDEF_ARGUMENT,
288
269
  PM_ERR_UNTIL_TERM,
289
270
  PM_ERR_VOID_EXPRESSION,
290
271
  PM_ERR_WHILE_TERM,
@@ -293,17 +274,99 @@ typedef enum {
293
274
  PM_ERR_WRITE_TARGET_UNEXPECTED,
294
275
  PM_ERR_XSTRING_TERM,
295
276
 
296
- // These are the warning codes.
277
+ // These are the warning diagnostics.
297
278
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
298
279
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
280
+ PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND,
299
281
  PM_WARN_AMBIGUOUS_PREFIX_STAR,
282
+ PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR,
300
283
  PM_WARN_AMBIGUOUS_SLASH,
284
+ PM_WARN_COMPARISON_AFTER_COMPARISON,
285
+ PM_WARN_DOT_DOT_DOT_EOL,
286
+ PM_WARN_EQUAL_IN_CONDITIONAL,
287
+ PM_WARN_EQUAL_IN_CONDITIONAL_3_3_0,
301
288
  PM_WARN_END_IN_METHOD,
302
-
303
- // This is the number of diagnostic codes.
304
- PM_DIAGNOSTIC_ID_LEN,
289
+ PM_WARN_DUPLICATED_HASH_KEY,
290
+ PM_WARN_DUPLICATED_WHEN_CLAUSE,
291
+ PM_WARN_FLOAT_OUT_OF_RANGE,
292
+ PM_WARN_IGNORED_FROZEN_STRING_LITERAL,
293
+ PM_WARN_INTEGER_IN_FLIP_FLOP,
294
+ PM_WARN_INVALID_CHARACTER,
295
+ PM_WARN_INVALID_NUMBERED_REFERENCE,
296
+ PM_WARN_INVALID_SHAREABLE_CONSTANT_VALUE,
297
+ PM_WARN_KEYWORD_EOL,
298
+ PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
299
+ PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
300
+ PM_WARN_SHEBANG_CARRIAGE_RETURN,
301
+ PM_WARN_UNEXPECTED_CARRIAGE_RETURN,
302
+ PM_WARN_UNUSED_LOCAL_VARIABLE,
305
303
  } pm_diagnostic_id_t;
306
304
 
305
+ /**
306
+ * This struct represents a diagnostic generated during parsing.
307
+ *
308
+ * @extends pm_list_node_t
309
+ */
310
+ typedef struct {
311
+ /** The embedded base node. */
312
+ pm_list_node_t node;
313
+
314
+ /** The location of the diagnostic in the source. */
315
+ pm_location_t location;
316
+
317
+ /** The ID of the diagnostic. */
318
+ pm_diagnostic_id_t diag_id;
319
+
320
+ /** The message associated with the diagnostic. */
321
+ const char *message;
322
+
323
+ /**
324
+ * Whether or not the memory related to the message of this diagnostic is
325
+ * owned by this diagnostic. If it is, it needs to be freed when the
326
+ * diagnostic is freed.
327
+ */
328
+ bool owned;
329
+
330
+ /**
331
+ * The level of the diagnostic, see `pm_error_level_t` and
332
+ * `pm_warning_level_t` for possible values.
333
+ */
334
+ uint8_t level;
335
+ } pm_diagnostic_t;
336
+
337
+ /**
338
+ * The levels of errors generated during parsing.
339
+ */
340
+ typedef enum {
341
+ /** For errors that should raise a syntax error. */
342
+ PM_ERROR_LEVEL_SYNTAX = 0,
343
+
344
+ /** For errors that should raise an argument error. */
345
+ PM_ERROR_LEVEL_ARGUMENT = 1,
346
+
347
+ /** For errors that should raise a load error. */
348
+ PM_ERROR_LEVEL_LOAD = 2
349
+ } pm_error_level_t;
350
+
351
+ /**
352
+ * The levels of warnings generated during parsing.
353
+ */
354
+ typedef enum {
355
+ /** For warnings which should be emitted if $VERBOSE != nil. */
356
+ PM_WARNING_LEVEL_DEFAULT = 0,
357
+
358
+ /** For warnings which should be emitted if $VERBOSE == true. */
359
+ PM_WARNING_LEVEL_VERBOSE = 1
360
+ } pm_warning_level_t;
361
+
362
+ /**
363
+ * Get the human-readable name of the given diagnostic ID.
364
+ *
365
+ * @param diag_id The diagnostic ID.
366
+ * @return The human-readable name of the diagnostic ID.
367
+ */
368
+ const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id);
369
+
307
370
  /**
308
371
  * Append a diagnostic to the given list of diagnostics that is using shared
309
372
  * memory for its message.
@@ -135,7 +135,14 @@ extern const uint8_t pm_encoding_unicode_table[256];
135
135
  */
136
136
  typedef enum {
137
137
  PM_ENCODING_UTF_8 = 0,
138
+ PM_ENCODING_US_ASCII,
138
139
  PM_ENCODING_ASCII_8BIT,
140
+ PM_ENCODING_EUC_JP,
141
+ PM_ENCODING_WINDOWS_31J,
142
+
143
+ // We optionally support excluding the full set of encodings to only support the
144
+ // minimum necessary to process Ruby code without encoding comments.
145
+ #ifndef PRISM_ENCODING_EXCLUDE_FULL
139
146
  PM_ENCODING_BIG5,
140
147
  PM_ENCODING_BIG5_HKSCS,
141
148
  PM_ENCODING_BIG5_UAO,
@@ -148,7 +155,6 @@ typedef enum {
148
155
  PM_ENCODING_CP950,
149
156
  PM_ENCODING_CP951,
150
157
  PM_ENCODING_EMACS_MULE,
151
- PM_ENCODING_EUC_JP,
152
158
  PM_ENCODING_EUC_JP_MS,
153
159
  PM_ENCODING_EUC_JIS_2004,
154
160
  PM_ENCODING_EUC_KR,
@@ -208,7 +214,6 @@ typedef enum {
208
214
  PM_ENCODING_STATELESS_ISO_2022_JP,
209
215
  PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
210
216
  PM_ENCODING_TIS_620,
211
- PM_ENCODING_US_ASCII,
212
217
  PM_ENCODING_UTF8_MAC,
213
218
  PM_ENCODING_UTF8_DOCOMO,
214
219
  PM_ENCODING_UTF8_KDDI,
@@ -222,8 +227,9 @@ typedef enum {
222
227
  PM_ENCODING_WINDOWS_1256,
223
228
  PM_ENCODING_WINDOWS_1257,
224
229
  PM_ENCODING_WINDOWS_1258,
225
- PM_ENCODING_WINDOWS_31J,
226
230
  PM_ENCODING_WINDOWS_874,
231
+ #endif
232
+
227
233
  PM_ENCODING_MAXIMUM
228
234
  } pm_encoding_type_t;
229
235
 
@@ -248,10 +254,22 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
248
254
  /**
249
255
  * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
250
256
  * can compare against it because invalid multibyte characters are not a thing
251
- * in this encoding.
257
+ * in this encoding. It is also needed for handling Regexp encoding flags.
252
258
  */
253
259
  #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
254
260
 
261
+ /**
262
+ * This is the EUC-JP encoding. We need a reference to it to quickly process
263
+ * regular expression modifiers.
264
+ */
265
+ #define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])
266
+
267
+ /**
268
+ * This is the Windows-31J encoding. We need a reference to it to quickly
269
+ * process regular expression modifiers.
270
+ */
271
+ #define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])
272
+
255
273
  /**
256
274
  * Parse the given name of an encoding and return a pointer to the corresponding
257
275
  * encoding struct if one can be found, otherwise return NULL.
data/include/prism/node.h CHANGED
@@ -8,6 +8,14 @@
8
8
 
9
9
  #include "prism/defines.h"
10
10
  #include "prism/parser.h"
11
+ #include "prism/util/pm_buffer.h"
12
+
13
+ /**
14
+ * Loop through each node in the node list, writing each node to the given
15
+ * pm_node_t pointer.
16
+ */
17
+ #define PM_NODE_LIST_FOREACH(list, index, node) \
18
+ for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++)
11
19
 
12
20
  /**
13
21
  * Append a new node onto the end of the node list.
@@ -17,6 +25,29 @@
17
25
  */
18
26
  void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
19
27
 
28
+ /**
29
+ * Prepend a new node onto the beginning of the node list.
30
+ *
31
+ * @param list The list to prepend to.
32
+ * @param node The node to prepend.
33
+ */
34
+ void pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node);
35
+
36
+ /**
37
+ * Concatenate the given node list onto the end of the other node list.
38
+ *
39
+ * @param list The list to concatenate onto.
40
+ * @param other The list to concatenate.
41
+ */
42
+ void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other);
43
+
44
+ /**
45
+ * Free the internal memory associated with the given node list.
46
+ *
47
+ * @param list The list to free.
48
+ */
49
+ void pm_node_list_free(pm_node_list_t *list);
50
+
20
51
  /**
21
52
  * Deallocate a node and all of its children.
22
53
  *
@@ -54,4 +85,66 @@ PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *mems
54
85
  */
55
86
  PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
56
87
 
88
+ /**
89
+ * Visit each of the nodes in this subtree using the given visitor callback. The
90
+ * callback function will be called for each node in the subtree. If it returns
91
+ * false, then that node's children will not be visited. If it returns true,
92
+ * then the children will be visited. The data parameter is treated as an opaque
93
+ * pointer and is passed to the visitor callback for consumers to use as they
94
+ * see fit.
95
+ *
96
+ * As an example:
97
+ *
98
+ * ```c
99
+ * #include "prism.h"
100
+ *
101
+ * bool visit(const pm_node_t *node, void *data) {
102
+ * size_t *indent = (size_t *) data;
103
+ * for (size_t i = 0; i < *indent * 2; i++) putc(' ', stdout);
104
+ * printf("%s\n", pm_node_type_to_str(node->type));
105
+ *
106
+ * size_t next_indent = *indent + 1;
107
+ * size_t *next_data = &next_indent;
108
+ * pm_visit_child_nodes(node, visit, next_data);
109
+ *
110
+ * return false;
111
+ * }
112
+ *
113
+ * int main(void) {
114
+ * const char *source = "1 + 2; 3 + 4";
115
+ * size_t size = strlen(source);
116
+ *
117
+ * pm_parser_t parser;
118
+ * pm_options_t options = { 0 };
119
+ * pm_parser_init(&parser, (const uint8_t *) source, size, &options);
120
+ *
121
+ * size_t indent = 0;
122
+ * pm_node_t *node = pm_parse(&parser);
123
+ *
124
+ * size_t *data = &indent;
125
+ * pm_visit_node(node, visit, data);
126
+ *
127
+ * pm_node_destroy(&parser, node);
128
+ * pm_parser_free(&parser);
129
+ * return EXIT_SUCCESS;
130
+ * }
131
+ * ```
132
+ *
133
+ * @param node The root node to start visiting from.
134
+ * @param visitor The callback to call for each node in the subtree.
135
+ * @param data An opaque pointer that is passed to the visitor callback.
136
+ */
137
+ PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
138
+
139
+ /**
140
+ * Visit the children of the given node with the given callback. This is the
141
+ * default behavior for walking the tree that is called from pm_visit_node if
142
+ * the callback returns true.
143
+ *
144
+ * @param node The node to visit the children of.
145
+ * @param visitor The callback to call for each child node.
146
+ * @param data An opaque pointer that is passed to the visitor callback.
147
+ */
148
+ PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
149
+
57
150
  #endif
@@ -13,6 +13,22 @@
13
13
  #include <stddef.h>
14
14
  #include <stdint.h>
15
15
 
16
+ /**
17
+ * String literals should be made mutable (frozen string literals are disabled).
18
+ */
19
+ #define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED ((int8_t) -1)
20
+
21
+ /**
22
+ * String literals may be frozen or mutable depending on the implementation
23
+ * default.
24
+ */
25
+ #define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET ((int8_t) 0)
26
+
27
+ /**
28
+ * String literals should be made frozen (frozen string literals are enabled).
29
+ */
30
+ #define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED ((int8_t) 1)
31
+
16
32
  /**
17
33
  * A scope of locals surrounding the code that is being parsed.
18
34
  */
@@ -76,10 +92,56 @@ typedef struct {
76
92
  */
77
93
  pm_options_version_t version;
78
94
 
79
- /** Whether or not the frozen string literal option has been set. */
80
- bool frozen_string_literal;
95
+ /** A bitset of the various options that were set on the command line. */
96
+ uint8_t command_line;
97
+
98
+ /**
99
+ * Whether or not the frozen string literal option has been set.
100
+ * May be:
101
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
102
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
103
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
104
+ */
105
+ int8_t frozen_string_literal;
81
106
  } pm_options_t;
82
107
 
108
+ /**
109
+ * A bit representing whether or not the command line -a option was set. -a
110
+ * splits the input line $_ into $F.
111
+ */
112
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_A = 0x1;
113
+
114
+ /**
115
+ * A bit representing whether or not the command line -e option was set. -e
116
+ * allow the user to specify a script to be executed. This is necessary for
117
+ * prism to know because certain warnings are not generated when -e is used.
118
+ */
119
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_E = 0x2;
120
+
121
+ /**
122
+ * A bit representing whether or not the command line -l option was set. -l
123
+ * chomps the input line by default.
124
+ */
125
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_L = 0x4;
126
+
127
+ /**
128
+ * A bit representing whether or not the command line -n option was set. -n
129
+ * wraps the script in a while gets loop.
130
+ */
131
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_N = 0x8;
132
+
133
+ /**
134
+ * A bit representing whether or not the command line -p option was set. -p
135
+ * prints the value of $_ at the end of each loop.
136
+ */
137
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
138
+
139
+ /**
140
+ * A bit representing whether or not the command line -x option was set. -x
141
+ * searches the input file for a shebang that matches the current Ruby engine.
142
+ */
143
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
144
+
83
145
  /**
84
146
  * Set the filepath option on the given options struct.
85
147
  *
@@ -112,6 +174,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, cons
112
174
  */
113
175
  PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal);
114
176
 
177
+ /**
178
+ * Sets the command line option on the given options struct.
179
+ *
180
+ * @param options The options struct to set the command line option on.
181
+ * @param command_line The command_line value to set.
182
+ */
183
+ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line);
184
+
115
185
  /**
116
186
  * Set the version option on the given options struct by parsing the given
117
187
  * string. If the string contains an invalid option, this returns false.
@@ -129,8 +199,9 @@ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const
129
199
  *
130
200
  * @param options The options struct to initialize the scopes array on.
131
201
  * @param scopes_count The number of scopes to allocate.
202
+ * @return Whether or not the scopes array was initialized successfully.
132
203
  */
133
- PRISM_EXPORTED_FUNCTION void pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
204
+ PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
134
205
 
135
206
  /**
136
207
  * Return a pointer to the scope at the given index within the given options.
@@ -147,8 +218,9 @@ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm
147
218
  *
148
219
  * @param scope The scope struct to initialize.
149
220
  * @param locals_count The number of locals to allocate.
221
+ * @return Whether or not the scope was initialized successfully.
150
222
  */
151
- PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
223
+ PRISM_EXPORTED_FUNCTION bool pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
152
224
 
153
225
  /**
154
226
  * Return a pointer to the local at the given index within the given scope.
@@ -184,7 +256,10 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
184
256
  * | `4` | the length the encoding |
185
257
  * | ... | the encoding bytes |
186
258
  * | `1` | frozen string literal |
187
- * | `1` | suppress warnings |
259
+ * | `1` | -p command line option |
260
+ * | `1` | -n command line option |
261
+ * | `1` | -l command line option |
262
+ * | `1` | -a command line option |
188
263
  * | `1` | the version |
189
264
  * | `4` | the number of scopes |
190
265
  * | ... | the scopes |
@@ -196,14 +271,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
196
271
  * | `0` | use the latest version of prism |
197
272
  * | `1` | use the version of prism that is vendored in CRuby 3.3.0 |
198
273
  *
199
- * Each scope is layed out as follows:
274
+ * Each scope is laid out as follows:
200
275
  *
201
276
  * | # bytes | field |
202
277
  * | ------- | -------------------------- |
203
278
  * | `4` | the number of locals |
204
279
  * | ... | the locals |
205
280
  *
206
- * Each local is layed out as follows:
281
+ * Each local is laid out as follows:
207
282
  *
208
283
  * | # bytes | field |
209
284
  * | ------- | -------------------------- |
data/include/prism/pack.h CHANGED
@@ -8,6 +8,15 @@
8
8
 
9
9
  #include "prism/defines.h"
10
10
 
11
+ // We optionally support parsing String#pack templates. For systems that don't
12
+ // want or need this functionality, it can be turned off with the
13
+ // PRISM_EXCLUDE_PACK define.
14
+ #ifdef PRISM_EXCLUDE_PACK
15
+
16
+ void pm_pack_parse(void);
17
+
18
+ #else
19
+
11
20
  #include <stdint.h>
12
21
  #include <stdlib.h>
13
22
 
@@ -150,3 +159,5 @@ pm_pack_parse(
150
159
  PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
151
160
 
152
161
  #endif
162
+
163
+ #endif