prism 0.24.0 → 0.26.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +69 -1
  4. data/Makefile +22 -16
  5. data/README.md +45 -6
  6. data/config.yml +510 -4
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +3 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +7 -9
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +911 -815
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +34 -13
  18. data/ext/prism/extension.c +341 -68
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +213 -64
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +146 -72
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +82 -7
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +203 -54
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +118 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +8 -0
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +4 -2
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +87 -16
  43. data/lib/prism/dsl.rb +24 -12
  44. data/lib/prism/ffi.rb +77 -12
  45. data/lib/prism/lex_compat.rb +17 -15
  46. data/lib/prism/mutation_compiler.rb +11 -0
  47. data/lib/prism/node.rb +2112 -2499
  48. data/lib/prism/node_ext.rb +77 -29
  49. data/lib/prism/pack.rb +4 -0
  50. data/lib/prism/parse_result/comments.rb +34 -17
  51. data/lib/prism/parse_result/newlines.rb +3 -1
  52. data/lib/prism/parse_result.rb +83 -32
  53. data/lib/prism/pattern.rb +16 -4
  54. data/lib/prism/polyfill/string.rb +12 -0
  55. data/lib/prism/reflection.rb +421 -0
  56. data/lib/prism/serialize.rb +450 -102
  57. data/lib/prism/translation/parser/compiler.rb +189 -50
  58. data/lib/prism/translation/parser/lexer.rb +103 -22
  59. data/lib/prism/translation/parser/rubocop.rb +41 -13
  60. data/lib/prism/translation/parser.rb +119 -7
  61. data/lib/prism/translation/parser33.rb +1 -1
  62. data/lib/prism/translation/parser34.rb +1 -1
  63. data/lib/prism/translation/ripper/sexp.rb +125 -0
  64. data/lib/prism/translation/ripper/shim.rb +5 -0
  65. data/lib/prism/translation/ripper.rb +3212 -462
  66. data/lib/prism/translation/ruby_parser.rb +35 -18
  67. data/lib/prism/translation.rb +3 -1
  68. data/lib/prism/visitor.rb +10 -0
  69. data/lib/prism.rb +9 -18
  70. data/prism.gemspec +39 -6
  71. data/rbi/prism/compiler.rbi +14 -0
  72. data/rbi/prism/desugar_compiler.rbi +5 -0
  73. data/rbi/prism/mutation_compiler.rbi +5 -0
  74. data/rbi/prism/node.rbi +8674 -0
  75. data/rbi/prism/node_ext.rbi +102 -0
  76. data/rbi/prism/parse_result.rbi +307 -0
  77. data/rbi/prism/reflection.rbi +64 -0
  78. data/rbi/prism/translation/parser/compiler.rbi +13 -0
  79. data/rbi/prism/translation/parser.rbi +11 -0
  80. data/rbi/prism/translation/parser33.rbi +6 -0
  81. data/rbi/prism/translation/parser34.rbi +6 -0
  82. data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
  83. data/rbi/prism/translation/ripper.rbi +25 -0
  84. data/rbi/prism/translation/ruby_parser.rbi +11 -0
  85. data/rbi/prism/visitor.rbi +470 -0
  86. data/rbi/prism.rbi +38 -7748
  87. data/sig/prism/compiler.rbs +9 -0
  88. data/sig/prism/dispatcher.rbs +16 -0
  89. data/sig/prism/dot_visitor.rbs +6 -0
  90. data/sig/prism/dsl.rbs +462 -0
  91. data/sig/prism/mutation_compiler.rbs +158 -0
  92. data/sig/prism/node.rbs +3538 -0
  93. data/sig/prism/node_ext.rbs +78 -0
  94. data/sig/prism/pack.rbs +43 -0
  95. data/sig/prism/parse_result.rbs +128 -0
  96. data/sig/prism/pattern.rbs +13 -0
  97. data/sig/prism/reflection.rbs +56 -0
  98. data/sig/prism/serialize.rbs +7 -0
  99. data/sig/prism/visitor.rbs +168 -0
  100. data/sig/prism.rbs +188 -4767
  101. data/src/diagnostic.c +597 -230
  102. data/src/encoding.c +211 -108
  103. data/src/node.c +7526 -447
  104. data/src/options.c +66 -31
  105. data/src/pack.c +33 -17
  106. data/src/prettyprint.c +1294 -1385
  107. data/src/prism.c +4015 -1149
  108. data/src/regexp.c +17 -2
  109. data/src/serialize.c +47 -28
  110. data/src/static_literals.c +552 -0
  111. data/src/token_type.c +4 -3
  112. data/src/util/pm_buffer.c +147 -20
  113. data/src/util/pm_char.c +4 -4
  114. data/src/util/pm_constant_pool.c +35 -11
  115. data/src/util/pm_integer.c +635 -0
  116. data/src/util/pm_list.c +1 -1
  117. data/src/util/pm_newline_list.c +14 -5
  118. data/src/util/pm_string.c +134 -5
  119. data/src/util/pm_string_list.c +2 -2
  120. metadata +41 -8
  121. data/docs/ripper.md +0 -36
  122. data/include/prism/util/pm_state_stack.h +0 -42
  123. data/rbi/prism_static.rbi +0 -207
  124. data/sig/prism_static.rbs +0 -201
  125. data/src/util/pm_state_stack.c +0 -25
@@ -1,3 +1,11 @@
1
+ /******************************************************************************/
2
+ /* This file is generated by the templates/template.rb script and should not */
3
+ /* be modified manually. See */
4
+ /* templates/include/prism/diagnostic.h.erb */
5
+ /* if you are looking to modify the */
6
+ /* template */
7
+ /******************************************************************************/
8
+
1
9
  /**
2
10
  * @file diagnostic.h
3
11
  *
@@ -14,68 +22,14 @@
14
22
  #include <stdlib.h>
15
23
  #include <assert.h>
16
24
 
17
- /**
18
- * The levels of errors generated during parsing.
19
- */
20
- typedef enum {
21
- /** For errors that cannot be recovered from. */
22
- PM_ERROR_LEVEL_FATAL = 0,
23
-
24
- /** For errors that should raise an argument error. */
25
- PM_ERROR_LEVEL_ARGUMENT = 1
26
- } pm_error_level_t;
27
-
28
- /**
29
- * The levels of warnings generated during parsing.
30
- */
31
- typedef enum {
32
- /** For warnings which should be emitted if $VERBOSE != nil. */
33
- PM_WARNING_LEVEL_DEFAULT = 0,
34
-
35
- /** For warnings which should be emitted if $VERBOSE == true. */
36
- PM_WARNING_LEVEL_VERBOSE = 1
37
- } pm_warning_level_t;
38
-
39
- /**
40
- * This struct represents a diagnostic generated during parsing.
41
- *
42
- * @extends pm_list_node_t
43
- */
44
- typedef struct {
45
- /** The embedded base node. */
46
- pm_list_node_t node;
47
-
48
- /** The location of the diagnostic in the source. */
49
- pm_location_t location;
50
-
51
- /** The message associated with the diagnostic. */
52
- const char *message;
53
-
54
- /**
55
- * Whether or not the memory related to the message of this diagnostic is
56
- * owned by this diagnostic. If it is, it needs to be freed when the
57
- * diagnostic is freed.
58
- */
59
- bool owned;
60
-
61
- /**
62
- * The level of the diagnostic, see `pm_error_level_t` and
63
- * `pm_warning_level_t` for possible values.
64
- */
65
- uint8_t level;
66
- } pm_diagnostic_t;
67
-
68
25
  /**
69
26
  * The diagnostic IDs of all of the diagnostics, used to communicate the types
70
27
  * of errors between the parser and the user.
71
28
  */
72
29
  typedef enum {
73
- // This is a special error that we can potentially replace by others. For
74
- // an example of how this is used, see parse_expression_prefix.
75
- PM_ERR_CANNOT_PARSE_EXPRESSION,
76
-
77
- // These are the error codes.
30
+ // These are the error diagnostics.
78
31
  PM_ERR_ALIAS_ARGUMENT,
32
+ PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE,
79
33
  PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
80
34
  PM_ERR_ARGUMENT_AFTER_BLOCK,
81
35
  PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
@@ -91,6 +45,7 @@ typedef enum {
91
45
  PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
92
46
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
93
47
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
48
+ PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR,
94
49
  PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
95
50
  PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
96
51
  PM_ERR_ARGUMENT_TERM_PAREN,
@@ -109,6 +64,7 @@ typedef enum {
109
64
  PM_ERR_BLOCK_PARAM_PIPE_TERM,
110
65
  PM_ERR_BLOCK_TERM_BRACE,
111
66
  PM_ERR_BLOCK_TERM_END,
67
+ PM_ERR_CANNOT_PARSE_EXPRESSION,
112
68
  PM_ERR_CANNOT_PARSE_STRING_PART,
113
69
  PM_ERR_CASE_EXPRESSION_AFTER_CASE,
114
70
  PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
@@ -120,6 +76,7 @@ typedef enum {
120
76
  PM_ERR_CLASS_SUPERCLASS,
121
77
  PM_ERR_CLASS_TERM,
122
78
  PM_ERR_CLASS_UNEXPECTED_END,
79
+ PM_ERR_CLASS_VARIABLE_BARE,
123
80
  PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
124
81
  PM_ERR_CONDITIONAL_IF_PREDICATE,
125
82
  PM_ERR_CONDITIONAL_PREDICATE_TERM,
@@ -132,7 +89,6 @@ typedef enum {
132
89
  PM_ERR_DEF_ENDLESS,
133
90
  PM_ERR_DEF_ENDLESS_SETTER,
134
91
  PM_ERR_DEF_NAME,
135
- PM_ERR_DEF_NAME_AFTER_RECEIVER,
136
92
  PM_ERR_DEF_PARAMS_TERM,
137
93
  PM_ERR_DEF_PARAMS_TERM_PAREN,
138
94
  PM_ERR_DEF_RECEIVER,
@@ -157,18 +113,19 @@ typedef enum {
157
113
  PM_ERR_EXPECT_ARGUMENT,
158
114
  PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
159
115
  PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
160
- PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
161
116
  PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
162
117
  PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
163
118
  PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
164
119
  PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
165
- PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
166
120
  PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
121
+ PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
122
+ PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
167
123
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
168
124
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
169
125
  PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
170
126
  PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
171
127
  PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
128
+ PM_ERR_EXPECT_MESSAGE,
172
129
  PM_ERR_EXPECT_RBRACKET,
173
130
  PM_ERR_EXPECT_RPAREN,
174
131
  PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
@@ -176,10 +133,20 @@ typedef enum {
176
133
  PM_ERR_EXPECT_STRING_CONTENT,
177
134
  PM_ERR_EXPECT_WHEN_DELIMITER,
178
135
  PM_ERR_EXPRESSION_BARE_HASH,
136
+ PM_ERR_EXPRESSION_NOT_WRITABLE,
137
+ PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING,
138
+ PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE,
139
+ PM_ERR_EXPRESSION_NOT_WRITABLE_FILE,
140
+ PM_ERR_EXPRESSION_NOT_WRITABLE_LINE,
141
+ PM_ERR_EXPRESSION_NOT_WRITABLE_NIL,
142
+ PM_ERR_EXPRESSION_NOT_WRITABLE_SELF,
143
+ PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE,
144
+ PM_ERR_FLOAT_PARSE,
179
145
  PM_ERR_FOR_COLLECTION,
180
146
  PM_ERR_FOR_IN,
181
147
  PM_ERR_FOR_INDEX,
182
148
  PM_ERR_FOR_TERM,
149
+ PM_ERR_GLOBAL_VARIABLE_BARE,
183
150
  PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
184
151
  PM_ERR_HASH_KEY,
185
152
  PM_ERR_HASH_ROCKET,
@@ -188,20 +155,32 @@ typedef enum {
188
155
  PM_ERR_HEREDOC_TERM,
189
156
  PM_ERR_INCOMPLETE_QUESTION_MARK,
190
157
  PM_ERR_INCOMPLETE_VARIABLE_CLASS,
158
+ PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3_0,
191
159
  PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
160
+ PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3_0,
161
+ PM_ERR_INSTANCE_VARIABLE_BARE,
162
+ PM_ERR_INVALID_BLOCK_EXIT,
163
+ PM_ERR_INVALID_CHARACTER,
192
164
  PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
193
165
  PM_ERR_INVALID_FLOAT_EXPONENT,
166
+ PM_ERR_INVALID_MULTIBYTE_CHAR,
167
+ PM_ERR_INVALID_MULTIBYTE_CHARACTER,
168
+ PM_ERR_INVALID_MULTIBYTE_ESCAPE,
194
169
  PM_ERR_INVALID_NUMBER_BINARY,
195
170
  PM_ERR_INVALID_NUMBER_DECIMAL,
196
171
  PM_ERR_INVALID_NUMBER_HEXADECIMAL,
197
172
  PM_ERR_INVALID_NUMBER_OCTAL,
198
173
  PM_ERR_INVALID_NUMBER_UNDERSCORE,
199
- PM_ERR_INVALID_CHARACTER,
200
- PM_ERR_INVALID_MULTIBYTE_CHARACTER,
201
- PM_ERR_INVALID_PRINTABLE_CHARACTER,
202
174
  PM_ERR_INVALID_PERCENT,
175
+ PM_ERR_INVALID_PRINTABLE_CHARACTER,
176
+ PM_ERR_INVALID_RETRY_AFTER_ELSE,
177
+ PM_ERR_INVALID_RETRY_AFTER_ENSURE,
178
+ PM_ERR_INVALID_RETRY_WITHOUT_RESCUE,
203
179
  PM_ERR_INVALID_VARIABLE_GLOBAL,
204
- PM_ERR_IT_NOT_ALLOWED,
180
+ PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0,
181
+ PM_ERR_INVALID_YIELD,
182
+ PM_ERR_IT_NOT_ALLOWED_NUMBERED,
183
+ PM_ERR_IT_NOT_ALLOWED_ORDINARY,
205
184
  PM_ERR_LAMBDA_OPEN,
206
185
  PM_ERR_LAMBDA_TERM_BRACE,
207
186
  PM_ERR_LAMBDA_TERM_END,
@@ -220,10 +199,11 @@ typedef enum {
220
199
  PM_ERR_MODULE_TERM,
221
200
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
222
201
  PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
223
- PM_ERR_NOT_EXPRESSION,
224
202
  PM_ERR_NO_LOCAL_VARIABLE,
203
+ PM_ERR_NOT_EXPRESSION,
225
204
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
226
- PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
205
+ PM_ERR_NUMBERED_PARAMETER_IT,
206
+ PM_ERR_NUMBERED_PARAMETER_ORDINARY,
227
207
  PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
228
208
  PM_ERR_OPERATOR_MULTI_ASSIGN,
229
209
  PM_ERR_OPERATOR_WRITE_ARGUMENTS,
@@ -232,7 +212,7 @@ typedef enum {
232
212
  PM_ERR_PARAMETER_BLOCK_MULTI,
233
213
  PM_ERR_PARAMETER_CIRCULAR,
234
214
  PM_ERR_PARAMETER_METHOD_NAME,
235
- PM_ERR_PARAMETER_NAME_REPEAT,
215
+ PM_ERR_PARAMETER_NAME_DUPLICATED,
236
216
  PM_ERR_PARAMETER_NO_DEFAULT,
237
217
  PM_ERR_PARAMETER_NO_DEFAULT_KW,
238
218
  PM_ERR_PARAMETER_NUMBERED_RESERVED,
@@ -241,9 +221,10 @@ typedef enum {
241
221
  PM_ERR_PARAMETER_STAR,
242
222
  PM_ERR_PARAMETER_UNEXPECTED_FWD,
243
223
  PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
224
+ PM_ERR_PATTERN_CAPTURE_DUPLICATE,
244
225
  PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
245
- PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
246
226
  PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
227
+ PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
247
228
  PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
248
229
  PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
249
230
  PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
@@ -252,6 +233,7 @@ typedef enum {
252
233
  PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
253
234
  PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
254
235
  PM_ERR_PATTERN_HASH_KEY,
236
+ PM_ERR_PATTERN_HASH_KEY_DUPLICATE,
255
237
  PM_ERR_PATTERN_HASH_KEY_LABEL,
256
238
  PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
257
239
  PM_ERR_PATTERN_LABEL_AFTER_COMMA,
@@ -260,12 +242,19 @@ typedef enum {
260
242
  PM_ERR_PATTERN_TERM_BRACKET,
261
243
  PM_ERR_PATTERN_TERM_PAREN,
262
244
  PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
245
+ PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH,
246
+ PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING,
247
+ PM_ERR_REGEXP_INVALID_UNICODE_RANGE,
248
+ PM_ERR_REGEXP_NON_ESCAPED_MBC,
263
249
  PM_ERR_REGEXP_TERM,
250
+ PM_ERR_REGEXP_UNKNOWN_OPTIONS,
251
+ PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP,
264
252
  PM_ERR_RESCUE_EXPRESSION,
265
253
  PM_ERR_RESCUE_MODIFIER_VALUE,
266
254
  PM_ERR_RESCUE_TERM,
267
255
  PM_ERR_RESCUE_VARIABLE,
268
256
  PM_ERR_RETURN_INVALID,
257
+ PM_ERR_SCRIPT_NOT_FOUND,
269
258
  PM_ERR_SINGLETON_FOR_LITERALS,
270
259
  PM_ERR_STATEMENT_ALIAS,
271
260
  PM_ERR_STATEMENT_POSTEXE_END,
@@ -282,9 +271,10 @@ typedef enum {
282
271
  PM_ERR_TERNARY_EXPRESSION_FALSE,
283
272
  PM_ERR_TERNARY_EXPRESSION_TRUE,
284
273
  PM_ERR_UNARY_RECEIVER,
274
+ PM_ERR_UNDEF_ARGUMENT,
275
+ PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
285
276
  PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
286
277
  PM_ERR_UNEXPECTED_TOKEN_IGNORE,
287
- PM_ERR_UNDEF_ARGUMENT,
288
278
  PM_ERR_UNTIL_TERM,
289
279
  PM_ERR_VOID_EXPRESSION,
290
280
  PM_ERR_WHILE_TERM,
@@ -293,17 +283,101 @@ typedef enum {
293
283
  PM_ERR_WRITE_TARGET_UNEXPECTED,
294
284
  PM_ERR_XSTRING_TERM,
295
285
 
296
- // These are the warning codes.
286
+ // These are the warning diagnostics.
297
287
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
298
288
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
289
+ PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND,
299
290
  PM_WARN_AMBIGUOUS_PREFIX_STAR,
291
+ PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR,
300
292
  PM_WARN_AMBIGUOUS_SLASH,
293
+ PM_WARN_COMPARISON_AFTER_COMPARISON,
294
+ PM_WARN_DOT_DOT_DOT_EOL,
295
+ PM_WARN_EQUAL_IN_CONDITIONAL,
296
+ PM_WARN_EQUAL_IN_CONDITIONAL_3_3_0,
301
297
  PM_WARN_END_IN_METHOD,
302
-
303
- // This is the number of diagnostic codes.
304
- PM_DIAGNOSTIC_ID_LEN,
298
+ PM_WARN_DUPLICATED_HASH_KEY,
299
+ PM_WARN_DUPLICATED_WHEN_CLAUSE,
300
+ PM_WARN_FLOAT_OUT_OF_RANGE,
301
+ PM_WARN_IGNORED_FROZEN_STRING_LITERAL,
302
+ PM_WARN_INTEGER_IN_FLIP_FLOP,
303
+ PM_WARN_INVALID_CHARACTER,
304
+ PM_WARN_INVALID_NUMBERED_REFERENCE,
305
+ PM_WARN_INVALID_SHAREABLE_CONSTANT_VALUE,
306
+ PM_WARN_KEYWORD_EOL,
307
+ PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
308
+ PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
309
+ PM_WARN_SHEBANG_CARRIAGE_RETURN,
310
+ PM_WARN_UNEXPECTED_CARRIAGE_RETURN,
311
+ PM_WARN_UNREACHABLE_STATEMENT,
312
+ PM_WARN_UNUSED_LOCAL_VARIABLE,
313
+ PM_WARN_VOID_STATEMENT,
305
314
  } pm_diagnostic_id_t;
306
315
 
316
+ /**
317
+ * This struct represents a diagnostic generated during parsing.
318
+ *
319
+ * @extends pm_list_node_t
320
+ */
321
+ typedef struct {
322
+ /** The embedded base node. */
323
+ pm_list_node_t node;
324
+
325
+ /** The location of the diagnostic in the source. */
326
+ pm_location_t location;
327
+
328
+ /** The ID of the diagnostic. */
329
+ pm_diagnostic_id_t diag_id;
330
+
331
+ /** The message associated with the diagnostic. */
332
+ const char *message;
333
+
334
+ /**
335
+ * Whether or not the memory related to the message of this diagnostic is
336
+ * owned by this diagnostic. If it is, it needs to be freed when the
337
+ * diagnostic is freed.
338
+ */
339
+ bool owned;
340
+
341
+ /**
342
+ * The level of the diagnostic, see `pm_error_level_t` and
343
+ * `pm_warning_level_t` for possible values.
344
+ */
345
+ uint8_t level;
346
+ } pm_diagnostic_t;
347
+
348
+ /**
349
+ * The levels of errors generated during parsing.
350
+ */
351
+ typedef enum {
352
+ /** For errors that should raise a syntax error. */
353
+ PM_ERROR_LEVEL_SYNTAX = 0,
354
+
355
+ /** For errors that should raise an argument error. */
356
+ PM_ERROR_LEVEL_ARGUMENT = 1,
357
+
358
+ /** For errors that should raise a load error. */
359
+ PM_ERROR_LEVEL_LOAD = 2
360
+ } pm_error_level_t;
361
+
362
+ /**
363
+ * The levels of warnings generated during parsing.
364
+ */
365
+ typedef enum {
366
+ /** For warnings which should be emitted if $VERBOSE != nil. */
367
+ PM_WARNING_LEVEL_DEFAULT = 0,
368
+
369
+ /** For warnings which should be emitted if $VERBOSE == true. */
370
+ PM_WARNING_LEVEL_VERBOSE = 1
371
+ } pm_warning_level_t;
372
+
373
+ /**
374
+ * Get the human-readable name of the given diagnostic ID.
375
+ *
376
+ * @param diag_id The diagnostic ID.
377
+ * @return The human-readable name of the diagnostic ID.
378
+ */
379
+ const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id);
380
+
307
381
  /**
308
382
  * Append a diagnostic to the given list of diagnostics that is using shared
309
383
  * memory for its message.
@@ -135,7 +135,14 @@ extern const uint8_t pm_encoding_unicode_table[256];
135
135
  */
136
136
  typedef enum {
137
137
  PM_ENCODING_UTF_8 = 0,
138
+ PM_ENCODING_US_ASCII,
138
139
  PM_ENCODING_ASCII_8BIT,
140
+ PM_ENCODING_EUC_JP,
141
+ PM_ENCODING_WINDOWS_31J,
142
+
143
+ // We optionally support excluding the full set of encodings to only support the
144
+ // minimum necessary to process Ruby code without encoding comments.
145
+ #ifndef PRISM_ENCODING_EXCLUDE_FULL
139
146
  PM_ENCODING_BIG5,
140
147
  PM_ENCODING_BIG5_HKSCS,
141
148
  PM_ENCODING_BIG5_UAO,
@@ -148,7 +155,6 @@ typedef enum {
148
155
  PM_ENCODING_CP950,
149
156
  PM_ENCODING_CP951,
150
157
  PM_ENCODING_EMACS_MULE,
151
- PM_ENCODING_EUC_JP,
152
158
  PM_ENCODING_EUC_JP_MS,
153
159
  PM_ENCODING_EUC_JIS_2004,
154
160
  PM_ENCODING_EUC_KR,
@@ -208,7 +214,6 @@ typedef enum {
208
214
  PM_ENCODING_STATELESS_ISO_2022_JP,
209
215
  PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
210
216
  PM_ENCODING_TIS_620,
211
- PM_ENCODING_US_ASCII,
212
217
  PM_ENCODING_UTF8_MAC,
213
218
  PM_ENCODING_UTF8_DOCOMO,
214
219
  PM_ENCODING_UTF8_KDDI,
@@ -222,8 +227,9 @@ typedef enum {
222
227
  PM_ENCODING_WINDOWS_1256,
223
228
  PM_ENCODING_WINDOWS_1257,
224
229
  PM_ENCODING_WINDOWS_1258,
225
- PM_ENCODING_WINDOWS_31J,
226
230
  PM_ENCODING_WINDOWS_874,
231
+ #endif
232
+
227
233
  PM_ENCODING_MAXIMUM
228
234
  } pm_encoding_type_t;
229
235
 
@@ -248,10 +254,22 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
248
254
  /**
249
255
  * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
250
256
  * can compare against it because invalid multibyte characters are not a thing
251
- * in this encoding.
257
+ * in this encoding. It is also needed for handling Regexp encoding flags.
252
258
  */
253
259
  #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
254
260
 
261
+ /**
262
+ * This is the EUC-JP encoding. We need a reference to it to quickly process
263
+ * regular expression modifiers.
264
+ */
265
+ #define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])
266
+
267
+ /**
268
+ * This is the Windows-31J encoding. We need a reference to it to quickly
269
+ * process regular expression modifiers.
270
+ */
271
+ #define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])
272
+
255
273
  /**
256
274
  * Parse the given name of an encoding and return a pointer to the corresponding
257
275
  * encoding struct if one can be found, otherwise return NULL.
data/include/prism/node.h CHANGED
@@ -8,6 +8,14 @@
8
8
 
9
9
  #include "prism/defines.h"
10
10
  #include "prism/parser.h"
11
+ #include "prism/util/pm_buffer.h"
12
+
13
+ /**
14
+ * Loop through each node in the node list, writing each node to the given
15
+ * pm_node_t pointer.
16
+ */
17
+ #define PM_NODE_LIST_FOREACH(list, index, node) \
18
+ for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++)
11
19
 
12
20
  /**
13
21
  * Append a new node onto the end of the node list.
@@ -17,6 +25,29 @@
17
25
  */
18
26
  void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
19
27
 
28
+ /**
29
+ * Prepend a new node onto the beginning of the node list.
30
+ *
31
+ * @param list The list to prepend to.
32
+ * @param node The node to prepend.
33
+ */
34
+ void pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node);
35
+
36
+ /**
37
+ * Concatenate the given node list onto the end of the other node list.
38
+ *
39
+ * @param list The list to concatenate onto.
40
+ * @param other The list to concatenate.
41
+ */
42
+ void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other);
43
+
44
+ /**
45
+ * Free the internal memory associated with the given node list.
46
+ *
47
+ * @param list The list to free.
48
+ */
49
+ void pm_node_list_free(pm_node_list_t *list);
50
+
20
51
  /**
21
52
  * Deallocate a node and all of its children.
22
53
  *
@@ -54,4 +85,66 @@ PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *mems
54
85
  */
55
86
  PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
56
87
 
88
+ /**
89
+ * Visit each of the nodes in this subtree using the given visitor callback. The
90
+ * callback function will be called for each node in the subtree. If it returns
91
+ * false, then that node's children will not be visited. If it returns true,
92
+ * then the children will be visited. The data parameter is treated as an opaque
93
+ * pointer and is passed to the visitor callback for consumers to use as they
94
+ * see fit.
95
+ *
96
+ * As an example:
97
+ *
98
+ * ```c
99
+ * #include "prism.h"
100
+ *
101
+ * bool visit(const pm_node_t *node, void *data) {
102
+ * size_t *indent = (size_t *) data;
103
+ * for (size_t i = 0; i < *indent * 2; i++) putc(' ', stdout);
104
+ * printf("%s\n", pm_node_type_to_str(node->type));
105
+ *
106
+ * size_t next_indent = *indent + 1;
107
+ * size_t *next_data = &next_indent;
108
+ * pm_visit_child_nodes(node, visit, next_data);
109
+ *
110
+ * return false;
111
+ * }
112
+ *
113
+ * int main(void) {
114
+ * const char *source = "1 + 2; 3 + 4";
115
+ * size_t size = strlen(source);
116
+ *
117
+ * pm_parser_t parser;
118
+ * pm_options_t options = { 0 };
119
+ * pm_parser_init(&parser, (const uint8_t *) source, size, &options);
120
+ *
121
+ * size_t indent = 0;
122
+ * pm_node_t *node = pm_parse(&parser);
123
+ *
124
+ * size_t *data = &indent;
125
+ * pm_visit_node(node, visit, data);
126
+ *
127
+ * pm_node_destroy(&parser, node);
128
+ * pm_parser_free(&parser);
129
+ * return EXIT_SUCCESS;
130
+ * }
131
+ * ```
132
+ *
133
+ * @param node The root node to start visiting from.
134
+ * @param visitor The callback to call for each node in the subtree.
135
+ * @param data An opaque pointer that is passed to the visitor callback.
136
+ */
137
+ PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
138
+
139
+ /**
140
+ * Visit the children of the given node with the given callback. This is the
141
+ * default behavior for walking the tree that is called from pm_visit_node if
142
+ * the callback returns true.
143
+ *
144
+ * @param node The node to visit the children of.
145
+ * @param visitor The callback to call for each child node.
146
+ * @param data An opaque pointer that is passed to the visitor callback.
147
+ */
148
+ PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
149
+
57
150
  #endif
@@ -13,6 +13,22 @@
13
13
  #include <stddef.h>
14
14
  #include <stdint.h>
15
15
 
16
+ /**
17
+ * String literals should be made mutable.
18
+ */
19
+ #define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED ((int8_t) -1)
20
+
21
+ /**
22
+ * String literals may be frozen or mutable depending on the implementation
23
+ * default.
24
+ */
25
+ #define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET ((int8_t) 0)
26
+
27
+ /**
28
+ * String literals should be made frozen.
29
+ */
30
+ #define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED ((int8_t) 1)
31
+
16
32
  /**
17
33
  * A scope of locals surrounding the code that is being parsed.
18
34
  */
@@ -76,10 +92,56 @@ typedef struct {
76
92
  */
77
93
  pm_options_version_t version;
78
94
 
79
- /** Whether or not the frozen string literal option has been set. */
80
- bool frozen_string_literal;
95
+ /** A bitset of the various options that were set on the command line. */
96
+ uint8_t command_line;
97
+
98
+ /**
99
+ * Whether or not the frozen string literal option has been set.
100
+ * May be:
101
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
102
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
103
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
104
+ */
105
+ int8_t frozen_string_literal;
81
106
  } pm_options_t;
82
107
 
108
+ /**
109
+ * A bit representing whether or not the command line -a option was set. -a
110
+ * splits the input line $_ into $F.
111
+ */
112
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_A = 0x1;
113
+
114
+ /**
115
+ * A bit representing whether or not the command line -e option was set. -e
116
+ * allow the user to specify a script to be executed. This is necessary for
117
+ * prism to know because certain warnings are not generated when -e is used.
118
+ */
119
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_E = 0x2;
120
+
121
+ /**
122
+ * A bit representing whether or not the command line -l option was set. -l
123
+ * chomps the input line by default.
124
+ */
125
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_L = 0x4;
126
+
127
+ /**
128
+ * A bit representing whether or not the command line -n option was set. -n
129
+ * wraps the script in a while gets loop.
130
+ */
131
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_N = 0x8;
132
+
133
+ /**
134
+ * A bit representing whether or not the command line -p option was set. -p
135
+ * prints the value of $_ at the end of each loop.
136
+ */
137
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
138
+
139
+ /**
140
+ * A bit representing whether or not the command line -x option was set. -x
141
+ * searches the input file for a shebang that matches the current Ruby engine.
142
+ */
143
+ static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
144
+
83
145
  /**
84
146
  * Set the filepath option on the given options struct.
85
147
  *
@@ -112,6 +174,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, cons
112
174
  */
113
175
  PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal);
114
176
 
177
+ /**
178
+ * Set the command line option on the given options struct.
179
+ *
180
+ * @param options The options struct to set the command line option on.
181
+ * @param command_line The command_line value to set.
182
+ */
183
+ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line);
184
+
115
185
  /**
116
186
  * Set the version option on the given options struct by parsing the given
117
187
  * string. If the string contains an invalid option, this returns false.
@@ -129,8 +199,9 @@ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const
129
199
  *
130
200
  * @param options The options struct to initialize the scopes array on.
131
201
  * @param scopes_count The number of scopes to allocate.
202
+ * @return Whether or not the scopes array was initialized successfully.
132
203
  */
133
- PRISM_EXPORTED_FUNCTION void pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
204
+ PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
134
205
 
135
206
  /**
136
207
  * Return a pointer to the scope at the given index within the given options.
@@ -147,8 +218,9 @@ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm
147
218
  *
148
219
  * @param scope The scope struct to initialize.
149
220
  * @param locals_count The number of locals to allocate.
221
+ * @return Whether or not the scope was initialized successfully.
150
222
  */
151
- PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
223
+ PRISM_EXPORTED_FUNCTION bool pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
152
224
 
153
225
  /**
154
226
  * Return a pointer to the local at the given index within the given scope.
@@ -184,7 +256,10 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
184
256
  * | `4` | the length the encoding |
185
257
  * | ... | the encoding bytes |
186
258
  * | `1` | frozen string literal |
187
- * | `1` | suppress warnings |
259
+ * | `1` | -p command line option |
260
+ * | `1` | -n command line option |
261
+ * | `1` | -l command line option |
262
+ * | `1` | -a command line option |
188
263
  * | `1` | the version |
189
264
  * | `4` | the number of scopes |
190
265
  * | ... | the scopes |
@@ -196,14 +271,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
196
271
  * | `0` | use the latest version of prism |
197
272
  * | `1` | use the version of prism that is vendored in CRuby 3.3.0 |
198
273
  *
199
- * Each scope is layed out as follows:
274
+ * Each scope is laid out as follows:
200
275
  *
201
276
  * | # bytes | field |
202
277
  * | ------- | -------------------------- |
203
278
  * | `4` | the number of locals |
204
279
  * | ... | the locals |
205
280
  *
206
- * Each local is layed out as follows:
281
+ * Each local is laid out as follows:
207
282
  *
208
283
  * | # bytes | field |
209
284
  * | ------- | -------------------------- |