jruby-prism-parser 0.24.0-java → 1.4.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +269 -1
  4. data/CONTRIBUTING.md +0 -4
  5. data/Makefile +25 -18
  6. data/README.md +57 -6
  7. data/config.yml +1724 -140
  8. data/docs/build_system.md +39 -11
  9. data/docs/configuration.md +4 -0
  10. data/docs/cruby_compilation.md +1 -1
  11. data/docs/fuzzing.md +1 -1
  12. data/docs/parser_translation.md +14 -9
  13. data/docs/parsing_rules.md +4 -1
  14. data/docs/releasing.md +8 -10
  15. data/docs/relocation.md +34 -0
  16. data/docs/ripper_translation.md +72 -0
  17. data/docs/ruby_api.md +2 -1
  18. data/docs/serialization.md +29 -5
  19. data/ext/prism/api_node.c +3395 -1999
  20. data/ext/prism/api_pack.c +9 -0
  21. data/ext/prism/extconf.rb +55 -34
  22. data/ext/prism/extension.c +597 -346
  23. data/ext/prism/extension.h +6 -5
  24. data/include/prism/ast.h +2612 -455
  25. data/include/prism/defines.h +160 -2
  26. data/include/prism/diagnostic.h +188 -76
  27. data/include/prism/encoding.h +22 -4
  28. data/include/prism/node.h +89 -17
  29. data/include/prism/options.h +224 -12
  30. data/include/prism/pack.h +11 -0
  31. data/include/prism/parser.h +267 -66
  32. data/include/prism/prettyprint.h +8 -0
  33. data/include/prism/regexp.h +18 -8
  34. data/include/prism/static_literals.h +121 -0
  35. data/include/prism/util/pm_buffer.h +75 -2
  36. data/include/prism/util/pm_char.h +1 -2
  37. data/include/prism/util/pm_constant_pool.h +18 -9
  38. data/include/prism/util/pm_integer.h +126 -0
  39. data/include/prism/util/pm_list.h +1 -1
  40. data/include/prism/util/pm_newline_list.h +19 -0
  41. data/include/prism/util/pm_string.h +48 -8
  42. data/include/prism/version.h +3 -3
  43. data/include/prism.h +99 -5
  44. data/jruby-prism.jar +0 -0
  45. data/lib/prism/compiler.rb +11 -1
  46. data/lib/prism/desugar_compiler.rb +113 -74
  47. data/lib/prism/dispatcher.rb +45 -1
  48. data/lib/prism/dot_visitor.rb +201 -77
  49. data/lib/prism/dsl.rb +673 -461
  50. data/lib/prism/ffi.rb +233 -45
  51. data/lib/prism/inspect_visitor.rb +2389 -0
  52. data/lib/prism/lex_compat.rb +35 -16
  53. data/lib/prism/mutation_compiler.rb +24 -8
  54. data/lib/prism/node.rb +7731 -8460
  55. data/lib/prism/node_ext.rb +328 -32
  56. data/lib/prism/pack.rb +4 -0
  57. data/lib/prism/parse_result/comments.rb +34 -24
  58. data/lib/prism/parse_result/errors.rb +65 -0
  59. data/lib/prism/parse_result/newlines.rb +102 -12
  60. data/lib/prism/parse_result.rb +448 -44
  61. data/lib/prism/pattern.rb +28 -10
  62. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  63. data/lib/prism/polyfill/byteindex.rb +13 -0
  64. data/lib/prism/polyfill/unpack1.rb +14 -0
  65. data/lib/prism/reflection.rb +413 -0
  66. data/lib/prism/relocation.rb +504 -0
  67. data/lib/prism/serialize.rb +1940 -1198
  68. data/lib/prism/string_query.rb +30 -0
  69. data/lib/prism/translation/parser/builder.rb +61 -0
  70. data/lib/prism/translation/parser/compiler.rb +569 -195
  71. data/lib/prism/translation/parser/lexer.rb +516 -39
  72. data/lib/prism/translation/parser.rb +177 -12
  73. data/lib/prism/translation/parser33.rb +1 -1
  74. data/lib/prism/translation/parser34.rb +1 -1
  75. data/lib/prism/translation/parser35.rb +12 -0
  76. data/lib/prism/translation/ripper/sexp.rb +125 -0
  77. data/lib/prism/translation/ripper/shim.rb +5 -0
  78. data/lib/prism/translation/ripper.rb +3224 -462
  79. data/lib/prism/translation/ruby_parser.rb +194 -69
  80. data/lib/prism/translation.rb +4 -1
  81. data/lib/prism/version.rb +1 -1
  82. data/lib/prism/visitor.rb +13 -0
  83. data/lib/prism.rb +17 -27
  84. data/prism.gemspec +57 -17
  85. data/rbi/prism/compiler.rbi +12 -0
  86. data/rbi/prism/dsl.rbi +524 -0
  87. data/rbi/prism/inspect_visitor.rbi +12 -0
  88. data/rbi/prism/node.rbi +8722 -0
  89. data/rbi/prism/node_ext.rbi +107 -0
  90. data/rbi/prism/parse_result.rbi +404 -0
  91. data/rbi/prism/reflection.rbi +58 -0
  92. data/rbi/prism/string_query.rbi +12 -0
  93. data/rbi/prism/translation/parser.rbi +11 -0
  94. data/rbi/prism/translation/parser33.rbi +6 -0
  95. data/rbi/prism/translation/parser34.rbi +6 -0
  96. data/rbi/prism/translation/parser35.rbi +6 -0
  97. data/rbi/prism/translation/ripper.rbi +15 -0
  98. data/rbi/prism/visitor.rbi +473 -0
  99. data/rbi/prism.rbi +44 -7745
  100. data/sig/prism/compiler.rbs +9 -0
  101. data/sig/prism/dispatcher.rbs +16 -0
  102. data/sig/prism/dot_visitor.rbs +6 -0
  103. data/sig/prism/dsl.rbs +351 -0
  104. data/sig/prism/inspect_visitor.rbs +22 -0
  105. data/sig/prism/lex_compat.rbs +10 -0
  106. data/sig/prism/mutation_compiler.rbs +159 -0
  107. data/sig/prism/node.rbs +3614 -0
  108. data/sig/prism/node_ext.rbs +82 -0
  109. data/sig/prism/pack.rbs +43 -0
  110. data/sig/prism/parse_result.rbs +192 -0
  111. data/sig/prism/pattern.rbs +13 -0
  112. data/sig/prism/reflection.rbs +50 -0
  113. data/sig/prism/relocation.rbs +185 -0
  114. data/sig/prism/serialize.rbs +8 -0
  115. data/sig/prism/string_query.rbs +11 -0
  116. data/sig/prism/visitor.rbs +169 -0
  117. data/sig/prism.rbs +248 -4767
  118. data/src/diagnostic.c +672 -230
  119. data/src/encoding.c +211 -108
  120. data/src/node.c +7541 -1653
  121. data/src/options.c +135 -20
  122. data/src/pack.c +33 -17
  123. data/src/prettyprint.c +1543 -1485
  124. data/src/prism.c +7813 -3050
  125. data/src/regexp.c +225 -73
  126. data/src/serialize.c +101 -77
  127. data/src/static_literals.c +617 -0
  128. data/src/token_type.c +14 -13
  129. data/src/util/pm_buffer.c +187 -20
  130. data/src/util/pm_char.c +5 -5
  131. data/src/util/pm_constant_pool.c +39 -19
  132. data/src/util/pm_integer.c +670 -0
  133. data/src/util/pm_list.c +1 -1
  134. data/src/util/pm_newline_list.c +43 -5
  135. data/src/util/pm_string.c +213 -33
  136. data/src/util/pm_strncasecmp.c +13 -1
  137. data/src/util/pm_strpbrk.c +32 -6
  138. metadata +55 -19
  139. data/docs/ripper.md +0 -36
  140. data/include/prism/util/pm_state_stack.h +0 -42
  141. data/include/prism/util/pm_string_list.h +0 -44
  142. data/lib/prism/debug.rb +0 -206
  143. data/lib/prism/node_inspector.rb +0 -68
  144. data/lib/prism/translation/parser/rubocop.rb +0 -45
  145. data/rbi/prism_static.rbi +0 -207
  146. data/sig/prism_static.rbs +0 -201
  147. data/src/util/pm_state_stack.c +0 -25
  148. data/src/util/pm_string_list.c +0 -28
@@ -10,6 +10,8 @@
10
10
  #define PRISM_DEFINES_H
11
11
 
12
12
  #include <ctype.h>
13
+ #include <limits.h>
14
+ #include <math.h>
13
15
  #include <stdarg.h>
14
16
  #include <stddef.h>
15
17
  #include <stdint.h>
@@ -21,9 +23,20 @@
21
23
  * some platforms they aren't included unless this is already defined.
22
24
  */
23
25
  #define __STDC_FORMAT_MACROS
24
-
26
+ // Include sys/types.h before inttypes.h to work around issue with
27
+ // certain versions of GCC and newlib which causes omission of PRIx64
28
+ #include <sys/types.h>
25
29
  #include <inttypes.h>
26
30
 
31
+ /**
32
+ * When we are parsing using recursive descent, we want to protect against
33
+ * malicious payloads that could attempt to crash our parser. We do this by
34
+ * specifying a maximum depth to which we are allowed to recurse.
35
+ */
36
+ #ifndef PRISM_DEPTH_MAXIMUM
37
+ #define PRISM_DEPTH_MAXIMUM 10000
38
+ #endif
39
+
27
40
  /**
28
41
  * By default, we compile with -fvisibility=hidden. When this is enabled, we
29
42
  * need to mark certain functions as being publicly-visible. This macro does
@@ -48,7 +61,11 @@
48
61
  * compiler-agnostic way.
49
62
  */
50
63
  #if defined(__GNUC__)
51
- # define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
64
+ # if defined(__MINGW_PRINTF_FORMAT)
65
+ # define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(__MINGW_PRINTF_FORMAT, string_index, argument_index)))
66
+ # else
67
+ # define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
68
+ # endif
52
69
  #elif defined(__clang__)
53
70
  # define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index)))
54
71
  #else
@@ -99,4 +116,145 @@
99
116
  # define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
100
117
  #endif
101
118
 
119
+ /**
120
+ * In general, libc for embedded systems does not support memory-mapped files.
121
+ * If the target platform is POSIX or Windows, we can map a file in memory and
122
+ * read it in a more efficient manner.
123
+ */
124
+ #ifdef _WIN32
125
+ # define PRISM_HAS_MMAP
126
+ #else
127
+ # include <unistd.h>
128
+ # ifdef _POSIX_MAPPED_FILES
129
+ # define PRISM_HAS_MMAP
130
+ # endif
131
+ #endif
132
+
133
+ /**
134
+ * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
135
+ * related code from the library. All filesystem related code should be guarded
136
+ * by PRISM_HAS_FILESYSTEM.
137
+ */
138
+ #ifndef PRISM_HAS_NO_FILESYSTEM
139
+ # define PRISM_HAS_FILESYSTEM
140
+ #endif
141
+
142
+ /**
143
+ * On POSIX systems, isinf accepts a float, a double, or a long double.
144
+ * But mingw didn't provide an isinf macro, only an isinf function that only
145
+ * accepts floats, so we need to use _finite instead.
146
+ */
147
+ #ifdef __MINGW64__
148
+ #include <float.h>
149
+ #define PRISM_ISINF(x) (!_finite(x))
150
+ #else
151
+ #define PRISM_ISINF(x) isinf(x)
152
+ #endif
153
+
154
+ /**
155
+ * If you build prism with a custom allocator, configure it with
156
+ * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc,
157
+ * xrealloc, xcalloc, and xfree.
158
+ *
159
+ * For example, your `prism_xallocator.h` file could look like this:
160
+ *
161
+ * ```
162
+ * #ifndef PRISM_XALLOCATOR_H
163
+ * #define PRISM_XALLOCATOR_H
164
+ * #define xmalloc my_malloc
165
+ * #define xrealloc my_realloc
166
+ * #define xcalloc my_calloc
167
+ * #define xfree my_free
168
+ * #endif
169
+ * ```
170
+ */
171
+ #ifdef PRISM_XALLOCATOR
172
+ #include "prism_xallocator.h"
173
+ #else
174
+ #ifndef xmalloc
175
+ /**
176
+ * The malloc function that should be used. This can be overridden with
177
+ * the PRISM_XALLOCATOR define.
178
+ */
179
+ #define xmalloc malloc
180
+ #endif
181
+
182
+ #ifndef xrealloc
183
+ /**
184
+ * The realloc function that should be used. This can be overridden with
185
+ * the PRISM_XALLOCATOR define.
186
+ */
187
+ #define xrealloc realloc
188
+ #endif
189
+
190
+ #ifndef xcalloc
191
+ /**
192
+ * The calloc function that should be used. This can be overridden with
193
+ * the PRISM_XALLOCATOR define.
194
+ */
195
+ #define xcalloc calloc
196
+ #endif
197
+
198
+ #ifndef xfree
199
+ /**
200
+ * The free function that should be used. This can be overridden with the
201
+ * PRISM_XALLOCATOR define.
202
+ */
203
+ #define xfree free
204
+ #endif
205
+ #endif
206
+
207
+ /**
208
+ * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible
209
+ * switch that will turn off certain features of prism.
210
+ */
211
+ #ifdef PRISM_BUILD_MINIMAL
212
+ /** Exclude the serialization API. */
213
+ #define PRISM_EXCLUDE_SERIALIZATION
214
+
215
+ /** Exclude the JSON serialization API. */
216
+ #define PRISM_EXCLUDE_JSON
217
+
218
+ /** Exclude the Array#pack parser API. */
219
+ #define PRISM_EXCLUDE_PACK
220
+
221
+ /** Exclude the prettyprint API. */
222
+ #define PRISM_EXCLUDE_PRETTYPRINT
223
+
224
+ /** Exclude the full set of encodings, using the minimal only. */
225
+ #define PRISM_ENCODING_EXCLUDE_FULL
226
+ #endif
227
+
228
+ /**
229
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
230
+ * branch prediction.
231
+ */
232
+ #if defined(__GNUC__) || defined(__clang__)
233
+ /** The compiler should predict that this branch will be taken. */
234
+ #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
235
+
236
+ /** The compiler should predict that this branch will not be taken. */
237
+ #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
238
+ #else
239
+ /** Void because this platform does not support branch prediction hints. */
240
+ #define PRISM_LIKELY(x) (x)
241
+
242
+ /** Void because this platform does not support branch prediction hints. */
243
+ #define PRISM_UNLIKELY(x) (x)
244
+ #endif
245
+
246
+ /**
247
+ * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
248
+ * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
249
+ */
250
+ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later
251
+ #define PRISM_FALLTHROUGH [[fallthrough]];
252
+ #elif defined(__GNUC__) || defined(__clang__)
253
+ #define PRISM_FALLTHROUGH __attribute__((fallthrough));
254
+ #elif defined(_MSC_VER)
255
+ #define PRISM_FALLTHROUGH __fallthrough;
256
+ #else
257
+ #define PRISM_FALLTHROUGH
258
+ #endif
259
+
102
260
  #endif
@@ -1,3 +1,11 @@
1
+ /*----------------------------------------------------------------------------*/
2
+ /* This file is generated by the templates/template.rb script and should not */
3
+ /* be modified manually. See */
4
+ /* templates/include/prism/diagnostic.h.erb */
5
+ /* if you are looking to modify the */
6
+ /* template */
7
+ /*----------------------------------------------------------------------------*/
8
+
1
9
  /**
2
10
  * @file diagnostic.h
3
11
  *
@@ -14,83 +22,32 @@
14
22
  #include <stdlib.h>
15
23
  #include <assert.h>
16
24
 
17
- /**
18
- * The levels of errors generated during parsing.
19
- */
20
- typedef enum {
21
- /** For errors that cannot be recovered from. */
22
- PM_ERROR_LEVEL_FATAL = 0,
23
-
24
- /** For errors that should raise an argument error. */
25
- PM_ERROR_LEVEL_ARGUMENT = 1
26
- } pm_error_level_t;
27
-
28
- /**
29
- * The levels of warnings generated during parsing.
30
- */
31
- typedef enum {
32
- /** For warnings which should be emitted if $VERBOSE != nil. */
33
- PM_WARNING_LEVEL_DEFAULT = 0,
34
-
35
- /** For warnings which should be emitted if $VERBOSE == true. */
36
- PM_WARNING_LEVEL_VERBOSE = 1
37
- } pm_warning_level_t;
38
-
39
- /**
40
- * This struct represents a diagnostic generated during parsing.
41
- *
42
- * @extends pm_list_node_t
43
- */
44
- typedef struct {
45
- /** The embedded base node. */
46
- pm_list_node_t node;
47
-
48
- /** The location of the diagnostic in the source. */
49
- pm_location_t location;
50
-
51
- /** The message associated with the diagnostic. */
52
- const char *message;
53
-
54
- /**
55
- * Whether or not the memory related to the message of this diagnostic is
56
- * owned by this diagnostic. If it is, it needs to be freed when the
57
- * diagnostic is freed.
58
- */
59
- bool owned;
60
-
61
- /**
62
- * The level of the diagnostic, see `pm_error_level_t` and
63
- * `pm_warning_level_t` for possible values.
64
- */
65
- uint8_t level;
66
- } pm_diagnostic_t;
67
-
68
25
  /**
69
26
  * The diagnostic IDs of all of the diagnostics, used to communicate the types
70
27
  * of errors between the parser and the user.
71
28
  */
72
29
  typedef enum {
73
- // This is a special error that we can potentially replace by others. For
74
- // an example of how this is used, see parse_expression_prefix.
75
- PM_ERR_CANNOT_PARSE_EXPRESSION,
76
-
77
- // These are the error codes.
30
+ // These are the error diagnostics.
78
31
  PM_ERR_ALIAS_ARGUMENT,
32
+ PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE,
79
33
  PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
80
34
  PM_ERR_ARGUMENT_AFTER_BLOCK,
81
35
  PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
82
36
  PM_ERR_ARGUMENT_BARE_HASH,
83
37
  PM_ERR_ARGUMENT_BLOCK_FORWARDING,
84
38
  PM_ERR_ARGUMENT_BLOCK_MULTI,
39
+ PM_ERR_ARGUMENT_CONFLICT_AMPERSAND,
40
+ PM_ERR_ARGUMENT_CONFLICT_STAR,
41
+ PM_ERR_ARGUMENT_CONFLICT_STAR_STAR,
85
42
  PM_ERR_ARGUMENT_FORMAL_CLASS,
86
43
  PM_ERR_ARGUMENT_FORMAL_CONSTANT,
87
44
  PM_ERR_ARGUMENT_FORMAL_GLOBAL,
88
45
  PM_ERR_ARGUMENT_FORMAL_IVAR,
89
46
  PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
90
- PM_ERR_ARGUMENT_IN,
91
- PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
47
+ PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND,
92
48
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
93
49
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
50
+ PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR,
94
51
  PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
95
52
  PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
96
53
  PM_ERR_ARGUMENT_TERM_PAREN,
@@ -109,6 +66,7 @@ typedef enum {
109
66
  PM_ERR_BLOCK_PARAM_PIPE_TERM,
110
67
  PM_ERR_BLOCK_TERM_BRACE,
111
68
  PM_ERR_BLOCK_TERM_END,
69
+ PM_ERR_CANNOT_PARSE_EXPRESSION,
112
70
  PM_ERR_CANNOT_PARSE_STRING_PART,
113
71
  PM_ERR_CASE_EXPRESSION_AFTER_CASE,
114
72
  PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
@@ -120,6 +78,7 @@ typedef enum {
120
78
  PM_ERR_CLASS_SUPERCLASS,
121
79
  PM_ERR_CLASS_TERM,
122
80
  PM_ERR_CLASS_UNEXPECTED_END,
81
+ PM_ERR_CLASS_VARIABLE_BARE,
123
82
  PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
124
83
  PM_ERR_CONDITIONAL_IF_PREDICATE,
125
84
  PM_ERR_CONDITIONAL_PREDICATE_TERM,
@@ -132,7 +91,6 @@ typedef enum {
132
91
  PM_ERR_DEF_ENDLESS,
133
92
  PM_ERR_DEF_ENDLESS_SETTER,
134
93
  PM_ERR_DEF_NAME,
135
- PM_ERR_DEF_NAME_AFTER_RECEIVER,
136
94
  PM_ERR_DEF_PARAMS_TERM,
137
95
  PM_ERR_DEF_PARAMS_TERM_PAREN,
138
96
  PM_ERR_DEF_RECEIVER,
@@ -151,57 +109,95 @@ typedef enum {
151
109
  PM_ERR_ESCAPE_INVALID_META_REPEAT,
152
110
  PM_ERR_ESCAPE_INVALID_UNICODE,
153
111
  PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
112
+ PM_ERR_ESCAPE_INVALID_UNICODE_LIST,
154
113
  PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
155
114
  PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
115
+ PM_ERR_ESCAPE_INVALID_UNICODE_SHORT,
156
116
  PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
157
117
  PM_ERR_EXPECT_ARGUMENT,
158
118
  PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
159
119
  PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
160
- PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
161
120
  PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
162
121
  PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
163
122
  PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
164
123
  PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
165
- PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
166
124
  PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
125
+ PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
126
+ PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
167
127
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
168
128
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
169
129
  PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
130
+ PM_ERR_EXPECT_FOR_DELIMITER,
170
131
  PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
132
+ PM_ERR_EXPECT_IN_DELIMITER,
171
133
  PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
134
+ PM_ERR_EXPECT_MESSAGE,
172
135
  PM_ERR_EXPECT_RBRACKET,
173
136
  PM_ERR_EXPECT_RPAREN,
174
137
  PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
175
138
  PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
139
+ PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER,
176
140
  PM_ERR_EXPECT_STRING_CONTENT,
177
141
  PM_ERR_EXPECT_WHEN_DELIMITER,
178
142
  PM_ERR_EXPRESSION_BARE_HASH,
143
+ PM_ERR_EXPRESSION_NOT_WRITABLE,
144
+ PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING,
145
+ PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE,
146
+ PM_ERR_EXPRESSION_NOT_WRITABLE_FILE,
147
+ PM_ERR_EXPRESSION_NOT_WRITABLE_LINE,
148
+ PM_ERR_EXPRESSION_NOT_WRITABLE_NIL,
149
+ PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED,
150
+ PM_ERR_EXPRESSION_NOT_WRITABLE_SELF,
151
+ PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE,
152
+ PM_ERR_FLOAT_PARSE,
179
153
  PM_ERR_FOR_COLLECTION,
180
154
  PM_ERR_FOR_IN,
181
155
  PM_ERR_FOR_INDEX,
182
156
  PM_ERR_FOR_TERM,
157
+ PM_ERR_GLOBAL_VARIABLE_BARE,
183
158
  PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
184
159
  PM_ERR_HASH_KEY,
185
160
  PM_ERR_HASH_ROCKET,
186
161
  PM_ERR_HASH_TERM,
187
162
  PM_ERR_HASH_VALUE,
163
+ PM_ERR_HEREDOC_IDENTIFIER,
188
164
  PM_ERR_HEREDOC_TERM,
189
165
  PM_ERR_INCOMPLETE_QUESTION_MARK,
190
166
  PM_ERR_INCOMPLETE_VARIABLE_CLASS,
167
+ PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3,
191
168
  PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
169
+ PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3,
170
+ PM_ERR_INSTANCE_VARIABLE_BARE,
171
+ PM_ERR_INVALID_BLOCK_EXIT,
172
+ PM_ERR_INVALID_CHARACTER,
173
+ PM_ERR_INVALID_COMMA,
192
174
  PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
175
+ PM_ERR_INVALID_ESCAPE_CHARACTER,
193
176
  PM_ERR_INVALID_FLOAT_EXPONENT,
177
+ PM_ERR_INVALID_LOCAL_VARIABLE_READ,
178
+ PM_ERR_INVALID_LOCAL_VARIABLE_WRITE,
179
+ PM_ERR_INVALID_MULTIBYTE_CHAR,
180
+ PM_ERR_INVALID_MULTIBYTE_CHARACTER,
181
+ PM_ERR_INVALID_MULTIBYTE_ESCAPE,
194
182
  PM_ERR_INVALID_NUMBER_BINARY,
195
183
  PM_ERR_INVALID_NUMBER_DECIMAL,
184
+ PM_ERR_INVALID_NUMBER_FRACTION,
196
185
  PM_ERR_INVALID_NUMBER_HEXADECIMAL,
197
186
  PM_ERR_INVALID_NUMBER_OCTAL,
198
- PM_ERR_INVALID_NUMBER_UNDERSCORE,
199
- PM_ERR_INVALID_CHARACTER,
200
- PM_ERR_INVALID_MULTIBYTE_CHARACTER,
201
- PM_ERR_INVALID_PRINTABLE_CHARACTER,
187
+ PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER,
188
+ PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING,
202
189
  PM_ERR_INVALID_PERCENT,
190
+ PM_ERR_INVALID_PERCENT_EOF,
191
+ PM_ERR_INVALID_PRINTABLE_CHARACTER,
192
+ PM_ERR_INVALID_RETRY_AFTER_ELSE,
193
+ PM_ERR_INVALID_RETRY_AFTER_ENSURE,
194
+ PM_ERR_INVALID_RETRY_WITHOUT_RESCUE,
195
+ PM_ERR_INVALID_SYMBOL,
203
196
  PM_ERR_INVALID_VARIABLE_GLOBAL,
204
- PM_ERR_IT_NOT_ALLOWED,
197
+ PM_ERR_INVALID_VARIABLE_GLOBAL_3_3,
198
+ PM_ERR_INVALID_YIELD,
199
+ PM_ERR_IT_NOT_ALLOWED_NUMBERED,
200
+ PM_ERR_IT_NOT_ALLOWED_ORDINARY,
205
201
  PM_ERR_LAMBDA_OPEN,
206
202
  PM_ERR_LAMBDA_TERM_BRACE,
207
203
  PM_ERR_LAMBDA_TERM_END,
@@ -220,19 +216,24 @@ typedef enum {
220
216
  PM_ERR_MODULE_TERM,
221
217
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
222
218
  PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
223
- PM_ERR_NOT_EXPRESSION,
219
+ PM_ERR_NESTING_TOO_DEEP,
224
220
  PM_ERR_NO_LOCAL_VARIABLE,
221
+ PM_ERR_NON_ASSOCIATIVE_OPERATOR,
222
+ PM_ERR_NOT_EXPRESSION,
225
223
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
226
- PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
227
- PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
224
+ PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK,
225
+ PM_ERR_NUMBERED_PARAMETER_IT,
226
+ PM_ERR_NUMBERED_PARAMETER_ORDINARY,
227
+ PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK,
228
228
  PM_ERR_OPERATOR_MULTI_ASSIGN,
229
229
  PM_ERR_OPERATOR_WRITE_ARGUMENTS,
230
230
  PM_ERR_OPERATOR_WRITE_BLOCK,
231
231
  PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
232
232
  PM_ERR_PARAMETER_BLOCK_MULTI,
233
233
  PM_ERR_PARAMETER_CIRCULAR,
234
+ PM_ERR_PARAMETER_FORWARDING_AFTER_REST,
234
235
  PM_ERR_PARAMETER_METHOD_NAME,
235
- PM_ERR_PARAMETER_NAME_REPEAT,
236
+ PM_ERR_PARAMETER_NAME_DUPLICATED,
236
237
  PM_ERR_PARAMETER_NO_DEFAULT,
237
238
  PM_ERR_PARAMETER_NO_DEFAULT_KW,
238
239
  PM_ERR_PARAMETER_NUMBERED_RESERVED,
@@ -240,10 +241,13 @@ typedef enum {
240
241
  PM_ERR_PARAMETER_SPLAT_MULTI,
241
242
  PM_ERR_PARAMETER_STAR,
242
243
  PM_ERR_PARAMETER_UNEXPECTED_FWD,
244
+ PM_ERR_PARAMETER_UNEXPECTED_NO_KW,
243
245
  PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
246
+ PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS,
247
+ PM_ERR_PATTERN_CAPTURE_DUPLICATE,
244
248
  PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
245
- PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
246
249
  PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
250
+ PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
247
251
  PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
248
252
  PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
249
253
  PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
@@ -251,8 +255,13 @@ typedef enum {
251
255
  PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
252
256
  PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
253
257
  PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
258
+ PM_ERR_PATTERN_FIND_MISSING_INNER,
259
+ PM_ERR_PATTERN_HASH_IMPLICIT,
254
260
  PM_ERR_PATTERN_HASH_KEY,
261
+ PM_ERR_PATTERN_HASH_KEY_DUPLICATE,
262
+ PM_ERR_PATTERN_HASH_KEY_INTERPOLATED,
255
263
  PM_ERR_PATTERN_HASH_KEY_LABEL,
264
+ PM_ERR_PATTERN_HASH_KEY_LOCALS,
256
265
  PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
257
266
  PM_ERR_PATTERN_LABEL_AFTER_COMMA,
258
267
  PM_ERR_PATTERN_REST,
@@ -260,12 +269,20 @@ typedef enum {
260
269
  PM_ERR_PATTERN_TERM_BRACKET,
261
270
  PM_ERR_PATTERN_TERM_PAREN,
262
271
  PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
272
+ PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH,
273
+ PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING,
274
+ PM_ERR_REGEXP_INVALID_UNICODE_RANGE,
275
+ PM_ERR_REGEXP_NON_ESCAPED_MBC,
276
+ PM_ERR_REGEXP_PARSE_ERROR,
263
277
  PM_ERR_REGEXP_TERM,
278
+ PM_ERR_REGEXP_UNKNOWN_OPTIONS,
279
+ PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP,
264
280
  PM_ERR_RESCUE_EXPRESSION,
265
281
  PM_ERR_RESCUE_MODIFIER_VALUE,
266
282
  PM_ERR_RESCUE_TERM,
267
283
  PM_ERR_RESCUE_VARIABLE,
268
284
  PM_ERR_RETURN_INVALID,
285
+ PM_ERR_SCRIPT_NOT_FOUND,
269
286
  PM_ERR_SINGLETON_FOR_LITERALS,
270
287
  PM_ERR_STATEMENT_ALIAS,
271
288
  PM_ERR_STATEMENT_POSTEXE_END,
@@ -281,10 +298,18 @@ typedef enum {
281
298
  PM_ERR_TERNARY_COLON,
282
299
  PM_ERR_TERNARY_EXPRESSION_FALSE,
283
300
  PM_ERR_TERNARY_EXPRESSION_TRUE,
301
+ PM_ERR_UNARY_DISALLOWED,
284
302
  PM_ERR_UNARY_RECEIVER,
303
+ PM_ERR_UNDEF_ARGUMENT,
304
+ PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
305
+ PM_ERR_UNEXPECTED_INDEX_BLOCK,
306
+ PM_ERR_UNEXPECTED_INDEX_KEYWORDS,
307
+ PM_ERR_UNEXPECTED_LABEL,
308
+ PM_ERR_UNEXPECTED_MULTI_WRITE,
309
+ PM_ERR_UNEXPECTED_RANGE_OPERATOR,
310
+ PM_ERR_UNEXPECTED_SAFE_NAVIGATION,
285
311
  PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
286
312
  PM_ERR_UNEXPECTED_TOKEN_IGNORE,
287
- PM_ERR_UNDEF_ARGUMENT,
288
313
  PM_ERR_UNTIL_TERM,
289
314
  PM_ERR_VOID_EXPRESSION,
290
315
  PM_ERR_WHILE_TERM,
@@ -293,17 +318,104 @@ typedef enum {
293
318
  PM_ERR_WRITE_TARGET_UNEXPECTED,
294
319
  PM_ERR_XSTRING_TERM,
295
320
 
296
- // These are the warning codes.
321
+ // These are the warning diagnostics.
322
+ PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
297
323
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
298
324
  PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
325
+ PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND,
299
326
  PM_WARN_AMBIGUOUS_PREFIX_STAR,
327
+ PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR,
300
328
  PM_WARN_AMBIGUOUS_SLASH,
329
+ PM_WARN_COMPARISON_AFTER_COMPARISON,
330
+ PM_WARN_DOT_DOT_DOT_EOL,
331
+ PM_WARN_EQUAL_IN_CONDITIONAL,
332
+ PM_WARN_EQUAL_IN_CONDITIONAL_3_3,
301
333
  PM_WARN_END_IN_METHOD,
302
-
303
- // This is the number of diagnostic codes.
304
- PM_DIAGNOSTIC_ID_LEN,
334
+ PM_WARN_DUPLICATED_HASH_KEY,
335
+ PM_WARN_DUPLICATED_WHEN_CLAUSE,
336
+ PM_WARN_FLOAT_OUT_OF_RANGE,
337
+ PM_WARN_IGNORED_FROZEN_STRING_LITERAL,
338
+ PM_WARN_INDENTATION_MISMATCH,
339
+ PM_WARN_INTEGER_IN_FLIP_FLOP,
340
+ PM_WARN_INVALID_CHARACTER,
341
+ PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
342
+ PM_WARN_INVALID_NUMBERED_REFERENCE,
343
+ PM_WARN_KEYWORD_EOL,
344
+ PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
345
+ PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
346
+ PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE,
347
+ PM_WARN_SHEBANG_CARRIAGE_RETURN,
348
+ PM_WARN_UNEXPECTED_CARRIAGE_RETURN,
349
+ PM_WARN_UNREACHABLE_STATEMENT,
350
+ PM_WARN_UNUSED_LOCAL_VARIABLE,
351
+ PM_WARN_VOID_STATEMENT,
305
352
  } pm_diagnostic_id_t;
306
353
 
354
+ /**
355
+ * This struct represents a diagnostic generated during parsing.
356
+ *
357
+ * @extends pm_list_node_t
358
+ */
359
+ typedef struct {
360
+ /** The embedded base node. */
361
+ pm_list_node_t node;
362
+
363
+ /** The location of the diagnostic in the source. */
364
+ pm_location_t location;
365
+
366
+ /** The ID of the diagnostic. */
367
+ pm_diagnostic_id_t diag_id;
368
+
369
+ /** The message associated with the diagnostic. */
370
+ const char *message;
371
+
372
+ /**
373
+ * Whether or not the memory related to the message of this diagnostic is
374
+ * owned by this diagnostic. If it is, it needs to be freed when the
375
+ * diagnostic is freed.
376
+ */
377
+ bool owned;
378
+
379
+ /**
380
+ * The level of the diagnostic, see `pm_error_level_t` and
381
+ * `pm_warning_level_t` for possible values.
382
+ */
383
+ uint8_t level;
384
+ } pm_diagnostic_t;
385
+
386
+ /**
387
+ * The levels of errors generated during parsing.
388
+ */
389
+ typedef enum {
390
+ /** For errors that should raise a syntax error. */
391
+ PM_ERROR_LEVEL_SYNTAX = 0,
392
+
393
+ /** For errors that should raise an argument error. */
394
+ PM_ERROR_LEVEL_ARGUMENT = 1,
395
+
396
+ /** For errors that should raise a load error. */
397
+ PM_ERROR_LEVEL_LOAD = 2
398
+ } pm_error_level_t;
399
+
400
+ /**
401
+ * The levels of warnings generated during parsing.
402
+ */
403
+ typedef enum {
404
+ /** For warnings which should be emitted if $VERBOSE != nil. */
405
+ PM_WARNING_LEVEL_DEFAULT = 0,
406
+
407
+ /** For warnings which should be emitted if $VERBOSE == true. */
408
+ PM_WARNING_LEVEL_VERBOSE = 1
409
+ } pm_warning_level_t;
410
+
411
+ /**
412
+ * Get the human-readable name of the given diagnostic ID.
413
+ *
414
+ * @param diag_id The diagnostic ID.
415
+ * @return The human-readable name of the diagnostic ID.
416
+ */
417
+ const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id);
418
+
307
419
  /**
308
420
  * Append a diagnostic to the given list of diagnostics that is using shared
309
421
  * memory for its message.
@@ -135,7 +135,14 @@ extern const uint8_t pm_encoding_unicode_table[256];
135
135
  */
136
136
  typedef enum {
137
137
  PM_ENCODING_UTF_8 = 0,
138
+ PM_ENCODING_US_ASCII,
138
139
  PM_ENCODING_ASCII_8BIT,
140
+ PM_ENCODING_EUC_JP,
141
+ PM_ENCODING_WINDOWS_31J,
142
+
143
+ // We optionally support excluding the full set of encodings to only support the
144
+ // minimum necessary to process Ruby code without encoding comments.
145
+ #ifndef PRISM_ENCODING_EXCLUDE_FULL
139
146
  PM_ENCODING_BIG5,
140
147
  PM_ENCODING_BIG5_HKSCS,
141
148
  PM_ENCODING_BIG5_UAO,
@@ -148,7 +155,6 @@ typedef enum {
148
155
  PM_ENCODING_CP950,
149
156
  PM_ENCODING_CP951,
150
157
  PM_ENCODING_EMACS_MULE,
151
- PM_ENCODING_EUC_JP,
152
158
  PM_ENCODING_EUC_JP_MS,
153
159
  PM_ENCODING_EUC_JIS_2004,
154
160
  PM_ENCODING_EUC_KR,
@@ -208,7 +214,6 @@ typedef enum {
208
214
  PM_ENCODING_STATELESS_ISO_2022_JP,
209
215
  PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
210
216
  PM_ENCODING_TIS_620,
211
- PM_ENCODING_US_ASCII,
212
217
  PM_ENCODING_UTF8_MAC,
213
218
  PM_ENCODING_UTF8_DOCOMO,
214
219
  PM_ENCODING_UTF8_KDDI,
@@ -222,8 +227,9 @@ typedef enum {
222
227
  PM_ENCODING_WINDOWS_1256,
223
228
  PM_ENCODING_WINDOWS_1257,
224
229
  PM_ENCODING_WINDOWS_1258,
225
- PM_ENCODING_WINDOWS_31J,
226
230
  PM_ENCODING_WINDOWS_874,
231
+ #endif
232
+
227
233
  PM_ENCODING_MAXIMUM
228
234
  } pm_encoding_type_t;
229
235
 
@@ -248,10 +254,22 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
248
254
  /**
249
255
  * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
250
256
  * can compare against it because invalid multibyte characters are not a thing
251
- * in this encoding.
257
+ * in this encoding. It is also needed for handling Regexp encoding flags.
252
258
  */
253
259
  #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
254
260
 
261
+ /**
262
+ * This is the EUC-JP encoding. We need a reference to it to quickly process
263
+ * regular expression modifiers.
264
+ */
265
+ #define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])
266
+
267
+ /**
268
+ * This is the Windows-31J encoding. We need a reference to it to quickly
269
+ * process regular expression modifiers.
270
+ */
271
+ #define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])
272
+
255
273
  /**
256
274
  * Parse the given name of an encoding and return a pointer to the corresponding
257
275
  * encoding struct if one can be found, otherwise return NULL.