jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
@@ -0,0 +1,102 @@
1
+ /**
2
+ * @file defines.h
3
+ *
4
+ * Macro definitions used throughout the prism library.
5
+ *
6
+ * This file should be included first by any *.h or *.c in prism for consistency
7
+ * and to ensure that the macros are defined before they are used.
8
+ */
9
+ #ifndef PRISM_DEFINES_H
10
+ #define PRISM_DEFINES_H
11
+
12
+ #include <ctype.h>
13
+ #include <stdarg.h>
14
+ #include <stddef.h>
15
+ #include <stdint.h>
16
+ #include <stdio.h>
17
+ #include <string.h>
18
+
19
+ /**
20
+ * We want to be able to use the PRI* macros for printing out integers, but on
21
+ * some platforms they aren't included unless this is already defined.
22
+ */
23
+ #define __STDC_FORMAT_MACROS
24
+
25
+ #include <inttypes.h>
26
+
27
+ /**
28
+ * By default, we compile with -fvisibility=hidden. When this is enabled, we
29
+ * need to mark certain functions as being publicly visible. This macro does
30
+ * that in a compiler-agnostic way.
31
+ */
32
+ #ifndef PRISM_EXPORTED_FUNCTION
33
+ # ifdef PRISM_EXPORT_SYMBOLS
34
+ # ifdef _WIN32
35
+ # define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
36
+ # else
37
+ # define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
38
+ # endif
39
+ # else
40
+ # define PRISM_EXPORTED_FUNCTION
41
+ # endif
42
+ #endif
43
+
44
+ /**
45
+ * Certain compilers support specifying that a function accepts variadic
46
+ * parameters that look like printf format strings to provide a better developer
47
+ * experience when someone is using the function. This macro does that in a
48
+ * compiler-agnostic way.
49
+ */
50
+ #if defined(__GNUC__)
51
+ # define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
52
+ #elif defined(__clang__)
53
+ # define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index)))
54
+ #else
55
+ # define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
56
+ #endif
57
+
58
+ /**
59
+ * GCC will warn if you specify a function or parameter that is unused at
60
+ * runtime. This macro allows you to mark a function or parameter as unused in a
61
+ * compiler-agnostic way.
62
+ */
63
+ #if defined(__GNUC__)
64
+ # define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
65
+ #else
66
+ # define PRISM_ATTRIBUTE_UNUSED
67
+ #endif
68
+
69
+ /**
70
+ * Old Visual Studio versions do not support the inline keyword, so we need to
71
+ * define it to be __inline.
72
+ */
73
+ #if defined(_MSC_VER) && !defined(inline)
74
+ # define inline __inline
75
+ #endif
76
+
77
+ /**
78
+ * Old Visual Studio versions before 2015 do not implement snprintf, but instead
79
+ * implement _snprintf. We standardize that here.
80
+ */
81
+ #if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
82
+ # define snprintf _snprintf
83
+ #endif
84
+
85
+ /**
86
+ * A simple utility macro to concatenate two tokens together, necessary when one
87
+ * of the tokens is itself a macro.
88
+ */
89
+ #define PM_CONCATENATE(left, right) left ## right
90
+
91
+ /**
92
+ * We want to be able to use static assertions, but they weren't standardized
93
+ * until C11. As such, we polyfill it here by making a hacky typedef that will
94
+ * fail to compile due to a negative array size if the condition is false.
95
+ */
96
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
97
+ # define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
98
+ #else
99
+ # define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
100
+ #endif
101
+
102
+ #endif
@@ -0,0 +1,339 @@
1
+ /**
2
+ * @file diagnostic.h
3
+ *
4
+ * A list of diagnostics generated during parsing.
5
+ */
6
+ #ifndef PRISM_DIAGNOSTIC_H
7
+ #define PRISM_DIAGNOSTIC_H
8
+
9
+ #include "prism/ast.h"
10
+ #include "prism/defines.h"
11
+ #include "prism/util/pm_list.h"
12
+
13
+ #include <stdbool.h>
14
+ #include <stdlib.h>
15
+ #include <assert.h>
16
+
17
+ /**
18
+ * The levels of errors generated during parsing.
19
+ */
20
+ typedef enum {
21
+ /** For errors that cannot be recovered from. */
22
+ PM_ERROR_LEVEL_FATAL = 0,
23
+
24
+ /** For errors that should raise an argument error. */
25
+ PM_ERROR_LEVEL_ARGUMENT = 1
26
+ } pm_error_level_t;
27
+
28
+ /**
29
+ * The levels of warnings generated during parsing.
30
+ */
31
+ typedef enum {
32
+ /** For warnings which should be emitted if $VERBOSE != nil. */
33
+ PM_WARNING_LEVEL_DEFAULT = 0,
34
+
35
+ /** For warnings which should be emitted if $VERBOSE == true. */
36
+ PM_WARNING_LEVEL_VERBOSE = 1
37
+ } pm_warning_level_t;
38
+
39
+ /**
40
+ * This struct represents a diagnostic generated during parsing.
41
+ *
42
+ * @extends pm_list_node_t
43
+ */
44
+ typedef struct {
45
+ /** The embedded base node. */
46
+ pm_list_node_t node;
47
+
48
+ /** The location of the diagnostic in the source. */
49
+ pm_location_t location;
50
+
51
+ /** The message associated with the diagnostic. */
52
+ const char *message;
53
+
54
+ /**
55
+ * Whether or not the memory related to the message of this diagnostic is
56
+ * owned by this diagnostic. If it is, it needs to be freed when the
57
+ * diagnostic is freed.
58
+ */
59
+ bool owned;
60
+
61
+ /**
62
+ * The level of the diagnostic, see `pm_error_level_t` and
63
+ * `pm_warning_level_t` for possible values.
64
+ */
65
+ uint8_t level;
66
+ } pm_diagnostic_t;
67
+
68
+ /**
69
+ * The diagnostic IDs of all of the diagnostics, used to communicate the types
70
+ * of errors between the parser and the user.
71
+ */
72
+ typedef enum {
73
+ // This is a special error that we can potentially replace by others. For
74
+ // an example of how this is used, see parse_expression_prefix.
75
+ PM_ERR_CANNOT_PARSE_EXPRESSION,
76
+
77
+ // These are the error codes.
78
+ PM_ERR_ALIAS_ARGUMENT,
79
+ PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
80
+ PM_ERR_ARGUMENT_AFTER_BLOCK,
81
+ PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
82
+ PM_ERR_ARGUMENT_BARE_HASH,
83
+ PM_ERR_ARGUMENT_BLOCK_FORWARDING,
84
+ PM_ERR_ARGUMENT_BLOCK_MULTI,
85
+ PM_ERR_ARGUMENT_FORMAL_CLASS,
86
+ PM_ERR_ARGUMENT_FORMAL_CONSTANT,
87
+ PM_ERR_ARGUMENT_FORMAL_GLOBAL,
88
+ PM_ERR_ARGUMENT_FORMAL_IVAR,
89
+ PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
90
+ PM_ERR_ARGUMENT_IN,
91
+ PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
92
+ PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
93
+ PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
94
+ PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
95
+ PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
96
+ PM_ERR_ARGUMENT_TERM_PAREN,
97
+ PM_ERR_ARGUMENT_UNEXPECTED_BLOCK,
98
+ PM_ERR_ARRAY_ELEMENT,
99
+ PM_ERR_ARRAY_EXPRESSION,
100
+ PM_ERR_ARRAY_EXPRESSION_AFTER_STAR,
101
+ PM_ERR_ARRAY_SEPARATOR,
102
+ PM_ERR_ARRAY_TERM,
103
+ PM_ERR_BEGIN_LONELY_ELSE,
104
+ PM_ERR_BEGIN_TERM,
105
+ PM_ERR_BEGIN_UPCASE_BRACE,
106
+ PM_ERR_BEGIN_UPCASE_TERM,
107
+ PM_ERR_BEGIN_UPCASE_TOPLEVEL,
108
+ PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE,
109
+ PM_ERR_BLOCK_PARAM_PIPE_TERM,
110
+ PM_ERR_BLOCK_TERM_BRACE,
111
+ PM_ERR_BLOCK_TERM_END,
112
+ PM_ERR_CANNOT_PARSE_STRING_PART,
113
+ PM_ERR_CASE_EXPRESSION_AFTER_CASE,
114
+ PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
115
+ PM_ERR_CASE_MATCH_MISSING_PREDICATE,
116
+ PM_ERR_CASE_MISSING_CONDITIONS,
117
+ PM_ERR_CASE_TERM,
118
+ PM_ERR_CLASS_IN_METHOD,
119
+ PM_ERR_CLASS_NAME,
120
+ PM_ERR_CLASS_SUPERCLASS,
121
+ PM_ERR_CLASS_TERM,
122
+ PM_ERR_CLASS_UNEXPECTED_END,
123
+ PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
124
+ PM_ERR_CONDITIONAL_IF_PREDICATE,
125
+ PM_ERR_CONDITIONAL_PREDICATE_TERM,
126
+ PM_ERR_CONDITIONAL_TERM,
127
+ PM_ERR_CONDITIONAL_TERM_ELSE,
128
+ PM_ERR_CONDITIONAL_UNLESS_PREDICATE,
129
+ PM_ERR_CONDITIONAL_UNTIL_PREDICATE,
130
+ PM_ERR_CONDITIONAL_WHILE_PREDICATE,
131
+ PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT,
132
+ PM_ERR_DEF_ENDLESS,
133
+ PM_ERR_DEF_ENDLESS_SETTER,
134
+ PM_ERR_DEF_NAME,
135
+ PM_ERR_DEF_NAME_AFTER_RECEIVER,
136
+ PM_ERR_DEF_PARAMS_TERM,
137
+ PM_ERR_DEF_PARAMS_TERM_PAREN,
138
+ PM_ERR_DEF_RECEIVER,
139
+ PM_ERR_DEF_RECEIVER_TERM,
140
+ PM_ERR_DEF_TERM,
141
+ PM_ERR_DEFINED_EXPRESSION,
142
+ PM_ERR_EMBDOC_TERM,
143
+ PM_ERR_EMBEXPR_END,
144
+ PM_ERR_EMBVAR_INVALID,
145
+ PM_ERR_END_UPCASE_BRACE,
146
+ PM_ERR_END_UPCASE_TERM,
147
+ PM_ERR_ESCAPE_INVALID_CONTROL,
148
+ PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT,
149
+ PM_ERR_ESCAPE_INVALID_HEXADECIMAL,
150
+ PM_ERR_ESCAPE_INVALID_META,
151
+ PM_ERR_ESCAPE_INVALID_META_REPEAT,
152
+ PM_ERR_ESCAPE_INVALID_UNICODE,
153
+ PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
154
+ PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
155
+ PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
156
+ PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
157
+ PM_ERR_EXPECT_ARGUMENT,
158
+ PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
159
+ PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
160
+ PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
161
+ PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
162
+ PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
163
+ PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
164
+ PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
165
+ PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
166
+ PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
167
+ PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
168
+ PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
169
+ PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
170
+ PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
171
+ PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
172
+ PM_ERR_EXPECT_RBRACKET,
173
+ PM_ERR_EXPECT_RPAREN,
174
+ PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
175
+ PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
176
+ PM_ERR_EXPECT_STRING_CONTENT,
177
+ PM_ERR_EXPECT_WHEN_DELIMITER,
178
+ PM_ERR_EXPRESSION_BARE_HASH,
179
+ PM_ERR_FOR_COLLECTION,
180
+ PM_ERR_FOR_IN,
181
+ PM_ERR_FOR_INDEX,
182
+ PM_ERR_FOR_TERM,
183
+ PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
184
+ PM_ERR_HASH_KEY,
185
+ PM_ERR_HASH_ROCKET,
186
+ PM_ERR_HASH_TERM,
187
+ PM_ERR_HASH_VALUE,
188
+ PM_ERR_HEREDOC_TERM,
189
+ PM_ERR_INCOMPLETE_QUESTION_MARK,
190
+ PM_ERR_INCOMPLETE_VARIABLE_CLASS,
191
+ PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
192
+ PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
193
+ PM_ERR_INVALID_FLOAT_EXPONENT,
194
+ PM_ERR_INVALID_NUMBER_BINARY,
195
+ PM_ERR_INVALID_NUMBER_DECIMAL,
196
+ PM_ERR_INVALID_NUMBER_HEXADECIMAL,
197
+ PM_ERR_INVALID_NUMBER_OCTAL,
198
+ PM_ERR_INVALID_NUMBER_UNDERSCORE,
199
+ PM_ERR_INVALID_CHARACTER,
200
+ PM_ERR_INVALID_MULTIBYTE_CHARACTER,
201
+ PM_ERR_INVALID_PRINTABLE_CHARACTER,
202
+ PM_ERR_INVALID_PERCENT,
203
+ PM_ERR_INVALID_VARIABLE_GLOBAL,
204
+ PM_ERR_IT_NOT_ALLOWED,
205
+ PM_ERR_LAMBDA_OPEN,
206
+ PM_ERR_LAMBDA_TERM_BRACE,
207
+ PM_ERR_LAMBDA_TERM_END,
208
+ PM_ERR_LIST_I_LOWER_ELEMENT,
209
+ PM_ERR_LIST_I_LOWER_TERM,
210
+ PM_ERR_LIST_I_UPPER_ELEMENT,
211
+ PM_ERR_LIST_I_UPPER_TERM,
212
+ PM_ERR_LIST_W_LOWER_ELEMENT,
213
+ PM_ERR_LIST_W_LOWER_TERM,
214
+ PM_ERR_LIST_W_UPPER_ELEMENT,
215
+ PM_ERR_LIST_W_UPPER_TERM,
216
+ PM_ERR_MALLOC_FAILED,
217
+ PM_ERR_MIXED_ENCODING,
218
+ PM_ERR_MODULE_IN_METHOD,
219
+ PM_ERR_MODULE_NAME,
220
+ PM_ERR_MODULE_TERM,
221
+ PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
222
+ PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
223
+ PM_ERR_NOT_EXPRESSION,
224
+ PM_ERR_NO_LOCAL_VARIABLE,
225
+ PM_ERR_NUMBER_LITERAL_UNDERSCORE,
226
+ PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
227
+ PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
228
+ PM_ERR_OPERATOR_MULTI_ASSIGN,
229
+ PM_ERR_OPERATOR_WRITE_ARGUMENTS,
230
+ PM_ERR_OPERATOR_WRITE_BLOCK,
231
+ PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
232
+ PM_ERR_PARAMETER_BLOCK_MULTI,
233
+ PM_ERR_PARAMETER_CIRCULAR,
234
+ PM_ERR_PARAMETER_METHOD_NAME,
235
+ PM_ERR_PARAMETER_NAME_REPEAT,
236
+ PM_ERR_PARAMETER_NO_DEFAULT,
237
+ PM_ERR_PARAMETER_NO_DEFAULT_KW,
238
+ PM_ERR_PARAMETER_NUMBERED_RESERVED,
239
+ PM_ERR_PARAMETER_ORDER,
240
+ PM_ERR_PARAMETER_SPLAT_MULTI,
241
+ PM_ERR_PARAMETER_STAR,
242
+ PM_ERR_PARAMETER_UNEXPECTED_FWD,
243
+ PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
244
+ PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
245
+ PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
246
+ PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
247
+ PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
248
+ PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
249
+ PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
250
+ PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
251
+ PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
252
+ PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
253
+ PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
254
+ PM_ERR_PATTERN_HASH_KEY,
255
+ PM_ERR_PATTERN_HASH_KEY_LABEL,
256
+ PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
257
+ PM_ERR_PATTERN_LABEL_AFTER_COMMA,
258
+ PM_ERR_PATTERN_REST,
259
+ PM_ERR_PATTERN_TERM_BRACE,
260
+ PM_ERR_PATTERN_TERM_BRACKET,
261
+ PM_ERR_PATTERN_TERM_PAREN,
262
+ PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
263
+ PM_ERR_REGEXP_TERM,
264
+ PM_ERR_RESCUE_EXPRESSION,
265
+ PM_ERR_RESCUE_MODIFIER_VALUE,
266
+ PM_ERR_RESCUE_TERM,
267
+ PM_ERR_RESCUE_VARIABLE,
268
+ PM_ERR_RETURN_INVALID,
269
+ PM_ERR_SINGLETON_FOR_LITERALS,
270
+ PM_ERR_STATEMENT_ALIAS,
271
+ PM_ERR_STATEMENT_POSTEXE_END,
272
+ PM_ERR_STATEMENT_PREEXE_BEGIN,
273
+ PM_ERR_STATEMENT_UNDEF,
274
+ PM_ERR_STRING_CONCATENATION,
275
+ PM_ERR_STRING_INTERPOLATED_TERM,
276
+ PM_ERR_STRING_LITERAL_EOF,
277
+ PM_ERR_STRING_LITERAL_TERM,
278
+ PM_ERR_SYMBOL_INVALID,
279
+ PM_ERR_SYMBOL_TERM_DYNAMIC,
280
+ PM_ERR_SYMBOL_TERM_INTERPOLATED,
281
+ PM_ERR_TERNARY_COLON,
282
+ PM_ERR_TERNARY_EXPRESSION_FALSE,
283
+ PM_ERR_TERNARY_EXPRESSION_TRUE,
284
+ PM_ERR_UNARY_RECEIVER,
285
+ PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
286
+ PM_ERR_UNEXPECTED_TOKEN_IGNORE,
287
+ PM_ERR_UNDEF_ARGUMENT,
288
+ PM_ERR_UNTIL_TERM,
289
+ PM_ERR_VOID_EXPRESSION,
290
+ PM_ERR_WHILE_TERM,
291
+ PM_ERR_WRITE_TARGET_IN_METHOD,
292
+ PM_ERR_WRITE_TARGET_READONLY,
293
+ PM_ERR_WRITE_TARGET_UNEXPECTED,
294
+ PM_ERR_XSTRING_TERM,
295
+
296
+ // These are the warning codes.
297
+ PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
298
+ PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
299
+ PM_WARN_AMBIGUOUS_PREFIX_STAR,
300
+ PM_WARN_AMBIGUOUS_SLASH,
301
+ PM_WARN_END_IN_METHOD,
302
+
303
+ // This is the number of diagnostic codes.
304
+ PM_DIAGNOSTIC_ID_LEN,
305
+ } pm_diagnostic_id_t;
306
+
307
+ /**
308
+ * Append a diagnostic to the given list of diagnostics that is using shared
309
+ * memory for its message.
310
+ *
311
+ * @param list The list to append to.
312
+ * @param start The start of the diagnostic.
313
+ * @param end The end of the diagnostic.
314
+ * @param diag_id The diagnostic ID.
315
+ * @return Whether the diagnostic was successfully appended.
316
+ */
317
+ bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
318
+
319
+ /**
320
+ * Append a diagnostic to the given list of diagnostics that is using a format
321
+ * string for its message.
322
+ *
323
+ * @param list The list to append to.
324
+ * @param start The start of the diagnostic.
325
+ * @param end The end of the diagnostic.
326
+ * @param diag_id The diagnostic ID.
327
+ * @param ... The arguments to the format string for the message.
328
+ * @return Whether the diagnostic was successfully appended.
329
+ */
330
+ bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
331
+
332
+ /**
333
+ * Deallocate the internal state of the given diagnostic list.
334
+ *
335
+ * @param list The list to deallocate.
336
+ */
337
+ void pm_diagnostic_list_free(pm_list_t *list);
338
+
339
+ #endif
@@ -0,0 +1,265 @@
1
+ /**
2
+ * @file encoding.h
3
+ *
4
+ * The encoding interface and implementations used by the parser.
5
+ */
6
+ #ifndef PRISM_ENCODING_H
7
+ #define PRISM_ENCODING_H
8
+
9
+ #include "prism/defines.h"
10
+ #include "prism/util/pm_strncasecmp.h"
11
+
12
+ #include <assert.h>
13
+ #include <stdbool.h>
14
+ #include <stddef.h>
15
+ #include <stdint.h>
16
+
17
+ /**
18
+ * This struct defines the functions necessary to implement the encoding
19
+ * interface so we can determine how many bytes the subsequent character takes.
20
+ * Each callback should return the number of bytes, or 0 if the next bytes are
21
+ * invalid for the encoding and type.
22
+ */
23
+ typedef struct {
24
+ /**
25
+ * Return the number of bytes that the next character takes if it is valid
26
+ * in the encoding. Does not read more than n bytes. It is assumed that n is
27
+ * at least 1.
28
+ */
29
+ size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
30
+
31
+ /**
32
+ * Return the number of bytes that the next character takes if it is valid
33
+ * in the encoding and is alphabetical. Does not read more than n bytes. It
34
+ * is assumed that n is at least 1.
35
+ */
36
+ size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
37
+
38
+ /**
39
+ * Return the number of bytes that the next character takes if it is valid
40
+ * in the encoding and is alphanumeric. Does not read more than n bytes. It
41
+ * is assumed that n is at least 1.
42
+ */
43
+ size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
44
+
45
+ /**
46
+ * Return true if the next character is valid in the encoding and is an
47
+ * uppercase character. Does not read more than n bytes. It is assumed that
48
+ * n is at least 1.
49
+ */
50
+ bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
51
+
52
+ /**
53
+ * The name of the encoding. This should correspond to a value that can be
54
+ * passed to Encoding.find in Ruby.
55
+ */
56
+ const char *name;
57
+
58
+ /**
59
+ * True if the encoding is a multibyte encoding.
60
+ */
61
+ bool multibyte;
62
+ } pm_encoding_t;
63
+
64
+ /**
65
+ * All of the lookup tables use the first bit of each embedded byte to indicate
66
+ * whether the codepoint is alphabetical.
67
+ */
68
+ #define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)
69
+
70
+ /**
71
+ * All of the lookup tables use the second bit of each embedded byte to indicate
72
+ * whether the codepoint is alphanumeric.
73
+ */
74
+ #define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)
75
+
76
+ /**
77
+ * All of the lookup tables use the third bit of each embedded byte to indicate
78
+ * whether the codepoint is uppercase.
79
+ */
80
+ #define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
81
+
82
+ /**
83
+ * Return the size of the next character in the UTF-8 encoding.
84
+ *
85
+ * @param b The bytes to read.
86
+ * @param n The number of bytes that can be read.
87
+ * @returns The number of bytes that the next character takes if it is valid in
88
+ * the encoding, or 0 if it is not.
89
+ */
90
+ size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
91
+
92
+ /**
93
+ * Return the size of the next character in the UTF-8 encoding if it is an
94
+ * alphabetical character.
95
+ *
96
+ * @param b The bytes to read.
97
+ * @param n The number of bytes that can be read.
98
+ * @returns The number of bytes that the next character takes if it is valid in
99
+ * the encoding, or 0 if it is not.
100
+ */
101
+ size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
102
+
103
+ /**
104
+ * Return the size of the next character in the UTF-8 encoding if it is an
105
+ * alphanumeric character.
106
+ *
107
+ * @param b The bytes to read.
108
+ * @param n The number of bytes that can be read.
109
+ * @returns The number of bytes that the next character takes if it is valid in
110
+ * the encoding, or 0 if it is not.
111
+ */
112
+ size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
113
+
114
+ /**
115
+ * Return true if the next character in the UTF-8 encoding is an uppercase
116
+ * character.
117
+ *
118
+ * @param b The bytes to read.
119
+ * @param n The number of bytes that can be read.
120
+ * @returns True if the next character is valid in the encoding and is an
121
+ * uppercase character, or false if it is not.
122
+ */
123
+ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
124
+
125
+ /**
126
+ * This lookup table is referenced in both the UTF-8 encoding file and the
127
+ * parser directly in order to speed up the default encoding processing. It is
128
+ * used to indicate whether a character is alphabetical, alphanumeric, or
129
+ * uppercase in unicode mappings.
130
+ */
131
+ extern const uint8_t pm_encoding_unicode_table[256];
132
+
133
+ /**
134
+ * These are all of the encodings that prism supports.
135
+ */
136
+ typedef enum {
137
+ PM_ENCODING_UTF_8 = 0,
138
+ PM_ENCODING_ASCII_8BIT,
139
+ PM_ENCODING_BIG5,
140
+ PM_ENCODING_BIG5_HKSCS,
141
+ PM_ENCODING_BIG5_UAO,
142
+ PM_ENCODING_CESU_8,
143
+ PM_ENCODING_CP51932,
144
+ PM_ENCODING_CP850,
145
+ PM_ENCODING_CP852,
146
+ PM_ENCODING_CP855,
147
+ PM_ENCODING_CP949,
148
+ PM_ENCODING_CP950,
149
+ PM_ENCODING_CP951,
150
+ PM_ENCODING_EMACS_MULE,
151
+ PM_ENCODING_EUC_JP,
152
+ PM_ENCODING_EUC_JP_MS,
153
+ PM_ENCODING_EUC_JIS_2004,
154
+ PM_ENCODING_EUC_KR,
155
+ PM_ENCODING_EUC_TW,
156
+ PM_ENCODING_GB12345,
157
+ PM_ENCODING_GB18030,
158
+ PM_ENCODING_GB1988,
159
+ PM_ENCODING_GB2312,
160
+ PM_ENCODING_GBK,
161
+ PM_ENCODING_IBM437,
162
+ PM_ENCODING_IBM720,
163
+ PM_ENCODING_IBM737,
164
+ PM_ENCODING_IBM775,
165
+ PM_ENCODING_IBM852,
166
+ PM_ENCODING_IBM855,
167
+ PM_ENCODING_IBM857,
168
+ PM_ENCODING_IBM860,
169
+ PM_ENCODING_IBM861,
170
+ PM_ENCODING_IBM862,
171
+ PM_ENCODING_IBM863,
172
+ PM_ENCODING_IBM864,
173
+ PM_ENCODING_IBM865,
174
+ PM_ENCODING_IBM866,
175
+ PM_ENCODING_IBM869,
176
+ PM_ENCODING_ISO_8859_1,
177
+ PM_ENCODING_ISO_8859_2,
178
+ PM_ENCODING_ISO_8859_3,
179
+ PM_ENCODING_ISO_8859_4,
180
+ PM_ENCODING_ISO_8859_5,
181
+ PM_ENCODING_ISO_8859_6,
182
+ PM_ENCODING_ISO_8859_7,
183
+ PM_ENCODING_ISO_8859_8,
184
+ PM_ENCODING_ISO_8859_9,
185
+ PM_ENCODING_ISO_8859_10,
186
+ PM_ENCODING_ISO_8859_11,
187
+ PM_ENCODING_ISO_8859_13,
188
+ PM_ENCODING_ISO_8859_14,
189
+ PM_ENCODING_ISO_8859_15,
190
+ PM_ENCODING_ISO_8859_16,
191
+ PM_ENCODING_KOI8_R,
192
+ PM_ENCODING_KOI8_U,
193
+ PM_ENCODING_MAC_CENT_EURO,
194
+ PM_ENCODING_MAC_CROATIAN,
195
+ PM_ENCODING_MAC_CYRILLIC,
196
+ PM_ENCODING_MAC_GREEK,
197
+ PM_ENCODING_MAC_ICELAND,
198
+ PM_ENCODING_MAC_JAPANESE,
199
+ PM_ENCODING_MAC_ROMAN,
200
+ PM_ENCODING_MAC_ROMANIA,
201
+ PM_ENCODING_MAC_THAI,
202
+ PM_ENCODING_MAC_TURKISH,
203
+ PM_ENCODING_MAC_UKRAINE,
204
+ PM_ENCODING_SHIFT_JIS,
205
+ PM_ENCODING_SJIS_DOCOMO,
206
+ PM_ENCODING_SJIS_KDDI,
207
+ PM_ENCODING_SJIS_SOFTBANK,
208
+ PM_ENCODING_STATELESS_ISO_2022_JP,
209
+ PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
210
+ PM_ENCODING_TIS_620,
211
+ PM_ENCODING_US_ASCII,
212
+ PM_ENCODING_UTF8_MAC,
213
+ PM_ENCODING_UTF8_DOCOMO,
214
+ PM_ENCODING_UTF8_KDDI,
215
+ PM_ENCODING_UTF8_SOFTBANK,
216
+ PM_ENCODING_WINDOWS_1250,
217
+ PM_ENCODING_WINDOWS_1251,
218
+ PM_ENCODING_WINDOWS_1252,
219
+ PM_ENCODING_WINDOWS_1253,
220
+ PM_ENCODING_WINDOWS_1254,
221
+ PM_ENCODING_WINDOWS_1255,
222
+ PM_ENCODING_WINDOWS_1256,
223
+ PM_ENCODING_WINDOWS_1257,
224
+ PM_ENCODING_WINDOWS_1258,
225
+ PM_ENCODING_WINDOWS_31J,
226
+ PM_ENCODING_WINDOWS_874,
227
+ PM_ENCODING_MAXIMUM
228
+ } pm_encoding_type_t;
229
+
230
+ /**
231
+ * This is the table of all of the encodings that prism supports.
232
+ */
233
+ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
234
+
235
+ /**
236
+ * This is the default UTF-8 encoding. We need a reference to it to quickly
237
+ * create parsers.
238
+ */
239
+ #define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
240
+
241
+ /**
242
+ * This is the US-ASCII encoding. We need a reference to it to be able to
243
+ * compare against it when a string is being created because it could possibly
244
+ * need to fall back to ASCII-8BIT.
245
+ */
246
+ #define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
247
+
248
+ /**
249
+ * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
250
+ * can compare against it because invalid multibyte characters are not a thing
251
+ * in this encoding.
252
+ */
253
+ #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
254
+
255
+ /**
256
+ * Parse the given name of an encoding and return a pointer to the corresponding
257
+ * encoding struct if one can be found, otherwise return NULL.
258
+ *
259
+ * @param start A pointer to the first byte of the name.
260
+ * @param end A pointer to the last byte of the name.
261
+ * @returns A pointer to the encoding struct if one is found, otherwise NULL.
262
+ */
263
+ const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
264
+
265
+ #endif