jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
@@ -0,0 +1,102 @@
1
+ /**
2
+ * @file defines.h
3
+ *
4
+ * Macro definitions used throughout the prism library.
5
+ *
6
+ * This file should be included first by any *.h or *.c in prism for consistency
7
+ * and to ensure that the macros are defined before they are used.
8
+ */
9
+ #ifndef PRISM_DEFINES_H
10
+ #define PRISM_DEFINES_H
11
+
12
+ #include <ctype.h>
13
+ #include <stdarg.h>
14
+ #include <stddef.h>
15
+ #include <stdint.h>
16
+ #include <stdio.h>
17
+ #include <string.h>
18
+
19
+ /**
20
+ * We want to be able to use the PRI* macros for printing out integers, but on
21
+ * some platforms they aren't included unless this is already defined.
22
+ */
23
+ #define __STDC_FORMAT_MACROS
24
+
25
+ #include <inttypes.h>
26
+
27
+ /**
28
+ * By default, we compile with -fvisibility=hidden. When this is enabled, we
29
+ * need to mark certain functions as being publicly visible. This macro does
30
+ * that in a compiler-agnostic way.
31
+ */
32
+ #ifndef PRISM_EXPORTED_FUNCTION
33
+ # ifdef PRISM_EXPORT_SYMBOLS
34
+ # ifdef _WIN32
35
+ # define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
36
+ # else
37
+ # define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
38
+ # endif
39
+ # else
40
+ # define PRISM_EXPORTED_FUNCTION
41
+ # endif
42
+ #endif
43
+
44
/**
 * Certain compilers support specifying that a function accepts variadic
 * parameters that look like printf format strings to provide a better developer
 * experience when someone is using the function. This macro does that in a
 * compiler-agnostic way.
 *
 * The reserved (double-underscore) spellings of the attribute and archetype
 * names are used so that a user macro named `format` or `printf` cannot break
 * the expansion of this header.
 *
 * @param string_index The 1-based index of the format string parameter.
 * @param argument_index The 1-based index of the first variadic argument.
 */
#if defined(__GNUC__) || defined(__clang__)
#   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index)))
#else
#   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
#endif
57
+
58
+ /**
59
+ * GCC will warn if you specify a function or parameter that is unused at
60
+ * runtime. This macro allows you to mark a function or parameter as unused in a
61
+ * compiler-agnostic way.
62
+ */
63
+ #if defined(__GNUC__)
64
+ # define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
65
+ #else
66
+ # define PRISM_ATTRIBUTE_UNUSED
67
+ #endif
68
+
69
+ /**
70
+ * Old Visual Studio versions do not support the inline keyword, so we need to
71
+ * define it to be __inline.
72
+ */
73
+ #if defined(_MSC_VER) && !defined(inline)
74
+ # define inline __inline
75
+ #endif
76
+
77
+ /**
78
+ * Old Visual Studio versions before 2015 do not implement snprintf, but instead
79
+ * implement _snprintf. We standardize that here.
80
+ */
81
+ #if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
82
+ # define snprintf _snprintf
83
+ #endif
84
+
85
+ /**
86
+ * A simple utility macro to concatenate two tokens together, necessary when one
87
+ * of the tokens is itself a macro.
88
+ */
89
+ #define PM_CONCATENATE(left, right) left ## right
90
+
91
/**
 * We want to be able to use static assertions, but they weren't standardized
 * until C11. As such, we polyfill it here by making a hacky typedef that will
 * fail to compile due to a negative array size if the condition is false.
 *
 * `_Static_assert` is a keyword rather than a macro, so it cannot be detected
 * with `defined()`. Instead we check the language version that the compiler
 * reports through `__STDC_VERSION__`.
 *
 * @param line A token used to give the fallback typedef a unique name (for
 *     example `__LINE__`). It is unused when `_Static_assert` is available.
 * @param condition The integer constant expression that must be non-zero.
 * @param message A string literal describing the assertion. It is only
 *     reported by the `_Static_assert` form.
 */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
#   define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
#else
#   define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
#endif
101
+
102
+ #endif
@@ -0,0 +1,339 @@
1
+ /**
2
+ * @file diagnostic.h
3
+ *
4
+ * A list of diagnostics generated during parsing.
5
+ */
6
+ #ifndef PRISM_DIAGNOSTIC_H
7
+ #define PRISM_DIAGNOSTIC_H
8
+
9
+ #include "prism/ast.h"
10
+ #include "prism/defines.h"
11
+ #include "prism/util/pm_list.h"
12
+
13
+ #include <stdbool.h>
14
+ #include <stdlib.h>
15
+ #include <assert.h>
16
+
17
+ /**
18
+ * The levels of errors generated during parsing.
19
+ */
20
+ typedef enum {
21
+ /** For errors that cannot be recovered from. */
22
+ PM_ERROR_LEVEL_FATAL = 0,
23
+
24
+ /** For errors that should raise an argument error. */
25
+ PM_ERROR_LEVEL_ARGUMENT = 1
26
+ } pm_error_level_t;
27
+
28
+ /**
29
+ * The levels of warnings generated during parsing.
30
+ */
31
+ typedef enum {
32
+ /** For warnings which should be emitted if $VERBOSE != nil. */
33
+ PM_WARNING_LEVEL_DEFAULT = 0,
34
+
35
+ /** For warnings which should be emitted if $VERBOSE == true. */
36
+ PM_WARNING_LEVEL_VERBOSE = 1
37
+ } pm_warning_level_t;
38
+
39
+ /**
40
+ * This struct represents a diagnostic generated during parsing.
41
+ *
42
+ * @extends pm_list_node_t
43
+ */
44
+ typedef struct {
45
+ /** The embedded base node. */
46
+ pm_list_node_t node;
47
+
48
+ /** The location of the diagnostic in the source. */
49
+ pm_location_t location;
50
+
51
+ /** The message associated with the diagnostic. */
52
+ const char *message;
53
+
54
+ /**
55
+ * Whether or not the memory related to the message of this diagnostic is
56
+ * owned by this diagnostic. If it is, it needs to be freed when the
57
+ * diagnostic is freed.
58
+ */
59
+ bool owned;
60
+
61
+ /**
62
+ * The level of the diagnostic, see `pm_error_level_t` and
63
+ * `pm_warning_level_t` for possible values.
64
+ */
65
+ uint8_t level;
66
+ } pm_diagnostic_t;
67
+
68
+ /**
69
+ * The diagnostic IDs of all of the diagnostics, used to communicate the types
70
+ * of errors between the parser and the user.
71
+ */
72
+ typedef enum {
73
+ // This is a special error that we can potentially replace by others. For
74
+ // an example of how this is used, see parse_expression_prefix.
75
+ PM_ERR_CANNOT_PARSE_EXPRESSION,
76
+
77
+ // These are the error codes.
78
+ PM_ERR_ALIAS_ARGUMENT,
79
+ PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
80
+ PM_ERR_ARGUMENT_AFTER_BLOCK,
81
+ PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
82
+ PM_ERR_ARGUMENT_BARE_HASH,
83
+ PM_ERR_ARGUMENT_BLOCK_FORWARDING,
84
+ PM_ERR_ARGUMENT_BLOCK_MULTI,
85
+ PM_ERR_ARGUMENT_FORMAL_CLASS,
86
+ PM_ERR_ARGUMENT_FORMAL_CONSTANT,
87
+ PM_ERR_ARGUMENT_FORMAL_GLOBAL,
88
+ PM_ERR_ARGUMENT_FORMAL_IVAR,
89
+ PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
90
+ PM_ERR_ARGUMENT_IN,
91
+ PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
92
+ PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
93
+ PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
94
+ PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
95
+ PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
96
+ PM_ERR_ARGUMENT_TERM_PAREN,
97
+ PM_ERR_ARGUMENT_UNEXPECTED_BLOCK,
98
+ PM_ERR_ARRAY_ELEMENT,
99
+ PM_ERR_ARRAY_EXPRESSION,
100
+ PM_ERR_ARRAY_EXPRESSION_AFTER_STAR,
101
+ PM_ERR_ARRAY_SEPARATOR,
102
+ PM_ERR_ARRAY_TERM,
103
+ PM_ERR_BEGIN_LONELY_ELSE,
104
+ PM_ERR_BEGIN_TERM,
105
+ PM_ERR_BEGIN_UPCASE_BRACE,
106
+ PM_ERR_BEGIN_UPCASE_TERM,
107
+ PM_ERR_BEGIN_UPCASE_TOPLEVEL,
108
+ PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE,
109
+ PM_ERR_BLOCK_PARAM_PIPE_TERM,
110
+ PM_ERR_BLOCK_TERM_BRACE,
111
+ PM_ERR_BLOCK_TERM_END,
112
+ PM_ERR_CANNOT_PARSE_STRING_PART,
113
+ PM_ERR_CASE_EXPRESSION_AFTER_CASE,
114
+ PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
115
+ PM_ERR_CASE_MATCH_MISSING_PREDICATE,
116
+ PM_ERR_CASE_MISSING_CONDITIONS,
117
+ PM_ERR_CASE_TERM,
118
+ PM_ERR_CLASS_IN_METHOD,
119
+ PM_ERR_CLASS_NAME,
120
+ PM_ERR_CLASS_SUPERCLASS,
121
+ PM_ERR_CLASS_TERM,
122
+ PM_ERR_CLASS_UNEXPECTED_END,
123
+ PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
124
+ PM_ERR_CONDITIONAL_IF_PREDICATE,
125
+ PM_ERR_CONDITIONAL_PREDICATE_TERM,
126
+ PM_ERR_CONDITIONAL_TERM,
127
+ PM_ERR_CONDITIONAL_TERM_ELSE,
128
+ PM_ERR_CONDITIONAL_UNLESS_PREDICATE,
129
+ PM_ERR_CONDITIONAL_UNTIL_PREDICATE,
130
+ PM_ERR_CONDITIONAL_WHILE_PREDICATE,
131
+ PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT,
132
+ PM_ERR_DEF_ENDLESS,
133
+ PM_ERR_DEF_ENDLESS_SETTER,
134
+ PM_ERR_DEF_NAME,
135
+ PM_ERR_DEF_NAME_AFTER_RECEIVER,
136
+ PM_ERR_DEF_PARAMS_TERM,
137
+ PM_ERR_DEF_PARAMS_TERM_PAREN,
138
+ PM_ERR_DEF_RECEIVER,
139
+ PM_ERR_DEF_RECEIVER_TERM,
140
+ PM_ERR_DEF_TERM,
141
+ PM_ERR_DEFINED_EXPRESSION,
142
+ PM_ERR_EMBDOC_TERM,
143
+ PM_ERR_EMBEXPR_END,
144
+ PM_ERR_EMBVAR_INVALID,
145
+ PM_ERR_END_UPCASE_BRACE,
146
+ PM_ERR_END_UPCASE_TERM,
147
+ PM_ERR_ESCAPE_INVALID_CONTROL,
148
+ PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT,
149
+ PM_ERR_ESCAPE_INVALID_HEXADECIMAL,
150
+ PM_ERR_ESCAPE_INVALID_META,
151
+ PM_ERR_ESCAPE_INVALID_META_REPEAT,
152
+ PM_ERR_ESCAPE_INVALID_UNICODE,
153
+ PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
154
+ PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
155
+ PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
156
+ PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
157
+ PM_ERR_EXPECT_ARGUMENT,
158
+ PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
159
+ PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
160
+ PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
161
+ PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
162
+ PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
163
+ PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
164
+ PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
165
+ PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
166
+ PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
167
+ PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
168
+ PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
169
+ PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
170
+ PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
171
+ PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
172
+ PM_ERR_EXPECT_RBRACKET,
173
+ PM_ERR_EXPECT_RPAREN,
174
+ PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
175
+ PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
176
+ PM_ERR_EXPECT_STRING_CONTENT,
177
+ PM_ERR_EXPECT_WHEN_DELIMITER,
178
+ PM_ERR_EXPRESSION_BARE_HASH,
179
+ PM_ERR_FOR_COLLECTION,
180
+ PM_ERR_FOR_IN,
181
+ PM_ERR_FOR_INDEX,
182
+ PM_ERR_FOR_TERM,
183
+ PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
184
+ PM_ERR_HASH_KEY,
185
+ PM_ERR_HASH_ROCKET,
186
+ PM_ERR_HASH_TERM,
187
+ PM_ERR_HASH_VALUE,
188
+ PM_ERR_HEREDOC_TERM,
189
+ PM_ERR_INCOMPLETE_QUESTION_MARK,
190
+ PM_ERR_INCOMPLETE_VARIABLE_CLASS,
191
+ PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
192
+ PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
193
+ PM_ERR_INVALID_FLOAT_EXPONENT,
194
+ PM_ERR_INVALID_NUMBER_BINARY,
195
+ PM_ERR_INVALID_NUMBER_DECIMAL,
196
+ PM_ERR_INVALID_NUMBER_HEXADECIMAL,
197
+ PM_ERR_INVALID_NUMBER_OCTAL,
198
+ PM_ERR_INVALID_NUMBER_UNDERSCORE,
199
+ PM_ERR_INVALID_CHARACTER,
200
+ PM_ERR_INVALID_MULTIBYTE_CHARACTER,
201
+ PM_ERR_INVALID_PRINTABLE_CHARACTER,
202
+ PM_ERR_INVALID_PERCENT,
203
+ PM_ERR_INVALID_VARIABLE_GLOBAL,
204
+ PM_ERR_IT_NOT_ALLOWED,
205
+ PM_ERR_LAMBDA_OPEN,
206
+ PM_ERR_LAMBDA_TERM_BRACE,
207
+ PM_ERR_LAMBDA_TERM_END,
208
+ PM_ERR_LIST_I_LOWER_ELEMENT,
209
+ PM_ERR_LIST_I_LOWER_TERM,
210
+ PM_ERR_LIST_I_UPPER_ELEMENT,
211
+ PM_ERR_LIST_I_UPPER_TERM,
212
+ PM_ERR_LIST_W_LOWER_ELEMENT,
213
+ PM_ERR_LIST_W_LOWER_TERM,
214
+ PM_ERR_LIST_W_UPPER_ELEMENT,
215
+ PM_ERR_LIST_W_UPPER_TERM,
216
+ PM_ERR_MALLOC_FAILED,
217
+ PM_ERR_MIXED_ENCODING,
218
+ PM_ERR_MODULE_IN_METHOD,
219
+ PM_ERR_MODULE_NAME,
220
+ PM_ERR_MODULE_TERM,
221
+ PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
222
+ PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
223
+ PM_ERR_NOT_EXPRESSION,
224
+ PM_ERR_NO_LOCAL_VARIABLE,
225
+ PM_ERR_NUMBER_LITERAL_UNDERSCORE,
226
+ PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
227
+ PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
228
+ PM_ERR_OPERATOR_MULTI_ASSIGN,
229
+ PM_ERR_OPERATOR_WRITE_ARGUMENTS,
230
+ PM_ERR_OPERATOR_WRITE_BLOCK,
231
+ PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
232
+ PM_ERR_PARAMETER_BLOCK_MULTI,
233
+ PM_ERR_PARAMETER_CIRCULAR,
234
+ PM_ERR_PARAMETER_METHOD_NAME,
235
+ PM_ERR_PARAMETER_NAME_REPEAT,
236
+ PM_ERR_PARAMETER_NO_DEFAULT,
237
+ PM_ERR_PARAMETER_NO_DEFAULT_KW,
238
+ PM_ERR_PARAMETER_NUMBERED_RESERVED,
239
+ PM_ERR_PARAMETER_ORDER,
240
+ PM_ERR_PARAMETER_SPLAT_MULTI,
241
+ PM_ERR_PARAMETER_STAR,
242
+ PM_ERR_PARAMETER_UNEXPECTED_FWD,
243
+ PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
244
+ PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
245
+ PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
246
+ PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
247
+ PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
248
+ PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
249
+ PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
250
+ PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
251
+ PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
252
+ PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
253
+ PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
254
+ PM_ERR_PATTERN_HASH_KEY,
255
+ PM_ERR_PATTERN_HASH_KEY_LABEL,
256
+ PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
257
+ PM_ERR_PATTERN_LABEL_AFTER_COMMA,
258
+ PM_ERR_PATTERN_REST,
259
+ PM_ERR_PATTERN_TERM_BRACE,
260
+ PM_ERR_PATTERN_TERM_BRACKET,
261
+ PM_ERR_PATTERN_TERM_PAREN,
262
+ PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
263
+ PM_ERR_REGEXP_TERM,
264
+ PM_ERR_RESCUE_EXPRESSION,
265
+ PM_ERR_RESCUE_MODIFIER_VALUE,
266
+ PM_ERR_RESCUE_TERM,
267
+ PM_ERR_RESCUE_VARIABLE,
268
+ PM_ERR_RETURN_INVALID,
269
+ PM_ERR_SINGLETON_FOR_LITERALS,
270
+ PM_ERR_STATEMENT_ALIAS,
271
+ PM_ERR_STATEMENT_POSTEXE_END,
272
+ PM_ERR_STATEMENT_PREEXE_BEGIN,
273
+ PM_ERR_STATEMENT_UNDEF,
274
+ PM_ERR_STRING_CONCATENATION,
275
+ PM_ERR_STRING_INTERPOLATED_TERM,
276
+ PM_ERR_STRING_LITERAL_EOF,
277
+ PM_ERR_STRING_LITERAL_TERM,
278
+ PM_ERR_SYMBOL_INVALID,
279
+ PM_ERR_SYMBOL_TERM_DYNAMIC,
280
+ PM_ERR_SYMBOL_TERM_INTERPOLATED,
281
+ PM_ERR_TERNARY_COLON,
282
+ PM_ERR_TERNARY_EXPRESSION_FALSE,
283
+ PM_ERR_TERNARY_EXPRESSION_TRUE,
284
+ PM_ERR_UNARY_RECEIVER,
285
+ PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
286
+ PM_ERR_UNEXPECTED_TOKEN_IGNORE,
287
+ PM_ERR_UNDEF_ARGUMENT,
288
+ PM_ERR_UNTIL_TERM,
289
+ PM_ERR_VOID_EXPRESSION,
290
+ PM_ERR_WHILE_TERM,
291
+ PM_ERR_WRITE_TARGET_IN_METHOD,
292
+ PM_ERR_WRITE_TARGET_READONLY,
293
+ PM_ERR_WRITE_TARGET_UNEXPECTED,
294
+ PM_ERR_XSTRING_TERM,
295
+
296
+ // These are the warning codes.
297
+ PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
298
+ PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
299
+ PM_WARN_AMBIGUOUS_PREFIX_STAR,
300
+ PM_WARN_AMBIGUOUS_SLASH,
301
+ PM_WARN_END_IN_METHOD,
302
+
303
+ // This is the number of diagnostic codes.
304
+ PM_DIAGNOSTIC_ID_LEN,
305
+ } pm_diagnostic_id_t;
306
+
307
+ /**
308
+ * Append a diagnostic to the given list of diagnostics that is using shared
309
+ * memory for its message.
310
+ *
311
+ * @param list The list to append to.
312
+ * @param start The start of the diagnostic.
313
+ * @param end The end of the diagnostic.
314
+ * @param diag_id The diagnostic ID.
315
+ * @return Whether the diagnostic was successfully appended.
316
+ */
317
+ bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
318
+
319
+ /**
320
+ * Append a diagnostic to the given list of diagnostics that is using a format
321
+ * string for its message.
322
+ *
323
+ * @param list The list to append to.
324
+ * @param start The start of the diagnostic.
325
+ * @param end The end of the diagnostic.
326
+ * @param diag_id The diagnostic ID.
327
+ * @param ... The arguments to the format string for the message.
328
+ * @return Whether the diagnostic was successfully appended.
329
+ */
330
+ bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
331
+
332
+ /**
333
+ * Deallocate the internal state of the given diagnostic list.
334
+ *
335
+ * @param list The list to deallocate.
336
+ */
337
+ void pm_diagnostic_list_free(pm_list_t *list);
338
+
339
+ #endif
@@ -0,0 +1,265 @@
1
+ /**
2
+ * @file encoding.h
3
+ *
4
+ * The encoding interface and implementations used by the parser.
5
+ */
6
+ #ifndef PRISM_ENCODING_H
7
+ #define PRISM_ENCODING_H
8
+
9
+ #include "prism/defines.h"
10
+ #include "prism/util/pm_strncasecmp.h"
11
+
12
+ #include <assert.h>
13
+ #include <stdbool.h>
14
+ #include <stddef.h>
15
+ #include <stdint.h>
16
+
17
+ /**
18
+ * This struct defines the functions necessary to implement the encoding
19
+ * interface so we can determine how many bytes the subsequent character takes.
20
+ * Each callback should return the number of bytes, or 0 if the next bytes are
21
+ * invalid for the encoding and type.
22
+ */
23
+ typedef struct {
24
+ /**
25
+ * Return the number of bytes that the next character takes if it is valid
26
+ * in the encoding. Does not read more than n bytes. It is assumed that n is
27
+ * at least 1.
28
+ */
29
+ size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
30
+
31
+ /**
32
+ * Return the number of bytes that the next character takes if it is valid
33
+ * in the encoding and is alphabetical. Does not read more than n bytes. It
34
+ * is assumed that n is at least 1.
35
+ */
36
+ size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
37
+
38
+ /**
39
+ * Return the number of bytes that the next character takes if it is valid
40
+ * in the encoding and is alphanumeric. Does not read more than n bytes. It
41
+ * is assumed that n is at least 1.
42
+ */
43
+ size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
44
+
45
+ /**
46
+ * Return true if the next character is valid in the encoding and is an
47
+ * uppercase character. Does not read more than n bytes. It is assumed that
48
+ * n is at least 1.
49
+ */
50
+ bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
51
+
52
+ /**
53
+ * The name of the encoding. This should correspond to a value that can be
54
+ * passed to Encoding.find in Ruby.
55
+ */
56
+ const char *name;
57
+
58
+ /**
59
+ * True if the encoding is a multibyte encoding.
60
+ */
61
+ bool multibyte;
62
+ } pm_encoding_t;
63
+
64
/**
 * All of the lookup tables use the first (lowest-order) bit of each embedded
 * byte to indicate whether the codepoint is alphabetical.
 *
 * The expansion is parenthesized so that the shift cannot be re-associated by
 * neighbouring operators at the point of use.
 */
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)
69
+
70
/**
 * All of the lookup tables use the second bit of each embedded byte to indicate
 * whether the codepoint is alphanumeric.
 *
 * The expansion is parenthesized so that the shift cannot be re-associated by
 * neighbouring operators at the point of use.
 */
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)
75
+
76
/**
 * All of the lookup tables use the third bit of each embedded byte to indicate
 * whether the codepoint is uppercase.
 *
 * The expansion is parenthesized so that the shift cannot be re-associated by
 * neighbouring operators at the point of use.
 */
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
81
+
82
+ /**
83
+ * Return the size of the next character in the UTF-8 encoding.
84
+ *
85
+ * @param b The bytes to read.
86
+ * @param n The number of bytes that can be read.
87
+ * @returns The number of bytes that the next character takes if it is valid in
88
+ * the encoding, or 0 if it is not.
89
+ */
90
+ size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
91
+
92
+ /**
93
+ * Return the size of the next character in the UTF-8 encoding if it is an
94
+ * alphabetical character.
95
+ *
96
+ * @param b The bytes to read.
97
+ * @param n The number of bytes that can be read.
98
+ * @returns The number of bytes that the next character takes if it is valid in
99
+ * the encoding, or 0 if it is not.
100
+ */
101
+ size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
102
+
103
+ /**
104
+ * Return the size of the next character in the UTF-8 encoding if it is an
105
+ * alphanumeric character.
106
+ *
107
+ * @param b The bytes to read.
108
+ * @param n The number of bytes that can be read.
109
+ * @returns The number of bytes that the next character takes if it is valid in
110
+ * the encoding, or 0 if it is not.
111
+ */
112
+ size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
113
+
114
+ /**
115
+ * Return true if the next character in the UTF-8 encoding is an uppercase
116
+ * character.
117
+ *
118
+ * @param b The bytes to read.
119
+ * @param n The number of bytes that can be read.
120
+ * @returns True if the next character is valid in the encoding and is an
121
+ * uppercase character, or false if it is not.
122
+ */
123
+ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
124
+
125
+ /**
126
+ * This lookup table is referenced in both the UTF-8 encoding file and the
127
+ * parser directly in order to speed up the default encoding processing. It is
128
+ * used to indicate whether a character is alphabetical, alphanumeric, or
129
+ * uppercase in unicode mappings.
130
+ */
131
+ extern const uint8_t pm_encoding_unicode_table[256];
132
+
133
+ /**
134
+ * These are all of the encodings that prism supports.
135
+ */
136
+ typedef enum {
137
+ PM_ENCODING_UTF_8 = 0,
138
+ PM_ENCODING_ASCII_8BIT,
139
+ PM_ENCODING_BIG5,
140
+ PM_ENCODING_BIG5_HKSCS,
141
+ PM_ENCODING_BIG5_UAO,
142
+ PM_ENCODING_CESU_8,
143
+ PM_ENCODING_CP51932,
144
+ PM_ENCODING_CP850,
145
+ PM_ENCODING_CP852,
146
+ PM_ENCODING_CP855,
147
+ PM_ENCODING_CP949,
148
+ PM_ENCODING_CP950,
149
+ PM_ENCODING_CP951,
150
+ PM_ENCODING_EMACS_MULE,
151
+ PM_ENCODING_EUC_JP,
152
+ PM_ENCODING_EUC_JP_MS,
153
+ PM_ENCODING_EUC_JIS_2004,
154
+ PM_ENCODING_EUC_KR,
155
+ PM_ENCODING_EUC_TW,
156
+ PM_ENCODING_GB12345,
157
+ PM_ENCODING_GB18030,
158
+ PM_ENCODING_GB1988,
159
+ PM_ENCODING_GB2312,
160
+ PM_ENCODING_GBK,
161
+ PM_ENCODING_IBM437,
162
+ PM_ENCODING_IBM720,
163
+ PM_ENCODING_IBM737,
164
+ PM_ENCODING_IBM775,
165
+ PM_ENCODING_IBM852,
166
+ PM_ENCODING_IBM855,
167
+ PM_ENCODING_IBM857,
168
+ PM_ENCODING_IBM860,
169
+ PM_ENCODING_IBM861,
170
+ PM_ENCODING_IBM862,
171
+ PM_ENCODING_IBM863,
172
+ PM_ENCODING_IBM864,
173
+ PM_ENCODING_IBM865,
174
+ PM_ENCODING_IBM866,
175
+ PM_ENCODING_IBM869,
176
+ PM_ENCODING_ISO_8859_1,
177
+ PM_ENCODING_ISO_8859_2,
178
+ PM_ENCODING_ISO_8859_3,
179
+ PM_ENCODING_ISO_8859_4,
180
+ PM_ENCODING_ISO_8859_5,
181
+ PM_ENCODING_ISO_8859_6,
182
+ PM_ENCODING_ISO_8859_7,
183
+ PM_ENCODING_ISO_8859_8,
184
+ PM_ENCODING_ISO_8859_9,
185
+ PM_ENCODING_ISO_8859_10,
186
+ PM_ENCODING_ISO_8859_11,
187
+ PM_ENCODING_ISO_8859_13,
188
+ PM_ENCODING_ISO_8859_14,
189
+ PM_ENCODING_ISO_8859_15,
190
+ PM_ENCODING_ISO_8859_16,
191
+ PM_ENCODING_KOI8_R,
192
+ PM_ENCODING_KOI8_U,
193
+ PM_ENCODING_MAC_CENT_EURO,
194
+ PM_ENCODING_MAC_CROATIAN,
195
+ PM_ENCODING_MAC_CYRILLIC,
196
+ PM_ENCODING_MAC_GREEK,
197
+ PM_ENCODING_MAC_ICELAND,
198
+ PM_ENCODING_MAC_JAPANESE,
199
+ PM_ENCODING_MAC_ROMAN,
200
+ PM_ENCODING_MAC_ROMANIA,
201
+ PM_ENCODING_MAC_THAI,
202
+ PM_ENCODING_MAC_TURKISH,
203
+ PM_ENCODING_MAC_UKRAINE,
204
+ PM_ENCODING_SHIFT_JIS,
205
+ PM_ENCODING_SJIS_DOCOMO,
206
+ PM_ENCODING_SJIS_KDDI,
207
+ PM_ENCODING_SJIS_SOFTBANK,
208
+ PM_ENCODING_STATELESS_ISO_2022_JP,
209
+ PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
210
+ PM_ENCODING_TIS_620,
211
+ PM_ENCODING_US_ASCII,
212
+ PM_ENCODING_UTF8_MAC,
213
+ PM_ENCODING_UTF8_DOCOMO,
214
+ PM_ENCODING_UTF8_KDDI,
215
+ PM_ENCODING_UTF8_SOFTBANK,
216
+ PM_ENCODING_WINDOWS_1250,
217
+ PM_ENCODING_WINDOWS_1251,
218
+ PM_ENCODING_WINDOWS_1252,
219
+ PM_ENCODING_WINDOWS_1253,
220
+ PM_ENCODING_WINDOWS_1254,
221
+ PM_ENCODING_WINDOWS_1255,
222
+ PM_ENCODING_WINDOWS_1256,
223
+ PM_ENCODING_WINDOWS_1257,
224
+ PM_ENCODING_WINDOWS_1258,
225
+ PM_ENCODING_WINDOWS_31J,
226
+ PM_ENCODING_WINDOWS_874,
227
+ PM_ENCODING_MAXIMUM
228
+ } pm_encoding_type_t;
229
+
230
+ /**
231
+ * This is the table of all of the encodings that prism supports.
232
+ */
233
+ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
234
+
235
+ /**
236
+ * This is the default UTF-8 encoding. We need a reference to it to quickly
237
+ * create parsers.
238
+ */
239
+ #define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
240
+
241
+ /**
242
+ * This is the US-ASCII encoding. We need a reference to it to be able to
243
+ * compare against it when a string is being created because it could possibly
244
+ * need to fall back to ASCII-8BIT.
245
+ */
246
+ #define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
247
+
248
+ /**
249
+ * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
250
+ * can compare against it because invalid multibyte characters are not a thing
251
+ * in this encoding.
252
+ */
253
+ #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
254
+
255
+ /**
256
+ * Parse the given name of an encoding and return a pointer to the corresponding
257
+ * encoding struct if one can be found, otherwise return NULL.
258
+ *
259
+ * @param start A pointer to the first byte of the name.
260
+ * @param end A pointer to the last byte of the name.
261
+ * @returns A pointer to the encoding struct if one is found, otherwise NULL.
262
+ */
263
+ const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
264
+
265
+ #endif