jruby-prism-parser 0.23.0.pre.SNAPSHOT-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +401 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +62 -0
- data/LICENSE.md +7 -0
- data/Makefile +101 -0
- data/README.md +98 -0
- data/config.yml +2902 -0
- data/docs/build_system.md +91 -0
- data/docs/configuration.md +64 -0
- data/docs/cruby_compilation.md +27 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +121 -0
- data/docs/fuzzing.md +88 -0
- data/docs/heredocs.md +36 -0
- data/docs/javascript.md +118 -0
- data/docs/local_variable_depth.md +229 -0
- data/docs/mapping.md +117 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +98 -0
- data/docs/ripper.md +36 -0
- data/docs/ruby_api.md +43 -0
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +209 -0
- data/docs/testing.md +55 -0
- data/ext/prism/api_node.c +5098 -0
- data/ext/prism/api_pack.c +267 -0
- data/ext/prism/extconf.rb +110 -0
- data/ext/prism/extension.c +1155 -0
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +5807 -0
- data/include/prism/defines.h +102 -0
- data/include/prism/diagnostic.h +339 -0
- data/include/prism/encoding.h +265 -0
- data/include/prism/node.h +57 -0
- data/include/prism/options.h +230 -0
- data/include/prism/pack.h +152 -0
- data/include/prism/parser.h +732 -0
- data/include/prism/prettyprint.h +26 -0
- data/include/prism/regexp.h +33 -0
- data/include/prism/util/pm_buffer.h +155 -0
- data/include/prism/util/pm_char.h +205 -0
- data/include/prism/util/pm_constant_pool.h +209 -0
- data/include/prism/util/pm_list.h +97 -0
- data/include/prism/util/pm_memchr.h +29 -0
- data/include/prism/util/pm_newline_list.h +93 -0
- data/include/prism/util/pm_state_stack.h +42 -0
- data/include/prism/util/pm_string.h +150 -0
- data/include/prism/util/pm_string_list.h +44 -0
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +46 -0
- data/include/prism/version.h +29 -0
- data/include/prism.h +289 -0
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +486 -0
- data/lib/prism/debug.rb +206 -0
- data/lib/prism/desugar_compiler.rb +207 -0
- data/lib/prism/dispatcher.rb +2150 -0
- data/lib/prism/dot_visitor.rb +4634 -0
- data/lib/prism/dsl.rb +785 -0
- data/lib/prism/ffi.rb +346 -0
- data/lib/prism/lex_compat.rb +908 -0
- data/lib/prism/mutation_compiler.rb +753 -0
- data/lib/prism/node.rb +17864 -0
- data/lib/prism/node_ext.rb +212 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/prism/pack.rb +224 -0
- data/lib/prism/parse_result/comments.rb +177 -0
- data/lib/prism/parse_result/newlines.rb +64 -0
- data/lib/prism/parse_result.rb +498 -0
- data/lib/prism/pattern.rb +250 -0
- data/lib/prism/serialize.rb +1354 -0
- data/lib/prism/translation/parser/compiler.rb +1838 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +37 -0
- data/lib/prism/translation/parser.rb +178 -0
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism/version.rb +3 -0
- data/lib/prism/visitor.rb +495 -0
- data/lib/prism.rb +99 -0
- data/prism.gemspec +135 -0
- data/rbi/prism.rbi +7767 -0
- data/rbi/prism_static.rbi +207 -0
- data/sig/prism.rbs +4773 -0
- data/sig/prism_static.rbs +201 -0
- data/src/diagnostic.c +400 -0
- data/src/encoding.c +5132 -0
- data/src/node.c +2786 -0
- data/src/options.c +213 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +8881 -0
- data/src/prism.c +18406 -0
- data/src/regexp.c +638 -0
- data/src/serialize.c +1554 -0
- data/src/token_type.c +700 -0
- data/src/util/pm_buffer.c +190 -0
- data/src/util/pm_char.c +318 -0
- data/src/util/pm_constant_pool.c +322 -0
- data/src/util/pm_list.c +49 -0
- data/src/util/pm_memchr.c +35 -0
- data/src/util/pm_newline_list.c +84 -0
- data/src/util/pm_state_stack.c +25 -0
- data/src/util/pm_string.c +203 -0
- data/src/util/pm_string_list.c +28 -0
- data/src/util/pm_strncasecmp.c +24 -0
- data/src/util/pm_strpbrk.c +180 -0
- metadata +156 -0
@@ -0,0 +1,102 @@
|
|
1
|
+
/**
|
2
|
+
* @file defines.h
|
3
|
+
*
|
4
|
+
* Macro definitions used throughout the prism library.
|
5
|
+
*
|
6
|
+
* This file should be included first by any *.h or *.c in prism for consistency
|
7
|
+
* and to ensure that the macros are defined before they are used.
|
8
|
+
*/
|
9
|
+
#ifndef PRISM_DEFINES_H
|
10
|
+
#define PRISM_DEFINES_H
|
11
|
+
|
12
|
+
#include <ctype.h>
|
13
|
+
#include <stdarg.h>
|
14
|
+
#include <stddef.h>
|
15
|
+
#include <stdint.h>
|
16
|
+
#include <stdio.h>
|
17
|
+
#include <string.h>
|
18
|
+
|
19
|
+
/**
|
20
|
+
* We want to be able to use the PRI* macros for printing out integers, but on
|
21
|
+
* some platforms they aren't included unless this is already defined.
|
22
|
+
*/
|
23
|
+
#define __STDC_FORMAT_MACROS
|
24
|
+
|
25
|
+
#include <inttypes.h>
|
26
|
+
|
27
|
+
/**
|
28
|
+
* By default, we compile with -fvisibility=hidden. When this is enabled, we
|
29
|
+
* need to mark certain functions as being publicly visible. This macro does
|
30
|
+
* that in a compiler-agnostic way.
|
31
|
+
*/
|
32
|
+
#ifndef PRISM_EXPORTED_FUNCTION
|
33
|
+
# ifdef PRISM_EXPORT_SYMBOLS
|
34
|
+
# ifdef _WIN32
|
35
|
+
# define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
|
36
|
+
# else
|
37
|
+
# define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
|
38
|
+
# endif
|
39
|
+
# else
|
40
|
+
# define PRISM_EXPORTED_FUNCTION
|
41
|
+
# endif
|
42
|
+
#endif
|
43
|
+
|
44
|
+
/**
|
45
|
+
* Certain compilers support specifying that a function accepts variadic
|
46
|
+
* parameters that look like printf format strings to provide a better developer
|
47
|
+
* experience when someone is using the function. This macro does that in a
|
48
|
+
* compiler-agnostic way.
|
49
|
+
*/
|
50
|
+
#if defined(__GNUC__)
|
51
|
+
# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
|
52
|
+
#elif defined(__clang__)
|
53
|
+
# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index)))
|
54
|
+
#else
|
55
|
+
# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
|
56
|
+
#endif
|
57
|
+
|
58
|
+
/**
|
59
|
+
* GCC will warn if you specify a function or parameter that is unused at
|
60
|
+
* runtime. This macro allows you to mark a function or parameter as unused in a
|
61
|
+
* compiler-agnostic way.
|
62
|
+
*/
|
63
|
+
#if defined(__GNUC__)
|
64
|
+
# define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
|
65
|
+
#else
|
66
|
+
# define PRISM_ATTRIBUTE_UNUSED
|
67
|
+
#endif
|
68
|
+
|
69
|
+
/**
|
70
|
+
* Old Visual Studio versions do not support the inline keyword, so we need to
|
71
|
+
* define it to be __inline.
|
72
|
+
*/
|
73
|
+
#if defined(_MSC_VER) && !defined(inline)
|
74
|
+
# define inline __inline
|
75
|
+
#endif
|
76
|
+
|
77
|
+
/**
|
78
|
+
* Old Visual Studio versions before 2015 do not implement snprintf, but instead
|
79
|
+
* implement _snprintf. We standardize that here.
|
80
|
+
*/
|
81
|
+
#if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
|
82
|
+
# define snprintf _snprintf
|
83
|
+
#endif
|
84
|
+
|
85
|
+
/**
|
86
|
+
* A simple utility macro to concatenate two tokens together, necessary when one
|
87
|
+
* of the tokens is itself a macro.
|
88
|
+
*/
|
89
|
+
#define PM_CONCATENATE(left, right) left ## right
|
90
|
+
|
91
|
+
/**
|
92
|
+
* We want to be able to use static assertions, but they weren't standardized
|
93
|
+
* until C11. As such, we polyfill it here by making a hacky typedef that will
|
94
|
+
* fail to compile due to a negative array size if the condition is false.
|
95
|
+
*/
|
96
|
+
/*
 * `_Static_assert` is a language keyword in C11, not a macro, so testing
 * `defined(_Static_assert)` alone is false on conforming compilers and would
 * always select the typedef fallback. Detect native support through the
 * language version instead, while still honoring toolchains that happen to
 * provide `_Static_assert` as a macro.
 *
 * The `line` argument is only used by the fallback, where it makes each
 * generated typedef name unique (callers are expected to pass __LINE__).
 */
#if defined(_Static_assert) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))
#   define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
#elif defined(__cplusplus) && (__cplusplus >= 201103L)
#   define PM_STATIC_ASSERT(line, condition, message) static_assert(condition, message)
#else
#   define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
#endif
|
101
|
+
|
102
|
+
#endif
|
@@ -0,0 +1,339 @@
|
|
1
|
+
/**
|
2
|
+
* @file diagnostic.h
|
3
|
+
*
|
4
|
+
* A list of diagnostics generated during parsing.
|
5
|
+
*/
|
6
|
+
#ifndef PRISM_DIAGNOSTIC_H
|
7
|
+
#define PRISM_DIAGNOSTIC_H
|
8
|
+
|
9
|
+
#include "prism/ast.h"
|
10
|
+
#include "prism/defines.h"
|
11
|
+
#include "prism/util/pm_list.h"
|
12
|
+
|
13
|
+
#include <stdbool.h>
|
14
|
+
#include <stdlib.h>
|
15
|
+
#include <assert.h>
|
16
|
+
|
17
|
+
/**
|
18
|
+
* The levels of errors generated during parsing.
|
19
|
+
*/
|
20
|
+
typedef enum {
|
21
|
+
/** For errors that cannot be recovered from. */
|
22
|
+
PM_ERROR_LEVEL_FATAL = 0,
|
23
|
+
|
24
|
+
/** For errors that should raise an argument error. */
|
25
|
+
PM_ERROR_LEVEL_ARGUMENT = 1
|
26
|
+
} pm_error_level_t;
|
27
|
+
|
28
|
+
/**
|
29
|
+
* The levels of warnings generated during parsing.
|
30
|
+
*/
|
31
|
+
typedef enum {
|
32
|
+
/** For warnings which should be emitted if $VERBOSE != nil. */
|
33
|
+
PM_WARNING_LEVEL_DEFAULT = 0,
|
34
|
+
|
35
|
+
/** For warnings which should be emitted if $VERBOSE == true. */
|
36
|
+
PM_WARNING_LEVEL_VERBOSE = 1
|
37
|
+
} pm_warning_level_t;
|
38
|
+
|
39
|
+
/**
|
40
|
+
* This struct represents a diagnostic generated during parsing.
|
41
|
+
*
|
42
|
+
* @extends pm_list_node_t
|
43
|
+
*/
|
44
|
+
typedef struct {
|
45
|
+
/** The embedded base node. */
|
46
|
+
pm_list_node_t node;
|
47
|
+
|
48
|
+
/** The location of the diagnostic in the source. */
|
49
|
+
pm_location_t location;
|
50
|
+
|
51
|
+
/** The message associated with the diagnostic. */
|
52
|
+
const char *message;
|
53
|
+
|
54
|
+
/**
|
55
|
+
* Whether or not the memory related to the message of this diagnostic is
|
56
|
+
* owned by this diagnostic. If it is, it needs to be freed when the
|
57
|
+
* diagnostic is freed.
|
58
|
+
*/
|
59
|
+
bool owned;
|
60
|
+
|
61
|
+
/**
|
62
|
+
* The level of the diagnostic, see `pm_error_level_t` and
|
63
|
+
* `pm_warning_level_t` for possible values.
|
64
|
+
*/
|
65
|
+
uint8_t level;
|
66
|
+
} pm_diagnostic_t;
|
67
|
+
|
68
|
+
/**
|
69
|
+
* The diagnostic IDs of all of the diagnostics, used to communicate the types
|
70
|
+
* of errors between the parser and the user.
|
71
|
+
*/
|
72
|
+
typedef enum {
|
73
|
+
// This is a special error that we can potentially replace by others. For
|
74
|
+
// an example of how this is used, see parse_expression_prefix.
|
75
|
+
PM_ERR_CANNOT_PARSE_EXPRESSION,
|
76
|
+
|
77
|
+
// These are the error codes.
|
78
|
+
PM_ERR_ALIAS_ARGUMENT,
|
79
|
+
PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
|
80
|
+
PM_ERR_ARGUMENT_AFTER_BLOCK,
|
81
|
+
PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES,
|
82
|
+
PM_ERR_ARGUMENT_BARE_HASH,
|
83
|
+
PM_ERR_ARGUMENT_BLOCK_FORWARDING,
|
84
|
+
PM_ERR_ARGUMENT_BLOCK_MULTI,
|
85
|
+
PM_ERR_ARGUMENT_FORMAL_CLASS,
|
86
|
+
PM_ERR_ARGUMENT_FORMAL_CONSTANT,
|
87
|
+
PM_ERR_ARGUMENT_FORMAL_GLOBAL,
|
88
|
+
PM_ERR_ARGUMENT_FORMAL_IVAR,
|
89
|
+
PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
|
90
|
+
PM_ERR_ARGUMENT_IN,
|
91
|
+
PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
|
92
|
+
PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
|
93
|
+
PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
|
94
|
+
PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
|
95
|
+
PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
|
96
|
+
PM_ERR_ARGUMENT_TERM_PAREN,
|
97
|
+
PM_ERR_ARGUMENT_UNEXPECTED_BLOCK,
|
98
|
+
PM_ERR_ARRAY_ELEMENT,
|
99
|
+
PM_ERR_ARRAY_EXPRESSION,
|
100
|
+
PM_ERR_ARRAY_EXPRESSION_AFTER_STAR,
|
101
|
+
PM_ERR_ARRAY_SEPARATOR,
|
102
|
+
PM_ERR_ARRAY_TERM,
|
103
|
+
PM_ERR_BEGIN_LONELY_ELSE,
|
104
|
+
PM_ERR_BEGIN_TERM,
|
105
|
+
PM_ERR_BEGIN_UPCASE_BRACE,
|
106
|
+
PM_ERR_BEGIN_UPCASE_TERM,
|
107
|
+
PM_ERR_BEGIN_UPCASE_TOPLEVEL,
|
108
|
+
PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE,
|
109
|
+
PM_ERR_BLOCK_PARAM_PIPE_TERM,
|
110
|
+
PM_ERR_BLOCK_TERM_BRACE,
|
111
|
+
PM_ERR_BLOCK_TERM_END,
|
112
|
+
PM_ERR_CANNOT_PARSE_STRING_PART,
|
113
|
+
PM_ERR_CASE_EXPRESSION_AFTER_CASE,
|
114
|
+
PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
|
115
|
+
PM_ERR_CASE_MATCH_MISSING_PREDICATE,
|
116
|
+
PM_ERR_CASE_MISSING_CONDITIONS,
|
117
|
+
PM_ERR_CASE_TERM,
|
118
|
+
PM_ERR_CLASS_IN_METHOD,
|
119
|
+
PM_ERR_CLASS_NAME,
|
120
|
+
PM_ERR_CLASS_SUPERCLASS,
|
121
|
+
PM_ERR_CLASS_TERM,
|
122
|
+
PM_ERR_CLASS_UNEXPECTED_END,
|
123
|
+
PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
|
124
|
+
PM_ERR_CONDITIONAL_IF_PREDICATE,
|
125
|
+
PM_ERR_CONDITIONAL_PREDICATE_TERM,
|
126
|
+
PM_ERR_CONDITIONAL_TERM,
|
127
|
+
PM_ERR_CONDITIONAL_TERM_ELSE,
|
128
|
+
PM_ERR_CONDITIONAL_UNLESS_PREDICATE,
|
129
|
+
PM_ERR_CONDITIONAL_UNTIL_PREDICATE,
|
130
|
+
PM_ERR_CONDITIONAL_WHILE_PREDICATE,
|
131
|
+
PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT,
|
132
|
+
PM_ERR_DEF_ENDLESS,
|
133
|
+
PM_ERR_DEF_ENDLESS_SETTER,
|
134
|
+
PM_ERR_DEF_NAME,
|
135
|
+
PM_ERR_DEF_NAME_AFTER_RECEIVER,
|
136
|
+
PM_ERR_DEF_PARAMS_TERM,
|
137
|
+
PM_ERR_DEF_PARAMS_TERM_PAREN,
|
138
|
+
PM_ERR_DEF_RECEIVER,
|
139
|
+
PM_ERR_DEF_RECEIVER_TERM,
|
140
|
+
PM_ERR_DEF_TERM,
|
141
|
+
PM_ERR_DEFINED_EXPRESSION,
|
142
|
+
PM_ERR_EMBDOC_TERM,
|
143
|
+
PM_ERR_EMBEXPR_END,
|
144
|
+
PM_ERR_EMBVAR_INVALID,
|
145
|
+
PM_ERR_END_UPCASE_BRACE,
|
146
|
+
PM_ERR_END_UPCASE_TERM,
|
147
|
+
PM_ERR_ESCAPE_INVALID_CONTROL,
|
148
|
+
PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT,
|
149
|
+
PM_ERR_ESCAPE_INVALID_HEXADECIMAL,
|
150
|
+
PM_ERR_ESCAPE_INVALID_META,
|
151
|
+
PM_ERR_ESCAPE_INVALID_META_REPEAT,
|
152
|
+
PM_ERR_ESCAPE_INVALID_UNICODE,
|
153
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
|
154
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
|
155
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
|
156
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
|
157
|
+
PM_ERR_EXPECT_ARGUMENT,
|
158
|
+
PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
|
159
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
|
160
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
|
161
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
|
162
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
|
163
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
|
164
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
|
165
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
|
166
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
|
167
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
|
168
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
|
169
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
|
170
|
+
PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
|
171
|
+
PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
|
172
|
+
PM_ERR_EXPECT_RBRACKET,
|
173
|
+
PM_ERR_EXPECT_RPAREN,
|
174
|
+
PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
|
175
|
+
PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
|
176
|
+
PM_ERR_EXPECT_STRING_CONTENT,
|
177
|
+
PM_ERR_EXPECT_WHEN_DELIMITER,
|
178
|
+
PM_ERR_EXPRESSION_BARE_HASH,
|
179
|
+
PM_ERR_FOR_COLLECTION,
|
180
|
+
PM_ERR_FOR_IN,
|
181
|
+
PM_ERR_FOR_INDEX,
|
182
|
+
PM_ERR_FOR_TERM,
|
183
|
+
PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
|
184
|
+
PM_ERR_HASH_KEY,
|
185
|
+
PM_ERR_HASH_ROCKET,
|
186
|
+
PM_ERR_HASH_TERM,
|
187
|
+
PM_ERR_HASH_VALUE,
|
188
|
+
PM_ERR_HEREDOC_TERM,
|
189
|
+
PM_ERR_INCOMPLETE_QUESTION_MARK,
|
190
|
+
PM_ERR_INCOMPLETE_VARIABLE_CLASS,
|
191
|
+
PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
|
192
|
+
PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
|
193
|
+
PM_ERR_INVALID_FLOAT_EXPONENT,
|
194
|
+
PM_ERR_INVALID_NUMBER_BINARY,
|
195
|
+
PM_ERR_INVALID_NUMBER_DECIMAL,
|
196
|
+
PM_ERR_INVALID_NUMBER_HEXADECIMAL,
|
197
|
+
PM_ERR_INVALID_NUMBER_OCTAL,
|
198
|
+
PM_ERR_INVALID_NUMBER_UNDERSCORE,
|
199
|
+
PM_ERR_INVALID_CHARACTER,
|
200
|
+
PM_ERR_INVALID_MULTIBYTE_CHARACTER,
|
201
|
+
PM_ERR_INVALID_PRINTABLE_CHARACTER,
|
202
|
+
PM_ERR_INVALID_PERCENT,
|
203
|
+
PM_ERR_INVALID_VARIABLE_GLOBAL,
|
204
|
+
PM_ERR_IT_NOT_ALLOWED,
|
205
|
+
PM_ERR_LAMBDA_OPEN,
|
206
|
+
PM_ERR_LAMBDA_TERM_BRACE,
|
207
|
+
PM_ERR_LAMBDA_TERM_END,
|
208
|
+
PM_ERR_LIST_I_LOWER_ELEMENT,
|
209
|
+
PM_ERR_LIST_I_LOWER_TERM,
|
210
|
+
PM_ERR_LIST_I_UPPER_ELEMENT,
|
211
|
+
PM_ERR_LIST_I_UPPER_TERM,
|
212
|
+
PM_ERR_LIST_W_LOWER_ELEMENT,
|
213
|
+
PM_ERR_LIST_W_LOWER_TERM,
|
214
|
+
PM_ERR_LIST_W_UPPER_ELEMENT,
|
215
|
+
PM_ERR_LIST_W_UPPER_TERM,
|
216
|
+
PM_ERR_MALLOC_FAILED,
|
217
|
+
PM_ERR_MIXED_ENCODING,
|
218
|
+
PM_ERR_MODULE_IN_METHOD,
|
219
|
+
PM_ERR_MODULE_NAME,
|
220
|
+
PM_ERR_MODULE_TERM,
|
221
|
+
PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
|
222
|
+
PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
|
223
|
+
PM_ERR_NOT_EXPRESSION,
|
224
|
+
PM_ERR_NO_LOCAL_VARIABLE,
|
225
|
+
PM_ERR_NUMBER_LITERAL_UNDERSCORE,
|
226
|
+
PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
|
227
|
+
PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
|
228
|
+
PM_ERR_OPERATOR_MULTI_ASSIGN,
|
229
|
+
PM_ERR_OPERATOR_WRITE_ARGUMENTS,
|
230
|
+
PM_ERR_OPERATOR_WRITE_BLOCK,
|
231
|
+
PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
|
232
|
+
PM_ERR_PARAMETER_BLOCK_MULTI,
|
233
|
+
PM_ERR_PARAMETER_CIRCULAR,
|
234
|
+
PM_ERR_PARAMETER_METHOD_NAME,
|
235
|
+
PM_ERR_PARAMETER_NAME_REPEAT,
|
236
|
+
PM_ERR_PARAMETER_NO_DEFAULT,
|
237
|
+
PM_ERR_PARAMETER_NO_DEFAULT_KW,
|
238
|
+
PM_ERR_PARAMETER_NUMBERED_RESERVED,
|
239
|
+
PM_ERR_PARAMETER_ORDER,
|
240
|
+
PM_ERR_PARAMETER_SPLAT_MULTI,
|
241
|
+
PM_ERR_PARAMETER_STAR,
|
242
|
+
PM_ERR_PARAMETER_UNEXPECTED_FWD,
|
243
|
+
PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
|
244
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
|
245
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
|
246
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
|
247
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
|
248
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
|
249
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
|
250
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
|
251
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
|
252
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
|
253
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_REST,
|
254
|
+
PM_ERR_PATTERN_HASH_KEY,
|
255
|
+
PM_ERR_PATTERN_HASH_KEY_LABEL,
|
256
|
+
PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
|
257
|
+
PM_ERR_PATTERN_LABEL_AFTER_COMMA,
|
258
|
+
PM_ERR_PATTERN_REST,
|
259
|
+
PM_ERR_PATTERN_TERM_BRACE,
|
260
|
+
PM_ERR_PATTERN_TERM_BRACKET,
|
261
|
+
PM_ERR_PATTERN_TERM_PAREN,
|
262
|
+
PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
|
263
|
+
PM_ERR_REGEXP_TERM,
|
264
|
+
PM_ERR_RESCUE_EXPRESSION,
|
265
|
+
PM_ERR_RESCUE_MODIFIER_VALUE,
|
266
|
+
PM_ERR_RESCUE_TERM,
|
267
|
+
PM_ERR_RESCUE_VARIABLE,
|
268
|
+
PM_ERR_RETURN_INVALID,
|
269
|
+
PM_ERR_SINGLETON_FOR_LITERALS,
|
270
|
+
PM_ERR_STATEMENT_ALIAS,
|
271
|
+
PM_ERR_STATEMENT_POSTEXE_END,
|
272
|
+
PM_ERR_STATEMENT_PREEXE_BEGIN,
|
273
|
+
PM_ERR_STATEMENT_UNDEF,
|
274
|
+
PM_ERR_STRING_CONCATENATION,
|
275
|
+
PM_ERR_STRING_INTERPOLATED_TERM,
|
276
|
+
PM_ERR_STRING_LITERAL_EOF,
|
277
|
+
PM_ERR_STRING_LITERAL_TERM,
|
278
|
+
PM_ERR_SYMBOL_INVALID,
|
279
|
+
PM_ERR_SYMBOL_TERM_DYNAMIC,
|
280
|
+
PM_ERR_SYMBOL_TERM_INTERPOLATED,
|
281
|
+
PM_ERR_TERNARY_COLON,
|
282
|
+
PM_ERR_TERNARY_EXPRESSION_FALSE,
|
283
|
+
PM_ERR_TERNARY_EXPRESSION_TRUE,
|
284
|
+
PM_ERR_UNARY_RECEIVER,
|
285
|
+
PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
|
286
|
+
PM_ERR_UNEXPECTED_TOKEN_IGNORE,
|
287
|
+
PM_ERR_UNDEF_ARGUMENT,
|
288
|
+
PM_ERR_UNTIL_TERM,
|
289
|
+
PM_ERR_VOID_EXPRESSION,
|
290
|
+
PM_ERR_WHILE_TERM,
|
291
|
+
PM_ERR_WRITE_TARGET_IN_METHOD,
|
292
|
+
PM_ERR_WRITE_TARGET_READONLY,
|
293
|
+
PM_ERR_WRITE_TARGET_UNEXPECTED,
|
294
|
+
PM_ERR_XSTRING_TERM,
|
295
|
+
|
296
|
+
// These are the warning codes.
|
297
|
+
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
|
298
|
+
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
|
299
|
+
PM_WARN_AMBIGUOUS_PREFIX_STAR,
|
300
|
+
PM_WARN_AMBIGUOUS_SLASH,
|
301
|
+
PM_WARN_END_IN_METHOD,
|
302
|
+
|
303
|
+
// This is the number of diagnostic codes.
|
304
|
+
PM_DIAGNOSTIC_ID_LEN,
|
305
|
+
} pm_diagnostic_id_t;
|
306
|
+
|
307
|
+
/**
|
308
|
+
* Append a diagnostic to the given list of diagnostics that is using shared
|
309
|
+
* memory for its message.
|
310
|
+
*
|
311
|
+
* @param list The list to append to.
|
312
|
+
* @param start The start of the diagnostic.
|
313
|
+
* @param end The end of the diagnostic.
|
314
|
+
* @param diag_id The diagnostic ID.
|
315
|
+
* @return Whether the diagnostic was successfully appended.
|
316
|
+
*/
|
317
|
+
bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
|
318
|
+
|
319
|
+
/**
|
320
|
+
* Append a diagnostic to the given list of diagnostics that is using a format
|
321
|
+
* string for its message.
|
322
|
+
*
|
323
|
+
* @param list The list to append to.
|
324
|
+
* @param start The start of the diagnostic.
|
325
|
+
* @param end The end of the diagnostic.
|
326
|
+
* @param diag_id The diagnostic ID.
|
327
|
+
* @param ... The arguments to the format string for the message.
|
328
|
+
* @return Whether the diagnostic was successfully appended.
|
329
|
+
*/
|
330
|
+
bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
|
331
|
+
|
332
|
+
/**
|
333
|
+
* Deallocate the internal state of the given diagnostic list.
|
334
|
+
*
|
335
|
+
* @param list The list to deallocate.
|
336
|
+
*/
|
337
|
+
void pm_diagnostic_list_free(pm_list_t *list);
|
338
|
+
|
339
|
+
#endif
|
@@ -0,0 +1,265 @@
|
|
1
|
+
/**
|
2
|
+
* @file encoding.h
|
3
|
+
*
|
4
|
+
* The encoding interface and implementations used by the parser.
|
5
|
+
*/
|
6
|
+
#ifndef PRISM_ENCODING_H
|
7
|
+
#define PRISM_ENCODING_H
|
8
|
+
|
9
|
+
#include "prism/defines.h"
|
10
|
+
#include "prism/util/pm_strncasecmp.h"
|
11
|
+
|
12
|
+
#include <assert.h>
|
13
|
+
#include <stdbool.h>
|
14
|
+
#include <stddef.h>
|
15
|
+
#include <stdint.h>
|
16
|
+
|
17
|
+
/**
|
18
|
+
* This struct defines the functions necessary to implement the encoding
|
19
|
+
* interface so we can determine how many bytes the subsequent character takes.
|
20
|
+
* Each callback should return the number of bytes, or 0 if the next bytes are
|
21
|
+
* invalid for the encoding and type.
|
22
|
+
*/
|
23
|
+
typedef struct {
|
24
|
+
/**
|
25
|
+
* Return the number of bytes that the next character takes if it is valid
|
26
|
+
* in the encoding. Does not read more than n bytes. It is assumed that n is
|
27
|
+
* at least 1.
|
28
|
+
*/
|
29
|
+
size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
|
30
|
+
|
31
|
+
/**
|
32
|
+
* Return the number of bytes that the next character takes if it is valid
|
33
|
+
* in the encoding and is alphabetical. Does not read more than n bytes. It
|
34
|
+
* is assumed that n is at least 1.
|
35
|
+
*/
|
36
|
+
size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
|
37
|
+
|
38
|
+
/**
|
39
|
+
* Return the number of bytes that the next character takes if it is valid
|
40
|
+
* in the encoding and is alphanumeric. Does not read more than n bytes. It
|
41
|
+
* is assumed that n is at least 1.
|
42
|
+
*/
|
43
|
+
size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
|
44
|
+
|
45
|
+
/**
|
46
|
+
* Return true if the next character is valid in the encoding and is an
|
47
|
+
* uppercase character. Does not read more than n bytes. It is assumed that
|
48
|
+
* n is at least 1.
|
49
|
+
*/
|
50
|
+
bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
|
51
|
+
|
52
|
+
/**
|
53
|
+
* The name of the encoding. This should correspond to a value that can be
|
54
|
+
* passed to Encoding.find in Ruby.
|
55
|
+
*/
|
56
|
+
const char *name;
|
57
|
+
|
58
|
+
/**
|
59
|
+
* Whether the encoding is a multibyte encoding.
|
60
|
+
*/
|
61
|
+
bool multibyte;
|
62
|
+
} pm_encoding_t;
|
63
|
+
|
64
|
+
/**
|
65
|
+
* All of the lookup tables use the first bit of each embedded byte to indicate
|
66
|
+
* whether the codepoint is alphabetical.
|
67
|
+
*/
|
68
|
+
// Parenthesized so the shift cannot re-associate with operators at the use site.
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)
|
69
|
+
|
70
|
+
/**
|
71
|
+
* All of the lookup tables use the second bit of each embedded byte to indicate
|
72
|
+
* whether the codepoint is alphanumeric.
|
73
|
+
*/
|
74
|
+
// Parenthesized so the shift cannot re-associate with operators at the use site.
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)
|
75
|
+
|
76
|
+
/**
|
77
|
+
* All of the lookup tables use the third bit of each embedded byte to indicate
|
78
|
+
* whether the codepoint is uppercase.
|
79
|
+
*/
|
80
|
+
// Parenthesized so the shift cannot re-associate with operators at the use site.
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
|
81
|
+
|
82
|
+
/**
|
83
|
+
* Return the size of the next character in the UTF-8 encoding.
|
84
|
+
*
|
85
|
+
* @param b The bytes to read.
|
86
|
+
* @param n The number of bytes that can be read.
|
87
|
+
* @returns The number of bytes that the next character takes if it is valid in
|
88
|
+
* the encoding, or 0 if it is not.
|
89
|
+
*/
|
90
|
+
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
|
91
|
+
|
92
|
+
/**
|
93
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
94
|
+
* alphabetical character.
|
95
|
+
*
|
96
|
+
* @param b The bytes to read.
|
97
|
+
* @param n The number of bytes that can be read.
|
98
|
+
* @returns The number of bytes that the next character takes if it is valid in
|
99
|
+
* the encoding, or 0 if it is not.
|
100
|
+
*/
|
101
|
+
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
|
102
|
+
|
103
|
+
/**
|
104
|
+
* Return the size of the next character in the UTF-8 encoding if it is an
|
105
|
+
* alphanumeric character.
|
106
|
+
*
|
107
|
+
* @param b The bytes to read.
|
108
|
+
* @param n The number of bytes that can be read.
|
109
|
+
* @returns The number of bytes that the next character takes if it is valid in
|
110
|
+
* the encoding, or 0 if it is not.
|
111
|
+
*/
|
112
|
+
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
|
113
|
+
|
114
|
+
/**
|
115
|
+
* Return true if the next character in the UTF-8 encoding is an uppercase
|
116
|
+
* character.
|
117
|
+
*
|
118
|
+
* @param b The bytes to read.
|
119
|
+
* @param n The number of bytes that can be read.
|
120
|
+
* @returns True if the next character is valid in the encoding and is an
|
121
|
+
* uppercase character, or false if it is not.
|
122
|
+
*/
|
123
|
+
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
|
124
|
+
|
125
|
+
/**
|
126
|
+
* This lookup table is referenced in both the UTF-8 encoding file and the
|
127
|
+
* parser directly in order to speed up the default encoding processing. It is
|
128
|
+
* used to indicate whether a character is alphabetical, alphanumeric, or
|
129
|
+
* uppercase in unicode mappings.
|
130
|
+
*/
|
131
|
+
extern const uint8_t pm_encoding_unicode_table[256];
|
132
|
+
|
133
|
+
/**
|
134
|
+
* These are all of the encodings that prism supports.
|
135
|
+
*/
|
136
|
+
typedef enum {
|
137
|
+
PM_ENCODING_UTF_8 = 0,
|
138
|
+
PM_ENCODING_ASCII_8BIT,
|
139
|
+
PM_ENCODING_BIG5,
|
140
|
+
PM_ENCODING_BIG5_HKSCS,
|
141
|
+
PM_ENCODING_BIG5_UAO,
|
142
|
+
PM_ENCODING_CESU_8,
|
143
|
+
PM_ENCODING_CP51932,
|
144
|
+
PM_ENCODING_CP850,
|
145
|
+
PM_ENCODING_CP852,
|
146
|
+
PM_ENCODING_CP855,
|
147
|
+
PM_ENCODING_CP949,
|
148
|
+
PM_ENCODING_CP950,
|
149
|
+
PM_ENCODING_CP951,
|
150
|
+
PM_ENCODING_EMACS_MULE,
|
151
|
+
PM_ENCODING_EUC_JP,
|
152
|
+
PM_ENCODING_EUC_JP_MS,
|
153
|
+
PM_ENCODING_EUC_JIS_2004,
|
154
|
+
PM_ENCODING_EUC_KR,
|
155
|
+
PM_ENCODING_EUC_TW,
|
156
|
+
PM_ENCODING_GB12345,
|
157
|
+
PM_ENCODING_GB18030,
|
158
|
+
PM_ENCODING_GB1988,
|
159
|
+
PM_ENCODING_GB2312,
|
160
|
+
PM_ENCODING_GBK,
|
161
|
+
PM_ENCODING_IBM437,
|
162
|
+
PM_ENCODING_IBM720,
|
163
|
+
PM_ENCODING_IBM737,
|
164
|
+
PM_ENCODING_IBM775,
|
165
|
+
PM_ENCODING_IBM852,
|
166
|
+
PM_ENCODING_IBM855,
|
167
|
+
PM_ENCODING_IBM857,
|
168
|
+
PM_ENCODING_IBM860,
|
169
|
+
PM_ENCODING_IBM861,
|
170
|
+
PM_ENCODING_IBM862,
|
171
|
+
PM_ENCODING_IBM863,
|
172
|
+
PM_ENCODING_IBM864,
|
173
|
+
PM_ENCODING_IBM865,
|
174
|
+
PM_ENCODING_IBM866,
|
175
|
+
PM_ENCODING_IBM869,
|
176
|
+
PM_ENCODING_ISO_8859_1,
|
177
|
+
PM_ENCODING_ISO_8859_2,
|
178
|
+
PM_ENCODING_ISO_8859_3,
|
179
|
+
PM_ENCODING_ISO_8859_4,
|
180
|
+
PM_ENCODING_ISO_8859_5,
|
181
|
+
PM_ENCODING_ISO_8859_6,
|
182
|
+
PM_ENCODING_ISO_8859_7,
|
183
|
+
PM_ENCODING_ISO_8859_8,
|
184
|
+
PM_ENCODING_ISO_8859_9,
|
185
|
+
PM_ENCODING_ISO_8859_10,
|
186
|
+
PM_ENCODING_ISO_8859_11,
|
187
|
+
PM_ENCODING_ISO_8859_13,
|
188
|
+
PM_ENCODING_ISO_8859_14,
|
189
|
+
PM_ENCODING_ISO_8859_15,
|
190
|
+
PM_ENCODING_ISO_8859_16,
|
191
|
+
PM_ENCODING_KOI8_R,
|
192
|
+
PM_ENCODING_KOI8_U,
|
193
|
+
PM_ENCODING_MAC_CENT_EURO,
|
194
|
+
PM_ENCODING_MAC_CROATIAN,
|
195
|
+
PM_ENCODING_MAC_CYRILLIC,
|
196
|
+
PM_ENCODING_MAC_GREEK,
|
197
|
+
PM_ENCODING_MAC_ICELAND,
|
198
|
+
PM_ENCODING_MAC_JAPANESE,
|
199
|
+
PM_ENCODING_MAC_ROMAN,
|
200
|
+
PM_ENCODING_MAC_ROMANIA,
|
201
|
+
PM_ENCODING_MAC_THAI,
|
202
|
+
PM_ENCODING_MAC_TURKISH,
|
203
|
+
PM_ENCODING_MAC_UKRAINE,
|
204
|
+
PM_ENCODING_SHIFT_JIS,
|
205
|
+
PM_ENCODING_SJIS_DOCOMO,
|
206
|
+
PM_ENCODING_SJIS_KDDI,
|
207
|
+
PM_ENCODING_SJIS_SOFTBANK,
|
208
|
+
PM_ENCODING_STATELESS_ISO_2022_JP,
|
209
|
+
PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
|
210
|
+
PM_ENCODING_TIS_620,
|
211
|
+
PM_ENCODING_US_ASCII,
|
212
|
+
PM_ENCODING_UTF8_MAC,
|
213
|
+
PM_ENCODING_UTF8_DOCOMO,
|
214
|
+
PM_ENCODING_UTF8_KDDI,
|
215
|
+
PM_ENCODING_UTF8_SOFTBANK,
|
216
|
+
PM_ENCODING_WINDOWS_1250,
|
217
|
+
PM_ENCODING_WINDOWS_1251,
|
218
|
+
PM_ENCODING_WINDOWS_1252,
|
219
|
+
PM_ENCODING_WINDOWS_1253,
|
220
|
+
PM_ENCODING_WINDOWS_1254,
|
221
|
+
PM_ENCODING_WINDOWS_1255,
|
222
|
+
PM_ENCODING_WINDOWS_1256,
|
223
|
+
PM_ENCODING_WINDOWS_1257,
|
224
|
+
PM_ENCODING_WINDOWS_1258,
|
225
|
+
PM_ENCODING_WINDOWS_31J,
|
226
|
+
PM_ENCODING_WINDOWS_874,
|
227
|
+
PM_ENCODING_MAXIMUM
|
228
|
+
} pm_encoding_type_t;
|
229
|
+
|
230
|
+
/**
|
231
|
+
* This is the table of all of the encodings that prism supports.
|
232
|
+
*/
|
233
|
+
extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
|
234
|
+
|
235
|
+
/**
|
236
|
+
* This is the default UTF-8 encoding. We need a reference to it to quickly
|
237
|
+
* create parsers.
|
238
|
+
*/
|
239
|
+
#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
|
240
|
+
|
241
|
+
/**
|
242
|
+
* This is the US-ASCII encoding. We need a reference to it to be able to
|
243
|
+
* compare against it when a string is being created because it could possibly
|
244
|
+
* need to fall back to ASCII-8BIT.
|
245
|
+
*/
|
246
|
+
#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
|
247
|
+
|
248
|
+
/**
|
249
|
+
* This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
|
250
|
+
* can compare against it because invalid multibyte characters are not a thing
|
251
|
+
* in this encoding.
|
252
|
+
*/
|
253
|
+
#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
|
254
|
+
|
255
|
+
/**
|
256
|
+
* Parse the given name of an encoding and return a pointer to the corresponding
|
257
|
+
* encoding struct if one can be found, otherwise return NULL.
|
258
|
+
*
|
259
|
+
* @param start A pointer to the first byte of the name.
|
260
|
+
* @param end A pointer to the last byte of the name.
|
261
|
+
* @returns A pointer to the encoding struct if one is found, otherwise NULL.
|
262
|
+
*/
|
263
|
+
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
|
264
|
+
|
265
|
+
#endif
|