prism 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +172 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +62 -0
- data/LICENSE.md +7 -0
- data/Makefile +84 -0
- data/README.md +89 -0
- data/config.yml +2481 -0
- data/docs/build_system.md +74 -0
- data/docs/building.md +22 -0
- data/docs/configuration.md +60 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +117 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/ruby_api.md +25 -0
- data/docs/serialization.md +181 -0
- data/docs/testing.md +55 -0
- data/ext/prism/api_node.c +4725 -0
- data/ext/prism/api_pack.c +256 -0
- data/ext/prism/extconf.rb +136 -0
- data/ext/prism/extension.c +626 -0
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +1932 -0
- data/include/prism/defines.h +45 -0
- data/include/prism/diagnostic.h +231 -0
- data/include/prism/enc/pm_encoding.h +95 -0
- data/include/prism/node.h +41 -0
- data/include/prism/pack.h +141 -0
- data/include/prism/parser.h +418 -0
- data/include/prism/regexp.h +19 -0
- data/include/prism/unescape.h +48 -0
- data/include/prism/util/pm_buffer.h +51 -0
- data/include/prism/util/pm_char.h +91 -0
- data/include/prism/util/pm_constant_pool.h +78 -0
- data/include/prism/util/pm_list.h +67 -0
- data/include/prism/util/pm_memchr.h +14 -0
- data/include/prism/util/pm_newline_list.h +61 -0
- data/include/prism/util/pm_state_stack.h +24 -0
- data/include/prism/util/pm_string.h +61 -0
- data/include/prism/util/pm_string_list.h +25 -0
- data/include/prism/util/pm_strpbrk.h +29 -0
- data/include/prism/version.h +4 -0
- data/include/prism.h +82 -0
- data/lib/prism/compiler.rb +465 -0
- data/lib/prism/debug.rb +157 -0
- data/lib/prism/desugar_compiler.rb +206 -0
- data/lib/prism/dispatcher.rb +2051 -0
- data/lib/prism/dsl.rb +750 -0
- data/lib/prism/ffi.rb +251 -0
- data/lib/prism/lex_compat.rb +838 -0
- data/lib/prism/mutation_compiler.rb +718 -0
- data/lib/prism/node.rb +14540 -0
- data/lib/prism/node_ext.rb +55 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/prism/pack.rb +185 -0
- data/lib/prism/parse_result/comments.rb +172 -0
- data/lib/prism/parse_result/newlines.rb +60 -0
- data/lib/prism/parse_result.rb +266 -0
- data/lib/prism/pattern.rb +239 -0
- data/lib/prism/ripper_compat.rb +174 -0
- data/lib/prism/serialize.rb +662 -0
- data/lib/prism/visitor.rb +470 -0
- data/lib/prism.rb +64 -0
- data/prism.gemspec +113 -0
- data/src/diagnostic.c +287 -0
- data/src/enc/pm_big5.c +52 -0
- data/src/enc/pm_euc_jp.c +58 -0
- data/src/enc/pm_gbk.c +61 -0
- data/src/enc/pm_shift_jis.c +56 -0
- data/src/enc/pm_tables.c +507 -0
- data/src/enc/pm_unicode.c +2324 -0
- data/src/enc/pm_windows_31j.c +56 -0
- data/src/node.c +2633 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +2136 -0
- data/src/prism.c +14587 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1899 -0
- data/src/token_type.c +349 -0
- data/src/unescape.c +637 -0
- data/src/util/pm_buffer.c +103 -0
- data/src/util/pm_char.c +272 -0
- data/src/util/pm_constant_pool.c +252 -0
- data/src/util/pm_list.c +41 -0
- data/src/util/pm_memchr.c +33 -0
- data/src/util/pm_newline_list.c +134 -0
- data/src/util/pm_state_stack.c +19 -0
- data/src/util/pm_string.c +200 -0
- data/src/util/pm_string_list.c +29 -0
- data/src/util/pm_strncasecmp.c +17 -0
- data/src/util/pm_strpbrk.c +66 -0
- metadata +138 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
#ifndef PRISM_DEFINES_H
|
2
|
+
#define PRISM_DEFINES_H
|
3
|
+
|
4
|
+
// This file should be included first by any *.h or *.c in prism
|
5
|
+
|
6
|
+
#include <ctype.h>
|
7
|
+
#include <stdarg.h>
|
8
|
+
#include <stddef.h>
|
9
|
+
#include <stdint.h>
|
10
|
+
#include <stdio.h>
|
11
|
+
#include <string.h>
|
12
|
+
|
13
|
+
// PRISM_EXPORTED_FUNCTION
|
14
|
+
#ifndef PRISM_EXPORTED_FUNCTION
|
15
|
+
# ifdef PRISM_EXPORT_SYMBOLS
|
16
|
+
# ifdef _WIN32
|
17
|
+
# define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
|
18
|
+
# else
|
19
|
+
# define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
|
20
|
+
# endif
|
21
|
+
# else
|
22
|
+
# define PRISM_EXPORTED_FUNCTION
|
23
|
+
# endif
|
24
|
+
#endif
|
25
|
+
|
26
|
+
// PRISM_ATTRIBUTE_UNUSED
|
27
|
+
#if defined(__GNUC__)
|
28
|
+
# define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
|
29
|
+
#else
|
30
|
+
# define PRISM_ATTRIBUTE_UNUSED
|
31
|
+
#endif
|
32
|
+
|
33
|
+
// inline
|
34
|
+
#if defined(_MSC_VER) && !defined(inline)
|
35
|
+
# define inline __inline
|
36
|
+
#endif
|
37
|
+
|
38
|
+
// MSVC versions before Visual Studio 2015 (_MSC_VER < 1900) use _snprintf
|
39
|
+
#if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
|
40
|
+
# define snprintf _snprintf
|
41
|
+
#endif
|
42
|
+
|
43
|
+
int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
|
44
|
+
|
45
|
+
#endif
|
@@ -0,0 +1,231 @@
|
|
1
|
+
#ifndef PRISM_DIAGNOSTIC_H
|
2
|
+
#define PRISM_DIAGNOSTIC_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
#include "prism/util/pm_list.h"
|
6
|
+
|
7
|
+
#include <stdbool.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
#include <assert.h>
|
10
|
+
|
11
|
+
// This struct represents a diagnostic found during parsing.
|
12
|
+
typedef struct {
|
13
|
+
pm_list_node_t node;
|
14
|
+
const uint8_t *start;
|
15
|
+
const uint8_t *end;
|
16
|
+
const char *message;
|
17
|
+
} pm_diagnostic_t;
|
18
|
+
|
19
|
+
typedef enum {
|
20
|
+
PM_ERR_ALIAS_ARGUMENT,
|
21
|
+
PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
|
22
|
+
PM_ERR_ARGUMENT_AFTER_BLOCK,
|
23
|
+
PM_ERR_ARGUMENT_BARE_HASH,
|
24
|
+
PM_ERR_ARGUMENT_BLOCK_MULTI,
|
25
|
+
PM_ERR_ARGUMENT_FORMAL_CLASS,
|
26
|
+
PM_ERR_ARGUMENT_FORMAL_CONSTANT,
|
27
|
+
PM_ERR_ARGUMENT_FORMAL_GLOBAL,
|
28
|
+
PM_ERR_ARGUMENT_FORMAL_IVAR,
|
29
|
+
PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
|
30
|
+
PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
|
31
|
+
PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
|
32
|
+
PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
|
33
|
+
PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
|
34
|
+
PM_ERR_ARGUMENT_TERM_PAREN,
|
35
|
+
PM_ERR_ARGUMENT_UNEXPECTED_BLOCK,
|
36
|
+
PM_ERR_ARRAY_ELEMENT,
|
37
|
+
PM_ERR_ARRAY_EXPRESSION,
|
38
|
+
PM_ERR_ARRAY_EXPRESSION_AFTER_STAR,
|
39
|
+
PM_ERR_ARRAY_SEPARATOR,
|
40
|
+
PM_ERR_ARRAY_TERM,
|
41
|
+
PM_ERR_BEGIN_LONELY_ELSE,
|
42
|
+
PM_ERR_BEGIN_TERM,
|
43
|
+
PM_ERR_BEGIN_UPCASE_BRACE,
|
44
|
+
PM_ERR_BEGIN_UPCASE_TERM,
|
45
|
+
PM_ERR_BEGIN_UPCASE_TOPLEVEL,
|
46
|
+
PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE,
|
47
|
+
PM_ERR_BLOCK_PARAM_PIPE_TERM,
|
48
|
+
PM_ERR_BLOCK_TERM_BRACE,
|
49
|
+
PM_ERR_BLOCK_TERM_END,
|
50
|
+
PM_ERR_CANNOT_PARSE_EXPRESSION,
|
51
|
+
PM_ERR_CANNOT_PARSE_STRING_PART,
|
52
|
+
PM_ERR_CASE_EXPRESSION_AFTER_CASE,
|
53
|
+
PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
|
54
|
+
PM_ERR_CASE_MISSING_CONDITIONS,
|
55
|
+
PM_ERR_CASE_TERM,
|
56
|
+
PM_ERR_CLASS_IN_METHOD,
|
57
|
+
PM_ERR_CLASS_NAME,
|
58
|
+
PM_ERR_CLASS_SUPERCLASS,
|
59
|
+
PM_ERR_CLASS_TERM,
|
60
|
+
PM_ERR_CLASS_UNEXPECTED_END,
|
61
|
+
PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
|
62
|
+
PM_ERR_CONDITIONAL_IF_PREDICATE,
|
63
|
+
PM_ERR_CONDITIONAL_PREDICATE_TERM,
|
64
|
+
PM_ERR_CONDITIONAL_TERM,
|
65
|
+
PM_ERR_CONDITIONAL_TERM_ELSE,
|
66
|
+
PM_ERR_CONDITIONAL_UNLESS_PREDICATE,
|
67
|
+
PM_ERR_CONDITIONAL_UNTIL_PREDICATE,
|
68
|
+
PM_ERR_CONDITIONAL_WHILE_PREDICATE,
|
69
|
+
PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT,
|
70
|
+
PM_ERR_DEF_ENDLESS,
|
71
|
+
PM_ERR_DEF_ENDLESS_SETTER,
|
72
|
+
PM_ERR_DEF_NAME,
|
73
|
+
PM_ERR_DEF_NAME_AFTER_RECEIVER,
|
74
|
+
PM_ERR_DEF_PARAMS_TERM,
|
75
|
+
PM_ERR_DEF_PARAMS_TERM_PAREN,
|
76
|
+
PM_ERR_DEF_RECEIVER,
|
77
|
+
PM_ERR_DEF_RECEIVER_TERM,
|
78
|
+
PM_ERR_DEF_TERM,
|
79
|
+
PM_ERR_DEFINED_EXPRESSION,
|
80
|
+
PM_ERR_EMBDOC_TERM,
|
81
|
+
PM_ERR_EMBEXPR_END,
|
82
|
+
PM_ERR_EMBVAR_INVALID,
|
83
|
+
PM_ERR_END_UPCASE_BRACE,
|
84
|
+
PM_ERR_END_UPCASE_TERM,
|
85
|
+
PM_ERR_ESCAPE_INVALID_CONTROL,
|
86
|
+
PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT,
|
87
|
+
PM_ERR_ESCAPE_INVALID_HEXADECIMAL,
|
88
|
+
PM_ERR_ESCAPE_INVALID_META,
|
89
|
+
PM_ERR_ESCAPE_INVALID_META_REPEAT,
|
90
|
+
PM_ERR_ESCAPE_INVALID_UNICODE,
|
91
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
|
92
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
|
93
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
|
94
|
+
PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
|
95
|
+
PM_ERR_EXPECT_ARGUMENT,
|
96
|
+
PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
|
97
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
|
98
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
|
99
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
|
100
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
|
101
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
|
102
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
|
103
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
|
104
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
|
105
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
|
106
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
|
107
|
+
PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
|
108
|
+
PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
|
109
|
+
PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
|
110
|
+
PM_ERR_EXPECT_RBRACKET,
|
111
|
+
PM_ERR_EXPECT_RPAREN,
|
112
|
+
PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
|
113
|
+
PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
|
114
|
+
PM_ERR_EXPECT_STRING_CONTENT,
|
115
|
+
PM_ERR_EXPECT_WHEN_DELIMITER,
|
116
|
+
PM_ERR_EXPRESSION_BARE_HASH,
|
117
|
+
PM_ERR_FOR_COLLECTION,
|
118
|
+
PM_ERR_FOR_IN,
|
119
|
+
PM_ERR_FOR_INDEX,
|
120
|
+
PM_ERR_FOR_TERM,
|
121
|
+
PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
|
122
|
+
PM_ERR_HASH_KEY,
|
123
|
+
PM_ERR_HASH_ROCKET,
|
124
|
+
PM_ERR_HASH_TERM,
|
125
|
+
PM_ERR_HASH_VALUE,
|
126
|
+
PM_ERR_HEREDOC_TERM,
|
127
|
+
PM_ERR_INCOMPLETE_QUESTION_MARK,
|
128
|
+
PM_ERR_INCOMPLETE_VARIABLE_CLASS,
|
129
|
+
PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
|
130
|
+
PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
|
131
|
+
PM_ERR_INVALID_FLOAT_EXPONENT,
|
132
|
+
PM_ERR_INVALID_NUMBER_BINARY,
|
133
|
+
PM_ERR_INVALID_NUMBER_DECIMAL,
|
134
|
+
PM_ERR_INVALID_NUMBER_HEXADECIMAL,
|
135
|
+
PM_ERR_INVALID_NUMBER_OCTAL,
|
136
|
+
PM_ERR_INVALID_NUMBER_UNDERSCORE,
|
137
|
+
PM_ERR_INVALID_PERCENT,
|
138
|
+
PM_ERR_INVALID_TOKEN,
|
139
|
+
PM_ERR_INVALID_VARIABLE_GLOBAL,
|
140
|
+
PM_ERR_LAMBDA_OPEN,
|
141
|
+
PM_ERR_LAMBDA_TERM_BRACE,
|
142
|
+
PM_ERR_LAMBDA_TERM_END,
|
143
|
+
PM_ERR_LIST_I_LOWER_ELEMENT,
|
144
|
+
PM_ERR_LIST_I_LOWER_TERM,
|
145
|
+
PM_ERR_LIST_I_UPPER_ELEMENT,
|
146
|
+
PM_ERR_LIST_I_UPPER_TERM,
|
147
|
+
PM_ERR_LIST_W_LOWER_ELEMENT,
|
148
|
+
PM_ERR_LIST_W_LOWER_TERM,
|
149
|
+
PM_ERR_LIST_W_UPPER_ELEMENT,
|
150
|
+
PM_ERR_LIST_W_UPPER_TERM,
|
151
|
+
PM_ERR_MALLOC_FAILED,
|
152
|
+
PM_ERR_MODULE_IN_METHOD,
|
153
|
+
PM_ERR_MODULE_NAME,
|
154
|
+
PM_ERR_MODULE_TERM,
|
155
|
+
PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
|
156
|
+
PM_ERR_NOT_EXPRESSION,
|
157
|
+
PM_ERR_NUMBER_LITERAL_UNDERSCORE,
|
158
|
+
PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
|
159
|
+
PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
|
160
|
+
PM_ERR_OPERATOR_MULTI_ASSIGN,
|
161
|
+
PM_ERR_OPERATOR_WRITE_BLOCK,
|
162
|
+
PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
|
163
|
+
PM_ERR_PARAMETER_BLOCK_MULTI,
|
164
|
+
PM_ERR_PARAMETER_METHOD_NAME,
|
165
|
+
PM_ERR_PARAMETER_NAME_REPEAT,
|
166
|
+
PM_ERR_PARAMETER_NO_DEFAULT,
|
167
|
+
PM_ERR_PARAMETER_NO_DEFAULT_KW,
|
168
|
+
PM_ERR_PARAMETER_NUMBERED_RESERVED,
|
169
|
+
PM_ERR_PARAMETER_ORDER,
|
170
|
+
PM_ERR_PARAMETER_SPLAT_MULTI,
|
171
|
+
PM_ERR_PARAMETER_STAR,
|
172
|
+
PM_ERR_PARAMETER_UNEXPECTED_FWD,
|
173
|
+
PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
|
174
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
|
175
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
|
176
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
|
177
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
|
178
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
|
179
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
|
180
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
|
181
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
|
182
|
+
PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
|
183
|
+
PM_ERR_PATTERN_HASH_KEY,
|
184
|
+
PM_ERR_PATTERN_HASH_KEY_LABEL,
|
185
|
+
PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
|
186
|
+
PM_ERR_PATTERN_LABEL_AFTER_COMMA,
|
187
|
+
PM_ERR_PATTERN_REST,
|
188
|
+
PM_ERR_PATTERN_TERM_BRACE,
|
189
|
+
PM_ERR_PATTERN_TERM_BRACKET,
|
190
|
+
PM_ERR_PATTERN_TERM_PAREN,
|
191
|
+
PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
|
192
|
+
PM_ERR_REGEXP_TERM,
|
193
|
+
PM_ERR_RESCUE_EXPRESSION,
|
194
|
+
PM_ERR_RESCUE_MODIFIER_VALUE,
|
195
|
+
PM_ERR_RESCUE_TERM,
|
196
|
+
PM_ERR_RESCUE_VARIABLE,
|
197
|
+
PM_ERR_RETURN_INVALID,
|
198
|
+
PM_ERR_STRING_CONCATENATION,
|
199
|
+
PM_ERR_STRING_INTERPOLATED_TERM,
|
200
|
+
PM_ERR_STRING_LITERAL_TERM,
|
201
|
+
PM_ERR_SYMBOL_INVALID,
|
202
|
+
PM_ERR_SYMBOL_TERM_DYNAMIC,
|
203
|
+
PM_ERR_SYMBOL_TERM_INTERPOLATED,
|
204
|
+
PM_ERR_TERNARY_COLON,
|
205
|
+
PM_ERR_TERNARY_EXPRESSION_FALSE,
|
206
|
+
PM_ERR_TERNARY_EXPRESSION_TRUE,
|
207
|
+
PM_ERR_UNARY_RECEIVER_BANG,
|
208
|
+
PM_ERR_UNARY_RECEIVER_MINUS,
|
209
|
+
PM_ERR_UNARY_RECEIVER_PLUS,
|
210
|
+
PM_ERR_UNARY_RECEIVER_TILDE,
|
211
|
+
PM_ERR_UNDEF_ARGUMENT,
|
212
|
+
PM_ERR_UNTIL_TERM,
|
213
|
+
PM_ERR_WHILE_TERM,
|
214
|
+
PM_ERR_WRITE_TARGET_READONLY,
|
215
|
+
PM_ERR_WRITE_TARGET_UNEXPECTED,
|
216
|
+
PM_ERR_XSTRING_TERM,
|
217
|
+
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
|
218
|
+
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
|
219
|
+
PM_WARN_AMBIGUOUS_PREFIX_STAR,
|
220
|
+
PM_WARN_AMBIGUOUS_SLASH,
|
221
|
+
/* This must be the last member. */
|
222
|
+
PM_DIAGNOSTIC_ID_LEN,
|
223
|
+
} pm_diagnostic_id_t;
|
224
|
+
|
225
|
+
// Append a diagnostic to the given list of diagnostics.
|
226
|
+
bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
|
227
|
+
|
228
|
+
// Deallocate the internal state of the given diagnostic list.
|
229
|
+
void pm_diagnostic_list_free(pm_list_t *list);
|
230
|
+
|
231
|
+
#endif
|
@@ -0,0 +1,95 @@
|
|
1
|
+
#ifndef PRISM_ENCODING_H
|
2
|
+
#define PRISM_ENCODING_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
|
6
|
+
#include <assert.h>
|
7
|
+
#include <stdbool.h>
|
8
|
+
#include <stddef.h>
|
9
|
+
#include <stdint.h>
|
10
|
+
|
11
|
+
// This struct defines the functions necessary to implement the encoding
|
12
|
+
// interface so we can determine how many bytes the subsequent character takes.
|
13
|
+
// Each callback should return the number of bytes, or 0 if the next bytes are
|
14
|
+
// invalid for the encoding and type.
|
15
|
+
typedef struct {
|
16
|
+
// Return the number of bytes that the next character takes if it is valid
|
17
|
+
// in the encoding. Does not read more than n bytes. It is assumed that n is
|
18
|
+
// at least 1.
|
19
|
+
size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
|
20
|
+
|
21
|
+
// Return the number of bytes that the next character takes if it is valid
|
22
|
+
// in the encoding and is alphabetical. Does not read more than n bytes. It
|
23
|
+
// is assumed that n is at least 1.
|
24
|
+
size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
|
25
|
+
|
26
|
+
// Return the number of bytes that the next character takes if it is valid
|
27
|
+
// in the encoding and is alphanumeric. Does not read more than n bytes. It
|
28
|
+
// is assumed that n is at least 1.
|
29
|
+
size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
|
30
|
+
|
31
|
+
// Return true if the next character is valid in the encoding and is an
|
32
|
+
// uppercase character. Does not read more than n bytes. It is assumed that
|
33
|
+
// n is at least 1.
|
34
|
+
bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
|
35
|
+
|
36
|
+
// The name of the encoding. This should correspond to a value that can be
|
37
|
+
// passed to Encoding.find in Ruby.
|
38
|
+
const char *name;
|
39
|
+
|
40
|
+
// True if the encoding is a multibyte encoding.
|
41
|
+
bool multibyte;
|
42
|
+
} pm_encoding_t;
|
43
|
+
|
44
|
+
// These bits define the location of each bit of metadata within the various
|
45
|
+
// lookup tables that are used to determine the properties of a character.
|
46
|
+
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0) // alphabetic flag bit; parenthesized so it expands safely inside larger expressions
|
47
|
+
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1) // alphanumeric flag bit; parenthesized so it expands safely inside larger expressions
|
48
|
+
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2) // uppercase flag bit; parenthesized so it expands safely inside larger expressions
|
49
|
+
|
50
|
+
// These functions are reused by some other encodings, so they are defined here
|
51
|
+
// so they can be shared.
|
52
|
+
size_t pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
|
53
|
+
size_t pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
|
54
|
+
bool pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
|
55
|
+
|
56
|
+
// These functions are shared between the actual encoding and the fast path in
|
57
|
+
// the parser so they need to be internally visible.
|
58
|
+
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
|
59
|
+
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
|
60
|
+
|
61
|
+
// This lookup table is referenced in both the UTF-8 encoding file and the
|
62
|
+
// parser directly in order to speed up the default encoding processing.
|
63
|
+
extern const uint8_t pm_encoding_unicode_table[256];
|
64
|
+
|
65
|
+
// These are the encodings that are supported by the parser. They are defined in
|
66
|
+
// their own files in the src/enc directory.
|
67
|
+
extern pm_encoding_t pm_encoding_ascii;
|
68
|
+
extern pm_encoding_t pm_encoding_ascii_8bit;
|
69
|
+
extern pm_encoding_t pm_encoding_big5;
|
70
|
+
extern pm_encoding_t pm_encoding_euc_jp;
|
71
|
+
extern pm_encoding_t pm_encoding_gbk;
|
72
|
+
extern pm_encoding_t pm_encoding_iso_8859_1;
|
73
|
+
extern pm_encoding_t pm_encoding_iso_8859_2;
|
74
|
+
extern pm_encoding_t pm_encoding_iso_8859_3;
|
75
|
+
extern pm_encoding_t pm_encoding_iso_8859_4;
|
76
|
+
extern pm_encoding_t pm_encoding_iso_8859_5;
|
77
|
+
extern pm_encoding_t pm_encoding_iso_8859_6;
|
78
|
+
extern pm_encoding_t pm_encoding_iso_8859_7;
|
79
|
+
extern pm_encoding_t pm_encoding_iso_8859_8;
|
80
|
+
extern pm_encoding_t pm_encoding_iso_8859_9;
|
81
|
+
extern pm_encoding_t pm_encoding_iso_8859_10;
|
82
|
+
extern pm_encoding_t pm_encoding_iso_8859_11;
|
83
|
+
extern pm_encoding_t pm_encoding_iso_8859_13;
|
84
|
+
extern pm_encoding_t pm_encoding_iso_8859_14;
|
85
|
+
extern pm_encoding_t pm_encoding_iso_8859_15;
|
86
|
+
extern pm_encoding_t pm_encoding_iso_8859_16;
|
87
|
+
extern pm_encoding_t pm_encoding_koi8_r;
|
88
|
+
extern pm_encoding_t pm_encoding_shift_jis;
|
89
|
+
extern pm_encoding_t pm_encoding_utf_8;
|
90
|
+
extern pm_encoding_t pm_encoding_utf8_mac;
|
91
|
+
extern pm_encoding_t pm_encoding_windows_31j;
|
92
|
+
extern pm_encoding_t pm_encoding_windows_1251;
|
93
|
+
extern pm_encoding_t pm_encoding_windows_1252;
|
94
|
+
|
95
|
+
#endif
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#ifndef PRISM_NODE_H
|
2
|
+
#define PRISM_NODE_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
#include "prism/parser.h"
|
6
|
+
|
7
|
+
// Append a new node onto the end of the node list.
|
8
|
+
void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
|
9
|
+
|
10
|
+
// Clear the node but preserve its location.
|
11
|
+
void pm_node_clear(pm_node_t *node);
|
12
|
+
|
13
|
+
// Deallocate a node and all of its children.
|
14
|
+
PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
|
15
|
+
|
16
|
+
// This struct stores the information gathered by the pm_node_memsize function.
|
17
|
+
// It contains both the memory footprint and additional metadata about the
|
18
|
+
// shape of the tree.
|
19
|
+
typedef struct {
|
20
|
+
size_t memsize;
|
21
|
+
size_t node_count;
|
22
|
+
} pm_memsize_t;
|
23
|
+
|
24
|
+
// Calculates the memory footprint of a given node.
|
25
|
+
PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
|
26
|
+
|
27
|
+
// Returns a string representation of the given node type.
|
28
|
+
PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
|
29
|
+
|
30
|
+
#define PM_EMPTY_NODE_LIST ((pm_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
|
31
|
+
|
32
|
+
// ScopeNodes are helper nodes, and will never be part of the AST. We manually
|
33
|
+
// declare them here to avoid generating them.
|
34
|
+
typedef struct pm_scope_node {
|
35
|
+
pm_node_t base;
|
36
|
+
struct pm_parameters_node *parameters;
|
37
|
+
pm_node_t *body;
|
38
|
+
pm_constant_id_list_t locals;
|
39
|
+
} pm_scope_node_t;
|
40
|
+
|
41
|
+
#endif // PRISM_NODE_H
|
@@ -0,0 +1,141 @@
|
|
1
|
+
#ifndef PRISM_PACK_H
|
2
|
+
#define PRISM_PACK_H
|
3
|
+
|
4
|
+
#include "prism/defines.h"
|
5
|
+
|
6
|
+
#include <stdint.h>
|
7
|
+
#include <stdlib.h>
|
8
|
+
|
9
|
+
typedef enum pm_pack_version {
|
10
|
+
PM_PACK_VERSION_3_2_0
|
11
|
+
} pm_pack_version;
|
12
|
+
|
13
|
+
typedef enum pm_pack_variant {
|
14
|
+
PM_PACK_VARIANT_PACK,
|
15
|
+
PM_PACK_VARIANT_UNPACK
|
16
|
+
} pm_pack_variant;
|
17
|
+
|
18
|
+
typedef enum pm_pack_type {
|
19
|
+
PM_PACK_SPACE,
|
20
|
+
PM_PACK_COMMENT,
|
21
|
+
PM_PACK_INTEGER,
|
22
|
+
PM_PACK_UTF8,
|
23
|
+
PM_PACK_BER,
|
24
|
+
PM_PACK_FLOAT,
|
25
|
+
PM_PACK_STRING_SPACE_PADDED,
|
26
|
+
PM_PACK_STRING_NULL_PADDED,
|
27
|
+
PM_PACK_STRING_NULL_TERMINATED,
|
28
|
+
PM_PACK_STRING_MSB,
|
29
|
+
PM_PACK_STRING_LSB,
|
30
|
+
PM_PACK_STRING_HEX_HIGH,
|
31
|
+
PM_PACK_STRING_HEX_LOW,
|
32
|
+
PM_PACK_STRING_UU,
|
33
|
+
PM_PACK_STRING_MIME,
|
34
|
+
PM_PACK_STRING_BASE64,
|
35
|
+
PM_PACK_STRING_FIXED,
|
36
|
+
PM_PACK_STRING_POINTER,
|
37
|
+
PM_PACK_MOVE,
|
38
|
+
PM_PACK_BACK,
|
39
|
+
PM_PACK_NULL,
|
40
|
+
PM_PACK_END
|
41
|
+
} pm_pack_type;
|
42
|
+
|
43
|
+
typedef enum pm_pack_signed {
|
44
|
+
PM_PACK_UNSIGNED,
|
45
|
+
PM_PACK_SIGNED,
|
46
|
+
PM_PACK_SIGNED_NA
|
47
|
+
} pm_pack_signed;
|
48
|
+
|
49
|
+
typedef enum pm_pack_endian {
|
50
|
+
PM_PACK_AGNOSTIC_ENDIAN,
|
51
|
+
PM_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
|
52
|
+
PM_PACK_BIG_ENDIAN, // aka 'network', or 'N'
|
53
|
+
PM_PACK_NATIVE_ENDIAN,
|
54
|
+
PM_PACK_ENDIAN_NA
|
55
|
+
} pm_pack_endian;
|
56
|
+
|
57
|
+
typedef enum pm_pack_size {
|
58
|
+
PM_PACK_SIZE_SHORT,
|
59
|
+
PM_PACK_SIZE_INT,
|
60
|
+
PM_PACK_SIZE_LONG,
|
61
|
+
PM_PACK_SIZE_LONG_LONG,
|
62
|
+
PM_PACK_SIZE_8,
|
63
|
+
PM_PACK_SIZE_16,
|
64
|
+
PM_PACK_SIZE_32,
|
65
|
+
PM_PACK_SIZE_64,
|
66
|
+
PM_PACK_SIZE_P,
|
67
|
+
PM_PACK_SIZE_NA
|
68
|
+
} pm_pack_size;
|
69
|
+
|
70
|
+
typedef enum pm_pack_length_type {
|
71
|
+
PM_PACK_LENGTH_FIXED,
|
72
|
+
PM_PACK_LENGTH_MAX,
|
73
|
+
PM_PACK_LENGTH_RELATIVE, // special case for unpack @*
|
74
|
+
PM_PACK_LENGTH_NA
|
75
|
+
} pm_pack_length_type;
|
76
|
+
|
77
|
+
typedef enum pm_pack_encoding {
|
78
|
+
PM_PACK_ENCODING_START,
|
79
|
+
PM_PACK_ENCODING_ASCII_8BIT,
|
80
|
+
PM_PACK_ENCODING_US_ASCII,
|
81
|
+
PM_PACK_ENCODING_UTF_8
|
82
|
+
} pm_pack_encoding;
|
83
|
+
|
84
|
+
typedef enum pm_pack_result {
|
85
|
+
PM_PACK_OK,
|
86
|
+
PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
|
87
|
+
PM_PACK_ERROR_UNKNOWN_DIRECTIVE,
|
88
|
+
PM_PACK_ERROR_LENGTH_TOO_BIG,
|
89
|
+
PM_PACK_ERROR_BANG_NOT_ALLOWED,
|
90
|
+
PM_PACK_ERROR_DOUBLE_ENDIAN
|
91
|
+
} pm_pack_result;
|
92
|
+
|
93
|
+
// Parse a single directive from a pack or unpack format string.
|
94
|
+
//
|
95
|
+
// Parameters:
|
96
|
+
// - [in] pm_pack_version version the version of Ruby (NOTE: not a parameter of the signature declared below)
|
97
|
+
// - [in] pm_pack_variant variant_arg pack or unpack
|
98
|
+
// - [in out] const char **format the start of the next directive to parse
|
99
|
+
// on calling, and advanced beyond the parsed directive on return, or as
|
100
|
+
// much of it as was consumed until an error was encountered
|
101
|
+
// - [in] const char *format_end the end of the format string
|
102
|
+
// - [out] pm_pack_type *type the type of the directive
|
103
|
+
// - [out] pm_pack_signed *signed_type
|
104
|
+
// whether the value is signed
|
105
|
+
// - [out] pm_pack_endian *endian the endianness of the value
|
106
|
+
// - [out] pm_pack_size *size the size of the value
|
107
|
+
// - [out] pm_pack_length_type *length_type
|
108
|
+
// what kind of length is specified
|
109
|
+
// - [out] uint64_t *length the length of the directive
|
110
|
+
// - [in out] pm_pack_encoding *encoding
|
111
|
+
// takes the current encoding of the string
|
112
|
+
// which would result from parsing the whole format string, and returns a
|
113
|
+
// possibly changed directive - the encoding should be
|
114
|
+
// PM_PACK_ENCODING_START when pm_pack_parse is called for the first
|
115
|
+
// directive in a format string
|
116
|
+
//
|
117
|
+
// Return:
|
118
|
+
// - PM_PACK_OK on success
|
119
|
+
// - PM_PACK_ERROR_* on error
|
120
|
+
//
|
121
|
+
// Notes:
|
122
|
+
// Consult Ruby documentation for the meaning of directives.
|
123
|
+
PRISM_EXPORTED_FUNCTION pm_pack_result
|
124
|
+
pm_pack_parse(
|
125
|
+
pm_pack_variant variant_arg,
|
126
|
+
const char **format,
|
127
|
+
const char *format_end,
|
128
|
+
pm_pack_type *type,
|
129
|
+
pm_pack_signed *signed_type,
|
130
|
+
pm_pack_endian *endian,
|
131
|
+
pm_pack_size *size,
|
132
|
+
pm_pack_length_type *length_type,
|
133
|
+
uint64_t *length,
|
134
|
+
pm_pack_encoding *encoding
|
135
|
+
);
|
136
|
+
|
137
|
+
// prism abstracts sizes away from the native system - this converts an abstract
|
138
|
+
// size to a native size.
|
139
|
+
PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
|
140
|
+
|
141
|
+
#endif
|