prism 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +172 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +84 -0
  7. data/README.md +89 -0
  8. data/config.yml +2481 -0
  9. data/docs/build_system.md +74 -0
  10. data/docs/building.md +22 -0
  11. data/docs/configuration.md +60 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +117 -0
  14. data/docs/fuzzing.md +93 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/mapping.md +117 -0
  17. data/docs/ripper.md +36 -0
  18. data/docs/ruby_api.md +25 -0
  19. data/docs/serialization.md +181 -0
  20. data/docs/testing.md +55 -0
  21. data/ext/prism/api_node.c +4725 -0
  22. data/ext/prism/api_pack.c +256 -0
  23. data/ext/prism/extconf.rb +136 -0
  24. data/ext/prism/extension.c +626 -0
  25. data/ext/prism/extension.h +18 -0
  26. data/include/prism/ast.h +1932 -0
  27. data/include/prism/defines.h +45 -0
  28. data/include/prism/diagnostic.h +231 -0
  29. data/include/prism/enc/pm_encoding.h +95 -0
  30. data/include/prism/node.h +41 -0
  31. data/include/prism/pack.h +141 -0
  32. data/include/prism/parser.h +418 -0
  33. data/include/prism/regexp.h +19 -0
  34. data/include/prism/unescape.h +48 -0
  35. data/include/prism/util/pm_buffer.h +51 -0
  36. data/include/prism/util/pm_char.h +91 -0
  37. data/include/prism/util/pm_constant_pool.h +78 -0
  38. data/include/prism/util/pm_list.h +67 -0
  39. data/include/prism/util/pm_memchr.h +14 -0
  40. data/include/prism/util/pm_newline_list.h +61 -0
  41. data/include/prism/util/pm_state_stack.h +24 -0
  42. data/include/prism/util/pm_string.h +61 -0
  43. data/include/prism/util/pm_string_list.h +25 -0
  44. data/include/prism/util/pm_strpbrk.h +29 -0
  45. data/include/prism/version.h +4 -0
  46. data/include/prism.h +82 -0
  47. data/lib/prism/compiler.rb +465 -0
  48. data/lib/prism/debug.rb +157 -0
  49. data/lib/prism/desugar_compiler.rb +206 -0
  50. data/lib/prism/dispatcher.rb +2051 -0
  51. data/lib/prism/dsl.rb +750 -0
  52. data/lib/prism/ffi.rb +251 -0
  53. data/lib/prism/lex_compat.rb +838 -0
  54. data/lib/prism/mutation_compiler.rb +718 -0
  55. data/lib/prism/node.rb +14540 -0
  56. data/lib/prism/node_ext.rb +55 -0
  57. data/lib/prism/node_inspector.rb +68 -0
  58. data/lib/prism/pack.rb +185 -0
  59. data/lib/prism/parse_result/comments.rb +172 -0
  60. data/lib/prism/parse_result/newlines.rb +60 -0
  61. data/lib/prism/parse_result.rb +266 -0
  62. data/lib/prism/pattern.rb +239 -0
  63. data/lib/prism/ripper_compat.rb +174 -0
  64. data/lib/prism/serialize.rb +662 -0
  65. data/lib/prism/visitor.rb +470 -0
  66. data/lib/prism.rb +64 -0
  67. data/prism.gemspec +113 -0
  68. data/src/diagnostic.c +287 -0
  69. data/src/enc/pm_big5.c +52 -0
  70. data/src/enc/pm_euc_jp.c +58 -0
  71. data/src/enc/pm_gbk.c +61 -0
  72. data/src/enc/pm_shift_jis.c +56 -0
  73. data/src/enc/pm_tables.c +507 -0
  74. data/src/enc/pm_unicode.c +2324 -0
  75. data/src/enc/pm_windows_31j.c +56 -0
  76. data/src/node.c +2633 -0
  77. data/src/pack.c +493 -0
  78. data/src/prettyprint.c +2136 -0
  79. data/src/prism.c +14587 -0
  80. data/src/regexp.c +580 -0
  81. data/src/serialize.c +1899 -0
  82. data/src/token_type.c +349 -0
  83. data/src/unescape.c +637 -0
  84. data/src/util/pm_buffer.c +103 -0
  85. data/src/util/pm_char.c +272 -0
  86. data/src/util/pm_constant_pool.c +252 -0
  87. data/src/util/pm_list.c +41 -0
  88. data/src/util/pm_memchr.c +33 -0
  89. data/src/util/pm_newline_list.c +134 -0
  90. data/src/util/pm_state_stack.c +19 -0
  91. data/src/util/pm_string.c +200 -0
  92. data/src/util/pm_string_list.c +29 -0
  93. data/src/util/pm_strncasecmp.c +17 -0
  94. data/src/util/pm_strpbrk.c +66 -0
  95. metadata +138 -0
@@ -0,0 +1,45 @@
1
+ #ifndef PRISM_DEFINES_H
2
+ #define PRISM_DEFINES_H
3
+
4
+ // This file should be included first by any *.h or *.c in prism
5
+
6
+ #include <ctype.h>
7
+ #include <stdarg.h>
8
+ #include <stddef.h>
9
+ #include <stdint.h>
10
+ #include <stdio.h>
11
+ #include <string.h>
12
+
13
+ // PRISM_EXPORTED_FUNCTION
14
+ #ifndef PRISM_EXPORTED_FUNCTION
15
+ # ifdef PRISM_EXPORT_SYMBOLS
16
+ # ifdef _WIN32
17
+ # define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
18
+ # else
19
+ # define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
20
+ # endif
21
+ # else
22
+ # define PRISM_EXPORTED_FUNCTION
23
+ # endif
24
+ #endif
25
+
26
+ // PRISM_ATTRIBUTE_UNUSED
27
+ #if defined(__GNUC__)
28
+ # define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
29
+ #else
30
+ # define PRISM_ATTRIBUTE_UNUSED
31
+ #endif
32
+
33
+ // inline
34
+ #if defined(_MSC_VER) && !defined(inline)
35
+ # define inline __inline
36
+ #endif
37
+
38
+ // MSVC versions before Visual Studio 2015 (_MSC_VER below 1900) use _snprintf
39
+ #if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
40
+ # define snprintf _snprintf
41
+ #endif
42
+
43
+ int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
44
+
45
+ #endif
@@ -0,0 +1,231 @@
1
+ #ifndef PRISM_DIAGNOSTIC_H
2
+ #define PRISM_DIAGNOSTIC_H
3
+
4
+ #include "prism/defines.h"
5
+ #include "prism/util/pm_list.h"
6
+
7
+ #include <stdbool.h>
8
+ #include <stdlib.h>
9
+ #include <assert.h>
10
+
11
+ // This struct represents a diagnostic found during parsing.
12
+ typedef struct {
13
+ pm_list_node_t node;
14
+ const uint8_t *start;
15
+ const uint8_t *end;
16
+ const char *message;
17
+ } pm_diagnostic_t;
18
+
19
+ typedef enum {
20
+ PM_ERR_ALIAS_ARGUMENT,
21
+ PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
22
+ PM_ERR_ARGUMENT_AFTER_BLOCK,
23
+ PM_ERR_ARGUMENT_BARE_HASH,
24
+ PM_ERR_ARGUMENT_BLOCK_MULTI,
25
+ PM_ERR_ARGUMENT_FORMAL_CLASS,
26
+ PM_ERR_ARGUMENT_FORMAL_CONSTANT,
27
+ PM_ERR_ARGUMENT_FORMAL_GLOBAL,
28
+ PM_ERR_ARGUMENT_FORMAL_IVAR,
29
+ PM_ERR_ARGUMENT_NO_FORWARDING_AMP,
30
+ PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
31
+ PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
32
+ PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT,
33
+ PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT,
34
+ PM_ERR_ARGUMENT_TERM_PAREN,
35
+ PM_ERR_ARGUMENT_UNEXPECTED_BLOCK,
36
+ PM_ERR_ARRAY_ELEMENT,
37
+ PM_ERR_ARRAY_EXPRESSION,
38
+ PM_ERR_ARRAY_EXPRESSION_AFTER_STAR,
39
+ PM_ERR_ARRAY_SEPARATOR,
40
+ PM_ERR_ARRAY_TERM,
41
+ PM_ERR_BEGIN_LONELY_ELSE,
42
+ PM_ERR_BEGIN_TERM,
43
+ PM_ERR_BEGIN_UPCASE_BRACE,
44
+ PM_ERR_BEGIN_UPCASE_TERM,
45
+ PM_ERR_BEGIN_UPCASE_TOPLEVEL,
46
+ PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE,
47
+ PM_ERR_BLOCK_PARAM_PIPE_TERM,
48
+ PM_ERR_BLOCK_TERM_BRACE,
49
+ PM_ERR_BLOCK_TERM_END,
50
+ PM_ERR_CANNOT_PARSE_EXPRESSION,
51
+ PM_ERR_CANNOT_PARSE_STRING_PART,
52
+ PM_ERR_CASE_EXPRESSION_AFTER_CASE,
53
+ PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
54
+ PM_ERR_CASE_MISSING_CONDITIONS,
55
+ PM_ERR_CASE_TERM,
56
+ PM_ERR_CLASS_IN_METHOD,
57
+ PM_ERR_CLASS_NAME,
58
+ PM_ERR_CLASS_SUPERCLASS,
59
+ PM_ERR_CLASS_TERM,
60
+ PM_ERR_CLASS_UNEXPECTED_END,
61
+ PM_ERR_CONDITIONAL_ELSIF_PREDICATE,
62
+ PM_ERR_CONDITIONAL_IF_PREDICATE,
63
+ PM_ERR_CONDITIONAL_PREDICATE_TERM,
64
+ PM_ERR_CONDITIONAL_TERM,
65
+ PM_ERR_CONDITIONAL_TERM_ELSE,
66
+ PM_ERR_CONDITIONAL_UNLESS_PREDICATE,
67
+ PM_ERR_CONDITIONAL_UNTIL_PREDICATE,
68
+ PM_ERR_CONDITIONAL_WHILE_PREDICATE,
69
+ PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT,
70
+ PM_ERR_DEF_ENDLESS,
71
+ PM_ERR_DEF_ENDLESS_SETTER,
72
+ PM_ERR_DEF_NAME,
73
+ PM_ERR_DEF_NAME_AFTER_RECEIVER,
74
+ PM_ERR_DEF_PARAMS_TERM,
75
+ PM_ERR_DEF_PARAMS_TERM_PAREN,
76
+ PM_ERR_DEF_RECEIVER,
77
+ PM_ERR_DEF_RECEIVER_TERM,
78
+ PM_ERR_DEF_TERM,
79
+ PM_ERR_DEFINED_EXPRESSION,
80
+ PM_ERR_EMBDOC_TERM,
81
+ PM_ERR_EMBEXPR_END,
82
+ PM_ERR_EMBVAR_INVALID,
83
+ PM_ERR_END_UPCASE_BRACE,
84
+ PM_ERR_END_UPCASE_TERM,
85
+ PM_ERR_ESCAPE_INVALID_CONTROL,
86
+ PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT,
87
+ PM_ERR_ESCAPE_INVALID_HEXADECIMAL,
88
+ PM_ERR_ESCAPE_INVALID_META,
89
+ PM_ERR_ESCAPE_INVALID_META_REPEAT,
90
+ PM_ERR_ESCAPE_INVALID_UNICODE,
91
+ PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS,
92
+ PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL,
93
+ PM_ERR_ESCAPE_INVALID_UNICODE_LONG,
94
+ PM_ERR_ESCAPE_INVALID_UNICODE_TERM,
95
+ PM_ERR_EXPECT_ARGUMENT,
96
+ PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
97
+ PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ,
98
+ PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ,
99
+ PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA,
100
+ PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL,
101
+ PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS,
102
+ PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN,
103
+ PM_ERR_EXPECT_EXPRESSION_AFTER_QUESTION,
104
+ PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR,
105
+ PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
106
+ PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
107
+ PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
108
+ PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
109
+ PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
110
+ PM_ERR_EXPECT_RBRACKET,
111
+ PM_ERR_EXPECT_RPAREN,
112
+ PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
113
+ PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
114
+ PM_ERR_EXPECT_STRING_CONTENT,
115
+ PM_ERR_EXPECT_WHEN_DELIMITER,
116
+ PM_ERR_EXPRESSION_BARE_HASH,
117
+ PM_ERR_FOR_COLLECTION,
118
+ PM_ERR_FOR_IN,
119
+ PM_ERR_FOR_INDEX,
120
+ PM_ERR_FOR_TERM,
121
+ PM_ERR_HASH_EXPRESSION_AFTER_LABEL,
122
+ PM_ERR_HASH_KEY,
123
+ PM_ERR_HASH_ROCKET,
124
+ PM_ERR_HASH_TERM,
125
+ PM_ERR_HASH_VALUE,
126
+ PM_ERR_HEREDOC_TERM,
127
+ PM_ERR_INCOMPLETE_QUESTION_MARK,
128
+ PM_ERR_INCOMPLETE_VARIABLE_CLASS,
129
+ PM_ERR_INCOMPLETE_VARIABLE_INSTANCE,
130
+ PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
131
+ PM_ERR_INVALID_FLOAT_EXPONENT,
132
+ PM_ERR_INVALID_NUMBER_BINARY,
133
+ PM_ERR_INVALID_NUMBER_DECIMAL,
134
+ PM_ERR_INVALID_NUMBER_HEXADECIMAL,
135
+ PM_ERR_INVALID_NUMBER_OCTAL,
136
+ PM_ERR_INVALID_NUMBER_UNDERSCORE,
137
+ PM_ERR_INVALID_PERCENT,
138
+ PM_ERR_INVALID_TOKEN,
139
+ PM_ERR_INVALID_VARIABLE_GLOBAL,
140
+ PM_ERR_LAMBDA_OPEN,
141
+ PM_ERR_LAMBDA_TERM_BRACE,
142
+ PM_ERR_LAMBDA_TERM_END,
143
+ PM_ERR_LIST_I_LOWER_ELEMENT,
144
+ PM_ERR_LIST_I_LOWER_TERM,
145
+ PM_ERR_LIST_I_UPPER_ELEMENT,
146
+ PM_ERR_LIST_I_UPPER_TERM,
147
+ PM_ERR_LIST_W_LOWER_ELEMENT,
148
+ PM_ERR_LIST_W_LOWER_TERM,
149
+ PM_ERR_LIST_W_UPPER_ELEMENT,
150
+ PM_ERR_LIST_W_UPPER_TERM,
151
+ PM_ERR_MALLOC_FAILED,
152
+ PM_ERR_MODULE_IN_METHOD,
153
+ PM_ERR_MODULE_NAME,
154
+ PM_ERR_MODULE_TERM,
155
+ PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
156
+ PM_ERR_NOT_EXPRESSION,
157
+ PM_ERR_NUMBER_LITERAL_UNDERSCORE,
158
+ PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED,
159
+ PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE,
160
+ PM_ERR_OPERATOR_MULTI_ASSIGN,
161
+ PM_ERR_OPERATOR_WRITE_BLOCK,
162
+ PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI,
163
+ PM_ERR_PARAMETER_BLOCK_MULTI,
164
+ PM_ERR_PARAMETER_METHOD_NAME,
165
+ PM_ERR_PARAMETER_NAME_REPEAT,
166
+ PM_ERR_PARAMETER_NO_DEFAULT,
167
+ PM_ERR_PARAMETER_NO_DEFAULT_KW,
168
+ PM_ERR_PARAMETER_NUMBERED_RESERVED,
169
+ PM_ERR_PARAMETER_ORDER,
170
+ PM_ERR_PARAMETER_SPLAT_MULTI,
171
+ PM_ERR_PARAMETER_STAR,
172
+ PM_ERR_PARAMETER_UNEXPECTED_FWD,
173
+ PM_ERR_PARAMETER_WILD_LOOSE_COMMA,
174
+ PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET,
175
+ PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET,
176
+ PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA,
177
+ PM_ERR_PATTERN_EXPRESSION_AFTER_IN,
178
+ PM_ERR_PATTERN_EXPRESSION_AFTER_KEY,
179
+ PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN,
180
+ PM_ERR_PATTERN_EXPRESSION_AFTER_PIN,
181
+ PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE,
182
+ PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE,
183
+ PM_ERR_PATTERN_HASH_KEY,
184
+ PM_ERR_PATTERN_HASH_KEY_LABEL,
185
+ PM_ERR_PATTERN_IDENT_AFTER_HROCKET,
186
+ PM_ERR_PATTERN_LABEL_AFTER_COMMA,
187
+ PM_ERR_PATTERN_REST,
188
+ PM_ERR_PATTERN_TERM_BRACE,
189
+ PM_ERR_PATTERN_TERM_BRACKET,
190
+ PM_ERR_PATTERN_TERM_PAREN,
191
+ PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN,
192
+ PM_ERR_REGEXP_TERM,
193
+ PM_ERR_RESCUE_EXPRESSION,
194
+ PM_ERR_RESCUE_MODIFIER_VALUE,
195
+ PM_ERR_RESCUE_TERM,
196
+ PM_ERR_RESCUE_VARIABLE,
197
+ PM_ERR_RETURN_INVALID,
198
+ PM_ERR_STRING_CONCATENATION,
199
+ PM_ERR_STRING_INTERPOLATED_TERM,
200
+ PM_ERR_STRING_LITERAL_TERM,
201
+ PM_ERR_SYMBOL_INVALID,
202
+ PM_ERR_SYMBOL_TERM_DYNAMIC,
203
+ PM_ERR_SYMBOL_TERM_INTERPOLATED,
204
+ PM_ERR_TERNARY_COLON,
205
+ PM_ERR_TERNARY_EXPRESSION_FALSE,
206
+ PM_ERR_TERNARY_EXPRESSION_TRUE,
207
+ PM_ERR_UNARY_RECEIVER_BANG,
208
+ PM_ERR_UNARY_RECEIVER_MINUS,
209
+ PM_ERR_UNARY_RECEIVER_PLUS,
210
+ PM_ERR_UNARY_RECEIVER_TILDE,
211
+ PM_ERR_UNDEF_ARGUMENT,
212
+ PM_ERR_UNTIL_TERM,
213
+ PM_ERR_WHILE_TERM,
214
+ PM_ERR_WRITE_TARGET_READONLY,
215
+ PM_ERR_WRITE_TARGET_UNEXPECTED,
216
+ PM_ERR_XSTRING_TERM,
217
+ PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
218
+ PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
219
+ PM_WARN_AMBIGUOUS_PREFIX_STAR,
220
+ PM_WARN_AMBIGUOUS_SLASH,
221
+ /* This must be the last member. */
222
+ PM_DIAGNOSTIC_ID_LEN,
223
+ } pm_diagnostic_id_t;
224
+
225
+ // Append a diagnostic to the given list of diagnostics.
226
+ bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
227
+
228
+ // Deallocate the internal state of the given diagnostic list.
229
+ void pm_diagnostic_list_free(pm_list_t *list);
230
+
231
+ #endif
@@ -0,0 +1,95 @@
1
+ #ifndef PRISM_ENCODING_H
2
+ #define PRISM_ENCODING_H
3
+
4
+ #include "prism/defines.h"
5
+
6
+ #include <assert.h>
7
+ #include <stdbool.h>
8
+ #include <stddef.h>
9
+ #include <stdint.h>
10
+
11
+ // This struct defines the functions necessary to implement the encoding
12
+ // interface so we can determine how many bytes the subsequent character takes.
13
+ // Each callback should return the number of bytes, or 0 if the next bytes are
14
+ // invalid for the encoding and type.
15
+ typedef struct {
16
+ // Return the number of bytes that the next character takes if it is valid
17
+ // in the encoding. Does not read more than n bytes. It is assumed that n is
18
+ // at least 1.
19
+ size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
20
+
21
+ // Return the number of bytes that the next character takes if it is valid
22
+ // in the encoding and is alphabetical. Does not read more than n bytes. It
23
+ // is assumed that n is at least 1.
24
+ size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
25
+
26
+ // Return the number of bytes that the next character takes if it is valid
27
+ // in the encoding and is alphanumeric. Does not read more than n bytes. It
28
+ // is assumed that n is at least 1.
29
+ size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
30
+
31
+ // Return true if the next character is valid in the encoding and is an
32
+ // uppercase character. Does not read more than n bytes. It is assumed that
33
+ // n is at least 1.
34
+ bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
35
+
36
+ // The name of the encoding. This should correspond to a value that can be
37
+ // passed to Encoding.find in Ruby.
38
+ const char *name;
39
+
40
+ // True if the encoding is a multibyte encoding.
41
+ bool multibyte;
42
+ } pm_encoding_t;
43
+
44
+ // These masks locate each bit of metadata within the various lookup tables used
45
+ // to determine character properties. Parenthesized to expand safely in expressions.
46
+ #define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)
47
+ #define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)
48
+ #define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
49
+
50
+ // These functions are reused by some other encodings, so they are defined here
51
+ // so they can be shared.
52
+ size_t pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
53
+ size_t pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
54
+ bool pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n);
55
+
56
+ // These functions are shared between the actual encoding and the fast path in
57
+ // the parser so they need to be internally visible.
58
+ size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
59
+ size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
60
+
61
+ // This lookup table is referenced in both the UTF-8 encoding file and the
62
+ // parser directly in order to speed up the default encoding processing.
63
+ extern const uint8_t pm_encoding_unicode_table[256];
64
+
65
+ // These are the encodings that are supported by the parser. They are defined in
66
+ // their own files in the src/enc directory.
67
+ extern pm_encoding_t pm_encoding_ascii;
68
+ extern pm_encoding_t pm_encoding_ascii_8bit;
69
+ extern pm_encoding_t pm_encoding_big5;
70
+ extern pm_encoding_t pm_encoding_euc_jp;
71
+ extern pm_encoding_t pm_encoding_gbk;
72
+ extern pm_encoding_t pm_encoding_iso_8859_1;
73
+ extern pm_encoding_t pm_encoding_iso_8859_2;
74
+ extern pm_encoding_t pm_encoding_iso_8859_3;
75
+ extern pm_encoding_t pm_encoding_iso_8859_4;
76
+ extern pm_encoding_t pm_encoding_iso_8859_5;
77
+ extern pm_encoding_t pm_encoding_iso_8859_6;
78
+ extern pm_encoding_t pm_encoding_iso_8859_7;
79
+ extern pm_encoding_t pm_encoding_iso_8859_8;
80
+ extern pm_encoding_t pm_encoding_iso_8859_9;
81
+ extern pm_encoding_t pm_encoding_iso_8859_10;
82
+ extern pm_encoding_t pm_encoding_iso_8859_11;
83
+ extern pm_encoding_t pm_encoding_iso_8859_13;
84
+ extern pm_encoding_t pm_encoding_iso_8859_14;
85
+ extern pm_encoding_t pm_encoding_iso_8859_15;
86
+ extern pm_encoding_t pm_encoding_iso_8859_16;
87
+ extern pm_encoding_t pm_encoding_koi8_r;
88
+ extern pm_encoding_t pm_encoding_shift_jis;
89
+ extern pm_encoding_t pm_encoding_utf_8;
90
+ extern pm_encoding_t pm_encoding_utf8_mac;
91
+ extern pm_encoding_t pm_encoding_windows_31j;
92
+ extern pm_encoding_t pm_encoding_windows_1251;
93
+ extern pm_encoding_t pm_encoding_windows_1252;
94
+
95
+ #endif
@@ -0,0 +1,41 @@
1
+ #ifndef PRISM_NODE_H
2
+ #define PRISM_NODE_H
3
+
4
+ #include "prism/defines.h"
5
+ #include "prism/parser.h"
6
+
7
+ // Append a new node onto the end of the node list.
8
+ void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
9
+
10
+ // Clear the node but preserves the location.
11
+ void pm_node_clear(pm_node_t *node);
12
+
13
+ // Deallocate a node and all of its children.
14
+ PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
15
+
16
+ // This struct stores the information gathered by the pm_node_memsize function.
17
+ // It contains both the memory footprint and additional metadata about the
18
+ // shape of the tree.
19
+ typedef struct {
20
+ size_t memsize;
21
+ size_t node_count;
22
+ } pm_memsize_t;
23
+
24
+ // Calculates the memory footprint of a given node.
25
+ PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
26
+
27
+ // Returns a string representation of the given node type.
28
+ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
29
+
30
+ #define PM_EMPTY_NODE_LIST ((pm_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
31
+
32
+ // ScopeNodes are helper nodes, and will never be part of the AST. We manually
33
+ // declare them here to avoid generating them.
34
+ typedef struct pm_scope_node {
35
+ pm_node_t base;
36
+ struct pm_parameters_node *parameters;
37
+ pm_node_t *body;
38
+ pm_constant_id_list_t locals;
39
+ } pm_scope_node_t;
40
+
41
+ #endif // PRISM_NODE_H
@@ -0,0 +1,141 @@
1
+ #ifndef PRISM_PACK_H
2
+ #define PRISM_PACK_H
3
+
4
+ #include "prism/defines.h"
5
+
6
+ #include <stdint.h>
7
+ #include <stdlib.h>
8
+
9
+ typedef enum pm_pack_version {
10
+ PM_PACK_VERSION_3_2_0
11
+ } pm_pack_version;
12
+
13
+ typedef enum pm_pack_variant {
14
+ PM_PACK_VARIANT_PACK,
15
+ PM_PACK_VARIANT_UNPACK
16
+ } pm_pack_variant;
17
+
18
+ typedef enum pm_pack_type {
19
+ PM_PACK_SPACE,
20
+ PM_PACK_COMMENT,
21
+ PM_PACK_INTEGER,
22
+ PM_PACK_UTF8,
23
+ PM_PACK_BER,
24
+ PM_PACK_FLOAT,
25
+ PM_PACK_STRING_SPACE_PADDED,
26
+ PM_PACK_STRING_NULL_PADDED,
27
+ PM_PACK_STRING_NULL_TERMINATED,
28
+ PM_PACK_STRING_MSB,
29
+ PM_PACK_STRING_LSB,
30
+ PM_PACK_STRING_HEX_HIGH,
31
+ PM_PACK_STRING_HEX_LOW,
32
+ PM_PACK_STRING_UU,
33
+ PM_PACK_STRING_MIME,
34
+ PM_PACK_STRING_BASE64,
35
+ PM_PACK_STRING_FIXED,
36
+ PM_PACK_STRING_POINTER,
37
+ PM_PACK_MOVE,
38
+ PM_PACK_BACK,
39
+ PM_PACK_NULL,
40
+ PM_PACK_END
41
+ } pm_pack_type;
42
+
43
+ typedef enum pm_pack_signed {
44
+ PM_PACK_UNSIGNED,
45
+ PM_PACK_SIGNED,
46
+ PM_PACK_SIGNED_NA
47
+ } pm_pack_signed;
48
+
49
+ typedef enum pm_pack_endian {
50
+ PM_PACK_AGNOSTIC_ENDIAN,
51
+ PM_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
52
+ PM_PACK_BIG_ENDIAN, // aka 'network', or 'N'
53
+ PM_PACK_NATIVE_ENDIAN,
54
+ PM_PACK_ENDIAN_NA
55
+ } pm_pack_endian;
56
+
57
+ typedef enum pm_pack_size {
58
+ PM_PACK_SIZE_SHORT,
59
+ PM_PACK_SIZE_INT,
60
+ PM_PACK_SIZE_LONG,
61
+ PM_PACK_SIZE_LONG_LONG,
62
+ PM_PACK_SIZE_8,
63
+ PM_PACK_SIZE_16,
64
+ PM_PACK_SIZE_32,
65
+ PM_PACK_SIZE_64,
66
+ PM_PACK_SIZE_P,
67
+ PM_PACK_SIZE_NA
68
+ } pm_pack_size;
69
+
70
+ typedef enum pm_pack_length_type {
71
+ PM_PACK_LENGTH_FIXED,
72
+ PM_PACK_LENGTH_MAX,
73
+ PM_PACK_LENGTH_RELATIVE, // special case for unpack @*
74
+ PM_PACK_LENGTH_NA
75
+ } pm_pack_length_type;
76
+
77
+ typedef enum pm_pack_encoding {
78
+ PM_PACK_ENCODING_START,
79
+ PM_PACK_ENCODING_ASCII_8BIT,
80
+ PM_PACK_ENCODING_US_ASCII,
81
+ PM_PACK_ENCODING_UTF_8
82
+ } pm_pack_encoding;
83
+
84
+ typedef enum pm_pack_result {
85
+ PM_PACK_OK,
86
+ PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
87
+ PM_PACK_ERROR_UNKNOWN_DIRECTIVE,
88
+ PM_PACK_ERROR_LENGTH_TOO_BIG,
89
+ PM_PACK_ERROR_BANG_NOT_ALLOWED,
90
+ PM_PACK_ERROR_DOUBLE_ENDIAN
91
+ } pm_pack_result;
92
+
93
+ // Parse a single directive from a pack or unpack format string.
94
+ //
95
+ // Parameters:
96
+ // - [in] pm_pack_version version the version of Ruby (NOTE(review): absent from the prototype below - confirm)
97
+ // - [in] pm_pack_variant variant_arg pack or unpack
98
+ // - [in out] const char **format the start of the next directive to parse
99
+ // on calling, and advanced beyond the parsed directive on return, or as
100
+ // much of it as was consumed until an error was encountered
101
+ // - [in] const char *format_end the end of the format string
102
+ // - [out] pm_pack_type *type the type of the directive
103
+ // - [out] pm_pack_signed *signed_type
104
+ // whether the value is signed
105
+ // - [out] pm_pack_endian *endian the endianness of the value
106
+ // - [out] pm_pack_size *size the size of the value
107
+ // - [out] pm_pack_length_type *length_type
108
+ // what kind of length is specified
109
+ // - [out] uint64_t *length the length of the directive
110
+ // - [in out] pm_pack_encoding *encoding
111
+ // takes the current encoding of the string
112
+ // which would result from parsing the whole format string, and returns a
113
+ // possibly changed encoding - the encoding should be
114
+ // PM_PACK_ENCODING_START when pm_pack_parse is called for the first
115
+ // directive in a format string
116
+ //
117
+ // Return:
118
+ // - PM_PACK_OK on success
119
+ // - PM_PACK_ERROR_* on error
120
+ //
121
+ // Notes:
122
+ // Consult Ruby documentation for the meaning of directives.
123
+ PRISM_EXPORTED_FUNCTION pm_pack_result
124
+ pm_pack_parse(
125
+ pm_pack_variant variant_arg,
126
+ const char **format,
127
+ const char *format_end,
128
+ pm_pack_type *type,
129
+ pm_pack_signed *signed_type,
130
+ pm_pack_endian *endian,
131
+ pm_pack_size *size,
132
+ pm_pack_length_type *length_type,
133
+ uint64_t *length,
134
+ pm_pack_encoding *encoding
135
+ );
136
+
137
+ // prism abstracts sizes away from the native system - this converts an abstract
138
+ // size to a native size.
139
+ PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
140
+
141
+ #endif