jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
data/src/options.c ADDED
@@ -0,0 +1,213 @@
1
+ #include "prism/options.h"
2
+
3
+ /**
4
+ * Set the filepath option on the given options struct.
5
+ */
6
+ PRISM_EXPORTED_FUNCTION void
7
+ pm_options_filepath_set(pm_options_t *options, const char *filepath) {
8
+ pm_string_constant_init(&options->filepath, filepath, strlen(filepath));
9
+ }
10
+
11
+ /**
12
+ * Set the encoding option on the given options struct.
13
+ */
14
+ PRISM_EXPORTED_FUNCTION void
15
+ pm_options_encoding_set(pm_options_t *options, const char *encoding) {
16
+ pm_string_constant_init(&options->encoding, encoding, strlen(encoding));
17
+ }
18
+
19
+ /**
20
+ * Set the line option on the given options struct.
21
+ */
22
+ PRISM_EXPORTED_FUNCTION void
23
+ pm_options_line_set(pm_options_t *options, int32_t line) {
24
+ options->line = line;
25
+ }
26
+
27
+ /**
28
+ * Set the frozen string literal option on the given options struct.
29
+ */
30
+ PRISM_EXPORTED_FUNCTION void
31
+ pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) {
32
+ options->frozen_string_literal = frozen_string_literal;
33
+ }
34
+
35
+ /**
36
+ * Set the version option on the given options struct by parsing the given
37
+ * string. If the string contains an invalid option, this returns false.
38
+ * Otherwise, it returns true.
39
+ */
40
+ PRISM_EXPORTED_FUNCTION bool
41
+ pm_options_version_set(pm_options_t *options, const char *version, size_t length) {
42
+ if (version == NULL && length == 0) {
43
+ options->version = PM_OPTIONS_VERSION_LATEST;
44
+ return true;
45
+ }
46
+
47
+ if (length == 5) {
48
+ if (strncmp(version, "3.3.0", length) == 0) {
49
+ options->version = PM_OPTIONS_VERSION_CRUBY_3_3_0;
50
+ return true;
51
+ }
52
+
53
+ if (strncmp(version, "3.4.0", length) == 0) {
54
+ options->version = PM_OPTIONS_VERSION_LATEST;
55
+ return true;
56
+ }
57
+ }
58
+
59
+ if (length == 6 && strncmp(version, "latest", length) == 0) {
60
+ options->version = PM_OPTIONS_VERSION_LATEST;
61
+ return true;
62
+ }
63
+
64
+ return false;
65
+ }
66
+
67
+ /**
68
+ * Allocate and zero out the scopes array on the given options struct.
69
+ */
70
+ PRISM_EXPORTED_FUNCTION void
71
+ pm_options_scopes_init(pm_options_t *options, size_t scopes_count) {
72
+ options->scopes_count = scopes_count;
73
+ options->scopes = calloc(scopes_count, sizeof(pm_options_scope_t));
74
+ if (options->scopes == NULL) abort();
75
+ }
76
+
77
+ /**
78
+ * Return a pointer to the scope at the given index within the given options.
79
+ */
80
+ PRISM_EXPORTED_FUNCTION const pm_options_scope_t *
81
+ pm_options_scope_get(const pm_options_t *options, size_t index) {
82
+ return &options->scopes[index];
83
+ }
84
+
85
+ /**
86
+ * Create a new options scope struct. This will hold a set of locals that are in
87
+ * scope surrounding the code that is being parsed.
88
+ */
89
+ PRISM_EXPORTED_FUNCTION void
90
+ pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) {
91
+ scope->locals_count = locals_count;
92
+ scope->locals = calloc(locals_count, sizeof(pm_string_t));
93
+ if (scope->locals == NULL) abort();
94
+ }
95
+
96
+ /**
97
+ * Return a pointer to the local at the given index within the given scope.
98
+ */
99
+ PRISM_EXPORTED_FUNCTION const pm_string_t *
100
+ pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) {
101
+ return &scope->locals[index];
102
+ }
103
+
104
+ /**
105
+ * Free the internal memory associated with the options.
106
+ */
107
+ PRISM_EXPORTED_FUNCTION void
108
+ pm_options_free(pm_options_t *options) {
109
+ pm_string_free(&options->filepath);
110
+ pm_string_free(&options->encoding);
111
+
112
+ for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
113
+ pm_options_scope_t *scope = &options->scopes[scope_index];
114
+
115
+ for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
116
+ pm_string_free(&scope->locals[local_index]);
117
+ }
118
+
119
+ free(scope->locals);
120
+ }
121
+
122
+ free(options->scopes);
123
+ }
124
+
125
/**
 * Read a 32-bit unsigned integer from a pointer. This function is used to read
 * the options that are passed into the parser from the Ruby implementation.
 *
 * The four bytes at `data` are copied in native byte order. memcpy is used
 * unconditionally: it is valid for any alignment and avoids reading the byte
 * buffer through a uint32_t lvalue, which would be type punning through an
 * incompatible pointer. Compilers typically lower a fixed-size memcpy like
 * this to a single load.
 */
static uint32_t
pm_options_read_u32(const char *data) {
    uint32_t value;
    memcpy(&value, data, sizeof(value));
    return value;
}
140
+
141
/**
 * Read a 32-bit signed integer from a pointer. This function is used to read
 * the options that are passed into the parser from the Ruby implementation.
 *
 * The four bytes at `data` are copied in native byte order. memcpy is used
 * unconditionally: it is valid for any alignment and avoids reading the byte
 * buffer through an int32_t lvalue, which would be type punning through an
 * incompatible pointer.
 */
static int32_t
pm_options_read_s32(const char *data) {
    int32_t value;
    memcpy(&value, data, sizeof(value));
    return value;
}
156
+
157
+ /**
158
+ * Deserialize an options struct from the given binary string. This is used to
159
+ * pass options to the parser from an FFI call so that consumers of the library
160
+ * from an FFI perspective don't have to worry about the structure of our
161
+ * options structs. Since the source of these calls will be from Ruby
162
+ * implementation internals we assume it is from a trusted source.
163
+ */
164
+ void
165
+ pm_options_read(pm_options_t *options, const char *data) {
166
+ options->line = 1; // default
167
+ if (data == NULL) return;
168
+
169
+ uint32_t filepath_length = pm_options_read_u32(data);
170
+ data += 4;
171
+
172
+ if (filepath_length > 0) {
173
+ pm_string_constant_init(&options->filepath, data, filepath_length);
174
+ data += filepath_length;
175
+ }
176
+
177
+ options->line = pm_options_read_s32(data);
178
+ data += 4;
179
+
180
+ uint32_t encoding_length = pm_options_read_u32(data);
181
+ data += 4;
182
+
183
+ if (encoding_length > 0) {
184
+ pm_string_constant_init(&options->encoding, data, encoding_length);
185
+ data += encoding_length;
186
+ }
187
+
188
+ options->frozen_string_literal = *data++;
189
+ options->version = (pm_options_version_t) *data++;
190
+
191
+ uint32_t scopes_count = pm_options_read_u32(data);
192
+ data += 4;
193
+
194
+ if (scopes_count > 0) {
195
+ pm_options_scopes_init(options, scopes_count);
196
+
197
+ for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
198
+ uint32_t locals_count = pm_options_read_u32(data);
199
+ data += 4;
200
+
201
+ pm_options_scope_t *scope = &options->scopes[scope_index];
202
+ pm_options_scope_init(scope, locals_count);
203
+
204
+ for (size_t local_index = 0; local_index < locals_count; local_index++) {
205
+ uint32_t local_length = pm_options_read_u32(data);
206
+ data += 4;
207
+
208
+ pm_string_constant_init(&scope->locals[local_index], data, local_length);
209
+ data += local_length;
210
+ }
211
+ }
212
+ }
213
+ }
data/src/pack.c ADDED
@@ -0,0 +1,493 @@
1
+ #include "prism/pack.h"
2
+
3
+ #include <stdbool.h>
4
+ #include <errno.h>
5
+
6
+ static uintmax_t
7
+ strtoumaxc(const char **format);
8
+
9
+ PRISM_EXPORTED_FUNCTION pm_pack_result
10
+ pm_pack_parse(pm_pack_variant variant, const char **format, const char *format_end,
11
+ pm_pack_type *type, pm_pack_signed *signed_type, pm_pack_endian *endian, pm_pack_size *size,
12
+ pm_pack_length_type *length_type, uint64_t *length, pm_pack_encoding *encoding) {
13
+
14
+ if (*encoding == PM_PACK_ENCODING_START) {
15
+ *encoding = PM_PACK_ENCODING_US_ASCII;
16
+ }
17
+
18
+ if (*format == format_end) {
19
+ *type = PM_PACK_END;
20
+ *signed_type = PM_PACK_SIGNED_NA;
21
+ *endian = PM_PACK_ENDIAN_NA;
22
+ *size = PM_PACK_SIZE_NA;
23
+ *length_type = PM_PACK_LENGTH_NA;
24
+ return PM_PACK_OK;
25
+ }
26
+
27
+ *length_type = PM_PACK_LENGTH_FIXED;
28
+ *length = 1;
29
+ bool length_changed_allowed = true;
30
+
31
+ char directive = **format;
32
+ (*format)++;
33
+ switch (directive) {
34
+ case ' ':
35
+ case '\t':
36
+ case '\n':
37
+ case '\v':
38
+ case '\f':
39
+ case '\r':
40
+ *type = PM_PACK_SPACE;
41
+ *signed_type = PM_PACK_SIGNED_NA;
42
+ *endian = PM_PACK_ENDIAN_NA;
43
+ *size = PM_PACK_SIZE_NA;
44
+ *length_type = PM_PACK_LENGTH_NA;
45
+ *length = 0;
46
+ return PM_PACK_OK;
47
+ case '#':
48
+ while ((*format < format_end) && (**format != '\n')) {
49
+ (*format)++;
50
+ }
51
+ *type = PM_PACK_COMMENT;
52
+ *signed_type = PM_PACK_SIGNED_NA;
53
+ *endian = PM_PACK_ENDIAN_NA;
54
+ *size = PM_PACK_SIZE_NA;
55
+ *length_type = PM_PACK_LENGTH_NA;
56
+ *length = 0;
57
+ return PM_PACK_OK;
58
+ case 'C':
59
+ *type = PM_PACK_INTEGER;
60
+ *signed_type = PM_PACK_UNSIGNED;
61
+ *endian = PM_PACK_AGNOSTIC_ENDIAN;
62
+ *size = PM_PACK_SIZE_8;
63
+ break;
64
+ case 'S':
65
+ *type = PM_PACK_INTEGER;
66
+ *signed_type = PM_PACK_UNSIGNED;
67
+ *endian = PM_PACK_NATIVE_ENDIAN;
68
+ *size = PM_PACK_SIZE_16;
69
+ break;
70
+ case 'L':
71
+ *type = PM_PACK_INTEGER;
72
+ *signed_type = PM_PACK_UNSIGNED;
73
+ *endian = PM_PACK_NATIVE_ENDIAN;
74
+ *size = PM_PACK_SIZE_32;
75
+ break;
76
+ case 'Q':
77
+ *type = PM_PACK_INTEGER;
78
+ *signed_type = PM_PACK_UNSIGNED;
79
+ *endian = PM_PACK_NATIVE_ENDIAN;
80
+ *size = PM_PACK_SIZE_64;
81
+ break;
82
+ case 'J':
83
+ *type = PM_PACK_INTEGER;
84
+ *signed_type = PM_PACK_UNSIGNED;
85
+ *endian = PM_PACK_NATIVE_ENDIAN;
86
+ *size = PM_PACK_SIZE_P;
87
+ break;
88
+ case 'c':
89
+ *type = PM_PACK_INTEGER;
90
+ *signed_type = PM_PACK_SIGNED;
91
+ *endian = PM_PACK_AGNOSTIC_ENDIAN;
92
+ *size = PM_PACK_SIZE_8;
93
+ break;
94
+ case 's':
95
+ *type = PM_PACK_INTEGER;
96
+ *signed_type = PM_PACK_SIGNED;
97
+ *endian = PM_PACK_NATIVE_ENDIAN;
98
+ *size = PM_PACK_SIZE_16;
99
+ break;
100
+ case 'l':
101
+ *type = PM_PACK_INTEGER;
102
+ *signed_type = PM_PACK_SIGNED;
103
+ *endian = PM_PACK_NATIVE_ENDIAN;
104
+ *size = PM_PACK_SIZE_32;
105
+ break;
106
+ case 'q':
107
+ *type = PM_PACK_INTEGER;
108
+ *signed_type = PM_PACK_SIGNED;
109
+ *endian = PM_PACK_NATIVE_ENDIAN;
110
+ *size = PM_PACK_SIZE_64;
111
+ break;
112
+ case 'j':
113
+ *type = PM_PACK_INTEGER;
114
+ *signed_type = PM_PACK_SIGNED;
115
+ *endian = PM_PACK_NATIVE_ENDIAN;
116
+ *size = PM_PACK_SIZE_P;
117
+ break;
118
+ case 'I':
119
+ *type = PM_PACK_INTEGER;
120
+ *signed_type = PM_PACK_UNSIGNED;
121
+ *endian = PM_PACK_NATIVE_ENDIAN;
122
+ *size = PM_PACK_SIZE_INT;
123
+ break;
124
+ case 'i':
125
+ *type = PM_PACK_INTEGER;
126
+ *signed_type = PM_PACK_SIGNED;
127
+ *endian = PM_PACK_NATIVE_ENDIAN;
128
+ *size = PM_PACK_SIZE_INT;
129
+ break;
130
+ case 'n':
131
+ *type = PM_PACK_INTEGER;
132
+ *signed_type = PM_PACK_UNSIGNED;
133
+ *endian = PM_PACK_BIG_ENDIAN;
134
+ *size = PM_PACK_SIZE_16;
135
+ length_changed_allowed = false;
136
+ break;
137
+ case 'N':
138
+ *type = PM_PACK_INTEGER;
139
+ *signed_type = PM_PACK_UNSIGNED;
140
+ *endian = PM_PACK_BIG_ENDIAN;
141
+ *size = PM_PACK_SIZE_32;
142
+ length_changed_allowed = false;
143
+ break;
144
+ case 'v':
145
+ *type = PM_PACK_INTEGER;
146
+ *signed_type = PM_PACK_UNSIGNED;
147
+ *endian = PM_PACK_LITTLE_ENDIAN;
148
+ *size = PM_PACK_SIZE_16;
149
+ length_changed_allowed = false;
150
+ break;
151
+ case 'V':
152
+ *type = PM_PACK_INTEGER;
153
+ *signed_type = PM_PACK_UNSIGNED;
154
+ *endian = PM_PACK_LITTLE_ENDIAN;
155
+ *size = PM_PACK_SIZE_32;
156
+ length_changed_allowed = false;
157
+ break;
158
+ case 'U':
159
+ *type = PM_PACK_UTF8;
160
+ *signed_type = PM_PACK_SIGNED_NA;
161
+ *endian = PM_PACK_ENDIAN_NA;
162
+ *size = PM_PACK_SIZE_NA;
163
+ break;
164
+ case 'w':
165
+ *type = PM_PACK_BER;
166
+ *signed_type = PM_PACK_SIGNED_NA;
167
+ *endian = PM_PACK_ENDIAN_NA;
168
+ *size = PM_PACK_SIZE_NA;
169
+ break;
170
+ case 'D':
171
+ case 'd':
172
+ *type = PM_PACK_FLOAT;
173
+ *signed_type = PM_PACK_SIGNED_NA;
174
+ *endian = PM_PACK_NATIVE_ENDIAN;
175
+ *size = PM_PACK_SIZE_64;
176
+ break;
177
+ case 'F':
178
+ case 'f':
179
+ *type = PM_PACK_FLOAT;
180
+ *signed_type = PM_PACK_SIGNED_NA;
181
+ *endian = PM_PACK_NATIVE_ENDIAN;
182
+ *size = PM_PACK_SIZE_32;
183
+ break;
184
+ case 'E':
185
+ *type = PM_PACK_FLOAT;
186
+ *signed_type = PM_PACK_SIGNED_NA;
187
+ *endian = PM_PACK_LITTLE_ENDIAN;
188
+ *size = PM_PACK_SIZE_64;
189
+ break;
190
+ case 'e':
191
+ *type = PM_PACK_FLOAT;
192
+ *signed_type = PM_PACK_SIGNED_NA;
193
+ *endian = PM_PACK_LITTLE_ENDIAN;
194
+ *size = PM_PACK_SIZE_32;
195
+ break;
196
+ case 'G':
197
+ *type = PM_PACK_FLOAT;
198
+ *signed_type = PM_PACK_SIGNED_NA;
199
+ *endian = PM_PACK_BIG_ENDIAN;
200
+ *size = PM_PACK_SIZE_64;
201
+ break;
202
+ case 'g':
203
+ *type = PM_PACK_FLOAT;
204
+ *signed_type = PM_PACK_SIGNED_NA;
205
+ *endian = PM_PACK_BIG_ENDIAN;
206
+ *size = PM_PACK_SIZE_32;
207
+ break;
208
+ case 'A':
209
+ *type = PM_PACK_STRING_SPACE_PADDED;
210
+ *signed_type = PM_PACK_SIGNED_NA;
211
+ *endian = PM_PACK_ENDIAN_NA;
212
+ *size = PM_PACK_SIZE_NA;
213
+ break;
214
+ case 'a':
215
+ *type = PM_PACK_STRING_NULL_PADDED;
216
+ *signed_type = PM_PACK_SIGNED_NA;
217
+ *endian = PM_PACK_ENDIAN_NA;
218
+ *size = PM_PACK_SIZE_NA;
219
+ break;
220
+ case 'Z':
221
+ *type = PM_PACK_STRING_NULL_TERMINATED;
222
+ *signed_type = PM_PACK_SIGNED_NA;
223
+ *endian = PM_PACK_ENDIAN_NA;
224
+ *size = PM_PACK_SIZE_NA;
225
+ break;
226
+ case 'B':
227
+ *type = PM_PACK_STRING_MSB;
228
+ *signed_type = PM_PACK_SIGNED_NA;
229
+ *endian = PM_PACK_ENDIAN_NA;
230
+ *size = PM_PACK_SIZE_NA;
231
+ break;
232
+ case 'b':
233
+ *type = PM_PACK_STRING_LSB;
234
+ *signed_type = PM_PACK_SIGNED_NA;
235
+ *endian = PM_PACK_ENDIAN_NA;
236
+ *size = PM_PACK_SIZE_NA;
237
+ break;
238
+ case 'H':
239
+ *type = PM_PACK_STRING_HEX_HIGH;
240
+ *signed_type = PM_PACK_SIGNED_NA;
241
+ *endian = PM_PACK_ENDIAN_NA;
242
+ *size = PM_PACK_SIZE_NA;
243
+ break;
244
+ case 'h':
245
+ *type = PM_PACK_STRING_HEX_LOW;
246
+ *signed_type = PM_PACK_SIGNED_NA;
247
+ *endian = PM_PACK_ENDIAN_NA;
248
+ *size = PM_PACK_SIZE_NA;
249
+ break;
250
+ case 'u':
251
+ *type = PM_PACK_STRING_UU;
252
+ *signed_type = PM_PACK_SIGNED_NA;
253
+ *endian = PM_PACK_ENDIAN_NA;
254
+ *size = PM_PACK_SIZE_NA;
255
+ break;
256
+ case 'M':
257
+ *type = PM_PACK_STRING_MIME;
258
+ *signed_type = PM_PACK_SIGNED_NA;
259
+ *endian = PM_PACK_ENDIAN_NA;
260
+ *size = PM_PACK_SIZE_NA;
261
+ break;
262
+ case 'm':
263
+ *type = PM_PACK_STRING_BASE64;
264
+ *signed_type = PM_PACK_SIGNED_NA;
265
+ *endian = PM_PACK_ENDIAN_NA;
266
+ *size = PM_PACK_SIZE_NA;
267
+ break;
268
+ case 'P':
269
+ *type = PM_PACK_STRING_FIXED;
270
+ *signed_type = PM_PACK_SIGNED_NA;
271
+ *endian = PM_PACK_ENDIAN_NA;
272
+ *size = PM_PACK_SIZE_NA;
273
+ break;
274
+ case 'p':
275
+ *type = PM_PACK_STRING_POINTER;
276
+ *signed_type = PM_PACK_SIGNED_NA;
277
+ *endian = PM_PACK_ENDIAN_NA;
278
+ *size = PM_PACK_SIZE_NA;
279
+ break;
280
+ case '@':
281
+ *type = PM_PACK_MOVE;
282
+ *signed_type = PM_PACK_SIGNED_NA;
283
+ *endian = PM_PACK_ENDIAN_NA;
284
+ *size = PM_PACK_SIZE_NA;
285
+ break;
286
+ case 'X':
287
+ *type = PM_PACK_BACK;
288
+ *signed_type = PM_PACK_SIGNED_NA;
289
+ *endian = PM_PACK_ENDIAN_NA;
290
+ *size = PM_PACK_SIZE_NA;
291
+ break;
292
+ case 'x':
293
+ *type = PM_PACK_NULL;
294
+ *signed_type = PM_PACK_SIGNED_NA;
295
+ *endian = PM_PACK_ENDIAN_NA;
296
+ *size = PM_PACK_SIZE_NA;
297
+ break;
298
+ case '%':
299
+ return PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE;
300
+ default:
301
+ return PM_PACK_ERROR_UNKNOWN_DIRECTIVE;
302
+ }
303
+
304
+ bool explicit_endian = false;
305
+
306
+ while (*format < format_end) {
307
+ switch (**format) {
308
+ case '_':
309
+ case '!':
310
+ (*format)++;
311
+ if (*type != PM_PACK_INTEGER || !length_changed_allowed) {
312
+ return PM_PACK_ERROR_BANG_NOT_ALLOWED;
313
+ }
314
+ switch (*size) {
315
+ case PM_PACK_SIZE_SHORT:
316
+ case PM_PACK_SIZE_INT:
317
+ case PM_PACK_SIZE_LONG:
318
+ case PM_PACK_SIZE_LONG_LONG:
319
+ break;
320
+ case PM_PACK_SIZE_16:
321
+ *size = PM_PACK_SIZE_SHORT;
322
+ break;
323
+ case PM_PACK_SIZE_32:
324
+ *size = PM_PACK_SIZE_LONG;
325
+ break;
326
+ case PM_PACK_SIZE_64:
327
+ *size = PM_PACK_SIZE_LONG_LONG;
328
+ break;
329
+ case PM_PACK_SIZE_P:
330
+ break;
331
+ default:
332
+ return PM_PACK_ERROR_BANG_NOT_ALLOWED;
333
+ }
334
+ break;
335
+ case '<':
336
+ (*format)++;
337
+ if (explicit_endian) {
338
+ return PM_PACK_ERROR_DOUBLE_ENDIAN;
339
+ }
340
+ *endian = PM_PACK_LITTLE_ENDIAN;
341
+ explicit_endian = true;
342
+ break;
343
+ case '>':
344
+ (*format)++;
345
+ if (explicit_endian) {
346
+ return PM_PACK_ERROR_DOUBLE_ENDIAN;
347
+ }
348
+ *endian = PM_PACK_BIG_ENDIAN;
349
+ explicit_endian = true;
350
+ break;
351
+ default:
352
+ goto exit_modifier_loop;
353
+ }
354
+ }
355
+
356
+ exit_modifier_loop:
357
+
358
+ if (variant == PM_PACK_VARIANT_UNPACK && *type == PM_PACK_MOVE) {
359
+ *length = 0;
360
+ }
361
+
362
+ if (*format < format_end) {
363
+ if (**format == '*') {
364
+ switch (*type) {
365
+ case PM_PACK_NULL:
366
+ case PM_PACK_BACK:
367
+ switch (variant) {
368
+ case PM_PACK_VARIANT_PACK:
369
+ *length_type = PM_PACK_LENGTH_FIXED;
370
+ break;
371
+ case PM_PACK_VARIANT_UNPACK:
372
+ *length_type = PM_PACK_LENGTH_MAX;
373
+ break;
374
+ }
375
+ *length = 0;
376
+ break;
377
+
378
+ case PM_PACK_MOVE:
379
+ switch (variant) {
380
+ case PM_PACK_VARIANT_PACK:
381
+ *length_type = PM_PACK_LENGTH_FIXED;
382
+ break;
383
+ case PM_PACK_VARIANT_UNPACK:
384
+ *length_type = PM_PACK_LENGTH_RELATIVE;
385
+ break;
386
+ }
387
+ *length = 0;
388
+ break;
389
+
390
+ case PM_PACK_STRING_UU:
391
+ *length_type = PM_PACK_LENGTH_FIXED;
392
+ *length = 0;
393
+ break;
394
+
395
+ case PM_PACK_STRING_FIXED:
396
+ switch (variant) {
397
+ case PM_PACK_VARIANT_PACK:
398
+ *length_type = PM_PACK_LENGTH_FIXED;
399
+ *length = 1;
400
+ break;
401
+ case PM_PACK_VARIANT_UNPACK:
402
+ *length_type = PM_PACK_LENGTH_MAX;
403
+ *length = 0;
404
+ break;
405
+ }
406
+ break;
407
+
408
+ case PM_PACK_STRING_MIME:
409
+ case PM_PACK_STRING_BASE64:
410
+ *length_type = PM_PACK_LENGTH_FIXED;
411
+ *length = 1;
412
+ break;
413
+
414
+ default:
415
+ *length_type = PM_PACK_LENGTH_MAX;
416
+ *length = 0;
417
+ break;
418
+ }
419
+
420
+ (*format)++;
421
+ } else if (**format >= '0' && **format <= '9') {
422
+ errno = 0;
423
+ *length_type = PM_PACK_LENGTH_FIXED;
424
+ #if UINTMAX_MAX < UINT64_MAX
425
+ #error "prism's design assumes uintmax_t is at least as large as uint64_t"
426
+ #endif
427
+ uintmax_t length_max = strtoumaxc(format);
428
+ if (errno || length_max > UINT64_MAX) {
429
+ return PM_PACK_ERROR_LENGTH_TOO_BIG;
430
+ }
431
+ *length = (uint64_t) length_max;
432
+ }
433
+ }
434
+
435
+ switch (*type) {
436
+ case PM_PACK_UTF8:
437
+ /* if encoding is US-ASCII, upgrade to UTF-8 */
438
+ if (*encoding == PM_PACK_ENCODING_US_ASCII) {
439
+ *encoding = PM_PACK_ENCODING_UTF_8;
440
+ }
441
+ break;
442
+ case PM_PACK_STRING_MIME:
443
+ case PM_PACK_STRING_BASE64:
444
+ case PM_PACK_STRING_UU:
445
+ /* keep US-ASCII (do nothing) */
446
+ break;
447
+ default:
448
+ /* fall back to BINARY */
449
+ *encoding = PM_PACK_ENCODING_ASCII_8BIT;
450
+ break;
451
+ }
452
+
453
+ return PM_PACK_OK;
454
+ }
455
+
456
+ PRISM_EXPORTED_FUNCTION size_t
457
+ pm_size_to_native(pm_pack_size size) {
458
+ switch (size) {
459
+ case PM_PACK_SIZE_SHORT:
460
+ return sizeof(short);
461
+ case PM_PACK_SIZE_INT:
462
+ return sizeof(int);
463
+ case PM_PACK_SIZE_LONG:
464
+ return sizeof(long);
465
+ case PM_PACK_SIZE_LONG_LONG:
466
+ return sizeof(long long);
467
+ case PM_PACK_SIZE_8:
468
+ return 1;
469
+ case PM_PACK_SIZE_16:
470
+ return 2;
471
+ case PM_PACK_SIZE_32:
472
+ return 4;
473
+ case PM_PACK_SIZE_64:
474
+ return 8;
475
+ case PM_PACK_SIZE_P:
476
+ return sizeof(void *);
477
+ default:
478
+ return 0;
479
+ }
480
+ }
481
+
482
/**
 * Parse a run of ASCII decimal digits starting at *format and advance *format
 * past them. Parsing stops at the first non-digit byte, so the input must be
 * terminated by one (for example a NUL byte).
 *
 * Returns the parsed value. If the value does not fit in uintmax_t, errno is
 * set to ERANGE and UINTMAX_MAX is returned; the remaining digits are still
 * consumed. errno is never cleared here, so callers should zero it beforehand.
 */
static uintmax_t
strtoumaxc(const char **format) {
    uintmax_t value = 0;

    while (**format >= '0' && **format <= '9') {
        const uintmax_t digit = (uintmax_t) (**format - '0');

        // value * 10 + digit fits exactly when value <= (MAX - digit) / 10.
        // Checking only value > MAX / 10 would miss the case where the final
        // digit alone pushes the result past the maximum.
        if (value > (UINTMAX_MAX - digit) / 10) {
            errno = ERANGE;
            value = UINTMAX_MAX;
        } else {
            value = value * 10 + digit;
        }

        (*format)++;
    }

    return value;
}