prism 0.29.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +115 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +1 -1
- data/README.md +4 -0
- data/config.yml +920 -148
- data/docs/build_system.md +8 -11
- data/docs/fuzzing.md +1 -1
- data/docs/parsing_rules.md +4 -1
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +22 -0
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2863 -2079
- data/ext/prism/extconf.rb +14 -37
- data/ext/prism/extension.c +241 -391
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +2156 -453
- data/include/prism/defines.h +58 -7
- data/include/prism/diagnostic.h +24 -6
- data/include/prism/node.h +0 -21
- data/include/prism/options.h +94 -3
- data/include/prism/parser.h +82 -40
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +22 -15
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +28 -12
- data/include/prism/version.h +3 -3
- data/include/prism.h +47 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +16 -1
- data/lib/prism/dot_visitor.rb +55 -34
- data/lib/prism/dsl.rb +660 -468
- data/lib/prism/ffi.rb +113 -8
- data/lib/prism/inspect_visitor.rb +296 -64
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +11 -6
- data/lib/prism/node.rb +4262 -5023
- data/lib/prism/node_ext.rb +91 -14
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +183 -6
- data/lib/prism/reflection.rb +12 -10
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +496 -609
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/compiler.rb +185 -155
- data/lib/prism/translation/parser/lexer.rb +26 -4
- data/lib/prism/translation/parser.rb +9 -4
- data/lib/prism/translation/ripper.rb +23 -25
- data/lib/prism/translation/ruby_parser.rb +86 -17
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +6 -8
- data/prism.gemspec +9 -5
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +1115 -1120
- data/rbi/prism/parse_result.rbi +29 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +36 -30
- data/sig/prism/dsl.rbs +190 -303
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +678 -632
- data/sig/prism/parse_result.rbs +22 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +103 -64
- data/src/diagnostic.c +64 -28
- data/src/node.c +502 -1739
- data/src/options.c +76 -27
- data/src/prettyprint.c +188 -112
- data/src/prism.c +3376 -2293
- data/src/regexp.c +208 -71
- data/src/serialize.c +182 -50
- data/src/static_literals.c +64 -85
- data/src/token_type.c +4 -4
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +53 -25
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +131 -80
- data/src/util/pm_strpbrk.c +32 -6
- metadata +11 -7
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/lib/prism/translation/parser/rubocop.rb +0 -73
- data/src/util/pm_string_list.c +0 -28
data/src/util/pm_string.c
CHANGED
@@ -47,6 +47,62 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
|
|
47
47
|
};
|
48
48
|
}
|
49
49
|
|
50
|
+
#ifdef _WIN32
|
51
|
+
/**
|
52
|
+
* Represents a file handle on Windows, where the path will need to be freed
|
53
|
+
* when the file is closed.
|
54
|
+
*/
|
55
|
+
typedef struct {
|
56
|
+
/** The path to the file, which will become allocated memory. */
|
57
|
+
WCHAR *path;
|
58
|
+
|
59
|
+
/** The handle to the file, which will start as uninitialized memory. */
|
60
|
+
HANDLE file;
|
61
|
+
} pm_string_file_handle_t;
|
62
|
+
|
63
|
+
/**
|
64
|
+
* Open the file indicated by the filepath parameter for reading on Windows.
|
65
|
+
* Perform any kind of normalization that needs to happen on the filepath.
|
66
|
+
*/
|
67
|
+
static pm_string_init_result_t
|
68
|
+
pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) {
|
69
|
+
int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
|
70
|
+
if (length == 0) return PM_STRING_INIT_ERROR_GENERIC;
|
71
|
+
|
72
|
+
handle->path = xmalloc(sizeof(WCHAR) * ((size_t) length));
|
73
|
+
if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) {
|
74
|
+
xfree(handle->path);
|
75
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
76
|
+
}
|
77
|
+
|
78
|
+
handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
|
79
|
+
if (handle->file == INVALID_HANDLE_VALUE) {
|
80
|
+
pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC;
|
81
|
+
|
82
|
+
if (GetLastError() == ERROR_ACCESS_DENIED) {
|
83
|
+
DWORD attributes = GetFileAttributesW(handle->path);
|
84
|
+
if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
|
85
|
+
result = PM_STRING_INIT_ERROR_DIRECTORY;
|
86
|
+
}
|
87
|
+
}
|
88
|
+
|
89
|
+
xfree(handle->path);
|
90
|
+
return result;
|
91
|
+
}
|
92
|
+
|
93
|
+
return PM_STRING_INIT_SUCCESS;
|
94
|
+
}
|
95
|
+
|
96
|
+
/**
|
97
|
+
* Close the file handle and free the path.
|
98
|
+
*/
|
99
|
+
static void
|
100
|
+
pm_string_file_handle_close(pm_string_file_handle_t *handle) {
|
101
|
+
xfree(handle->path);
|
102
|
+
CloseHandle(handle->file);
|
103
|
+
}
|
104
|
+
#endif
|
105
|
+
|
50
106
|
/**
|
51
107
|
* Read the file indicated by the filepath parameter into source and load its
|
52
108
|
* contents and size into the given `pm_string_t`. The given `pm_string_t`
|
@@ -58,62 +114,66 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
|
|
58
114
|
* `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
59
115
|
* `mmap`, and on other POSIX systems we'll use `read`.
|
60
116
|
*/
|
61
|
-
PRISM_EXPORTED_FUNCTION bool
|
117
|
+
PRISM_EXPORTED_FUNCTION pm_string_init_result_t
|
62
118
|
pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
63
119
|
#ifdef _WIN32
|
64
120
|
// Open the file for reading.
|
65
|
-
|
66
|
-
|
67
|
-
if (
|
68
|
-
return false;
|
69
|
-
}
|
121
|
+
pm_string_file_handle_t handle;
|
122
|
+
pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
|
123
|
+
if (result != PM_STRING_INIT_SUCCESS) return result;
|
70
124
|
|
71
125
|
// Get the file size.
|
72
|
-
DWORD file_size = GetFileSize(file, NULL);
|
126
|
+
DWORD file_size = GetFileSize(handle.file, NULL);
|
73
127
|
if (file_size == INVALID_FILE_SIZE) {
|
74
|
-
|
75
|
-
return false;
|
128
|
+
pm_string_file_handle_close(&handle);
|
129
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
76
130
|
}
|
77
131
|
|
78
132
|
// If the file is empty, then we don't need to do anything else, we'll set
|
79
133
|
// the source to a constant empty string and return.
|
80
134
|
if (file_size == 0) {
|
81
|
-
|
135
|
+
pm_string_file_handle_close(&handle);
|
82
136
|
const uint8_t source[] = "";
|
83
137
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
84
|
-
return true;
|
138
|
+
return PM_STRING_INIT_SUCCESS;
|
85
139
|
}
|
86
140
|
|
87
141
|
// Create a mapping of the file.
|
88
|
-
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
142
|
+
HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
|
89
143
|
if (mapping == NULL) {
|
90
|
-
|
91
|
-
return false;
|
144
|
+
pm_string_file_handle_close(&handle);
|
145
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
92
146
|
}
|
93
147
|
|
94
148
|
// Map the file into memory.
|
95
149
|
uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
|
96
150
|
CloseHandle(mapping);
|
97
|
-
|
151
|
+
pm_string_file_handle_close(&handle);
|
98
152
|
|
99
153
|
if (source == NULL) {
|
100
|
-
return false;
|
154
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
101
155
|
}
|
102
156
|
|
103
157
|
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
|
104
|
-
return true;
|
158
|
+
return PM_STRING_INIT_SUCCESS;
|
105
159
|
#elif defined(_POSIX_MAPPED_FILES)
|
106
160
|
// Open the file for reading
|
107
161
|
int fd = open(filepath, O_RDONLY);
|
108
162
|
if (fd == -1) {
|
109
|
-
return false;
|
163
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
110
164
|
}
|
111
165
|
|
112
166
|
// Stat the file to get the file size
|
113
167
|
struct stat sb;
|
114
168
|
if (fstat(fd, &sb) == -1) {
|
115
169
|
close(fd);
|
116
|
-
return false;
|
170
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
171
|
+
}
|
172
|
+
|
173
|
+
// Ensure it is a file and not a directory
|
174
|
+
if (S_ISDIR(sb.st_mode)) {
|
175
|
+
close(fd);
|
176
|
+
return PM_STRING_INIT_ERROR_DIRECTORY;
|
117
177
|
}
|
118
178
|
|
119
179
|
// mmap the file descriptor to virtually get the contents
|
@@ -124,22 +184,20 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
124
184
|
close(fd);
|
125
185
|
const uint8_t source[] = "";
|
126
186
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
127
|
-
return true;
|
187
|
+
return PM_STRING_INIT_SUCCESS;
|
128
188
|
}
|
129
189
|
|
130
190
|
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
|
131
191
|
if (source == MAP_FAILED) {
|
132
|
-
|
192
|
+
close(fd);
|
193
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
133
194
|
}
|
134
195
|
|
135
196
|
close(fd);
|
136
197
|
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
|
137
|
-
return true;
|
198
|
+
return PM_STRING_INIT_SUCCESS;
|
138
199
|
#else
|
139
|
-
(void) string;
|
140
|
-
(void) filepath;
|
141
|
-
perror("pm_string_mapped_init is not implemented for this platform");
|
142
|
-
return false;
|
200
|
+
return pm_string_file_init(string, filepath);
|
143
201
|
#endif
|
144
202
|
}
|
145
203
|
|
@@ -148,115 +206,108 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
148
206
|
* contents and size into the given `pm_string_t`. The given `pm_string_t`
|
149
207
|
* should be freed using `pm_string_free` when it is no longer used.
|
150
208
|
*/
|
151
|
-
PRISM_EXPORTED_FUNCTION bool
|
209
|
+
PRISM_EXPORTED_FUNCTION pm_string_init_result_t
|
152
210
|
pm_string_file_init(pm_string_t *string, const char *filepath) {
|
153
211
|
#ifdef _WIN32
|
154
212
|
// Open the file for reading.
|
155
|
-
|
156
|
-
|
157
|
-
if (
|
158
|
-
return false;
|
159
|
-
}
|
213
|
+
pm_string_file_handle_t handle;
|
214
|
+
pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
|
215
|
+
if (result != PM_STRING_INIT_SUCCESS) return result;
|
160
216
|
|
161
217
|
// Get the file size.
|
162
|
-
DWORD file_size = GetFileSize(file, NULL);
|
218
|
+
DWORD file_size = GetFileSize(handle.file, NULL);
|
163
219
|
if (file_size == INVALID_FILE_SIZE) {
|
164
|
-
|
165
|
-
return false;
|
220
|
+
pm_string_file_handle_close(&handle);
|
221
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
166
222
|
}
|
167
223
|
|
168
224
|
// If the file is empty, then we don't need to do anything else, we'll set
|
169
225
|
// the source to a constant empty string and return.
|
170
226
|
if (file_size == 0) {
|
171
|
-
|
227
|
+
pm_string_file_handle_close(&handle);
|
172
228
|
const uint8_t source[] = "";
|
173
229
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
174
|
-
return true;
|
230
|
+
return PM_STRING_INIT_SUCCESS;
|
175
231
|
}
|
176
232
|
|
177
233
|
// Create a buffer to read the file into.
|
178
234
|
uint8_t *source = xmalloc(file_size);
|
179
235
|
if (source == NULL) {
|
180
|
-
|
181
|
-
return false;
|
236
|
+
pm_string_file_handle_close(&handle);
|
237
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
182
238
|
}
|
183
239
|
|
184
240
|
// Read the contents of the file
|
185
241
|
DWORD bytes_read;
|
186
|
-
if (!ReadFile(file, source, file_size, &bytes_read, NULL)) {
|
187
|
-
|
188
|
-
return false;
|
242
|
+
if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) {
|
243
|
+
pm_string_file_handle_close(&handle);
|
244
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
189
245
|
}
|
190
246
|
|
191
247
|
// Check the number of bytes read
|
192
248
|
if (bytes_read != file_size) {
|
193
249
|
xfree(source);
|
194
|
-
|
195
|
-
return false;
|
250
|
+
pm_string_file_handle_close(&handle);
|
251
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
196
252
|
}
|
197
253
|
|
198
|
-
|
254
|
+
pm_string_file_handle_close(&handle);
|
199
255
|
*string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
|
200
|
-
return true;
|
201
|
-
#elif defined(
|
202
|
-
|
203
|
-
|
204
|
-
|
256
|
+
return PM_STRING_INIT_SUCCESS;
|
257
|
+
#elif defined(PRISM_HAS_FILESYSTEM)
|
258
|
+
// Open the file for reading
|
259
|
+
int fd = open(filepath, O_RDONLY);
|
260
|
+
if (fd == -1) {
|
261
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
205
262
|
}
|
206
263
|
|
207
|
-
|
208
|
-
|
264
|
+
// Stat the file to get the file size
|
265
|
+
struct stat sb;
|
266
|
+
if (fstat(fd, &sb) == -1) {
|
267
|
+
close(fd);
|
268
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
269
|
+
}
|
209
270
|
|
210
|
-
|
211
|
-
|
212
|
-
|
271
|
+
// Ensure it is a file and not a directory
|
272
|
+
if (S_ISDIR(sb.st_mode)) {
|
273
|
+
close(fd);
|
274
|
+
return PM_STRING_INIT_ERROR_DIRECTORY;
|
213
275
|
}
|
214
276
|
|
215
|
-
|
216
|
-
|
277
|
+
// Check the size to see if it's empty
|
278
|
+
size_t size = (size_t) sb.st_size;
|
279
|
+
if (size == 0) {
|
280
|
+
close(fd);
|
217
281
|
const uint8_t source[] = "";
|
218
282
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
219
|
-
return true;
|
283
|
+
return PM_STRING_INIT_SUCCESS;
|
220
284
|
}
|
221
285
|
|
222
|
-
size_t length = (size_t)
|
286
|
+
size_t length = (size_t) size;
|
223
287
|
uint8_t *source = xmalloc(length);
|
224
288
|
if (source == NULL) {
|
225
|
-
|
226
|
-
return false;
|
289
|
+
close(fd);
|
290
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
227
291
|
}
|
228
292
|
|
229
|
-
|
230
|
-
|
231
|
-
fclose(file);
|
293
|
+
long bytes_read = (long) read(fd, source, length);
|
294
|
+
close(fd);
|
232
295
|
|
233
|
-
if (bytes_read
|
296
|
+
if (bytes_read == -1) {
|
234
297
|
xfree(source);
|
235
|
-
return false;
|
298
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
236
299
|
}
|
237
300
|
|
238
301
|
*string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length };
|
239
|
-
return true;
|
302
|
+
return PM_STRING_INIT_SUCCESS;
|
240
303
|
#else
|
241
304
|
(void) string;
|
242
305
|
(void) filepath;
|
243
306
|
perror("pm_string_file_init is not implemented for this platform");
|
244
|
-
return false;
|
307
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
245
308
|
#endif
|
246
309
|
}
|
247
310
|
|
248
|
-
/**
|
249
|
-
* Returns the memory size associated with the string.
|
250
|
-
*/
|
251
|
-
size_t
|
252
|
-
pm_string_memsize(const pm_string_t *string) {
|
253
|
-
size_t size = sizeof(pm_string_t);
|
254
|
-
if (string->type == PM_STRING_OWNED) {
|
255
|
-
size += string->length;
|
256
|
-
}
|
257
|
-
return size;
|
258
|
-
}
|
259
|
-
|
260
311
|
/**
|
261
312
|
* Ensure the string is owned. If it is not, then reinitialize it as owned and
|
262
313
|
* copy over the previous source.
|
data/src/util/pm_strpbrk.c
CHANGED
@@ -8,6 +8,27 @@ pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start
|
|
8
8
|
pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
|
9
9
|
}
|
10
10
|
|
11
|
+
/**
|
12
|
+
* Set the explicit encoding for the parser to the current encoding.
|
13
|
+
*/
|
14
|
+
static inline void
|
15
|
+
pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
|
16
|
+
if (parser->explicit_encoding != NULL) {
|
17
|
+
if (parser->explicit_encoding == parser->encoding) {
|
18
|
+
// Okay, we already locked to this encoding.
|
19
|
+
} else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
20
|
+
// Not okay, we already found a Unicode escape sequence and this
|
21
|
+
// conflicts.
|
22
|
+
pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
|
23
|
+
} else {
|
24
|
+
// Should not be anything else.
|
25
|
+
assert(false && "unreachable");
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
parser->explicit_encoding = parser->encoding;
|
30
|
+
}
|
31
|
+
|
11
32
|
/**
|
12
33
|
* This is the default path.
|
13
34
|
*/
|
@@ -52,7 +73,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars
|
|
52
73
|
* This is the path when the encoding is ASCII-8BIT.
|
53
74
|
*/
|
54
75
|
static inline const uint8_t *
|
55
|
-
pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
|
76
|
+
pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
56
77
|
size_t index = 0;
|
57
78
|
|
58
79
|
while (index < maximum) {
|
@@ -60,6 +81,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
|
|
60
81
|
return source + index;
|
61
82
|
}
|
62
83
|
|
84
|
+
if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
|
63
85
|
index++;
|
64
86
|
}
|
65
87
|
|
@@ -72,6 +94,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
|
|
72
94
|
static inline const uint8_t *
|
73
95
|
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
74
96
|
size_t index = 0;
|
97
|
+
const pm_encoding_t *encoding = parser->encoding;
|
75
98
|
|
76
99
|
while (index < maximum) {
|
77
100
|
if (strchr((const char *) charset, source[index]) != NULL) {
|
@@ -81,7 +104,8 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
81
104
|
if (source[index] < 0x80) {
|
82
105
|
index++;
|
83
106
|
} else {
|
84
|
-
size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
107
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
108
|
+
if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
|
85
109
|
|
86
110
|
if (width > 0) {
|
87
111
|
index += width;
|
@@ -96,7 +120,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
96
120
|
|
97
121
|
do {
|
98
122
|
index++;
|
99
|
-
} while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
123
|
+
} while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
100
124
|
|
101
125
|
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
102
126
|
}
|
@@ -113,6 +137,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
113
137
|
static inline const uint8_t *
|
114
138
|
pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
115
139
|
size_t index = 0;
|
140
|
+
const pm_encoding_t *encoding = parser->encoding;
|
116
141
|
|
117
142
|
while (index < maximum) {
|
118
143
|
if (strchr((const char *) charset, source[index]) != NULL) {
|
@@ -122,7 +147,8 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
122
147
|
if (source[index] < 0x80 || !validate) {
|
123
148
|
index++;
|
124
149
|
} else {
|
125
|
-
size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
150
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
151
|
+
pm_strpbrk_explicit_encoding_set(parser, source, width);
|
126
152
|
|
127
153
|
if (width > 0) {
|
128
154
|
index += width;
|
@@ -135,7 +161,7 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
135
161
|
|
136
162
|
do {
|
137
163
|
index++;
|
138
|
-
} while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
164
|
+
} while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
139
165
|
|
140
166
|
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
141
167
|
}
|
@@ -171,7 +197,7 @@ pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, p
|
|
171
197
|
} else if (!parser->encoding_changed) {
|
172
198
|
return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
|
173
199
|
} else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
|
174
|
-
return pm_strpbrk_ascii_8bit(source, charset, (size_t) length);
|
200
|
+
return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
|
175
201
|
} else if (parser->encoding->multibyte) {
|
176
202
|
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
|
177
203
|
} else {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.29.0
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- docs/parser_translation.md
|
40
40
|
- docs/parsing_rules.md
|
41
41
|
- docs/releasing.md
|
42
|
+
- docs/relocation.md
|
42
43
|
- docs/ripper_translation.md
|
43
44
|
- docs/ruby_api.md
|
44
45
|
- docs/ruby_parser_translation.md
|
@@ -69,13 +70,11 @@ files:
|
|
69
70
|
- include/prism/util/pm_memchr.h
|
70
71
|
- include/prism/util/pm_newline_list.h
|
71
72
|
- include/prism/util/pm_string.h
|
72
|
-
- include/prism/util/pm_string_list.h
|
73
73
|
- include/prism/util/pm_strncasecmp.h
|
74
74
|
- include/prism/util/pm_strpbrk.h
|
75
75
|
- include/prism/version.h
|
76
76
|
- lib/prism.rb
|
77
77
|
- lib/prism/compiler.rb
|
78
|
-
- lib/prism/debug.rb
|
79
78
|
- lib/prism/desugar_compiler.rb
|
80
79
|
- lib/prism/dispatcher.rb
|
81
80
|
- lib/prism/dot_visitor.rb
|
@@ -89,17 +88,19 @@ files:
|
|
89
88
|
- lib/prism/pack.rb
|
90
89
|
- lib/prism/parse_result.rb
|
91
90
|
- lib/prism/parse_result/comments.rb
|
91
|
+
- lib/prism/parse_result/errors.rb
|
92
92
|
- lib/prism/parse_result/newlines.rb
|
93
93
|
- lib/prism/pattern.rb
|
94
94
|
- lib/prism/polyfill/byteindex.rb
|
95
95
|
- lib/prism/polyfill/unpack1.rb
|
96
96
|
- lib/prism/reflection.rb
|
97
|
+
- lib/prism/relocation.rb
|
97
98
|
- lib/prism/serialize.rb
|
99
|
+
- lib/prism/string_query.rb
|
98
100
|
- lib/prism/translation.rb
|
99
101
|
- lib/prism/translation/parser.rb
|
100
102
|
- lib/prism/translation/parser/compiler.rb
|
101
103
|
- lib/prism/translation/parser/lexer.rb
|
102
|
-
- lib/prism/translation/parser/rubocop.rb
|
103
104
|
- lib/prism/translation/parser33.rb
|
104
105
|
- lib/prism/translation/parser34.rb
|
105
106
|
- lib/prism/translation/ripper.rb
|
@@ -110,11 +111,13 @@ files:
|
|
110
111
|
- prism.gemspec
|
111
112
|
- rbi/prism.rbi
|
112
113
|
- rbi/prism/compiler.rbi
|
114
|
+
- rbi/prism/dsl.rbi
|
113
115
|
- rbi/prism/inspect_visitor.rbi
|
114
116
|
- rbi/prism/node.rbi
|
115
117
|
- rbi/prism/node_ext.rbi
|
116
118
|
- rbi/prism/parse_result.rbi
|
117
119
|
- rbi/prism/reflection.rbi
|
120
|
+
- rbi/prism/string_query.rbi
|
118
121
|
- rbi/prism/translation/parser.rbi
|
119
122
|
- rbi/prism/translation/parser33.rbi
|
120
123
|
- rbi/prism/translation/parser34.rbi
|
@@ -134,7 +137,9 @@ files:
|
|
134
137
|
- sig/prism/parse_result.rbs
|
135
138
|
- sig/prism/pattern.rbs
|
136
139
|
- sig/prism/reflection.rbs
|
140
|
+
- sig/prism/relocation.rbs
|
137
141
|
- sig/prism/serialize.rbs
|
142
|
+
- sig/prism/string_query.rbs
|
138
143
|
- sig/prism/visitor.rbs
|
139
144
|
- src/diagnostic.c
|
140
145
|
- src/encoding.c
|
@@ -155,7 +160,6 @@ files:
|
|
155
160
|
- src/util/pm_memchr.c
|
156
161
|
- src/util/pm_newline_list.c
|
157
162
|
- src/util/pm_string.c
|
158
|
-
- src/util/pm_string_list.c
|
159
163
|
- src/util/pm_strncasecmp.c
|
160
164
|
- src/util/pm_strpbrk.c
|
161
165
|
homepage: https://github.com/ruby/prism
|
@@ -180,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
180
184
|
- !ruby/object:Gem::Version
|
181
185
|
version: '0'
|
182
186
|
requirements: []
|
183
|
-
rubygems_version: 3.
|
187
|
+
rubygems_version: 3.5.16
|
184
188
|
signing_key:
|
185
189
|
specification_version: 4
|
186
190
|
summary: Prism Ruby parser
|
@@ -1,44 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* @file pm_string_list.h
|
3
|
-
*
|
4
|
-
* A list of strings.
|
5
|
-
*/
|
6
|
-
#ifndef PRISM_STRING_LIST_H
|
7
|
-
#define PRISM_STRING_LIST_H
|
8
|
-
|
9
|
-
#include "prism/defines.h"
|
10
|
-
#include "prism/util/pm_string.h"
|
11
|
-
|
12
|
-
#include <stddef.h>
|
13
|
-
#include <stdlib.h>
|
14
|
-
|
15
|
-
/**
|
16
|
-
* A list of strings.
|
17
|
-
*/
|
18
|
-
typedef struct {
|
19
|
-
/** The length of the string list. */
|
20
|
-
size_t length;
|
21
|
-
|
22
|
-
/** The capacity of the string list that has been allocated. */
|
23
|
-
size_t capacity;
|
24
|
-
|
25
|
-
/** A pointer to the start of the string list. */
|
26
|
-
pm_string_t *strings;
|
27
|
-
} pm_string_list_t;
|
28
|
-
|
29
|
-
/**
|
30
|
-
* Append a pm_string_t to the given string list.
|
31
|
-
*
|
32
|
-
* @param string_list The string list to append to.
|
33
|
-
* @param string The string to append.
|
34
|
-
*/
|
35
|
-
void pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string);
|
36
|
-
|
37
|
-
/**
|
38
|
-
* Free the memory associated with the string list.
|
39
|
-
*
|
40
|
-
* @param string_list The string list to free.
|
41
|
-
*/
|
42
|
-
PRISM_EXPORTED_FUNCTION void pm_string_list_free(pm_string_list_t *string_list);
|
43
|
-
|
44
|
-
#endif
|