prism 0.29.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -1
- data/CONTRIBUTING.md +0 -4
- data/README.md +4 -0
- data/config.yml +498 -145
- data/docs/fuzzing.md +1 -1
- data/docs/parsing_rules.md +4 -1
- data/docs/ripper_translation.md +22 -0
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2858 -2082
- data/ext/prism/extconf.rb +1 -1
- data/ext/prism/extension.c +203 -421
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +1732 -453
- data/include/prism/defines.h +36 -0
- data/include/prism/diagnostic.h +23 -6
- data/include/prism/node.h +0 -21
- data/include/prism/options.h +94 -3
- data/include/prism/parser.h +57 -28
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +22 -15
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +28 -12
- data/include/prism/version.h +3 -3
- data/include/prism.h +0 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +16 -1
- data/lib/prism/dot_visitor.rb +45 -34
- data/lib/prism/dsl.rb +660 -468
- data/lib/prism/ffi.rb +64 -6
- data/lib/prism/inspect_visitor.rb +294 -64
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +11 -6
- data/lib/prism/node.rb +2469 -4973
- data/lib/prism/node_ext.rb +91 -14
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +43 -3
- data/lib/prism/reflection.rb +10 -8
- data/lib/prism/serialize.rb +484 -609
- data/lib/prism/translation/parser/compiler.rb +152 -132
- data/lib/prism/translation/parser/lexer.rb +26 -4
- data/lib/prism/translation/parser.rb +9 -4
- data/lib/prism/translation/ripper.rb +22 -20
- data/lib/prism/translation/ruby_parser.rb +73 -13
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +0 -4
- data/prism.gemspec +3 -5
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +744 -4837
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +36 -30
- data/sig/prism/dsl.rbs +190 -303
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +759 -628
- data/sig/prism/parse_result.rbs +2 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +103 -64
- data/src/diagnostic.c +62 -28
- data/src/node.c +499 -1754
- data/src/options.c +76 -27
- data/src/prettyprint.c +156 -112
- data/src/prism.c +2773 -2081
- data/src/regexp.c +202 -69
- data/src/serialize.c +170 -50
- data/src/static_literals.c +63 -84
- data/src/token_type.c +4 -4
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +53 -25
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +130 -80
- data/src/util/pm_strpbrk.c +32 -6
- metadata +4 -6
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/lib/prism/translation/parser/rubocop.rb +0 -73
- data/src/util/pm_string_list.c +0 -28
data/src/util/pm_string.c
CHANGED
@@ -47,6 +47,62 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
|
|
47
47
|
};
|
48
48
|
}
|
49
49
|
|
50
|
+
#ifdef _WIN32
|
51
|
+
/**
|
52
|
+
* Represents a file handle on Windows, where the path will need to be freed
|
53
|
+
* when the file is closed.
|
54
|
+
*/
|
55
|
+
typedef struct {
|
56
|
+
/** The path to the file, which will become allocated memory. */
|
57
|
+
WCHAR *path;
|
58
|
+
|
59
|
+
/** The handle to the file, which will start as uninitialized memory. */
|
60
|
+
HANDLE file;
|
61
|
+
} pm_string_file_handle_t;
|
62
|
+
|
63
|
+
/**
|
64
|
+
* Open the file indicated by the filepath parameter for reading on Windows.
|
65
|
+
* Perform any kind of normalization that needs to happen on the filepath.
|
66
|
+
*/
|
67
|
+
static pm_string_init_result_t
|
68
|
+
pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) {
|
69
|
+
int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
|
70
|
+
if (length == 0) return PM_STRING_INIT_ERROR_GENERIC;
|
71
|
+
|
72
|
+
handle->path = xmalloc(sizeof(WCHAR) * ((size_t) length));
|
73
|
+
if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) {
|
74
|
+
xfree(handle->path);
|
75
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
76
|
+
}
|
77
|
+
|
78
|
+
handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
|
79
|
+
if (handle->file == INVALID_HANDLE_VALUE) {
|
80
|
+
pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC;
|
81
|
+
|
82
|
+
if (GetLastError() == ERROR_ACCESS_DENIED) {
|
83
|
+
DWORD attributes = GetFileAttributesW(handle->path);
|
84
|
+
if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
|
85
|
+
result = PM_STRING_INIT_ERROR_DIRECTORY;
|
86
|
+
}
|
87
|
+
}
|
88
|
+
|
89
|
+
xfree(handle->path);
|
90
|
+
return result;
|
91
|
+
}
|
92
|
+
|
93
|
+
return PM_STRING_INIT_SUCCESS;
|
94
|
+
}
|
95
|
+
|
96
|
+
/**
|
97
|
+
* Close the file handle and free the path.
|
98
|
+
*/
|
99
|
+
static void
|
100
|
+
pm_string_file_handle_close(pm_string_file_handle_t *handle) {
|
101
|
+
xfree(handle->path);
|
102
|
+
CloseHandle(handle->file);
|
103
|
+
}
|
104
|
+
#endif
|
105
|
+
|
50
106
|
/**
|
51
107
|
* Read the file indicated by the filepath parameter into source and load its
|
52
108
|
* contents and size into the given `pm_string_t`. The given `pm_string_t`
|
@@ -58,62 +114,66 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
|
|
58
114
|
* `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
59
115
|
* `mmap`, and on other POSIX systems we'll use `read`.
|
60
116
|
*/
|
61
|
-
PRISM_EXPORTED_FUNCTION
|
117
|
+
PRISM_EXPORTED_FUNCTION pm_string_init_result_t
|
62
118
|
pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
63
119
|
#ifdef _WIN32
|
64
120
|
// Open the file for reading.
|
65
|
-
|
66
|
-
|
67
|
-
if (
|
68
|
-
return false;
|
69
|
-
}
|
121
|
+
pm_string_file_handle_t handle;
|
122
|
+
pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
|
123
|
+
if (result != PM_STRING_INIT_SUCCESS) return result;
|
70
124
|
|
71
125
|
// Get the file size.
|
72
|
-
DWORD file_size = GetFileSize(file, NULL);
|
126
|
+
DWORD file_size = GetFileSize(handle.file, NULL);
|
73
127
|
if (file_size == INVALID_FILE_SIZE) {
|
74
|
-
|
75
|
-
return
|
128
|
+
pm_string_file_handle_close(&handle);
|
129
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
76
130
|
}
|
77
131
|
|
78
132
|
// If the file is empty, then we don't need to do anything else, we'll set
|
79
133
|
// the source to a constant empty string and return.
|
80
134
|
if (file_size == 0) {
|
81
|
-
|
135
|
+
pm_string_file_handle_close(&handle);
|
82
136
|
const uint8_t source[] = "";
|
83
137
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
84
|
-
return
|
138
|
+
return PM_STRING_INIT_SUCCESS;
|
85
139
|
}
|
86
140
|
|
87
141
|
// Create a mapping of the file.
|
88
|
-
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
142
|
+
HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
|
89
143
|
if (mapping == NULL) {
|
90
|
-
|
91
|
-
return
|
144
|
+
pm_string_file_handle_close(&handle);
|
145
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
92
146
|
}
|
93
147
|
|
94
148
|
// Map the file into memory.
|
95
149
|
uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
|
96
150
|
CloseHandle(mapping);
|
97
|
-
|
151
|
+
pm_string_file_handle_close(&handle);
|
98
152
|
|
99
153
|
if (source == NULL) {
|
100
|
-
return
|
154
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
101
155
|
}
|
102
156
|
|
103
157
|
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
|
104
|
-
return
|
158
|
+
return PM_STRING_INIT_SUCCESS;
|
105
159
|
#elif defined(_POSIX_MAPPED_FILES)
|
106
160
|
// Open the file for reading
|
107
161
|
int fd = open(filepath, O_RDONLY);
|
108
162
|
if (fd == -1) {
|
109
|
-
return
|
163
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
110
164
|
}
|
111
165
|
|
112
166
|
// Stat the file to get the file size
|
113
167
|
struct stat sb;
|
114
168
|
if (fstat(fd, &sb) == -1) {
|
115
169
|
close(fd);
|
116
|
-
return
|
170
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
171
|
+
}
|
172
|
+
|
173
|
+
// Ensure it is a file and not a directory
|
174
|
+
if (S_ISDIR(sb.st_mode)) {
|
175
|
+
close(fd);
|
176
|
+
return PM_STRING_INIT_ERROR_DIRECTORY;
|
117
177
|
}
|
118
178
|
|
119
179
|
// mmap the file descriptor to virtually get the contents
|
@@ -124,22 +184,19 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
124
184
|
close(fd);
|
125
185
|
const uint8_t source[] = "";
|
126
186
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
127
|
-
return
|
187
|
+
return PM_STRING_INIT_SUCCESS;
|
128
188
|
}
|
129
189
|
|
130
190
|
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
|
131
191
|
if (source == MAP_FAILED) {
|
132
|
-
return
|
192
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
133
193
|
}
|
134
194
|
|
135
195
|
close(fd);
|
136
196
|
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
|
137
|
-
return
|
197
|
+
return PM_STRING_INIT_SUCCESS;
|
138
198
|
#else
|
139
|
-
(
|
140
|
-
(void) filepath;
|
141
|
-
perror("pm_string_mapped_init is not implemented for this platform");
|
142
|
-
return false;
|
199
|
+
return pm_string_file_init(string, filepath);
|
143
200
|
#endif
|
144
201
|
}
|
145
202
|
|
@@ -148,115 +205,108 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
148
205
|
* contents and size into the given `pm_string_t`. The given `pm_string_t`
|
149
206
|
* should be freed using `pm_string_free` when it is no longer used.
|
150
207
|
*/
|
151
|
-
PRISM_EXPORTED_FUNCTION
|
208
|
+
PRISM_EXPORTED_FUNCTION pm_string_init_result_t
|
152
209
|
pm_string_file_init(pm_string_t *string, const char *filepath) {
|
153
210
|
#ifdef _WIN32
|
154
211
|
// Open the file for reading.
|
155
|
-
|
156
|
-
|
157
|
-
if (
|
158
|
-
return false;
|
159
|
-
}
|
212
|
+
pm_string_file_handle_t handle;
|
213
|
+
pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
|
214
|
+
if (result != PM_STRING_INIT_SUCCESS) return result;
|
160
215
|
|
161
216
|
// Get the file size.
|
162
|
-
DWORD file_size = GetFileSize(file, NULL);
|
217
|
+
DWORD file_size = GetFileSize(handle.file, NULL);
|
163
218
|
if (file_size == INVALID_FILE_SIZE) {
|
164
|
-
|
165
|
-
return
|
219
|
+
pm_string_file_handle_close(&handle);
|
220
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
166
221
|
}
|
167
222
|
|
168
223
|
// If the file is empty, then we don't need to do anything else, we'll set
|
169
224
|
// the source to a constant empty string and return.
|
170
225
|
if (file_size == 0) {
|
171
|
-
|
226
|
+
pm_string_file_handle_close(&handle);
|
172
227
|
const uint8_t source[] = "";
|
173
228
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
174
|
-
return
|
229
|
+
return PM_STRING_INIT_SUCCESS;
|
175
230
|
}
|
176
231
|
|
177
232
|
// Create a buffer to read the file into.
|
178
233
|
uint8_t *source = xmalloc(file_size);
|
179
234
|
if (source == NULL) {
|
180
|
-
|
181
|
-
return
|
235
|
+
pm_string_file_handle_close(&handle);
|
236
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
182
237
|
}
|
183
238
|
|
184
239
|
// Read the contents of the file
|
185
240
|
DWORD bytes_read;
|
186
|
-
if (!ReadFile(file, source, file_size, &bytes_read, NULL)) {
|
187
|
-
|
188
|
-
return
|
241
|
+
if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) {
|
242
|
+
pm_string_file_handle_close(&handle);
|
243
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
189
244
|
}
|
190
245
|
|
191
246
|
// Check the number of bytes read
|
192
247
|
if (bytes_read != file_size) {
|
193
248
|
xfree(source);
|
194
|
-
|
195
|
-
return
|
249
|
+
pm_string_file_handle_close(&handle);
|
250
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
196
251
|
}
|
197
252
|
|
198
|
-
|
253
|
+
pm_string_file_handle_close(&handle);
|
199
254
|
*string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
|
200
|
-
return
|
201
|
-
#elif defined(
|
202
|
-
|
203
|
-
|
204
|
-
|
255
|
+
return PM_STRING_INIT_SUCCESS;
|
256
|
+
#elif defined(PRISM_HAS_FILESYSTEM)
|
257
|
+
// Open the file for reading
|
258
|
+
int fd = open(filepath, O_RDONLY);
|
259
|
+
if (fd == -1) {
|
260
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
205
261
|
}
|
206
262
|
|
207
|
-
|
208
|
-
|
263
|
+
// Stat the file to get the file size
|
264
|
+
struct stat sb;
|
265
|
+
if (fstat(fd, &sb) == -1) {
|
266
|
+
close(fd);
|
267
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
268
|
+
}
|
209
269
|
|
210
|
-
|
211
|
-
|
212
|
-
|
270
|
+
// Ensure it is a file and not a directory
|
271
|
+
if (S_ISDIR(sb.st_mode)) {
|
272
|
+
close(fd);
|
273
|
+
return PM_STRING_INIT_ERROR_DIRECTORY;
|
213
274
|
}
|
214
275
|
|
215
|
-
|
216
|
-
|
276
|
+
// Check the size to see if it's empty
|
277
|
+
size_t size = (size_t) sb.st_size;
|
278
|
+
if (size == 0) {
|
279
|
+
close(fd);
|
217
280
|
const uint8_t source[] = "";
|
218
281
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
219
|
-
return
|
282
|
+
return PM_STRING_INIT_SUCCESS;
|
220
283
|
}
|
221
284
|
|
222
|
-
size_t length = (size_t)
|
285
|
+
size_t length = (size_t) size;
|
223
286
|
uint8_t *source = xmalloc(length);
|
224
287
|
if (source == NULL) {
|
225
|
-
|
226
|
-
return
|
288
|
+
close(fd);
|
289
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
227
290
|
}
|
228
291
|
|
229
|
-
|
230
|
-
|
231
|
-
fclose(file);
|
292
|
+
long bytes_read = (long) read(fd, source, length);
|
293
|
+
close(fd);
|
232
294
|
|
233
|
-
if (bytes_read
|
295
|
+
if (bytes_read == -1) {
|
234
296
|
xfree(source);
|
235
|
-
return
|
297
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
236
298
|
}
|
237
299
|
|
238
300
|
*string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length };
|
239
|
-
return
|
301
|
+
return PM_STRING_INIT_SUCCESS;
|
240
302
|
#else
|
241
303
|
(void) string;
|
242
304
|
(void) filepath;
|
243
305
|
perror("pm_string_file_init is not implemented for this platform");
|
244
|
-
return
|
306
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
245
307
|
#endif
|
246
308
|
}
|
247
309
|
|
248
|
-
/**
|
249
|
-
* Returns the memory size associated with the string.
|
250
|
-
*/
|
251
|
-
size_t
|
252
|
-
pm_string_memsize(const pm_string_t *string) {
|
253
|
-
size_t size = sizeof(pm_string_t);
|
254
|
-
if (string->type == PM_STRING_OWNED) {
|
255
|
-
size += string->length;
|
256
|
-
}
|
257
|
-
return size;
|
258
|
-
}
|
259
|
-
|
260
310
|
/**
|
261
311
|
* Ensure the string is owned. If it is not, then reinitialize it as owned and
|
262
312
|
* copy over the previous source.
|
data/src/util/pm_strpbrk.c
CHANGED
@@ -8,6 +8,27 @@ pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start
|
|
8
8
|
pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
|
9
9
|
}
|
10
10
|
|
11
|
+
/**
|
12
|
+
* Set the explicit encoding for the parser to the current encoding.
|
13
|
+
*/
|
14
|
+
static inline void
|
15
|
+
pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
|
16
|
+
if (parser->explicit_encoding != NULL) {
|
17
|
+
if (parser->explicit_encoding == parser->encoding) {
|
18
|
+
// Okay, we already locked to this encoding.
|
19
|
+
} else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
20
|
+
// Not okay, we already found a Unicode escape sequence and this
|
21
|
+
// conflicts.
|
22
|
+
pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
|
23
|
+
} else {
|
24
|
+
// Should not be anything else.
|
25
|
+
assert(false && "unreachable");
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
parser->explicit_encoding = parser->encoding;
|
30
|
+
}
|
31
|
+
|
11
32
|
/**
|
12
33
|
* This is the default path.
|
13
34
|
*/
|
@@ -52,7 +73,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars
|
|
52
73
|
* This is the path when the encoding is ASCII-8BIT.
|
53
74
|
*/
|
54
75
|
static inline const uint8_t *
|
55
|
-
pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
|
76
|
+
pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
56
77
|
size_t index = 0;
|
57
78
|
|
58
79
|
while (index < maximum) {
|
@@ -60,6 +81,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
|
|
60
81
|
return source + index;
|
61
82
|
}
|
62
83
|
|
84
|
+
if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
|
63
85
|
index++;
|
64
86
|
}
|
65
87
|
|
@@ -72,6 +94,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
|
|
72
94
|
static inline const uint8_t *
|
73
95
|
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
74
96
|
size_t index = 0;
|
97
|
+
const pm_encoding_t *encoding = parser->encoding;
|
75
98
|
|
76
99
|
while (index < maximum) {
|
77
100
|
if (strchr((const char *) charset, source[index]) != NULL) {
|
@@ -81,7 +104,8 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
81
104
|
if (source[index] < 0x80) {
|
82
105
|
index++;
|
83
106
|
} else {
|
84
|
-
size_t width =
|
107
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
108
|
+
if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
|
85
109
|
|
86
110
|
if (width > 0) {
|
87
111
|
index += width;
|
@@ -96,7 +120,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
96
120
|
|
97
121
|
do {
|
98
122
|
index++;
|
99
|
-
} while (index < maximum &&
|
123
|
+
} while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
100
124
|
|
101
125
|
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
102
126
|
}
|
@@ -113,6 +137,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
113
137
|
static inline const uint8_t *
|
114
138
|
pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
115
139
|
size_t index = 0;
|
140
|
+
const pm_encoding_t *encoding = parser->encoding;
|
116
141
|
|
117
142
|
while (index < maximum) {
|
118
143
|
if (strchr((const char *) charset, source[index]) != NULL) {
|
@@ -122,7 +147,8 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
122
147
|
if (source[index] < 0x80 || !validate) {
|
123
148
|
index++;
|
124
149
|
} else {
|
125
|
-
size_t width =
|
150
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
151
|
+
pm_strpbrk_explicit_encoding_set(parser, source, width);
|
126
152
|
|
127
153
|
if (width > 0) {
|
128
154
|
index += width;
|
@@ -135,7 +161,7 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
135
161
|
|
136
162
|
do {
|
137
163
|
index++;
|
138
|
-
} while (index < maximum &&
|
164
|
+
} while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
139
165
|
|
140
166
|
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
141
167
|
}
|
@@ -171,7 +197,7 @@ pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, p
|
|
171
197
|
} else if (!parser->encoding_changed) {
|
172
198
|
return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
|
173
199
|
} else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
|
174
|
-
return pm_strpbrk_ascii_8bit(source, charset, (size_t) length);
|
200
|
+
return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
|
175
201
|
} else if (parser->encoding->multibyte) {
|
176
202
|
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
|
177
203
|
} else {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -69,13 +69,11 @@ files:
|
|
69
69
|
- include/prism/util/pm_memchr.h
|
70
70
|
- include/prism/util/pm_newline_list.h
|
71
71
|
- include/prism/util/pm_string.h
|
72
|
-
- include/prism/util/pm_string_list.h
|
73
72
|
- include/prism/util/pm_strncasecmp.h
|
74
73
|
- include/prism/util/pm_strpbrk.h
|
75
74
|
- include/prism/version.h
|
76
75
|
- lib/prism.rb
|
77
76
|
- lib/prism/compiler.rb
|
78
|
-
- lib/prism/debug.rb
|
79
77
|
- lib/prism/desugar_compiler.rb
|
80
78
|
- lib/prism/dispatcher.rb
|
81
79
|
- lib/prism/dot_visitor.rb
|
@@ -89,6 +87,7 @@ files:
|
|
89
87
|
- lib/prism/pack.rb
|
90
88
|
- lib/prism/parse_result.rb
|
91
89
|
- lib/prism/parse_result/comments.rb
|
90
|
+
- lib/prism/parse_result/errors.rb
|
92
91
|
- lib/prism/parse_result/newlines.rb
|
93
92
|
- lib/prism/pattern.rb
|
94
93
|
- lib/prism/polyfill/byteindex.rb
|
@@ -99,7 +98,6 @@ files:
|
|
99
98
|
- lib/prism/translation/parser.rb
|
100
99
|
- lib/prism/translation/parser/compiler.rb
|
101
100
|
- lib/prism/translation/parser/lexer.rb
|
102
|
-
- lib/prism/translation/parser/rubocop.rb
|
103
101
|
- lib/prism/translation/parser33.rb
|
104
102
|
- lib/prism/translation/parser34.rb
|
105
103
|
- lib/prism/translation/ripper.rb
|
@@ -110,6 +108,7 @@ files:
|
|
110
108
|
- prism.gemspec
|
111
109
|
- rbi/prism.rbi
|
112
110
|
- rbi/prism/compiler.rbi
|
111
|
+
- rbi/prism/dsl.rbi
|
113
112
|
- rbi/prism/inspect_visitor.rbi
|
114
113
|
- rbi/prism/node.rbi
|
115
114
|
- rbi/prism/node_ext.rbi
|
@@ -155,7 +154,6 @@ files:
|
|
155
154
|
- src/util/pm_memchr.c
|
156
155
|
- src/util/pm_newline_list.c
|
157
156
|
- src/util/pm_string.c
|
158
|
-
- src/util/pm_string_list.c
|
159
157
|
- src/util/pm_strncasecmp.c
|
160
158
|
- src/util/pm_strpbrk.c
|
161
159
|
homepage: https://github.com/ruby/prism
|
@@ -1,44 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* @file pm_string_list.h
|
3
|
-
*
|
4
|
-
* A list of strings.
|
5
|
-
*/
|
6
|
-
#ifndef PRISM_STRING_LIST_H
|
7
|
-
#define PRISM_STRING_LIST_H
|
8
|
-
|
9
|
-
#include "prism/defines.h"
|
10
|
-
#include "prism/util/pm_string.h"
|
11
|
-
|
12
|
-
#include <stddef.h>
|
13
|
-
#include <stdlib.h>
|
14
|
-
|
15
|
-
/**
|
16
|
-
* A list of strings.
|
17
|
-
*/
|
18
|
-
typedef struct {
|
19
|
-
/** The length of the string list. */
|
20
|
-
size_t length;
|
21
|
-
|
22
|
-
/** The capacity of the string list that has been allocated. */
|
23
|
-
size_t capacity;
|
24
|
-
|
25
|
-
/** A pointer to the start of the string list. */
|
26
|
-
pm_string_t *strings;
|
27
|
-
} pm_string_list_t;
|
28
|
-
|
29
|
-
/**
|
30
|
-
* Append a pm_string_t to the given string list.
|
31
|
-
*
|
32
|
-
* @param string_list The string list to append to.
|
33
|
-
* @param string The string to append.
|
34
|
-
*/
|
35
|
-
void pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string);
|
36
|
-
|
37
|
-
/**
|
38
|
-
* Free the memory associated with the string list.
|
39
|
-
*
|
40
|
-
* @param string_list The string list to free.
|
41
|
-
*/
|
42
|
-
PRISM_EXPORTED_FUNCTION void pm_string_list_free(pm_string_list_t *string_list);
|
43
|
-
|
44
|
-
#endif
|