jruby-prism-parser 0.24.0-java → 1.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +269 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +25 -18
- data/README.md +57 -6
- data/config.yml +1724 -140
- data/docs/build_system.md +39 -11
- data/docs/configuration.md +4 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/fuzzing.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/parsing_rules.md +4 -1
- data/docs/releasing.md +8 -10
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +72 -0
- data/docs/ruby_api.md +2 -1
- data/docs/serialization.md +29 -5
- data/ext/prism/api_node.c +3395 -1999
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +55 -34
- data/ext/prism/extension.c +597 -346
- data/ext/prism/extension.h +6 -5
- data/include/prism/ast.h +2612 -455
- data/include/prism/defines.h +160 -2
- data/include/prism/diagnostic.h +188 -76
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +89 -17
- data/include/prism/options.h +224 -12
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +267 -66
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +121 -0
- data/include/prism/util/pm_buffer.h +75 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +18 -9
- data/include/prism/util/pm_integer.h +126 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +19 -0
- data/include/prism/util/pm_string.h +48 -8
- data/include/prism/version.h +3 -3
- data/include/prism.h +99 -5
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +11 -1
- data/lib/prism/desugar_compiler.rb +113 -74
- data/lib/prism/dispatcher.rb +45 -1
- data/lib/prism/dot_visitor.rb +201 -77
- data/lib/prism/dsl.rb +673 -461
- data/lib/prism/ffi.rb +233 -45
- data/lib/prism/inspect_visitor.rb +2389 -0
- data/lib/prism/lex_compat.rb +35 -16
- data/lib/prism/mutation_compiler.rb +24 -8
- data/lib/prism/node.rb +7731 -8460
- data/lib/prism/node_ext.rb +328 -32
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -24
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +102 -12
- data/lib/prism/parse_result.rb +448 -44
- data/lib/prism/pattern.rb +28 -10
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +413 -0
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +1940 -1198
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/builder.rb +61 -0
- data/lib/prism/translation/parser/compiler.rb +569 -195
- data/lib/prism/translation/parser/lexer.rb +516 -39
- data/lib/prism/translation/parser.rb +177 -12
- data/lib/prism/translation/parser33.rb +1 -1
- data/lib/prism/translation/parser34.rb +1 -1
- data/lib/prism/translation/parser35.rb +12 -0
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3224 -462
- data/lib/prism/translation/ruby_parser.rb +194 -69
- data/lib/prism/translation.rb +4 -1
- data/lib/prism/version.rb +1 -1
- data/lib/prism/visitor.rb +13 -0
- data/lib/prism.rb +17 -27
- data/prism.gemspec +57 -17
- data/rbi/prism/compiler.rbi +12 -0
- data/rbi/prism/dsl.rbi +524 -0
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +8722 -0
- data/rbi/prism/node_ext.rbi +107 -0
- data/rbi/prism/parse_result.rbi +404 -0
- data/rbi/prism/reflection.rbi +58 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/translation/parser.rbi +11 -0
- data/rbi/prism/translation/parser33.rbi +6 -0
- data/rbi/prism/translation/parser34.rbi +6 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism/translation/ripper.rbi +15 -0
- data/rbi/prism/visitor.rbi +473 -0
- data/rbi/prism.rbi +44 -7745
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +351 -0
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/mutation_compiler.rbs +159 -0
- data/sig/prism/node.rbs +3614 -0
- data/sig/prism/node_ext.rbs +82 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +192 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/reflection.rbs +50 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/serialize.rbs +8 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +169 -0
- data/sig/prism.rbs +248 -4767
- data/src/diagnostic.c +672 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7541 -1653
- data/src/options.c +135 -20
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1543 -1485
- data/src/prism.c +7813 -3050
- data/src/regexp.c +225 -73
- data/src/serialize.c +101 -77
- data/src/static_literals.c +617 -0
- data/src/token_type.c +14 -13
- data/src/util/pm_buffer.c +187 -20
- data/src/util/pm_char.c +5 -5
- data/src/util/pm_constant_pool.c +39 -19
- data/src/util/pm_integer.c +670 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +43 -5
- data/src/util/pm_string.c +213 -33
- data/src/util/pm_strncasecmp.c +13 -1
- data/src/util/pm_strpbrk.c +32 -6
- metadata +55 -19
- data/docs/ripper.md +0 -36
- data/include/prism/util/pm_state_stack.h +0 -42
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -206
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/translation/parser/rubocop.rb +0 -45
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
- data/src/util/pm_state_stack.c +0 -25
- data/src/util/pm_string_list.c +0 -28
data/src/util/pm_newline_list.c
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
*/
|
7
7
|
bool
|
8
8
|
pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
|
9
|
-
list->offsets = (size_t *)
|
9
|
+
list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t));
|
10
10
|
if (list->offsets == NULL) return false;
|
11
11
|
|
12
12
|
list->start = start;
|
@@ -19,6 +19,14 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac
|
|
19
19
|
return true;
|
20
20
|
}
|
21
21
|
|
22
|
+
/**
|
23
|
+
* Clear out the newlines that have been appended to the list.
|
24
|
+
*/
|
25
|
+
void
|
26
|
+
pm_newline_list_clear(pm_newline_list_t *list) {
|
27
|
+
list->size = 1;
|
28
|
+
}
|
29
|
+
|
22
30
|
/**
|
23
31
|
* Append a new offset to the newline list. Returns true if the reallocation of
|
24
32
|
* the offsets succeeds (if one was necessary), otherwise returns false.
|
@@ -29,10 +37,11 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
|
29
37
|
size_t *original_offsets = list->offsets;
|
30
38
|
|
31
39
|
list->capacity = (list->capacity * 3) / 2;
|
32
|
-
list->offsets = (size_t *)
|
33
|
-
memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
|
34
|
-
free(original_offsets);
|
40
|
+
list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t));
|
35
41
|
if (list->offsets == NULL) return false;
|
42
|
+
|
43
|
+
memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
|
44
|
+
xfree(original_offsets);
|
36
45
|
}
|
37
46
|
|
38
47
|
assert(*cursor == '\n');
|
@@ -45,6 +54,35 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
|
45
54
|
return true;
|
46
55
|
}
|
47
56
|
|
57
|
+
/**
|
58
|
+
* Returns the line of the given offset. If the offset is not in the list, the
|
59
|
+
* line of the closest offset less than the given offset is returned.
|
60
|
+
*/
|
61
|
+
int32_t
|
62
|
+
pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
|
63
|
+
assert(cursor >= list->start);
|
64
|
+
size_t offset = (size_t) (cursor - list->start);
|
65
|
+
|
66
|
+
size_t left = 0;
|
67
|
+
size_t right = list->size - 1;
|
68
|
+
|
69
|
+
while (left <= right) {
|
70
|
+
size_t mid = left + (right - left) / 2;
|
71
|
+
|
72
|
+
if (list->offsets[mid] == offset) {
|
73
|
+
return ((int32_t) mid) + start_line;
|
74
|
+
}
|
75
|
+
|
76
|
+
if (list->offsets[mid] < offset) {
|
77
|
+
left = mid + 1;
|
78
|
+
} else {
|
79
|
+
right = mid - 1;
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
return ((int32_t) left) + start_line - 1;
|
84
|
+
}
|
85
|
+
|
48
86
|
/**
|
49
87
|
* Returns the line and column of the given offset. If the offset is not in the
|
50
88
|
* list, the line and column of the closest offset less than the given offset
|
@@ -83,5 +121,5 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
|
|
83
121
|
*/
|
84
122
|
void
|
85
123
|
pm_newline_list_free(pm_newline_list_t *list) {
|
86
|
-
|
124
|
+
xfree(list->offsets);
|
87
125
|
}
|
data/src/util/pm_string.c
CHANGED
@@ -47,6 +47,62 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
|
|
47
47
|
};
|
48
48
|
}
|
49
49
|
|
50
|
+
#ifdef _WIN32
|
51
|
+
/**
|
52
|
+
* Represents a file handle on Windows, where the path will need to be freed
|
53
|
+
* when the file is closed.
|
54
|
+
*/
|
55
|
+
typedef struct {
|
56
|
+
/** The path to the file, which will become allocated memory. */
|
57
|
+
WCHAR *path;
|
58
|
+
|
59
|
+
/** The handle to the file, which will start as uninitialized memory. */
|
60
|
+
HANDLE file;
|
61
|
+
} pm_string_file_handle_t;
|
62
|
+
|
63
|
+
/**
|
64
|
+
* Open the file indicated by the filepath parameter for reading on Windows.
|
65
|
+
* Perform any kind of normalization that needs to happen on the filepath.
|
66
|
+
*/
|
67
|
+
static pm_string_init_result_t
|
68
|
+
pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) {
|
69
|
+
int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
|
70
|
+
if (length == 0) return PM_STRING_INIT_ERROR_GENERIC;
|
71
|
+
|
72
|
+
handle->path = xmalloc(sizeof(WCHAR) * ((size_t) length));
|
73
|
+
if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) {
|
74
|
+
xfree(handle->path);
|
75
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
76
|
+
}
|
77
|
+
|
78
|
+
handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
|
79
|
+
if (handle->file == INVALID_HANDLE_VALUE) {
|
80
|
+
pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC;
|
81
|
+
|
82
|
+
if (GetLastError() == ERROR_ACCESS_DENIED) {
|
83
|
+
DWORD attributes = GetFileAttributesW(handle->path);
|
84
|
+
if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
|
85
|
+
result = PM_STRING_INIT_ERROR_DIRECTORY;
|
86
|
+
}
|
87
|
+
}
|
88
|
+
|
89
|
+
xfree(handle->path);
|
90
|
+
return result;
|
91
|
+
}
|
92
|
+
|
93
|
+
return PM_STRING_INIT_SUCCESS;
|
94
|
+
}
|
95
|
+
|
96
|
+
/**
|
97
|
+
* Close the file handle and free the path.
|
98
|
+
*/
|
99
|
+
static void
|
100
|
+
pm_string_file_handle_close(pm_string_file_handle_t *handle) {
|
101
|
+
xfree(handle->path);
|
102
|
+
CloseHandle(handle->file);
|
103
|
+
}
|
104
|
+
#endif
|
105
|
+
|
50
106
|
/**
|
51
107
|
* Read the file indicated by the filepath parameter into source and load its
|
52
108
|
* contents and size into the given `pm_string_t`. The given `pm_string_t`
|
@@ -58,62 +114,66 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
|
|
58
114
|
* `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
59
115
|
* `mmap`, and on other POSIX systems we'll use `read`.
|
60
116
|
*/
|
61
|
-
|
117
|
+
PRISM_EXPORTED_FUNCTION pm_string_init_result_t
|
62
118
|
pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
63
119
|
#ifdef _WIN32
|
64
120
|
// Open the file for reading.
|
65
|
-
|
66
|
-
|
67
|
-
if (
|
68
|
-
return false;
|
69
|
-
}
|
121
|
+
pm_string_file_handle_t handle;
|
122
|
+
pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
|
123
|
+
if (result != PM_STRING_INIT_SUCCESS) return result;
|
70
124
|
|
71
125
|
// Get the file size.
|
72
|
-
DWORD file_size = GetFileSize(file, NULL);
|
126
|
+
DWORD file_size = GetFileSize(handle.file, NULL);
|
73
127
|
if (file_size == INVALID_FILE_SIZE) {
|
74
|
-
|
75
|
-
return
|
128
|
+
pm_string_file_handle_close(&handle);
|
129
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
76
130
|
}
|
77
131
|
|
78
132
|
// If the file is empty, then we don't need to do anything else, we'll set
|
79
133
|
// the source to a constant empty string and return.
|
80
134
|
if (file_size == 0) {
|
81
|
-
|
135
|
+
pm_string_file_handle_close(&handle);
|
82
136
|
const uint8_t source[] = "";
|
83
137
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
84
|
-
return
|
138
|
+
return PM_STRING_INIT_SUCCESS;
|
85
139
|
}
|
86
140
|
|
87
141
|
// Create a mapping of the file.
|
88
|
-
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
142
|
+
HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
|
89
143
|
if (mapping == NULL) {
|
90
|
-
|
91
|
-
return
|
144
|
+
pm_string_file_handle_close(&handle);
|
145
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
92
146
|
}
|
93
147
|
|
94
148
|
// Map the file into memory.
|
95
149
|
uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
|
96
150
|
CloseHandle(mapping);
|
97
|
-
|
151
|
+
pm_string_file_handle_close(&handle);
|
98
152
|
|
99
153
|
if (source == NULL) {
|
100
|
-
return
|
154
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
101
155
|
}
|
102
156
|
|
103
157
|
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
|
104
|
-
return
|
105
|
-
#
|
158
|
+
return PM_STRING_INIT_SUCCESS;
|
159
|
+
#elif defined(_POSIX_MAPPED_FILES)
|
106
160
|
// Open the file for reading
|
107
161
|
int fd = open(filepath, O_RDONLY);
|
108
162
|
if (fd == -1) {
|
109
|
-
return
|
163
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
110
164
|
}
|
111
165
|
|
112
166
|
// Stat the file to get the file size
|
113
167
|
struct stat sb;
|
114
168
|
if (fstat(fd, &sb) == -1) {
|
115
169
|
close(fd);
|
116
|
-
return
|
170
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
171
|
+
}
|
172
|
+
|
173
|
+
// Ensure it is a file and not a directory
|
174
|
+
if (S_ISDIR(sb.st_mode)) {
|
175
|
+
close(fd);
|
176
|
+
return PM_STRING_INIT_ERROR_DIRECTORY;
|
117
177
|
}
|
118
178
|
|
119
179
|
// mmap the file descriptor to virtually get the contents
|
@@ -124,30 +184,128 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
124
184
|
close(fd);
|
125
185
|
const uint8_t source[] = "";
|
126
186
|
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
127
|
-
return
|
187
|
+
return PM_STRING_INIT_SUCCESS;
|
128
188
|
}
|
129
189
|
|
130
190
|
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
|
131
191
|
if (source == MAP_FAILED) {
|
132
|
-
|
192
|
+
close(fd);
|
193
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
133
194
|
}
|
134
195
|
|
135
196
|
close(fd);
|
136
197
|
*string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
|
137
|
-
return
|
198
|
+
return PM_STRING_INIT_SUCCESS;
|
199
|
+
#else
|
200
|
+
return pm_string_file_init(string, filepath);
|
138
201
|
#endif
|
139
202
|
}
|
140
203
|
|
141
204
|
/**
|
142
|
-
*
|
205
|
+
* Read the file indicated by the filepath parameter into source and load its
|
206
|
+
* contents and size into the given `pm_string_t`. The given `pm_string_t`
|
207
|
+
* should be freed using `pm_string_free` when it is no longer used.
|
143
208
|
*/
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
209
|
+
PRISM_EXPORTED_FUNCTION pm_string_init_result_t
|
210
|
+
pm_string_file_init(pm_string_t *string, const char *filepath) {
|
211
|
+
#ifdef _WIN32
|
212
|
+
// Open the file for reading.
|
213
|
+
pm_string_file_handle_t handle;
|
214
|
+
pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
|
215
|
+
if (result != PM_STRING_INIT_SUCCESS) return result;
|
216
|
+
|
217
|
+
// Get the file size.
|
218
|
+
DWORD file_size = GetFileSize(handle.file, NULL);
|
219
|
+
if (file_size == INVALID_FILE_SIZE) {
|
220
|
+
pm_string_file_handle_close(&handle);
|
221
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
222
|
+
}
|
223
|
+
|
224
|
+
// If the file is empty, then we don't need to do anything else, we'll set
|
225
|
+
// the source to a constant empty string and return.
|
226
|
+
if (file_size == 0) {
|
227
|
+
pm_string_file_handle_close(&handle);
|
228
|
+
const uint8_t source[] = "";
|
229
|
+
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
230
|
+
return PM_STRING_INIT_SUCCESS;
|
231
|
+
}
|
232
|
+
|
233
|
+
// Create a buffer to read the file into.
|
234
|
+
uint8_t *source = xmalloc(file_size);
|
235
|
+
if (source == NULL) {
|
236
|
+
pm_string_file_handle_close(&handle);
|
237
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
238
|
+
}
|
239
|
+
|
240
|
+
// Read the contents of the file
|
241
|
+
DWORD bytes_read;
|
242
|
+
if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) {
|
243
|
+
pm_string_file_handle_close(&handle);
|
244
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
245
|
+
}
|
246
|
+
|
247
|
+
// Check the number of bytes read
|
248
|
+
if (bytes_read != file_size) {
|
249
|
+
xfree(source);
|
250
|
+
pm_string_file_handle_close(&handle);
|
251
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
252
|
+
}
|
253
|
+
|
254
|
+
pm_string_file_handle_close(&handle);
|
255
|
+
*string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
|
256
|
+
return PM_STRING_INIT_SUCCESS;
|
257
|
+
#elif defined(PRISM_HAS_FILESYSTEM)
|
258
|
+
// Open the file for reading
|
259
|
+
int fd = open(filepath, O_RDONLY);
|
260
|
+
if (fd == -1) {
|
261
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
262
|
+
}
|
263
|
+
|
264
|
+
// Stat the file to get the file size
|
265
|
+
struct stat sb;
|
266
|
+
if (fstat(fd, &sb) == -1) {
|
267
|
+
close(fd);
|
268
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
269
|
+
}
|
270
|
+
|
271
|
+
// Ensure it is a file and not a directory
|
272
|
+
if (S_ISDIR(sb.st_mode)) {
|
273
|
+
close(fd);
|
274
|
+
return PM_STRING_INIT_ERROR_DIRECTORY;
|
275
|
+
}
|
276
|
+
|
277
|
+
// Check the size to see if it's empty
|
278
|
+
size_t size = (size_t) sb.st_size;
|
279
|
+
if (size == 0) {
|
280
|
+
close(fd);
|
281
|
+
const uint8_t source[] = "";
|
282
|
+
*string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
|
283
|
+
return PM_STRING_INIT_SUCCESS;
|
284
|
+
}
|
285
|
+
|
286
|
+
size_t length = (size_t) size;
|
287
|
+
uint8_t *source = xmalloc(length);
|
288
|
+
if (source == NULL) {
|
289
|
+
close(fd);
|
290
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
291
|
+
}
|
292
|
+
|
293
|
+
long bytes_read = (long) read(fd, source, length);
|
294
|
+
close(fd);
|
295
|
+
|
296
|
+
if (bytes_read == -1) {
|
297
|
+
xfree(source);
|
298
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
149
299
|
}
|
150
|
-
|
300
|
+
|
301
|
+
*string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length };
|
302
|
+
return PM_STRING_INIT_SUCCESS;
|
303
|
+
#else
|
304
|
+
(void) string;
|
305
|
+
(void) filepath;
|
306
|
+
perror("pm_string_file_init is not implemented for this platform");
|
307
|
+
return PM_STRING_INIT_ERROR_GENERIC;
|
308
|
+
#endif
|
151
309
|
}
|
152
310
|
|
153
311
|
/**
|
@@ -161,13 +319,33 @@ pm_string_ensure_owned(pm_string_t *string) {
|
|
161
319
|
size_t length = pm_string_length(string);
|
162
320
|
const uint8_t *source = pm_string_source(string);
|
163
321
|
|
164
|
-
uint8_t *memory =
|
322
|
+
uint8_t *memory = xmalloc(length);
|
165
323
|
if (!memory) return;
|
166
324
|
|
167
325
|
pm_string_owned_init(string, memory, length);
|
168
326
|
memcpy((void *) string->source, source, length);
|
169
327
|
}
|
170
328
|
|
329
|
+
/**
|
330
|
+
* Compare the underlying lengths and bytes of two strings. Returns 0 if the
|
331
|
+
* strings are equal, a negative number if the left string is less than the
|
332
|
+
* right string, and a positive number if the left string is greater than the
|
333
|
+
* right string.
|
334
|
+
*/
|
335
|
+
int
|
336
|
+
pm_string_compare(const pm_string_t *left, const pm_string_t *right) {
|
337
|
+
size_t left_length = pm_string_length(left);
|
338
|
+
size_t right_length = pm_string_length(right);
|
339
|
+
|
340
|
+
if (left_length < right_length) {
|
341
|
+
return -1;
|
342
|
+
} else if (left_length > right_length) {
|
343
|
+
return 1;
|
344
|
+
}
|
345
|
+
|
346
|
+
return memcmp(pm_string_source(left), pm_string_source(right), left_length);
|
347
|
+
}
|
348
|
+
|
171
349
|
/**
|
172
350
|
* Returns the length associated with the string.
|
173
351
|
*/
|
@@ -192,12 +370,14 @@ pm_string_free(pm_string_t *string) {
|
|
192
370
|
void *memory = (void *) string->source;
|
193
371
|
|
194
372
|
if (string->type == PM_STRING_OWNED) {
|
195
|
-
|
373
|
+
xfree(memory);
|
374
|
+
#ifdef PRISM_HAS_MMAP
|
196
375
|
} else if (string->type == PM_STRING_MAPPED && string->length) {
|
197
376
|
#if defined(_WIN32)
|
198
377
|
UnmapViewOfFile(memory);
|
199
|
-
#
|
378
|
+
#elif defined(_POSIX_MAPPED_FILES)
|
200
379
|
munmap(memory, string->length);
|
201
380
|
#endif
|
381
|
+
#endif /* PRISM_HAS_MMAP */
|
202
382
|
}
|
203
383
|
}
|
data/src/util/pm_strncasecmp.c
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
#include "prism/util/pm_strncasecmp.h"
|
2
2
|
|
3
|
+
/**
 * A locale-insensitive version of `tolower(3)`. Only the ASCII letters 'A'
 * through 'Z' are mapped to their lowercase counterparts; every other value
 * is returned unchanged.
 */
static inline int
pm_tolower(int c)
{
    const int is_ascii_upper = (c >= 'A') && (c <= 'Z');

    // Setting bit 0x20 turns an ASCII uppercase letter into its lowercase form.
    return is_ascii_upper ? (c | 0x20) : c;
}
|
14
|
+
|
3
15
|
/**
|
4
16
|
* Compare two strings, ignoring case, up to the given length. Returns 0 if the
|
5
17
|
* strings are equal, a negative number if string1 is less than string2, or a
|
@@ -16,7 +28,7 @@ pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
|
|
16
28
|
|
17
29
|
while (offset < length && string1[offset] != '\0') {
|
18
30
|
if (string2[offset] == '\0') return string1[offset];
|
19
|
-
if ((difference =
|
31
|
+
if ((difference = pm_tolower(string1[offset]) - pm_tolower(string2[offset])) != 0) return difference;
|
20
32
|
offset++;
|
21
33
|
}
|
22
34
|
|
data/src/util/pm_strpbrk.c
CHANGED
@@ -8,6 +8,27 @@ pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start
|
|
8
8
|
pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
|
9
9
|
}
|
10
10
|
|
11
|
+
/**
|
12
|
+
* Set the explicit encoding for the parser to the current encoding.
|
13
|
+
*/
|
14
|
+
static inline void
|
15
|
+
pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
|
16
|
+
if (parser->explicit_encoding != NULL) {
|
17
|
+
if (parser->explicit_encoding == parser->encoding) {
|
18
|
+
// Okay, we already locked to this encoding.
|
19
|
+
} else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
20
|
+
// Not okay, we already found a Unicode escape sequence and this
|
21
|
+
// conflicts.
|
22
|
+
pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
|
23
|
+
} else {
|
24
|
+
// Should not be anything else.
|
25
|
+
assert(false && "unreachable");
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
parser->explicit_encoding = parser->encoding;
|
30
|
+
}
|
31
|
+
|
11
32
|
/**
|
12
33
|
* This is the default path.
|
13
34
|
*/
|
@@ -52,7 +73,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars
|
|
52
73
|
* This is the path when the encoding is ASCII-8BIT.
|
53
74
|
*/
|
54
75
|
static inline const uint8_t *
|
55
|
-
pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
|
76
|
+
pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
56
77
|
size_t index = 0;
|
57
78
|
|
58
79
|
while (index < maximum) {
|
@@ -60,6 +81,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
|
|
60
81
|
return source + index;
|
61
82
|
}
|
62
83
|
|
84
|
+
if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
|
63
85
|
index++;
|
64
86
|
}
|
65
87
|
|
@@ -72,6 +94,7 @@ pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maxi
|
|
72
94
|
static inline const uint8_t *
|
73
95
|
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
74
96
|
size_t index = 0;
|
97
|
+
const pm_encoding_t *encoding = parser->encoding;
|
75
98
|
|
76
99
|
while (index < maximum) {
|
77
100
|
if (strchr((const char *) charset, source[index]) != NULL) {
|
@@ -81,7 +104,8 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
81
104
|
if (source[index] < 0x80) {
|
82
105
|
index++;
|
83
106
|
} else {
|
84
|
-
size_t width =
|
107
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
108
|
+
if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
|
85
109
|
|
86
110
|
if (width > 0) {
|
87
111
|
index += width;
|
@@ -96,7 +120,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
96
120
|
|
97
121
|
do {
|
98
122
|
index++;
|
99
|
-
} while (index < maximum &&
|
123
|
+
} while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
100
124
|
|
101
125
|
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
102
126
|
}
|
@@ -113,6 +137,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
113
137
|
static inline const uint8_t *
|
114
138
|
pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
115
139
|
size_t index = 0;
|
140
|
+
const pm_encoding_t *encoding = parser->encoding;
|
116
141
|
|
117
142
|
while (index < maximum) {
|
118
143
|
if (strchr((const char *) charset, source[index]) != NULL) {
|
@@ -122,7 +147,8 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
122
147
|
if (source[index] < 0x80 || !validate) {
|
123
148
|
index++;
|
124
149
|
} else {
|
125
|
-
size_t width =
|
150
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
151
|
+
pm_strpbrk_explicit_encoding_set(parser, source, width);
|
126
152
|
|
127
153
|
if (width > 0) {
|
128
154
|
index += width;
|
@@ -135,7 +161,7 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
135
161
|
|
136
162
|
do {
|
137
163
|
index++;
|
138
|
-
} while (index < maximum &&
|
164
|
+
} while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
139
165
|
|
140
166
|
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
141
167
|
}
|
@@ -171,7 +197,7 @@ pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, p
|
|
171
197
|
} else if (!parser->encoding_changed) {
|
172
198
|
return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
|
173
199
|
} else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
|
174
|
-
return pm_strpbrk_ascii_8bit(source, charset, (size_t) length);
|
200
|
+
return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
|
175
201
|
} else if (parser->encoding->multibyte) {
|
176
202
|
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
|
177
203
|
} else {
|