jruby-prism-parser 0.23.0.pre.SNAPSHOT-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +401 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +62 -0
- data/LICENSE.md +7 -0
- data/Makefile +101 -0
- data/README.md +98 -0
- data/config.yml +2902 -0
- data/docs/build_system.md +91 -0
- data/docs/configuration.md +64 -0
- data/docs/cruby_compilation.md +27 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +121 -0
- data/docs/fuzzing.md +88 -0
- data/docs/heredocs.md +36 -0
- data/docs/javascript.md +118 -0
- data/docs/local_variable_depth.md +229 -0
- data/docs/mapping.md +117 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +98 -0
- data/docs/ripper.md +36 -0
- data/docs/ruby_api.md +43 -0
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +209 -0
- data/docs/testing.md +55 -0
- data/ext/prism/api_node.c +5098 -0
- data/ext/prism/api_pack.c +267 -0
- data/ext/prism/extconf.rb +110 -0
- data/ext/prism/extension.c +1155 -0
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +5807 -0
- data/include/prism/defines.h +102 -0
- data/include/prism/diagnostic.h +339 -0
- data/include/prism/encoding.h +265 -0
- data/include/prism/node.h +57 -0
- data/include/prism/options.h +230 -0
- data/include/prism/pack.h +152 -0
- data/include/prism/parser.h +732 -0
- data/include/prism/prettyprint.h +26 -0
- data/include/prism/regexp.h +33 -0
- data/include/prism/util/pm_buffer.h +155 -0
- data/include/prism/util/pm_char.h +205 -0
- data/include/prism/util/pm_constant_pool.h +209 -0
- data/include/prism/util/pm_list.h +97 -0
- data/include/prism/util/pm_memchr.h +29 -0
- data/include/prism/util/pm_newline_list.h +93 -0
- data/include/prism/util/pm_state_stack.h +42 -0
- data/include/prism/util/pm_string.h +150 -0
- data/include/prism/util/pm_string_list.h +44 -0
- data/include/prism/util/pm_strncasecmp.h +32 -0
- data/include/prism/util/pm_strpbrk.h +46 -0
- data/include/prism/version.h +29 -0
- data/include/prism.h +289 -0
- data/jruby-prism.jar +0 -0
- data/lib/prism/compiler.rb +486 -0
- data/lib/prism/debug.rb +206 -0
- data/lib/prism/desugar_compiler.rb +207 -0
- data/lib/prism/dispatcher.rb +2150 -0
- data/lib/prism/dot_visitor.rb +4634 -0
- data/lib/prism/dsl.rb +785 -0
- data/lib/prism/ffi.rb +346 -0
- data/lib/prism/lex_compat.rb +908 -0
- data/lib/prism/mutation_compiler.rb +753 -0
- data/lib/prism/node.rb +17864 -0
- data/lib/prism/node_ext.rb +212 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/prism/pack.rb +224 -0
- data/lib/prism/parse_result/comments.rb +177 -0
- data/lib/prism/parse_result/newlines.rb +64 -0
- data/lib/prism/parse_result.rb +498 -0
- data/lib/prism/pattern.rb +250 -0
- data/lib/prism/serialize.rb +1354 -0
- data/lib/prism/translation/parser/compiler.rb +1838 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +37 -0
- data/lib/prism/translation/parser.rb +178 -0
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism/version.rb +3 -0
- data/lib/prism/visitor.rb +495 -0
- data/lib/prism.rb +99 -0
- data/prism.gemspec +135 -0
- data/rbi/prism.rbi +7767 -0
- data/rbi/prism_static.rbi +207 -0
- data/sig/prism.rbs +4773 -0
- data/sig/prism_static.rbs +201 -0
- data/src/diagnostic.c +400 -0
- data/src/encoding.c +5132 -0
- data/src/node.c +2786 -0
- data/src/options.c +213 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +8881 -0
- data/src/prism.c +18406 -0
- data/src/regexp.c +638 -0
- data/src/serialize.c +1554 -0
- data/src/token_type.c +700 -0
- data/src/util/pm_buffer.c +190 -0
- data/src/util/pm_char.c +318 -0
- data/src/util/pm_constant_pool.c +322 -0
- data/src/util/pm_list.c +49 -0
- data/src/util/pm_memchr.c +35 -0
- data/src/util/pm_newline_list.c +84 -0
- data/src/util/pm_state_stack.c +25 -0
- data/src/util/pm_string.c +203 -0
- data/src/util/pm_string_list.c +28 -0
- data/src/util/pm_strncasecmp.c +24 -0
- data/src/util/pm_strpbrk.c +180 -0
- metadata +156 -0
@@ -0,0 +1,322 @@
|
|
1
|
+
#include "prism/util/pm_constant_pool.h"
|
2
|
+
|
3
|
+
/**
|
4
|
+
* Initialize a list of constant ids.
|
5
|
+
*/
|
6
|
+
void
|
7
|
+
pm_constant_id_list_init(pm_constant_id_list_t *list) {
|
8
|
+
list->ids = NULL;
|
9
|
+
list->size = 0;
|
10
|
+
list->capacity = 0;
|
11
|
+
}
|
12
|
+
|
13
|
+
/**
|
14
|
+
* Append a constant id to a list of constant ids. Returns false if any
|
15
|
+
* potential reallocations fail.
|
16
|
+
*/
|
17
|
+
bool
|
18
|
+
pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
|
19
|
+
if (list->size >= list->capacity) {
|
20
|
+
list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
|
21
|
+
list->ids = (pm_constant_id_t *) realloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
|
22
|
+
if (list->ids == NULL) return false;
|
23
|
+
}
|
24
|
+
|
25
|
+
list->ids[list->size++] = id;
|
26
|
+
return true;
|
27
|
+
}
|
28
|
+
|
29
|
+
/**
|
30
|
+
* Checks if the current constant id list includes the given constant id.
|
31
|
+
*/
|
32
|
+
bool
|
33
|
+
pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
|
34
|
+
for (size_t index = 0; index < list->size; index++) {
|
35
|
+
if (list->ids[index] == id) return true;
|
36
|
+
}
|
37
|
+
return false;
|
38
|
+
}
|
39
|
+
|
40
|
+
/**
|
41
|
+
* Get the memory size of a list of constant ids.
|
42
|
+
*/
|
43
|
+
size_t
|
44
|
+
pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
|
45
|
+
return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
|
46
|
+
}
|
47
|
+
|
48
|
+
/**
|
49
|
+
* Free the memory associated with a list of constant ids.
|
50
|
+
*/
|
51
|
+
void
|
52
|
+
pm_constant_id_list_free(pm_constant_id_list_t *list) {
|
53
|
+
if (list->ids != NULL) {
|
54
|
+
free(list->ids);
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
/**
 * Hash a byte string with djb2: start from 5381 and, for every byte, multiply
 * the running value by 33 and add the byte. Arithmetic is done in 32 bits and
 * wraps on overflow. Chosen for simplicity and speed.
 */
static inline uint32_t
pm_constant_pool_hash(const uint8_t *start, size_t length) {
    // 5381 is the prime seed used by djb2.
    uint32_t value = 5381;
    size_t index = 0;

    while (index < length) {
        // value * 33 is the same as (value << 5) + value.
        value = value * 33u + start[index];
        index++;
    }

    return value;
}
|
73
|
+
|
74
|
+
/**
 * Round a 32-bit value up to the nearest power of two (values that already
 * are powers of two are returned unchanged). Zero maps to 1. Inputs above
 * 2^31 wrap around to 0 because the result does not fit in 32 bits.
 *
 * https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
 */
static uint32_t
next_power_of_two(uint32_t v) {
    // Handle zero up front so the decrement below cannot wrap; 2^0 is the
    // nearest power of two.
    if (v == 0) return 1;

    // Smear the highest set bit of (v - 1) into every lower position, then add
    // one to land on the next power of two.
    v -= 1;
    for (unsigned int shift = 1; shift < 32; shift *= 2) {
        v |= v >> shift;
    }

    return v + 1;
}
|
93
|
+
|
94
|
+
#ifndef NDEBUG
/**
 * Debug-only helper used in assertions: returns true when size has exactly one
 * bit set. Zero is rejected explicitly, because 0 & (0 - 1) is also 0 and a
 * zero capacity would otherwise slip through and produce an all-ones mask.
 */
static bool
is_power_of_two(uint32_t size) {
    return size != 0 && (size & (size - 1)) == 0;
}
#endif
|
100
|
+
|
101
|
+
/**
|
102
|
+
* Resize a constant pool to a given capacity.
|
103
|
+
*/
|
104
|
+
static inline bool
|
105
|
+
pm_constant_pool_resize(pm_constant_pool_t *pool) {
|
106
|
+
assert(is_power_of_two(pool->capacity));
|
107
|
+
|
108
|
+
uint32_t next_capacity = pool->capacity * 2;
|
109
|
+
if (next_capacity < pool->capacity) return false;
|
110
|
+
|
111
|
+
const uint32_t mask = next_capacity - 1;
|
112
|
+
const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
|
113
|
+
|
114
|
+
void *next = calloc(next_capacity, element_size);
|
115
|
+
if (next == NULL) return false;
|
116
|
+
|
117
|
+
pm_constant_pool_bucket_t *next_buckets = next;
|
118
|
+
pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
|
119
|
+
|
120
|
+
// For each bucket in the current constant pool, find the index in the
|
121
|
+
// next constant pool, and insert it.
|
122
|
+
for (uint32_t index = 0; index < pool->capacity; index++) {
|
123
|
+
pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
|
124
|
+
|
125
|
+
// If an id is set on this constant, then we know we have content here.
|
126
|
+
// In this case we need to insert it into the next constant pool.
|
127
|
+
if (bucket->id != PM_CONSTANT_ID_UNSET) {
|
128
|
+
uint32_t next_index = bucket->hash & mask;
|
129
|
+
|
130
|
+
// This implements linear scanning to find the next available slot
|
131
|
+
// in case this index is already taken. We don't need to bother
|
132
|
+
// comparing the values since we know that the hash is unique.
|
133
|
+
while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
|
134
|
+
next_index = (next_index + 1) & mask;
|
135
|
+
}
|
136
|
+
|
137
|
+
// Here we copy over the entire bucket, which includes the id so
|
138
|
+
// that they are consistent between resizes.
|
139
|
+
next_buckets[next_index] = *bucket;
|
140
|
+
}
|
141
|
+
}
|
142
|
+
|
143
|
+
// The constants are stable with respect to hash table resizes.
|
144
|
+
memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
|
145
|
+
|
146
|
+
// pool->constants and pool->buckets are allocated out of the same chunk
|
147
|
+
// of memory, with the buckets coming first.
|
148
|
+
free(pool->buckets);
|
149
|
+
pool->constants = next_constants;
|
150
|
+
pool->buckets = next_buckets;
|
151
|
+
pool->capacity = next_capacity;
|
152
|
+
return true;
|
153
|
+
}
|
154
|
+
|
155
|
+
/**
|
156
|
+
* Initialize a new constant pool with a given capacity.
|
157
|
+
*/
|
158
|
+
bool
|
159
|
+
pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
|
160
|
+
const uint32_t maximum = (~((uint32_t) 0));
|
161
|
+
if (capacity >= ((maximum / 2) + 1)) return false;
|
162
|
+
|
163
|
+
capacity = next_power_of_two(capacity);
|
164
|
+
const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
|
165
|
+
void *memory = calloc(capacity, element_size);
|
166
|
+
if (memory == NULL) return false;
|
167
|
+
|
168
|
+
pool->buckets = memory;
|
169
|
+
pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
|
170
|
+
pool->size = 0;
|
171
|
+
pool->capacity = capacity;
|
172
|
+
return true;
|
173
|
+
}
|
174
|
+
|
175
|
+
/**
|
176
|
+
* Return a pointer to the constant indicated by the given constant id.
|
177
|
+
*/
|
178
|
+
pm_constant_t *
|
179
|
+
pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
|
180
|
+
assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
|
181
|
+
return &pool->constants[constant_id - 1];
|
182
|
+
}
|
183
|
+
|
184
|
+
/**
|
185
|
+
* Find a constant in a constant pool. Returns the id of the constant, or 0 if
|
186
|
+
* the constant is not found.
|
187
|
+
*/
|
188
|
+
pm_constant_id_t
|
189
|
+
pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
190
|
+
assert(is_power_of_two(pool->capacity));
|
191
|
+
const uint32_t mask = pool->capacity - 1;
|
192
|
+
|
193
|
+
uint32_t hash = pm_constant_pool_hash(start, length);
|
194
|
+
uint32_t index = hash & mask;
|
195
|
+
pm_constant_pool_bucket_t *bucket;
|
196
|
+
|
197
|
+
while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
|
198
|
+
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
199
|
+
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
|
200
|
+
return bucket->id;
|
201
|
+
}
|
202
|
+
|
203
|
+
index = (index + 1) & mask;
|
204
|
+
}
|
205
|
+
|
206
|
+
return PM_CONSTANT_ID_UNSET;
|
207
|
+
}
|
208
|
+
|
209
|
+
/**
|
210
|
+
* Insert a constant into a constant pool and return its index in the pool.
|
211
|
+
*/
|
212
|
+
static inline pm_constant_id_t
|
213
|
+
pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
|
214
|
+
if (pool->size >= (pool->capacity / 4 * 3)) {
|
215
|
+
if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
|
216
|
+
}
|
217
|
+
|
218
|
+
assert(is_power_of_two(pool->capacity));
|
219
|
+
const uint32_t mask = pool->capacity - 1;
|
220
|
+
|
221
|
+
uint32_t hash = pm_constant_pool_hash(start, length);
|
222
|
+
uint32_t index = hash & mask;
|
223
|
+
pm_constant_pool_bucket_t *bucket;
|
224
|
+
|
225
|
+
while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
|
226
|
+
// If there is a collision, then we need to check if the content is the
|
227
|
+
// same as the content we are trying to insert. If it is, then we can
|
228
|
+
// return the id of the existing constant.
|
229
|
+
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
230
|
+
|
231
|
+
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
|
232
|
+
// Since we have found a match, we need to check if this is
|
233
|
+
// attempting to insert a shared or an owned constant. We want to
|
234
|
+
// prefer shared constants since they don't require allocations.
|
235
|
+
if (type == PM_CONSTANT_POOL_BUCKET_OWNED) {
|
236
|
+
// If we're attempting to insert an owned constant and we have
|
237
|
+
// an existing constant, then either way we don't want the given
|
238
|
+
// memory. Either it's duplicated with the existing constant or
|
239
|
+
// it's not necessary because we have a shared version.
|
240
|
+
free((void *) start);
|
241
|
+
} else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
|
242
|
+
// If we're attempting to insert a shared constant and the
|
243
|
+
// existing constant is owned, then we can free the owned
|
244
|
+
// constant and replace it with the shared constant.
|
245
|
+
free((void *) constant->start);
|
246
|
+
constant->start = start;
|
247
|
+
bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3);
|
248
|
+
}
|
249
|
+
|
250
|
+
return bucket->id;
|
251
|
+
}
|
252
|
+
|
253
|
+
index = (index + 1) & mask;
|
254
|
+
}
|
255
|
+
|
256
|
+
// IDs are allocated starting at 1, since the value 0 denotes a non-existant
|
257
|
+
// constant.
|
258
|
+
uint32_t id = ++pool->size;
|
259
|
+
assert(pool->size < ((uint32_t) (1 << 30)));
|
260
|
+
|
261
|
+
*bucket = (pm_constant_pool_bucket_t) {
|
262
|
+
.id = (unsigned int) (id & 0x3fffffff),
|
263
|
+
.type = (unsigned int) (type & 0x3),
|
264
|
+
.hash = hash
|
265
|
+
};
|
266
|
+
|
267
|
+
pool->constants[id - 1] = (pm_constant_t) {
|
268
|
+
.start = start,
|
269
|
+
.length = length,
|
270
|
+
};
|
271
|
+
|
272
|
+
return id;
|
273
|
+
}
|
274
|
+
|
275
|
+
/**
|
276
|
+
* Insert a constant into a constant pool. Returns the id of the constant, or
|
277
|
+
* PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
|
278
|
+
*/
|
279
|
+
pm_constant_id_t
|
280
|
+
pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
281
|
+
return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
|
282
|
+
}
|
283
|
+
|
284
|
+
/**
|
285
|
+
* Insert a constant into a constant pool from memory that is now owned by the
|
286
|
+
* constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any
|
287
|
+
* potential calls to resize fail.
|
288
|
+
*/
|
289
|
+
pm_constant_id_t
|
290
|
+
pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
291
|
+
return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
|
292
|
+
}
|
293
|
+
|
294
|
+
/**
|
295
|
+
* Insert a constant into a constant pool from memory that is constant. Returns
|
296
|
+
* the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to
|
297
|
+
* resize fail.
|
298
|
+
*/
|
299
|
+
pm_constant_id_t
|
300
|
+
pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
301
|
+
return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
|
302
|
+
}
|
303
|
+
|
304
|
+
/**
|
305
|
+
* Free the memory associated with a constant pool.
|
306
|
+
*/
|
307
|
+
void
|
308
|
+
pm_constant_pool_free(pm_constant_pool_t *pool) {
|
309
|
+
// For each constant in the current constant pool, free the contents if the
|
310
|
+
// contents are owned.
|
311
|
+
for (uint32_t index = 0; index < pool->capacity; index++) {
|
312
|
+
pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
|
313
|
+
|
314
|
+
// If an id is set on this constant, then we know we have content here.
|
315
|
+
if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
|
316
|
+
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
317
|
+
free((void *) constant->start);
|
318
|
+
}
|
319
|
+
}
|
320
|
+
|
321
|
+
free(pool->buckets);
|
322
|
+
}
|
data/src/util/pm_list.c
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
#include "prism/util/pm_list.h"
|
2
|
+
|
3
|
+
/**
|
4
|
+
* Returns true if the given list is empty.
|
5
|
+
*/
|
6
|
+
PRISM_EXPORTED_FUNCTION bool
|
7
|
+
pm_list_empty_p(pm_list_t *list) {
|
8
|
+
return list->head == NULL;
|
9
|
+
}
|
10
|
+
|
11
|
+
/**
|
12
|
+
* Returns the size of the list.
|
13
|
+
*/
|
14
|
+
PRISM_EXPORTED_FUNCTION size_t
|
15
|
+
pm_list_size(pm_list_t *list) {
|
16
|
+
return list->size;
|
17
|
+
}
|
18
|
+
|
19
|
+
/**
|
20
|
+
* Append a node to the given list.
|
21
|
+
*/
|
22
|
+
void
|
23
|
+
pm_list_append(pm_list_t *list, pm_list_node_t *node) {
|
24
|
+
if (list->head == NULL) {
|
25
|
+
list->head = node;
|
26
|
+
} else {
|
27
|
+
list->tail->next = node;
|
28
|
+
}
|
29
|
+
|
30
|
+
list->tail = node;
|
31
|
+
list->size++;
|
32
|
+
}
|
33
|
+
|
34
|
+
/**
|
35
|
+
* Deallocate the internal state of the given list.
|
36
|
+
*/
|
37
|
+
PRISM_EXPORTED_FUNCTION void
|
38
|
+
pm_list_free(pm_list_t *list) {
|
39
|
+
pm_list_node_t *node = list->head;
|
40
|
+
pm_list_node_t *next;
|
41
|
+
|
42
|
+
while (node != NULL) {
|
43
|
+
next = node->next;
|
44
|
+
free(node);
|
45
|
+
node = next;
|
46
|
+
}
|
47
|
+
|
48
|
+
list->size = 0;
|
49
|
+
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#include "prism/util/pm_memchr.h"
|
2
|
+
|
3
|
+
// Threshold used by pm_memchr: only characters at or above this value take the
// encoding-aware scan. Judging by the name, this is presumably the smallest
// value a trailing byte of a multibyte character can have -- TODO confirm.
#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
|
4
|
+
|
5
|
+
/**
|
6
|
+
* We need to roll our own memchr to handle cases where the encoding changes and
|
7
|
+
* we need to search for a character in a buffer that could be the trailing byte
|
8
|
+
* of a multibyte character.
|
9
|
+
*/
|
10
|
+
void *
|
11
|
+
pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) {
|
12
|
+
if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
|
13
|
+
const uint8_t *source = (const uint8_t *) memory;
|
14
|
+
size_t index = 0;
|
15
|
+
|
16
|
+
while (index < number) {
|
17
|
+
if (source[index] == character) {
|
18
|
+
return (void *) (source + index);
|
19
|
+
}
|
20
|
+
|
21
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index));
|
22
|
+
if (width == 0) {
|
23
|
+
return NULL;
|
24
|
+
}
|
25
|
+
|
26
|
+
index += width;
|
27
|
+
}
|
28
|
+
|
29
|
+
return NULL;
|
30
|
+
} else {
|
31
|
+
return memchr(memory, character, number);
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#include "prism/util/pm_newline_list.h"
|
2
|
+
|
3
|
+
/**
|
4
|
+
* Initialize a new newline list with the given capacity. Returns true if the
|
5
|
+
* allocation of the offsets succeeds, otherwise returns false.
|
6
|
+
*/
|
7
|
+
bool
|
8
|
+
pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
|
9
|
+
list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
|
10
|
+
if (list->offsets == NULL) return false;
|
11
|
+
|
12
|
+
list->start = start;
|
13
|
+
|
14
|
+
// This is 1 instead of 0 because we want to include the first line of the
|
15
|
+
// file as having offset 0, which is set because of calloc.
|
16
|
+
list->size = 1;
|
17
|
+
list->capacity = capacity;
|
18
|
+
|
19
|
+
return true;
|
20
|
+
}
|
21
|
+
|
22
|
+
/**
|
23
|
+
* Append a new offset to the newline list. Returns true if the reallocation of
|
24
|
+
* the offsets succeeds (if one was necessary), otherwise returns false.
|
25
|
+
*/
|
26
|
+
bool
|
27
|
+
pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
28
|
+
if (list->size == list->capacity) {
|
29
|
+
size_t *original_offsets = list->offsets;
|
30
|
+
|
31
|
+
list->capacity = (list->capacity * 3) / 2;
|
32
|
+
list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
|
33
|
+
memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
|
34
|
+
free(original_offsets);
|
35
|
+
if (list->offsets == NULL) return false;
|
36
|
+
}
|
37
|
+
|
38
|
+
assert(*cursor == '\n');
|
39
|
+
assert(cursor >= list->start);
|
40
|
+
size_t newline_offset = (size_t) (cursor - list->start + 1);
|
41
|
+
|
42
|
+
assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
|
43
|
+
list->offsets[list->size++] = newline_offset;
|
44
|
+
|
45
|
+
return true;
|
46
|
+
}
|
47
|
+
|
48
|
+
/**
|
49
|
+
* Returns the line and column of the given offset. If the offset is not in the
|
50
|
+
* list, the line and column of the closest offset less than the given offset
|
51
|
+
* are returned.
|
52
|
+
*/
|
53
|
+
pm_line_column_t
|
54
|
+
pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor) {
|
55
|
+
assert(cursor >= list->start);
|
56
|
+
size_t offset = (size_t) (cursor - list->start);
|
57
|
+
|
58
|
+
size_t left = 0;
|
59
|
+
size_t right = list->size - 1;
|
60
|
+
|
61
|
+
while (left <= right) {
|
62
|
+
size_t mid = left + (right - left) / 2;
|
63
|
+
|
64
|
+
if (list->offsets[mid] == offset) {
|
65
|
+
return ((pm_line_column_t) { mid + 1, 0 });
|
66
|
+
}
|
67
|
+
|
68
|
+
if (list->offsets[mid] < offset) {
|
69
|
+
left = mid + 1;
|
70
|
+
} else {
|
71
|
+
right = mid - 1;
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
return ((pm_line_column_t) { left, offset - list->offsets[left - 1] });
|
76
|
+
}
|
77
|
+
|
78
|
+
/**
|
79
|
+
* Free the internal memory allocated for the newline list.
|
80
|
+
*/
|
81
|
+
void
|
82
|
+
pm_newline_list_free(pm_newline_list_t *list) {
|
83
|
+
free(list->offsets);
|
84
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#include "prism/util/pm_state_stack.h"
|
2
|
+
|
3
|
+
/**
|
4
|
+
* Pushes a value onto the stack.
|
5
|
+
*/
|
6
|
+
void
|
7
|
+
pm_state_stack_push(pm_state_stack_t *stack, bool value) {
|
8
|
+
*stack = (*stack << 1) | (value & 1);
|
9
|
+
}
|
10
|
+
|
11
|
+
/**
|
12
|
+
* Pops a value off the stack.
|
13
|
+
*/
|
14
|
+
void
|
15
|
+
pm_state_stack_pop(pm_state_stack_t *stack) {
|
16
|
+
*stack >>= 1;
|
17
|
+
}
|
18
|
+
|
19
|
+
/**
|
20
|
+
* Returns the value at the top of the stack.
|
21
|
+
*/
|
22
|
+
bool
|
23
|
+
pm_state_stack_p(pm_state_stack_t *stack) {
|
24
|
+
return *stack & 1;
|
25
|
+
}
|