prism 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +172 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +62 -0
- data/LICENSE.md +7 -0
- data/Makefile +84 -0
- data/README.md +89 -0
- data/config.yml +2481 -0
- data/docs/build_system.md +74 -0
- data/docs/building.md +22 -0
- data/docs/configuration.md +60 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +117 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/ruby_api.md +25 -0
- data/docs/serialization.md +181 -0
- data/docs/testing.md +55 -0
- data/ext/prism/api_node.c +4725 -0
- data/ext/prism/api_pack.c +256 -0
- data/ext/prism/extconf.rb +136 -0
- data/ext/prism/extension.c +626 -0
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +1932 -0
- data/include/prism/defines.h +45 -0
- data/include/prism/diagnostic.h +231 -0
- data/include/prism/enc/pm_encoding.h +95 -0
- data/include/prism/node.h +41 -0
- data/include/prism/pack.h +141 -0
- data/include/prism/parser.h +418 -0
- data/include/prism/regexp.h +19 -0
- data/include/prism/unescape.h +48 -0
- data/include/prism/util/pm_buffer.h +51 -0
- data/include/prism/util/pm_char.h +91 -0
- data/include/prism/util/pm_constant_pool.h +78 -0
- data/include/prism/util/pm_list.h +67 -0
- data/include/prism/util/pm_memchr.h +14 -0
- data/include/prism/util/pm_newline_list.h +61 -0
- data/include/prism/util/pm_state_stack.h +24 -0
- data/include/prism/util/pm_string.h +61 -0
- data/include/prism/util/pm_string_list.h +25 -0
- data/include/prism/util/pm_strpbrk.h +29 -0
- data/include/prism/version.h +4 -0
- data/include/prism.h +82 -0
- data/lib/prism/compiler.rb +465 -0
- data/lib/prism/debug.rb +157 -0
- data/lib/prism/desugar_compiler.rb +206 -0
- data/lib/prism/dispatcher.rb +2051 -0
- data/lib/prism/dsl.rb +750 -0
- data/lib/prism/ffi.rb +251 -0
- data/lib/prism/lex_compat.rb +838 -0
- data/lib/prism/mutation_compiler.rb +718 -0
- data/lib/prism/node.rb +14540 -0
- data/lib/prism/node_ext.rb +55 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/prism/pack.rb +185 -0
- data/lib/prism/parse_result/comments.rb +172 -0
- data/lib/prism/parse_result/newlines.rb +60 -0
- data/lib/prism/parse_result.rb +266 -0
- data/lib/prism/pattern.rb +239 -0
- data/lib/prism/ripper_compat.rb +174 -0
- data/lib/prism/serialize.rb +662 -0
- data/lib/prism/visitor.rb +470 -0
- data/lib/prism.rb +64 -0
- data/prism.gemspec +113 -0
- data/src/diagnostic.c +287 -0
- data/src/enc/pm_big5.c +52 -0
- data/src/enc/pm_euc_jp.c +58 -0
- data/src/enc/pm_gbk.c +61 -0
- data/src/enc/pm_shift_jis.c +56 -0
- data/src/enc/pm_tables.c +507 -0
- data/src/enc/pm_unicode.c +2324 -0
- data/src/enc/pm_windows_31j.c +56 -0
- data/src/node.c +2633 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +2136 -0
- data/src/prism.c +14587 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1899 -0
- data/src/token_type.c +349 -0
- data/src/unescape.c +637 -0
- data/src/util/pm_buffer.c +103 -0
- data/src/util/pm_char.c +272 -0
- data/src/util/pm_constant_pool.c +252 -0
- data/src/util/pm_list.c +41 -0
- data/src/util/pm_memchr.c +33 -0
- data/src/util/pm_newline_list.c +134 -0
- data/src/util/pm_state_stack.c +19 -0
- data/src/util/pm_string.c +200 -0
- data/src/util/pm_string_list.c +29 -0
- data/src/util/pm_strncasecmp.c +17 -0
- data/src/util/pm_strpbrk.c +66 -0
- metadata +138 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
#include "prism/util/pm_memchr.h"
|
2
|
+
|
3
|
+
#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
|
4
|
+
|
5
|
+
// We need to roll our own memchr to handle cases where the encoding changes and
|
6
|
+
// we need to search for a character in a buffer that could be the trailing byte
|
7
|
+
// of a multibyte character.
|
8
|
+
void *
|
9
|
+
pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding) {
|
10
|
+
if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
|
11
|
+
const uint8_t *source = (const uint8_t *) memory;
|
12
|
+
size_t index = 0;
|
13
|
+
|
14
|
+
while (index < number) {
|
15
|
+
if (source[index] == character) {
|
16
|
+
return (void *) (source + index);
|
17
|
+
}
|
18
|
+
|
19
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index));
|
20
|
+
if (width == 0) {
|
21
|
+
return NULL;
|
22
|
+
}
|
23
|
+
|
24
|
+
index += width;
|
25
|
+
}
|
26
|
+
|
27
|
+
return NULL;
|
28
|
+
} else {
|
29
|
+
return memchr(memory, character, number);
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
33
|
+
#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM
|
@@ -0,0 +1,134 @@
|
|
1
|
+
#include "prism/util/pm_newline_list.h"
|
2
|
+
|
3
|
+
// Initialize a new newline list with the given capacity. Returns true if the
|
4
|
+
// allocation of the offsets succeeds, otherwise returns false.
|
5
|
+
bool
|
6
|
+
pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
|
7
|
+
list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
|
8
|
+
if (list->offsets == NULL) return false;
|
9
|
+
|
10
|
+
list->start = start;
|
11
|
+
|
12
|
+
// This is 1 instead of 0 because we want to include the first line of the
|
13
|
+
// file as having offset 0, which is set because of calloc.
|
14
|
+
list->size = 1;
|
15
|
+
list->capacity = capacity;
|
16
|
+
|
17
|
+
list->last_index = 0;
|
18
|
+
list->last_offset = 0;
|
19
|
+
|
20
|
+
return true;
|
21
|
+
}
|
22
|
+
|
23
|
+
// Append a new offset to the newline list. Returns true if the reallocation of
|
24
|
+
// the offsets succeeds (if one was necessary), otherwise returns false.
|
25
|
+
bool
|
26
|
+
pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
27
|
+
if (list->size == list->capacity) {
|
28
|
+
size_t *original_offsets = list->offsets;
|
29
|
+
|
30
|
+
list->capacity = (list->capacity * 3) / 2;
|
31
|
+
list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
|
32
|
+
memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
|
33
|
+
free(original_offsets);
|
34
|
+
if (list->offsets == NULL) return false;
|
35
|
+
}
|
36
|
+
|
37
|
+
assert(*cursor == '\n');
|
38
|
+
assert(cursor >= list->start);
|
39
|
+
size_t newline_offset = (size_t) (cursor - list->start + 1);
|
40
|
+
|
41
|
+
assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
|
42
|
+
list->offsets[list->size++] = newline_offset;
|
43
|
+
|
44
|
+
return true;
|
45
|
+
}
|
46
|
+
|
47
|
+
// Conditionally append a new offset to the newline list, if the value passed in is a newline.
|
48
|
+
bool
|
49
|
+
pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
50
|
+
if (*cursor != '\n') {
|
51
|
+
return true;
|
52
|
+
}
|
53
|
+
return pm_newline_list_append(list, cursor);
|
54
|
+
}
|
55
|
+
|
56
|
+
// Returns the line and column of the given offset, assuming we don't have any
|
57
|
+
// information about the previous index that we found.
|
58
|
+
static pm_line_column_t
|
59
|
+
pm_newline_list_line_column_search(pm_newline_list_t *list, size_t offset) {
|
60
|
+
size_t left = 0;
|
61
|
+
size_t right = list->size - 1;
|
62
|
+
|
63
|
+
while (left <= right) {
|
64
|
+
size_t mid = left + (right - left) / 2;
|
65
|
+
|
66
|
+
if (list->offsets[mid] == offset) {
|
67
|
+
return ((pm_line_column_t) { mid, 0 });
|
68
|
+
}
|
69
|
+
|
70
|
+
if (list->offsets[mid] < offset) {
|
71
|
+
left = mid + 1;
|
72
|
+
} else {
|
73
|
+
right = mid - 1;
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
|
78
|
+
}
|
79
|
+
|
80
|
+
// Returns the line and column of the given offset, assuming we know the last
|
81
|
+
// index that we found.
|
82
|
+
static pm_line_column_t
|
83
|
+
pm_newline_list_line_column_scan(pm_newline_list_t *list, size_t offset) {
|
84
|
+
if (offset > list->last_offset) {
|
85
|
+
size_t index = list->last_index;
|
86
|
+
while (index < list->size && list->offsets[index] < offset) {
|
87
|
+
index++;
|
88
|
+
}
|
89
|
+
|
90
|
+
if (index == list->size) {
|
91
|
+
return ((pm_line_column_t) { index - 1, offset - list->offsets[index - 1] });
|
92
|
+
}
|
93
|
+
|
94
|
+
return ((pm_line_column_t) { index, 0 });
|
95
|
+
} else {
|
96
|
+
size_t index = list->last_index;
|
97
|
+
while (index > 0 && list->offsets[index] > offset) {
|
98
|
+
index--;
|
99
|
+
}
|
100
|
+
|
101
|
+
if (index == 0) {
|
102
|
+
return ((pm_line_column_t) { 0, offset });
|
103
|
+
}
|
104
|
+
|
105
|
+
return ((pm_line_column_t) { index, offset - list->offsets[index - 1] });
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
// Returns the line and column of the given offset. If the offset is not in the
|
110
|
+
// list, the line and column of the closest offset less than the given offset
|
111
|
+
// are returned.
|
112
|
+
pm_line_column_t
|
113
|
+
pm_newline_list_line_column(pm_newline_list_t *list, const uint8_t *cursor) {
|
114
|
+
assert(cursor >= list->start);
|
115
|
+
size_t offset = (size_t) (cursor - list->start);
|
116
|
+
pm_line_column_t result;
|
117
|
+
|
118
|
+
if (list->last_offset == 0) {
|
119
|
+
result = pm_newline_list_line_column_search(list, offset);
|
120
|
+
} else {
|
121
|
+
result = pm_newline_list_line_column_scan(list, offset);
|
122
|
+
}
|
123
|
+
|
124
|
+
list->last_index = result.line;
|
125
|
+
list->last_offset = offset;
|
126
|
+
|
127
|
+
return result;
|
128
|
+
}
|
129
|
+
|
130
|
+
// Free the internal memory allocated for the newline list.
|
131
|
+
void
|
132
|
+
pm_newline_list_free(pm_newline_list_t *list) {
|
133
|
+
free(list->offsets);
|
134
|
+
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#include "prism/util/pm_state_stack.h"
|
2
|
+
|
3
|
+
// Pushes a value onto the stack.
|
4
|
+
void
|
5
|
+
pm_state_stack_push(pm_state_stack_t *stack, bool value) {
|
6
|
+
*stack = (*stack << 1) | (value & 1);
|
7
|
+
}
|
8
|
+
|
9
|
+
// Pops a value off the stack.
|
10
|
+
void
|
11
|
+
pm_state_stack_pop(pm_state_stack_t *stack) {
|
12
|
+
*stack >>= 1;
|
13
|
+
}
|
14
|
+
|
15
|
+
// Returns the value at the top of the stack.
|
16
|
+
bool
|
17
|
+
pm_state_stack_p(pm_state_stack_t *stack) {
|
18
|
+
return *stack & 1;
|
19
|
+
}
|
@@ -0,0 +1,200 @@
|
|
1
|
+
#include "prism/util/pm_string.h"
|
2
|
+
|
3
|
+
// The following headers are necessary to read files using demand paging.
|
4
|
+
#ifdef _WIN32
|
5
|
+
#include <windows.h>
|
6
|
+
#else
|
7
|
+
#include <fcntl.h>
|
8
|
+
#include <sys/mman.h>
|
9
|
+
#include <sys/stat.h>
|
10
|
+
#include <unistd.h>
|
11
|
+
#endif
|
12
|
+
|
13
|
+
// Initialize a shared string that is based on initial input.
|
14
|
+
void
|
15
|
+
pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
|
16
|
+
assert(start <= end);
|
17
|
+
|
18
|
+
*string = (pm_string_t) {
|
19
|
+
.type = PM_STRING_SHARED,
|
20
|
+
.source = start,
|
21
|
+
.length = (size_t) (end - start)
|
22
|
+
};
|
23
|
+
}
|
24
|
+
|
25
|
+
// Initialize an owned string that is responsible for freeing allocated memory.
|
26
|
+
void
|
27
|
+
pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
|
28
|
+
*string = (pm_string_t) {
|
29
|
+
.type = PM_STRING_OWNED,
|
30
|
+
.source = source,
|
31
|
+
.length = length
|
32
|
+
};
|
33
|
+
}
|
34
|
+
|
35
|
+
// Initialize a constant string that doesn't own its memory source.
|
36
|
+
void
|
37
|
+
pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
|
38
|
+
*string = (pm_string_t) {
|
39
|
+
.type = PM_STRING_CONSTANT,
|
40
|
+
.source = (const uint8_t *) source,
|
41
|
+
.length = length
|
42
|
+
};
|
43
|
+
}
|
44
|
+
|
45
|
+
static void
|
46
|
+
pm_string_mapped_init_internal(pm_string_t *string, uint8_t *source, size_t length) {
|
47
|
+
*string = (pm_string_t) {
|
48
|
+
.type = PM_STRING_MAPPED,
|
49
|
+
.source = source,
|
50
|
+
.length = length
|
51
|
+
};
|
52
|
+
}
|
53
|
+
|
54
|
+
// Returns the memory size associated with the string.
|
55
|
+
size_t
|
56
|
+
pm_string_memsize(const pm_string_t *string) {
|
57
|
+
size_t size = sizeof(pm_string_t);
|
58
|
+
if (string->type == PM_STRING_OWNED) {
|
59
|
+
size += string->length;
|
60
|
+
}
|
61
|
+
return size;
|
62
|
+
}
|
63
|
+
|
64
|
+
// Ensure the string is owned. If it is not, then reinitialize it as owned and
|
65
|
+
// copy over the previous source.
|
66
|
+
void
|
67
|
+
pm_string_ensure_owned(pm_string_t *string) {
|
68
|
+
if (string->type == PM_STRING_OWNED) return;
|
69
|
+
|
70
|
+
size_t length = pm_string_length(string);
|
71
|
+
const uint8_t *source = pm_string_source(string);
|
72
|
+
|
73
|
+
uint8_t *memory = malloc(length);
|
74
|
+
if (!memory) return;
|
75
|
+
|
76
|
+
pm_string_owned_init(string, memory, length);
|
77
|
+
memcpy((void *) string->source, source, length);
|
78
|
+
}
|
79
|
+
|
80
|
+
// Returns the length associated with the string.
|
81
|
+
PRISM_EXPORTED_FUNCTION size_t
|
82
|
+
pm_string_length(const pm_string_t *string) {
|
83
|
+
return string->length;
|
84
|
+
}
|
85
|
+
|
86
|
+
// Returns the start pointer associated with the string.
|
87
|
+
PRISM_EXPORTED_FUNCTION const uint8_t *
|
88
|
+
pm_string_source(const pm_string_t *string) {
|
89
|
+
return string->source;
|
90
|
+
}
|
91
|
+
|
92
|
+
// Free the associated memory of the given string.
|
93
|
+
PRISM_EXPORTED_FUNCTION void
|
94
|
+
pm_string_free(pm_string_t *string) {
|
95
|
+
void *memory = (void *) string->source;
|
96
|
+
|
97
|
+
if (string->type == PM_STRING_OWNED) {
|
98
|
+
free(memory);
|
99
|
+
} else if (string->type == PM_STRING_MAPPED && string->length) {
|
100
|
+
#if defined(_WIN32)
|
101
|
+
UnmapViewOfFile(memory);
|
102
|
+
#else
|
103
|
+
munmap(memory, string->length);
|
104
|
+
#endif
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
bool
|
109
|
+
pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
110
|
+
#ifdef _WIN32
|
111
|
+
// Open the file for reading.
|
112
|
+
HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
113
|
+
|
114
|
+
if (file == INVALID_HANDLE_VALUE) {
|
115
|
+
perror("CreateFile failed");
|
116
|
+
return false;
|
117
|
+
}
|
118
|
+
|
119
|
+
// Get the file size.
|
120
|
+
DWORD file_size = GetFileSize(file, NULL);
|
121
|
+
if (file_size == INVALID_FILE_SIZE) {
|
122
|
+
CloseHandle(file);
|
123
|
+
perror("GetFileSize failed");
|
124
|
+
return false;
|
125
|
+
}
|
126
|
+
|
127
|
+
// If the file is empty, then we don't need to do anything else, we'll set
|
128
|
+
// the source to a constant empty string and return.
|
129
|
+
if (file_size == 0) {
|
130
|
+
CloseHandle(file);
|
131
|
+
uint8_t empty[] = "";
|
132
|
+
pm_string_mapped_init_internal(string, empty, 0);
|
133
|
+
return true;
|
134
|
+
}
|
135
|
+
|
136
|
+
// Create a mapping of the file.
|
137
|
+
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
138
|
+
if (mapping == NULL) {
|
139
|
+
CloseHandle(file);
|
140
|
+
perror("CreateFileMapping failed");
|
141
|
+
return false;
|
142
|
+
}
|
143
|
+
|
144
|
+
// Map the file into memory.
|
145
|
+
uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
|
146
|
+
CloseHandle(mapping);
|
147
|
+
CloseHandle(file);
|
148
|
+
|
149
|
+
if (source == NULL) {
|
150
|
+
perror("MapViewOfFile failed");
|
151
|
+
return false;
|
152
|
+
}
|
153
|
+
|
154
|
+
pm_string_mapped_init_internal(string, source, (size_t) file_size);
|
155
|
+
return true;
|
156
|
+
#else
|
157
|
+
// Open the file for reading
|
158
|
+
int fd = open(filepath, O_RDONLY);
|
159
|
+
if (fd == -1) {
|
160
|
+
perror("open");
|
161
|
+
return false;
|
162
|
+
}
|
163
|
+
|
164
|
+
// Stat the file to get the file size
|
165
|
+
struct stat sb;
|
166
|
+
if (fstat(fd, &sb) == -1) {
|
167
|
+
close(fd);
|
168
|
+
perror("fstat");
|
169
|
+
return false;
|
170
|
+
}
|
171
|
+
|
172
|
+
// mmap the file descriptor to virtually get the contents
|
173
|
+
size_t size = (size_t) sb.st_size;
|
174
|
+
uint8_t *source = NULL;
|
175
|
+
|
176
|
+
if (size == 0) {
|
177
|
+
close(fd);
|
178
|
+
uint8_t empty[] = "";
|
179
|
+
pm_string_mapped_init_internal(string, empty, 0);
|
180
|
+
return true;
|
181
|
+
}
|
182
|
+
|
183
|
+
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
|
184
|
+
if (source == MAP_FAILED) {
|
185
|
+
perror("Map failed");
|
186
|
+
return false;
|
187
|
+
}
|
188
|
+
|
189
|
+
close(fd);
|
190
|
+
pm_string_mapped_init_internal(string, source, size);
|
191
|
+
return true;
|
192
|
+
#endif
|
193
|
+
}
|
194
|
+
|
195
|
+
// Returns the size of the pm_string_t struct. This is necessary to allocate the
|
196
|
+
// correct amount of memory in the FFI backend.
|
197
|
+
PRISM_EXPORTED_FUNCTION size_t
|
198
|
+
pm_string_sizeof(void) {
|
199
|
+
return sizeof(pm_string_t);
|
200
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#include "prism/util/pm_string_list.h"
|
2
|
+
|
3
|
+
// Initialize a pm_string_list_t with its default values.
|
4
|
+
void
|
5
|
+
pm_string_list_init(pm_string_list_t *string_list) {
|
6
|
+
string_list->strings = (pm_string_t *) malloc(sizeof(pm_string_t));
|
7
|
+
string_list->length = 0;
|
8
|
+
string_list->capacity = 1;
|
9
|
+
}
|
10
|
+
|
11
|
+
// Append a pm_string_t to the given string list.
|
12
|
+
void
|
13
|
+
pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string) {
|
14
|
+
if (string_list->length + 1 > string_list->capacity) {
|
15
|
+
pm_string_t *original_string = string_list->strings;
|
16
|
+
string_list->capacity *= 2;
|
17
|
+
string_list->strings = (pm_string_t *) malloc(string_list->capacity * sizeof(pm_string_t));
|
18
|
+
memcpy(string_list->strings, original_string, (string_list->length) * sizeof(pm_string_t));
|
19
|
+
free(original_string);
|
20
|
+
}
|
21
|
+
|
22
|
+
string_list->strings[string_list->length++] = *string;
|
23
|
+
}
|
24
|
+
|
25
|
+
// Free the memory associated with the string list.
|
26
|
+
void
|
27
|
+
pm_string_list_free(pm_string_list_t *string_list) {
|
28
|
+
free(string_list->strings);
|
29
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#include <ctype.h>
|
2
|
+
#include <stddef.h>
|
3
|
+
#include <stdint.h>
|
4
|
+
|
5
|
+
// Compare at most length bytes of string1 and string2, ignoring case as
// defined by tolower. Comparison stops early at the first NUL byte. Returns 0
// if the compared regions are equal, a negative value if string1 sorts before
// string2, and a positive value if it sorts after.
//
// Previously a string1 that ended before string2 (for example "abc" against
// "abcd" with a length of 4) compared equal, because the loop stopped at the
// NUL in string1 without ever looking at the corresponding byte of string2.
int
pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
    for (size_t offset = 0; offset < length; offset++) {
        // uint8_t values are always representable as unsigned char, so they
        // are safe to pass to tolower.
        int left = tolower(string1[offset]);
        int right = tolower(string2[offset]);

        // This also covers either string ending first: tolower('\0') is 0, so
        // the shorter string sorts lower.
        if (left != right) return left - right;

        // Both strings ended at the same position.
        if (string1[offset] == '\0') return 0;
    }

    return 0;
}
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#include "prism/util/pm_strpbrk.h"
|
2
|
+
|
3
|
+
// This is the slow path that does care about the encoding.
|
4
|
+
static inline const uint8_t *
|
5
|
+
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
|
6
|
+
size_t index = 0;
|
7
|
+
|
8
|
+
while (index < maximum) {
|
9
|
+
if (strchr((const char *) charset, source[index]) != NULL) {
|
10
|
+
return source + index;
|
11
|
+
}
|
12
|
+
|
13
|
+
size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
|
14
|
+
if (width == 0) {
|
15
|
+
return NULL;
|
16
|
+
}
|
17
|
+
|
18
|
+
index += width;
|
19
|
+
}
|
20
|
+
|
21
|
+
return NULL;
|
22
|
+
}
|
23
|
+
|
24
|
+
// Encoding-agnostic search. Every one of the first maximum bytes of source is
// tested against charset in turn. Returns a pointer to the first match, or
// NULL if there is none.
//
// Note that strchr also matches the terminating NUL of charset, so a 0x00 byte
// in source is reported as a match.
static inline const uint8_t *
pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
    for (size_t offset = 0; offset < maximum; offset++) {
        const uint8_t *candidate = source + offset;

        if (strchr((const char *) charset, *candidate) != NULL) {
            return candidate;
        }
    }

    return NULL;
}
|
39
|
+
|
40
|
+
// Here we have rolled our own version of strpbrk. The standard library strpbrk
|
41
|
+
// has undefined behavior when the source string is not null-terminated. We want
|
42
|
+
// to support strings that are not null-terminated because pm_parse does not
|
43
|
+
// have the contract that the string is null-terminated. (This is desirable
|
44
|
+
// because it means the extension can call pm_parse with the result of a call to
|
45
|
+
// mmap).
|
46
|
+
//
|
47
|
+
// The standard library strpbrk also does not support passing a maximum length
|
48
|
+
// to search. We want to support this for the reason mentioned above, but we
|
49
|
+
// also don't want it to stop on null bytes. Ruby actually allows null bytes
|
50
|
+
// within strings, comments, regular expressions, etc. So we need to be able to
|
51
|
+
// skip past them.
|
52
|
+
//
|
53
|
+
// Finally, we want to support encodings wherein the charset could contain
|
54
|
+
// characters that are trailing bytes of multi-byte characters. For example, in
|
55
|
+
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
56
|
+
// need to take a slower path and iterate one multi-byte character at a time.
|
57
|
+
const uint8_t *
|
58
|
+
pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
|
59
|
+
if (length <= 0) {
|
60
|
+
return NULL;
|
61
|
+
} else if (parser->encoding_changed && parser->encoding.multibyte) {
|
62
|
+
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length);
|
63
|
+
} else {
|
64
|
+
return pm_strpbrk_single_byte(source, charset, (size_t) length);
|
65
|
+
}
|
66
|
+
}
|
metadata
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: prism
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.13.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Shopify
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-09-29 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email:
|
15
|
+
- ruby@shopify.com
|
16
|
+
executables: []
|
17
|
+
extensions:
|
18
|
+
- ext/prism/extconf.rb
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- CHANGELOG.md
|
22
|
+
- CODE_OF_CONDUCT.md
|
23
|
+
- CONTRIBUTING.md
|
24
|
+
- LICENSE.md
|
25
|
+
- Makefile
|
26
|
+
- README.md
|
27
|
+
- config.yml
|
28
|
+
- docs/build_system.md
|
29
|
+
- docs/building.md
|
30
|
+
- docs/configuration.md
|
31
|
+
- docs/design.md
|
32
|
+
- docs/encoding.md
|
33
|
+
- docs/fuzzing.md
|
34
|
+
- docs/heredocs.md
|
35
|
+
- docs/mapping.md
|
36
|
+
- docs/ripper.md
|
37
|
+
- docs/ruby_api.md
|
38
|
+
- docs/serialization.md
|
39
|
+
- docs/testing.md
|
40
|
+
- ext/prism/api_node.c
|
41
|
+
- ext/prism/api_pack.c
|
42
|
+
- ext/prism/extconf.rb
|
43
|
+
- ext/prism/extension.c
|
44
|
+
- ext/prism/extension.h
|
45
|
+
- include/prism.h
|
46
|
+
- include/prism/ast.h
|
47
|
+
- include/prism/defines.h
|
48
|
+
- include/prism/diagnostic.h
|
49
|
+
- include/prism/enc/pm_encoding.h
|
50
|
+
- include/prism/node.h
|
51
|
+
- include/prism/pack.h
|
52
|
+
- include/prism/parser.h
|
53
|
+
- include/prism/regexp.h
|
54
|
+
- include/prism/unescape.h
|
55
|
+
- include/prism/util/pm_buffer.h
|
56
|
+
- include/prism/util/pm_char.h
|
57
|
+
- include/prism/util/pm_constant_pool.h
|
58
|
+
- include/prism/util/pm_list.h
|
59
|
+
- include/prism/util/pm_memchr.h
|
60
|
+
- include/prism/util/pm_newline_list.h
|
61
|
+
- include/prism/util/pm_state_stack.h
|
62
|
+
- include/prism/util/pm_string.h
|
63
|
+
- include/prism/util/pm_string_list.h
|
64
|
+
- include/prism/util/pm_strpbrk.h
|
65
|
+
- include/prism/version.h
|
66
|
+
- lib/prism.rb
|
67
|
+
- lib/prism/compiler.rb
|
68
|
+
- lib/prism/debug.rb
|
69
|
+
- lib/prism/desugar_compiler.rb
|
70
|
+
- lib/prism/dispatcher.rb
|
71
|
+
- lib/prism/dsl.rb
|
72
|
+
- lib/prism/ffi.rb
|
73
|
+
- lib/prism/lex_compat.rb
|
74
|
+
- lib/prism/mutation_compiler.rb
|
75
|
+
- lib/prism/node.rb
|
76
|
+
- lib/prism/node_ext.rb
|
77
|
+
- lib/prism/node_inspector.rb
|
78
|
+
- lib/prism/pack.rb
|
79
|
+
- lib/prism/parse_result.rb
|
80
|
+
- lib/prism/parse_result/comments.rb
|
81
|
+
- lib/prism/parse_result/newlines.rb
|
82
|
+
- lib/prism/pattern.rb
|
83
|
+
- lib/prism/ripper_compat.rb
|
84
|
+
- lib/prism/serialize.rb
|
85
|
+
- lib/prism/visitor.rb
|
86
|
+
- prism.gemspec
|
87
|
+
- src/diagnostic.c
|
88
|
+
- src/enc/pm_big5.c
|
89
|
+
- src/enc/pm_euc_jp.c
|
90
|
+
- src/enc/pm_gbk.c
|
91
|
+
- src/enc/pm_shift_jis.c
|
92
|
+
- src/enc/pm_tables.c
|
93
|
+
- src/enc/pm_unicode.c
|
94
|
+
- src/enc/pm_windows_31j.c
|
95
|
+
- src/node.c
|
96
|
+
- src/pack.c
|
97
|
+
- src/prettyprint.c
|
98
|
+
- src/prism.c
|
99
|
+
- src/regexp.c
|
100
|
+
- src/serialize.c
|
101
|
+
- src/token_type.c
|
102
|
+
- src/unescape.c
|
103
|
+
- src/util/pm_buffer.c
|
104
|
+
- src/util/pm_char.c
|
105
|
+
- src/util/pm_constant_pool.c
|
106
|
+
- src/util/pm_list.c
|
107
|
+
- src/util/pm_memchr.c
|
108
|
+
- src/util/pm_newline_list.c
|
109
|
+
- src/util/pm_state_stack.c
|
110
|
+
- src/util/pm_string.c
|
111
|
+
- src/util/pm_string_list.c
|
112
|
+
- src/util/pm_strncasecmp.c
|
113
|
+
- src/util/pm_strpbrk.c
|
114
|
+
homepage: https://github.com/ruby/prism
|
115
|
+
licenses:
|
116
|
+
- MIT
|
117
|
+
metadata:
|
118
|
+
allowed_push_host: https://rubygems.org
|
119
|
+
post_install_message:
|
120
|
+
rdoc_options: []
|
121
|
+
require_paths:
|
122
|
+
- lib
|
123
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
+
requirements:
|
125
|
+
- - ">="
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: 3.0.0
|
128
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
129
|
+
requirements:
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: '0'
|
133
|
+
requirements: []
|
134
|
+
rubygems_version: 3.4.1
|
135
|
+
signing_key:
|
136
|
+
specification_version: 4
|
137
|
+
summary: Prism Ruby parser
|
138
|
+
test_files: []
|