prism 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +172 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +62 -0
- data/LICENSE.md +7 -0
- data/Makefile +84 -0
- data/README.md +89 -0
- data/config.yml +2481 -0
- data/docs/build_system.md +74 -0
- data/docs/building.md +22 -0
- data/docs/configuration.md +60 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +117 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/ruby_api.md +25 -0
- data/docs/serialization.md +181 -0
- data/docs/testing.md +55 -0
- data/ext/prism/api_node.c +4725 -0
- data/ext/prism/api_pack.c +256 -0
- data/ext/prism/extconf.rb +136 -0
- data/ext/prism/extension.c +626 -0
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +1932 -0
- data/include/prism/defines.h +45 -0
- data/include/prism/diagnostic.h +231 -0
- data/include/prism/enc/pm_encoding.h +95 -0
- data/include/prism/node.h +41 -0
- data/include/prism/pack.h +141 -0
- data/include/prism/parser.h +418 -0
- data/include/prism/regexp.h +19 -0
- data/include/prism/unescape.h +48 -0
- data/include/prism/util/pm_buffer.h +51 -0
- data/include/prism/util/pm_char.h +91 -0
- data/include/prism/util/pm_constant_pool.h +78 -0
- data/include/prism/util/pm_list.h +67 -0
- data/include/prism/util/pm_memchr.h +14 -0
- data/include/prism/util/pm_newline_list.h +61 -0
- data/include/prism/util/pm_state_stack.h +24 -0
- data/include/prism/util/pm_string.h +61 -0
- data/include/prism/util/pm_string_list.h +25 -0
- data/include/prism/util/pm_strpbrk.h +29 -0
- data/include/prism/version.h +4 -0
- data/include/prism.h +82 -0
- data/lib/prism/compiler.rb +465 -0
- data/lib/prism/debug.rb +157 -0
- data/lib/prism/desugar_compiler.rb +206 -0
- data/lib/prism/dispatcher.rb +2051 -0
- data/lib/prism/dsl.rb +750 -0
- data/lib/prism/ffi.rb +251 -0
- data/lib/prism/lex_compat.rb +838 -0
- data/lib/prism/mutation_compiler.rb +718 -0
- data/lib/prism/node.rb +14540 -0
- data/lib/prism/node_ext.rb +55 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/prism/pack.rb +185 -0
- data/lib/prism/parse_result/comments.rb +172 -0
- data/lib/prism/parse_result/newlines.rb +60 -0
- data/lib/prism/parse_result.rb +266 -0
- data/lib/prism/pattern.rb +239 -0
- data/lib/prism/ripper_compat.rb +174 -0
- data/lib/prism/serialize.rb +662 -0
- data/lib/prism/visitor.rb +470 -0
- data/lib/prism.rb +64 -0
- data/prism.gemspec +113 -0
- data/src/diagnostic.c +287 -0
- data/src/enc/pm_big5.c +52 -0
- data/src/enc/pm_euc_jp.c +58 -0
- data/src/enc/pm_gbk.c +61 -0
- data/src/enc/pm_shift_jis.c +56 -0
- data/src/enc/pm_tables.c +507 -0
- data/src/enc/pm_unicode.c +2324 -0
- data/src/enc/pm_windows_31j.c +56 -0
- data/src/node.c +2633 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +2136 -0
- data/src/prism.c +14587 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1899 -0
- data/src/token_type.c +349 -0
- data/src/unescape.c +637 -0
- data/src/util/pm_buffer.c +103 -0
- data/src/util/pm_char.c +272 -0
- data/src/util/pm_constant_pool.c +252 -0
- data/src/util/pm_list.c +41 -0
- data/src/util/pm_memchr.c +33 -0
- data/src/util/pm_newline_list.c +134 -0
- data/src/util/pm_state_stack.c +19 -0
- data/src/util/pm_string.c +200 -0
- data/src/util/pm_string_list.c +29 -0
- data/src/util/pm_strncasecmp.c +17 -0
- data/src/util/pm_strpbrk.c +66 -0
- metadata +138 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
#include "prism/util/pm_memchr.h"
|
2
|
+
|
3
|
+
#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
|
4
|
+
|
5
|
+
// We need to roll our own memchr to handle cases where the encoding changes and
|
6
|
+
// we need to search for a character in a buffer that could be the trailing byte
|
7
|
+
// of a multibyte character.
|
8
|
+
void *
|
9
|
+
pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding) {
|
10
|
+
if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
|
11
|
+
const uint8_t *source = (const uint8_t *) memory;
|
12
|
+
size_t index = 0;
|
13
|
+
|
14
|
+
while (index < number) {
|
15
|
+
if (source[index] == character) {
|
16
|
+
return (void *) (source + index);
|
17
|
+
}
|
18
|
+
|
19
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index));
|
20
|
+
if (width == 0) {
|
21
|
+
return NULL;
|
22
|
+
}
|
23
|
+
|
24
|
+
index += width;
|
25
|
+
}
|
26
|
+
|
27
|
+
return NULL;
|
28
|
+
} else {
|
29
|
+
return memchr(memory, character, number);
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
33
|
+
#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM
|
@@ -0,0 +1,134 @@
|
|
1
|
+
#include "prism/util/pm_newline_list.h"
|
2
|
+
|
3
|
+
// Initialize a new newline list with the given capacity. Returns true if the
|
4
|
+
// allocation of the offsets succeeds, otherwise returns false.
|
5
|
+
bool
|
6
|
+
pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
|
7
|
+
list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
|
8
|
+
if (list->offsets == NULL) return false;
|
9
|
+
|
10
|
+
list->start = start;
|
11
|
+
|
12
|
+
// This is 1 instead of 0 because we want to include the first line of the
|
13
|
+
// file as having offset 0, which is set because of calloc.
|
14
|
+
list->size = 1;
|
15
|
+
list->capacity = capacity;
|
16
|
+
|
17
|
+
list->last_index = 0;
|
18
|
+
list->last_offset = 0;
|
19
|
+
|
20
|
+
return true;
|
21
|
+
}
|
22
|
+
|
23
|
+
// Append a new offset to the newline list. Returns true if the reallocation of
|
24
|
+
// the offsets succeeds (if one was necessary), otherwise returns false.
|
25
|
+
bool
|
26
|
+
pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
27
|
+
if (list->size == list->capacity) {
|
28
|
+
size_t *original_offsets = list->offsets;
|
29
|
+
|
30
|
+
list->capacity = (list->capacity * 3) / 2;
|
31
|
+
list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
|
32
|
+
memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
|
33
|
+
free(original_offsets);
|
34
|
+
if (list->offsets == NULL) return false;
|
35
|
+
}
|
36
|
+
|
37
|
+
assert(*cursor == '\n');
|
38
|
+
assert(cursor >= list->start);
|
39
|
+
size_t newline_offset = (size_t) (cursor - list->start + 1);
|
40
|
+
|
41
|
+
assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
|
42
|
+
list->offsets[list->size++] = newline_offset;
|
43
|
+
|
44
|
+
return true;
|
45
|
+
}
|
46
|
+
|
47
|
+
// Conditionally append a new offset to the newline list, if the value passed in is a newline.
|
48
|
+
bool
|
49
|
+
pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
50
|
+
if (*cursor != '\n') {
|
51
|
+
return true;
|
52
|
+
}
|
53
|
+
return pm_newline_list_append(list, cursor);
|
54
|
+
}
|
55
|
+
|
56
|
+
// Returns the line and column of the given offset, assuming we don't have any
|
57
|
+
// information about the previous index that we found.
|
58
|
+
static pm_line_column_t
|
59
|
+
pm_newline_list_line_column_search(pm_newline_list_t *list, size_t offset) {
|
60
|
+
size_t left = 0;
|
61
|
+
size_t right = list->size - 1;
|
62
|
+
|
63
|
+
while (left <= right) {
|
64
|
+
size_t mid = left + (right - left) / 2;
|
65
|
+
|
66
|
+
if (list->offsets[mid] == offset) {
|
67
|
+
return ((pm_line_column_t) { mid, 0 });
|
68
|
+
}
|
69
|
+
|
70
|
+
if (list->offsets[mid] < offset) {
|
71
|
+
left = mid + 1;
|
72
|
+
} else {
|
73
|
+
right = mid - 1;
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
|
78
|
+
}
|
79
|
+
|
80
|
+
// Returns the line and column of the given offset, assuming we know the last
|
81
|
+
// index that we found.
|
82
|
+
static pm_line_column_t
|
83
|
+
pm_newline_list_line_column_scan(pm_newline_list_t *list, size_t offset) {
|
84
|
+
if (offset > list->last_offset) {
|
85
|
+
size_t index = list->last_index;
|
86
|
+
while (index < list->size && list->offsets[index] < offset) {
|
87
|
+
index++;
|
88
|
+
}
|
89
|
+
|
90
|
+
if (index == list->size) {
|
91
|
+
return ((pm_line_column_t) { index - 1, offset - list->offsets[index - 1] });
|
92
|
+
}
|
93
|
+
|
94
|
+
return ((pm_line_column_t) { index, 0 });
|
95
|
+
} else {
|
96
|
+
size_t index = list->last_index;
|
97
|
+
while (index > 0 && list->offsets[index] > offset) {
|
98
|
+
index--;
|
99
|
+
}
|
100
|
+
|
101
|
+
if (index == 0) {
|
102
|
+
return ((pm_line_column_t) { 0, offset });
|
103
|
+
}
|
104
|
+
|
105
|
+
return ((pm_line_column_t) { index, offset - list->offsets[index - 1] });
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
// Returns the line and column of the given offset. If the offset is not in the
|
110
|
+
// list, the line and column of the closest offset less than the given offset
|
111
|
+
// are returned.
|
112
|
+
pm_line_column_t
|
113
|
+
pm_newline_list_line_column(pm_newline_list_t *list, const uint8_t *cursor) {
|
114
|
+
assert(cursor >= list->start);
|
115
|
+
size_t offset = (size_t) (cursor - list->start);
|
116
|
+
pm_line_column_t result;
|
117
|
+
|
118
|
+
if (list->last_offset == 0) {
|
119
|
+
result = pm_newline_list_line_column_search(list, offset);
|
120
|
+
} else {
|
121
|
+
result = pm_newline_list_line_column_scan(list, offset);
|
122
|
+
}
|
123
|
+
|
124
|
+
list->last_index = result.line;
|
125
|
+
list->last_offset = offset;
|
126
|
+
|
127
|
+
return result;
|
128
|
+
}
|
129
|
+
|
130
|
+
// Free the internal memory allocated for the newline list.
|
131
|
+
void
|
132
|
+
pm_newline_list_free(pm_newline_list_t *list) {
|
133
|
+
free(list->offsets);
|
134
|
+
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#include "prism/util/pm_state_stack.h"
|
2
|
+
|
3
|
+
// Pushes a value onto the stack.
|
4
|
+
void
|
5
|
+
pm_state_stack_push(pm_state_stack_t *stack, bool value) {
|
6
|
+
*stack = (*stack << 1) | (value & 1);
|
7
|
+
}
|
8
|
+
|
9
|
+
// Pops a value off the stack.
|
10
|
+
void
|
11
|
+
pm_state_stack_pop(pm_state_stack_t *stack) {
|
12
|
+
*stack >>= 1;
|
13
|
+
}
|
14
|
+
|
15
|
+
// Returns the value at the top of the stack.
|
16
|
+
bool
|
17
|
+
pm_state_stack_p(pm_state_stack_t *stack) {
|
18
|
+
return *stack & 1;
|
19
|
+
}
|
@@ -0,0 +1,200 @@
|
|
1
|
+
#include "prism/util/pm_string.h"
|
2
|
+
|
3
|
+
// The following headers are necessary to read files using demand paging.
|
4
|
+
#ifdef _WIN32
|
5
|
+
#include <windows.h>
|
6
|
+
#else
|
7
|
+
#include <fcntl.h>
|
8
|
+
#include <sys/mman.h>
|
9
|
+
#include <sys/stat.h>
|
10
|
+
#include <unistd.h>
|
11
|
+
#endif
|
12
|
+
|
13
|
+
// Initialize a shared string that is based on initial input.
|
14
|
+
void
|
15
|
+
pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
|
16
|
+
assert(start <= end);
|
17
|
+
|
18
|
+
*string = (pm_string_t) {
|
19
|
+
.type = PM_STRING_SHARED,
|
20
|
+
.source = start,
|
21
|
+
.length = (size_t) (end - start)
|
22
|
+
};
|
23
|
+
}
|
24
|
+
|
25
|
+
// Initialize an owned string that is responsible for freeing allocated memory.
|
26
|
+
void
|
27
|
+
pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
|
28
|
+
*string = (pm_string_t) {
|
29
|
+
.type = PM_STRING_OWNED,
|
30
|
+
.source = source,
|
31
|
+
.length = length
|
32
|
+
};
|
33
|
+
}
|
34
|
+
|
35
|
+
// Initialize a constant string that doesn't own its memory source.
|
36
|
+
void
|
37
|
+
pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
|
38
|
+
*string = (pm_string_t) {
|
39
|
+
.type = PM_STRING_CONSTANT,
|
40
|
+
.source = (const uint8_t *) source,
|
41
|
+
.length = length
|
42
|
+
};
|
43
|
+
}
|
44
|
+
|
45
|
+
static void
|
46
|
+
pm_string_mapped_init_internal(pm_string_t *string, uint8_t *source, size_t length) {
|
47
|
+
*string = (pm_string_t) {
|
48
|
+
.type = PM_STRING_MAPPED,
|
49
|
+
.source = source,
|
50
|
+
.length = length
|
51
|
+
};
|
52
|
+
}
|
53
|
+
|
54
|
+
// Returns the memory size associated with the string.
|
55
|
+
size_t
|
56
|
+
pm_string_memsize(const pm_string_t *string) {
|
57
|
+
size_t size = sizeof(pm_string_t);
|
58
|
+
if (string->type == PM_STRING_OWNED) {
|
59
|
+
size += string->length;
|
60
|
+
}
|
61
|
+
return size;
|
62
|
+
}
|
63
|
+
|
64
|
+
// Ensure the string is owned. If it is not, then reinitialize it as owned and
|
65
|
+
// copy over the previous source.
|
66
|
+
void
|
67
|
+
pm_string_ensure_owned(pm_string_t *string) {
|
68
|
+
if (string->type == PM_STRING_OWNED) return;
|
69
|
+
|
70
|
+
size_t length = pm_string_length(string);
|
71
|
+
const uint8_t *source = pm_string_source(string);
|
72
|
+
|
73
|
+
uint8_t *memory = malloc(length);
|
74
|
+
if (!memory) return;
|
75
|
+
|
76
|
+
pm_string_owned_init(string, memory, length);
|
77
|
+
memcpy((void *) string->source, source, length);
|
78
|
+
}
|
79
|
+
|
80
|
+
// Returns the length associated with the string.
|
81
|
+
PRISM_EXPORTED_FUNCTION size_t
|
82
|
+
pm_string_length(const pm_string_t *string) {
|
83
|
+
return string->length;
|
84
|
+
}
|
85
|
+
|
86
|
+
// Returns the start pointer associated with the string.
|
87
|
+
PRISM_EXPORTED_FUNCTION const uint8_t *
|
88
|
+
pm_string_source(const pm_string_t *string) {
|
89
|
+
return string->source;
|
90
|
+
}
|
91
|
+
|
92
|
+
// Free the associated memory of the given string.
|
93
|
+
PRISM_EXPORTED_FUNCTION void
|
94
|
+
pm_string_free(pm_string_t *string) {
|
95
|
+
void *memory = (void *) string->source;
|
96
|
+
|
97
|
+
if (string->type == PM_STRING_OWNED) {
|
98
|
+
free(memory);
|
99
|
+
} else if (string->type == PM_STRING_MAPPED && string->length) {
|
100
|
+
#if defined(_WIN32)
|
101
|
+
UnmapViewOfFile(memory);
|
102
|
+
#else
|
103
|
+
munmap(memory, string->length);
|
104
|
+
#endif
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
bool
|
109
|
+
pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
110
|
+
#ifdef _WIN32
|
111
|
+
// Open the file for reading.
|
112
|
+
HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
113
|
+
|
114
|
+
if (file == INVALID_HANDLE_VALUE) {
|
115
|
+
perror("CreateFile failed");
|
116
|
+
return false;
|
117
|
+
}
|
118
|
+
|
119
|
+
// Get the file size.
|
120
|
+
DWORD file_size = GetFileSize(file, NULL);
|
121
|
+
if (file_size == INVALID_FILE_SIZE) {
|
122
|
+
CloseHandle(file);
|
123
|
+
perror("GetFileSize failed");
|
124
|
+
return false;
|
125
|
+
}
|
126
|
+
|
127
|
+
// If the file is empty, then we don't need to do anything else, we'll set
|
128
|
+
// the source to a constant empty string and return.
|
129
|
+
if (file_size == 0) {
|
130
|
+
CloseHandle(file);
|
131
|
+
uint8_t empty[] = "";
|
132
|
+
pm_string_mapped_init_internal(string, empty, 0);
|
133
|
+
return true;
|
134
|
+
}
|
135
|
+
|
136
|
+
// Create a mapping of the file.
|
137
|
+
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
138
|
+
if (mapping == NULL) {
|
139
|
+
CloseHandle(file);
|
140
|
+
perror("CreateFileMapping failed");
|
141
|
+
return false;
|
142
|
+
}
|
143
|
+
|
144
|
+
// Map the file into memory.
|
145
|
+
uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
|
146
|
+
CloseHandle(mapping);
|
147
|
+
CloseHandle(file);
|
148
|
+
|
149
|
+
if (source == NULL) {
|
150
|
+
perror("MapViewOfFile failed");
|
151
|
+
return false;
|
152
|
+
}
|
153
|
+
|
154
|
+
pm_string_mapped_init_internal(string, source, (size_t) file_size);
|
155
|
+
return true;
|
156
|
+
#else
|
157
|
+
// Open the file for reading
|
158
|
+
int fd = open(filepath, O_RDONLY);
|
159
|
+
if (fd == -1) {
|
160
|
+
perror("open");
|
161
|
+
return false;
|
162
|
+
}
|
163
|
+
|
164
|
+
// Stat the file to get the file size
|
165
|
+
struct stat sb;
|
166
|
+
if (fstat(fd, &sb) == -1) {
|
167
|
+
close(fd);
|
168
|
+
perror("fstat");
|
169
|
+
return false;
|
170
|
+
}
|
171
|
+
|
172
|
+
// mmap the file descriptor to virtually get the contents
|
173
|
+
size_t size = (size_t) sb.st_size;
|
174
|
+
uint8_t *source = NULL;
|
175
|
+
|
176
|
+
if (size == 0) {
|
177
|
+
close(fd);
|
178
|
+
uint8_t empty[] = "";
|
179
|
+
pm_string_mapped_init_internal(string, empty, 0);
|
180
|
+
return true;
|
181
|
+
}
|
182
|
+
|
183
|
+
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
|
184
|
+
if (source == MAP_FAILED) {
|
185
|
+
perror("Map failed");
|
186
|
+
return false;
|
187
|
+
}
|
188
|
+
|
189
|
+
close(fd);
|
190
|
+
pm_string_mapped_init_internal(string, source, size);
|
191
|
+
return true;
|
192
|
+
#endif
|
193
|
+
}
|
194
|
+
|
195
|
+
// Returns the size of the pm_string_t struct. This is necessary to allocate the
|
196
|
+
// correct amount of memory in the FFI backend.
|
197
|
+
PRISM_EXPORTED_FUNCTION size_t
|
198
|
+
pm_string_sizeof(void) {
|
199
|
+
return sizeof(pm_string_t);
|
200
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#include "prism/util/pm_string_list.h"
|
2
|
+
|
3
|
+
// Initialize a pm_string_list_t with its default values.
|
4
|
+
void
|
5
|
+
pm_string_list_init(pm_string_list_t *string_list) {
|
6
|
+
string_list->strings = (pm_string_t *) malloc(sizeof(pm_string_t));
|
7
|
+
string_list->length = 0;
|
8
|
+
string_list->capacity = 1;
|
9
|
+
}
|
10
|
+
|
11
|
+
// Append a pm_string_t to the given string list.
|
12
|
+
void
|
13
|
+
pm_string_list_append(pm_string_list_t *string_list, pm_string_t *string) {
|
14
|
+
if (string_list->length + 1 > string_list->capacity) {
|
15
|
+
pm_string_t *original_string = string_list->strings;
|
16
|
+
string_list->capacity *= 2;
|
17
|
+
string_list->strings = (pm_string_t *) malloc(string_list->capacity * sizeof(pm_string_t));
|
18
|
+
memcpy(string_list->strings, original_string, (string_list->length) * sizeof(pm_string_t));
|
19
|
+
free(original_string);
|
20
|
+
}
|
21
|
+
|
22
|
+
string_list->strings[string_list->length++] = *string;
|
23
|
+
}
|
24
|
+
|
25
|
+
// Free the memory associated with the string list.
|
26
|
+
void
|
27
|
+
pm_string_list_free(pm_string_list_t *string_list) {
|
28
|
+
free(string_list->strings);
|
29
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#include <ctype.h>
|
2
|
+
#include <stddef.h>
|
3
|
+
#include <stdint.h>
|
4
|
+
|
5
|
+
// Compare at most length bytes of two strings, ignoring case as determined by
// tolower. The comparison stops early at the first NUL byte.
//
// Returns 0 if the compared portions are equal, a negative value if string1
// sorts before string2, and a positive value if string1 sorts after string2.
// A string that ends before the other (within length) sorts first.
int
pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
    for (size_t offset = 0; offset < length; offset++) {
        // The bytes are uint8_t, so they are always valid tolower arguments.
        int lower1 = tolower(string1[offset]);
        int lower2 = tolower(string2[offset]);

        // This also covers one string ending before the other, because the
        // terminating NUL compares lower than every other byte.
        if (lower1 != lower2) return lower1 - lower2;

        // Both strings ended at the same position.
        if (string1[offset] == '\0') break;
    }

    return 0;
}
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#include "prism/util/pm_strpbrk.h"
|
2
|
+
|
3
|
+
// This is the slow path that does care about the encoding.
|
4
|
+
static inline const uint8_t *
|
5
|
+
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
|
6
|
+
size_t index = 0;
|
7
|
+
|
8
|
+
while (index < maximum) {
|
9
|
+
if (strchr((const char *) charset, source[index]) != NULL) {
|
10
|
+
return source + index;
|
11
|
+
}
|
12
|
+
|
13
|
+
size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
|
14
|
+
if (width == 0) {
|
15
|
+
return NULL;
|
16
|
+
}
|
17
|
+
|
18
|
+
index += width;
|
19
|
+
}
|
20
|
+
|
21
|
+
return NULL;
|
22
|
+
}
|
23
|
+
|
24
|
+
// The fast path, which ignores the encoding and tests every byte of the
// source against the charset.
//
// Because the charset is searched with strchr, a NUL byte in the source
// matches the charset's terminator and is reported as a match.
static inline const uint8_t *
pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
    for (size_t offset = 0; offset != maximum; offset++) {
        const uint8_t *candidate = source + offset;

        if (strchr((const char *) charset, *candidate) != NULL) {
            return candidate;
        }
    }

    return NULL;
}
|
39
|
+
|
40
|
+
// Here we have rolled our own version of strpbrk. The standard library strpbrk
|
41
|
+
// has undefined behavior when the source string is not null-terminated. We want
|
42
|
+
// to support strings that are not null-terminated because pm_parse does not
|
43
|
+
// have the contract that the string is null-terminated. (This is desirable
|
44
|
+
// because it means the extension can call pm_parse with the result of a call to
|
45
|
+
// mmap).
|
46
|
+
//
|
47
|
+
// The standard library strpbrk also does not support passing a maximum length
|
48
|
+
// to search. We want to support this for the reason mentioned above, but we
|
49
|
+
// also don't want it to stop on null bytes. Ruby actually allows null bytes
|
50
|
+
// within strings, comments, regular expressions, etc. So we need to be able to
|
51
|
+
// skip past them.
|
52
|
+
//
|
53
|
+
// Finally, we want to support encodings wherein the charset could contain
|
54
|
+
// characters that are trailing bytes of multi-byte characters. For example, in
|
55
|
+
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
56
|
+
// need to take a slower path and iterate one multi-byte character at a time.
|
57
|
+
const uint8_t *
|
58
|
+
pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
|
59
|
+
if (length <= 0) {
|
60
|
+
return NULL;
|
61
|
+
} else if (parser->encoding_changed && parser->encoding.multibyte) {
|
62
|
+
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length);
|
63
|
+
} else {
|
64
|
+
return pm_strpbrk_single_byte(source, charset, (size_t) length);
|
65
|
+
}
|
66
|
+
}
|
metadata
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: prism
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.13.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Shopify
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-09-29 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email:
|
15
|
+
- ruby@shopify.com
|
16
|
+
executables: []
|
17
|
+
extensions:
|
18
|
+
- ext/prism/extconf.rb
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- CHANGELOG.md
|
22
|
+
- CODE_OF_CONDUCT.md
|
23
|
+
- CONTRIBUTING.md
|
24
|
+
- LICENSE.md
|
25
|
+
- Makefile
|
26
|
+
- README.md
|
27
|
+
- config.yml
|
28
|
+
- docs/build_system.md
|
29
|
+
- docs/building.md
|
30
|
+
- docs/configuration.md
|
31
|
+
- docs/design.md
|
32
|
+
- docs/encoding.md
|
33
|
+
- docs/fuzzing.md
|
34
|
+
- docs/heredocs.md
|
35
|
+
- docs/mapping.md
|
36
|
+
- docs/ripper.md
|
37
|
+
- docs/ruby_api.md
|
38
|
+
- docs/serialization.md
|
39
|
+
- docs/testing.md
|
40
|
+
- ext/prism/api_node.c
|
41
|
+
- ext/prism/api_pack.c
|
42
|
+
- ext/prism/extconf.rb
|
43
|
+
- ext/prism/extension.c
|
44
|
+
- ext/prism/extension.h
|
45
|
+
- include/prism.h
|
46
|
+
- include/prism/ast.h
|
47
|
+
- include/prism/defines.h
|
48
|
+
- include/prism/diagnostic.h
|
49
|
+
- include/prism/enc/pm_encoding.h
|
50
|
+
- include/prism/node.h
|
51
|
+
- include/prism/pack.h
|
52
|
+
- include/prism/parser.h
|
53
|
+
- include/prism/regexp.h
|
54
|
+
- include/prism/unescape.h
|
55
|
+
- include/prism/util/pm_buffer.h
|
56
|
+
- include/prism/util/pm_char.h
|
57
|
+
- include/prism/util/pm_constant_pool.h
|
58
|
+
- include/prism/util/pm_list.h
|
59
|
+
- include/prism/util/pm_memchr.h
|
60
|
+
- include/prism/util/pm_newline_list.h
|
61
|
+
- include/prism/util/pm_state_stack.h
|
62
|
+
- include/prism/util/pm_string.h
|
63
|
+
- include/prism/util/pm_string_list.h
|
64
|
+
- include/prism/util/pm_strpbrk.h
|
65
|
+
- include/prism/version.h
|
66
|
+
- lib/prism.rb
|
67
|
+
- lib/prism/compiler.rb
|
68
|
+
- lib/prism/debug.rb
|
69
|
+
- lib/prism/desugar_compiler.rb
|
70
|
+
- lib/prism/dispatcher.rb
|
71
|
+
- lib/prism/dsl.rb
|
72
|
+
- lib/prism/ffi.rb
|
73
|
+
- lib/prism/lex_compat.rb
|
74
|
+
- lib/prism/mutation_compiler.rb
|
75
|
+
- lib/prism/node.rb
|
76
|
+
- lib/prism/node_ext.rb
|
77
|
+
- lib/prism/node_inspector.rb
|
78
|
+
- lib/prism/pack.rb
|
79
|
+
- lib/prism/parse_result.rb
|
80
|
+
- lib/prism/parse_result/comments.rb
|
81
|
+
- lib/prism/parse_result/newlines.rb
|
82
|
+
- lib/prism/pattern.rb
|
83
|
+
- lib/prism/ripper_compat.rb
|
84
|
+
- lib/prism/serialize.rb
|
85
|
+
- lib/prism/visitor.rb
|
86
|
+
- prism.gemspec
|
87
|
+
- src/diagnostic.c
|
88
|
+
- src/enc/pm_big5.c
|
89
|
+
- src/enc/pm_euc_jp.c
|
90
|
+
- src/enc/pm_gbk.c
|
91
|
+
- src/enc/pm_shift_jis.c
|
92
|
+
- src/enc/pm_tables.c
|
93
|
+
- src/enc/pm_unicode.c
|
94
|
+
- src/enc/pm_windows_31j.c
|
95
|
+
- src/node.c
|
96
|
+
- src/pack.c
|
97
|
+
- src/prettyprint.c
|
98
|
+
- src/prism.c
|
99
|
+
- src/regexp.c
|
100
|
+
- src/serialize.c
|
101
|
+
- src/token_type.c
|
102
|
+
- src/unescape.c
|
103
|
+
- src/util/pm_buffer.c
|
104
|
+
- src/util/pm_char.c
|
105
|
+
- src/util/pm_constant_pool.c
|
106
|
+
- src/util/pm_list.c
|
107
|
+
- src/util/pm_memchr.c
|
108
|
+
- src/util/pm_newline_list.c
|
109
|
+
- src/util/pm_state_stack.c
|
110
|
+
- src/util/pm_string.c
|
111
|
+
- src/util/pm_string_list.c
|
112
|
+
- src/util/pm_strncasecmp.c
|
113
|
+
- src/util/pm_strpbrk.c
|
114
|
+
homepage: https://github.com/ruby/prism
|
115
|
+
licenses:
|
116
|
+
- MIT
|
117
|
+
metadata:
|
118
|
+
allowed_push_host: https://rubygems.org
|
119
|
+
post_install_message:
|
120
|
+
rdoc_options: []
|
121
|
+
require_paths:
|
122
|
+
- lib
|
123
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
+
requirements:
|
125
|
+
- - ">="
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: 3.0.0
|
128
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
129
|
+
requirements:
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: '0'
|
133
|
+
requirements: []
|
134
|
+
rubygems_version: 3.4.1
|
135
|
+
signing_key:
|
136
|
+
specification_version: 4
|
137
|
+
summary: Prism Ruby parser
|
138
|
+
test_files: []
|