yarp 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +51 -0
- data/LICENSE.md +7 -0
- data/Makefile.in +79 -0
- data/README.md +86 -0
- data/config.h.in +25 -0
- data/config.yml +2147 -0
- data/configure +4487 -0
- data/docs/build_system.md +85 -0
- data/docs/building.md +26 -0
- data/docs/configuration.md +56 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +116 -0
- data/docs/extension.md +20 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/serialization.md +130 -0
- data/docs/testing.md +55 -0
- data/ext/yarp/api_node.c +3680 -0
- data/ext/yarp/api_pack.c +256 -0
- data/ext/yarp/extconf.rb +131 -0
- data/ext/yarp/extension.c +547 -0
- data/ext/yarp/extension.h +18 -0
- data/include/yarp/ast.h +1412 -0
- data/include/yarp/defines.h +54 -0
- data/include/yarp/diagnostic.h +24 -0
- data/include/yarp/enc/yp_encoding.h +94 -0
- data/include/yarp/node.h +36 -0
- data/include/yarp/pack.h +141 -0
- data/include/yarp/parser.h +389 -0
- data/include/yarp/regexp.h +19 -0
- data/include/yarp/unescape.h +42 -0
- data/include/yarp/util/yp_buffer.h +39 -0
- data/include/yarp/util/yp_char.h +75 -0
- data/include/yarp/util/yp_constant_pool.h +64 -0
- data/include/yarp/util/yp_list.h +67 -0
- data/include/yarp/util/yp_memchr.h +14 -0
- data/include/yarp/util/yp_newline_list.h +54 -0
- data/include/yarp/util/yp_state_stack.h +24 -0
- data/include/yarp/util/yp_string.h +57 -0
- data/include/yarp/util/yp_string_list.h +28 -0
- data/include/yarp/util/yp_strpbrk.h +29 -0
- data/include/yarp/version.h +5 -0
- data/include/yarp.h +69 -0
- data/lib/yarp/lex_compat.rb +759 -0
- data/lib/yarp/node.rb +7428 -0
- data/lib/yarp/pack.rb +185 -0
- data/lib/yarp/ripper_compat.rb +174 -0
- data/lib/yarp/serialize.rb +389 -0
- data/lib/yarp.rb +330 -0
- data/src/diagnostic.c +25 -0
- data/src/enc/yp_big5.c +79 -0
- data/src/enc/yp_euc_jp.c +85 -0
- data/src/enc/yp_gbk.c +88 -0
- data/src/enc/yp_shift_jis.c +83 -0
- data/src/enc/yp_tables.c +509 -0
- data/src/enc/yp_unicode.c +2320 -0
- data/src/enc/yp_windows_31j.c +83 -0
- data/src/node.c +2011 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +1782 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1576 -0
- data/src/token_type.c +347 -0
- data/src/unescape.c +576 -0
- data/src/util/yp_buffer.c +78 -0
- data/src/util/yp_char.c +229 -0
- data/src/util/yp_constant_pool.c +147 -0
- data/src/util/yp_list.c +50 -0
- data/src/util/yp_memchr.c +31 -0
- data/src/util/yp_newline_list.c +119 -0
- data/src/util/yp_state_stack.c +25 -0
- data/src/util/yp_string.c +207 -0
- data/src/util/yp_string_list.c +32 -0
- data/src/util/yp_strncasecmp.c +20 -0
- data/src/util/yp_strpbrk.c +66 -0
- data/src/yarp.c +13211 -0
- data/yarp.gemspec +100 -0
- metadata +125 -0
@@ -0,0 +1,207 @@
|
|
1
|
+
#include "yarp/util/yp_string.h"
|
2
|
+
|
3
|
+
// The following headers are necessary to read files using demand paging.
|
4
|
+
#ifdef _WIN32
|
5
|
+
#include <windows.h>
|
6
|
+
#else
|
7
|
+
#include <fcntl.h>
|
8
|
+
#include <sys/mman.h>
|
9
|
+
#include <sys/stat.h>
|
10
|
+
#include <unistd.h>
|
11
|
+
#endif
|
12
|
+
|
13
|
+
// Initialize a shared string that is based on initial input.
|
14
|
+
void
|
15
|
+
yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
|
16
|
+
assert(start <= end);
|
17
|
+
*string = (yp_string_t) {
|
18
|
+
.type = YP_STRING_SHARED,
|
19
|
+
.source = (char*) start,
|
20
|
+
.length = (size_t) (end - start)
|
21
|
+
};
|
22
|
+
}
|
23
|
+
|
24
|
+
// Initialize an owned string that is responsible for freeing allocated memory.
|
25
|
+
void
|
26
|
+
yp_string_owned_init(yp_string_t *string, char *source, size_t length) {
|
27
|
+
*string = (yp_string_t) {
|
28
|
+
.type = YP_STRING_OWNED,
|
29
|
+
.source = source,
|
30
|
+
.length = length
|
31
|
+
};
|
32
|
+
}
|
33
|
+
|
34
|
+
// Initialize a constant string that doesn't own its memory source.
|
35
|
+
void
|
36
|
+
yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
|
37
|
+
*string = (yp_string_t) {
|
38
|
+
.type = YP_STRING_CONSTANT,
|
39
|
+
.source = (char*) source,
|
40
|
+
.length = length
|
41
|
+
};
|
42
|
+
}
|
43
|
+
|
44
|
+
static void
|
45
|
+
yp_string_mapped_init_internal(yp_string_t *string, char *source, size_t length) {
|
46
|
+
*string = (yp_string_t) {
|
47
|
+
.type = YP_STRING_MAPPED,
|
48
|
+
.source = source,
|
49
|
+
.length = length
|
50
|
+
};
|
51
|
+
}
|
52
|
+
|
53
|
+
// Returns the memory size associated with the string.
|
54
|
+
size_t
|
55
|
+
yp_string_memsize(const yp_string_t *string) {
|
56
|
+
size_t size = sizeof(yp_string_t);
|
57
|
+
if (string->type == YP_STRING_OWNED) {
|
58
|
+
size += string->length;
|
59
|
+
}
|
60
|
+
return size;
|
61
|
+
}
|
62
|
+
|
63
|
+
// Ensure the string is owned. If it is not, then reinitialize it as owned and
|
64
|
+
// copy over the previous source.
|
65
|
+
void
|
66
|
+
yp_string_ensure_owned(yp_string_t *string) {
|
67
|
+
if (string->type == YP_STRING_OWNED) return;
|
68
|
+
|
69
|
+
size_t length = yp_string_length(string);
|
70
|
+
const char *source = yp_string_source(string);
|
71
|
+
|
72
|
+
char *memory = malloc(length);
|
73
|
+
if (!memory) return;
|
74
|
+
|
75
|
+
yp_string_owned_init(string, memory, length);
|
76
|
+
memcpy(string->source, source, length);
|
77
|
+
}
|
78
|
+
|
79
|
+
// Returns the length associated with the string.
|
80
|
+
YP_EXPORTED_FUNCTION size_t
|
81
|
+
yp_string_length(const yp_string_t *string) {
|
82
|
+
return string->length;
|
83
|
+
}
|
84
|
+
|
85
|
+
// Returns the start pointer associated with the string.
|
86
|
+
YP_EXPORTED_FUNCTION const char *
|
87
|
+
yp_string_source(const yp_string_t *string) {
|
88
|
+
return string->source;
|
89
|
+
}
|
90
|
+
|
91
|
+
// Free the associated memory of the given string.
|
92
|
+
YP_EXPORTED_FUNCTION void
|
93
|
+
yp_string_free(yp_string_t *string) {
|
94
|
+
if (string->type == YP_STRING_OWNED) {
|
95
|
+
free(string->source);
|
96
|
+
} else if (string->type == YP_STRING_MAPPED && string->length) {
|
97
|
+
void *memory = (void *) string->source;
|
98
|
+
#if defined(_WIN32)
|
99
|
+
UnmapViewOfFile(memory);
|
100
|
+
#elif defined(HAVE_MMAP)
|
101
|
+
munmap(memory, string->length);
|
102
|
+
#else
|
103
|
+
free(memory);
|
104
|
+
#endif
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
bool
|
109
|
+
yp_string_mapped_init(yp_string_t *string, const char *filepath) {
|
110
|
+
#ifdef _WIN32
|
111
|
+
// Open the file for reading.
|
112
|
+
HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
113
|
+
|
114
|
+
if (file == INVALID_HANDLE_VALUE) {
|
115
|
+
perror("CreateFile failed");
|
116
|
+
return false;
|
117
|
+
}
|
118
|
+
|
119
|
+
// Get the file size.
|
120
|
+
DWORD file_size = GetFileSize(file, NULL);
|
121
|
+
if (file_size == INVALID_FILE_SIZE) {
|
122
|
+
CloseHandle(file);
|
123
|
+
perror("GetFileSize failed");
|
124
|
+
return false;
|
125
|
+
}
|
126
|
+
|
127
|
+
// If the file is empty, then we don't need to do anything else, we'll set
|
128
|
+
// the source to a constant empty string and return.
|
129
|
+
if (file_size == 0) {
|
130
|
+
CloseHandle(file);
|
131
|
+
char empty_string[] = "";
|
132
|
+
yp_string_mapped_init_internal(string, empty_string, 0);
|
133
|
+
return true;
|
134
|
+
}
|
135
|
+
|
136
|
+
// Create a mapping of the file.
|
137
|
+
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
138
|
+
if (mapping == NULL) {
|
139
|
+
CloseHandle(file);
|
140
|
+
perror("CreateFileMapping failed");
|
141
|
+
return false;
|
142
|
+
}
|
143
|
+
|
144
|
+
// Map the file into memory.
|
145
|
+
char *source = (char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
|
146
|
+
CloseHandle(mapping);
|
147
|
+
CloseHandle(file);
|
148
|
+
|
149
|
+
if (source == NULL) {
|
150
|
+
perror("MapViewOfFile failed");
|
151
|
+
return false;
|
152
|
+
}
|
153
|
+
|
154
|
+
yp_string_mapped_init_internal(string, source, (size_t) file_size);
|
155
|
+
return true;
|
156
|
+
#else
|
157
|
+
// Open the file for reading
|
158
|
+
int fd = open(filepath, O_RDONLY);
|
159
|
+
if (fd == -1) {
|
160
|
+
perror("open");
|
161
|
+
return false;
|
162
|
+
}
|
163
|
+
|
164
|
+
// Stat the file to get the file size
|
165
|
+
struct stat sb;
|
166
|
+
if (fstat(fd, &sb) == -1) {
|
167
|
+
close(fd);
|
168
|
+
perror("fstat");
|
169
|
+
return false;
|
170
|
+
}
|
171
|
+
|
172
|
+
// mmap the file descriptor to virtually get the contents
|
173
|
+
size_t size = (size_t) sb.st_size;
|
174
|
+
char *source = NULL;
|
175
|
+
|
176
|
+
if (size == 0) {
|
177
|
+
close(fd);
|
178
|
+
char empty_string[] = "";
|
179
|
+
yp_string_mapped_init_internal(string, empty_string, 0);
|
180
|
+
return true;
|
181
|
+
}
|
182
|
+
|
183
|
+
#ifdef HAVE_MMAP
|
184
|
+
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
|
185
|
+
if (source == MAP_FAILED) {
|
186
|
+
perror("Map failed");
|
187
|
+
return false;
|
188
|
+
}
|
189
|
+
#else
|
190
|
+
source = malloc(size);
|
191
|
+
if (source == NULL) {
|
192
|
+
return false;
|
193
|
+
}
|
194
|
+
|
195
|
+
ssize_t read_size = read(fd, (void *) source, size);
|
196
|
+
if (read_size < 0 || (size_t)read_size != size) {
|
197
|
+
perror("Read size is incorrect");
|
198
|
+
free((void *) source);
|
199
|
+
return false;
|
200
|
+
}
|
201
|
+
#endif
|
202
|
+
|
203
|
+
close(fd);
|
204
|
+
yp_string_mapped_init_internal(string, source, size);
|
205
|
+
return true;
|
206
|
+
#endif
|
207
|
+
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#include "yarp/util/yp_string_list.h"
|
2
|
+
|
3
|
+
// Allocate a new yp_string_list_t.
|
4
|
+
yp_string_list_t *
|
5
|
+
yp_string_list_alloc(void) {
|
6
|
+
return (yp_string_list_t *) malloc(sizeof(yp_string_list_t));
|
7
|
+
}
|
8
|
+
|
9
|
+
// Initialize a yp_string_list_t with its default values.
|
10
|
+
void
|
11
|
+
yp_string_list_init(yp_string_list_t *string_list) {
|
12
|
+
string_list->strings = (yp_string_t *) malloc(sizeof(yp_string_t));
|
13
|
+
string_list->length = 0;
|
14
|
+
string_list->capacity = 1;
|
15
|
+
}
|
16
|
+
|
17
|
+
// Append a yp_string_t to the given string list.
|
18
|
+
void
|
19
|
+
yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
|
20
|
+
if (string_list->length + 1 > string_list->capacity) {
|
21
|
+
string_list->capacity *= 2;
|
22
|
+
string_list->strings = (yp_string_t *) realloc(string_list->strings, string_list->capacity * sizeof(yp_string_t));
|
23
|
+
}
|
24
|
+
|
25
|
+
string_list->strings[string_list->length++] = *string;
|
26
|
+
}
|
27
|
+
|
28
|
+
// Free the memory associated with the string list.
|
29
|
+
void
|
30
|
+
yp_string_list_free(yp_string_list_t *string_list) {
|
31
|
+
free(string_list->strings);
|
32
|
+
}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#include <ctype.h>
|
2
|
+
#include <stddef.h>
|
3
|
+
|
4
|
+
// Compare at most length bytes of string1 and string2, ignoring case as
// determined by tolower. Comparison stops early at the first differing byte or
// at a null terminator.
//
// Returns 0 if the compared bytes are equal, a negative value if string1
// orders before string2, and a positive value if it orders after. Bytes are
// compared as unsigned char, so a string that ends early always orders before
// a longer string with the same prefix, whichever argument it is.
int
yp_strncasecmp(const char *string1, const char *string2, size_t length) {
    for (size_t offset = 0; offset < length; offset++) {
        // Convert through unsigned char: passing a negative char to tolower
        // is undefined, and the sign of the result must not depend on whether
        // plain char is signed.
        unsigned char left = (unsigned char) string1[offset];
        unsigned char right = (unsigned char) string2[offset];

        int difference = tolower(left) - tolower(right);
        if (difference != 0) return difference;

        // The bytes are equal here, so if one is the terminator both strings
        // have ended and nothing past this point may be read.
        if (left == '\0') break;
    }

    return 0;
}
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#include "yarp/util/yp_strpbrk.h"
|
2
|
+
|
3
|
+
// This is the slow path that does care about the encoding.
|
4
|
+
static inline const char *
|
5
|
+
yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
|
6
|
+
size_t index = 0;
|
7
|
+
|
8
|
+
while (index < maximum) {
|
9
|
+
if (strchr(charset, source[index]) != NULL) {
|
10
|
+
return source + index;
|
11
|
+
}
|
12
|
+
|
13
|
+
size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
|
14
|
+
if (width == 0) {
|
15
|
+
return NULL;
|
16
|
+
}
|
17
|
+
|
18
|
+
index += width;
|
19
|
+
}
|
20
|
+
|
21
|
+
return NULL;
|
22
|
+
}
|
23
|
+
|
24
|
+
// This is the fast path that does not care about the encoding. It checks every
// one of the first maximum bytes of source and returns a pointer to the first
// one that strchr finds in charset, or NULL if there is none.
static inline const char *
yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
    const char *limit = source + maximum;

    for (const char *cursor = source; cursor < limit; cursor++) {
        if (strchr(charset, *cursor) != NULL) {
            return cursor;
        }
    }

    return NULL;
}
|
39
|
+
|
40
|
+
// Here we have rolled our own version of strpbrk. The standard library strpbrk
|
41
|
+
// has undefined behavior when the source string is not null-terminated. We want
|
42
|
+
// to support strings that are not null-terminated because yp_parse does not
|
43
|
+
// have the contract that the string is null-terminated. (This is desirable
|
44
|
+
// because it means the extension can call yp_parse with the result of a call to
|
45
|
+
// mmap).
|
46
|
+
//
|
47
|
+
// The standard library strpbrk also does not support passing a maximum length
|
48
|
+
// to search. We want to support this for the reason mentioned above, but we
|
49
|
+
// also don't want it to stop on null bytes. Ruby actually allows null bytes
|
50
|
+
// within strings, comments, regular expressions, etc. So we need to be able to
|
51
|
+
// skip past them.
|
52
|
+
//
|
53
|
+
// Finally, we want to support encodings wherein the charset could contain
|
54
|
+
// characters that are trailing bytes of multi-byte characters. For example, in
|
55
|
+
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
56
|
+
// need to take a slower path and iterate one multi-byte character at a time.
|
57
|
+
const char *
|
58
|
+
yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
|
59
|
+
if (length <= 0) {
|
60
|
+
return NULL;
|
61
|
+
} else if (parser->encoding_changed && parser->encoding.multibyte) {
|
62
|
+
return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
|
63
|
+
} else {
|
64
|
+
return yp_strpbrk_single_byte(source, charset, (size_t) length);
|
65
|
+
}
|
66
|
+
}
|