yarp 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +51 -0
- data/LICENSE.md +7 -0
- data/Makefile.in +79 -0
- data/README.md +86 -0
- data/config.h.in +25 -0
- data/config.yml +2147 -0
- data/configure +4487 -0
- data/docs/build_system.md +85 -0
- data/docs/building.md +26 -0
- data/docs/configuration.md +56 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +116 -0
- data/docs/extension.md +20 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/serialization.md +130 -0
- data/docs/testing.md +55 -0
- data/ext/yarp/api_node.c +3680 -0
- data/ext/yarp/api_pack.c +256 -0
- data/ext/yarp/extconf.rb +131 -0
- data/ext/yarp/extension.c +547 -0
- data/ext/yarp/extension.h +18 -0
- data/include/yarp/ast.h +1412 -0
- data/include/yarp/defines.h +54 -0
- data/include/yarp/diagnostic.h +24 -0
- data/include/yarp/enc/yp_encoding.h +94 -0
- data/include/yarp/node.h +36 -0
- data/include/yarp/pack.h +141 -0
- data/include/yarp/parser.h +389 -0
- data/include/yarp/regexp.h +19 -0
- data/include/yarp/unescape.h +42 -0
- data/include/yarp/util/yp_buffer.h +39 -0
- data/include/yarp/util/yp_char.h +75 -0
- data/include/yarp/util/yp_constant_pool.h +64 -0
- data/include/yarp/util/yp_list.h +67 -0
- data/include/yarp/util/yp_memchr.h +14 -0
- data/include/yarp/util/yp_newline_list.h +54 -0
- data/include/yarp/util/yp_state_stack.h +24 -0
- data/include/yarp/util/yp_string.h +57 -0
- data/include/yarp/util/yp_string_list.h +28 -0
- data/include/yarp/util/yp_strpbrk.h +29 -0
- data/include/yarp/version.h +5 -0
- data/include/yarp.h +69 -0
- data/lib/yarp/lex_compat.rb +759 -0
- data/lib/yarp/node.rb +7428 -0
- data/lib/yarp/pack.rb +185 -0
- data/lib/yarp/ripper_compat.rb +174 -0
- data/lib/yarp/serialize.rb +389 -0
- data/lib/yarp.rb +330 -0
- data/src/diagnostic.c +25 -0
- data/src/enc/yp_big5.c +79 -0
- data/src/enc/yp_euc_jp.c +85 -0
- data/src/enc/yp_gbk.c +88 -0
- data/src/enc/yp_shift_jis.c +83 -0
- data/src/enc/yp_tables.c +509 -0
- data/src/enc/yp_unicode.c +2320 -0
- data/src/enc/yp_windows_31j.c +83 -0
- data/src/node.c +2011 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +1782 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1576 -0
- data/src/token_type.c +347 -0
- data/src/unescape.c +576 -0
- data/src/util/yp_buffer.c +78 -0
- data/src/util/yp_char.c +229 -0
- data/src/util/yp_constant_pool.c +147 -0
- data/src/util/yp_list.c +50 -0
- data/src/util/yp_memchr.c +31 -0
- data/src/util/yp_newline_list.c +119 -0
- data/src/util/yp_state_stack.c +25 -0
- data/src/util/yp_string.c +207 -0
- data/src/util/yp_string_list.c +32 -0
- data/src/util/yp_strncasecmp.c +20 -0
- data/src/util/yp_strpbrk.c +66 -0
- data/src/yarp.c +13211 -0
- data/yarp.gemspec +100 -0
- metadata +125 -0
@@ -0,0 +1,207 @@
|
|
1
|
+
#include "yarp/util/yp_string.h"
|
2
|
+
|
3
|
+
// The following headers are necessary to read files using demand paging.
|
4
|
+
#ifdef _WIN32
|
5
|
+
#include <windows.h>
|
6
|
+
#else
|
7
|
+
#include <fcntl.h>
|
8
|
+
#include <sys/mman.h>
|
9
|
+
#include <sys/stat.h>
|
10
|
+
#include <unistd.h>
|
11
|
+
#endif
|
12
|
+
|
13
|
+
// Initialize a shared string that is based on initial input.
|
14
|
+
void
|
15
|
+
yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
|
16
|
+
assert(start <= end);
|
17
|
+
*string = (yp_string_t) {
|
18
|
+
.type = YP_STRING_SHARED,
|
19
|
+
.source = (char*) start,
|
20
|
+
.length = (size_t) (end - start)
|
21
|
+
};
|
22
|
+
}
|
23
|
+
|
24
|
+
// Initialize an owned string that is responsible for freeing allocated memory.
|
25
|
+
void
|
26
|
+
yp_string_owned_init(yp_string_t *string, char *source, size_t length) {
|
27
|
+
*string = (yp_string_t) {
|
28
|
+
.type = YP_STRING_OWNED,
|
29
|
+
.source = source,
|
30
|
+
.length = length
|
31
|
+
};
|
32
|
+
}
|
33
|
+
|
34
|
+
// Initialize a constant string that doesn't own its memory source.
|
35
|
+
void
|
36
|
+
yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
|
37
|
+
*string = (yp_string_t) {
|
38
|
+
.type = YP_STRING_CONSTANT,
|
39
|
+
.source = (char*) source,
|
40
|
+
.length = length
|
41
|
+
};
|
42
|
+
}
|
43
|
+
|
44
|
+
static void
|
45
|
+
yp_string_mapped_init_internal(yp_string_t *string, char *source, size_t length) {
|
46
|
+
*string = (yp_string_t) {
|
47
|
+
.type = YP_STRING_MAPPED,
|
48
|
+
.source = source,
|
49
|
+
.length = length
|
50
|
+
};
|
51
|
+
}
|
52
|
+
|
53
|
+
// Returns the memory size associated with the string.
|
54
|
+
size_t
|
55
|
+
yp_string_memsize(const yp_string_t *string) {
|
56
|
+
size_t size = sizeof(yp_string_t);
|
57
|
+
if (string->type == YP_STRING_OWNED) {
|
58
|
+
size += string->length;
|
59
|
+
}
|
60
|
+
return size;
|
61
|
+
}
|
62
|
+
|
63
|
+
// Ensure the string is owned. If it is not, then reinitialize it as owned and
|
64
|
+
// copy over the previous source.
|
65
|
+
void
|
66
|
+
yp_string_ensure_owned(yp_string_t *string) {
|
67
|
+
if (string->type == YP_STRING_OWNED) return;
|
68
|
+
|
69
|
+
size_t length = yp_string_length(string);
|
70
|
+
const char *source = yp_string_source(string);
|
71
|
+
|
72
|
+
char *memory = malloc(length);
|
73
|
+
if (!memory) return;
|
74
|
+
|
75
|
+
yp_string_owned_init(string, memory, length);
|
76
|
+
memcpy(string->source, source, length);
|
77
|
+
}
|
78
|
+
|
79
|
+
// Returns the length associated with the string.
|
80
|
+
YP_EXPORTED_FUNCTION size_t
|
81
|
+
yp_string_length(const yp_string_t *string) {
|
82
|
+
return string->length;
|
83
|
+
}
|
84
|
+
|
85
|
+
// Returns the start pointer associated with the string.
|
86
|
+
YP_EXPORTED_FUNCTION const char *
|
87
|
+
yp_string_source(const yp_string_t *string) {
|
88
|
+
return string->source;
|
89
|
+
}
|
90
|
+
|
91
|
+
// Free the associated memory of the given string.
|
92
|
+
YP_EXPORTED_FUNCTION void
|
93
|
+
yp_string_free(yp_string_t *string) {
|
94
|
+
if (string->type == YP_STRING_OWNED) {
|
95
|
+
free(string->source);
|
96
|
+
} else if (string->type == YP_STRING_MAPPED && string->length) {
|
97
|
+
void *memory = (void *) string->source;
|
98
|
+
#if defined(_WIN32)
|
99
|
+
UnmapViewOfFile(memory);
|
100
|
+
#elif defined(HAVE_MMAP)
|
101
|
+
munmap(memory, string->length);
|
102
|
+
#else
|
103
|
+
free(memory);
|
104
|
+
#endif
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
bool
|
109
|
+
yp_string_mapped_init(yp_string_t *string, const char *filepath) {
|
110
|
+
#ifdef _WIN32
|
111
|
+
// Open the file for reading.
|
112
|
+
HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
113
|
+
|
114
|
+
if (file == INVALID_HANDLE_VALUE) {
|
115
|
+
perror("CreateFile failed");
|
116
|
+
return false;
|
117
|
+
}
|
118
|
+
|
119
|
+
// Get the file size.
|
120
|
+
DWORD file_size = GetFileSize(file, NULL);
|
121
|
+
if (file_size == INVALID_FILE_SIZE) {
|
122
|
+
CloseHandle(file);
|
123
|
+
perror("GetFileSize failed");
|
124
|
+
return false;
|
125
|
+
}
|
126
|
+
|
127
|
+
// If the file is empty, then we don't need to do anything else, we'll set
|
128
|
+
// the source to a constant empty string and return.
|
129
|
+
if (file_size == 0) {
|
130
|
+
CloseHandle(file);
|
131
|
+
char empty_string[] = "";
|
132
|
+
yp_string_mapped_init_internal(string, empty_string, 0);
|
133
|
+
return true;
|
134
|
+
}
|
135
|
+
|
136
|
+
// Create a mapping of the file.
|
137
|
+
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
138
|
+
if (mapping == NULL) {
|
139
|
+
CloseHandle(file);
|
140
|
+
perror("CreateFileMapping failed");
|
141
|
+
return false;
|
142
|
+
}
|
143
|
+
|
144
|
+
// Map the file into memory.
|
145
|
+
char *source = (char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
|
146
|
+
CloseHandle(mapping);
|
147
|
+
CloseHandle(file);
|
148
|
+
|
149
|
+
if (source == NULL) {
|
150
|
+
perror("MapViewOfFile failed");
|
151
|
+
return false;
|
152
|
+
}
|
153
|
+
|
154
|
+
yp_string_mapped_init_internal(string, source, (size_t) file_size);
|
155
|
+
return true;
|
156
|
+
#else
|
157
|
+
// Open the file for reading
|
158
|
+
int fd = open(filepath, O_RDONLY);
|
159
|
+
if (fd == -1) {
|
160
|
+
perror("open");
|
161
|
+
return false;
|
162
|
+
}
|
163
|
+
|
164
|
+
// Stat the file to get the file size
|
165
|
+
struct stat sb;
|
166
|
+
if (fstat(fd, &sb) == -1) {
|
167
|
+
close(fd);
|
168
|
+
perror("fstat");
|
169
|
+
return false;
|
170
|
+
}
|
171
|
+
|
172
|
+
// mmap the file descriptor to virtually get the contents
|
173
|
+
size_t size = (size_t) sb.st_size;
|
174
|
+
char *source = NULL;
|
175
|
+
|
176
|
+
if (size == 0) {
|
177
|
+
close(fd);
|
178
|
+
char empty_string[] = "";
|
179
|
+
yp_string_mapped_init_internal(string, empty_string, 0);
|
180
|
+
return true;
|
181
|
+
}
|
182
|
+
|
183
|
+
#ifdef HAVE_MMAP
|
184
|
+
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
|
185
|
+
if (source == MAP_FAILED) {
|
186
|
+
perror("Map failed");
|
187
|
+
return false;
|
188
|
+
}
|
189
|
+
#else
|
190
|
+
source = malloc(size);
|
191
|
+
if (source == NULL) {
|
192
|
+
return false;
|
193
|
+
}
|
194
|
+
|
195
|
+
ssize_t read_size = read(fd, (void *) source, size);
|
196
|
+
if (read_size < 0 || (size_t)read_size != size) {
|
197
|
+
perror("Read size is incorrect");
|
198
|
+
free((void *) source);
|
199
|
+
return false;
|
200
|
+
}
|
201
|
+
#endif
|
202
|
+
|
203
|
+
close(fd);
|
204
|
+
yp_string_mapped_init_internal(string, source, size);
|
205
|
+
return true;
|
206
|
+
#endif
|
207
|
+
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#include "yarp/util/yp_string_list.h"
|
2
|
+
|
3
|
+
// Allocate a new yp_string_list_t.
|
4
|
+
yp_string_list_t *
|
5
|
+
yp_string_list_alloc(void) {
|
6
|
+
return (yp_string_list_t *) malloc(sizeof(yp_string_list_t));
|
7
|
+
}
|
8
|
+
|
9
|
+
// Initialize a yp_string_list_t with its default values.
|
10
|
+
void
|
11
|
+
yp_string_list_init(yp_string_list_t *string_list) {
|
12
|
+
string_list->strings = (yp_string_t *) malloc(sizeof(yp_string_t));
|
13
|
+
string_list->length = 0;
|
14
|
+
string_list->capacity = 1;
|
15
|
+
}
|
16
|
+
|
17
|
+
// Append a yp_string_t to the given string list.
|
18
|
+
void
|
19
|
+
yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
|
20
|
+
if (string_list->length + 1 > string_list->capacity) {
|
21
|
+
string_list->capacity *= 2;
|
22
|
+
string_list->strings = (yp_string_t *) realloc(string_list->strings, string_list->capacity * sizeof(yp_string_t));
|
23
|
+
}
|
24
|
+
|
25
|
+
string_list->strings[string_list->length++] = *string;
|
26
|
+
}
|
27
|
+
|
28
|
+
// Free the memory associated with the string list.
|
29
|
+
void
|
30
|
+
yp_string_list_free(yp_string_list_t *string_list) {
|
31
|
+
free(string_list->strings);
|
32
|
+
}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#include <ctype.h>
|
2
|
+
#include <stddef.h>
|
3
|
+
|
4
|
+
// Compare at most length bytes of the two strings, ignoring case as determined
// by tolower. Comparison stops early at the first NUL terminator.
//
// Returns zero when the compared bytes are equal, a negative value when
// string1 orders before string2, and a positive value when it orders after.
// A string that ends early orders before a longer string that shares its
// prefix. Bytes are compared as unsigned char so that the sign of the result
// does not depend on whether plain char is signed.
int
yp_strncasecmp(const char *string1, const char *string2, size_t length) {
    for (size_t offset = 0; offset < length; offset++) {
        unsigned char left = (unsigned char) string1[offset];
        unsigned char right = (unsigned char) string2[offset];

        // This also covers exactly one string ending here: its terminator
        // differs from the other string's byte.
        int difference = tolower(left) - tolower(right);
        if (difference != 0) return difference;

        // Both strings ended at the same offset, so they are equal.
        if (left == '\0') return 0;
    }

    return 0;
}
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#include "yarp/util/yp_strpbrk.h"
|
2
|
+
|
3
|
+
// This is the slow path that does care about the encoding.
|
4
|
+
static inline const char *
|
5
|
+
yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
|
6
|
+
size_t index = 0;
|
7
|
+
|
8
|
+
while (index < maximum) {
|
9
|
+
if (strchr(charset, source[index]) != NULL) {
|
10
|
+
return source + index;
|
11
|
+
}
|
12
|
+
|
13
|
+
size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
|
14
|
+
if (width == 0) {
|
15
|
+
return NULL;
|
16
|
+
}
|
17
|
+
|
18
|
+
index += width;
|
19
|
+
}
|
20
|
+
|
21
|
+
return NULL;
|
22
|
+
}
|
23
|
+
|
24
|
+
// Encoding-agnostic fast path. Each of the first maximum bytes of the source is
// tested against the charset in turn. Returns a pointer to the first matching
// byte, or NULL if there is none. Because strchr treats the charset's
// terminator as searchable, a NUL byte in the source also counts as a match.
static inline const char *
yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
    for (size_t offset = 0; offset < maximum; offset++) {
        const char *candidate = source + offset;
        if (strchr(charset, *candidate) != NULL) return candidate;
    }

    return NULL;
}
|
39
|
+
|
40
|
+
// Here we have rolled our own version of strpbrk. The standard library strpbrk
|
41
|
+
// has undefined behavior when the source string is not null-terminated. We want
|
42
|
+
// to support strings that are not null-terminated because yp_parse does not
|
43
|
+
// have the contract that the string is null-terminated. (This is desirable
|
44
|
+
// because it means the extension can call yp_parse with the result of a call to
|
45
|
+
// mmap).
|
46
|
+
//
|
47
|
+
// The standard library strpbrk also does not support passing a maximum length
|
48
|
+
// to search. We want to support this for the reason mentioned above, but we
|
49
|
+
// also don't want it to stop on null bytes. Ruby actually allows null bytes
|
50
|
+
// within strings, comments, regular expressions, etc. So we need to be able to
|
51
|
+
// skip past them.
|
52
|
+
//
|
53
|
+
// Finally, we want to support encodings wherein the charset could contain
|
54
|
+
// characters that are trailing bytes of multi-byte characters. For example, in
|
55
|
+
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
56
|
+
// need to take a slower path and iterate one multi-byte character at a time.
|
57
|
+
const char *
|
58
|
+
yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
|
59
|
+
if (length <= 0) {
|
60
|
+
return NULL;
|
61
|
+
} else if (parser->encoding_changed && parser->encoding.multibyte) {
|
62
|
+
return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
|
63
|
+
} else {
|
64
|
+
return yp_strpbrk_single_byte(source, charset, (size_t) length);
|
65
|
+
}
|
66
|
+
}
|