yarp 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,207 @@
1
+ #include "yarp/util/yp_string.h"
2
+
3
+ // The following headers are necessary to read files using demand paging.
4
+ #ifdef _WIN32
5
+ #include <windows.h>
6
+ #else
7
+ #include <fcntl.h>
8
+ #include <sys/mman.h>
9
+ #include <sys/stat.h>
10
+ #include <unistd.h>
11
+ #endif
12
+
13
+ // Initialize a shared string that is based on initial input.
14
+ void
15
+ yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
16
+ assert(start <= end);
17
+ *string = (yp_string_t) {
18
+ .type = YP_STRING_SHARED,
19
+ .source = (char*) start,
20
+ .length = (size_t) (end - start)
21
+ };
22
+ }
23
+
24
+ // Initialize an owned string that is responsible for freeing allocated memory.
25
+ void
26
+ yp_string_owned_init(yp_string_t *string, char *source, size_t length) {
27
+ *string = (yp_string_t) {
28
+ .type = YP_STRING_OWNED,
29
+ .source = source,
30
+ .length = length
31
+ };
32
+ }
33
+
34
+ // Initialize a constant string that doesn't own its memory source.
35
+ void
36
+ yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
37
+ *string = (yp_string_t) {
38
+ .type = YP_STRING_CONSTANT,
39
+ .source = (char*) source,
40
+ .length = length
41
+ };
42
+ }
43
+
44
+ static void
45
+ yp_string_mapped_init_internal(yp_string_t *string, char *source, size_t length) {
46
+ *string = (yp_string_t) {
47
+ .type = YP_STRING_MAPPED,
48
+ .source = source,
49
+ .length = length
50
+ };
51
+ }
52
+
53
+ // Returns the memory size associated with the string.
54
+ size_t
55
+ yp_string_memsize(const yp_string_t *string) {
56
+ size_t size = sizeof(yp_string_t);
57
+ if (string->type == YP_STRING_OWNED) {
58
+ size += string->length;
59
+ }
60
+ return size;
61
+ }
62
+
63
+ // Ensure the string is owned. If it is not, then reinitialize it as owned and
64
+ // copy over the previous source.
65
+ void
66
+ yp_string_ensure_owned(yp_string_t *string) {
67
+ if (string->type == YP_STRING_OWNED) return;
68
+
69
+ size_t length = yp_string_length(string);
70
+ const char *source = yp_string_source(string);
71
+
72
+ char *memory = malloc(length);
73
+ if (!memory) return;
74
+
75
+ yp_string_owned_init(string, memory, length);
76
+ memcpy(string->source, source, length);
77
+ }
78
+
79
+ // Returns the length associated with the string.
80
+ YP_EXPORTED_FUNCTION size_t
81
+ yp_string_length(const yp_string_t *string) {
82
+ return string->length;
83
+ }
84
+
85
+ // Returns the start pointer associated with the string.
86
+ YP_EXPORTED_FUNCTION const char *
87
+ yp_string_source(const yp_string_t *string) {
88
+ return string->source;
89
+ }
90
+
91
+ // Free the associated memory of the given string.
92
+ YP_EXPORTED_FUNCTION void
93
+ yp_string_free(yp_string_t *string) {
94
+ if (string->type == YP_STRING_OWNED) {
95
+ free(string->source);
96
+ } else if (string->type == YP_STRING_MAPPED && string->length) {
97
+ void *memory = (void *) string->source;
98
+ #if defined(_WIN32)
99
+ UnmapViewOfFile(memory);
100
+ #elif defined(HAVE_MMAP)
101
+ munmap(memory, string->length);
102
+ #else
103
+ free(memory);
104
+ #endif
105
+ }
106
+ }
107
+
108
+ bool
109
+ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
110
+ #ifdef _WIN32
111
+ // Open the file for reading.
112
+ HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
113
+
114
+ if (file == INVALID_HANDLE_VALUE) {
115
+ perror("CreateFile failed");
116
+ return false;
117
+ }
118
+
119
+ // Get the file size.
120
+ DWORD file_size = GetFileSize(file, NULL);
121
+ if (file_size == INVALID_FILE_SIZE) {
122
+ CloseHandle(file);
123
+ perror("GetFileSize failed");
124
+ return false;
125
+ }
126
+
127
+ // If the file is empty, then we don't need to do anything else, we'll set
128
+ // the source to a constant empty string and return.
129
+ if (file_size == 0) {
130
+ CloseHandle(file);
131
+ char empty_string[] = "";
132
+ yp_string_mapped_init_internal(string, empty_string, 0);
133
+ return true;
134
+ }
135
+
136
+ // Create a mapping of the file.
137
+ HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
138
+ if (mapping == NULL) {
139
+ CloseHandle(file);
140
+ perror("CreateFileMapping failed");
141
+ return false;
142
+ }
143
+
144
+ // Map the file into memory.
145
+ char *source = (char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
146
+ CloseHandle(mapping);
147
+ CloseHandle(file);
148
+
149
+ if (source == NULL) {
150
+ perror("MapViewOfFile failed");
151
+ return false;
152
+ }
153
+
154
+ yp_string_mapped_init_internal(string, source, (size_t) file_size);
155
+ return true;
156
+ #else
157
+ // Open the file for reading
158
+ int fd = open(filepath, O_RDONLY);
159
+ if (fd == -1) {
160
+ perror("open");
161
+ return false;
162
+ }
163
+
164
+ // Stat the file to get the file size
165
+ struct stat sb;
166
+ if (fstat(fd, &sb) == -1) {
167
+ close(fd);
168
+ perror("fstat");
169
+ return false;
170
+ }
171
+
172
+ // mmap the file descriptor to virtually get the contents
173
+ size_t size = (size_t) sb.st_size;
174
+ char *source = NULL;
175
+
176
+ if (size == 0) {
177
+ close(fd);
178
+ char empty_string[] = "";
179
+ yp_string_mapped_init_internal(string, empty_string, 0);
180
+ return true;
181
+ }
182
+
183
+ #ifdef HAVE_MMAP
184
+ source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
185
+ if (source == MAP_FAILED) {
186
+ perror("Map failed");
187
+ return false;
188
+ }
189
+ #else
190
+ source = malloc(size);
191
+ if (source == NULL) {
192
+ return false;
193
+ }
194
+
195
+ ssize_t read_size = read(fd, (void *) source, size);
196
+ if (read_size < 0 || (size_t)read_size != size) {
197
+ perror("Read size is incorrect");
198
+ free((void *) source);
199
+ return false;
200
+ }
201
+ #endif
202
+
203
+ close(fd);
204
+ yp_string_mapped_init_internal(string, source, size);
205
+ return true;
206
+ #endif
207
+ }
@@ -0,0 +1,32 @@
1
+ #include "yarp/util/yp_string_list.h"
2
+
3
+ // Allocate a new yp_string_list_t.
4
+ yp_string_list_t *
5
+ yp_string_list_alloc(void) {
6
+ return (yp_string_list_t *) malloc(sizeof(yp_string_list_t));
7
+ }
8
+
9
+ // Initialize a yp_string_list_t with its default values.
10
+ void
11
+ yp_string_list_init(yp_string_list_t *string_list) {
12
+ string_list->strings = (yp_string_t *) malloc(sizeof(yp_string_t));
13
+ string_list->length = 0;
14
+ string_list->capacity = 1;
15
+ }
16
+
17
+ // Append a yp_string_t to the given string list.
18
+ void
19
+ yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
20
+ if (string_list->length + 1 > string_list->capacity) {
21
+ string_list->capacity *= 2;
22
+ string_list->strings = (yp_string_t *) realloc(string_list->strings, string_list->capacity * sizeof(yp_string_t));
23
+ }
24
+
25
+ string_list->strings[string_list->length++] = *string;
26
+ }
27
+
28
+ // Free the memory associated with the string list.
29
+ void
30
+ yp_string_list_free(yp_string_list_t *string_list) {
31
+ free(string_list->strings);
32
+ }
@@ -0,0 +1,20 @@
1
+ #include <ctype.h>
2
+ #include <stddef.h>
3
+
4
// Compare at most `length` bytes of two strings, ignoring case, in the manner
// of POSIX strncasecmp. Comparison stops at the first differing byte, at a null
// terminator, or after `length` bytes, whichever comes first.
//
// Returns zero when the compared prefixes are equal, a negative value when
// string1 sorts before string2, and a positive value when it sorts after. A
// string that ends early sorts before a longer string that shares its prefix.
int
yp_strncasecmp(const char *string1, const char *string2, size_t length) {
    for (size_t offset = 0; offset < length; offset++) {
        // Go through unsigned char so that bytes >= 0x80 are valid arguments
        // to tolower and compare as positive values.
        int left = tolower((unsigned char) string1[offset]);
        int right = tolower((unsigned char) string2[offset]);

        if (left != right) return left - right;

        // Both bytes are equal here, so a terminator ends both strings.
        if (left == '\0') break;
    }

    return 0;
}
@@ -0,0 +1,66 @@
1
+ #include "yarp/util/yp_strpbrk.h"
2
+
3
+ // This is the slow path that does care about the encoding.
4
+ static inline const char *
5
+ yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
6
+ size_t index = 0;
7
+
8
+ while (index < maximum) {
9
+ if (strchr(charset, source[index]) != NULL) {
10
+ return source + index;
11
+ }
12
+
13
+ size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
14
+ if (width == 0) {
15
+ return NULL;
16
+ }
17
+
18
+ index += width;
19
+ }
20
+
21
+ return NULL;
22
+ }
23
+
24
// Encoding-agnostic fast path. Checks each of the first `maximum` bytes of
// `source` in turn and returns a pointer to the first one that strchr finds in
// `charset`, or NULL when none of them match.
static inline const char *
yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
    for (size_t offset = 0; offset < maximum; offset++) {
        if (strchr(charset, source[offset]) != NULL) {
            return &source[offset];
        }
    }

    return NULL;
}
39
+
40
+ // Here we have rolled our own version of strpbrk. The standard library strpbrk
41
+ // has undefined behavior when the source string is not null-terminated. We want
42
+ // to support strings that are not null-terminated because yp_parse does not
43
+ // have the contract that the string is null-terminated. (This is desirable
44
+ // because it means the extension can call yp_parse with the result of a call to
45
+ // mmap).
46
+ //
47
+ // The standard library strpbrk also does not support passing a maximum length
48
+ // to search. We want to support this for the reason mentioned above, but we
49
+ // also don't want it to stop on null bytes. Ruby actually allows null bytes
50
+ // within strings, comments, regular expressions, etc. So we need to be able to
51
+ // skip past them.
52
+ //
53
+ // Finally, we want to support encodings wherein the charset could contain
54
+ // characters that are trailing bytes of multi-byte characters. For example, in
55
+ // Shift-JIS, the backslash character can be a trailing byte. In that case we
56
+ // need to take a slower path and iterate one multi-byte character at a time.
57
+ const char *
58
+ yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
59
+ if (length <= 0) {
60
+ return NULL;
61
+ } else if (parser->encoding_changed && parser->encoding.multibyte) {
62
+ return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
63
+ } else {
64
+ return yp_strpbrk_single_byte(source, charset, (size_t) length);
65
+ }
66
+ }