yarp 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,207 @@
1
+ #include "yarp/util/yp_string.h"
2
+
3
+ // The following headers are necessary to read files using demand paging.
4
+ #ifdef _WIN32
5
+ #include <windows.h>
6
+ #else
7
+ #include <fcntl.h>
8
+ #include <sys/mman.h>
9
+ #include <sys/stat.h>
10
+ #include <unistd.h>
11
+ #endif
12
+
13
+ // Initialize a shared string that is based on initial input.
14
+ void
15
+ yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
16
+ assert(start <= end);
17
+ *string = (yp_string_t) {
18
+ .type = YP_STRING_SHARED,
19
+ .source = (char*) start,
20
+ .length = (size_t) (end - start)
21
+ };
22
+ }
23
+
24
+ // Initialize an owned string that is responsible for freeing allocated memory.
25
+ void
26
+ yp_string_owned_init(yp_string_t *string, char *source, size_t length) {
27
+ *string = (yp_string_t) {
28
+ .type = YP_STRING_OWNED,
29
+ .source = source,
30
+ .length = length
31
+ };
32
+ }
33
+
34
+ // Initialize a constant string that doesn't own its memory source.
35
+ void
36
+ yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
37
+ *string = (yp_string_t) {
38
+ .type = YP_STRING_CONSTANT,
39
+ .source = (char*) source,
40
+ .length = length
41
+ };
42
+ }
43
+
44
+ static void
45
+ yp_string_mapped_init_internal(yp_string_t *string, char *source, size_t length) {
46
+ *string = (yp_string_t) {
47
+ .type = YP_STRING_MAPPED,
48
+ .source = source,
49
+ .length = length
50
+ };
51
+ }
52
+
53
+ // Returns the memory size associated with the string.
54
+ size_t
55
+ yp_string_memsize(const yp_string_t *string) {
56
+ size_t size = sizeof(yp_string_t);
57
+ if (string->type == YP_STRING_OWNED) {
58
+ size += string->length;
59
+ }
60
+ return size;
61
+ }
62
+
63
+ // Ensure the string is owned. If it is not, then reinitialize it as owned and
64
+ // copy over the previous source.
65
+ void
66
+ yp_string_ensure_owned(yp_string_t *string) {
67
+ if (string->type == YP_STRING_OWNED) return;
68
+
69
+ size_t length = yp_string_length(string);
70
+ const char *source = yp_string_source(string);
71
+
72
+ char *memory = malloc(length);
73
+ if (!memory) return;
74
+
75
+ yp_string_owned_init(string, memory, length);
76
+ memcpy(string->source, source, length);
77
+ }
78
+
79
+ // Returns the length associated with the string.
80
+ YP_EXPORTED_FUNCTION size_t
81
+ yp_string_length(const yp_string_t *string) {
82
+ return string->length;
83
+ }
84
+
85
+ // Returns the start pointer associated with the string.
86
+ YP_EXPORTED_FUNCTION const char *
87
+ yp_string_source(const yp_string_t *string) {
88
+ return string->source;
89
+ }
90
+
91
+ // Free the associated memory of the given string.
92
+ YP_EXPORTED_FUNCTION void
93
+ yp_string_free(yp_string_t *string) {
94
+ if (string->type == YP_STRING_OWNED) {
95
+ free(string->source);
96
+ } else if (string->type == YP_STRING_MAPPED && string->length) {
97
+ void *memory = (void *) string->source;
98
+ #if defined(_WIN32)
99
+ UnmapViewOfFile(memory);
100
+ #elif defined(HAVE_MMAP)
101
+ munmap(memory, string->length);
102
+ #else
103
+ free(memory);
104
+ #endif
105
+ }
106
+ }
107
+
108
+ bool
109
+ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
110
+ #ifdef _WIN32
111
+ // Open the file for reading.
112
+ HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
113
+
114
+ if (file == INVALID_HANDLE_VALUE) {
115
+ perror("CreateFile failed");
116
+ return false;
117
+ }
118
+
119
+ // Get the file size.
120
+ DWORD file_size = GetFileSize(file, NULL);
121
+ if (file_size == INVALID_FILE_SIZE) {
122
+ CloseHandle(file);
123
+ perror("GetFileSize failed");
124
+ return false;
125
+ }
126
+
127
+ // If the file is empty, then we don't need to do anything else, we'll set
128
+ // the source to a constant empty string and return.
129
+ if (file_size == 0) {
130
+ CloseHandle(file);
131
+ char empty_string[] = "";
132
+ yp_string_mapped_init_internal(string, empty_string, 0);
133
+ return true;
134
+ }
135
+
136
+ // Create a mapping of the file.
137
+ HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
138
+ if (mapping == NULL) {
139
+ CloseHandle(file);
140
+ perror("CreateFileMapping failed");
141
+ return false;
142
+ }
143
+
144
+ // Map the file into memory.
145
+ char *source = (char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
146
+ CloseHandle(mapping);
147
+ CloseHandle(file);
148
+
149
+ if (source == NULL) {
150
+ perror("MapViewOfFile failed");
151
+ return false;
152
+ }
153
+
154
+ yp_string_mapped_init_internal(string, source, (size_t) file_size);
155
+ return true;
156
+ #else
157
+ // Open the file for reading
158
+ int fd = open(filepath, O_RDONLY);
159
+ if (fd == -1) {
160
+ perror("open");
161
+ return false;
162
+ }
163
+
164
+ // Stat the file to get the file size
165
+ struct stat sb;
166
+ if (fstat(fd, &sb) == -1) {
167
+ close(fd);
168
+ perror("fstat");
169
+ return false;
170
+ }
171
+
172
+ // mmap the file descriptor to virtually get the contents
173
+ size_t size = (size_t) sb.st_size;
174
+ char *source = NULL;
175
+
176
+ if (size == 0) {
177
+ close(fd);
178
+ char empty_string[] = "";
179
+ yp_string_mapped_init_internal(string, empty_string, 0);
180
+ return true;
181
+ }
182
+
183
+ #ifdef HAVE_MMAP
184
+ source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
185
+ if (source == MAP_FAILED) {
186
+ perror("Map failed");
187
+ return false;
188
+ }
189
+ #else
190
+ source = malloc(size);
191
+ if (source == NULL) {
192
+ return false;
193
+ }
194
+
195
+ ssize_t read_size = read(fd, (void *) source, size);
196
+ if (read_size < 0 || (size_t)read_size != size) {
197
+ perror("Read size is incorrect");
198
+ free((void *) source);
199
+ return false;
200
+ }
201
+ #endif
202
+
203
+ close(fd);
204
+ yp_string_mapped_init_internal(string, source, size);
205
+ return true;
206
+ #endif
207
+ }
@@ -0,0 +1,32 @@
1
+ #include "yarp/util/yp_string_list.h"
2
+
3
+ // Allocate a new yp_string_list_t.
4
+ yp_string_list_t *
5
+ yp_string_list_alloc(void) {
6
+ return (yp_string_list_t *) malloc(sizeof(yp_string_list_t));
7
+ }
8
+
9
+ // Initialize a yp_string_list_t with its default values.
10
+ void
11
+ yp_string_list_init(yp_string_list_t *string_list) {
12
+ string_list->strings = (yp_string_t *) malloc(sizeof(yp_string_t));
13
+ string_list->length = 0;
14
+ string_list->capacity = 1;
15
+ }
16
+
17
+ // Append a yp_string_t to the given string list.
18
+ void
19
+ yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
20
+ if (string_list->length + 1 > string_list->capacity) {
21
+ string_list->capacity *= 2;
22
+ string_list->strings = (yp_string_t *) realloc(string_list->strings, string_list->capacity * sizeof(yp_string_t));
23
+ }
24
+
25
+ string_list->strings[string_list->length++] = *string;
26
+ }
27
+
28
+ // Free the memory associated with the string list.
29
+ void
30
+ yp_string_list_free(yp_string_list_t *string_list) {
31
+ free(string_list->strings);
32
+ }
@@ -0,0 +1,20 @@
1
+ #include <ctype.h>
2
+ #include <stddef.h>
3
+
// Compare at most length bytes of two strings, ignoring case as defined by
// tolower(). Comparison stops early at the first differing byte or at a NUL
// byte present in both strings.
//
// Returns zero when the compared prefixes are equal, a negative value when
// string1 orders before string2, and a positive value when it orders after.
// A string that ends (hits NUL) before the other within length orders first.
// Bytes are compared as unsigned char values, so bytes >= 0x80 never produce
// a sign-flipped result.
int
yp_strncasecmp(const char *string1, const char *string2, size_t length) {
    for (size_t offset = 0; offset < length; offset++) {
        // <ctype.h> functions require a value representable as unsigned char.
        int left = tolower((unsigned char) string1[offset]);
        int right = tolower((unsigned char) string2[offset]);

        // This also covers either string ending first: its NUL compares as 0
        // against the other string's non-NUL byte.
        if (left != right) return left - right;

        // Both strings ended at the same position.
        if (left == '\0') break;
    }

    return 0;
}
@@ -0,0 +1,66 @@
1
+ #include "yarp/util/yp_strpbrk.h"
2
+
3
+ // This is the slow path that does care about the encoding.
4
+ static inline const char *
5
+ yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
6
+ size_t index = 0;
7
+
8
+ while (index < maximum) {
9
+ if (strchr(charset, source[index]) != NULL) {
10
+ return source + index;
11
+ }
12
+
13
+ size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
14
+ if (width == 0) {
15
+ return NULL;
16
+ }
17
+
18
+ index += width;
19
+ }
20
+
21
+ return NULL;
22
+ }
23
+
// Encoding-agnostic (fast) search. Examines the first maximum bytes of source
// one byte at a time and returns a pointer to the first byte that strchr finds
// in charset, or NULL if there is none. Because strchr treats the terminating
// NUL of charset as part of the string, a NUL byte in source is reported as a
// match as well.
static inline const char *
yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
    for (size_t index = 0; index < maximum; index++) {
        const char *cursor = source + index;
        if (strchr(charset, *cursor) != NULL) return cursor;
    }

    return NULL;
}
39
+
40
+ // Here we have rolled our own version of strpbrk. The standard library strpbrk
41
+ // has undefined behavior when the source string is not null-terminated. We want
42
+ // to support strings that are not null-terminated because yp_parse does not
43
+ // have the contract that the string is null-terminated. (This is desirable
44
+ // because it means the extension can call yp_parse with the result of a call to
45
+ // mmap).
46
+ //
47
+ // The standard library strpbrk also does not support passing a maximum length
48
+ // to search. We want to support this for the reason mentioned above, but we
49
+ // also don't want it to stop on null bytes. Ruby actually allows null bytes
50
+ // within strings, comments, regular expressions, etc. So we need to be able to
51
+ // skip past them.
52
+ //
53
+ // Finally, we want to support encodings wherein the charset could contain
54
+ // characters that are trailing bytes of multi-byte characters. For example, in
55
+ // Shift-JIS, the backslash character can be a trailing byte. In that case we
56
+ // need to take a slower path and iterate one multi-byte character at a time.
57
+ const char *
58
+ yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
59
+ if (length <= 0) {
60
+ return NULL;
61
+ } else if (parser->encoding_changed && parser->encoding.multibyte) {
62
+ return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
63
+ } else {
64
+ return yp_strpbrk_single_byte(source, charset, (size_t) length);
65
+ }
66
+ }