yarp 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +51 -0
- data/LICENSE.md +7 -0
- data/Makefile.in +79 -0
- data/README.md +86 -0
- data/config.h.in +25 -0
- data/config.yml +2147 -0
- data/configure +4487 -0
- data/docs/build_system.md +85 -0
- data/docs/building.md +26 -0
- data/docs/configuration.md +56 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +116 -0
- data/docs/extension.md +20 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/serialization.md +130 -0
- data/docs/testing.md +55 -0
- data/ext/yarp/api_node.c +3680 -0
- data/ext/yarp/api_pack.c +256 -0
- data/ext/yarp/extconf.rb +131 -0
- data/ext/yarp/extension.c +547 -0
- data/ext/yarp/extension.h +18 -0
- data/include/yarp/ast.h +1412 -0
- data/include/yarp/defines.h +54 -0
- data/include/yarp/diagnostic.h +24 -0
- data/include/yarp/enc/yp_encoding.h +94 -0
- data/include/yarp/node.h +36 -0
- data/include/yarp/pack.h +141 -0
- data/include/yarp/parser.h +389 -0
- data/include/yarp/regexp.h +19 -0
- data/include/yarp/unescape.h +42 -0
- data/include/yarp/util/yp_buffer.h +39 -0
- data/include/yarp/util/yp_char.h +75 -0
- data/include/yarp/util/yp_constant_pool.h +64 -0
- data/include/yarp/util/yp_list.h +67 -0
- data/include/yarp/util/yp_memchr.h +14 -0
- data/include/yarp/util/yp_newline_list.h +54 -0
- data/include/yarp/util/yp_state_stack.h +24 -0
- data/include/yarp/util/yp_string.h +57 -0
- data/include/yarp/util/yp_string_list.h +28 -0
- data/include/yarp/util/yp_strpbrk.h +29 -0
- data/include/yarp/version.h +5 -0
- data/include/yarp.h +69 -0
- data/lib/yarp/lex_compat.rb +759 -0
- data/lib/yarp/node.rb +7428 -0
- data/lib/yarp/pack.rb +185 -0
- data/lib/yarp/ripper_compat.rb +174 -0
- data/lib/yarp/serialize.rb +389 -0
- data/lib/yarp.rb +330 -0
- data/src/diagnostic.c +25 -0
- data/src/enc/yp_big5.c +79 -0
- data/src/enc/yp_euc_jp.c +85 -0
- data/src/enc/yp_gbk.c +88 -0
- data/src/enc/yp_shift_jis.c +83 -0
- data/src/enc/yp_tables.c +509 -0
- data/src/enc/yp_unicode.c +2320 -0
- data/src/enc/yp_windows_31j.c +83 -0
- data/src/node.c +2011 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +1782 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1576 -0
- data/src/token_type.c +347 -0
- data/src/unescape.c +576 -0
- data/src/util/yp_buffer.c +78 -0
- data/src/util/yp_char.c +229 -0
- data/src/util/yp_constant_pool.c +147 -0
- data/src/util/yp_list.c +50 -0
- data/src/util/yp_memchr.c +31 -0
- data/src/util/yp_newline_list.c +119 -0
- data/src/util/yp_state_stack.c +25 -0
- data/src/util/yp_string.c +207 -0
- data/src/util/yp_string_list.c +32 -0
- data/src/util/yp_strncasecmp.c +20 -0
- data/src/util/yp_strpbrk.c +66 -0
- data/src/yarp.c +13211 -0
- data/yarp.gemspec +100 -0
- metadata +125 -0
data/src/util/yp_char.c
ADDED
@@ -0,0 +1,229 @@
|
|
1
|
+
#include "yarp/util/yp_char.h"
|
2
|
+
|
3
|
+
// Bit flags stored in yp_char_table.
#define YP_CHAR_BIT_WHITESPACE 0x01        // '\t' '\n' '\v' '\f' '\r' ' '
#define YP_CHAR_BIT_INLINE_WHITESPACE 0x02 // the same set without '\n'
#define YP_CHAR_BIT_REGEXP_OPTION 0x04     // e i m n o s u x

// Bit flags stored in yp_number_table. The *_DIGIT flags mark the digits of a
// base; the matching *_NUMBER flags additionally mark the underscore.
#define YP_NUMBER_BIT_BINARY_DIGIT 0x01
#define YP_NUMBER_BIT_BINARY_NUMBER 0x02
#define YP_NUMBER_BIT_OCTAL_DIGIT 0x04
#define YP_NUMBER_BIT_OCTAL_NUMBER 0x08
#define YP_NUMBER_BIT_DECIMAL_DIGIT 0x10
#define YP_NUMBER_BIT_DECIMAL_NUMBER 0x20
#define YP_NUMBER_BIT_HEXADECIMAL_DIGIT 0x40
#define YP_NUMBER_BIT_HEXADECIMAL_NUMBER 0x80
|
15
|
+
|
16
|
+
// Lookup table indexed by byte value. Each entry is a bitmask: bit 0 marks
// whitespace, bit 1 marks inline whitespace (whitespace other than '\n') and
// bit 2 marks regular expression option letters. Every byte that is not listed
// here is implicitly 0.
static const unsigned char yp_char_table[256] = {
    [0x09] = 3, // '\t'
    [0x0A] = 1, // '\n'
    [0x0B] = 3, // '\v'
    [0x0C] = 3, // '\f'
    [0x0D] = 3, // '\r'
    [0x20] = 3, // ' '
    [0x65] = 4, // 'e'
    [0x69] = 4, // 'i'
    [0x6D] = 4, // 'm'
    [0x6E] = 4, // 'n'
    [0x6F] = 4, // 'o'
    [0x73] = 4, // 's'
    [0x75] = 4, // 'u'
    [0x78] = 4, // 'x'
};
|
35
|
+
|
36
|
+
// Lookup table indexed by byte value. Each entry is a bitmask describing which
// numeric bases accept the byte as a digit (bits 0, 2, 4, 6) and which accept
// it inside a number literal, i.e. digit or underscore (bits 1, 3, 5, 7).
// Every byte that is not listed here is implicitly 0x00.
static const unsigned char yp_number_table[256] = {
    // '0' and '1': valid in every base.
    [0x30] = 0xff, [0x31] = 0xff,
    // '2' through '7': octal, decimal and hexadecimal.
    [0x32] = 0xfc, [0x33] = 0xfc, [0x34] = 0xfc,
    [0x35] = 0xfc, [0x36] = 0xfc, [0x37] = 0xfc,
    // '8' and '9': decimal and hexadecimal.
    [0x38] = 0xf0, [0x39] = 0xf0,
    // 'A' through 'F': hexadecimal only.
    [0x41] = 0xc0, [0x42] = 0xc0, [0x43] = 0xc0,
    [0x44] = 0xc0, [0x45] = 0xc0, [0x46] = 0xc0,
    // '_': a separator in every base, but never a digit.
    [0x5F] = 0xaa,
    // 'a' through 'f': hexadecimal only.
    [0x61] = 0xc0, [0x62] = 0xc0, [0x63] = 0xc0,
    [0x64] = 0xc0, [0x65] = 0xc0, [0x66] = 0xc0,
};
|
55
|
+
|
56
|
+
static inline size_t
|
57
|
+
yp_strspn_char_kind(const char *string, ptrdiff_t length, unsigned char kind) {
|
58
|
+
if (length <= 0) return 0;
|
59
|
+
|
60
|
+
size_t size = 0;
|
61
|
+
size_t maximum = (size_t) length;
|
62
|
+
|
63
|
+
while (size < maximum && (yp_char_table[(unsigned char) string[size]] & kind)) size++;
|
64
|
+
return size;
|
65
|
+
}
|
66
|
+
|
67
|
+
// Returns the number of characters at the start of the string that are
|
68
|
+
// whitespace. Disallows searching past the given maximum number of characters.
|
69
|
+
size_t
|
70
|
+
yp_strspn_whitespace(const char *string, ptrdiff_t length) {
|
71
|
+
return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE);
|
72
|
+
}
|
73
|
+
|
74
|
+
// Returns the number of characters at the start of the string that are
|
75
|
+
// whitespace while also tracking the location of each newline. Disallows
|
76
|
+
// searching past the given maximum number of characters.
|
77
|
+
size_t
|
78
|
+
yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline) {
|
79
|
+
if (length <= 0) return 0;
|
80
|
+
|
81
|
+
size_t size = 0;
|
82
|
+
size_t maximum = (size_t) length;
|
83
|
+
|
84
|
+
while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) {
|
85
|
+
if (string[size] == '\n') {
|
86
|
+
if (stop_at_newline) {
|
87
|
+
return size + 1;
|
88
|
+
}
|
89
|
+
else {
|
90
|
+
yp_newline_list_append(newline_list, string + size);
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
size++;
|
95
|
+
}
|
96
|
+
|
97
|
+
return size;
|
98
|
+
}
|
99
|
+
|
100
|
+
// Returns the number of characters at the start of the string that are inline
|
101
|
+
// whitespace. Disallows searching past the given maximum number of characters.
|
102
|
+
size_t
|
103
|
+
yp_strspn_inline_whitespace(const char *string, ptrdiff_t length) {
|
104
|
+
return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE);
|
105
|
+
}
|
106
|
+
|
107
|
+
// Returns the number of characters at the start of the string that are regexp
|
108
|
+
// options. Disallows searching past the given maximum number of characters.
|
109
|
+
size_t
|
110
|
+
yp_strspn_regexp_option(const char *string, ptrdiff_t length) {
|
111
|
+
return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION);
|
112
|
+
}
|
113
|
+
|
114
|
+
static inline bool
|
115
|
+
yp_char_is_char_kind(const char c, unsigned char kind) {
|
116
|
+
return (yp_char_table[(unsigned char) c] & kind) != 0;
|
117
|
+
}
|
118
|
+
|
119
|
+
// Returns true if the given character is a whitespace character.
|
120
|
+
bool
|
121
|
+
yp_char_is_whitespace(const char c) {
|
122
|
+
return yp_char_is_char_kind(c, YP_CHAR_BIT_WHITESPACE);
|
123
|
+
}
|
124
|
+
|
125
|
+
// Returns true if the given character is an inline whitespace character.
|
126
|
+
bool
|
127
|
+
yp_char_is_inline_whitespace(const char c) {
|
128
|
+
return yp_char_is_char_kind(c, YP_CHAR_BIT_INLINE_WHITESPACE);
|
129
|
+
}
|
130
|
+
|
131
|
+
static inline size_t
|
132
|
+
yp_strspn_number_kind(const char *string, ptrdiff_t length, unsigned char kind) {
|
133
|
+
if (length <= 0) return 0;
|
134
|
+
|
135
|
+
size_t size = 0;
|
136
|
+
size_t maximum = (size_t) length;
|
137
|
+
|
138
|
+
while (size < maximum && (yp_number_table[(unsigned char) string[size]] & kind)) size++;
|
139
|
+
return size;
|
140
|
+
}
|
141
|
+
|
142
|
+
// Returns the number of characters at the start of the string that are binary
|
143
|
+
// digits or underscores. Disallows searching past the given maximum number of
|
144
|
+
// characters.
|
145
|
+
size_t
|
146
|
+
yp_strspn_binary_number(const char *string, ptrdiff_t length) {
|
147
|
+
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_BINARY_NUMBER);
|
148
|
+
}
|
149
|
+
|
150
|
+
// Returns the number of characters at the start of the string that are octal
|
151
|
+
// digits or underscores. Disallows searching past the given maximum number of
|
152
|
+
// characters.
|
153
|
+
size_t
|
154
|
+
yp_strspn_octal_number(const char *string, ptrdiff_t length) {
|
155
|
+
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_OCTAL_NUMBER);
|
156
|
+
}
|
157
|
+
|
158
|
+
// Returns the number of characters at the start of the string that are decimal
|
159
|
+
// digits. Disallows searching past the given maximum number of characters.
|
160
|
+
size_t
|
161
|
+
yp_strspn_decimal_digit(const char *string, ptrdiff_t length) {
|
162
|
+
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT);
|
163
|
+
}
|
164
|
+
|
165
|
+
// Returns the number of characters at the start of the string that are decimal
|
166
|
+
// digits or underscores. Disallows searching past the given maximum number of
|
167
|
+
// characters.
|
168
|
+
size_t
|
169
|
+
yp_strspn_decimal_number(const char *string, ptrdiff_t length) {
|
170
|
+
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_NUMBER);
|
171
|
+
}
|
172
|
+
|
173
|
+
// Returns the number of characters at the start of the string that are
|
174
|
+
// hexadecimal digits. Disallows searching past the given maximum number of
|
175
|
+
// characters.
|
176
|
+
size_t
|
177
|
+
yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length) {
|
178
|
+
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
|
179
|
+
}
|
180
|
+
|
181
|
+
// Returns the number of characters at the start of the string that are
|
182
|
+
// hexadecimal digits or underscores. Disallows searching past the given maximum
|
183
|
+
// number of characters.
|
184
|
+
size_t
|
185
|
+
yp_strspn_hexadecimal_number(const char *string, ptrdiff_t length) {
|
186
|
+
return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
|
187
|
+
}
|
188
|
+
|
189
|
+
static inline bool
|
190
|
+
yp_char_is_number_kind(const char c, unsigned char kind) {
|
191
|
+
return (yp_number_table[(unsigned char) c] & kind) != 0;
|
192
|
+
}
|
193
|
+
|
194
|
+
// Returns true if the given character is a binary digit.
|
195
|
+
bool
|
196
|
+
yp_char_is_binary_digit(const char c) {
|
197
|
+
return yp_char_is_number_kind(c, YP_NUMBER_BIT_BINARY_DIGIT);
|
198
|
+
}
|
199
|
+
|
200
|
+
// Returns true if the given character is an octal digit.
|
201
|
+
bool
|
202
|
+
yp_char_is_octal_digit(const char c) {
|
203
|
+
return yp_char_is_number_kind(c, YP_NUMBER_BIT_OCTAL_DIGIT);
|
204
|
+
}
|
205
|
+
|
206
|
+
// Returns true if the given character is a decimal digit.
|
207
|
+
bool
|
208
|
+
yp_char_is_decimal_digit(const char c) {
|
209
|
+
return yp_char_is_number_kind(c, YP_NUMBER_BIT_DECIMAL_DIGIT);
|
210
|
+
}
|
211
|
+
|
212
|
+
// Returns true if the given character is a hexadecimal digit.
|
213
|
+
bool
|
214
|
+
yp_char_is_hexadecimal_digit(const char c) {
|
215
|
+
return yp_char_is_number_kind(c, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
|
216
|
+
}
|
217
|
+
|
218
|
+
#undef YP_CHAR_BIT_WHITESPACE
|
219
|
+
#undef YP_CHAR_BIT_INLINE_WHITESPACE
|
220
|
+
#undef YP_CHAR_BIT_REGEXP_OPTION
|
221
|
+
|
222
|
+
#undef YP_NUMBER_BIT_BINARY_DIGIT
|
223
|
+
#undef YP_NUMBER_BIT_BINARY_NUMBER
|
224
|
+
#undef YP_NUMBER_BIT_OCTAL_DIGIT
|
225
|
+
#undef YP_NUMBER_BIT_OCTAL_NUMBER
|
226
|
+
#undef YP_NUMBER_BIT_DECIMAL_DIGIT
|
227
|
+
#undef YP_NUMBER_BIT_DECIMAL_NUMBER
|
228
|
+
#undef YP_NUMBER_BIT_HEXADECIMAL_NUMBER
|
229
|
+
#undef YP_NUMBER_BIT_HEXADECIMAL_DIGIT
|
@@ -0,0 +1,147 @@
|
|
1
|
+
#include "yarp/util/yp_constant_pool.h"
|
2
|
+
|
3
|
+
// Initialize a list of constant ids.
|
4
|
+
void
|
5
|
+
yp_constant_id_list_init(yp_constant_id_list_t *list) {
|
6
|
+
list->ids = NULL;
|
7
|
+
list->size = 0;
|
8
|
+
list->capacity = 0;
|
9
|
+
}
|
10
|
+
|
11
|
+
// Append a constant id to a list of constant ids. Returns false if any
|
12
|
+
// potential reallocations fail.
|
13
|
+
bool
|
14
|
+
yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id) {
|
15
|
+
if (list->size >= list->capacity) {
|
16
|
+
list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
|
17
|
+
list->ids = (yp_constant_id_t *) realloc(list->ids, sizeof(yp_constant_id_t) * list->capacity);
|
18
|
+
if (list->ids == NULL) return false;
|
19
|
+
}
|
20
|
+
|
21
|
+
list->ids[list->size++] = id;
|
22
|
+
return true;
|
23
|
+
}
|
24
|
+
|
25
|
+
// Checks if the current constant id list includes the given constant id.
|
26
|
+
bool
|
27
|
+
yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id) {
|
28
|
+
for (size_t index = 0; index < list->size; index++) {
|
29
|
+
if (list->ids[index] == id) return true;
|
30
|
+
}
|
31
|
+
return false;
|
32
|
+
}
|
33
|
+
|
34
|
+
// Get the memory size of a list of constant ids.
|
35
|
+
size_t
|
36
|
+
yp_constant_id_list_memsize(yp_constant_id_list_t *list) {
|
37
|
+
return sizeof(yp_constant_id_list_t) + (list->capacity * sizeof(yp_constant_id_t));
|
38
|
+
}
|
39
|
+
|
40
|
+
// Free the memory associated with a list of constant ids.
|
41
|
+
void
|
42
|
+
yp_constant_id_list_free(yp_constant_id_list_t *list) {
|
43
|
+
if (list->ids != NULL) {
|
44
|
+
free(list->ids);
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
// Hashes length bytes starting at start with the djb2 algorithm: begin with
// 5381 and, for every byte, multiply the running value by 33 and add the byte
// (treated as unsigned). Arithmetic wraps around in size_t.
static inline size_t
yp_constant_pool_hash(const char *start, size_t length) {
    const unsigned char *bytes = (const unsigned char *) start;
    size_t hash = 5381;

    while (length-- > 0) {
        hash = hash * 33 + *bytes++;
    }

    return hash;
}
|
61
|
+
|
62
|
+
// Resize a constant pool to a given capacity.
|
63
|
+
static inline bool
|
64
|
+
yp_constant_pool_resize(yp_constant_pool_t *pool) {
|
65
|
+
size_t next_capacity = pool->capacity * 2;
|
66
|
+
yp_constant_t *next_constants = calloc(next_capacity, sizeof(yp_constant_t));
|
67
|
+
if (next_constants == NULL) return false;
|
68
|
+
|
69
|
+
// For each constant in the current constant pool, rehash the content, find
|
70
|
+
// the index in the next constant pool, and insert it.
|
71
|
+
for (size_t index = 0; index < pool->capacity; index++) {
|
72
|
+
yp_constant_t *constant = &pool->constants[index];
|
73
|
+
|
74
|
+
// If an id is set on this constant, then we know we have content here.
|
75
|
+
// In this case we need to insert it into the next constant pool.
|
76
|
+
if (constant->id != 0) {
|
77
|
+
size_t next_index = constant->hash % next_capacity;
|
78
|
+
|
79
|
+
// This implements linear scanning to find the next available slot
|
80
|
+
// in case this index is already taken. We don't need to bother
|
81
|
+
// comparing the values since we know that the hash is unique.
|
82
|
+
while (next_constants[next_index].id != 0) {
|
83
|
+
next_index = (next_index + 1) % next_capacity;
|
84
|
+
}
|
85
|
+
|
86
|
+
// Here we copy over the entire constant, which includes the id so
|
87
|
+
// that they are consistent between resizes.
|
88
|
+
next_constants[next_index] = *constant;
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
92
|
+
free(pool->constants);
|
93
|
+
pool->constants = next_constants;
|
94
|
+
pool->capacity = next_capacity;
|
95
|
+
return true;
|
96
|
+
}
|
97
|
+
|
98
|
+
// Initialize a new constant pool with a given capacity.
|
99
|
+
bool
|
100
|
+
yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity) {
|
101
|
+
pool->constants = calloc(capacity, sizeof(yp_constant_t));
|
102
|
+
if (pool->constants == NULL) return false;
|
103
|
+
|
104
|
+
pool->size = 0;
|
105
|
+
pool->capacity = capacity;
|
106
|
+
return true;
|
107
|
+
}
|
108
|
+
|
109
|
+
// Insert a constant into a constant pool. Returns the id of the constant, or 0
|
110
|
+
// if any potential calls to resize fail.
|
111
|
+
yp_constant_id_t
|
112
|
+
yp_constant_pool_insert(yp_constant_pool_t *pool, const char *start, size_t length) {
|
113
|
+
if (pool->size >= (pool->capacity / 4 * 3)) {
|
114
|
+
if (!yp_constant_pool_resize(pool)) return 0;
|
115
|
+
}
|
116
|
+
|
117
|
+
size_t hash = yp_constant_pool_hash(start, length);
|
118
|
+
size_t index = hash % pool->capacity;
|
119
|
+
yp_constant_t *constant;
|
120
|
+
|
121
|
+
while (constant = &pool->constants[index], constant->id != 0) {
|
122
|
+
// If there is a collision, then we need to check if the content is the
|
123
|
+
// same as the content we are trying to insert. If it is, then we can
|
124
|
+
// return the id of the existing constant.
|
125
|
+
if ((constant->length == length) && strncmp(constant->start, start, length) == 0) {
|
126
|
+
return pool->constants[index].id;
|
127
|
+
}
|
128
|
+
|
129
|
+
index = (index + 1) % pool->capacity;
|
130
|
+
}
|
131
|
+
|
132
|
+
yp_constant_id_t id = (yp_constant_id_t)++pool->size;
|
133
|
+
pool->constants[index] = (yp_constant_t) {
|
134
|
+
.id = id,
|
135
|
+
.start = start,
|
136
|
+
.length = length,
|
137
|
+
.hash = hash
|
138
|
+
};
|
139
|
+
|
140
|
+
return id;
|
141
|
+
}
|
142
|
+
|
143
|
+
// Free the memory associated with a constant pool.
|
144
|
+
void
|
145
|
+
yp_constant_pool_free(yp_constant_pool_t *pool) {
|
146
|
+
free(pool->constants);
|
147
|
+
}
|
data/src/util/yp_list.c
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#include "yarp/util/yp_list.h"
|
2
|
+
|
3
|
+
// Initializes a new list.
|
4
|
+
YP_EXPORTED_FUNCTION void
|
5
|
+
yp_list_init(yp_list_t *list) {
|
6
|
+
*list = (yp_list_t) { .head = NULL, .tail = NULL };
|
7
|
+
}
|
8
|
+
|
9
|
+
// Returns true if the given list is empty.
|
10
|
+
YP_EXPORTED_FUNCTION bool
|
11
|
+
yp_list_empty_p(yp_list_t *list) {
|
12
|
+
return list->head == NULL;
|
13
|
+
}
|
14
|
+
|
15
|
+
YP_EXPORTED_FUNCTION uint32_t
|
16
|
+
yp_list_size(yp_list_t *list) {
|
17
|
+
yp_list_node_t *node = list->head;
|
18
|
+
uint32_t length = 0;
|
19
|
+
|
20
|
+
while (node != NULL) {
|
21
|
+
length++;
|
22
|
+
node = node->next;
|
23
|
+
}
|
24
|
+
|
25
|
+
return length;
|
26
|
+
}
|
27
|
+
|
28
|
+
// Append a node to the given list.
|
29
|
+
void
|
30
|
+
yp_list_append(yp_list_t *list, yp_list_node_t *node) {
|
31
|
+
if (list->head == NULL) {
|
32
|
+
list->head = node;
|
33
|
+
} else {
|
34
|
+
list->tail->next = node;
|
35
|
+
}
|
36
|
+
list->tail = node;
|
37
|
+
}
|
38
|
+
|
39
|
+
// Deallocate the internal state of the given list.
|
40
|
+
YP_EXPORTED_FUNCTION void
|
41
|
+
yp_list_free(yp_list_t *list) {
|
42
|
+
yp_list_node_t *node = list->head;
|
43
|
+
yp_list_node_t *next;
|
44
|
+
|
45
|
+
while (node != NULL) {
|
46
|
+
next = node->next;
|
47
|
+
free(node);
|
48
|
+
node = next;
|
49
|
+
}
|
50
|
+
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#include "yarp/util/yp_memchr.h"
|
2
|
+
|
3
|
+
#define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
|
4
|
+
|
5
|
+
// We need to roll our own memchr to handle cases where the encoding changes and
|
6
|
+
// we need to search for a character in a buffer that could be the trailing byte
|
7
|
+
// of a multibyte character.
|
8
|
+
void *
|
9
|
+
yp_memchr(const void *memory, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding) {
|
10
|
+
if (encoding_changed && encoding->multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
|
11
|
+
const char *source = (const char *) memory;
|
12
|
+
size_t index = 0;
|
13
|
+
|
14
|
+
while (index < number) {
|
15
|
+
if (source[index] == character) {
|
16
|
+
return (void *) (source + index);
|
17
|
+
}
|
18
|
+
|
19
|
+
size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index));
|
20
|
+
if (width == 0) {
|
21
|
+
return NULL;
|
22
|
+
}
|
23
|
+
|
24
|
+
index += width;
|
25
|
+
}
|
26
|
+
|
27
|
+
return NULL;
|
28
|
+
} else {
|
29
|
+
return memchr(memory, character, number);
|
30
|
+
}
|
31
|
+
}
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#include "yarp/util/yp_newline_list.h"
|
2
|
+
|
3
|
+
// Initialize a new newline list with the given capacity. Returns true if the
|
4
|
+
// allocation of the offsets succeeds, otherwise returns false.
|
5
|
+
bool
|
6
|
+
yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity) {
|
7
|
+
list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
|
8
|
+
if (list->offsets == NULL) return false;
|
9
|
+
|
10
|
+
list->start = start;
|
11
|
+
|
12
|
+
// This is 1 instead of 0 because we want to include the first line of the
|
13
|
+
// file as having offset 0, which is set because of calloc.
|
14
|
+
list->size = 1;
|
15
|
+
list->capacity = capacity;
|
16
|
+
|
17
|
+
list->last_index = 0;
|
18
|
+
list->last_offset = 0;
|
19
|
+
|
20
|
+
return true;
|
21
|
+
}
|
22
|
+
|
23
|
+
// Append a new offset to the newline list. Returns true if the reallocation of
|
24
|
+
// the offsets succeeds (if one was necessary), otherwise returns false.
|
25
|
+
bool
|
26
|
+
yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
|
27
|
+
if (list->size == list->capacity) {
|
28
|
+
list->capacity = (list->capacity * 3) / 2;
|
29
|
+
list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t));
|
30
|
+
if (list->offsets == NULL) return false;
|
31
|
+
}
|
32
|
+
|
33
|
+
assert(cursor >= list->start);
|
34
|
+
size_t newline_offset = (size_t) (cursor - list->start + 1);
|
35
|
+
assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
|
36
|
+
list->offsets[list->size++] = newline_offset;
|
37
|
+
|
38
|
+
return true;
|
39
|
+
}
|
40
|
+
|
41
|
+
// Returns the line and column of the given offset, assuming we don't have any
|
42
|
+
// information about the previous index that we found.
|
43
|
+
static yp_line_column_t
|
44
|
+
yp_newline_list_line_column_search(yp_newline_list_t *list, size_t offset) {
|
45
|
+
size_t left = 0;
|
46
|
+
size_t right = list->size - 1;
|
47
|
+
|
48
|
+
while (left <= right) {
|
49
|
+
size_t mid = left + (right - left) / 2;
|
50
|
+
|
51
|
+
if (list->offsets[mid] == offset) {
|
52
|
+
return ((yp_line_column_t) { mid, 0 });
|
53
|
+
}
|
54
|
+
|
55
|
+
if (list->offsets[mid] < offset) {
|
56
|
+
left = mid + 1;
|
57
|
+
} else {
|
58
|
+
right = mid - 1;
|
59
|
+
}
|
60
|
+
}
|
61
|
+
|
62
|
+
return ((yp_line_column_t) { left - 1, offset - list->offsets[left - 1] });
|
63
|
+
}
|
64
|
+
|
65
|
+
// Returns the line and column of the given offset, assuming we know the last
|
66
|
+
// index that we found.
|
67
|
+
static yp_line_column_t
|
68
|
+
yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
|
69
|
+
if (offset > list->last_offset) {
|
70
|
+
size_t index = list->last_index;
|
71
|
+
while (index < list->size && list->offsets[index] < offset) {
|
72
|
+
index++;
|
73
|
+
}
|
74
|
+
|
75
|
+
if (index == list->size) {
|
76
|
+
return ((yp_line_column_t) { index - 1, offset - list->offsets[index - 1] });
|
77
|
+
}
|
78
|
+
|
79
|
+
return ((yp_line_column_t) { index, 0 });
|
80
|
+
} else {
|
81
|
+
size_t index = list->last_index;
|
82
|
+
while (index > 0 && list->offsets[index] > offset) {
|
83
|
+
index--;
|
84
|
+
}
|
85
|
+
|
86
|
+
if (index == 0) {
|
87
|
+
return ((yp_line_column_t) { 0, offset });
|
88
|
+
}
|
89
|
+
|
90
|
+
return ((yp_line_column_t) { index, offset - list->offsets[index - 1] });
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
// Returns the line and column of the given offset. If the offset is not in the
|
95
|
+
// list, the line and column of the closest offset less than the given offset
|
96
|
+
// are returned.
|
97
|
+
yp_line_column_t
|
98
|
+
yp_newline_list_line_column(yp_newline_list_t *list, const char *cursor) {
|
99
|
+
assert(cursor >= list->start);
|
100
|
+
size_t offset = (size_t) (cursor - list->start);
|
101
|
+
yp_line_column_t result;
|
102
|
+
|
103
|
+
if (list->last_offset == 0) {
|
104
|
+
result = yp_newline_list_line_column_search(list, offset);
|
105
|
+
} else {
|
106
|
+
result = yp_newline_list_line_column_scan(list, offset);
|
107
|
+
}
|
108
|
+
|
109
|
+
list->last_index = result.line;
|
110
|
+
list->last_offset = offset;
|
111
|
+
|
112
|
+
return result;
|
113
|
+
}
|
114
|
+
|
115
|
+
// Free the internal memory allocated for the newline list.
|
116
|
+
void
|
117
|
+
yp_newline_list_free(yp_newline_list_t *list) {
|
118
|
+
free(list->offsets);
|
119
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#include "yarp/util/yp_state_stack.h"
|
2
|
+
|
3
|
+
// Initializes the state stack to an empty stack.
|
4
|
+
void
|
5
|
+
yp_state_stack_init(yp_state_stack_t *stack) {
|
6
|
+
*stack = 0;
|
7
|
+
}
|
8
|
+
|
9
|
+
// Pushes a value onto the stack.
|
10
|
+
void
|
11
|
+
yp_state_stack_push(yp_state_stack_t *stack, bool value) {
|
12
|
+
*stack = (*stack << 1) | (value & 1);
|
13
|
+
}
|
14
|
+
|
15
|
+
// Pops a value off the stack.
|
16
|
+
void
|
17
|
+
yp_state_stack_pop(yp_state_stack_t *stack) {
|
18
|
+
*stack >>= 1;
|
19
|
+
}
|
20
|
+
|
21
|
+
// Returns the value at the top of the stack.
|
22
|
+
bool
|
23
|
+
yp_state_stack_p(yp_state_stack_t *stack) {
|
24
|
+
return *stack & 1;
|
25
|
+
}
|