yarp 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,229 @@
1
+ #include "yarp/util/yp_char.h"
2
+
3
+ #define YP_CHAR_BIT_WHITESPACE (1 << 0)
4
+ #define YP_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
5
+ #define YP_CHAR_BIT_REGEXP_OPTION (1 << 2)
6
+
7
+ #define YP_NUMBER_BIT_BINARY_DIGIT (1 << 0)
8
+ #define YP_NUMBER_BIT_BINARY_NUMBER (1 << 1)
9
+ #define YP_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
10
+ #define YP_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
11
+ #define YP_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
12
+ #define YP_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
13
+ #define YP_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
14
+ #define YP_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
15
+
16
// Per-byte character class flags, indexed by the byte value. Bit 0 (value 1)
// marks whitespace, bit 1 (value 2) marks inline whitespace, and bit 2
// (value 4) marks a regexp option character. Every byte that is not listed
// below is implicitly zero, i.e. belongs to none of these classes.
static const unsigned char yp_char_table[256] = {
    [0x09] = 3, // \t  whitespace + inline whitespace
    [0x0A] = 1, // \n  whitespace only
    [0x0B] = 3, // \v  whitespace + inline whitespace
    [0x0C] = 3, // \f  whitespace + inline whitespace
    [0x0D] = 3, // \r  whitespace + inline whitespace
    [0x20] = 3, // ' ' whitespace + inline whitespace
    [0x65] = 4, // e   regexp option
    [0x69] = 4, // i   regexp option
    [0x6D] = 4, // m   regexp option
    [0x6E] = 4, // n   regexp option
    [0x6F] = 4, // o   regexp option
    [0x73] = 4, // s   regexp option
    [0x75] = 4, // u   regexp option
    [0x78] = 4, // x   regexp option
};
35
+
36
// Per-byte numeric class flags, indexed by the byte value. From the lowest
// bit upward the flags are: binary digit (0x01), binary number (0x02), octal
// digit (0x04), octal number (0x08), decimal digit (0x10), decimal number
// (0x20), hexadecimal digit (0x40), hexadecimal number (0x80). The "number"
// flags are the "digit" flags plus the underscore. Every byte that is not
// listed below is implicitly zero.
static const unsigned char yp_number_table[256] = {
    // '0' and '1' are valid in every base.
    [0x30] = 0xff,
    [0x31] = 0xff,
    // '2' through '7' are valid in octal, decimal, and hexadecimal.
    [0x32] = 0xfc,
    [0x33] = 0xfc,
    [0x34] = 0xfc,
    [0x35] = 0xfc,
    [0x36] = 0xfc,
    [0x37] = 0xfc,
    // '8' and '9' are valid in decimal and hexadecimal.
    [0x38] = 0xf0,
    [0x39] = 0xf0,
    // 'A' through 'F' are valid in hexadecimal only.
    [0x41] = 0xc0,
    [0x42] = 0xc0,
    [0x43] = 0xc0,
    [0x44] = 0xc0,
    [0x45] = 0xc0,
    [0x46] = 0xc0,
    // '_' sets each of the four "number" flags and none of the "digit" flags.
    [0x5F] = 0xaa,
    // 'a' through 'f' are valid in hexadecimal only.
    [0x61] = 0xc0,
    [0x62] = 0xc0,
    [0x63] = 0xc0,
    [0x64] = 0xc0,
    [0x65] = 0xc0,
    [0x66] = 0xc0,
};
55
+
56
+ static inline size_t
57
+ yp_strspn_char_kind(const char *string, ptrdiff_t length, unsigned char kind) {
58
+ if (length <= 0) return 0;
59
+
60
+ size_t size = 0;
61
+ size_t maximum = (size_t) length;
62
+
63
+ while (size < maximum && (yp_char_table[(unsigned char) string[size]] & kind)) size++;
64
+ return size;
65
+ }
66
+
67
+ // Returns the number of characters at the start of the string that are
68
+ // whitespace. Disallows searching past the given maximum number of characters.
69
+ size_t
70
+ yp_strspn_whitespace(const char *string, ptrdiff_t length) {
71
+ return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE);
72
+ }
73
+
74
+ // Returns the number of characters at the start of the string that are
75
+ // whitespace while also tracking the location of each newline. Disallows
76
+ // searching past the given maximum number of characters.
77
+ size_t
78
+ yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline) {
79
+ if (length <= 0) return 0;
80
+
81
+ size_t size = 0;
82
+ size_t maximum = (size_t) length;
83
+
84
+ while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) {
85
+ if (string[size] == '\n') {
86
+ if (stop_at_newline) {
87
+ return size + 1;
88
+ }
89
+ else {
90
+ yp_newline_list_append(newline_list, string + size);
91
+ }
92
+ }
93
+
94
+ size++;
95
+ }
96
+
97
+ return size;
98
+ }
99
+
100
+ // Returns the number of characters at the start of the string that are inline
101
+ // whitespace. Disallows searching past the given maximum number of characters.
102
+ size_t
103
+ yp_strspn_inline_whitespace(const char *string, ptrdiff_t length) {
104
+ return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE);
105
+ }
106
+
107
+ // Returns the number of characters at the start of the string that are regexp
108
+ // options. Disallows searching past the given maximum number of characters.
109
+ size_t
110
+ yp_strspn_regexp_option(const char *string, ptrdiff_t length) {
111
+ return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION);
112
+ }
113
+
114
+ static inline bool
115
+ yp_char_is_char_kind(const char c, unsigned char kind) {
116
+ return (yp_char_table[(unsigned char) c] & kind) != 0;
117
+ }
118
+
119
+ // Returns true if the given character is a whitespace character.
120
+ bool
121
+ yp_char_is_whitespace(const char c) {
122
+ return yp_char_is_char_kind(c, YP_CHAR_BIT_WHITESPACE);
123
+ }
124
+
125
+ // Returns true if the given character is an inline whitespace character.
126
+ bool
127
+ yp_char_is_inline_whitespace(const char c) {
128
+ return yp_char_is_char_kind(c, YP_CHAR_BIT_INLINE_WHITESPACE);
129
+ }
130
+
131
+ static inline size_t
132
+ yp_strspn_number_kind(const char *string, ptrdiff_t length, unsigned char kind) {
133
+ if (length <= 0) return 0;
134
+
135
+ size_t size = 0;
136
+ size_t maximum = (size_t) length;
137
+
138
+ while (size < maximum && (yp_number_table[(unsigned char) string[size]] & kind)) size++;
139
+ return size;
140
+ }
141
+
142
+ // Returns the number of characters at the start of the string that are binary
143
+ // digits or underscores. Disallows searching past the given maximum number of
144
+ // characters.
145
+ size_t
146
+ yp_strspn_binary_number(const char *string, ptrdiff_t length) {
147
+ return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_BINARY_NUMBER);
148
+ }
149
+
150
+ // Returns the number of characters at the start of the string that are octal
151
+ // digits or underscores. Disallows searching past the given maximum number of
152
+ // characters.
153
+ size_t
154
+ yp_strspn_octal_number(const char *string, ptrdiff_t length) {
155
+ return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_OCTAL_NUMBER);
156
+ }
157
+
158
+ // Returns the number of characters at the start of the string that are decimal
159
+ // digits. Disallows searching past the given maximum number of characters.
160
+ size_t
161
+ yp_strspn_decimal_digit(const char *string, ptrdiff_t length) {
162
+ return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT);
163
+ }
164
+
165
+ // Returns the number of characters at the start of the string that are decimal
166
+ // digits or underscores. Disallows searching past the given maximum number of
167
+ // characters.
168
+ size_t
169
+ yp_strspn_decimal_number(const char *string, ptrdiff_t length) {
170
+ return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_NUMBER);
171
+ }
172
+
173
+ // Returns the number of characters at the start of the string that are
174
+ // hexadecimal digits. Disallows searching past the given maximum number of
175
+ // characters.
176
+ size_t
177
+ yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length) {
178
+ return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
179
+ }
180
+
181
+ // Returns the number of characters at the start of the string that are
182
+ // hexadecimal digits or underscores. Disallows searching past the given maximum
183
+ // number of characters.
184
+ size_t
185
+ yp_strspn_hexadecimal_number(const char *string, ptrdiff_t length) {
186
+ return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
187
+ }
188
+
189
+ static inline bool
190
+ yp_char_is_number_kind(const char c, unsigned char kind) {
191
+ return (yp_number_table[(unsigned char) c] & kind) != 0;
192
+ }
193
+
194
+ // Returns true if the given character is a binary digit.
195
+ bool
196
+ yp_char_is_binary_digit(const char c) {
197
+ return yp_char_is_number_kind(c, YP_NUMBER_BIT_BINARY_DIGIT);
198
+ }
199
+
200
+ // Returns true if the given character is an octal digit.
201
+ bool
202
+ yp_char_is_octal_digit(const char c) {
203
+ return yp_char_is_number_kind(c, YP_NUMBER_BIT_OCTAL_DIGIT);
204
+ }
205
+
206
+ // Returns true if the given character is a decimal digit.
207
+ bool
208
+ yp_char_is_decimal_digit(const char c) {
209
+ return yp_char_is_number_kind(c, YP_NUMBER_BIT_DECIMAL_DIGIT);
210
+ }
211
+
212
+ // Returns true if the given character is a hexadecimal digit.
213
+ bool
214
+ yp_char_is_hexadecimal_digit(const char c) {
215
+ return yp_char_is_number_kind(c, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
216
+ }
217
+
218
+ #undef YP_CHAR_BIT_WHITESPACE
219
+ #undef YP_CHAR_BIT_INLINE_WHITESPACE
220
+ #undef YP_CHAR_BIT_REGEXP_OPTION
221
+
222
+ #undef YP_NUMBER_BIT_BINARY_DIGIT
223
+ #undef YP_NUMBER_BIT_BINARY_NUMBER
224
+ #undef YP_NUMBER_BIT_OCTAL_DIGIT
225
+ #undef YP_NUMBER_BIT_OCTAL_NUMBER
226
+ #undef YP_NUMBER_BIT_DECIMAL_DIGIT
227
+ #undef YP_NUMBER_BIT_DECIMAL_NUMBER
228
+ #undef YP_NUMBER_BIT_HEXADECIMAL_NUMBER
229
+ #undef YP_NUMBER_BIT_HEXADECIMAL_DIGIT
@@ -0,0 +1,147 @@
1
+ #include "yarp/util/yp_constant_pool.h"
2
+
3
+ // Initialize a list of constant ids.
4
+ void
5
+ yp_constant_id_list_init(yp_constant_id_list_t *list) {
6
+ list->ids = NULL;
7
+ list->size = 0;
8
+ list->capacity = 0;
9
+ }
10
+
11
+ // Append a constant id to a list of constant ids. Returns false if any
12
+ // potential reallocations fail.
13
+ bool
14
+ yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id) {
15
+ if (list->size >= list->capacity) {
16
+ list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
17
+ list->ids = (yp_constant_id_t *) realloc(list->ids, sizeof(yp_constant_id_t) * list->capacity);
18
+ if (list->ids == NULL) return false;
19
+ }
20
+
21
+ list->ids[list->size++] = id;
22
+ return true;
23
+ }
24
+
25
+ // Checks if the current constant id list includes the given constant id.
26
+ bool
27
+ yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id) {
28
+ for (size_t index = 0; index < list->size; index++) {
29
+ if (list->ids[index] == id) return true;
30
+ }
31
+ return false;
32
+ }
33
+
34
+ // Get the memory size of a list of constant ids.
35
+ size_t
36
+ yp_constant_id_list_memsize(yp_constant_id_list_t *list) {
37
+ return sizeof(yp_constant_id_list_t) + (list->capacity * sizeof(yp_constant_id_t));
38
+ }
39
+
40
+ // Free the memory associated with a list of constant ids.
41
+ void
42
+ yp_constant_id_list_free(yp_constant_id_list_t *list) {
43
+ if (list->ids != NULL) {
44
+ free(list->ids);
45
+ }
46
+ }
47
+
48
// Hashes length bytes beginning at start using djb2: begin with 5381 and, for
// each byte, multiply the running value by 33 and add the byte (taken as an
// unsigned char). Arithmetic is done in size_t and wraps on overflow. The
// function favors simplicity and speed.
static inline size_t
yp_constant_pool_hash(const char *start, size_t length) {
    size_t hash = 5381;
    size_t position = 0;

    while (position < length) {
        hash = hash * 33 + (unsigned char) start[position];
        position++;
    }

    return hash;
}
61
+
62
+ // Resize a constant pool to a given capacity.
63
+ static inline bool
64
+ yp_constant_pool_resize(yp_constant_pool_t *pool) {
65
+ size_t next_capacity = pool->capacity * 2;
66
+ yp_constant_t *next_constants = calloc(next_capacity, sizeof(yp_constant_t));
67
+ if (next_constants == NULL) return false;
68
+
69
+ // For each constant in the current constant pool, rehash the content, find
70
+ // the index in the next constant pool, and insert it.
71
+ for (size_t index = 0; index < pool->capacity; index++) {
72
+ yp_constant_t *constant = &pool->constants[index];
73
+
74
+ // If an id is set on this constant, then we know we have content here.
75
+ // In this case we need to insert it into the next constant pool.
76
+ if (constant->id != 0) {
77
+ size_t next_index = constant->hash % next_capacity;
78
+
79
+ // This implements linear scanning to find the next available slot
80
+ // in case this index is already taken. We don't need to bother
81
+ // comparing the values since we know that the hash is unique.
82
+ while (next_constants[next_index].id != 0) {
83
+ next_index = (next_index + 1) % next_capacity;
84
+ }
85
+
86
+ // Here we copy over the entire constant, which includes the id so
87
+ // that they are consistent between resizes.
88
+ next_constants[next_index] = *constant;
89
+ }
90
+ }
91
+
92
+ free(pool->constants);
93
+ pool->constants = next_constants;
94
+ pool->capacity = next_capacity;
95
+ return true;
96
+ }
97
+
98
+ // Initialize a new constant pool with a given capacity.
99
+ bool
100
+ yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity) {
101
+ pool->constants = calloc(capacity, sizeof(yp_constant_t));
102
+ if (pool->constants == NULL) return false;
103
+
104
+ pool->size = 0;
105
+ pool->capacity = capacity;
106
+ return true;
107
+ }
108
+
109
+ // Insert a constant into a constant pool. Returns the id of the constant, or 0
110
+ // if any potential calls to resize fail.
111
+ yp_constant_id_t
112
+ yp_constant_pool_insert(yp_constant_pool_t *pool, const char *start, size_t length) {
113
+ if (pool->size >= (pool->capacity / 4 * 3)) {
114
+ if (!yp_constant_pool_resize(pool)) return 0;
115
+ }
116
+
117
+ size_t hash = yp_constant_pool_hash(start, length);
118
+ size_t index = hash % pool->capacity;
119
+ yp_constant_t *constant;
120
+
121
+ while (constant = &pool->constants[index], constant->id != 0) {
122
+ // If there is a collision, then we need to check if the content is the
123
+ // same as the content we are trying to insert. If it is, then we can
124
+ // return the id of the existing constant.
125
+ if ((constant->length == length) && strncmp(constant->start, start, length) == 0) {
126
+ return pool->constants[index].id;
127
+ }
128
+
129
+ index = (index + 1) % pool->capacity;
130
+ }
131
+
132
+ yp_constant_id_t id = (yp_constant_id_t)++pool->size;
133
+ pool->constants[index] = (yp_constant_t) {
134
+ .id = id,
135
+ .start = start,
136
+ .length = length,
137
+ .hash = hash
138
+ };
139
+
140
+ return id;
141
+ }
142
+
143
+ // Free the memory associated with a constant pool.
144
+ void
145
+ yp_constant_pool_free(yp_constant_pool_t *pool) {
146
+ free(pool->constants);
147
+ }
@@ -0,0 +1,50 @@
1
+ #include "yarp/util/yp_list.h"
2
+
3
+ // Initializes a new list.
4
+ YP_EXPORTED_FUNCTION void
5
+ yp_list_init(yp_list_t *list) {
6
+ *list = (yp_list_t) { .head = NULL, .tail = NULL };
7
+ }
8
+
9
+ // Returns true if the given list is empty.
10
+ YP_EXPORTED_FUNCTION bool
11
+ yp_list_empty_p(yp_list_t *list) {
12
+ return list->head == NULL;
13
+ }
14
+
15
+ YP_EXPORTED_FUNCTION uint32_t
16
+ yp_list_size(yp_list_t *list) {
17
+ yp_list_node_t *node = list->head;
18
+ uint32_t length = 0;
19
+
20
+ while (node != NULL) {
21
+ length++;
22
+ node = node->next;
23
+ }
24
+
25
+ return length;
26
+ }
27
+
28
+ // Append a node to the given list.
29
+ void
30
+ yp_list_append(yp_list_t *list, yp_list_node_t *node) {
31
+ if (list->head == NULL) {
32
+ list->head = node;
33
+ } else {
34
+ list->tail->next = node;
35
+ }
36
+ list->tail = node;
37
+ }
38
+
39
+ // Deallocate the internal state of the given list.
40
+ YP_EXPORTED_FUNCTION void
41
+ yp_list_free(yp_list_t *list) {
42
+ yp_list_node_t *node = list->head;
43
+ yp_list_node_t *next;
44
+
45
+ while (node != NULL) {
46
+ next = node->next;
47
+ free(node);
48
+ node = next;
49
+ }
50
+ }
@@ -0,0 +1,31 @@
1
+ #include "yarp/util/yp_memchr.h"
2
+
3
+ #define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
4
+
5
+ // We need to roll our own memchr to handle cases where the encoding changes and
6
+ // we need to search for a character in a buffer that could be the trailing byte
7
+ // of a multibyte character.
8
+ void *
9
+ yp_memchr(const void *memory, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding) {
10
+ if (encoding_changed && encoding->multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
11
+ const char *source = (const char *) memory;
12
+ size_t index = 0;
13
+
14
+ while (index < number) {
15
+ if (source[index] == character) {
16
+ return (void *) (source + index);
17
+ }
18
+
19
+ size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index));
20
+ if (width == 0) {
21
+ return NULL;
22
+ }
23
+
24
+ index += width;
25
+ }
26
+
27
+ return NULL;
28
+ } else {
29
+ return memchr(memory, character, number);
30
+ }
31
+ }
@@ -0,0 +1,119 @@
1
+ #include "yarp/util/yp_newline_list.h"
2
+
3
+ // Initialize a new newline list with the given capacity. Returns true if the
4
+ // allocation of the offsets succeeds, otherwise returns false.
5
+ bool
6
+ yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity) {
7
+ list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
8
+ if (list->offsets == NULL) return false;
9
+
10
+ list->start = start;
11
+
12
+ // This is 1 instead of 0 because we want to include the first line of the
13
+ // file as having offset 0, which is set because of calloc.
14
+ list->size = 1;
15
+ list->capacity = capacity;
16
+
17
+ list->last_index = 0;
18
+ list->last_offset = 0;
19
+
20
+ return true;
21
+ }
22
+
23
+ // Append a new offset to the newline list. Returns true if the reallocation of
24
+ // the offsets succeeds (if one was necessary), otherwise returns false.
25
+ bool
26
+ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
27
+ if (list->size == list->capacity) {
28
+ list->capacity = (list->capacity * 3) / 2;
29
+ list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t));
30
+ if (list->offsets == NULL) return false;
31
+ }
32
+
33
+ assert(cursor >= list->start);
34
+ size_t newline_offset = (size_t) (cursor - list->start + 1);
35
+ assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
36
+ list->offsets[list->size++] = newline_offset;
37
+
38
+ return true;
39
+ }
40
+
41
+ // Returns the line and column of the given offset, assuming we don't have any
42
+ // information about the previous index that we found.
43
+ static yp_line_column_t
44
+ yp_newline_list_line_column_search(yp_newline_list_t *list, size_t offset) {
45
+ size_t left = 0;
46
+ size_t right = list->size - 1;
47
+
48
+ while (left <= right) {
49
+ size_t mid = left + (right - left) / 2;
50
+
51
+ if (list->offsets[mid] == offset) {
52
+ return ((yp_line_column_t) { mid, 0 });
53
+ }
54
+
55
+ if (list->offsets[mid] < offset) {
56
+ left = mid + 1;
57
+ } else {
58
+ right = mid - 1;
59
+ }
60
+ }
61
+
62
+ return ((yp_line_column_t) { left - 1, offset - list->offsets[left - 1] });
63
+ }
64
+
65
+ // Returns the line and column of the given offset, assuming we know the last
66
+ // index that we found.
67
+ static yp_line_column_t
68
+ yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
69
+ if (offset > list->last_offset) {
70
+ size_t index = list->last_index;
71
+ while (index < list->size && list->offsets[index] < offset) {
72
+ index++;
73
+ }
74
+
75
+ if (index == list->size) {
76
+ return ((yp_line_column_t) { index - 1, offset - list->offsets[index - 1] });
77
+ }
78
+
79
+ return ((yp_line_column_t) { index, 0 });
80
+ } else {
81
+ size_t index = list->last_index;
82
+ while (index > 0 && list->offsets[index] > offset) {
83
+ index--;
84
+ }
85
+
86
+ if (index == 0) {
87
+ return ((yp_line_column_t) { 0, offset });
88
+ }
89
+
90
+ return ((yp_line_column_t) { index, offset - list->offsets[index - 1] });
91
+ }
92
+ }
93
+
94
+ // Returns the line and column of the given offset. If the offset is not in the
95
+ // list, the line and column of the closest offset less than the given offset
96
+ // are returned.
97
+ yp_line_column_t
98
+ yp_newline_list_line_column(yp_newline_list_t *list, const char *cursor) {
99
+ assert(cursor >= list->start);
100
+ size_t offset = (size_t) (cursor - list->start);
101
+ yp_line_column_t result;
102
+
103
+ if (list->last_offset == 0) {
104
+ result = yp_newline_list_line_column_search(list, offset);
105
+ } else {
106
+ result = yp_newline_list_line_column_scan(list, offset);
107
+ }
108
+
109
+ list->last_index = result.line;
110
+ list->last_offset = offset;
111
+
112
+ return result;
113
+ }
114
+
115
+ // Free the internal memory allocated for the newline list.
116
+ void
117
+ yp_newline_list_free(yp_newline_list_t *list) {
118
+ free(list->offsets);
119
+ }
@@ -0,0 +1,25 @@
1
+ #include "yarp/util/yp_state_stack.h"
2
+
3
+ // Initializes the state stack to an empty stack.
4
+ void
5
+ yp_state_stack_init(yp_state_stack_t *stack) {
6
+ *stack = 0;
7
+ }
8
+
9
+ // Pushes a value onto the stack.
10
+ void
11
+ yp_state_stack_push(yp_state_stack_t *stack, bool value) {
12
+ *stack = (*stack << 1) | (value & 1);
13
+ }
14
+
15
+ // Pops a value off the stack.
16
+ void
17
+ yp_state_stack_pop(yp_state_stack_t *stack) {
18
+ *stack >>= 1;
19
+ }
20
+
21
+ // Returns the value at the top of the stack.
22
+ bool
23
+ yp_state_stack_p(yp_state_stack_t *stack) {
24
+ return *stack & 1;
25
+ }