prism 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +172 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +84 -0
  7. data/README.md +89 -0
  8. data/config.yml +2481 -0
  9. data/docs/build_system.md +74 -0
  10. data/docs/building.md +22 -0
  11. data/docs/configuration.md +60 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +117 -0
  14. data/docs/fuzzing.md +93 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/mapping.md +117 -0
  17. data/docs/ripper.md +36 -0
  18. data/docs/ruby_api.md +25 -0
  19. data/docs/serialization.md +181 -0
  20. data/docs/testing.md +55 -0
  21. data/ext/prism/api_node.c +4725 -0
  22. data/ext/prism/api_pack.c +256 -0
  23. data/ext/prism/extconf.rb +136 -0
  24. data/ext/prism/extension.c +626 -0
  25. data/ext/prism/extension.h +18 -0
  26. data/include/prism/ast.h +1932 -0
  27. data/include/prism/defines.h +45 -0
  28. data/include/prism/diagnostic.h +231 -0
  29. data/include/prism/enc/pm_encoding.h +95 -0
  30. data/include/prism/node.h +41 -0
  31. data/include/prism/pack.h +141 -0
  32. data/include/prism/parser.h +418 -0
  33. data/include/prism/regexp.h +19 -0
  34. data/include/prism/unescape.h +48 -0
  35. data/include/prism/util/pm_buffer.h +51 -0
  36. data/include/prism/util/pm_char.h +91 -0
  37. data/include/prism/util/pm_constant_pool.h +78 -0
  38. data/include/prism/util/pm_list.h +67 -0
  39. data/include/prism/util/pm_memchr.h +14 -0
  40. data/include/prism/util/pm_newline_list.h +61 -0
  41. data/include/prism/util/pm_state_stack.h +24 -0
  42. data/include/prism/util/pm_string.h +61 -0
  43. data/include/prism/util/pm_string_list.h +25 -0
  44. data/include/prism/util/pm_strpbrk.h +29 -0
  45. data/include/prism/version.h +4 -0
  46. data/include/prism.h +82 -0
  47. data/lib/prism/compiler.rb +465 -0
  48. data/lib/prism/debug.rb +157 -0
  49. data/lib/prism/desugar_compiler.rb +206 -0
  50. data/lib/prism/dispatcher.rb +2051 -0
  51. data/lib/prism/dsl.rb +750 -0
  52. data/lib/prism/ffi.rb +251 -0
  53. data/lib/prism/lex_compat.rb +838 -0
  54. data/lib/prism/mutation_compiler.rb +718 -0
  55. data/lib/prism/node.rb +14540 -0
  56. data/lib/prism/node_ext.rb +55 -0
  57. data/lib/prism/node_inspector.rb +68 -0
  58. data/lib/prism/pack.rb +185 -0
  59. data/lib/prism/parse_result/comments.rb +172 -0
  60. data/lib/prism/parse_result/newlines.rb +60 -0
  61. data/lib/prism/parse_result.rb +266 -0
  62. data/lib/prism/pattern.rb +239 -0
  63. data/lib/prism/ripper_compat.rb +174 -0
  64. data/lib/prism/serialize.rb +662 -0
  65. data/lib/prism/visitor.rb +470 -0
  66. data/lib/prism.rb +64 -0
  67. data/prism.gemspec +113 -0
  68. data/src/diagnostic.c +287 -0
  69. data/src/enc/pm_big5.c +52 -0
  70. data/src/enc/pm_euc_jp.c +58 -0
  71. data/src/enc/pm_gbk.c +61 -0
  72. data/src/enc/pm_shift_jis.c +56 -0
  73. data/src/enc/pm_tables.c +507 -0
  74. data/src/enc/pm_unicode.c +2324 -0
  75. data/src/enc/pm_windows_31j.c +56 -0
  76. data/src/node.c +2633 -0
  77. data/src/pack.c +493 -0
  78. data/src/prettyprint.c +2136 -0
  79. data/src/prism.c +14587 -0
  80. data/src/regexp.c +580 -0
  81. data/src/serialize.c +1899 -0
  82. data/src/token_type.c +349 -0
  83. data/src/unescape.c +637 -0
  84. data/src/util/pm_buffer.c +103 -0
  85. data/src/util/pm_char.c +272 -0
  86. data/src/util/pm_constant_pool.c +252 -0
  87. data/src/util/pm_list.c +41 -0
  88. data/src/util/pm_memchr.c +33 -0
  89. data/src/util/pm_newline_list.c +134 -0
  90. data/src/util/pm_state_stack.c +19 -0
  91. data/src/util/pm_string.c +200 -0
  92. data/src/util/pm_string_list.c +29 -0
  93. data/src/util/pm_strncasecmp.c +17 -0
  94. data/src/util/pm_strpbrk.c +66 -0
  95. metadata +138 -0
@@ -0,0 +1,103 @@
1
+ #include "prism/util/pm_buffer.h"
2
+
3
+ #define PRISM_BUFFER_INITIAL_SIZE 1024
4
+
5
+ // Return the size of the pm_buffer_t struct.
6
+ size_t
7
+ pm_buffer_sizeof(void) {
8
+ return sizeof(pm_buffer_t);
9
+ }
10
+
11
+ // Initialize a pm_buffer_t with its default values.
12
+ bool
13
+ pm_buffer_init(pm_buffer_t *buffer) {
14
+ buffer->length = 0;
15
+ buffer->capacity = PRISM_BUFFER_INITIAL_SIZE;
16
+
17
+ buffer->value = (char *) malloc(PRISM_BUFFER_INITIAL_SIZE);
18
+ return buffer->value != NULL;
19
+ }
20
+
21
+ #undef PRISM_BUFFER_INITIAL_SIZE
22
+
23
+ // Return the value of the buffer.
24
+ char *
25
+ pm_buffer_value(pm_buffer_t *buffer) {
26
+ return buffer->value;
27
+ }
28
+
29
+ // Return the length of the buffer.
30
+ size_t
31
+ pm_buffer_length(pm_buffer_t *buffer) {
32
+ return buffer->length;
33
+ }
34
+
35
+ // Append the given amount of space to the buffer.
36
+ static inline void
37
+ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
38
+ size_t next_length = buffer->length + length;
39
+
40
+ if (next_length > buffer->capacity) {
41
+ do {
42
+ buffer->capacity *= 2;
43
+ } while (next_length > buffer->capacity);
44
+
45
+ buffer->value = realloc(buffer->value, buffer->capacity);
46
+ }
47
+
48
+ buffer->length = next_length;
49
+ }
50
+
51
+ // Append a generic pointer to memory to the buffer.
52
+ static inline void
53
+ pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
54
+ pm_buffer_append_length(buffer, length);
55
+ memcpy(buffer->value + (buffer->length - length), source, length);
56
+ }
57
+
58
+ // Append the given amount of space as zeroes to the buffer.
59
+ void
60
+ pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) {
61
+ pm_buffer_append_length(buffer, length);
62
+ memset(buffer->value + (buffer->length - length), 0, length);
63
+ }
64
+
65
+ // Append a string to the buffer.
66
+ void
67
+ pm_buffer_append_str(pm_buffer_t *buffer, const char *value, size_t length) {
68
+ pm_buffer_append(buffer, value, length);
69
+ }
70
+
71
+ // Append a list of bytes to the buffer.
72
+ void
73
+ pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length) {
74
+ pm_buffer_append(buffer, (const char *) value, length);
75
+ }
76
+
77
+ // Append a single byte to the buffer.
78
+ void
79
+ pm_buffer_append_u8(pm_buffer_t *buffer, uint8_t value) {
80
+ const void *source = &value;
81
+ pm_buffer_append(buffer, source, sizeof(uint8_t));
82
+ }
83
+
84
+ // Append a 32-bit unsigned integer to the buffer.
85
+ void
86
+ pm_buffer_append_u32(pm_buffer_t *buffer, uint32_t value) {
87
+ if (value < 128) {
88
+ pm_buffer_append_u8(buffer, (uint8_t) value);
89
+ } else {
90
+ uint32_t n = value;
91
+ while (n >= 128) {
92
+ pm_buffer_append_u8(buffer, (uint8_t) (n | 128));
93
+ n >>= 7;
94
+ }
95
+ pm_buffer_append_u8(buffer, (uint8_t) n);
96
+ }
97
+ }
98
+
99
+ // Free the memory associated with the buffer.
100
+ void
101
+ pm_buffer_free(pm_buffer_t *buffer) {
102
+ free(buffer->value);
103
+ }
@@ -0,0 +1,272 @@
1
+ #include "prism/util/pm_char.h"
2
+
3
+ #define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
4
+ #define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
5
+ #define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)
6
+
7
+ #define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
8
+ #define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1)
9
+ #define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
10
+ #define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
11
+ #define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
12
+ #define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
13
+ #define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
14
+ #define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
15
+
16
// Lookup table of character-class flags, indexed by byte value. Each entry is
// a bitwise OR of 1 (whitespace), 2 (inline whitespace) and 4 (regexp option).
// Every byte that is not listed explicitly has no flags set. The character
// names in the comments are the ASCII meanings of the indices.
static const uint8_t pm_byte_table[256] = {
    [0x09] = 3, // horizontal tab: whitespace + inline whitespace
    [0x0A] = 1, // line feed: whitespace only
    [0x0B] = 3, // vertical tab
    [0x0C] = 3, // form feed
    [0x0D] = 3, // carriage return
    [0x20] = 3, // space
    [0x65] = 4, // e
    [0x69] = 4, // i
    [0x6D] = 4, // m
    [0x6E] = 4, // n
    [0x6F] = 4, // o
    [0x73] = 4, // s
    [0x75] = 4, // u
    [0x78] = 4, // x
};
35
+
36
// Lookup table of numeric-class flags, indexed by byte value. Bits 0, 2, 4 and
// 6 mark a byte as a binary, octal, decimal or hexadecimal digit; bits 1, 3, 5
// and 7 mark it as allowed inside a binary, octal, decimal or hexadecimal
// number (digits plus the underscore). Every byte that is not listed
// explicitly has no flags set. The character names in the comments are the
// ASCII meanings of the indices.
static const uint8_t pm_number_table[256] = {
    // 0 and 1: valid in every base.
    [0x30] = 0xff, [0x31] = 0xff,

    // 2 through 7: octal, decimal and hexadecimal.
    [0x32] = 0xfc, [0x33] = 0xfc, [0x34] = 0xfc,
    [0x35] = 0xfc, [0x36] = 0xfc, [0x37] = 0xfc,

    // 8 and 9: decimal and hexadecimal.
    [0x38] = 0xf0, [0x39] = 0xf0,

    // A through F: hexadecimal only.
    [0x41] = 0xc0, [0x42] = 0xc0, [0x43] = 0xc0,
    [0x44] = 0xc0, [0x45] = 0xc0, [0x46] = 0xc0,

    // Underscore: allowed inside a number of any base, but never a digit.
    [0x5F] = 0xaa,

    // a through f: hexadecimal only.
    [0x61] = 0xc0, [0x62] = 0xc0, [0x63] = 0xc0,
    [0x64] = 0xc0, [0x65] = 0xc0, [0x66] = 0xc0,
};
55
+
56
+ static inline size_t
57
+ pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
58
+ if (length <= 0) return 0;
59
+
60
+ size_t size = 0;
61
+ size_t maximum = (size_t) length;
62
+
63
+ while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
64
+ return size;
65
+ }
66
+
67
+ // Returns the number of characters at the start of the string that are
68
+ // whitespace. Disallows searching past the given maximum number of characters.
69
+ size_t
70
+ pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
71
+ return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
72
+ }
73
+
74
+ // Returns the number of characters at the start of the string that are
75
+ // whitespace while also tracking the location of each newline. Disallows
76
+ // searching past the given maximum number of characters.
77
+ size_t
78
+ pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
79
+ if (length <= 0) return 0;
80
+
81
+ size_t size = 0;
82
+ size_t maximum = (size_t) length;
83
+
84
+ while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
85
+ if (string[size] == '\n') {
86
+ pm_newline_list_append(newline_list, string + size);
87
+ }
88
+
89
+ size++;
90
+ }
91
+
92
+ return size;
93
+ }
94
+
95
+ // Returns the number of characters at the start of the string that are inline
96
+ // whitespace. Disallows searching past the given maximum number of characters.
97
+ size_t
98
+ pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
99
+ return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
100
+ }
101
+
102
+ // Returns the number of characters at the start of the string that are regexp
103
+ // options. Disallows searching past the given maximum number of characters.
104
+ size_t
105
+ pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
106
+ return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
107
+ }
108
+
109
+ static inline bool
110
+ pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
111
+ return (pm_byte_table[b] & kind) != 0;
112
+ }
113
+
114
+ // Returns true if the given character is a whitespace character.
115
+ bool
116
+ pm_char_is_whitespace(const uint8_t b) {
117
+ return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
118
+ }
119
+
120
+ // Returns true if the given character is an inline whitespace character.
121
+ bool
122
+ pm_char_is_inline_whitespace(const uint8_t b) {
123
+ return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
124
+ }
125
+
126
+ // Scan through the string and return the number of characters at the start of
127
+ // the string that match the given kind. Disallows searching past the given
128
+ // maximum number of characters.
129
+ static inline size_t
130
+ pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
131
+ if (length <= 0) return 0;
132
+
133
+ size_t size = 0;
134
+ size_t maximum = (size_t) length;
135
+
136
+ while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
137
+ return size;
138
+ }
139
+
140
+ // Scan through the string and return the number of characters at the start of
141
+ // the string that match the given kind. Disallows searching past the given
142
+ // maximum number of characters.
143
+ //
144
+ // Additionally, report the location of the last invalid underscore character
145
+ // found in the string through the out invalid parameter.
146
+ static inline size_t
147
+ pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
148
+ if (length <= 0) return 0;
149
+
150
+ size_t size = 0;
151
+ size_t maximum = (size_t) length;
152
+
153
+ bool underscore = false;
154
+ while (size < maximum && (pm_number_table[string[size]] & kind)) {
155
+ if (string[size] == '_') {
156
+ if (underscore) *invalid = string + size;
157
+ underscore = true;
158
+ } else {
159
+ underscore = false;
160
+ }
161
+
162
+ size++;
163
+ }
164
+
165
+ if (string[size - 1] == '_') *invalid = string + size - 1;
166
+ return size;
167
+ }
168
+
169
+ // Returns the number of characters at the start of the string that are binary
170
+ // digits or underscores. Disallows searching past the given maximum number of
171
+ // characters.
172
+ //
173
+ // If multiple underscores are found in a row or if an underscore is
174
+ // found at the end of the number, then the invalid pointer is set to the index
175
+ // of the first invalid underscore.
176
+ size_t
177
+ pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
178
+ return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
179
+ }
180
+
181
+ // Returns the number of characters at the start of the string that are octal
182
+ // digits or underscores. Disallows searching past the given maximum number of
183
+ // characters.
184
+ //
185
+ // If multiple underscores are found in a row or if an underscore is
186
+ // found at the end of the number, then the invalid pointer is set to the index
187
+ // of the first invalid underscore.
188
+ size_t
189
+ pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
190
+ return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
191
+ }
192
+
193
+ // Returns the number of characters at the start of the string that are decimal
194
+ // digits. Disallows searching past the given maximum number of characters.
195
+ size_t
196
+ pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
197
+ return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
198
+ }
199
+
200
+ // Returns the number of characters at the start of the string that are decimal
201
+ // digits or underscores. Disallows searching past the given maximum number of
202
+ // characters.
203
+ //
204
+ // If multiple underscores are found in a row or if an underscore is
205
+ // found at the end of the number, then the invalid pointer is set to the index
206
+ // of the first invalid underscore.
207
+ size_t
208
+ pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
209
+ return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
210
+ }
211
+
212
+ // Returns the number of characters at the start of the string that are
213
+ // hexadecimal digits. Disallows searching past the given maximum number of
214
+ // characters.
215
+ size_t
216
+ pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
217
+ return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
218
+ }
219
+
220
+ // Returns the number of characters at the start of the string that are
221
+ // hexadecimal digits or underscores. Disallows searching past the given maximum
222
+ // number of characters.
223
+ //
224
+ // If multiple underscores are found in a row or if an underscore is
225
+ // found at the end of the number, then the invalid pointer is set to the index
226
+ // of the first invalid underscore.
227
+ size_t
228
+ pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
229
+ return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
230
+ }
231
+
232
+ static inline bool
233
+ pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
234
+ return (pm_number_table[b] & kind) != 0;
235
+ }
236
+
237
+ // Returns true if the given character is a binary digit.
238
+ bool
239
+ pm_char_is_binary_digit(const uint8_t b) {
240
+ return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
241
+ }
242
+
243
+ // Returns true if the given character is an octal digit.
244
+ bool
245
+ pm_char_is_octal_digit(const uint8_t b) {
246
+ return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
247
+ }
248
+
249
+ // Returns true if the given character is a decimal digit.
250
+ bool
251
+ pm_char_is_decimal_digit(const uint8_t b) {
252
+ return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
253
+ }
254
+
255
+ // Returns true if the given character is a hexadecimal digit.
256
+ bool
257
+ pm_char_is_hexadecimal_digit(const uint8_t b) {
258
+ return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
259
+ }
260
+
261
+ #undef PRISM_CHAR_BIT_WHITESPACE
262
+ #undef PRISM_CHAR_BIT_INLINE_WHITESPACE
263
+ #undef PRISM_CHAR_BIT_REGEXP_OPTION
264
+
265
+ #undef PRISM_NUMBER_BIT_BINARY_DIGIT
266
+ #undef PRISM_NUMBER_BIT_BINARY_NUMBER
267
+ #undef PRISM_NUMBER_BIT_OCTAL_DIGIT
268
+ #undef PRISM_NUMBER_BIT_OCTAL_NUMBER
269
+ #undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
270
+ #undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
271
+ #undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
272
+ #undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
@@ -0,0 +1,252 @@
1
+ #include "prism/util/pm_constant_pool.h"
2
+
3
+ // Initialize a list of constant ids.
4
+ void
5
+ pm_constant_id_list_init(pm_constant_id_list_t *list) {
6
+ list->ids = NULL;
7
+ list->size = 0;
8
+ list->capacity = 0;
9
+ }
10
+
11
+ // Append a constant id to a list of constant ids. Returns false if any
12
+ // potential reallocations fail.
13
+ bool
14
+ pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
15
+ if (list->size >= list->capacity) {
16
+ list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
17
+ list->ids = (pm_constant_id_t *) realloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
18
+ if (list->ids == NULL) return false;
19
+ }
20
+
21
+ list->ids[list->size++] = id;
22
+ return true;
23
+ }
24
+
25
+ // Checks if the current constant id list includes the given constant id.
26
+ bool
27
+ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
28
+ for (size_t index = 0; index < list->size; index++) {
29
+ if (list->ids[index] == id) return true;
30
+ }
31
+ return false;
32
+ }
33
+
34
+ // Get the memory size of a list of constant ids.
35
+ size_t
36
+ pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
37
+ return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
38
+ }
39
+
40
+ // Free the memory associated with a list of constant ids.
41
+ void
42
+ pm_constant_id_list_free(pm_constant_id_list_t *list) {
43
+ if (list->ids != NULL) {
44
+ free(list->ids);
45
+ }
46
+ }
47
+
48
// Hash `length` bytes starting at `start` with the djb2 algorithm: begin at
// 5381 and, for each byte, multiply the running value by 33 and add the byte.
// Arithmetic is unsigned 32-bit, so it wraps on overflow.
static inline uint32_t
pm_constant_pool_hash(const uint8_t *start, size_t length) {
    uint32_t hash = 5381;
    const uint8_t *cursor = start;

    for (size_t remaining = length; remaining > 0; remaining--) {
        hash = hash * 33u + *cursor++;
    }

    return hash;
}
61
+
62
// Round `v` up to the nearest power of two (values that already are a power of
// two are returned unchanged; 0 yields 1). Based on
// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
static uint32_t
next_power_of_two(uint32_t v) {
    // Handle 0 up front so that the decrement below cannot wrap around.
    if (v == 0) return 1;

    // Smear the highest set bit of v - 1 into every lower position, then add
    // one to step up to the next power of two.
    uint32_t smeared = v - 1;
    for (unsigned int shift = 1; shift <= 16; shift *= 2) {
        smeared |= smeared >> shift;
    }

    return smeared + 1;
}
79
+
80
#ifndef NDEBUG
// Returns true if `size` is a power of two. Only compiled when assertions are
// enabled, since it is used solely inside assert().
//
// Zero is explicitly rejected: it has no bits set, so the usual
// size & (size - 1) test alone would accept it, and a capacity of zero would
// turn the `capacity - 1` index masks into all-ones.
static bool
is_power_of_two(uint32_t size) {
    return size != 0 && (size & (size - 1)) == 0;
}
#endif
86
+
87
+ // Resize a constant pool to a given capacity.
88
+ static inline bool
89
+ pm_constant_pool_resize(pm_constant_pool_t *pool) {
90
+ assert(is_power_of_two(pool->capacity));
91
+
92
+ uint32_t next_capacity = pool->capacity * 2;
93
+ if (next_capacity < pool->capacity) return false;
94
+
95
+ const uint32_t mask = next_capacity - 1;
96
+ const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
97
+
98
+ void *next = calloc(next_capacity, element_size);
99
+ if (next == NULL) return false;
100
+
101
+ pm_constant_pool_bucket_t *next_buckets = next;
102
+ pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
103
+
104
+ // For each bucket in the current constant pool, find the index in the
105
+ // next constant pool, and insert it.
106
+ for (uint32_t index = 0; index < pool->capacity; index++) {
107
+ pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
108
+
109
+ // If an id is set on this constant, then we know we have content here.
110
+ // In this case we need to insert it into the next constant pool.
111
+ if (bucket->id != 0) {
112
+ uint32_t next_index = bucket->hash & mask;
113
+
114
+ // This implements linear scanning to find the next available slot
115
+ // in case this index is already taken. We don't need to bother
116
+ // comparing the values since we know that the hash is unique.
117
+ while (next_buckets[next_index].id != 0) {
118
+ next_index = (next_index + 1) & mask;
119
+ }
120
+
121
+ // Here we copy over the entire bucket, which includes the id so
122
+ // that they are consistent between resizes.
123
+ next_buckets[next_index] = *bucket;
124
+ }
125
+ }
126
+
127
+ // The constants are stable with respect to hash table resizes.
128
+ memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
129
+
130
+ // pool->constants and pool->buckets are allocated out of the same chunk
131
+ // of memory, with the buckets coming first.
132
+ free(pool->buckets);
133
+ pool->constants = next_constants;
134
+ pool->buckets = next_buckets;
135
+ pool->capacity = next_capacity;
136
+ return true;
137
+ }
138
+
139
+ // Initialize a new constant pool with a given capacity.
140
+ bool
141
+ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
142
+ const uint32_t maximum = (~((uint32_t) 0));
143
+ if (capacity >= ((maximum / 2) + 1)) return false;
144
+
145
+ capacity = next_power_of_two(capacity);
146
+ const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
147
+ void *memory = calloc(capacity, element_size);
148
+ if (memory == NULL) return false;
149
+
150
+ pool->buckets = memory;
151
+ pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
152
+ pool->size = 0;
153
+ pool->capacity = capacity;
154
+ return true;
155
+ }
156
+
157
+ // Insert a constant into a constant pool and return its index in the pool.
158
+ static inline pm_constant_id_t
159
+ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, bool owned) {
160
+ if (pool->size >= (pool->capacity / 4 * 3)) {
161
+ if (!pm_constant_pool_resize(pool)) return 0;
162
+ }
163
+
164
+ assert(is_power_of_two(pool->capacity));
165
+ const uint32_t mask = pool->capacity - 1;
166
+
167
+ uint32_t hash = pm_constant_pool_hash(start, length);
168
+ uint32_t index = hash & mask;
169
+ pm_constant_pool_bucket_t *bucket;
170
+
171
+ while (bucket = &pool->buckets[index], bucket->id != 0) {
172
+ // If there is a collision, then we need to check if the content is the
173
+ // same as the content we are trying to insert. If it is, then we can
174
+ // return the id of the existing constant.
175
+ pm_constant_t *constant = &pool->constants[bucket->id - 1];
176
+
177
+ if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
178
+ // Since we have found a match, we need to check if this is
179
+ // attempting to insert a shared or an owned constant. We want to
180
+ // prefer shared constants since they don't require allocations.
181
+ if (owned) {
182
+ // If we're attempting to insert an owned constant and we have
183
+ // an existing constant, then either way we don't want the given
184
+ // memory. Either it's duplicated with the existing constant or
185
+ // it's not necessary because we have a shared version.
186
+ free((void *) start);
187
+ } else if (bucket->owned) {
188
+ // If we're attempting to insert a shared constant and the
189
+ // existing constant is owned, then we can free the owned
190
+ // constant and replace it with the shared constant.
191
+ free((void *) constant->start);
192
+ constant->start = start;
193
+ bucket->owned = false;
194
+ }
195
+
196
+ return bucket->id;
197
+ }
198
+
199
+ index = (index + 1) & mask;
200
+ }
201
+
202
+ // IDs are allocated starting at 1, since the value 0 denotes a non-existant
203
+ // constant.
204
+ uint32_t id = ++pool->size;
205
+ assert(pool->size < ((uint32_t) (1 << 31)));
206
+
207
+ *bucket = (pm_constant_pool_bucket_t) {
208
+ .id = (unsigned int) (id & 0x7FFFFFFF),
209
+ .owned = owned,
210
+ .hash = hash
211
+ };
212
+
213
+ pool->constants[id - 1] = (pm_constant_t) {
214
+ .start = start,
215
+ .length = length,
216
+ };
217
+
218
+ return id;
219
+ }
220
+
221
+ // Insert a constant into a constant pool. Returns the id of the constant, or 0
222
+ // if any potential calls to resize fail.
223
+ pm_constant_id_t
224
+ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
225
+ return pm_constant_pool_insert(pool, start, length, false);
226
+ }
227
+
228
+ // Insert a constant into a constant pool from memory that is now owned by the
229
+ // constant pool. Returns the id of the constant, or 0 if any potential calls to
230
+ // resize fail.
231
+ pm_constant_id_t
232
+ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
233
+ return pm_constant_pool_insert(pool, start, length, true);
234
+ }
235
+
236
+ // Free the memory associated with a constant pool.
237
+ void
238
+ pm_constant_pool_free(pm_constant_pool_t *pool) {
239
+ // For each constant in the current constant pool, free the contents if the
240
+ // contents are owned.
241
+ for (uint32_t index = 0; index < pool->capacity; index++) {
242
+ pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
243
+
244
+ // If an id is set on this constant, then we know we have content here.
245
+ if (bucket->id != 0 && bucket->owned) {
246
+ pm_constant_t *constant = &pool->constants[bucket->id - 1];
247
+ free((void *) constant->start);
248
+ }
249
+ }
250
+
251
+ free(pool->buckets);
252
+ }
@@ -0,0 +1,41 @@
1
+ #include "prism/util/pm_list.h"
2
+
3
+ // Returns true if the given list is empty.
4
+ PRISM_EXPORTED_FUNCTION bool
5
+ pm_list_empty_p(pm_list_t *list) {
6
+ return list->head == NULL;
7
+ }
8
+
9
+ // Returns the size of the list.
10
+ PRISM_EXPORTED_FUNCTION size_t
11
+ pm_list_size(pm_list_t *list) {
12
+ return list->size;
13
+ }
14
+
15
+ // Append a node to the given list.
16
+ void
17
+ pm_list_append(pm_list_t *list, pm_list_node_t *node) {
18
+ if (list->head == NULL) {
19
+ list->head = node;
20
+ } else {
21
+ list->tail->next = node;
22
+ }
23
+
24
+ list->tail = node;
25
+ list->size++;
26
+ }
27
+
28
+ // Deallocate the internal state of the given list.
29
+ PRISM_EXPORTED_FUNCTION void
30
+ pm_list_free(pm_list_t *list) {
31
+ pm_list_node_t *node = list->head;
32
+ pm_list_node_t *next;
33
+
34
+ while (node != NULL) {
35
+ next = node->next;
36
+ free(node);
37
+ node = next;
38
+ }
39
+
40
+ list->size = 0;
41
+ }