prism 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +172 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +84 -0
  7. data/README.md +89 -0
  8. data/config.yml +2481 -0
  9. data/docs/build_system.md +74 -0
  10. data/docs/building.md +22 -0
  11. data/docs/configuration.md +60 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +117 -0
  14. data/docs/fuzzing.md +93 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/mapping.md +117 -0
  17. data/docs/ripper.md +36 -0
  18. data/docs/ruby_api.md +25 -0
  19. data/docs/serialization.md +181 -0
  20. data/docs/testing.md +55 -0
  21. data/ext/prism/api_node.c +4725 -0
  22. data/ext/prism/api_pack.c +256 -0
  23. data/ext/prism/extconf.rb +136 -0
  24. data/ext/prism/extension.c +626 -0
  25. data/ext/prism/extension.h +18 -0
  26. data/include/prism/ast.h +1932 -0
  27. data/include/prism/defines.h +45 -0
  28. data/include/prism/diagnostic.h +231 -0
  29. data/include/prism/enc/pm_encoding.h +95 -0
  30. data/include/prism/node.h +41 -0
  31. data/include/prism/pack.h +141 -0
  32. data/include/prism/parser.h +418 -0
  33. data/include/prism/regexp.h +19 -0
  34. data/include/prism/unescape.h +48 -0
  35. data/include/prism/util/pm_buffer.h +51 -0
  36. data/include/prism/util/pm_char.h +91 -0
  37. data/include/prism/util/pm_constant_pool.h +78 -0
  38. data/include/prism/util/pm_list.h +67 -0
  39. data/include/prism/util/pm_memchr.h +14 -0
  40. data/include/prism/util/pm_newline_list.h +61 -0
  41. data/include/prism/util/pm_state_stack.h +24 -0
  42. data/include/prism/util/pm_string.h +61 -0
  43. data/include/prism/util/pm_string_list.h +25 -0
  44. data/include/prism/util/pm_strpbrk.h +29 -0
  45. data/include/prism/version.h +4 -0
  46. data/include/prism.h +82 -0
  47. data/lib/prism/compiler.rb +465 -0
  48. data/lib/prism/debug.rb +157 -0
  49. data/lib/prism/desugar_compiler.rb +206 -0
  50. data/lib/prism/dispatcher.rb +2051 -0
  51. data/lib/prism/dsl.rb +750 -0
  52. data/lib/prism/ffi.rb +251 -0
  53. data/lib/prism/lex_compat.rb +838 -0
  54. data/lib/prism/mutation_compiler.rb +718 -0
  55. data/lib/prism/node.rb +14540 -0
  56. data/lib/prism/node_ext.rb +55 -0
  57. data/lib/prism/node_inspector.rb +68 -0
  58. data/lib/prism/pack.rb +185 -0
  59. data/lib/prism/parse_result/comments.rb +172 -0
  60. data/lib/prism/parse_result/newlines.rb +60 -0
  61. data/lib/prism/parse_result.rb +266 -0
  62. data/lib/prism/pattern.rb +239 -0
  63. data/lib/prism/ripper_compat.rb +174 -0
  64. data/lib/prism/serialize.rb +662 -0
  65. data/lib/prism/visitor.rb +470 -0
  66. data/lib/prism.rb +64 -0
  67. data/prism.gemspec +113 -0
  68. data/src/diagnostic.c +287 -0
  69. data/src/enc/pm_big5.c +52 -0
  70. data/src/enc/pm_euc_jp.c +58 -0
  71. data/src/enc/pm_gbk.c +61 -0
  72. data/src/enc/pm_shift_jis.c +56 -0
  73. data/src/enc/pm_tables.c +507 -0
  74. data/src/enc/pm_unicode.c +2324 -0
  75. data/src/enc/pm_windows_31j.c +56 -0
  76. data/src/node.c +2633 -0
  77. data/src/pack.c +493 -0
  78. data/src/prettyprint.c +2136 -0
  79. data/src/prism.c +14587 -0
  80. data/src/regexp.c +580 -0
  81. data/src/serialize.c +1899 -0
  82. data/src/token_type.c +349 -0
  83. data/src/unescape.c +637 -0
  84. data/src/util/pm_buffer.c +103 -0
  85. data/src/util/pm_char.c +272 -0
  86. data/src/util/pm_constant_pool.c +252 -0
  87. data/src/util/pm_list.c +41 -0
  88. data/src/util/pm_memchr.c +33 -0
  89. data/src/util/pm_newline_list.c +134 -0
  90. data/src/util/pm_state_stack.c +19 -0
  91. data/src/util/pm_string.c +200 -0
  92. data/src/util/pm_string_list.c +29 -0
  93. data/src/util/pm_strncasecmp.c +17 -0
  94. data/src/util/pm_strpbrk.c +66 -0
  95. metadata +138 -0
@@ -0,0 +1,103 @@
1
+ #include "prism/util/pm_buffer.h"
2
+
3
+ #define PRISM_BUFFER_INITIAL_SIZE 1024
4
+
5
+ // Return the size of the pm_buffer_t struct.
6
+ size_t
7
+ pm_buffer_sizeof(void) {
8
+ return sizeof(pm_buffer_t);
9
+ }
10
+
11
+ // Initialize a pm_buffer_t with its default values.
12
+ bool
13
+ pm_buffer_init(pm_buffer_t *buffer) {
14
+ buffer->length = 0;
15
+ buffer->capacity = PRISM_BUFFER_INITIAL_SIZE;
16
+
17
+ buffer->value = (char *) malloc(PRISM_BUFFER_INITIAL_SIZE);
18
+ return buffer->value != NULL;
19
+ }
20
+
21
+ #undef PRISM_BUFFER_INITIAL_SIZE
22
+
23
+ // Return the value of the buffer.
24
+ char *
25
+ pm_buffer_value(pm_buffer_t *buffer) {
26
+ return buffer->value;
27
+ }
28
+
29
+ // Return the length of the buffer.
30
+ size_t
31
+ pm_buffer_length(pm_buffer_t *buffer) {
32
+ return buffer->length;
33
+ }
34
+
35
+ // Append the given amount of space to the buffer.
36
+ static inline void
37
+ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
38
+ size_t next_length = buffer->length + length;
39
+
40
+ if (next_length > buffer->capacity) {
41
+ do {
42
+ buffer->capacity *= 2;
43
+ } while (next_length > buffer->capacity);
44
+
45
+ buffer->value = realloc(buffer->value, buffer->capacity);
46
+ }
47
+
48
+ buffer->length = next_length;
49
+ }
50
+
51
+ // Append a generic pointer to memory to the buffer.
52
+ static inline void
53
+ pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
54
+ pm_buffer_append_length(buffer, length);
55
+ memcpy(buffer->value + (buffer->length - length), source, length);
56
+ }
57
+
58
+ // Append the given amount of space as zeroes to the buffer.
59
+ void
60
+ pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) {
61
+ pm_buffer_append_length(buffer, length);
62
+ memset(buffer->value + (buffer->length - length), 0, length);
63
+ }
64
+
65
+ // Append a string to the buffer.
66
+ void
67
+ pm_buffer_append_str(pm_buffer_t *buffer, const char *value, size_t length) {
68
+ pm_buffer_append(buffer, value, length);
69
+ }
70
+
71
+ // Append a list of bytes to the buffer.
72
+ void
73
+ pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length) {
74
+ pm_buffer_append(buffer, (const char *) value, length);
75
+ }
76
+
77
+ // Append a single byte to the buffer.
78
+ void
79
+ pm_buffer_append_u8(pm_buffer_t *buffer, uint8_t value) {
80
+ const void *source = &value;
81
+ pm_buffer_append(buffer, source, sizeof(uint8_t));
82
+ }
83
+
84
+ // Append a 32-bit unsigned integer to the buffer.
85
+ void
86
+ pm_buffer_append_u32(pm_buffer_t *buffer, uint32_t value) {
87
+ if (value < 128) {
88
+ pm_buffer_append_u8(buffer, (uint8_t) value);
89
+ } else {
90
+ uint32_t n = value;
91
+ while (n >= 128) {
92
+ pm_buffer_append_u8(buffer, (uint8_t) (n | 128));
93
+ n >>= 7;
94
+ }
95
+ pm_buffer_append_u8(buffer, (uint8_t) n);
96
+ }
97
+ }
98
+
99
+ // Free the memory associated with the buffer.
100
+ void
101
+ pm_buffer_free(pm_buffer_t *buffer) {
102
+ free(buffer->value);
103
+ }
@@ -0,0 +1,272 @@
1
+ #include "prism/util/pm_char.h"
2
+
3
+ #define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
4
+ #define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
5
+ #define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)
6
+
7
+ #define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
8
+ #define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1)
9
+ #define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
10
+ #define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
11
+ #define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
12
+ #define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
13
+ #define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
14
+ #define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
15
+
16
// Lookup table of character-class flags indexed by byte value. Bit 0 marks
// whitespace, bit 1 marks inline whitespace and bit 2 marks regexp option
// characters. Every entry that is not listed is 0.
static const uint8_t pm_byte_table[256] = {
    // Whitespace that is also inline whitespace: \t, \v, \f, \r and space.
    [0x09] = 3, [0x0b] = 3, [0x0c] = 3, [0x0d] = 3, [0x20] = 3,

    // The newline is whitespace but not inline whitespace.
    [0x0a] = 1,

    // Regexp option characters: e, i, m, n, o, s, u and x.
    [0x65] = 4, [0x69] = 4, [0x6d] = 4, [0x6e] = 4, [0x6f] = 4,
    [0x73] = 4, [0x75] = 4, [0x78] = 4,
};
35
+
36
// Lookup table of numeric-class flags indexed by byte value. For each base
// (binary, octal, decimal, hexadecimal) there is a "digit" bit and a "number"
// bit; the underscore carries only the "number" bits. Every entry that is not
// listed is 0.
static const uint8_t pm_number_table[256] = {
    // '0' and '1' belong to every base.
    [0x30] = 0xff, [0x31] = 0xff,

    // '2' through '7' belong to octal, decimal and hexadecimal.
    [0x32] = 0xfc, [0x33] = 0xfc, [0x34] = 0xfc,
    [0x35] = 0xfc, [0x36] = 0xfc, [0x37] = 0xfc,

    // '8' and '9' belong to decimal and hexadecimal.
    [0x38] = 0xf0, [0x39] = 0xf0,

    // 'A' through 'F' belong to hexadecimal only.
    [0x41] = 0xc0, [0x42] = 0xc0, [0x43] = 0xc0,
    [0x44] = 0xc0, [0x45] = 0xc0, [0x46] = 0xc0,

    // '_' is allowed inside numbers of every base but is never a digit.
    [0x5f] = 0xaa,

    // 'a' through 'f' belong to hexadecimal only.
    [0x61] = 0xc0, [0x62] = 0xc0, [0x63] = 0xc0,
    [0x64] = 0xc0, [0x65] = 0xc0, [0x66] = 0xc0,
};
55
+
56
+ static inline size_t
57
+ pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
58
+ if (length <= 0) return 0;
59
+
60
+ size_t size = 0;
61
+ size_t maximum = (size_t) length;
62
+
63
+ while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
64
+ return size;
65
+ }
66
+
67
+ // Returns the number of characters at the start of the string that are
68
+ // whitespace. Disallows searching past the given maximum number of characters.
69
+ size_t
70
+ pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
71
+ return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
72
+ }
73
+
74
+ // Returns the number of characters at the start of the string that are
75
+ // whitespace while also tracking the location of each newline. Disallows
76
+ // searching past the given maximum number of characters.
77
+ size_t
78
+ pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
79
+ if (length <= 0) return 0;
80
+
81
+ size_t size = 0;
82
+ size_t maximum = (size_t) length;
83
+
84
+ while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
85
+ if (string[size] == '\n') {
86
+ pm_newline_list_append(newline_list, string + size);
87
+ }
88
+
89
+ size++;
90
+ }
91
+
92
+ return size;
93
+ }
94
+
95
+ // Returns the number of characters at the start of the string that are inline
96
+ // whitespace. Disallows searching past the given maximum number of characters.
97
+ size_t
98
+ pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
99
+ return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
100
+ }
101
+
102
+ // Returns the number of characters at the start of the string that are regexp
103
+ // options. Disallows searching past the given maximum number of characters.
104
+ size_t
105
+ pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
106
+ return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
107
+ }
108
+
109
+ static inline bool
110
+ pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
111
+ return (pm_byte_table[b] & kind) != 0;
112
+ }
113
+
114
+ // Returns true if the given character is a whitespace character.
115
+ bool
116
+ pm_char_is_whitespace(const uint8_t b) {
117
+ return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
118
+ }
119
+
120
+ // Returns true if the given character is an inline whitespace character.
121
+ bool
122
+ pm_char_is_inline_whitespace(const uint8_t b) {
123
+ return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
124
+ }
125
+
126
+ // Scan through the string and return the number of characters at the start of
127
+ // the string that match the given kind. Disallows searching past the given
128
+ // maximum number of characters.
129
+ static inline size_t
130
+ pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
131
+ if (length <= 0) return 0;
132
+
133
+ size_t size = 0;
134
+ size_t maximum = (size_t) length;
135
+
136
+ while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
137
+ return size;
138
+ }
139
+
140
+ // Scan through the string and return the number of characters at the start of
141
+ // the string that match the given kind. Disallows searching past the given
142
+ // maximum number of characters.
143
+ //
144
+ // Additionally, report the location of the last invalid underscore character
145
+ // found in the string through the out invalid parameter.
146
+ static inline size_t
147
+ pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
148
+ if (length <= 0) return 0;
149
+
150
+ size_t size = 0;
151
+ size_t maximum = (size_t) length;
152
+
153
+ bool underscore = false;
154
+ while (size < maximum && (pm_number_table[string[size]] & kind)) {
155
+ if (string[size] == '_') {
156
+ if (underscore) *invalid = string + size;
157
+ underscore = true;
158
+ } else {
159
+ underscore = false;
160
+ }
161
+
162
+ size++;
163
+ }
164
+
165
+ if (string[size - 1] == '_') *invalid = string + size - 1;
166
+ return size;
167
+ }
168
+
169
+ // Returns the number of characters at the start of the string that are binary
170
+ // digits or underscores. Disallows searching past the given maximum number of
171
+ // characters.
172
+ //
173
+ // If multiple underscores are found in a row or if an underscore is
174
+ // found at the end of the number, then the invalid pointer is set to the index
175
+ // of the first invalid underscore.
176
+ size_t
177
+ pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
178
+ return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
179
+ }
180
+
181
+ // Returns the number of characters at the start of the string that are octal
182
+ // digits or underscores. Disallows searching past the given maximum number of
183
+ // characters.
184
+ //
185
+ // If multiple underscores are found in a row or if an underscore is
186
+ // found at the end of the number, then the invalid pointer is set to the index
187
+ // of the first invalid underscore.
188
+ size_t
189
+ pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
190
+ return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
191
+ }
192
+
193
+ // Returns the number of characters at the start of the string that are decimal
194
+ // digits. Disallows searching past the given maximum number of characters.
195
+ size_t
196
+ pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
197
+ return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
198
+ }
199
+
200
+ // Returns the number of characters at the start of the string that are decimal
201
+ // digits or underscores. Disallows searching past the given maximum number of
202
+ // characters.
203
+ //
204
+ // If multiple underscores are found in a row or if an underscore is
205
+ // found at the end of the number, then the invalid pointer is set to the index
206
+ // of the first invalid underscore.
207
+ size_t
208
+ pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
209
+ return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
210
+ }
211
+
212
+ // Returns the number of characters at the start of the string that are
213
+ // hexadecimal digits. Disallows searching past the given maximum number of
214
+ // characters.
215
+ size_t
216
+ pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
217
+ return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
218
+ }
219
+
220
+ // Returns the number of characters at the start of the string that are
221
+ // hexadecimal digits or underscores. Disallows searching past the given maximum
222
+ // number of characters.
223
+ //
224
+ // If multiple underscores are found in a row or if an underscore is
225
+ // found at the end of the number, then the invalid pointer is set to the index
226
+ // of the first invalid underscore.
227
+ size_t
228
+ pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
229
+ return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
230
+ }
231
+
232
+ static inline bool
233
+ pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
234
+ return (pm_number_table[b] & kind) != 0;
235
+ }
236
+
237
+ // Returns true if the given character is a binary digit.
238
+ bool
239
+ pm_char_is_binary_digit(const uint8_t b) {
240
+ return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
241
+ }
242
+
243
+ // Returns true if the given character is an octal digit.
244
+ bool
245
+ pm_char_is_octal_digit(const uint8_t b) {
246
+ return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
247
+ }
248
+
249
+ // Returns true if the given character is a decimal digit.
250
+ bool
251
+ pm_char_is_decimal_digit(const uint8_t b) {
252
+ return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
253
+ }
254
+
255
+ // Returns true if the given character is a hexadecimal digit.
256
+ bool
257
+ pm_char_is_hexadecimal_digit(const uint8_t b) {
258
+ return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
259
+ }
260
+
261
+ #undef PRISM_CHAR_BIT_WHITESPACE
262
+ #undef PRISM_CHAR_BIT_INLINE_WHITESPACE
263
+ #undef PRISM_CHAR_BIT_REGEXP_OPTION
264
+
265
+ #undef PRISM_NUMBER_BIT_BINARY_DIGIT
266
+ #undef PRISM_NUMBER_BIT_BINARY_NUMBER
267
+ #undef PRISM_NUMBER_BIT_OCTAL_DIGIT
268
+ #undef PRISM_NUMBER_BIT_OCTAL_NUMBER
269
+ #undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
270
+ #undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
271
+ #undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
272
+ #undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
@@ -0,0 +1,252 @@
1
+ #include "prism/util/pm_constant_pool.h"
2
+
3
+ // Initialize a list of constant ids.
4
+ void
5
+ pm_constant_id_list_init(pm_constant_id_list_t *list) {
6
+ list->ids = NULL;
7
+ list->size = 0;
8
+ list->capacity = 0;
9
+ }
10
+
11
+ // Append a constant id to a list of constant ids. Returns false if any
12
+ // potential reallocations fail.
13
+ bool
14
+ pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
15
+ if (list->size >= list->capacity) {
16
+ list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
17
+ list->ids = (pm_constant_id_t *) realloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
18
+ if (list->ids == NULL) return false;
19
+ }
20
+
21
+ list->ids[list->size++] = id;
22
+ return true;
23
+ }
24
+
25
+ // Checks if the current constant id list includes the given constant id.
26
+ bool
27
+ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
28
+ for (size_t index = 0; index < list->size; index++) {
29
+ if (list->ids[index] == id) return true;
30
+ }
31
+ return false;
32
+ }
33
+
34
+ // Get the memory size of a list of constant ids.
35
+ size_t
36
+ pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
37
+ return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
38
+ }
39
+
40
+ // Free the memory associated with a list of constant ids.
41
+ void
42
+ pm_constant_id_list_free(pm_constant_id_list_t *list) {
43
+ if (list->ids != NULL) {
44
+ free(list->ids);
45
+ }
46
+ }
47
+
48
// Hash a run of bytes with the djb2 function: begin at 5381 and, for every
// byte, multiply the running value by 33 and add the byte. Arithmetic wraps
// around at 32 bits. Chosen for simplicity and speed.
static inline uint32_t
pm_constant_pool_hash(const uint8_t *start, size_t length) {
    uint32_t hash = 5381;

    for (size_t remaining = length; remaining > 0; remaining--) {
        hash = hash * 33 + *start++;
    }

    return hash;
}
61
+
62
// Round a 32-bit value up to the nearest power of two. 0 maps to 1, and values
// that already are a power of two are returned unchanged. Values above 2^31
// wrap around to 0.
// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
static uint32_t
next_power_of_two(uint32_t v) {
    // Handle 0 up front so that the subtraction below cannot wrap around.
    if (v == 0) return 1;

    uint32_t result = v - 1;

    // Smear the highest set bit into every lower position.
    for (unsigned int shift = 1; shift < 32; shift <<= 1) {
        result |= result >> shift;
    }

    return result + 1;
}
79
+
80
#ifndef NDEBUG
// Returns true if at most one bit of the given value is set. Note that this
// also holds for 0. Only compiled when NDEBUG is not defined.
static bool
is_power_of_two(uint32_t size) {
    const uint32_t without_lowest_bit = size & (size - 1);
    return without_lowest_bit == 0;
}
#endif
86
+
87
+ // Resize a constant pool to a given capacity.
88
+ static inline bool
89
+ pm_constant_pool_resize(pm_constant_pool_t *pool) {
90
+ assert(is_power_of_two(pool->capacity));
91
+
92
+ uint32_t next_capacity = pool->capacity * 2;
93
+ if (next_capacity < pool->capacity) return false;
94
+
95
+ const uint32_t mask = next_capacity - 1;
96
+ const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
97
+
98
+ void *next = calloc(next_capacity, element_size);
99
+ if (next == NULL) return false;
100
+
101
+ pm_constant_pool_bucket_t *next_buckets = next;
102
+ pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
103
+
104
+ // For each bucket in the current constant pool, find the index in the
105
+ // next constant pool, and insert it.
106
+ for (uint32_t index = 0; index < pool->capacity; index++) {
107
+ pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
108
+
109
+ // If an id is set on this constant, then we know we have content here.
110
+ // In this case we need to insert it into the next constant pool.
111
+ if (bucket->id != 0) {
112
+ uint32_t next_index = bucket->hash & mask;
113
+
114
+ // This implements linear scanning to find the next available slot
115
+ // in case this index is already taken. We don't need to bother
116
+ // comparing the values since we know that the hash is unique.
117
+ while (next_buckets[next_index].id != 0) {
118
+ next_index = (next_index + 1) & mask;
119
+ }
120
+
121
+ // Here we copy over the entire bucket, which includes the id so
122
+ // that they are consistent between resizes.
123
+ next_buckets[next_index] = *bucket;
124
+ }
125
+ }
126
+
127
+ // The constants are stable with respect to hash table resizes.
128
+ memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
129
+
130
+ // pool->constants and pool->buckets are allocated out of the same chunk
131
+ // of memory, with the buckets coming first.
132
+ free(pool->buckets);
133
+ pool->constants = next_constants;
134
+ pool->buckets = next_buckets;
135
+ pool->capacity = next_capacity;
136
+ return true;
137
+ }
138
+
139
+ // Initialize a new constant pool with a given capacity.
140
+ bool
141
+ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
142
+ const uint32_t maximum = (~((uint32_t) 0));
143
+ if (capacity >= ((maximum / 2) + 1)) return false;
144
+
145
+ capacity = next_power_of_two(capacity);
146
+ const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
147
+ void *memory = calloc(capacity, element_size);
148
+ if (memory == NULL) return false;
149
+
150
+ pool->buckets = memory;
151
+ pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
152
+ pool->size = 0;
153
+ pool->capacity = capacity;
154
+ return true;
155
+ }
156
+
157
+ // Insert a constant into a constant pool and return its index in the pool.
158
+ static inline pm_constant_id_t
159
+ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, bool owned) {
160
+ if (pool->size >= (pool->capacity / 4 * 3)) {
161
+ if (!pm_constant_pool_resize(pool)) return 0;
162
+ }
163
+
164
+ assert(is_power_of_two(pool->capacity));
165
+ const uint32_t mask = pool->capacity - 1;
166
+
167
+ uint32_t hash = pm_constant_pool_hash(start, length);
168
+ uint32_t index = hash & mask;
169
+ pm_constant_pool_bucket_t *bucket;
170
+
171
+ while (bucket = &pool->buckets[index], bucket->id != 0) {
172
+ // If there is a collision, then we need to check if the content is the
173
+ // same as the content we are trying to insert. If it is, then we can
174
+ // return the id of the existing constant.
175
+ pm_constant_t *constant = &pool->constants[bucket->id - 1];
176
+
177
+ if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
178
+ // Since we have found a match, we need to check if this is
179
+ // attempting to insert a shared or an owned constant. We want to
180
+ // prefer shared constants since they don't require allocations.
181
+ if (owned) {
182
+ // If we're attempting to insert an owned constant and we have
183
+ // an existing constant, then either way we don't want the given
184
+ // memory. Either it's duplicated with the existing constant or
185
+ // it's not necessary because we have a shared version.
186
+ free((void *) start);
187
+ } else if (bucket->owned) {
188
+ // If we're attempting to insert a shared constant and the
189
+ // existing constant is owned, then we can free the owned
190
+ // constant and replace it with the shared constant.
191
+ free((void *) constant->start);
192
+ constant->start = start;
193
+ bucket->owned = false;
194
+ }
195
+
196
+ return bucket->id;
197
+ }
198
+
199
+ index = (index + 1) & mask;
200
+ }
201
+
202
+ // IDs are allocated starting at 1, since the value 0 denotes a non-existant
203
+ // constant.
204
+ uint32_t id = ++pool->size;
205
+ assert(pool->size < ((uint32_t) (1 << 31)));
206
+
207
+ *bucket = (pm_constant_pool_bucket_t) {
208
+ .id = (unsigned int) (id & 0x7FFFFFFF),
209
+ .owned = owned,
210
+ .hash = hash
211
+ };
212
+
213
+ pool->constants[id - 1] = (pm_constant_t) {
214
+ .start = start,
215
+ .length = length,
216
+ };
217
+
218
+ return id;
219
+ }
220
+
221
+ // Insert a constant into a constant pool. Returns the id of the constant, or 0
222
+ // if any potential calls to resize fail.
223
+ pm_constant_id_t
224
+ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
225
+ return pm_constant_pool_insert(pool, start, length, false);
226
+ }
227
+
228
+ // Insert a constant into a constant pool from memory that is now owned by the
229
+ // constant pool. Returns the id of the constant, or 0 if any potential calls to
230
+ // resize fail.
231
+ pm_constant_id_t
232
+ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
233
+ return pm_constant_pool_insert(pool, start, length, true);
234
+ }
235
+
236
+ // Free the memory associated with a constant pool.
237
+ void
238
+ pm_constant_pool_free(pm_constant_pool_t *pool) {
239
+ // For each constant in the current constant pool, free the contents if the
240
+ // contents are owned.
241
+ for (uint32_t index = 0; index < pool->capacity; index++) {
242
+ pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
243
+
244
+ // If an id is set on this constant, then we know we have content here.
245
+ if (bucket->id != 0 && bucket->owned) {
246
+ pm_constant_t *constant = &pool->constants[bucket->id - 1];
247
+ free((void *) constant->start);
248
+ }
249
+ }
250
+
251
+ free(pool->buckets);
252
+ }
@@ -0,0 +1,41 @@
1
+ #include "prism/util/pm_list.h"
2
+
3
+ // Returns true if the given list is empty.
4
+ PRISM_EXPORTED_FUNCTION bool
5
+ pm_list_empty_p(pm_list_t *list) {
6
+ return list->head == NULL;
7
+ }
8
+
9
+ // Returns the size of the list.
10
+ PRISM_EXPORTED_FUNCTION size_t
11
+ pm_list_size(pm_list_t *list) {
12
+ return list->size;
13
+ }
14
+
15
+ // Append a node to the given list.
16
+ void
17
+ pm_list_append(pm_list_t *list, pm_list_node_t *node) {
18
+ if (list->head == NULL) {
19
+ list->head = node;
20
+ } else {
21
+ list->tail->next = node;
22
+ }
23
+
24
+ list->tail = node;
25
+ list->size++;
26
+ }
27
+
28
+ // Deallocate the internal state of the given list.
29
+ PRISM_EXPORTED_FUNCTION void
30
+ pm_list_free(pm_list_t *list) {
31
+ pm_list_node_t *node = list->head;
32
+ pm_list_node_t *next;
33
+
34
+ while (node != NULL) {
35
+ next = node->next;
36
+ free(node);
37
+ node = next;
38
+ }
39
+
40
+ list->size = 0;
41
+ }