prism 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +172 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +62 -0
- data/LICENSE.md +7 -0
- data/Makefile +84 -0
- data/README.md +89 -0
- data/config.yml +2481 -0
- data/docs/build_system.md +74 -0
- data/docs/building.md +22 -0
- data/docs/configuration.md +60 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +117 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/ruby_api.md +25 -0
- data/docs/serialization.md +181 -0
- data/docs/testing.md +55 -0
- data/ext/prism/api_node.c +4725 -0
- data/ext/prism/api_pack.c +256 -0
- data/ext/prism/extconf.rb +136 -0
- data/ext/prism/extension.c +626 -0
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +1932 -0
- data/include/prism/defines.h +45 -0
- data/include/prism/diagnostic.h +231 -0
- data/include/prism/enc/pm_encoding.h +95 -0
- data/include/prism/node.h +41 -0
- data/include/prism/pack.h +141 -0
- data/include/prism/parser.h +418 -0
- data/include/prism/regexp.h +19 -0
- data/include/prism/unescape.h +48 -0
- data/include/prism/util/pm_buffer.h +51 -0
- data/include/prism/util/pm_char.h +91 -0
- data/include/prism/util/pm_constant_pool.h +78 -0
- data/include/prism/util/pm_list.h +67 -0
- data/include/prism/util/pm_memchr.h +14 -0
- data/include/prism/util/pm_newline_list.h +61 -0
- data/include/prism/util/pm_state_stack.h +24 -0
- data/include/prism/util/pm_string.h +61 -0
- data/include/prism/util/pm_string_list.h +25 -0
- data/include/prism/util/pm_strpbrk.h +29 -0
- data/include/prism/version.h +4 -0
- data/include/prism.h +82 -0
- data/lib/prism/compiler.rb +465 -0
- data/lib/prism/debug.rb +157 -0
- data/lib/prism/desugar_compiler.rb +206 -0
- data/lib/prism/dispatcher.rb +2051 -0
- data/lib/prism/dsl.rb +750 -0
- data/lib/prism/ffi.rb +251 -0
- data/lib/prism/lex_compat.rb +838 -0
- data/lib/prism/mutation_compiler.rb +718 -0
- data/lib/prism/node.rb +14540 -0
- data/lib/prism/node_ext.rb +55 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/prism/pack.rb +185 -0
- data/lib/prism/parse_result/comments.rb +172 -0
- data/lib/prism/parse_result/newlines.rb +60 -0
- data/lib/prism/parse_result.rb +266 -0
- data/lib/prism/pattern.rb +239 -0
- data/lib/prism/ripper_compat.rb +174 -0
- data/lib/prism/serialize.rb +662 -0
- data/lib/prism/visitor.rb +470 -0
- data/lib/prism.rb +64 -0
- data/prism.gemspec +113 -0
- data/src/diagnostic.c +287 -0
- data/src/enc/pm_big5.c +52 -0
- data/src/enc/pm_euc_jp.c +58 -0
- data/src/enc/pm_gbk.c +61 -0
- data/src/enc/pm_shift_jis.c +56 -0
- data/src/enc/pm_tables.c +507 -0
- data/src/enc/pm_unicode.c +2324 -0
- data/src/enc/pm_windows_31j.c +56 -0
- data/src/node.c +2633 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +2136 -0
- data/src/prism.c +14587 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1899 -0
- data/src/token_type.c +349 -0
- data/src/unescape.c +637 -0
- data/src/util/pm_buffer.c +103 -0
- data/src/util/pm_char.c +272 -0
- data/src/util/pm_constant_pool.c +252 -0
- data/src/util/pm_list.c +41 -0
- data/src/util/pm_memchr.c +33 -0
- data/src/util/pm_newline_list.c +134 -0
- data/src/util/pm_state_stack.c +19 -0
- data/src/util/pm_string.c +200 -0
- data/src/util/pm_string_list.c +29 -0
- data/src/util/pm_strncasecmp.c +17 -0
- data/src/util/pm_strpbrk.c +66 -0
- metadata +138 -0
@@ -0,0 +1,103 @@
|
|
1
|
+
#include "prism/util/pm_buffer.h"
|
2
|
+
|
3
|
+
#define PRISM_BUFFER_INITIAL_SIZE 1024
|
4
|
+
|
5
|
+
// Return the size of the pm_buffer_t struct.
|
6
|
+
size_t
|
7
|
+
pm_buffer_sizeof(void) {
|
8
|
+
return sizeof(pm_buffer_t);
|
9
|
+
}
|
10
|
+
|
11
|
+
// Initialize a pm_buffer_t with its default values.
|
12
|
+
bool
|
13
|
+
pm_buffer_init(pm_buffer_t *buffer) {
|
14
|
+
buffer->length = 0;
|
15
|
+
buffer->capacity = PRISM_BUFFER_INITIAL_SIZE;
|
16
|
+
|
17
|
+
buffer->value = (char *) malloc(PRISM_BUFFER_INITIAL_SIZE);
|
18
|
+
return buffer->value != NULL;
|
19
|
+
}
|
20
|
+
|
21
|
+
#undef PRISM_BUFFER_INITIAL_SIZE
|
22
|
+
|
23
|
+
// Return the value of the buffer.
|
24
|
+
char *
|
25
|
+
pm_buffer_value(pm_buffer_t *buffer) {
|
26
|
+
return buffer->value;
|
27
|
+
}
|
28
|
+
|
29
|
+
// Return the length of the buffer.
|
30
|
+
size_t
|
31
|
+
pm_buffer_length(pm_buffer_t *buffer) {
|
32
|
+
return buffer->length;
|
33
|
+
}
|
34
|
+
|
35
|
+
// Append the given amount of space to the buffer.
|
36
|
+
static inline void
|
37
|
+
pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
|
38
|
+
size_t next_length = buffer->length + length;
|
39
|
+
|
40
|
+
if (next_length > buffer->capacity) {
|
41
|
+
do {
|
42
|
+
buffer->capacity *= 2;
|
43
|
+
} while (next_length > buffer->capacity);
|
44
|
+
|
45
|
+
buffer->value = realloc(buffer->value, buffer->capacity);
|
46
|
+
}
|
47
|
+
|
48
|
+
buffer->length = next_length;
|
49
|
+
}
|
50
|
+
|
51
|
+
// Append a generic pointer to memory to the buffer.
|
52
|
+
static inline void
|
53
|
+
pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
|
54
|
+
pm_buffer_append_length(buffer, length);
|
55
|
+
memcpy(buffer->value + (buffer->length - length), source, length);
|
56
|
+
}
|
57
|
+
|
58
|
+
// Append the given amount of space as zeroes to the buffer.
|
59
|
+
void
|
60
|
+
pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) {
|
61
|
+
pm_buffer_append_length(buffer, length);
|
62
|
+
memset(buffer->value + (buffer->length - length), 0, length);
|
63
|
+
}
|
64
|
+
|
65
|
+
// Append a string to the buffer.
|
66
|
+
void
|
67
|
+
pm_buffer_append_str(pm_buffer_t *buffer, const char *value, size_t length) {
|
68
|
+
pm_buffer_append(buffer, value, length);
|
69
|
+
}
|
70
|
+
|
71
|
+
// Append a list of bytes to the buffer.
|
72
|
+
void
|
73
|
+
pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length) {
|
74
|
+
pm_buffer_append(buffer, (const char *) value, length);
|
75
|
+
}
|
76
|
+
|
77
|
+
// Append a single byte to the buffer.
|
78
|
+
void
|
79
|
+
pm_buffer_append_u8(pm_buffer_t *buffer, uint8_t value) {
|
80
|
+
const void *source = &value;
|
81
|
+
pm_buffer_append(buffer, source, sizeof(uint8_t));
|
82
|
+
}
|
83
|
+
|
84
|
+
// Append a 32-bit unsigned integer to the buffer.
|
85
|
+
void
|
86
|
+
pm_buffer_append_u32(pm_buffer_t *buffer, uint32_t value) {
|
87
|
+
if (value < 128) {
|
88
|
+
pm_buffer_append_u8(buffer, (uint8_t) value);
|
89
|
+
} else {
|
90
|
+
uint32_t n = value;
|
91
|
+
while (n >= 128) {
|
92
|
+
pm_buffer_append_u8(buffer, (uint8_t) (n | 128));
|
93
|
+
n >>= 7;
|
94
|
+
}
|
95
|
+
pm_buffer_append_u8(buffer, (uint8_t) n);
|
96
|
+
}
|
97
|
+
}
|
98
|
+
|
99
|
+
// Free the memory associated with the buffer.
|
100
|
+
void
|
101
|
+
pm_buffer_free(pm_buffer_t *buffer) {
|
102
|
+
free(buffer->value);
|
103
|
+
}
|
data/src/util/pm_char.c
ADDED
@@ -0,0 +1,272 @@
|
|
1
|
+
#include "prism/util/pm_char.h"
|
2
|
+
|
3
|
+
// Bit flags stored in pm_byte_table, one bit per character class.
#define PRISM_CHAR_BIT_WHITESPACE 0x01
#define PRISM_CHAR_BIT_INLINE_WHITESPACE 0x02
#define PRISM_CHAR_BIT_REGEXP_OPTION 0x04

// Bit flags stored in pm_number_table. Each base has a *_DIGIT bit and a
// *_NUMBER bit.
#define PRISM_NUMBER_BIT_BINARY_DIGIT 0x01
#define PRISM_NUMBER_BIT_BINARY_NUMBER 0x02
#define PRISM_NUMBER_BIT_OCTAL_DIGIT 0x04
#define PRISM_NUMBER_BIT_OCTAL_NUMBER 0x08
#define PRISM_NUMBER_BIT_DECIMAL_DIGIT 0x10
#define PRISM_NUMBER_BIT_DECIMAL_NUMBER 0x20
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT 0x40
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER 0x80
|
15
|
+
|
16
|
+
// Lookup table from byte value to character-class bits. Only the non-zero
// entries are spelled out; every other byte maps to 0.
//   3 = whitespace | inline whitespace
//   1 = whitespace only
//   4 = regexp option
static const uint8_t pm_byte_table[256] = {
    [0x09] = 3, // '\t'
    [0x0A] = 1, // '\n' (whitespace, but not inline whitespace)
    [0x0B] = 3, // '\v'
    [0x0C] = 3, // '\f'
    [0x0D] = 3, // '\r'
    [0x20] = 3, // ' '
    [0x65] = 4, // 'e'
    [0x69] = 4, // 'i'
    [0x6D] = 4, // 'm'
    [0x6E] = 4, // 'n'
    [0x6F] = 4, // 'o'
    [0x73] = 4, // 's'
    [0x75] = 4, // 'u'
    [0x78] = 4, // 'x'
};
|
35
|
+
|
36
|
+
// Lookup table from byte value to numeric-class bits. Only the non-zero
// entries are spelled out; every other byte maps to 0.
//   0xff = valid in every base (binary, octal, decimal, hexadecimal)
//   0xfc = octal, decimal and hexadecimal
//   0xf0 = decimal and hexadecimal
//   0xc0 = hexadecimal only
//   0xaa = underscore: only the *_NUMBER bits, none of the *_DIGIT bits
static const uint8_t pm_number_table[256] = {
    [0x30] = 0xff, // '0'
    [0x31] = 0xff, // '1'
    [0x32] = 0xfc, // '2'
    [0x33] = 0xfc, // '3'
    [0x34] = 0xfc, // '4'
    [0x35] = 0xfc, // '5'
    [0x36] = 0xfc, // '6'
    [0x37] = 0xfc, // '7'
    [0x38] = 0xf0, // '8'
    [0x39] = 0xf0, // '9'
    [0x41] = 0xc0, // 'A'
    [0x42] = 0xc0, // 'B'
    [0x43] = 0xc0, // 'C'
    [0x44] = 0xc0, // 'D'
    [0x45] = 0xc0, // 'E'
    [0x46] = 0xc0, // 'F'
    [0x5F] = 0xaa, // '_'
    [0x61] = 0xc0, // 'a'
    [0x62] = 0xc0, // 'b'
    [0x63] = 0xc0, // 'c'
    [0x64] = 0xc0, // 'd'
    [0x65] = 0xc0, // 'e'
    [0x66] = 0xc0, // 'f'
};
|
55
|
+
|
56
|
+
static inline size_t
|
57
|
+
pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
|
58
|
+
if (length <= 0) return 0;
|
59
|
+
|
60
|
+
size_t size = 0;
|
61
|
+
size_t maximum = (size_t) length;
|
62
|
+
|
63
|
+
while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
|
64
|
+
return size;
|
65
|
+
}
|
66
|
+
|
67
|
+
// Returns the number of characters at the start of the string that are
|
68
|
+
// whitespace. Disallows searching past the given maximum number of characters.
|
69
|
+
size_t
|
70
|
+
pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
|
71
|
+
return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
|
72
|
+
}
|
73
|
+
|
74
|
+
// Returns the number of characters at the start of the string that are
|
75
|
+
// whitespace while also tracking the location of each newline. Disallows
|
76
|
+
// searching past the given maximum number of characters.
|
77
|
+
size_t
|
78
|
+
pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
|
79
|
+
if (length <= 0) return 0;
|
80
|
+
|
81
|
+
size_t size = 0;
|
82
|
+
size_t maximum = (size_t) length;
|
83
|
+
|
84
|
+
while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
|
85
|
+
if (string[size] == '\n') {
|
86
|
+
pm_newline_list_append(newline_list, string + size);
|
87
|
+
}
|
88
|
+
|
89
|
+
size++;
|
90
|
+
}
|
91
|
+
|
92
|
+
return size;
|
93
|
+
}
|
94
|
+
|
95
|
+
// Returns the number of characters at the start of the string that are inline
|
96
|
+
// whitespace. Disallows searching past the given maximum number of characters.
|
97
|
+
size_t
|
98
|
+
pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
|
99
|
+
return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
|
100
|
+
}
|
101
|
+
|
102
|
+
// Returns the number of characters at the start of the string that are regexp
|
103
|
+
// options. Disallows searching past the given maximum number of characters.
|
104
|
+
size_t
|
105
|
+
pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
|
106
|
+
return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
|
107
|
+
}
|
108
|
+
|
109
|
+
static inline bool
|
110
|
+
pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
|
111
|
+
return (pm_byte_table[b] & kind) != 0;
|
112
|
+
}
|
113
|
+
|
114
|
+
// Returns true if the given character is a whitespace character.
|
115
|
+
bool
|
116
|
+
pm_char_is_whitespace(const uint8_t b) {
|
117
|
+
return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
|
118
|
+
}
|
119
|
+
|
120
|
+
// Returns true if the given character is an inline whitespace character.
|
121
|
+
bool
|
122
|
+
pm_char_is_inline_whitespace(const uint8_t b) {
|
123
|
+
return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
|
124
|
+
}
|
125
|
+
|
126
|
+
// Scan through the string and return the number of characters at the start of
|
127
|
+
// the string that match the given kind. Disallows searching past the given
|
128
|
+
// maximum number of characters.
|
129
|
+
static inline size_t
|
130
|
+
pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
|
131
|
+
if (length <= 0) return 0;
|
132
|
+
|
133
|
+
size_t size = 0;
|
134
|
+
size_t maximum = (size_t) length;
|
135
|
+
|
136
|
+
while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
|
137
|
+
return size;
|
138
|
+
}
|
139
|
+
|
140
|
+
// Scan through the string and return the number of characters at the start of
|
141
|
+
// the string that match the given kind. Disallows searching past the given
|
142
|
+
// maximum number of characters.
|
143
|
+
//
|
144
|
+
// Additionally, report the location of the last invalid underscore character
|
145
|
+
// found in the string through the out invalid parameter.
|
146
|
+
static inline size_t
|
147
|
+
pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
|
148
|
+
if (length <= 0) return 0;
|
149
|
+
|
150
|
+
size_t size = 0;
|
151
|
+
size_t maximum = (size_t) length;
|
152
|
+
|
153
|
+
bool underscore = false;
|
154
|
+
while (size < maximum && (pm_number_table[string[size]] & kind)) {
|
155
|
+
if (string[size] == '_') {
|
156
|
+
if (underscore) *invalid = string + size;
|
157
|
+
underscore = true;
|
158
|
+
} else {
|
159
|
+
underscore = false;
|
160
|
+
}
|
161
|
+
|
162
|
+
size++;
|
163
|
+
}
|
164
|
+
|
165
|
+
if (string[size - 1] == '_') *invalid = string + size - 1;
|
166
|
+
return size;
|
167
|
+
}
|
168
|
+
|
169
|
+
// Returns the number of characters at the start of the string that are binary
|
170
|
+
// digits or underscores. Disallows searching past the given maximum number of
|
171
|
+
// characters.
|
172
|
+
//
|
173
|
+
// If multiple underscores are found in a row or if an underscore is
|
174
|
+
// found at the end of the number, then the invalid pointer is set to the index
|
175
|
+
// of the first invalid underscore.
|
176
|
+
size_t
|
177
|
+
pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
|
178
|
+
return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
|
179
|
+
}
|
180
|
+
|
181
|
+
// Returns the number of characters at the start of the string that are octal
|
182
|
+
// digits or underscores. Disallows searching past the given maximum number of
|
183
|
+
// characters.
|
184
|
+
//
|
185
|
+
// If multiple underscores are found in a row or if an underscore is
|
186
|
+
// found at the end of the number, then the invalid pointer is set to the index
|
187
|
+
// of the first invalid underscore.
|
188
|
+
size_t
|
189
|
+
pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
|
190
|
+
return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
|
191
|
+
}
|
192
|
+
|
193
|
+
// Returns the number of characters at the start of the string that are decimal
|
194
|
+
// digits. Disallows searching past the given maximum number of characters.
|
195
|
+
size_t
|
196
|
+
pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
|
197
|
+
return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
|
198
|
+
}
|
199
|
+
|
200
|
+
// Returns the number of characters at the start of the string that are decimal
|
201
|
+
// digits or underscores. Disallows searching past the given maximum number of
|
202
|
+
// characters.
|
203
|
+
//
|
204
|
+
// If multiple underscores are found in a row or if an underscore is
|
205
|
+
// found at the end of the number, then the invalid pointer is set to the index
|
206
|
+
// of the first invalid underscore.
|
207
|
+
size_t
|
208
|
+
pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
|
209
|
+
return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
|
210
|
+
}
|
211
|
+
|
212
|
+
// Returns the number of characters at the start of the string that are
|
213
|
+
// hexadecimal digits. Disallows searching past the given maximum number of
|
214
|
+
// characters.
|
215
|
+
size_t
|
216
|
+
pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
|
217
|
+
return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
|
218
|
+
}
|
219
|
+
|
220
|
+
// Returns the number of characters at the start of the string that are
|
221
|
+
// hexadecimal digits or underscores. Disallows searching past the given maximum
|
222
|
+
// number of characters.
|
223
|
+
//
|
224
|
+
// If multiple underscores are found in a row or if an underscore is
|
225
|
+
// found at the end of the number, then the invalid pointer is set to the index
|
226
|
+
// of the first invalid underscore.
|
227
|
+
size_t
|
228
|
+
pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
|
229
|
+
return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
|
230
|
+
}
|
231
|
+
|
232
|
+
static inline bool
|
233
|
+
pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
|
234
|
+
return (pm_number_table[b] & kind) != 0;
|
235
|
+
}
|
236
|
+
|
237
|
+
// Returns true if the given character is a binary digit.
|
238
|
+
bool
|
239
|
+
pm_char_is_binary_digit(const uint8_t b) {
|
240
|
+
return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
|
241
|
+
}
|
242
|
+
|
243
|
+
// Returns true if the given character is an octal digit.
|
244
|
+
bool
|
245
|
+
pm_char_is_octal_digit(const uint8_t b) {
|
246
|
+
return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
|
247
|
+
}
|
248
|
+
|
249
|
+
// Returns true if the given character is a decimal digit.
|
250
|
+
bool
|
251
|
+
pm_char_is_decimal_digit(const uint8_t b) {
|
252
|
+
return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
|
253
|
+
}
|
254
|
+
|
255
|
+
// Returns true if the given character is a hexadecimal digit.
|
256
|
+
bool
|
257
|
+
pm_char_is_hexadecimal_digit(const uint8_t b) {
|
258
|
+
return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
|
259
|
+
}
|
260
|
+
|
261
|
+
#undef PRISM_CHAR_BIT_WHITESPACE
|
262
|
+
#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
|
263
|
+
#undef PRISM_CHAR_BIT_REGEXP_OPTION
|
264
|
+
|
265
|
+
#undef PRISM_NUMBER_BIT_BINARY_DIGIT
|
266
|
+
#undef PRISM_NUMBER_BIT_BINARY_NUMBER
|
267
|
+
#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
|
268
|
+
#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
|
269
|
+
#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
|
270
|
+
#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
|
271
|
+
#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
|
272
|
+
#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
|
@@ -0,0 +1,252 @@
|
|
1
|
+
#include "prism/util/pm_constant_pool.h"
|
2
|
+
|
3
|
+
// Initialize a list of constant ids.
|
4
|
+
void
|
5
|
+
pm_constant_id_list_init(pm_constant_id_list_t *list) {
|
6
|
+
list->ids = NULL;
|
7
|
+
list->size = 0;
|
8
|
+
list->capacity = 0;
|
9
|
+
}
|
10
|
+
|
11
|
+
// Append a constant id to a list of constant ids. Returns false if any
|
12
|
+
// potential reallocations fail.
|
13
|
+
bool
|
14
|
+
pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
|
15
|
+
if (list->size >= list->capacity) {
|
16
|
+
list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
|
17
|
+
list->ids = (pm_constant_id_t *) realloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
|
18
|
+
if (list->ids == NULL) return false;
|
19
|
+
}
|
20
|
+
|
21
|
+
list->ids[list->size++] = id;
|
22
|
+
return true;
|
23
|
+
}
|
24
|
+
|
25
|
+
// Checks if the current constant id list includes the given constant id.
|
26
|
+
bool
|
27
|
+
pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
|
28
|
+
for (size_t index = 0; index < list->size; index++) {
|
29
|
+
if (list->ids[index] == id) return true;
|
30
|
+
}
|
31
|
+
return false;
|
32
|
+
}
|
33
|
+
|
34
|
+
// Get the memory size of a list of constant ids.
|
35
|
+
size_t
|
36
|
+
pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
|
37
|
+
return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
|
38
|
+
}
|
39
|
+
|
40
|
+
// Free the memory associated with a list of constant ids.
|
41
|
+
void
|
42
|
+
pm_constant_id_list_free(pm_constant_id_list_t *list) {
|
43
|
+
if (list->ids != NULL) {
|
44
|
+
free(list->ids);
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
// djb2 string hash over `length` bytes starting at `start`. Chosen for
// simplicity and speed: begin at 5381 and, for every byte, multiply the
// running value by 33 and add the byte. Arithmetic wraps modulo 2^32.
static inline uint32_t
pm_constant_pool_hash(const uint8_t *start, size_t length) {
    uint32_t digest = 5381;
    const uint8_t *cursor = start;

    for (size_t remaining = length; remaining > 0; remaining--) {
        digest = digest * 33u + *cursor++;
    }

    return digest;
}
|
61
|
+
|
62
|
+
// Round `v` up to the nearest power of two; a value that already is a power of
// two is returned unchanged, and 0 maps to 1 (2^0).
// See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
static uint32_t
next_power_of_two(uint32_t v) {
    // Handle zero up front so the decrement below cannot wrap around.
    if (v == 0) return 1;

    uint32_t smeared = v - 1;

    // Copy the highest set bit into every lower position (shifts of 1, 2, 4,
    // 8 and 16), producing a value of the form 2^k - 1.
    for (unsigned int shift = 1; shift <= 16; shift *= 2) {
        smeared |= smeared >> shift;
    }

    return smeared + 1;
}
|
79
|
+
|
80
|
+
#ifndef NDEBUG
|
81
|
+
// Debug-only helper: true when `size` has at most one bit set. Note that this
// also reports true for 0.
static bool
is_power_of_two(uint32_t size) {
    // Clearing the lowest set bit leaves nothing behind for a power of two.
    const uint32_t without_lowest_bit = size & (size - 1);
    return without_lowest_bit == 0;
}
|
85
|
+
#endif
|
86
|
+
|
87
|
+
// Resize a constant pool to a given capacity.
|
88
|
+
static inline bool
|
89
|
+
pm_constant_pool_resize(pm_constant_pool_t *pool) {
|
90
|
+
assert(is_power_of_two(pool->capacity));
|
91
|
+
|
92
|
+
uint32_t next_capacity = pool->capacity * 2;
|
93
|
+
if (next_capacity < pool->capacity) return false;
|
94
|
+
|
95
|
+
const uint32_t mask = next_capacity - 1;
|
96
|
+
const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
|
97
|
+
|
98
|
+
void *next = calloc(next_capacity, element_size);
|
99
|
+
if (next == NULL) return false;
|
100
|
+
|
101
|
+
pm_constant_pool_bucket_t *next_buckets = next;
|
102
|
+
pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
|
103
|
+
|
104
|
+
// For each bucket in the current constant pool, find the index in the
|
105
|
+
// next constant pool, and insert it.
|
106
|
+
for (uint32_t index = 0; index < pool->capacity; index++) {
|
107
|
+
pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
|
108
|
+
|
109
|
+
// If an id is set on this constant, then we know we have content here.
|
110
|
+
// In this case we need to insert it into the next constant pool.
|
111
|
+
if (bucket->id != 0) {
|
112
|
+
uint32_t next_index = bucket->hash & mask;
|
113
|
+
|
114
|
+
// This implements linear scanning to find the next available slot
|
115
|
+
// in case this index is already taken. We don't need to bother
|
116
|
+
// comparing the values since we know that the hash is unique.
|
117
|
+
while (next_buckets[next_index].id != 0) {
|
118
|
+
next_index = (next_index + 1) & mask;
|
119
|
+
}
|
120
|
+
|
121
|
+
// Here we copy over the entire bucket, which includes the id so
|
122
|
+
// that they are consistent between resizes.
|
123
|
+
next_buckets[next_index] = *bucket;
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
// The constants are stable with respect to hash table resizes.
|
128
|
+
memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
|
129
|
+
|
130
|
+
// pool->constants and pool->buckets are allocated out of the same chunk
|
131
|
+
// of memory, with the buckets coming first.
|
132
|
+
free(pool->buckets);
|
133
|
+
pool->constants = next_constants;
|
134
|
+
pool->buckets = next_buckets;
|
135
|
+
pool->capacity = next_capacity;
|
136
|
+
return true;
|
137
|
+
}
|
138
|
+
|
139
|
+
// Initialize a new constant pool with a given capacity.
|
140
|
+
bool
|
141
|
+
pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
|
142
|
+
const uint32_t maximum = (~((uint32_t) 0));
|
143
|
+
if (capacity >= ((maximum / 2) + 1)) return false;
|
144
|
+
|
145
|
+
capacity = next_power_of_two(capacity);
|
146
|
+
const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
|
147
|
+
void *memory = calloc(capacity, element_size);
|
148
|
+
if (memory == NULL) return false;
|
149
|
+
|
150
|
+
pool->buckets = memory;
|
151
|
+
pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
|
152
|
+
pool->size = 0;
|
153
|
+
pool->capacity = capacity;
|
154
|
+
return true;
|
155
|
+
}
|
156
|
+
|
157
|
+
// Insert a constant into a constant pool and return its index in the pool.
|
158
|
+
static inline pm_constant_id_t
|
159
|
+
pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, bool owned) {
|
160
|
+
if (pool->size >= (pool->capacity / 4 * 3)) {
|
161
|
+
if (!pm_constant_pool_resize(pool)) return 0;
|
162
|
+
}
|
163
|
+
|
164
|
+
assert(is_power_of_two(pool->capacity));
|
165
|
+
const uint32_t mask = pool->capacity - 1;
|
166
|
+
|
167
|
+
uint32_t hash = pm_constant_pool_hash(start, length);
|
168
|
+
uint32_t index = hash & mask;
|
169
|
+
pm_constant_pool_bucket_t *bucket;
|
170
|
+
|
171
|
+
while (bucket = &pool->buckets[index], bucket->id != 0) {
|
172
|
+
// If there is a collision, then we need to check if the content is the
|
173
|
+
// same as the content we are trying to insert. If it is, then we can
|
174
|
+
// return the id of the existing constant.
|
175
|
+
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
176
|
+
|
177
|
+
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
|
178
|
+
// Since we have found a match, we need to check if this is
|
179
|
+
// attempting to insert a shared or an owned constant. We want to
|
180
|
+
// prefer shared constants since they don't require allocations.
|
181
|
+
if (owned) {
|
182
|
+
// If we're attempting to insert an owned constant and we have
|
183
|
+
// an existing constant, then either way we don't want the given
|
184
|
+
// memory. Either it's duplicated with the existing constant or
|
185
|
+
// it's not necessary because we have a shared version.
|
186
|
+
free((void *) start);
|
187
|
+
} else if (bucket->owned) {
|
188
|
+
// If we're attempting to insert a shared constant and the
|
189
|
+
// existing constant is owned, then we can free the owned
|
190
|
+
// constant and replace it with the shared constant.
|
191
|
+
free((void *) constant->start);
|
192
|
+
constant->start = start;
|
193
|
+
bucket->owned = false;
|
194
|
+
}
|
195
|
+
|
196
|
+
return bucket->id;
|
197
|
+
}
|
198
|
+
|
199
|
+
index = (index + 1) & mask;
|
200
|
+
}
|
201
|
+
|
202
|
+
// IDs are allocated starting at 1, since the value 0 denotes a non-existant
|
203
|
+
// constant.
|
204
|
+
uint32_t id = ++pool->size;
|
205
|
+
assert(pool->size < ((uint32_t) (1 << 31)));
|
206
|
+
|
207
|
+
*bucket = (pm_constant_pool_bucket_t) {
|
208
|
+
.id = (unsigned int) (id & 0x7FFFFFFF),
|
209
|
+
.owned = owned,
|
210
|
+
.hash = hash
|
211
|
+
};
|
212
|
+
|
213
|
+
pool->constants[id - 1] = (pm_constant_t) {
|
214
|
+
.start = start,
|
215
|
+
.length = length,
|
216
|
+
};
|
217
|
+
|
218
|
+
return id;
|
219
|
+
}
|
220
|
+
|
221
|
+
// Insert a constant into a constant pool. Returns the id of the constant, or 0
|
222
|
+
// if any potential calls to resize fail.
|
223
|
+
pm_constant_id_t
|
224
|
+
pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
225
|
+
return pm_constant_pool_insert(pool, start, length, false);
|
226
|
+
}
|
227
|
+
|
228
|
+
// Insert a constant into a constant pool from memory that is now owned by the
|
229
|
+
// constant pool. Returns the id of the constant, or 0 if any potential calls to
|
230
|
+
// resize fail.
|
231
|
+
pm_constant_id_t
|
232
|
+
pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
233
|
+
return pm_constant_pool_insert(pool, start, length, true);
|
234
|
+
}
|
235
|
+
|
236
|
+
// Free the memory associated with a constant pool.
|
237
|
+
void
|
238
|
+
pm_constant_pool_free(pm_constant_pool_t *pool) {
|
239
|
+
// For each constant in the current constant pool, free the contents if the
|
240
|
+
// contents are owned.
|
241
|
+
for (uint32_t index = 0; index < pool->capacity; index++) {
|
242
|
+
pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
|
243
|
+
|
244
|
+
// If an id is set on this constant, then we know we have content here.
|
245
|
+
if (bucket->id != 0 && bucket->owned) {
|
246
|
+
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
247
|
+
free((void *) constant->start);
|
248
|
+
}
|
249
|
+
}
|
250
|
+
|
251
|
+
free(pool->buckets);
|
252
|
+
}
|
data/src/util/pm_list.c
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#include "prism/util/pm_list.h"
|
2
|
+
|
3
|
+
// Returns true if the given list is empty.
|
4
|
+
PRISM_EXPORTED_FUNCTION bool
|
5
|
+
pm_list_empty_p(pm_list_t *list) {
|
6
|
+
return list->head == NULL;
|
7
|
+
}
|
8
|
+
|
9
|
+
// Returns the size of the list.
|
10
|
+
PRISM_EXPORTED_FUNCTION size_t
|
11
|
+
pm_list_size(pm_list_t *list) {
|
12
|
+
return list->size;
|
13
|
+
}
|
14
|
+
|
15
|
+
// Append a node to the given list.
|
16
|
+
void
|
17
|
+
pm_list_append(pm_list_t *list, pm_list_node_t *node) {
|
18
|
+
if (list->head == NULL) {
|
19
|
+
list->head = node;
|
20
|
+
} else {
|
21
|
+
list->tail->next = node;
|
22
|
+
}
|
23
|
+
|
24
|
+
list->tail = node;
|
25
|
+
list->size++;
|
26
|
+
}
|
27
|
+
|
28
|
+
// Deallocate the internal state of the given list.
|
29
|
+
PRISM_EXPORTED_FUNCTION void
|
30
|
+
pm_list_free(pm_list_t *list) {
|
31
|
+
pm_list_node_t *node = list->head;
|
32
|
+
pm_list_node_t *next;
|
33
|
+
|
34
|
+
while (node != NULL) {
|
35
|
+
next = node->next;
|
36
|
+
free(node);
|
37
|
+
node = next;
|
38
|
+
}
|
39
|
+
|
40
|
+
list->size = 0;
|
41
|
+
}
|