herb 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/{LICENSE → LICENSE.txt} +4 -3
- data/Makefile +121 -0
- data/README.md +102 -107
- data/Rakefile +184 -0
- data/exe/herb +5 -0
- data/ext/herb/error_helpers.c +302 -0
- data/ext/herb/error_helpers.h +15 -0
- data/ext/herb/extconf.rb +75 -0
- data/ext/herb/extension.c +110 -0
- data/ext/herb/extension.h +6 -0
- data/ext/herb/extension_helpers.c +117 -0
- data/ext/herb/extension_helpers.h +24 -0
- data/ext/herb/nodes.c +936 -0
- data/ext/herb/nodes.h +12 -0
- data/herb.gemspec +49 -0
- data/lib/herb/ast/node.rb +61 -0
- data/lib/herb/ast/nodes.rb +1542 -0
- data/lib/herb/ast.rb +6 -0
- data/lib/herb/cli.rb +164 -0
- data/lib/herb/errors.rb +352 -0
- data/lib/herb/lex_result.rb +20 -0
- data/lib/herb/libherb/array.rb +48 -0
- data/lib/herb/libherb/ast_node.rb +47 -0
- data/lib/herb/libherb/buffer.rb +53 -0
- data/lib/herb/libherb/extract_result.rb +17 -0
- data/lib/herb/libherb/lex_result.rb +29 -0
- data/lib/herb/libherb/libherb.rb +49 -0
- data/lib/herb/libherb/parse_result.rb +17 -0
- data/lib/herb/libherb/token.rb +43 -0
- data/lib/herb/libherb.rb +32 -0
- data/lib/herb/location.rb +42 -0
- data/lib/herb/parse_result.rb +26 -0
- data/lib/herb/position.rb +36 -0
- data/lib/herb/project.rb +361 -0
- data/lib/herb/range.rb +40 -0
- data/lib/herb/result.rb +21 -0
- data/lib/herb/token.rb +43 -0
- data/lib/herb/token_list.rb +11 -0
- data/lib/herb/version.rb +5 -0
- data/lib/herb.rb +21 -68
- data/src/analyze.c +989 -0
- data/src/analyze_helpers.c +241 -0
- data/src/analyzed_ruby.c +35 -0
- data/src/array.c +137 -0
- data/src/ast_node.c +81 -0
- data/src/ast_nodes.c +866 -0
- data/src/ast_pretty_print.c +588 -0
- data/src/buffer.c +199 -0
- data/src/errors.c +740 -0
- data/src/extract.c +110 -0
- data/src/herb.c +103 -0
- data/src/html_util.c +143 -0
- data/src/include/analyze.h +36 -0
- data/src/include/analyze_helpers.h +43 -0
- data/src/include/analyzed_ruby.h +33 -0
- data/src/include/array.h +33 -0
- data/src/include/ast_node.h +35 -0
- data/src/include/ast_nodes.h +303 -0
- data/src/include/ast_pretty_print.h +17 -0
- data/src/include/buffer.h +36 -0
- data/src/include/errors.h +125 -0
- data/src/include/extract.h +20 -0
- data/src/include/herb.h +32 -0
- data/src/include/html_util.h +13 -0
- data/src/include/io.h +9 -0
- data/src/include/json.h +28 -0
- data/src/include/lexer.h +13 -0
- data/src/include/lexer_peek_helpers.h +23 -0
- data/src/include/lexer_struct.h +32 -0
- data/src/include/location.h +25 -0
- data/src/include/macros.h +10 -0
- data/src/include/memory.h +12 -0
- data/src/include/parser.h +22 -0
- data/src/include/parser_helpers.h +33 -0
- data/src/include/position.h +22 -0
- data/src/include/pretty_print.h +53 -0
- data/src/include/prism_helpers.h +18 -0
- data/src/include/range.h +23 -0
- data/src/include/ruby_parser.h +6 -0
- data/src/include/token.h +25 -0
- data/src/include/token_matchers.h +21 -0
- data/src/include/token_struct.h +51 -0
- data/src/include/util.h +25 -0
- data/src/include/version.h +6 -0
- data/src/include/visitor.h +11 -0
- data/src/io.c +30 -0
- data/src/json.c +205 -0
- data/src/lexer.c +284 -0
- data/src/lexer_peek_helpers.c +59 -0
- data/src/location.c +41 -0
- data/src/main.c +162 -0
- data/src/memory.c +53 -0
- data/src/parser.c +704 -0
- data/src/parser_helpers.c +161 -0
- data/src/position.c +33 -0
- data/src/pretty_print.c +242 -0
- data/src/prism_helpers.c +50 -0
- data/src/range.c +38 -0
- data/src/ruby_parser.c +47 -0
- data/src/token.c +194 -0
- data/src/token_matchers.c +32 -0
- data/src/util.c +128 -0
- data/src/visitor.c +321 -0
- metadata +126 -82
- data/test/helper.rb +0 -7
- data/test/helpers_test.rb +0 -25
- data/test/parsing_test.rb +0 -110
data/src/lexer.c
ADDED
@@ -0,0 +1,284 @@
|
|
1
|
+
#include "include/buffer.h"
|
2
|
+
#include "include/lexer_peek_helpers.h"
|
3
|
+
#include "include/token.h"
|
4
|
+
#include "include/util.h"
|
5
|
+
|
6
|
+
#include <ctype.h>
|
7
|
+
#include <string.h>
|
8
|
+
|
9
|
+
#define LEXER_STALL_LIMIT 5
|
10
|
+
|
11
|
+
static size_t lexer_sizeof(void) {
|
12
|
+
return sizeof(struct LEXER_STRUCT);
|
13
|
+
}
|
14
|
+
|
15
|
+
static bool lexer_eof(const lexer_T* lexer) {
|
16
|
+
return lexer->current_character == '\0' || lexer->stalled;
|
17
|
+
}
|
18
|
+
|
19
|
+
static bool lexer_has_more_characters(const lexer_T* lexer) {
|
20
|
+
return lexer->current_position < lexer->source_length;
|
21
|
+
}
|
22
|
+
|
23
|
+
// Stall detector, called once per requested token.
//
// If the cursor has moved since the previous call, the stall counter is reset
// and the new position is remembered. Otherwise the counter is incremented,
// and once it exceeds LEXER_STALL_LIMIT the lexer is flagged as stalled.
// Returns the (possibly just updated) stalled flag.
static bool lexer_stalled(lexer_T* lexer) {
  const bool cursor_moved = lexer->last_position != lexer->current_position;

  if (cursor_moved) {
    lexer->stall_counter = 0;
    lexer->last_position = lexer->current_position;

    return lexer->stalled;
  }

  lexer->stall_counter++;

  if (lexer->stall_counter > LEXER_STALL_LIMIT) { lexer->stalled = true; }

  return lexer->stalled;
}
|
35
|
+
|
36
|
+
lexer_T* lexer_init(const char* source) {
|
37
|
+
if (source == NULL) { source = ""; }
|
38
|
+
|
39
|
+
lexer_T* lexer = calloc(1, lexer_sizeof());
|
40
|
+
|
41
|
+
lexer->state = STATE_DATA;
|
42
|
+
|
43
|
+
lexer->source = source;
|
44
|
+
lexer->source_length = strlen(source);
|
45
|
+
lexer->current_character = source[0];
|
46
|
+
|
47
|
+
lexer->current_line = 1;
|
48
|
+
lexer->current_column = 0;
|
49
|
+
lexer->current_position = 0;
|
50
|
+
|
51
|
+
lexer->previous_line = lexer->current_line;
|
52
|
+
lexer->previous_column = lexer->current_column;
|
53
|
+
lexer->previous_position = lexer->current_position;
|
54
|
+
|
55
|
+
lexer->stall_counter = 0;
|
56
|
+
lexer->last_position = 0;
|
57
|
+
lexer->stalled = false;
|
58
|
+
|
59
|
+
return lexer;
|
60
|
+
}
|
61
|
+
|
62
|
+
// Builds a TOKEN_ERROR token describing a lexing failure.
//
// The token value is a formatted line containing `message`, the current
// character and the current line/column. snprintf bounds the text to the
// 128-byte local buffer, so long messages are truncated.
token_T* lexer_error(lexer_T* lexer, const char* message) {
  char error_message[128];

  snprintf(
    error_message,
    sizeof(error_message),
    "[Lexer] Error: %s (character '%c', line %zu, col %zu)\n",
    message,
    lexer->current_character,
    lexer->current_line,
    lexer->current_column
  );

  // The stack buffer is passed directly. Other call sites in this file free
  // (or use automatic storage for) the value right after token_init(), so
  // token_init() is expected to keep its own copy; duplicating the message
  // here first only leaked the duplicate.
  return token_init(error_message, TOKEN_ERROR, lexer);
}
|
77
|
+
|
78
|
+
// Moves the cursor forward by one character and refreshes current_character.
//
// Does nothing once the source is exhausted or the lexer reports EOF. The
// column counter is only bumped when the character being left is not a
// newline.
static void lexer_advance(lexer_T* lexer) {
  if (!lexer_has_more_characters(lexer) || lexer_eof(lexer)) { return; }

  const bool leaving_newline = is_newline(lexer->current_character);

  if (!leaving_newline) { lexer->current_column++; }

  lexer->current_position++;
  lexer->current_character = lexer->source[lexer->current_position];
}
|
86
|
+
|
87
|
+
// Advances the cursor `count` times, one character per step.
static void lexer_advance_by(lexer_T* lexer, const size_t count) {
  size_t remaining = count;

  while (remaining > 0) {
    lexer_advance(lexer);
    remaining--;
  }
}
|
92
|
+
|
93
|
+
// Skips strlen(value) characters of input, then creates a token of `type`
// whose text is `value`.
static token_T* lexer_advance_with(lexer_T* lexer, const char* value, const token_type_T type) {
  const size_t length = strlen(value);

  lexer_advance_by(lexer, length);

  token_T* token = token_init(value, type, lexer);

  return token;
}
|
97
|
+
|
98
|
+
// Collects the next `count` characters from the input into a token of `type`.
//
// The characters are gathered in a temporary heap buffer which is released
// before returning (token_init is presumably keeping its own copy of the
// text). Returns NULL if the temporary buffer cannot be allocated.
static token_T* lexer_advance_with_next(lexer_T* lexer, size_t count, token_type_T type) {
  char* text = malloc(count + 1);

  if (text == NULL) { return NULL; }

  char* cursor = text;

  for (size_t remaining = count; remaining > 0; remaining--) {
    *cursor++ = lexer->current_character;
    lexer_advance(lexer);
  }

  *cursor = '\0';

  token_T* token = token_init(text, type, lexer);

  free(text);

  return token;
}
|
114
|
+
|
115
|
+
// Emits a token of `type` whose text is the single current character, and
// advances past it.
static token_T* lexer_advance_current(lexer_T* lexer, const token_type_T type) {
  const char single[2] = { lexer->current_character, '\0' };

  return lexer_advance_with(lexer, single, type);
}
|
118
|
+
|
119
|
+
// If the input at the cursor starts with `value`, consumes it and returns a
// token of `type`; otherwise leaves the lexer untouched and returns NULL.
static token_T* lexer_match_and_advance(lexer_T* lexer, const char* value, const token_type_T type) {
  const char* upcoming = lexer->source + lexer->current_position;
  const bool matches = strncmp(upcoming, value, strlen(value)) == 0;

  if (!matches) { return NULL; }

  return lexer_advance_with(lexer, value, type);
}
|
126
|
+
|
127
|
+
// ===== Specialized Parsers
|
128
|
+
|
129
|
+
// Consumes a run of whitespace that contains no line breaks and returns it as
// a TOKEN_WHITESPACE token.
//
// '\n' and '\r' end the run because the tokenizer emits them as separate
// newline tokens.
static token_T* lexer_parse_whitespace(lexer_T* lexer) {
  buffer_T buffer = buffer_new();

  // <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF); the cast keeps bytes >= 0x80 from being passed as
  // negative values.
  while (isspace((unsigned char) lexer->current_character) && lexer->current_character != '\n'
         && lexer->current_character != '\r' && !lexer_eof(lexer)) {
    buffer_append_char(&buffer, lexer->current_character);
    lexer_advance(lexer);
  }

  token_T* token = token_init(buffer.value, TOKEN_WHITESPACE, lexer);

  buffer_free(&buffer);

  return token;
}
|
144
|
+
|
145
|
+
// Consumes an identifier and returns it as a TOKEN_IDENTIFIER token.
//
// Identifier characters are alphanumerics plus '-', '_' and ':'. Scanning
// stops early when an HTML comment terminator ("-->") starts at the cursor, so
// the dashes of the terminator are not swallowed into the identifier.
static token_T* lexer_parse_identifier(lexer_T* lexer) {
  buffer_T buffer = buffer_new();

  // The unsigned char cast keeps bytes >= 0x80 from reaching isalnum() as
  // negative values, which <ctype.h> does not define.
  while ((isalnum((unsigned char) lexer->current_character) || lexer->current_character == '-'
          || lexer->current_character == '_' || lexer->current_character == ':')
         && !lexer_peek_for_html_comment_end(lexer, 0) && !lexer_eof(lexer)) {

    buffer_append_char(&buffer, lexer->current_character);
    lexer_advance(lexer);
  }

  token_T* token = token_init(buffer.value, TOKEN_IDENTIFIER, lexer);

  buffer_free(&buffer);

  return token;
}
|
162
|
+
|
163
|
+
// ===== ERB Parsing
|
164
|
+
|
165
|
+
// Lexes an ERB opening tag and switches the lexer into STATE_ERB_CONTENT.
//
// Candidates are tried in the listed order, so longer openers (e.g. "<%==")
// win over their shorter prefixes ("<%=", "<%"). If none matches, an error
// token is returned.
static token_T* lexer_parse_erb_open(lexer_T* lexer) {
  const char* openers[] = { "<%==", "<%=", "<%#", "<%-", "<%%", "<%" };
  const size_t opener_count = sizeof(openers) / sizeof(openers[0]);

  lexer->state = STATE_ERB_CONTENT;

  size_t index = 0;

  while (index < opener_count) {
    token_T* token = lexer_match_and_advance(lexer, openers[index], TOKEN_ERB_START);

    if (token != NULL) { return token; }

    index++;
  }

  return lexer_error(lexer, "Unexpected ERB start");
}
|
177
|
+
|
178
|
+
// Collects everything up to (but not including) the next ERB closing tag and
// returns it as a TOKEN_ERB_CONTENT token, moving the lexer to
// STATE_ERB_CLOSE.
//
// If the input ends before a closing tag is seen, the text collected so far is
// returned as a TOKEN_ERROR token instead and the state is left unchanged.
static token_T* lexer_parse_erb_content(lexer_T* lexer) {
  buffer_T buffer = buffer_new();

  while (!lexer_peek_erb_end(lexer, 0)) {
    if (lexer_eof(lexer)) {
      // Unexpected EOF: release the scratch buffer on this path too, exactly
      // as the success path below does after creating its token.
      token_T* error = token_init(buffer.value, TOKEN_ERROR, lexer);

      buffer_free(&buffer);

      return error;
    }

    buffer_append_char(&buffer, lexer->current_character);
    lexer_advance(lexer);
  }

  lexer->state = STATE_ERB_CLOSE;

  token_T* token = token_init(buffer.value, TOKEN_ERB_CONTENT, lexer);

  buffer_free(&buffer);

  return token;
}
|
198
|
+
|
199
|
+
// Lexes an ERB closing tag and returns the lexer to STATE_DATA.
//
// "%%>" and "-%>" are checked before falling back to the plain "%>" closer.
static token_T* lexer_parse_erb_close(lexer_T* lexer) {
  const char* closer = "%>";

  lexer->state = STATE_DATA;

  if (lexer_peek_erb_percent_close_tag(lexer, 0)) {
    closer = "%%>";
  } else if (lexer_peek_erb_dash_close_tag(lexer, 0)) {
    closer = "-%>";
  }

  return lexer_advance_with(lexer, closer, TOKEN_ERB_END);
}
|
207
|
+
|
208
|
+
// ===== Tokenizing Function
|
209
|
+
|
210
|
+
// Produces the next token from the input.
//
// Order of checks:
//   1. EOF (which includes a previously detected stall) yields TOKEN_EOF.
//   2. A newly detected stall yields an error token.
//   3. In the ERB content / ERB close states, lexing is delegated to the
//      matching ERB helper.
//   4. Otherwise newlines, whitespace, the UTF-8 non-breaking space and
//      HTML/ERB punctuation are recognized; anything else becomes an
//      identifier (alphanumeric start) or a single TOKEN_CHARACTER.
//
// All <ctype.h> calls cast their argument to unsigned char, because passing a
// negative char value (any byte >= 0x80 where char is signed) is undefined.
token_T* lexer_next_token(lexer_T* lexer) {
  if (lexer_eof(lexer)) { return token_init("", TOKEN_EOF, lexer); }
  if (lexer_stalled(lexer)) { return lexer_error(lexer, "Lexer stalled after 5 iterations"); }

  if (lexer->state == STATE_ERB_CONTENT) { return lexer_parse_erb_content(lexer); }
  if (lexer->state == STATE_ERB_CLOSE) { return lexer_parse_erb_close(lexer); }

  // "\r\n" is emitted as one newline token; a lone '\n' or '\r' as its own.
  if (lexer->current_character == '\r' && lexer_peek(lexer, 1) == '\n') {
    return lexer_advance_with_next(lexer, 2, TOKEN_NEWLINE);
  }
  if (lexer->current_character == '\n') { return lexer_advance_current(lexer, TOKEN_NEWLINE); }
  if (lexer->current_character == '\r') { return lexer_advance_current(lexer, TOKEN_NEWLINE); }

  if (isspace((unsigned char) lexer->current_character)) { return lexer_parse_whitespace(lexer); }

  // 0xC2 0xA0 is the UTF-8 encoding of U+00A0 (no-break space).
  if (lexer->current_character == '\xC2' && lexer_peek(lexer, 1) == '\xA0') {
    return lexer_advance_with(lexer, "\xC2\xA0", TOKEN_NBSP);
  }

  switch (lexer->current_character) {
    case '<': {
      if (lexer_peek(lexer, 1) == '%') { return lexer_parse_erb_open(lexer); }

      if (lexer_peek_for_doctype(lexer, 0)) {
        // Take the text from the source so the original letter case is kept.
        return lexer_advance_with_next(lexer, strlen("<!DOCTYPE"), TOKEN_HTML_DOCTYPE);
      }

      if (isalnum((unsigned char) lexer_peek(lexer, 1))) {
        return lexer_advance_current(lexer, TOKEN_HTML_TAG_START);
      }

      if (lexer_peek_for_html_comment_start(lexer, 0)) {
        return lexer_advance_with(lexer, "<!--", TOKEN_HTML_COMMENT_START);
      }

      if (lexer_peek(lexer, 1) == '/' && isalnum((unsigned char) lexer_peek(lexer, 2))) {
        return lexer_advance_with(lexer, "</", TOKEN_HTML_TAG_START_CLOSE);
      }

      return lexer_advance_current(lexer, TOKEN_LT);
    }

    case '/': {
      token_T* token = lexer_match_and_advance(lexer, "/>", TOKEN_HTML_TAG_SELF_CLOSE);
      return token ? token : lexer_advance_current(lexer, TOKEN_SLASH);
    }

    case '-': {
      token_T* token = lexer_match_and_advance(lexer, "-->", TOKEN_HTML_COMMENT_END);
      return token ? token : lexer_advance_current(lexer, TOKEN_DASH);
    }

    case '>': return lexer_advance_current(lexer, TOKEN_HTML_TAG_END);
    case '_': return lexer_advance_current(lexer, TOKEN_UNDERSCORE);
    case ':': return lexer_advance_current(lexer, TOKEN_COLON);
    case ';': return lexer_advance_current(lexer, TOKEN_SEMICOLON);
    case '&': return lexer_advance_current(lexer, TOKEN_AMPERSAND);
    case '!': return lexer_advance_current(lexer, TOKEN_EXCLAMATION);
    case '=': return lexer_advance_current(lexer, TOKEN_EQUALS);
    case '%': return lexer_advance_current(lexer, TOKEN_PERCENT);

    case '"':
    case '\'': return lexer_advance_current(lexer, TOKEN_QUOTE);

    default: {
      if (isalnum((unsigned char) lexer->current_character)) { return lexer_parse_identifier(lexer); }

      return lexer_advance_current(lexer, TOKEN_CHARACTER);
    }
  }
}
|
279
|
+
|
280
|
+
// Releases a lexer created by lexer_init(). Passing NULL is allowed.
// Only the lexer structure itself is freed here.
void lexer_free(lexer_T* lexer) {
  // free(NULL) is a no-op, so no separate NULL branch is needed.
  free(lexer);
}
|
data/src/lexer_peek_helpers.c
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
#include "include/lexer_peek_helpers.h"
|
2
|
+
#include "include/lexer_struct.h"
|
3
|
+
#include "include/macros.h"
|
4
|
+
|
5
|
+
#include <ctype.h>
|
6
|
+
#include <stdbool.h>
|
7
|
+
|
8
|
+
char lexer_backtrack(const lexer_T* lexer, const int offset) {
|
9
|
+
return lexer->source[MAX(lexer->current_position - offset, 0)];
|
10
|
+
}
|
11
|
+
|
12
|
+
char lexer_peek(const lexer_T* lexer, const int offset) {
|
13
|
+
return lexer->source[MIN(lexer->current_position + offset, lexer->source_length)];
|
14
|
+
}
|
15
|
+
|
16
|
+
// Checks whether the input starting `offset` characters ahead of the cursor
// matches `pattern` in full, without consuming anything.
//
// When `case_insensitive` is true, both sides are compared through tolower().
// The unsigned char casts are required because tolower() is undefined for
// negative char values (bytes >= 0x80 where char is signed).
bool lexer_peek_for(const lexer_T* lexer, const int offset, const char* pattern, const bool case_insensitive) {
  for (int index = 0; pattern[index] != '\0'; index++) {
    const char character = lexer_peek(lexer, offset + index);
    const char expected = pattern[index];

    if (case_insensitive) {
      if (tolower((unsigned char) character) != tolower((unsigned char) expected)) { return false; }
    } else if (character != expected) {
      return false;
    }
  }

  return true;
}
|
29
|
+
|
30
|
+
// True when "<!DOCTYPE" (compared case-insensitively) starts at `offset`.
bool lexer_peek_for_doctype(const lexer_T* lexer, const int offset) {
  const char* doctype_opener = "<!DOCTYPE";

  return lexer_peek_for(lexer, offset, doctype_opener, true);
}
|
33
|
+
|
34
|
+
// True when an HTML comment opener ("<!--") starts at `offset`.
bool lexer_peek_for_html_comment_start(const lexer_T* lexer, const int offset) {
  const char* comment_opener = "<!--";

  return lexer_peek_for(lexer, offset, comment_opener, false);
}
|
37
|
+
|
38
|
+
// True when an HTML comment terminator ("-->") starts at `offset`.
bool lexer_peek_for_html_comment_end(const lexer_T* lexer, const int offset) {
  const char* comment_closer = "-->";

  return lexer_peek_for(lexer, offset, comment_closer, false);
}
|
41
|
+
|
42
|
+
// True when the plain ERB closer ("%>") starts at `offset`.
bool lexer_peek_erb_close_tag(const lexer_T* lexer, const int offset) {
  const char* erb_closer = "%>";

  return lexer_peek_for(lexer, offset, erb_closer, false);
}
|
45
|
+
|
46
|
+
// True when the dash-prefixed ERB closer ("-%>") starts at `offset`.
bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, const int offset) {
  const char* erb_dash_closer = "-%>";

  return lexer_peek_for(lexer, offset, erb_dash_closer, false);
}
|
49
|
+
|
50
|
+
// True when the percent-prefixed ERB closer ("%%>") starts at `offset`.
bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, const int offset) {
  const char* erb_percent_closer = "%%>";

  return lexer_peek_for(lexer, offset, erb_percent_closer, false);
}
|
53
|
+
|
54
|
+
// True when any ERB closing sequence ("%>", "-%>" or "%%>") starts at
// `offset`. The variants are tried in that order and the first hit wins.
bool lexer_peek_erb_end(const lexer_T* lexer, const int offset) {
  if (lexer_peek_erb_close_tag(lexer, offset)) { return true; }
  if (lexer_peek_erb_dash_close_tag(lexer, offset)) { return true; }

  return lexer_peek_erb_percent_close_tag(lexer, offset);
}
|
data/src/location.c
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#include "include/location.h"
|
2
|
+
#include "include/memory.h"
|
3
|
+
#include "include/position.h"
|
4
|
+
|
5
|
+
size_t location_sizeof(void) {
|
6
|
+
return sizeof(location_T);
|
7
|
+
}
|
8
|
+
|
9
|
+
// Allocates a location spanning `start` to `end`.
//
// The two position pointers are stored as given (no copy is made), and
// location_free() later releases them together with the location.
location_T* location_init(position_T* start, position_T* end) {
  location_T* created = safe_malloc(location_sizeof());

  created->end = end;
  created->start = start;

  return created;
}
|
17
|
+
|
18
|
+
location_T* location_from(size_t start_line, size_t start_column, size_t end_line, size_t end_column) {
|
19
|
+
return location_init(position_init(start_line, start_column), position_init(end_line, end_column));
|
20
|
+
}
|
21
|
+
|
22
|
+
// Accessor for the start position; returns the stored pointer, not a copy.
position_T* location_start(location_T* location) {
  position_T* start = location->start;

  return start;
}
|
25
|
+
|
26
|
+
// Accessor for the end position; returns the stored pointer, not a copy.
position_T* location_end(location_T* location) {
  position_T* end = location->end;

  return end;
}
|
29
|
+
|
30
|
+
// Returns a new location whose start and end are produced by position_copy()
// from the original's positions. A NULL input yields NULL.
location_T* location_copy(location_T* location) {
  if (location != NULL) {
    position_T* start = position_copy(location->start);
    position_T* end = position_copy(location->end);

    return location_init(start, end);
  }

  return NULL;
}
|
35
|
+
|
36
|
+
// Releases a location together with the start and end positions it holds.
//
// Passing NULL is a no-op, matching lexer_free() and location_copy(), which
// both accept NULL.
void location_free(location_T* location) {
  if (location == NULL) { return; }

  if (location->start != NULL) { position_free(location->start); }
  if (location->end != NULL) { position_free(location->end); }

  free(location);
}
|
data/src/main.c
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
#define _POSIX_C_SOURCE 199309L // Enables `clock_gettime()`
|
2
|
+
|
3
|
+
#include "include/analyze.h"
|
4
|
+
#include "include/ast_node.h"
|
5
|
+
#include "include/ast_nodes.h"
|
6
|
+
#include "include/ast_pretty_print.h"
|
7
|
+
#include "include/buffer.h"
|
8
|
+
#include "include/extract.h"
|
9
|
+
#include "include/herb.h"
|
10
|
+
#include "include/io.h"
|
11
|
+
#include "include/ruby_parser.h"
|
12
|
+
|
13
|
+
#include <stdio.h>
|
14
|
+
#include <string.h>
|
15
|
+
#include <time.h>
|
16
|
+
|
17
|
+
// Prints the time elapsed between `start` and `end` to stdout in
// microseconds, milliseconds and seconds. `verb` names the measured activity
// and is inserted into the heading line.
void print_time_diff(const struct timespec start, const struct timespec end, const char* verb) {
  const double delta_seconds = (double) end.tv_sec - (double) start.tv_sec;
  const double delta_nanoseconds = (double) end.tv_nsec - (double) start.tv_nsec;
  const double elapsed_ns = delta_seconds * 1e9 + delta_nanoseconds;

  printf("Finished %s in:\n\n", verb);

  printf(" %8.0f µs\n", elapsed_ns / 1e3);
  printf(" %8.3f ms\n", elapsed_ns / 1e6);
  printf(" %8.6f s\n\n", elapsed_ns / 1e9);
}
|
34
|
+
|
35
|
+
int main(const int argc, char* argv[]) {
|
36
|
+
if (argc < 2) {
|
37
|
+
printf("./herb [command] [options]\n\n");
|
38
|
+
|
39
|
+
printf("Herb 🌿 Powerful and seamless HTML-aware ERB parsing.\n\n");
|
40
|
+
|
41
|
+
printf("./herb lex [file] - Lex a file\n");
|
42
|
+
printf("./herb lex_json [file] - Lex a file and return the result as json.\n");
|
43
|
+
printf("./herb parse [file] - Parse a file\n");
|
44
|
+
printf("./herb ruby [file] - Extract Ruby from a file\n");
|
45
|
+
printf("./herb html [file] - Extract HTML from a file\n");
|
46
|
+
printf("./herb prism [file] - Extract Ruby from a file and parse the Ruby source with Prism\n");
|
47
|
+
|
48
|
+
return 1;
|
49
|
+
}
|
50
|
+
|
51
|
+
if (argc < 3) {
|
52
|
+
printf("Please specify input file.\n");
|
53
|
+
return 1;
|
54
|
+
}
|
55
|
+
|
56
|
+
buffer_T output;
|
57
|
+
|
58
|
+
if (!buffer_init(&output)) { return 1; }
|
59
|
+
|
60
|
+
char* source = herb_read_file(argv[2]);
|
61
|
+
|
62
|
+
struct timespec start, end;
|
63
|
+
clock_gettime(CLOCK_MONOTONIC, &start);
|
64
|
+
|
65
|
+
if (strcmp(argv[1], "visit") == 0) {
|
66
|
+
AST_DOCUMENT_NODE_T* root = herb_parse(source);
|
67
|
+
clock_gettime(CLOCK_MONOTONIC, &end);
|
68
|
+
|
69
|
+
herb_analyze_parse_tree(root, source);
|
70
|
+
|
71
|
+
ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output);
|
72
|
+
printf("%s\n", output.value);
|
73
|
+
|
74
|
+
print_time_diff(start, end, "visiting");
|
75
|
+
|
76
|
+
ast_node_free((AST_NODE_T*) root);
|
77
|
+
buffer_free(&output);
|
78
|
+
free(source);
|
79
|
+
|
80
|
+
return 0;
|
81
|
+
}
|
82
|
+
|
83
|
+
if (strcmp(argv[1], "lex") == 0) {
|
84
|
+
herb_lex_to_buffer(source, &output);
|
85
|
+
clock_gettime(CLOCK_MONOTONIC, &end);
|
86
|
+
|
87
|
+
printf("%s\n", output.value);
|
88
|
+
print_time_diff(start, end, "lexing");
|
89
|
+
|
90
|
+
buffer_free(&output);
|
91
|
+
free(source);
|
92
|
+
|
93
|
+
return 0;
|
94
|
+
}
|
95
|
+
|
96
|
+
if (strcmp(argv[1], "lex_json") == 0) {
|
97
|
+
herb_lex_json_to_buffer(source, &output);
|
98
|
+
|
99
|
+
printf("%s\n", output.value);
|
100
|
+
|
101
|
+
buffer_free(&output);
|
102
|
+
free(source);
|
103
|
+
|
104
|
+
return 0;
|
105
|
+
}
|
106
|
+
|
107
|
+
if (strcmp(argv[1], "parse") == 0) {
|
108
|
+
AST_DOCUMENT_NODE_T* root = herb_parse(source);
|
109
|
+
clock_gettime(CLOCK_MONOTONIC, &end);
|
110
|
+
|
111
|
+
ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output);
|
112
|
+
printf("%s\n", output.value);
|
113
|
+
|
114
|
+
print_time_diff(start, end, "parsing");
|
115
|
+
|
116
|
+
ast_node_free((AST_NODE_T*) root);
|
117
|
+
buffer_free(&output);
|
118
|
+
free(source);
|
119
|
+
|
120
|
+
return 0;
|
121
|
+
}
|
122
|
+
|
123
|
+
if (strcmp(argv[1], "ruby") == 0) {
|
124
|
+
herb_extract_ruby_to_buffer(source, &output);
|
125
|
+
clock_gettime(CLOCK_MONOTONIC, &end);
|
126
|
+
|
127
|
+
printf("%s\n", output.value);
|
128
|
+
print_time_diff(start, end, "extracting Ruby");
|
129
|
+
|
130
|
+
buffer_free(&output);
|
131
|
+
free(source);
|
132
|
+
|
133
|
+
return 0;
|
134
|
+
}
|
135
|
+
|
136
|
+
if (strcmp(argv[1], "html") == 0) {
|
137
|
+
herb_extract_html_to_buffer(source, &output);
|
138
|
+
clock_gettime(CLOCK_MONOTONIC, &end);
|
139
|
+
|
140
|
+
printf("%s\n", output.value);
|
141
|
+
print_time_diff(start, end, "extracting HTML");
|
142
|
+
|
143
|
+
buffer_free(&output);
|
144
|
+
free(source);
|
145
|
+
|
146
|
+
return 0;
|
147
|
+
}
|
148
|
+
|
149
|
+
if (strcmp(argv[1], "prism") == 0) {
|
150
|
+
printf("HTML+ERB File: \n%s\n", source);
|
151
|
+
|
152
|
+
char* ruby_source = herb_extract(source, HERB_EXTRACT_LANGUAGE_RUBY);
|
153
|
+
printf("Extracted Ruby: \n%s\n", ruby_source);
|
154
|
+
|
155
|
+
herb_parse_ruby_to_stdout(ruby_source);
|
156
|
+
|
157
|
+
return 0;
|
158
|
+
}
|
159
|
+
|
160
|
+
printf("Unknown Command: %s\n", argv[1]);
|
161
|
+
return 1;
|
162
|
+
}
|
data/src/memory.c
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
#include "memory.h"
|
2
|
+
|
3
|
+
#include <stdbool.h>
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <stdlib.h>
|
6
|
+
|
7
|
+
// Shared allocation helper behind safe_malloc() and nullable_safe_malloc().
//
// A zero-byte request returns NULL without calling malloc and without
// printing anything. If malloc fails, a message is written to stderr; then
// the process exits with status 1 when `fail_fast` is set, otherwise NULL is
// returned.
static void* safe_malloc_internal(const size_t size, const bool fail_fast) {
  if (size == 0) { return NULL; }

  void* block = malloc(size);

  if (block != NULL) { return block; }

  fprintf(stderr, "Error: Failed to allocate %zu bytes.\n", size);
  fflush(stderr);

  if (fail_fast) { exit(1); }

  return NULL;
}
|
21
|
+
|
22
|
+
// Shared reallocation helper behind safe_realloc() and
// nullable_safe_realloc().
//
// A zero `new_size` returns NULL immediately; `pointer` is left untouched (it
// is not freed). A NULL `pointer` is served as a plain allocation through
// safe_malloc_internal(). If realloc fails, a message is written to stderr;
// then the process exits with status 1 when `fail_fast` is set, otherwise
// NULL is returned.
static void* safe_realloc_internal(void* pointer, const size_t new_size, const bool fail_fast) {
  if (new_size == 0) { return NULL; }

  if (pointer == NULL) { return safe_malloc_internal(new_size, fail_fast); }

  void* resized = realloc(pointer, new_size);

  if (resized != NULL) { return resized; }

  fprintf(stderr, "Error: Memory reallocation failed (size: %zu bytes).\n", new_size);
  fflush(stderr);

  if (fail_fast) { exit(1); }

  return NULL;
}
|
38
|
+
|
39
|
+
// Allocates `size` bytes in fail-fast mode: an allocation failure terminates
// the process. A zero `size` returns NULL.
void* safe_malloc(const size_t size) {
  const bool fail_fast = true;

  return safe_malloc_internal(size, fail_fast);
}
|
42
|
+
|
43
|
+
// Allocates `size` bytes; on failure (or for a zero `size`) NULL is returned
// to the caller instead of terminating the process.
void* nullable_safe_malloc(const size_t size) {
  const bool fail_fast = false;

  return safe_malloc_internal(size, fail_fast);
}
|
46
|
+
|
47
|
+
// Resizes `pointer` to `new_size` bytes in fail-fast mode: a reallocation
// failure terminates the process. A zero `new_size` returns NULL.
void* safe_realloc(void* pointer, const size_t new_size) {
  const bool fail_fast = true;

  return safe_realloc_internal(pointer, new_size, fail_fast);
}
|
50
|
+
|
51
|
+
// Resizes `pointer` to `new_size` bytes; on failure (or for a zero
// `new_size`) NULL is returned to the caller instead of terminating the
// process.
void* nullable_safe_realloc(void* pointer, const size_t new_size) {
  const bool fail_fast = false;

  return safe_realloc_internal(pointer, new_size, fail_fast);
}
|