herb 0.5.0-x86-linux-musl → 0.6.0-x86-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +6 -3
- data/ext/herb/error_helpers.c +1 -1
- data/ext/herb/error_helpers.h +1 -1
- data/ext/herb/extension.c +20 -4
- data/ext/herb/nodes.c +70 -41
- data/ext/herb/nodes.h +1 -1
- data/lib/herb/3.0/herb.so +0 -0
- data/lib/herb/3.1/herb.so +0 -0
- data/lib/herb/3.2/herb.so +0 -0
- data/lib/herb/3.3/herb.so +0 -0
- data/lib/herb/3.4/herb.so +0 -0
- data/lib/herb/ast/nodes.rb +149 -91
- data/lib/herb/cli.rb +19 -6
- data/lib/herb/errors.rb +1 -1
- data/lib/herb/version.rb +1 -1
- data/lib/herb/visitor.rb +11 -6
- data/sig/herb/ast/nodes.rbs +65 -38
- data/sig/herb/visitor.rbs +6 -3
- data/sig/serialized_ast_errors.rbs +1 -1
- data/sig/serialized_ast_nodes.rbs +15 -10
- data/src/ast_nodes.c +79 -36
- data/src/ast_pretty_print.c +21 -13
- data/src/errors.c +1 -1
- data/src/herb.c +2 -2
- data/src/include/ast_nodes.h +23 -15
- data/src/include/ast_pretty_print.h +1 -1
- data/src/include/errors.h +1 -1
- data/src/include/herb.h +2 -1
- data/src/include/lexer_peek_helpers.h +21 -0
- data/src/include/parser.h +6 -1
- data/src/include/token_struct.h +6 -1
- data/src/include/version.h +1 -1
- data/src/lexer.c +20 -1
- data/src/lexer_peek_helpers.c +77 -0
- data/src/main.c +2 -2
- data/src/parser.c +435 -122
- data/src/token.c +5 -0
- data/src/visitor.c +39 -6
- metadata +2 -2
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/ast_pretty_print.h.erb
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-0.6.0/templates/src/include/ast_pretty_print.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_AST_PRETTY_PRINT_H
|
5
5
|
#define HERB_AST_PRETTY_PRINT_H
|
data/src/include/errors.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
// NOTE: This file is generated by the templates/template.rb script and should not
|
2
|
-
// be modified manually. See /Users/marcoroth/Development/herb-release-0.5.0/templates/src/include/errors.h.erb
|
2
|
+
// be modified manually. See /Users/marcoroth/Development/herb-release-0.6.0/templates/src/include/errors.h.erb
|
3
3
|
|
4
4
|
#ifndef HERB_ERRORS_H
|
5
5
|
#define HERB_ERRORS_H
|
data/src/include/herb.h
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
#include "ast_node.h"
|
6
6
|
#include "buffer.h"
|
7
7
|
#include "extract.h"
|
8
|
+
#include "parser.h"
|
8
9
|
|
9
10
|
#include <stdint.h>
|
10
11
|
|
@@ -18,7 +19,7 @@ void herb_lex_json_to_buffer(const char* source, buffer_T* output);
|
|
18
19
|
array_T* herb_lex(const char* source);
|
19
20
|
array_T* herb_lex_file(const char* path);
|
20
21
|
|
21
|
-
AST_DOCUMENT_NODE_T* herb_parse(const char* source);
|
22
|
+
AST_DOCUMENT_NODE_T* herb_parse(const char* source, parser_options_T* options);
|
22
23
|
|
23
24
|
const char* herb_version(void);
|
24
25
|
const char* herb_prism_version(void);
|
@@ -2,13 +2,28 @@
|
|
2
2
|
#define HERB_LEXER_PEEK_HELPERS_H
|
3
3
|
|
4
4
|
#include "lexer_struct.h"
|
5
|
+
#include "token_struct.h"
|
5
6
|
|
6
7
|
#include <stdbool.h>
|
7
8
|
#include <stdio.h>
|
8
9
|
#include <stdlib.h>
|
9
10
|
|
11
|
+
typedef struct {
|
12
|
+
size_t position;
|
13
|
+
size_t line;
|
14
|
+
size_t column;
|
15
|
+
size_t previous_position;
|
16
|
+
size_t previous_line;
|
17
|
+
size_t previous_column;
|
18
|
+
char current_character;
|
19
|
+
lexer_state_T state;
|
20
|
+
} lexer_state_snapshot_T;
|
21
|
+
|
10
22
|
char lexer_peek(const lexer_T* lexer, int offset);
|
11
23
|
bool lexer_peek_for_doctype(const lexer_T* lexer, int offset);
|
24
|
+
bool lexer_peek_for_xml_declaration(const lexer_T* lexer, int offset);
|
25
|
+
bool lexer_peek_for_cdata_start(const lexer_T* lexer, int offset);
|
26
|
+
bool lexer_peek_for_cdata_end(const lexer_T* lexer, int offset);
|
12
27
|
|
13
28
|
bool lexer_peek_for_html_comment_start(const lexer_T* lexer, int offset);
|
14
29
|
bool lexer_peek_for_html_comment_end(const lexer_T* lexer, int offset);
|
@@ -20,4 +35,10 @@ bool lexer_peek_erb_end(const lexer_T* lexer, int offset);
|
|
20
35
|
|
21
36
|
char lexer_backtrack(const lexer_T* lexer, int offset);
|
22
37
|
|
38
|
+
bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type);
|
39
|
+
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, int offset);
|
40
|
+
|
41
|
+
lexer_state_snapshot_T lexer_save_state(lexer_T* lexer);
|
42
|
+
void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot);
|
43
|
+
|
23
44
|
#endif
|
data/src/include/parser.h
CHANGED
@@ -15,15 +15,20 @@ typedef enum {
|
|
15
15
|
|
16
16
|
typedef enum { PARSER_STATE_DATA, PARSER_STATE_FOREIGN_CONTENT } parser_state_T;
|
17
17
|
|
18
|
+
// Caller-supplied parser configuration, passed by pointer to parser_init() and herb_parse().
// The command-line driver passes NULL for it.
typedef struct PARSER_OPTIONS_STRUCT {
  // NOTE(review): presumably asks the parser to keep whitespace in its output;
  // confirm against parser.c, which is not visible here.
  bool track_whitespace;
} parser_options_T;
|
21
|
+
|
18
22
|
typedef struct PARSER_STRUCT {
|
19
23
|
lexer_T* lexer;
|
20
24
|
token_T* current_token;
|
21
25
|
array_T* open_tags_stack;
|
22
26
|
parser_state_T state;
|
23
27
|
foreign_content_type_T foreign_content_type;
|
28
|
+
parser_options_T* options;
|
24
29
|
} parser_T;
|
25
30
|
|
26
|
-
parser_T* parser_init(lexer_T* lexer);
|
31
|
+
parser_T* parser_init(lexer_T* lexer, parser_options_T* options);
|
27
32
|
|
28
33
|
AST_DOCUMENT_NODE_T* parser_parse(parser_T* parser);
|
29
34
|
|
data/src/include/token_struct.h
CHANGED
@@ -10,7 +10,11 @@ typedef enum {
|
|
10
10
|
TOKEN_NEWLINE, // \n
|
11
11
|
TOKEN_IDENTIFIER,
|
12
12
|
|
13
|
-
TOKEN_HTML_DOCTYPE,
|
13
|
+
TOKEN_HTML_DOCTYPE, // <!DOCTYPE, <!doctype, <!DoCtYpE, <!dOcTyPe
|
14
|
+
TOKEN_XML_DECLARATION, // <?xml
|
15
|
+
TOKEN_XML_DECLARATION_END, // ?>
|
16
|
+
TOKEN_CDATA_START, // <![CDATA[
|
17
|
+
TOKEN_CDATA_END, // ]]>
|
14
18
|
|
15
19
|
TOKEN_HTML_TAG_START, // <
|
16
20
|
TOKEN_HTML_TAG_START_CLOSE, // </
|
@@ -29,6 +33,7 @@ typedef enum {
|
|
29
33
|
TOKEN_EQUALS, // =
|
30
34
|
TOKEN_QUOTE, // ", '
|
31
35
|
TOKEN_BACKTICK, // `
|
36
|
+
TOKEN_BACKSLASH, // backslash
|
32
37
|
TOKEN_DASH, // -
|
33
38
|
TOKEN_UNDERSCORE, // _
|
34
39
|
TOKEN_EXCLAMATION, // !
|
data/src/include/version.h
CHANGED
data/src/lexer.c
CHANGED
@@ -290,13 +290,21 @@ token_T* lexer_next_token(lexer_T* lexer) {
|
|
290
290
|
return lexer_advance_with_next(lexer, strlen("<!DOCTYPE"), TOKEN_HTML_DOCTYPE);
|
291
291
|
}
|
292
292
|
|
293
|
+
if (lexer_peek_for_xml_declaration(lexer, 0)) {
|
294
|
+
return lexer_advance_with_next(lexer, strlen("<?xml"), TOKEN_XML_DECLARATION);
|
295
|
+
}
|
296
|
+
|
297
|
+
if (lexer_peek_for_cdata_start(lexer, 0)) {
|
298
|
+
return lexer_advance_with_next(lexer, strlen("<![CDATA["), TOKEN_CDATA_START);
|
299
|
+
}
|
300
|
+
|
293
301
|
if (isalnum(lexer_peek(lexer, 1))) { return lexer_advance_current(lexer, TOKEN_HTML_TAG_START); }
|
294
302
|
|
295
303
|
if (lexer_peek_for_html_comment_start(lexer, 0)) {
|
296
304
|
return lexer_advance_with(lexer, "<!--", TOKEN_HTML_COMMENT_START);
|
297
305
|
}
|
298
306
|
|
299
|
-
if (
|
307
|
+
if (lexer_peek_for_close_tag_start(lexer, 0)) {
|
300
308
|
return lexer_advance_with(lexer, "</", TOKEN_HTML_TAG_START_CLOSE);
|
301
309
|
}
|
302
310
|
|
@@ -308,11 +316,21 @@ token_T* lexer_next_token(lexer_T* lexer) {
|
|
308
316
|
return token ? token : lexer_advance_current(lexer, TOKEN_SLASH);
|
309
317
|
}
|
310
318
|
|
319
|
+
case '?': {
|
320
|
+
token_T* token = lexer_match_and_advance(lexer, "?>", TOKEN_XML_DECLARATION_END);
|
321
|
+
return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
|
322
|
+
}
|
323
|
+
|
311
324
|
case '-': {
|
312
325
|
token_T* token = lexer_match_and_advance(lexer, "-->", TOKEN_HTML_COMMENT_END);
|
313
326
|
return token ? token : lexer_advance_current(lexer, TOKEN_DASH);
|
314
327
|
}
|
315
328
|
|
329
|
+
case ']': {
|
330
|
+
token_T* token = lexer_match_and_advance(lexer, "]]>", TOKEN_CDATA_END);
|
331
|
+
return token ? token : lexer_advance_current(lexer, TOKEN_CHARACTER);
|
332
|
+
}
|
333
|
+
|
316
334
|
case '>': return lexer_advance_current(lexer, TOKEN_HTML_TAG_END);
|
317
335
|
case '_': return lexer_advance_current(lexer, TOKEN_UNDERSCORE);
|
318
336
|
case ':': return lexer_advance_current(lexer, TOKEN_COLON);
|
@@ -326,6 +344,7 @@ token_T* lexer_next_token(lexer_T* lexer) {
|
|
326
344
|
case '"':
|
327
345
|
case '\'': return lexer_advance_current(lexer, TOKEN_QUOTE);
|
328
346
|
case '`': return lexer_advance_current(lexer, TOKEN_BACKTICK);
|
347
|
+
case '\\': return lexer_advance_current(lexer, TOKEN_BACKSLASH);
|
329
348
|
|
330
349
|
default: {
|
331
350
|
if (isalnum(lexer->current_character)) { return lexer_parse_identifier(lexer); }
|
data/src/lexer_peek_helpers.c
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#include "include/lexer_peek_helpers.h"
|
2
|
+
#include "include/lexer.h"
|
2
3
|
#include "include/lexer_struct.h"
|
3
4
|
#include "include/macros.h"
|
5
|
+
#include "include/token.h"
|
4
6
|
|
5
7
|
#include <ctype.h>
|
6
8
|
#include <stdbool.h>
|
@@ -31,6 +33,18 @@ bool lexer_peek_for_doctype(const lexer_T* lexer, const int offset) {
|
|
31
33
|
return lexer_peek_for(lexer, offset, "<!DOCTYPE", true);
|
32
34
|
}
|
33
35
|
|
36
|
+
// Reports whether lexer_peek_for() matches the XML declaration opener "<?xml" at `offset`.
// The final `true` is the same flag value the doctype check passes; it presumably selects
// case-insensitive matching (TODO confirm against lexer_peek_for).
bool lexer_peek_for_xml_declaration(const lexer_T* lexer, const int offset) {
  const bool is_xml_declaration = lexer_peek_for(lexer, offset, "<?xml", true);

  return is_xml_declaration;
}
|
39
|
+
|
40
|
+
// Reports whether lexer_peek_for() matches the CDATA opener "<![CDATA[" at `offset`.
// The final `false` is the same flag value the HTML comment checks pass.
bool lexer_peek_for_cdata_start(const lexer_T* lexer, const int offset) {
  const bool is_cdata_start = lexer_peek_for(lexer, offset, "<![CDATA[", false);

  return is_cdata_start;
}
|
43
|
+
|
44
|
+
// Reports whether lexer_peek_for() matches the CDATA terminator "]]>" at `offset`.
// The final `false` is the same flag value the HTML comment checks pass.
bool lexer_peek_for_cdata_end(const lexer_T* lexer, const int offset) {
  const bool is_cdata_end = lexer_peek_for(lexer, offset, "]]>", false);

  return is_cdata_end;
}
|
47
|
+
|
34
48
|
// Reports whether lexer_peek_for() matches the HTML comment opener "<!--" at `offset`.
bool lexer_peek_for_html_comment_start(const lexer_T* lexer, const int offset) {
  const bool is_comment_start = lexer_peek_for(lexer, offset, "<!--", false);

  return is_comment_start;
}
|
@@ -57,3 +71,66 @@ bool lexer_peek_erb_end(const lexer_T* lexer, const int offset) {
|
|
57
71
|
|| lexer_peek_erb_percent_close_tag(lexer, offset)
|
58
72
|
);
|
59
73
|
}
|
74
|
+
|
75
|
+
// Looks ahead, without consuming input, for the first token that is neither
// TOKEN_WHITESPACE nor TOKEN_NEWLINE and reports whether its type equals `token_type`.
//
// The lookahead runs the real tokenizer (lexer_next_token), which advances the lexer, so
// the lexer is snapshotted first and rolled back afterwards. The rollback goes through
// lexer_save_state()/lexer_restore_state() so that the previous_* fields and lexer->state
// are restored together with the current position. Restoring only the current
// position, line, column and character would leave those other fields as the lookahead
// left them.
//
// Every token produced during the lookahead is freed here; the caller owns nothing.
// Returns false when lexer_next_token() yields NULL.
bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type) {
  const lexer_state_snapshot_T snapshot = lexer_save_state(lexer);

  token_T* token = lexer_next_token(lexer);

  while (token && (token->type == TOKEN_WHITESPACE || token->type == TOKEN_NEWLINE)) {
    token_free(token);
    token = lexer_next_token(lexer);
  }

  const bool result = (token && token->type == token_type);

  if (token) { token_free(token); }

  lexer_restore_state(lexer, snapshot);

  return result;
}
|
99
|
+
|
100
|
+
// Reports whether the characters at `offset` look like the start of a closing tag:
// "</", then any run of spaces, tabs, newlines or carriage returns, then a letter or '_'.
// Only peeks; the lexer is not modified.
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, const int offset) {
  if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }

  int pos = offset + 2;
  char c = lexer_peek(lexer, pos);

  // Skip whitespace between "</" and the tag name.
  while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
    pos++;
    c = lexer_peek(lexer, pos);
  }

  // <ctype.h> classifiers require a value representable as unsigned char (or EOF);
  // a plain char may be negative for bytes >= 0x80, so convert before classifying.
  return isalpha((unsigned char) c) || c == '_';
}
|
114
|
+
|
115
|
+
// Copies the lexer's current position, previous position, current character and state
// into a snapshot value that lexer_restore_state() can later write back.
// The lexer itself is only read.
lexer_state_snapshot_T lexer_save_state(lexer_T* lexer) {
  lexer_state_snapshot_T saved;

  saved.position = lexer->current_position;
  saved.line = lexer->current_line;
  saved.column = lexer->current_column;

  saved.previous_position = lexer->previous_position;
  saved.previous_line = lexer->previous_line;
  saved.previous_column = lexer->previous_column;

  saved.current_character = lexer->current_character;
  saved.state = lexer->state;

  return saved;
}
|
126
|
+
|
127
|
+
// Writes every field of `snapshot` back into `lexer`, undoing any changes made to those
// fields since the matching lexer_save_state() call. Fields of lexer_T that the snapshot
// does not carry are left untouched.
void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot) {
  const lexer_state_snapshot_T* saved = &snapshot;

  lexer->state = saved->state;
  lexer->current_character = saved->current_character;

  lexer->current_position = saved->position;
  lexer->current_line = saved->line;
  lexer->current_column = saved->column;

  lexer->previous_position = saved->previous_position;
  lexer->previous_line = saved->previous_line;
  lexer->previous_column = saved->previous_column;
}
|
data/src/main.c
CHANGED
@@ -63,7 +63,7 @@ int main(const int argc, char* argv[]) {
|
|
63
63
|
clock_gettime(CLOCK_MONOTONIC, &start);
|
64
64
|
|
65
65
|
if (strcmp(argv[1], "visit") == 0) {
|
66
|
-
AST_DOCUMENT_NODE_T* root = herb_parse(source);
|
66
|
+
AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL);
|
67
67
|
clock_gettime(CLOCK_MONOTONIC, &end);
|
68
68
|
|
69
69
|
herb_analyze_parse_tree(root, source);
|
@@ -105,7 +105,7 @@ int main(const int argc, char* argv[]) {
|
|
105
105
|
}
|
106
106
|
|
107
107
|
if (strcmp(argv[1], "parse") == 0) {
|
108
|
-
AST_DOCUMENT_NODE_T* root = herb_parse(source);
|
108
|
+
AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL);
|
109
109
|
clock_gettime(CLOCK_MONOTONIC, &end);
|
110
110
|
|
111
111
|
ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output);
|