herb 0.7.5 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +8 -5
- data/config.yml +26 -6
- data/ext/herb/error_helpers.c +57 -3
- data/ext/herb/error_helpers.h +1 -1
- data/ext/herb/extconf.rb +1 -0
- data/ext/herb/extension.c +10 -24
- data/ext/herb/extension_helpers.c +3 -3
- data/ext/herb/extension_helpers.h +1 -1
- data/ext/herb/nodes.c +72 -37
- data/herb.gemspec +0 -2
- data/lib/herb/ast/helpers.rb +11 -0
- data/lib/herb/ast/node.rb +15 -6
- data/lib/herb/ast/nodes.rb +609 -392
- data/lib/herb/cli.rb +31 -0
- data/lib/herb/colors.rb +82 -0
- data/lib/herb/engine/compiler.rb +140 -14
- data/lib/herb/engine/debug_visitor.rb +1 -5
- data/lib/herb/engine/parser_error_overlay.rb +1 -1
- data/lib/herb/engine.rb +8 -14
- data/lib/herb/errors.rb +166 -56
- data/lib/herb/location.rb +2 -2
- data/lib/herb/project.rb +86 -21
- data/lib/herb/token.rb +14 -2
- data/lib/herb/version.rb +1 -1
- data/lib/herb.rb +1 -0
- data/sig/herb/ast/helpers.rbs +3 -0
- data/sig/herb/ast/node.rbs +12 -5
- data/sig/herb/ast/nodes.rbs +124 -62
- data/sig/herb/colors.rbs +35 -0
- data/sig/herb/engine/compiler.rbs +23 -1
- data/sig/herb/errors.rbs +74 -20
- data/sig/herb/token.rbs +8 -0
- data/sig/herb_c_extension.rbs +1 -1
- data/sig/serialized_ast_errors.rbs +8 -0
- data/src/analyze.c +420 -171
- data/src/analyze_helpers.c +5 -0
- data/src/analyze_missing_end.c +147 -0
- data/src/analyze_transform.c +196 -0
- data/src/analyzed_ruby.c +23 -2
- data/src/ast_node.c +5 -5
- data/src/ast_nodes.c +179 -179
- data/src/ast_pretty_print.c +232 -232
- data/src/element_source.c +7 -6
- data/src/errors.c +246 -126
- data/src/extract.c +92 -34
- data/src/herb.c +37 -49
- data/src/html_util.c +34 -96
- data/src/include/analyze.h +10 -2
- data/src/include/analyze_helpers.h +3 -0
- data/src/include/analyzed_ruby.h +4 -2
- data/src/include/ast_node.h +2 -2
- data/src/include/ast_nodes.h +67 -66
- data/src/include/ast_pretty_print.h +2 -2
- data/src/include/element_source.h +3 -1
- data/src/include/errors.h +30 -14
- data/src/include/extract.h +4 -4
- data/src/include/herb.h +6 -7
- data/src/include/html_util.h +4 -5
- data/src/include/lexer.h +1 -3
- data/src/include/lexer_peek_helpers.h +14 -14
- data/src/include/lexer_struct.h +3 -2
- data/src/include/macros.h +4 -0
- data/src/include/parser.h +12 -6
- data/src/include/parser_helpers.h +25 -15
- data/src/include/pretty_print.h +38 -28
- data/src/include/token.h +5 -8
- data/src/include/utf8.h +3 -2
- data/src/include/util/hb_arena.h +31 -0
- data/src/include/util/hb_arena_debug.h +8 -0
- data/src/include/util/hb_array.h +33 -0
- data/src/include/util/hb_buffer.h +34 -0
- data/src/include/util/hb_string.h +29 -0
- data/src/include/util/hb_system.h +9 -0
- data/src/include/util.h +3 -14
- data/src/include/version.h +1 -1
- data/src/include/visitor.h +1 -1
- data/src/io.c +7 -4
- data/src/lexer.c +61 -88
- data/src/lexer_peek_helpers.c +35 -37
- data/src/main.c +19 -23
- data/src/parser.c +282 -201
- data/src/parser_helpers.c +46 -40
- data/src/parser_match_tags.c +316 -0
- data/src/pretty_print.c +82 -106
- data/src/token.c +18 -65
- data/src/utf8.c +4 -4
- data/src/util/hb_arena.c +179 -0
- data/src/util/hb_arena_debug.c +237 -0
- data/src/{array.c → util/hb_array.c} +26 -27
- data/src/util/hb_buffer.c +203 -0
- data/src/util/hb_string.c +85 -0
- data/src/util/hb_system.c +30 -0
- data/src/util.c +29 -99
- data/src/visitor.c +54 -54
- data/templates/ext/herb/error_helpers.c.erb +3 -3
- data/templates/ext/herb/error_helpers.h.erb +1 -1
- data/templates/ext/herb/nodes.c.erb +11 -6
- data/templates/java/error_helpers.c.erb +75 -0
- data/templates/java/error_helpers.h.erb +20 -0
- data/templates/java/nodes.c.erb +97 -0
- data/templates/java/nodes.h.erb +23 -0
- data/templates/java/org/herb/ast/Errors.java.erb +121 -0
- data/templates/java/org/herb/ast/NodeVisitor.java.erb +14 -0
- data/templates/java/org/herb/ast/Nodes.java.erb +220 -0
- data/templates/java/org/herb/ast/Visitor.java.erb +56 -0
- data/templates/javascript/packages/node/extension/error_helpers.cpp.erb +8 -8
- data/templates/javascript/packages/node/extension/error_helpers.h.erb +1 -1
- data/templates/javascript/packages/node/extension/nodes.cpp.erb +9 -9
- data/templates/javascript/packages/node/extension/nodes.h.erb +1 -1
- data/templates/lib/herb/ast/nodes.rb.erb +28 -16
- data/templates/lib/herb/errors.rb.erb +17 -12
- data/templates/rust/src/ast/nodes.rs.erb +220 -0
- data/templates/rust/src/errors.rs.erb +216 -0
- data/templates/rust/src/nodes.rs.erb +374 -0
- data/templates/src/analyze_missing_end.c.erb +36 -0
- data/templates/src/analyze_transform.c.erb +24 -0
- data/templates/src/ast_nodes.c.erb +14 -14
- data/templates/src/ast_pretty_print.c.erb +36 -36
- data/templates/src/errors.c.erb +31 -31
- data/templates/src/include/ast_nodes.h.erb +10 -9
- data/templates/src/include/ast_pretty_print.h.erb +2 -2
- data/templates/src/include/errors.h.erb +6 -6
- data/templates/src/parser_match_tags.c.erb +38 -0
- data/templates/src/visitor.c.erb +4 -4
- data/templates/template.rb +22 -3
- data/templates/wasm/error_helpers.cpp.erb +9 -9
- data/templates/wasm/error_helpers.h.erb +1 -1
- data/templates/wasm/nodes.cpp.erb +9 -9
- data/templates/wasm/nodes.h.erb +1 -1
- data/vendor/prism/Rakefile +4 -1
- data/vendor/prism/config.yml +2 -1
- data/vendor/prism/include/prism/ast.h +31 -1
- data/vendor/prism/include/prism/diagnostic.h +1 -0
- data/vendor/prism/include/prism/version.h +3 -3
- data/vendor/prism/src/diagnostic.c +3 -1
- data/vendor/prism/src/prism.c +130 -71
- data/vendor/prism/src/util/pm_string.c +6 -8
- data/vendor/prism/templates/include/prism/ast.h.erb +2 -0
- data/vendor/prism/templates/java/org/prism/Loader.java.erb +2 -2
- data/vendor/prism/templates/javascript/src/deserialize.js.erb +2 -2
- data/vendor/prism/templates/lib/prism/serialize.rb.erb +2 -2
- data/vendor/prism/templates/sig/prism.rbs.erb +4 -0
- data/vendor/prism/templates/src/diagnostic.c.erb +1 -0
- metadata +34 -20
- data/lib/herb/libherb/array.rb +0 -51
- data/lib/herb/libherb/ast_node.rb +0 -50
- data/lib/herb/libherb/buffer.rb +0 -56
- data/lib/herb/libherb/extract_result.rb +0 -20
- data/lib/herb/libherb/lex_result.rb +0 -32
- data/lib/herb/libherb/libherb.rb +0 -52
- data/lib/herb/libherb/parse_result.rb +0 -20
- data/lib/herb/libherb/token.rb +0 -46
- data/lib/herb/libherb.rb +0 -35
- data/src/buffer.c +0 -241
- data/src/include/array.h +0 -33
- data/src/include/buffer.h +0 -39
- data/src/include/json.h +0 -28
- data/src/include/memory.h +0 -12
- data/src/json.c +0 -205
- data/src/memory.c +0 -53
data/src/parser.c
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
#include "include/parser.h"
|
|
2
|
-
#include "include/array.h"
|
|
3
2
|
#include "include/ast_node.h"
|
|
4
3
|
#include "include/ast_nodes.h"
|
|
5
|
-
#include "include/buffer.h"
|
|
6
4
|
#include "include/errors.h"
|
|
7
5
|
#include "include/html_util.h"
|
|
8
6
|
#include "include/lexer.h"
|
|
@@ -11,49 +9,46 @@
|
|
|
11
9
|
#include "include/token.h"
|
|
12
10
|
#include "include/token_matchers.h"
|
|
13
11
|
#include "include/util.h"
|
|
12
|
+
#include "include/util/hb_array.h"
|
|
13
|
+
#include "include/util/hb_buffer.h"
|
|
14
|
+
#include "include/util/hb_string.h"
|
|
15
|
+
#include "include/visitor.h"
|
|
14
16
|
|
|
15
17
|
#include <stdio.h>
|
|
16
18
|
#include <stdlib.h>
|
|
17
19
|
#include <string.h>
|
|
18
20
|
#include <strings.h>
|
|
19
21
|
|
|
20
|
-
static void parser_parse_in_data_state(parser_T* parser,
|
|
21
|
-
static void parser_parse_foreign_content(parser_T* parser,
|
|
22
|
+
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
23
|
+
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
22
24
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
|
|
23
|
-
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token,
|
|
24
|
-
static void parser_consume_whitespace(parser_T* parser,
|
|
25
|
+
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children);
|
|
26
|
+
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children);
|
|
25
27
|
static void parser_skip_erb_content(lexer_T* lexer);
|
|
26
28
|
static bool parser_lookahead_erb_is_attribute(lexer_T* lexer);
|
|
27
|
-
static void parser_handle_erb_in_open_tag(parser_T* parser,
|
|
28
|
-
static void parser_handle_whitespace_in_open_tag(parser_T* parser,
|
|
29
|
+
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
30
|
+
static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
31
|
+
|
|
32
|
+
const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false };
|
|
29
33
|
|
|
30
34
|
size_t parser_sizeof(void) {
|
|
31
35
|
return sizeof(struct PARSER_STRUCT);
|
|
32
36
|
}
|
|
33
37
|
|
|
34
|
-
parser_T*
|
|
35
|
-
parser_T* parser = calloc(1, parser_sizeof());
|
|
36
|
-
|
|
38
|
+
void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options) {
|
|
37
39
|
parser->lexer = lexer;
|
|
38
40
|
parser->current_token = lexer_next_token(lexer);
|
|
39
|
-
parser->open_tags_stack =
|
|
41
|
+
parser->open_tags_stack = hb_array_init(16);
|
|
40
42
|
parser->state = PARSER_STATE_DATA;
|
|
41
43
|
parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
|
|
42
|
-
|
|
43
|
-
if (options) {
|
|
44
|
-
parser->options = calloc(1, sizeof(parser_options_T));
|
|
45
|
-
parser->options->track_whitespace = options->track_whitespace;
|
|
46
|
-
} else {
|
|
47
|
-
parser->options = NULL;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
return parser;
|
|
44
|
+
parser->options = options;
|
|
51
45
|
}
|
|
52
46
|
|
|
53
47
|
static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
48
|
+
hb_array_T* errors = hb_array_init(8);
|
|
49
|
+
hb_array_T* children = hb_array_init(8);
|
|
50
|
+
hb_buffer_T content;
|
|
51
|
+
hb_buffer_init(&content, 128);
|
|
57
52
|
|
|
58
53
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_CDATA_START, errors);
|
|
59
54
|
position_T start = parser->current_token->location.start;
|
|
@@ -62,13 +57,13 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
62
57
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
63
58
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
64
59
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
65
|
-
|
|
60
|
+
hb_array_append(children, erb_node);
|
|
66
61
|
start = parser->current_token->location.start;
|
|
67
62
|
continue;
|
|
68
63
|
}
|
|
69
64
|
|
|
70
65
|
token_T* token = parser_advance(parser);
|
|
71
|
-
|
|
66
|
+
hb_buffer_append(&content, token->value);
|
|
72
67
|
token_free(token);
|
|
73
68
|
}
|
|
74
69
|
|
|
@@ -84,7 +79,7 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
84
79
|
errors
|
|
85
80
|
);
|
|
86
81
|
|
|
87
|
-
|
|
82
|
+
free(content.value);
|
|
88
83
|
token_free(tag_opening);
|
|
89
84
|
token_free(tag_closing);
|
|
90
85
|
|
|
@@ -92,19 +87,20 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
92
87
|
}
|
|
93
88
|
|
|
94
89
|
static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
95
|
-
|
|
96
|
-
|
|
90
|
+
hb_array_T* errors = hb_array_init(8);
|
|
91
|
+
hb_array_T* children = hb_array_init(8);
|
|
97
92
|
token_T* comment_start = parser_consume_expected(parser, TOKEN_HTML_COMMENT_START, errors);
|
|
98
93
|
position_T start = parser->current_token->location.start;
|
|
99
94
|
|
|
100
|
-
|
|
95
|
+
hb_buffer_T comment;
|
|
96
|
+
hb_buffer_init(&comment, 512);
|
|
101
97
|
|
|
102
98
|
while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_EOF)) {
|
|
103
99
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
104
100
|
parser_append_literal_node_from_buffer(parser, &comment, children, start);
|
|
105
101
|
|
|
106
102
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
107
|
-
|
|
103
|
+
hb_array_append(children, erb_node);
|
|
108
104
|
|
|
109
105
|
start = parser->current_token->location.start;
|
|
110
106
|
|
|
@@ -112,7 +108,7 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
112
108
|
}
|
|
113
109
|
|
|
114
110
|
token_T* token = parser_advance(parser);
|
|
115
|
-
|
|
111
|
+
hb_buffer_append(&comment, token->value);
|
|
116
112
|
token_free(token);
|
|
117
113
|
}
|
|
118
114
|
|
|
@@ -129,7 +125,7 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
129
125
|
errors
|
|
130
126
|
);
|
|
131
127
|
|
|
132
|
-
|
|
128
|
+
free(comment.value);
|
|
133
129
|
token_free(comment_start);
|
|
134
130
|
token_free(comment_end);
|
|
135
131
|
|
|
@@ -137,9 +133,10 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
137
133
|
}
|
|
138
134
|
|
|
139
135
|
static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
136
|
+
hb_array_T* errors = hb_array_init(8);
|
|
137
|
+
hb_array_T* children = hb_array_init(8);
|
|
138
|
+
hb_buffer_T content;
|
|
139
|
+
hb_buffer_init(&content, 64);
|
|
143
140
|
|
|
144
141
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_DOCTYPE, errors);
|
|
145
142
|
|
|
@@ -150,13 +147,13 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
|
150
147
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
151
148
|
|
|
152
149
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
153
|
-
|
|
150
|
+
hb_array_append(children, erb_node);
|
|
154
151
|
|
|
155
152
|
continue;
|
|
156
153
|
}
|
|
157
154
|
|
|
158
155
|
token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
|
|
159
|
-
|
|
156
|
+
hb_buffer_append(&content, token->value);
|
|
160
157
|
token_free(token);
|
|
161
158
|
}
|
|
162
159
|
|
|
@@ -175,15 +172,16 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
|
175
172
|
|
|
176
173
|
token_free(tag_opening);
|
|
177
174
|
token_free(tag_closing);
|
|
178
|
-
|
|
175
|
+
free(content.value);
|
|
179
176
|
|
|
180
177
|
return doctype;
|
|
181
178
|
}
|
|
182
179
|
|
|
183
180
|
static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser) {
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
181
|
+
hb_array_T* errors = hb_array_init(8);
|
|
182
|
+
hb_array_T* children = hb_array_init(8);
|
|
183
|
+
hb_buffer_T content;
|
|
184
|
+
hb_buffer_init(&content, 64);
|
|
187
185
|
|
|
188
186
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_XML_DECLARATION, errors);
|
|
189
187
|
|
|
@@ -194,7 +192,7 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
194
192
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
195
193
|
|
|
196
194
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
197
|
-
|
|
195
|
+
hb_array_append(children, erb_node);
|
|
198
196
|
|
|
199
197
|
start = parser->current_token->location.start;
|
|
200
198
|
|
|
@@ -202,7 +200,7 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
202
200
|
}
|
|
203
201
|
|
|
204
202
|
token_T* token = parser_advance(parser);
|
|
205
|
-
|
|
203
|
+
hb_buffer_append(&content, token->value);
|
|
206
204
|
token_free(token);
|
|
207
205
|
}
|
|
208
206
|
|
|
@@ -221,15 +219,16 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
221
219
|
|
|
222
220
|
token_free(tag_opening);
|
|
223
221
|
token_free(tag_closing);
|
|
224
|
-
|
|
222
|
+
free(content.value);
|
|
225
223
|
|
|
226
224
|
return xml_declaration;
|
|
227
225
|
}
|
|
228
226
|
|
|
229
|
-
static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser,
|
|
227
|
+
static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_array_T* document_errors) {
|
|
230
228
|
position_T start = parser->current_token->location.start;
|
|
231
229
|
|
|
232
|
-
|
|
230
|
+
hb_buffer_T content;
|
|
231
|
+
hb_buffer_init(&content, 2048);
|
|
233
232
|
|
|
234
233
|
while (token_is_none_of(
|
|
235
234
|
parser,
|
|
@@ -241,7 +240,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
|
|
|
241
240
|
TOKEN_EOF
|
|
242
241
|
)) {
|
|
243
242
|
if (token_is(parser, TOKEN_ERROR)) {
|
|
244
|
-
|
|
243
|
+
free(content.value);
|
|
245
244
|
|
|
246
245
|
token_T* token = parser_consume_expected(parser, TOKEN_ERROR, document_errors);
|
|
247
246
|
append_unexpected_error(
|
|
@@ -259,32 +258,31 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
|
|
|
259
258
|
}
|
|
260
259
|
|
|
261
260
|
token_T* token = parser_advance(parser);
|
|
262
|
-
|
|
261
|
+
hb_buffer_append(&content, token->value);
|
|
263
262
|
token_free(token);
|
|
264
263
|
}
|
|
265
264
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
if (buffer_length(&content) > 0) {
|
|
269
|
-
AST_HTML_TEXT_NODE_T* text_node =
|
|
270
|
-
ast_html_text_node_init(buffer_value(&content), start, parser->current_token->location.start, errors);
|
|
265
|
+
hb_array_T* errors = hb_array_init(8);
|
|
271
266
|
|
|
272
|
-
|
|
267
|
+
AST_HTML_TEXT_NODE_T* text_node = NULL;
|
|
273
268
|
|
|
274
|
-
|
|
269
|
+
if (hb_buffer_length(&content) > 0) {
|
|
270
|
+
text_node =
|
|
271
|
+
ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors);
|
|
272
|
+
} else {
|
|
273
|
+
text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors);
|
|
275
274
|
}
|
|
276
275
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
buffer_free(&content);
|
|
276
|
+
free(content.value);
|
|
280
277
|
|
|
281
278
|
return text_node;
|
|
282
279
|
}
|
|
283
280
|
|
|
284
281
|
static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
282
|
+
hb_array_T* errors = hb_array_init(8);
|
|
283
|
+
hb_array_T* children = hb_array_init(8);
|
|
284
|
+
hb_buffer_T buffer;
|
|
285
|
+
hb_buffer_init(&buffer, 128);
|
|
288
286
|
position_T start = parser->current_token->location.start;
|
|
289
287
|
|
|
290
288
|
while (token_is_none_of(
|
|
@@ -300,14 +298,14 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
300
298
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
301
299
|
|
|
302
300
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
303
|
-
|
|
301
|
+
hb_array_append(children, erb_node);
|
|
304
302
|
|
|
305
303
|
start = parser->current_token->location.start;
|
|
306
304
|
continue;
|
|
307
305
|
}
|
|
308
306
|
|
|
309
307
|
token_T* token = parser_advance(parser);
|
|
310
|
-
|
|
308
|
+
hb_buffer_append(&buffer, token->value);
|
|
311
309
|
token_free(token);
|
|
312
310
|
}
|
|
313
311
|
|
|
@@ -317,8 +315,8 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
317
315
|
position_T node_end = { 0 };
|
|
318
316
|
|
|
319
317
|
if (children->size > 0) {
|
|
320
|
-
AST_NODE_T* first_child =
|
|
321
|
-
AST_NODE_T* last_child =
|
|
318
|
+
AST_NODE_T* first_child = hb_array_get(children, 0);
|
|
319
|
+
AST_NODE_T* last_child = hb_array_get(children, children->size - 1);
|
|
322
320
|
|
|
323
321
|
node_start = first_child->location.start;
|
|
324
322
|
node_end = last_child->location.end;
|
|
@@ -330,17 +328,18 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
330
328
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
|
331
329
|
ast_html_attribute_name_node_init(children, node_start, node_end, errors);
|
|
332
330
|
|
|
333
|
-
|
|
331
|
+
free(buffer.value);
|
|
334
332
|
|
|
335
333
|
return attribute_name;
|
|
336
334
|
}
|
|
337
335
|
|
|
338
336
|
static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value(
|
|
339
337
|
parser_T* parser,
|
|
340
|
-
|
|
341
|
-
|
|
338
|
+
hb_array_T* children,
|
|
339
|
+
hb_array_T* errors
|
|
342
340
|
) {
|
|
343
|
-
|
|
341
|
+
hb_buffer_T buffer;
|
|
342
|
+
hb_buffer_init(&buffer, 512);
|
|
344
343
|
token_T* opening_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
345
344
|
position_T start = parser->current_token->location.start;
|
|
346
345
|
|
|
@@ -352,7 +351,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
352
351
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
353
352
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
354
353
|
|
|
355
|
-
|
|
354
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
356
355
|
|
|
357
356
|
start = parser->current_token->location.start;
|
|
358
357
|
|
|
@@ -366,8 +365,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
366
365
|
|
|
367
366
|
if (next_token && next_token->type == TOKEN_QUOTE && opening_quote != NULL
|
|
368
367
|
&& strcmp(next_token->value, opening_quote->value) == 0) {
|
|
369
|
-
|
|
370
|
-
|
|
368
|
+
hb_buffer_append(&buffer, parser->current_token->value);
|
|
369
|
+
hb_buffer_append(&buffer, next_token->value);
|
|
371
370
|
|
|
372
371
|
token_free(parser->current_token);
|
|
373
372
|
token_free(next_token);
|
|
@@ -381,7 +380,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
381
380
|
}
|
|
382
381
|
}
|
|
383
382
|
|
|
384
|
-
|
|
383
|
+
hb_buffer_append(&buffer, parser->current_token->value);
|
|
385
384
|
token_free(parser->current_token);
|
|
386
385
|
|
|
387
386
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
@@ -409,7 +408,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
409
408
|
token_free(parser->current_token);
|
|
410
409
|
parser->current_token = potential_closing;
|
|
411
410
|
|
|
412
|
-
|
|
411
|
+
hb_buffer_append(&buffer, parser->current_token->value);
|
|
413
412
|
token_free(parser->current_token);
|
|
414
413
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
415
414
|
|
|
@@ -421,14 +420,14 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
421
420
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
422
421
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
423
422
|
|
|
424
|
-
|
|
423
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
425
424
|
|
|
426
425
|
start = parser->current_token->location.start;
|
|
427
426
|
|
|
428
427
|
continue;
|
|
429
428
|
}
|
|
430
429
|
|
|
431
|
-
|
|
430
|
+
hb_buffer_append(&buffer, parser->current_token->value);
|
|
432
431
|
token_free(parser->current_token);
|
|
433
432
|
|
|
434
433
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
@@ -442,7 +441,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
442
441
|
}
|
|
443
442
|
|
|
444
443
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
445
|
-
|
|
444
|
+
free(buffer.value);
|
|
446
445
|
|
|
447
446
|
token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
448
447
|
|
|
@@ -473,13 +472,13 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
473
472
|
}
|
|
474
473
|
|
|
475
474
|
static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser_T* parser) {
|
|
476
|
-
|
|
477
|
-
|
|
475
|
+
hb_array_T* children = hb_array_init(8);
|
|
476
|
+
hb_array_T* errors = hb_array_init(8);
|
|
478
477
|
|
|
479
478
|
// <div id=<%= "home" %>>
|
|
480
479
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
481
480
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
482
|
-
|
|
481
|
+
hb_array_append(children, erb_node);
|
|
483
482
|
|
|
484
483
|
return ast_html_attribute_value_node_init(
|
|
485
484
|
NULL,
|
|
@@ -498,7 +497,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
498
497
|
AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier);
|
|
499
498
|
token_free(identifier);
|
|
500
499
|
|
|
501
|
-
|
|
500
|
+
hb_array_append(children, literal);
|
|
502
501
|
|
|
503
502
|
return ast_html_attribute_value_node_init(
|
|
504
503
|
NULL,
|
|
@@ -561,12 +560,13 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
561
560
|
static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
|
|
562
561
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
|
|
563
562
|
|
|
564
|
-
if (parser->options
|
|
563
|
+
if (parser->options.track_whitespace) {
|
|
565
564
|
bool has_equals = (parser->current_token->type == TOKEN_EQUALS)
|
|
566
565
|
|| lexer_peek_for_token_type_after_whitespace(parser->lexer, TOKEN_EQUALS);
|
|
567
566
|
|
|
568
567
|
if (has_equals) {
|
|
569
|
-
|
|
568
|
+
hb_buffer_T equals_buffer;
|
|
569
|
+
hb_buffer_init(&equals_buffer, 256);
|
|
570
570
|
position_T equals_start = { 0 };
|
|
571
571
|
position_T equals_end = { 0 };
|
|
572
572
|
uint32_t range_start = 0;
|
|
@@ -582,7 +582,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
582
582
|
range_start = whitespace->range.from;
|
|
583
583
|
}
|
|
584
584
|
|
|
585
|
-
|
|
585
|
+
hb_buffer_append(&equals_buffer, whitespace->value);
|
|
586
586
|
token_free(whitespace);
|
|
587
587
|
}
|
|
588
588
|
|
|
@@ -594,14 +594,14 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
594
594
|
range_start = equals->range.from;
|
|
595
595
|
}
|
|
596
596
|
|
|
597
|
-
|
|
597
|
+
hb_buffer_append(&equals_buffer, equals->value);
|
|
598
598
|
equals_end = equals->location.end;
|
|
599
599
|
range_end = equals->range.to;
|
|
600
600
|
token_free(equals);
|
|
601
601
|
|
|
602
602
|
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
603
603
|
token_T* whitespace = parser_advance(parser);
|
|
604
|
-
|
|
604
|
+
hb_buffer_append(&equals_buffer, whitespace->value);
|
|
605
605
|
equals_end = whitespace->location.end;
|
|
606
606
|
range_end = whitespace->range.to;
|
|
607
607
|
token_free(whitespace);
|
|
@@ -613,7 +613,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
613
613
|
equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
|
|
614
614
|
equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
|
|
615
615
|
|
|
616
|
-
|
|
616
|
+
free(equals_buffer.value);
|
|
617
617
|
|
|
618
618
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
|
619
619
|
|
|
@@ -719,12 +719,12 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
|
|
|
719
719
|
} while (true);
|
|
720
720
|
}
|
|
721
721
|
|
|
722
|
-
static void parser_handle_erb_in_open_tag(parser_T* parser,
|
|
722
|
+
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
|
|
723
723
|
bool is_output_tag = parser->current_token->value && strlen(parser->current_token->value) >= 3
|
|
724
724
|
&& strncmp(parser->current_token->value, "<%=", 3) == 0;
|
|
725
725
|
|
|
726
726
|
if (!is_output_tag) {
|
|
727
|
-
|
|
727
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
728
728
|
|
|
729
729
|
return;
|
|
730
730
|
}
|
|
@@ -738,13 +738,13 @@ static void parser_handle_erb_in_open_tag(parser_T* parser, array_T* children) {
|
|
|
738
738
|
bool looks_like_attribute = parser_lookahead_erb_is_attribute(&lexer_copy);
|
|
739
739
|
|
|
740
740
|
if (looks_like_attribute) {
|
|
741
|
-
|
|
741
|
+
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
742
742
|
} else {
|
|
743
|
-
|
|
743
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
744
744
|
}
|
|
745
745
|
}
|
|
746
746
|
|
|
747
|
-
static void parser_handle_whitespace_in_open_tag(parser_T* parser,
|
|
747
|
+
static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children) {
|
|
748
748
|
token_T* whitespace = parser_consume_if_present(parser, TOKEN_WHITESPACE);
|
|
749
749
|
|
|
750
750
|
if (whitespace != NULL) {
|
|
@@ -758,8 +758,8 @@ static void parser_handle_whitespace_in_open_tag(parser_T* parser, array_T* chil
|
|
|
758
758
|
}
|
|
759
759
|
|
|
760
760
|
static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
761
|
-
|
|
762
|
-
|
|
761
|
+
hb_array_T* errors = hb_array_init(8);
|
|
762
|
+
hb_array_T* children = hb_array_init(8);
|
|
763
763
|
|
|
764
764
|
token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
|
|
765
765
|
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
|
@@ -771,7 +771,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
771
771
|
}
|
|
772
772
|
|
|
773
773
|
if (parser->current_token->type == TOKEN_IDENTIFIER) {
|
|
774
|
-
|
|
774
|
+
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
775
775
|
continue;
|
|
776
776
|
}
|
|
777
777
|
|
|
@@ -781,7 +781,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
781
781
|
}
|
|
782
782
|
|
|
783
783
|
if (parser->current_token->type == TOKEN_AT) {
|
|
784
|
-
|
|
784
|
+
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
785
785
|
continue;
|
|
786
786
|
}
|
|
787
787
|
|
|
@@ -791,7 +791,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
791
791
|
|
|
792
792
|
if (next_token && next_token->type == TOKEN_IDENTIFIER) {
|
|
793
793
|
token_free(next_token);
|
|
794
|
-
|
|
794
|
+
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
795
795
|
|
|
796
796
|
continue;
|
|
797
797
|
}
|
|
@@ -818,8 +818,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
818
818
|
token_free(tag_start);
|
|
819
819
|
token_free(tag_name);
|
|
820
820
|
|
|
821
|
-
|
|
822
|
-
|
|
821
|
+
hb_array_free(&children);
|
|
822
|
+
hb_array_free(&errors);
|
|
823
823
|
|
|
824
824
|
return NULL;
|
|
825
825
|
}
|
|
@@ -846,8 +846,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
846
846
|
}
|
|
847
847
|
|
|
848
848
|
static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
|
|
849
|
-
|
|
850
|
-
|
|
849
|
+
hb_array_T* errors = hb_array_init(8);
|
|
850
|
+
hb_array_T* children = hb_array_init(8);
|
|
851
851
|
|
|
852
852
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
|
853
853
|
|
|
@@ -859,21 +859,21 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
|
|
|
859
859
|
|
|
860
860
|
token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
|
|
861
861
|
|
|
862
|
-
if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
|
|
863
|
-
|
|
864
|
-
|
|
862
|
+
if (tag_name != NULL && is_void_element(hb_string(tag_name->value)) && parser_in_svg_context(parser) == false) {
|
|
863
|
+
hb_string_T expected = html_self_closing_tag_string(hb_string(tag_name->value));
|
|
864
|
+
hb_string_T got = html_closing_tag_string(hb_string(tag_name->value));
|
|
865
865
|
|
|
866
866
|
append_void_element_closing_tag_error(
|
|
867
867
|
tag_name,
|
|
868
|
-
expected,
|
|
869
|
-
got,
|
|
868
|
+
expected.data,
|
|
869
|
+
got.data,
|
|
870
870
|
tag_opening->location.start,
|
|
871
871
|
tag_closing->location.end,
|
|
872
872
|
errors
|
|
873
873
|
);
|
|
874
874
|
|
|
875
|
-
free(expected);
|
|
876
|
-
free(got);
|
|
875
|
+
free(expected.data);
|
|
876
|
+
free(got.data);
|
|
877
877
|
}
|
|
878
878
|
|
|
879
879
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
|
|
@@ -915,13 +915,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
|
|
|
915
915
|
parser_T* parser,
|
|
916
916
|
AST_HTML_OPEN_TAG_NODE_T* open_tag
|
|
917
917
|
) {
|
|
918
|
-
|
|
919
|
-
|
|
918
|
+
hb_array_T* errors = hb_array_init(8);
|
|
919
|
+
hb_array_T* body = hb_array_init(8);
|
|
920
920
|
|
|
921
921
|
parser_push_open_tag(parser, open_tag->tag_name);
|
|
922
922
|
|
|
923
|
-
if (open_tag->tag_name->value && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
|
|
924
|
-
foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
|
|
923
|
+
if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
|
|
924
|
+
foreign_content_type_T content_type = parser_get_foreign_content_type(hb_string(open_tag->tag_name->value));
|
|
925
925
|
parser_enter_foreign_content(parser, content_type);
|
|
926
926
|
parser_parse_foreign_content(parser, body, errors);
|
|
927
927
|
} else {
|
|
@@ -932,13 +932,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
|
|
|
932
932
|
|
|
933
933
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
|
|
934
934
|
|
|
935
|
-
if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
|
|
936
|
-
|
|
935
|
+
if (parser_in_svg_context(parser) == false && is_void_element(hb_string(close_tag->tag_name->value))) {
|
|
936
|
+
hb_array_push(body, close_tag);
|
|
937
937
|
parser_parse_in_data_state(parser, body, errors);
|
|
938
938
|
close_tag = parser_parse_html_close_tag(parser);
|
|
939
939
|
}
|
|
940
940
|
|
|
941
|
-
bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
|
|
941
|
+
bool matches_stack = parser_check_matching_tag(parser, hb_string(close_tag->tag_name->value));
|
|
942
942
|
|
|
943
943
|
if (matches_stack) {
|
|
944
944
|
token_T* popped_token = parser_pop_open_tag(parser);
|
|
@@ -960,39 +960,28 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
|
|
|
960
960
|
);
|
|
961
961
|
}
|
|
962
962
|
|
|
963
|
-
static
|
|
963
|
+
static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
|
|
964
964
|
AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
|
|
965
965
|
|
|
966
966
|
// <tag />
|
|
967
|
-
if (open_tag->is_void) { return parser_parse_html_self_closing_element(parser, open_tag); }
|
|
967
|
+
if (open_tag->is_void) { return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag); }
|
|
968
968
|
|
|
969
969
|
// <tag>, in void element list, and not in inside an <svg> element
|
|
970
|
-
if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
|
|
971
|
-
return parser_parse_html_self_closing_element(parser, open_tag);
|
|
970
|
+
if (!open_tag->is_void && is_void_element(hb_string(open_tag->tag_name->value)) && !parser_in_svg_context(parser)) {
|
|
971
|
+
return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag);
|
|
972
972
|
}
|
|
973
973
|
|
|
974
|
-
|
|
975
|
-
|
|
974
|
+
if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
|
|
975
|
+
AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
|
|
976
976
|
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
parser_append_unexpected_error(parser, "Unknown HTML open tag type", "HTMLOpenTag or HTMLSelfCloseTag", errors);
|
|
977
|
+
if (regular_element != NULL) { return (AST_NODE_T*) regular_element; }
|
|
978
|
+
}
|
|
980
979
|
|
|
981
|
-
return
|
|
982
|
-
open_tag,
|
|
983
|
-
open_tag->tag_name,
|
|
984
|
-
NULL,
|
|
985
|
-
NULL,
|
|
986
|
-
false,
|
|
987
|
-
ELEMENT_SOURCE_HTML,
|
|
988
|
-
open_tag->base.location.start,
|
|
989
|
-
open_tag->base.location.end,
|
|
990
|
-
errors
|
|
991
|
-
);
|
|
980
|
+
return (AST_NODE_T*) open_tag;
|
|
992
981
|
}
|
|
993
982
|
|
|
994
983
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
995
|
-
|
|
984
|
+
hb_array_T* errors = hb_array_init(8);
|
|
996
985
|
|
|
997
986
|
token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
|
|
998
987
|
token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
|
|
@@ -1017,14 +1006,15 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
|
1017
1006
|
return erb_node;
|
|
1018
1007
|
}
|
|
1019
1008
|
|
|
1020
|
-
static void parser_parse_foreign_content(parser_T* parser,
|
|
1021
|
-
|
|
1009
|
+
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
1010
|
+
hb_buffer_T content;
|
|
1011
|
+
hb_buffer_init(&content, 1024);
|
|
1022
1012
|
position_T start = parser->current_token->location.start;
|
|
1023
|
-
|
|
1013
|
+
hb_string_T expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
|
|
1024
1014
|
|
|
1025
|
-
if (expected_closing_tag
|
|
1015
|
+
if (hb_string_is_empty(expected_closing_tag)) {
|
|
1026
1016
|
parser_exit_foreign_content(parser);
|
|
1027
|
-
|
|
1017
|
+
free(content.value);
|
|
1028
1018
|
|
|
1029
1019
|
return;
|
|
1030
1020
|
}
|
|
@@ -1034,7 +1024,7 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
|
|
|
1034
1024
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1035
1025
|
|
|
1036
1026
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
1037
|
-
|
|
1027
|
+
hb_array_append(children, erb_node);
|
|
1038
1028
|
|
|
1039
1029
|
start = parser->current_token->location.start;
|
|
1040
1030
|
|
|
@@ -1048,7 +1038,8 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
|
|
|
1048
1038
|
bool is_potential_match = false;
|
|
1049
1039
|
|
|
1050
1040
|
if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
|
|
1051
|
-
is_potential_match =
|
|
1041
|
+
is_potential_match =
|
|
1042
|
+
parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type);
|
|
1052
1043
|
}
|
|
1053
1044
|
|
|
1054
1045
|
lexer_restore_state(parser->lexer, saved_state);
|
|
@@ -1059,51 +1050,57 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
|
|
|
1059
1050
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1060
1051
|
parser_exit_foreign_content(parser);
|
|
1061
1052
|
|
|
1062
|
-
|
|
1053
|
+
free(content.value);
|
|
1063
1054
|
|
|
1064
1055
|
return;
|
|
1065
1056
|
}
|
|
1066
1057
|
}
|
|
1067
1058
|
|
|
1068
1059
|
token_T* token = parser_advance(parser);
|
|
1069
|
-
|
|
1060
|
+
hb_buffer_append(&content, token->value);
|
|
1070
1061
|
token_free(token);
|
|
1071
1062
|
}
|
|
1072
1063
|
|
|
1073
1064
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1074
1065
|
parser_exit_foreign_content(parser);
|
|
1075
|
-
|
|
1066
|
+
free(content.value);
|
|
1076
1067
|
}
|
|
1077
1068
|
|
|
1078
|
-
static void parser_parse_in_data_state(parser_T* parser,
|
|
1079
|
-
while (
|
|
1069
|
+
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
1070
|
+
while (token_is_not(parser, TOKEN_EOF)) {
|
|
1071
|
+
|
|
1080
1072
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
1081
|
-
|
|
1073
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
1082
1074
|
continue;
|
|
1083
1075
|
}
|
|
1084
1076
|
|
|
1085
1077
|
if (token_is(parser, TOKEN_HTML_DOCTYPE)) {
|
|
1086
|
-
|
|
1078
|
+
hb_array_append(children, parser_parse_html_doctype(parser));
|
|
1087
1079
|
continue;
|
|
1088
1080
|
}
|
|
1089
1081
|
|
|
1090
1082
|
if (token_is(parser, TOKEN_XML_DECLARATION)) {
|
|
1091
|
-
|
|
1083
|
+
hb_array_append(children, parser_parse_xml_declaration(parser));
|
|
1092
1084
|
continue;
|
|
1093
1085
|
}
|
|
1094
1086
|
|
|
1095
1087
|
if (token_is(parser, TOKEN_CDATA_START)) {
|
|
1096
|
-
|
|
1088
|
+
hb_array_append(children, parser_parse_cdata(parser));
|
|
1097
1089
|
continue;
|
|
1098
1090
|
}
|
|
1099
1091
|
|
|
1100
1092
|
if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
|
|
1101
|
-
|
|
1093
|
+
hb_array_append(children, parser_parse_html_comment(parser));
|
|
1102
1094
|
continue;
|
|
1103
1095
|
}
|
|
1104
1096
|
|
|
1105
1097
|
if (token_is(parser, TOKEN_HTML_TAG_START)) {
|
|
1106
|
-
|
|
1098
|
+
hb_array_append(children, parser_parse_html_element(parser));
|
|
1099
|
+
continue;
|
|
1100
|
+
}
|
|
1101
|
+
|
|
1102
|
+
if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1103
|
+
hb_array_append(children, parser_parse_html_close_tag(parser));
|
|
1107
1104
|
continue;
|
|
1108
1105
|
}
|
|
1109
1106
|
|
|
@@ -1111,6 +1108,7 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
|
1111
1108
|
parser,
|
|
1112
1109
|
TOKEN_AMPERSAND,
|
|
1113
1110
|
TOKEN_AT,
|
|
1111
|
+
TOKEN_BACKSLASH,
|
|
1114
1112
|
TOKEN_BACKTICK,
|
|
1115
1113
|
TOKEN_CHARACTER,
|
|
1116
1114
|
TOKEN_COLON,
|
|
@@ -1127,7 +1125,7 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
|
1127
1125
|
TOKEN_UNDERSCORE,
|
|
1128
1126
|
TOKEN_WHITESPACE
|
|
1129
1127
|
)) {
|
|
1130
|
-
|
|
1128
|
+
hb_array_append(children, parser_parse_text_content(parser, errors));
|
|
1131
1129
|
continue;
|
|
1132
1130
|
}
|
|
1133
1131
|
|
|
@@ -1135,63 +1133,121 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
|
1135
1133
|
parser,
|
|
1136
1134
|
"Unexpected token",
|
|
1137
1135
|
"TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
|
|
1138
|
-
"TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
|
|
1136
|
+
"TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE",
|
|
1139
1137
|
errors
|
|
1140
1138
|
);
|
|
1141
1139
|
}
|
|
1142
1140
|
}
|
|
1143
1141
|
|
|
1144
|
-
static
|
|
1145
|
-
|
|
1146
|
-
token_T* unclosed_tag = parser_pop_open_tag(parser);
|
|
1142
|
+
static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_string_T tag_name) {
|
|
1143
|
+
int depth = 0;
|
|
1147
1144
|
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1145
|
+
for (size_t i = start_idx + 1; i < hb_array_size(nodes); i++) {
|
|
1146
|
+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1147
|
+
if (node == NULL) { continue; }
|
|
1148
|
+
|
|
1149
|
+
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1150
|
+
AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1154
1151
|
|
|
1155
|
-
|
|
1152
|
+
if (hb_string_equals(hb_string(open->tag_name->value), tag_name)) { depth++; }
|
|
1153
|
+
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1154
|
+
AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1155
|
+
|
|
1156
|
+
if (hb_string_equals(hb_string(close->tag_name->value), tag_name)) {
|
|
1157
|
+
if (depth == 0) { return i; }
|
|
1158
|
+
depth--;
|
|
1159
|
+
}
|
|
1160
|
+
}
|
|
1156
1161
|
}
|
|
1162
|
+
|
|
1163
|
+
return (size_t) -1;
|
|
1157
1164
|
}
|
|
1158
1165
|
|
|
1159
|
-
static
|
|
1160
|
-
while (token_is_not(parser, TOKEN_EOF)) {
|
|
1161
|
-
if (token_is_not(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1162
|
-
parser_append_unexpected_token_error(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
|
1166
|
+
static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors);
|
|
1163
1167
|
|
|
1164
|
-
|
|
1165
|
-
|
|
1168
|
+
static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors) {
|
|
1169
|
+
hb_array_T* result = hb_array_init(hb_array_size(nodes));
|
|
1166
1170
|
|
|
1167
|
-
|
|
1168
|
-
|
|
1171
|
+
for (size_t index = 0; index < hb_array_size(nodes); index++) {
|
|
1172
|
+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, index);
|
|
1173
|
+
if (node == NULL) { continue; }
|
|
1169
1174
|
|
|
1170
|
-
|
|
1175
|
+
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1176
|
+
AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1177
|
+
hb_string_T tag_name = hb_string(open_tag->tag_name->value);
|
|
1171
1178
|
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1179
|
+
size_t close_index = find_matching_close_tag(nodes, index, tag_name);
|
|
1180
|
+
|
|
1181
|
+
if (close_index == (size_t) -1) {
|
|
1182
|
+
if (hb_array_size(open_tag->base.errors) == 0) {
|
|
1183
|
+
append_missing_closing_tag_error(
|
|
1184
|
+
open_tag->tag_name,
|
|
1185
|
+
open_tag->base.location.start,
|
|
1186
|
+
open_tag->base.location.end,
|
|
1187
|
+
open_tag->base.errors
|
|
1188
|
+
);
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
hb_array_append(result, node);
|
|
1192
|
+
} else {
|
|
1193
|
+
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) hb_array_get(nodes, close_index);
|
|
1180
1194
|
|
|
1181
|
-
|
|
1195
|
+
hb_array_T* body = hb_array_init(close_index - index - 1);
|
|
1182
1196
|
|
|
1183
|
-
|
|
1197
|
+
for (size_t j = index + 1; j < close_index; j++) {
|
|
1198
|
+
hb_array_append(body, hb_array_get(nodes, j));
|
|
1199
|
+
}
|
|
1200
|
+
|
|
1201
|
+
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors);
|
|
1202
|
+
hb_array_free(&body);
|
|
1203
|
+
|
|
1204
|
+
hb_array_T* element_errors = hb_array_init(8);
|
|
1205
|
+
|
|
1206
|
+
AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
|
|
1207
|
+
open_tag,
|
|
1208
|
+
open_tag->tag_name,
|
|
1209
|
+
processed_body,
|
|
1210
|
+
close_tag,
|
|
1211
|
+
false,
|
|
1212
|
+
ELEMENT_SOURCE_HTML,
|
|
1213
|
+
open_tag->base.location.start,
|
|
1214
|
+
close_tag->base.location.end,
|
|
1215
|
+
element_errors
|
|
1216
|
+
);
|
|
1217
|
+
|
|
1218
|
+
hb_array_append(result, element);
|
|
1219
|
+
|
|
1220
|
+
index = close_index;
|
|
1221
|
+
}
|
|
1222
|
+
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1223
|
+
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1224
|
+
|
|
1225
|
+
if (!is_void_element(hb_string(close_tag->tag_name->value))) {
|
|
1226
|
+
if (hb_array_size(close_tag->base.errors) == 0) {
|
|
1227
|
+
append_missing_opening_tag_error(
|
|
1228
|
+
close_tag->tag_name,
|
|
1229
|
+
close_tag->base.location.start,
|
|
1230
|
+
close_tag->base.location.end,
|
|
1231
|
+
close_tag->base.errors
|
|
1232
|
+
);
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
hb_array_append(result, node);
|
|
1237
|
+
} else {
|
|
1238
|
+
hb_array_append(result, node);
|
|
1239
|
+
}
|
|
1184
1240
|
}
|
|
1241
|
+
|
|
1242
|
+
return result;
|
|
1185
1243
|
}
|
|
1186
1244
|
|
|
1187
1245
|
static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) {
|
|
1188
|
-
|
|
1189
|
-
|
|
1246
|
+
hb_array_T* children = hb_array_init(8);
|
|
1247
|
+
hb_array_T* errors = hb_array_init(8);
|
|
1190
1248
|
position_T start = parser->current_token->location.start;
|
|
1191
1249
|
|
|
1192
1250
|
parser_parse_in_data_state(parser, children, errors);
|
|
1193
|
-
parser_parse_unclosed_html_tags(parser, errors);
|
|
1194
|
-
parser_parse_stray_closing_tags(parser, children, errors);
|
|
1195
1251
|
|
|
1196
1252
|
token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors);
|
|
1197
1253
|
|
|
@@ -1206,26 +1262,26 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser) {
|
|
|
1206
1262
|
return parser_parse_document(parser);
|
|
1207
1263
|
}
|
|
1208
1264
|
|
|
1209
|
-
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token,
|
|
1210
|
-
if (parser->options
|
|
1211
|
-
|
|
1265
|
+
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children) {
|
|
1266
|
+
if (parser->options.track_whitespace) {
|
|
1267
|
+
hb_array_T* errors = hb_array_init(8);
|
|
1212
1268
|
AST_WHITESPACE_NODE_T* whitespace_node = ast_whitespace_node_init(
|
|
1213
1269
|
whitespace_token,
|
|
1214
1270
|
whitespace_token->location.start,
|
|
1215
1271
|
whitespace_token->location.end,
|
|
1216
1272
|
errors
|
|
1217
1273
|
);
|
|
1218
|
-
|
|
1274
|
+
hb_array_append(children, whitespace_node);
|
|
1219
1275
|
}
|
|
1220
1276
|
|
|
1221
1277
|
token_free(whitespace_token);
|
|
1222
1278
|
}
|
|
1223
1279
|
|
|
1224
|
-
static void parser_consume_whitespace(parser_T* parser,
|
|
1280
|
+
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
1225
1281
|
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
1226
1282
|
token_T* whitespace = parser_advance(parser);
|
|
1227
1283
|
|
|
1228
|
-
if (parser->options
|
|
1284
|
+
if (parser->options.track_whitespace && children != NULL) {
|
|
1229
1285
|
parser_handle_whitespace(parser, whitespace, children);
|
|
1230
1286
|
} else {
|
|
1231
1287
|
token_free(whitespace);
|
|
@@ -1233,13 +1289,38 @@ static void parser_consume_whitespace(parser_T* parser, array_T* children) {
|
|
|
1233
1289
|
}
|
|
1234
1290
|
}
|
|
1235
1291
|
|
|
1236
|
-
void
|
|
1292
|
+
void herb_parser_deinit(parser_T* parser) {
|
|
1237
1293
|
if (parser == NULL) { return; }
|
|
1238
1294
|
|
|
1239
|
-
if (parser->lexer != NULL) { lexer_free(parser->lexer); }
|
|
1240
1295
|
if (parser->current_token != NULL) { token_free(parser->current_token); }
|
|
1241
|
-
if (parser->open_tags_stack != NULL) {
|
|
1242
|
-
|
|
1296
|
+
if (parser->open_tags_stack != NULL) { hb_array_free(&parser->open_tags_stack); }
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
|
|
1300
|
+
if (nodes == NULL || hb_array_size(nodes) == 0) { return; }
|
|
1301
|
+
|
|
1302
|
+
hb_array_T* processed = parser_build_elements_from_tags(nodes, errors);
|
|
1303
|
+
|
|
1304
|
+
while (hb_array_size(nodes) > 0) {
|
|
1305
|
+
hb_array_remove(nodes, 0);
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1308
|
+
for (size_t i = 0; i < hb_array_size(processed); i++) {
|
|
1309
|
+
hb_array_append(nodes, hb_array_get(processed, i));
|
|
1310
|
+
}
|
|
1311
|
+
|
|
1312
|
+
hb_array_free(&processed);
|
|
1313
|
+
|
|
1314
|
+
for (size_t i = 0; i < hb_array_size(nodes); i++) {
|
|
1315
|
+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1316
|
+
if (node == NULL) { continue; }
|
|
1317
|
+
|
|
1318
|
+
herb_visit_node(node, match_tags_visitor, errors);
|
|
1319
|
+
}
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
void herb_parser_match_html_tags_post_analyze(AST_DOCUMENT_NODE_T* document) {
|
|
1323
|
+
if (document == NULL) { return; }
|
|
1243
1324
|
|
|
1244
|
-
|
|
1325
|
+
match_tags_in_node_array(document->children, document->base.errors);
|
|
1245
1326
|
}
|