@herb-tools/node 0.8.10 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/binding.gyp +26 -8
- package/dist/herb-node.cjs +41 -12
- package/dist/herb-node.cjs.map +1 -1
- package/dist/herb-node.esm.js +8 -1
- package/dist/herb-node.esm.js.map +1 -1
- package/dist/types/node-backend.d.ts +3 -1
- package/extension/error_helpers.cpp +395 -73
- package/extension/error_helpers.h +13 -3
- package/extension/extension_helpers.cpp +38 -35
- package/extension/extension_helpers.h +2 -2
- package/extension/herb.cpp +183 -64
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.c +290 -0
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/analyze/action_view/content_tag.c +70 -0
- package/extension/libherb/analyze/action_view/link_to.c +143 -0
- package/extension/libherb/analyze/action_view/registry.c +60 -0
- package/extension/libherb/analyze/action_view/tag.c +64 -0
- package/extension/libherb/analyze/action_view/tag_helper_handler.h +41 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.c +305 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/analyze/action_view/tag_helpers.c +748 -0
- package/extension/libherb/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/analyze/action_view/turbo_frame_tag.c +88 -0
- package/extension/libherb/analyze/analyze.c +882 -0
- package/extension/libherb/{include → analyze}/analyze.h +14 -4
- package/extension/libherb/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
- package/extension/libherb/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/analyze/builders.c +343 -0
- package/extension/libherb/analyze/builders.h +27 -0
- package/extension/libherb/analyze/conditional_elements.c +594 -0
- package/extension/libherb/analyze/conditional_elements.h +9 -0
- package/extension/libherb/analyze/conditional_open_tags.c +640 -0
- package/extension/libherb/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/analyze/control_type.c +250 -0
- package/extension/libherb/analyze/control_type.h +14 -0
- package/extension/libherb/{analyze_helpers.c → analyze/helpers.c} +48 -23
- package/extension/libherb/{analyze_helpers.h → analyze/helpers.h} +4 -2
- package/extension/libherb/analyze/invalid_structures.c +193 -0
- package/extension/libherb/analyze/invalid_structures.h +11 -0
- package/extension/libherb/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
- package/extension/libherb/analyze/parse_errors.c +84 -0
- package/extension/libherb/analyze/prism_annotate.c +397 -0
- package/extension/libherb/analyze/prism_annotate.h +16 -0
- package/extension/libherb/{analyze_transform.c → analyze/transform.c} +17 -3
- package/extension/libherb/ast_node.c +17 -7
- package/extension/libherb/ast_node.h +11 -5
- package/extension/libherb/ast_nodes.c +663 -388
- package/extension/libherb/ast_nodes.h +118 -39
- package/extension/libherb/ast_pretty_print.c +191 -7
- package/extension/libherb/ast_pretty_print.h +6 -1
- package/extension/libherb/element_source.h +3 -8
- package/extension/libherb/errors.c +1077 -521
- package/extension/libherb/errors.h +149 -56
- package/extension/libherb/extract.c +145 -49
- package/extension/libherb/extract.h +21 -5
- package/extension/libherb/herb.c +52 -34
- package/extension/libherb/herb.h +18 -6
- package/extension/libherb/herb_prism_node.h +13 -0
- package/extension/libherb/html_util.c +241 -12
- package/extension/libherb/html_util.h +7 -2
- package/extension/libherb/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_handler.h +41 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/include/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/{analyze.h → include/analyze/analyze.h} +14 -4
- package/extension/libherb/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/include/analyze/builders.h +27 -0
- package/extension/libherb/include/analyze/conditional_elements.h +9 -0
- package/extension/libherb/include/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/include/analyze/control_type.h +14 -0
- package/extension/libherb/include/{analyze_helpers.h → analyze/helpers.h} +4 -2
- package/extension/libherb/include/analyze/invalid_structures.h +11 -0
- package/extension/libherb/include/analyze/prism_annotate.h +16 -0
- package/extension/libherb/include/ast_node.h +11 -5
- package/extension/libherb/include/ast_nodes.h +118 -39
- package/extension/libherb/include/ast_pretty_print.h +6 -1
- package/extension/libherb/include/element_source.h +3 -8
- package/extension/libherb/include/errors.h +149 -56
- package/extension/libherb/include/extract.h +21 -5
- package/extension/libherb/include/herb.h +18 -6
- package/extension/libherb/include/herb_prism_node.h +13 -0
- package/extension/libherb/include/html_util.h +7 -2
- package/extension/libherb/include/io.h +3 -1
- package/extension/libherb/include/lex_helpers.h +29 -0
- package/extension/libherb/include/lexer.h +1 -1
- package/extension/libherb/include/lexer_peek_helpers.h +87 -13
- package/extension/libherb/include/lexer_struct.h +2 -0
- package/extension/libherb/include/location.h +2 -1
- package/extension/libherb/include/parser.h +27 -2
- package/extension/libherb/include/parser_helpers.h +19 -3
- package/extension/libherb/include/pretty_print.h +10 -5
- package/extension/libherb/include/prism_context.h +45 -0
- package/extension/libherb/include/prism_helpers.h +10 -7
- package/extension/libherb/include/prism_serialized.h +12 -0
- package/extension/libherb/include/token.h +16 -4
- package/extension/libherb/include/token_struct.h +10 -3
- package/extension/libherb/include/utf8.h +2 -1
- package/extension/libherb/include/util/hb_allocator.h +78 -0
- package/extension/libherb/include/util/hb_arena.h +6 -1
- package/extension/libherb/include/util/hb_arena_debug.h +12 -1
- package/extension/libherb/include/util/hb_array.h +7 -3
- package/extension/libherb/include/util/hb_buffer.h +6 -4
- package/extension/libherb/include/util/hb_foreach.h +79 -0
- package/extension/libherb/include/util/hb_narray.h +8 -4
- package/extension/libherb/include/util/hb_string.h +56 -9
- package/extension/libherb/include/util.h +6 -3
- package/extension/libherb/include/version.h +1 -1
- package/extension/libherb/io.c +3 -2
- package/extension/libherb/io.h +3 -1
- package/extension/libherb/lex_helpers.h +29 -0
- package/extension/libherb/lexer.c +42 -30
- package/extension/libherb/lexer.h +1 -1
- package/extension/libherb/lexer_peek_helpers.c +12 -74
- package/extension/libherb/lexer_peek_helpers.h +87 -13
- package/extension/libherb/lexer_struct.h +2 -0
- package/extension/libherb/location.c +2 -2
- package/extension/libherb/location.h +2 -1
- package/extension/libherb/main.c +53 -28
- package/extension/libherb/parser.c +783 -247
- package/extension/libherb/parser.h +27 -2
- package/extension/libherb/parser_helpers.c +110 -23
- package/extension/libherb/parser_helpers.h +19 -3
- package/extension/libherb/parser_match_tags.c +110 -49
- package/extension/libherb/pretty_print.c +29 -24
- package/extension/libherb/pretty_print.h +10 -5
- package/extension/libherb/prism_context.h +45 -0
- package/extension/libherb/prism_helpers.c +30 -27
- package/extension/libherb/prism_helpers.h +10 -7
- package/extension/libherb/prism_serialized.h +12 -0
- package/extension/libherb/ruby_parser.c +2 -0
- package/extension/libherb/token.c +151 -66
- package/extension/libherb/token.h +16 -4
- package/extension/libherb/token_matchers.c +0 -1
- package/extension/libherb/token_struct.h +10 -3
- package/extension/libherb/utf8.c +7 -6
- package/extension/libherb/utf8.h +2 -1
- package/extension/libherb/util/hb_allocator.c +341 -0
- package/extension/libherb/util/hb_allocator.h +78 -0
- package/extension/libherb/util/hb_arena.c +81 -56
- package/extension/libherb/util/hb_arena.h +6 -1
- package/extension/libherb/util/hb_arena_debug.c +32 -17
- package/extension/libherb/util/hb_arena_debug.h +12 -1
- package/extension/libherb/util/hb_array.c +30 -15
- package/extension/libherb/util/hb_array.h +7 -3
- package/extension/libherb/util/hb_buffer.c +17 -21
- package/extension/libherb/util/hb_buffer.h +6 -4
- package/extension/libherb/util/hb_foreach.h +79 -0
- package/extension/libherb/util/hb_narray.c +22 -7
- package/extension/libherb/util/hb_narray.h +8 -4
- package/extension/libherb/util/hb_string.c +49 -35
- package/extension/libherb/util/hb_string.h +56 -9
- package/extension/libherb/util.c +21 -11
- package/extension/libherb/util.h +6 -3
- package/extension/libherb/version.h +1 -1
- package/extension/libherb/visitor.c +48 -1
- package/extension/nodes.cpp +451 -6
- package/extension/nodes.h +8 -1
- package/package.json +12 -8
- package/src/node-backend.ts +11 -1
- package/dist/types/index-cjs.d.cts +0 -1
- package/extension/libherb/analyze.c +0 -1608
- package/extension/libherb/element_source.c +0 -12
- package/extension/libherb/include/util/hb_system.h +0 -9
- package/extension/libherb/util/hb_system.c +0 -30
- package/extension/libherb/util/hb_system.h +0 -9
- package/src/index-cjs.cts +0 -22
- /package/dist/types/{index-esm.d.mts → index.d.ts} +0 -0
- /package/src/{index-esm.mts → index.ts} +0 -0
|
@@ -20,6 +20,8 @@
|
|
|
20
20
|
#include <string.h>
|
|
21
21
|
#include <strings.h>
|
|
22
22
|
|
|
23
|
+
#define MAX_CONSECUTIVE_ERRORS 10
|
|
24
|
+
|
|
23
25
|
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
24
26
|
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
25
27
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
|
|
@@ -27,29 +29,39 @@ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token
|
|
|
27
29
|
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children);
|
|
28
30
|
static void parser_skip_erb_content(lexer_T* lexer);
|
|
29
31
|
static bool parser_lookahead_erb_is_attribute(lexer_T* lexer);
|
|
32
|
+
static bool parser_lookahead_erb_is_control_flow(parser_T* parser);
|
|
30
33
|
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
31
34
|
static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
32
35
|
|
|
33
|
-
const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false
|
|
36
|
+
const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false,
|
|
37
|
+
.analyze = true,
|
|
38
|
+
.strict = true,
|
|
39
|
+
.action_view_helpers = false,
|
|
40
|
+
.prism_nodes_deep = false,
|
|
41
|
+
.prism_nodes = false,
|
|
42
|
+
.prism_program = false };
|
|
34
43
|
|
|
35
44
|
size_t parser_sizeof(void) {
|
|
36
45
|
return sizeof(struct PARSER_STRUCT);
|
|
37
46
|
}
|
|
38
47
|
|
|
39
48
|
void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options) {
|
|
49
|
+
parser->allocator = lexer->allocator;
|
|
40
50
|
parser->lexer = lexer;
|
|
41
51
|
parser->current_token = lexer_next_token(lexer);
|
|
42
|
-
parser->open_tags_stack = hb_array_init(16);
|
|
52
|
+
parser->open_tags_stack = hb_array_init(16, parser->allocator);
|
|
43
53
|
parser->state = PARSER_STATE_DATA;
|
|
44
54
|
parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
|
|
45
55
|
parser->options = options;
|
|
56
|
+
parser->consecutive_error_count = 0;
|
|
57
|
+
parser->in_recovery_mode = false;
|
|
46
58
|
}
|
|
47
59
|
|
|
48
60
|
static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
49
|
-
hb_array_T* errors = hb_array_init(8);
|
|
50
|
-
hb_array_T* children = hb_array_init(8);
|
|
61
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
62
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
51
63
|
hb_buffer_T content;
|
|
52
|
-
hb_buffer_init(&content, 128);
|
|
64
|
+
hb_buffer_init(&content, 128, parser->allocator);
|
|
53
65
|
|
|
54
66
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_CDATA_START, errors);
|
|
55
67
|
position_T start = parser->current_token->location.start;
|
|
@@ -64,8 +76,8 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
64
76
|
}
|
|
65
77
|
|
|
66
78
|
token_T* token = parser_advance(parser);
|
|
67
|
-
|
|
68
|
-
token_free(token);
|
|
79
|
+
hb_buffer_append_string(&content, token->value);
|
|
80
|
+
token_free(token, parser->allocator);
|
|
69
81
|
}
|
|
70
82
|
|
|
71
83
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
@@ -77,26 +89,27 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
77
89
|
tag_closing,
|
|
78
90
|
tag_opening->location.start,
|
|
79
91
|
tag_closing->location.end,
|
|
80
|
-
errors
|
|
92
|
+
errors,
|
|
93
|
+
parser->allocator
|
|
81
94
|
);
|
|
82
95
|
|
|
83
|
-
|
|
84
|
-
token_free(tag_opening);
|
|
85
|
-
token_free(tag_closing);
|
|
96
|
+
hb_buffer_free(&content);
|
|
97
|
+
token_free(tag_opening, parser->allocator);
|
|
98
|
+
token_free(tag_closing, parser->allocator);
|
|
86
99
|
|
|
87
100
|
return cdata;
|
|
88
101
|
}
|
|
89
102
|
|
|
90
103
|
static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
91
|
-
hb_array_T* errors = hb_array_init(8);
|
|
92
|
-
hb_array_T* children = hb_array_init(8);
|
|
104
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
105
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
93
106
|
token_T* comment_start = parser_consume_expected(parser, TOKEN_HTML_COMMENT_START, errors);
|
|
94
107
|
position_T start = parser->current_token->location.start;
|
|
95
108
|
|
|
96
109
|
hb_buffer_T comment;
|
|
97
|
-
hb_buffer_init(&comment, 512);
|
|
110
|
+
hb_buffer_init(&comment, 512, parser->allocator);
|
|
98
111
|
|
|
99
|
-
while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_EOF)) {
|
|
112
|
+
while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_HTML_COMMENT_INVALID_END, TOKEN_EOF)) {
|
|
100
113
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
101
114
|
parser_append_literal_node_from_buffer(parser, &comment, children, start);
|
|
102
115
|
|
|
@@ -109,13 +122,26 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
109
122
|
}
|
|
110
123
|
|
|
111
124
|
token_T* token = parser_advance(parser);
|
|
112
|
-
|
|
113
|
-
token_free(token);
|
|
125
|
+
hb_buffer_append_string(&comment, token->value);
|
|
126
|
+
token_free(token, parser->allocator);
|
|
114
127
|
}
|
|
115
128
|
|
|
116
129
|
parser_append_literal_node_from_buffer(parser, &comment, children, start);
|
|
117
130
|
|
|
118
|
-
token_T* comment_end =
|
|
131
|
+
token_T* comment_end = NULL;
|
|
132
|
+
|
|
133
|
+
if (token_is(parser, TOKEN_HTML_COMMENT_INVALID_END)) {
|
|
134
|
+
comment_end = parser_advance(parser);
|
|
135
|
+
append_invalid_comment_closing_tag_error(
|
|
136
|
+
comment_end,
|
|
137
|
+
comment_end->location.start,
|
|
138
|
+
comment_end->location.end,
|
|
139
|
+
parser->allocator,
|
|
140
|
+
errors
|
|
141
|
+
);
|
|
142
|
+
} else {
|
|
143
|
+
comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors);
|
|
144
|
+
}
|
|
119
145
|
|
|
120
146
|
AST_HTML_COMMENT_NODE_T* comment_node = ast_html_comment_node_init(
|
|
121
147
|
comment_start,
|
|
@@ -123,21 +149,22 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
123
149
|
comment_end,
|
|
124
150
|
comment_start->location.start,
|
|
125
151
|
comment_end->location.end,
|
|
126
|
-
errors
|
|
152
|
+
errors,
|
|
153
|
+
parser->allocator
|
|
127
154
|
);
|
|
128
155
|
|
|
129
|
-
|
|
130
|
-
token_free(comment_start);
|
|
131
|
-
token_free(comment_end);
|
|
156
|
+
hb_buffer_free(&comment);
|
|
157
|
+
token_free(comment_start, parser->allocator);
|
|
158
|
+
token_free(comment_end, parser->allocator);
|
|
132
159
|
|
|
133
160
|
return comment_node;
|
|
134
161
|
}
|
|
135
162
|
|
|
136
163
|
static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
137
|
-
hb_array_T* errors = hb_array_init(8);
|
|
138
|
-
hb_array_T* children = hb_array_init(8);
|
|
164
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
165
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
139
166
|
hb_buffer_T content;
|
|
140
|
-
hb_buffer_init(&content, 64);
|
|
167
|
+
hb_buffer_init(&content, 64, parser->allocator);
|
|
141
168
|
|
|
142
169
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_DOCTYPE, errors);
|
|
143
170
|
|
|
@@ -154,8 +181,8 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
|
154
181
|
}
|
|
155
182
|
|
|
156
183
|
token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
|
|
157
|
-
|
|
158
|
-
token_free(token);
|
|
184
|
+
hb_buffer_append_string(&content, token->value);
|
|
185
|
+
token_free(token, parser->allocator);
|
|
159
186
|
}
|
|
160
187
|
|
|
161
188
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
@@ -168,21 +195,22 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
|
168
195
|
tag_closing,
|
|
169
196
|
tag_opening->location.start,
|
|
170
197
|
tag_closing->location.end,
|
|
171
|
-
errors
|
|
198
|
+
errors,
|
|
199
|
+
parser->allocator
|
|
172
200
|
);
|
|
173
201
|
|
|
174
|
-
token_free(tag_opening);
|
|
175
|
-
token_free(tag_closing);
|
|
176
|
-
|
|
202
|
+
token_free(tag_opening, parser->allocator);
|
|
203
|
+
token_free(tag_closing, parser->allocator);
|
|
204
|
+
hb_buffer_free(&content);
|
|
177
205
|
|
|
178
206
|
return doctype;
|
|
179
207
|
}
|
|
180
208
|
|
|
181
209
|
static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser) {
|
|
182
|
-
hb_array_T* errors = hb_array_init(8);
|
|
183
|
-
hb_array_T* children = hb_array_init(8);
|
|
210
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
211
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
184
212
|
hb_buffer_T content;
|
|
185
|
-
hb_buffer_init(&content, 64);
|
|
213
|
+
hb_buffer_init(&content, 64, parser->allocator);
|
|
186
214
|
|
|
187
215
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_XML_DECLARATION, errors);
|
|
188
216
|
|
|
@@ -201,8 +229,8 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
201
229
|
}
|
|
202
230
|
|
|
203
231
|
token_T* token = parser_advance(parser);
|
|
204
|
-
|
|
205
|
-
token_free(token);
|
|
232
|
+
hb_buffer_append_string(&content, token->value);
|
|
233
|
+
token_free(token, parser->allocator);
|
|
206
234
|
}
|
|
207
235
|
|
|
208
236
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
@@ -215,12 +243,13 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
215
243
|
tag_closing,
|
|
216
244
|
tag_opening->location.start,
|
|
217
245
|
tag_closing->location.end,
|
|
218
|
-
errors
|
|
246
|
+
errors,
|
|
247
|
+
parser->allocator
|
|
219
248
|
);
|
|
220
249
|
|
|
221
|
-
token_free(tag_opening);
|
|
222
|
-
token_free(tag_closing);
|
|
223
|
-
|
|
250
|
+
token_free(tag_opening, parser->allocator);
|
|
251
|
+
token_free(tag_closing, parser->allocator);
|
|
252
|
+
hb_buffer_free(&content);
|
|
224
253
|
|
|
225
254
|
return xml_declaration;
|
|
226
255
|
}
|
|
@@ -229,7 +258,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
|
|
|
229
258
|
position_T start = parser->current_token->location.start;
|
|
230
259
|
|
|
231
260
|
hb_buffer_T content;
|
|
232
|
-
hb_buffer_init(&content, 2048);
|
|
261
|
+
hb_buffer_init(&content, 2048, parser->allocator);
|
|
233
262
|
|
|
234
263
|
while (token_is_none_of(
|
|
235
264
|
parser,
|
|
@@ -241,49 +270,66 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
|
|
|
241
270
|
TOKEN_EOF
|
|
242
271
|
)) {
|
|
243
272
|
if (token_is(parser, TOKEN_ERROR)) {
|
|
244
|
-
|
|
273
|
+
hb_buffer_free(&content);
|
|
245
274
|
|
|
246
|
-
|
|
247
|
-
append_unexpected_error(
|
|
248
|
-
"Token Error",
|
|
249
|
-
"not TOKEN_ERROR",
|
|
250
|
-
token->value,
|
|
251
|
-
token->location.start,
|
|
252
|
-
token->location.end,
|
|
253
|
-
document_errors
|
|
254
|
-
);
|
|
255
|
-
|
|
256
|
-
token_free(token);
|
|
275
|
+
parser_append_unexpected_error_string(parser, document_errors, "Token Error", "not an error token");
|
|
257
276
|
|
|
258
277
|
return NULL;
|
|
259
278
|
}
|
|
260
279
|
|
|
280
|
+
if (parser->options.strict && parser->current_token->type == TOKEN_PERCENT) {
|
|
281
|
+
lexer_T lexer_copy = *parser->lexer;
|
|
282
|
+
token_T* peek_token = lexer_next_token(&lexer_copy);
|
|
283
|
+
|
|
284
|
+
if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
|
|
285
|
+
position_T stray_start = parser->current_token->location.start;
|
|
286
|
+
position_T stray_end = peek_token->location.end;
|
|
287
|
+
token_free(peek_token, parser->allocator);
|
|
288
|
+
|
|
289
|
+
append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, document_errors);
|
|
290
|
+
|
|
291
|
+
token_T* percent = parser_advance(parser);
|
|
292
|
+
hb_buffer_append_string(&content, percent->value);
|
|
293
|
+
token_free(percent, parser->allocator);
|
|
294
|
+
|
|
295
|
+
token_T* gt = parser_advance(parser);
|
|
296
|
+
hb_buffer_append_string(&content, gt->value);
|
|
297
|
+
token_free(gt, parser->allocator);
|
|
298
|
+
|
|
299
|
+
continue;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
token_free(peek_token, parser->allocator);
|
|
303
|
+
}
|
|
304
|
+
|
|
261
305
|
token_T* token = parser_advance(parser);
|
|
262
|
-
|
|
263
|
-
token_free(token);
|
|
306
|
+
hb_buffer_append_string(&content, token->value);
|
|
307
|
+
token_free(token, parser->allocator);
|
|
264
308
|
}
|
|
265
309
|
|
|
266
|
-
hb_array_T* errors = hb_array_init(8);
|
|
310
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
267
311
|
|
|
268
312
|
AST_HTML_TEXT_NODE_T* text_node = NULL;
|
|
269
313
|
|
|
270
314
|
if (hb_buffer_length(&content) > 0) {
|
|
315
|
+
hb_string_T text_content = { .data = content.value, .length = (uint32_t) content.length };
|
|
271
316
|
text_node =
|
|
272
|
-
ast_html_text_node_init(
|
|
317
|
+
ast_html_text_node_init(text_content, start, parser->current_token->location.start, errors, parser->allocator);
|
|
273
318
|
} else {
|
|
274
|
-
text_node =
|
|
319
|
+
text_node =
|
|
320
|
+
ast_html_text_node_init(HB_STRING_EMPTY, start, parser->current_token->location.start, errors, parser->allocator);
|
|
275
321
|
}
|
|
276
322
|
|
|
277
|
-
|
|
323
|
+
hb_buffer_free(&content);
|
|
278
324
|
|
|
279
325
|
return text_node;
|
|
280
326
|
}
|
|
281
327
|
|
|
282
328
|
static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
|
|
283
|
-
hb_array_T* errors = hb_array_init(8);
|
|
284
|
-
hb_array_T* children = hb_array_init(8);
|
|
329
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
330
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
285
331
|
hb_buffer_T buffer;
|
|
286
|
-
hb_buffer_init(&buffer, 128);
|
|
332
|
+
hb_buffer_init(&buffer, 128, parser->allocator);
|
|
287
333
|
position_T start = parser->current_token->location.start;
|
|
288
334
|
|
|
289
335
|
while (token_is_none_of(
|
|
@@ -296,6 +342,16 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
296
342
|
TOKEN_EOF
|
|
297
343
|
)) {
|
|
298
344
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
345
|
+
hb_string_T tag = parser->current_token->value;
|
|
346
|
+
bool is_output_tag = (tag.length >= 3 && tag.data[2] == '=');
|
|
347
|
+
|
|
348
|
+
if (!is_output_tag) {
|
|
349
|
+
bool is_control_flow = parser_lookahead_erb_is_control_flow(parser);
|
|
350
|
+
|
|
351
|
+
if (hb_buffer_is_empty(&buffer) && hb_array_size(children) == 0) { break; }
|
|
352
|
+
if (is_control_flow) { break; }
|
|
353
|
+
}
|
|
354
|
+
|
|
299
355
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
300
356
|
|
|
301
357
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
@@ -306,8 +362,8 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
306
362
|
}
|
|
307
363
|
|
|
308
364
|
token_T* token = parser_advance(parser);
|
|
309
|
-
|
|
310
|
-
token_free(token);
|
|
365
|
+
hb_buffer_append_string(&buffer, token->value);
|
|
366
|
+
token_free(token, parser->allocator);
|
|
311
367
|
}
|
|
312
368
|
|
|
313
369
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
@@ -327,9 +383,9 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
327
383
|
}
|
|
328
384
|
|
|
329
385
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
|
330
|
-
ast_html_attribute_name_node_init(children, node_start, node_end, errors);
|
|
386
|
+
ast_html_attribute_name_node_init(children, node_start, node_end, errors, parser->allocator);
|
|
331
387
|
|
|
332
|
-
|
|
388
|
+
hb_buffer_free(&buffer);
|
|
333
389
|
|
|
334
390
|
return attribute_name;
|
|
335
391
|
}
|
|
@@ -340,55 +396,137 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
340
396
|
hb_array_T* errors
|
|
341
397
|
) {
|
|
342
398
|
hb_buffer_T buffer;
|
|
343
|
-
hb_buffer_init(&buffer, 512);
|
|
399
|
+
hb_buffer_init(&buffer, 512, parser->allocator);
|
|
344
400
|
token_T* opening_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
345
401
|
position_T start = parser->current_token->location.start;
|
|
346
402
|
|
|
347
403
|
while (!token_is(parser, TOKEN_EOF)
|
|
348
404
|
&& !(
|
|
349
405
|
token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
|
350
|
-
&&
|
|
406
|
+
&& hb_string_equals(parser->current_token->value, opening_quote->value)
|
|
351
407
|
)) {
|
|
352
|
-
if (token_is(parser,
|
|
408
|
+
if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
|
|
409
|
+
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
|
410
|
+
bool found_closing_quote = false;
|
|
411
|
+
token_T* lookahead = lexer_next_token(parser->lexer);
|
|
412
|
+
|
|
413
|
+
while (lookahead && lookahead->type != TOKEN_EOF) {
|
|
414
|
+
if (lookahead->type == TOKEN_QUOTE && opening_quote != NULL
|
|
415
|
+
&& hb_string_equals(lookahead->value, opening_quote->value)) {
|
|
416
|
+
found_closing_quote = true;
|
|
417
|
+
token_free(lookahead, parser->allocator);
|
|
418
|
+
break;
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
token_free(lookahead, parser->allocator);
|
|
422
|
+
|
|
423
|
+
lookahead = lexer_next_token(parser->lexer);
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
if (lookahead && !found_closing_quote && lookahead->type == TOKEN_EOF) {
|
|
427
|
+
token_free(lookahead, parser->allocator);
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
lexer_restore_state(parser->lexer, saved_state);
|
|
431
|
+
|
|
432
|
+
if (found_closing_quote) {
|
|
433
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
434
|
+
token_free(parser->current_token, parser->allocator);
|
|
435
|
+
parser->current_token = lexer_next_token(parser->lexer);
|
|
436
|
+
continue;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
append_unclosed_quote_error(
|
|
440
|
+
opening_quote,
|
|
441
|
+
opening_quote->location.start,
|
|
442
|
+
parser->current_token->location.start,
|
|
443
|
+
parser->allocator,
|
|
444
|
+
errors
|
|
445
|
+
);
|
|
446
|
+
|
|
353
447
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
448
|
+
hb_buffer_free(&buffer);
|
|
354
449
|
|
|
355
|
-
|
|
450
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
|
|
451
|
+
opening_quote,
|
|
452
|
+
children,
|
|
453
|
+
NULL,
|
|
454
|
+
true,
|
|
455
|
+
opening_quote->location.start,
|
|
456
|
+
parser->current_token->location.start,
|
|
457
|
+
errors,
|
|
458
|
+
parser->allocator
|
|
459
|
+
);
|
|
356
460
|
|
|
357
|
-
|
|
461
|
+
token_free(opening_quote, parser->allocator);
|
|
358
462
|
|
|
359
|
-
|
|
463
|
+
return attribute_value;
|
|
360
464
|
}
|
|
361
465
|
|
|
362
|
-
|
|
466
|
+
bool buffer_ends_with_whitespace = buffer.length > 0 && is_whitespace(buffer.value[buffer.length - 1]);
|
|
467
|
+
|
|
468
|
+
if (token_is(parser, TOKEN_IDENTIFIER) && buffer_ends_with_whitespace) {
|
|
363
469
|
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
|
470
|
+
token_T* equals_token = lexer_next_token(parser->lexer);
|
|
471
|
+
bool looks_like_new_attribute = false;
|
|
364
472
|
|
|
365
|
-
|
|
473
|
+
if (equals_token && equals_token->type == TOKEN_EQUALS) {
|
|
474
|
+
token_T* after_equals = lexer_next_token(parser->lexer);
|
|
475
|
+
looks_like_new_attribute = (after_equals && after_equals->type == TOKEN_QUOTE);
|
|
366
476
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
hb_buffer_append(&buffer, parser->current_token->value);
|
|
370
|
-
hb_buffer_append(&buffer, next_token->value);
|
|
477
|
+
if (after_equals) { token_free(after_equals, parser->allocator); }
|
|
478
|
+
}
|
|
371
479
|
|
|
372
|
-
|
|
373
|
-
|
|
480
|
+
if (equals_token) { token_free(equals_token, parser->allocator); }
|
|
481
|
+
lexer_restore_state(parser->lexer, saved_state);
|
|
374
482
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
483
|
+
if (looks_like_new_attribute) {
|
|
484
|
+
append_unclosed_quote_error(
|
|
485
|
+
opening_quote,
|
|
486
|
+
opening_quote->location.start,
|
|
487
|
+
parser->current_token->location.start,
|
|
488
|
+
parser->allocator,
|
|
489
|
+
errors
|
|
490
|
+
);
|
|
491
|
+
|
|
492
|
+
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
493
|
+
hb_buffer_free(&buffer);
|
|
494
|
+
|
|
495
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
|
|
496
|
+
opening_quote,
|
|
497
|
+
children,
|
|
498
|
+
NULL,
|
|
499
|
+
true,
|
|
500
|
+
opening_quote->location.start,
|
|
501
|
+
parser->current_token->location.start,
|
|
502
|
+
errors,
|
|
503
|
+
parser->allocator
|
|
504
|
+
);
|
|
505
|
+
|
|
506
|
+
token_free(opening_quote, parser->allocator);
|
|
379
507
|
|
|
380
|
-
|
|
508
|
+
return attribute_value;
|
|
381
509
|
}
|
|
382
510
|
}
|
|
383
511
|
|
|
384
|
-
|
|
385
|
-
|
|
512
|
+
if (token_is(parser, TOKEN_ERB_START)) {
|
|
513
|
+
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
514
|
+
|
|
515
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
516
|
+
|
|
517
|
+
start = parser->current_token->location.start;
|
|
518
|
+
|
|
519
|
+
continue;
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
523
|
+
token_free(parser->current_token, parser->allocator);
|
|
386
524
|
|
|
387
525
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
388
526
|
}
|
|
389
527
|
|
|
390
528
|
if (token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
|
391
|
-
&&
|
|
529
|
+
&& hb_string_equals(parser->current_token->value, opening_quote->value)) {
|
|
392
530
|
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
|
393
531
|
|
|
394
532
|
token_T* potential_closing = parser->current_token;
|
|
@@ -396,27 +534,28 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
396
534
|
|
|
397
535
|
if (token_is(parser, TOKEN_IDENTIFIER) || token_is(parser, TOKEN_CHARACTER)) {
|
|
398
536
|
append_unexpected_error(
|
|
399
|
-
"Unescaped quote character in attribute value",
|
|
400
|
-
"
|
|
537
|
+
hb_string("Unescaped quote character in attribute value"),
|
|
538
|
+
hb_string("HTML entity (&apos;/&quot;) or different quote style"),
|
|
401
539
|
opening_quote->value,
|
|
402
540
|
potential_closing->location.start,
|
|
403
541
|
potential_closing->location.end,
|
|
542
|
+
parser->allocator,
|
|
404
543
|
errors
|
|
405
544
|
);
|
|
406
545
|
|
|
407
546
|
lexer_restore_state(parser->lexer, saved_state);
|
|
408
547
|
|
|
409
|
-
token_free(parser->current_token);
|
|
548
|
+
token_free(parser->current_token, parser->allocator);
|
|
410
549
|
parser->current_token = potential_closing;
|
|
411
550
|
|
|
412
|
-
|
|
413
|
-
token_free(parser->current_token);
|
|
551
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
552
|
+
token_free(parser->current_token, parser->allocator);
|
|
414
553
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
415
554
|
|
|
416
555
|
while (!token_is(parser, TOKEN_EOF)
|
|
417
556
|
&& !(
|
|
418
557
|
token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
|
419
|
-
&&
|
|
558
|
+
&& hb_string_equals(parser->current_token->value, opening_quote->value)
|
|
420
559
|
)) {
|
|
421
560
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
422
561
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
@@ -428,13 +567,13 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
428
567
|
continue;
|
|
429
568
|
}
|
|
430
569
|
|
|
431
|
-
|
|
432
|
-
token_free(parser->current_token);
|
|
570
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
571
|
+
token_free(parser->current_token, parser->allocator);
|
|
433
572
|
|
|
434
573
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
435
574
|
}
|
|
436
575
|
} else {
|
|
437
|
-
token_free(parser->current_token);
|
|
576
|
+
token_free(parser->current_token, parser->allocator);
|
|
438
577
|
parser->current_token = potential_closing;
|
|
439
578
|
|
|
440
579
|
lexer_restore_state(parser->lexer, saved_state);
|
|
@@ -442,20 +581,10 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
442
581
|
}
|
|
443
582
|
|
|
444
583
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
445
|
-
|
|
584
|
+
hb_buffer_free(&buffer);
|
|
446
585
|
|
|
447
586
|
token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
448
587
|
|
|
449
|
-
if (opening_quote != NULL && closing_quote != NULL && !string_equals(opening_quote->value, closing_quote->value)) {
|
|
450
|
-
append_quotes_mismatch_error(
|
|
451
|
-
opening_quote,
|
|
452
|
-
closing_quote,
|
|
453
|
-
closing_quote->location.start,
|
|
454
|
-
closing_quote->location.end,
|
|
455
|
-
errors
|
|
456
|
-
);
|
|
457
|
-
}
|
|
458
|
-
|
|
459
588
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
|
|
460
589
|
opening_quote,
|
|
461
590
|
children,
|
|
@@ -463,18 +592,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
463
592
|
true,
|
|
464
593
|
opening_quote->location.start,
|
|
465
594
|
closing_quote->location.end,
|
|
466
|
-
errors
|
|
595
|
+
errors,
|
|
596
|
+
parser->allocator
|
|
467
597
|
);
|
|
468
598
|
|
|
469
|
-
token_free(opening_quote);
|
|
470
|
-
token_free(closing_quote);
|
|
599
|
+
token_free(opening_quote, parser->allocator);
|
|
600
|
+
token_free(closing_quote, parser->allocator);
|
|
471
601
|
|
|
472
602
|
return attribute_value;
|
|
473
603
|
}
|
|
474
604
|
|
|
475
605
|
static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser_T* parser) {
|
|
476
|
-
hb_array_T* children = hb_array_init(8);
|
|
477
|
-
hb_array_T* errors = hb_array_init(8);
|
|
606
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
607
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
478
608
|
|
|
479
609
|
// <div id=<%= "home" %>>
|
|
480
610
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
@@ -488,15 +618,16 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
488
618
|
false,
|
|
489
619
|
erb_node->base.location.start,
|
|
490
620
|
erb_node->base.location.end,
|
|
491
|
-
errors
|
|
621
|
+
errors,
|
|
622
|
+
parser->allocator
|
|
492
623
|
);
|
|
493
624
|
}
|
|
494
625
|
|
|
495
626
|
// <div id=home>
|
|
496
627
|
if (token_is(parser, TOKEN_IDENTIFIER)) {
|
|
497
628
|
token_T* identifier = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
|
498
|
-
AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier);
|
|
499
|
-
token_free(identifier);
|
|
629
|
+
AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier, parser->allocator);
|
|
630
|
+
token_free(identifier, parser->allocator);
|
|
500
631
|
|
|
501
632
|
hb_array_append(children, literal);
|
|
502
633
|
|
|
@@ -507,7 +638,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
507
638
|
false,
|
|
508
639
|
literal->base.location.start,
|
|
509
640
|
literal->base.location.end,
|
|
510
|
-
errors
|
|
641
|
+
errors,
|
|
642
|
+
parser->allocator
|
|
511
643
|
);
|
|
512
644
|
}
|
|
513
645
|
|
|
@@ -520,31 +652,37 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
520
652
|
position_T end = token->location.end;
|
|
521
653
|
|
|
522
654
|
append_unexpected_error(
|
|
523
|
-
"Invalid quote character for HTML attribute",
|
|
524
|
-
"single quote (') or double quote (\")",
|
|
525
|
-
"backtick
|
|
655
|
+
hb_string("Invalid quote character for HTML attribute"),
|
|
656
|
+
hb_string("single quote (') or double quote (\")"),
|
|
657
|
+
hb_string("a backtick"),
|
|
526
658
|
start,
|
|
527
659
|
end,
|
|
660
|
+
parser->allocator,
|
|
528
661
|
errors
|
|
529
662
|
);
|
|
530
663
|
|
|
531
664
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
|
|
532
|
-
ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
|
|
665
|
+
ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors, parser->allocator);
|
|
533
666
|
|
|
534
|
-
token_free(token);
|
|
667
|
+
token_free(token, parser->allocator);
|
|
535
668
|
|
|
536
669
|
return value;
|
|
537
670
|
}
|
|
538
671
|
|
|
672
|
+
char* expected = token_types_to_friendly_string(parser->allocator, TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START);
|
|
673
|
+
|
|
539
674
|
append_unexpected_error(
|
|
540
|
-
"Unexpected Token",
|
|
541
|
-
|
|
542
|
-
|
|
675
|
+
hb_string("Unexpected Token"),
|
|
676
|
+
hb_string(expected),
|
|
677
|
+
token_type_to_friendly_string(parser->current_token->type),
|
|
543
678
|
parser->current_token->location.start,
|
|
544
679
|
parser->current_token->location.end,
|
|
680
|
+
parser->allocator,
|
|
545
681
|
errors
|
|
546
682
|
);
|
|
547
683
|
|
|
684
|
+
hb_allocator_dealloc(parser->allocator, expected);
|
|
685
|
+
|
|
548
686
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init(
|
|
549
687
|
NULL,
|
|
550
688
|
children,
|
|
@@ -552,7 +690,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
552
690
|
false,
|
|
553
691
|
parser->current_token->location.start,
|
|
554
692
|
parser->current_token->location.end,
|
|
555
|
-
errors
|
|
693
|
+
errors,
|
|
694
|
+
parser->allocator
|
|
556
695
|
);
|
|
557
696
|
|
|
558
697
|
return value;
|
|
@@ -567,7 +706,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
567
706
|
|
|
568
707
|
if (has_equals) {
|
|
569
708
|
hb_buffer_T equals_buffer;
|
|
570
|
-
hb_buffer_init(&equals_buffer, 256);
|
|
709
|
+
hb_buffer_init(&equals_buffer, 256, parser->allocator);
|
|
571
710
|
position_T equals_start = { 0 };
|
|
572
711
|
position_T equals_end = { 0 };
|
|
573
712
|
uint32_t range_start = 0;
|
|
@@ -583,8 +722,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
583
722
|
range_start = whitespace->range.from;
|
|
584
723
|
}
|
|
585
724
|
|
|
586
|
-
|
|
587
|
-
token_free(whitespace);
|
|
725
|
+
hb_buffer_append_string(&equals_buffer, whitespace->value);
|
|
726
|
+
token_free(whitespace, parser->allocator);
|
|
588
727
|
}
|
|
589
728
|
|
|
590
729
|
token_T* equals = parser_advance(parser);
|
|
@@ -595,27 +734,45 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
595
734
|
range_start = equals->range.from;
|
|
596
735
|
}
|
|
597
736
|
|
|
598
|
-
|
|
737
|
+
hb_buffer_append_string(&equals_buffer, equals->value);
|
|
599
738
|
equals_end = equals->location.end;
|
|
600
739
|
range_end = equals->range.to;
|
|
601
|
-
token_free(equals);
|
|
740
|
+
token_free(equals, parser->allocator);
|
|
602
741
|
|
|
603
742
|
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
604
743
|
token_T* whitespace = parser_advance(parser);
|
|
605
|
-
|
|
744
|
+
hb_buffer_append_string(&equals_buffer, whitespace->value);
|
|
606
745
|
equals_end = whitespace->location.end;
|
|
607
746
|
range_end = whitespace->range.to;
|
|
608
|
-
token_free(whitespace);
|
|
747
|
+
token_free(whitespace, parser->allocator);
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
token_T* equals_with_whitespace = hb_allocator_alloc(parser->allocator, sizeof(token_T));
|
|
751
|
+
|
|
752
|
+
if (!equals_with_whitespace) {
|
|
753
|
+
hb_buffer_free(&equals_buffer);
|
|
754
|
+
|
|
755
|
+
return ast_html_attribute_node_init(
|
|
756
|
+
attribute_name,
|
|
757
|
+
NULL,
|
|
758
|
+
NULL,
|
|
759
|
+
attribute_name->base.location.start,
|
|
760
|
+
attribute_name->base.location.end,
|
|
761
|
+
NULL,
|
|
762
|
+
parser->allocator
|
|
763
|
+
);
|
|
609
764
|
}
|
|
610
765
|
|
|
611
|
-
token_T* equals_with_whitespace = calloc(1, sizeof(token_T));
|
|
612
766
|
equals_with_whitespace->type = TOKEN_EQUALS;
|
|
613
|
-
|
|
767
|
+
|
|
768
|
+
char* arena_copy = hb_allocator_strndup(parser->allocator, equals_buffer.value, equals_buffer.length);
|
|
769
|
+
equals_with_whitespace->value = (hb_string_T) { .data = arena_copy, .length = (uint32_t) equals_buffer.length };
|
|
770
|
+
|
|
771
|
+
hb_buffer_free(&equals_buffer);
|
|
772
|
+
|
|
614
773
|
equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
|
|
615
774
|
equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
|
|
616
775
|
|
|
617
|
-
free(equals_buffer.value);
|
|
618
|
-
|
|
619
776
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
|
620
777
|
|
|
621
778
|
return ast_html_attribute_node_init(
|
|
@@ -624,7 +781,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
624
781
|
attribute_value,
|
|
625
782
|
attribute_name->base.location.start,
|
|
626
783
|
attribute_value->base.location.end,
|
|
627
|
-
NULL
|
|
784
|
+
NULL,
|
|
785
|
+
parser->allocator
|
|
628
786
|
);
|
|
629
787
|
} else {
|
|
630
788
|
return ast_html_attribute_node_init(
|
|
@@ -633,7 +791,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
633
791
|
NULL,
|
|
634
792
|
attribute_name->base.location.start,
|
|
635
793
|
attribute_name->base.location.end,
|
|
636
|
-
NULL
|
|
794
|
+
NULL,
|
|
795
|
+
parser->allocator
|
|
637
796
|
);
|
|
638
797
|
}
|
|
639
798
|
} else {
|
|
@@ -645,6 +804,51 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
645
804
|
if (equals != NULL) {
|
|
646
805
|
parser_consume_whitespace(parser, NULL);
|
|
647
806
|
|
|
807
|
+
// <div class= >
|
|
808
|
+
if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
|
|
809
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
810
|
+
hb_string_T attribute_name_string = hb_string("unknown");
|
|
811
|
+
|
|
812
|
+
if (hb_array_size(attribute_name->children) > 0) {
|
|
813
|
+
AST_LITERAL_NODE_T* first_child = (AST_LITERAL_NODE_T*) hb_array_get(attribute_name->children, 0);
|
|
814
|
+
|
|
815
|
+
if (first_child && !hb_string_is_empty(first_child->content)) { attribute_name_string = first_child->content; }
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
append_missing_attribute_value_error(
|
|
819
|
+
attribute_name_string,
|
|
820
|
+
equals->location.start,
|
|
821
|
+
parser->current_token->location.start,
|
|
822
|
+
parser->allocator,
|
|
823
|
+
errors
|
|
824
|
+
);
|
|
825
|
+
|
|
826
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* empty_value = ast_html_attribute_value_node_init(
|
|
827
|
+
NULL,
|
|
828
|
+
hb_array_init(8, parser->allocator),
|
|
829
|
+
NULL,
|
|
830
|
+
false,
|
|
831
|
+
equals->location.end,
|
|
832
|
+
parser->current_token->location.start,
|
|
833
|
+
errors,
|
|
834
|
+
parser->allocator
|
|
835
|
+
);
|
|
836
|
+
|
|
837
|
+
AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
|
|
838
|
+
attribute_name,
|
|
839
|
+
equals,
|
|
840
|
+
empty_value,
|
|
841
|
+
attribute_name->base.location.start,
|
|
842
|
+
parser->current_token->location.start,
|
|
843
|
+
NULL,
|
|
844
|
+
parser->allocator
|
|
845
|
+
);
|
|
846
|
+
|
|
847
|
+
token_free(equals, parser->allocator);
|
|
848
|
+
|
|
849
|
+
return attribute_node;
|
|
850
|
+
}
|
|
851
|
+
|
|
648
852
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
|
649
853
|
|
|
650
854
|
AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
|
|
@@ -653,10 +857,11 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
653
857
|
attribute_value,
|
|
654
858
|
attribute_name->base.location.start,
|
|
655
859
|
attribute_value->base.location.end,
|
|
656
|
-
NULL
|
|
860
|
+
NULL,
|
|
861
|
+
parser->allocator
|
|
657
862
|
);
|
|
658
863
|
|
|
659
|
-
token_free(equals);
|
|
864
|
+
token_free(equals, parser->allocator);
|
|
660
865
|
|
|
661
866
|
return attribute_node;
|
|
662
867
|
}
|
|
@@ -667,7 +872,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
667
872
|
NULL,
|
|
668
873
|
attribute_name->base.location.start,
|
|
669
874
|
attribute_name->base.location.end,
|
|
670
|
-
NULL
|
|
875
|
+
NULL,
|
|
876
|
+
parser->allocator
|
|
671
877
|
);
|
|
672
878
|
}
|
|
673
879
|
|
|
@@ -678,11 +884,11 @@ static void parser_skip_erb_content(lexer_T* lexer) {
|
|
|
678
884
|
token = lexer_next_token(lexer);
|
|
679
885
|
|
|
680
886
|
if (token->type == TOKEN_ERB_END) {
|
|
681
|
-
token_free(token);
|
|
887
|
+
token_free(token, lexer->allocator);
|
|
682
888
|
break;
|
|
683
889
|
}
|
|
684
890
|
|
|
685
|
-
token_free(token);
|
|
891
|
+
token_free(token, lexer->allocator);
|
|
686
892
|
} while (true);
|
|
687
893
|
}
|
|
688
894
|
|
|
@@ -693,12 +899,12 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
|
|
|
693
899
|
after = lexer_next_token(lexer);
|
|
694
900
|
|
|
695
901
|
if (after->type == TOKEN_EQUALS) {
|
|
696
|
-
token_free(after);
|
|
902
|
+
token_free(after, lexer->allocator);
|
|
697
903
|
return true;
|
|
698
904
|
}
|
|
699
905
|
|
|
700
906
|
if (after->type == TOKEN_WHITESPACE || after->type == TOKEN_NEWLINE) {
|
|
701
|
-
token_free(after);
|
|
907
|
+
token_free(after, lexer->allocator);
|
|
702
908
|
continue;
|
|
703
909
|
}
|
|
704
910
|
|
|
@@ -706,23 +912,56 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
|
|
|
706
912
|
|| after->type == TOKEN_ERB_START) {
|
|
707
913
|
|
|
708
914
|
if (after->type == TOKEN_ERB_START) {
|
|
709
|
-
token_free(after);
|
|
915
|
+
token_free(after, lexer->allocator);
|
|
710
916
|
parser_skip_erb_content(lexer);
|
|
711
917
|
} else {
|
|
712
|
-
token_free(after);
|
|
918
|
+
token_free(after, lexer->allocator);
|
|
713
919
|
}
|
|
714
920
|
continue;
|
|
715
921
|
}
|
|
716
922
|
|
|
717
|
-
token_free(after);
|
|
923
|
+
token_free(after, lexer->allocator);
|
|
718
924
|
return false;
|
|
719
925
|
|
|
720
926
|
} while (true);
|
|
721
927
|
}
|
|
722
928
|
|
|
929
|
+
static bool starts_with_keyword(hb_string_T string, const char* keyword) {
|
|
930
|
+
hb_string_T prefix = hb_string(keyword);
|
|
931
|
+
if (string.length < prefix.length) { return false; }
|
|
932
|
+
if (strncmp(string.data, prefix.data, prefix.length) != 0) { return false; }
|
|
933
|
+
|
|
934
|
+
if (string.length == prefix.length) { return true; }
|
|
935
|
+
|
|
936
|
+
return is_whitespace(string.data[prefix.length]);
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
// TODO: ideally we could avoid basing this off of strings, and use the step in analyze.c
|
|
940
|
+
static bool parser_lookahead_erb_is_control_flow(parser_T* parser) {
|
|
941
|
+
lexer_T lexer_copy = *parser->lexer;
|
|
942
|
+
token_T* content = lexer_next_token(&lexer_copy);
|
|
943
|
+
|
|
944
|
+
if (content == NULL || content->type != TOKEN_ERB_CONTENT) {
|
|
945
|
+
if (content) { token_free(content, parser->allocator); }
|
|
946
|
+
|
|
947
|
+
return false;
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
hb_string_T trimmed = hb_string_trim_start(content->value);
|
|
951
|
+
|
|
952
|
+
bool is_control_flow = starts_with_keyword(trimmed, "end") || starts_with_keyword(trimmed, "else")
|
|
953
|
+
|| starts_with_keyword(trimmed, "elsif") || starts_with_keyword(trimmed, "in")
|
|
954
|
+
|| starts_with_keyword(trimmed, "when") || starts_with_keyword(trimmed, "rescue")
|
|
955
|
+
|| starts_with_keyword(trimmed, "ensure");
|
|
956
|
+
|
|
957
|
+
token_free(content, parser->allocator);
|
|
958
|
+
|
|
959
|
+
return is_control_flow;
|
|
960
|
+
}
|
|
961
|
+
|
|
723
962
|
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
|
|
724
|
-
bool is_output_tag =
|
|
725
|
-
&&
|
|
963
|
+
bool is_output_tag = !hb_string_is_empty(parser->current_token->value)
|
|
964
|
+
&& hb_string_starts_with(parser->current_token->value, hb_string("<%="));
|
|
726
965
|
|
|
727
966
|
if (!is_output_tag) {
|
|
728
967
|
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
@@ -733,7 +972,7 @@ static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children
|
|
|
733
972
|
lexer_T lexer_copy = *parser->lexer;
|
|
734
973
|
|
|
735
974
|
token_T* erb_start = lexer_next_token(&lexer_copy);
|
|
736
|
-
token_free(erb_start);
|
|
975
|
+
token_free(erb_start, parser->allocator);
|
|
737
976
|
parser_skip_erb_content(&lexer_copy);
|
|
738
977
|
|
|
739
978
|
bool looks_like_attribute = parser_lookahead_erb_is_attribute(&lexer_copy);
|
|
@@ -759,13 +998,40 @@ static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* c
|
|
|
759
998
|
}
|
|
760
999
|
|
|
761
1000
|
static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
762
|
-
hb_array_T* errors = hb_array_init(8);
|
|
763
|
-
hb_array_T* children = hb_array_init(8);
|
|
1001
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1002
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
764
1003
|
|
|
765
1004
|
token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
|
|
766
1005
|
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
|
767
1006
|
|
|
768
1007
|
while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) {
|
|
1008
|
+
if (token_is_any_of(parser, TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1009
|
+
append_unclosed_open_tag_error(
|
|
1010
|
+
tag_name,
|
|
1011
|
+
tag_name->location.start,
|
|
1012
|
+
parser->current_token->location.start,
|
|
1013
|
+
parser->allocator,
|
|
1014
|
+
errors
|
|
1015
|
+
);
|
|
1016
|
+
|
|
1017
|
+
AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
|
|
1018
|
+
tag_start,
|
|
1019
|
+
tag_name,
|
|
1020
|
+
NULL,
|
|
1021
|
+
children,
|
|
1022
|
+
false,
|
|
1023
|
+
tag_start->location.start,
|
|
1024
|
+
parser->current_token->location.start,
|
|
1025
|
+
errors,
|
|
1026
|
+
parser->allocator
|
|
1027
|
+
);
|
|
1028
|
+
|
|
1029
|
+
token_free(tag_start, parser->allocator);
|
|
1030
|
+
token_free(tag_name, parser->allocator);
|
|
1031
|
+
|
|
1032
|
+
return open_tag_node;
|
|
1033
|
+
}
|
|
1034
|
+
|
|
769
1035
|
if (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
770
1036
|
parser_handle_whitespace_in_open_tag(parser, children);
|
|
771
1037
|
continue;
|
|
@@ -791,21 +1057,79 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
791
1057
|
token_T* next_token = lexer_next_token(&lexer_copy);
|
|
792
1058
|
|
|
793
1059
|
if (next_token && next_token->type == TOKEN_IDENTIFIER) {
|
|
794
|
-
token_free(next_token);
|
|
1060
|
+
token_free(next_token, parser->allocator);
|
|
795
1061
|
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
796
1062
|
|
|
797
1063
|
continue;
|
|
798
1064
|
}
|
|
799
1065
|
|
|
800
|
-
token_free(next_token);
|
|
1066
|
+
token_free(next_token, parser->allocator);
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
if (parser->current_token->type == TOKEN_PERCENT) {
|
|
1070
|
+
lexer_T lexer_copy = *parser->lexer;
|
|
1071
|
+
token_T* peek_token = lexer_next_token(&lexer_copy);
|
|
1072
|
+
|
|
1073
|
+
if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
|
|
1074
|
+
position_T stray_start = parser->current_token->location.start;
|
|
1075
|
+
position_T stray_end = peek_token->location.end;
|
|
1076
|
+
token_free(peek_token, parser->allocator);
|
|
1077
|
+
|
|
1078
|
+
append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, errors);
|
|
1079
|
+
|
|
1080
|
+
token_T* percent = parser_advance(parser);
|
|
1081
|
+
token_T* gt = parser_advance(parser);
|
|
1082
|
+
|
|
1083
|
+
AST_LITERAL_NODE_T* literal =
|
|
1084
|
+
ast_literal_node_init(hb_string("%>"), stray_start, stray_end, NULL, parser->allocator);
|
|
1085
|
+
hb_array_append(children, literal);
|
|
1086
|
+
|
|
1087
|
+
token_free(percent, parser->allocator);
|
|
1088
|
+
token_free(gt, parser->allocator);
|
|
1089
|
+
|
|
1090
|
+
continue;
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
token_free(peek_token, parser->allocator);
|
|
801
1094
|
}
|
|
802
1095
|
|
|
803
1096
|
parser_append_unexpected_error(
|
|
804
1097
|
parser,
|
|
1098
|
+
errors,
|
|
805
1099
|
"Unexpected Token",
|
|
806
|
-
|
|
1100
|
+
TOKEN_IDENTIFIER,
|
|
1101
|
+
TOKEN_AT,
|
|
1102
|
+
TOKEN_ERB_START,
|
|
1103
|
+
TOKEN_WHITESPACE,
|
|
1104
|
+
TOKEN_NEWLINE
|
|
1105
|
+
);
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
if (token_is(parser, TOKEN_EOF)) {
|
|
1109
|
+
append_unclosed_open_tag_error(
|
|
1110
|
+
tag_name,
|
|
1111
|
+
tag_name->location.start,
|
|
1112
|
+
parser->current_token->location.start,
|
|
1113
|
+
parser->allocator,
|
|
807
1114
|
errors
|
|
808
1115
|
);
|
|
1116
|
+
|
|
1117
|
+
AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
|
|
1118
|
+
tag_start,
|
|
1119
|
+
tag_name,
|
|
1120
|
+
NULL,
|
|
1121
|
+
children,
|
|
1122
|
+
false,
|
|
1123
|
+
tag_start->location.start,
|
|
1124
|
+
parser->current_token->location.start,
|
|
1125
|
+
errors,
|
|
1126
|
+
parser->allocator
|
|
1127
|
+
);
|
|
1128
|
+
|
|
1129
|
+
token_free(tag_start, parser->allocator);
|
|
1130
|
+
token_free(tag_name, parser->allocator);
|
|
1131
|
+
|
|
1132
|
+
return open_tag_node;
|
|
809
1133
|
}
|
|
810
1134
|
|
|
811
1135
|
bool is_self_closing = false;
|
|
@@ -816,8 +1140,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
816
1140
|
tag_end = parser_consume_expected(parser, TOKEN_HTML_TAG_SELF_CLOSE, errors);
|
|
817
1141
|
|
|
818
1142
|
if (tag_end == NULL) {
|
|
819
|
-
token_free(tag_start);
|
|
820
|
-
token_free(tag_name);
|
|
1143
|
+
token_free(tag_start, parser->allocator);
|
|
1144
|
+
token_free(tag_name, parser->allocator);
|
|
821
1145
|
|
|
822
1146
|
hb_array_free(&children);
|
|
823
1147
|
hb_array_free(&errors);
|
|
@@ -836,19 +1160,20 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
836
1160
|
is_self_closing,
|
|
837
1161
|
tag_start->location.start,
|
|
838
1162
|
tag_end->location.end,
|
|
839
|
-
errors
|
|
1163
|
+
errors,
|
|
1164
|
+
parser->allocator
|
|
840
1165
|
);
|
|
841
1166
|
|
|
842
|
-
token_free(tag_start);
|
|
843
|
-
token_free(tag_name);
|
|
844
|
-
token_free(tag_end);
|
|
1167
|
+
token_free(tag_start, parser->allocator);
|
|
1168
|
+
token_free(tag_name, parser->allocator);
|
|
1169
|
+
token_free(tag_end, parser->allocator);
|
|
845
1170
|
|
|
846
1171
|
return open_tag_node;
|
|
847
1172
|
}
|
|
848
1173
|
|
|
849
1174
|
static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
|
|
850
|
-
hb_array_T* errors = hb_array_init(8);
|
|
851
|
-
hb_array_T* children = hb_array_init(8);
|
|
1175
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1176
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
852
1177
|
|
|
853
1178
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
|
854
1179
|
|
|
@@ -858,38 +1183,53 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
|
|
|
858
1183
|
|
|
859
1184
|
parser_consume_whitespace(parser, children);
|
|
860
1185
|
|
|
861
|
-
token_T* tag_closing =
|
|
1186
|
+
token_T* tag_closing = parser_consume_if_present(parser, TOKEN_HTML_TAG_END);
|
|
1187
|
+
|
|
1188
|
+
if (tag_closing == NULL) {
|
|
1189
|
+
append_unclosed_close_tag_error(
|
|
1190
|
+
tag_name,
|
|
1191
|
+
tag_opening->location.start,
|
|
1192
|
+
tag_name->location.end,
|
|
1193
|
+
parser->allocator,
|
|
1194
|
+
errors
|
|
1195
|
+
);
|
|
1196
|
+
}
|
|
862
1197
|
|
|
863
|
-
if (tag_name != NULL && is_void_element(
|
|
864
|
-
|
|
865
|
-
hb_string_T
|
|
1198
|
+
if (tag_closing != NULL && tag_name != NULL && is_void_element(tag_name->value)
|
|
1199
|
+
&& parser_in_svg_context(parser) == false) {
|
|
1200
|
+
hb_string_T expected = html_self_closing_tag_string(tag_name->value, parser->allocator);
|
|
1201
|
+
hb_string_T got = html_closing_tag_string(tag_name->value, parser->allocator);
|
|
866
1202
|
|
|
867
1203
|
append_void_element_closing_tag_error(
|
|
868
1204
|
tag_name,
|
|
869
|
-
expected
|
|
870
|
-
got
|
|
1205
|
+
expected,
|
|
1206
|
+
got,
|
|
871
1207
|
tag_opening->location.start,
|
|
872
1208
|
tag_closing->location.end,
|
|
1209
|
+
parser->allocator,
|
|
873
1210
|
errors
|
|
874
1211
|
);
|
|
875
1212
|
|
|
876
|
-
|
|
877
|
-
|
|
1213
|
+
hb_allocator_dealloc(parser->allocator, expected.data);
|
|
1214
|
+
hb_allocator_dealloc(parser->allocator, got.data);
|
|
878
1215
|
}
|
|
879
1216
|
|
|
1217
|
+
position_T end_position = tag_closing != NULL ? tag_closing->location.end : tag_name->location.end;
|
|
1218
|
+
|
|
880
1219
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
|
|
881
1220
|
tag_opening,
|
|
882
1221
|
tag_name,
|
|
883
1222
|
children,
|
|
884
1223
|
tag_closing,
|
|
885
1224
|
tag_opening->location.start,
|
|
886
|
-
|
|
887
|
-
errors
|
|
1225
|
+
end_position,
|
|
1226
|
+
errors,
|
|
1227
|
+
parser->allocator
|
|
888
1228
|
);
|
|
889
1229
|
|
|
890
|
-
token_free(tag_opening);
|
|
891
|
-
token_free(tag_name);
|
|
892
|
-
token_free(tag_closing);
|
|
1230
|
+
token_free(tag_opening, parser->allocator);
|
|
1231
|
+
token_free(tag_name, parser->allocator);
|
|
1232
|
+
token_free(tag_closing, parser->allocator);
|
|
893
1233
|
|
|
894
1234
|
return close_tag;
|
|
895
1235
|
}
|
|
@@ -900,7 +1240,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
|
|
|
900
1240
|
AST_HTML_OPEN_TAG_NODE_T* open_tag
|
|
901
1241
|
) {
|
|
902
1242
|
return ast_html_element_node_init(
|
|
903
|
-
open_tag,
|
|
1243
|
+
(AST_NODE_T*) open_tag,
|
|
904
1244
|
open_tag->tag_name,
|
|
905
1245
|
NULL,
|
|
906
1246
|
NULL,
|
|
@@ -908,7 +1248,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
|
|
|
908
1248
|
ELEMENT_SOURCE_HTML,
|
|
909
1249
|
open_tag->base.location.start,
|
|
910
1250
|
open_tag->base.location.end,
|
|
911
|
-
NULL
|
|
1251
|
+
NULL,
|
|
1252
|
+
parser->allocator
|
|
912
1253
|
);
|
|
913
1254
|
}
|
|
914
1255
|
|
|
@@ -916,63 +1257,88 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
|
|
|
916
1257
|
parser_T* parser,
|
|
917
1258
|
AST_HTML_OPEN_TAG_NODE_T* open_tag
|
|
918
1259
|
) {
|
|
919
|
-
hb_array_T* errors = hb_array_init(8);
|
|
920
|
-
hb_array_T* body = hb_array_init(8);
|
|
1260
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1261
|
+
hb_array_T* body = hb_array_init(8, parser->allocator);
|
|
921
1262
|
|
|
922
1263
|
parser_push_open_tag(parser, open_tag->tag_name);
|
|
923
1264
|
|
|
924
|
-
if (open_tag->tag_name->value && parser_is_foreign_content_tag(
|
|
925
|
-
foreign_content_type_T content_type = parser_get_foreign_content_type(
|
|
1265
|
+
if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
|
|
1266
|
+
foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
|
|
926
1267
|
parser_enter_foreign_content(parser, content_type);
|
|
927
1268
|
parser_parse_foreign_content(parser, body, errors);
|
|
928
1269
|
} else {
|
|
929
1270
|
parser_parse_in_data_state(parser, body, errors);
|
|
930
1271
|
}
|
|
931
1272
|
|
|
932
|
-
if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1273
|
+
if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1274
|
+
return parser_handle_missing_close_tag(parser, open_tag, body, errors);
|
|
1275
|
+
}
|
|
933
1276
|
|
|
934
1277
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
|
|
935
1278
|
|
|
936
|
-
if (parser_in_svg_context(parser) == false && is_void_element(
|
|
1279
|
+
if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
|
|
937
1280
|
hb_array_push(body, close_tag);
|
|
938
1281
|
parser_parse_in_data_state(parser, body, errors);
|
|
939
1282
|
close_tag = parser_parse_html_close_tag(parser);
|
|
940
1283
|
}
|
|
941
1284
|
|
|
942
|
-
bool matches_stack = parser_check_matching_tag(parser,
|
|
1285
|
+
bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
|
|
943
1286
|
|
|
944
1287
|
if (matches_stack) {
|
|
945
1288
|
token_T* popped_token = parser_pop_open_tag(parser);
|
|
946
|
-
token_free(popped_token);
|
|
1289
|
+
token_free(popped_token, parser->allocator);
|
|
1290
|
+
} else if (parser_can_close_ancestor(parser, close_tag->tag_name->value)) {
|
|
1291
|
+
size_t depth = parser_find_ancestor_depth(parser, close_tag->tag_name->value);
|
|
1292
|
+
|
|
1293
|
+
for (size_t i = 0; i < depth; i++) {
|
|
1294
|
+
token_T* unclosed = parser_pop_open_tag(parser);
|
|
1295
|
+
|
|
1296
|
+
if (unclosed != NULL) {
|
|
1297
|
+
append_missing_closing_tag_error(
|
|
1298
|
+
unclosed,
|
|
1299
|
+
unclosed->location.start,
|
|
1300
|
+
unclosed->location.end,
|
|
1301
|
+
parser->allocator,
|
|
1302
|
+
errors
|
|
1303
|
+
);
|
|
1304
|
+
token_free(unclosed, parser->allocator);
|
|
1305
|
+
}
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1308
|
+
token_T* popped_token = parser_pop_open_tag(parser);
|
|
1309
|
+
token_free(popped_token, parser->allocator);
|
|
947
1310
|
} else {
|
|
948
1311
|
parser_handle_mismatched_tags(parser, close_tag, errors);
|
|
949
1312
|
}
|
|
950
1313
|
|
|
951
1314
|
return ast_html_element_node_init(
|
|
952
|
-
open_tag,
|
|
1315
|
+
(AST_NODE_T*) open_tag,
|
|
953
1316
|
open_tag->tag_name,
|
|
954
1317
|
body,
|
|
955
|
-
close_tag,
|
|
1318
|
+
(AST_NODE_T*) close_tag,
|
|
956
1319
|
false,
|
|
957
1320
|
ELEMENT_SOURCE_HTML,
|
|
958
1321
|
open_tag->base.location.start,
|
|
959
1322
|
close_tag->base.location.end,
|
|
960
|
-
errors
|
|
1323
|
+
errors,
|
|
1324
|
+
parser->allocator
|
|
961
1325
|
);
|
|
962
1326
|
}
|
|
963
1327
|
|
|
964
1328
|
static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
|
|
965
1329
|
AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
|
|
966
1330
|
|
|
1331
|
+
if (open_tag->tag_closing == NULL) { return (AST_NODE_T*) open_tag; }
|
|
1332
|
+
|
|
967
1333
|
// <tag />
|
|
968
1334
|
if (open_tag->is_void) { return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag); }
|
|
969
1335
|
|
|
970
1336
|
// <tag>, in void element list, and not in inside an <svg> element
|
|
971
|
-
if (!open_tag->is_void && is_void_element(
|
|
1337
|
+
if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
|
|
972
1338
|
return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag);
|
|
973
1339
|
}
|
|
974
1340
|
|
|
975
|
-
if (open_tag->tag_name->value && parser_is_foreign_content_tag(
|
|
1341
|
+
if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
|
|
976
1342
|
AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
|
|
977
1343
|
|
|
978
1344
|
if (regular_element != NULL) { return (AST_NODE_T*) regular_element; }
|
|
@@ -982,11 +1348,38 @@ static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
|
|
|
982
1348
|
}
|
|
983
1349
|
|
|
984
1350
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
985
|
-
hb_array_T* errors = hb_array_init(8);
|
|
1351
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
986
1352
|
|
|
987
1353
|
token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
|
|
988
1354
|
token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
|
|
989
|
-
|
|
1355
|
+
|
|
1356
|
+
token_T* closing_tag = NULL;
|
|
1357
|
+
position_T end_position;
|
|
1358
|
+
|
|
1359
|
+
if (token_is(parser, TOKEN_ERB_END)) {
|
|
1360
|
+
closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
|
|
1361
|
+
end_position = closing_tag->location.end;
|
|
1362
|
+
} else if (token_is(parser, TOKEN_ERB_START)) {
|
|
1363
|
+
append_nested_erb_tag_error(
|
|
1364
|
+
opening_tag,
|
|
1365
|
+
parser->current_token->location.start.line,
|
|
1366
|
+
parser->current_token->location.start.column,
|
|
1367
|
+
parser->current_token->location.start,
|
|
1368
|
+
parser->current_token->location.end,
|
|
1369
|
+
parser->allocator,
|
|
1370
|
+
errors
|
|
1371
|
+
);
|
|
1372
|
+
end_position = parser->current_token->location.start;
|
|
1373
|
+
} else {
|
|
1374
|
+
append_unclosed_erb_tag_error(
|
|
1375
|
+
opening_tag,
|
|
1376
|
+
opening_tag->location.start,
|
|
1377
|
+
parser->current_token->location.start,
|
|
1378
|
+
parser->allocator,
|
|
1379
|
+
errors
|
|
1380
|
+
);
|
|
1381
|
+
end_position = parser->current_token->location.start;
|
|
1382
|
+
}
|
|
990
1383
|
|
|
991
1384
|
AST_ERB_CONTENT_NODE_T* erb_node = ast_erb_content_node_init(
|
|
992
1385
|
opening_tag,
|
|
@@ -995,27 +1388,29 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
|
995
1388
|
NULL,
|
|
996
1389
|
false,
|
|
997
1390
|
false,
|
|
1391
|
+
HERB_PRISM_NODE_EMPTY,
|
|
998
1392
|
opening_tag->location.start,
|
|
999
|
-
|
|
1000
|
-
errors
|
|
1393
|
+
end_position,
|
|
1394
|
+
errors,
|
|
1395
|
+
parser->allocator
|
|
1001
1396
|
);
|
|
1002
1397
|
|
|
1003
|
-
token_free(opening_tag);
|
|
1004
|
-
token_free(content);
|
|
1005
|
-
token_free(closing_tag);
|
|
1398
|
+
token_free(opening_tag, parser->allocator);
|
|
1399
|
+
token_free(content, parser->allocator);
|
|
1400
|
+
if (closing_tag != NULL) { token_free(closing_tag, parser->allocator); }
|
|
1006
1401
|
|
|
1007
1402
|
return erb_node;
|
|
1008
1403
|
}
|
|
1009
1404
|
|
|
1010
1405
|
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
1011
1406
|
hb_buffer_T content;
|
|
1012
|
-
hb_buffer_init(&content, 1024);
|
|
1407
|
+
hb_buffer_init(&content, 1024, parser->allocator);
|
|
1013
1408
|
position_T start = parser->current_token->location.start;
|
|
1014
1409
|
hb_string_T expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
|
|
1015
1410
|
|
|
1016
1411
|
if (hb_string_is_empty(expected_closing_tag)) {
|
|
1017
1412
|
parser_exit_foreign_content(parser);
|
|
1018
|
-
|
|
1413
|
+
hb_buffer_free(&content);
|
|
1019
1414
|
|
|
1020
1415
|
return;
|
|
1021
1416
|
}
|
|
@@ -1038,33 +1433,32 @@ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children,
|
|
|
1038
1433
|
token_T* next_token = lexer_next_token(parser->lexer);
|
|
1039
1434
|
bool is_potential_match = false;
|
|
1040
1435
|
|
|
1041
|
-
if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
|
|
1042
|
-
is_potential_match =
|
|
1043
|
-
parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type);
|
|
1436
|
+
if (next_token && next_token->type == TOKEN_IDENTIFIER && !hb_string_is_empty(next_token->value)) {
|
|
1437
|
+
is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
|
|
1044
1438
|
}
|
|
1045
1439
|
|
|
1046
1440
|
lexer_restore_state(parser->lexer, saved_state);
|
|
1047
1441
|
|
|
1048
|
-
if (next_token) { token_free(next_token); }
|
|
1442
|
+
if (next_token) { token_free(next_token, parser->allocator); }
|
|
1049
1443
|
|
|
1050
1444
|
if (is_potential_match) {
|
|
1051
1445
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1052
1446
|
parser_exit_foreign_content(parser);
|
|
1053
1447
|
|
|
1054
|
-
|
|
1448
|
+
hb_buffer_free(&content);
|
|
1055
1449
|
|
|
1056
1450
|
return;
|
|
1057
1451
|
}
|
|
1058
1452
|
}
|
|
1059
1453
|
|
|
1060
1454
|
token_T* token = parser_advance(parser);
|
|
1061
|
-
|
|
1062
|
-
token_free(token);
|
|
1455
|
+
hb_buffer_append_string(&content, token->value);
|
|
1456
|
+
token_free(token, parser->allocator);
|
|
1063
1457
|
}
|
|
1064
1458
|
|
|
1065
1459
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1066
1460
|
parser_exit_foreign_content(parser);
|
|
1067
|
-
|
|
1461
|
+
hb_buffer_free(&content);
|
|
1068
1462
|
}
|
|
1069
1463
|
|
|
1070
1464
|
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
@@ -1072,36 +1466,43 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
|
|
|
1072
1466
|
|
|
1073
1467
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
1074
1468
|
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
1469
|
+
parser->consecutive_error_count = 0;
|
|
1075
1470
|
continue;
|
|
1076
1471
|
}
|
|
1077
1472
|
|
|
1078
1473
|
if (token_is(parser, TOKEN_HTML_DOCTYPE)) {
|
|
1079
1474
|
hb_array_append(children, parser_parse_html_doctype(parser));
|
|
1475
|
+
parser->consecutive_error_count = 0;
|
|
1080
1476
|
continue;
|
|
1081
1477
|
}
|
|
1082
1478
|
|
|
1083
1479
|
if (token_is(parser, TOKEN_XML_DECLARATION)) {
|
|
1084
1480
|
hb_array_append(children, parser_parse_xml_declaration(parser));
|
|
1481
|
+
parser->consecutive_error_count = 0;
|
|
1085
1482
|
continue;
|
|
1086
1483
|
}
|
|
1087
1484
|
|
|
1088
1485
|
if (token_is(parser, TOKEN_CDATA_START)) {
|
|
1089
1486
|
hb_array_append(children, parser_parse_cdata(parser));
|
|
1487
|
+
parser->consecutive_error_count = 0;
|
|
1090
1488
|
continue;
|
|
1091
1489
|
}
|
|
1092
1490
|
|
|
1093
1491
|
if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
|
|
1094
1492
|
hb_array_append(children, parser_parse_html_comment(parser));
|
|
1493
|
+
parser->consecutive_error_count = 0;
|
|
1095
1494
|
continue;
|
|
1096
1495
|
}
|
|
1097
1496
|
|
|
1098
1497
|
if (token_is(parser, TOKEN_HTML_TAG_START)) {
|
|
1099
1498
|
hb_array_append(children, parser_parse_html_element(parser));
|
|
1499
|
+
parser->consecutive_error_count = 0;
|
|
1100
1500
|
continue;
|
|
1101
1501
|
}
|
|
1102
1502
|
|
|
1103
1503
|
if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1104
1504
|
hb_array_append(children, parser_parse_html_close_tag(parser));
|
|
1505
|
+
parser->consecutive_error_count = 0;
|
|
1105
1506
|
continue;
|
|
1106
1507
|
}
|
|
1107
1508
|
|
|
@@ -1129,16 +1530,35 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
|
|
|
1129
1530
|
TOKEN_WHITESPACE
|
|
1130
1531
|
)) {
|
|
1131
1532
|
hb_array_append(children, parser_parse_text_content(parser, errors));
|
|
1533
|
+
parser->consecutive_error_count = 0;
|
|
1534
|
+
continue;
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1537
|
+
parser->consecutive_error_count++;
|
|
1538
|
+
|
|
1539
|
+
if (parser->consecutive_error_count >= MAX_CONSECUTIVE_ERRORS) {
|
|
1540
|
+
parser->in_recovery_mode = true;
|
|
1541
|
+
parser_synchronize(parser, errors);
|
|
1542
|
+
parser->consecutive_error_count = 0;
|
|
1132
1543
|
continue;
|
|
1133
1544
|
}
|
|
1134
1545
|
|
|
1135
1546
|
parser_append_unexpected_error(
|
|
1136
1547
|
parser,
|
|
1548
|
+
errors,
|
|
1137
1549
|
"Unexpected token",
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1550
|
+
TOKEN_ERB_START,
|
|
1551
|
+
TOKEN_HTML_DOCTYPE,
|
|
1552
|
+
TOKEN_HTML_COMMENT_START,
|
|
1553
|
+
TOKEN_IDENTIFIER,
|
|
1554
|
+
TOKEN_WHITESPACE,
|
|
1555
|
+
TOKEN_NBSP,
|
|
1556
|
+
TOKEN_AT,
|
|
1557
|
+
TOKEN_BACKSLASH,
|
|
1558
|
+
TOKEN_NEWLINE
|
|
1141
1559
|
);
|
|
1560
|
+
|
|
1561
|
+
parser_synchronize(parser, errors);
|
|
1142
1562
|
}
|
|
1143
1563
|
}
|
|
1144
1564
|
|
|
@@ -1152,11 +1572,11 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
|
|
|
1152
1572
|
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1153
1573
|
AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1154
1574
|
|
|
1155
|
-
if (hb_string_equals_case_insensitive(
|
|
1575
|
+
if (hb_string_equals_case_insensitive(open->tag_name->value, tag_name)) { depth++; }
|
|
1156
1576
|
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1157
1577
|
AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1158
1578
|
|
|
1159
|
-
if (hb_string_equals_case_insensitive(
|
|
1579
|
+
if (hb_string_equals_case_insensitive(close->tag_name->value, tag_name)) {
|
|
1160
1580
|
if (depth == 0) { return i; }
|
|
1161
1581
|
depth--;
|
|
1162
1582
|
}
|
|
@@ -1166,10 +1586,44 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
|
|
|
1166
1586
|
return (size_t) -1;
|
|
1167
1587
|
}
|
|
1168
1588
|
|
|
1169
|
-
static
|
|
1589
|
+
static size_t find_implicit_close_index(hb_array_T* nodes, size_t start_idx, hb_string_T tag_name) {
|
|
1590
|
+
if (!has_optional_end_tag(tag_name)) { return (size_t) -1; }
|
|
1591
|
+
|
|
1592
|
+
for (size_t i = start_idx + 1; i < hb_array_size(nodes); i++) {
|
|
1593
|
+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1594
|
+
if (node == NULL) { continue; }
|
|
1595
|
+
|
|
1596
|
+
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1597
|
+
AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1598
|
+
hb_string_T next_tag_name = open->tag_name->value;
|
|
1599
|
+
|
|
1600
|
+
if (should_implicitly_close(tag_name, next_tag_name)) { return i; }
|
|
1601
|
+
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1602
|
+
AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1603
|
+
hb_string_T close_tag_name = close->tag_name->value;
|
|
1604
|
+
|
|
1605
|
+
if (parent_closes_element(tag_name, close_tag_name)) { return i; }
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
|
|
1609
|
+
return hb_array_size(nodes);
|
|
1610
|
+
}
|
|
1170
1611
|
|
|
1171
|
-
static hb_array_T* parser_build_elements_from_tags(
|
|
1172
|
-
hb_array_T*
|
|
1612
|
+
static hb_array_T* parser_build_elements_from_tags(
|
|
1613
|
+
hb_array_T* nodes,
|
|
1614
|
+
hb_array_T* errors,
|
|
1615
|
+
const parser_options_T* options,
|
|
1616
|
+
hb_allocator_T* allocator
|
|
1617
|
+
);
|
|
1618
|
+
|
|
1619
|
+
static hb_array_T* parser_build_elements_from_tags(
|
|
1620
|
+
hb_array_T* nodes,
|
|
1621
|
+
hb_array_T* errors,
|
|
1622
|
+
const parser_options_T* options,
|
|
1623
|
+
hb_allocator_T* allocator
|
|
1624
|
+
) {
|
|
1625
|
+
bool strict = options ? options->strict : false;
|
|
1626
|
+
hb_array_T* result = hb_array_init(hb_array_size(nodes), allocator);
|
|
1173
1627
|
|
|
1174
1628
|
for (size_t index = 0; index < hb_array_size(nodes); index++) {
|
|
1175
1629
|
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, index);
|
|
@@ -1177,45 +1631,105 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
|
|
|
1177
1631
|
|
|
1178
1632
|
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1179
1633
|
AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1180
|
-
hb_string_T tag_name =
|
|
1634
|
+
hb_string_T tag_name = open_tag->tag_name->value;
|
|
1181
1635
|
|
|
1182
1636
|
size_t close_index = find_matching_close_tag(nodes, index, tag_name);
|
|
1183
1637
|
|
|
1184
1638
|
if (close_index == (size_t) -1) {
|
|
1185
|
-
|
|
1186
|
-
|
|
1639
|
+
size_t implicit_close_index = find_implicit_close_index(nodes, index, tag_name);
|
|
1640
|
+
|
|
1641
|
+
if (implicit_close_index != (size_t) -1 && implicit_close_index > index + 1) {
|
|
1642
|
+
hb_array_T* body = hb_array_init(implicit_close_index - index - 1, allocator);
|
|
1643
|
+
|
|
1644
|
+
for (size_t j = index + 1; j < implicit_close_index; j++) {
|
|
1645
|
+
hb_array_append(body, hb_array_get(nodes, j));
|
|
1646
|
+
}
|
|
1647
|
+
|
|
1648
|
+
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
|
|
1649
|
+
hb_array_free(&body);
|
|
1650
|
+
|
|
1651
|
+
position_T end_position = open_tag->base.location.end;
|
|
1652
|
+
|
|
1653
|
+
if (hb_array_size(processed_body) > 0) {
|
|
1654
|
+
AST_NODE_T* last_body_node = (AST_NODE_T*) hb_array_get(processed_body, hb_array_size(processed_body) - 1);
|
|
1655
|
+
if (last_body_node != NULL) { end_position = last_body_node->location.end; }
|
|
1656
|
+
}
|
|
1657
|
+
|
|
1658
|
+
hb_array_T* element_errors = hb_array_init(8, allocator);
|
|
1659
|
+
|
|
1660
|
+
if (strict) {
|
|
1661
|
+
append_omitted_closing_tag_error(
|
|
1662
|
+
open_tag->tag_name,
|
|
1663
|
+
end_position,
|
|
1664
|
+
open_tag->base.location.start,
|
|
1665
|
+
open_tag->base.location.end,
|
|
1666
|
+
allocator,
|
|
1667
|
+
element_errors
|
|
1668
|
+
);
|
|
1669
|
+
}
|
|
1670
|
+
|
|
1671
|
+
AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = ast_html_omitted_close_tag_node_init(
|
|
1672
|
+
open_tag->tag_name,
|
|
1673
|
+
end_position,
|
|
1674
|
+
end_position,
|
|
1675
|
+
hb_array_init(8, allocator),
|
|
1676
|
+
allocator
|
|
1677
|
+
);
|
|
1678
|
+
|
|
1679
|
+
AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
|
|
1680
|
+
(AST_NODE_T*) open_tag,
|
|
1187
1681
|
open_tag->tag_name,
|
|
1682
|
+
processed_body,
|
|
1683
|
+
(AST_NODE_T*) omitted_close_tag,
|
|
1684
|
+
false,
|
|
1685
|
+
ELEMENT_SOURCE_HTML,
|
|
1188
1686
|
open_tag->base.location.start,
|
|
1189
|
-
|
|
1190
|
-
|
|
1687
|
+
end_position,
|
|
1688
|
+
element_errors,
|
|
1689
|
+
allocator
|
|
1191
1690
|
);
|
|
1192
|
-
}
|
|
1193
1691
|
|
|
1194
|
-
|
|
1692
|
+
hb_array_append(result, element);
|
|
1693
|
+
|
|
1694
|
+
index = implicit_close_index - 1;
|
|
1695
|
+
} else {
|
|
1696
|
+
if (hb_array_size(open_tag->base.errors) == 0) {
|
|
1697
|
+
append_missing_closing_tag_error(
|
|
1698
|
+
open_tag->tag_name,
|
|
1699
|
+
open_tag->base.location.start,
|
|
1700
|
+
open_tag->base.location.end,
|
|
1701
|
+
allocator,
|
|
1702
|
+
open_tag->base.errors
|
|
1703
|
+
);
|
|
1704
|
+
}
|
|
1705
|
+
|
|
1706
|
+
hb_array_append(result, node);
|
|
1707
|
+
}
|
|
1195
1708
|
} else {
|
|
1196
1709
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) hb_array_get(nodes, close_index);
|
|
1197
1710
|
|
|
1198
|
-
hb_array_T* body = hb_array_init(close_index - index - 1);
|
|
1711
|
+
hb_array_T* body = hb_array_init(close_index - index - 1, allocator);
|
|
1199
1712
|
|
|
1200
1713
|
for (size_t j = index + 1; j < close_index; j++) {
|
|
1201
1714
|
hb_array_append(body, hb_array_get(nodes, j));
|
|
1202
1715
|
}
|
|
1203
1716
|
|
|
1204
|
-
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors);
|
|
1717
|
+
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
|
|
1205
1718
|
hb_array_free(&body);
|
|
1206
1719
|
|
|
1207
|
-
hb_array_T* element_errors = hb_array_init(8);
|
|
1720
|
+
hb_array_T* element_errors = hb_array_init(8, allocator);
|
|
1208
1721
|
|
|
1209
1722
|
AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
|
|
1210
|
-
open_tag,
|
|
1723
|
+
(AST_NODE_T*) open_tag,
|
|
1211
1724
|
open_tag->tag_name,
|
|
1212
1725
|
processed_body,
|
|
1213
|
-
close_tag,
|
|
1726
|
+
(AST_NODE_T*) close_tag,
|
|
1214
1727
|
false,
|
|
1215
1728
|
ELEMENT_SOURCE_HTML,
|
|
1216
1729
|
open_tag->base.location.start,
|
|
1217
1730
|
close_tag->base.location.end,
|
|
1218
|
-
element_errors
|
|
1731
|
+
element_errors,
|
|
1732
|
+
allocator
|
|
1219
1733
|
);
|
|
1220
1734
|
|
|
1221
1735
|
hb_array_append(result, element);
|
|
@@ -1225,12 +1739,13 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
|
|
|
1225
1739
|
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1226
1740
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1227
1741
|
|
|
1228
|
-
if (!is_void_element(
|
|
1742
|
+
if (!is_void_element(close_tag->tag_name->value)) {
|
|
1229
1743
|
if (hb_array_size(close_tag->base.errors) == 0) {
|
|
1230
1744
|
append_missing_opening_tag_error(
|
|
1231
1745
|
close_tag->tag_name,
|
|
1232
1746
|
close_tag->base.location.start,
|
|
1233
1747
|
close_tag->base.location.end,
|
|
1748
|
+
allocator,
|
|
1234
1749
|
close_tag->base.errors
|
|
1235
1750
|
);
|
|
1236
1751
|
}
|
|
@@ -1246,17 +1761,18 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
|
|
|
1246
1761
|
}
|
|
1247
1762
|
|
|
1248
1763
|
static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) {
|
|
1249
|
-
hb_array_T* children = hb_array_init(8);
|
|
1250
|
-
hb_array_T* errors = hb_array_init(8);
|
|
1764
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
1765
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1251
1766
|
position_T start = parser->current_token->location.start;
|
|
1252
1767
|
|
|
1253
1768
|
parser_parse_in_data_state(parser, children, errors);
|
|
1254
1769
|
|
|
1255
1770
|
token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors);
|
|
1256
1771
|
|
|
1257
|
-
AST_DOCUMENT_NODE_T* document_node =
|
|
1772
|
+
AST_DOCUMENT_NODE_T* document_node =
|
|
1773
|
+
ast_document_node_init(children, NULL, HERB_PRISM_NODE_EMPTY, start, eof->location.end, errors, parser->allocator);
|
|
1258
1774
|
|
|
1259
|
-
token_free(eof);
|
|
1775
|
+
token_free(eof, parser->allocator);
|
|
1260
1776
|
|
|
1261
1777
|
return document_node;
|
|
1262
1778
|
}
|
|
@@ -1267,17 +1783,18 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser) {
|
|
|
1267
1783
|
|
|
1268
1784
|
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children) {
|
|
1269
1785
|
if (parser->options.track_whitespace) {
|
|
1270
|
-
hb_array_T* errors = hb_array_init(8);
|
|
1786
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1271
1787
|
AST_WHITESPACE_NODE_T* whitespace_node = ast_whitespace_node_init(
|
|
1272
1788
|
whitespace_token,
|
|
1273
1789
|
whitespace_token->location.start,
|
|
1274
1790
|
whitespace_token->location.end,
|
|
1275
|
-
errors
|
|
1791
|
+
errors,
|
|
1792
|
+
parser->allocator
|
|
1276
1793
|
);
|
|
1277
1794
|
hb_array_append(children, whitespace_node);
|
|
1278
1795
|
}
|
|
1279
1796
|
|
|
1280
|
-
token_free(whitespace_token);
|
|
1797
|
+
token_free(whitespace_token, parser->allocator);
|
|
1281
1798
|
}
|
|
1282
1799
|
|
|
1283
1800
|
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
@@ -1287,7 +1804,7 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
|
1287
1804
|
if (parser->options.track_whitespace && children != NULL) {
|
|
1288
1805
|
parser_handle_whitespace(parser, whitespace, children);
|
|
1289
1806
|
} else {
|
|
1290
|
-
token_free(whitespace);
|
|
1807
|
+
token_free(whitespace, parser->allocator);
|
|
1291
1808
|
}
|
|
1292
1809
|
}
|
|
1293
1810
|
}
|
|
@@ -1295,14 +1812,27 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
|
1295
1812
|
void herb_parser_deinit(parser_T* parser) {
|
|
1296
1813
|
if (parser == NULL) { return; }
|
|
1297
1814
|
|
|
1298
|
-
if (parser->current_token != NULL) { token_free(parser->current_token); }
|
|
1299
|
-
|
|
1815
|
+
if (parser->current_token != NULL) { token_free(parser->current_token, parser->allocator); }
|
|
1816
|
+
|
|
1817
|
+
if (parser->open_tags_stack != NULL) {
|
|
1818
|
+
for (size_t i = 0; i < hb_array_size(parser->open_tags_stack); i++) {
|
|
1819
|
+
token_T* token = (token_T*) hb_array_get(parser->open_tags_stack, i);
|
|
1820
|
+
if (token != NULL) { token_free(token, parser->allocator); }
|
|
1821
|
+
}
|
|
1822
|
+
|
|
1823
|
+
hb_array_free(&parser->open_tags_stack);
|
|
1824
|
+
}
|
|
1300
1825
|
}
|
|
1301
1826
|
|
|
1302
|
-
void match_tags_in_node_array(
|
|
1827
|
+
void match_tags_in_node_array(
|
|
1828
|
+
hb_array_T* nodes,
|
|
1829
|
+
hb_array_T* errors,
|
|
1830
|
+
const parser_options_T* options,
|
|
1831
|
+
hb_allocator_T* allocator
|
|
1832
|
+
) {
|
|
1303
1833
|
if (nodes == NULL || hb_array_size(nodes) == 0) { return; }
|
|
1304
1834
|
|
|
1305
|
-
hb_array_T* processed = parser_build_elements_from_tags(nodes, errors);
|
|
1835
|
+
hb_array_T* processed = parser_build_elements_from_tags(nodes, errors, options, allocator);
|
|
1306
1836
|
|
|
1307
1837
|
nodes->size = 0;
|
|
1308
1838
|
|
|
@@ -1312,16 +1842,22 @@ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
|
|
|
1312
1842
|
|
|
1313
1843
|
hb_array_free(&processed);
|
|
1314
1844
|
|
|
1845
|
+
match_tags_context_T context = { .errors = errors, .options = options, .allocator = allocator };
|
|
1846
|
+
|
|
1315
1847
|
for (size_t i = 0; i < hb_array_size(nodes); i++) {
|
|
1316
1848
|
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1317
1849
|
if (node == NULL) { continue; }
|
|
1318
1850
|
|
|
1319
|
-
herb_visit_node(node, match_tags_visitor,
|
|
1851
|
+
herb_visit_node(node, match_tags_visitor, &context);
|
|
1320
1852
|
}
|
|
1321
1853
|
}
|
|
1322
1854
|
|
|
1323
|
-
void herb_parser_match_html_tags_post_analyze(
|
|
1855
|
+
void herb_parser_match_html_tags_post_analyze(
|
|
1856
|
+
AST_DOCUMENT_NODE_T* document,
|
|
1857
|
+
const parser_options_T* options,
|
|
1858
|
+
hb_allocator_T* allocator
|
|
1859
|
+
) {
|
|
1324
1860
|
if (document == NULL) { return; }
|
|
1325
1861
|
|
|
1326
|
-
match_tags_in_node_array(document->children, document->base.errors);
|
|
1862
|
+
match_tags_in_node_array(document->children, document->base.errors, options, allocator);
|
|
1327
1863
|
}
|