@herb-tools/node 0.8.10 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +27 -8
- package/dist/herb-node.cjs +41 -12
- package/dist/herb-node.cjs.map +1 -1
- package/dist/herb-node.esm.js +8 -1
- package/dist/herb-node.esm.js.map +1 -1
- package/dist/types/node-backend.d.ts +3 -1
- package/extension/error_helpers.cpp +598 -73
- package/extension/error_helpers.h +20 -3
- package/extension/extension_helpers.cpp +40 -35
- package/extension/extension_helpers.h +2 -2
- package/extension/herb.cpp +194 -64
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.c +303 -0
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/analyze/action_view/content_tag.c +78 -0
- package/extension/libherb/analyze/action_view/link_to.c +167 -0
- package/extension/libherb/analyze/action_view/registry.c +83 -0
- package/extension/libherb/analyze/action_view/tag.c +70 -0
- package/extension/libherb/analyze/action_view/tag_helper_handler.h +43 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.c +305 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/analyze/action_view/tag_helpers.c +815 -0
- package/extension/libherb/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/analyze/action_view/turbo_frame_tag.c +88 -0
- package/extension/libherb/analyze/analyze.c +885 -0
- package/extension/libherb/{include → analyze}/analyze.h +14 -4
- package/extension/libherb/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
- package/extension/libherb/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/analyze/builders.c +343 -0
- package/extension/libherb/analyze/builders.h +27 -0
- package/extension/libherb/analyze/conditional_elements.c +594 -0
- package/extension/libherb/analyze/conditional_elements.h +9 -0
- package/extension/libherb/analyze/conditional_open_tags.c +640 -0
- package/extension/libherb/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/analyze/control_type.c +250 -0
- package/extension/libherb/analyze/control_type.h +14 -0
- package/extension/libherb/{analyze_helpers.c → analyze/helpers.c} +48 -23
- package/extension/libherb/{analyze_helpers.h → analyze/helpers.h} +4 -2
- package/extension/libherb/analyze/invalid_structures.c +193 -0
- package/extension/libherb/analyze/invalid_structures.h +11 -0
- package/extension/libherb/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
- package/extension/libherb/analyze/parse_errors.c +84 -0
- package/extension/libherb/analyze/prism_annotate.c +399 -0
- package/extension/libherb/analyze/prism_annotate.h +16 -0
- package/extension/libherb/analyze/render_nodes.c +761 -0
- package/extension/libherb/analyze/render_nodes.h +11 -0
- package/extension/libherb/{analyze_transform.c → analyze/transform.c} +24 -3
- package/extension/libherb/ast_node.c +17 -7
- package/extension/libherb/ast_node.h +11 -5
- package/extension/libherb/ast_nodes.c +760 -388
- package/extension/libherb/ast_nodes.h +155 -39
- package/extension/libherb/ast_pretty_print.c +265 -7
- package/extension/libherb/ast_pretty_print.h +6 -1
- package/extension/libherb/element_source.h +3 -8
- package/extension/libherb/errors.c +1455 -520
- package/extension/libherb/errors.h +207 -56
- package/extension/libherb/extract.c +145 -49
- package/extension/libherb/extract.h +21 -5
- package/extension/libherb/herb.c +52 -34
- package/extension/libherb/herb.h +18 -6
- package/extension/libherb/herb_prism_node.h +13 -0
- package/extension/libherb/html_util.c +241 -12
- package/extension/libherb/html_util.h +7 -2
- package/extension/libherb/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_handler.h +43 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/include/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/{analyze.h → include/analyze/analyze.h} +14 -4
- package/extension/libherb/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/include/analyze/builders.h +27 -0
- package/extension/libherb/include/analyze/conditional_elements.h +9 -0
- package/extension/libherb/include/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/include/analyze/control_type.h +14 -0
- package/extension/libherb/include/{analyze_helpers.h → analyze/helpers.h} +4 -2
- package/extension/libherb/include/analyze/invalid_structures.h +11 -0
- package/extension/libherb/include/analyze/prism_annotate.h +16 -0
- package/extension/libherb/include/analyze/render_nodes.h +11 -0
- package/extension/libherb/include/ast_node.h +11 -5
- package/extension/libherb/include/ast_nodes.h +155 -39
- package/extension/libherb/include/ast_pretty_print.h +6 -1
- package/extension/libherb/include/element_source.h +3 -8
- package/extension/libherb/include/errors.h +207 -56
- package/extension/libherb/include/extract.h +21 -5
- package/extension/libherb/include/herb.h +18 -6
- package/extension/libherb/include/herb_prism_node.h +13 -0
- package/extension/libherb/include/html_util.h +7 -2
- package/extension/libherb/include/io.h +3 -1
- package/extension/libherb/include/lex_helpers.h +29 -0
- package/extension/libherb/include/lexer.h +1 -1
- package/extension/libherb/include/lexer_peek_helpers.h +87 -13
- package/extension/libherb/include/lexer_struct.h +2 -0
- package/extension/libherb/include/location.h +2 -1
- package/extension/libherb/include/parser.h +28 -2
- package/extension/libherb/include/parser_helpers.h +19 -3
- package/extension/libherb/include/pretty_print.h +10 -5
- package/extension/libherb/include/prism_context.h +45 -0
- package/extension/libherb/include/prism_helpers.h +10 -7
- package/extension/libherb/include/prism_serialized.h +12 -0
- package/extension/libherb/include/token.h +16 -4
- package/extension/libherb/include/token_struct.h +10 -3
- package/extension/libherb/include/utf8.h +2 -1
- package/extension/libherb/include/util/hb_allocator.h +78 -0
- package/extension/libherb/include/util/hb_arena.h +6 -1
- package/extension/libherb/include/util/hb_arena_debug.h +12 -1
- package/extension/libherb/include/util/hb_array.h +7 -3
- package/extension/libherb/include/util/hb_buffer.h +6 -4
- package/extension/libherb/include/util/hb_foreach.h +79 -0
- package/extension/libherb/include/util/hb_narray.h +8 -4
- package/extension/libherb/include/util/hb_string.h +56 -9
- package/extension/libherb/include/util.h +6 -3
- package/extension/libherb/include/version.h +1 -1
- package/extension/libherb/io.c +3 -2
- package/extension/libherb/io.h +3 -1
- package/extension/libherb/lex_helpers.h +29 -0
- package/extension/libherb/lexer.c +42 -30
- package/extension/libherb/lexer.h +1 -1
- package/extension/libherb/lexer_peek_helpers.c +12 -74
- package/extension/libherb/lexer_peek_helpers.h +87 -13
- package/extension/libherb/lexer_struct.h +2 -0
- package/extension/libherb/location.c +2 -2
- package/extension/libherb/location.h +2 -1
- package/extension/libherb/main.c +53 -28
- package/extension/libherb/parser.c +784 -247
- package/extension/libherb/parser.h +28 -2
- package/extension/libherb/parser_helpers.c +110 -23
- package/extension/libherb/parser_helpers.h +19 -3
- package/extension/libherb/parser_match_tags.c +130 -49
- package/extension/libherb/pretty_print.c +29 -24
- package/extension/libherb/pretty_print.h +10 -5
- package/extension/libherb/prism_context.h +45 -0
- package/extension/libherb/prism_helpers.c +30 -27
- package/extension/libherb/prism_helpers.h +10 -7
- package/extension/libherb/prism_serialized.h +12 -0
- package/extension/libherb/ruby_parser.c +2 -0
- package/extension/libherb/token.c +151 -66
- package/extension/libherb/token.h +16 -4
- package/extension/libherb/token_matchers.c +0 -1
- package/extension/libherb/token_struct.h +10 -3
- package/extension/libherb/utf8.c +7 -6
- package/extension/libherb/utf8.h +2 -1
- package/extension/libherb/util/hb_allocator.c +341 -0
- package/extension/libherb/util/hb_allocator.h +78 -0
- package/extension/libherb/util/hb_arena.c +81 -56
- package/extension/libherb/util/hb_arena.h +6 -1
- package/extension/libherb/util/hb_arena_debug.c +32 -17
- package/extension/libherb/util/hb_arena_debug.h +12 -1
- package/extension/libherb/util/hb_array.c +30 -15
- package/extension/libherb/util/hb_array.h +7 -3
- package/extension/libherb/util/hb_buffer.c +17 -21
- package/extension/libherb/util/hb_buffer.h +6 -4
- package/extension/libherb/util/hb_foreach.h +79 -0
- package/extension/libherb/util/hb_narray.c +22 -7
- package/extension/libherb/util/hb_narray.h +8 -4
- package/extension/libherb/util/hb_string.c +49 -35
- package/extension/libherb/util/hb_string.h +56 -9
- package/extension/libherb/util.c +21 -11
- package/extension/libherb/util.h +6 -3
- package/extension/libherb/version.h +1 -1
- package/extension/libherb/visitor.c +68 -1
- package/extension/nodes.cpp +593 -6
- package/extension/nodes.h +10 -1
- package/package.json +12 -8
- package/src/node-backend.ts +11 -1
- package/dist/types/index-cjs.d.cts +0 -1
- package/extension/libherb/analyze.c +0 -1608
- package/extension/libherb/element_source.c +0 -12
- package/extension/libherb/include/util/hb_system.h +0 -9
- package/extension/libherb/util/hb_system.c +0 -30
- package/extension/libherb/util/hb_system.h +0 -9
- package/src/index-cjs.cts +0 -22
- /package/dist/types/{index-esm.d.mts → index.d.ts} +0 -0
- /package/src/{index-esm.mts → index.ts} +0 -0
|
@@ -20,6 +20,8 @@
|
|
|
20
20
|
#include <string.h>
|
|
21
21
|
#include <strings.h>
|
|
22
22
|
|
|
23
|
+
#define MAX_CONSECUTIVE_ERRORS 10
|
|
24
|
+
|
|
23
25
|
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
24
26
|
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
25
27
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
|
|
@@ -27,29 +29,40 @@ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token
|
|
|
27
29
|
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children);
|
|
28
30
|
static void parser_skip_erb_content(lexer_T* lexer);
|
|
29
31
|
static bool parser_lookahead_erb_is_attribute(lexer_T* lexer);
|
|
32
|
+
static bool parser_lookahead_erb_is_control_flow(parser_T* parser);
|
|
30
33
|
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
31
34
|
static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
32
35
|
|
|
33
|
-
const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false
|
|
36
|
+
const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false,
|
|
37
|
+
.analyze = true,
|
|
38
|
+
.strict = true,
|
|
39
|
+
.action_view_helpers = false,
|
|
40
|
+
.render_nodes = false,
|
|
41
|
+
.prism_nodes_deep = false,
|
|
42
|
+
.prism_nodes = false,
|
|
43
|
+
.prism_program = false };
|
|
34
44
|
|
|
35
45
|
size_t parser_sizeof(void) {
|
|
36
46
|
return sizeof(struct PARSER_STRUCT);
|
|
37
47
|
}
|
|
38
48
|
|
|
39
49
|
void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options) {
|
|
50
|
+
parser->allocator = lexer->allocator;
|
|
40
51
|
parser->lexer = lexer;
|
|
41
52
|
parser->current_token = lexer_next_token(lexer);
|
|
42
|
-
parser->open_tags_stack = hb_array_init(16);
|
|
53
|
+
parser->open_tags_stack = hb_array_init(16, parser->allocator);
|
|
43
54
|
parser->state = PARSER_STATE_DATA;
|
|
44
55
|
parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
|
|
45
56
|
parser->options = options;
|
|
57
|
+
parser->consecutive_error_count = 0;
|
|
58
|
+
parser->in_recovery_mode = false;
|
|
46
59
|
}
|
|
47
60
|
|
|
48
61
|
static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
49
|
-
hb_array_T* errors = hb_array_init(8);
|
|
50
|
-
hb_array_T* children = hb_array_init(8);
|
|
62
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
63
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
51
64
|
hb_buffer_T content;
|
|
52
|
-
hb_buffer_init(&content, 128);
|
|
65
|
+
hb_buffer_init(&content, 128, parser->allocator);
|
|
53
66
|
|
|
54
67
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_CDATA_START, errors);
|
|
55
68
|
position_T start = parser->current_token->location.start;
|
|
@@ -64,8 +77,8 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
64
77
|
}
|
|
65
78
|
|
|
66
79
|
token_T* token = parser_advance(parser);
|
|
67
|
-
|
|
68
|
-
token_free(token);
|
|
80
|
+
hb_buffer_append_string(&content, token->value);
|
|
81
|
+
token_free(token, parser->allocator);
|
|
69
82
|
}
|
|
70
83
|
|
|
71
84
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
@@ -77,26 +90,27 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
77
90
|
tag_closing,
|
|
78
91
|
tag_opening->location.start,
|
|
79
92
|
tag_closing->location.end,
|
|
80
|
-
errors
|
|
93
|
+
errors,
|
|
94
|
+
parser->allocator
|
|
81
95
|
);
|
|
82
96
|
|
|
83
|
-
|
|
84
|
-
token_free(tag_opening);
|
|
85
|
-
token_free(tag_closing);
|
|
97
|
+
hb_buffer_free(&content);
|
|
98
|
+
token_free(tag_opening, parser->allocator);
|
|
99
|
+
token_free(tag_closing, parser->allocator);
|
|
86
100
|
|
|
87
101
|
return cdata;
|
|
88
102
|
}
|
|
89
103
|
|
|
90
104
|
static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
91
|
-
hb_array_T* errors = hb_array_init(8);
|
|
92
|
-
hb_array_T* children = hb_array_init(8);
|
|
105
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
106
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
93
107
|
token_T* comment_start = parser_consume_expected(parser, TOKEN_HTML_COMMENT_START, errors);
|
|
94
108
|
position_T start = parser->current_token->location.start;
|
|
95
109
|
|
|
96
110
|
hb_buffer_T comment;
|
|
97
|
-
hb_buffer_init(&comment, 512);
|
|
111
|
+
hb_buffer_init(&comment, 512, parser->allocator);
|
|
98
112
|
|
|
99
|
-
while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_EOF)) {
|
|
113
|
+
while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_HTML_COMMENT_INVALID_END, TOKEN_EOF)) {
|
|
100
114
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
101
115
|
parser_append_literal_node_from_buffer(parser, &comment, children, start);
|
|
102
116
|
|
|
@@ -109,13 +123,26 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
109
123
|
}
|
|
110
124
|
|
|
111
125
|
token_T* token = parser_advance(parser);
|
|
112
|
-
|
|
113
|
-
token_free(token);
|
|
126
|
+
hb_buffer_append_string(&comment, token->value);
|
|
127
|
+
token_free(token, parser->allocator);
|
|
114
128
|
}
|
|
115
129
|
|
|
116
130
|
parser_append_literal_node_from_buffer(parser, &comment, children, start);
|
|
117
131
|
|
|
118
|
-
token_T* comment_end =
|
|
132
|
+
token_T* comment_end = NULL;
|
|
133
|
+
|
|
134
|
+
if (token_is(parser, TOKEN_HTML_COMMENT_INVALID_END)) {
|
|
135
|
+
comment_end = parser_advance(parser);
|
|
136
|
+
append_invalid_comment_closing_tag_error(
|
|
137
|
+
comment_end,
|
|
138
|
+
comment_end->location.start,
|
|
139
|
+
comment_end->location.end,
|
|
140
|
+
parser->allocator,
|
|
141
|
+
errors
|
|
142
|
+
);
|
|
143
|
+
} else {
|
|
144
|
+
comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors);
|
|
145
|
+
}
|
|
119
146
|
|
|
120
147
|
AST_HTML_COMMENT_NODE_T* comment_node = ast_html_comment_node_init(
|
|
121
148
|
comment_start,
|
|
@@ -123,21 +150,22 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
123
150
|
comment_end,
|
|
124
151
|
comment_start->location.start,
|
|
125
152
|
comment_end->location.end,
|
|
126
|
-
errors
|
|
153
|
+
errors,
|
|
154
|
+
parser->allocator
|
|
127
155
|
);
|
|
128
156
|
|
|
129
|
-
|
|
130
|
-
token_free(comment_start);
|
|
131
|
-
token_free(comment_end);
|
|
157
|
+
hb_buffer_free(&comment);
|
|
158
|
+
token_free(comment_start, parser->allocator);
|
|
159
|
+
token_free(comment_end, parser->allocator);
|
|
132
160
|
|
|
133
161
|
return comment_node;
|
|
134
162
|
}
|
|
135
163
|
|
|
136
164
|
static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
137
|
-
hb_array_T* errors = hb_array_init(8);
|
|
138
|
-
hb_array_T* children = hb_array_init(8);
|
|
165
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
166
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
139
167
|
hb_buffer_T content;
|
|
140
|
-
hb_buffer_init(&content, 64);
|
|
168
|
+
hb_buffer_init(&content, 64, parser->allocator);
|
|
141
169
|
|
|
142
170
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_DOCTYPE, errors);
|
|
143
171
|
|
|
@@ -154,8 +182,8 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
|
154
182
|
}
|
|
155
183
|
|
|
156
184
|
token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
|
|
157
|
-
|
|
158
|
-
token_free(token);
|
|
185
|
+
hb_buffer_append_string(&content, token->value);
|
|
186
|
+
token_free(token, parser->allocator);
|
|
159
187
|
}
|
|
160
188
|
|
|
161
189
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
@@ -168,21 +196,22 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
|
168
196
|
tag_closing,
|
|
169
197
|
tag_opening->location.start,
|
|
170
198
|
tag_closing->location.end,
|
|
171
|
-
errors
|
|
199
|
+
errors,
|
|
200
|
+
parser->allocator
|
|
172
201
|
);
|
|
173
202
|
|
|
174
|
-
token_free(tag_opening);
|
|
175
|
-
token_free(tag_closing);
|
|
176
|
-
|
|
203
|
+
token_free(tag_opening, parser->allocator);
|
|
204
|
+
token_free(tag_closing, parser->allocator);
|
|
205
|
+
hb_buffer_free(&content);
|
|
177
206
|
|
|
178
207
|
return doctype;
|
|
179
208
|
}
|
|
180
209
|
|
|
181
210
|
static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser) {
|
|
182
|
-
hb_array_T* errors = hb_array_init(8);
|
|
183
|
-
hb_array_T* children = hb_array_init(8);
|
|
211
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
212
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
184
213
|
hb_buffer_T content;
|
|
185
|
-
hb_buffer_init(&content, 64);
|
|
214
|
+
hb_buffer_init(&content, 64, parser->allocator);
|
|
186
215
|
|
|
187
216
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_XML_DECLARATION, errors);
|
|
188
217
|
|
|
@@ -201,8 +230,8 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
201
230
|
}
|
|
202
231
|
|
|
203
232
|
token_T* token = parser_advance(parser);
|
|
204
|
-
|
|
205
|
-
token_free(token);
|
|
233
|
+
hb_buffer_append_string(&content, token->value);
|
|
234
|
+
token_free(token, parser->allocator);
|
|
206
235
|
}
|
|
207
236
|
|
|
208
237
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
@@ -215,12 +244,13 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
215
244
|
tag_closing,
|
|
216
245
|
tag_opening->location.start,
|
|
217
246
|
tag_closing->location.end,
|
|
218
|
-
errors
|
|
247
|
+
errors,
|
|
248
|
+
parser->allocator
|
|
219
249
|
);
|
|
220
250
|
|
|
221
|
-
token_free(tag_opening);
|
|
222
|
-
token_free(tag_closing);
|
|
223
|
-
|
|
251
|
+
token_free(tag_opening, parser->allocator);
|
|
252
|
+
token_free(tag_closing, parser->allocator);
|
|
253
|
+
hb_buffer_free(&content);
|
|
224
254
|
|
|
225
255
|
return xml_declaration;
|
|
226
256
|
}
|
|
@@ -229,7 +259,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
|
|
|
229
259
|
position_T start = parser->current_token->location.start;
|
|
230
260
|
|
|
231
261
|
hb_buffer_T content;
|
|
232
|
-
hb_buffer_init(&content, 2048);
|
|
262
|
+
hb_buffer_init(&content, 2048, parser->allocator);
|
|
233
263
|
|
|
234
264
|
while (token_is_none_of(
|
|
235
265
|
parser,
|
|
@@ -241,49 +271,66 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
|
|
|
241
271
|
TOKEN_EOF
|
|
242
272
|
)) {
|
|
243
273
|
if (token_is(parser, TOKEN_ERROR)) {
|
|
244
|
-
|
|
274
|
+
hb_buffer_free(&content);
|
|
245
275
|
|
|
246
|
-
|
|
247
|
-
append_unexpected_error(
|
|
248
|
-
"Token Error",
|
|
249
|
-
"not TOKEN_ERROR",
|
|
250
|
-
token->value,
|
|
251
|
-
token->location.start,
|
|
252
|
-
token->location.end,
|
|
253
|
-
document_errors
|
|
254
|
-
);
|
|
255
|
-
|
|
256
|
-
token_free(token);
|
|
276
|
+
parser_append_unexpected_error_string(parser, document_errors, "Token Error", "not an error token");
|
|
257
277
|
|
|
258
278
|
return NULL;
|
|
259
279
|
}
|
|
260
280
|
|
|
281
|
+
if (parser->options.strict && parser->current_token->type == TOKEN_PERCENT) {
|
|
282
|
+
lexer_T lexer_copy = *parser->lexer;
|
|
283
|
+
token_T* peek_token = lexer_next_token(&lexer_copy);
|
|
284
|
+
|
|
285
|
+
if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
|
|
286
|
+
position_T stray_start = parser->current_token->location.start;
|
|
287
|
+
position_T stray_end = peek_token->location.end;
|
|
288
|
+
token_free(peek_token, parser->allocator);
|
|
289
|
+
|
|
290
|
+
append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, document_errors);
|
|
291
|
+
|
|
292
|
+
token_T* percent = parser_advance(parser);
|
|
293
|
+
hb_buffer_append_string(&content, percent->value);
|
|
294
|
+
token_free(percent, parser->allocator);
|
|
295
|
+
|
|
296
|
+
token_T* gt = parser_advance(parser);
|
|
297
|
+
hb_buffer_append_string(&content, gt->value);
|
|
298
|
+
token_free(gt, parser->allocator);
|
|
299
|
+
|
|
300
|
+
continue;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
token_free(peek_token, parser->allocator);
|
|
304
|
+
}
|
|
305
|
+
|
|
261
306
|
token_T* token = parser_advance(parser);
|
|
262
|
-
|
|
263
|
-
token_free(token);
|
|
307
|
+
hb_buffer_append_string(&content, token->value);
|
|
308
|
+
token_free(token, parser->allocator);
|
|
264
309
|
}
|
|
265
310
|
|
|
266
|
-
hb_array_T* errors = hb_array_init(8);
|
|
311
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
267
312
|
|
|
268
313
|
AST_HTML_TEXT_NODE_T* text_node = NULL;
|
|
269
314
|
|
|
270
315
|
if (hb_buffer_length(&content) > 0) {
|
|
316
|
+
hb_string_T text_content = { .data = content.value, .length = (uint32_t) content.length };
|
|
271
317
|
text_node =
|
|
272
|
-
ast_html_text_node_init(
|
|
318
|
+
ast_html_text_node_init(text_content, start, parser->current_token->location.start, errors, parser->allocator);
|
|
273
319
|
} else {
|
|
274
|
-
text_node =
|
|
320
|
+
text_node =
|
|
321
|
+
ast_html_text_node_init(HB_STRING_EMPTY, start, parser->current_token->location.start, errors, parser->allocator);
|
|
275
322
|
}
|
|
276
323
|
|
|
277
|
-
|
|
324
|
+
hb_buffer_free(&content);
|
|
278
325
|
|
|
279
326
|
return text_node;
|
|
280
327
|
}
|
|
281
328
|
|
|
282
329
|
static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
|
|
283
|
-
hb_array_T* errors = hb_array_init(8);
|
|
284
|
-
hb_array_T* children = hb_array_init(8);
|
|
330
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
331
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
285
332
|
hb_buffer_T buffer;
|
|
286
|
-
hb_buffer_init(&buffer, 128);
|
|
333
|
+
hb_buffer_init(&buffer, 128, parser->allocator);
|
|
287
334
|
position_T start = parser->current_token->location.start;
|
|
288
335
|
|
|
289
336
|
while (token_is_none_of(
|
|
@@ -296,6 +343,16 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
296
343
|
TOKEN_EOF
|
|
297
344
|
)) {
|
|
298
345
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
346
|
+
hb_string_T tag = parser->current_token->value;
|
|
347
|
+
bool is_output_tag = (tag.length >= 3 && tag.data[2] == '=');
|
|
348
|
+
|
|
349
|
+
if (!is_output_tag) {
|
|
350
|
+
bool is_control_flow = parser_lookahead_erb_is_control_flow(parser);
|
|
351
|
+
|
|
352
|
+
if (hb_buffer_is_empty(&buffer) && hb_array_size(children) == 0) { break; }
|
|
353
|
+
if (is_control_flow) { break; }
|
|
354
|
+
}
|
|
355
|
+
|
|
299
356
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
300
357
|
|
|
301
358
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
@@ -306,8 +363,8 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
306
363
|
}
|
|
307
364
|
|
|
308
365
|
token_T* token = parser_advance(parser);
|
|
309
|
-
|
|
310
|
-
token_free(token);
|
|
366
|
+
hb_buffer_append_string(&buffer, token->value);
|
|
367
|
+
token_free(token, parser->allocator);
|
|
311
368
|
}
|
|
312
369
|
|
|
313
370
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
@@ -327,9 +384,9 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
327
384
|
}
|
|
328
385
|
|
|
329
386
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
|
330
|
-
ast_html_attribute_name_node_init(children, node_start, node_end, errors);
|
|
387
|
+
ast_html_attribute_name_node_init(children, node_start, node_end, errors, parser->allocator);
|
|
331
388
|
|
|
332
|
-
|
|
389
|
+
hb_buffer_free(&buffer);
|
|
333
390
|
|
|
334
391
|
return attribute_name;
|
|
335
392
|
}
|
|
@@ -340,55 +397,137 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
340
397
|
hb_array_T* errors
|
|
341
398
|
) {
|
|
342
399
|
hb_buffer_T buffer;
|
|
343
|
-
hb_buffer_init(&buffer, 512);
|
|
400
|
+
hb_buffer_init(&buffer, 512, parser->allocator);
|
|
344
401
|
token_T* opening_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
345
402
|
position_T start = parser->current_token->location.start;
|
|
346
403
|
|
|
347
404
|
while (!token_is(parser, TOKEN_EOF)
|
|
348
405
|
&& !(
|
|
349
406
|
token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
|
350
|
-
&&
|
|
407
|
+
&& hb_string_equals(parser->current_token->value, opening_quote->value)
|
|
351
408
|
)) {
|
|
352
|
-
if (token_is(parser,
|
|
409
|
+
if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
|
|
410
|
+
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
|
411
|
+
bool found_closing_quote = false;
|
|
412
|
+
token_T* lookahead = lexer_next_token(parser->lexer);
|
|
413
|
+
|
|
414
|
+
while (lookahead && lookahead->type != TOKEN_EOF) {
|
|
415
|
+
if (lookahead->type == TOKEN_QUOTE && opening_quote != NULL
|
|
416
|
+
&& hb_string_equals(lookahead->value, opening_quote->value)) {
|
|
417
|
+
found_closing_quote = true;
|
|
418
|
+
token_free(lookahead, parser->allocator);
|
|
419
|
+
break;
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
token_free(lookahead, parser->allocator);
|
|
423
|
+
|
|
424
|
+
lookahead = lexer_next_token(parser->lexer);
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
if (lookahead && !found_closing_quote && lookahead->type == TOKEN_EOF) {
|
|
428
|
+
token_free(lookahead, parser->allocator);
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
lexer_restore_state(parser->lexer, saved_state);
|
|
432
|
+
|
|
433
|
+
if (found_closing_quote) {
|
|
434
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
435
|
+
token_free(parser->current_token, parser->allocator);
|
|
436
|
+
parser->current_token = lexer_next_token(parser->lexer);
|
|
437
|
+
continue;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
append_unclosed_quote_error(
|
|
441
|
+
opening_quote,
|
|
442
|
+
opening_quote->location.start,
|
|
443
|
+
parser->current_token->location.start,
|
|
444
|
+
parser->allocator,
|
|
445
|
+
errors
|
|
446
|
+
);
|
|
447
|
+
|
|
353
448
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
449
|
+
hb_buffer_free(&buffer);
|
|
354
450
|
|
|
355
|
-
|
|
451
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
|
|
452
|
+
opening_quote,
|
|
453
|
+
children,
|
|
454
|
+
NULL,
|
|
455
|
+
true,
|
|
456
|
+
opening_quote->location.start,
|
|
457
|
+
parser->current_token->location.start,
|
|
458
|
+
errors,
|
|
459
|
+
parser->allocator
|
|
460
|
+
);
|
|
356
461
|
|
|
357
|
-
|
|
462
|
+
token_free(opening_quote, parser->allocator);
|
|
358
463
|
|
|
359
|
-
|
|
464
|
+
return attribute_value;
|
|
360
465
|
}
|
|
361
466
|
|
|
362
|
-
|
|
467
|
+
bool buffer_ends_with_whitespace = buffer.length > 0 && is_whitespace(buffer.value[buffer.length - 1]);
|
|
468
|
+
|
|
469
|
+
if (token_is(parser, TOKEN_IDENTIFIER) && buffer_ends_with_whitespace) {
|
|
363
470
|
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
|
471
|
+
token_T* equals_token = lexer_next_token(parser->lexer);
|
|
472
|
+
bool looks_like_new_attribute = false;
|
|
364
473
|
|
|
365
|
-
|
|
474
|
+
if (equals_token && equals_token->type == TOKEN_EQUALS) {
|
|
475
|
+
token_T* after_equals = lexer_next_token(parser->lexer);
|
|
476
|
+
looks_like_new_attribute = (after_equals && after_equals->type == TOKEN_QUOTE);
|
|
366
477
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
hb_buffer_append(&buffer, parser->current_token->value);
|
|
370
|
-
hb_buffer_append(&buffer, next_token->value);
|
|
478
|
+
if (after_equals) { token_free(after_equals, parser->allocator); }
|
|
479
|
+
}
|
|
371
480
|
|
|
372
|
-
|
|
373
|
-
|
|
481
|
+
if (equals_token) { token_free(equals_token, parser->allocator); }
|
|
482
|
+
lexer_restore_state(parser->lexer, saved_state);
|
|
374
483
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
484
|
+
if (looks_like_new_attribute) {
|
|
485
|
+
append_unclosed_quote_error(
|
|
486
|
+
opening_quote,
|
|
487
|
+
opening_quote->location.start,
|
|
488
|
+
parser->current_token->location.start,
|
|
489
|
+
parser->allocator,
|
|
490
|
+
errors
|
|
491
|
+
);
|
|
492
|
+
|
|
493
|
+
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
494
|
+
hb_buffer_free(&buffer);
|
|
495
|
+
|
|
496
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
|
|
497
|
+
opening_quote,
|
|
498
|
+
children,
|
|
499
|
+
NULL,
|
|
500
|
+
true,
|
|
501
|
+
opening_quote->location.start,
|
|
502
|
+
parser->current_token->location.start,
|
|
503
|
+
errors,
|
|
504
|
+
parser->allocator
|
|
505
|
+
);
|
|
506
|
+
|
|
507
|
+
token_free(opening_quote, parser->allocator);
|
|
379
508
|
|
|
380
|
-
|
|
509
|
+
return attribute_value;
|
|
381
510
|
}
|
|
382
511
|
}
|
|
383
512
|
|
|
384
|
-
|
|
385
|
-
|
|
513
|
+
if (token_is(parser, TOKEN_ERB_START)) {
|
|
514
|
+
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
515
|
+
|
|
516
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
517
|
+
|
|
518
|
+
start = parser->current_token->location.start;
|
|
519
|
+
|
|
520
|
+
continue;
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
524
|
+
token_free(parser->current_token, parser->allocator);
|
|
386
525
|
|
|
387
526
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
388
527
|
}
|
|
389
528
|
|
|
390
529
|
if (token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
|
391
|
-
&&
|
|
530
|
+
&& hb_string_equals(parser->current_token->value, opening_quote->value)) {
|
|
392
531
|
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
|
393
532
|
|
|
394
533
|
token_T* potential_closing = parser->current_token;
|
|
@@ -396,27 +535,28 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
396
535
|
|
|
397
536
|
if (token_is(parser, TOKEN_IDENTIFIER) || token_is(parser, TOKEN_CHARACTER)) {
|
|
398
537
|
append_unexpected_error(
|
|
399
|
-
"Unescaped quote character in attribute value",
|
|
400
|
-
"
|
|
538
|
+
hb_string("Unescaped quote character in attribute value"),
|
|
539
|
+
hb_string("HTML entity ('/") or different quote style"),
|
|
401
540
|
opening_quote->value,
|
|
402
541
|
potential_closing->location.start,
|
|
403
542
|
potential_closing->location.end,
|
|
543
|
+
parser->allocator,
|
|
404
544
|
errors
|
|
405
545
|
);
|
|
406
546
|
|
|
407
547
|
lexer_restore_state(parser->lexer, saved_state);
|
|
408
548
|
|
|
409
|
-
token_free(parser->current_token);
|
|
549
|
+
token_free(parser->current_token, parser->allocator);
|
|
410
550
|
parser->current_token = potential_closing;
|
|
411
551
|
|
|
412
|
-
|
|
413
|
-
token_free(parser->current_token);
|
|
552
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
553
|
+
token_free(parser->current_token, parser->allocator);
|
|
414
554
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
415
555
|
|
|
416
556
|
while (!token_is(parser, TOKEN_EOF)
|
|
417
557
|
&& !(
|
|
418
558
|
token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
|
419
|
-
&&
|
|
559
|
+
&& hb_string_equals(parser->current_token->value, opening_quote->value)
|
|
420
560
|
)) {
|
|
421
561
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
422
562
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
@@ -428,13 +568,13 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
428
568
|
continue;
|
|
429
569
|
}
|
|
430
570
|
|
|
431
|
-
|
|
432
|
-
token_free(parser->current_token);
|
|
571
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
572
|
+
token_free(parser->current_token, parser->allocator);
|
|
433
573
|
|
|
434
574
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
435
575
|
}
|
|
436
576
|
} else {
|
|
437
|
-
token_free(parser->current_token);
|
|
577
|
+
token_free(parser->current_token, parser->allocator);
|
|
438
578
|
parser->current_token = potential_closing;
|
|
439
579
|
|
|
440
580
|
lexer_restore_state(parser->lexer, saved_state);
|
|
@@ -442,20 +582,10 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
442
582
|
}
|
|
443
583
|
|
|
444
584
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
445
|
-
|
|
585
|
+
hb_buffer_free(&buffer);
|
|
446
586
|
|
|
447
587
|
token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
448
588
|
|
|
449
|
-
if (opening_quote != NULL && closing_quote != NULL && !string_equals(opening_quote->value, closing_quote->value)) {
|
|
450
|
-
append_quotes_mismatch_error(
|
|
451
|
-
opening_quote,
|
|
452
|
-
closing_quote,
|
|
453
|
-
closing_quote->location.start,
|
|
454
|
-
closing_quote->location.end,
|
|
455
|
-
errors
|
|
456
|
-
);
|
|
457
|
-
}
|
|
458
|
-
|
|
459
589
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
|
|
460
590
|
opening_quote,
|
|
461
591
|
children,
|
|
@@ -463,18 +593,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
463
593
|
true,
|
|
464
594
|
opening_quote->location.start,
|
|
465
595
|
closing_quote->location.end,
|
|
466
|
-
errors
|
|
596
|
+
errors,
|
|
597
|
+
parser->allocator
|
|
467
598
|
);
|
|
468
599
|
|
|
469
|
-
token_free(opening_quote);
|
|
470
|
-
token_free(closing_quote);
|
|
600
|
+
token_free(opening_quote, parser->allocator);
|
|
601
|
+
token_free(closing_quote, parser->allocator);
|
|
471
602
|
|
|
472
603
|
return attribute_value;
|
|
473
604
|
}
|
|
474
605
|
|
|
475
606
|
static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser_T* parser) {
|
|
476
|
-
hb_array_T* children = hb_array_init(8);
|
|
477
|
-
hb_array_T* errors = hb_array_init(8);
|
|
607
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
608
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
478
609
|
|
|
479
610
|
// <div id=<%= "home" %>>
|
|
480
611
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
@@ -488,15 +619,16 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
488
619
|
false,
|
|
489
620
|
erb_node->base.location.start,
|
|
490
621
|
erb_node->base.location.end,
|
|
491
|
-
errors
|
|
622
|
+
errors,
|
|
623
|
+
parser->allocator
|
|
492
624
|
);
|
|
493
625
|
}
|
|
494
626
|
|
|
495
627
|
// <div id=home>
|
|
496
628
|
if (token_is(parser, TOKEN_IDENTIFIER)) {
|
|
497
629
|
token_T* identifier = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
|
498
|
-
AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier);
|
|
499
|
-
token_free(identifier);
|
|
630
|
+
AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier, parser->allocator);
|
|
631
|
+
token_free(identifier, parser->allocator);
|
|
500
632
|
|
|
501
633
|
hb_array_append(children, literal);
|
|
502
634
|
|
|
@@ -507,7 +639,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
507
639
|
false,
|
|
508
640
|
literal->base.location.start,
|
|
509
641
|
literal->base.location.end,
|
|
510
|
-
errors
|
|
642
|
+
errors,
|
|
643
|
+
parser->allocator
|
|
511
644
|
);
|
|
512
645
|
}
|
|
513
646
|
|
|
@@ -520,31 +653,37 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
520
653
|
position_T end = token->location.end;
|
|
521
654
|
|
|
522
655
|
append_unexpected_error(
|
|
523
|
-
"Invalid quote character for HTML attribute",
|
|
524
|
-
"single quote (') or double quote (\")",
|
|
525
|
-
"backtick
|
|
656
|
+
hb_string("Invalid quote character for HTML attribute"),
|
|
657
|
+
hb_string("single quote (') or double quote (\")"),
|
|
658
|
+
hb_string("a backtick"),
|
|
526
659
|
start,
|
|
527
660
|
end,
|
|
661
|
+
parser->allocator,
|
|
528
662
|
errors
|
|
529
663
|
);
|
|
530
664
|
|
|
531
665
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
|
|
532
|
-
ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
|
|
666
|
+
ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors, parser->allocator);
|
|
533
667
|
|
|
534
|
-
token_free(token);
|
|
668
|
+
token_free(token, parser->allocator);
|
|
535
669
|
|
|
536
670
|
return value;
|
|
537
671
|
}
|
|
538
672
|
|
|
673
|
+
char* expected = token_types_to_friendly_string(parser->allocator, TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START);
|
|
674
|
+
|
|
539
675
|
append_unexpected_error(
|
|
540
|
-
"Unexpected Token",
|
|
541
|
-
|
|
542
|
-
|
|
676
|
+
hb_string("Unexpected Token"),
|
|
677
|
+
hb_string(expected),
|
|
678
|
+
token_type_to_friendly_string(parser->current_token->type),
|
|
543
679
|
parser->current_token->location.start,
|
|
544
680
|
parser->current_token->location.end,
|
|
681
|
+
parser->allocator,
|
|
545
682
|
errors
|
|
546
683
|
);
|
|
547
684
|
|
|
685
|
+
hb_allocator_dealloc(parser->allocator, expected);
|
|
686
|
+
|
|
548
687
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init(
|
|
549
688
|
NULL,
|
|
550
689
|
children,
|
|
@@ -552,7 +691,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
552
691
|
false,
|
|
553
692
|
parser->current_token->location.start,
|
|
554
693
|
parser->current_token->location.end,
|
|
555
|
-
errors
|
|
694
|
+
errors,
|
|
695
|
+
parser->allocator
|
|
556
696
|
);
|
|
557
697
|
|
|
558
698
|
return value;
|
|
@@ -567,7 +707,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
567
707
|
|
|
568
708
|
if (has_equals) {
|
|
569
709
|
hb_buffer_T equals_buffer;
|
|
570
|
-
hb_buffer_init(&equals_buffer, 256);
|
|
710
|
+
hb_buffer_init(&equals_buffer, 256, parser->allocator);
|
|
571
711
|
position_T equals_start = { 0 };
|
|
572
712
|
position_T equals_end = { 0 };
|
|
573
713
|
uint32_t range_start = 0;
|
|
@@ -583,8 +723,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
583
723
|
range_start = whitespace->range.from;
|
|
584
724
|
}
|
|
585
725
|
|
|
586
|
-
|
|
587
|
-
token_free(whitespace);
|
|
726
|
+
hb_buffer_append_string(&equals_buffer, whitespace->value);
|
|
727
|
+
token_free(whitespace, parser->allocator);
|
|
588
728
|
}
|
|
589
729
|
|
|
590
730
|
token_T* equals = parser_advance(parser);
|
|
@@ -595,27 +735,45 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
595
735
|
range_start = equals->range.from;
|
|
596
736
|
}
|
|
597
737
|
|
|
598
|
-
|
|
738
|
+
hb_buffer_append_string(&equals_buffer, equals->value);
|
|
599
739
|
equals_end = equals->location.end;
|
|
600
740
|
range_end = equals->range.to;
|
|
601
|
-
token_free(equals);
|
|
741
|
+
token_free(equals, parser->allocator);
|
|
602
742
|
|
|
603
743
|
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
604
744
|
token_T* whitespace = parser_advance(parser);
|
|
605
|
-
|
|
745
|
+
hb_buffer_append_string(&equals_buffer, whitespace->value);
|
|
606
746
|
equals_end = whitespace->location.end;
|
|
607
747
|
range_end = whitespace->range.to;
|
|
608
|
-
token_free(whitespace);
|
|
748
|
+
token_free(whitespace, parser->allocator);
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
token_T* equals_with_whitespace = hb_allocator_alloc(parser->allocator, sizeof(token_T));
|
|
752
|
+
|
|
753
|
+
if (!equals_with_whitespace) {
|
|
754
|
+
hb_buffer_free(&equals_buffer);
|
|
755
|
+
|
|
756
|
+
return ast_html_attribute_node_init(
|
|
757
|
+
attribute_name,
|
|
758
|
+
NULL,
|
|
759
|
+
NULL,
|
|
760
|
+
attribute_name->base.location.start,
|
|
761
|
+
attribute_name->base.location.end,
|
|
762
|
+
NULL,
|
|
763
|
+
parser->allocator
|
|
764
|
+
);
|
|
609
765
|
}
|
|
610
766
|
|
|
611
|
-
token_T* equals_with_whitespace = calloc(1, sizeof(token_T));
|
|
612
767
|
equals_with_whitespace->type = TOKEN_EQUALS;
|
|
613
|
-
|
|
768
|
+
|
|
769
|
+
char* arena_copy = hb_allocator_strndup(parser->allocator, equals_buffer.value, equals_buffer.length);
|
|
770
|
+
equals_with_whitespace->value = (hb_string_T) { .data = arena_copy, .length = (uint32_t) equals_buffer.length };
|
|
771
|
+
|
|
772
|
+
hb_buffer_free(&equals_buffer);
|
|
773
|
+
|
|
614
774
|
equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
|
|
615
775
|
equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
|
|
616
776
|
|
|
617
|
-
free(equals_buffer.value);
|
|
618
|
-
|
|
619
777
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
|
620
778
|
|
|
621
779
|
return ast_html_attribute_node_init(
|
|
@@ -624,7 +782,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
624
782
|
attribute_value,
|
|
625
783
|
attribute_name->base.location.start,
|
|
626
784
|
attribute_value->base.location.end,
|
|
627
|
-
NULL
|
|
785
|
+
NULL,
|
|
786
|
+
parser->allocator
|
|
628
787
|
);
|
|
629
788
|
} else {
|
|
630
789
|
return ast_html_attribute_node_init(
|
|
@@ -633,7 +792,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
633
792
|
NULL,
|
|
634
793
|
attribute_name->base.location.start,
|
|
635
794
|
attribute_name->base.location.end,
|
|
636
|
-
NULL
|
|
795
|
+
NULL,
|
|
796
|
+
parser->allocator
|
|
637
797
|
);
|
|
638
798
|
}
|
|
639
799
|
} else {
|
|
@@ -645,6 +805,51 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
645
805
|
if (equals != NULL) {
|
|
646
806
|
parser_consume_whitespace(parser, NULL);
|
|
647
807
|
|
|
808
|
+
// <div class= >
|
|
809
|
+
if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
|
|
810
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
811
|
+
hb_string_T attribute_name_string = hb_string("unknown");
|
|
812
|
+
|
|
813
|
+
if (hb_array_size(attribute_name->children) > 0) {
|
|
814
|
+
AST_LITERAL_NODE_T* first_child = (AST_LITERAL_NODE_T*) hb_array_get(attribute_name->children, 0);
|
|
815
|
+
|
|
816
|
+
if (first_child && !hb_string_is_empty(first_child->content)) { attribute_name_string = first_child->content; }
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
append_missing_attribute_value_error(
|
|
820
|
+
attribute_name_string,
|
|
821
|
+
equals->location.start,
|
|
822
|
+
parser->current_token->location.start,
|
|
823
|
+
parser->allocator,
|
|
824
|
+
errors
|
|
825
|
+
);
|
|
826
|
+
|
|
827
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* empty_value = ast_html_attribute_value_node_init(
|
|
828
|
+
NULL,
|
|
829
|
+
hb_array_init(8, parser->allocator),
|
|
830
|
+
NULL,
|
|
831
|
+
false,
|
|
832
|
+
equals->location.end,
|
|
833
|
+
parser->current_token->location.start,
|
|
834
|
+
errors,
|
|
835
|
+
parser->allocator
|
|
836
|
+
);
|
|
837
|
+
|
|
838
|
+
AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
|
|
839
|
+
attribute_name,
|
|
840
|
+
equals,
|
|
841
|
+
empty_value,
|
|
842
|
+
attribute_name->base.location.start,
|
|
843
|
+
parser->current_token->location.start,
|
|
844
|
+
NULL,
|
|
845
|
+
parser->allocator
|
|
846
|
+
);
|
|
847
|
+
|
|
848
|
+
token_free(equals, parser->allocator);
|
|
849
|
+
|
|
850
|
+
return attribute_node;
|
|
851
|
+
}
|
|
852
|
+
|
|
648
853
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
|
649
854
|
|
|
650
855
|
AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
|
|
@@ -653,10 +858,11 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
653
858
|
attribute_value,
|
|
654
859
|
attribute_name->base.location.start,
|
|
655
860
|
attribute_value->base.location.end,
|
|
656
|
-
NULL
|
|
861
|
+
NULL,
|
|
862
|
+
parser->allocator
|
|
657
863
|
);
|
|
658
864
|
|
|
659
|
-
token_free(equals);
|
|
865
|
+
token_free(equals, parser->allocator);
|
|
660
866
|
|
|
661
867
|
return attribute_node;
|
|
662
868
|
}
|
|
@@ -667,7 +873,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
667
873
|
NULL,
|
|
668
874
|
attribute_name->base.location.start,
|
|
669
875
|
attribute_name->base.location.end,
|
|
670
|
-
NULL
|
|
876
|
+
NULL,
|
|
877
|
+
parser->allocator
|
|
671
878
|
);
|
|
672
879
|
}
|
|
673
880
|
|
|
@@ -678,11 +885,11 @@ static void parser_skip_erb_content(lexer_T* lexer) {
|
|
|
678
885
|
token = lexer_next_token(lexer);
|
|
679
886
|
|
|
680
887
|
if (token->type == TOKEN_ERB_END) {
|
|
681
|
-
token_free(token);
|
|
888
|
+
token_free(token, lexer->allocator);
|
|
682
889
|
break;
|
|
683
890
|
}
|
|
684
891
|
|
|
685
|
-
token_free(token);
|
|
892
|
+
token_free(token, lexer->allocator);
|
|
686
893
|
} while (true);
|
|
687
894
|
}
|
|
688
895
|
|
|
@@ -693,12 +900,12 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
|
|
|
693
900
|
after = lexer_next_token(lexer);
|
|
694
901
|
|
|
695
902
|
if (after->type == TOKEN_EQUALS) {
|
|
696
|
-
token_free(after);
|
|
903
|
+
token_free(after, lexer->allocator);
|
|
697
904
|
return true;
|
|
698
905
|
}
|
|
699
906
|
|
|
700
907
|
if (after->type == TOKEN_WHITESPACE || after->type == TOKEN_NEWLINE) {
|
|
701
|
-
token_free(after);
|
|
908
|
+
token_free(after, lexer->allocator);
|
|
702
909
|
continue;
|
|
703
910
|
}
|
|
704
911
|
|
|
@@ -706,23 +913,56 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
|
|
|
706
913
|
|| after->type == TOKEN_ERB_START) {
|
|
707
914
|
|
|
708
915
|
if (after->type == TOKEN_ERB_START) {
|
|
709
|
-
token_free(after);
|
|
916
|
+
token_free(after, lexer->allocator);
|
|
710
917
|
parser_skip_erb_content(lexer);
|
|
711
918
|
} else {
|
|
712
|
-
token_free(after);
|
|
919
|
+
token_free(after, lexer->allocator);
|
|
713
920
|
}
|
|
714
921
|
continue;
|
|
715
922
|
}
|
|
716
923
|
|
|
717
|
-
token_free(after);
|
|
924
|
+
token_free(after, lexer->allocator);
|
|
718
925
|
return false;
|
|
719
926
|
|
|
720
927
|
} while (true);
|
|
721
928
|
}
|
|
722
929
|
|
|
930
|
+
static bool starts_with_keyword(hb_string_T string, const char* keyword) {
|
|
931
|
+
hb_string_T prefix = hb_string(keyword);
|
|
932
|
+
if (string.length < prefix.length) { return false; }
|
|
933
|
+
if (strncmp(string.data, prefix.data, prefix.length) != 0) { return false; }
|
|
934
|
+
|
|
935
|
+
if (string.length == prefix.length) { return true; }
|
|
936
|
+
|
|
937
|
+
return is_whitespace(string.data[prefix.length]);
|
|
938
|
+
}
|
|
939
|
+
|
|
940
|
+
// TODO: ideally we could avoid basing this off of strings, and use the step in analyze.c
|
|
941
|
+
static bool parser_lookahead_erb_is_control_flow(parser_T* parser) {
|
|
942
|
+
lexer_T lexer_copy = *parser->lexer;
|
|
943
|
+
token_T* content = lexer_next_token(&lexer_copy);
|
|
944
|
+
|
|
945
|
+
if (content == NULL || content->type != TOKEN_ERB_CONTENT) {
|
|
946
|
+
if (content) { token_free(content, parser->allocator); }
|
|
947
|
+
|
|
948
|
+
return false;
|
|
949
|
+
}
|
|
950
|
+
|
|
951
|
+
hb_string_T trimmed = hb_string_trim_start(content->value);
|
|
952
|
+
|
|
953
|
+
bool is_control_flow = starts_with_keyword(trimmed, "end") || starts_with_keyword(trimmed, "else")
|
|
954
|
+
|| starts_with_keyword(trimmed, "elsif") || starts_with_keyword(trimmed, "in")
|
|
955
|
+
|| starts_with_keyword(trimmed, "when") || starts_with_keyword(trimmed, "rescue")
|
|
956
|
+
|| starts_with_keyword(trimmed, "ensure");
|
|
957
|
+
|
|
958
|
+
token_free(content, parser->allocator);
|
|
959
|
+
|
|
960
|
+
return is_control_flow;
|
|
961
|
+
}
|
|
962
|
+
|
|
723
963
|
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
|
|
724
|
-
bool is_output_tag =
|
|
725
|
-
&&
|
|
964
|
+
bool is_output_tag = !hb_string_is_empty(parser->current_token->value)
|
|
965
|
+
&& hb_string_starts_with(parser->current_token->value, hb_string("<%="));
|
|
726
966
|
|
|
727
967
|
if (!is_output_tag) {
|
|
728
968
|
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
@@ -733,7 +973,7 @@ static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children
|
|
|
733
973
|
lexer_T lexer_copy = *parser->lexer;
|
|
734
974
|
|
|
735
975
|
token_T* erb_start = lexer_next_token(&lexer_copy);
|
|
736
|
-
token_free(erb_start);
|
|
976
|
+
token_free(erb_start, parser->allocator);
|
|
737
977
|
parser_skip_erb_content(&lexer_copy);
|
|
738
978
|
|
|
739
979
|
bool looks_like_attribute = parser_lookahead_erb_is_attribute(&lexer_copy);
|
|
@@ -759,13 +999,40 @@ static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* c
|
|
|
759
999
|
}
|
|
760
1000
|
|
|
761
1001
|
static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
762
|
-
hb_array_T* errors = hb_array_init(8);
|
|
763
|
-
hb_array_T* children = hb_array_init(8);
|
|
1002
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1003
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
764
1004
|
|
|
765
1005
|
token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
|
|
766
1006
|
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
|
767
1007
|
|
|
768
1008
|
while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) {
|
|
1009
|
+
if (token_is_any_of(parser, TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1010
|
+
append_unclosed_open_tag_error(
|
|
1011
|
+
tag_name,
|
|
1012
|
+
tag_name->location.start,
|
|
1013
|
+
parser->current_token->location.start,
|
|
1014
|
+
parser->allocator,
|
|
1015
|
+
errors
|
|
1016
|
+
);
|
|
1017
|
+
|
|
1018
|
+
AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
|
|
1019
|
+
tag_start,
|
|
1020
|
+
tag_name,
|
|
1021
|
+
NULL,
|
|
1022
|
+
children,
|
|
1023
|
+
false,
|
|
1024
|
+
tag_start->location.start,
|
|
1025
|
+
parser->current_token->location.start,
|
|
1026
|
+
errors,
|
|
1027
|
+
parser->allocator
|
|
1028
|
+
);
|
|
1029
|
+
|
|
1030
|
+
token_free(tag_start, parser->allocator);
|
|
1031
|
+
token_free(tag_name, parser->allocator);
|
|
1032
|
+
|
|
1033
|
+
return open_tag_node;
|
|
1034
|
+
}
|
|
1035
|
+
|
|
769
1036
|
if (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
770
1037
|
parser_handle_whitespace_in_open_tag(parser, children);
|
|
771
1038
|
continue;
|
|
@@ -791,21 +1058,79 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
791
1058
|
token_T* next_token = lexer_next_token(&lexer_copy);
|
|
792
1059
|
|
|
793
1060
|
if (next_token && next_token->type == TOKEN_IDENTIFIER) {
|
|
794
|
-
token_free(next_token);
|
|
1061
|
+
token_free(next_token, parser->allocator);
|
|
795
1062
|
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
796
1063
|
|
|
797
1064
|
continue;
|
|
798
1065
|
}
|
|
799
1066
|
|
|
800
|
-
token_free(next_token);
|
|
1067
|
+
token_free(next_token, parser->allocator);
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
if (parser->current_token->type == TOKEN_PERCENT) {
|
|
1071
|
+
lexer_T lexer_copy = *parser->lexer;
|
|
1072
|
+
token_T* peek_token = lexer_next_token(&lexer_copy);
|
|
1073
|
+
|
|
1074
|
+
if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
|
|
1075
|
+
position_T stray_start = parser->current_token->location.start;
|
|
1076
|
+
position_T stray_end = peek_token->location.end;
|
|
1077
|
+
token_free(peek_token, parser->allocator);
|
|
1078
|
+
|
|
1079
|
+
append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, errors);
|
|
1080
|
+
|
|
1081
|
+
token_T* percent = parser_advance(parser);
|
|
1082
|
+
token_T* gt = parser_advance(parser);
|
|
1083
|
+
|
|
1084
|
+
AST_LITERAL_NODE_T* literal =
|
|
1085
|
+
ast_literal_node_init(hb_string("%>"), stray_start, stray_end, NULL, parser->allocator);
|
|
1086
|
+
hb_array_append(children, literal);
|
|
1087
|
+
|
|
1088
|
+
token_free(percent, parser->allocator);
|
|
1089
|
+
token_free(gt, parser->allocator);
|
|
1090
|
+
|
|
1091
|
+
continue;
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
token_free(peek_token, parser->allocator);
|
|
801
1095
|
}
|
|
802
1096
|
|
|
803
1097
|
parser_append_unexpected_error(
|
|
804
1098
|
parser,
|
|
1099
|
+
errors,
|
|
805
1100
|
"Unexpected Token",
|
|
806
|
-
|
|
1101
|
+
TOKEN_IDENTIFIER,
|
|
1102
|
+
TOKEN_AT,
|
|
1103
|
+
TOKEN_ERB_START,
|
|
1104
|
+
TOKEN_WHITESPACE,
|
|
1105
|
+
TOKEN_NEWLINE
|
|
1106
|
+
);
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
if (token_is(parser, TOKEN_EOF)) {
|
|
1110
|
+
append_unclosed_open_tag_error(
|
|
1111
|
+
tag_name,
|
|
1112
|
+
tag_name->location.start,
|
|
1113
|
+
parser->current_token->location.start,
|
|
1114
|
+
parser->allocator,
|
|
807
1115
|
errors
|
|
808
1116
|
);
|
|
1117
|
+
|
|
1118
|
+
AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
|
|
1119
|
+
tag_start,
|
|
1120
|
+
tag_name,
|
|
1121
|
+
NULL,
|
|
1122
|
+
children,
|
|
1123
|
+
false,
|
|
1124
|
+
tag_start->location.start,
|
|
1125
|
+
parser->current_token->location.start,
|
|
1126
|
+
errors,
|
|
1127
|
+
parser->allocator
|
|
1128
|
+
);
|
|
1129
|
+
|
|
1130
|
+
token_free(tag_start, parser->allocator);
|
|
1131
|
+
token_free(tag_name, parser->allocator);
|
|
1132
|
+
|
|
1133
|
+
return open_tag_node;
|
|
809
1134
|
}
|
|
810
1135
|
|
|
811
1136
|
bool is_self_closing = false;
|
|
@@ -816,8 +1141,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
816
1141
|
tag_end = parser_consume_expected(parser, TOKEN_HTML_TAG_SELF_CLOSE, errors);
|
|
817
1142
|
|
|
818
1143
|
if (tag_end == NULL) {
|
|
819
|
-
token_free(tag_start);
|
|
820
|
-
token_free(tag_name);
|
|
1144
|
+
token_free(tag_start, parser->allocator);
|
|
1145
|
+
token_free(tag_name, parser->allocator);
|
|
821
1146
|
|
|
822
1147
|
hb_array_free(&children);
|
|
823
1148
|
hb_array_free(&errors);
|
|
@@ -836,19 +1161,20 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
836
1161
|
is_self_closing,
|
|
837
1162
|
tag_start->location.start,
|
|
838
1163
|
tag_end->location.end,
|
|
839
|
-
errors
|
|
1164
|
+
errors,
|
|
1165
|
+
parser->allocator
|
|
840
1166
|
);
|
|
841
1167
|
|
|
842
|
-
token_free(tag_start);
|
|
843
|
-
token_free(tag_name);
|
|
844
|
-
token_free(tag_end);
|
|
1168
|
+
token_free(tag_start, parser->allocator);
|
|
1169
|
+
token_free(tag_name, parser->allocator);
|
|
1170
|
+
token_free(tag_end, parser->allocator);
|
|
845
1171
|
|
|
846
1172
|
return open_tag_node;
|
|
847
1173
|
}
|
|
848
1174
|
|
|
849
1175
|
static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
|
|
850
|
-
hb_array_T* errors = hb_array_init(8);
|
|
851
|
-
hb_array_T* children = hb_array_init(8);
|
|
1176
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1177
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
852
1178
|
|
|
853
1179
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
|
854
1180
|
|
|
@@ -858,38 +1184,53 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
|
|
|
858
1184
|
|
|
859
1185
|
parser_consume_whitespace(parser, children);
|
|
860
1186
|
|
|
861
|
-
token_T* tag_closing =
|
|
1187
|
+
token_T* tag_closing = parser_consume_if_present(parser, TOKEN_HTML_TAG_END);
|
|
1188
|
+
|
|
1189
|
+
if (tag_closing == NULL) {
|
|
1190
|
+
append_unclosed_close_tag_error(
|
|
1191
|
+
tag_name,
|
|
1192
|
+
tag_opening->location.start,
|
|
1193
|
+
tag_name->location.end,
|
|
1194
|
+
parser->allocator,
|
|
1195
|
+
errors
|
|
1196
|
+
);
|
|
1197
|
+
}
|
|
862
1198
|
|
|
863
|
-
if (tag_name != NULL && is_void_element(
|
|
864
|
-
|
|
865
|
-
hb_string_T
|
|
1199
|
+
if (tag_closing != NULL && tag_name != NULL && is_void_element(tag_name->value)
|
|
1200
|
+
&& parser_in_svg_context(parser) == false) {
|
|
1201
|
+
hb_string_T expected = html_self_closing_tag_string(tag_name->value, parser->allocator);
|
|
1202
|
+
hb_string_T got = html_closing_tag_string(tag_name->value, parser->allocator);
|
|
866
1203
|
|
|
867
1204
|
append_void_element_closing_tag_error(
|
|
868
1205
|
tag_name,
|
|
869
|
-
expected
|
|
870
|
-
got
|
|
1206
|
+
expected,
|
|
1207
|
+
got,
|
|
871
1208
|
tag_opening->location.start,
|
|
872
1209
|
tag_closing->location.end,
|
|
1210
|
+
parser->allocator,
|
|
873
1211
|
errors
|
|
874
1212
|
);
|
|
875
1213
|
|
|
876
|
-
|
|
877
|
-
|
|
1214
|
+
hb_allocator_dealloc(parser->allocator, expected.data);
|
|
1215
|
+
hb_allocator_dealloc(parser->allocator, got.data);
|
|
878
1216
|
}
|
|
879
1217
|
|
|
1218
|
+
position_T end_position = tag_closing != NULL ? tag_closing->location.end : tag_name->location.end;
|
|
1219
|
+
|
|
880
1220
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
|
|
881
1221
|
tag_opening,
|
|
882
1222
|
tag_name,
|
|
883
1223
|
children,
|
|
884
1224
|
tag_closing,
|
|
885
1225
|
tag_opening->location.start,
|
|
886
|
-
|
|
887
|
-
errors
|
|
1226
|
+
end_position,
|
|
1227
|
+
errors,
|
|
1228
|
+
parser->allocator
|
|
888
1229
|
);
|
|
889
1230
|
|
|
890
|
-
token_free(tag_opening);
|
|
891
|
-
token_free(tag_name);
|
|
892
|
-
token_free(tag_closing);
|
|
1231
|
+
token_free(tag_opening, parser->allocator);
|
|
1232
|
+
token_free(tag_name, parser->allocator);
|
|
1233
|
+
token_free(tag_closing, parser->allocator);
|
|
893
1234
|
|
|
894
1235
|
return close_tag;
|
|
895
1236
|
}
|
|
@@ -900,7 +1241,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
|
|
|
900
1241
|
AST_HTML_OPEN_TAG_NODE_T* open_tag
|
|
901
1242
|
) {
|
|
902
1243
|
return ast_html_element_node_init(
|
|
903
|
-
open_tag,
|
|
1244
|
+
(AST_NODE_T*) open_tag,
|
|
904
1245
|
open_tag->tag_name,
|
|
905
1246
|
NULL,
|
|
906
1247
|
NULL,
|
|
@@ -908,7 +1249,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
|
|
|
908
1249
|
ELEMENT_SOURCE_HTML,
|
|
909
1250
|
open_tag->base.location.start,
|
|
910
1251
|
open_tag->base.location.end,
|
|
911
|
-
NULL
|
|
1252
|
+
NULL,
|
|
1253
|
+
parser->allocator
|
|
912
1254
|
);
|
|
913
1255
|
}
|
|
914
1256
|
|
|
@@ -916,63 +1258,88 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
|
|
|
916
1258
|
parser_T* parser,
|
|
917
1259
|
AST_HTML_OPEN_TAG_NODE_T* open_tag
|
|
918
1260
|
) {
|
|
919
|
-
hb_array_T* errors = hb_array_init(8);
|
|
920
|
-
hb_array_T* body = hb_array_init(8);
|
|
1261
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1262
|
+
hb_array_T* body = hb_array_init(8, parser->allocator);
|
|
921
1263
|
|
|
922
1264
|
parser_push_open_tag(parser, open_tag->tag_name);
|
|
923
1265
|
|
|
924
|
-
if (open_tag->tag_name->value && parser_is_foreign_content_tag(
|
|
925
|
-
foreign_content_type_T content_type = parser_get_foreign_content_type(
|
|
1266
|
+
if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
|
|
1267
|
+
foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
|
|
926
1268
|
parser_enter_foreign_content(parser, content_type);
|
|
927
1269
|
parser_parse_foreign_content(parser, body, errors);
|
|
928
1270
|
} else {
|
|
929
1271
|
parser_parse_in_data_state(parser, body, errors);
|
|
930
1272
|
}
|
|
931
1273
|
|
|
932
|
-
if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1274
|
+
if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1275
|
+
return parser_handle_missing_close_tag(parser, open_tag, body, errors);
|
|
1276
|
+
}
|
|
933
1277
|
|
|
934
1278
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
|
|
935
1279
|
|
|
936
|
-
if (parser_in_svg_context(parser) == false && is_void_element(
|
|
1280
|
+
if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
|
|
937
1281
|
hb_array_push(body, close_tag);
|
|
938
1282
|
parser_parse_in_data_state(parser, body, errors);
|
|
939
1283
|
close_tag = parser_parse_html_close_tag(parser);
|
|
940
1284
|
}
|
|
941
1285
|
|
|
942
|
-
bool matches_stack = parser_check_matching_tag(parser,
|
|
1286
|
+
bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
|
|
943
1287
|
|
|
944
1288
|
if (matches_stack) {
|
|
945
1289
|
token_T* popped_token = parser_pop_open_tag(parser);
|
|
946
|
-
token_free(popped_token);
|
|
1290
|
+
token_free(popped_token, parser->allocator);
|
|
1291
|
+
} else if (parser_can_close_ancestor(parser, close_tag->tag_name->value)) {
|
|
1292
|
+
size_t depth = parser_find_ancestor_depth(parser, close_tag->tag_name->value);
|
|
1293
|
+
|
|
1294
|
+
for (size_t i = 0; i < depth; i++) {
|
|
1295
|
+
token_T* unclosed = parser_pop_open_tag(parser);
|
|
1296
|
+
|
|
1297
|
+
if (unclosed != NULL) {
|
|
1298
|
+
append_missing_closing_tag_error(
|
|
1299
|
+
unclosed,
|
|
1300
|
+
unclosed->location.start,
|
|
1301
|
+
unclosed->location.end,
|
|
1302
|
+
parser->allocator,
|
|
1303
|
+
errors
|
|
1304
|
+
);
|
|
1305
|
+
token_free(unclosed, parser->allocator);
|
|
1306
|
+
}
|
|
1307
|
+
}
|
|
1308
|
+
|
|
1309
|
+
token_T* popped_token = parser_pop_open_tag(parser);
|
|
1310
|
+
token_free(popped_token, parser->allocator);
|
|
947
1311
|
} else {
|
|
948
1312
|
parser_handle_mismatched_tags(parser, close_tag, errors);
|
|
949
1313
|
}
|
|
950
1314
|
|
|
951
1315
|
return ast_html_element_node_init(
|
|
952
|
-
open_tag,
|
|
1316
|
+
(AST_NODE_T*) open_tag,
|
|
953
1317
|
open_tag->tag_name,
|
|
954
1318
|
body,
|
|
955
|
-
close_tag,
|
|
1319
|
+
(AST_NODE_T*) close_tag,
|
|
956
1320
|
false,
|
|
957
1321
|
ELEMENT_SOURCE_HTML,
|
|
958
1322
|
open_tag->base.location.start,
|
|
959
1323
|
close_tag->base.location.end,
|
|
960
|
-
errors
|
|
1324
|
+
errors,
|
|
1325
|
+
parser->allocator
|
|
961
1326
|
);
|
|
962
1327
|
}
|
|
963
1328
|
|
|
964
1329
|
static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
|
|
965
1330
|
AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
|
|
966
1331
|
|
|
1332
|
+
if (open_tag->tag_closing == NULL) { return (AST_NODE_T*) open_tag; }
|
|
1333
|
+
|
|
967
1334
|
// <tag />
|
|
968
1335
|
if (open_tag->is_void) { return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag); }
|
|
969
1336
|
|
|
970
1337
|
// <tag>, in void element list, and not inside an <svg> element
|
|
971
|
-
if (!open_tag->is_void && is_void_element(
|
|
1338
|
+
if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
|
|
972
1339
|
return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag);
|
|
973
1340
|
}
|
|
974
1341
|
|
|
975
|
-
if (open_tag->tag_name->value && parser_is_foreign_content_tag(
|
|
1342
|
+
if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
|
|
976
1343
|
AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
|
|
977
1344
|
|
|
978
1345
|
if (regular_element != NULL) { return (AST_NODE_T*) regular_element; }
|
|
@@ -982,11 +1349,38 @@ static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
|
|
|
982
1349
|
}
|
|
983
1350
|
|
|
984
1351
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
985
|
-
hb_array_T* errors = hb_array_init(8);
|
|
1352
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
986
1353
|
|
|
987
1354
|
token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
|
|
988
1355
|
token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
|
|
989
|
-
|
|
1356
|
+
|
|
1357
|
+
token_T* closing_tag = NULL;
|
|
1358
|
+
position_T end_position;
|
|
1359
|
+
|
|
1360
|
+
if (token_is(parser, TOKEN_ERB_END)) {
|
|
1361
|
+
closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
|
|
1362
|
+
end_position = closing_tag->location.end;
|
|
1363
|
+
} else if (token_is(parser, TOKEN_ERB_START)) {
|
|
1364
|
+
append_nested_erb_tag_error(
|
|
1365
|
+
opening_tag,
|
|
1366
|
+
parser->current_token->location.start.line,
|
|
1367
|
+
parser->current_token->location.start.column,
|
|
1368
|
+
parser->current_token->location.start,
|
|
1369
|
+
parser->current_token->location.end,
|
|
1370
|
+
parser->allocator,
|
|
1371
|
+
errors
|
|
1372
|
+
);
|
|
1373
|
+
end_position = parser->current_token->location.start;
|
|
1374
|
+
} else {
|
|
1375
|
+
append_unclosed_erb_tag_error(
|
|
1376
|
+
opening_tag,
|
|
1377
|
+
opening_tag->location.start,
|
|
1378
|
+
parser->current_token->location.start,
|
|
1379
|
+
parser->allocator,
|
|
1380
|
+
errors
|
|
1381
|
+
);
|
|
1382
|
+
end_position = parser->current_token->location.start;
|
|
1383
|
+
}
|
|
990
1384
|
|
|
991
1385
|
AST_ERB_CONTENT_NODE_T* erb_node = ast_erb_content_node_init(
|
|
992
1386
|
opening_tag,
|
|
@@ -995,27 +1389,29 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
|
995
1389
|
NULL,
|
|
996
1390
|
false,
|
|
997
1391
|
false,
|
|
1392
|
+
HERB_PRISM_NODE_EMPTY,
|
|
998
1393
|
opening_tag->location.start,
|
|
999
|
-
|
|
1000
|
-
errors
|
|
1394
|
+
end_position,
|
|
1395
|
+
errors,
|
|
1396
|
+
parser->allocator
|
|
1001
1397
|
);
|
|
1002
1398
|
|
|
1003
|
-
token_free(opening_tag);
|
|
1004
|
-
token_free(content);
|
|
1005
|
-
token_free(closing_tag);
|
|
1399
|
+
token_free(opening_tag, parser->allocator);
|
|
1400
|
+
token_free(content, parser->allocator);
|
|
1401
|
+
if (closing_tag != NULL) { token_free(closing_tag, parser->allocator); }
|
|
1006
1402
|
|
|
1007
1403
|
return erb_node;
|
|
1008
1404
|
}
|
|
1009
1405
|
|
|
1010
1406
|
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
1011
1407
|
hb_buffer_T content;
|
|
1012
|
-
hb_buffer_init(&content, 1024);
|
|
1408
|
+
hb_buffer_init(&content, 1024, parser->allocator);
|
|
1013
1409
|
position_T start = parser->current_token->location.start;
|
|
1014
1410
|
hb_string_T expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
|
|
1015
1411
|
|
|
1016
1412
|
if (hb_string_is_empty(expected_closing_tag)) {
|
|
1017
1413
|
parser_exit_foreign_content(parser);
|
|
1018
|
-
|
|
1414
|
+
hb_buffer_free(&content);
|
|
1019
1415
|
|
|
1020
1416
|
return;
|
|
1021
1417
|
}
|
|
@@ -1038,33 +1434,32 @@ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children,
|
|
|
1038
1434
|
token_T* next_token = lexer_next_token(parser->lexer);
|
|
1039
1435
|
bool is_potential_match = false;
|
|
1040
1436
|
|
|
1041
|
-
if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
|
|
1042
|
-
is_potential_match =
|
|
1043
|
-
parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type);
|
|
1437
|
+
if (next_token && next_token->type == TOKEN_IDENTIFIER && !hb_string_is_empty(next_token->value)) {
|
|
1438
|
+
is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
|
|
1044
1439
|
}
|
|
1045
1440
|
|
|
1046
1441
|
lexer_restore_state(parser->lexer, saved_state);
|
|
1047
1442
|
|
|
1048
|
-
if (next_token) { token_free(next_token); }
|
|
1443
|
+
if (next_token) { token_free(next_token, parser->allocator); }
|
|
1049
1444
|
|
|
1050
1445
|
if (is_potential_match) {
|
|
1051
1446
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1052
1447
|
parser_exit_foreign_content(parser);
|
|
1053
1448
|
|
|
1054
|
-
|
|
1449
|
+
hb_buffer_free(&content);
|
|
1055
1450
|
|
|
1056
1451
|
return;
|
|
1057
1452
|
}
|
|
1058
1453
|
}
|
|
1059
1454
|
|
|
1060
1455
|
token_T* token = parser_advance(parser);
|
|
1061
|
-
|
|
1062
|
-
token_free(token);
|
|
1456
|
+
hb_buffer_append_string(&content, token->value);
|
|
1457
|
+
token_free(token, parser->allocator);
|
|
1063
1458
|
}
|
|
1064
1459
|
|
|
1065
1460
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1066
1461
|
parser_exit_foreign_content(parser);
|
|
1067
|
-
|
|
1462
|
+
hb_buffer_free(&content);
|
|
1068
1463
|
}
|
|
1069
1464
|
|
|
1070
1465
|
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
@@ -1072,36 +1467,43 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
|
|
|
1072
1467
|
|
|
1073
1468
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
1074
1469
|
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
1470
|
+
parser->consecutive_error_count = 0;
|
|
1075
1471
|
continue;
|
|
1076
1472
|
}
|
|
1077
1473
|
|
|
1078
1474
|
if (token_is(parser, TOKEN_HTML_DOCTYPE)) {
|
|
1079
1475
|
hb_array_append(children, parser_parse_html_doctype(parser));
|
|
1476
|
+
parser->consecutive_error_count = 0;
|
|
1080
1477
|
continue;
|
|
1081
1478
|
}
|
|
1082
1479
|
|
|
1083
1480
|
if (token_is(parser, TOKEN_XML_DECLARATION)) {
|
|
1084
1481
|
hb_array_append(children, parser_parse_xml_declaration(parser));
|
|
1482
|
+
parser->consecutive_error_count = 0;
|
|
1085
1483
|
continue;
|
|
1086
1484
|
}
|
|
1087
1485
|
|
|
1088
1486
|
if (token_is(parser, TOKEN_CDATA_START)) {
|
|
1089
1487
|
hb_array_append(children, parser_parse_cdata(parser));
|
|
1488
|
+
parser->consecutive_error_count = 0;
|
|
1090
1489
|
continue;
|
|
1091
1490
|
}
|
|
1092
1491
|
|
|
1093
1492
|
if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
|
|
1094
1493
|
hb_array_append(children, parser_parse_html_comment(parser));
|
|
1494
|
+
parser->consecutive_error_count = 0;
|
|
1095
1495
|
continue;
|
|
1096
1496
|
}
|
|
1097
1497
|
|
|
1098
1498
|
if (token_is(parser, TOKEN_HTML_TAG_START)) {
|
|
1099
1499
|
hb_array_append(children, parser_parse_html_element(parser));
|
|
1500
|
+
parser->consecutive_error_count = 0;
|
|
1100
1501
|
continue;
|
|
1101
1502
|
}
|
|
1102
1503
|
|
|
1103
1504
|
if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1104
1505
|
hb_array_append(children, parser_parse_html_close_tag(parser));
|
|
1506
|
+
parser->consecutive_error_count = 0;
|
|
1105
1507
|
continue;
|
|
1106
1508
|
}
|
|
1107
1509
|
|
|
@@ -1129,16 +1531,35 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
|
|
|
1129
1531
|
TOKEN_WHITESPACE
|
|
1130
1532
|
)) {
|
|
1131
1533
|
hb_array_append(children, parser_parse_text_content(parser, errors));
|
|
1534
|
+
parser->consecutive_error_count = 0;
|
|
1535
|
+
continue;
|
|
1536
|
+
}
|
|
1537
|
+
|
|
1538
|
+
parser->consecutive_error_count++;
|
|
1539
|
+
|
|
1540
|
+
if (parser->consecutive_error_count >= MAX_CONSECUTIVE_ERRORS) {
|
|
1541
|
+
parser->in_recovery_mode = true;
|
|
1542
|
+
parser_synchronize(parser, errors);
|
|
1543
|
+
parser->consecutive_error_count = 0;
|
|
1132
1544
|
continue;
|
|
1133
1545
|
}
|
|
1134
1546
|
|
|
1135
1547
|
parser_append_unexpected_error(
|
|
1136
1548
|
parser,
|
|
1549
|
+
errors,
|
|
1137
1550
|
"Unexpected token",
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1551
|
+
TOKEN_ERB_START,
|
|
1552
|
+
TOKEN_HTML_DOCTYPE,
|
|
1553
|
+
TOKEN_HTML_COMMENT_START,
|
|
1554
|
+
TOKEN_IDENTIFIER,
|
|
1555
|
+
TOKEN_WHITESPACE,
|
|
1556
|
+
TOKEN_NBSP,
|
|
1557
|
+
TOKEN_AT,
|
|
1558
|
+
TOKEN_BACKSLASH,
|
|
1559
|
+
TOKEN_NEWLINE
|
|
1141
1560
|
);
|
|
1561
|
+
|
|
1562
|
+
parser_synchronize(parser, errors);
|
|
1142
1563
|
}
|
|
1143
1564
|
}
|
|
1144
1565
|
|
|
@@ -1152,11 +1573,11 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
|
|
|
1152
1573
|
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1153
1574
|
AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1154
1575
|
|
|
1155
|
-
if (hb_string_equals_case_insensitive(
|
|
1576
|
+
if (hb_string_equals_case_insensitive(open->tag_name->value, tag_name)) { depth++; }
|
|
1156
1577
|
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1157
1578
|
AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1158
1579
|
|
|
1159
|
-
if (hb_string_equals_case_insensitive(
|
|
1580
|
+
if (hb_string_equals_case_insensitive(close->tag_name->value, tag_name)) {
|
|
1160
1581
|
if (depth == 0) { return i; }
|
|
1161
1582
|
depth--;
|
|
1162
1583
|
}
|
|
@@ -1166,10 +1587,44 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
|
|
|
1166
1587
|
return (size_t) -1;
|
|
1167
1588
|
}
|
|
1168
1589
|
|
|
1169
|
-
static
|
|
1590
|
+
static size_t find_implicit_close_index(hb_array_T* nodes, size_t start_idx, hb_string_T tag_name) {
|
|
1591
|
+
if (!has_optional_end_tag(tag_name)) { return (size_t) -1; }
|
|
1592
|
+
|
|
1593
|
+
for (size_t i = start_idx + 1; i < hb_array_size(nodes); i++) {
|
|
1594
|
+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1595
|
+
if (node == NULL) { continue; }
|
|
1596
|
+
|
|
1597
|
+
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1598
|
+
AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1599
|
+
hb_string_T next_tag_name = open->tag_name->value;
|
|
1600
|
+
|
|
1601
|
+
if (should_implicitly_close(tag_name, next_tag_name)) { return i; }
|
|
1602
|
+
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1603
|
+
AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1604
|
+
hb_string_T close_tag_name = close->tag_name->value;
|
|
1605
|
+
|
|
1606
|
+
if (parent_closes_element(tag_name, close_tag_name)) { return i; }
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
return hb_array_size(nodes);
|
|
1611
|
+
}
|
|
1170
1612
|
|
|
1171
|
-
static hb_array_T* parser_build_elements_from_tags(
|
|
1172
|
-
hb_array_T*
|
|
1613
|
+
static hb_array_T* parser_build_elements_from_tags(
|
|
1614
|
+
hb_array_T* nodes,
|
|
1615
|
+
hb_array_T* errors,
|
|
1616
|
+
const parser_options_T* options,
|
|
1617
|
+
hb_allocator_T* allocator
|
|
1618
|
+
);
|
|
1619
|
+
|
|
1620
|
+
static hb_array_T* parser_build_elements_from_tags(
|
|
1621
|
+
hb_array_T* nodes,
|
|
1622
|
+
hb_array_T* errors,
|
|
1623
|
+
const parser_options_T* options,
|
|
1624
|
+
hb_allocator_T* allocator
|
|
1625
|
+
) {
|
|
1626
|
+
bool strict = options ? options->strict : false;
|
|
1627
|
+
hb_array_T* result = hb_array_init(hb_array_size(nodes), allocator);
|
|
1173
1628
|
|
|
1174
1629
|
for (size_t index = 0; index < hb_array_size(nodes); index++) {
|
|
1175
1630
|
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, index);
|
|
@@ -1177,45 +1632,105 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
|
|
|
1177
1632
|
|
|
1178
1633
|
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1179
1634
|
AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1180
|
-
hb_string_T tag_name =
|
|
1635
|
+
hb_string_T tag_name = open_tag->tag_name->value;
|
|
1181
1636
|
|
|
1182
1637
|
size_t close_index = find_matching_close_tag(nodes, index, tag_name);
|
|
1183
1638
|
|
|
1184
1639
|
if (close_index == (size_t) -1) {
|
|
1185
|
-
|
|
1186
|
-
|
|
1640
|
+
size_t implicit_close_index = find_implicit_close_index(nodes, index, tag_name);
|
|
1641
|
+
|
|
1642
|
+
if (implicit_close_index != (size_t) -1 && implicit_close_index > index + 1) {
|
|
1643
|
+
hb_array_T* body = hb_array_init(implicit_close_index - index - 1, allocator);
|
|
1644
|
+
|
|
1645
|
+
for (size_t j = index + 1; j < implicit_close_index; j++) {
|
|
1646
|
+
hb_array_append(body, hb_array_get(nodes, j));
|
|
1647
|
+
}
|
|
1648
|
+
|
|
1649
|
+
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
|
|
1650
|
+
hb_array_free(&body);
|
|
1651
|
+
|
|
1652
|
+
position_T end_position = open_tag->base.location.end;
|
|
1653
|
+
|
|
1654
|
+
if (hb_array_size(processed_body) > 0) {
|
|
1655
|
+
AST_NODE_T* last_body_node = (AST_NODE_T*) hb_array_get(processed_body, hb_array_size(processed_body) - 1);
|
|
1656
|
+
if (last_body_node != NULL) { end_position = last_body_node->location.end; }
|
|
1657
|
+
}
|
|
1658
|
+
|
|
1659
|
+
hb_array_T* element_errors = hb_array_init(8, allocator);
|
|
1660
|
+
|
|
1661
|
+
if (strict) {
|
|
1662
|
+
append_omitted_closing_tag_error(
|
|
1663
|
+
open_tag->tag_name,
|
|
1664
|
+
end_position,
|
|
1665
|
+
open_tag->base.location.start,
|
|
1666
|
+
open_tag->base.location.end,
|
|
1667
|
+
allocator,
|
|
1668
|
+
element_errors
|
|
1669
|
+
);
|
|
1670
|
+
}
|
|
1671
|
+
|
|
1672
|
+
AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = ast_html_omitted_close_tag_node_init(
|
|
1673
|
+
open_tag->tag_name,
|
|
1674
|
+
end_position,
|
|
1675
|
+
end_position,
|
|
1676
|
+
hb_array_init(8, allocator),
|
|
1677
|
+
allocator
|
|
1678
|
+
);
|
|
1679
|
+
|
|
1680
|
+
AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
|
|
1681
|
+
(AST_NODE_T*) open_tag,
|
|
1187
1682
|
open_tag->tag_name,
|
|
1683
|
+
processed_body,
|
|
1684
|
+
(AST_NODE_T*) omitted_close_tag,
|
|
1685
|
+
false,
|
|
1686
|
+
ELEMENT_SOURCE_HTML,
|
|
1188
1687
|
open_tag->base.location.start,
|
|
1189
|
-
|
|
1190
|
-
|
|
1688
|
+
end_position,
|
|
1689
|
+
element_errors,
|
|
1690
|
+
allocator
|
|
1191
1691
|
);
|
|
1192
|
-
}
|
|
1193
1692
|
|
|
1194
|
-
|
|
1693
|
+
hb_array_append(result, element);
|
|
1694
|
+
|
|
1695
|
+
index = implicit_close_index - 1;
|
|
1696
|
+
} else {
|
|
1697
|
+
if (hb_array_size(open_tag->base.errors) == 0) {
|
|
1698
|
+
append_missing_closing_tag_error(
|
|
1699
|
+
open_tag->tag_name,
|
|
1700
|
+
open_tag->base.location.start,
|
|
1701
|
+
open_tag->base.location.end,
|
|
1702
|
+
allocator,
|
|
1703
|
+
open_tag->base.errors
|
|
1704
|
+
);
|
|
1705
|
+
}
|
|
1706
|
+
|
|
1707
|
+
hb_array_append(result, node);
|
|
1708
|
+
}
|
|
1195
1709
|
} else {
|
|
1196
1710
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) hb_array_get(nodes, close_index);
|
|
1197
1711
|
|
|
1198
|
-
hb_array_T* body = hb_array_init(close_index - index - 1);
|
|
1712
|
+
hb_array_T* body = hb_array_init(close_index - index - 1, allocator);
|
|
1199
1713
|
|
|
1200
1714
|
for (size_t j = index + 1; j < close_index; j++) {
|
|
1201
1715
|
hb_array_append(body, hb_array_get(nodes, j));
|
|
1202
1716
|
}
|
|
1203
1717
|
|
|
1204
|
-
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors);
|
|
1718
|
+
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
|
|
1205
1719
|
hb_array_free(&body);
|
|
1206
1720
|
|
|
1207
|
-
hb_array_T* element_errors = hb_array_init(8);
|
|
1721
|
+
hb_array_T* element_errors = hb_array_init(8, allocator);
|
|
1208
1722
|
|
|
1209
1723
|
AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
|
|
1210
|
-
open_tag,
|
|
1724
|
+
(AST_NODE_T*) open_tag,
|
|
1211
1725
|
open_tag->tag_name,
|
|
1212
1726
|
processed_body,
|
|
1213
|
-
close_tag,
|
|
1727
|
+
(AST_NODE_T*) close_tag,
|
|
1214
1728
|
false,
|
|
1215
1729
|
ELEMENT_SOURCE_HTML,
|
|
1216
1730
|
open_tag->base.location.start,
|
|
1217
1731
|
close_tag->base.location.end,
|
|
1218
|
-
element_errors
|
|
1732
|
+
element_errors,
|
|
1733
|
+
allocator
|
|
1219
1734
|
);
|
|
1220
1735
|
|
|
1221
1736
|
hb_array_append(result, element);
|
|
@@ -1225,12 +1740,13 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
|
|
|
1225
1740
|
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1226
1741
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1227
1742
|
|
|
1228
|
-
if (!is_void_element(
|
|
1743
|
+
if (!is_void_element(close_tag->tag_name->value)) {
|
|
1229
1744
|
if (hb_array_size(close_tag->base.errors) == 0) {
|
|
1230
1745
|
append_missing_opening_tag_error(
|
|
1231
1746
|
close_tag->tag_name,
|
|
1232
1747
|
close_tag->base.location.start,
|
|
1233
1748
|
close_tag->base.location.end,
|
|
1749
|
+
allocator,
|
|
1234
1750
|
close_tag->base.errors
|
|
1235
1751
|
);
|
|
1236
1752
|
}
|
|
@@ -1246,17 +1762,18 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
|
|
|
1246
1762
|
}
|
|
1247
1763
|
|
|
1248
1764
|
static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) {
|
|
1249
|
-
hb_array_T* children = hb_array_init(8);
|
|
1250
|
-
hb_array_T* errors = hb_array_init(8);
|
|
1765
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
1766
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1251
1767
|
position_T start = parser->current_token->location.start;
|
|
1252
1768
|
|
|
1253
1769
|
parser_parse_in_data_state(parser, children, errors);
|
|
1254
1770
|
|
|
1255
1771
|
token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors);
|
|
1256
1772
|
|
|
1257
|
-
AST_DOCUMENT_NODE_T* document_node =
|
|
1773
|
+
AST_DOCUMENT_NODE_T* document_node =
|
|
1774
|
+
ast_document_node_init(children, NULL, HERB_PRISM_NODE_EMPTY, start, eof->location.end, errors, parser->allocator);
|
|
1258
1775
|
|
|
1259
|
-
token_free(eof);
|
|
1776
|
+
token_free(eof, parser->allocator);
|
|
1260
1777
|
|
|
1261
1778
|
return document_node;
|
|
1262
1779
|
}
|
|
@@ -1267,17 +1784,18 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser) {
|
|
|
1267
1784
|
|
|
1268
1785
|
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children) {
|
|
1269
1786
|
if (parser->options.track_whitespace) {
|
|
1270
|
-
hb_array_T* errors = hb_array_init(8);
|
|
1787
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1271
1788
|
AST_WHITESPACE_NODE_T* whitespace_node = ast_whitespace_node_init(
|
|
1272
1789
|
whitespace_token,
|
|
1273
1790
|
whitespace_token->location.start,
|
|
1274
1791
|
whitespace_token->location.end,
|
|
1275
|
-
errors
|
|
1792
|
+
errors,
|
|
1793
|
+
parser->allocator
|
|
1276
1794
|
);
|
|
1277
1795
|
hb_array_append(children, whitespace_node);
|
|
1278
1796
|
}
|
|
1279
1797
|
|
|
1280
|
-
token_free(whitespace_token);
|
|
1798
|
+
token_free(whitespace_token, parser->allocator);
|
|
1281
1799
|
}
|
|
1282
1800
|
|
|
1283
1801
|
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
@@ -1287,7 +1805,7 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
|
1287
1805
|
if (parser->options.track_whitespace && children != NULL) {
|
|
1288
1806
|
parser_handle_whitespace(parser, whitespace, children);
|
|
1289
1807
|
} else {
|
|
1290
|
-
token_free(whitespace);
|
|
1808
|
+
token_free(whitespace, parser->allocator);
|
|
1291
1809
|
}
|
|
1292
1810
|
}
|
|
1293
1811
|
}
|
|
@@ -1295,14 +1813,27 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
|
1295
1813
|
void herb_parser_deinit(parser_T* parser) {
|
|
1296
1814
|
if (parser == NULL) { return; }
|
|
1297
1815
|
|
|
1298
|
-
if (parser->current_token != NULL) { token_free(parser->current_token); }
|
|
1299
|
-
|
|
1816
|
+
if (parser->current_token != NULL) { token_free(parser->current_token, parser->allocator); }
|
|
1817
|
+
|
|
1818
|
+
if (parser->open_tags_stack != NULL) {
|
|
1819
|
+
for (size_t i = 0; i < hb_array_size(parser->open_tags_stack); i++) {
|
|
1820
|
+
token_T* token = (token_T*) hb_array_get(parser->open_tags_stack, i);
|
|
1821
|
+
if (token != NULL) { token_free(token, parser->allocator); }
|
|
1822
|
+
}
|
|
1823
|
+
|
|
1824
|
+
hb_array_free(&parser->open_tags_stack);
|
|
1825
|
+
}
|
|
1300
1826
|
}
|
|
1301
1827
|
|
|
1302
|
-
void match_tags_in_node_array(
|
|
1828
|
+
void match_tags_in_node_array(
|
|
1829
|
+
hb_array_T* nodes,
|
|
1830
|
+
hb_array_T* errors,
|
|
1831
|
+
const parser_options_T* options,
|
|
1832
|
+
hb_allocator_T* allocator
|
|
1833
|
+
) {
|
|
1303
1834
|
if (nodes == NULL || hb_array_size(nodes) == 0) { return; }
|
|
1304
1835
|
|
|
1305
|
-
hb_array_T* processed = parser_build_elements_from_tags(nodes, errors);
|
|
1836
|
+
hb_array_T* processed = parser_build_elements_from_tags(nodes, errors, options, allocator);
|
|
1306
1837
|
|
|
1307
1838
|
nodes->size = 0;
|
|
1308
1839
|
|
|
@@ -1312,16 +1843,22 @@ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
|
|
|
1312
1843
|
|
|
1313
1844
|
hb_array_free(&processed);
|
|
1314
1845
|
|
|
1846
|
+
match_tags_context_T context = { .errors = errors, .options = options, .allocator = allocator };
|
|
1847
|
+
|
|
1315
1848
|
for (size_t i = 0; i < hb_array_size(nodes); i++) {
|
|
1316
1849
|
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1317
1850
|
if (node == NULL) { continue; }
|
|
1318
1851
|
|
|
1319
|
-
herb_visit_node(node, match_tags_visitor,
|
|
1852
|
+
herb_visit_node(node, match_tags_visitor, &context);
|
|
1320
1853
|
}
|
|
1321
1854
|
}
|
|
1322
1855
|
|
|
1323
|
-
void herb_parser_match_html_tags_post_analyze(
|
|
1856
|
+
void herb_parser_match_html_tags_post_analyze(
|
|
1857
|
+
AST_DOCUMENT_NODE_T* document,
|
|
1858
|
+
const parser_options_T* options,
|
|
1859
|
+
hb_allocator_T* allocator
|
|
1860
|
+
) {
|
|
1324
1861
|
if (document == NULL) { return; }
|
|
1325
1862
|
|
|
1326
|
-
match_tags_in_node_array(document->children, document->base.errors);
|
|
1863
|
+
match_tags_in_node_array(document->children, document->base.errors, options, allocator);
|
|
1327
1864
|
}
|