@herb-tools/node 0.8.9 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/binding.gyp +26 -8
- package/dist/herb-node.cjs +41 -12
- package/dist/herb-node.cjs.map +1 -1
- package/dist/herb-node.esm.js +8 -1
- package/dist/herb-node.esm.js.map +1 -1
- package/dist/types/node-backend.d.ts +3 -1
- package/extension/error_helpers.cpp +419 -71
- package/extension/error_helpers.h +14 -3
- package/extension/extension_helpers.cpp +38 -35
- package/extension/extension_helpers.h +2 -2
- package/extension/herb.cpp +183 -64
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.c +290 -0
- package/extension/libherb/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/analyze/action_view/content_tag.c +70 -0
- package/extension/libherb/analyze/action_view/link_to.c +143 -0
- package/extension/libherb/analyze/action_view/registry.c +60 -0
- package/extension/libherb/analyze/action_view/tag.c +64 -0
- package/extension/libherb/analyze/action_view/tag_helper_handler.h +41 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.c +305 -0
- package/extension/libherb/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/analyze/action_view/tag_helpers.c +748 -0
- package/extension/libherb/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/analyze/action_view/turbo_frame_tag.c +88 -0
- package/extension/libherb/analyze/analyze.c +882 -0
- package/extension/libherb/{include → analyze}/analyze.h +14 -4
- package/extension/libherb/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
- package/extension/libherb/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/analyze/builders.c +343 -0
- package/extension/libherb/analyze/builders.h +27 -0
- package/extension/libherb/analyze/conditional_elements.c +594 -0
- package/extension/libherb/analyze/conditional_elements.h +9 -0
- package/extension/libherb/analyze/conditional_open_tags.c +640 -0
- package/extension/libherb/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/analyze/control_type.c +250 -0
- package/extension/libherb/analyze/control_type.h +14 -0
- package/extension/libherb/{analyze_helpers.c → analyze/helpers.c} +79 -31
- package/extension/libherb/{analyze_helpers.h → analyze/helpers.h} +22 -17
- package/extension/libherb/analyze/invalid_structures.c +193 -0
- package/extension/libherb/analyze/invalid_structures.h +11 -0
- package/extension/libherb/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
- package/extension/libherb/analyze/parse_errors.c +84 -0
- package/extension/libherb/analyze/prism_annotate.c +397 -0
- package/extension/libherb/analyze/prism_annotate.h +16 -0
- package/extension/libherb/{analyze_transform.c → analyze/transform.c} +17 -3
- package/extension/libherb/ast_node.c +17 -7
- package/extension/libherb/ast_node.h +11 -5
- package/extension/libherb/ast_nodes.c +663 -388
- package/extension/libherb/ast_nodes.h +118 -39
- package/extension/libherb/ast_pretty_print.c +191 -7
- package/extension/libherb/ast_pretty_print.h +6 -1
- package/extension/libherb/element_source.h +3 -8
- package/extension/libherb/errors.c +1100 -507
- package/extension/libherb/errors.h +155 -54
- package/extension/libherb/extract.c +148 -49
- package/extension/libherb/extract.h +21 -5
- package/extension/libherb/herb.c +52 -34
- package/extension/libherb/herb.h +18 -6
- package/extension/libherb/herb_prism_node.h +13 -0
- package/extension/libherb/html_util.c +241 -12
- package/extension/libherb/html_util.h +7 -2
- package/extension/libherb/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_handler.h +41 -0
- package/extension/libherb/include/analyze/action_view/tag_helper_node_builders.h +70 -0
- package/extension/libherb/include/analyze/action_view/tag_helpers.h +38 -0
- package/extension/libherb/{analyze.h → include/analyze/analyze.h} +14 -4
- package/extension/libherb/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
- package/extension/libherb/include/analyze/builders.h +27 -0
- package/extension/libherb/include/analyze/conditional_elements.h +9 -0
- package/extension/libherb/include/analyze/conditional_open_tags.h +9 -0
- package/extension/libherb/include/analyze/control_type.h +14 -0
- package/extension/libherb/include/{analyze_helpers.h → analyze/helpers.h} +22 -17
- package/extension/libherb/include/analyze/invalid_structures.h +11 -0
- package/extension/libherb/include/analyze/prism_annotate.h +16 -0
- package/extension/libherb/include/ast_node.h +11 -5
- package/extension/libherb/include/ast_nodes.h +118 -39
- package/extension/libherb/include/ast_pretty_print.h +6 -1
- package/extension/libherb/include/element_source.h +3 -8
- package/extension/libherb/include/errors.h +155 -54
- package/extension/libherb/include/extract.h +21 -5
- package/extension/libherb/include/herb.h +18 -6
- package/extension/libherb/include/herb_prism_node.h +13 -0
- package/extension/libherb/include/html_util.h +7 -2
- package/extension/libherb/include/io.h +3 -1
- package/extension/libherb/include/lex_helpers.h +29 -0
- package/extension/libherb/include/lexer.h +1 -1
- package/extension/libherb/include/lexer_peek_helpers.h +87 -13
- package/extension/libherb/include/lexer_struct.h +2 -0
- package/extension/libherb/include/location.h +2 -1
- package/extension/libherb/include/parser.h +27 -2
- package/extension/libherb/include/parser_helpers.h +19 -3
- package/extension/libherb/include/pretty_print.h +10 -5
- package/extension/libherb/include/prism_context.h +45 -0
- package/extension/libherb/include/prism_helpers.h +10 -7
- package/extension/libherb/include/prism_serialized.h +12 -0
- package/extension/libherb/include/token.h +16 -4
- package/extension/libherb/include/token_struct.h +10 -3
- package/extension/libherb/include/utf8.h +2 -1
- package/extension/libherb/include/util/hb_allocator.h +78 -0
- package/extension/libherb/include/util/hb_arena.h +6 -1
- package/extension/libherb/include/util/hb_arena_debug.h +12 -1
- package/extension/libherb/include/util/hb_array.h +7 -3
- package/extension/libherb/include/util/hb_buffer.h +6 -4
- package/extension/libherb/include/util/hb_foreach.h +79 -0
- package/extension/libherb/include/util/hb_narray.h +8 -4
- package/extension/libherb/include/util/hb_string.h +56 -9
- package/extension/libherb/include/util/string.h +11 -0
- package/extension/libherb/include/util.h +6 -3
- package/extension/libherb/include/version.h +1 -1
- package/extension/libherb/io.c +3 -2
- package/extension/libherb/io.h +3 -1
- package/extension/libherb/lex_helpers.h +29 -0
- package/extension/libherb/lexer.c +42 -30
- package/extension/libherb/lexer.h +1 -1
- package/extension/libherb/lexer_peek_helpers.c +12 -74
- package/extension/libherb/lexer_peek_helpers.h +87 -13
- package/extension/libherb/lexer_struct.h +2 -0
- package/extension/libherb/location.c +2 -2
- package/extension/libherb/location.h +2 -1
- package/extension/libherb/main.c +79 -66
- package/extension/libherb/parser.c +784 -247
- package/extension/libherb/parser.h +27 -2
- package/extension/libherb/parser_helpers.c +110 -23
- package/extension/libherb/parser_helpers.h +19 -3
- package/extension/libherb/parser_match_tags.c +110 -49
- package/extension/libherb/pretty_print.c +29 -24
- package/extension/libherb/pretty_print.h +10 -5
- package/extension/libherb/prism_context.h +45 -0
- package/extension/libherb/prism_helpers.c +30 -27
- package/extension/libherb/prism_helpers.h +10 -7
- package/extension/libherb/prism_serialized.h +12 -0
- package/extension/libherb/ruby_parser.c +2 -0
- package/extension/libherb/token.c +151 -66
- package/extension/libherb/token.h +16 -4
- package/extension/libherb/token_matchers.c +0 -1
- package/extension/libherb/token_struct.h +10 -3
- package/extension/libherb/utf8.c +7 -6
- package/extension/libherb/utf8.h +2 -1
- package/extension/libherb/util/hb_allocator.c +341 -0
- package/extension/libherb/util/hb_allocator.h +78 -0
- package/extension/libherb/util/hb_arena.c +81 -56
- package/extension/libherb/util/hb_arena.h +6 -1
- package/extension/libherb/util/hb_arena_debug.c +32 -17
- package/extension/libherb/util/hb_arena_debug.h +12 -1
- package/extension/libherb/util/hb_array.c +30 -15
- package/extension/libherb/util/hb_array.h +7 -3
- package/extension/libherb/util/hb_buffer.c +17 -21
- package/extension/libherb/util/hb_buffer.h +6 -4
- package/extension/libherb/util/hb_foreach.h +79 -0
- package/extension/libherb/util/hb_narray.c +22 -7
- package/extension/libherb/util/hb_narray.h +8 -4
- package/extension/libherb/util/hb_string.c +49 -35
- package/extension/libherb/util/hb_string.h +56 -9
- package/extension/libherb/util/string.h +11 -0
- package/extension/libherb/util.c +21 -11
- package/extension/libherb/util.h +6 -3
- package/extension/libherb/version.h +1 -1
- package/extension/libherb/visitor.c +48 -1
- package/extension/nodes.cpp +451 -6
- package/extension/nodes.h +8 -1
- package/extension/prism/include/prism/ast.h +4 -4
- package/extension/prism/include/prism/version.h +2 -2
- package/extension/prism/src/prism.c +1 -1
- package/package.json +12 -8
- package/src/node-backend.ts +11 -1
- package/dist/types/index-cjs.d.cts +0 -1
- package/extension/libherb/analyze.c +0 -1594
- package/extension/libherb/element_source.c +0 -12
- package/extension/libherb/include/util/hb_system.h +0 -9
- package/extension/libherb/util/hb_system.c +0 -30
- package/extension/libherb/util/hb_system.h +0 -9
- package/src/index-cjs.cts +0 -22
- /package/dist/types/{index-esm.d.mts → index.d.ts} +0 -0
- /package/src/{index-esm.mts → index.ts} +0 -0
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
#include "include/util/hb_array.h"
|
|
13
13
|
#include "include/util/hb_buffer.h"
|
|
14
14
|
#include "include/util/hb_string.h"
|
|
15
|
+
#include "include/util/string.h"
|
|
15
16
|
#include "include/visitor.h"
|
|
16
17
|
|
|
17
18
|
#include <stdio.h>
|
|
@@ -19,6 +20,8 @@
|
|
|
19
20
|
#include <string.h>
|
|
20
21
|
#include <strings.h>
|
|
21
22
|
|
|
23
|
+
#define MAX_CONSECUTIVE_ERRORS 10
|
|
24
|
+
|
|
22
25
|
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
23
26
|
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
24
27
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
|
|
@@ -26,29 +29,39 @@ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token
|
|
|
26
29
|
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children);
|
|
27
30
|
static void parser_skip_erb_content(lexer_T* lexer);
|
|
28
31
|
static bool parser_lookahead_erb_is_attribute(lexer_T* lexer);
|
|
32
|
+
static bool parser_lookahead_erb_is_control_flow(parser_T* parser);
|
|
29
33
|
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
30
34
|
static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
31
35
|
|
|
32
|
-
const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false
|
|
36
|
+
const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false,
|
|
37
|
+
.analyze = true,
|
|
38
|
+
.strict = true,
|
|
39
|
+
.action_view_helpers = false,
|
|
40
|
+
.prism_nodes_deep = false,
|
|
41
|
+
.prism_nodes = false,
|
|
42
|
+
.prism_program = false };
|
|
33
43
|
|
|
34
44
|
size_t parser_sizeof(void) {
|
|
35
45
|
return sizeof(struct PARSER_STRUCT);
|
|
36
46
|
}
|
|
37
47
|
|
|
38
48
|
void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options) {
|
|
49
|
+
parser->allocator = lexer->allocator;
|
|
39
50
|
parser->lexer = lexer;
|
|
40
51
|
parser->current_token = lexer_next_token(lexer);
|
|
41
|
-
parser->open_tags_stack = hb_array_init(16);
|
|
52
|
+
parser->open_tags_stack = hb_array_init(16, parser->allocator);
|
|
42
53
|
parser->state = PARSER_STATE_DATA;
|
|
43
54
|
parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
|
|
44
55
|
parser->options = options;
|
|
56
|
+
parser->consecutive_error_count = 0;
|
|
57
|
+
parser->in_recovery_mode = false;
|
|
45
58
|
}
|
|
46
59
|
|
|
47
60
|
static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
48
|
-
hb_array_T* errors = hb_array_init(8);
|
|
49
|
-
hb_array_T* children = hb_array_init(8);
|
|
61
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
62
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
50
63
|
hb_buffer_T content;
|
|
51
|
-
hb_buffer_init(&content, 128);
|
|
64
|
+
hb_buffer_init(&content, 128, parser->allocator);
|
|
52
65
|
|
|
53
66
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_CDATA_START, errors);
|
|
54
67
|
position_T start = parser->current_token->location.start;
|
|
@@ -63,8 +76,8 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
63
76
|
}
|
|
64
77
|
|
|
65
78
|
token_T* token = parser_advance(parser);
|
|
66
|
-
|
|
67
|
-
token_free(token);
|
|
79
|
+
hb_buffer_append_string(&content, token->value);
|
|
80
|
+
token_free(token, parser->allocator);
|
|
68
81
|
}
|
|
69
82
|
|
|
70
83
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
@@ -76,26 +89,27 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
76
89
|
tag_closing,
|
|
77
90
|
tag_opening->location.start,
|
|
78
91
|
tag_closing->location.end,
|
|
79
|
-
errors
|
|
92
|
+
errors,
|
|
93
|
+
parser->allocator
|
|
80
94
|
);
|
|
81
95
|
|
|
82
|
-
|
|
83
|
-
token_free(tag_opening);
|
|
84
|
-
token_free(tag_closing);
|
|
96
|
+
hb_buffer_free(&content);
|
|
97
|
+
token_free(tag_opening, parser->allocator);
|
|
98
|
+
token_free(tag_closing, parser->allocator);
|
|
85
99
|
|
|
86
100
|
return cdata;
|
|
87
101
|
}
|
|
88
102
|
|
|
89
103
|
static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
90
|
-
hb_array_T* errors = hb_array_init(8);
|
|
91
|
-
hb_array_T* children = hb_array_init(8);
|
|
104
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
105
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
92
106
|
token_T* comment_start = parser_consume_expected(parser, TOKEN_HTML_COMMENT_START, errors);
|
|
93
107
|
position_T start = parser->current_token->location.start;
|
|
94
108
|
|
|
95
109
|
hb_buffer_T comment;
|
|
96
|
-
hb_buffer_init(&comment, 512);
|
|
110
|
+
hb_buffer_init(&comment, 512, parser->allocator);
|
|
97
111
|
|
|
98
|
-
while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_EOF)) {
|
|
112
|
+
while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_HTML_COMMENT_INVALID_END, TOKEN_EOF)) {
|
|
99
113
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
100
114
|
parser_append_literal_node_from_buffer(parser, &comment, children, start);
|
|
101
115
|
|
|
@@ -108,13 +122,26 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
108
122
|
}
|
|
109
123
|
|
|
110
124
|
token_T* token = parser_advance(parser);
|
|
111
|
-
|
|
112
|
-
token_free(token);
|
|
125
|
+
hb_buffer_append_string(&comment, token->value);
|
|
126
|
+
token_free(token, parser->allocator);
|
|
113
127
|
}
|
|
114
128
|
|
|
115
129
|
parser_append_literal_node_from_buffer(parser, &comment, children, start);
|
|
116
130
|
|
|
117
|
-
token_T* comment_end =
|
|
131
|
+
token_T* comment_end = NULL;
|
|
132
|
+
|
|
133
|
+
if (token_is(parser, TOKEN_HTML_COMMENT_INVALID_END)) {
|
|
134
|
+
comment_end = parser_advance(parser);
|
|
135
|
+
append_invalid_comment_closing_tag_error(
|
|
136
|
+
comment_end,
|
|
137
|
+
comment_end->location.start,
|
|
138
|
+
comment_end->location.end,
|
|
139
|
+
parser->allocator,
|
|
140
|
+
errors
|
|
141
|
+
);
|
|
142
|
+
} else {
|
|
143
|
+
comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors);
|
|
144
|
+
}
|
|
118
145
|
|
|
119
146
|
AST_HTML_COMMENT_NODE_T* comment_node = ast_html_comment_node_init(
|
|
120
147
|
comment_start,
|
|
@@ -122,21 +149,22 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
122
149
|
comment_end,
|
|
123
150
|
comment_start->location.start,
|
|
124
151
|
comment_end->location.end,
|
|
125
|
-
errors
|
|
152
|
+
errors,
|
|
153
|
+
parser->allocator
|
|
126
154
|
);
|
|
127
155
|
|
|
128
|
-
|
|
129
|
-
token_free(comment_start);
|
|
130
|
-
token_free(comment_end);
|
|
156
|
+
hb_buffer_free(&comment);
|
|
157
|
+
token_free(comment_start, parser->allocator);
|
|
158
|
+
token_free(comment_end, parser->allocator);
|
|
131
159
|
|
|
132
160
|
return comment_node;
|
|
133
161
|
}
|
|
134
162
|
|
|
135
163
|
static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
136
|
-
hb_array_T* errors = hb_array_init(8);
|
|
137
|
-
hb_array_T* children = hb_array_init(8);
|
|
164
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
165
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
138
166
|
hb_buffer_T content;
|
|
139
|
-
hb_buffer_init(&content, 64);
|
|
167
|
+
hb_buffer_init(&content, 64, parser->allocator);
|
|
140
168
|
|
|
141
169
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_DOCTYPE, errors);
|
|
142
170
|
|
|
@@ -153,8 +181,8 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
|
153
181
|
}
|
|
154
182
|
|
|
155
183
|
token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
|
|
156
|
-
|
|
157
|
-
token_free(token);
|
|
184
|
+
hb_buffer_append_string(&content, token->value);
|
|
185
|
+
token_free(token, parser->allocator);
|
|
158
186
|
}
|
|
159
187
|
|
|
160
188
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
@@ -167,21 +195,22 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
|
167
195
|
tag_closing,
|
|
168
196
|
tag_opening->location.start,
|
|
169
197
|
tag_closing->location.end,
|
|
170
|
-
errors
|
|
198
|
+
errors,
|
|
199
|
+
parser->allocator
|
|
171
200
|
);
|
|
172
201
|
|
|
173
|
-
token_free(tag_opening);
|
|
174
|
-
token_free(tag_closing);
|
|
175
|
-
|
|
202
|
+
token_free(tag_opening, parser->allocator);
|
|
203
|
+
token_free(tag_closing, parser->allocator);
|
|
204
|
+
hb_buffer_free(&content);
|
|
176
205
|
|
|
177
206
|
return doctype;
|
|
178
207
|
}
|
|
179
208
|
|
|
180
209
|
static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser) {
|
|
181
|
-
hb_array_T* errors = hb_array_init(8);
|
|
182
|
-
hb_array_T* children = hb_array_init(8);
|
|
210
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
211
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
183
212
|
hb_buffer_T content;
|
|
184
|
-
hb_buffer_init(&content, 64);
|
|
213
|
+
hb_buffer_init(&content, 64, parser->allocator);
|
|
185
214
|
|
|
186
215
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_XML_DECLARATION, errors);
|
|
187
216
|
|
|
@@ -200,8 +229,8 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
200
229
|
}
|
|
201
230
|
|
|
202
231
|
token_T* token = parser_advance(parser);
|
|
203
|
-
|
|
204
|
-
token_free(token);
|
|
232
|
+
hb_buffer_append_string(&content, token->value);
|
|
233
|
+
token_free(token, parser->allocator);
|
|
205
234
|
}
|
|
206
235
|
|
|
207
236
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
@@ -214,12 +243,13 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
214
243
|
tag_closing,
|
|
215
244
|
tag_opening->location.start,
|
|
216
245
|
tag_closing->location.end,
|
|
217
|
-
errors
|
|
246
|
+
errors,
|
|
247
|
+
parser->allocator
|
|
218
248
|
);
|
|
219
249
|
|
|
220
|
-
token_free(tag_opening);
|
|
221
|
-
token_free(tag_closing);
|
|
222
|
-
|
|
250
|
+
token_free(tag_opening, parser->allocator);
|
|
251
|
+
token_free(tag_closing, parser->allocator);
|
|
252
|
+
hb_buffer_free(&content);
|
|
223
253
|
|
|
224
254
|
return xml_declaration;
|
|
225
255
|
}
|
|
@@ -228,7 +258,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
|
|
|
228
258
|
position_T start = parser->current_token->location.start;
|
|
229
259
|
|
|
230
260
|
hb_buffer_T content;
|
|
231
|
-
hb_buffer_init(&content, 2048);
|
|
261
|
+
hb_buffer_init(&content, 2048, parser->allocator);
|
|
232
262
|
|
|
233
263
|
while (token_is_none_of(
|
|
234
264
|
parser,
|
|
@@ -240,49 +270,66 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
|
|
|
240
270
|
TOKEN_EOF
|
|
241
271
|
)) {
|
|
242
272
|
if (token_is(parser, TOKEN_ERROR)) {
|
|
243
|
-
|
|
273
|
+
hb_buffer_free(&content);
|
|
244
274
|
|
|
245
|
-
|
|
246
|
-
append_unexpected_error(
|
|
247
|
-
"Token Error",
|
|
248
|
-
"not TOKEN_ERROR",
|
|
249
|
-
token->value,
|
|
250
|
-
token->location.start,
|
|
251
|
-
token->location.end,
|
|
252
|
-
document_errors
|
|
253
|
-
);
|
|
254
|
-
|
|
255
|
-
token_free(token);
|
|
275
|
+
parser_append_unexpected_error_string(parser, document_errors, "Token Error", "not an error token");
|
|
256
276
|
|
|
257
277
|
return NULL;
|
|
258
278
|
}
|
|
259
279
|
|
|
280
|
+
if (parser->options.strict && parser->current_token->type == TOKEN_PERCENT) {
|
|
281
|
+
lexer_T lexer_copy = *parser->lexer;
|
|
282
|
+
token_T* peek_token = lexer_next_token(&lexer_copy);
|
|
283
|
+
|
|
284
|
+
if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
|
|
285
|
+
position_T stray_start = parser->current_token->location.start;
|
|
286
|
+
position_T stray_end = peek_token->location.end;
|
|
287
|
+
token_free(peek_token, parser->allocator);
|
|
288
|
+
|
|
289
|
+
append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, document_errors);
|
|
290
|
+
|
|
291
|
+
token_T* percent = parser_advance(parser);
|
|
292
|
+
hb_buffer_append_string(&content, percent->value);
|
|
293
|
+
token_free(percent, parser->allocator);
|
|
294
|
+
|
|
295
|
+
token_T* gt = parser_advance(parser);
|
|
296
|
+
hb_buffer_append_string(&content, gt->value);
|
|
297
|
+
token_free(gt, parser->allocator);
|
|
298
|
+
|
|
299
|
+
continue;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
token_free(peek_token, parser->allocator);
|
|
303
|
+
}
|
|
304
|
+
|
|
260
305
|
token_T* token = parser_advance(parser);
|
|
261
|
-
|
|
262
|
-
token_free(token);
|
|
306
|
+
hb_buffer_append_string(&content, token->value);
|
|
307
|
+
token_free(token, parser->allocator);
|
|
263
308
|
}
|
|
264
309
|
|
|
265
|
-
hb_array_T* errors = hb_array_init(8);
|
|
310
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
266
311
|
|
|
267
312
|
AST_HTML_TEXT_NODE_T* text_node = NULL;
|
|
268
313
|
|
|
269
314
|
if (hb_buffer_length(&content) > 0) {
|
|
315
|
+
hb_string_T text_content = { .data = content.value, .length = (uint32_t) content.length };
|
|
270
316
|
text_node =
|
|
271
|
-
ast_html_text_node_init(
|
|
317
|
+
ast_html_text_node_init(text_content, start, parser->current_token->location.start, errors, parser->allocator);
|
|
272
318
|
} else {
|
|
273
|
-
text_node =
|
|
319
|
+
text_node =
|
|
320
|
+
ast_html_text_node_init(HB_STRING_EMPTY, start, parser->current_token->location.start, errors, parser->allocator);
|
|
274
321
|
}
|
|
275
322
|
|
|
276
|
-
|
|
323
|
+
hb_buffer_free(&content);
|
|
277
324
|
|
|
278
325
|
return text_node;
|
|
279
326
|
}
|
|
280
327
|
|
|
281
328
|
static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
|
|
282
|
-
hb_array_T* errors = hb_array_init(8);
|
|
283
|
-
hb_array_T* children = hb_array_init(8);
|
|
329
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
330
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
284
331
|
hb_buffer_T buffer;
|
|
285
|
-
hb_buffer_init(&buffer, 128);
|
|
332
|
+
hb_buffer_init(&buffer, 128, parser->allocator);
|
|
286
333
|
position_T start = parser->current_token->location.start;
|
|
287
334
|
|
|
288
335
|
while (token_is_none_of(
|
|
@@ -295,6 +342,16 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
295
342
|
TOKEN_EOF
|
|
296
343
|
)) {
|
|
297
344
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
345
|
+
hb_string_T tag = parser->current_token->value;
|
|
346
|
+
bool is_output_tag = (tag.length >= 3 && tag.data[2] == '=');
|
|
347
|
+
|
|
348
|
+
if (!is_output_tag) {
|
|
349
|
+
bool is_control_flow = parser_lookahead_erb_is_control_flow(parser);
|
|
350
|
+
|
|
351
|
+
if (hb_buffer_is_empty(&buffer) && hb_array_size(children) == 0) { break; }
|
|
352
|
+
if (is_control_flow) { break; }
|
|
353
|
+
}
|
|
354
|
+
|
|
298
355
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
299
356
|
|
|
300
357
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
@@ -305,8 +362,8 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
305
362
|
}
|
|
306
363
|
|
|
307
364
|
token_T* token = parser_advance(parser);
|
|
308
|
-
|
|
309
|
-
token_free(token);
|
|
365
|
+
hb_buffer_append_string(&buffer, token->value);
|
|
366
|
+
token_free(token, parser->allocator);
|
|
310
367
|
}
|
|
311
368
|
|
|
312
369
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
@@ -326,9 +383,9 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
326
383
|
}
|
|
327
384
|
|
|
328
385
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
|
329
|
-
ast_html_attribute_name_node_init(children, node_start, node_end, errors);
|
|
386
|
+
ast_html_attribute_name_node_init(children, node_start, node_end, errors, parser->allocator);
|
|
330
387
|
|
|
331
|
-
|
|
388
|
+
hb_buffer_free(&buffer);
|
|
332
389
|
|
|
333
390
|
return attribute_name;
|
|
334
391
|
}
|
|
@@ -339,55 +396,137 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
339
396
|
hb_array_T* errors
|
|
340
397
|
) {
|
|
341
398
|
hb_buffer_T buffer;
|
|
342
|
-
hb_buffer_init(&buffer, 512);
|
|
399
|
+
hb_buffer_init(&buffer, 512, parser->allocator);
|
|
343
400
|
token_T* opening_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
344
401
|
position_T start = parser->current_token->location.start;
|
|
345
402
|
|
|
346
403
|
while (!token_is(parser, TOKEN_EOF)
|
|
347
404
|
&& !(
|
|
348
405
|
token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
|
349
|
-
&&
|
|
406
|
+
&& hb_string_equals(parser->current_token->value, opening_quote->value)
|
|
350
407
|
)) {
|
|
351
|
-
if (token_is(parser,
|
|
408
|
+
if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
|
|
409
|
+
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
|
410
|
+
bool found_closing_quote = false;
|
|
411
|
+
token_T* lookahead = lexer_next_token(parser->lexer);
|
|
412
|
+
|
|
413
|
+
while (lookahead && lookahead->type != TOKEN_EOF) {
|
|
414
|
+
if (lookahead->type == TOKEN_QUOTE && opening_quote != NULL
|
|
415
|
+
&& hb_string_equals(lookahead->value, opening_quote->value)) {
|
|
416
|
+
found_closing_quote = true;
|
|
417
|
+
token_free(lookahead, parser->allocator);
|
|
418
|
+
break;
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
token_free(lookahead, parser->allocator);
|
|
422
|
+
|
|
423
|
+
lookahead = lexer_next_token(parser->lexer);
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
if (lookahead && !found_closing_quote && lookahead->type == TOKEN_EOF) {
|
|
427
|
+
token_free(lookahead, parser->allocator);
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
lexer_restore_state(parser->lexer, saved_state);
|
|
431
|
+
|
|
432
|
+
if (found_closing_quote) {
|
|
433
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
434
|
+
token_free(parser->current_token, parser->allocator);
|
|
435
|
+
parser->current_token = lexer_next_token(parser->lexer);
|
|
436
|
+
continue;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
append_unclosed_quote_error(
|
|
440
|
+
opening_quote,
|
|
441
|
+
opening_quote->location.start,
|
|
442
|
+
parser->current_token->location.start,
|
|
443
|
+
parser->allocator,
|
|
444
|
+
errors
|
|
445
|
+
);
|
|
446
|
+
|
|
352
447
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
448
|
+
hb_buffer_free(&buffer);
|
|
353
449
|
|
|
354
|
-
|
|
450
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
|
|
451
|
+
opening_quote,
|
|
452
|
+
children,
|
|
453
|
+
NULL,
|
|
454
|
+
true,
|
|
455
|
+
opening_quote->location.start,
|
|
456
|
+
parser->current_token->location.start,
|
|
457
|
+
errors,
|
|
458
|
+
parser->allocator
|
|
459
|
+
);
|
|
355
460
|
|
|
356
|
-
|
|
461
|
+
token_free(opening_quote, parser->allocator);
|
|
357
462
|
|
|
358
|
-
|
|
463
|
+
return attribute_value;
|
|
359
464
|
}
|
|
360
465
|
|
|
361
|
-
|
|
466
|
+
bool buffer_ends_with_whitespace = buffer.length > 0 && is_whitespace(buffer.value[buffer.length - 1]);
|
|
467
|
+
|
|
468
|
+
if (token_is(parser, TOKEN_IDENTIFIER) && buffer_ends_with_whitespace) {
|
|
362
469
|
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
|
470
|
+
token_T* equals_token = lexer_next_token(parser->lexer);
|
|
471
|
+
bool looks_like_new_attribute = false;
|
|
363
472
|
|
|
364
|
-
|
|
473
|
+
if (equals_token && equals_token->type == TOKEN_EQUALS) {
|
|
474
|
+
token_T* after_equals = lexer_next_token(parser->lexer);
|
|
475
|
+
looks_like_new_attribute = (after_equals && after_equals->type == TOKEN_QUOTE);
|
|
365
476
|
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
hb_buffer_append(&buffer, parser->current_token->value);
|
|
369
|
-
hb_buffer_append(&buffer, next_token->value);
|
|
477
|
+
if (after_equals) { token_free(after_equals, parser->allocator); }
|
|
478
|
+
}
|
|
370
479
|
|
|
371
|
-
|
|
372
|
-
|
|
480
|
+
if (equals_token) { token_free(equals_token, parser->allocator); }
|
|
481
|
+
lexer_restore_state(parser->lexer, saved_state);
|
|
373
482
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
483
|
+
if (looks_like_new_attribute) {
|
|
484
|
+
append_unclosed_quote_error(
|
|
485
|
+
opening_quote,
|
|
486
|
+
opening_quote->location.start,
|
|
487
|
+
parser->current_token->location.start,
|
|
488
|
+
parser->allocator,
|
|
489
|
+
errors
|
|
490
|
+
);
|
|
491
|
+
|
|
492
|
+
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
493
|
+
hb_buffer_free(&buffer);
|
|
494
|
+
|
|
495
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
|
|
496
|
+
opening_quote,
|
|
497
|
+
children,
|
|
498
|
+
NULL,
|
|
499
|
+
true,
|
|
500
|
+
opening_quote->location.start,
|
|
501
|
+
parser->current_token->location.start,
|
|
502
|
+
errors,
|
|
503
|
+
parser->allocator
|
|
504
|
+
);
|
|
505
|
+
|
|
506
|
+
token_free(opening_quote, parser->allocator);
|
|
378
507
|
|
|
379
|
-
|
|
508
|
+
return attribute_value;
|
|
380
509
|
}
|
|
381
510
|
}
|
|
382
511
|
|
|
383
|
-
|
|
384
|
-
|
|
512
|
+
if (token_is(parser, TOKEN_ERB_START)) {
|
|
513
|
+
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
514
|
+
|
|
515
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
516
|
+
|
|
517
|
+
start = parser->current_token->location.start;
|
|
518
|
+
|
|
519
|
+
continue;
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
523
|
+
token_free(parser->current_token, parser->allocator);
|
|
385
524
|
|
|
386
525
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
387
526
|
}
|
|
388
527
|
|
|
389
528
|
if (token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
|
390
|
-
&&
|
|
529
|
+
&& hb_string_equals(parser->current_token->value, opening_quote->value)) {
|
|
391
530
|
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
|
392
531
|
|
|
393
532
|
token_T* potential_closing = parser->current_token;
|
|
@@ -395,27 +534,28 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
395
534
|
|
|
396
535
|
if (token_is(parser, TOKEN_IDENTIFIER) || token_is(parser, TOKEN_CHARACTER)) {
|
|
397
536
|
append_unexpected_error(
|
|
398
|
-
"Unescaped quote character in attribute value",
|
|
399
|
-
"
|
|
537
|
+
hb_string("Unescaped quote character in attribute value"),
|
|
538
|
+
hb_string("HTML entity ('/") or different quote style"),
|
|
400
539
|
opening_quote->value,
|
|
401
540
|
potential_closing->location.start,
|
|
402
541
|
potential_closing->location.end,
|
|
542
|
+
parser->allocator,
|
|
403
543
|
errors
|
|
404
544
|
);
|
|
405
545
|
|
|
406
546
|
lexer_restore_state(parser->lexer, saved_state);
|
|
407
547
|
|
|
408
|
-
token_free(parser->current_token);
|
|
548
|
+
token_free(parser->current_token, parser->allocator);
|
|
409
549
|
parser->current_token = potential_closing;
|
|
410
550
|
|
|
411
|
-
|
|
412
|
-
token_free(parser->current_token);
|
|
551
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
552
|
+
token_free(parser->current_token, parser->allocator);
|
|
413
553
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
414
554
|
|
|
415
555
|
while (!token_is(parser, TOKEN_EOF)
|
|
416
556
|
&& !(
|
|
417
557
|
token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
|
418
|
-
&&
|
|
558
|
+
&& hb_string_equals(parser->current_token->value, opening_quote->value)
|
|
419
559
|
)) {
|
|
420
560
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
421
561
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
@@ -427,13 +567,13 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
427
567
|
continue;
|
|
428
568
|
}
|
|
429
569
|
|
|
430
|
-
|
|
431
|
-
token_free(parser->current_token);
|
|
570
|
+
hb_buffer_append_string(&buffer, parser->current_token->value);
|
|
571
|
+
token_free(parser->current_token, parser->allocator);
|
|
432
572
|
|
|
433
573
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
434
574
|
}
|
|
435
575
|
} else {
|
|
436
|
-
token_free(parser->current_token);
|
|
576
|
+
token_free(parser->current_token, parser->allocator);
|
|
437
577
|
parser->current_token = potential_closing;
|
|
438
578
|
|
|
439
579
|
lexer_restore_state(parser->lexer, saved_state);
|
|
@@ -441,20 +581,10 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
441
581
|
}
|
|
442
582
|
|
|
443
583
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
444
|
-
|
|
584
|
+
hb_buffer_free(&buffer);
|
|
445
585
|
|
|
446
586
|
token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
447
587
|
|
|
448
|
-
if (opening_quote != NULL && closing_quote != NULL && strcmp(opening_quote->value, closing_quote->value) != 0) {
|
|
449
|
-
append_quotes_mismatch_error(
|
|
450
|
-
opening_quote,
|
|
451
|
-
closing_quote,
|
|
452
|
-
closing_quote->location.start,
|
|
453
|
-
closing_quote->location.end,
|
|
454
|
-
errors
|
|
455
|
-
);
|
|
456
|
-
}
|
|
457
|
-
|
|
458
588
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
|
|
459
589
|
opening_quote,
|
|
460
590
|
children,
|
|
@@ -462,18 +592,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
462
592
|
true,
|
|
463
593
|
opening_quote->location.start,
|
|
464
594
|
closing_quote->location.end,
|
|
465
|
-
errors
|
|
595
|
+
errors,
|
|
596
|
+
parser->allocator
|
|
466
597
|
);
|
|
467
598
|
|
|
468
|
-
token_free(opening_quote);
|
|
469
|
-
token_free(closing_quote);
|
|
599
|
+
token_free(opening_quote, parser->allocator);
|
|
600
|
+
token_free(closing_quote, parser->allocator);
|
|
470
601
|
|
|
471
602
|
return attribute_value;
|
|
472
603
|
}
|
|
473
604
|
|
|
474
605
|
static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser_T* parser) {
|
|
475
|
-
hb_array_T* children = hb_array_init(8);
|
|
476
|
-
hb_array_T* errors = hb_array_init(8);
|
|
606
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
607
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
477
608
|
|
|
478
609
|
// <div id=<%= "home" %>>
|
|
479
610
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
@@ -487,15 +618,16 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
487
618
|
false,
|
|
488
619
|
erb_node->base.location.start,
|
|
489
620
|
erb_node->base.location.end,
|
|
490
|
-
errors
|
|
621
|
+
errors,
|
|
622
|
+
parser->allocator
|
|
491
623
|
);
|
|
492
624
|
}
|
|
493
625
|
|
|
494
626
|
// <div id=home>
|
|
495
627
|
if (token_is(parser, TOKEN_IDENTIFIER)) {
|
|
496
628
|
token_T* identifier = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
|
497
|
-
AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier);
|
|
498
|
-
token_free(identifier);
|
|
629
|
+
AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier, parser->allocator);
|
|
630
|
+
token_free(identifier, parser->allocator);
|
|
499
631
|
|
|
500
632
|
hb_array_append(children, literal);
|
|
501
633
|
|
|
@@ -506,7 +638,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
506
638
|
false,
|
|
507
639
|
literal->base.location.start,
|
|
508
640
|
literal->base.location.end,
|
|
509
|
-
errors
|
|
641
|
+
errors,
|
|
642
|
+
parser->allocator
|
|
510
643
|
);
|
|
511
644
|
}
|
|
512
645
|
|
|
@@ -519,31 +652,37 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
519
652
|
position_T end = token->location.end;
|
|
520
653
|
|
|
521
654
|
append_unexpected_error(
|
|
522
|
-
"Invalid quote character for HTML attribute",
|
|
523
|
-
"single quote (') or double quote (\")",
|
|
524
|
-
"backtick
|
|
655
|
+
hb_string("Invalid quote character for HTML attribute"),
|
|
656
|
+
hb_string("single quote (') or double quote (\")"),
|
|
657
|
+
hb_string("a backtick"),
|
|
525
658
|
start,
|
|
526
659
|
end,
|
|
660
|
+
parser->allocator,
|
|
527
661
|
errors
|
|
528
662
|
);
|
|
529
663
|
|
|
530
664
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
|
|
531
|
-
ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
|
|
665
|
+
ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors, parser->allocator);
|
|
532
666
|
|
|
533
|
-
token_free(token);
|
|
667
|
+
token_free(token, parser->allocator);
|
|
534
668
|
|
|
535
669
|
return value;
|
|
536
670
|
}
|
|
537
671
|
|
|
672
|
+
char* expected = token_types_to_friendly_string(parser->allocator, TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START);
|
|
673
|
+
|
|
538
674
|
append_unexpected_error(
|
|
539
|
-
"Unexpected Token",
|
|
540
|
-
|
|
541
|
-
|
|
675
|
+
hb_string("Unexpected Token"),
|
|
676
|
+
hb_string(expected),
|
|
677
|
+
token_type_to_friendly_string(parser->current_token->type),
|
|
542
678
|
parser->current_token->location.start,
|
|
543
679
|
parser->current_token->location.end,
|
|
680
|
+
parser->allocator,
|
|
544
681
|
errors
|
|
545
682
|
);
|
|
546
683
|
|
|
684
|
+
hb_allocator_dealloc(parser->allocator, expected);
|
|
685
|
+
|
|
547
686
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init(
|
|
548
687
|
NULL,
|
|
549
688
|
children,
|
|
@@ -551,7 +690,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
551
690
|
false,
|
|
552
691
|
parser->current_token->location.start,
|
|
553
692
|
parser->current_token->location.end,
|
|
554
|
-
errors
|
|
693
|
+
errors,
|
|
694
|
+
parser->allocator
|
|
555
695
|
);
|
|
556
696
|
|
|
557
697
|
return value;
|
|
@@ -566,7 +706,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
566
706
|
|
|
567
707
|
if (has_equals) {
|
|
568
708
|
hb_buffer_T equals_buffer;
|
|
569
|
-
hb_buffer_init(&equals_buffer, 256);
|
|
709
|
+
hb_buffer_init(&equals_buffer, 256, parser->allocator);
|
|
570
710
|
position_T equals_start = { 0 };
|
|
571
711
|
position_T equals_end = { 0 };
|
|
572
712
|
uint32_t range_start = 0;
|
|
@@ -582,8 +722,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
582
722
|
range_start = whitespace->range.from;
|
|
583
723
|
}
|
|
584
724
|
|
|
585
|
-
|
|
586
|
-
token_free(whitespace);
|
|
725
|
+
hb_buffer_append_string(&equals_buffer, whitespace->value);
|
|
726
|
+
token_free(whitespace, parser->allocator);
|
|
587
727
|
}
|
|
588
728
|
|
|
589
729
|
token_T* equals = parser_advance(parser);
|
|
@@ -594,27 +734,45 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
594
734
|
range_start = equals->range.from;
|
|
595
735
|
}
|
|
596
736
|
|
|
597
|
-
|
|
737
|
+
hb_buffer_append_string(&equals_buffer, equals->value);
|
|
598
738
|
equals_end = equals->location.end;
|
|
599
739
|
range_end = equals->range.to;
|
|
600
|
-
token_free(equals);
|
|
740
|
+
token_free(equals, parser->allocator);
|
|
601
741
|
|
|
602
742
|
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
603
743
|
token_T* whitespace = parser_advance(parser);
|
|
604
|
-
|
|
744
|
+
hb_buffer_append_string(&equals_buffer, whitespace->value);
|
|
605
745
|
equals_end = whitespace->location.end;
|
|
606
746
|
range_end = whitespace->range.to;
|
|
607
|
-
token_free(whitespace);
|
|
747
|
+
token_free(whitespace, parser->allocator);
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
token_T* equals_with_whitespace = hb_allocator_alloc(parser->allocator, sizeof(token_T));
|
|
751
|
+
|
|
752
|
+
if (!equals_with_whitespace) {
|
|
753
|
+
hb_buffer_free(&equals_buffer);
|
|
754
|
+
|
|
755
|
+
return ast_html_attribute_node_init(
|
|
756
|
+
attribute_name,
|
|
757
|
+
NULL,
|
|
758
|
+
NULL,
|
|
759
|
+
attribute_name->base.location.start,
|
|
760
|
+
attribute_name->base.location.end,
|
|
761
|
+
NULL,
|
|
762
|
+
parser->allocator
|
|
763
|
+
);
|
|
608
764
|
}
|
|
609
765
|
|
|
610
|
-
token_T* equals_with_whitespace = calloc(1, sizeof(token_T));
|
|
611
766
|
equals_with_whitespace->type = TOKEN_EQUALS;
|
|
612
|
-
|
|
767
|
+
|
|
768
|
+
char* arena_copy = hb_allocator_strndup(parser->allocator, equals_buffer.value, equals_buffer.length);
|
|
769
|
+
equals_with_whitespace->value = (hb_string_T) { .data = arena_copy, .length = (uint32_t) equals_buffer.length };
|
|
770
|
+
|
|
771
|
+
hb_buffer_free(&equals_buffer);
|
|
772
|
+
|
|
613
773
|
equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
|
|
614
774
|
equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
|
|
615
775
|
|
|
616
|
-
free(equals_buffer.value);
|
|
617
|
-
|
|
618
776
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
|
619
777
|
|
|
620
778
|
return ast_html_attribute_node_init(
|
|
@@ -623,7 +781,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
623
781
|
attribute_value,
|
|
624
782
|
attribute_name->base.location.start,
|
|
625
783
|
attribute_value->base.location.end,
|
|
626
|
-
NULL
|
|
784
|
+
NULL,
|
|
785
|
+
parser->allocator
|
|
627
786
|
);
|
|
628
787
|
} else {
|
|
629
788
|
return ast_html_attribute_node_init(
|
|
@@ -632,7 +791,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
632
791
|
NULL,
|
|
633
792
|
attribute_name->base.location.start,
|
|
634
793
|
attribute_name->base.location.end,
|
|
635
|
-
NULL
|
|
794
|
+
NULL,
|
|
795
|
+
parser->allocator
|
|
636
796
|
);
|
|
637
797
|
}
|
|
638
798
|
} else {
|
|
@@ -644,6 +804,51 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
644
804
|
if (equals != NULL) {
|
|
645
805
|
parser_consume_whitespace(parser, NULL);
|
|
646
806
|
|
|
807
|
+
// <div class= >
|
|
808
|
+
if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
|
|
809
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
810
|
+
hb_string_T attribute_name_string = hb_string("unknown");
|
|
811
|
+
|
|
812
|
+
if (hb_array_size(attribute_name->children) > 0) {
|
|
813
|
+
AST_LITERAL_NODE_T* first_child = (AST_LITERAL_NODE_T*) hb_array_get(attribute_name->children, 0);
|
|
814
|
+
|
|
815
|
+
if (first_child && !hb_string_is_empty(first_child->content)) { attribute_name_string = first_child->content; }
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
append_missing_attribute_value_error(
|
|
819
|
+
attribute_name_string,
|
|
820
|
+
equals->location.start,
|
|
821
|
+
parser->current_token->location.start,
|
|
822
|
+
parser->allocator,
|
|
823
|
+
errors
|
|
824
|
+
);
|
|
825
|
+
|
|
826
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* empty_value = ast_html_attribute_value_node_init(
|
|
827
|
+
NULL,
|
|
828
|
+
hb_array_init(8, parser->allocator),
|
|
829
|
+
NULL,
|
|
830
|
+
false,
|
|
831
|
+
equals->location.end,
|
|
832
|
+
parser->current_token->location.start,
|
|
833
|
+
errors,
|
|
834
|
+
parser->allocator
|
|
835
|
+
);
|
|
836
|
+
|
|
837
|
+
AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
|
|
838
|
+
attribute_name,
|
|
839
|
+
equals,
|
|
840
|
+
empty_value,
|
|
841
|
+
attribute_name->base.location.start,
|
|
842
|
+
parser->current_token->location.start,
|
|
843
|
+
NULL,
|
|
844
|
+
parser->allocator
|
|
845
|
+
);
|
|
846
|
+
|
|
847
|
+
token_free(equals, parser->allocator);
|
|
848
|
+
|
|
849
|
+
return attribute_node;
|
|
850
|
+
}
|
|
851
|
+
|
|
647
852
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
|
648
853
|
|
|
649
854
|
AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
|
|
@@ -652,10 +857,11 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
652
857
|
attribute_value,
|
|
653
858
|
attribute_name->base.location.start,
|
|
654
859
|
attribute_value->base.location.end,
|
|
655
|
-
NULL
|
|
860
|
+
NULL,
|
|
861
|
+
parser->allocator
|
|
656
862
|
);
|
|
657
863
|
|
|
658
|
-
token_free(equals);
|
|
864
|
+
token_free(equals, parser->allocator);
|
|
659
865
|
|
|
660
866
|
return attribute_node;
|
|
661
867
|
}
|
|
@@ -666,7 +872,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
666
872
|
NULL,
|
|
667
873
|
attribute_name->base.location.start,
|
|
668
874
|
attribute_name->base.location.end,
|
|
669
|
-
NULL
|
|
875
|
+
NULL,
|
|
876
|
+
parser->allocator
|
|
670
877
|
);
|
|
671
878
|
}
|
|
672
879
|
|
|
@@ -677,11 +884,11 @@ static void parser_skip_erb_content(lexer_T* lexer) {
|
|
|
677
884
|
token = lexer_next_token(lexer);
|
|
678
885
|
|
|
679
886
|
if (token->type == TOKEN_ERB_END) {
|
|
680
|
-
token_free(token);
|
|
887
|
+
token_free(token, lexer->allocator);
|
|
681
888
|
break;
|
|
682
889
|
}
|
|
683
890
|
|
|
684
|
-
token_free(token);
|
|
891
|
+
token_free(token, lexer->allocator);
|
|
685
892
|
} while (true);
|
|
686
893
|
}
|
|
687
894
|
|
|
@@ -692,12 +899,12 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
|
|
|
692
899
|
after = lexer_next_token(lexer);
|
|
693
900
|
|
|
694
901
|
if (after->type == TOKEN_EQUALS) {
|
|
695
|
-
token_free(after);
|
|
902
|
+
token_free(after, lexer->allocator);
|
|
696
903
|
return true;
|
|
697
904
|
}
|
|
698
905
|
|
|
699
906
|
if (after->type == TOKEN_WHITESPACE || after->type == TOKEN_NEWLINE) {
|
|
700
|
-
token_free(after);
|
|
907
|
+
token_free(after, lexer->allocator);
|
|
701
908
|
continue;
|
|
702
909
|
}
|
|
703
910
|
|
|
@@ -705,23 +912,56 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
|
|
|
705
912
|
|| after->type == TOKEN_ERB_START) {
|
|
706
913
|
|
|
707
914
|
if (after->type == TOKEN_ERB_START) {
|
|
708
|
-
token_free(after);
|
|
915
|
+
token_free(after, lexer->allocator);
|
|
709
916
|
parser_skip_erb_content(lexer);
|
|
710
917
|
} else {
|
|
711
|
-
token_free(after);
|
|
918
|
+
token_free(after, lexer->allocator);
|
|
712
919
|
}
|
|
713
920
|
continue;
|
|
714
921
|
}
|
|
715
922
|
|
|
716
|
-
token_free(after);
|
|
923
|
+
token_free(after, lexer->allocator);
|
|
717
924
|
return false;
|
|
718
925
|
|
|
719
926
|
} while (true);
|
|
720
927
|
}
|
|
721
928
|
|
|
929
|
+
static bool starts_with_keyword(hb_string_T string, const char* keyword) {
|
|
930
|
+
hb_string_T prefix = hb_string(keyword);
|
|
931
|
+
if (string.length < prefix.length) { return false; }
|
|
932
|
+
if (strncmp(string.data, prefix.data, prefix.length) != 0) { return false; }
|
|
933
|
+
|
|
934
|
+
if (string.length == prefix.length) { return true; }
|
|
935
|
+
|
|
936
|
+
return is_whitespace(string.data[prefix.length]);
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
// TODO: ideally we could avoid basing this off of strings, and use the step in analyze.c
|
|
940
|
+
static bool parser_lookahead_erb_is_control_flow(parser_T* parser) {
|
|
941
|
+
lexer_T lexer_copy = *parser->lexer;
|
|
942
|
+
token_T* content = lexer_next_token(&lexer_copy);
|
|
943
|
+
|
|
944
|
+
if (content == NULL || content->type != TOKEN_ERB_CONTENT) {
|
|
945
|
+
if (content) { token_free(content, parser->allocator); }
|
|
946
|
+
|
|
947
|
+
return false;
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
hb_string_T trimmed = hb_string_trim_start(content->value);
|
|
951
|
+
|
|
952
|
+
bool is_control_flow = starts_with_keyword(trimmed, "end") || starts_with_keyword(trimmed, "else")
|
|
953
|
+
|| starts_with_keyword(trimmed, "elsif") || starts_with_keyword(trimmed, "in")
|
|
954
|
+
|| starts_with_keyword(trimmed, "when") || starts_with_keyword(trimmed, "rescue")
|
|
955
|
+
|| starts_with_keyword(trimmed, "ensure");
|
|
956
|
+
|
|
957
|
+
token_free(content, parser->allocator);
|
|
958
|
+
|
|
959
|
+
return is_control_flow;
|
|
960
|
+
}
|
|
961
|
+
|
|
722
962
|
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
|
|
723
|
-
bool is_output_tag =
|
|
724
|
-
&&
|
|
963
|
+
bool is_output_tag = !hb_string_is_empty(parser->current_token->value)
|
|
964
|
+
&& hb_string_starts_with(parser->current_token->value, hb_string("<%="));
|
|
725
965
|
|
|
726
966
|
if (!is_output_tag) {
|
|
727
967
|
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
@@ -732,7 +972,7 @@ static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children
|
|
|
732
972
|
lexer_T lexer_copy = *parser->lexer;
|
|
733
973
|
|
|
734
974
|
token_T* erb_start = lexer_next_token(&lexer_copy);
|
|
735
|
-
token_free(erb_start);
|
|
975
|
+
token_free(erb_start, parser->allocator);
|
|
736
976
|
parser_skip_erb_content(&lexer_copy);
|
|
737
977
|
|
|
738
978
|
bool looks_like_attribute = parser_lookahead_erb_is_attribute(&lexer_copy);
|
|
@@ -758,13 +998,40 @@ static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* c
|
|
|
758
998
|
}
|
|
759
999
|
|
|
760
1000
|
static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
761
|
-
hb_array_T* errors = hb_array_init(8);
|
|
762
|
-
hb_array_T* children = hb_array_init(8);
|
|
1001
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1002
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
763
1003
|
|
|
764
1004
|
token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
|
|
765
1005
|
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
|
766
1006
|
|
|
767
1007
|
while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) {
|
|
1008
|
+
if (token_is_any_of(parser, TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1009
|
+
append_unclosed_open_tag_error(
|
|
1010
|
+
tag_name,
|
|
1011
|
+
tag_name->location.start,
|
|
1012
|
+
parser->current_token->location.start,
|
|
1013
|
+
parser->allocator,
|
|
1014
|
+
errors
|
|
1015
|
+
);
|
|
1016
|
+
|
|
1017
|
+
AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
|
|
1018
|
+
tag_start,
|
|
1019
|
+
tag_name,
|
|
1020
|
+
NULL,
|
|
1021
|
+
children,
|
|
1022
|
+
false,
|
|
1023
|
+
tag_start->location.start,
|
|
1024
|
+
parser->current_token->location.start,
|
|
1025
|
+
errors,
|
|
1026
|
+
parser->allocator
|
|
1027
|
+
);
|
|
1028
|
+
|
|
1029
|
+
token_free(tag_start, parser->allocator);
|
|
1030
|
+
token_free(tag_name, parser->allocator);
|
|
1031
|
+
|
|
1032
|
+
return open_tag_node;
|
|
1033
|
+
}
|
|
1034
|
+
|
|
768
1035
|
if (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
769
1036
|
parser_handle_whitespace_in_open_tag(parser, children);
|
|
770
1037
|
continue;
|
|
@@ -790,21 +1057,79 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
790
1057
|
token_T* next_token = lexer_next_token(&lexer_copy);
|
|
791
1058
|
|
|
792
1059
|
if (next_token && next_token->type == TOKEN_IDENTIFIER) {
|
|
793
|
-
token_free(next_token);
|
|
1060
|
+
token_free(next_token, parser->allocator);
|
|
794
1061
|
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
795
1062
|
|
|
796
1063
|
continue;
|
|
797
1064
|
}
|
|
798
1065
|
|
|
799
|
-
token_free(next_token);
|
|
1066
|
+
token_free(next_token, parser->allocator);
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
if (parser->current_token->type == TOKEN_PERCENT) {
|
|
1070
|
+
lexer_T lexer_copy = *parser->lexer;
|
|
1071
|
+
token_T* peek_token = lexer_next_token(&lexer_copy);
|
|
1072
|
+
|
|
1073
|
+
if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
|
|
1074
|
+
position_T stray_start = parser->current_token->location.start;
|
|
1075
|
+
position_T stray_end = peek_token->location.end;
|
|
1076
|
+
token_free(peek_token, parser->allocator);
|
|
1077
|
+
|
|
1078
|
+
append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, errors);
|
|
1079
|
+
|
|
1080
|
+
token_T* percent = parser_advance(parser);
|
|
1081
|
+
token_T* gt = parser_advance(parser);
|
|
1082
|
+
|
|
1083
|
+
AST_LITERAL_NODE_T* literal =
|
|
1084
|
+
ast_literal_node_init(hb_string("%>"), stray_start, stray_end, NULL, parser->allocator);
|
|
1085
|
+
hb_array_append(children, literal);
|
|
1086
|
+
|
|
1087
|
+
token_free(percent, parser->allocator);
|
|
1088
|
+
token_free(gt, parser->allocator);
|
|
1089
|
+
|
|
1090
|
+
continue;
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
token_free(peek_token, parser->allocator);
|
|
800
1094
|
}
|
|
801
1095
|
|
|
802
1096
|
parser_append_unexpected_error(
|
|
803
1097
|
parser,
|
|
1098
|
+
errors,
|
|
804
1099
|
"Unexpected Token",
|
|
805
|
-
|
|
1100
|
+
TOKEN_IDENTIFIER,
|
|
1101
|
+
TOKEN_AT,
|
|
1102
|
+
TOKEN_ERB_START,
|
|
1103
|
+
TOKEN_WHITESPACE,
|
|
1104
|
+
TOKEN_NEWLINE
|
|
1105
|
+
);
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
if (token_is(parser, TOKEN_EOF)) {
|
|
1109
|
+
append_unclosed_open_tag_error(
|
|
1110
|
+
tag_name,
|
|
1111
|
+
tag_name->location.start,
|
|
1112
|
+
parser->current_token->location.start,
|
|
1113
|
+
parser->allocator,
|
|
806
1114
|
errors
|
|
807
1115
|
);
|
|
1116
|
+
|
|
1117
|
+
AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
|
|
1118
|
+
tag_start,
|
|
1119
|
+
tag_name,
|
|
1120
|
+
NULL,
|
|
1121
|
+
children,
|
|
1122
|
+
false,
|
|
1123
|
+
tag_start->location.start,
|
|
1124
|
+
parser->current_token->location.start,
|
|
1125
|
+
errors,
|
|
1126
|
+
parser->allocator
|
|
1127
|
+
);
|
|
1128
|
+
|
|
1129
|
+
token_free(tag_start, parser->allocator);
|
|
1130
|
+
token_free(tag_name, parser->allocator);
|
|
1131
|
+
|
|
1132
|
+
return open_tag_node;
|
|
808
1133
|
}
|
|
809
1134
|
|
|
810
1135
|
bool is_self_closing = false;
|
|
@@ -815,8 +1140,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
815
1140
|
tag_end = parser_consume_expected(parser, TOKEN_HTML_TAG_SELF_CLOSE, errors);
|
|
816
1141
|
|
|
817
1142
|
if (tag_end == NULL) {
|
|
818
|
-
token_free(tag_start);
|
|
819
|
-
token_free(tag_name);
|
|
1143
|
+
token_free(tag_start, parser->allocator);
|
|
1144
|
+
token_free(tag_name, parser->allocator);
|
|
820
1145
|
|
|
821
1146
|
hb_array_free(&children);
|
|
822
1147
|
hb_array_free(&errors);
|
|
@@ -835,19 +1160,20 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
835
1160
|
is_self_closing,
|
|
836
1161
|
tag_start->location.start,
|
|
837
1162
|
tag_end->location.end,
|
|
838
|
-
errors
|
|
1163
|
+
errors,
|
|
1164
|
+
parser->allocator
|
|
839
1165
|
);
|
|
840
1166
|
|
|
841
|
-
token_free(tag_start);
|
|
842
|
-
token_free(tag_name);
|
|
843
|
-
token_free(tag_end);
|
|
1167
|
+
token_free(tag_start, parser->allocator);
|
|
1168
|
+
token_free(tag_name, parser->allocator);
|
|
1169
|
+
token_free(tag_end, parser->allocator);
|
|
844
1170
|
|
|
845
1171
|
return open_tag_node;
|
|
846
1172
|
}
|
|
847
1173
|
|
|
848
1174
|
static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
|
|
849
|
-
hb_array_T* errors = hb_array_init(8);
|
|
850
|
-
hb_array_T* children = hb_array_init(8);
|
|
1175
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1176
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
851
1177
|
|
|
852
1178
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
|
853
1179
|
|
|
@@ -857,38 +1183,53 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
|
|
|
857
1183
|
|
|
858
1184
|
parser_consume_whitespace(parser, children);
|
|
859
1185
|
|
|
860
|
-
token_T* tag_closing =
|
|
1186
|
+
token_T* tag_closing = parser_consume_if_present(parser, TOKEN_HTML_TAG_END);
|
|
1187
|
+
|
|
1188
|
+
if (tag_closing == NULL) {
|
|
1189
|
+
append_unclosed_close_tag_error(
|
|
1190
|
+
tag_name,
|
|
1191
|
+
tag_opening->location.start,
|
|
1192
|
+
tag_name->location.end,
|
|
1193
|
+
parser->allocator,
|
|
1194
|
+
errors
|
|
1195
|
+
);
|
|
1196
|
+
}
|
|
861
1197
|
|
|
862
|
-
if (tag_name != NULL && is_void_element(
|
|
863
|
-
|
|
864
|
-
hb_string_T
|
|
1198
|
+
if (tag_closing != NULL && tag_name != NULL && is_void_element(tag_name->value)
|
|
1199
|
+
&& parser_in_svg_context(parser) == false) {
|
|
1200
|
+
hb_string_T expected = html_self_closing_tag_string(tag_name->value, parser->allocator);
|
|
1201
|
+
hb_string_T got = html_closing_tag_string(tag_name->value, parser->allocator);
|
|
865
1202
|
|
|
866
1203
|
append_void_element_closing_tag_error(
|
|
867
1204
|
tag_name,
|
|
868
|
-
expected
|
|
869
|
-
got
|
|
1205
|
+
expected,
|
|
1206
|
+
got,
|
|
870
1207
|
tag_opening->location.start,
|
|
871
1208
|
tag_closing->location.end,
|
|
1209
|
+
parser->allocator,
|
|
872
1210
|
errors
|
|
873
1211
|
);
|
|
874
1212
|
|
|
875
|
-
|
|
876
|
-
|
|
1213
|
+
hb_allocator_dealloc(parser->allocator, expected.data);
|
|
1214
|
+
hb_allocator_dealloc(parser->allocator, got.data);
|
|
877
1215
|
}
|
|
878
1216
|
|
|
1217
|
+
position_T end_position = tag_closing != NULL ? tag_closing->location.end : tag_name->location.end;
|
|
1218
|
+
|
|
879
1219
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
|
|
880
1220
|
tag_opening,
|
|
881
1221
|
tag_name,
|
|
882
1222
|
children,
|
|
883
1223
|
tag_closing,
|
|
884
1224
|
tag_opening->location.start,
|
|
885
|
-
|
|
886
|
-
errors
|
|
1225
|
+
end_position,
|
|
1226
|
+
errors,
|
|
1227
|
+
parser->allocator
|
|
887
1228
|
);
|
|
888
1229
|
|
|
889
|
-
token_free(tag_opening);
|
|
890
|
-
token_free(tag_name);
|
|
891
|
-
token_free(tag_closing);
|
|
1230
|
+
token_free(tag_opening, parser->allocator);
|
|
1231
|
+
token_free(tag_name, parser->allocator);
|
|
1232
|
+
token_free(tag_closing, parser->allocator);
|
|
892
1233
|
|
|
893
1234
|
return close_tag;
|
|
894
1235
|
}
|
|
@@ -899,7 +1240,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
|
|
|
899
1240
|
AST_HTML_OPEN_TAG_NODE_T* open_tag
|
|
900
1241
|
) {
|
|
901
1242
|
return ast_html_element_node_init(
|
|
902
|
-
open_tag,
|
|
1243
|
+
(AST_NODE_T*) open_tag,
|
|
903
1244
|
open_tag->tag_name,
|
|
904
1245
|
NULL,
|
|
905
1246
|
NULL,
|
|
@@ -907,7 +1248,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
|
|
|
907
1248
|
ELEMENT_SOURCE_HTML,
|
|
908
1249
|
open_tag->base.location.start,
|
|
909
1250
|
open_tag->base.location.end,
|
|
910
|
-
NULL
|
|
1251
|
+
NULL,
|
|
1252
|
+
parser->allocator
|
|
911
1253
|
);
|
|
912
1254
|
}
|
|
913
1255
|
|
|
@@ -915,63 +1257,88 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
|
|
|
915
1257
|
parser_T* parser,
|
|
916
1258
|
AST_HTML_OPEN_TAG_NODE_T* open_tag
|
|
917
1259
|
) {
|
|
918
|
-
hb_array_T* errors = hb_array_init(8);
|
|
919
|
-
hb_array_T* body = hb_array_init(8);
|
|
1260
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1261
|
+
hb_array_T* body = hb_array_init(8, parser->allocator);
|
|
920
1262
|
|
|
921
1263
|
parser_push_open_tag(parser, open_tag->tag_name);
|
|
922
1264
|
|
|
923
|
-
if (open_tag->tag_name->value && parser_is_foreign_content_tag(
|
|
924
|
-
foreign_content_type_T content_type = parser_get_foreign_content_type(
|
|
1265
|
+
if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
|
|
1266
|
+
foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
|
|
925
1267
|
parser_enter_foreign_content(parser, content_type);
|
|
926
1268
|
parser_parse_foreign_content(parser, body, errors);
|
|
927
1269
|
} else {
|
|
928
1270
|
parser_parse_in_data_state(parser, body, errors);
|
|
929
1271
|
}
|
|
930
1272
|
|
|
931
|
-
if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1273
|
+
if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1274
|
+
return parser_handle_missing_close_tag(parser, open_tag, body, errors);
|
|
1275
|
+
}
|
|
932
1276
|
|
|
933
1277
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
|
|
934
1278
|
|
|
935
|
-
if (parser_in_svg_context(parser) == false && is_void_element(
|
|
1279
|
+
if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
|
|
936
1280
|
hb_array_push(body, close_tag);
|
|
937
1281
|
parser_parse_in_data_state(parser, body, errors);
|
|
938
1282
|
close_tag = parser_parse_html_close_tag(parser);
|
|
939
1283
|
}
|
|
940
1284
|
|
|
941
|
-
bool matches_stack = parser_check_matching_tag(parser,
|
|
1285
|
+
bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
|
|
942
1286
|
|
|
943
1287
|
if (matches_stack) {
|
|
944
1288
|
token_T* popped_token = parser_pop_open_tag(parser);
|
|
945
|
-
token_free(popped_token);
|
|
1289
|
+
token_free(popped_token, parser->allocator);
|
|
1290
|
+
} else if (parser_can_close_ancestor(parser, close_tag->tag_name->value)) {
|
|
1291
|
+
size_t depth = parser_find_ancestor_depth(parser, close_tag->tag_name->value);
|
|
1292
|
+
|
|
1293
|
+
for (size_t i = 0; i < depth; i++) {
|
|
1294
|
+
token_T* unclosed = parser_pop_open_tag(parser);
|
|
1295
|
+
|
|
1296
|
+
if (unclosed != NULL) {
|
|
1297
|
+
append_missing_closing_tag_error(
|
|
1298
|
+
unclosed,
|
|
1299
|
+
unclosed->location.start,
|
|
1300
|
+
unclosed->location.end,
|
|
1301
|
+
parser->allocator,
|
|
1302
|
+
errors
|
|
1303
|
+
);
|
|
1304
|
+
token_free(unclosed, parser->allocator);
|
|
1305
|
+
}
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1308
|
+
token_T* popped_token = parser_pop_open_tag(parser);
|
|
1309
|
+
token_free(popped_token, parser->allocator);
|
|
946
1310
|
} else {
|
|
947
1311
|
parser_handle_mismatched_tags(parser, close_tag, errors);
|
|
948
1312
|
}
|
|
949
1313
|
|
|
950
1314
|
return ast_html_element_node_init(
|
|
951
|
-
open_tag,
|
|
1315
|
+
(AST_NODE_T*) open_tag,
|
|
952
1316
|
open_tag->tag_name,
|
|
953
1317
|
body,
|
|
954
|
-
close_tag,
|
|
1318
|
+
(AST_NODE_T*) close_tag,
|
|
955
1319
|
false,
|
|
956
1320
|
ELEMENT_SOURCE_HTML,
|
|
957
1321
|
open_tag->base.location.start,
|
|
958
1322
|
close_tag->base.location.end,
|
|
959
|
-
errors
|
|
1323
|
+
errors,
|
|
1324
|
+
parser->allocator
|
|
960
1325
|
);
|
|
961
1326
|
}
|
|
962
1327
|
|
|
963
1328
|
static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
|
|
964
1329
|
AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
|
|
965
1330
|
|
|
1331
|
+
if (open_tag->tag_closing == NULL) { return (AST_NODE_T*) open_tag; }
|
|
1332
|
+
|
|
966
1333
|
// <tag />
|
|
967
1334
|
if (open_tag->is_void) { return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag); }
|
|
968
1335
|
|
|
969
1336
|
// <tag>, in void element list, and not in inside an <svg> element
|
|
970
|
-
if (!open_tag->is_void && is_void_element(
|
|
1337
|
+
if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
|
|
971
1338
|
return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag);
|
|
972
1339
|
}
|
|
973
1340
|
|
|
974
|
-
if (open_tag->tag_name->value && parser_is_foreign_content_tag(
|
|
1341
|
+
if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
|
|
975
1342
|
AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
|
|
976
1343
|
|
|
977
1344
|
if (regular_element != NULL) { return (AST_NODE_T*) regular_element; }
|
|
@@ -981,11 +1348,38 @@ static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
|
|
|
981
1348
|
}
|
|
982
1349
|
|
|
983
1350
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
984
|
-
hb_array_T* errors = hb_array_init(8);
|
|
1351
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
985
1352
|
|
|
986
1353
|
token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
|
|
987
1354
|
token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
|
|
988
|
-
|
|
1355
|
+
|
|
1356
|
+
token_T* closing_tag = NULL;
|
|
1357
|
+
position_T end_position;
|
|
1358
|
+
|
|
1359
|
+
if (token_is(parser, TOKEN_ERB_END)) {
|
|
1360
|
+
closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
|
|
1361
|
+
end_position = closing_tag->location.end;
|
|
1362
|
+
} else if (token_is(parser, TOKEN_ERB_START)) {
|
|
1363
|
+
append_nested_erb_tag_error(
|
|
1364
|
+
opening_tag,
|
|
1365
|
+
parser->current_token->location.start.line,
|
|
1366
|
+
parser->current_token->location.start.column,
|
|
1367
|
+
parser->current_token->location.start,
|
|
1368
|
+
parser->current_token->location.end,
|
|
1369
|
+
parser->allocator,
|
|
1370
|
+
errors
|
|
1371
|
+
);
|
|
1372
|
+
end_position = parser->current_token->location.start;
|
|
1373
|
+
} else {
|
|
1374
|
+
append_unclosed_erb_tag_error(
|
|
1375
|
+
opening_tag,
|
|
1376
|
+
opening_tag->location.start,
|
|
1377
|
+
parser->current_token->location.start,
|
|
1378
|
+
parser->allocator,
|
|
1379
|
+
errors
|
|
1380
|
+
);
|
|
1381
|
+
end_position = parser->current_token->location.start;
|
|
1382
|
+
}
|
|
989
1383
|
|
|
990
1384
|
AST_ERB_CONTENT_NODE_T* erb_node = ast_erb_content_node_init(
|
|
991
1385
|
opening_tag,
|
|
@@ -994,27 +1388,29 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
|
994
1388
|
NULL,
|
|
995
1389
|
false,
|
|
996
1390
|
false,
|
|
1391
|
+
HERB_PRISM_NODE_EMPTY,
|
|
997
1392
|
opening_tag->location.start,
|
|
998
|
-
|
|
999
|
-
errors
|
|
1393
|
+
end_position,
|
|
1394
|
+
errors,
|
|
1395
|
+
parser->allocator
|
|
1000
1396
|
);
|
|
1001
1397
|
|
|
1002
|
-
token_free(opening_tag);
|
|
1003
|
-
token_free(content);
|
|
1004
|
-
token_free(closing_tag);
|
|
1398
|
+
token_free(opening_tag, parser->allocator);
|
|
1399
|
+
token_free(content, parser->allocator);
|
|
1400
|
+
if (closing_tag != NULL) { token_free(closing_tag, parser->allocator); }
|
|
1005
1401
|
|
|
1006
1402
|
return erb_node;
|
|
1007
1403
|
}
|
|
1008
1404
|
|
|
1009
1405
|
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
1010
1406
|
hb_buffer_T content;
|
|
1011
|
-
hb_buffer_init(&content, 1024);
|
|
1407
|
+
hb_buffer_init(&content, 1024, parser->allocator);
|
|
1012
1408
|
position_T start = parser->current_token->location.start;
|
|
1013
1409
|
hb_string_T expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
|
|
1014
1410
|
|
|
1015
1411
|
if (hb_string_is_empty(expected_closing_tag)) {
|
|
1016
1412
|
parser_exit_foreign_content(parser);
|
|
1017
|
-
|
|
1413
|
+
hb_buffer_free(&content);
|
|
1018
1414
|
|
|
1019
1415
|
return;
|
|
1020
1416
|
}
|
|
@@ -1037,33 +1433,32 @@ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children,
|
|
|
1037
1433
|
token_T* next_token = lexer_next_token(parser->lexer);
|
|
1038
1434
|
bool is_potential_match = false;
|
|
1039
1435
|
|
|
1040
|
-
if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
|
|
1041
|
-
is_potential_match =
|
|
1042
|
-
parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type);
|
|
1436
|
+
if (next_token && next_token->type == TOKEN_IDENTIFIER && !hb_string_is_empty(next_token->value)) {
|
|
1437
|
+
is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
|
|
1043
1438
|
}
|
|
1044
1439
|
|
|
1045
1440
|
lexer_restore_state(parser->lexer, saved_state);
|
|
1046
1441
|
|
|
1047
|
-
if (next_token) { token_free(next_token); }
|
|
1442
|
+
if (next_token) { token_free(next_token, parser->allocator); }
|
|
1048
1443
|
|
|
1049
1444
|
if (is_potential_match) {
|
|
1050
1445
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1051
1446
|
parser_exit_foreign_content(parser);
|
|
1052
1447
|
|
|
1053
|
-
|
|
1448
|
+
hb_buffer_free(&content);
|
|
1054
1449
|
|
|
1055
1450
|
return;
|
|
1056
1451
|
}
|
|
1057
1452
|
}
|
|
1058
1453
|
|
|
1059
1454
|
token_T* token = parser_advance(parser);
|
|
1060
|
-
|
|
1061
|
-
token_free(token);
|
|
1455
|
+
hb_buffer_append_string(&content, token->value);
|
|
1456
|
+
token_free(token, parser->allocator);
|
|
1062
1457
|
}
|
|
1063
1458
|
|
|
1064
1459
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1065
1460
|
parser_exit_foreign_content(parser);
|
|
1066
|
-
|
|
1461
|
+
hb_buffer_free(&content);
|
|
1067
1462
|
}
|
|
1068
1463
|
|
|
1069
1464
|
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
@@ -1071,36 +1466,43 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
|
|
|
1071
1466
|
|
|
1072
1467
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
1073
1468
|
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
1469
|
+
parser->consecutive_error_count = 0;
|
|
1074
1470
|
continue;
|
|
1075
1471
|
}
|
|
1076
1472
|
|
|
1077
1473
|
if (token_is(parser, TOKEN_HTML_DOCTYPE)) {
|
|
1078
1474
|
hb_array_append(children, parser_parse_html_doctype(parser));
|
|
1475
|
+
parser->consecutive_error_count = 0;
|
|
1079
1476
|
continue;
|
|
1080
1477
|
}
|
|
1081
1478
|
|
|
1082
1479
|
if (token_is(parser, TOKEN_XML_DECLARATION)) {
|
|
1083
1480
|
hb_array_append(children, parser_parse_xml_declaration(parser));
|
|
1481
|
+
parser->consecutive_error_count = 0;
|
|
1084
1482
|
continue;
|
|
1085
1483
|
}
|
|
1086
1484
|
|
|
1087
1485
|
if (token_is(parser, TOKEN_CDATA_START)) {
|
|
1088
1486
|
hb_array_append(children, parser_parse_cdata(parser));
|
|
1487
|
+
parser->consecutive_error_count = 0;
|
|
1089
1488
|
continue;
|
|
1090
1489
|
}
|
|
1091
1490
|
|
|
1092
1491
|
if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
|
|
1093
1492
|
hb_array_append(children, parser_parse_html_comment(parser));
|
|
1493
|
+
parser->consecutive_error_count = 0;
|
|
1094
1494
|
continue;
|
|
1095
1495
|
}
|
|
1096
1496
|
|
|
1097
1497
|
if (token_is(parser, TOKEN_HTML_TAG_START)) {
|
|
1098
1498
|
hb_array_append(children, parser_parse_html_element(parser));
|
|
1499
|
+
parser->consecutive_error_count = 0;
|
|
1099
1500
|
continue;
|
|
1100
1501
|
}
|
|
1101
1502
|
|
|
1102
1503
|
if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1103
1504
|
hb_array_append(children, parser_parse_html_close_tag(parser));
|
|
1505
|
+
parser->consecutive_error_count = 0;
|
|
1104
1506
|
continue;
|
|
1105
1507
|
}
|
|
1106
1508
|
|
|
@@ -1128,16 +1530,35 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
|
|
|
1128
1530
|
TOKEN_WHITESPACE
|
|
1129
1531
|
)) {
|
|
1130
1532
|
hb_array_append(children, parser_parse_text_content(parser, errors));
|
|
1533
|
+
parser->consecutive_error_count = 0;
|
|
1534
|
+
continue;
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1537
|
+
parser->consecutive_error_count++;
|
|
1538
|
+
|
|
1539
|
+
if (parser->consecutive_error_count >= MAX_CONSECUTIVE_ERRORS) {
|
|
1540
|
+
parser->in_recovery_mode = true;
|
|
1541
|
+
parser_synchronize(parser, errors);
|
|
1542
|
+
parser->consecutive_error_count = 0;
|
|
1131
1543
|
continue;
|
|
1132
1544
|
}
|
|
1133
1545
|
|
|
1134
1546
|
parser_append_unexpected_error(
|
|
1135
1547
|
parser,
|
|
1548
|
+
errors,
|
|
1136
1549
|
"Unexpected token",
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1550
|
+
TOKEN_ERB_START,
|
|
1551
|
+
TOKEN_HTML_DOCTYPE,
|
|
1552
|
+
TOKEN_HTML_COMMENT_START,
|
|
1553
|
+
TOKEN_IDENTIFIER,
|
|
1554
|
+
TOKEN_WHITESPACE,
|
|
1555
|
+
TOKEN_NBSP,
|
|
1556
|
+
TOKEN_AT,
|
|
1557
|
+
TOKEN_BACKSLASH,
|
|
1558
|
+
TOKEN_NEWLINE
|
|
1140
1559
|
);
|
|
1560
|
+
|
|
1561
|
+
parser_synchronize(parser, errors);
|
|
1141
1562
|
}
|
|
1142
1563
|
}
|
|
1143
1564
|
|
|
@@ -1151,11 +1572,11 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
|
|
|
1151
1572
|
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1152
1573
|
AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1153
1574
|
|
|
1154
|
-
if (hb_string_equals_case_insensitive(
|
|
1575
|
+
if (hb_string_equals_case_insensitive(open->tag_name->value, tag_name)) { depth++; }
|
|
1155
1576
|
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1156
1577
|
AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1157
1578
|
|
|
1158
|
-
if (hb_string_equals_case_insensitive(
|
|
1579
|
+
if (hb_string_equals_case_insensitive(close->tag_name->value, tag_name)) {
|
|
1159
1580
|
if (depth == 0) { return i; }
|
|
1160
1581
|
depth--;
|
|
1161
1582
|
}
|
|
@@ -1165,10 +1586,44 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
|
|
|
1165
1586
|
return (size_t) -1;
|
|
1166
1587
|
}
|
|
1167
1588
|
|
|
1168
|
-
static
|
|
1589
|
+
static size_t find_implicit_close_index(hb_array_T* nodes, size_t start_idx, hb_string_T tag_name) {
|
|
1590
|
+
if (!has_optional_end_tag(tag_name)) { return (size_t) -1; }
|
|
1591
|
+
|
|
1592
|
+
for (size_t i = start_idx + 1; i < hb_array_size(nodes); i++) {
|
|
1593
|
+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1594
|
+
if (node == NULL) { continue; }
|
|
1595
|
+
|
|
1596
|
+
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1597
|
+
AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1598
|
+
hb_string_T next_tag_name = open->tag_name->value;
|
|
1599
|
+
|
|
1600
|
+
if (should_implicitly_close(tag_name, next_tag_name)) { return i; }
|
|
1601
|
+
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1602
|
+
AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1603
|
+
hb_string_T close_tag_name = close->tag_name->value;
|
|
1604
|
+
|
|
1605
|
+
if (parent_closes_element(tag_name, close_tag_name)) { return i; }
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
|
|
1609
|
+
return hb_array_size(nodes);
|
|
1610
|
+
}
|
|
1169
1611
|
|
|
1170
|
-
static hb_array_T* parser_build_elements_from_tags(
|
|
1171
|
-
hb_array_T*
|
|
1612
|
+
static hb_array_T* parser_build_elements_from_tags(
|
|
1613
|
+
hb_array_T* nodes,
|
|
1614
|
+
hb_array_T* errors,
|
|
1615
|
+
const parser_options_T* options,
|
|
1616
|
+
hb_allocator_T* allocator
|
|
1617
|
+
);
|
|
1618
|
+
|
|
1619
|
+
static hb_array_T* parser_build_elements_from_tags(
|
|
1620
|
+
hb_array_T* nodes,
|
|
1621
|
+
hb_array_T* errors,
|
|
1622
|
+
const parser_options_T* options,
|
|
1623
|
+
hb_allocator_T* allocator
|
|
1624
|
+
) {
|
|
1625
|
+
bool strict = options ? options->strict : false;
|
|
1626
|
+
hb_array_T* result = hb_array_init(hb_array_size(nodes), allocator);
|
|
1172
1627
|
|
|
1173
1628
|
for (size_t index = 0; index < hb_array_size(nodes); index++) {
|
|
1174
1629
|
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, index);
|
|
@@ -1176,45 +1631,105 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
|
|
|
1176
1631
|
|
|
1177
1632
|
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1178
1633
|
AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1179
|
-
hb_string_T tag_name =
|
|
1634
|
+
hb_string_T tag_name = open_tag->tag_name->value;
|
|
1180
1635
|
|
|
1181
1636
|
size_t close_index = find_matching_close_tag(nodes, index, tag_name);
|
|
1182
1637
|
|
|
1183
1638
|
if (close_index == (size_t) -1) {
|
|
1184
|
-
|
|
1185
|
-
|
|
1639
|
+
size_t implicit_close_index = find_implicit_close_index(nodes, index, tag_name);
|
|
1640
|
+
|
|
1641
|
+
if (implicit_close_index != (size_t) -1 && implicit_close_index > index + 1) {
|
|
1642
|
+
hb_array_T* body = hb_array_init(implicit_close_index - index - 1, allocator);
|
|
1643
|
+
|
|
1644
|
+
for (size_t j = index + 1; j < implicit_close_index; j++) {
|
|
1645
|
+
hb_array_append(body, hb_array_get(nodes, j));
|
|
1646
|
+
}
|
|
1647
|
+
|
|
1648
|
+
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
|
|
1649
|
+
hb_array_free(&body);
|
|
1650
|
+
|
|
1651
|
+
position_T end_position = open_tag->base.location.end;
|
|
1652
|
+
|
|
1653
|
+
if (hb_array_size(processed_body) > 0) {
|
|
1654
|
+
AST_NODE_T* last_body_node = (AST_NODE_T*) hb_array_get(processed_body, hb_array_size(processed_body) - 1);
|
|
1655
|
+
if (last_body_node != NULL) { end_position = last_body_node->location.end; }
|
|
1656
|
+
}
|
|
1657
|
+
|
|
1658
|
+
hb_array_T* element_errors = hb_array_init(8, allocator);
|
|
1659
|
+
|
|
1660
|
+
if (strict) {
|
|
1661
|
+
append_omitted_closing_tag_error(
|
|
1662
|
+
open_tag->tag_name,
|
|
1663
|
+
end_position,
|
|
1664
|
+
open_tag->base.location.start,
|
|
1665
|
+
open_tag->base.location.end,
|
|
1666
|
+
allocator,
|
|
1667
|
+
element_errors
|
|
1668
|
+
);
|
|
1669
|
+
}
|
|
1670
|
+
|
|
1671
|
+
AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = ast_html_omitted_close_tag_node_init(
|
|
1672
|
+
open_tag->tag_name,
|
|
1673
|
+
end_position,
|
|
1674
|
+
end_position,
|
|
1675
|
+
hb_array_init(8, allocator),
|
|
1676
|
+
allocator
|
|
1677
|
+
);
|
|
1678
|
+
|
|
1679
|
+
AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
|
|
1680
|
+
(AST_NODE_T*) open_tag,
|
|
1186
1681
|
open_tag->tag_name,
|
|
1682
|
+
processed_body,
|
|
1683
|
+
(AST_NODE_T*) omitted_close_tag,
|
|
1684
|
+
false,
|
|
1685
|
+
ELEMENT_SOURCE_HTML,
|
|
1187
1686
|
open_tag->base.location.start,
|
|
1188
|
-
|
|
1189
|
-
|
|
1687
|
+
end_position,
|
|
1688
|
+
element_errors,
|
|
1689
|
+
allocator
|
|
1190
1690
|
);
|
|
1191
|
-
}
|
|
1192
1691
|
|
|
1193
|
-
|
|
1692
|
+
hb_array_append(result, element);
|
|
1693
|
+
|
|
1694
|
+
index = implicit_close_index - 1;
|
|
1695
|
+
} else {
|
|
1696
|
+
if (hb_array_size(open_tag->base.errors) == 0) {
|
|
1697
|
+
append_missing_closing_tag_error(
|
|
1698
|
+
open_tag->tag_name,
|
|
1699
|
+
open_tag->base.location.start,
|
|
1700
|
+
open_tag->base.location.end,
|
|
1701
|
+
allocator,
|
|
1702
|
+
open_tag->base.errors
|
|
1703
|
+
);
|
|
1704
|
+
}
|
|
1705
|
+
|
|
1706
|
+
hb_array_append(result, node);
|
|
1707
|
+
}
|
|
1194
1708
|
} else {
|
|
1195
1709
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) hb_array_get(nodes, close_index);
|
|
1196
1710
|
|
|
1197
|
-
hb_array_T* body = hb_array_init(close_index - index - 1);
|
|
1711
|
+
hb_array_T* body = hb_array_init(close_index - index - 1, allocator);
|
|
1198
1712
|
|
|
1199
1713
|
for (size_t j = index + 1; j < close_index; j++) {
|
|
1200
1714
|
hb_array_append(body, hb_array_get(nodes, j));
|
|
1201
1715
|
}
|
|
1202
1716
|
|
|
1203
|
-
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors);
|
|
1717
|
+
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
|
|
1204
1718
|
hb_array_free(&body);
|
|
1205
1719
|
|
|
1206
|
-
hb_array_T* element_errors = hb_array_init(8);
|
|
1720
|
+
hb_array_T* element_errors = hb_array_init(8, allocator);
|
|
1207
1721
|
|
|
1208
1722
|
AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
|
|
1209
|
-
open_tag,
|
|
1723
|
+
(AST_NODE_T*) open_tag,
|
|
1210
1724
|
open_tag->tag_name,
|
|
1211
1725
|
processed_body,
|
|
1212
|
-
close_tag,
|
|
1726
|
+
(AST_NODE_T*) close_tag,
|
|
1213
1727
|
false,
|
|
1214
1728
|
ELEMENT_SOURCE_HTML,
|
|
1215
1729
|
open_tag->base.location.start,
|
|
1216
1730
|
close_tag->base.location.end,
|
|
1217
|
-
element_errors
|
|
1731
|
+
element_errors,
|
|
1732
|
+
allocator
|
|
1218
1733
|
);
|
|
1219
1734
|
|
|
1220
1735
|
hb_array_append(result, element);
|
|
@@ -1224,12 +1739,13 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
|
|
|
1224
1739
|
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1225
1740
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1226
1741
|
|
|
1227
|
-
if (!is_void_element(
|
|
1742
|
+
if (!is_void_element(close_tag->tag_name->value)) {
|
|
1228
1743
|
if (hb_array_size(close_tag->base.errors) == 0) {
|
|
1229
1744
|
append_missing_opening_tag_error(
|
|
1230
1745
|
close_tag->tag_name,
|
|
1231
1746
|
close_tag->base.location.start,
|
|
1232
1747
|
close_tag->base.location.end,
|
|
1748
|
+
allocator,
|
|
1233
1749
|
close_tag->base.errors
|
|
1234
1750
|
);
|
|
1235
1751
|
}
|
|
@@ -1245,17 +1761,18 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
|
|
|
1245
1761
|
}
|
|
1246
1762
|
|
|
1247
1763
|
static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) {
|
|
1248
|
-
hb_array_T* children = hb_array_init(8);
|
|
1249
|
-
hb_array_T* errors = hb_array_init(8);
|
|
1764
|
+
hb_array_T* children = hb_array_init(8, parser->allocator);
|
|
1765
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1250
1766
|
position_T start = parser->current_token->location.start;
|
|
1251
1767
|
|
|
1252
1768
|
parser_parse_in_data_state(parser, children, errors);
|
|
1253
1769
|
|
|
1254
1770
|
token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors);
|
|
1255
1771
|
|
|
1256
|
-
AST_DOCUMENT_NODE_T* document_node =
|
|
1772
|
+
AST_DOCUMENT_NODE_T* document_node =
|
|
1773
|
+
ast_document_node_init(children, NULL, HERB_PRISM_NODE_EMPTY, start, eof->location.end, errors, parser->allocator);
|
|
1257
1774
|
|
|
1258
|
-
token_free(eof);
|
|
1775
|
+
token_free(eof, parser->allocator);
|
|
1259
1776
|
|
|
1260
1777
|
return document_node;
|
|
1261
1778
|
}
|
|
@@ -1266,17 +1783,18 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser) {
|
|
|
1266
1783
|
|
|
1267
1784
|
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children) {
|
|
1268
1785
|
if (parser->options.track_whitespace) {
|
|
1269
|
-
hb_array_T* errors = hb_array_init(8);
|
|
1786
|
+
hb_array_T* errors = hb_array_init(8, parser->allocator);
|
|
1270
1787
|
AST_WHITESPACE_NODE_T* whitespace_node = ast_whitespace_node_init(
|
|
1271
1788
|
whitespace_token,
|
|
1272
1789
|
whitespace_token->location.start,
|
|
1273
1790
|
whitespace_token->location.end,
|
|
1274
|
-
errors
|
|
1791
|
+
errors,
|
|
1792
|
+
parser->allocator
|
|
1275
1793
|
);
|
|
1276
1794
|
hb_array_append(children, whitespace_node);
|
|
1277
1795
|
}
|
|
1278
1796
|
|
|
1279
|
-
token_free(whitespace_token);
|
|
1797
|
+
token_free(whitespace_token, parser->allocator);
|
|
1280
1798
|
}
|
|
1281
1799
|
|
|
1282
1800
|
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
@@ -1286,7 +1804,7 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
|
1286
1804
|
if (parser->options.track_whitespace && children != NULL) {
|
|
1287
1805
|
parser_handle_whitespace(parser, whitespace, children);
|
|
1288
1806
|
} else {
|
|
1289
|
-
token_free(whitespace);
|
|
1807
|
+
token_free(whitespace, parser->allocator);
|
|
1290
1808
|
}
|
|
1291
1809
|
}
|
|
1292
1810
|
}
|
|
@@ -1294,14 +1812,27 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
|
1294
1812
|
void herb_parser_deinit(parser_T* parser) {
|
|
1295
1813
|
if (parser == NULL) { return; }
|
|
1296
1814
|
|
|
1297
|
-
if (parser->current_token != NULL) { token_free(parser->current_token); }
|
|
1298
|
-
|
|
1815
|
+
if (parser->current_token != NULL) { token_free(parser->current_token, parser->allocator); }
|
|
1816
|
+
|
|
1817
|
+
if (parser->open_tags_stack != NULL) {
|
|
1818
|
+
for (size_t i = 0; i < hb_array_size(parser->open_tags_stack); i++) {
|
|
1819
|
+
token_T* token = (token_T*) hb_array_get(parser->open_tags_stack, i);
|
|
1820
|
+
if (token != NULL) { token_free(token, parser->allocator); }
|
|
1821
|
+
}
|
|
1822
|
+
|
|
1823
|
+
hb_array_free(&parser->open_tags_stack);
|
|
1824
|
+
}
|
|
1299
1825
|
}
|
|
1300
1826
|
|
|
1301
|
-
void match_tags_in_node_array(
|
|
1827
|
+
void match_tags_in_node_array(
|
|
1828
|
+
hb_array_T* nodes,
|
|
1829
|
+
hb_array_T* errors,
|
|
1830
|
+
const parser_options_T* options,
|
|
1831
|
+
hb_allocator_T* allocator
|
|
1832
|
+
) {
|
|
1302
1833
|
if (nodes == NULL || hb_array_size(nodes) == 0) { return; }
|
|
1303
1834
|
|
|
1304
|
-
hb_array_T* processed = parser_build_elements_from_tags(nodes, errors);
|
|
1835
|
+
hb_array_T* processed = parser_build_elements_from_tags(nodes, errors, options, allocator);
|
|
1305
1836
|
|
|
1306
1837
|
nodes->size = 0;
|
|
1307
1838
|
|
|
@@ -1311,16 +1842,22 @@ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
|
|
|
1311
1842
|
|
|
1312
1843
|
hb_array_free(&processed);
|
|
1313
1844
|
|
|
1845
|
+
match_tags_context_T context = { .errors = errors, .options = options, .allocator = allocator };
|
|
1846
|
+
|
|
1314
1847
|
for (size_t i = 0; i < hb_array_size(nodes); i++) {
|
|
1315
1848
|
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1316
1849
|
if (node == NULL) { continue; }
|
|
1317
1850
|
|
|
1318
|
-
herb_visit_node(node, match_tags_visitor,
|
|
1851
|
+
herb_visit_node(node, match_tags_visitor, &context);
|
|
1319
1852
|
}
|
|
1320
1853
|
}
|
|
1321
1854
|
|
|
1322
|
-
void herb_parser_match_html_tags_post_analyze(
|
|
1855
|
+
void herb_parser_match_html_tags_post_analyze(
|
|
1856
|
+
AST_DOCUMENT_NODE_T* document,
|
|
1857
|
+
const parser_options_T* options,
|
|
1858
|
+
hb_allocator_T* allocator
|
|
1859
|
+
) {
|
|
1323
1860
|
if (document == NULL) { return; }
|
|
1324
1861
|
|
|
1325
|
-
match_tags_in_node_array(document->children, document->base.errors);
|
|
1862
|
+
match_tags_in_node_array(document->children, document->base.errors, options, allocator);
|
|
1326
1863
|
}
|