herb 0.7.4-aarch64-linux-gnu → 0.8.0-aarch64-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +8 -5
- data/config.yml +40 -20
- data/ext/herb/error_helpers.c +57 -3
- data/ext/herb/error_helpers.h +1 -1
- data/ext/herb/extconf.rb +1 -0
- data/ext/herb/extension.c +10 -24
- data/ext/herb/extension_helpers.c +12 -18
- data/ext/herb/extension_helpers.h +4 -4
- data/ext/herb/nodes.c +72 -37
- data/herb.gemspec +0 -2
- data/lib/herb/3.0/herb.so +0 -0
- data/lib/herb/3.1/herb.so +0 -0
- data/lib/herb/3.2/herb.so +0 -0
- data/lib/herb/3.3/herb.so +0 -0
- data/lib/herb/3.4/herb.so +0 -0
- data/lib/herb/ast/helpers.rb +11 -0
- data/lib/herb/ast/node.rb +15 -6
- data/lib/herb/ast/nodes.rb +609 -392
- data/lib/herb/cli.rb +31 -0
- data/lib/herb/colors.rb +82 -0
- data/lib/herb/engine/compiler.rb +140 -14
- data/lib/herb/engine/debug_visitor.rb +1 -5
- data/lib/herb/engine/parser_error_overlay.rb +1 -1
- data/lib/herb/engine.rb +18 -20
- data/lib/herb/errors.rb +166 -56
- data/lib/herb/location.rb +2 -2
- data/lib/herb/project.rb +86 -21
- data/lib/herb/token.rb +14 -2
- data/lib/herb/version.rb +1 -1
- data/lib/herb.rb +1 -0
- data/sig/herb/ast/helpers.rbs +3 -0
- data/sig/herb/ast/node.rbs +12 -5
- data/sig/herb/ast/nodes.rbs +124 -62
- data/sig/herb/colors.rbs +35 -0
- data/sig/herb/engine/compiler.rbs +23 -1
- data/sig/herb/errors.rbs +74 -20
- data/sig/herb/token.rbs +8 -0
- data/sig/herb_c_extension.rbs +1 -1
- data/sig/serialized_ast_errors.rbs +8 -0
- data/src/analyze.c +461 -249
- data/src/analyze_helpers.c +5 -0
- data/src/analyze_missing_end.c +147 -0
- data/src/analyze_transform.c +196 -0
- data/src/analyzed_ruby.c +23 -2
- data/src/ast_node.c +14 -17
- data/src/ast_nodes.c +179 -181
- data/src/ast_pretty_print.c +232 -232
- data/src/element_source.c +7 -6
- data/src/errors.c +272 -152
- data/src/extract.c +92 -34
- data/src/herb.c +37 -49
- data/src/html_util.c +34 -96
- data/src/include/analyze.h +10 -2
- data/src/include/analyze_helpers.h +3 -0
- data/src/include/analyzed_ruby.h +4 -2
- data/src/include/ast_node.h +4 -4
- data/src/include/ast_nodes.h +68 -67
- data/src/include/ast_pretty_print.h +2 -2
- data/src/include/element_source.h +3 -1
- data/src/include/errors.h +42 -26
- data/src/include/extract.h +4 -4
- data/src/include/herb.h +6 -7
- data/src/include/html_util.h +4 -5
- data/src/include/lexer.h +1 -3
- data/src/include/lexer_peek_helpers.h +21 -19
- data/src/include/lexer_struct.h +12 -10
- data/src/include/location.h +10 -13
- data/src/include/macros.h +4 -0
- data/src/include/parser.h +12 -6
- data/src/include/parser_helpers.h +26 -16
- data/src/include/position.h +3 -14
- data/src/include/pretty_print.h +38 -28
- data/src/include/prism_helpers.h +1 -1
- data/src/include/range.h +4 -13
- data/src/include/token.h +5 -11
- data/src/include/token_struct.h +2 -2
- data/src/include/utf8.h +3 -2
- data/src/include/util/hb_arena.h +31 -0
- data/src/include/util/hb_arena_debug.h +8 -0
- data/src/include/util/hb_array.h +33 -0
- data/src/include/util/hb_buffer.h +34 -0
- data/src/include/util/hb_string.h +29 -0
- data/src/include/util/hb_system.h +9 -0
- data/src/include/util.h +3 -14
- data/src/include/version.h +1 -1
- data/src/include/visitor.h +1 -1
- data/src/io.c +7 -4
- data/src/lexer.c +62 -88
- data/src/lexer_peek_helpers.c +42 -38
- data/src/location.c +9 -37
- data/src/main.c +19 -23
- data/src/parser.c +373 -313
- data/src/parser_helpers.c +60 -54
- data/src/parser_match_tags.c +316 -0
- data/src/pretty_print.c +88 -117
- data/src/prism_helpers.c +7 -7
- data/src/range.c +2 -35
- data/src/token.c +36 -87
- data/src/utf8.c +4 -4
- data/src/util/hb_arena.c +179 -0
- data/src/util/hb_arena_debug.c +237 -0
- data/src/{array.c → util/hb_array.c} +26 -27
- data/src/util/hb_buffer.c +203 -0
- data/src/util/hb_string.c +85 -0
- data/src/util/hb_system.c +30 -0
- data/src/util.c +29 -99
- data/src/visitor.c +54 -54
- data/templates/ext/herb/error_helpers.c.erb +3 -3
- data/templates/ext/herb/error_helpers.h.erb +1 -1
- data/templates/ext/herb/nodes.c.erb +11 -6
- data/templates/java/error_helpers.c.erb +75 -0
- data/templates/java/error_helpers.h.erb +20 -0
- data/templates/java/nodes.c.erb +97 -0
- data/templates/java/nodes.h.erb +23 -0
- data/templates/java/org/herb/ast/Errors.java.erb +121 -0
- data/templates/java/org/herb/ast/NodeVisitor.java.erb +14 -0
- data/templates/java/org/herb/ast/Nodes.java.erb +220 -0
- data/templates/java/org/herb/ast/Visitor.java.erb +56 -0
- data/templates/javascript/packages/core/src/visitor.ts.erb +29 -1
- data/templates/javascript/packages/node/extension/error_helpers.cpp.erb +8 -8
- data/templates/javascript/packages/node/extension/error_helpers.h.erb +1 -1
- data/templates/javascript/packages/node/extension/nodes.cpp.erb +9 -9
- data/templates/javascript/packages/node/extension/nodes.h.erb +1 -1
- data/templates/lib/herb/ast/nodes.rb.erb +28 -16
- data/templates/lib/herb/errors.rb.erb +17 -12
- data/templates/rust/src/ast/nodes.rs.erb +220 -0
- data/templates/rust/src/errors.rs.erb +216 -0
- data/templates/rust/src/nodes.rs.erb +374 -0
- data/templates/src/analyze_missing_end.c.erb +36 -0
- data/templates/src/analyze_transform.c.erb +24 -0
- data/templates/src/ast_nodes.c.erb +14 -16
- data/templates/src/ast_pretty_print.c.erb +36 -36
- data/templates/src/errors.c.erb +36 -38
- data/templates/src/include/ast_nodes.h.erb +11 -10
- data/templates/src/include/ast_pretty_print.h.erb +2 -2
- data/templates/src/include/errors.h.erb +9 -9
- data/templates/src/parser_match_tags.c.erb +38 -0
- data/templates/src/visitor.c.erb +4 -4
- data/templates/template.rb +22 -3
- data/templates/wasm/error_helpers.cpp.erb +9 -9
- data/templates/wasm/error_helpers.h.erb +1 -1
- data/templates/wasm/nodes.cpp.erb +9 -9
- data/templates/wasm/nodes.h.erb +1 -1
- data/vendor/prism/Rakefile +4 -1
- data/vendor/prism/config.yml +2 -1
- data/vendor/prism/include/prism/ast.h +31 -1
- data/vendor/prism/include/prism/diagnostic.h +1 -0
- data/vendor/prism/include/prism/version.h +3 -3
- data/vendor/prism/src/diagnostic.c +3 -1
- data/vendor/prism/src/prism.c +130 -71
- data/vendor/prism/src/util/pm_string.c +6 -8
- data/vendor/prism/templates/include/prism/ast.h.erb +2 -0
- data/vendor/prism/templates/java/org/prism/Loader.java.erb +2 -2
- data/vendor/prism/templates/javascript/src/deserialize.js.erb +2 -2
- data/vendor/prism/templates/lib/prism/serialize.rb.erb +2 -2
- data/vendor/prism/templates/sig/prism.rbs.erb +4 -0
- data/vendor/prism/templates/src/diagnostic.c.erb +1 -0
- metadata +34 -21
- data/lib/herb/libherb/array.rb +0 -51
- data/lib/herb/libherb/ast_node.rb +0 -50
- data/lib/herb/libherb/buffer.rb +0 -56
- data/lib/herb/libherb/extract_result.rb +0 -20
- data/lib/herb/libherb/lex_result.rb +0 -32
- data/lib/herb/libherb/libherb.rb +0 -52
- data/lib/herb/libherb/parse_result.rb +0 -20
- data/lib/herb/libherb/token.rb +0 -46
- data/lib/herb/libherb.rb +0 -35
- data/src/buffer.c +0 -232
- data/src/include/array.h +0 -33
- data/src/include/buffer.h +0 -39
- data/src/include/json.h +0 -28
- data/src/include/memory.h +0 -12
- data/src/json.c +0 -205
- data/src/memory.c +0 -53
- data/src/position.c +0 -33
data/src/parser.c
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
#include "include/parser.h"
|
|
2
|
-
#include "include/array.h"
|
|
3
2
|
#include "include/ast_node.h"
|
|
4
3
|
#include "include/ast_nodes.h"
|
|
5
|
-
#include "include/buffer.h"
|
|
6
4
|
#include "include/errors.h"
|
|
7
5
|
#include "include/html_util.h"
|
|
8
6
|
#include "include/lexer.h"
|
|
@@ -11,65 +9,61 @@
|
|
|
11
9
|
#include "include/token.h"
|
|
12
10
|
#include "include/token_matchers.h"
|
|
13
11
|
#include "include/util.h"
|
|
12
|
+
#include "include/util/hb_array.h"
|
|
13
|
+
#include "include/util/hb_buffer.h"
|
|
14
|
+
#include "include/util/hb_string.h"
|
|
15
|
+
#include "include/visitor.h"
|
|
14
16
|
|
|
15
17
|
#include <stdio.h>
|
|
16
18
|
#include <stdlib.h>
|
|
17
19
|
#include <string.h>
|
|
18
20
|
#include <strings.h>
|
|
19
21
|
|
|
20
|
-
static void parser_parse_in_data_state(parser_T* parser,
|
|
21
|
-
static void parser_parse_foreign_content(parser_T* parser,
|
|
22
|
+
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
23
|
+
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors);
|
|
22
24
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
|
|
23
|
-
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token,
|
|
24
|
-
static void parser_consume_whitespace(parser_T* parser,
|
|
25
|
+
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children);
|
|
26
|
+
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children);
|
|
25
27
|
static void parser_skip_erb_content(lexer_T* lexer);
|
|
26
28
|
static bool parser_lookahead_erb_is_attribute(lexer_T* lexer);
|
|
27
|
-
static void parser_handle_erb_in_open_tag(parser_T* parser,
|
|
28
|
-
static void parser_handle_whitespace_in_open_tag(parser_T* parser,
|
|
29
|
+
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
30
|
+
static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children);
|
|
31
|
+
|
|
32
|
+
const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false };
|
|
29
33
|
|
|
30
34
|
size_t parser_sizeof(void) {
|
|
31
35
|
return sizeof(struct PARSER_STRUCT);
|
|
32
36
|
}
|
|
33
37
|
|
|
34
|
-
parser_T*
|
|
35
|
-
parser_T* parser = calloc(1, parser_sizeof());
|
|
36
|
-
|
|
38
|
+
void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options) {
|
|
37
39
|
parser->lexer = lexer;
|
|
38
40
|
parser->current_token = lexer_next_token(lexer);
|
|
39
|
-
parser->open_tags_stack =
|
|
41
|
+
parser->open_tags_stack = hb_array_init(16);
|
|
40
42
|
parser->state = PARSER_STATE_DATA;
|
|
41
43
|
parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
|
|
42
|
-
|
|
43
|
-
if (options) {
|
|
44
|
-
parser->options = calloc(1, sizeof(parser_options_T));
|
|
45
|
-
parser->options->track_whitespace = options->track_whitespace;
|
|
46
|
-
} else {
|
|
47
|
-
parser->options = NULL;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
return parser;
|
|
44
|
+
parser->options = options;
|
|
51
45
|
}
|
|
52
46
|
|
|
53
47
|
static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
48
|
+
hb_array_T* errors = hb_array_init(8);
|
|
49
|
+
hb_array_T* children = hb_array_init(8);
|
|
50
|
+
hb_buffer_T content;
|
|
51
|
+
hb_buffer_init(&content, 128);
|
|
57
52
|
|
|
58
53
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_CDATA_START, errors);
|
|
59
|
-
position_T
|
|
54
|
+
position_T start = parser->current_token->location.start;
|
|
60
55
|
|
|
61
56
|
while (token_is_none_of(parser, TOKEN_CDATA_END, TOKEN_EOF)) {
|
|
62
57
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
63
58
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
64
59
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
start = position_copy(parser->current_token->location->start);
|
|
60
|
+
hb_array_append(children, erb_node);
|
|
61
|
+
start = parser->current_token->location.start;
|
|
68
62
|
continue;
|
|
69
63
|
}
|
|
70
64
|
|
|
71
65
|
token_T* token = parser_advance(parser);
|
|
72
|
-
|
|
66
|
+
hb_buffer_append(&content, token->value);
|
|
73
67
|
token_free(token);
|
|
74
68
|
}
|
|
75
69
|
|
|
@@ -80,13 +74,12 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
80
74
|
tag_opening,
|
|
81
75
|
children,
|
|
82
76
|
tag_closing,
|
|
83
|
-
tag_opening->location
|
|
84
|
-
tag_closing->location
|
|
77
|
+
tag_opening->location.start,
|
|
78
|
+
tag_closing->location.end,
|
|
85
79
|
errors
|
|
86
80
|
);
|
|
87
81
|
|
|
88
|
-
|
|
89
|
-
buffer_free(&content);
|
|
82
|
+
free(content.value);
|
|
90
83
|
token_free(tag_opening);
|
|
91
84
|
token_free(tag_closing);
|
|
92
85
|
|
|
@@ -94,28 +87,28 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
|
|
|
94
87
|
}
|
|
95
88
|
|
|
96
89
|
static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
97
|
-
|
|
98
|
-
|
|
90
|
+
hb_array_T* errors = hb_array_init(8);
|
|
91
|
+
hb_array_T* children = hb_array_init(8);
|
|
99
92
|
token_T* comment_start = parser_consume_expected(parser, TOKEN_HTML_COMMENT_START, errors);
|
|
100
|
-
position_T
|
|
93
|
+
position_T start = parser->current_token->location.start;
|
|
101
94
|
|
|
102
|
-
|
|
95
|
+
hb_buffer_T comment;
|
|
96
|
+
hb_buffer_init(&comment, 512);
|
|
103
97
|
|
|
104
98
|
while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_EOF)) {
|
|
105
99
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
106
100
|
parser_append_literal_node_from_buffer(parser, &comment, children, start);
|
|
107
101
|
|
|
108
102
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
109
|
-
|
|
103
|
+
hb_array_append(children, erb_node);
|
|
110
104
|
|
|
111
|
-
|
|
112
|
-
start = position_copy(parser->current_token->location->start);
|
|
105
|
+
start = parser->current_token->location.start;
|
|
113
106
|
|
|
114
107
|
continue;
|
|
115
108
|
}
|
|
116
109
|
|
|
117
110
|
token_T* token = parser_advance(parser);
|
|
118
|
-
|
|
111
|
+
hb_buffer_append(&comment, token->value);
|
|
119
112
|
token_free(token);
|
|
120
113
|
}
|
|
121
114
|
|
|
@@ -127,13 +120,12 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
127
120
|
comment_start,
|
|
128
121
|
children,
|
|
129
122
|
comment_end,
|
|
130
|
-
comment_start->location
|
|
131
|
-
comment_end->location
|
|
123
|
+
comment_start->location.start,
|
|
124
|
+
comment_end->location.end,
|
|
132
125
|
errors
|
|
133
126
|
);
|
|
134
127
|
|
|
135
|
-
|
|
136
|
-
position_free(start);
|
|
128
|
+
free(comment.value);
|
|
137
129
|
token_free(comment_start);
|
|
138
130
|
token_free(comment_end);
|
|
139
131
|
|
|
@@ -141,26 +133,27 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
|
|
141
133
|
}
|
|
142
134
|
|
|
143
135
|
static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
136
|
+
hb_array_T* errors = hb_array_init(8);
|
|
137
|
+
hb_array_T* children = hb_array_init(8);
|
|
138
|
+
hb_buffer_T content;
|
|
139
|
+
hb_buffer_init(&content, 64);
|
|
147
140
|
|
|
148
141
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_DOCTYPE, errors);
|
|
149
142
|
|
|
150
|
-
position_T
|
|
143
|
+
position_T start = parser->current_token->location.start;
|
|
151
144
|
|
|
152
145
|
while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_EOF)) {
|
|
153
146
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
154
147
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
155
148
|
|
|
156
149
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
157
|
-
|
|
150
|
+
hb_array_append(children, erb_node);
|
|
158
151
|
|
|
159
152
|
continue;
|
|
160
153
|
}
|
|
161
154
|
|
|
162
155
|
token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
|
|
163
|
-
|
|
156
|
+
hb_buffer_append(&content, token->value);
|
|
164
157
|
token_free(token);
|
|
165
158
|
}
|
|
166
159
|
|
|
@@ -172,43 +165,42 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
|
172
165
|
tag_opening,
|
|
173
166
|
children,
|
|
174
167
|
tag_closing,
|
|
175
|
-
tag_opening->location
|
|
176
|
-
tag_closing->location
|
|
168
|
+
tag_opening->location.start,
|
|
169
|
+
tag_closing->location.end,
|
|
177
170
|
errors
|
|
178
171
|
);
|
|
179
172
|
|
|
180
|
-
position_free(start);
|
|
181
173
|
token_free(tag_opening);
|
|
182
174
|
token_free(tag_closing);
|
|
183
|
-
|
|
175
|
+
free(content.value);
|
|
184
176
|
|
|
185
177
|
return doctype;
|
|
186
178
|
}
|
|
187
179
|
|
|
188
180
|
static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser) {
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
181
|
+
hb_array_T* errors = hb_array_init(8);
|
|
182
|
+
hb_array_T* children = hb_array_init(8);
|
|
183
|
+
hb_buffer_T content;
|
|
184
|
+
hb_buffer_init(&content, 64);
|
|
192
185
|
|
|
193
186
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_XML_DECLARATION, errors);
|
|
194
187
|
|
|
195
|
-
position_T
|
|
188
|
+
position_T start = parser->current_token->location.start;
|
|
196
189
|
|
|
197
190
|
while (token_is_none_of(parser, TOKEN_XML_DECLARATION_END, TOKEN_EOF)) {
|
|
198
191
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
199
192
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
200
193
|
|
|
201
194
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
202
|
-
|
|
195
|
+
hb_array_append(children, erb_node);
|
|
203
196
|
|
|
204
|
-
|
|
205
|
-
start = position_copy(parser->current_token->location->start);
|
|
197
|
+
start = parser->current_token->location.start;
|
|
206
198
|
|
|
207
199
|
continue;
|
|
208
200
|
}
|
|
209
201
|
|
|
210
202
|
token_T* token = parser_advance(parser);
|
|
211
|
-
|
|
203
|
+
hb_buffer_append(&content, token->value);
|
|
212
204
|
token_free(token);
|
|
213
205
|
}
|
|
214
206
|
|
|
@@ -220,23 +212,23 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
|
|
|
220
212
|
tag_opening,
|
|
221
213
|
children,
|
|
222
214
|
tag_closing,
|
|
223
|
-
tag_opening->location
|
|
224
|
-
tag_closing->location
|
|
215
|
+
tag_opening->location.start,
|
|
216
|
+
tag_closing->location.end,
|
|
225
217
|
errors
|
|
226
218
|
);
|
|
227
219
|
|
|
228
|
-
position_free(start);
|
|
229
220
|
token_free(tag_opening);
|
|
230
221
|
token_free(tag_closing);
|
|
231
|
-
|
|
222
|
+
free(content.value);
|
|
232
223
|
|
|
233
224
|
return xml_declaration;
|
|
234
225
|
}
|
|
235
226
|
|
|
236
|
-
static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser,
|
|
237
|
-
position_T
|
|
227
|
+
static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_array_T* document_errors) {
|
|
228
|
+
position_T start = parser->current_token->location.start;
|
|
238
229
|
|
|
239
|
-
|
|
230
|
+
hb_buffer_T content;
|
|
231
|
+
hb_buffer_init(&content, 2048);
|
|
240
232
|
|
|
241
233
|
while (token_is_none_of(
|
|
242
234
|
parser,
|
|
@@ -248,54 +240,50 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
|
|
|
248
240
|
TOKEN_EOF
|
|
249
241
|
)) {
|
|
250
242
|
if (token_is(parser, TOKEN_ERROR)) {
|
|
251
|
-
|
|
243
|
+
free(content.value);
|
|
252
244
|
|
|
253
245
|
token_T* token = parser_consume_expected(parser, TOKEN_ERROR, document_errors);
|
|
254
246
|
append_unexpected_error(
|
|
255
247
|
"Token Error",
|
|
256
248
|
"not TOKEN_ERROR",
|
|
257
249
|
token->value,
|
|
258
|
-
token->location
|
|
259
|
-
token->location
|
|
250
|
+
token->location.start,
|
|
251
|
+
token->location.end,
|
|
260
252
|
document_errors
|
|
261
253
|
);
|
|
262
254
|
|
|
263
255
|
token_free(token);
|
|
264
|
-
position_free(start);
|
|
265
256
|
|
|
266
257
|
return NULL;
|
|
267
258
|
}
|
|
268
259
|
|
|
269
260
|
token_T* token = parser_advance(parser);
|
|
270
|
-
|
|
261
|
+
hb_buffer_append(&content, token->value);
|
|
271
262
|
token_free(token);
|
|
272
263
|
}
|
|
273
264
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
if (buffer_length(&content) > 0) {
|
|
277
|
-
AST_HTML_TEXT_NODE_T* text_node =
|
|
278
|
-
ast_html_text_node_init(buffer_value(&content), start, parser->current_token->location->start, errors);
|
|
265
|
+
hb_array_T* errors = hb_array_init(8);
|
|
279
266
|
|
|
280
|
-
|
|
281
|
-
buffer_free(&content);
|
|
267
|
+
AST_HTML_TEXT_NODE_T* text_node = NULL;
|
|
282
268
|
|
|
283
|
-
|
|
269
|
+
if (hb_buffer_length(&content) > 0) {
|
|
270
|
+
text_node =
|
|
271
|
+
ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors);
|
|
272
|
+
} else {
|
|
273
|
+
text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors);
|
|
284
274
|
}
|
|
285
275
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
position_free(start);
|
|
289
|
-
buffer_free(&content);
|
|
276
|
+
free(content.value);
|
|
290
277
|
|
|
291
278
|
return text_node;
|
|
292
279
|
}
|
|
293
280
|
|
|
294
281
|
static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
282
|
+
hb_array_T* errors = hb_array_init(8);
|
|
283
|
+
hb_array_T* children = hb_array_init(8);
|
|
284
|
+
hb_buffer_T buffer;
|
|
285
|
+
hb_buffer_init(&buffer, 128);
|
|
286
|
+
position_T start = parser->current_token->location.start;
|
|
299
287
|
|
|
300
288
|
while (token_is_none_of(
|
|
301
289
|
parser,
|
|
@@ -310,53 +298,50 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
|
|
|
310
298
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
311
299
|
|
|
312
300
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
313
|
-
|
|
301
|
+
hb_array_append(children, erb_node);
|
|
314
302
|
|
|
315
|
-
|
|
316
|
-
start = position_copy(parser->current_token->location->start);
|
|
303
|
+
start = parser->current_token->location.start;
|
|
317
304
|
continue;
|
|
318
305
|
}
|
|
319
306
|
|
|
320
307
|
token_T* token = parser_advance(parser);
|
|
321
|
-
|
|
308
|
+
hb_buffer_append(&buffer, token->value);
|
|
322
309
|
token_free(token);
|
|
323
310
|
}
|
|
324
311
|
|
|
325
312
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
326
313
|
|
|
327
|
-
position_T
|
|
328
|
-
position_T
|
|
314
|
+
position_T node_start = { 0 };
|
|
315
|
+
position_T node_end = { 0 };
|
|
329
316
|
|
|
330
317
|
if (children->size > 0) {
|
|
331
|
-
AST_NODE_T* first_child =
|
|
332
|
-
AST_NODE_T* last_child =
|
|
318
|
+
AST_NODE_T* first_child = hb_array_get(children, 0);
|
|
319
|
+
AST_NODE_T* last_child = hb_array_get(children, children->size - 1);
|
|
333
320
|
|
|
334
|
-
node_start =
|
|
335
|
-
node_end =
|
|
321
|
+
node_start = first_child->location.start;
|
|
322
|
+
node_end = last_child->location.end;
|
|
336
323
|
} else {
|
|
337
|
-
node_start =
|
|
338
|
-
node_end =
|
|
324
|
+
node_start = parser->current_token->location.start;
|
|
325
|
+
node_end = parser->current_token->location.start;
|
|
339
326
|
}
|
|
340
327
|
|
|
341
328
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
|
342
329
|
ast_html_attribute_name_node_init(children, node_start, node_end, errors);
|
|
343
330
|
|
|
344
|
-
|
|
345
|
-
position_free(node_start);
|
|
346
|
-
position_free(node_end);
|
|
347
|
-
buffer_free(&buffer);
|
|
331
|
+
free(buffer.value);
|
|
348
332
|
|
|
349
333
|
return attribute_name;
|
|
350
334
|
}
|
|
351
335
|
|
|
352
336
|
static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value(
|
|
353
337
|
parser_T* parser,
|
|
354
|
-
|
|
355
|
-
|
|
338
|
+
hb_array_T* children,
|
|
339
|
+
hb_array_T* errors
|
|
356
340
|
) {
|
|
357
|
-
|
|
341
|
+
hb_buffer_T buffer;
|
|
342
|
+
hb_buffer_init(&buffer, 512);
|
|
358
343
|
token_T* opening_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
359
|
-
position_T
|
|
344
|
+
position_T start = parser->current_token->location.start;
|
|
360
345
|
|
|
361
346
|
while (!token_is(parser, TOKEN_EOF)
|
|
362
347
|
&& !(
|
|
@@ -366,10 +351,9 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
366
351
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
367
352
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
368
353
|
|
|
369
|
-
|
|
354
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
370
355
|
|
|
371
|
-
|
|
372
|
-
start = position_copy(parser->current_token->location->start);
|
|
356
|
+
start = parser->current_token->location.start;
|
|
373
357
|
|
|
374
358
|
continue;
|
|
375
359
|
}
|
|
@@ -381,8 +365,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
381
365
|
|
|
382
366
|
if (next_token && next_token->type == TOKEN_QUOTE && opening_quote != NULL
|
|
383
367
|
&& strcmp(next_token->value, opening_quote->value) == 0) {
|
|
384
|
-
|
|
385
|
-
|
|
368
|
+
hb_buffer_append(&buffer, parser->current_token->value);
|
|
369
|
+
hb_buffer_append(&buffer, next_token->value);
|
|
386
370
|
|
|
387
371
|
token_free(parser->current_token);
|
|
388
372
|
token_free(next_token);
|
|
@@ -396,7 +380,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
396
380
|
}
|
|
397
381
|
}
|
|
398
382
|
|
|
399
|
-
|
|
383
|
+
hb_buffer_append(&buffer, parser->current_token->value);
|
|
400
384
|
token_free(parser->current_token);
|
|
401
385
|
|
|
402
386
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
@@ -414,8 +398,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
414
398
|
"Unescaped quote character in attribute value",
|
|
415
399
|
"escaped quote (\\') or different quote style (\")",
|
|
416
400
|
opening_quote->value,
|
|
417
|
-
potential_closing->location
|
|
418
|
-
potential_closing->location
|
|
401
|
+
potential_closing->location.start,
|
|
402
|
+
potential_closing->location.end,
|
|
419
403
|
errors
|
|
420
404
|
);
|
|
421
405
|
|
|
@@ -424,7 +408,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
424
408
|
token_free(parser->current_token);
|
|
425
409
|
parser->current_token = potential_closing;
|
|
426
410
|
|
|
427
|
-
|
|
411
|
+
hb_buffer_append(&buffer, parser->current_token->value);
|
|
428
412
|
token_free(parser->current_token);
|
|
429
413
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
430
414
|
|
|
@@ -436,15 +420,14 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
436
420
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
437
421
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
438
422
|
|
|
439
|
-
|
|
423
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
440
424
|
|
|
441
|
-
|
|
442
|
-
start = position_copy(parser->current_token->location->start);
|
|
425
|
+
start = parser->current_token->location.start;
|
|
443
426
|
|
|
444
427
|
continue;
|
|
445
428
|
}
|
|
446
429
|
|
|
447
|
-
|
|
430
|
+
hb_buffer_append(&buffer, parser->current_token->value);
|
|
448
431
|
token_free(parser->current_token);
|
|
449
432
|
|
|
450
433
|
parser->current_token = lexer_next_token(parser->lexer);
|
|
@@ -458,8 +441,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
458
441
|
}
|
|
459
442
|
|
|
460
443
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
|
461
|
-
|
|
462
|
-
buffer_free(&buffer);
|
|
444
|
+
free(buffer.value);
|
|
463
445
|
|
|
464
446
|
token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
|
|
465
447
|
|
|
@@ -467,8 +449,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
467
449
|
append_quotes_mismatch_error(
|
|
468
450
|
opening_quote,
|
|
469
451
|
closing_quote,
|
|
470
|
-
closing_quote->location
|
|
471
|
-
closing_quote->location
|
|
452
|
+
closing_quote->location.start,
|
|
453
|
+
closing_quote->location.end,
|
|
472
454
|
errors
|
|
473
455
|
);
|
|
474
456
|
}
|
|
@@ -478,8 +460,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
478
460
|
children,
|
|
479
461
|
closing_quote,
|
|
480
462
|
true,
|
|
481
|
-
opening_quote->location
|
|
482
|
-
closing_quote->location
|
|
463
|
+
opening_quote->location.start,
|
|
464
|
+
closing_quote->location.end,
|
|
483
465
|
errors
|
|
484
466
|
);
|
|
485
467
|
|
|
@@ -490,21 +472,21 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
|
490
472
|
}
|
|
491
473
|
|
|
492
474
|
static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser_T* parser) {
|
|
493
|
-
|
|
494
|
-
|
|
475
|
+
hb_array_T* children = hb_array_init(8);
|
|
476
|
+
hb_array_T* errors = hb_array_init(8);
|
|
495
477
|
|
|
496
478
|
// <div id=<%= "home" %>>
|
|
497
479
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
498
480
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
499
|
-
|
|
481
|
+
hb_array_append(children, erb_node);
|
|
500
482
|
|
|
501
483
|
return ast_html_attribute_value_node_init(
|
|
502
484
|
NULL,
|
|
503
485
|
children,
|
|
504
486
|
NULL,
|
|
505
487
|
false,
|
|
506
|
-
erb_node->base.location
|
|
507
|
-
erb_node->base.location
|
|
488
|
+
erb_node->base.location.start,
|
|
489
|
+
erb_node->base.location.end,
|
|
508
490
|
errors
|
|
509
491
|
);
|
|
510
492
|
}
|
|
@@ -515,15 +497,15 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
515
497
|
AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier);
|
|
516
498
|
token_free(identifier);
|
|
517
499
|
|
|
518
|
-
|
|
500
|
+
hb_array_append(children, literal);
|
|
519
501
|
|
|
520
502
|
return ast_html_attribute_value_node_init(
|
|
521
503
|
NULL,
|
|
522
504
|
children,
|
|
523
505
|
NULL,
|
|
524
506
|
false,
|
|
525
|
-
literal->base.location
|
|
526
|
-
literal->base.location
|
|
507
|
+
literal->base.location.start,
|
|
508
|
+
literal->base.location.end,
|
|
527
509
|
errors
|
|
528
510
|
);
|
|
529
511
|
}
|
|
@@ -533,8 +515,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
533
515
|
|
|
534
516
|
if (token_is(parser, TOKEN_BACKTICK)) {
|
|
535
517
|
token_T* token = parser_advance(parser);
|
|
536
|
-
position_T
|
|
537
|
-
position_T
|
|
518
|
+
position_T start = token->location.start;
|
|
519
|
+
position_T end = token->location.end;
|
|
538
520
|
|
|
539
521
|
append_unexpected_error(
|
|
540
522
|
"Invalid quote character for HTML attribute",
|
|
@@ -548,8 +530,6 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
548
530
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
|
|
549
531
|
ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
|
|
550
532
|
|
|
551
|
-
position_free(start);
|
|
552
|
-
position_free(end);
|
|
553
533
|
token_free(token);
|
|
554
534
|
|
|
555
535
|
return value;
|
|
@@ -559,8 +539,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
559
539
|
"Unexpected Token",
|
|
560
540
|
"TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START",
|
|
561
541
|
token_type_to_string(parser->current_token->type),
|
|
562
|
-
parser->current_token->location
|
|
563
|
-
parser->current_token->location
|
|
542
|
+
parser->current_token->location.start,
|
|
543
|
+
parser->current_token->location.end,
|
|
564
544
|
errors
|
|
565
545
|
);
|
|
566
546
|
|
|
@@ -569,8 +549,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
569
549
|
children,
|
|
570
550
|
NULL,
|
|
571
551
|
false,
|
|
572
|
-
parser->current_token->location
|
|
573
|
-
parser->current_token->location
|
|
552
|
+
parser->current_token->location.start,
|
|
553
|
+
parser->current_token->location.end,
|
|
574
554
|
errors
|
|
575
555
|
);
|
|
576
556
|
|
|
@@ -580,56 +560,60 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
|
580
560
|
static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
|
|
581
561
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
|
|
582
562
|
|
|
583
|
-
if (parser->options
|
|
563
|
+
if (parser->options.track_whitespace) {
|
|
584
564
|
bool has_equals = (parser->current_token->type == TOKEN_EQUALS)
|
|
585
565
|
|| lexer_peek_for_token_type_after_whitespace(parser->lexer, TOKEN_EQUALS);
|
|
586
566
|
|
|
587
567
|
if (has_equals) {
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
position_T
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
568
|
+
hb_buffer_T equals_buffer;
|
|
569
|
+
hb_buffer_init(&equals_buffer, 256);
|
|
570
|
+
position_T equals_start = { 0 };
|
|
571
|
+
position_T equals_end = { 0 };
|
|
572
|
+
uint32_t range_start = 0;
|
|
573
|
+
uint32_t range_end = 0;
|
|
574
|
+
|
|
575
|
+
bool equals_start_present = false;
|
|
594
576
|
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
595
577
|
token_T* whitespace = parser_advance(parser);
|
|
596
578
|
|
|
597
|
-
if (
|
|
598
|
-
|
|
599
|
-
|
|
579
|
+
if (equals_start_present == false) {
|
|
580
|
+
equals_start_present = true;
|
|
581
|
+
equals_start = whitespace->location.start;
|
|
582
|
+
range_start = whitespace->range.from;
|
|
600
583
|
}
|
|
601
584
|
|
|
602
|
-
|
|
585
|
+
hb_buffer_append(&equals_buffer, whitespace->value);
|
|
603
586
|
token_free(whitespace);
|
|
604
587
|
}
|
|
605
588
|
|
|
606
589
|
token_T* equals = parser_advance(parser);
|
|
607
590
|
|
|
608
|
-
if (
|
|
609
|
-
|
|
610
|
-
|
|
591
|
+
if (equals_start_present == false) {
|
|
592
|
+
equals_start_present = true;
|
|
593
|
+
equals_start = equals->location.start;
|
|
594
|
+
range_start = equals->range.from;
|
|
611
595
|
}
|
|
612
596
|
|
|
613
|
-
|
|
614
|
-
equals_end =
|
|
615
|
-
range_end = equals->range
|
|
597
|
+
hb_buffer_append(&equals_buffer, equals->value);
|
|
598
|
+
equals_end = equals->location.end;
|
|
599
|
+
range_end = equals->range.to;
|
|
616
600
|
token_free(equals);
|
|
617
601
|
|
|
618
602
|
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
619
603
|
token_T* whitespace = parser_advance(parser);
|
|
620
|
-
|
|
621
|
-
equals_end =
|
|
622
|
-
range_end = whitespace->range
|
|
604
|
+
hb_buffer_append(&equals_buffer, whitespace->value);
|
|
605
|
+
equals_end = whitespace->location.end;
|
|
606
|
+
range_end = whitespace->range.to;
|
|
623
607
|
token_free(whitespace);
|
|
624
608
|
}
|
|
625
609
|
|
|
626
610
|
token_T* equals_with_whitespace = calloc(1, sizeof(token_T));
|
|
627
611
|
equals_with_whitespace->type = TOKEN_EQUALS;
|
|
628
612
|
equals_with_whitespace->value = herb_strdup(equals_buffer.value);
|
|
629
|
-
equals_with_whitespace->location =
|
|
630
|
-
equals_with_whitespace->range =
|
|
613
|
+
equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
|
|
614
|
+
equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
|
|
631
615
|
|
|
632
|
-
|
|
616
|
+
free(equals_buffer.value);
|
|
633
617
|
|
|
634
618
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
|
635
619
|
|
|
@@ -637,8 +621,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
637
621
|
attribute_name,
|
|
638
622
|
equals_with_whitespace,
|
|
639
623
|
attribute_value,
|
|
640
|
-
attribute_name->base.location
|
|
641
|
-
attribute_value->base.location
|
|
624
|
+
attribute_name->base.location.start,
|
|
625
|
+
attribute_value->base.location.end,
|
|
642
626
|
NULL
|
|
643
627
|
);
|
|
644
628
|
} else {
|
|
@@ -646,8 +630,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
646
630
|
attribute_name,
|
|
647
631
|
NULL,
|
|
648
632
|
NULL,
|
|
649
|
-
attribute_name->base.location
|
|
650
|
-
attribute_name->base.location
|
|
633
|
+
attribute_name->base.location.start,
|
|
634
|
+
attribute_name->base.location.end,
|
|
651
635
|
NULL
|
|
652
636
|
);
|
|
653
637
|
}
|
|
@@ -666,8 +650,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
666
650
|
attribute_name,
|
|
667
651
|
equals,
|
|
668
652
|
attribute_value,
|
|
669
|
-
attribute_name->base.location
|
|
670
|
-
attribute_value->base.location
|
|
653
|
+
attribute_name->base.location.start,
|
|
654
|
+
attribute_value->base.location.end,
|
|
671
655
|
NULL
|
|
672
656
|
);
|
|
673
657
|
|
|
@@ -680,8 +664,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
|
680
664
|
attribute_name,
|
|
681
665
|
NULL,
|
|
682
666
|
NULL,
|
|
683
|
-
attribute_name->base.location
|
|
684
|
-
attribute_name->base.location
|
|
667
|
+
attribute_name->base.location.start,
|
|
668
|
+
attribute_name->base.location.end,
|
|
685
669
|
NULL
|
|
686
670
|
);
|
|
687
671
|
}
|
|
@@ -735,12 +719,12 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
|
|
|
735
719
|
} while (true);
|
|
736
720
|
}
|
|
737
721
|
|
|
738
|
-
static void parser_handle_erb_in_open_tag(parser_T* parser,
|
|
722
|
+
static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
|
|
739
723
|
bool is_output_tag = parser->current_token->value && strlen(parser->current_token->value) >= 3
|
|
740
724
|
&& strncmp(parser->current_token->value, "<%=", 3) == 0;
|
|
741
725
|
|
|
742
726
|
if (!is_output_tag) {
|
|
743
|
-
|
|
727
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
744
728
|
|
|
745
729
|
return;
|
|
746
730
|
}
|
|
@@ -754,13 +738,13 @@ static void parser_handle_erb_in_open_tag(parser_T* parser, array_T* children) {
|
|
|
754
738
|
bool looks_like_attribute = parser_lookahead_erb_is_attribute(&lexer_copy);
|
|
755
739
|
|
|
756
740
|
if (looks_like_attribute) {
|
|
757
|
-
|
|
741
|
+
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
758
742
|
} else {
|
|
759
|
-
|
|
743
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
760
744
|
}
|
|
761
745
|
}
|
|
762
746
|
|
|
763
|
-
static void parser_handle_whitespace_in_open_tag(parser_T* parser,
|
|
747
|
+
static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children) {
|
|
764
748
|
token_T* whitespace = parser_consume_if_present(parser, TOKEN_WHITESPACE);
|
|
765
749
|
|
|
766
750
|
if (whitespace != NULL) {
|
|
@@ -774,8 +758,8 @@ static void parser_handle_whitespace_in_open_tag(parser_T* parser, array_T* chil
|
|
|
774
758
|
}
|
|
775
759
|
|
|
776
760
|
static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
777
|
-
|
|
778
|
-
|
|
761
|
+
hb_array_T* errors = hb_array_init(8);
|
|
762
|
+
hb_array_T* children = hb_array_init(8);
|
|
779
763
|
|
|
780
764
|
token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
|
|
781
765
|
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
|
@@ -787,7 +771,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
787
771
|
}
|
|
788
772
|
|
|
789
773
|
if (parser->current_token->type == TOKEN_IDENTIFIER) {
|
|
790
|
-
|
|
774
|
+
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
791
775
|
continue;
|
|
792
776
|
}
|
|
793
777
|
|
|
@@ -797,7 +781,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
797
781
|
}
|
|
798
782
|
|
|
799
783
|
if (parser->current_token->type == TOKEN_AT) {
|
|
800
|
-
|
|
784
|
+
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
801
785
|
continue;
|
|
802
786
|
}
|
|
803
787
|
|
|
@@ -807,7 +791,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
807
791
|
|
|
808
792
|
if (next_token && next_token->type == TOKEN_IDENTIFIER) {
|
|
809
793
|
token_free(next_token);
|
|
810
|
-
|
|
794
|
+
hb_array_append(children, parser_parse_html_attribute(parser));
|
|
811
795
|
|
|
812
796
|
continue;
|
|
813
797
|
}
|
|
@@ -834,8 +818,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
834
818
|
token_free(tag_start);
|
|
835
819
|
token_free(tag_name);
|
|
836
820
|
|
|
837
|
-
|
|
838
|
-
|
|
821
|
+
hb_array_free(&children);
|
|
822
|
+
hb_array_free(&errors);
|
|
839
823
|
|
|
840
824
|
return NULL;
|
|
841
825
|
}
|
|
@@ -849,8 +833,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
849
833
|
tag_end,
|
|
850
834
|
children,
|
|
851
835
|
is_self_closing,
|
|
852
|
-
tag_start->location
|
|
853
|
-
tag_end->location
|
|
836
|
+
tag_start->location.start,
|
|
837
|
+
tag_end->location.end,
|
|
854
838
|
errors
|
|
855
839
|
);
|
|
856
840
|
|
|
@@ -862,8 +846,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
|
862
846
|
}
|
|
863
847
|
|
|
864
848
|
static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
|
|
865
|
-
|
|
866
|
-
|
|
849
|
+
hb_array_T* errors = hb_array_init(8);
|
|
850
|
+
hb_array_T* children = hb_array_init(8);
|
|
867
851
|
|
|
868
852
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
|
869
853
|
|
|
@@ -875,21 +859,21 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
|
|
|
875
859
|
|
|
876
860
|
token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
|
|
877
861
|
|
|
878
|
-
if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
|
|
879
|
-
|
|
880
|
-
|
|
862
|
+
if (tag_name != NULL && is_void_element(hb_string(tag_name->value)) && parser_in_svg_context(parser) == false) {
|
|
863
|
+
hb_string_T expected = html_self_closing_tag_string(hb_string(tag_name->value));
|
|
864
|
+
hb_string_T got = html_closing_tag_string(hb_string(tag_name->value));
|
|
881
865
|
|
|
882
866
|
append_void_element_closing_tag_error(
|
|
883
867
|
tag_name,
|
|
884
|
-
expected,
|
|
885
|
-
got,
|
|
886
|
-
tag_opening->location
|
|
887
|
-
tag_closing->location
|
|
868
|
+
expected.data,
|
|
869
|
+
got.data,
|
|
870
|
+
tag_opening->location.start,
|
|
871
|
+
tag_closing->location.end,
|
|
888
872
|
errors
|
|
889
873
|
);
|
|
890
874
|
|
|
891
|
-
free(expected);
|
|
892
|
-
free(got);
|
|
875
|
+
free(expected.data);
|
|
876
|
+
free(got.data);
|
|
893
877
|
}
|
|
894
878
|
|
|
895
879
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
|
|
@@ -897,8 +881,8 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
|
|
|
897
881
|
tag_name,
|
|
898
882
|
children,
|
|
899
883
|
tag_closing,
|
|
900
|
-
tag_opening->location
|
|
901
|
-
tag_closing->location
|
|
884
|
+
tag_opening->location.start,
|
|
885
|
+
tag_closing->location.end,
|
|
902
886
|
errors
|
|
903
887
|
);
|
|
904
888
|
|
|
@@ -921,8 +905,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
|
|
|
921
905
|
NULL,
|
|
922
906
|
true,
|
|
923
907
|
ELEMENT_SOURCE_HTML,
|
|
924
|
-
open_tag->base.location
|
|
925
|
-
open_tag->base.location
|
|
908
|
+
open_tag->base.location.start,
|
|
909
|
+
open_tag->base.location.end,
|
|
926
910
|
NULL
|
|
927
911
|
);
|
|
928
912
|
}
|
|
@@ -931,13 +915,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
|
|
|
931
915
|
parser_T* parser,
|
|
932
916
|
AST_HTML_OPEN_TAG_NODE_T* open_tag
|
|
933
917
|
) {
|
|
934
|
-
|
|
935
|
-
|
|
918
|
+
hb_array_T* errors = hb_array_init(8);
|
|
919
|
+
hb_array_T* body = hb_array_init(8);
|
|
936
920
|
|
|
937
921
|
parser_push_open_tag(parser, open_tag->tag_name);
|
|
938
922
|
|
|
939
|
-
if (open_tag->tag_name->value && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
|
|
940
|
-
foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
|
|
923
|
+
if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
|
|
924
|
+
foreign_content_type_T content_type = parser_get_foreign_content_type(hb_string(open_tag->tag_name->value));
|
|
941
925
|
parser_enter_foreign_content(parser, content_type);
|
|
942
926
|
parser_parse_foreign_content(parser, body, errors);
|
|
943
927
|
} else {
|
|
@@ -948,13 +932,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
|
|
|
948
932
|
|
|
949
933
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
|
|
950
934
|
|
|
951
|
-
if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
|
|
952
|
-
|
|
935
|
+
if (parser_in_svg_context(parser) == false && is_void_element(hb_string(close_tag->tag_name->value))) {
|
|
936
|
+
hb_array_push(body, close_tag);
|
|
953
937
|
parser_parse_in_data_state(parser, body, errors);
|
|
954
938
|
close_tag = parser_parse_html_close_tag(parser);
|
|
955
939
|
}
|
|
956
940
|
|
|
957
|
-
bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
|
|
941
|
+
bool matches_stack = parser_check_matching_tag(parser, hb_string(close_tag->tag_name->value));
|
|
958
942
|
|
|
959
943
|
if (matches_stack) {
|
|
960
944
|
token_T* popped_token = parser_pop_open_tag(parser);
|
|
@@ -970,45 +954,34 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
|
|
|
970
954
|
close_tag,
|
|
971
955
|
false,
|
|
972
956
|
ELEMENT_SOURCE_HTML,
|
|
973
|
-
open_tag->base.location
|
|
974
|
-
close_tag->base.location
|
|
957
|
+
open_tag->base.location.start,
|
|
958
|
+
close_tag->base.location.end,
|
|
975
959
|
errors
|
|
976
960
|
);
|
|
977
961
|
}
|
|
978
962
|
|
|
979
|
-
static
|
|
963
|
+
static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
|
|
980
964
|
AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
|
|
981
965
|
|
|
982
966
|
// <tag />
|
|
983
|
-
if (open_tag->is_void) { return parser_parse_html_self_closing_element(parser, open_tag); }
|
|
967
|
+
if (open_tag->is_void) { return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag); }
|
|
984
968
|
|
|
985
969
|
// <tag>, in void element list, and not in inside an <svg> element
|
|
986
|
-
if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
|
|
987
|
-
return parser_parse_html_self_closing_element(parser, open_tag);
|
|
970
|
+
if (!open_tag->is_void && is_void_element(hb_string(open_tag->tag_name->value)) && !parser_in_svg_context(parser)) {
|
|
971
|
+
return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag);
|
|
988
972
|
}
|
|
989
973
|
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
array_T* errors = array_init(8);
|
|
974
|
+
if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
|
|
975
|
+
AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
|
|
994
976
|
|
|
995
|
-
|
|
977
|
+
if (regular_element != NULL) { return (AST_NODE_T*) regular_element; }
|
|
978
|
+
}
|
|
996
979
|
|
|
997
|
-
return
|
|
998
|
-
open_tag,
|
|
999
|
-
open_tag->tag_name,
|
|
1000
|
-
NULL,
|
|
1001
|
-
NULL,
|
|
1002
|
-
false,
|
|
1003
|
-
ELEMENT_SOURCE_HTML,
|
|
1004
|
-
open_tag->base.location->start,
|
|
1005
|
-
open_tag->base.location->end,
|
|
1006
|
-
errors
|
|
1007
|
-
);
|
|
980
|
+
return (AST_NODE_T*) open_tag;
|
|
1008
981
|
}
|
|
1009
982
|
|
|
1010
983
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
1011
|
-
|
|
984
|
+
hb_array_T* errors = hb_array_init(8);
|
|
1012
985
|
|
|
1013
986
|
token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
|
|
1014
987
|
token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
|
|
@@ -1021,8 +994,8 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
|
1021
994
|
NULL,
|
|
1022
995
|
false,
|
|
1023
996
|
false,
|
|
1024
|
-
opening_tag->location
|
|
1025
|
-
closing_tag->location
|
|
997
|
+
opening_tag->location.start,
|
|
998
|
+
closing_tag->location.end,
|
|
1026
999
|
errors
|
|
1027
1000
|
);
|
|
1028
1001
|
|
|
@@ -1033,15 +1006,15 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
|
|
|
1033
1006
|
return erb_node;
|
|
1034
1007
|
}
|
|
1035
1008
|
|
|
1036
|
-
static void parser_parse_foreign_content(parser_T* parser,
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1009
|
+
static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
1010
|
+
hb_buffer_T content;
|
|
1011
|
+
hb_buffer_init(&content, 1024);
|
|
1012
|
+
position_T start = parser->current_token->location.start;
|
|
1013
|
+
hb_string_T expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
|
|
1040
1014
|
|
|
1041
|
-
if (expected_closing_tag
|
|
1015
|
+
if (hb_string_is_empty(expected_closing_tag)) {
|
|
1042
1016
|
parser_exit_foreign_content(parser);
|
|
1043
|
-
|
|
1044
|
-
buffer_free(&content);
|
|
1017
|
+
free(content.value);
|
|
1045
1018
|
|
|
1046
1019
|
return;
|
|
1047
1020
|
}
|
|
@@ -1051,10 +1024,9 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
|
|
|
1051
1024
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1052
1025
|
|
|
1053
1026
|
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
|
1054
|
-
|
|
1027
|
+
hb_array_append(children, erb_node);
|
|
1055
1028
|
|
|
1056
|
-
|
|
1057
|
-
start = position_copy(parser->current_token->location->start);
|
|
1029
|
+
start = parser->current_token->location.start;
|
|
1058
1030
|
|
|
1059
1031
|
continue;
|
|
1060
1032
|
}
|
|
@@ -1066,7 +1038,8 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
|
|
|
1066
1038
|
bool is_potential_match = false;
|
|
1067
1039
|
|
|
1068
1040
|
if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
|
|
1069
|
-
is_potential_match =
|
|
1041
|
+
is_potential_match =
|
|
1042
|
+
parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type);
|
|
1070
1043
|
}
|
|
1071
1044
|
|
|
1072
1045
|
lexer_restore_state(parser->lexer, saved_state);
|
|
@@ -1077,53 +1050,57 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
|
|
|
1077
1050
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1078
1051
|
parser_exit_foreign_content(parser);
|
|
1079
1052
|
|
|
1080
|
-
|
|
1081
|
-
buffer_free(&content);
|
|
1053
|
+
free(content.value);
|
|
1082
1054
|
|
|
1083
1055
|
return;
|
|
1084
1056
|
}
|
|
1085
1057
|
}
|
|
1086
1058
|
|
|
1087
1059
|
token_T* token = parser_advance(parser);
|
|
1088
|
-
|
|
1060
|
+
hb_buffer_append(&content, token->value);
|
|
1089
1061
|
token_free(token);
|
|
1090
1062
|
}
|
|
1091
1063
|
|
|
1092
1064
|
parser_append_literal_node_from_buffer(parser, &content, children, start);
|
|
1093
1065
|
parser_exit_foreign_content(parser);
|
|
1094
|
-
|
|
1095
|
-
buffer_free(&content);
|
|
1066
|
+
free(content.value);
|
|
1096
1067
|
}
|
|
1097
1068
|
|
|
1098
|
-
static void parser_parse_in_data_state(parser_T* parser,
|
|
1099
|
-
while (
|
|
1069
|
+
static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
|
|
1070
|
+
while (token_is_not(parser, TOKEN_EOF)) {
|
|
1071
|
+
|
|
1100
1072
|
if (token_is(parser, TOKEN_ERB_START)) {
|
|
1101
|
-
|
|
1073
|
+
hb_array_append(children, parser_parse_erb_tag(parser));
|
|
1102
1074
|
continue;
|
|
1103
1075
|
}
|
|
1104
1076
|
|
|
1105
1077
|
if (token_is(parser, TOKEN_HTML_DOCTYPE)) {
|
|
1106
|
-
|
|
1078
|
+
hb_array_append(children, parser_parse_html_doctype(parser));
|
|
1107
1079
|
continue;
|
|
1108
1080
|
}
|
|
1109
1081
|
|
|
1110
1082
|
if (token_is(parser, TOKEN_XML_DECLARATION)) {
|
|
1111
|
-
|
|
1083
|
+
hb_array_append(children, parser_parse_xml_declaration(parser));
|
|
1112
1084
|
continue;
|
|
1113
1085
|
}
|
|
1114
1086
|
|
|
1115
1087
|
if (token_is(parser, TOKEN_CDATA_START)) {
|
|
1116
|
-
|
|
1088
|
+
hb_array_append(children, parser_parse_cdata(parser));
|
|
1117
1089
|
continue;
|
|
1118
1090
|
}
|
|
1119
1091
|
|
|
1120
1092
|
if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
|
|
1121
|
-
|
|
1093
|
+
hb_array_append(children, parser_parse_html_comment(parser));
|
|
1122
1094
|
continue;
|
|
1123
1095
|
}
|
|
1124
1096
|
|
|
1125
1097
|
if (token_is(parser, TOKEN_HTML_TAG_START)) {
|
|
1126
|
-
|
|
1098
|
+
hb_array_append(children, parser_parse_html_element(parser));
|
|
1099
|
+
continue;
|
|
1100
|
+
}
|
|
1101
|
+
|
|
1102
|
+
if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1103
|
+
hb_array_append(children, parser_parse_html_close_tag(parser));
|
|
1127
1104
|
continue;
|
|
1128
1105
|
}
|
|
1129
1106
|
|
|
@@ -1131,6 +1108,7 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
|
1131
1108
|
parser,
|
|
1132
1109
|
TOKEN_AMPERSAND,
|
|
1133
1110
|
TOKEN_AT,
|
|
1111
|
+
TOKEN_BACKSLASH,
|
|
1134
1112
|
TOKEN_BACKTICK,
|
|
1135
1113
|
TOKEN_CHARACTER,
|
|
1136
1114
|
TOKEN_COLON,
|
|
@@ -1147,7 +1125,7 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
|
1147
1125
|
TOKEN_UNDERSCORE,
|
|
1148
1126
|
TOKEN_WHITESPACE
|
|
1149
1127
|
)) {
|
|
1150
|
-
|
|
1128
|
+
hb_array_append(children, parser_parse_text_content(parser, errors));
|
|
1151
1129
|
continue;
|
|
1152
1130
|
}
|
|
1153
1131
|
|
|
@@ -1155,69 +1133,126 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
|
1155
1133
|
parser,
|
|
1156
1134
|
"Unexpected token",
|
|
1157
1135
|
"TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
|
|
1158
|
-
"TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
|
|
1136
|
+
"TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE",
|
|
1159
1137
|
errors
|
|
1160
1138
|
);
|
|
1161
1139
|
}
|
|
1162
1140
|
}
|
|
1163
1141
|
|
|
1164
|
-
static
|
|
1165
|
-
|
|
1166
|
-
token_T* unclosed_tag = parser_pop_open_tag(parser);
|
|
1142
|
+
static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_string_T tag_name) {
|
|
1143
|
+
int depth = 0;
|
|
1167
1144
|
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1145
|
+
for (size_t i = start_idx + 1; i < hb_array_size(nodes); i++) {
|
|
1146
|
+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1147
|
+
if (node == NULL) { continue; }
|
|
1148
|
+
|
|
1149
|
+
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1150
|
+
AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1151
|
+
|
|
1152
|
+
if (hb_string_equals(hb_string(open->tag_name->value), tag_name)) { depth++; }
|
|
1153
|
+
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1154
|
+
AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1174
1155
|
|
|
1175
|
-
|
|
1156
|
+
if (hb_string_equals(hb_string(close->tag_name->value), tag_name)) {
|
|
1157
|
+
if (depth == 0) { return i; }
|
|
1158
|
+
depth--;
|
|
1159
|
+
}
|
|
1160
|
+
}
|
|
1176
1161
|
}
|
|
1162
|
+
|
|
1163
|
+
return (size_t) -1;
|
|
1177
1164
|
}
|
|
1178
1165
|
|
|
1179
|
-
static
|
|
1180
|
-
while (token_is_not(parser, TOKEN_EOF)) {
|
|
1181
|
-
if (token_is_not(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
|
1182
|
-
parser_append_unexpected_token_error(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
|
1166
|
+
static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors);
|
|
1183
1167
|
|
|
1184
|
-
|
|
1185
|
-
|
|
1168
|
+
static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors) {
|
|
1169
|
+
hb_array_T* result = hb_array_init(hb_array_size(nodes));
|
|
1186
1170
|
|
|
1187
|
-
|
|
1188
|
-
|
|
1171
|
+
for (size_t index = 0; index < hb_array_size(nodes); index++) {
|
|
1172
|
+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, index);
|
|
1173
|
+
if (node == NULL) { continue; }
|
|
1189
1174
|
|
|
1190
|
-
|
|
1175
|
+
if (node->type == AST_HTML_OPEN_TAG_NODE) {
|
|
1176
|
+
AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node;
|
|
1177
|
+
hb_string_T tag_name = hb_string(open_tag->tag_name->value);
|
|
1191
1178
|
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1179
|
+
size_t close_index = find_matching_close_tag(nodes, index, tag_name);
|
|
1180
|
+
|
|
1181
|
+
if (close_index == (size_t) -1) {
|
|
1182
|
+
if (hb_array_size(open_tag->base.errors) == 0) {
|
|
1183
|
+
append_missing_closing_tag_error(
|
|
1184
|
+
open_tag->tag_name,
|
|
1185
|
+
open_tag->base.location.start,
|
|
1186
|
+
open_tag->base.location.end,
|
|
1187
|
+
open_tag->base.errors
|
|
1188
|
+
);
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
hb_array_append(result, node);
|
|
1192
|
+
} else {
|
|
1193
|
+
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) hb_array_get(nodes, close_index);
|
|
1194
|
+
|
|
1195
|
+
hb_array_T* body = hb_array_init(close_index - index - 1);
|
|
1200
1196
|
|
|
1201
|
-
|
|
1197
|
+
for (size_t j = index + 1; j < close_index; j++) {
|
|
1198
|
+
hb_array_append(body, hb_array_get(nodes, j));
|
|
1199
|
+
}
|
|
1200
|
+
|
|
1201
|
+
hb_array_T* processed_body = parser_build_elements_from_tags(body, errors);
|
|
1202
|
+
hb_array_free(&body);
|
|
1203
|
+
|
|
1204
|
+
hb_array_T* element_errors = hb_array_init(8);
|
|
1205
|
+
|
|
1206
|
+
AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
|
|
1207
|
+
open_tag,
|
|
1208
|
+
open_tag->tag_name,
|
|
1209
|
+
processed_body,
|
|
1210
|
+
close_tag,
|
|
1211
|
+
false,
|
|
1212
|
+
ELEMENT_SOURCE_HTML,
|
|
1213
|
+
open_tag->base.location.start,
|
|
1214
|
+
close_tag->base.location.end,
|
|
1215
|
+
element_errors
|
|
1216
|
+
);
|
|
1217
|
+
|
|
1218
|
+
hb_array_append(result, element);
|
|
1219
|
+
|
|
1220
|
+
index = close_index;
|
|
1221
|
+
}
|
|
1222
|
+
} else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
|
|
1223
|
+
AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) node;
|
|
1224
|
+
|
|
1225
|
+
if (!is_void_element(hb_string(close_tag->tag_name->value))) {
|
|
1226
|
+
if (hb_array_size(close_tag->base.errors) == 0) {
|
|
1227
|
+
append_missing_opening_tag_error(
|
|
1228
|
+
close_tag->tag_name,
|
|
1229
|
+
close_tag->base.location.start,
|
|
1230
|
+
close_tag->base.location.end,
|
|
1231
|
+
close_tag->base.errors
|
|
1232
|
+
);
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1202
1235
|
|
|
1203
|
-
|
|
1236
|
+
hb_array_append(result, node);
|
|
1237
|
+
} else {
|
|
1238
|
+
hb_array_append(result, node);
|
|
1239
|
+
}
|
|
1204
1240
|
}
|
|
1241
|
+
|
|
1242
|
+
return result;
|
|
1205
1243
|
}
|
|
1206
1244
|
|
|
1207
1245
|
static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) {
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
position_T
|
|
1246
|
+
hb_array_T* children = hb_array_init(8);
|
|
1247
|
+
hb_array_T* errors = hb_array_init(8);
|
|
1248
|
+
position_T start = parser->current_token->location.start;
|
|
1211
1249
|
|
|
1212
1250
|
parser_parse_in_data_state(parser, children, errors);
|
|
1213
|
-
parser_parse_unclosed_html_tags(parser, errors);
|
|
1214
|
-
parser_parse_stray_closing_tags(parser, children, errors);
|
|
1215
1251
|
|
|
1216
1252
|
token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors);
|
|
1217
1253
|
|
|
1218
|
-
AST_DOCUMENT_NODE_T* document_node = ast_document_node_init(children, start, eof->location
|
|
1254
|
+
AST_DOCUMENT_NODE_T* document_node = ast_document_node_init(children, start, eof->location.end, errors);
|
|
1219
1255
|
|
|
1220
|
-
position_free(start);
|
|
1221
1256
|
token_free(eof);
|
|
1222
1257
|
|
|
1223
1258
|
return document_node;
|
|
@@ -1227,26 +1262,26 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser) {
|
|
|
1227
1262
|
return parser_parse_document(parser);
|
|
1228
1263
|
}
|
|
1229
1264
|
|
|
1230
|
-
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token,
|
|
1231
|
-
if (parser->options
|
|
1232
|
-
|
|
1265
|
+
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children) {
|
|
1266
|
+
if (parser->options.track_whitespace) {
|
|
1267
|
+
hb_array_T* errors = hb_array_init(8);
|
|
1233
1268
|
AST_WHITESPACE_NODE_T* whitespace_node = ast_whitespace_node_init(
|
|
1234
1269
|
whitespace_token,
|
|
1235
|
-
whitespace_token->location
|
|
1236
|
-
whitespace_token->location
|
|
1270
|
+
whitespace_token->location.start,
|
|
1271
|
+
whitespace_token->location.end,
|
|
1237
1272
|
errors
|
|
1238
1273
|
);
|
|
1239
|
-
|
|
1274
|
+
hb_array_append(children, whitespace_node);
|
|
1240
1275
|
}
|
|
1241
1276
|
|
|
1242
1277
|
token_free(whitespace_token);
|
|
1243
1278
|
}
|
|
1244
1279
|
|
|
1245
|
-
static void parser_consume_whitespace(parser_T* parser,
|
|
1280
|
+
static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
|
|
1246
1281
|
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
|
1247
1282
|
token_T* whitespace = parser_advance(parser);
|
|
1248
1283
|
|
|
1249
|
-
if (parser->options
|
|
1284
|
+
if (parser->options.track_whitespace && children != NULL) {
|
|
1250
1285
|
parser_handle_whitespace(parser, whitespace, children);
|
|
1251
1286
|
} else {
|
|
1252
1287
|
token_free(whitespace);
|
|
@@ -1254,13 +1289,38 @@ static void parser_consume_whitespace(parser_T* parser, array_T* children) {
|
|
|
1254
1289
|
}
|
|
1255
1290
|
}
|
|
1256
1291
|
|
|
1257
|
-
void
|
|
1292
|
+
void herb_parser_deinit(parser_T* parser) {
|
|
1258
1293
|
if (parser == NULL) { return; }
|
|
1259
1294
|
|
|
1260
|
-
if (parser->lexer != NULL) { lexer_free(parser->lexer); }
|
|
1261
1295
|
if (parser->current_token != NULL) { token_free(parser->current_token); }
|
|
1262
|
-
if (parser->open_tags_stack != NULL) {
|
|
1263
|
-
|
|
1296
|
+
if (parser->open_tags_stack != NULL) { hb_array_free(&parser->open_tags_stack); }
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
|
|
1300
|
+
if (nodes == NULL || hb_array_size(nodes) == 0) { return; }
|
|
1301
|
+
|
|
1302
|
+
hb_array_T* processed = parser_build_elements_from_tags(nodes, errors);
|
|
1303
|
+
|
|
1304
|
+
while (hb_array_size(nodes) > 0) {
|
|
1305
|
+
hb_array_remove(nodes, 0);
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1308
|
+
for (size_t i = 0; i < hb_array_size(processed); i++) {
|
|
1309
|
+
hb_array_append(nodes, hb_array_get(processed, i));
|
|
1310
|
+
}
|
|
1311
|
+
|
|
1312
|
+
hb_array_free(&processed);
|
|
1313
|
+
|
|
1314
|
+
for (size_t i = 0; i < hb_array_size(nodes); i++) {
|
|
1315
|
+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
|
|
1316
|
+
if (node == NULL) { continue; }
|
|
1317
|
+
|
|
1318
|
+
herb_visit_node(node, match_tags_visitor, errors);
|
|
1319
|
+
}
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
void herb_parser_match_html_tags_post_analyze(AST_DOCUMENT_NODE_T* document) {
|
|
1323
|
+
if (document == NULL) { return; }
|
|
1264
1324
|
|
|
1265
|
-
|
|
1325
|
+
match_tags_in_node_array(document->children, document->base.errors);
|
|
1266
1326
|
}
|