herb 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. checksums.yaml +4 -4
  2. data/Makefile +8 -5
  3. data/config.yml +26 -6
  4. data/ext/herb/error_helpers.c +57 -3
  5. data/ext/herb/error_helpers.h +1 -1
  6. data/ext/herb/extconf.rb +1 -0
  7. data/ext/herb/extension.c +10 -24
  8. data/ext/herb/extension_helpers.c +3 -3
  9. data/ext/herb/extension_helpers.h +1 -1
  10. data/ext/herb/nodes.c +72 -37
  11. data/herb.gemspec +0 -2
  12. data/lib/herb/ast/helpers.rb +11 -0
  13. data/lib/herb/ast/node.rb +15 -6
  14. data/lib/herb/ast/nodes.rb +609 -392
  15. data/lib/herb/cli.rb +31 -0
  16. data/lib/herb/colors.rb +82 -0
  17. data/lib/herb/engine/compiler.rb +140 -14
  18. data/lib/herb/engine/debug_visitor.rb +1 -5
  19. data/lib/herb/engine/parser_error_overlay.rb +1 -1
  20. data/lib/herb/engine.rb +8 -14
  21. data/lib/herb/errors.rb +166 -56
  22. data/lib/herb/location.rb +2 -2
  23. data/lib/herb/project.rb +86 -21
  24. data/lib/herb/token.rb +14 -2
  25. data/lib/herb/version.rb +1 -1
  26. data/lib/herb.rb +1 -0
  27. data/sig/herb/ast/helpers.rbs +3 -0
  28. data/sig/herb/ast/node.rbs +12 -5
  29. data/sig/herb/ast/nodes.rbs +124 -62
  30. data/sig/herb/colors.rbs +35 -0
  31. data/sig/herb/engine/compiler.rbs +23 -1
  32. data/sig/herb/errors.rbs +74 -20
  33. data/sig/herb/token.rbs +8 -0
  34. data/sig/herb_c_extension.rbs +1 -1
  35. data/sig/serialized_ast_errors.rbs +8 -0
  36. data/src/analyze.c +420 -171
  37. data/src/analyze_helpers.c +5 -0
  38. data/src/analyze_missing_end.c +147 -0
  39. data/src/analyze_transform.c +196 -0
  40. data/src/analyzed_ruby.c +23 -2
  41. data/src/ast_node.c +5 -5
  42. data/src/ast_nodes.c +179 -179
  43. data/src/ast_pretty_print.c +232 -232
  44. data/src/element_source.c +7 -6
  45. data/src/errors.c +246 -126
  46. data/src/extract.c +92 -34
  47. data/src/herb.c +37 -49
  48. data/src/html_util.c +34 -96
  49. data/src/include/analyze.h +10 -2
  50. data/src/include/analyze_helpers.h +3 -0
  51. data/src/include/analyzed_ruby.h +4 -2
  52. data/src/include/ast_node.h +2 -2
  53. data/src/include/ast_nodes.h +67 -66
  54. data/src/include/ast_pretty_print.h +2 -2
  55. data/src/include/element_source.h +3 -1
  56. data/src/include/errors.h +30 -14
  57. data/src/include/extract.h +4 -4
  58. data/src/include/herb.h +6 -7
  59. data/src/include/html_util.h +4 -5
  60. data/src/include/lexer.h +1 -3
  61. data/src/include/lexer_peek_helpers.h +14 -14
  62. data/src/include/lexer_struct.h +3 -2
  63. data/src/include/macros.h +4 -0
  64. data/src/include/parser.h +12 -6
  65. data/src/include/parser_helpers.h +25 -15
  66. data/src/include/pretty_print.h +38 -28
  67. data/src/include/token.h +5 -8
  68. data/src/include/utf8.h +3 -2
  69. data/src/include/util/hb_arena.h +31 -0
  70. data/src/include/util/hb_arena_debug.h +8 -0
  71. data/src/include/util/hb_array.h +33 -0
  72. data/src/include/util/hb_buffer.h +34 -0
  73. data/src/include/util/hb_string.h +29 -0
  74. data/src/include/util/hb_system.h +9 -0
  75. data/src/include/util.h +3 -14
  76. data/src/include/version.h +1 -1
  77. data/src/include/visitor.h +1 -1
  78. data/src/io.c +7 -4
  79. data/src/lexer.c +61 -88
  80. data/src/lexer_peek_helpers.c +35 -37
  81. data/src/main.c +19 -23
  82. data/src/parser.c +282 -201
  83. data/src/parser_helpers.c +46 -40
  84. data/src/parser_match_tags.c +316 -0
  85. data/src/pretty_print.c +82 -106
  86. data/src/token.c +18 -65
  87. data/src/utf8.c +4 -4
  88. data/src/util/hb_arena.c +179 -0
  89. data/src/util/hb_arena_debug.c +237 -0
  90. data/src/{array.c → util/hb_array.c} +26 -27
  91. data/src/util/hb_buffer.c +203 -0
  92. data/src/util/hb_string.c +85 -0
  93. data/src/util/hb_system.c +30 -0
  94. data/src/util.c +29 -99
  95. data/src/visitor.c +54 -54
  96. data/templates/ext/herb/error_helpers.c.erb +3 -3
  97. data/templates/ext/herb/error_helpers.h.erb +1 -1
  98. data/templates/ext/herb/nodes.c.erb +11 -6
  99. data/templates/java/error_helpers.c.erb +75 -0
  100. data/templates/java/error_helpers.h.erb +20 -0
  101. data/templates/java/nodes.c.erb +97 -0
  102. data/templates/java/nodes.h.erb +23 -0
  103. data/templates/java/org/herb/ast/Errors.java.erb +121 -0
  104. data/templates/java/org/herb/ast/NodeVisitor.java.erb +14 -0
  105. data/templates/java/org/herb/ast/Nodes.java.erb +220 -0
  106. data/templates/java/org/herb/ast/Visitor.java.erb +56 -0
  107. data/templates/javascript/packages/node/extension/error_helpers.cpp.erb +8 -8
  108. data/templates/javascript/packages/node/extension/error_helpers.h.erb +1 -1
  109. data/templates/javascript/packages/node/extension/nodes.cpp.erb +9 -9
  110. data/templates/javascript/packages/node/extension/nodes.h.erb +1 -1
  111. data/templates/lib/herb/ast/nodes.rb.erb +28 -16
  112. data/templates/lib/herb/errors.rb.erb +17 -12
  113. data/templates/rust/src/ast/nodes.rs.erb +220 -0
  114. data/templates/rust/src/errors.rs.erb +216 -0
  115. data/templates/rust/src/nodes.rs.erb +374 -0
  116. data/templates/src/analyze_missing_end.c.erb +36 -0
  117. data/templates/src/analyze_transform.c.erb +24 -0
  118. data/templates/src/ast_nodes.c.erb +14 -14
  119. data/templates/src/ast_pretty_print.c.erb +36 -36
  120. data/templates/src/errors.c.erb +31 -31
  121. data/templates/src/include/ast_nodes.h.erb +10 -9
  122. data/templates/src/include/ast_pretty_print.h.erb +2 -2
  123. data/templates/src/include/errors.h.erb +6 -6
  124. data/templates/src/parser_match_tags.c.erb +38 -0
  125. data/templates/src/visitor.c.erb +4 -4
  126. data/templates/template.rb +22 -3
  127. data/templates/wasm/error_helpers.cpp.erb +9 -9
  128. data/templates/wasm/error_helpers.h.erb +1 -1
  129. data/templates/wasm/nodes.cpp.erb +9 -9
  130. data/templates/wasm/nodes.h.erb +1 -1
  131. data/vendor/prism/Rakefile +4 -1
  132. data/vendor/prism/config.yml +2 -1
  133. data/vendor/prism/include/prism/ast.h +31 -1
  134. data/vendor/prism/include/prism/diagnostic.h +1 -0
  135. data/vendor/prism/include/prism/version.h +3 -3
  136. data/vendor/prism/src/diagnostic.c +3 -1
  137. data/vendor/prism/src/prism.c +130 -71
  138. data/vendor/prism/src/util/pm_string.c +6 -8
  139. data/vendor/prism/templates/include/prism/ast.h.erb +2 -0
  140. data/vendor/prism/templates/java/org/prism/Loader.java.erb +2 -2
  141. data/vendor/prism/templates/javascript/src/deserialize.js.erb +2 -2
  142. data/vendor/prism/templates/lib/prism/serialize.rb.erb +2 -2
  143. data/vendor/prism/templates/sig/prism.rbs.erb +4 -0
  144. data/vendor/prism/templates/src/diagnostic.c.erb +1 -0
  145. metadata +34 -20
  146. data/lib/herb/libherb/array.rb +0 -51
  147. data/lib/herb/libherb/ast_node.rb +0 -50
  148. data/lib/herb/libherb/buffer.rb +0 -56
  149. data/lib/herb/libherb/extract_result.rb +0 -20
  150. data/lib/herb/libherb/lex_result.rb +0 -32
  151. data/lib/herb/libherb/libherb.rb +0 -52
  152. data/lib/herb/libherb/parse_result.rb +0 -20
  153. data/lib/herb/libherb/token.rb +0 -46
  154. data/lib/herb/libherb.rb +0 -35
  155. data/src/buffer.c +0 -241
  156. data/src/include/array.h +0 -33
  157. data/src/include/buffer.h +0 -39
  158. data/src/include/json.h +0 -28
  159. data/src/include/memory.h +0 -12
  160. data/src/json.c +0 -205
  161. data/src/memory.c +0 -53
data/src/parser.c CHANGED
@@ -1,8 +1,6 @@
1
1
  #include "include/parser.h"
2
- #include "include/array.h"
3
2
  #include "include/ast_node.h"
4
3
  #include "include/ast_nodes.h"
5
- #include "include/buffer.h"
6
4
  #include "include/errors.h"
7
5
  #include "include/html_util.h"
8
6
  #include "include/lexer.h"
@@ -11,49 +9,46 @@
11
9
  #include "include/token.h"
12
10
  #include "include/token_matchers.h"
13
11
  #include "include/util.h"
12
+ #include "include/util/hb_array.h"
13
+ #include "include/util/hb_buffer.h"
14
+ #include "include/util/hb_string.h"
15
+ #include "include/visitor.h"
14
16
 
15
17
  #include <stdio.h>
16
18
  #include <stdlib.h>
17
19
  #include <string.h>
18
20
  #include <strings.h>
19
21
 
20
- static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors);
21
- static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors);
22
+ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors);
23
+ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors);
22
24
  static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
23
- static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, array_T* children);
24
- static void parser_consume_whitespace(parser_T* parser, array_T* children);
25
+ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children);
26
+ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children);
25
27
  static void parser_skip_erb_content(lexer_T* lexer);
26
28
  static bool parser_lookahead_erb_is_attribute(lexer_T* lexer);
27
- static void parser_handle_erb_in_open_tag(parser_T* parser, array_T* children);
28
- static void parser_handle_whitespace_in_open_tag(parser_T* parser, array_T* children);
29
+ static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children);
30
+ static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children);
31
+
32
+ const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false };
29
33
 
30
34
  size_t parser_sizeof(void) {
31
35
  return sizeof(struct PARSER_STRUCT);
32
36
  }
33
37
 
34
- parser_T* herb_parser_init(lexer_T* lexer, parser_options_T* options) {
35
- parser_T* parser = calloc(1, parser_sizeof());
36
-
38
+ void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options) {
37
39
  parser->lexer = lexer;
38
40
  parser->current_token = lexer_next_token(lexer);
39
- parser->open_tags_stack = array_init(16);
41
+ parser->open_tags_stack = hb_array_init(16);
40
42
  parser->state = PARSER_STATE_DATA;
41
43
  parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
42
-
43
- if (options) {
44
- parser->options = calloc(1, sizeof(parser_options_T));
45
- parser->options->track_whitespace = options->track_whitespace;
46
- } else {
47
- parser->options = NULL;
48
- }
49
-
50
- return parser;
44
+ parser->options = options;
51
45
  }
52
46
 
53
47
  static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
54
- array_T* errors = array_init(8);
55
- array_T* children = array_init(8);
56
- buffer_T content = buffer_new();
48
+ hb_array_T* errors = hb_array_init(8);
49
+ hb_array_T* children = hb_array_init(8);
50
+ hb_buffer_T content;
51
+ hb_buffer_init(&content, 128);
57
52
 
58
53
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_CDATA_START, errors);
59
54
  position_T start = parser->current_token->location.start;
@@ -62,13 +57,13 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
62
57
  if (token_is(parser, TOKEN_ERB_START)) {
63
58
  parser_append_literal_node_from_buffer(parser, &content, children, start);
64
59
  AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
65
- array_append(children, erb_node);
60
+ hb_array_append(children, erb_node);
66
61
  start = parser->current_token->location.start;
67
62
  continue;
68
63
  }
69
64
 
70
65
  token_T* token = parser_advance(parser);
71
- buffer_append(&content, token->value);
66
+ hb_buffer_append(&content, token->value);
72
67
  token_free(token);
73
68
  }
74
69
 
@@ -84,7 +79,7 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
84
79
  errors
85
80
  );
86
81
 
87
- buffer_free(&content);
82
+ free(content.value);
88
83
  token_free(tag_opening);
89
84
  token_free(tag_closing);
90
85
 
@@ -92,19 +87,20 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
92
87
  }
93
88
 
94
89
  static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
95
- array_T* errors = array_init(8);
96
- array_T* children = array_init(8);
90
+ hb_array_T* errors = hb_array_init(8);
91
+ hb_array_T* children = hb_array_init(8);
97
92
  token_T* comment_start = parser_consume_expected(parser, TOKEN_HTML_COMMENT_START, errors);
98
93
  position_T start = parser->current_token->location.start;
99
94
 
100
- buffer_T comment = buffer_new();
95
+ hb_buffer_T comment;
96
+ hb_buffer_init(&comment, 512);
101
97
 
102
98
  while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_EOF)) {
103
99
  if (token_is(parser, TOKEN_ERB_START)) {
104
100
  parser_append_literal_node_from_buffer(parser, &comment, children, start);
105
101
 
106
102
  AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
107
- array_append(children, erb_node);
103
+ hb_array_append(children, erb_node);
108
104
 
109
105
  start = parser->current_token->location.start;
110
106
 
@@ -112,7 +108,7 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
112
108
  }
113
109
 
114
110
  token_T* token = parser_advance(parser);
115
- buffer_append(&comment, token->value);
111
+ hb_buffer_append(&comment, token->value);
116
112
  token_free(token);
117
113
  }
118
114
 
@@ -129,7 +125,7 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
129
125
  errors
130
126
  );
131
127
 
132
- buffer_free(&comment);
128
+ free(comment.value);
133
129
  token_free(comment_start);
134
130
  token_free(comment_end);
135
131
 
@@ -137,9 +133,10 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
137
133
  }
138
134
 
139
135
  static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
140
- array_T* errors = array_init(8);
141
- array_T* children = array_init(8);
142
- buffer_T content = buffer_new();
136
+ hb_array_T* errors = hb_array_init(8);
137
+ hb_array_T* children = hb_array_init(8);
138
+ hb_buffer_T content;
139
+ hb_buffer_init(&content, 64);
143
140
 
144
141
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_DOCTYPE, errors);
145
142
 
@@ -150,13 +147,13 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
150
147
  parser_append_literal_node_from_buffer(parser, &content, children, start);
151
148
 
152
149
  AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
153
- array_append(children, erb_node);
150
+ hb_array_append(children, erb_node);
154
151
 
155
152
  continue;
156
153
  }
157
154
 
158
155
  token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
159
- buffer_append(&content, token->value);
156
+ hb_buffer_append(&content, token->value);
160
157
  token_free(token);
161
158
  }
162
159
 
@@ -175,15 +172,16 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
175
172
 
176
173
  token_free(tag_opening);
177
174
  token_free(tag_closing);
178
- buffer_free(&content);
175
+ free(content.value);
179
176
 
180
177
  return doctype;
181
178
  }
182
179
 
183
180
  static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser) {
184
- array_T* errors = array_init(8);
185
- array_T* children = array_init(8);
186
- buffer_T content = buffer_new();
181
+ hb_array_T* errors = hb_array_init(8);
182
+ hb_array_T* children = hb_array_init(8);
183
+ hb_buffer_T content;
184
+ hb_buffer_init(&content, 64);
187
185
 
188
186
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_XML_DECLARATION, errors);
189
187
 
@@ -194,7 +192,7 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
194
192
  parser_append_literal_node_from_buffer(parser, &content, children, start);
195
193
 
196
194
  AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
197
- array_append(children, erb_node);
195
+ hb_array_append(children, erb_node);
198
196
 
199
197
  start = parser->current_token->location.start;
200
198
 
@@ -202,7 +200,7 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
202
200
  }
203
201
 
204
202
  token_T* token = parser_advance(parser);
205
- buffer_append(&content, token->value);
203
+ hb_buffer_append(&content, token->value);
206
204
  token_free(token);
207
205
  }
208
206
 
@@ -221,15 +219,16 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
221
219
 
222
220
  token_free(tag_opening);
223
221
  token_free(tag_closing);
224
- buffer_free(&content);
222
+ free(content.value);
225
223
 
226
224
  return xml_declaration;
227
225
  }
228
226
 
229
- static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T* document_errors) {
227
+ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_array_T* document_errors) {
230
228
  position_T start = parser->current_token->location.start;
231
229
 
232
- buffer_T content = buffer_new();
230
+ hb_buffer_T content;
231
+ hb_buffer_init(&content, 2048);
233
232
 
234
233
  while (token_is_none_of(
235
234
  parser,
@@ -241,7 +240,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
241
240
  TOKEN_EOF
242
241
  )) {
243
242
  if (token_is(parser, TOKEN_ERROR)) {
244
- buffer_free(&content);
243
+ free(content.value);
245
244
 
246
245
  token_T* token = parser_consume_expected(parser, TOKEN_ERROR, document_errors);
247
246
  append_unexpected_error(
@@ -259,32 +258,31 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
259
258
  }
260
259
 
261
260
  token_T* token = parser_advance(parser);
262
- buffer_append(&content, token->value);
261
+ hb_buffer_append(&content, token->value);
263
262
  token_free(token);
264
263
  }
265
264
 
266
- array_T* errors = array_init(8);
267
-
268
- if (buffer_length(&content) > 0) {
269
- AST_HTML_TEXT_NODE_T* text_node =
270
- ast_html_text_node_init(buffer_value(&content), start, parser->current_token->location.start, errors);
265
+ hb_array_T* errors = hb_array_init(8);
271
266
 
272
- buffer_free(&content);
267
+ AST_HTML_TEXT_NODE_T* text_node = NULL;
273
268
 
274
- return text_node;
269
+ if (hb_buffer_length(&content) > 0) {
270
+ text_node =
271
+ ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors);
272
+ } else {
273
+ text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors);
275
274
  }
276
275
 
277
- AST_HTML_TEXT_NODE_T* text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors);
278
-
279
- buffer_free(&content);
276
+ free(content.value);
280
277
 
281
278
  return text_node;
282
279
  }
283
280
 
284
281
  static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
285
- array_T* errors = array_init(8);
286
- array_T* children = array_init(8);
287
- buffer_T buffer = buffer_new();
282
+ hb_array_T* errors = hb_array_init(8);
283
+ hb_array_T* children = hb_array_init(8);
284
+ hb_buffer_T buffer;
285
+ hb_buffer_init(&buffer, 128);
288
286
  position_T start = parser->current_token->location.start;
289
287
 
290
288
  while (token_is_none_of(
@@ -300,14 +298,14 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
300
298
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
301
299
 
302
300
  AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
303
- array_append(children, erb_node);
301
+ hb_array_append(children, erb_node);
304
302
 
305
303
  start = parser->current_token->location.start;
306
304
  continue;
307
305
  }
308
306
 
309
307
  token_T* token = parser_advance(parser);
310
- buffer_append(&buffer, token->value);
308
+ hb_buffer_append(&buffer, token->value);
311
309
  token_free(token);
312
310
  }
313
311
 
@@ -317,8 +315,8 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
317
315
  position_T node_end = { 0 };
318
316
 
319
317
  if (children->size > 0) {
320
- AST_NODE_T* first_child = array_get(children, 0);
321
- AST_NODE_T* last_child = array_get(children, children->size - 1);
318
+ AST_NODE_T* first_child = hb_array_get(children, 0);
319
+ AST_NODE_T* last_child = hb_array_get(children, children->size - 1);
322
320
 
323
321
  node_start = first_child->location.start;
324
322
  node_end = last_child->location.end;
@@ -330,17 +328,18 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
330
328
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
331
329
  ast_html_attribute_name_node_init(children, node_start, node_end, errors);
332
330
 
333
- buffer_free(&buffer);
331
+ free(buffer.value);
334
332
 
335
333
  return attribute_name;
336
334
  }
337
335
 
338
336
  static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value(
339
337
  parser_T* parser,
340
- array_T* children,
341
- array_T* errors
338
+ hb_array_T* children,
339
+ hb_array_T* errors
342
340
  ) {
343
- buffer_T buffer = buffer_new();
341
+ hb_buffer_T buffer;
342
+ hb_buffer_init(&buffer, 512);
344
343
  token_T* opening_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
345
344
  position_T start = parser->current_token->location.start;
346
345
 
@@ -352,7 +351,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
352
351
  if (token_is(parser, TOKEN_ERB_START)) {
353
352
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
354
353
 
355
- array_append(children, parser_parse_erb_tag(parser));
354
+ hb_array_append(children, parser_parse_erb_tag(parser));
356
355
 
357
356
  start = parser->current_token->location.start;
358
357
 
@@ -366,8 +365,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
366
365
 
367
366
  if (next_token && next_token->type == TOKEN_QUOTE && opening_quote != NULL
368
367
  && strcmp(next_token->value, opening_quote->value) == 0) {
369
- buffer_append(&buffer, parser->current_token->value);
370
- buffer_append(&buffer, next_token->value);
368
+ hb_buffer_append(&buffer, parser->current_token->value);
369
+ hb_buffer_append(&buffer, next_token->value);
371
370
 
372
371
  token_free(parser->current_token);
373
372
  token_free(next_token);
@@ -381,7 +380,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
381
380
  }
382
381
  }
383
382
 
384
- buffer_append(&buffer, parser->current_token->value);
383
+ hb_buffer_append(&buffer, parser->current_token->value);
385
384
  token_free(parser->current_token);
386
385
 
387
386
  parser->current_token = lexer_next_token(parser->lexer);
@@ -409,7 +408,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
409
408
  token_free(parser->current_token);
410
409
  parser->current_token = potential_closing;
411
410
 
412
- buffer_append(&buffer, parser->current_token->value);
411
+ hb_buffer_append(&buffer, parser->current_token->value);
413
412
  token_free(parser->current_token);
414
413
  parser->current_token = lexer_next_token(parser->lexer);
415
414
 
@@ -421,14 +420,14 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
421
420
  if (token_is(parser, TOKEN_ERB_START)) {
422
421
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
423
422
 
424
- array_append(children, parser_parse_erb_tag(parser));
423
+ hb_array_append(children, parser_parse_erb_tag(parser));
425
424
 
426
425
  start = parser->current_token->location.start;
427
426
 
428
427
  continue;
429
428
  }
430
429
 
431
- buffer_append(&buffer, parser->current_token->value);
430
+ hb_buffer_append(&buffer, parser->current_token->value);
432
431
  token_free(parser->current_token);
433
432
 
434
433
  parser->current_token = lexer_next_token(parser->lexer);
@@ -442,7 +441,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
442
441
  }
443
442
 
444
443
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
445
- buffer_free(&buffer);
444
+ free(buffer.value);
446
445
 
447
446
  token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
448
447
 
@@ -473,13 +472,13 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
473
472
  }
474
473
 
475
474
  static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser_T* parser) {
476
- array_T* children = array_init(8);
477
- array_T* errors = array_init(8);
475
+ hb_array_T* children = hb_array_init(8);
476
+ hb_array_T* errors = hb_array_init(8);
478
477
 
479
478
  // <div id=<%= "home" %>>
480
479
  if (token_is(parser, TOKEN_ERB_START)) {
481
480
  AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
482
- array_append(children, erb_node);
481
+ hb_array_append(children, erb_node);
483
482
 
484
483
  return ast_html_attribute_value_node_init(
485
484
  NULL,
@@ -498,7 +497,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
498
497
  AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier);
499
498
  token_free(identifier);
500
499
 
501
- array_append(children, literal);
500
+ hb_array_append(children, literal);
502
501
 
503
502
  return ast_html_attribute_value_node_init(
504
503
  NULL,
@@ -561,12 +560,13 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
561
560
  static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
562
561
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
563
562
 
564
- if (parser->options && parser->options->track_whitespace) {
563
+ if (parser->options.track_whitespace) {
565
564
  bool has_equals = (parser->current_token->type == TOKEN_EQUALS)
566
565
  || lexer_peek_for_token_type_after_whitespace(parser->lexer, TOKEN_EQUALS);
567
566
 
568
567
  if (has_equals) {
569
- buffer_T equals_buffer = buffer_new();
568
+ hb_buffer_T equals_buffer;
569
+ hb_buffer_init(&equals_buffer, 256);
570
570
  position_T equals_start = { 0 };
571
571
  position_T equals_end = { 0 };
572
572
  uint32_t range_start = 0;
@@ -582,7 +582,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
582
582
  range_start = whitespace->range.from;
583
583
  }
584
584
 
585
- buffer_append(&equals_buffer, whitespace->value);
585
+ hb_buffer_append(&equals_buffer, whitespace->value);
586
586
  token_free(whitespace);
587
587
  }
588
588
 
@@ -594,14 +594,14 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
594
594
  range_start = equals->range.from;
595
595
  }
596
596
 
597
- buffer_append(&equals_buffer, equals->value);
597
+ hb_buffer_append(&equals_buffer, equals->value);
598
598
  equals_end = equals->location.end;
599
599
  range_end = equals->range.to;
600
600
  token_free(equals);
601
601
 
602
602
  while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
603
603
  token_T* whitespace = parser_advance(parser);
604
- buffer_append(&equals_buffer, whitespace->value);
604
+ hb_buffer_append(&equals_buffer, whitespace->value);
605
605
  equals_end = whitespace->location.end;
606
606
  range_end = whitespace->range.to;
607
607
  token_free(whitespace);
@@ -613,7 +613,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
613
613
  equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
614
614
  equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
615
615
 
616
- buffer_free(&equals_buffer);
616
+ free(equals_buffer.value);
617
617
 
618
618
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
619
619
 
@@ -719,12 +719,12 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
719
719
  } while (true);
720
720
  }
721
721
 
722
- static void parser_handle_erb_in_open_tag(parser_T* parser, array_T* children) {
722
+ static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
723
723
  bool is_output_tag = parser->current_token->value && strlen(parser->current_token->value) >= 3
724
724
  && strncmp(parser->current_token->value, "<%=", 3) == 0;
725
725
 
726
726
  if (!is_output_tag) {
727
- array_append(children, parser_parse_erb_tag(parser));
727
+ hb_array_append(children, parser_parse_erb_tag(parser));
728
728
 
729
729
  return;
730
730
  }
@@ -738,13 +738,13 @@ static void parser_handle_erb_in_open_tag(parser_T* parser, array_T* children) {
738
738
  bool looks_like_attribute = parser_lookahead_erb_is_attribute(&lexer_copy);
739
739
 
740
740
  if (looks_like_attribute) {
741
- array_append(children, parser_parse_html_attribute(parser));
741
+ hb_array_append(children, parser_parse_html_attribute(parser));
742
742
  } else {
743
- array_append(children, parser_parse_erb_tag(parser));
743
+ hb_array_append(children, parser_parse_erb_tag(parser));
744
744
  }
745
745
  }
746
746
 
747
- static void parser_handle_whitespace_in_open_tag(parser_T* parser, array_T* children) {
747
+ static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children) {
748
748
  token_T* whitespace = parser_consume_if_present(parser, TOKEN_WHITESPACE);
749
749
 
750
750
  if (whitespace != NULL) {
@@ -758,8 +758,8 @@ static void parser_handle_whitespace_in_open_tag(parser_T* parser, array_T* chil
758
758
  }
759
759
 
760
760
  static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
761
- array_T* errors = array_init(8);
762
- array_T* children = array_init(8);
761
+ hb_array_T* errors = hb_array_init(8);
762
+ hb_array_T* children = hb_array_init(8);
763
763
 
764
764
  token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
765
765
  token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
@@ -771,7 +771,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
771
771
  }
772
772
 
773
773
  if (parser->current_token->type == TOKEN_IDENTIFIER) {
774
- array_append(children, parser_parse_html_attribute(parser));
774
+ hb_array_append(children, parser_parse_html_attribute(parser));
775
775
  continue;
776
776
  }
777
777
 
@@ -781,7 +781,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
781
781
  }
782
782
 
783
783
  if (parser->current_token->type == TOKEN_AT) {
784
- array_append(children, parser_parse_html_attribute(parser));
784
+ hb_array_append(children, parser_parse_html_attribute(parser));
785
785
  continue;
786
786
  }
787
787
 
@@ -791,7 +791,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
791
791
 
792
792
  if (next_token && next_token->type == TOKEN_IDENTIFIER) {
793
793
  token_free(next_token);
794
- array_append(children, parser_parse_html_attribute(parser));
794
+ hb_array_append(children, parser_parse_html_attribute(parser));
795
795
 
796
796
  continue;
797
797
  }
@@ -818,8 +818,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
818
818
  token_free(tag_start);
819
819
  token_free(tag_name);
820
820
 
821
- array_free(&children);
822
- array_free(&errors);
821
+ hb_array_free(&children);
822
+ hb_array_free(&errors);
823
823
 
824
824
  return NULL;
825
825
  }
@@ -846,8 +846,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
846
846
  }
847
847
 
848
848
  static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
849
- array_T* errors = array_init(8);
850
- array_T* children = array_init(8);
849
+ hb_array_T* errors = hb_array_init(8);
850
+ hb_array_T* children = hb_array_init(8);
851
851
 
852
852
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
853
853
 
@@ -859,21 +859,21 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
859
859
 
860
860
  token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
861
861
 
862
- if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
863
- char* expected = html_self_closing_tag_string(tag_name->value);
864
- char* got = html_closing_tag_string(tag_name->value);
862
+ if (tag_name != NULL && is_void_element(hb_string(tag_name->value)) && parser_in_svg_context(parser) == false) {
863
+ hb_string_T expected = html_self_closing_tag_string(hb_string(tag_name->value));
864
+ hb_string_T got = html_closing_tag_string(hb_string(tag_name->value));
865
865
 
866
866
  append_void_element_closing_tag_error(
867
867
  tag_name,
868
- expected,
869
- got,
868
+ expected.data,
869
+ got.data,
870
870
  tag_opening->location.start,
871
871
  tag_closing->location.end,
872
872
  errors
873
873
  );
874
874
 
875
- free(expected);
876
- free(got);
875
+ free(expected.data);
876
+ free(got.data);
877
877
  }
878
878
 
879
879
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
@@ -915,13 +915,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
915
915
  parser_T* parser,
916
916
  AST_HTML_OPEN_TAG_NODE_T* open_tag
917
917
  ) {
918
- array_T* errors = array_init(8);
919
- array_T* body = array_init(8);
918
+ hb_array_T* errors = hb_array_init(8);
919
+ hb_array_T* body = hb_array_init(8);
920
920
 
921
921
  parser_push_open_tag(parser, open_tag->tag_name);
922
922
 
923
- if (open_tag->tag_name->value && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
924
- foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
923
+ if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
924
+ foreign_content_type_T content_type = parser_get_foreign_content_type(hb_string(open_tag->tag_name->value));
925
925
  parser_enter_foreign_content(parser, content_type);
926
926
  parser_parse_foreign_content(parser, body, errors);
927
927
  } else {
@@ -932,13 +932,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
932
932
 
933
933
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
934
934
 
935
- if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
936
- array_push(body, close_tag);
935
+ if (parser_in_svg_context(parser) == false && is_void_element(hb_string(close_tag->tag_name->value))) {
936
+ hb_array_push(body, close_tag);
937
937
  parser_parse_in_data_state(parser, body, errors);
938
938
  close_tag = parser_parse_html_close_tag(parser);
939
939
  }
940
940
 
941
- bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
941
+ bool matches_stack = parser_check_matching_tag(parser, hb_string(close_tag->tag_name->value));
942
942
 
943
943
  if (matches_stack) {
944
944
  token_T* popped_token = parser_pop_open_tag(parser);
@@ -960,39 +960,28 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
960
960
  );
961
961
  }
962
962
 
963
- static AST_HTML_ELEMENT_NODE_T* parser_parse_html_element(parser_T* parser) {
963
+ static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
964
964
  AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
965
965
 
966
966
  // <tag />
967
- if (open_tag->is_void) { return parser_parse_html_self_closing_element(parser, open_tag); }
967
+ if (open_tag->is_void) { return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag); }
968
968
 
969
969
  // <tag>, in void element list, and not in inside an <svg> element
970
- if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
971
- return parser_parse_html_self_closing_element(parser, open_tag);
970
+ if (!open_tag->is_void && is_void_element(hb_string(open_tag->tag_name->value)) && !parser_in_svg_context(parser)) {
971
+ return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag);
972
972
  }
973
973
 
974
- AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
975
- if (regular_element != NULL) { return regular_element; }
974
+ if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
975
+ AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
976
976
 
977
- array_T* errors = array_init(8);
978
-
979
- parser_append_unexpected_error(parser, "Unknown HTML open tag type", "HTMLOpenTag or HTMLSelfCloseTag", errors);
977
+ if (regular_element != NULL) { return (AST_NODE_T*) regular_element; }
978
+ }
980
979
 
981
- return ast_html_element_node_init(
982
- open_tag,
983
- open_tag->tag_name,
984
- NULL,
985
- NULL,
986
- false,
987
- ELEMENT_SOURCE_HTML,
988
- open_tag->base.location.start,
989
- open_tag->base.location.end,
990
- errors
991
- );
980
+ return (AST_NODE_T*) open_tag;
992
981
  }
993
982
 
994
983
  static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
995
- array_T* errors = array_init(8);
984
+ hb_array_T* errors = hb_array_init(8);
996
985
 
997
986
  token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
998
987
  token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
@@ -1017,14 +1006,15 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
1017
1006
  return erb_node;
1018
1007
  }
1019
1008
 
1020
- static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors) {
1021
- buffer_T content = buffer_new();
1009
+ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
1010
+ hb_buffer_T content;
1011
+ hb_buffer_init(&content, 1024);
1022
1012
  position_T start = parser->current_token->location.start;
1023
- const char* expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
1013
+ hb_string_T expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
1024
1014
 
1025
- if (expected_closing_tag == NULL) {
1015
+ if (hb_string_is_empty(expected_closing_tag)) {
1026
1016
  parser_exit_foreign_content(parser);
1027
- buffer_free(&content);
1017
+ free(content.value);
1028
1018
 
1029
1019
  return;
1030
1020
  }
@@ -1034,7 +1024,7 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
1034
1024
  parser_append_literal_node_from_buffer(parser, &content, children, start);
1035
1025
 
1036
1026
  AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
1037
- array_append(children, erb_node);
1027
+ hb_array_append(children, erb_node);
1038
1028
 
1039
1029
  start = parser->current_token->location.start;
1040
1030
 
@@ -1048,7 +1038,8 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
1048
1038
  bool is_potential_match = false;
1049
1039
 
1050
1040
  if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
1051
- is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
1041
+ is_potential_match =
1042
+ parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type);
1052
1043
  }
1053
1044
 
1054
1045
  lexer_restore_state(parser->lexer, saved_state);
@@ -1059,51 +1050,57 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
1059
1050
  parser_append_literal_node_from_buffer(parser, &content, children, start);
1060
1051
  parser_exit_foreign_content(parser);
1061
1052
 
1062
- buffer_free(&content);
1053
+ free(content.value);
1063
1054
 
1064
1055
  return;
1065
1056
  }
1066
1057
  }
1067
1058
 
1068
1059
  token_T* token = parser_advance(parser);
1069
- buffer_append(&content, token->value);
1060
+ hb_buffer_append(&content, token->value);
1070
1061
  token_free(token);
1071
1062
  }
1072
1063
 
1073
1064
  parser_append_literal_node_from_buffer(parser, &content, children, start);
1074
1065
  parser_exit_foreign_content(parser);
1075
- buffer_free(&content);
1066
+ free(content.value);
1076
1067
  }
1077
1068
 
1078
- static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors) {
1079
- while (token_is_none_of(parser, TOKEN_HTML_TAG_START_CLOSE, TOKEN_EOF)) {
1069
+ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
1070
+ while (token_is_not(parser, TOKEN_EOF)) {
1071
+
1080
1072
  if (token_is(parser, TOKEN_ERB_START)) {
1081
- array_append(children, parser_parse_erb_tag(parser));
1073
+ hb_array_append(children, parser_parse_erb_tag(parser));
1082
1074
  continue;
1083
1075
  }
1084
1076
 
1085
1077
  if (token_is(parser, TOKEN_HTML_DOCTYPE)) {
1086
- array_append(children, parser_parse_html_doctype(parser));
1078
+ hb_array_append(children, parser_parse_html_doctype(parser));
1087
1079
  continue;
1088
1080
  }
1089
1081
 
1090
1082
  if (token_is(parser, TOKEN_XML_DECLARATION)) {
1091
- array_append(children, parser_parse_xml_declaration(parser));
1083
+ hb_array_append(children, parser_parse_xml_declaration(parser));
1092
1084
  continue;
1093
1085
  }
1094
1086
 
1095
1087
  if (token_is(parser, TOKEN_CDATA_START)) {
1096
- array_append(children, parser_parse_cdata(parser));
1088
+ hb_array_append(children, parser_parse_cdata(parser));
1097
1089
  continue;
1098
1090
  }
1099
1091
 
1100
1092
  if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
1101
- array_append(children, parser_parse_html_comment(parser));
1093
+ hb_array_append(children, parser_parse_html_comment(parser));
1102
1094
  continue;
1103
1095
  }
1104
1096
 
1105
1097
  if (token_is(parser, TOKEN_HTML_TAG_START)) {
1106
- array_append(children, parser_parse_html_element(parser));
1098
+ hb_array_append(children, parser_parse_html_element(parser));
1099
+ continue;
1100
+ }
1101
+
1102
+ if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
1103
+ hb_array_append(children, parser_parse_html_close_tag(parser));
1107
1104
  continue;
1108
1105
  }
1109
1106
 
@@ -1111,6 +1108,7 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
1111
1108
  parser,
1112
1109
  TOKEN_AMPERSAND,
1113
1110
  TOKEN_AT,
1111
+ TOKEN_BACKSLASH,
1114
1112
  TOKEN_BACKTICK,
1115
1113
  TOKEN_CHARACTER,
1116
1114
  TOKEN_COLON,
@@ -1127,7 +1125,7 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
1127
1125
  TOKEN_UNDERSCORE,
1128
1126
  TOKEN_WHITESPACE
1129
1127
  )) {
1130
- array_append(children, parser_parse_text_content(parser, errors));
1128
+ hb_array_append(children, parser_parse_text_content(parser, errors));
1131
1129
  continue;
1132
1130
  }
1133
1131
 
@@ -1135,63 +1133,121 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
1135
1133
  parser,
1136
1134
  "Unexpected token",
1137
1135
  "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
1138
- "TOKEN_NBSP, TOKEN_AT, or TOKEN_NEWLINE",
1136
+ "TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE",
1139
1137
  errors
1140
1138
  );
1141
1139
  }
1142
1140
  }
1143
1141
 
1144
- static void parser_parse_unclosed_html_tags(const parser_T* parser, array_T* errors) {
1145
- while (array_size(parser->open_tags_stack) > 0) {
1146
- token_T* unclosed_tag = parser_pop_open_tag(parser);
1142
+ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_string_T tag_name) {
1143
+ int depth = 0;
1147
1144
 
1148
- append_unclosed_element_error(
1149
- unclosed_tag,
1150
- parser->current_token->location.start,
1151
- parser->current_token->location.end,
1152
- errors
1153
- );
1145
+ for (size_t i = start_idx + 1; i < hb_array_size(nodes); i++) {
1146
+ AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
1147
+ if (node == NULL) { continue; }
1148
+
1149
+ if (node->type == AST_HTML_OPEN_TAG_NODE) {
1150
+ AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
1154
1151
 
1155
- token_free(unclosed_tag);
1152
+ if (hb_string_equals(hb_string(open->tag_name->value), tag_name)) { depth++; }
1153
+ } else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
1154
+ AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
1155
+
1156
+ if (hb_string_equals(hb_string(close->tag_name->value), tag_name)) {
1157
+ if (depth == 0) { return i; }
1158
+ depth--;
1159
+ }
1160
+ }
1156
1161
  }
1162
+
1163
+ return (size_t) -1;
1157
1164
  }
1158
1165
 
1159
- static void parser_parse_stray_closing_tags(parser_T* parser, array_T* children, array_T* errors) {
1160
- while (token_is_not(parser, TOKEN_EOF)) {
1161
- if (token_is_not(parser, TOKEN_HTML_TAG_START_CLOSE)) {
1162
- parser_append_unexpected_token_error(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
1166
+ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors);
1163
1167
 
1164
- token_T* unexpected = parser_advance(parser);
1165
- token_free(unexpected);
1168
+ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors) {
1169
+ hb_array_T* result = hb_array_init(hb_array_size(nodes));
1166
1170
 
1167
- continue;
1168
- }
1171
+ for (size_t index = 0; index < hb_array_size(nodes); index++) {
1172
+ AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, index);
1173
+ if (node == NULL) { continue; }
1169
1174
 
1170
- AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
1175
+ if (node->type == AST_HTML_OPEN_TAG_NODE) {
1176
+ AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node;
1177
+ hb_string_T tag_name = hb_string(open_tag->tag_name->value);
1171
1178
 
1172
- if (!is_void_element(close_tag->tag_name->value)) {
1173
- append_missing_opening_tag_error(
1174
- close_tag->tag_name,
1175
- close_tag->base.location.start,
1176
- close_tag->base.location.end,
1177
- close_tag->base.errors
1178
- );
1179
- }
1179
+ size_t close_index = find_matching_close_tag(nodes, index, tag_name);
1180
+
1181
+ if (close_index == (size_t) -1) {
1182
+ if (hb_array_size(open_tag->base.errors) == 0) {
1183
+ append_missing_closing_tag_error(
1184
+ open_tag->tag_name,
1185
+ open_tag->base.location.start,
1186
+ open_tag->base.location.end,
1187
+ open_tag->base.errors
1188
+ );
1189
+ }
1190
+
1191
+ hb_array_append(result, node);
1192
+ } else {
1193
+ AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) hb_array_get(nodes, close_index);
1180
1194
 
1181
- array_append(children, close_tag);
1195
+ hb_array_T* body = hb_array_init(close_index - index - 1);
1182
1196
 
1183
- parser_parse_in_data_state(parser, children, errors);
1197
+ for (size_t j = index + 1; j < close_index; j++) {
1198
+ hb_array_append(body, hb_array_get(nodes, j));
1199
+ }
1200
+
1201
+ hb_array_T* processed_body = parser_build_elements_from_tags(body, errors);
1202
+ hb_array_free(&body);
1203
+
1204
+ hb_array_T* element_errors = hb_array_init(8);
1205
+
1206
+ AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
1207
+ open_tag,
1208
+ open_tag->tag_name,
1209
+ processed_body,
1210
+ close_tag,
1211
+ false,
1212
+ ELEMENT_SOURCE_HTML,
1213
+ open_tag->base.location.start,
1214
+ close_tag->base.location.end,
1215
+ element_errors
1216
+ );
1217
+
1218
+ hb_array_append(result, element);
1219
+
1220
+ index = close_index;
1221
+ }
1222
+ } else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
1223
+ AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) node;
1224
+
1225
+ if (!is_void_element(hb_string(close_tag->tag_name->value))) {
1226
+ if (hb_array_size(close_tag->base.errors) == 0) {
1227
+ append_missing_opening_tag_error(
1228
+ close_tag->tag_name,
1229
+ close_tag->base.location.start,
1230
+ close_tag->base.location.end,
1231
+ close_tag->base.errors
1232
+ );
1233
+ }
1234
+ }
1235
+
1236
+ hb_array_append(result, node);
1237
+ } else {
1238
+ hb_array_append(result, node);
1239
+ }
1184
1240
  }
1241
+
1242
+ return result;
1185
1243
  }
1186
1244
 
1187
1245
  static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) {
1188
- array_T* children = array_init(8);
1189
- array_T* errors = array_init(8);
1246
+ hb_array_T* children = hb_array_init(8);
1247
+ hb_array_T* errors = hb_array_init(8);
1190
1248
  position_T start = parser->current_token->location.start;
1191
1249
 
1192
1250
  parser_parse_in_data_state(parser, children, errors);
1193
- parser_parse_unclosed_html_tags(parser, errors);
1194
- parser_parse_stray_closing_tags(parser, children, errors);
1195
1251
 
1196
1252
  token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors);
1197
1253
 
@@ -1206,26 +1262,26 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser) {
1206
1262
  return parser_parse_document(parser);
1207
1263
  }
1208
1264
 
1209
- static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, array_T* children) {
1210
- if (parser->options && parser->options->track_whitespace) {
1211
- array_T* errors = array_init(8);
1265
+ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children) {
1266
+ if (parser->options.track_whitespace) {
1267
+ hb_array_T* errors = hb_array_init(8);
1212
1268
  AST_WHITESPACE_NODE_T* whitespace_node = ast_whitespace_node_init(
1213
1269
  whitespace_token,
1214
1270
  whitespace_token->location.start,
1215
1271
  whitespace_token->location.end,
1216
1272
  errors
1217
1273
  );
1218
- array_append(children, whitespace_node);
1274
+ hb_array_append(children, whitespace_node);
1219
1275
  }
1220
1276
 
1221
1277
  token_free(whitespace_token);
1222
1278
  }
1223
1279
 
1224
- static void parser_consume_whitespace(parser_T* parser, array_T* children) {
1280
+ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
1225
1281
  while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
1226
1282
  token_T* whitespace = parser_advance(parser);
1227
1283
 
1228
- if (parser->options && parser->options->track_whitespace && children != NULL) {
1284
+ if (parser->options.track_whitespace && children != NULL) {
1229
1285
  parser_handle_whitespace(parser, whitespace, children);
1230
1286
  } else {
1231
1287
  token_free(whitespace);
@@ -1233,13 +1289,38 @@ static void parser_consume_whitespace(parser_T* parser, array_T* children) {
1233
1289
  }
1234
1290
  }
1235
1291
 
1236
- void parser_free(parser_T* parser) {
1292
+ void herb_parser_deinit(parser_T* parser) {
1237
1293
  if (parser == NULL) { return; }
1238
1294
 
1239
- if (parser->lexer != NULL) { lexer_free(parser->lexer); }
1240
1295
  if (parser->current_token != NULL) { token_free(parser->current_token); }
1241
- if (parser->open_tags_stack != NULL) { array_free(&parser->open_tags_stack); }
1242
- if (parser->options != NULL) { free(parser->options); }
1296
+ if (parser->open_tags_stack != NULL) { hb_array_free(&parser->open_tags_stack); }
1297
+ }
1298
+
1299
+ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
1300
+ if (nodes == NULL || hb_array_size(nodes) == 0) { return; }
1301
+
1302
+ hb_array_T* processed = parser_build_elements_from_tags(nodes, errors);
1303
+
1304
+ while (hb_array_size(nodes) > 0) {
1305
+ hb_array_remove(nodes, 0);
1306
+ }
1307
+
1308
+ for (size_t i = 0; i < hb_array_size(processed); i++) {
1309
+ hb_array_append(nodes, hb_array_get(processed, i));
1310
+ }
1311
+
1312
+ hb_array_free(&processed);
1313
+
1314
+ for (size_t i = 0; i < hb_array_size(nodes); i++) {
1315
+ AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
1316
+ if (node == NULL) { continue; }
1317
+
1318
+ herb_visit_node(node, match_tags_visitor, errors);
1319
+ }
1320
+ }
1321
+
1322
+ void herb_parser_match_html_tags_post_analyze(AST_DOCUMENT_NODE_T* document) {
1323
+ if (document == NULL) { return; }
1243
1324
 
1244
- free(parser);
1325
+ match_tags_in_node_array(document->children, document->base.errors);
1245
1326
  }