@herb-tools/node 0.8.10 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/binding.gyp +26 -8
  3. package/dist/herb-node.cjs +41 -12
  4. package/dist/herb-node.cjs.map +1 -1
  5. package/dist/herb-node.esm.js +8 -1
  6. package/dist/herb-node.esm.js.map +1 -1
  7. package/dist/types/node-backend.d.ts +3 -1
  8. package/extension/error_helpers.cpp +395 -73
  9. package/extension/error_helpers.h +13 -3
  10. package/extension/extension_helpers.cpp +38 -35
  11. package/extension/extension_helpers.h +2 -2
  12. package/extension/herb.cpp +183 -64
  13. package/extension/libherb/analyze/action_view/attribute_extraction_helpers.c +290 -0
  14. package/extension/libherb/analyze/action_view/attribute_extraction_helpers.h +36 -0
  15. package/extension/libherb/analyze/action_view/content_tag.c +70 -0
  16. package/extension/libherb/analyze/action_view/link_to.c +143 -0
  17. package/extension/libherb/analyze/action_view/registry.c +60 -0
  18. package/extension/libherb/analyze/action_view/tag.c +64 -0
  19. package/extension/libherb/analyze/action_view/tag_helper_handler.h +41 -0
  20. package/extension/libherb/analyze/action_view/tag_helper_node_builders.c +305 -0
  21. package/extension/libherb/analyze/action_view/tag_helper_node_builders.h +70 -0
  22. package/extension/libherb/analyze/action_view/tag_helpers.c +748 -0
  23. package/extension/libherb/analyze/action_view/tag_helpers.h +38 -0
  24. package/extension/libherb/analyze/action_view/turbo_frame_tag.c +88 -0
  25. package/extension/libherb/analyze/analyze.c +882 -0
  26. package/extension/libherb/{include → analyze}/analyze.h +14 -4
  27. package/extension/libherb/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
  28. package/extension/libherb/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
  29. package/extension/libherb/analyze/builders.c +343 -0
  30. package/extension/libherb/analyze/builders.h +27 -0
  31. package/extension/libherb/analyze/conditional_elements.c +594 -0
  32. package/extension/libherb/analyze/conditional_elements.h +9 -0
  33. package/extension/libherb/analyze/conditional_open_tags.c +640 -0
  34. package/extension/libherb/analyze/conditional_open_tags.h +9 -0
  35. package/extension/libherb/analyze/control_type.c +250 -0
  36. package/extension/libherb/analyze/control_type.h +14 -0
  37. package/extension/libherb/{analyze_helpers.c → analyze/helpers.c} +48 -23
  38. package/extension/libherb/{analyze_helpers.h → analyze/helpers.h} +4 -2
  39. package/extension/libherb/analyze/invalid_structures.c +193 -0
  40. package/extension/libherb/analyze/invalid_structures.h +11 -0
  41. package/extension/libherb/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
  42. package/extension/libherb/analyze/parse_errors.c +84 -0
  43. package/extension/libherb/analyze/prism_annotate.c +397 -0
  44. package/extension/libherb/analyze/prism_annotate.h +16 -0
  45. package/extension/libherb/{analyze_transform.c → analyze/transform.c} +17 -3
  46. package/extension/libherb/ast_node.c +17 -7
  47. package/extension/libherb/ast_node.h +11 -5
  48. package/extension/libherb/ast_nodes.c +663 -388
  49. package/extension/libherb/ast_nodes.h +118 -39
  50. package/extension/libherb/ast_pretty_print.c +191 -7
  51. package/extension/libherb/ast_pretty_print.h +6 -1
  52. package/extension/libherb/element_source.h +3 -8
  53. package/extension/libherb/errors.c +1077 -521
  54. package/extension/libherb/errors.h +149 -56
  55. package/extension/libherb/extract.c +145 -49
  56. package/extension/libherb/extract.h +21 -5
  57. package/extension/libherb/herb.c +52 -34
  58. package/extension/libherb/herb.h +18 -6
  59. package/extension/libherb/herb_prism_node.h +13 -0
  60. package/extension/libherb/html_util.c +241 -12
  61. package/extension/libherb/html_util.h +7 -2
  62. package/extension/libherb/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
  63. package/extension/libherb/include/analyze/action_view/tag_helper_handler.h +41 -0
  64. package/extension/libherb/include/analyze/action_view/tag_helper_node_builders.h +70 -0
  65. package/extension/libherb/include/analyze/action_view/tag_helpers.h +38 -0
  66. package/extension/libherb/{analyze.h → include/analyze/analyze.h} +14 -4
  67. package/extension/libherb/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
  68. package/extension/libherb/include/analyze/builders.h +27 -0
  69. package/extension/libherb/include/analyze/conditional_elements.h +9 -0
  70. package/extension/libherb/include/analyze/conditional_open_tags.h +9 -0
  71. package/extension/libherb/include/analyze/control_type.h +14 -0
  72. package/extension/libherb/include/{analyze_helpers.h → analyze/helpers.h} +4 -2
  73. package/extension/libherb/include/analyze/invalid_structures.h +11 -0
  74. package/extension/libherb/include/analyze/prism_annotate.h +16 -0
  75. package/extension/libherb/include/ast_node.h +11 -5
  76. package/extension/libherb/include/ast_nodes.h +118 -39
  77. package/extension/libherb/include/ast_pretty_print.h +6 -1
  78. package/extension/libherb/include/element_source.h +3 -8
  79. package/extension/libherb/include/errors.h +149 -56
  80. package/extension/libherb/include/extract.h +21 -5
  81. package/extension/libherb/include/herb.h +18 -6
  82. package/extension/libherb/include/herb_prism_node.h +13 -0
  83. package/extension/libherb/include/html_util.h +7 -2
  84. package/extension/libherb/include/io.h +3 -1
  85. package/extension/libherb/include/lex_helpers.h +29 -0
  86. package/extension/libherb/include/lexer.h +1 -1
  87. package/extension/libherb/include/lexer_peek_helpers.h +87 -13
  88. package/extension/libherb/include/lexer_struct.h +2 -0
  89. package/extension/libherb/include/location.h +2 -1
  90. package/extension/libherb/include/parser.h +27 -2
  91. package/extension/libherb/include/parser_helpers.h +19 -3
  92. package/extension/libherb/include/pretty_print.h +10 -5
  93. package/extension/libherb/include/prism_context.h +45 -0
  94. package/extension/libherb/include/prism_helpers.h +10 -7
  95. package/extension/libherb/include/prism_serialized.h +12 -0
  96. package/extension/libherb/include/token.h +16 -4
  97. package/extension/libherb/include/token_struct.h +10 -3
  98. package/extension/libherb/include/utf8.h +2 -1
  99. package/extension/libherb/include/util/hb_allocator.h +78 -0
  100. package/extension/libherb/include/util/hb_arena.h +6 -1
  101. package/extension/libherb/include/util/hb_arena_debug.h +12 -1
  102. package/extension/libherb/include/util/hb_array.h +7 -3
  103. package/extension/libherb/include/util/hb_buffer.h +6 -4
  104. package/extension/libherb/include/util/hb_foreach.h +79 -0
  105. package/extension/libherb/include/util/hb_narray.h +8 -4
  106. package/extension/libherb/include/util/hb_string.h +56 -9
  107. package/extension/libherb/include/util.h +6 -3
  108. package/extension/libherb/include/version.h +1 -1
  109. package/extension/libherb/io.c +3 -2
  110. package/extension/libherb/io.h +3 -1
  111. package/extension/libherb/lex_helpers.h +29 -0
  112. package/extension/libherb/lexer.c +42 -30
  113. package/extension/libherb/lexer.h +1 -1
  114. package/extension/libherb/lexer_peek_helpers.c +12 -74
  115. package/extension/libherb/lexer_peek_helpers.h +87 -13
  116. package/extension/libherb/lexer_struct.h +2 -0
  117. package/extension/libherb/location.c +2 -2
  118. package/extension/libherb/location.h +2 -1
  119. package/extension/libherb/main.c +53 -28
  120. package/extension/libherb/parser.c +783 -247
  121. package/extension/libherb/parser.h +27 -2
  122. package/extension/libherb/parser_helpers.c +110 -23
  123. package/extension/libherb/parser_helpers.h +19 -3
  124. package/extension/libherb/parser_match_tags.c +110 -49
  125. package/extension/libherb/pretty_print.c +29 -24
  126. package/extension/libherb/pretty_print.h +10 -5
  127. package/extension/libherb/prism_context.h +45 -0
  128. package/extension/libherb/prism_helpers.c +30 -27
  129. package/extension/libherb/prism_helpers.h +10 -7
  130. package/extension/libherb/prism_serialized.h +12 -0
  131. package/extension/libherb/ruby_parser.c +2 -0
  132. package/extension/libherb/token.c +151 -66
  133. package/extension/libherb/token.h +16 -4
  134. package/extension/libherb/token_matchers.c +0 -1
  135. package/extension/libherb/token_struct.h +10 -3
  136. package/extension/libherb/utf8.c +7 -6
  137. package/extension/libherb/utf8.h +2 -1
  138. package/extension/libherb/util/hb_allocator.c +341 -0
  139. package/extension/libherb/util/hb_allocator.h +78 -0
  140. package/extension/libherb/util/hb_arena.c +81 -56
  141. package/extension/libherb/util/hb_arena.h +6 -1
  142. package/extension/libherb/util/hb_arena_debug.c +32 -17
  143. package/extension/libherb/util/hb_arena_debug.h +12 -1
  144. package/extension/libherb/util/hb_array.c +30 -15
  145. package/extension/libherb/util/hb_array.h +7 -3
  146. package/extension/libherb/util/hb_buffer.c +17 -21
  147. package/extension/libherb/util/hb_buffer.h +6 -4
  148. package/extension/libherb/util/hb_foreach.h +79 -0
  149. package/extension/libherb/util/hb_narray.c +22 -7
  150. package/extension/libherb/util/hb_narray.h +8 -4
  151. package/extension/libherb/util/hb_string.c +49 -35
  152. package/extension/libherb/util/hb_string.h +56 -9
  153. package/extension/libherb/util.c +21 -11
  154. package/extension/libherb/util.h +6 -3
  155. package/extension/libherb/version.h +1 -1
  156. package/extension/libherb/visitor.c +48 -1
  157. package/extension/nodes.cpp +451 -6
  158. package/extension/nodes.h +8 -1
  159. package/package.json +12 -8
  160. package/src/node-backend.ts +11 -1
  161. package/dist/types/index-cjs.d.cts +0 -1
  162. package/extension/libherb/analyze.c +0 -1608
  163. package/extension/libherb/element_source.c +0 -12
  164. package/extension/libherb/include/util/hb_system.h +0 -9
  165. package/extension/libherb/util/hb_system.c +0 -30
  166. package/extension/libherb/util/hb_system.h +0 -9
  167. package/src/index-cjs.cts +0 -22
  168. /package/dist/types/{index-esm.d.mts → index.d.ts} +0 -0
  169. /package/src/{index-esm.mts → index.ts} +0 -0
@@ -20,6 +20,8 @@
20
20
  #include <string.h>
21
21
  #include <strings.h>
22
22
 
23
+ #define MAX_CONSECUTIVE_ERRORS 10
24
+
23
25
  static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors);
24
26
  static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors);
25
27
  static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
@@ -27,29 +29,39 @@ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token
27
29
  static void parser_consume_whitespace(parser_T* parser, hb_array_T* children);
28
30
  static void parser_skip_erb_content(lexer_T* lexer);
29
31
  static bool parser_lookahead_erb_is_attribute(lexer_T* lexer);
32
+ static bool parser_lookahead_erb_is_control_flow(parser_T* parser);
30
33
  static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children);
31
34
  static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children);
32
35
 
33
- const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false };
36
+ const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false,
37
+ .analyze = true,
38
+ .strict = true,
39
+ .action_view_helpers = false,
40
+ .prism_nodes_deep = false,
41
+ .prism_nodes = false,
42
+ .prism_program = false };
34
43
 
35
44
  size_t parser_sizeof(void) {
36
45
  return sizeof(struct PARSER_STRUCT);
37
46
  }
38
47
 
39
48
  void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options) {
49
+ parser->allocator = lexer->allocator;
40
50
  parser->lexer = lexer;
41
51
  parser->current_token = lexer_next_token(lexer);
42
- parser->open_tags_stack = hb_array_init(16);
52
+ parser->open_tags_stack = hb_array_init(16, parser->allocator);
43
53
  parser->state = PARSER_STATE_DATA;
44
54
  parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
45
55
  parser->options = options;
56
+ parser->consecutive_error_count = 0;
57
+ parser->in_recovery_mode = false;
46
58
  }
47
59
 
48
60
  static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
49
- hb_array_T* errors = hb_array_init(8);
50
- hb_array_T* children = hb_array_init(8);
61
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
62
+ hb_array_T* children = hb_array_init(8, parser->allocator);
51
63
  hb_buffer_T content;
52
- hb_buffer_init(&content, 128);
64
+ hb_buffer_init(&content, 128, parser->allocator);
53
65
 
54
66
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_CDATA_START, errors);
55
67
  position_T start = parser->current_token->location.start;
@@ -64,8 +76,8 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
64
76
  }
65
77
 
66
78
  token_T* token = parser_advance(parser);
67
- hb_buffer_append(&content, token->value);
68
- token_free(token);
79
+ hb_buffer_append_string(&content, token->value);
80
+ token_free(token, parser->allocator);
69
81
  }
70
82
 
71
83
  parser_append_literal_node_from_buffer(parser, &content, children, start);
@@ -77,26 +89,27 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
77
89
  tag_closing,
78
90
  tag_opening->location.start,
79
91
  tag_closing->location.end,
80
- errors
92
+ errors,
93
+ parser->allocator
81
94
  );
82
95
 
83
- free(content.value);
84
- token_free(tag_opening);
85
- token_free(tag_closing);
96
+ hb_buffer_free(&content);
97
+ token_free(tag_opening, parser->allocator);
98
+ token_free(tag_closing, parser->allocator);
86
99
 
87
100
  return cdata;
88
101
  }
89
102
 
90
103
  static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
91
- hb_array_T* errors = hb_array_init(8);
92
- hb_array_T* children = hb_array_init(8);
104
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
105
+ hb_array_T* children = hb_array_init(8, parser->allocator);
93
106
  token_T* comment_start = parser_consume_expected(parser, TOKEN_HTML_COMMENT_START, errors);
94
107
  position_T start = parser->current_token->location.start;
95
108
 
96
109
  hb_buffer_T comment;
97
- hb_buffer_init(&comment, 512);
110
+ hb_buffer_init(&comment, 512, parser->allocator);
98
111
 
99
- while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_EOF)) {
112
+ while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_HTML_COMMENT_INVALID_END, TOKEN_EOF)) {
100
113
  if (token_is(parser, TOKEN_ERB_START)) {
101
114
  parser_append_literal_node_from_buffer(parser, &comment, children, start);
102
115
 
@@ -109,13 +122,26 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
109
122
  }
110
123
 
111
124
  token_T* token = parser_advance(parser);
112
- hb_buffer_append(&comment, token->value);
113
- token_free(token);
125
+ hb_buffer_append_string(&comment, token->value);
126
+ token_free(token, parser->allocator);
114
127
  }
115
128
 
116
129
  parser_append_literal_node_from_buffer(parser, &comment, children, start);
117
130
 
118
- token_T* comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors);
131
+ token_T* comment_end = NULL;
132
+
133
+ if (token_is(parser, TOKEN_HTML_COMMENT_INVALID_END)) {
134
+ comment_end = parser_advance(parser);
135
+ append_invalid_comment_closing_tag_error(
136
+ comment_end,
137
+ comment_end->location.start,
138
+ comment_end->location.end,
139
+ parser->allocator,
140
+ errors
141
+ );
142
+ } else {
143
+ comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors);
144
+ }
119
145
 
120
146
  AST_HTML_COMMENT_NODE_T* comment_node = ast_html_comment_node_init(
121
147
  comment_start,
@@ -123,21 +149,22 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
123
149
  comment_end,
124
150
  comment_start->location.start,
125
151
  comment_end->location.end,
126
- errors
152
+ errors,
153
+ parser->allocator
127
154
  );
128
155
 
129
- free(comment.value);
130
- token_free(comment_start);
131
- token_free(comment_end);
156
+ hb_buffer_free(&comment);
157
+ token_free(comment_start, parser->allocator);
158
+ token_free(comment_end, parser->allocator);
132
159
 
133
160
  return comment_node;
134
161
  }
135
162
 
136
163
  static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
137
- hb_array_T* errors = hb_array_init(8);
138
- hb_array_T* children = hb_array_init(8);
164
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
165
+ hb_array_T* children = hb_array_init(8, parser->allocator);
139
166
  hb_buffer_T content;
140
- hb_buffer_init(&content, 64);
167
+ hb_buffer_init(&content, 64, parser->allocator);
141
168
 
142
169
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_DOCTYPE, errors);
143
170
 
@@ -154,8 +181,8 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
154
181
  }
155
182
 
156
183
  token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
157
- hb_buffer_append(&content, token->value);
158
- token_free(token);
184
+ hb_buffer_append_string(&content, token->value);
185
+ token_free(token, parser->allocator);
159
186
  }
160
187
 
161
188
  parser_append_literal_node_from_buffer(parser, &content, children, start);
@@ -168,21 +195,22 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
168
195
  tag_closing,
169
196
  tag_opening->location.start,
170
197
  tag_closing->location.end,
171
- errors
198
+ errors,
199
+ parser->allocator
172
200
  );
173
201
 
174
- token_free(tag_opening);
175
- token_free(tag_closing);
176
- free(content.value);
202
+ token_free(tag_opening, parser->allocator);
203
+ token_free(tag_closing, parser->allocator);
204
+ hb_buffer_free(&content);
177
205
 
178
206
  return doctype;
179
207
  }
180
208
 
181
209
  static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser) {
182
- hb_array_T* errors = hb_array_init(8);
183
- hb_array_T* children = hb_array_init(8);
210
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
211
+ hb_array_T* children = hb_array_init(8, parser->allocator);
184
212
  hb_buffer_T content;
185
- hb_buffer_init(&content, 64);
213
+ hb_buffer_init(&content, 64, parser->allocator);
186
214
 
187
215
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_XML_DECLARATION, errors);
188
216
 
@@ -201,8 +229,8 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
201
229
  }
202
230
 
203
231
  token_T* token = parser_advance(parser);
204
- hb_buffer_append(&content, token->value);
205
- token_free(token);
232
+ hb_buffer_append_string(&content, token->value);
233
+ token_free(token, parser->allocator);
206
234
  }
207
235
 
208
236
  parser_append_literal_node_from_buffer(parser, &content, children, start);
@@ -215,12 +243,13 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
215
243
  tag_closing,
216
244
  tag_opening->location.start,
217
245
  tag_closing->location.end,
218
- errors
246
+ errors,
247
+ parser->allocator
219
248
  );
220
249
 
221
- token_free(tag_opening);
222
- token_free(tag_closing);
223
- free(content.value);
250
+ token_free(tag_opening, parser->allocator);
251
+ token_free(tag_closing, parser->allocator);
252
+ hb_buffer_free(&content);
224
253
 
225
254
  return xml_declaration;
226
255
  }
@@ -229,7 +258,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
229
258
  position_T start = parser->current_token->location.start;
230
259
 
231
260
  hb_buffer_T content;
232
- hb_buffer_init(&content, 2048);
261
+ hb_buffer_init(&content, 2048, parser->allocator);
233
262
 
234
263
  while (token_is_none_of(
235
264
  parser,
@@ -241,49 +270,66 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
241
270
  TOKEN_EOF
242
271
  )) {
243
272
  if (token_is(parser, TOKEN_ERROR)) {
244
- free(content.value);
273
+ hb_buffer_free(&content);
245
274
 
246
- token_T* token = parser_consume_expected(parser, TOKEN_ERROR, document_errors);
247
- append_unexpected_error(
248
- "Token Error",
249
- "not TOKEN_ERROR",
250
- token->value,
251
- token->location.start,
252
- token->location.end,
253
- document_errors
254
- );
255
-
256
- token_free(token);
275
+ parser_append_unexpected_error_string(parser, document_errors, "Token Error", "not an error token");
257
276
 
258
277
  return NULL;
259
278
  }
260
279
 
280
+ if (parser->options.strict && parser->current_token->type == TOKEN_PERCENT) {
281
+ lexer_T lexer_copy = *parser->lexer;
282
+ token_T* peek_token = lexer_next_token(&lexer_copy);
283
+
284
+ if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
285
+ position_T stray_start = parser->current_token->location.start;
286
+ position_T stray_end = peek_token->location.end;
287
+ token_free(peek_token, parser->allocator);
288
+
289
+ append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, document_errors);
290
+
291
+ token_T* percent = parser_advance(parser);
292
+ hb_buffer_append_string(&content, percent->value);
293
+ token_free(percent, parser->allocator);
294
+
295
+ token_T* gt = parser_advance(parser);
296
+ hb_buffer_append_string(&content, gt->value);
297
+ token_free(gt, parser->allocator);
298
+
299
+ continue;
300
+ }
301
+
302
+ token_free(peek_token, parser->allocator);
303
+ }
304
+
261
305
  token_T* token = parser_advance(parser);
262
- hb_buffer_append(&content, token->value);
263
- token_free(token);
306
+ hb_buffer_append_string(&content, token->value);
307
+ token_free(token, parser->allocator);
264
308
  }
265
309
 
266
- hb_array_T* errors = hb_array_init(8);
310
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
267
311
 
268
312
  AST_HTML_TEXT_NODE_T* text_node = NULL;
269
313
 
270
314
  if (hb_buffer_length(&content) > 0) {
315
+ hb_string_T text_content = { .data = content.value, .length = (uint32_t) content.length };
271
316
  text_node =
272
- ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors);
317
+ ast_html_text_node_init(text_content, start, parser->current_token->location.start, errors, parser->allocator);
273
318
  } else {
274
- text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors);
319
+ text_node =
320
+ ast_html_text_node_init(HB_STRING_EMPTY, start, parser->current_token->location.start, errors, parser->allocator);
275
321
  }
276
322
 
277
- free(content.value);
323
+ hb_buffer_free(&content);
278
324
 
279
325
  return text_node;
280
326
  }
281
327
 
282
328
  static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
283
- hb_array_T* errors = hb_array_init(8);
284
- hb_array_T* children = hb_array_init(8);
329
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
330
+ hb_array_T* children = hb_array_init(8, parser->allocator);
285
331
  hb_buffer_T buffer;
286
- hb_buffer_init(&buffer, 128);
332
+ hb_buffer_init(&buffer, 128, parser->allocator);
287
333
  position_T start = parser->current_token->location.start;
288
334
 
289
335
  while (token_is_none_of(
@@ -296,6 +342,16 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
296
342
  TOKEN_EOF
297
343
  )) {
298
344
  if (token_is(parser, TOKEN_ERB_START)) {
345
+ hb_string_T tag = parser->current_token->value;
346
+ bool is_output_tag = (tag.length >= 3 && tag.data[2] == '=');
347
+
348
+ if (!is_output_tag) {
349
+ bool is_control_flow = parser_lookahead_erb_is_control_flow(parser);
350
+
351
+ if (hb_buffer_is_empty(&buffer) && hb_array_size(children) == 0) { break; }
352
+ if (is_control_flow) { break; }
353
+ }
354
+
299
355
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
300
356
 
301
357
  AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
@@ -306,8 +362,8 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
306
362
  }
307
363
 
308
364
  token_T* token = parser_advance(parser);
309
- hb_buffer_append(&buffer, token->value);
310
- token_free(token);
365
+ hb_buffer_append_string(&buffer, token->value);
366
+ token_free(token, parser->allocator);
311
367
  }
312
368
 
313
369
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
@@ -327,9 +383,9 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
327
383
  }
328
384
 
329
385
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
330
- ast_html_attribute_name_node_init(children, node_start, node_end, errors);
386
+ ast_html_attribute_name_node_init(children, node_start, node_end, errors, parser->allocator);
331
387
 
332
- free(buffer.value);
388
+ hb_buffer_free(&buffer);
333
389
 
334
390
  return attribute_name;
335
391
  }
@@ -340,55 +396,137 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
340
396
  hb_array_T* errors
341
397
  ) {
342
398
  hb_buffer_T buffer;
343
- hb_buffer_init(&buffer, 512);
399
+ hb_buffer_init(&buffer, 512, parser->allocator);
344
400
  token_T* opening_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
345
401
  position_T start = parser->current_token->location.start;
346
402
 
347
403
  while (!token_is(parser, TOKEN_EOF)
348
404
  && !(
349
405
  token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
350
- && string_equals(parser->current_token->value, opening_quote->value)
406
+ && hb_string_equals(parser->current_token->value, opening_quote->value)
351
407
  )) {
352
- if (token_is(parser, TOKEN_ERB_START)) {
408
+ if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
409
+ lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
410
+ bool found_closing_quote = false;
411
+ token_T* lookahead = lexer_next_token(parser->lexer);
412
+
413
+ while (lookahead && lookahead->type != TOKEN_EOF) {
414
+ if (lookahead->type == TOKEN_QUOTE && opening_quote != NULL
415
+ && hb_string_equals(lookahead->value, opening_quote->value)) {
416
+ found_closing_quote = true;
417
+ token_free(lookahead, parser->allocator);
418
+ break;
419
+ }
420
+
421
+ token_free(lookahead, parser->allocator);
422
+
423
+ lookahead = lexer_next_token(parser->lexer);
424
+ }
425
+
426
+ if (lookahead && !found_closing_quote && lookahead->type == TOKEN_EOF) {
427
+ token_free(lookahead, parser->allocator);
428
+ }
429
+
430
+ lexer_restore_state(parser->lexer, saved_state);
431
+
432
+ if (found_closing_quote) {
433
+ hb_buffer_append_string(&buffer, parser->current_token->value);
434
+ token_free(parser->current_token, parser->allocator);
435
+ parser->current_token = lexer_next_token(parser->lexer);
436
+ continue;
437
+ }
438
+
439
+ append_unclosed_quote_error(
440
+ opening_quote,
441
+ opening_quote->location.start,
442
+ parser->current_token->location.start,
443
+ parser->allocator,
444
+ errors
445
+ );
446
+
353
447
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
448
+ hb_buffer_free(&buffer);
354
449
 
355
- hb_array_append(children, parser_parse_erb_tag(parser));
450
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
451
+ opening_quote,
452
+ children,
453
+ NULL,
454
+ true,
455
+ opening_quote->location.start,
456
+ parser->current_token->location.start,
457
+ errors,
458
+ parser->allocator
459
+ );
356
460
 
357
- start = parser->current_token->location.start;
461
+ token_free(opening_quote, parser->allocator);
358
462
 
359
- continue;
463
+ return attribute_value;
360
464
  }
361
465
 
362
- if (token_is(parser, TOKEN_BACKSLASH)) {
466
+ bool buffer_ends_with_whitespace = buffer.length > 0 && is_whitespace(buffer.value[buffer.length - 1]);
467
+
468
+ if (token_is(parser, TOKEN_IDENTIFIER) && buffer_ends_with_whitespace) {
363
469
  lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
470
+ token_T* equals_token = lexer_next_token(parser->lexer);
471
+ bool looks_like_new_attribute = false;
364
472
 
365
- token_T* next_token = lexer_next_token(parser->lexer);
473
+ if (equals_token && equals_token->type == TOKEN_EQUALS) {
474
+ token_T* after_equals = lexer_next_token(parser->lexer);
475
+ looks_like_new_attribute = (after_equals && after_equals->type == TOKEN_QUOTE);
366
476
 
367
- if (next_token && next_token->type == TOKEN_QUOTE && opening_quote != NULL
368
- && string_equals(next_token->value, opening_quote->value)) {
369
- hb_buffer_append(&buffer, parser->current_token->value);
370
- hb_buffer_append(&buffer, next_token->value);
477
+ if (after_equals) { token_free(after_equals, parser->allocator); }
478
+ }
371
479
 
372
- token_free(parser->current_token);
373
- token_free(next_token);
480
+ if (equals_token) { token_free(equals_token, parser->allocator); }
481
+ lexer_restore_state(parser->lexer, saved_state);
374
482
 
375
- parser->current_token = lexer_next_token(parser->lexer);
376
- continue;
377
- } else {
378
- lexer_restore_state(parser->lexer, saved_state);
483
+ if (looks_like_new_attribute) {
484
+ append_unclosed_quote_error(
485
+ opening_quote,
486
+ opening_quote->location.start,
487
+ parser->current_token->location.start,
488
+ parser->allocator,
489
+ errors
490
+ );
491
+
492
+ parser_append_literal_node_from_buffer(parser, &buffer, children, start);
493
+ hb_buffer_free(&buffer);
494
+
495
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
496
+ opening_quote,
497
+ children,
498
+ NULL,
499
+ true,
500
+ opening_quote->location.start,
501
+ parser->current_token->location.start,
502
+ errors,
503
+ parser->allocator
504
+ );
505
+
506
+ token_free(opening_quote, parser->allocator);
379
507
 
380
- if (next_token) { token_free(next_token); }
508
+ return attribute_value;
381
509
  }
382
510
  }
383
511
 
384
- hb_buffer_append(&buffer, parser->current_token->value);
385
- token_free(parser->current_token);
512
+ if (token_is(parser, TOKEN_ERB_START)) {
513
+ parser_append_literal_node_from_buffer(parser, &buffer, children, start);
514
+
515
+ hb_array_append(children, parser_parse_erb_tag(parser));
516
+
517
+ start = parser->current_token->location.start;
518
+
519
+ continue;
520
+ }
521
+
522
+ hb_buffer_append_string(&buffer, parser->current_token->value);
523
+ token_free(parser->current_token, parser->allocator);
386
524
 
387
525
  parser->current_token = lexer_next_token(parser->lexer);
388
526
  }
389
527
 
390
528
  if (token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
391
- && string_equals(parser->current_token->value, opening_quote->value)) {
529
+ && hb_string_equals(parser->current_token->value, opening_quote->value)) {
392
530
  lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
393
531
 
394
532
  token_T* potential_closing = parser->current_token;
@@ -396,27 +534,28 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
396
534
 
397
535
  if (token_is(parser, TOKEN_IDENTIFIER) || token_is(parser, TOKEN_CHARACTER)) {
398
536
  append_unexpected_error(
399
- "Unescaped quote character in attribute value",
400
- "escaped quote (\\') or different quote style (\")",
537
+ hb_string("Unescaped quote character in attribute value"),
538
+ hb_string("HTML entity (&apos;/&quot;) or different quote style"),
401
539
  opening_quote->value,
402
540
  potential_closing->location.start,
403
541
  potential_closing->location.end,
542
+ parser->allocator,
404
543
  errors
405
544
  );
406
545
 
407
546
  lexer_restore_state(parser->lexer, saved_state);
408
547
 
409
- token_free(parser->current_token);
548
+ token_free(parser->current_token, parser->allocator);
410
549
  parser->current_token = potential_closing;
411
550
 
412
- hb_buffer_append(&buffer, parser->current_token->value);
413
- token_free(parser->current_token);
551
+ hb_buffer_append_string(&buffer, parser->current_token->value);
552
+ token_free(parser->current_token, parser->allocator);
414
553
  parser->current_token = lexer_next_token(parser->lexer);
415
554
 
416
555
  while (!token_is(parser, TOKEN_EOF)
417
556
  && !(
418
557
  token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
419
- && string_equals(parser->current_token->value, opening_quote->value)
558
+ && hb_string_equals(parser->current_token->value, opening_quote->value)
420
559
  )) {
421
560
  if (token_is(parser, TOKEN_ERB_START)) {
422
561
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
@@ -428,13 +567,13 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
428
567
  continue;
429
568
  }
430
569
 
431
- hb_buffer_append(&buffer, parser->current_token->value);
432
- token_free(parser->current_token);
570
+ hb_buffer_append_string(&buffer, parser->current_token->value);
571
+ token_free(parser->current_token, parser->allocator);
433
572
 
434
573
  parser->current_token = lexer_next_token(parser->lexer);
435
574
  }
436
575
  } else {
437
- token_free(parser->current_token);
576
+ token_free(parser->current_token, parser->allocator);
438
577
  parser->current_token = potential_closing;
439
578
 
440
579
  lexer_restore_state(parser->lexer, saved_state);
@@ -442,20 +581,10 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
442
581
  }
443
582
 
444
583
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
445
- free(buffer.value);
584
+ hb_buffer_free(&buffer);
446
585
 
447
586
  token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
448
587
 
449
- if (opening_quote != NULL && closing_quote != NULL && !string_equals(opening_quote->value, closing_quote->value)) {
450
- append_quotes_mismatch_error(
451
- opening_quote,
452
- closing_quote,
453
- closing_quote->location.start,
454
- closing_quote->location.end,
455
- errors
456
- );
457
- }
458
-
459
588
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
460
589
  opening_quote,
461
590
  children,
@@ -463,18 +592,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
463
592
  true,
464
593
  opening_quote->location.start,
465
594
  closing_quote->location.end,
466
- errors
595
+ errors,
596
+ parser->allocator
467
597
  );
468
598
 
469
- token_free(opening_quote);
470
- token_free(closing_quote);
599
+ token_free(opening_quote, parser->allocator);
600
+ token_free(closing_quote, parser->allocator);
471
601
 
472
602
  return attribute_value;
473
603
  }
474
604
 
475
605
  static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser_T* parser) {
476
- hb_array_T* children = hb_array_init(8);
477
- hb_array_T* errors = hb_array_init(8);
606
+ hb_array_T* children = hb_array_init(8, parser->allocator);
607
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
478
608
 
479
609
  // <div id=<%= "home" %>>
480
610
  if (token_is(parser, TOKEN_ERB_START)) {
@@ -488,15 +618,16 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
488
618
  false,
489
619
  erb_node->base.location.start,
490
620
  erb_node->base.location.end,
491
- errors
621
+ errors,
622
+ parser->allocator
492
623
  );
493
624
  }
494
625
 
495
626
  // <div id=home>
496
627
  if (token_is(parser, TOKEN_IDENTIFIER)) {
497
628
  token_T* identifier = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
498
- AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier);
499
- token_free(identifier);
629
+ AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier, parser->allocator);
630
+ token_free(identifier, parser->allocator);
500
631
 
501
632
  hb_array_append(children, literal);
502
633
 
@@ -507,7 +638,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
507
638
  false,
508
639
  literal->base.location.start,
509
640
  literal->base.location.end,
510
- errors
641
+ errors,
642
+ parser->allocator
511
643
  );
512
644
  }
513
645
 
@@ -520,31 +652,37 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
520
652
  position_T end = token->location.end;
521
653
 
522
654
  append_unexpected_error(
523
- "Invalid quote character for HTML attribute",
524
- "single quote (') or double quote (\")",
525
- "backtick (`)",
655
+ hb_string("Invalid quote character for HTML attribute"),
656
+ hb_string("single quote (') or double quote (\")"),
657
+ hb_string("a backtick"),
526
658
  start,
527
659
  end,
660
+ parser->allocator,
528
661
  errors
529
662
  );
530
663
 
531
664
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
532
- ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
665
+ ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors, parser->allocator);
533
666
 
534
- token_free(token);
667
+ token_free(token, parser->allocator);
535
668
 
536
669
  return value;
537
670
  }
538
671
 
672
+ char* expected = token_types_to_friendly_string(parser->allocator, TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START);
673
+
539
674
  append_unexpected_error(
540
- "Unexpected Token",
541
- "TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START",
542
- token_type_to_string(parser->current_token->type),
675
+ hb_string("Unexpected Token"),
676
+ hb_string(expected),
677
+ token_type_to_friendly_string(parser->current_token->type),
543
678
  parser->current_token->location.start,
544
679
  parser->current_token->location.end,
680
+ parser->allocator,
545
681
  errors
546
682
  );
547
683
 
684
+ hb_allocator_dealloc(parser->allocator, expected);
685
+
548
686
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init(
549
687
  NULL,
550
688
  children,
@@ -552,7 +690,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
552
690
  false,
553
691
  parser->current_token->location.start,
554
692
  parser->current_token->location.end,
555
- errors
693
+ errors,
694
+ parser->allocator
556
695
  );
557
696
 
558
697
  return value;
@@ -567,7 +706,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
567
706
 
568
707
  if (has_equals) {
569
708
  hb_buffer_T equals_buffer;
570
- hb_buffer_init(&equals_buffer, 256);
709
+ hb_buffer_init(&equals_buffer, 256, parser->allocator);
571
710
  position_T equals_start = { 0 };
572
711
  position_T equals_end = { 0 };
573
712
  uint32_t range_start = 0;
@@ -583,8 +722,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
583
722
  range_start = whitespace->range.from;
584
723
  }
585
724
 
586
- hb_buffer_append(&equals_buffer, whitespace->value);
587
- token_free(whitespace);
725
+ hb_buffer_append_string(&equals_buffer, whitespace->value);
726
+ token_free(whitespace, parser->allocator);
588
727
  }
589
728
 
590
729
  token_T* equals = parser_advance(parser);
@@ -595,27 +734,45 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
595
734
  range_start = equals->range.from;
596
735
  }
597
736
 
598
- hb_buffer_append(&equals_buffer, equals->value);
737
+ hb_buffer_append_string(&equals_buffer, equals->value);
599
738
  equals_end = equals->location.end;
600
739
  range_end = equals->range.to;
601
- token_free(equals);
740
+ token_free(equals, parser->allocator);
602
741
 
603
742
  while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
604
743
  token_T* whitespace = parser_advance(parser);
605
- hb_buffer_append(&equals_buffer, whitespace->value);
744
+ hb_buffer_append_string(&equals_buffer, whitespace->value);
606
745
  equals_end = whitespace->location.end;
607
746
  range_end = whitespace->range.to;
608
- token_free(whitespace);
747
+ token_free(whitespace, parser->allocator);
748
+ }
749
+
750
+ token_T* equals_with_whitespace = hb_allocator_alloc(parser->allocator, sizeof(token_T));
751
+
752
+ if (!equals_with_whitespace) {
753
+ hb_buffer_free(&equals_buffer);
754
+
755
+ return ast_html_attribute_node_init(
756
+ attribute_name,
757
+ NULL,
758
+ NULL,
759
+ attribute_name->base.location.start,
760
+ attribute_name->base.location.end,
761
+ NULL,
762
+ parser->allocator
763
+ );
609
764
  }
610
765
 
611
- token_T* equals_with_whitespace = calloc(1, sizeof(token_T));
612
766
  equals_with_whitespace->type = TOKEN_EQUALS;
613
- equals_with_whitespace->value = herb_strdup(equals_buffer.value);
767
+
768
+ char* arena_copy = hb_allocator_strndup(parser->allocator, equals_buffer.value, equals_buffer.length);
769
+ equals_with_whitespace->value = (hb_string_T) { .data = arena_copy, .length = (uint32_t) equals_buffer.length };
770
+
771
+ hb_buffer_free(&equals_buffer);
772
+
614
773
  equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
615
774
  equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
616
775
 
617
- free(equals_buffer.value);
618
-
619
776
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
620
777
 
621
778
  return ast_html_attribute_node_init(
@@ -624,7 +781,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
624
781
  attribute_value,
625
782
  attribute_name->base.location.start,
626
783
  attribute_value->base.location.end,
627
- NULL
784
+ NULL,
785
+ parser->allocator
628
786
  );
629
787
  } else {
630
788
  return ast_html_attribute_node_init(
@@ -633,7 +791,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
633
791
  NULL,
634
792
  attribute_name->base.location.start,
635
793
  attribute_name->base.location.end,
636
- NULL
794
+ NULL,
795
+ parser->allocator
637
796
  );
638
797
  }
639
798
  } else {
@@ -645,6 +804,51 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
645
804
  if (equals != NULL) {
646
805
  parser_consume_whitespace(parser, NULL);
647
806
 
807
+ // <div class= >
808
+ if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
809
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
810
+ hb_string_T attribute_name_string = hb_string("unknown");
811
+
812
+ if (hb_array_size(attribute_name->children) > 0) {
813
+ AST_LITERAL_NODE_T* first_child = (AST_LITERAL_NODE_T*) hb_array_get(attribute_name->children, 0);
814
+
815
+ if (first_child && !hb_string_is_empty(first_child->content)) { attribute_name_string = first_child->content; }
816
+ }
817
+
818
+ append_missing_attribute_value_error(
819
+ attribute_name_string,
820
+ equals->location.start,
821
+ parser->current_token->location.start,
822
+ parser->allocator,
823
+ errors
824
+ );
825
+
826
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* empty_value = ast_html_attribute_value_node_init(
827
+ NULL,
828
+ hb_array_init(8, parser->allocator),
829
+ NULL,
830
+ false,
831
+ equals->location.end,
832
+ parser->current_token->location.start,
833
+ errors,
834
+ parser->allocator
835
+ );
836
+
837
+ AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
838
+ attribute_name,
839
+ equals,
840
+ empty_value,
841
+ attribute_name->base.location.start,
842
+ parser->current_token->location.start,
843
+ NULL,
844
+ parser->allocator
845
+ );
846
+
847
+ token_free(equals, parser->allocator);
848
+
849
+ return attribute_node;
850
+ }
851
+
648
852
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
649
853
 
650
854
  AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
@@ -653,10 +857,11 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
653
857
  attribute_value,
654
858
  attribute_name->base.location.start,
655
859
  attribute_value->base.location.end,
656
- NULL
860
+ NULL,
861
+ parser->allocator
657
862
  );
658
863
 
659
- token_free(equals);
864
+ token_free(equals, parser->allocator);
660
865
 
661
866
  return attribute_node;
662
867
  }
@@ -667,7 +872,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
667
872
  NULL,
668
873
  attribute_name->base.location.start,
669
874
  attribute_name->base.location.end,
670
- NULL
875
+ NULL,
876
+ parser->allocator
671
877
  );
672
878
  }
673
879
 
@@ -678,11 +884,11 @@ static void parser_skip_erb_content(lexer_T* lexer) {
678
884
  token = lexer_next_token(lexer);
679
885
 
680
886
  if (token->type == TOKEN_ERB_END) {
681
- token_free(token);
887
+ token_free(token, lexer->allocator);
682
888
  break;
683
889
  }
684
890
 
685
- token_free(token);
891
+ token_free(token, lexer->allocator);
686
892
  } while (true);
687
893
  }
688
894
 
@@ -693,12 +899,12 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
693
899
  after = lexer_next_token(lexer);
694
900
 
695
901
  if (after->type == TOKEN_EQUALS) {
696
- token_free(after);
902
+ token_free(after, lexer->allocator);
697
903
  return true;
698
904
  }
699
905
 
700
906
  if (after->type == TOKEN_WHITESPACE || after->type == TOKEN_NEWLINE) {
701
- token_free(after);
907
+ token_free(after, lexer->allocator);
702
908
  continue;
703
909
  }
704
910
 
@@ -706,23 +912,56 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
706
912
  || after->type == TOKEN_ERB_START) {
707
913
 
708
914
  if (after->type == TOKEN_ERB_START) {
709
- token_free(after);
915
+ token_free(after, lexer->allocator);
710
916
  parser_skip_erb_content(lexer);
711
917
  } else {
712
- token_free(after);
918
+ token_free(after, lexer->allocator);
713
919
  }
714
920
  continue;
715
921
  }
716
922
 
717
- token_free(after);
923
+ token_free(after, lexer->allocator);
718
924
  return false;
719
925
 
720
926
  } while (true);
721
927
  }
722
928
 
929
+ static bool starts_with_keyword(hb_string_T string, const char* keyword) {
930
+ hb_string_T prefix = hb_string(keyword);
931
+ if (string.length < prefix.length) { return false; }
932
+ if (strncmp(string.data, prefix.data, prefix.length) != 0) { return false; }
933
+
934
+ if (string.length == prefix.length) { return true; }
935
+
936
+ return is_whitespace(string.data[prefix.length]);
937
+ }
938
+
939
+ // TODO: ideally we could avoid basing this off of strings, and use the step in analyze.c
940
+ static bool parser_lookahead_erb_is_control_flow(parser_T* parser) {
941
+ lexer_T lexer_copy = *parser->lexer;
942
+ token_T* content = lexer_next_token(&lexer_copy);
943
+
944
+ if (content == NULL || content->type != TOKEN_ERB_CONTENT) {
945
+ if (content) { token_free(content, parser->allocator); }
946
+
947
+ return false;
948
+ }
949
+
950
+ hb_string_T trimmed = hb_string_trim_start(content->value);
951
+
952
+ bool is_control_flow = starts_with_keyword(trimmed, "end") || starts_with_keyword(trimmed, "else")
953
+ || starts_with_keyword(trimmed, "elsif") || starts_with_keyword(trimmed, "in")
954
+ || starts_with_keyword(trimmed, "when") || starts_with_keyword(trimmed, "rescue")
955
+ || starts_with_keyword(trimmed, "ensure");
956
+
957
+ token_free(content, parser->allocator);
958
+
959
+ return is_control_flow;
960
+ }
961
+
723
962
  static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
724
- bool is_output_tag = parser->current_token->value && strlen(parser->current_token->value) >= 3
725
- && strncmp(parser->current_token->value, "<%=", 3) == 0;
963
+ bool is_output_tag = !hb_string_is_empty(parser->current_token->value)
964
+ && hb_string_starts_with(parser->current_token->value, hb_string("<%="));
726
965
 
727
966
  if (!is_output_tag) {
728
967
  hb_array_append(children, parser_parse_erb_tag(parser));
@@ -733,7 +972,7 @@ static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children
733
972
  lexer_T lexer_copy = *parser->lexer;
734
973
 
735
974
  token_T* erb_start = lexer_next_token(&lexer_copy);
736
- token_free(erb_start);
975
+ token_free(erb_start, parser->allocator);
737
976
  parser_skip_erb_content(&lexer_copy);
738
977
 
739
978
  bool looks_like_attribute = parser_lookahead_erb_is_attribute(&lexer_copy);
@@ -759,13 +998,40 @@ static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* c
759
998
  }
760
999
 
761
1000
  static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
762
- hb_array_T* errors = hb_array_init(8);
763
- hb_array_T* children = hb_array_init(8);
1001
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1002
+ hb_array_T* children = hb_array_init(8, parser->allocator);
764
1003
 
765
1004
  token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
766
1005
  token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
767
1006
 
768
1007
  while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) {
1008
+ if (token_is_any_of(parser, TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_START_CLOSE)) {
1009
+ append_unclosed_open_tag_error(
1010
+ tag_name,
1011
+ tag_name->location.start,
1012
+ parser->current_token->location.start,
1013
+ parser->allocator,
1014
+ errors
1015
+ );
1016
+
1017
+ AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
1018
+ tag_start,
1019
+ tag_name,
1020
+ NULL,
1021
+ children,
1022
+ false,
1023
+ tag_start->location.start,
1024
+ parser->current_token->location.start,
1025
+ errors,
1026
+ parser->allocator
1027
+ );
1028
+
1029
+ token_free(tag_start, parser->allocator);
1030
+ token_free(tag_name, parser->allocator);
1031
+
1032
+ return open_tag_node;
1033
+ }
1034
+
769
1035
  if (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
770
1036
  parser_handle_whitespace_in_open_tag(parser, children);
771
1037
  continue;
@@ -791,21 +1057,79 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
791
1057
  token_T* next_token = lexer_next_token(&lexer_copy);
792
1058
 
793
1059
  if (next_token && next_token->type == TOKEN_IDENTIFIER) {
794
- token_free(next_token);
1060
+ token_free(next_token, parser->allocator);
795
1061
  hb_array_append(children, parser_parse_html_attribute(parser));
796
1062
 
797
1063
  continue;
798
1064
  }
799
1065
 
800
- token_free(next_token);
1066
+ token_free(next_token, parser->allocator);
1067
+ }
1068
+
1069
+ if (parser->current_token->type == TOKEN_PERCENT) {
1070
+ lexer_T lexer_copy = *parser->lexer;
1071
+ token_T* peek_token = lexer_next_token(&lexer_copy);
1072
+
1073
+ if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
1074
+ position_T stray_start = parser->current_token->location.start;
1075
+ position_T stray_end = peek_token->location.end;
1076
+ token_free(peek_token, parser->allocator);
1077
+
1078
+ append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, errors);
1079
+
1080
+ token_T* percent = parser_advance(parser);
1081
+ token_T* gt = parser_advance(parser);
1082
+
1083
+ AST_LITERAL_NODE_T* literal =
1084
+ ast_literal_node_init(hb_string("%>"), stray_start, stray_end, NULL, parser->allocator);
1085
+ hb_array_append(children, literal);
1086
+
1087
+ token_free(percent, parser->allocator);
1088
+ token_free(gt, parser->allocator);
1089
+
1090
+ continue;
1091
+ }
1092
+
1093
+ token_free(peek_token, parser->allocator);
801
1094
  }
802
1095
 
803
1096
  parser_append_unexpected_error(
804
1097
  parser,
1098
+ errors,
805
1099
  "Unexpected Token",
806
- "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
1100
+ TOKEN_IDENTIFIER,
1101
+ TOKEN_AT,
1102
+ TOKEN_ERB_START,
1103
+ TOKEN_WHITESPACE,
1104
+ TOKEN_NEWLINE
1105
+ );
1106
+ }
1107
+
1108
+ if (token_is(parser, TOKEN_EOF)) {
1109
+ append_unclosed_open_tag_error(
1110
+ tag_name,
1111
+ tag_name->location.start,
1112
+ parser->current_token->location.start,
1113
+ parser->allocator,
807
1114
  errors
808
1115
  );
1116
+
1117
+ AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
1118
+ tag_start,
1119
+ tag_name,
1120
+ NULL,
1121
+ children,
1122
+ false,
1123
+ tag_start->location.start,
1124
+ parser->current_token->location.start,
1125
+ errors,
1126
+ parser->allocator
1127
+ );
1128
+
1129
+ token_free(tag_start, parser->allocator);
1130
+ token_free(tag_name, parser->allocator);
1131
+
1132
+ return open_tag_node;
809
1133
  }
810
1134
 
811
1135
  bool is_self_closing = false;
@@ -816,8 +1140,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
816
1140
  tag_end = parser_consume_expected(parser, TOKEN_HTML_TAG_SELF_CLOSE, errors);
817
1141
 
818
1142
  if (tag_end == NULL) {
819
- token_free(tag_start);
820
- token_free(tag_name);
1143
+ token_free(tag_start, parser->allocator);
1144
+ token_free(tag_name, parser->allocator);
821
1145
 
822
1146
  hb_array_free(&children);
823
1147
  hb_array_free(&errors);
@@ -836,19 +1160,20 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
836
1160
  is_self_closing,
837
1161
  tag_start->location.start,
838
1162
  tag_end->location.end,
839
- errors
1163
+ errors,
1164
+ parser->allocator
840
1165
  );
841
1166
 
842
- token_free(tag_start);
843
- token_free(tag_name);
844
- token_free(tag_end);
1167
+ token_free(tag_start, parser->allocator);
1168
+ token_free(tag_name, parser->allocator);
1169
+ token_free(tag_end, parser->allocator);
845
1170
 
846
1171
  return open_tag_node;
847
1172
  }
848
1173
 
849
1174
  static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
850
- hb_array_T* errors = hb_array_init(8);
851
- hb_array_T* children = hb_array_init(8);
1175
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1176
+ hb_array_T* children = hb_array_init(8, parser->allocator);
852
1177
 
853
1178
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
854
1179
 
@@ -858,38 +1183,53 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
858
1183
 
859
1184
  parser_consume_whitespace(parser, children);
860
1185
 
861
- token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
1186
+ token_T* tag_closing = parser_consume_if_present(parser, TOKEN_HTML_TAG_END);
1187
+
1188
+ if (tag_closing == NULL) {
1189
+ append_unclosed_close_tag_error(
1190
+ tag_name,
1191
+ tag_opening->location.start,
1192
+ tag_name->location.end,
1193
+ parser->allocator,
1194
+ errors
1195
+ );
1196
+ }
862
1197
 
863
- if (tag_name != NULL && is_void_element(hb_string(tag_name->value)) && parser_in_svg_context(parser) == false) {
864
- hb_string_T expected = html_self_closing_tag_string(hb_string(tag_name->value));
865
- hb_string_T got = html_closing_tag_string(hb_string(tag_name->value));
1198
+ if (tag_closing != NULL && tag_name != NULL && is_void_element(tag_name->value)
1199
+ && parser_in_svg_context(parser) == false) {
1200
+ hb_string_T expected = html_self_closing_tag_string(tag_name->value, parser->allocator);
1201
+ hb_string_T got = html_closing_tag_string(tag_name->value, parser->allocator);
866
1202
 
867
1203
  append_void_element_closing_tag_error(
868
1204
  tag_name,
869
- expected.data,
870
- got.data,
1205
+ expected,
1206
+ got,
871
1207
  tag_opening->location.start,
872
1208
  tag_closing->location.end,
1209
+ parser->allocator,
873
1210
  errors
874
1211
  );
875
1212
 
876
- free(expected.data);
877
- free(got.data);
1213
+ hb_allocator_dealloc(parser->allocator, expected.data);
1214
+ hb_allocator_dealloc(parser->allocator, got.data);
878
1215
  }
879
1216
 
1217
+ position_T end_position = tag_closing != NULL ? tag_closing->location.end : tag_name->location.end;
1218
+
880
1219
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
881
1220
  tag_opening,
882
1221
  tag_name,
883
1222
  children,
884
1223
  tag_closing,
885
1224
  tag_opening->location.start,
886
- tag_closing->location.end,
887
- errors
1225
+ end_position,
1226
+ errors,
1227
+ parser->allocator
888
1228
  );
889
1229
 
890
- token_free(tag_opening);
891
- token_free(tag_name);
892
- token_free(tag_closing);
1230
+ token_free(tag_opening, parser->allocator);
1231
+ token_free(tag_name, parser->allocator);
1232
+ token_free(tag_closing, parser->allocator);
893
1233
 
894
1234
  return close_tag;
895
1235
  }
@@ -900,7 +1240,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
900
1240
  AST_HTML_OPEN_TAG_NODE_T* open_tag
901
1241
  ) {
902
1242
  return ast_html_element_node_init(
903
- open_tag,
1243
+ (AST_NODE_T*) open_tag,
904
1244
  open_tag->tag_name,
905
1245
  NULL,
906
1246
  NULL,
@@ -908,7 +1248,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
908
1248
  ELEMENT_SOURCE_HTML,
909
1249
  open_tag->base.location.start,
910
1250
  open_tag->base.location.end,
911
- NULL
1251
+ NULL,
1252
+ parser->allocator
912
1253
  );
913
1254
  }
914
1255
 
@@ -916,63 +1257,88 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
916
1257
  parser_T* parser,
917
1258
  AST_HTML_OPEN_TAG_NODE_T* open_tag
918
1259
  ) {
919
- hb_array_T* errors = hb_array_init(8);
920
- hb_array_T* body = hb_array_init(8);
1260
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1261
+ hb_array_T* body = hb_array_init(8, parser->allocator);
921
1262
 
922
1263
  parser_push_open_tag(parser, open_tag->tag_name);
923
1264
 
924
- if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
925
- foreign_content_type_T content_type = parser_get_foreign_content_type(hb_string(open_tag->tag_name->value));
1265
+ if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
1266
+ foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
926
1267
  parser_enter_foreign_content(parser, content_type);
927
1268
  parser_parse_foreign_content(parser, body, errors);
928
1269
  } else {
929
1270
  parser_parse_in_data_state(parser, body, errors);
930
1271
  }
931
1272
 
932
- if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); }
1273
+ if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
1274
+ return parser_handle_missing_close_tag(parser, open_tag, body, errors);
1275
+ }
933
1276
 
934
1277
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
935
1278
 
936
- if (parser_in_svg_context(parser) == false && is_void_element(hb_string(close_tag->tag_name->value))) {
1279
+ if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
937
1280
  hb_array_push(body, close_tag);
938
1281
  parser_parse_in_data_state(parser, body, errors);
939
1282
  close_tag = parser_parse_html_close_tag(parser);
940
1283
  }
941
1284
 
942
- bool matches_stack = parser_check_matching_tag(parser, hb_string(close_tag->tag_name->value));
1285
+ bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
943
1286
 
944
1287
  if (matches_stack) {
945
1288
  token_T* popped_token = parser_pop_open_tag(parser);
946
- token_free(popped_token);
1289
+ token_free(popped_token, parser->allocator);
1290
+ } else if (parser_can_close_ancestor(parser, close_tag->tag_name->value)) {
1291
+ size_t depth = parser_find_ancestor_depth(parser, close_tag->tag_name->value);
1292
+
1293
+ for (size_t i = 0; i < depth; i++) {
1294
+ token_T* unclosed = parser_pop_open_tag(parser);
1295
+
1296
+ if (unclosed != NULL) {
1297
+ append_missing_closing_tag_error(
1298
+ unclosed,
1299
+ unclosed->location.start,
1300
+ unclosed->location.end,
1301
+ parser->allocator,
1302
+ errors
1303
+ );
1304
+ token_free(unclosed, parser->allocator);
1305
+ }
1306
+ }
1307
+
1308
+ token_T* popped_token = parser_pop_open_tag(parser);
1309
+ token_free(popped_token, parser->allocator);
947
1310
  } else {
948
1311
  parser_handle_mismatched_tags(parser, close_tag, errors);
949
1312
  }
950
1313
 
951
1314
  return ast_html_element_node_init(
952
- open_tag,
1315
+ (AST_NODE_T*) open_tag,
953
1316
  open_tag->tag_name,
954
1317
  body,
955
- close_tag,
1318
+ (AST_NODE_T*) close_tag,
956
1319
  false,
957
1320
  ELEMENT_SOURCE_HTML,
958
1321
  open_tag->base.location.start,
959
1322
  close_tag->base.location.end,
960
- errors
1323
+ errors,
1324
+ parser->allocator
961
1325
  );
962
1326
  }
963
1327
 
964
1328
  static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
965
1329
  AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
966
1330
 
1331
+ if (open_tag->tag_closing == NULL) { return (AST_NODE_T*) open_tag; }
1332
+
967
1333
  // <tag />
968
1334
  if (open_tag->is_void) { return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag); }
969
1335
 
970
1336
  // <tag>, in void element list, and not in inside an <svg> element
971
- if (!open_tag->is_void && is_void_element(hb_string(open_tag->tag_name->value)) && !parser_in_svg_context(parser)) {
1337
+ if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
972
1338
  return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag);
973
1339
  }
974
1340
 
975
- if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
1341
+ if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
976
1342
  AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
977
1343
 
978
1344
  if (regular_element != NULL) { return (AST_NODE_T*) regular_element; }
@@ -982,11 +1348,38 @@ static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
982
1348
  }
983
1349
 
984
1350
  static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
985
- hb_array_T* errors = hb_array_init(8);
1351
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
986
1352
 
987
1353
  token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
988
1354
  token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
989
- token_T* closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
1355
+
1356
+ token_T* closing_tag = NULL;
1357
+ position_T end_position;
1358
+
1359
+ if (token_is(parser, TOKEN_ERB_END)) {
1360
+ closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
1361
+ end_position = closing_tag->location.end;
1362
+ } else if (token_is(parser, TOKEN_ERB_START)) {
1363
+ append_nested_erb_tag_error(
1364
+ opening_tag,
1365
+ parser->current_token->location.start.line,
1366
+ parser->current_token->location.start.column,
1367
+ parser->current_token->location.start,
1368
+ parser->current_token->location.end,
1369
+ parser->allocator,
1370
+ errors
1371
+ );
1372
+ end_position = parser->current_token->location.start;
1373
+ } else {
1374
+ append_unclosed_erb_tag_error(
1375
+ opening_tag,
1376
+ opening_tag->location.start,
1377
+ parser->current_token->location.start,
1378
+ parser->allocator,
1379
+ errors
1380
+ );
1381
+ end_position = parser->current_token->location.start;
1382
+ }
990
1383
 
991
1384
  AST_ERB_CONTENT_NODE_T* erb_node = ast_erb_content_node_init(
992
1385
  opening_tag,
@@ -995,27 +1388,29 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
995
1388
  NULL,
996
1389
  false,
997
1390
  false,
1391
+ HERB_PRISM_NODE_EMPTY,
998
1392
  opening_tag->location.start,
999
- closing_tag->location.end,
1000
- errors
1393
+ end_position,
1394
+ errors,
1395
+ parser->allocator
1001
1396
  );
1002
1397
 
1003
- token_free(opening_tag);
1004
- token_free(content);
1005
- token_free(closing_tag);
1398
+ token_free(opening_tag, parser->allocator);
1399
+ token_free(content, parser->allocator);
1400
+ if (closing_tag != NULL) { token_free(closing_tag, parser->allocator); }
1006
1401
 
1007
1402
  return erb_node;
1008
1403
  }
1009
1404
 
1010
1405
  static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
1011
1406
  hb_buffer_T content;
1012
- hb_buffer_init(&content, 1024);
1407
+ hb_buffer_init(&content, 1024, parser->allocator);
1013
1408
  position_T start = parser->current_token->location.start;
1014
1409
  hb_string_T expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
1015
1410
 
1016
1411
  if (hb_string_is_empty(expected_closing_tag)) {
1017
1412
  parser_exit_foreign_content(parser);
1018
- free(content.value);
1413
+ hb_buffer_free(&content);
1019
1414
 
1020
1415
  return;
1021
1416
  }
@@ -1038,33 +1433,32 @@ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children,
1038
1433
  token_T* next_token = lexer_next_token(parser->lexer);
1039
1434
  bool is_potential_match = false;
1040
1435
 
1041
- if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
1042
- is_potential_match =
1043
- parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type);
1436
+ if (next_token && next_token->type == TOKEN_IDENTIFIER && !hb_string_is_empty(next_token->value)) {
1437
+ is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
1044
1438
  }
1045
1439
 
1046
1440
  lexer_restore_state(parser->lexer, saved_state);
1047
1441
 
1048
- if (next_token) { token_free(next_token); }
1442
+ if (next_token) { token_free(next_token, parser->allocator); }
1049
1443
 
1050
1444
  if (is_potential_match) {
1051
1445
  parser_append_literal_node_from_buffer(parser, &content, children, start);
1052
1446
  parser_exit_foreign_content(parser);
1053
1447
 
1054
- free(content.value);
1448
+ hb_buffer_free(&content);
1055
1449
 
1056
1450
  return;
1057
1451
  }
1058
1452
  }
1059
1453
 
1060
1454
  token_T* token = parser_advance(parser);
1061
- hb_buffer_append(&content, token->value);
1062
- token_free(token);
1455
+ hb_buffer_append_string(&content, token->value);
1456
+ token_free(token, parser->allocator);
1063
1457
  }
1064
1458
 
1065
1459
  parser_append_literal_node_from_buffer(parser, &content, children, start);
1066
1460
  parser_exit_foreign_content(parser);
1067
- free(content.value);
1461
+ hb_buffer_free(&content);
1068
1462
  }
1069
1463
 
1070
1464
  static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
@@ -1072,36 +1466,43 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
1072
1466
 
1073
1467
  if (token_is(parser, TOKEN_ERB_START)) {
1074
1468
  hb_array_append(children, parser_parse_erb_tag(parser));
1469
+ parser->consecutive_error_count = 0;
1075
1470
  continue;
1076
1471
  }
1077
1472
 
1078
1473
  if (token_is(parser, TOKEN_HTML_DOCTYPE)) {
1079
1474
  hb_array_append(children, parser_parse_html_doctype(parser));
1475
+ parser->consecutive_error_count = 0;
1080
1476
  continue;
1081
1477
  }
1082
1478
 
1083
1479
  if (token_is(parser, TOKEN_XML_DECLARATION)) {
1084
1480
  hb_array_append(children, parser_parse_xml_declaration(parser));
1481
+ parser->consecutive_error_count = 0;
1085
1482
  continue;
1086
1483
  }
1087
1484
 
1088
1485
  if (token_is(parser, TOKEN_CDATA_START)) {
1089
1486
  hb_array_append(children, parser_parse_cdata(parser));
1487
+ parser->consecutive_error_count = 0;
1090
1488
  continue;
1091
1489
  }
1092
1490
 
1093
1491
  if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
1094
1492
  hb_array_append(children, parser_parse_html_comment(parser));
1493
+ parser->consecutive_error_count = 0;
1095
1494
  continue;
1096
1495
  }
1097
1496
 
1098
1497
  if (token_is(parser, TOKEN_HTML_TAG_START)) {
1099
1498
  hb_array_append(children, parser_parse_html_element(parser));
1499
+ parser->consecutive_error_count = 0;
1100
1500
  continue;
1101
1501
  }
1102
1502
 
1103
1503
  if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
1104
1504
  hb_array_append(children, parser_parse_html_close_tag(parser));
1505
+ parser->consecutive_error_count = 0;
1105
1506
  continue;
1106
1507
  }
1107
1508
 
@@ -1129,16 +1530,35 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
1129
1530
  TOKEN_WHITESPACE
1130
1531
  )) {
1131
1532
  hb_array_append(children, parser_parse_text_content(parser, errors));
1533
+ parser->consecutive_error_count = 0;
1534
+ continue;
1535
+ }
1536
+
1537
+ parser->consecutive_error_count++;
1538
+
1539
+ if (parser->consecutive_error_count >= MAX_CONSECUTIVE_ERRORS) {
1540
+ parser->in_recovery_mode = true;
1541
+ parser_synchronize(parser, errors);
1542
+ parser->consecutive_error_count = 0;
1132
1543
  continue;
1133
1544
  }
1134
1545
 
1135
1546
  parser_append_unexpected_error(
1136
1547
  parser,
1548
+ errors,
1137
1549
  "Unexpected token",
1138
- "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
1139
- "TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE",
1140
- errors
1550
+ TOKEN_ERB_START,
1551
+ TOKEN_HTML_DOCTYPE,
1552
+ TOKEN_HTML_COMMENT_START,
1553
+ TOKEN_IDENTIFIER,
1554
+ TOKEN_WHITESPACE,
1555
+ TOKEN_NBSP,
1556
+ TOKEN_AT,
1557
+ TOKEN_BACKSLASH,
1558
+ TOKEN_NEWLINE
1141
1559
  );
1560
+
1561
+ parser_synchronize(parser, errors);
1142
1562
  }
1143
1563
  }
1144
1564
 
@@ -1152,11 +1572,11 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
1152
1572
  if (node->type == AST_HTML_OPEN_TAG_NODE) {
1153
1573
  AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
1154
1574
 
1155
- if (hb_string_equals_case_insensitive(hb_string(open->tag_name->value), tag_name)) { depth++; }
1575
+ if (hb_string_equals_case_insensitive(open->tag_name->value, tag_name)) { depth++; }
1156
1576
  } else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
1157
1577
  AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
1158
1578
 
1159
- if (hb_string_equals_case_insensitive(hb_string(close->tag_name->value), tag_name)) {
1579
+ if (hb_string_equals_case_insensitive(close->tag_name->value, tag_name)) {
1160
1580
  if (depth == 0) { return i; }
1161
1581
  depth--;
1162
1582
  }
@@ -1166,10 +1586,44 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
1166
1586
  return (size_t) -1;
1167
1587
  }
1168
1588
 
1169
- static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors);
1589
+ static size_t find_implicit_close_index(hb_array_T* nodes, size_t start_idx, hb_string_T tag_name) {
1590
+ if (!has_optional_end_tag(tag_name)) { return (size_t) -1; }
1591
+
1592
+ for (size_t i = start_idx + 1; i < hb_array_size(nodes); i++) {
1593
+ AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
1594
+ if (node == NULL) { continue; }
1595
+
1596
+ if (node->type == AST_HTML_OPEN_TAG_NODE) {
1597
+ AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
1598
+ hb_string_T next_tag_name = open->tag_name->value;
1599
+
1600
+ if (should_implicitly_close(tag_name, next_tag_name)) { return i; }
1601
+ } else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
1602
+ AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
1603
+ hb_string_T close_tag_name = close->tag_name->value;
1604
+
1605
+ if (parent_closes_element(tag_name, close_tag_name)) { return i; }
1606
+ }
1607
+ }
1608
+
1609
+ return hb_array_size(nodes);
1610
+ }
1170
1611
 
1171
- static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors) {
1172
- hb_array_T* result = hb_array_init(hb_array_size(nodes));
1612
+ static hb_array_T* parser_build_elements_from_tags(
1613
+ hb_array_T* nodes,
1614
+ hb_array_T* errors,
1615
+ const parser_options_T* options,
1616
+ hb_allocator_T* allocator
1617
+ );
1618
+
1619
+ static hb_array_T* parser_build_elements_from_tags(
1620
+ hb_array_T* nodes,
1621
+ hb_array_T* errors,
1622
+ const parser_options_T* options,
1623
+ hb_allocator_T* allocator
1624
+ ) {
1625
+ bool strict = options ? options->strict : false;
1626
+ hb_array_T* result = hb_array_init(hb_array_size(nodes), allocator);
1173
1627
 
1174
1628
  for (size_t index = 0; index < hb_array_size(nodes); index++) {
1175
1629
  AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, index);
@@ -1177,45 +1631,105 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
1177
1631
 
1178
1632
  if (node->type == AST_HTML_OPEN_TAG_NODE) {
1179
1633
  AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node;
1180
- hb_string_T tag_name = hb_string(open_tag->tag_name->value);
1634
+ hb_string_T tag_name = open_tag->tag_name->value;
1181
1635
 
1182
1636
  size_t close_index = find_matching_close_tag(nodes, index, tag_name);
1183
1637
 
1184
1638
  if (close_index == (size_t) -1) {
1185
- if (hb_array_size(open_tag->base.errors) == 0) {
1186
- append_missing_closing_tag_error(
1639
+ size_t implicit_close_index = find_implicit_close_index(nodes, index, tag_name);
1640
+
1641
+ if (implicit_close_index != (size_t) -1 && implicit_close_index > index + 1) {
1642
+ hb_array_T* body = hb_array_init(implicit_close_index - index - 1, allocator);
1643
+
1644
+ for (size_t j = index + 1; j < implicit_close_index; j++) {
1645
+ hb_array_append(body, hb_array_get(nodes, j));
1646
+ }
1647
+
1648
+ hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
1649
+ hb_array_free(&body);
1650
+
1651
+ position_T end_position = open_tag->base.location.end;
1652
+
1653
+ if (hb_array_size(processed_body) > 0) {
1654
+ AST_NODE_T* last_body_node = (AST_NODE_T*) hb_array_get(processed_body, hb_array_size(processed_body) - 1);
1655
+ if (last_body_node != NULL) { end_position = last_body_node->location.end; }
1656
+ }
1657
+
1658
+ hb_array_T* element_errors = hb_array_init(8, allocator);
1659
+
1660
+ if (strict) {
1661
+ append_omitted_closing_tag_error(
1662
+ open_tag->tag_name,
1663
+ end_position,
1664
+ open_tag->base.location.start,
1665
+ open_tag->base.location.end,
1666
+ allocator,
1667
+ element_errors
1668
+ );
1669
+ }
1670
+
1671
+ AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = ast_html_omitted_close_tag_node_init(
1672
+ open_tag->tag_name,
1673
+ end_position,
1674
+ end_position,
1675
+ hb_array_init(8, allocator),
1676
+ allocator
1677
+ );
1678
+
1679
+ AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
1680
+ (AST_NODE_T*) open_tag,
1187
1681
  open_tag->tag_name,
1682
+ processed_body,
1683
+ (AST_NODE_T*) omitted_close_tag,
1684
+ false,
1685
+ ELEMENT_SOURCE_HTML,
1188
1686
  open_tag->base.location.start,
1189
- open_tag->base.location.end,
1190
- open_tag->base.errors
1687
+ end_position,
1688
+ element_errors,
1689
+ allocator
1191
1690
  );
1192
- }
1193
1691
 
1194
- hb_array_append(result, node);
1692
+ hb_array_append(result, element);
1693
+
1694
+ index = implicit_close_index - 1;
1695
+ } else {
1696
+ if (hb_array_size(open_tag->base.errors) == 0) {
1697
+ append_missing_closing_tag_error(
1698
+ open_tag->tag_name,
1699
+ open_tag->base.location.start,
1700
+ open_tag->base.location.end,
1701
+ allocator,
1702
+ open_tag->base.errors
1703
+ );
1704
+ }
1705
+
1706
+ hb_array_append(result, node);
1707
+ }
1195
1708
  } else {
1196
1709
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) hb_array_get(nodes, close_index);
1197
1710
 
1198
- hb_array_T* body = hb_array_init(close_index - index - 1);
1711
+ hb_array_T* body = hb_array_init(close_index - index - 1, allocator);
1199
1712
 
1200
1713
  for (size_t j = index + 1; j < close_index; j++) {
1201
1714
  hb_array_append(body, hb_array_get(nodes, j));
1202
1715
  }
1203
1716
 
1204
- hb_array_T* processed_body = parser_build_elements_from_tags(body, errors);
1717
+ hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
1205
1718
  hb_array_free(&body);
1206
1719
 
1207
- hb_array_T* element_errors = hb_array_init(8);
1720
+ hb_array_T* element_errors = hb_array_init(8, allocator);
1208
1721
 
1209
1722
  AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
1210
- open_tag,
1723
+ (AST_NODE_T*) open_tag,
1211
1724
  open_tag->tag_name,
1212
1725
  processed_body,
1213
- close_tag,
1726
+ (AST_NODE_T*) close_tag,
1214
1727
  false,
1215
1728
  ELEMENT_SOURCE_HTML,
1216
1729
  open_tag->base.location.start,
1217
1730
  close_tag->base.location.end,
1218
- element_errors
1731
+ element_errors,
1732
+ allocator
1219
1733
  );
1220
1734
 
1221
1735
  hb_array_append(result, element);
@@ -1225,12 +1739,13 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
1225
1739
  } else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
1226
1740
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) node;
1227
1741
 
1228
- if (!is_void_element(hb_string(close_tag->tag_name->value))) {
1742
+ if (!is_void_element(close_tag->tag_name->value)) {
1229
1743
  if (hb_array_size(close_tag->base.errors) == 0) {
1230
1744
  append_missing_opening_tag_error(
1231
1745
  close_tag->tag_name,
1232
1746
  close_tag->base.location.start,
1233
1747
  close_tag->base.location.end,
1748
+ allocator,
1234
1749
  close_tag->base.errors
1235
1750
  );
1236
1751
  }
@@ -1246,17 +1761,18 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
1246
1761
  }
1247
1762
 
1248
1763
  static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) {
1249
- hb_array_T* children = hb_array_init(8);
1250
- hb_array_T* errors = hb_array_init(8);
1764
+ hb_array_T* children = hb_array_init(8, parser->allocator);
1765
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1251
1766
  position_T start = parser->current_token->location.start;
1252
1767
 
1253
1768
  parser_parse_in_data_state(parser, children, errors);
1254
1769
 
1255
1770
  token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors);
1256
1771
 
1257
- AST_DOCUMENT_NODE_T* document_node = ast_document_node_init(children, start, eof->location.end, errors);
1772
+ AST_DOCUMENT_NODE_T* document_node =
1773
+ ast_document_node_init(children, NULL, HERB_PRISM_NODE_EMPTY, start, eof->location.end, errors, parser->allocator);
1258
1774
 
1259
- token_free(eof);
1775
+ token_free(eof, parser->allocator);
1260
1776
 
1261
1777
  return document_node;
1262
1778
  }
@@ -1267,17 +1783,18 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser) {
1267
1783
 
1268
1784
  static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children) {
1269
1785
  if (parser->options.track_whitespace) {
1270
- hb_array_T* errors = hb_array_init(8);
1786
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1271
1787
  AST_WHITESPACE_NODE_T* whitespace_node = ast_whitespace_node_init(
1272
1788
  whitespace_token,
1273
1789
  whitespace_token->location.start,
1274
1790
  whitespace_token->location.end,
1275
- errors
1791
+ errors,
1792
+ parser->allocator
1276
1793
  );
1277
1794
  hb_array_append(children, whitespace_node);
1278
1795
  }
1279
1796
 
1280
- token_free(whitespace_token);
1797
+ token_free(whitespace_token, parser->allocator);
1281
1798
  }
1282
1799
 
1283
1800
  static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
@@ -1287,7 +1804,7 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
1287
1804
  if (parser->options.track_whitespace && children != NULL) {
1288
1805
  parser_handle_whitespace(parser, whitespace, children);
1289
1806
  } else {
1290
- token_free(whitespace);
1807
+ token_free(whitespace, parser->allocator);
1291
1808
  }
1292
1809
  }
1293
1810
  }
@@ -1295,14 +1812,27 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
1295
1812
  void herb_parser_deinit(parser_T* parser) {
1296
1813
  if (parser == NULL) { return; }
1297
1814
 
1298
- if (parser->current_token != NULL) { token_free(parser->current_token); }
1299
- if (parser->open_tags_stack != NULL) { hb_array_free(&parser->open_tags_stack); }
1815
+ if (parser->current_token != NULL) { token_free(parser->current_token, parser->allocator); }
1816
+
1817
+ if (parser->open_tags_stack != NULL) {
1818
+ for (size_t i = 0; i < hb_array_size(parser->open_tags_stack); i++) {
1819
+ token_T* token = (token_T*) hb_array_get(parser->open_tags_stack, i);
1820
+ if (token != NULL) { token_free(token, parser->allocator); }
1821
+ }
1822
+
1823
+ hb_array_free(&parser->open_tags_stack);
1824
+ }
1300
1825
  }
1301
1826
 
1302
- void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
1827
+ void match_tags_in_node_array(
1828
+ hb_array_T* nodes,
1829
+ hb_array_T* errors,
1830
+ const parser_options_T* options,
1831
+ hb_allocator_T* allocator
1832
+ ) {
1303
1833
  if (nodes == NULL || hb_array_size(nodes) == 0) { return; }
1304
1834
 
1305
- hb_array_T* processed = parser_build_elements_from_tags(nodes, errors);
1835
+ hb_array_T* processed = parser_build_elements_from_tags(nodes, errors, options, allocator);
1306
1836
 
1307
1837
  nodes->size = 0;
1308
1838
 
@@ -1312,16 +1842,22 @@ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
1312
1842
 
1313
1843
  hb_array_free(&processed);
1314
1844
 
1845
+ match_tags_context_T context = { .errors = errors, .options = options, .allocator = allocator };
1846
+
1315
1847
  for (size_t i = 0; i < hb_array_size(nodes); i++) {
1316
1848
  AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
1317
1849
  if (node == NULL) { continue; }
1318
1850
 
1319
- herb_visit_node(node, match_tags_visitor, errors);
1851
+ herb_visit_node(node, match_tags_visitor, &context);
1320
1852
  }
1321
1853
  }
1322
1854
 
1323
- void herb_parser_match_html_tags_post_analyze(AST_DOCUMENT_NODE_T* document) {
1855
+ void herb_parser_match_html_tags_post_analyze(
1856
+ AST_DOCUMENT_NODE_T* document,
1857
+ const parser_options_T* options,
1858
+ hb_allocator_T* allocator
1859
+ ) {
1324
1860
  if (document == NULL) { return; }
1325
1861
 
1326
- match_tags_in_node_array(document->children, document->base.errors);
1862
+ match_tags_in_node_array(document->children, document->base.errors, options, allocator);
1327
1863
  }