@herb-tools/node 0.8.10 → 0.9.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/binding.gyp +27 -8
  2. package/dist/herb-node.cjs +41 -12
  3. package/dist/herb-node.cjs.map +1 -1
  4. package/dist/herb-node.esm.js +8 -1
  5. package/dist/herb-node.esm.js.map +1 -1
  6. package/dist/types/node-backend.d.ts +3 -1
  7. package/extension/error_helpers.cpp +598 -73
  8. package/extension/error_helpers.h +20 -3
  9. package/extension/extension_helpers.cpp +40 -35
  10. package/extension/extension_helpers.h +2 -2
  11. package/extension/herb.cpp +194 -64
  12. package/extension/libherb/analyze/action_view/attribute_extraction_helpers.c +303 -0
  13. package/extension/libherb/analyze/action_view/attribute_extraction_helpers.h +36 -0
  14. package/extension/libherb/analyze/action_view/content_tag.c +78 -0
  15. package/extension/libherb/analyze/action_view/link_to.c +167 -0
  16. package/extension/libherb/analyze/action_view/registry.c +83 -0
  17. package/extension/libherb/analyze/action_view/tag.c +70 -0
  18. package/extension/libherb/analyze/action_view/tag_helper_handler.h +43 -0
  19. package/extension/libherb/analyze/action_view/tag_helper_node_builders.c +305 -0
  20. package/extension/libherb/analyze/action_view/tag_helper_node_builders.h +70 -0
  21. package/extension/libherb/analyze/action_view/tag_helpers.c +815 -0
  22. package/extension/libherb/analyze/action_view/tag_helpers.h +38 -0
  23. package/extension/libherb/analyze/action_view/turbo_frame_tag.c +88 -0
  24. package/extension/libherb/analyze/analyze.c +885 -0
  25. package/extension/libherb/{include → analyze}/analyze.h +14 -4
  26. package/extension/libherb/{analyzed_ruby.c → analyze/analyzed_ruby.c} +13 -11
  27. package/extension/libherb/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
  28. package/extension/libherb/analyze/builders.c +343 -0
  29. package/extension/libherb/analyze/builders.h +27 -0
  30. package/extension/libherb/analyze/conditional_elements.c +594 -0
  31. package/extension/libherb/analyze/conditional_elements.h +9 -0
  32. package/extension/libherb/analyze/conditional_open_tags.c +640 -0
  33. package/extension/libherb/analyze/conditional_open_tags.h +9 -0
  34. package/extension/libherb/analyze/control_type.c +250 -0
  35. package/extension/libherb/analyze/control_type.h +14 -0
  36. package/extension/libherb/{analyze_helpers.c → analyze/helpers.c} +48 -23
  37. package/extension/libherb/{analyze_helpers.h → analyze/helpers.h} +4 -2
  38. package/extension/libherb/analyze/invalid_structures.c +193 -0
  39. package/extension/libherb/analyze/invalid_structures.h +11 -0
  40. package/extension/libherb/{analyze_missing_end.c → analyze/missing_end.c} +33 -22
  41. package/extension/libherb/analyze/parse_errors.c +84 -0
  42. package/extension/libherb/analyze/prism_annotate.c +399 -0
  43. package/extension/libherb/analyze/prism_annotate.h +16 -0
  44. package/extension/libherb/analyze/render_nodes.c +761 -0
  45. package/extension/libherb/analyze/render_nodes.h +11 -0
  46. package/extension/libherb/{analyze_transform.c → analyze/transform.c} +24 -3
  47. package/extension/libherb/ast_node.c +17 -7
  48. package/extension/libherb/ast_node.h +11 -5
  49. package/extension/libherb/ast_nodes.c +760 -388
  50. package/extension/libherb/ast_nodes.h +155 -39
  51. package/extension/libherb/ast_pretty_print.c +265 -7
  52. package/extension/libherb/ast_pretty_print.h +6 -1
  53. package/extension/libherb/element_source.h +3 -8
  54. package/extension/libherb/errors.c +1455 -520
  55. package/extension/libherb/errors.h +207 -56
  56. package/extension/libherb/extract.c +145 -49
  57. package/extension/libherb/extract.h +21 -5
  58. package/extension/libherb/herb.c +52 -34
  59. package/extension/libherb/herb.h +18 -6
  60. package/extension/libherb/herb_prism_node.h +13 -0
  61. package/extension/libherb/html_util.c +241 -12
  62. package/extension/libherb/html_util.h +7 -2
  63. package/extension/libherb/include/analyze/action_view/attribute_extraction_helpers.h +36 -0
  64. package/extension/libherb/include/analyze/action_view/tag_helper_handler.h +43 -0
  65. package/extension/libherb/include/analyze/action_view/tag_helper_node_builders.h +70 -0
  66. package/extension/libherb/include/analyze/action_view/tag_helpers.h +38 -0
  67. package/extension/libherb/{analyze.h → include/analyze/analyze.h} +14 -4
  68. package/extension/libherb/include/{analyzed_ruby.h → analyze/analyzed_ruby.h} +3 -3
  69. package/extension/libherb/include/analyze/builders.h +27 -0
  70. package/extension/libherb/include/analyze/conditional_elements.h +9 -0
  71. package/extension/libherb/include/analyze/conditional_open_tags.h +9 -0
  72. package/extension/libherb/include/analyze/control_type.h +14 -0
  73. package/extension/libherb/include/{analyze_helpers.h → analyze/helpers.h} +4 -2
  74. package/extension/libherb/include/analyze/invalid_structures.h +11 -0
  75. package/extension/libherb/include/analyze/prism_annotate.h +16 -0
  76. package/extension/libherb/include/analyze/render_nodes.h +11 -0
  77. package/extension/libherb/include/ast_node.h +11 -5
  78. package/extension/libherb/include/ast_nodes.h +155 -39
  79. package/extension/libherb/include/ast_pretty_print.h +6 -1
  80. package/extension/libherb/include/element_source.h +3 -8
  81. package/extension/libherb/include/errors.h +207 -56
  82. package/extension/libherb/include/extract.h +21 -5
  83. package/extension/libherb/include/herb.h +18 -6
  84. package/extension/libherb/include/herb_prism_node.h +13 -0
  85. package/extension/libherb/include/html_util.h +7 -2
  86. package/extension/libherb/include/io.h +3 -1
  87. package/extension/libherb/include/lex_helpers.h +29 -0
  88. package/extension/libherb/include/lexer.h +1 -1
  89. package/extension/libherb/include/lexer_peek_helpers.h +87 -13
  90. package/extension/libherb/include/lexer_struct.h +2 -0
  91. package/extension/libherb/include/location.h +2 -1
  92. package/extension/libherb/include/parser.h +28 -2
  93. package/extension/libherb/include/parser_helpers.h +19 -3
  94. package/extension/libherb/include/pretty_print.h +10 -5
  95. package/extension/libherb/include/prism_context.h +45 -0
  96. package/extension/libherb/include/prism_helpers.h +10 -7
  97. package/extension/libherb/include/prism_serialized.h +12 -0
  98. package/extension/libherb/include/token.h +16 -4
  99. package/extension/libherb/include/token_struct.h +10 -3
  100. package/extension/libherb/include/utf8.h +2 -1
  101. package/extension/libherb/include/util/hb_allocator.h +78 -0
  102. package/extension/libherb/include/util/hb_arena.h +6 -1
  103. package/extension/libherb/include/util/hb_arena_debug.h +12 -1
  104. package/extension/libherb/include/util/hb_array.h +7 -3
  105. package/extension/libherb/include/util/hb_buffer.h +6 -4
  106. package/extension/libherb/include/util/hb_foreach.h +79 -0
  107. package/extension/libherb/include/util/hb_narray.h +8 -4
  108. package/extension/libherb/include/util/hb_string.h +56 -9
  109. package/extension/libherb/include/util.h +6 -3
  110. package/extension/libherb/include/version.h +1 -1
  111. package/extension/libherb/io.c +3 -2
  112. package/extension/libherb/io.h +3 -1
  113. package/extension/libherb/lex_helpers.h +29 -0
  114. package/extension/libherb/lexer.c +42 -30
  115. package/extension/libherb/lexer.h +1 -1
  116. package/extension/libherb/lexer_peek_helpers.c +12 -74
  117. package/extension/libherb/lexer_peek_helpers.h +87 -13
  118. package/extension/libherb/lexer_struct.h +2 -0
  119. package/extension/libherb/location.c +2 -2
  120. package/extension/libherb/location.h +2 -1
  121. package/extension/libherb/main.c +53 -28
  122. package/extension/libherb/parser.c +784 -247
  123. package/extension/libherb/parser.h +28 -2
  124. package/extension/libherb/parser_helpers.c +110 -23
  125. package/extension/libherb/parser_helpers.h +19 -3
  126. package/extension/libherb/parser_match_tags.c +130 -49
  127. package/extension/libherb/pretty_print.c +29 -24
  128. package/extension/libherb/pretty_print.h +10 -5
  129. package/extension/libherb/prism_context.h +45 -0
  130. package/extension/libherb/prism_helpers.c +30 -27
  131. package/extension/libherb/prism_helpers.h +10 -7
  132. package/extension/libherb/prism_serialized.h +12 -0
  133. package/extension/libherb/ruby_parser.c +2 -0
  134. package/extension/libherb/token.c +151 -66
  135. package/extension/libherb/token.h +16 -4
  136. package/extension/libherb/token_matchers.c +0 -1
  137. package/extension/libherb/token_struct.h +10 -3
  138. package/extension/libherb/utf8.c +7 -6
  139. package/extension/libherb/utf8.h +2 -1
  140. package/extension/libherb/util/hb_allocator.c +341 -0
  141. package/extension/libherb/util/hb_allocator.h +78 -0
  142. package/extension/libherb/util/hb_arena.c +81 -56
  143. package/extension/libherb/util/hb_arena.h +6 -1
  144. package/extension/libherb/util/hb_arena_debug.c +32 -17
  145. package/extension/libherb/util/hb_arena_debug.h +12 -1
  146. package/extension/libherb/util/hb_array.c +30 -15
  147. package/extension/libherb/util/hb_array.h +7 -3
  148. package/extension/libherb/util/hb_buffer.c +17 -21
  149. package/extension/libherb/util/hb_buffer.h +6 -4
  150. package/extension/libherb/util/hb_foreach.h +79 -0
  151. package/extension/libherb/util/hb_narray.c +22 -7
  152. package/extension/libherb/util/hb_narray.h +8 -4
  153. package/extension/libherb/util/hb_string.c +49 -35
  154. package/extension/libherb/util/hb_string.h +56 -9
  155. package/extension/libherb/util.c +21 -11
  156. package/extension/libherb/util.h +6 -3
  157. package/extension/libherb/version.h +1 -1
  158. package/extension/libherb/visitor.c +68 -1
  159. package/extension/nodes.cpp +593 -6
  160. package/extension/nodes.h +10 -1
  161. package/package.json +12 -8
  162. package/src/node-backend.ts +11 -1
  163. package/dist/types/index-cjs.d.cts +0 -1
  164. package/extension/libherb/analyze.c +0 -1608
  165. package/extension/libherb/element_source.c +0 -12
  166. package/extension/libherb/include/util/hb_system.h +0 -9
  167. package/extension/libherb/util/hb_system.c +0 -30
  168. package/extension/libherb/util/hb_system.h +0 -9
  169. package/src/index-cjs.cts +0 -22
  170. /package/dist/types/{index-esm.d.mts → index.d.ts} +0 -0
  171. /package/src/{index-esm.mts → index.ts} +0 -0
@@ -20,6 +20,8 @@
20
20
  #include <string.h>
21
21
  #include <strings.h>
22
22
 
23
+ #define MAX_CONSECUTIVE_ERRORS 10
24
+
23
25
  static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors);
24
26
  static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors);
25
27
  static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
@@ -27,29 +29,40 @@ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token
27
29
  static void parser_consume_whitespace(parser_T* parser, hb_array_T* children);
28
30
  static void parser_skip_erb_content(lexer_T* lexer);
29
31
  static bool parser_lookahead_erb_is_attribute(lexer_T* lexer);
32
+ static bool parser_lookahead_erb_is_control_flow(parser_T* parser);
30
33
  static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children);
31
34
  static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* children);
32
35
 
33
- const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false };
36
+ const parser_options_T HERB_DEFAULT_PARSER_OPTIONS = { .track_whitespace = false,
37
+ .analyze = true,
38
+ .strict = true,
39
+ .action_view_helpers = false,
40
+ .render_nodes = false,
41
+ .prism_nodes_deep = false,
42
+ .prism_nodes = false,
43
+ .prism_program = false };
34
44
 
35
45
  size_t parser_sizeof(void) {
36
46
  return sizeof(struct PARSER_STRUCT);
37
47
  }
38
48
 
39
49
  void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options) {
50
+ parser->allocator = lexer->allocator;
40
51
  parser->lexer = lexer;
41
52
  parser->current_token = lexer_next_token(lexer);
42
- parser->open_tags_stack = hb_array_init(16);
53
+ parser->open_tags_stack = hb_array_init(16, parser->allocator);
43
54
  parser->state = PARSER_STATE_DATA;
44
55
  parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
45
56
  parser->options = options;
57
+ parser->consecutive_error_count = 0;
58
+ parser->in_recovery_mode = false;
46
59
  }
47
60
 
48
61
  static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
49
- hb_array_T* errors = hb_array_init(8);
50
- hb_array_T* children = hb_array_init(8);
62
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
63
+ hb_array_T* children = hb_array_init(8, parser->allocator);
51
64
  hb_buffer_T content;
52
- hb_buffer_init(&content, 128);
65
+ hb_buffer_init(&content, 128, parser->allocator);
53
66
 
54
67
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_CDATA_START, errors);
55
68
  position_T start = parser->current_token->location.start;
@@ -64,8 +77,8 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
64
77
  }
65
78
 
66
79
  token_T* token = parser_advance(parser);
67
- hb_buffer_append(&content, token->value);
68
- token_free(token);
80
+ hb_buffer_append_string(&content, token->value);
81
+ token_free(token, parser->allocator);
69
82
  }
70
83
 
71
84
  parser_append_literal_node_from_buffer(parser, &content, children, start);
@@ -77,26 +90,27 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
77
90
  tag_closing,
78
91
  tag_opening->location.start,
79
92
  tag_closing->location.end,
80
- errors
93
+ errors,
94
+ parser->allocator
81
95
  );
82
96
 
83
- free(content.value);
84
- token_free(tag_opening);
85
- token_free(tag_closing);
97
+ hb_buffer_free(&content);
98
+ token_free(tag_opening, parser->allocator);
99
+ token_free(tag_closing, parser->allocator);
86
100
 
87
101
  return cdata;
88
102
  }
89
103
 
90
104
  static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
91
- hb_array_T* errors = hb_array_init(8);
92
- hb_array_T* children = hb_array_init(8);
105
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
106
+ hb_array_T* children = hb_array_init(8, parser->allocator);
93
107
  token_T* comment_start = parser_consume_expected(parser, TOKEN_HTML_COMMENT_START, errors);
94
108
  position_T start = parser->current_token->location.start;
95
109
 
96
110
  hb_buffer_T comment;
97
- hb_buffer_init(&comment, 512);
111
+ hb_buffer_init(&comment, 512, parser->allocator);
98
112
 
99
- while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_EOF)) {
113
+ while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_HTML_COMMENT_INVALID_END, TOKEN_EOF)) {
100
114
  if (token_is(parser, TOKEN_ERB_START)) {
101
115
  parser_append_literal_node_from_buffer(parser, &comment, children, start);
102
116
 
@@ -109,13 +123,26 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
109
123
  }
110
124
 
111
125
  token_T* token = parser_advance(parser);
112
- hb_buffer_append(&comment, token->value);
113
- token_free(token);
126
+ hb_buffer_append_string(&comment, token->value);
127
+ token_free(token, parser->allocator);
114
128
  }
115
129
 
116
130
  parser_append_literal_node_from_buffer(parser, &comment, children, start);
117
131
 
118
- token_T* comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors);
132
+ token_T* comment_end = NULL;
133
+
134
+ if (token_is(parser, TOKEN_HTML_COMMENT_INVALID_END)) {
135
+ comment_end = parser_advance(parser);
136
+ append_invalid_comment_closing_tag_error(
137
+ comment_end,
138
+ comment_end->location.start,
139
+ comment_end->location.end,
140
+ parser->allocator,
141
+ errors
142
+ );
143
+ } else {
144
+ comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors);
145
+ }
119
146
 
120
147
  AST_HTML_COMMENT_NODE_T* comment_node = ast_html_comment_node_init(
121
148
  comment_start,
@@ -123,21 +150,22 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
123
150
  comment_end,
124
151
  comment_start->location.start,
125
152
  comment_end->location.end,
126
- errors
153
+ errors,
154
+ parser->allocator
127
155
  );
128
156
 
129
- free(comment.value);
130
- token_free(comment_start);
131
- token_free(comment_end);
157
+ hb_buffer_free(&comment);
158
+ token_free(comment_start, parser->allocator);
159
+ token_free(comment_end, parser->allocator);
132
160
 
133
161
  return comment_node;
134
162
  }
135
163
 
136
164
  static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
137
- hb_array_T* errors = hb_array_init(8);
138
- hb_array_T* children = hb_array_init(8);
165
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
166
+ hb_array_T* children = hb_array_init(8, parser->allocator);
139
167
  hb_buffer_T content;
140
- hb_buffer_init(&content, 64);
168
+ hb_buffer_init(&content, 64, parser->allocator);
141
169
 
142
170
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_DOCTYPE, errors);
143
171
 
@@ -154,8 +182,8 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
154
182
  }
155
183
 
156
184
  token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
157
- hb_buffer_append(&content, token->value);
158
- token_free(token);
185
+ hb_buffer_append_string(&content, token->value);
186
+ token_free(token, parser->allocator);
159
187
  }
160
188
 
161
189
  parser_append_literal_node_from_buffer(parser, &content, children, start);
@@ -168,21 +196,22 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
168
196
  tag_closing,
169
197
  tag_opening->location.start,
170
198
  tag_closing->location.end,
171
- errors
199
+ errors,
200
+ parser->allocator
172
201
  );
173
202
 
174
- token_free(tag_opening);
175
- token_free(tag_closing);
176
- free(content.value);
203
+ token_free(tag_opening, parser->allocator);
204
+ token_free(tag_closing, parser->allocator);
205
+ hb_buffer_free(&content);
177
206
 
178
207
  return doctype;
179
208
  }
180
209
 
181
210
  static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser) {
182
- hb_array_T* errors = hb_array_init(8);
183
- hb_array_T* children = hb_array_init(8);
211
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
212
+ hb_array_T* children = hb_array_init(8, parser->allocator);
184
213
  hb_buffer_T content;
185
- hb_buffer_init(&content, 64);
214
+ hb_buffer_init(&content, 64, parser->allocator);
186
215
 
187
216
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_XML_DECLARATION, errors);
188
217
 
@@ -201,8 +230,8 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
201
230
  }
202
231
 
203
232
  token_T* token = parser_advance(parser);
204
- hb_buffer_append(&content, token->value);
205
- token_free(token);
233
+ hb_buffer_append_string(&content, token->value);
234
+ token_free(token, parser->allocator);
206
235
  }
207
236
 
208
237
  parser_append_literal_node_from_buffer(parser, &content, children, start);
@@ -215,12 +244,13 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser
215
244
  tag_closing,
216
245
  tag_opening->location.start,
217
246
  tag_closing->location.end,
218
- errors
247
+ errors,
248
+ parser->allocator
219
249
  );
220
250
 
221
- token_free(tag_opening);
222
- token_free(tag_closing);
223
- free(content.value);
251
+ token_free(tag_opening, parser->allocator);
252
+ token_free(tag_closing, parser->allocator);
253
+ hb_buffer_free(&content);
224
254
 
225
255
  return xml_declaration;
226
256
  }
@@ -229,7 +259,7 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
229
259
  position_T start = parser->current_token->location.start;
230
260
 
231
261
  hb_buffer_T content;
232
- hb_buffer_init(&content, 2048);
262
+ hb_buffer_init(&content, 2048, parser->allocator);
233
263
 
234
264
  while (token_is_none_of(
235
265
  parser,
@@ -241,49 +271,66 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra
241
271
  TOKEN_EOF
242
272
  )) {
243
273
  if (token_is(parser, TOKEN_ERROR)) {
244
- free(content.value);
274
+ hb_buffer_free(&content);
245
275
 
246
- token_T* token = parser_consume_expected(parser, TOKEN_ERROR, document_errors);
247
- append_unexpected_error(
248
- "Token Error",
249
- "not TOKEN_ERROR",
250
- token->value,
251
- token->location.start,
252
- token->location.end,
253
- document_errors
254
- );
255
-
256
- token_free(token);
276
+ parser_append_unexpected_error_string(parser, document_errors, "Token Error", "not an error token");
257
277
 
258
278
  return NULL;
259
279
  }
260
280
 
281
+ if (parser->options.strict && parser->current_token->type == TOKEN_PERCENT) {
282
+ lexer_T lexer_copy = *parser->lexer;
283
+ token_T* peek_token = lexer_next_token(&lexer_copy);
284
+
285
+ if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
286
+ position_T stray_start = parser->current_token->location.start;
287
+ position_T stray_end = peek_token->location.end;
288
+ token_free(peek_token, parser->allocator);
289
+
290
+ append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, document_errors);
291
+
292
+ token_T* percent = parser_advance(parser);
293
+ hb_buffer_append_string(&content, percent->value);
294
+ token_free(percent, parser->allocator);
295
+
296
+ token_T* gt = parser_advance(parser);
297
+ hb_buffer_append_string(&content, gt->value);
298
+ token_free(gt, parser->allocator);
299
+
300
+ continue;
301
+ }
302
+
303
+ token_free(peek_token, parser->allocator);
304
+ }
305
+
261
306
  token_T* token = parser_advance(parser);
262
- hb_buffer_append(&content, token->value);
263
- token_free(token);
307
+ hb_buffer_append_string(&content, token->value);
308
+ token_free(token, parser->allocator);
264
309
  }
265
310
 
266
- hb_array_T* errors = hb_array_init(8);
311
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
267
312
 
268
313
  AST_HTML_TEXT_NODE_T* text_node = NULL;
269
314
 
270
315
  if (hb_buffer_length(&content) > 0) {
316
+ hb_string_T text_content = { .data = content.value, .length = (uint32_t) content.length };
271
317
  text_node =
272
- ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors);
318
+ ast_html_text_node_init(text_content, start, parser->current_token->location.start, errors, parser->allocator);
273
319
  } else {
274
- text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors);
320
+ text_node =
321
+ ast_html_text_node_init(HB_STRING_EMPTY, start, parser->current_token->location.start, errors, parser->allocator);
275
322
  }
276
323
 
277
- free(content.value);
324
+ hb_buffer_free(&content);
278
325
 
279
326
  return text_node;
280
327
  }
281
328
 
282
329
  static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
283
- hb_array_T* errors = hb_array_init(8);
284
- hb_array_T* children = hb_array_init(8);
330
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
331
+ hb_array_T* children = hb_array_init(8, parser->allocator);
285
332
  hb_buffer_T buffer;
286
- hb_buffer_init(&buffer, 128);
333
+ hb_buffer_init(&buffer, 128, parser->allocator);
287
334
  position_T start = parser->current_token->location.start;
288
335
 
289
336
  while (token_is_none_of(
@@ -296,6 +343,16 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
296
343
  TOKEN_EOF
297
344
  )) {
298
345
  if (token_is(parser, TOKEN_ERB_START)) {
346
+ hb_string_T tag = parser->current_token->value;
347
+ bool is_output_tag = (tag.length >= 3 && tag.data[2] == '=');
348
+
349
+ if (!is_output_tag) {
350
+ bool is_control_flow = parser_lookahead_erb_is_control_flow(parser);
351
+
352
+ if (hb_buffer_is_empty(&buffer) && hb_array_size(children) == 0) { break; }
353
+ if (is_control_flow) { break; }
354
+ }
355
+
299
356
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
300
357
 
301
358
  AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
@@ -306,8 +363,8 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
306
363
  }
307
364
 
308
365
  token_T* token = parser_advance(parser);
309
- hb_buffer_append(&buffer, token->value);
310
- token_free(token);
366
+ hb_buffer_append_string(&buffer, token->value);
367
+ token_free(token, parser->allocator);
311
368
  }
312
369
 
313
370
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
@@ -327,9 +384,9 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T
327
384
  }
328
385
 
329
386
  AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
330
- ast_html_attribute_name_node_init(children, node_start, node_end, errors);
387
+ ast_html_attribute_name_node_init(children, node_start, node_end, errors, parser->allocator);
331
388
 
332
- free(buffer.value);
389
+ hb_buffer_free(&buffer);
333
390
 
334
391
  return attribute_name;
335
392
  }
@@ -340,55 +397,137 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
340
397
  hb_array_T* errors
341
398
  ) {
342
399
  hb_buffer_T buffer;
343
- hb_buffer_init(&buffer, 512);
400
+ hb_buffer_init(&buffer, 512, parser->allocator);
344
401
  token_T* opening_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
345
402
  position_T start = parser->current_token->location.start;
346
403
 
347
404
  while (!token_is(parser, TOKEN_EOF)
348
405
  && !(
349
406
  token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
350
- && string_equals(parser->current_token->value, opening_quote->value)
407
+ && hb_string_equals(parser->current_token->value, opening_quote->value)
351
408
  )) {
352
- if (token_is(parser, TOKEN_ERB_START)) {
409
+ if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
410
+ lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
411
+ bool found_closing_quote = false;
412
+ token_T* lookahead = lexer_next_token(parser->lexer);
413
+
414
+ while (lookahead && lookahead->type != TOKEN_EOF) {
415
+ if (lookahead->type == TOKEN_QUOTE && opening_quote != NULL
416
+ && hb_string_equals(lookahead->value, opening_quote->value)) {
417
+ found_closing_quote = true;
418
+ token_free(lookahead, parser->allocator);
419
+ break;
420
+ }
421
+
422
+ token_free(lookahead, parser->allocator);
423
+
424
+ lookahead = lexer_next_token(parser->lexer);
425
+ }
426
+
427
+ if (lookahead && !found_closing_quote && lookahead->type == TOKEN_EOF) {
428
+ token_free(lookahead, parser->allocator);
429
+ }
430
+
431
+ lexer_restore_state(parser->lexer, saved_state);
432
+
433
+ if (found_closing_quote) {
434
+ hb_buffer_append_string(&buffer, parser->current_token->value);
435
+ token_free(parser->current_token, parser->allocator);
436
+ parser->current_token = lexer_next_token(parser->lexer);
437
+ continue;
438
+ }
439
+
440
+ append_unclosed_quote_error(
441
+ opening_quote,
442
+ opening_quote->location.start,
443
+ parser->current_token->location.start,
444
+ parser->allocator,
445
+ errors
446
+ );
447
+
353
448
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
449
+ hb_buffer_free(&buffer);
354
450
 
355
- hb_array_append(children, parser_parse_erb_tag(parser));
451
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
452
+ opening_quote,
453
+ children,
454
+ NULL,
455
+ true,
456
+ opening_quote->location.start,
457
+ parser->current_token->location.start,
458
+ errors,
459
+ parser->allocator
460
+ );
356
461
 
357
- start = parser->current_token->location.start;
462
+ token_free(opening_quote, parser->allocator);
358
463
 
359
- continue;
464
+ return attribute_value;
360
465
  }
361
466
 
362
- if (token_is(parser, TOKEN_BACKSLASH)) {
467
+ bool buffer_ends_with_whitespace = buffer.length > 0 && is_whitespace(buffer.value[buffer.length - 1]);
468
+
469
+ if (token_is(parser, TOKEN_IDENTIFIER) && buffer_ends_with_whitespace) {
363
470
  lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
471
+ token_T* equals_token = lexer_next_token(parser->lexer);
472
+ bool looks_like_new_attribute = false;
364
473
 
365
- token_T* next_token = lexer_next_token(parser->lexer);
474
+ if (equals_token && equals_token->type == TOKEN_EQUALS) {
475
+ token_T* after_equals = lexer_next_token(parser->lexer);
476
+ looks_like_new_attribute = (after_equals && after_equals->type == TOKEN_QUOTE);
366
477
 
367
- if (next_token && next_token->type == TOKEN_QUOTE && opening_quote != NULL
368
- && string_equals(next_token->value, opening_quote->value)) {
369
- hb_buffer_append(&buffer, parser->current_token->value);
370
- hb_buffer_append(&buffer, next_token->value);
478
+ if (after_equals) { token_free(after_equals, parser->allocator); }
479
+ }
371
480
 
372
- token_free(parser->current_token);
373
- token_free(next_token);
481
+ if (equals_token) { token_free(equals_token, parser->allocator); }
482
+ lexer_restore_state(parser->lexer, saved_state);
374
483
 
375
- parser->current_token = lexer_next_token(parser->lexer);
376
- continue;
377
- } else {
378
- lexer_restore_state(parser->lexer, saved_state);
484
+ if (looks_like_new_attribute) {
485
+ append_unclosed_quote_error(
486
+ opening_quote,
487
+ opening_quote->location.start,
488
+ parser->current_token->location.start,
489
+ parser->allocator,
490
+ errors
491
+ );
492
+
493
+ parser_append_literal_node_from_buffer(parser, &buffer, children, start);
494
+ hb_buffer_free(&buffer);
495
+
496
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
497
+ opening_quote,
498
+ children,
499
+ NULL,
500
+ true,
501
+ opening_quote->location.start,
502
+ parser->current_token->location.start,
503
+ errors,
504
+ parser->allocator
505
+ );
506
+
507
+ token_free(opening_quote, parser->allocator);
379
508
 
380
- if (next_token) { token_free(next_token); }
509
+ return attribute_value;
381
510
  }
382
511
  }
383
512
 
384
- hb_buffer_append(&buffer, parser->current_token->value);
385
- token_free(parser->current_token);
513
+ if (token_is(parser, TOKEN_ERB_START)) {
514
+ parser_append_literal_node_from_buffer(parser, &buffer, children, start);
515
+
516
+ hb_array_append(children, parser_parse_erb_tag(parser));
517
+
518
+ start = parser->current_token->location.start;
519
+
520
+ continue;
521
+ }
522
+
523
+ hb_buffer_append_string(&buffer, parser->current_token->value);
524
+ token_free(parser->current_token, parser->allocator);
386
525
 
387
526
  parser->current_token = lexer_next_token(parser->lexer);
388
527
  }
389
528
 
390
529
  if (token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
391
- && string_equals(parser->current_token->value, opening_quote->value)) {
530
+ && hb_string_equals(parser->current_token->value, opening_quote->value)) {
392
531
  lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
393
532
 
394
533
  token_T* potential_closing = parser->current_token;
@@ -396,27 +535,28 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
396
535
 
397
536
  if (token_is(parser, TOKEN_IDENTIFIER) || token_is(parser, TOKEN_CHARACTER)) {
398
537
  append_unexpected_error(
399
- "Unescaped quote character in attribute value",
400
- "escaped quote (\\') or different quote style (\")",
538
+ hb_string("Unescaped quote character in attribute value"),
539
+ hb_string("HTML entity (&apos;/&quot;) or different quote style"),
401
540
  opening_quote->value,
402
541
  potential_closing->location.start,
403
542
  potential_closing->location.end,
543
+ parser->allocator,
404
544
  errors
405
545
  );
406
546
 
407
547
  lexer_restore_state(parser->lexer, saved_state);
408
548
 
409
- token_free(parser->current_token);
549
+ token_free(parser->current_token, parser->allocator);
410
550
  parser->current_token = potential_closing;
411
551
 
412
- hb_buffer_append(&buffer, parser->current_token->value);
413
- token_free(parser->current_token);
552
+ hb_buffer_append_string(&buffer, parser->current_token->value);
553
+ token_free(parser->current_token, parser->allocator);
414
554
  parser->current_token = lexer_next_token(parser->lexer);
415
555
 
416
556
  while (!token_is(parser, TOKEN_EOF)
417
557
  && !(
418
558
  token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
419
- && string_equals(parser->current_token->value, opening_quote->value)
559
+ && hb_string_equals(parser->current_token->value, opening_quote->value)
420
560
  )) {
421
561
  if (token_is(parser, TOKEN_ERB_START)) {
422
562
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
@@ -428,13 +568,13 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
428
568
  continue;
429
569
  }
430
570
 
431
- hb_buffer_append(&buffer, parser->current_token->value);
432
- token_free(parser->current_token);
571
+ hb_buffer_append_string(&buffer, parser->current_token->value);
572
+ token_free(parser->current_token, parser->allocator);
433
573
 
434
574
  parser->current_token = lexer_next_token(parser->lexer);
435
575
  }
436
576
  } else {
437
- token_free(parser->current_token);
577
+ token_free(parser->current_token, parser->allocator);
438
578
  parser->current_token = potential_closing;
439
579
 
440
580
  lexer_restore_state(parser->lexer, saved_state);
@@ -442,20 +582,10 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
442
582
  }
443
583
 
444
584
  parser_append_literal_node_from_buffer(parser, &buffer, children, start);
445
- free(buffer.value);
585
+ hb_buffer_free(&buffer);
446
586
 
447
587
  token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
448
588
 
449
- if (opening_quote != NULL && closing_quote != NULL && !string_equals(opening_quote->value, closing_quote->value)) {
450
- append_quotes_mismatch_error(
451
- opening_quote,
452
- closing_quote,
453
- closing_quote->location.start,
454
- closing_quote->location.end,
455
- errors
456
- );
457
- }
458
-
459
589
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
460
590
  opening_quote,
461
591
  children,
@@ -463,18 +593,19 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
463
593
  true,
464
594
  opening_quote->location.start,
465
595
  closing_quote->location.end,
466
- errors
596
+ errors,
597
+ parser->allocator
467
598
  );
468
599
 
469
- token_free(opening_quote);
470
- token_free(closing_quote);
600
+ token_free(opening_quote, parser->allocator);
601
+ token_free(closing_quote, parser->allocator);
471
602
 
472
603
  return attribute_value;
473
604
  }
474
605
 
475
606
  static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser_T* parser) {
476
- hb_array_T* children = hb_array_init(8);
477
- hb_array_T* errors = hb_array_init(8);
607
+ hb_array_T* children = hb_array_init(8, parser->allocator);
608
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
478
609
 
479
610
  // <div id=<%= "home" %>>
480
611
  if (token_is(parser, TOKEN_ERB_START)) {
@@ -488,15 +619,16 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
488
619
  false,
489
620
  erb_node->base.location.start,
490
621
  erb_node->base.location.end,
491
- errors
622
+ errors,
623
+ parser->allocator
492
624
  );
493
625
  }
494
626
 
495
627
  // <div id=home>
496
628
  if (token_is(parser, TOKEN_IDENTIFIER)) {
497
629
  token_T* identifier = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
498
- AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier);
499
- token_free(identifier);
630
+ AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier, parser->allocator);
631
+ token_free(identifier, parser->allocator);
500
632
 
501
633
  hb_array_append(children, literal);
502
634
 
@@ -507,7 +639,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
507
639
  false,
508
640
  literal->base.location.start,
509
641
  literal->base.location.end,
510
- errors
642
+ errors,
643
+ parser->allocator
511
644
  );
512
645
  }
513
646
 
@@ -520,31 +653,37 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
520
653
  position_T end = token->location.end;
521
654
 
522
655
  append_unexpected_error(
523
- "Invalid quote character for HTML attribute",
524
- "single quote (') or double quote (\")",
525
- "backtick (`)",
656
+ hb_string("Invalid quote character for HTML attribute"),
657
+ hb_string("single quote (') or double quote (\")"),
658
+ hb_string("a backtick"),
526
659
  start,
527
660
  end,
661
+ parser->allocator,
528
662
  errors
529
663
  );
530
664
 
531
665
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* value =
532
- ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors);
666
+ ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors, parser->allocator);
533
667
 
534
- token_free(token);
668
+ token_free(token, parser->allocator);
535
669
 
536
670
  return value;
537
671
  }
538
672
 
673
+ char* expected = token_types_to_friendly_string(parser->allocator, TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START);
674
+
539
675
  append_unexpected_error(
540
- "Unexpected Token",
541
- "TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START",
542
- token_type_to_string(parser->current_token->type),
676
+ hb_string("Unexpected Token"),
677
+ hb_string(expected),
678
+ token_type_to_friendly_string(parser->current_token->type),
543
679
  parser->current_token->location.start,
544
680
  parser->current_token->location.end,
681
+ parser->allocator,
545
682
  errors
546
683
  );
547
684
 
685
+ hb_allocator_dealloc(parser->allocator, expected);
686
+
548
687
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init(
549
688
  NULL,
550
689
  children,
@@ -552,7 +691,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
552
691
  false,
553
692
  parser->current_token->location.start,
554
693
  parser->current_token->location.end,
555
- errors
694
+ errors,
695
+ parser->allocator
556
696
  );
557
697
 
558
698
  return value;
@@ -567,7 +707,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
567
707
 
568
708
  if (has_equals) {
569
709
  hb_buffer_T equals_buffer;
570
- hb_buffer_init(&equals_buffer, 256);
710
+ hb_buffer_init(&equals_buffer, 256, parser->allocator);
571
711
  position_T equals_start = { 0 };
572
712
  position_T equals_end = { 0 };
573
713
  uint32_t range_start = 0;
@@ -583,8 +723,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
583
723
  range_start = whitespace->range.from;
584
724
  }
585
725
 
586
- hb_buffer_append(&equals_buffer, whitespace->value);
587
- token_free(whitespace);
726
+ hb_buffer_append_string(&equals_buffer, whitespace->value);
727
+ token_free(whitespace, parser->allocator);
588
728
  }
589
729
 
590
730
  token_T* equals = parser_advance(parser);
@@ -595,27 +735,45 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
595
735
  range_start = equals->range.from;
596
736
  }
597
737
 
598
- hb_buffer_append(&equals_buffer, equals->value);
738
+ hb_buffer_append_string(&equals_buffer, equals->value);
599
739
  equals_end = equals->location.end;
600
740
  range_end = equals->range.to;
601
- token_free(equals);
741
+ token_free(equals, parser->allocator);
602
742
 
603
743
  while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
604
744
  token_T* whitespace = parser_advance(parser);
605
- hb_buffer_append(&equals_buffer, whitespace->value);
745
+ hb_buffer_append_string(&equals_buffer, whitespace->value);
606
746
  equals_end = whitespace->location.end;
607
747
  range_end = whitespace->range.to;
608
- token_free(whitespace);
748
+ token_free(whitespace, parser->allocator);
749
+ }
750
+
751
+ token_T* equals_with_whitespace = hb_allocator_alloc(parser->allocator, sizeof(token_T));
752
+
753
+ if (!equals_with_whitespace) {
754
+ hb_buffer_free(&equals_buffer);
755
+
756
+ return ast_html_attribute_node_init(
757
+ attribute_name,
758
+ NULL,
759
+ NULL,
760
+ attribute_name->base.location.start,
761
+ attribute_name->base.location.end,
762
+ NULL,
763
+ parser->allocator
764
+ );
609
765
  }
610
766
 
611
- token_T* equals_with_whitespace = calloc(1, sizeof(token_T));
612
767
  equals_with_whitespace->type = TOKEN_EQUALS;
613
- equals_with_whitespace->value = herb_strdup(equals_buffer.value);
768
+
769
+ char* arena_copy = hb_allocator_strndup(parser->allocator, equals_buffer.value, equals_buffer.length);
770
+ equals_with_whitespace->value = (hb_string_T) { .data = arena_copy, .length = (uint32_t) equals_buffer.length };
771
+
772
+ hb_buffer_free(&equals_buffer);
773
+
614
774
  equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end };
615
775
  equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end };
616
776
 
617
- free(equals_buffer.value);
618
-
619
777
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
620
778
 
621
779
  return ast_html_attribute_node_init(
@@ -624,7 +782,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
624
782
  attribute_value,
625
783
  attribute_name->base.location.start,
626
784
  attribute_value->base.location.end,
627
- NULL
785
+ NULL,
786
+ parser->allocator
628
787
  );
629
788
  } else {
630
789
  return ast_html_attribute_node_init(
@@ -633,7 +792,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
633
792
  NULL,
634
793
  attribute_name->base.location.start,
635
794
  attribute_name->base.location.end,
636
- NULL
795
+ NULL,
796
+ parser->allocator
637
797
  );
638
798
  }
639
799
  } else {
@@ -645,6 +805,51 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
645
805
  if (equals != NULL) {
646
806
  parser_consume_whitespace(parser, NULL);
647
807
 
808
+ // <div class= >
809
+ if (token_is(parser, TOKEN_HTML_TAG_END) || token_is(parser, TOKEN_HTML_TAG_SELF_CLOSE)) {
810
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
811
+ hb_string_T attribute_name_string = hb_string("unknown");
812
+
813
+ if (hb_array_size(attribute_name->children) > 0) {
814
+ AST_LITERAL_NODE_T* first_child = (AST_LITERAL_NODE_T*) hb_array_get(attribute_name->children, 0);
815
+
816
+ if (first_child && !hb_string_is_empty(first_child->content)) { attribute_name_string = first_child->content; }
817
+ }
818
+
819
+ append_missing_attribute_value_error(
820
+ attribute_name_string,
821
+ equals->location.start,
822
+ parser->current_token->location.start,
823
+ parser->allocator,
824
+ errors
825
+ );
826
+
827
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* empty_value = ast_html_attribute_value_node_init(
828
+ NULL,
829
+ hb_array_init(8, parser->allocator),
830
+ NULL,
831
+ false,
832
+ equals->location.end,
833
+ parser->current_token->location.start,
834
+ errors,
835
+ parser->allocator
836
+ );
837
+
838
+ AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
839
+ attribute_name,
840
+ equals,
841
+ empty_value,
842
+ attribute_name->base.location.start,
843
+ parser->current_token->location.start,
844
+ NULL,
845
+ parser->allocator
846
+ );
847
+
848
+ token_free(equals, parser->allocator);
849
+
850
+ return attribute_node;
851
+ }
852
+
648
853
  AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
649
854
 
650
855
  AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
@@ -653,10 +858,11 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
653
858
  attribute_value,
654
859
  attribute_name->base.location.start,
655
860
  attribute_value->base.location.end,
656
- NULL
861
+ NULL,
862
+ parser->allocator
657
863
  );
658
864
 
659
- token_free(equals);
865
+ token_free(equals, parser->allocator);
660
866
 
661
867
  return attribute_node;
662
868
  }
@@ -667,7 +873,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
667
873
  NULL,
668
874
  attribute_name->base.location.start,
669
875
  attribute_name->base.location.end,
670
- NULL
876
+ NULL,
877
+ parser->allocator
671
878
  );
672
879
  }
673
880
 
@@ -678,11 +885,11 @@ static void parser_skip_erb_content(lexer_T* lexer) {
678
885
  token = lexer_next_token(lexer);
679
886
 
680
887
  if (token->type == TOKEN_ERB_END) {
681
- token_free(token);
888
+ token_free(token, lexer->allocator);
682
889
  break;
683
890
  }
684
891
 
685
- token_free(token);
892
+ token_free(token, lexer->allocator);
686
893
  } while (true);
687
894
  }
688
895
 
@@ -693,12 +900,12 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
693
900
  after = lexer_next_token(lexer);
694
901
 
695
902
  if (after->type == TOKEN_EQUALS) {
696
- token_free(after);
903
+ token_free(after, lexer->allocator);
697
904
  return true;
698
905
  }
699
906
 
700
907
  if (after->type == TOKEN_WHITESPACE || after->type == TOKEN_NEWLINE) {
701
- token_free(after);
908
+ token_free(after, lexer->allocator);
702
909
  continue;
703
910
  }
704
911
 
@@ -706,23 +913,56 @@ static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
706
913
  || after->type == TOKEN_ERB_START) {
707
914
 
708
915
  if (after->type == TOKEN_ERB_START) {
709
- token_free(after);
916
+ token_free(after, lexer->allocator);
710
917
  parser_skip_erb_content(lexer);
711
918
  } else {
712
- token_free(after);
919
+ token_free(after, lexer->allocator);
713
920
  }
714
921
  continue;
715
922
  }
716
923
 
717
- token_free(after);
924
+ token_free(after, lexer->allocator);
718
925
  return false;
719
926
 
720
927
  } while (true);
721
928
  }
722
929
 
930
+ static bool starts_with_keyword(hb_string_T string, const char* keyword) {
931
+ hb_string_T prefix = hb_string(keyword);
932
+ if (string.length < prefix.length) { return false; }
933
+ if (strncmp(string.data, prefix.data, prefix.length) != 0) { return false; }
934
+
935
+ if (string.length == prefix.length) { return true; }
936
+
937
+ return is_whitespace(string.data[prefix.length]);
938
+ }
939
+
940
+ // TODO: ideally we could avoid basing this off of strings, and use the step in analyze.c
941
+ static bool parser_lookahead_erb_is_control_flow(parser_T* parser) {
942
+ lexer_T lexer_copy = *parser->lexer;
943
+ token_T* content = lexer_next_token(&lexer_copy);
944
+
945
+ if (content == NULL || content->type != TOKEN_ERB_CONTENT) {
946
+ if (content) { token_free(content, parser->allocator); }
947
+
948
+ return false;
949
+ }
950
+
951
+ hb_string_T trimmed = hb_string_trim_start(content->value);
952
+
953
+ bool is_control_flow = starts_with_keyword(trimmed, "end") || starts_with_keyword(trimmed, "else")
954
+ || starts_with_keyword(trimmed, "elsif") || starts_with_keyword(trimmed, "in")
955
+ || starts_with_keyword(trimmed, "when") || starts_with_keyword(trimmed, "rescue")
956
+ || starts_with_keyword(trimmed, "ensure");
957
+
958
+ token_free(content, parser->allocator);
959
+
960
+ return is_control_flow;
961
+ }
962
+
723
963
  static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children) {
724
- bool is_output_tag = parser->current_token->value && strlen(parser->current_token->value) >= 3
725
- && strncmp(parser->current_token->value, "<%=", 3) == 0;
964
+ bool is_output_tag = !hb_string_is_empty(parser->current_token->value)
965
+ && hb_string_starts_with(parser->current_token->value, hb_string("<%="));
726
966
 
727
967
  if (!is_output_tag) {
728
968
  hb_array_append(children, parser_parse_erb_tag(parser));
@@ -733,7 +973,7 @@ static void parser_handle_erb_in_open_tag(parser_T* parser, hb_array_T* children
733
973
  lexer_T lexer_copy = *parser->lexer;
734
974
 
735
975
  token_T* erb_start = lexer_next_token(&lexer_copy);
736
- token_free(erb_start);
976
+ token_free(erb_start, parser->allocator);
737
977
  parser_skip_erb_content(&lexer_copy);
738
978
 
739
979
  bool looks_like_attribute = parser_lookahead_erb_is_attribute(&lexer_copy);
@@ -759,13 +999,40 @@ static void parser_handle_whitespace_in_open_tag(parser_T* parser, hb_array_T* c
759
999
  }
760
1000
 
761
1001
  static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
762
- hb_array_T* errors = hb_array_init(8);
763
- hb_array_T* children = hb_array_init(8);
1002
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1003
+ hb_array_T* children = hb_array_init(8, parser->allocator);
764
1004
 
765
1005
  token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
766
1006
  token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
767
1007
 
768
1008
  while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) {
1009
+ if (token_is_any_of(parser, TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_START_CLOSE)) {
1010
+ append_unclosed_open_tag_error(
1011
+ tag_name,
1012
+ tag_name->location.start,
1013
+ parser->current_token->location.start,
1014
+ parser->allocator,
1015
+ errors
1016
+ );
1017
+
1018
+ AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
1019
+ tag_start,
1020
+ tag_name,
1021
+ NULL,
1022
+ children,
1023
+ false,
1024
+ tag_start->location.start,
1025
+ parser->current_token->location.start,
1026
+ errors,
1027
+ parser->allocator
1028
+ );
1029
+
1030
+ token_free(tag_start, parser->allocator);
1031
+ token_free(tag_name, parser->allocator);
1032
+
1033
+ return open_tag_node;
1034
+ }
1035
+
769
1036
  if (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
770
1037
  parser_handle_whitespace_in_open_tag(parser, children);
771
1038
  continue;
@@ -791,21 +1058,79 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
791
1058
  token_T* next_token = lexer_next_token(&lexer_copy);
792
1059
 
793
1060
  if (next_token && next_token->type == TOKEN_IDENTIFIER) {
794
- token_free(next_token);
1061
+ token_free(next_token, parser->allocator);
795
1062
  hb_array_append(children, parser_parse_html_attribute(parser));
796
1063
 
797
1064
  continue;
798
1065
  }
799
1066
 
800
- token_free(next_token);
1067
+ token_free(next_token, parser->allocator);
1068
+ }
1069
+
1070
+ if (parser->current_token->type == TOKEN_PERCENT) {
1071
+ lexer_T lexer_copy = *parser->lexer;
1072
+ token_T* peek_token = lexer_next_token(&lexer_copy);
1073
+
1074
+ if (peek_token && peek_token->type == TOKEN_HTML_TAG_END) {
1075
+ position_T stray_start = parser->current_token->location.start;
1076
+ position_T stray_end = peek_token->location.end;
1077
+ token_free(peek_token, parser->allocator);
1078
+
1079
+ append_stray_erb_closing_tag_error(stray_start, stray_end, parser->allocator, errors);
1080
+
1081
+ token_T* percent = parser_advance(parser);
1082
+ token_T* gt = parser_advance(parser);
1083
+
1084
+ AST_LITERAL_NODE_T* literal =
1085
+ ast_literal_node_init(hb_string("%>"), stray_start, stray_end, NULL, parser->allocator);
1086
+ hb_array_append(children, literal);
1087
+
1088
+ token_free(percent, parser->allocator);
1089
+ token_free(gt, parser->allocator);
1090
+
1091
+ continue;
1092
+ }
1093
+
1094
+ token_free(peek_token, parser->allocator);
801
1095
  }
802
1096
 
803
1097
  parser_append_unexpected_error(
804
1098
  parser,
1099
+ errors,
805
1100
  "Unexpected Token",
806
- "TOKEN_IDENTIFIER, TOKEN_AT, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
1101
+ TOKEN_IDENTIFIER,
1102
+ TOKEN_AT,
1103
+ TOKEN_ERB_START,
1104
+ TOKEN_WHITESPACE,
1105
+ TOKEN_NEWLINE
1106
+ );
1107
+ }
1108
+
1109
+ if (token_is(parser, TOKEN_EOF)) {
1110
+ append_unclosed_open_tag_error(
1111
+ tag_name,
1112
+ tag_name->location.start,
1113
+ parser->current_token->location.start,
1114
+ parser->allocator,
807
1115
  errors
808
1116
  );
1117
+
1118
+ AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
1119
+ tag_start,
1120
+ tag_name,
1121
+ NULL,
1122
+ children,
1123
+ false,
1124
+ tag_start->location.start,
1125
+ parser->current_token->location.start,
1126
+ errors,
1127
+ parser->allocator
1128
+ );
1129
+
1130
+ token_free(tag_start, parser->allocator);
1131
+ token_free(tag_name, parser->allocator);
1132
+
1133
+ return open_tag_node;
809
1134
  }
810
1135
 
811
1136
  bool is_self_closing = false;
@@ -816,8 +1141,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
816
1141
  tag_end = parser_consume_expected(parser, TOKEN_HTML_TAG_SELF_CLOSE, errors);
817
1142
 
818
1143
  if (tag_end == NULL) {
819
- token_free(tag_start);
820
- token_free(tag_name);
1144
+ token_free(tag_start, parser->allocator);
1145
+ token_free(tag_name, parser->allocator);
821
1146
 
822
1147
  hb_array_free(&children);
823
1148
  hb_array_free(&errors);
@@ -836,19 +1161,20 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
836
1161
  is_self_closing,
837
1162
  tag_start->location.start,
838
1163
  tag_end->location.end,
839
- errors
1164
+ errors,
1165
+ parser->allocator
840
1166
  );
841
1167
 
842
- token_free(tag_start);
843
- token_free(tag_name);
844
- token_free(tag_end);
1168
+ token_free(tag_start, parser->allocator);
1169
+ token_free(tag_name, parser->allocator);
1170
+ token_free(tag_end, parser->allocator);
845
1171
 
846
1172
  return open_tag_node;
847
1173
  }
848
1174
 
849
1175
  static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
850
- hb_array_T* errors = hb_array_init(8);
851
- hb_array_T* children = hb_array_init(8);
1176
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1177
+ hb_array_T* children = hb_array_init(8, parser->allocator);
852
1178
 
853
1179
  token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
854
1180
 
@@ -858,38 +1184,53 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
858
1184
 
859
1185
  parser_consume_whitespace(parser, children);
860
1186
 
861
- token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
1187
+ token_T* tag_closing = parser_consume_if_present(parser, TOKEN_HTML_TAG_END);
1188
+
1189
+ if (tag_closing == NULL) {
1190
+ append_unclosed_close_tag_error(
1191
+ tag_name,
1192
+ tag_opening->location.start,
1193
+ tag_name->location.end,
1194
+ parser->allocator,
1195
+ errors
1196
+ );
1197
+ }
862
1198
 
863
- if (tag_name != NULL && is_void_element(hb_string(tag_name->value)) && parser_in_svg_context(parser) == false) {
864
- hb_string_T expected = html_self_closing_tag_string(hb_string(tag_name->value));
865
- hb_string_T got = html_closing_tag_string(hb_string(tag_name->value));
1199
+ if (tag_closing != NULL && tag_name != NULL && is_void_element(tag_name->value)
1200
+ && parser_in_svg_context(parser) == false) {
1201
+ hb_string_T expected = html_self_closing_tag_string(tag_name->value, parser->allocator);
1202
+ hb_string_T got = html_closing_tag_string(tag_name->value, parser->allocator);
866
1203
 
867
1204
  append_void_element_closing_tag_error(
868
1205
  tag_name,
869
- expected.data,
870
- got.data,
1206
+ expected,
1207
+ got,
871
1208
  tag_opening->location.start,
872
1209
  tag_closing->location.end,
1210
+ parser->allocator,
873
1211
  errors
874
1212
  );
875
1213
 
876
- free(expected.data);
877
- free(got.data);
1214
+ hb_allocator_dealloc(parser->allocator, expected.data);
1215
+ hb_allocator_dealloc(parser->allocator, got.data);
878
1216
  }
879
1217
 
1218
+ position_T end_position = tag_closing != NULL ? tag_closing->location.end : tag_name->location.end;
1219
+
880
1220
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
881
1221
  tag_opening,
882
1222
  tag_name,
883
1223
  children,
884
1224
  tag_closing,
885
1225
  tag_opening->location.start,
886
- tag_closing->location.end,
887
- errors
1226
+ end_position,
1227
+ errors,
1228
+ parser->allocator
888
1229
  );
889
1230
 
890
- token_free(tag_opening);
891
- token_free(tag_name);
892
- token_free(tag_closing);
1231
+ token_free(tag_opening, parser->allocator);
1232
+ token_free(tag_name, parser->allocator);
1233
+ token_free(tag_closing, parser->allocator);
893
1234
 
894
1235
  return close_tag;
895
1236
  }
@@ -900,7 +1241,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
900
1241
  AST_HTML_OPEN_TAG_NODE_T* open_tag
901
1242
  ) {
902
1243
  return ast_html_element_node_init(
903
- open_tag,
1244
+ (AST_NODE_T*) open_tag,
904
1245
  open_tag->tag_name,
905
1246
  NULL,
906
1247
  NULL,
@@ -908,7 +1249,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
908
1249
  ELEMENT_SOURCE_HTML,
909
1250
  open_tag->base.location.start,
910
1251
  open_tag->base.location.end,
911
- NULL
1252
+ NULL,
1253
+ parser->allocator
912
1254
  );
913
1255
  }
914
1256
 
@@ -916,63 +1258,88 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
916
1258
  parser_T* parser,
917
1259
  AST_HTML_OPEN_TAG_NODE_T* open_tag
918
1260
  ) {
919
- hb_array_T* errors = hb_array_init(8);
920
- hb_array_T* body = hb_array_init(8);
1261
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1262
+ hb_array_T* body = hb_array_init(8, parser->allocator);
921
1263
 
922
1264
  parser_push_open_tag(parser, open_tag->tag_name);
923
1265
 
924
- if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
925
- foreign_content_type_T content_type = parser_get_foreign_content_type(hb_string(open_tag->tag_name->value));
1266
+ if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
1267
+ foreign_content_type_T content_type = parser_get_foreign_content_type(open_tag->tag_name->value);
926
1268
  parser_enter_foreign_content(parser, content_type);
927
1269
  parser_parse_foreign_content(parser, body, errors);
928
1270
  } else {
929
1271
  parser_parse_in_data_state(parser, body, errors);
930
1272
  }
931
1273
 
932
- if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); }
1274
+ if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
1275
+ return parser_handle_missing_close_tag(parser, open_tag, body, errors);
1276
+ }
933
1277
 
934
1278
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
935
1279
 
936
- if (parser_in_svg_context(parser) == false && is_void_element(hb_string(close_tag->tag_name->value))) {
1280
+ if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
937
1281
  hb_array_push(body, close_tag);
938
1282
  parser_parse_in_data_state(parser, body, errors);
939
1283
  close_tag = parser_parse_html_close_tag(parser);
940
1284
  }
941
1285
 
942
- bool matches_stack = parser_check_matching_tag(parser, hb_string(close_tag->tag_name->value));
1286
+ bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
943
1287
 
944
1288
  if (matches_stack) {
945
1289
  token_T* popped_token = parser_pop_open_tag(parser);
946
- token_free(popped_token);
1290
+ token_free(popped_token, parser->allocator);
1291
+ } else if (parser_can_close_ancestor(parser, close_tag->tag_name->value)) {
1292
+ size_t depth = parser_find_ancestor_depth(parser, close_tag->tag_name->value);
1293
+
1294
+ for (size_t i = 0; i < depth; i++) {
1295
+ token_T* unclosed = parser_pop_open_tag(parser);
1296
+
1297
+ if (unclosed != NULL) {
1298
+ append_missing_closing_tag_error(
1299
+ unclosed,
1300
+ unclosed->location.start,
1301
+ unclosed->location.end,
1302
+ parser->allocator,
1303
+ errors
1304
+ );
1305
+ token_free(unclosed, parser->allocator);
1306
+ }
1307
+ }
1308
+
1309
+ token_T* popped_token = parser_pop_open_tag(parser);
1310
+ token_free(popped_token, parser->allocator);
947
1311
  } else {
948
1312
  parser_handle_mismatched_tags(parser, close_tag, errors);
949
1313
  }
950
1314
 
951
1315
  return ast_html_element_node_init(
952
- open_tag,
1316
+ (AST_NODE_T*) open_tag,
953
1317
  open_tag->tag_name,
954
1318
  body,
955
- close_tag,
1319
+ (AST_NODE_T*) close_tag,
956
1320
  false,
957
1321
  ELEMENT_SOURCE_HTML,
958
1322
  open_tag->base.location.start,
959
1323
  close_tag->base.location.end,
960
- errors
1324
+ errors,
1325
+ parser->allocator
961
1326
  );
962
1327
  }
963
1328
 
964
1329
  static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
965
1330
  AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
966
1331
 
1332
+ if (open_tag->tag_closing == NULL) { return (AST_NODE_T*) open_tag; }
1333
+
967
1334
  // <tag />
968
1335
  if (open_tag->is_void) { return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag); }
969
1336
 
970
1337
  // <tag>, in void element list, and not in inside an <svg> element
971
- if (!open_tag->is_void && is_void_element(hb_string(open_tag->tag_name->value)) && !parser_in_svg_context(parser)) {
1338
+ if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
972
1339
  return (AST_NODE_T*) parser_parse_html_self_closing_element(parser, open_tag);
973
1340
  }
974
1341
 
975
- if (open_tag->tag_name->value && parser_is_foreign_content_tag(hb_string(open_tag->tag_name->value))) {
1342
+ if (!hb_string_is_empty(open_tag->tag_name->value) && parser_is_foreign_content_tag(open_tag->tag_name->value)) {
976
1343
  AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
977
1344
 
978
1345
  if (regular_element != NULL) { return (AST_NODE_T*) regular_element; }
@@ -982,11 +1349,38 @@ static AST_NODE_T* parser_parse_html_element(parser_T* parser) {
982
1349
  }
983
1350
 
984
1351
  static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
985
- hb_array_T* errors = hb_array_init(8);
1352
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
986
1353
 
987
1354
  token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
988
1355
  token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
989
- token_T* closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
1356
+
1357
+ token_T* closing_tag = NULL;
1358
+ position_T end_position;
1359
+
1360
+ if (token_is(parser, TOKEN_ERB_END)) {
1361
+ closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
1362
+ end_position = closing_tag->location.end;
1363
+ } else if (token_is(parser, TOKEN_ERB_START)) {
1364
+ append_nested_erb_tag_error(
1365
+ opening_tag,
1366
+ parser->current_token->location.start.line,
1367
+ parser->current_token->location.start.column,
1368
+ parser->current_token->location.start,
1369
+ parser->current_token->location.end,
1370
+ parser->allocator,
1371
+ errors
1372
+ );
1373
+ end_position = parser->current_token->location.start;
1374
+ } else {
1375
+ append_unclosed_erb_tag_error(
1376
+ opening_tag,
1377
+ opening_tag->location.start,
1378
+ parser->current_token->location.start,
1379
+ parser->allocator,
1380
+ errors
1381
+ );
1382
+ end_position = parser->current_token->location.start;
1383
+ }
990
1384
 
991
1385
  AST_ERB_CONTENT_NODE_T* erb_node = ast_erb_content_node_init(
992
1386
  opening_tag,
@@ -995,27 +1389,29 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
995
1389
  NULL,
996
1390
  false,
997
1391
  false,
1392
+ HERB_PRISM_NODE_EMPTY,
998
1393
  opening_tag->location.start,
999
- closing_tag->location.end,
1000
- errors
1394
+ end_position,
1395
+ errors,
1396
+ parser->allocator
1001
1397
  );
1002
1398
 
1003
- token_free(opening_tag);
1004
- token_free(content);
1005
- token_free(closing_tag);
1399
+ token_free(opening_tag, parser->allocator);
1400
+ token_free(content, parser->allocator);
1401
+ if (closing_tag != NULL) { token_free(closing_tag, parser->allocator); }
1006
1402
 
1007
1403
  return erb_node;
1008
1404
  }
1009
1405
 
1010
1406
  static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
1011
1407
  hb_buffer_T content;
1012
- hb_buffer_init(&content, 1024);
1408
+ hb_buffer_init(&content, 1024, parser->allocator);
1013
1409
  position_T start = parser->current_token->location.start;
1014
1410
  hb_string_T expected_closing_tag = parser_get_foreign_content_closing_tag(parser->foreign_content_type);
1015
1411
 
1016
1412
  if (hb_string_is_empty(expected_closing_tag)) {
1017
1413
  parser_exit_foreign_content(parser);
1018
- free(content.value);
1414
+ hb_buffer_free(&content);
1019
1415
 
1020
1416
  return;
1021
1417
  }
@@ -1038,33 +1434,32 @@ static void parser_parse_foreign_content(parser_T* parser, hb_array_T* children,
1038
1434
  token_T* next_token = lexer_next_token(parser->lexer);
1039
1435
  bool is_potential_match = false;
1040
1436
 
1041
- if (next_token && next_token->type == TOKEN_IDENTIFIER && next_token->value) {
1042
- is_potential_match =
1043
- parser_is_expected_closing_tag_name(hb_string(next_token->value), parser->foreign_content_type);
1437
+ if (next_token && next_token->type == TOKEN_IDENTIFIER && !hb_string_is_empty(next_token->value)) {
1438
+ is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
1044
1439
  }
1045
1440
 
1046
1441
  lexer_restore_state(parser->lexer, saved_state);
1047
1442
 
1048
- if (next_token) { token_free(next_token); }
1443
+ if (next_token) { token_free(next_token, parser->allocator); }
1049
1444
 
1050
1445
  if (is_potential_match) {
1051
1446
  parser_append_literal_node_from_buffer(parser, &content, children, start);
1052
1447
  parser_exit_foreign_content(parser);
1053
1448
 
1054
- free(content.value);
1449
+ hb_buffer_free(&content);
1055
1450
 
1056
1451
  return;
1057
1452
  }
1058
1453
  }
1059
1454
 
1060
1455
  token_T* token = parser_advance(parser);
1061
- hb_buffer_append(&content, token->value);
1062
- token_free(token);
1456
+ hb_buffer_append_string(&content, token->value);
1457
+ token_free(token, parser->allocator);
1063
1458
  }
1064
1459
 
1065
1460
  parser_append_literal_node_from_buffer(parser, &content, children, start);
1066
1461
  parser_exit_foreign_content(parser);
1067
- free(content.value);
1462
+ hb_buffer_free(&content);
1068
1463
  }
1069
1464
 
1070
1465
  static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, hb_array_T* errors) {
@@ -1072,36 +1467,43 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
1072
1467
 
1073
1468
  if (token_is(parser, TOKEN_ERB_START)) {
1074
1469
  hb_array_append(children, parser_parse_erb_tag(parser));
1470
+ parser->consecutive_error_count = 0;
1075
1471
  continue;
1076
1472
  }
1077
1473
 
1078
1474
  if (token_is(parser, TOKEN_HTML_DOCTYPE)) {
1079
1475
  hb_array_append(children, parser_parse_html_doctype(parser));
1476
+ parser->consecutive_error_count = 0;
1080
1477
  continue;
1081
1478
  }
1082
1479
 
1083
1480
  if (token_is(parser, TOKEN_XML_DECLARATION)) {
1084
1481
  hb_array_append(children, parser_parse_xml_declaration(parser));
1482
+ parser->consecutive_error_count = 0;
1085
1483
  continue;
1086
1484
  }
1087
1485
 
1088
1486
  if (token_is(parser, TOKEN_CDATA_START)) {
1089
1487
  hb_array_append(children, parser_parse_cdata(parser));
1488
+ parser->consecutive_error_count = 0;
1090
1489
  continue;
1091
1490
  }
1092
1491
 
1093
1492
  if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
1094
1493
  hb_array_append(children, parser_parse_html_comment(parser));
1494
+ parser->consecutive_error_count = 0;
1095
1495
  continue;
1096
1496
  }
1097
1497
 
1098
1498
  if (token_is(parser, TOKEN_HTML_TAG_START)) {
1099
1499
  hb_array_append(children, parser_parse_html_element(parser));
1500
+ parser->consecutive_error_count = 0;
1100
1501
  continue;
1101
1502
  }
1102
1503
 
1103
1504
  if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
1104
1505
  hb_array_append(children, parser_parse_html_close_tag(parser));
1506
+ parser->consecutive_error_count = 0;
1105
1507
  continue;
1106
1508
  }
1107
1509
 
@@ -1129,16 +1531,35 @@ static void parser_parse_in_data_state(parser_T* parser, hb_array_T* children, h
1129
1531
  TOKEN_WHITESPACE
1130
1532
  )) {
1131
1533
  hb_array_append(children, parser_parse_text_content(parser, errors));
1534
+ parser->consecutive_error_count = 0;
1535
+ continue;
1536
+ }
1537
+
1538
+ parser->consecutive_error_count++;
1539
+
1540
+ if (parser->consecutive_error_count >= MAX_CONSECUTIVE_ERRORS) {
1541
+ parser->in_recovery_mode = true;
1542
+ parser_synchronize(parser, errors);
1543
+ parser->consecutive_error_count = 0;
1132
1544
  continue;
1133
1545
  }
1134
1546
 
1135
1547
  parser_append_unexpected_error(
1136
1548
  parser,
1549
+ errors,
1137
1550
  "Unexpected token",
1138
- "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, "
1139
- "TOKEN_NBSP, TOKEN_AT, TOKEN_BACKSLASH, or TOKEN_NEWLINE",
1140
- errors
1551
+ TOKEN_ERB_START,
1552
+ TOKEN_HTML_DOCTYPE,
1553
+ TOKEN_HTML_COMMENT_START,
1554
+ TOKEN_IDENTIFIER,
1555
+ TOKEN_WHITESPACE,
1556
+ TOKEN_NBSP,
1557
+ TOKEN_AT,
1558
+ TOKEN_BACKSLASH,
1559
+ TOKEN_NEWLINE
1141
1560
  );
1561
+
1562
+ parser_synchronize(parser, errors);
1142
1563
  }
1143
1564
  }
1144
1565
 
@@ -1152,11 +1573,11 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
1152
1573
  if (node->type == AST_HTML_OPEN_TAG_NODE) {
1153
1574
  AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
1154
1575
 
1155
- if (hb_string_equals_case_insensitive(hb_string(open->tag_name->value), tag_name)) { depth++; }
1576
+ if (hb_string_equals_case_insensitive(open->tag_name->value, tag_name)) { depth++; }
1156
1577
  } else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
1157
1578
  AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
1158
1579
 
1159
- if (hb_string_equals_case_insensitive(hb_string(close->tag_name->value), tag_name)) {
1580
+ if (hb_string_equals_case_insensitive(close->tag_name->value, tag_name)) {
1160
1581
  if (depth == 0) { return i; }
1161
1582
  depth--;
1162
1583
  }
@@ -1166,10 +1587,44 @@ static size_t find_matching_close_tag(hb_array_T* nodes, size_t start_idx, hb_st
1166
1587
  return (size_t) -1;
1167
1588
  }
1168
1589
 
1169
- static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors);
1590
+ static size_t find_implicit_close_index(hb_array_T* nodes, size_t start_idx, hb_string_T tag_name) {
1591
+ if (!has_optional_end_tag(tag_name)) { return (size_t) -1; }
1592
+
1593
+ for (size_t i = start_idx + 1; i < hb_array_size(nodes); i++) {
1594
+ AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
1595
+ if (node == NULL) { continue; }
1596
+
1597
+ if (node->type == AST_HTML_OPEN_TAG_NODE) {
1598
+ AST_HTML_OPEN_TAG_NODE_T* open = (AST_HTML_OPEN_TAG_NODE_T*) node;
1599
+ hb_string_T next_tag_name = open->tag_name->value;
1600
+
1601
+ if (should_implicitly_close(tag_name, next_tag_name)) { return i; }
1602
+ } else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
1603
+ AST_HTML_CLOSE_TAG_NODE_T* close = (AST_HTML_CLOSE_TAG_NODE_T*) node;
1604
+ hb_string_T close_tag_name = close->tag_name->value;
1605
+
1606
+ if (parent_closes_element(tag_name, close_tag_name)) { return i; }
1607
+ }
1608
+ }
1609
+
1610
+ return hb_array_size(nodes);
1611
+ }
1170
1612
 
1171
- static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors) {
1172
- hb_array_T* result = hb_array_init(hb_array_size(nodes));
1613
+ static hb_array_T* parser_build_elements_from_tags(
1614
+ hb_array_T* nodes,
1615
+ hb_array_T* errors,
1616
+ const parser_options_T* options,
1617
+ hb_allocator_T* allocator
1618
+ );
1619
+
1620
+ static hb_array_T* parser_build_elements_from_tags(
1621
+ hb_array_T* nodes,
1622
+ hb_array_T* errors,
1623
+ const parser_options_T* options,
1624
+ hb_allocator_T* allocator
1625
+ ) {
1626
+ bool strict = options ? options->strict : false;
1627
+ hb_array_T* result = hb_array_init(hb_array_size(nodes), allocator);
1173
1628
 
1174
1629
  for (size_t index = 0; index < hb_array_size(nodes); index++) {
1175
1630
  AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, index);
@@ -1177,45 +1632,105 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
1177
1632
 
1178
1633
  if (node->type == AST_HTML_OPEN_TAG_NODE) {
1179
1634
  AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node;
1180
- hb_string_T tag_name = hb_string(open_tag->tag_name->value);
1635
+ hb_string_T tag_name = open_tag->tag_name->value;
1181
1636
 
1182
1637
  size_t close_index = find_matching_close_tag(nodes, index, tag_name);
1183
1638
 
1184
1639
  if (close_index == (size_t) -1) {
1185
- if (hb_array_size(open_tag->base.errors) == 0) {
1186
- append_missing_closing_tag_error(
1640
+ size_t implicit_close_index = find_implicit_close_index(nodes, index, tag_name);
1641
+
1642
+ if (implicit_close_index != (size_t) -1 && implicit_close_index > index + 1) {
1643
+ hb_array_T* body = hb_array_init(implicit_close_index - index - 1, allocator);
1644
+
1645
+ for (size_t j = index + 1; j < implicit_close_index; j++) {
1646
+ hb_array_append(body, hb_array_get(nodes, j));
1647
+ }
1648
+
1649
+ hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
1650
+ hb_array_free(&body);
1651
+
1652
+ position_T end_position = open_tag->base.location.end;
1653
+
1654
+ if (hb_array_size(processed_body) > 0) {
1655
+ AST_NODE_T* last_body_node = (AST_NODE_T*) hb_array_get(processed_body, hb_array_size(processed_body) - 1);
1656
+ if (last_body_node != NULL) { end_position = last_body_node->location.end; }
1657
+ }
1658
+
1659
+ hb_array_T* element_errors = hb_array_init(8, allocator);
1660
+
1661
+ if (strict) {
1662
+ append_omitted_closing_tag_error(
1663
+ open_tag->tag_name,
1664
+ end_position,
1665
+ open_tag->base.location.start,
1666
+ open_tag->base.location.end,
1667
+ allocator,
1668
+ element_errors
1669
+ );
1670
+ }
1671
+
1672
+ AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = ast_html_omitted_close_tag_node_init(
1673
+ open_tag->tag_name,
1674
+ end_position,
1675
+ end_position,
1676
+ hb_array_init(8, allocator),
1677
+ allocator
1678
+ );
1679
+
1680
+ AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
1681
+ (AST_NODE_T*) open_tag,
1187
1682
  open_tag->tag_name,
1683
+ processed_body,
1684
+ (AST_NODE_T*) omitted_close_tag,
1685
+ false,
1686
+ ELEMENT_SOURCE_HTML,
1188
1687
  open_tag->base.location.start,
1189
- open_tag->base.location.end,
1190
- open_tag->base.errors
1688
+ end_position,
1689
+ element_errors,
1690
+ allocator
1191
1691
  );
1192
- }
1193
1692
 
1194
- hb_array_append(result, node);
1693
+ hb_array_append(result, element);
1694
+
1695
+ index = implicit_close_index - 1;
1696
+ } else {
1697
+ if (hb_array_size(open_tag->base.errors) == 0) {
1698
+ append_missing_closing_tag_error(
1699
+ open_tag->tag_name,
1700
+ open_tag->base.location.start,
1701
+ open_tag->base.location.end,
1702
+ allocator,
1703
+ open_tag->base.errors
1704
+ );
1705
+ }
1706
+
1707
+ hb_array_append(result, node);
1708
+ }
1195
1709
  } else {
1196
1710
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) hb_array_get(nodes, close_index);
1197
1711
 
1198
- hb_array_T* body = hb_array_init(close_index - index - 1);
1712
+ hb_array_T* body = hb_array_init(close_index - index - 1, allocator);
1199
1713
 
1200
1714
  for (size_t j = index + 1; j < close_index; j++) {
1201
1715
  hb_array_append(body, hb_array_get(nodes, j));
1202
1716
  }
1203
1717
 
1204
- hb_array_T* processed_body = parser_build_elements_from_tags(body, errors);
1718
+ hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, options, allocator);
1205
1719
  hb_array_free(&body);
1206
1720
 
1207
- hb_array_T* element_errors = hb_array_init(8);
1721
+ hb_array_T* element_errors = hb_array_init(8, allocator);
1208
1722
 
1209
1723
  AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init(
1210
- open_tag,
1724
+ (AST_NODE_T*) open_tag,
1211
1725
  open_tag->tag_name,
1212
1726
  processed_body,
1213
- close_tag,
1727
+ (AST_NODE_T*) close_tag,
1214
1728
  false,
1215
1729
  ELEMENT_SOURCE_HTML,
1216
1730
  open_tag->base.location.start,
1217
1731
  close_tag->base.location.end,
1218
- element_errors
1732
+ element_errors,
1733
+ allocator
1219
1734
  );
1220
1735
 
1221
1736
  hb_array_append(result, element);
@@ -1225,12 +1740,13 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
1225
1740
  } else if (node->type == AST_HTML_CLOSE_TAG_NODE) {
1226
1741
  AST_HTML_CLOSE_TAG_NODE_T* close_tag = (AST_HTML_CLOSE_TAG_NODE_T*) node;
1227
1742
 
1228
- if (!is_void_element(hb_string(close_tag->tag_name->value))) {
1743
+ if (!is_void_element(close_tag->tag_name->value)) {
1229
1744
  if (hb_array_size(close_tag->base.errors) == 0) {
1230
1745
  append_missing_opening_tag_error(
1231
1746
  close_tag->tag_name,
1232
1747
  close_tag->base.location.start,
1233
1748
  close_tag->base.location.end,
1749
+ allocator,
1234
1750
  close_tag->base.errors
1235
1751
  );
1236
1752
  }
@@ -1246,17 +1762,18 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T
1246
1762
  }
1247
1763
 
1248
1764
  static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) {
1249
- hb_array_T* children = hb_array_init(8);
1250
- hb_array_T* errors = hb_array_init(8);
1765
+ hb_array_T* children = hb_array_init(8, parser->allocator);
1766
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1251
1767
  position_T start = parser->current_token->location.start;
1252
1768
 
1253
1769
  parser_parse_in_data_state(parser, children, errors);
1254
1770
 
1255
1771
  token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors);
1256
1772
 
1257
- AST_DOCUMENT_NODE_T* document_node = ast_document_node_init(children, start, eof->location.end, errors);
1773
+ AST_DOCUMENT_NODE_T* document_node =
1774
+ ast_document_node_init(children, NULL, HERB_PRISM_NODE_EMPTY, start, eof->location.end, errors, parser->allocator);
1258
1775
 
1259
- token_free(eof);
1776
+ token_free(eof, parser->allocator);
1260
1777
 
1261
1778
  return document_node;
1262
1779
  }
@@ -1267,17 +1784,18 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser) {
1267
1784
 
1268
1785
  static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, hb_array_T* children) {
1269
1786
  if (parser->options.track_whitespace) {
1270
- hb_array_T* errors = hb_array_init(8);
1787
+ hb_array_T* errors = hb_array_init(8, parser->allocator);
1271
1788
  AST_WHITESPACE_NODE_T* whitespace_node = ast_whitespace_node_init(
1272
1789
  whitespace_token,
1273
1790
  whitespace_token->location.start,
1274
1791
  whitespace_token->location.end,
1275
- errors
1792
+ errors,
1793
+ parser->allocator
1276
1794
  );
1277
1795
  hb_array_append(children, whitespace_node);
1278
1796
  }
1279
1797
 
1280
- token_free(whitespace_token);
1798
+ token_free(whitespace_token, parser->allocator);
1281
1799
  }
1282
1800
 
1283
1801
  static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
@@ -1287,7 +1805,7 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
1287
1805
  if (parser->options.track_whitespace && children != NULL) {
1288
1806
  parser_handle_whitespace(parser, whitespace, children);
1289
1807
  } else {
1290
- token_free(whitespace);
1808
+ token_free(whitespace, parser->allocator);
1291
1809
  }
1292
1810
  }
1293
1811
  }
@@ -1295,14 +1813,27 @@ static void parser_consume_whitespace(parser_T* parser, hb_array_T* children) {
1295
1813
  void herb_parser_deinit(parser_T* parser) {
1296
1814
  if (parser == NULL) { return; }
1297
1815
 
1298
- if (parser->current_token != NULL) { token_free(parser->current_token); }
1299
- if (parser->open_tags_stack != NULL) { hb_array_free(&parser->open_tags_stack); }
1816
+ if (parser->current_token != NULL) { token_free(parser->current_token, parser->allocator); }
1817
+
1818
+ if (parser->open_tags_stack != NULL) {
1819
+ for (size_t i = 0; i < hb_array_size(parser->open_tags_stack); i++) {
1820
+ token_T* token = (token_T*) hb_array_get(parser->open_tags_stack, i);
1821
+ if (token != NULL) { token_free(token, parser->allocator); }
1822
+ }
1823
+
1824
+ hb_array_free(&parser->open_tags_stack);
1825
+ }
1300
1826
  }
1301
1827
 
1302
- void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
1828
+ void match_tags_in_node_array(
1829
+ hb_array_T* nodes,
1830
+ hb_array_T* errors,
1831
+ const parser_options_T* options,
1832
+ hb_allocator_T* allocator
1833
+ ) {
1303
1834
  if (nodes == NULL || hb_array_size(nodes) == 0) { return; }
1304
1835
 
1305
- hb_array_T* processed = parser_build_elements_from_tags(nodes, errors);
1836
+ hb_array_T* processed = parser_build_elements_from_tags(nodes, errors, options, allocator);
1306
1837
 
1307
1838
  nodes->size = 0;
1308
1839
 
@@ -1312,16 +1843,22 @@ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors) {
1312
1843
 
1313
1844
  hb_array_free(&processed);
1314
1845
 
1846
+ match_tags_context_T context = { .errors = errors, .options = options, .allocator = allocator };
1847
+
1315
1848
  for (size_t i = 0; i < hb_array_size(nodes); i++) {
1316
1849
  AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
1317
1850
  if (node == NULL) { continue; }
1318
1851
 
1319
- herb_visit_node(node, match_tags_visitor, errors);
1852
+ herb_visit_node(node, match_tags_visitor, &context);
1320
1853
  }
1321
1854
  }
1322
1855
 
1323
- void herb_parser_match_html_tags_post_analyze(AST_DOCUMENT_NODE_T* document) {
1856
+ void herb_parser_match_html_tags_post_analyze(
1857
+ AST_DOCUMENT_NODE_T* document,
1858
+ const parser_options_T* options,
1859
+ hb_allocator_T* allocator
1860
+ ) {
1324
1861
  if (document == NULL) { return; }
1325
1862
 
1326
- match_tags_in_node_array(document->children, document->base.errors);
1863
+ match_tags_in_node_array(document->children, document->base.errors, options, allocator);
1327
1864
  }