herb 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +5 -5
  2. data/{LICENSE → LICENSE.txt} +4 -3
  3. data/Makefile +121 -0
  4. data/README.md +102 -107
  5. data/Rakefile +184 -0
  6. data/exe/herb +5 -0
  7. data/ext/herb/error_helpers.c +302 -0
  8. data/ext/herb/error_helpers.h +15 -0
  9. data/ext/herb/extconf.rb +75 -0
  10. data/ext/herb/extension.c +110 -0
  11. data/ext/herb/extension.h +6 -0
  12. data/ext/herb/extension_helpers.c +117 -0
  13. data/ext/herb/extension_helpers.h +24 -0
  14. data/ext/herb/nodes.c +936 -0
  15. data/ext/herb/nodes.h +12 -0
  16. data/herb.gemspec +49 -0
  17. data/lib/herb/ast/node.rb +61 -0
  18. data/lib/herb/ast/nodes.rb +1542 -0
  19. data/lib/herb/ast.rb +6 -0
  20. data/lib/herb/cli.rb +164 -0
  21. data/lib/herb/errors.rb +352 -0
  22. data/lib/herb/lex_result.rb +20 -0
  23. data/lib/herb/libherb/array.rb +48 -0
  24. data/lib/herb/libherb/ast_node.rb +47 -0
  25. data/lib/herb/libherb/buffer.rb +53 -0
  26. data/lib/herb/libherb/extract_result.rb +17 -0
  27. data/lib/herb/libherb/lex_result.rb +29 -0
  28. data/lib/herb/libherb/libherb.rb +49 -0
  29. data/lib/herb/libherb/parse_result.rb +17 -0
  30. data/lib/herb/libherb/token.rb +43 -0
  31. data/lib/herb/libherb.rb +32 -0
  32. data/lib/herb/location.rb +42 -0
  33. data/lib/herb/parse_result.rb +26 -0
  34. data/lib/herb/position.rb +36 -0
  35. data/lib/herb/project.rb +361 -0
  36. data/lib/herb/range.rb +40 -0
  37. data/lib/herb/result.rb +21 -0
  38. data/lib/herb/token.rb +43 -0
  39. data/lib/herb/token_list.rb +11 -0
  40. data/lib/herb/version.rb +5 -0
  41. data/lib/herb.rb +21 -68
  42. data/src/analyze.c +989 -0
  43. data/src/analyze_helpers.c +241 -0
  44. data/src/analyzed_ruby.c +35 -0
  45. data/src/array.c +137 -0
  46. data/src/ast_node.c +81 -0
  47. data/src/ast_nodes.c +866 -0
  48. data/src/ast_pretty_print.c +588 -0
  49. data/src/buffer.c +199 -0
  50. data/src/errors.c +740 -0
  51. data/src/extract.c +110 -0
  52. data/src/herb.c +103 -0
  53. data/src/html_util.c +143 -0
  54. data/src/include/analyze.h +36 -0
  55. data/src/include/analyze_helpers.h +43 -0
  56. data/src/include/analyzed_ruby.h +33 -0
  57. data/src/include/array.h +33 -0
  58. data/src/include/ast_node.h +35 -0
  59. data/src/include/ast_nodes.h +303 -0
  60. data/src/include/ast_pretty_print.h +17 -0
  61. data/src/include/buffer.h +36 -0
  62. data/src/include/errors.h +125 -0
  63. data/src/include/extract.h +20 -0
  64. data/src/include/herb.h +32 -0
  65. data/src/include/html_util.h +13 -0
  66. data/src/include/io.h +9 -0
  67. data/src/include/json.h +28 -0
  68. data/src/include/lexer.h +13 -0
  69. data/src/include/lexer_peek_helpers.h +23 -0
  70. data/src/include/lexer_struct.h +32 -0
  71. data/src/include/location.h +25 -0
  72. data/src/include/macros.h +10 -0
  73. data/src/include/memory.h +12 -0
  74. data/src/include/parser.h +22 -0
  75. data/src/include/parser_helpers.h +33 -0
  76. data/src/include/position.h +22 -0
  77. data/src/include/pretty_print.h +53 -0
  78. data/src/include/prism_helpers.h +18 -0
  79. data/src/include/range.h +23 -0
  80. data/src/include/ruby_parser.h +6 -0
  81. data/src/include/token.h +25 -0
  82. data/src/include/token_matchers.h +21 -0
  83. data/src/include/token_struct.h +51 -0
  84. data/src/include/util.h +25 -0
  85. data/src/include/version.h +6 -0
  86. data/src/include/visitor.h +11 -0
  87. data/src/io.c +30 -0
  88. data/src/json.c +205 -0
  89. data/src/lexer.c +284 -0
  90. data/src/lexer_peek_helpers.c +59 -0
  91. data/src/location.c +41 -0
  92. data/src/main.c +162 -0
  93. data/src/memory.c +53 -0
  94. data/src/parser.c +704 -0
  95. data/src/parser_helpers.c +161 -0
  96. data/src/position.c +33 -0
  97. data/src/pretty_print.c +242 -0
  98. data/src/prism_helpers.c +50 -0
  99. data/src/range.c +38 -0
  100. data/src/ruby_parser.c +47 -0
  101. data/src/token.c +194 -0
  102. data/src/token_matchers.c +32 -0
  103. data/src/util.c +128 -0
  104. data/src/visitor.c +321 -0
  105. metadata +126 -82
  106. data/test/helper.rb +0 -7
  107. data/test/helpers_test.rb +0 -25
  108. data/test/parsing_test.rb +0 -110
data/src/parser.c ADDED
@@ -0,0 +1,704 @@
1
+ #include "include/parser.h"
2
+ #include "include/array.h"
3
+ #include "include/ast_node.h"
4
+ #include "include/ast_nodes.h"
5
+ #include "include/buffer.h"
6
+ #include "include/errors.h"
7
+ #include "include/html_util.h"
8
+ #include "include/lexer.h"
9
+ #include "include/parser_helpers.h"
10
+ #include "include/token.h"
11
+ #include "include/token_matchers.h"
12
+
13
+ #include <stdio.h>
14
+ #include <stdlib.h>
15
+ #include <string.h>
16
+ #include <strings.h>
17
+
18
+ static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors);
19
+ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
20
+
21
+ size_t parser_sizeof(void) {
22
+ return sizeof(struct PARSER_STRUCT);
23
+ }
24
+
25
+ parser_T* parser_init(lexer_T* lexer) {
26
+ parser_T* parser = calloc(1, parser_sizeof());
27
+
28
+ parser->lexer = lexer;
29
+ parser->current_token = lexer_next_token(lexer);
30
+ parser->open_tags_stack = array_init(16);
31
+
32
+ return parser;
33
+ }
34
+
35
+ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
36
+ array_T* errors = array_init(8);
37
+ array_T* children = array_init(8);
38
+ token_T* comment_start = parser_consume_expected(parser, TOKEN_HTML_COMMENT_START, errors);
39
+ position_T* start = position_copy(parser->current_token->location->start);
40
+
41
+ buffer_T comment = buffer_new();
42
+
43
+ while (token_is_none_of(parser, TOKEN_HTML_COMMENT_END, TOKEN_EOF)) {
44
+ if (token_is(parser, TOKEN_ERB_START)) {
45
+ parser_append_literal_node_from_buffer(parser, &comment, children, start);
46
+
47
+ AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
48
+ array_append(children, erb_node);
49
+
50
+ position_free(start);
51
+ start = position_copy(parser->current_token->location->start);
52
+
53
+ continue;
54
+ }
55
+
56
+ token_T* token = parser_advance(parser);
57
+ buffer_append(&comment, token->value);
58
+ token_free(token);
59
+ }
60
+
61
+ parser_append_literal_node_from_buffer(parser, &comment, children, start);
62
+
63
+ token_T* comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors);
64
+
65
+ AST_HTML_COMMENT_NODE_T* comment_node = ast_html_comment_node_init(
66
+ comment_start,
67
+ children,
68
+ comment_end,
69
+ comment_start->location->start,
70
+ comment_end->location->end,
71
+ errors
72
+ );
73
+
74
+ buffer_free(&comment);
75
+ position_free(start);
76
+ token_free(comment_start);
77
+ token_free(comment_end);
78
+
79
+ return comment_node;
80
+ }
81
+
82
+ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
83
+ array_T* errors = array_init(8);
84
+ array_T* children = array_init(8);
85
+ buffer_T content = buffer_new();
86
+
87
+ token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_DOCTYPE, errors);
88
+
89
+ position_T* start = position_copy(parser->current_token->location->start);
90
+
91
+ while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_EOF)) {
92
+ if (token_is(parser, TOKEN_ERB_START)) {
93
+ parser_append_literal_node_from_buffer(parser, &content, children, start);
94
+
95
+ AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
96
+ array_append(children, erb_node);
97
+
98
+ continue;
99
+ }
100
+
101
+ token_T* token = parser_consume_expected(parser, parser->current_token->type, errors);
102
+ buffer_append(&content, token->value);
103
+ token_free(token);
104
+ }
105
+
106
+ parser_append_literal_node_from_buffer(parser, &content, children, start);
107
+
108
+ token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
109
+
110
+ AST_HTML_DOCTYPE_NODE_T* doctype = ast_html_doctype_node_init(
111
+ tag_opening,
112
+ children,
113
+ tag_closing,
114
+ tag_opening->location->start,
115
+ tag_closing->location->end,
116
+ errors
117
+ );
118
+
119
+ position_free(start);
120
+ token_free(tag_opening);
121
+ token_free(tag_closing);
122
+ buffer_free(&content);
123
+
124
+ return doctype;
125
+ }
126
+
127
+ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser) {
128
+ position_T* start = position_copy(parser->current_token->location->start);
129
+
130
+ array_T* errors = array_init(8);
131
+ buffer_T content = buffer_new();
132
+
133
+ while (token_is_none_of(
134
+ parser,
135
+ TOKEN_HTML_TAG_START,
136
+ TOKEN_HTML_TAG_START_CLOSE,
137
+ TOKEN_HTML_DOCTYPE,
138
+ TOKEN_HTML_COMMENT_START,
139
+ TOKEN_ERB_START,
140
+ TOKEN_EOF
141
+ )) {
142
+ if (token_is(parser, TOKEN_ERROR)) {
143
+ buffer_free(&content);
144
+
145
+ token_T* token = parser_consume_expected(parser, TOKEN_ERROR, errors);
146
+ append_unexpected_error(
147
+ "Token Error",
148
+ "not TOKEN_ERROR",
149
+ token->value,
150
+ token->location->start,
151
+ token->location->end,
152
+ errors
153
+ );
154
+ token_free(token);
155
+
156
+ return NULL;
157
+ }
158
+
159
+ token_T* token = parser_advance(parser);
160
+ buffer_append(&content, token->value);
161
+ token_free(token);
162
+ }
163
+
164
+ if (buffer_length(&content) > 0) {
165
+ AST_HTML_TEXT_NODE_T* text_node =
166
+ ast_html_text_node_init(buffer_value(&content), start, parser->current_token->location->start, errors);
167
+
168
+ position_free(start);
169
+ buffer_free(&content);
170
+
171
+ return text_node;
172
+ }
173
+
174
+ AST_HTML_TEXT_NODE_T* text_node = ast_html_text_node_init("", start, parser->current_token->location->start, errors);
175
+
176
+ position_free(start);
177
+ buffer_free(&content);
178
+
179
+ return text_node;
180
+ }
181
+
182
+ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
183
+ array_T* errors = array_init(8);
184
+ token_T* identifier = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
185
+
186
+ if (identifier == NULL) { parser_append_unexpected_token_error(parser, TOKEN_IDENTIFIER, errors); }
187
+
188
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
189
+ ast_html_attribute_name_node_init(identifier, identifier->location->start, identifier->location->end, errors);
190
+
191
+ token_free(identifier);
192
+
193
+ return attribute_name;
194
+ }
195
+
196
+ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value(
197
+ parser_T* parser, array_T* children, array_T* errors
198
+ ) {
199
+ buffer_T buffer = buffer_new();
200
+ token_T* opening_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
201
+ position_T* start = position_copy(parser->current_token->location->start);
202
+
203
+ while (token_is_none_of(parser, TOKEN_QUOTE, TOKEN_EOF)) {
204
+ if (token_is(parser, TOKEN_ERB_START)) {
205
+ parser_append_literal_node_from_buffer(parser, &buffer, children, start);
206
+
207
+ array_append(children, parser_parse_erb_tag(parser));
208
+
209
+ position_free(start);
210
+ start = position_copy(parser->current_token->location->start);
211
+
212
+ continue;
213
+ }
214
+
215
+ buffer_append(&buffer, parser->current_token->value);
216
+ token_free(parser->current_token);
217
+ parser->current_token = lexer_next_token(parser->lexer);
218
+ }
219
+
220
+ parser_append_literal_node_from_buffer(parser, &buffer, children, start);
221
+ position_free(start);
222
+ buffer_free(&buffer);
223
+
224
+ token_T* closing_quote = parser_consume_expected(parser, TOKEN_QUOTE, errors);
225
+
226
+ if (opening_quote != NULL && closing_quote != NULL && strcmp(opening_quote->value, closing_quote->value) != 0) {
227
+ append_quotes_mismatch_error(
228
+ opening_quote,
229
+ closing_quote,
230
+ closing_quote->location->start,
231
+ closing_quote->location->end,
232
+ errors
233
+ );
234
+ }
235
+
236
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = ast_html_attribute_value_node_init(
237
+ opening_quote,
238
+ children,
239
+ closing_quote,
240
+ true,
241
+ opening_quote->location->start,
242
+ closing_quote->location->end,
243
+ errors
244
+ );
245
+
246
+ token_free(opening_quote);
247
+ token_free(closing_quote);
248
+
249
+ return attribute_value;
250
+ }
251
+
252
+ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser_T* parser) {
253
+ array_T* children = array_init(8);
254
+ array_T* errors = array_init(8);
255
+
256
+ // <div id=<%= "home" %>>
257
+ if (token_is(parser, TOKEN_ERB_START)) {
258
+ AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
259
+ array_append(children, erb_node);
260
+
261
+ return ast_html_attribute_value_node_init(
262
+ NULL,
263
+ children,
264
+ NULL,
265
+ false,
266
+ erb_node->base.location->start,
267
+ erb_node->base.location->end,
268
+ NULL
269
+ );
270
+ }
271
+
272
+ // <div id=home>
273
+ if (token_is(parser, TOKEN_IDENTIFIER)) {
274
+ token_T* identifier = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
275
+ AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier);
276
+ token_free(identifier);
277
+
278
+ array_append(children, literal);
279
+
280
+ return ast_html_attribute_value_node_init(
281
+ NULL,
282
+ children,
283
+ NULL,
284
+ false,
285
+ literal->base.location->start,
286
+ literal->base.location->end,
287
+ NULL
288
+ );
289
+ }
290
+
291
+ // <div id="home">
292
+ if (token_is(parser, TOKEN_QUOTE)) { return parser_parse_quoted_html_attribute_value(parser, children, errors); }
293
+
294
+ token_T* token = parser_advance(parser);
295
+
296
+ append_unexpected_error(
297
+ "Unexpected Token",
298
+ "TOKEN_IDENTIFIER, TOKEN_QUOTE, TOKEN_ERB_START",
299
+ token_type_to_string(token->type),
300
+ token->location->start,
301
+ token->location->end,
302
+ errors
303
+ );
304
+
305
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init(
306
+ NULL,
307
+ children,
308
+ NULL,
309
+ false,
310
+ token->location->start,
311
+ token->location->end,
312
+ errors
313
+ );
314
+
315
+ token_free(token);
316
+
317
+ return value;
318
+ }
319
+
320
+ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
321
+ AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
322
+
323
+ token_T* equals = parser_consume_if_present(parser, TOKEN_EQUALS);
324
+
325
+ if (equals != NULL) {
326
+ AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
327
+
328
+ AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init(
329
+ attribute_name,
330
+ equals,
331
+ attribute_value,
332
+ attribute_name->base.location->start,
333
+ attribute_value->base.location->end,
334
+ NULL
335
+ );
336
+
337
+ token_free(equals);
338
+
339
+ return attribute_node;
340
+ }
341
+
342
+ return ast_html_attribute_node_init(
343
+ attribute_name,
344
+ NULL,
345
+ NULL,
346
+ attribute_name->base.location->start,
347
+ attribute_name->base.location->end,
348
+ NULL
349
+ );
350
+ }
351
+
352
+ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
353
+ array_T* errors = array_init(8);
354
+ array_T* children = array_init(8);
355
+
356
+ token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
357
+ token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
358
+
359
+ while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) {
360
+ token_T* whitespace = parser_consume_if_present(parser, TOKEN_WHITESPACE);
361
+
362
+ if (whitespace != NULL) {
363
+ token_free(whitespace);
364
+ continue;
365
+ }
366
+
367
+ token_T* newline = parser_consume_if_present(parser, TOKEN_NEWLINE);
368
+
369
+ if (newline != NULL) {
370
+ token_free(newline);
371
+ continue;
372
+ }
373
+
374
+ if (parser->current_token->type == TOKEN_ERB_START) {
375
+ array_append(children, parser_parse_erb_tag(parser));
376
+ continue;
377
+ }
378
+
379
+ if (parser->current_token->type == TOKEN_IDENTIFIER) {
380
+ array_append(children, parser_parse_html_attribute(parser));
381
+ continue;
382
+ }
383
+
384
+ parser_append_unexpected_error(
385
+ parser,
386
+ "Unexpected Token",
387
+ "TOKEN_IDENTIFIER, TOKEN_ERB_START,TOKEN_WHITESPACE, or TOKEN_NEWLINE",
388
+ errors
389
+ );
390
+ }
391
+
392
+ bool is_self_closing = false;
393
+
394
+ token_T* tag_end = parser_consume_if_present(parser, TOKEN_HTML_TAG_END);
395
+
396
+ if (tag_end == NULL) {
397
+ tag_end = parser_consume_expected(parser, TOKEN_HTML_TAG_SELF_CLOSE, errors);
398
+
399
+ if (tag_end == NULL) {
400
+ token_free(tag_start);
401
+ token_free(tag_name);
402
+
403
+ return NULL;
404
+ }
405
+
406
+ is_self_closing = true;
407
+ }
408
+
409
+ AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init(
410
+ tag_start,
411
+ tag_name,
412
+ tag_end,
413
+ children,
414
+ is_self_closing,
415
+ tag_start->location->start,
416
+ tag_end->location->end,
417
+ errors
418
+ );
419
+
420
+ token_free(tag_start);
421
+ token_free(tag_name);
422
+ token_free(tag_end);
423
+
424
+ return open_tag_node;
425
+ }
426
+
427
+ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
428
+ array_T* errors = array_init(8);
429
+
430
+ token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
431
+ token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
432
+ token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
433
+
434
+ if (tag_name != NULL && is_void_element(tag_name->value) && parser_in_svg_context(parser) == false) {
435
+ char* expected = html_self_closing_tag_string(tag_name->value);
436
+ char* got = html_closing_tag_string(tag_name->value);
437
+
438
+ append_void_element_closing_tag_error(
439
+ tag_name,
440
+ expected,
441
+ got,
442
+ tag_opening->location->start,
443
+ tag_closing->location->end,
444
+ errors
445
+ );
446
+
447
+ free(expected);
448
+ free(got);
449
+ }
450
+
451
+ AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
452
+ tag_opening,
453
+ tag_name,
454
+ tag_closing,
455
+ tag_opening->location->start,
456
+ tag_closing->location->end,
457
+ errors
458
+ );
459
+
460
+ token_free(tag_opening);
461
+ token_free(tag_name);
462
+ token_free(tag_closing);
463
+
464
+ return close_tag;
465
+ }
466
+
467
+ // TODO: this should probably be AST_HTML_ELEMENT_NODE_T with a AST_HTML_SELF_CLOSING_TAG_NODE_T
468
+ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element(
469
+ const parser_T* parser, AST_HTML_OPEN_TAG_NODE_T* open_tag
470
+ ) {
471
+ return ast_html_element_node_init(
472
+ open_tag,
473
+ open_tag->tag_name,
474
+ NULL,
475
+ NULL,
476
+ true,
477
+ open_tag->base.location->start,
478
+ open_tag->base.location->end,
479
+ NULL
480
+ );
481
+ }
482
+
483
+ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element(
484
+ parser_T* parser, AST_HTML_OPEN_TAG_NODE_T* open_tag
485
+ ) {
486
+ array_T* errors = array_init(8);
487
+ array_T* body = array_init(8);
488
+
489
+ parser_push_open_tag(parser, open_tag->tag_name);
490
+
491
+ parser_parse_in_data_state(parser, body, errors);
492
+
493
+ if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); }
494
+
495
+ AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
496
+
497
+ if (parser_in_svg_context(parser) == false && is_void_element(close_tag->tag_name->value)) {
498
+ array_push(body, close_tag);
499
+ parser_parse_in_data_state(parser, body, errors);
500
+ close_tag = parser_parse_html_close_tag(parser);
501
+ }
502
+
503
+ bool matches_stack = parser_check_matching_tag(parser, close_tag->tag_name->value);
504
+
505
+ if (matches_stack) {
506
+ token_T* popped_token = parser_pop_open_tag(parser);
507
+ token_free(popped_token);
508
+ } else {
509
+ parser_handle_mismatched_tags(parser, close_tag, errors);
510
+ }
511
+
512
+ return ast_html_element_node_init(
513
+ open_tag,
514
+ open_tag->tag_name,
515
+ body,
516
+ close_tag,
517
+ false,
518
+ open_tag->base.location->start,
519
+ close_tag->base.location->end,
520
+ errors
521
+ );
522
+ }
523
+
524
+ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_element(parser_T* parser) {
525
+ AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
526
+
527
+ // <tag />
528
+ if (open_tag->is_void || ast_node_is((AST_NODE_T*) open_tag, AST_HTML_SELF_CLOSE_TAG_NODE)) {
529
+ return parser_parse_html_self_closing_element(parser, open_tag);
530
+ }
531
+
532
+ // <tag>, in void element list, and not in inside an <svg> element
533
+ if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
534
+ return parser_parse_html_self_closing_element(parser, open_tag);
535
+ }
536
+
537
+ AST_HTML_ELEMENT_NODE_T* regular_element = parser_parse_html_regular_element(parser, open_tag);
538
+ if (regular_element != NULL) { return regular_element; }
539
+
540
+ array_T* errors = array_init(8);
541
+
542
+ parser_append_unexpected_error(parser, "Unknown HTML open tag type", "HTMLOpenTag or HTMLSelfCloseTag", errors);
543
+
544
+ return ast_html_element_node_init(
545
+ open_tag,
546
+ open_tag->tag_name,
547
+ NULL,
548
+ NULL,
549
+ false,
550
+ open_tag->base.location->start,
551
+ open_tag->base.location->end,
552
+ errors
553
+ );
554
+ }
555
+
556
+ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) {
557
+ array_T* errors = array_init(8);
558
+
559
+ token_T* opening_tag = parser_consume_expected(parser, TOKEN_ERB_START, errors);
560
+ token_T* content = parser_consume_expected(parser, TOKEN_ERB_CONTENT, errors);
561
+ token_T* closing_tag = parser_consume_expected(parser, TOKEN_ERB_END, errors);
562
+
563
+ AST_ERB_CONTENT_NODE_T* erb_node = ast_erb_content_node_init(
564
+ opening_tag,
565
+ content,
566
+ closing_tag,
567
+ NULL,
568
+ false,
569
+ false,
570
+ opening_tag->location->start,
571
+ closing_tag->location->end,
572
+ errors
573
+ );
574
+
575
+ token_free(opening_tag);
576
+ token_free(content);
577
+ token_free(closing_tag);
578
+
579
+ return erb_node;
580
+ }
581
+
582
+ static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors) {
583
+ while (token_is_none_of(parser, TOKEN_HTML_TAG_START_CLOSE, TOKEN_EOF)) {
584
+ if (token_is(parser, TOKEN_ERB_START)) {
585
+ array_append(children, parser_parse_erb_tag(parser));
586
+ continue;
587
+ }
588
+
589
+ if (token_is(parser, TOKEN_HTML_DOCTYPE)) {
590
+ array_append(children, parser_parse_html_doctype(parser));
591
+ continue;
592
+ }
593
+
594
+ if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
595
+ array_append(children, parser_parse_html_comment(parser));
596
+ continue;
597
+ }
598
+
599
+ if (token_is(parser, TOKEN_HTML_TAG_START)) {
600
+ array_append(children, parser_parse_html_element(parser));
601
+ continue;
602
+ }
603
+
604
+ if (token_is_any_of(
605
+ parser,
606
+ TOKEN_AMPERSAND,
607
+ TOKEN_CHARACTER,
608
+ TOKEN_COLON,
609
+ TOKEN_DASH,
610
+ TOKEN_EQUALS,
611
+ TOKEN_EXCLAMATION,
612
+ TOKEN_IDENTIFIER,
613
+ TOKEN_NEWLINE,
614
+ TOKEN_PERCENT,
615
+ TOKEN_SEMICOLON,
616
+ TOKEN_SLASH,
617
+ TOKEN_UNDERSCORE,
618
+ TOKEN_WHITESPACE
619
+ )) {
620
+ array_append(children, parser_parse_text_content(parser));
621
+ continue;
622
+ }
623
+
624
+ parser_append_unexpected_error(
625
+ parser,
626
+ "Unexpected token",
627
+ "TOKEN_ERB_START, TOKEN_HTML_DOCTYPE, TOKEN_HTML_COMMENT_START, TOKEN_IDENTIFIER, TOKEN_WHITESPACE, or "
628
+ "TOKEN_NEWLINE",
629
+ errors
630
+ );
631
+ }
632
+ }
633
+
634
+ static void parser_parse_unclosed_html_tags(const parser_T* parser, array_T* errors) {
635
+ while (array_size(parser->open_tags_stack) > 0) {
636
+ token_T* unclosed_tag = parser_pop_open_tag(parser);
637
+
638
+ append_unclosed_element_error(
639
+ unclosed_tag,
640
+ parser->current_token->location->start,
641
+ parser->current_token->location->end,
642
+ errors
643
+ );
644
+
645
+ token_free(unclosed_tag);
646
+ }
647
+ }
648
+
649
+ static void parser_parse_stray_closing_tags(parser_T* parser, array_T* children, array_T* errors) {
650
+ while (token_is_not(parser, TOKEN_EOF)) {
651
+ if (token_is_not(parser, TOKEN_HTML_TAG_START_CLOSE)) {
652
+ parser_append_unexpected_token_error(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
653
+ continue;
654
+ }
655
+
656
+ AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser);
657
+
658
+ if (!is_void_element(close_tag->tag_name->value)) {
659
+ append_missing_opening_tag_error(
660
+ close_tag->tag_name,
661
+ close_tag->base.location->start,
662
+ close_tag->base.location->end,
663
+ close_tag->base.errors
664
+ );
665
+ }
666
+
667
+ array_append(children, close_tag);
668
+
669
+ parser_parse_in_data_state(parser, children, errors);
670
+ }
671
+ }
672
+
673
+ static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) {
674
+ array_T* children = array_init(8);
675
+ array_T* errors = array_init(8);
676
+ position_T* start = position_copy(parser->current_token->location->start);
677
+
678
+ parser_parse_in_data_state(parser, children, errors);
679
+ parser_parse_unclosed_html_tags(parser, errors);
680
+ parser_parse_stray_closing_tags(parser, children, errors);
681
+
682
+ token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors);
683
+
684
+ AST_DOCUMENT_NODE_T* document_node = ast_document_node_init(children, start, eof->location->end, errors);
685
+
686
+ position_free(start);
687
+ token_free(eof);
688
+
689
+ return document_node;
690
+ }
691
+
692
+ AST_DOCUMENT_NODE_T* parser_parse(parser_T* parser) {
693
+ return parser_parse_document(parser);
694
+ }
695
+
696
+ void parser_free(parser_T* parser) {
697
+ if (parser == NULL) { return; }
698
+
699
+ if (parser->lexer != NULL) { lexer_free(parser->lexer); }
700
+ if (parser->current_token != NULL) { token_free(parser->current_token); }
701
+ if (parser->open_tags_stack != NULL) { array_free(&parser->open_tags_stack); }
702
+
703
+ free(parser);
704
+ }