herb 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +6 -3
- data/ext/herb/error_helpers.c +1 -1
- data/ext/herb/error_helpers.h +1 -1
- data/ext/herb/extension.c +20 -4
- data/ext/herb/nodes.c +70 -41
- data/ext/herb/nodes.h +1 -1
- data/lib/herb/ast/nodes.rb +149 -91
- data/lib/herb/cli.rb +19 -6
- data/lib/herb/errors.rb +1 -1
- data/lib/herb/version.rb +1 -1
- data/lib/herb/visitor.rb +11 -6
- data/sig/herb/ast/nodes.rbs +65 -38
- data/sig/herb/visitor.rbs +6 -3
- data/sig/serialized_ast_errors.rbs +1 -1
- data/sig/serialized_ast_nodes.rbs +15 -10
- data/src/ast_nodes.c +79 -36
- data/src/ast_pretty_print.c +21 -13
- data/src/errors.c +1 -1
- data/src/herb.c +2 -2
- data/src/include/ast_nodes.h +23 -15
- data/src/include/ast_pretty_print.h +1 -1
- data/src/include/errors.h +1 -1
- data/src/include/herb.h +2 -1
- data/src/include/lexer_peek_helpers.h +21 -0
- data/src/include/parser.h +6 -1
- data/src/include/token_struct.h +6 -1
- data/src/include/version.h +1 -1
- data/src/lexer.c +20 -1
- data/src/lexer_peek_helpers.c +77 -0
- data/src/main.c +2 -2
- data/src/parser.c +435 -122
- data/src/token.c +5 -0
- data/src/visitor.c +39 -6
- metadata +2 -2
data/src/parser.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#include "include/errors.h"
|
7
7
|
#include "include/html_util.h"
|
8
8
|
#include "include/lexer.h"
|
9
|
+
#include "include/lexer_peek_helpers.h"
|
9
10
|
#include "include/parser_helpers.h"
|
10
11
|
#include "include/token.h"
|
11
12
|
#include "include/token_matchers.h"
|
@@ -19,12 +20,18 @@
|
|
19
20
|
static void parser_parse_in_data_state(parser_T* parser, array_T* children, array_T* errors);
|
20
21
|
static void parser_parse_foreign_content(parser_T* parser, array_T* children, array_T* errors);
|
21
22
|
static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser);
|
23
|
+
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, array_T* children);
|
24
|
+
static void parser_consume_whitespace(parser_T* parser, array_T* children);
|
25
|
+
static void parser_skip_erb_content(lexer_T* lexer);
|
26
|
+
static bool parser_lookahead_erb_is_attribute(lexer_T* lexer);
|
27
|
+
static void parser_handle_erb_in_open_tag(parser_T* parser, array_T* children);
|
28
|
+
static void parser_handle_whitespace_in_open_tag(parser_T* parser, array_T* children);
|
22
29
|
|
23
30
|
size_t parser_sizeof(void) {
|
24
31
|
return sizeof(struct PARSER_STRUCT);
|
25
32
|
}
|
26
33
|
|
27
|
-
parser_T* parser_init(lexer_T* lexer) {
|
34
|
+
parser_T* parser_init(lexer_T* lexer, parser_options_T* options) {
|
28
35
|
parser_T* parser = calloc(1, parser_sizeof());
|
29
36
|
|
30
37
|
parser->lexer = lexer;
|
@@ -33,9 +40,59 @@ parser_T* parser_init(lexer_T* lexer) {
|
|
33
40
|
parser->state = PARSER_STATE_DATA;
|
34
41
|
parser->foreign_content_type = FOREIGN_CONTENT_UNKNOWN;
|
35
42
|
|
43
|
+
if (options) {
|
44
|
+
parser->options = calloc(1, sizeof(parser_options_T));
|
45
|
+
parser->options->track_whitespace = options->track_whitespace;
|
46
|
+
} else {
|
47
|
+
parser->options = NULL;
|
48
|
+
}
|
49
|
+
|
36
50
|
return parser;
|
37
51
|
}
|
38
52
|
|
53
|
+
/**
 * Parses a CDATA section that begins at the current TOKEN_CDATA_START token.
 *
 * Every token up to TOKEN_CDATA_END (or TOKEN_EOF) has its text accumulated in
 * a buffer that is handed to parser_append_literal_node_from_buffer(). An ERB
 * tag inside the section is parsed with parser_parse_erb_tag() and appended to
 * the children in between the buffered text runs.
 *
 * @param parser The parser positioned on the CDATA opening token.
 * @return The node created by ast_cdata_node_init().
 */
static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) {
  array_T* errors = array_init(8);
  array_T* children = array_init(8);
  buffer_T text = buffer_new();

  token_T* opening = parser_consume_expected(parser, TOKEN_CDATA_START, errors);
  position_T* literal_start = position_copy(parser->current_token->location->start);

  while (token_is_none_of(parser, TOKEN_CDATA_END, TOKEN_EOF)) {
    if (!token_is(parser, TOKEN_ERB_START)) {
      // Ordinary token: its text joins the pending buffer.
      token_T* consumed = parser_advance(parser);
      buffer_append(&text, consumed->value);
      token_free(consumed);
      continue;
    }

    // ERB tag: hand over the buffered text first, then parse the tag itself.
    parser_append_literal_node_from_buffer(parser, &text, children, literal_start);

    AST_ERB_CONTENT_NODE_T* erb = parser_parse_erb_tag(parser);
    array_append(children, erb);

    // The next text run starts at whatever token follows the ERB tag.
    position_free(literal_start);
    literal_start = position_copy(parser->current_token->location->start);
  }

  parser_append_literal_node_from_buffer(parser, &text, children, literal_start);
  token_T* closing = parser_consume_expected(parser, TOKEN_CDATA_END, errors);

  AST_CDATA_NODE_T* cdata =
    ast_cdata_node_init(opening, children, closing, opening->location->start, closing->location->end, errors);

  // Local resources are released only after the node has been built.
  token_free(closing);
  token_free(opening);
  buffer_free(&text);
  position_free(literal_start);

  return cdata;
}
|
95
|
+
|
39
96
|
static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) {
|
40
97
|
array_T* errors = array_init(8);
|
41
98
|
array_T* children = array_init(8);
|
@@ -128,6 +185,54 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) {
|
|
128
185
|
return doctype;
|
129
186
|
}
|
130
187
|
|
188
|
+
/**
 * Parses an XML declaration that begins at the current TOKEN_XML_DECLARATION
 * token.
 *
 * Every token up to TOKEN_XML_DECLARATION_END (or TOKEN_EOF) has its text
 * accumulated in a buffer that is handed to
 * parser_append_literal_node_from_buffer(). An ERB tag inside the declaration
 * is parsed with parser_parse_erb_tag() and appended to the children in
 * between the buffered text runs.
 *
 * @param parser The parser positioned on the declaration's opening token.
 * @return The node created by ast_xml_declaration_node_init().
 */
static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser) {
  array_T* errors = array_init(8);
  array_T* children = array_init(8);
  buffer_T text = buffer_new();

  token_T* opening = parser_consume_expected(parser, TOKEN_XML_DECLARATION, errors);
  position_T* literal_start = position_copy(parser->current_token->location->start);

  while (token_is_none_of(parser, TOKEN_XML_DECLARATION_END, TOKEN_EOF)) {
    if (!token_is(parser, TOKEN_ERB_START)) {
      // Ordinary token: its text joins the pending buffer.
      token_T* consumed = parser_advance(parser);
      buffer_append(&text, consumed->value);
      token_free(consumed);
      continue;
    }

    // ERB tag: hand over the buffered text first, then parse the tag itself.
    parser_append_literal_node_from_buffer(parser, &text, children, literal_start);

    AST_ERB_CONTENT_NODE_T* erb = parser_parse_erb_tag(parser);
    array_append(children, erb);

    // The next text run starts at whatever token follows the ERB tag.
    position_free(literal_start);
    literal_start = position_copy(parser->current_token->location->start);
  }

  parser_append_literal_node_from_buffer(parser, &text, children, literal_start);
  token_T* closing = parser_consume_expected(parser, TOKEN_XML_DECLARATION_END, errors);

  AST_XML_DECLARATION_NODE_T* xml_declaration = ast_xml_declaration_node_init(
    opening,
    children,
    closing,
    opening->location->start,
    closing->location->end,
    errors
  );

  // Local resources are released only after the node has been built.
  buffer_free(&text);
  token_free(closing);
  token_free(opening);
  position_free(literal_start);

  return xml_declaration;
}
|
235
|
+
|
131
236
|
static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T* document_errors) {
|
132
237
|
position_T* start = position_copy(parser->current_token->location->start);
|
133
238
|
|
@@ -188,96 +293,58 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, array_T
|
|
188
293
|
|
189
294
|
static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T* parser) {
|
190
295
|
array_T* errors = array_init(8);
|
296
|
+
array_T* children = array_init(8);
|
297
|
+
buffer_T buffer = buffer_new();
|
298
|
+
position_T* start = position_copy(parser->current_token->location->start);
|
191
299
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
token_free(at_token);
|
300
|
+
while (token_is_none_of(
|
301
|
+
parser,
|
302
|
+
TOKEN_EQUALS,
|
303
|
+
TOKEN_WHITESPACE,
|
304
|
+
TOKEN_NEWLINE,
|
305
|
+
TOKEN_HTML_TAG_END,
|
306
|
+
TOKEN_HTML_TAG_SELF_CLOSE,
|
307
|
+
TOKEN_EOF
|
308
|
+
)) {
|
309
|
+
if (token_is(parser, TOKEN_ERB_START)) {
|
310
|
+
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
205
311
|
|
206
|
-
|
207
|
-
|
208
|
-
} else {
|
209
|
-
first_token = parser_consume_if_present(parser, TOKEN_IDENTIFIER);
|
312
|
+
AST_ERB_CONTENT_NODE_T* erb_node = parser_parse_erb_tag(parser);
|
313
|
+
array_append(children, erb_node);
|
210
314
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
return attribute_name;
|
315
|
+
position_free(start);
|
316
|
+
start = position_copy(parser->current_token->location->start);
|
317
|
+
continue;
|
215
318
|
}
|
216
|
-
}
|
217
|
-
|
218
|
-
buffer_T name_buffer = buffer_new();
|
219
319
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
buffer_append(&name_buffer, at_token->value);
|
224
|
-
start_position = position_copy(at_token->location->start);
|
225
|
-
} else {
|
226
|
-
start_position = position_copy(first_token->location->start);
|
320
|
+
token_T* token = parser_advance(parser);
|
321
|
+
buffer_append(&buffer, token->value);
|
322
|
+
token_free(token);
|
227
323
|
}
|
228
324
|
|
229
|
-
|
230
|
-
|
231
|
-
position_T* end_position = position_copy(first_token->location->end);
|
232
|
-
size_t range_end = first_token->range->to;
|
233
|
-
|
234
|
-
while (parser->current_token->type == TOKEN_CHARACTER && parser->current_token->value
|
235
|
-
&& strcmp(parser->current_token->value, ".") == 0) {
|
236
|
-
|
237
|
-
token_T* dot_token = parser_advance(parser);
|
238
|
-
|
239
|
-
buffer_append(&name_buffer, dot_token->value);
|
240
|
-
position_free(end_position);
|
241
|
-
|
242
|
-
end_position = position_copy(dot_token->location->end);
|
243
|
-
range_end = dot_token->range->to;
|
244
|
-
|
245
|
-
token_free(dot_token);
|
325
|
+
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
246
326
|
|
247
|
-
|
248
|
-
|
327
|
+
position_T* node_start = NULL;
|
328
|
+
position_T* node_end = NULL;
|
249
329
|
|
250
|
-
|
251
|
-
|
330
|
+
if (children->size > 0) {
|
331
|
+
AST_NODE_T* first_child = array_get(children, 0);
|
332
|
+
AST_NODE_T* last_child = array_get(children, children->size - 1);
|
252
333
|
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
}
|
334
|
+
node_start = position_copy(first_child->location->start);
|
335
|
+
node_end = position_copy(last_child->location->end);
|
336
|
+
} else {
|
337
|
+
node_start = position_copy(parser->current_token->location->start);
|
338
|
+
node_end = position_copy(parser->current_token->location->start);
|
259
339
|
}
|
260
340
|
|
261
|
-
token_T* combined_token = calloc(1, sizeof(token_T));
|
262
|
-
combined_token->value = herb_strdup(name_buffer.value);
|
263
|
-
combined_token->type = TOKEN_IDENTIFIER;
|
264
|
-
combined_token->location =
|
265
|
-
location_from(start_position->line, start_position->column, end_position->line, end_position->column);
|
266
|
-
|
267
|
-
size_t range_start = at_token != NULL ? at_token->range->from : first_token->range->from;
|
268
|
-
combined_token->range = range_init(range_start, range_end);
|
269
|
-
|
270
341
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name =
|
271
|
-
ast_html_attribute_name_node_init(
|
272
|
-
|
273
|
-
buffer_free(&name_buffer);
|
274
|
-
position_free(start_position);
|
275
|
-
position_free(end_position);
|
276
|
-
token_free(first_token);
|
277
|
-
|
278
|
-
if (at_token != NULL) { token_free(at_token); }
|
342
|
+
ast_html_attribute_name_node_init(children, node_start, node_end, errors);
|
279
343
|
|
280
|
-
|
344
|
+
position_free(start);
|
345
|
+
position_free(node_start);
|
346
|
+
position_free(node_end);
|
347
|
+
buffer_free(&buffer);
|
281
348
|
|
282
349
|
return attribute_name;
|
283
350
|
}
|
@@ -307,11 +374,89 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value
|
|
307
374
|
continue;
|
308
375
|
}
|
309
376
|
|
377
|
+
if (token_is(parser, TOKEN_BACKSLASH)) {
|
378
|
+
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
379
|
+
|
380
|
+
token_T* next_token = lexer_next_token(parser->lexer);
|
381
|
+
|
382
|
+
if (next_token && next_token->type == TOKEN_QUOTE && opening_quote != NULL
|
383
|
+
&& strcmp(next_token->value, opening_quote->value) == 0) {
|
384
|
+
buffer_append(&buffer, parser->current_token->value);
|
385
|
+
buffer_append(&buffer, next_token->value);
|
386
|
+
|
387
|
+
token_free(parser->current_token);
|
388
|
+
token_free(next_token);
|
389
|
+
|
390
|
+
parser->current_token = lexer_next_token(parser->lexer);
|
391
|
+
continue;
|
392
|
+
} else {
|
393
|
+
lexer_restore_state(parser->lexer, saved_state);
|
394
|
+
|
395
|
+
if (next_token) { token_free(next_token); }
|
396
|
+
}
|
397
|
+
}
|
398
|
+
|
310
399
|
buffer_append(&buffer, parser->current_token->value);
|
311
400
|
token_free(parser->current_token);
|
401
|
+
|
312
402
|
parser->current_token = lexer_next_token(parser->lexer);
|
313
403
|
}
|
314
404
|
|
405
|
+
if (token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
406
|
+
&& strcmp(parser->current_token->value, opening_quote->value) == 0) {
|
407
|
+
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
408
|
+
|
409
|
+
token_T* potential_closing = parser->current_token;
|
410
|
+
parser->current_token = lexer_next_token(parser->lexer);
|
411
|
+
|
412
|
+
if (token_is(parser, TOKEN_IDENTIFIER) || token_is(parser, TOKEN_CHARACTER)) {
|
413
|
+
append_unexpected_error(
|
414
|
+
"Unescaped quote character in attribute value",
|
415
|
+
"escaped quote (\\') or different quote style (\")",
|
416
|
+
opening_quote->value,
|
417
|
+
potential_closing->location->start,
|
418
|
+
potential_closing->location->end,
|
419
|
+
errors
|
420
|
+
);
|
421
|
+
|
422
|
+
lexer_restore_state(parser->lexer, saved_state);
|
423
|
+
|
424
|
+
token_free(parser->current_token);
|
425
|
+
parser->current_token = potential_closing;
|
426
|
+
|
427
|
+
buffer_append(&buffer, parser->current_token->value);
|
428
|
+
token_free(parser->current_token);
|
429
|
+
parser->current_token = lexer_next_token(parser->lexer);
|
430
|
+
|
431
|
+
while (!token_is(parser, TOKEN_EOF)
|
432
|
+
&& !(
|
433
|
+
token_is(parser, TOKEN_QUOTE) && opening_quote != NULL
|
434
|
+
&& strcmp(parser->current_token->value, opening_quote->value) == 0
|
435
|
+
)) {
|
436
|
+
if (token_is(parser, TOKEN_ERB_START)) {
|
437
|
+
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
438
|
+
|
439
|
+
array_append(children, parser_parse_erb_tag(parser));
|
440
|
+
|
441
|
+
position_free(start);
|
442
|
+
start = position_copy(parser->current_token->location->start);
|
443
|
+
|
444
|
+
continue;
|
445
|
+
}
|
446
|
+
|
447
|
+
buffer_append(&buffer, parser->current_token->value);
|
448
|
+
token_free(parser->current_token);
|
449
|
+
|
450
|
+
parser->current_token = lexer_next_token(parser->lexer);
|
451
|
+
}
|
452
|
+
} else {
|
453
|
+
token_free(parser->current_token);
|
454
|
+
parser->current_token = potential_closing;
|
455
|
+
|
456
|
+
lexer_restore_state(parser->lexer, saved_state);
|
457
|
+
}
|
458
|
+
}
|
459
|
+
|
315
460
|
parser_append_literal_node_from_buffer(parser, &buffer, children, start);
|
316
461
|
position_free(start);
|
317
462
|
buffer_free(&buffer);
|
@@ -439,18 +584,85 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser
|
|
439
584
|
static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) {
|
440
585
|
AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = parser_parse_html_attribute_name(parser);
|
441
586
|
|
442
|
-
|
443
|
-
|
444
|
-
|
587
|
+
if (parser->options && parser->options->track_whitespace) {
|
588
|
+
bool has_equals = (parser->current_token->type == TOKEN_EQUALS)
|
589
|
+
|| lexer_peek_for_token_type_after_whitespace(parser->lexer, TOKEN_EQUALS);
|
590
|
+
|
591
|
+
if (has_equals) {
|
592
|
+
buffer_T equals_buffer = buffer_new();
|
593
|
+
position_T* equals_start = NULL;
|
594
|
+
position_T* equals_end = NULL;
|
595
|
+
size_t range_start = 0;
|
596
|
+
size_t range_end = 0;
|
597
|
+
|
598
|
+
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
599
|
+
token_T* whitespace = parser_advance(parser);
|
600
|
+
|
601
|
+
if (equals_start == NULL) {
|
602
|
+
equals_start = position_copy(whitespace->location->start);
|
603
|
+
range_start = whitespace->range->from;
|
604
|
+
}
|
605
|
+
|
606
|
+
buffer_append(&equals_buffer, whitespace->value);
|
607
|
+
token_free(whitespace);
|
608
|
+
}
|
609
|
+
|
610
|
+
token_T* equals = parser_advance(parser);
|
611
|
+
|
612
|
+
if (equals_start == NULL) {
|
613
|
+
equals_start = position_copy(equals->location->start);
|
614
|
+
range_start = equals->range->from;
|
615
|
+
}
|
616
|
+
|
617
|
+
buffer_append(&equals_buffer, equals->value);
|
618
|
+
equals_end = position_copy(equals->location->end);
|
619
|
+
range_end = equals->range->to;
|
620
|
+
token_free(equals);
|
621
|
+
|
622
|
+
while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
623
|
+
token_T* whitespace = parser_advance(parser);
|
624
|
+
buffer_append(&equals_buffer, whitespace->value);
|
625
|
+
equals_end = position_copy(whitespace->location->end);
|
626
|
+
range_end = whitespace->range->to;
|
627
|
+
token_free(whitespace);
|
628
|
+
}
|
629
|
+
|
630
|
+
token_T* equals_with_whitespace = calloc(1, sizeof(token_T));
|
631
|
+
equals_with_whitespace->type = TOKEN_EQUALS;
|
632
|
+
equals_with_whitespace->value = herb_strdup(equals_buffer.value);
|
633
|
+
equals_with_whitespace->location = location_init(equals_start, equals_end);
|
634
|
+
equals_with_whitespace->range = range_init(range_start, range_end);
|
635
|
+
|
636
|
+
buffer_free(&equals_buffer);
|
637
|
+
|
638
|
+
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
639
|
+
|
640
|
+
return ast_html_attribute_node_init(
|
641
|
+
attribute_name,
|
642
|
+
equals_with_whitespace,
|
643
|
+
attribute_value,
|
644
|
+
attribute_name->base.location->start,
|
645
|
+
attribute_value->base.location->end,
|
646
|
+
NULL
|
647
|
+
);
|
648
|
+
} else {
|
649
|
+
return ast_html_attribute_node_init(
|
650
|
+
attribute_name,
|
651
|
+
NULL,
|
652
|
+
NULL,
|
653
|
+
attribute_name->base.location->start,
|
654
|
+
attribute_name->base.location->end,
|
655
|
+
NULL
|
656
|
+
);
|
657
|
+
}
|
658
|
+
} else {
|
659
|
+
parser_consume_whitespace(parser, NULL);
|
445
660
|
}
|
446
661
|
|
447
662
|
token_T* equals = parser_consume_if_present(parser, TOKEN_EQUALS);
|
448
663
|
|
449
664
|
if (equals != NULL) {
|
450
|
-
|
451
|
-
token_T* whitespace = parser_advance(parser);
|
452
|
-
token_free(whitespace);
|
453
|
-
}
|
665
|
+
parser_consume_whitespace(parser, NULL);
|
454
666
|
|
455
667
|
AST_HTML_ATTRIBUTE_VALUE_NODE_T* attribute_value = parser_parse_html_attribute_value(parser);
|
456
668
|
|
@@ -478,30 +690,103 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser)
|
|
478
690
|
);
|
479
691
|
}
|
480
692
|
|
481
|
-
static
|
482
|
-
|
483
|
-
array_T* children = array_init(8);
|
693
|
+
/**
 * Advances `lexer` past the remainder of an ERB tag by discarding every token
 * up to and including the next TOKEN_ERB_END.
 *
 * The scan also stops when the lexer yields no token or reports TOKEN_EOF, so
 * an ERB tag that is never closed cannot keep this loop running forever.
 *
 * @param lexer The lexer to advance; every token read from it is freed here.
 */
static void parser_skip_erb_content(lexer_T* lexer) {
  while (true) {
    token_T* token = lexer_next_token(lexer);

    // Other call sites in this file also treat a NULL token as possible.
    if (token == NULL) { return; }

    // Record the verdict before freeing, since the token is not usable afterwards.
    const bool reached_end = token->type == TOKEN_ERB_END || token->type == TOKEN_EOF;
    token_free(token);

    if (reached_end) { return; }
  }
}
|
707
|
+
|
708
|
+
/**
 * Scans ahead on `lexer` to decide whether the upcoming tokens lead to a
 * TOKEN_EQUALS.
 *
 * Whitespace, newline, identifier, character and dash tokens are skipped, and
 * a nested ERB tag is skipped as a whole via parser_skip_erb_content(). Any
 * other token type (or the lexer yielding no token) ends the scan.
 *
 * @param lexer The lexer to read from; it is advanced and every token read is
 *              freed here.
 * @return true when TOKEN_EQUALS is reached, false otherwise.
 */
static bool parser_lookahead_erb_is_attribute(lexer_T* lexer) {
  while (true) {
    token_T* after = lexer_next_token(lexer);

    // Other call sites in this file also treat a NULL token as possible.
    if (after == NULL) { return false; }

    switch (after->type) {
      case TOKEN_EQUALS:
        token_free(after);
        return true;

      case TOKEN_ERB_START:
        // Skip the whole nested ERB tag, then keep scanning.
        token_free(after);
        parser_skip_erb_content(lexer);
        break;

      case TOKEN_WHITESPACE:
      case TOKEN_NEWLINE:
      case TOKEN_IDENTIFIER:
      case TOKEN_CHARACTER:
      case TOKEN_DASH:
        token_free(after);
        break;

      default:
        token_free(after);
        return false;
    }
  }
}
|
741
|
+
|
742
|
+
/**
 * Handles an ERB tag met while parsing the inside of an HTML open tag and
 * appends the resulting node to `children`.
 *
 * When the current token's value starts with "<%=", a copy of the lexer struct
 * is used to look past the ERB tag; if parser_lookahead_erb_is_attribute()
 * reports true, the input is parsed as an HTML attribute. In every other case
 * it is parsed as a plain ERB tag.
 *
 * @param parser   The parser whose current token is the ERB start token.
 * @param children The array that receives the parsed node.
 */
static void parser_handle_erb_in_open_tag(parser_T* parser, array_T* children) {
  const char* opening = parser->current_token->value;
  bool parse_as_attribute = false;

  // strncmp stops at the terminating NUL, so values shorter than three
  // characters simply compare unequal.
  if (opening != NULL && strncmp(opening, "<%=", 3) == 0) {
    // Look ahead on a struct copy rather than on parser->lexer itself.
    lexer_T probe = *parser->lexer;

    // Discard the first token the copy produces, then the rest of the ERB tag.
    token_T* first = lexer_next_token(&probe);
    token_free(first);
    parser_skip_erb_content(&probe);

    parse_as_attribute = parser_lookahead_erb_is_attribute(&probe);
  }

  if (parse_as_attribute) {
    array_append(children, parser_parse_html_attribute(parser));
    return;
  }

  array_append(children, parser_parse_erb_tag(parser));
}
|
766
|
+
|
767
|
+
/**
 * Consumes one TOKEN_WHITESPACE token or, when none is present, one
 * TOKEN_NEWLINE token, and passes it to parser_handle_whitespace().
 *
 * Does nothing when neither token is present.
 *
 * @param parser   The parser to consume from.
 * @param children The array forwarded to parser_handle_whitespace().
 */
static void parser_handle_whitespace_in_open_tag(parser_T* parser, array_T* children) {
  token_T* consumed = parser_consume_if_present(parser, TOKEN_WHITESPACE);

  // A newline is only tried when no whitespace token was consumed.
  if (consumed == NULL) { consumed = parser_consume_if_present(parser, TOKEN_NEWLINE); }

  if (consumed == NULL) { return; }

  parser_handle_whitespace(parser, consumed, children);
}
|
779
|
+
|
780
|
+
static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
781
|
+
array_T* errors = array_init(8);
|
782
|
+
array_T* children = array_init(8);
|
783
|
+
|
784
|
+
token_T* tag_start = parser_consume_expected(parser, TOKEN_HTML_TAG_START, errors);
|
785
|
+
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
786
|
+
|
787
|
+
while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) {
|
788
|
+
if (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
|
789
|
+
parser_handle_whitespace_in_open_tag(parser, children);
|
505
790
|
continue;
|
506
791
|
}
|
507
792
|
|
@@ -510,6 +795,11 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
510
795
|
continue;
|
511
796
|
}
|
512
797
|
|
798
|
+
if (parser->current_token->type == TOKEN_ERB_START) {
|
799
|
+
parser_handle_erb_in_open_tag(parser, children);
|
800
|
+
continue;
|
801
|
+
}
|
802
|
+
|
513
803
|
if (parser->current_token->type == TOKEN_AT) {
|
514
804
|
array_append(children, parser_parse_html_attribute(parser));
|
515
805
|
continue;
|
@@ -563,14 +853,15 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) {
|
|
563
853
|
|
564
854
|
static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) {
|
565
855
|
array_T* errors = array_init(8);
|
856
|
+
array_T* children = array_init(8);
|
566
857
|
|
567
858
|
token_T* tag_opening = parser_consume_expected(parser, TOKEN_HTML_TAG_START_CLOSE, errors);
|
859
|
+
|
860
|
+
parser_consume_whitespace(parser, children);
|
861
|
+
|
568
862
|
token_T* tag_name = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors);
|
569
863
|
|
570
|
-
|
571
|
-
token_T* whitespace = parser_advance(parser);
|
572
|
-
token_free(whitespace);
|
573
|
-
}
|
864
|
+
parser_consume_whitespace(parser, children);
|
574
865
|
|
575
866
|
token_T* tag_closing = parser_consume_expected(parser, TOKEN_HTML_TAG_END, errors);
|
576
867
|
|
@@ -594,6 +885,7 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser)
|
|
594
885
|
AST_HTML_CLOSE_TAG_NODE_T* close_tag = ast_html_close_tag_node_init(
|
595
886
|
tag_opening,
|
596
887
|
tag_name,
|
888
|
+
children,
|
597
889
|
tag_closing,
|
598
890
|
tag_opening->location->start,
|
599
891
|
tag_closing->location->end,
|
@@ -676,9 +968,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_element(parser_T* parser) {
|
|
676
968
|
AST_HTML_OPEN_TAG_NODE_T* open_tag = parser_parse_html_open_tag(parser);
|
677
969
|
|
678
970
|
// <tag />
|
679
|
-
if (open_tag->is_void
|
680
|
-
return parser_parse_html_self_closing_element(parser, open_tag);
|
681
|
-
}
|
971
|
+
if (open_tag->is_void) { return parser_parse_html_self_closing_element(parser, open_tag); }
|
682
972
|
|
683
973
|
// <tag>, in void element list, and not in inside an <svg> element
|
684
974
|
if (!open_tag->is_void && is_void_element(open_tag->tag_name->value) && !parser_in_svg_context(parser)) {
|
@@ -757,15 +1047,7 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
|
|
757
1047
|
}
|
758
1048
|
|
759
1049
|
if (token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) {
|
760
|
-
|
761
|
-
size_t saved_line = parser->lexer->current_line;
|
762
|
-
size_t saved_column = parser->lexer->current_column;
|
763
|
-
size_t saved_previous_position = parser->lexer->previous_position;
|
764
|
-
size_t saved_previous_line = parser->lexer->previous_line;
|
765
|
-
size_t saved_previous_column = parser->lexer->previous_column;
|
766
|
-
|
767
|
-
char saved_char = parser->lexer->current_character;
|
768
|
-
lexer_state_T saved_state = parser->lexer->state;
|
1050
|
+
lexer_state_snapshot_T saved_state = lexer_save_state(parser->lexer);
|
769
1051
|
|
770
1052
|
token_T* next_token = lexer_next_token(parser->lexer);
|
771
1053
|
bool is_potential_match = false;
|
@@ -774,14 +1056,7 @@ static void parser_parse_foreign_content(parser_T* parser, array_T* children, ar
|
|
774
1056
|
is_potential_match = parser_is_expected_closing_tag_name(next_token->value, parser->foreign_content_type);
|
775
1057
|
}
|
776
1058
|
|
777
|
-
parser->lexer
|
778
|
-
parser->lexer->current_line = saved_line;
|
779
|
-
parser->lexer->current_column = saved_column;
|
780
|
-
parser->lexer->previous_position = saved_previous_position;
|
781
|
-
parser->lexer->previous_line = saved_previous_line;
|
782
|
-
parser->lexer->previous_column = saved_previous_column;
|
783
|
-
parser->lexer->current_character = saved_char;
|
784
|
-
parser->lexer->state = saved_state;
|
1059
|
+
lexer_restore_state(parser->lexer, saved_state);
|
785
1060
|
|
786
1061
|
if (next_token) { token_free(next_token); }
|
787
1062
|
|
@@ -819,6 +1094,16 @@ static void parser_parse_in_data_state(parser_T* parser, array_T* children, arra
|
|
819
1094
|
continue;
|
820
1095
|
}
|
821
1096
|
|
1097
|
+
if (token_is(parser, TOKEN_XML_DECLARATION)) {
|
1098
|
+
array_append(children, parser_parse_xml_declaration(parser));
|
1099
|
+
continue;
|
1100
|
+
}
|
1101
|
+
|
1102
|
+
if (token_is(parser, TOKEN_CDATA_START)) {
|
1103
|
+
array_append(children, parser_parse_cdata(parser));
|
1104
|
+
continue;
|
1105
|
+
}
|
1106
|
+
|
822
1107
|
if (token_is(parser, TOKEN_HTML_COMMENT_START)) {
|
823
1108
|
array_append(children, parser_parse_html_comment(parser));
|
824
1109
|
continue;
|
@@ -928,12 +1213,40 @@ AST_DOCUMENT_NODE_T* parser_parse(parser_T* parser) {
|
|
928
1213
|
return parser_parse_document(parser);
|
929
1214
|
}
|
930
1215
|
|
1216
|
+
/**
 * Disposes of a consumed whitespace token.
 *
 * When the parser options enable `track_whitespace`, a whitespace node is
 * created from the token and appended to `children`. The token is freed in
 * either case.
 *
 * @param parser           The parser whose options are consulted.
 * @param whitespace_token The whitespace token; it is freed by this function.
 * @param children         The array that receives the node when tracking is on.
 */
static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token, array_T* children) {
  const bool tracking = parser->options != NULL && parser->options->track_whitespace;

  if (!tracking) {
    token_free(whitespace_token);
    return;
  }

  array_T* node_errors = array_init(8);

  AST_WHITESPACE_NODE_T* node = ast_whitespace_node_init(
    whitespace_token,
    whitespace_token->location->start,
    whitespace_token->location->end,
    node_errors
  );

  array_append(children, node);
  token_free(whitespace_token);
}
|
1230
|
+
|
1231
|
+
/**
 * Consumes every consecutive TOKEN_WHITESPACE / TOKEN_NEWLINE token.
 *
 * Each token is passed to parser_handle_whitespace() when `track_whitespace`
 * is enabled and `children` is not NULL; otherwise it is freed directly.
 *
 * @param parser   The parser to advance.
 * @param children The array for whitespace nodes, or NULL to discard the tokens.
 */
static void parser_consume_whitespace(parser_T* parser, array_T* children) {
  while (token_is_any_of(parser, TOKEN_WHITESPACE, TOKEN_NEWLINE)) {
    token_T* consumed = parser_advance(parser);

    const bool record = parser->options != NULL && parser->options->track_whitespace && children != NULL;

    if (!record) {
      token_free(consumed);
      continue;
    }

    parser_handle_whitespace(parser, consumed, children);
  }
}
|
1242
|
+
|
931
1243
|
/**
 * Releases a parser together with the lexer, current token, open-tags stack
 * and options it holds.
 *
 * @param parser The parser to free; NULL is accepted and ignored.
 */
void parser_free(parser_T* parser) {
  if (!parser) { return; }

  if (parser->lexer) { lexer_free(parser->lexer); }
  if (parser->current_token) { token_free(parser->current_token); }
  if (parser->open_tags_stack) { array_free(&parser->open_tags_stack); }

  // free(NULL) is a no-op, so the options pointer needs no guard.
  free(parser->options);
  free(parser);
}
|