@reteps/tree-sitter-htmlmustache 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +105 -0
- package/binding.gyp +30 -0
- package/bindings/node/binding.cc +19 -0
- package/bindings/node/binding_test.js +9 -0
- package/bindings/node/index.d.ts +27 -0
- package/bindings/node/index.js +13 -0
- package/cli/out/check.js +210 -0
- package/grammar.js +426 -0
- package/package.json +82 -0
- package/queries/highlights.scm +30 -0
- package/queries/injections.scm +7 -0
- package/src/custom_raw_tags.h +3 -0
- package/src/grammar.json +1417 -0
- package/src/mustache_tag.h +29 -0
- package/src/node-types.json +972 -0
- package/src/parser.c +10469 -0
- package/src/parser.o +0 -0
- package/src/scanner.c +640 -0
- package/src/scanner.o +0 -0
- package/src/tag.h +390 -0
- package/src/tree_sitter/alloc.h +54 -0
- package/src/tree_sitter/array.h +291 -0
- package/src/tree_sitter/parser.h +286 -0
- package/tree-sitter-htmlmustache.wasm +0 -0
- package/tree-sitter.json +36 -0
package/src/parser.o
ADDED
|
Binary file
|
package/src/scanner.c
ADDED
|
@@ -0,0 +1,640 @@
|
|
|
1
|
+
#include "tag.h"
|
|
2
|
+
#include "mustache_tag.h"
|
|
3
|
+
#include "custom_raw_tags.h"
|
|
4
|
+
#include "tree_sitter/parser.h"
|
|
5
|
+
|
|
6
|
+
#include <wctype.h>
|
|
7
|
+
|
|
8
|
+
#ifdef CUSTOM_RAW_TAGS
// Names of additional elements whose content is scanned as raw text.
// CUSTOM_RAW_TAGS is presumably defined by custom_raw_tags.h as a
// comma-separated list of string literals -- confirm against that header.
static const char *CUSTOM_RAW_TAG_NAMES[] = { CUSTOM_RAW_TAGS };
#define NUM_CUSTOM_RAW_TAGS (sizeof(CUSTOM_RAW_TAG_NAMES) / sizeof(CUSTOM_RAW_TAG_NAMES[0]))

// Returns true when tag_name has the same length and bytes as one of the
// entries in CUSTOM_RAW_TAG_NAMES, false otherwise.
static bool is_custom_raw_tag(const String *tag_name) {
    unsigned index = 0;
    while (index < NUM_CUSTOM_RAW_TAGS) {
        const char *candidate = CUSTOM_RAW_TAG_NAMES[index++];
        if (strlen(candidate) != tag_name->size) {
            continue;
        }
        if (memcmp(candidate, tag_name->contents, tag_name->size) == 0) {
            return true;
        }
    }
    return false;
}
#else
#define NUM_CUSTOM_RAW_TAGS 0
#endif
|
|
25
|
+
|
|
26
|
+
// External token kinds produced by this scanner. scan() indexes the
// valid_symbols array with these values and stores them in
// lexer->result_symbol.
// NOTE(review): the order presumably has to mirror the `externals` list in
// grammar.js -- confirm before reordering or inserting entries.
enum TokenType {
    // HTML tokens
    HTML_START_TAG_NAME = 0,
    HTML_SCRIPT_START_TAG_NAME,
    HTML_STYLE_START_TAG_NAME,
    HTML_RAW_START_TAG_NAME,
    HTML_END_TAG_NAME,
    HTML_ERRONEOUS_END_TAG_NAME,
    HTML_SELF_CLOSING_TAG_DELIMITER,
    HTML_IMPLICIT_END_TAG,
    HTML_RAW_TEXT,
    HTML_COMMENT,
    // Mustache tokens
    MUSTACHE_START_TAG_NAME,
    MUSTACHE_END_TAG_NAME,
    MUSTACHE_ERRONEOUS_END_TAG_NAME,
    MUSTACHE_END_TAG_HTML_IMPLICIT_END_TAG,
};
|
|
43
|
+
|
|
44
|
+
typedef struct {
|
|
45
|
+
Array(Tag) tags;
|
|
46
|
+
Array(MustacheTag) mustache_tags;
|
|
47
|
+
} Scanner;
|
|
48
|
+
|
|
49
|
+
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
50
|
+
|
|
51
|
+
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
|
|
52
|
+
|
|
53
|
+
static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
|
|
54
|
+
|
|
55
|
+
// Writes the scanner state into `buffer` and returns the number of bytes used.
//
// Layout (all integers in host byte order):
//   uint16 serialized_tag_count   HTML tag records actually written
//   uint16 tag_count              HTML tags on the stack (capped at UINT16_MAX)
//   HTML records                  type byte; CUSTOM tags add a length byte + name
//   uint16 m_serialized_tag_count mustache records actually written
//   uint16 m_tag_count            mustache tags on the stack (capped)
//   mustache records              length byte, name bytes, html_tag_stack_size
//
// Names longer than UINT8_MAX bytes are truncated. Records that do not fit in
// TREE_SITTER_SERIALIZATION_BUFFER_SIZE are dropped; deserialize() pads the
// stacks back to the recorded counts with empty tags.
static unsigned serialize(Scanner *scanner, char *buffer) {
    // The mustache header is written unconditionally after the HTML records,
    // so the HTML loop must always leave room for it. Without this reserve a
    // full HTML section would push the header past the end of the buffer.
    const unsigned mustache_header_size = 2 * sizeof(uint16_t);

    uint16_t tag_count = scanner->tags.size > UINT16_MAX ? UINT16_MAX : scanner->tags.size;
    uint16_t serialized_tag_count = 0;

    // Leave the first slot free; serialized_tag_count is back-filled below.
    unsigned size = sizeof(serialized_tag_count);
    memcpy(&buffer[size], &tag_count, sizeof(tag_count));
    size += sizeof(tag_count);

    for (; serialized_tag_count < tag_count; serialized_tag_count++) {
        const Tag *tag = &scanner->tags.contents[serialized_tag_count];
        if (tag->type == CUSTOM) {
            unsigned name_length = tag->custom_tag_name.size;
            if (name_length > UINT8_MAX) {
                name_length = UINT8_MAX;
            }
            // type byte + length byte + name, plus the reserved mustache header
            if (size + 2 + name_length + mustache_header_size > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
                break;
            }
            buffer[size++] = (char)tag->type;
            buffer[size++] = (char)name_length;
            // memcpy rather than strncpy: the name is a length-prefixed byte
            // run, not a NUL-terminated string.
            if (name_length > 0) {
                memcpy(&buffer[size], tag->custom_tag_name.contents, name_length);
            }
            size += name_length;
        } else {
            // type byte, plus the reserved mustache header
            if (size + 1 + mustache_header_size > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
                break;
            }
            buffer[size++] = (char)tag->type;
        }
    }

    memcpy(&buffer[0], &serialized_tag_count, sizeof(serialized_tag_count));

    // Mustache tags
    uint16_t m_tag_count =
        scanner->mustache_tags.size > UINT16_MAX ? UINT16_MAX : scanner->mustache_tags.size;
    uint16_t m_serialized_tag_count = 0;

    // Slot for m_serialized_tag_count, back-filled after the loop.
    unsigned mustache_start_offset = size;
    size += sizeof(m_serialized_tag_count);
    memcpy(&buffer[size], &m_tag_count, sizeof(m_tag_count));
    size += sizeof(m_tag_count);

    for (; m_serialized_tag_count < m_tag_count; m_serialized_tag_count++) {
        const MustacheTag *tag = &scanner->mustache_tags.contents[m_serialized_tag_count];
        unsigned name_length = tag->tag_name.size;
        if (name_length > UINT8_MAX) {
            name_length = UINT8_MAX;
        }
        // length byte + name + html_tag_stack_size
        if (size + 1 + name_length + sizeof(unsigned) > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
            break;
        }
        buffer[size++] = (char)name_length;
        if (name_length > 0) {
            memcpy(&buffer[size], tag->tag_name.contents, name_length);
        }
        size += name_length;
        memcpy(&buffer[size], &tag->html_tag_stack_size, sizeof(unsigned));
        size += sizeof(unsigned);
    }

    memcpy(&buffer[mustache_start_offset], &m_serialized_tag_count, sizeof(m_serialized_tag_count));
    return size;
}
|
|
119
|
+
|
|
120
|
+
// Rebuilds the scanner state from a buffer produced by serialize().
//
// Both stacks are emptied first (freeing their tags). An empty buffer leaves
// them empty. Otherwise the HTML section and then the mustache section are
// read; when fewer records were stored than the recorded stack size, the
// stack is padded with empty tags so its depth is preserved.
//
// Every read is checked against `length`; a truncated buffer stops decoding
// instead of reading past the end.
static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
    for (unsigned i = 0; i < scanner->tags.size; i++) {
        tag_free(&scanner->tags.contents[i]);
    }
    for (unsigned i = 0; i < scanner->mustache_tags.size; i++) {
        mustache_tag_free(&scanner->mustache_tags.contents[i]);
    }
    array_clear(&scanner->tags);
    array_clear(&scanner->mustache_tags);

    if (length == 0) {
        return;
    }

    unsigned size = 0;
    bool truncated = false;

    // HTML tags: header is serialized_tag_count followed by tag_count.
    uint16_t tag_count = 0;
    uint16_t serialized_tag_count = 0;
    if (length < sizeof(serialized_tag_count) + sizeof(tag_count)) {
        return;
    }
    memcpy(&serialized_tag_count, &buffer[size], sizeof(serialized_tag_count));
    size += sizeof(serialized_tag_count);
    memcpy(&tag_count, &buffer[size], sizeof(tag_count));
    size += sizeof(tag_count);

    array_reserve(&scanner->tags, tag_count);
    if (tag_count > 0) {
        unsigned iter = 0;
        for (iter = 0; iter < serialized_tag_count; iter++) {
            if (size >= length) {
                truncated = true;
                break;
            }
            TagType type = (TagType)buffer[size++];
            unsigned name_length = 0;
            if (type == CUSTOM) {
                if (size >= length) {
                    truncated = true;
                    break;
                }
                name_length = (uint8_t)buffer[size++];
                if (name_length > length - size) {
                    truncated = true;
                    break;
                }
            }

            Tag tag = tag_new();
            tag.type = type;
            // Skip the copy for empty names: nothing is allocated for them,
            // so the destination pointer may be NULL.
            if (name_length > 0) {
                array_reserve(&tag.custom_tag_name, name_length);
                tag.custom_tag_name.size = name_length;
                memcpy(tag.custom_tag_name.contents, &buffer[size], name_length);
            }
            size += name_length;
            array_push(&scanner->tags, tag);
        }
        // add zero tags if we didn't read enough, this is because the
        // buffer had no more room but we held more tags.
        for (; iter < tag_count; iter++) {
            array_push(&scanner->tags, tag_new());
        }
    }

    // Mustache tags: only readable when the HTML section decoded completely
    // and the four header bytes are present.
    uint16_t m_tag_count = 0;
    uint16_t m_serialized_tag_count = 0;
    if (truncated || length - size < sizeof(m_serialized_tag_count) + sizeof(m_tag_count)) {
        return;
    }
    memcpy(&m_serialized_tag_count, &buffer[size], sizeof(m_serialized_tag_count));
    size += sizeof(m_serialized_tag_count);
    memcpy(&m_tag_count, &buffer[size], sizeof(m_tag_count));
    size += sizeof(m_tag_count);

    array_reserve(&scanner->mustache_tags, m_tag_count);
    if (m_tag_count > 0) {
        unsigned iter = 0;
        for (iter = 0; iter < m_serialized_tag_count; iter++) {
            if (size >= length) {
                break;
            }
            unsigned name_length = (uint8_t)buffer[size++];
            // name bytes + html_tag_stack_size must both be present
            if (length - size < name_length + sizeof(unsigned)) {
                break;
            }

            MustacheTag tag = mustache_tag_new();
            if (name_length > 0) {
                array_reserve(&tag.tag_name, name_length);
                tag.tag_name.size = name_length;
                memcpy(tag.tag_name.contents, &buffer[size], name_length);
            }
            size += name_length;
            memcpy(&tag.html_tag_stack_size, &buffer[size], sizeof(unsigned));
            size += sizeof(unsigned);
            array_push(&scanner->mustache_tags, tag);
        }
        // add zero tags if we didn't read enough, this is because the
        // buffer had no more room but we held more tags.
        for (; iter < m_tag_count; iter++) {
            array_push(&scanner->mustache_tags, mustache_tag_new());
        }
    }
}
|
|
197
|
+
|
|
198
|
+
// Debug helper: writes the bytes of tag_name to stdout one character at a
// time, with no trailing newline.
static void print_tag_name(String *tag_name) {
    uint32_t position = 0;
    while (position < tag_name->size) {
        printf("%c", tag_name->contents[position]);
        position++;
    }
}
|
|
204
|
+
|
|
205
|
+
// Consumes a run of alphanumeric, '-' and ':' characters from the lexer and
// returns them upper-cased in a newly created String. The result is empty
// when the lookahead is not a name character. The caller owns the String.
static String scan_html_tag_name(TSLexer *lexer) {
    String name = array_new();
    for (;;) {
        bool is_name_char = iswalnum(lexer->lookahead) ||
                            lexer->lookahead == '-' ||
                            lexer->lookahead == ':';
        if (!is_name_char) {
            break;
        }
        array_push(&name, towupper(lexer->lookahead));
        advance(lexer);
    }
    return name;
}
|
|
216
|
+
|
|
217
|
+
// Scans the remainder of an HTML comment; the caller has already consumed
// "<!". Requires "--" next, then reads until a '>' that is directly preceded
// by at least two '-' characters. On success the token ends after that '>'
// and result_symbol is HTML_COMMENT. Returns false when the opening "--" is
// missing or the input ends before the comment is closed.
static bool scan_html_comment(TSLexer *lexer) {
    // Opening "--"
    for (unsigned i = 0; i < 2; i++) {
        if (lexer->lookahead != '-') {
            return false;
        }
        advance(lexer);
    }

    // Number of consecutive '-' characters seen immediately before the lookahead.
    unsigned trailing_dashes = 0;
    while (lexer->lookahead) {
        if (lexer->lookahead == '-') {
            ++trailing_dashes;
        } else if (lexer->lookahead == '>' && trailing_dashes >= 2) {
            lexer->result_symbol = HTML_COMMENT;
            advance(lexer);
            lexer->mark_end(lexer);
            return true;
        } else {
            // Any other character (including a '>' without "--") breaks the run.
            trailing_dashes = 0;
        }
        advance(lexer);
    }
    return false;
}
|
|
247
|
+
|
|
248
|
+
// Scans the raw-text content of the element on top of the HTML tag stack
// (SCRIPT, STYLE, or a configured custom raw tag) up to, but not including,
// its case-insensitive end delimiter ("</SCRIPT", "</STYLE", "</NAME").
// If the delimiter never appears, the token runs to the end of input.
//
// Returns false when the tag stack is empty or, with CUSTOM_RAW_TAGS defined,
// when the top tag is not a raw-text element. Otherwise sets result_symbol to
// HTML_RAW_TEXT and returns true (the token may be empty).
static bool scan_raw_text(Scanner *scanner, TSLexer *lexer) {
    if (scanner->tags.size == 0) {
        return false;
    }

    lexer->mark_end(lexer);

    Tag *tag = array_back(&scanner->tags);

#ifdef CUSTOM_RAW_TAGS
    char end_delimiter_buf[256];
    const char *end_delimiter;
    if (tag->type == SCRIPT) {
        end_delimiter = "</SCRIPT";
    } else if (tag->type == STYLE) {
        end_delimiter = "</STYLE";
    } else if (tag->type == CUSTOM && is_custom_raw_tag(&tag->custom_tag_name)) {
        // Build "</NAME", truncating the name so that "</" and the
        // terminating NUL always fit in the buffer.
        end_delimiter_buf[0] = '<';
        end_delimiter_buf[1] = '/';
        unsigned len = tag->custom_tag_name.size;
        if (len > sizeof(end_delimiter_buf) - 3) {
            len = sizeof(end_delimiter_buf) - 3;
        }
        memcpy(end_delimiter_buf + 2, tag->custom_tag_name.contents, len);
        end_delimiter_buf[2 + len] = '\0';
        end_delimiter = end_delimiter_buf;
    } else {
        return false;
    }
#else
    const char *end_delimiter = tag->type == SCRIPT ? "</SCRIPT" : "</STYLE";
#endif

    // Loop-invariant, so computed once rather than on every matched character.
    const size_t delimiter_length = strlen(end_delimiter);
    size_t delimiter_index = 0;
    while (lexer->lookahead) {
        if (towupper(lexer->lookahead) == end_delimiter[delimiter_index]) {
            delimiter_index++;
            if (delimiter_index == delimiter_length) {
                // Full delimiter seen; the token end was marked before its '<'.
                break;
            }
            advance(lexer);
        } else if (delimiter_index > 0) {
            // A partial match failed. The characters matched so far are raw
            // text, so mark the end after them, then re-test the current
            // character from the start of the delimiter WITHOUT consuming it:
            // it may itself be the '<' of the real end tag ("<</script>").
            lexer->mark_end(lexer);
            delimiter_index = 0;
        } else {
            advance(lexer);
            lexer->mark_end(lexer);
        }
    }

    lexer->result_symbol = HTML_RAW_TEXT;
    return true;
}
|
|
299
|
+
|
|
300
|
+
// Removes the top entry of the HTML tag stack and releases it with tag_free().
// Every caller in this file checks that the stack is non-empty first.
static void pop_html_tag(Scanner *scanner) {
    Tag top = array_pop(&scanner->tags);
    tag_free(&top);
}
|
|
304
|
+
|
|
305
|
+
// Decides whether the element on top of the HTML tag stack must be closed
// implicitly before the upcoming tag (or end of input). scan() calls this
// after consuming '<', or directly at a '\0' lookahead.
//
// An implicit end tag is emitted (top of stack popped, result_symbol set to
// HTML_IMPLICIT_END_TAG, returns true) when:
//   - the upcoming tag is an opening tag and the top element is void;
//   - the upcoming tag is a closing tag that does not match the top element
//     but matches, by type, some element deeper in the stack;
//   - the upcoming opening tag cannot be contained by the top element, or the
//     top element is HTML/HEAD/BODY and the lexer is at end of input.
// Returns false in every other case, including when no tag name follows and
// the lexer is not at end of input.
static bool scan_implicit_end_tag(Scanner *scanner, TSLexer *lexer) {
    Tag *parent = NULL;
    if (scanner->tags.size != 0) {
        parent = array_back(&scanner->tags);
    }

    const bool is_closing_tag = lexer->lookahead == '/';
    if (is_closing_tag) {
        advance(lexer);
    } else if (parent && tag_is_void(parent)) {
        pop_html_tag(scanner);
        lexer->result_symbol = HTML_IMPLICIT_END_TAG;
        return true;
    }

    String tag_name = scan_html_tag_name(lexer);
    if (tag_name.size == 0 && !lexer->eof(lexer)) {
        array_delete(&tag_name);
        return false;
    }

    // tag_name is handed to tag_for_name(); from here on only next_tag is freed.
    Tag next_tag = tag_for_name(tag_name);
    bool emit_implicit_end = false;

    if (is_closing_tag) {
        // A closing tag that matches the top of the stack is a regular end
        // tag, not an implicit one.
        bool closes_top =
            scanner->tags.size > 0 && tag_eq(array_back(&scanner->tags), &next_tag);
        if (!closes_top) {
            // Otherwise, dig deeper and queue implicit end tags (to be nice in
            // the case of malformed HTML)
            for (unsigned depth = scanner->tags.size; depth > 0; depth--) {
                if (scanner->tags.contents[depth - 1].type == next_tag.type) {
                    emit_implicit_end = true;
                    break;
                }
            }
        }
    } else if (parent) {
        bool is_document_level =
            parent->type == HTML || parent->type == HEAD || parent->type == BODY;
        emit_implicit_end = !tag_can_contain(parent, &next_tag) ||
                            (is_document_level && lexer->eof(lexer));
    }

    if (emit_implicit_end) {
        pop_html_tag(scanner);
        lexer->result_symbol = HTML_IMPLICIT_END_TAG;
    }
    tag_free(&next_tag);
    return emit_implicit_end;
}
|
|
361
|
+
|
|
362
|
+
// Scans the name of an opening tag, pushes the corresponding Tag onto the HTML
// tag stack and selects the token kind: the SCRIPT and STYLE variants, the RAW
// variant for configured custom raw tags, or the generic start tag name.
// Returns false (pushing nothing) when no name characters are present.
static bool scan_start_tag_name(Scanner *scanner, TSLexer *lexer) {
    String tag_name = scan_html_tag_name(lexer);
    if (tag_name.size == 0) {
        array_delete(&tag_name);
        return false;
    }

    Tag tag = tag_for_name(tag_name);
    array_push(&scanner->tags, tag);

    if (tag.type == SCRIPT) {
        lexer->result_symbol = HTML_SCRIPT_START_TAG_NAME;
    } else if (tag.type == STYLE) {
        lexer->result_symbol = HTML_STYLE_START_TAG_NAME;
    } else {
        bool is_raw = false;
#ifdef CUSTOM_RAW_TAGS
        is_raw = tag.type == CUSTOM && is_custom_raw_tag(&tag.custom_tag_name);
#endif
        lexer->result_symbol = is_raw ? HTML_RAW_START_TAG_NAME : HTML_START_TAG_NAME;
    }
    return true;
}
|
|
390
|
+
|
|
391
|
+
// Scans the name of a closing tag. When it equals the tag on top of the HTML
// stack, that tag is popped and the token is HTML_END_TAG_NAME; otherwise the
// stack is left untouched and the token is HTML_ERRONEOUS_END_TAG_NAME.
// Returns false when no name characters are present.
static bool scan_end_tag_name(Scanner *scanner, TSLexer *lexer) {
    String tag_name = scan_html_tag_name(lexer);
    if (tag_name.size == 0) {
        array_delete(&tag_name);
        return false;
    }

    Tag closing = tag_for_name(tag_name);
    bool matches_top =
        scanner->tags.size > 0 && tag_eq(array_back(&scanner->tags), &closing);
    if (matches_top) {
        pop_html_tag(scanner);
    }
    lexer->result_symbol = matches_top ? HTML_END_TAG_NAME : HTML_ERRONEOUS_END_TAG_NAME;

    tag_free(&closing);
    return true;
}
|
|
411
|
+
|
|
412
|
+
// Scans the "/>" that ends a self-closing tag; scan() calls this with '/' as
// the lookahead. On success the element opened by this tag is popped from the
// HTML stack (when the stack is non-empty), result_symbol is set to
// HTML_SELF_CLOSING_TAG_DELIMITER and true is returned. Returns false when
// '/' is not followed by '>'.
static bool scan_self_closing_tag_delimiter(Scanner *scanner, TSLexer *lexer) {
    advance(lexer);  // '/'
    if (lexer->lookahead != '>') {
        return false;
    }
    advance(lexer);  // '>'

    if (scanner->tags.size > 0) {
        pop_html_tag(scanner);
    }
    // Set the symbol on every successful return. Previously it was assigned
    // only when the stack was non-empty, so with an empty stack the function
    // reported success while leaving result_symbol at whatever value it
    // already held.
    lexer->result_symbol = HTML_SELF_CLOSING_TAG_DELIMITER;
    return true;
}
|
|
424
|
+
|
|
425
|
+
// Collects the characters of a mustache tag name: everything up to the first
// '}', whitespace character, or end of input. Returns them, unmodified, in a
// newly created String owned by the caller. `scanner` is not used.
static String scan_mustache_tag_name(Scanner *scanner, TSLexer *lexer) {
    String name = array_new();
    for (;;) {
        if (lexer->lookahead == '}' || lexer->eof(lexer)) {
            break;
        }
        if (iswspace(lexer->lookahead)) {
            break;
        }
        array_push(&name, lexer->lookahead);
        advance(lexer);
    }
    return name;
}
|
|
438
|
+
|
|
439
|
+
// Scans the name of an opening mustache tag and pushes it onto the mustache
// stack together with the current depth of the HTML tag stack. Emits
// MUSTACHE_START_TAG_NAME. Returns false (pushing nothing) when the name is
// empty.
static bool scan_mustache_start_tag_name(Scanner *scanner, TSLexer *lexer) {
    String name = scan_mustache_tag_name(scanner, lexer);
    if (name.size == 0) {
        array_delete(&name);
        return false;
    }

    MustacheTag opened = mustache_tag_new();
    opened.tag_name = name;  // ownership of the name moves into the stack entry
    opened.html_tag_stack_size = scanner->tags.size;
    array_push(&scanner->mustache_tags, opened);

    lexer->result_symbol = MUSTACHE_START_TAG_NAME;
    return true;
}
|
|
462
|
+
|
|
463
|
+
// Scans the name of a closing mustache tag. When the mustache stack is
// non-empty its top entry is popped and freed in every case; the token is
// MUSTACHE_END_TAG_NAME if that entry matched the scanned name and
// MUSTACHE_ERRONEOUS_END_TAG_NAME otherwise (including when the stack was
// empty). Returns false when the name is empty.
static bool scan_mustache_end_tag_name(Scanner *scanner, TSLexer *lexer) {
    String name = scan_mustache_tag_name(scanner, lexer);
    if (name.size == 0) {
        array_delete(&name);
        return false;
    }

    MustacheTag closing = mustache_tag_new();
    closing.tag_name = name;

    bool has_open_tag = scanner->mustache_tags.size > 0;
    bool matches_top =
        has_open_tag && mustache_tag_eq(array_back(&scanner->mustache_tags), &closing);

    if (has_open_tag) {
        MustacheTag top = array_pop(&scanner->mustache_tags);
        mustache_tag_free(&top);
    }
    lexer->result_symbol = matches_top ? MUSTACHE_END_TAG_NAME : MUSTACHE_ERRONEOUS_END_TAG_NAME;

    mustache_tag_free(&closing);
    return true;
}
|
|
501
|
+
|
|
502
|
+
// Looks ahead for "{{/". If found while the HTML tag stack is deeper than it
// was when the innermost open mustache tag was pushed, pops one HTML tag and
// emits a zero-width MUSTACHE_END_TAG_HTML_IMPLICIT_END_TAG (the token end is
// marked before any character is consumed).
//
// Returns false otherwise. Note that on a false return the lexer may already
// have been advanced past one or two '{' characters.
static bool scan_mustache_end_tag_html_implicit_end_tag(Scanner *scanner, TSLexer *lexer) {
    lexer->mark_end(lexer);

    // Expect "{{" ...
    for (unsigned i = 0; i < 2; i++) {
        if (lexer->lookahead != '{') {
            return false;
        }
        advance(lexer);
    }
    // ... followed by '/', which is only peeked at.
    if (lexer->lookahead != '/') {
        return false;
    }

    if (scanner->mustache_tags.size == 0) {
        return false;
    }
    MustacheTag *open_section = array_back(&scanner->mustache_tags);
    if (scanner->tags.size <= open_section->html_tag_stack_size) {
        return false;
    }

    pop_html_tag(scanner);
    lexer->result_symbol = MUSTACHE_END_TAG_HTML_IMPLICIT_END_TAG;
    return true;
}
|
|
526
|
+
|
|
527
|
+
// Entry point of the external scanner: picks a sub-scanner from the set of
// tokens the parser currently accepts (valid_symbols) and the lookahead
// character. Returns true when a token was recognised, with
// lexer->result_symbol set by the sub-scanner.
static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
    // During error recovery, tree-sitter sets all valid_symbols to true.
    // Bail out to avoid corrupting the tag stacks with garbage state.
    if (valid_symbols[HTML_START_TAG_NAME] && valid_symbols[HTML_END_TAG_NAME]) {
        return false;
    }

    // Raw text is scanned before whitespace is skipped, so leading whitespace
    // stays part of the raw text.
    if (valid_symbols[HTML_RAW_TEXT] && !valid_symbols[HTML_START_TAG_NAME] && !valid_symbols[HTML_END_TAG_NAME]) {
        return scan_raw_text(scanner, lexer);
    }

    while (iswspace(lexer->lookahead)) {
        skip(lexer);
    }

    if (valid_symbols[MUSTACHE_START_TAG_NAME]) {
        return scan_mustache_start_tag_name(scanner, lexer);
    }

    if (valid_symbols[MUSTACHE_END_TAG_NAME] || valid_symbols[MUSTACHE_ERRONEOUS_END_TAG_NAME]) {
        return scan_mustache_end_tag_name(scanner, lexer);
    }

    // The mustache probe advances past '{' characters before it can fail, so
    // remember whether it will move the lexer off the position we started at.
    bool probe_consumed_input = false;
    if (valid_symbols[MUSTACHE_END_TAG_HTML_IMPLICIT_END_TAG]) {
        probe_consumed_input = lexer->lookahead == '{';
        if (scan_mustache_end_tag_html_implicit_end_tag(scanner, lexer)) {
            return true;
        }
        // Don't return false - continue to other checks
    }

    // Check for void element implicit end tag before other processing
    // This handles cases where a void element is followed by EOF or content other than '<'
    // (It does not depend on the lookahead, so it is still safe after a failed probe.)
    if (valid_symbols[HTML_IMPLICIT_END_TAG]) {
        Tag *parent = scanner->tags.size == 0 ? NULL : array_back(&scanner->tags);
        if (parent && tag_is_void(parent)) {
            pop_html_tag(scanner);
            lexer->result_symbol = HTML_IMPLICIT_END_TAG;
            return true;
        }
    }

    // Everything below dispatches on the lookahead character. After a failed
    // probe that consumed '{' the lookahead no longer belongs to the position
    // where this token has to start, and a token produced from here (e.g. a
    // comment or implicit end tag after "{<") would swallow the '{'. Give up
    // instead of scanning from the shifted position.
    if (probe_consumed_input) {
        return false;
    }

    switch (lexer->lookahead) {
        case '<':
            // Mark the end before '<' so an implicit end tag is zero-width;
            // scan_html_comment() marks its own end after the closing '>'.
            lexer->mark_end(lexer);
            advance(lexer);

            if (lexer->lookahead == '!') {
                advance(lexer);
                return scan_html_comment(lexer);
            }

            if (valid_symbols[HTML_IMPLICIT_END_TAG]) {
                return scan_implicit_end_tag(scanner, lexer);
            }
            break;

        case '\0':
            if (valid_symbols[HTML_IMPLICIT_END_TAG]) {
                return scan_implicit_end_tag(scanner, lexer);
            }
            break;

        case '/':
            if (valid_symbols[HTML_SELF_CLOSING_TAG_DELIMITER]) {
                return scan_self_closing_tag_delimiter(scanner, lexer);
            }
            break;

        default:
            if ((valid_symbols[HTML_START_TAG_NAME] || valid_symbols[HTML_END_TAG_NAME]) && !valid_symbols[HTML_RAW_TEXT]) {
                return valid_symbols[HTML_START_TAG_NAME] ? scan_start_tag_name(scanner, lexer)
                                                          : scan_end_tag_name(scanner, lexer);
            } else if (valid_symbols[HTML_ERRONEOUS_END_TAG_NAME]) {
                return scan_end_tag_name(scanner, lexer);
            }
            break;
    }

    return false;
}
|
|
608
|
+
|
|
609
|
+
void *tree_sitter_htmlmustache_external_scanner_create() {
|
|
610
|
+
Scanner *scanner = (Scanner *)ts_calloc(1, sizeof(Scanner));
|
|
611
|
+
return scanner;
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
// Exported scan entry point: forwards to scan() with the payload viewed as a Scanner.
bool tree_sitter_htmlmustache_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
    return scan((Scanner *)payload, lexer, valid_symbols);
}
|
|
618
|
+
|
|
619
|
+
unsigned tree_sitter_htmlmustache_external_scanner_serialize(void *payload, char *buffer) {
|
|
620
|
+
Scanner *scanner = (Scanner *)payload;
|
|
621
|
+
return serialize(scanner, buffer);
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
void tree_sitter_htmlmustache_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
|
625
|
+
Scanner *scanner = (Scanner *)payload;
|
|
626
|
+
deserialize(scanner, buffer, length);
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
void tree_sitter_htmlmustache_external_scanner_destroy(void *payload) {
|
|
630
|
+
Scanner *scanner = (Scanner *)payload;
|
|
631
|
+
for (unsigned i = 0; i < scanner->tags.size; i++) {
|
|
632
|
+
tag_free(&scanner->tags.contents[i]);
|
|
633
|
+
}
|
|
634
|
+
array_delete(&scanner->tags);
|
|
635
|
+
for (unsigned i = 0; i < scanner->mustache_tags.size; i++) {
|
|
636
|
+
mustache_tag_free(&scanner->mustache_tags.contents[i]);
|
|
637
|
+
}
|
|
638
|
+
array_delete(&scanner->mustache_tags);
|
|
639
|
+
ts_free(scanner);
|
|
640
|
+
}
|
package/src/scanner.o
ADDED
|
Binary file
|