@ast-grep/lang-yaml 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/scanner.c ADDED
@@ -0,0 +1,1404 @@
1
+ #include "tree_sitter/array.h"
2
+ #include "tree_sitter/parser.h"
3
+
4
+ #define _str(x) #x
5
+ #define _file(x) _str(schema.x.c)
6
+
7
+ #ifndef YAML_SCHEMA
8
+ #define YAML_SCHEMA core
9
+ #endif
10
+
11
+ #include _file(YAML_SCHEMA)
12
+
13
+ // clang-format off
14
+
15
/*
 * Token identifiers shared with the parser: scan() indexes `valid_symbols`
 * with these values and the RET_SYM macro stores one of them in
 * `lexer->result_symbol`.  Each enumerator's value is its position in this
 * list, so entries must never be reordered or removed.
 *
 * Name prefixes match the position flags computed in scan():
 *   R_   the token starts on the row where the previous token ended
 *   BR_  the token starts on a later row, indented deeper than the current indent
 *   B_   the token starts on a later row, at exactly the current indent
 *   S_   the token starts at column 0
 */
typedef enum {
    END_OF_FILE = 0,

    /* directives and document markers */
    S_DIR_YML_BGN, R_DIR_YML_VER,
    S_DIR_TAG_BGN, R_DIR_TAG_HDL, R_DIR_TAG_PFX,
    S_DIR_RSV_BGN, R_DIR_RSV_PRM,
    S_DRS_END,
    S_DOC_END,

    /* block collections and block scalars */
    R_BLK_SEQ_BGN, BR_BLK_SEQ_BGN, B_BLK_SEQ_BGN,
    R_BLK_KEY_BGN, BR_BLK_KEY_BGN, B_BLK_KEY_BGN,
    R_BLK_VAL_BGN, BR_BLK_VAL_BGN, B_BLK_VAL_BGN,
    R_BLK_IMP_BGN,
    R_BLK_LIT_BGN, BR_BLK_LIT_BGN,
    R_BLK_FLD_BGN, BR_BLK_FLD_BGN,
    BR_BLK_STR_CTN,

    /* flow collections */
    R_FLW_SEQ_BGN, BR_FLW_SEQ_BGN, B_FLW_SEQ_BGN,
    R_FLW_SEQ_END, BR_FLW_SEQ_END, B_FLW_SEQ_END,
    R_FLW_MAP_BGN, BR_FLW_MAP_BGN, B_FLW_MAP_BGN,
    R_FLW_MAP_END, BR_FLW_MAP_END, B_FLW_MAP_END,
    R_FLW_SEP_BGN, BR_FLW_SEP_BGN,
    R_FLW_KEY_BGN, BR_FLW_KEY_BGN,
    R_FLW_JSV_BGN, BR_FLW_JSV_BGN,
    R_FLW_NJV_BGN, BR_FLW_NJV_BGN,

    /* double-quoted scalars */
    R_DQT_STR_BGN, BR_DQT_STR_BGN, B_DQT_STR_BGN,
    R_DQT_STR_CTN, BR_DQT_STR_CTN,
    R_DQT_ESC_NWL, BR_DQT_ESC_NWL,
    R_DQT_ESC_SEQ, BR_DQT_ESC_SEQ,
    R_DQT_STR_END, BR_DQT_STR_END,

    /* single-quoted scalars */
    R_SQT_STR_BGN, BR_SQT_STR_BGN, B_SQT_STR_BGN,
    R_SQT_STR_CTN, BR_SQT_STR_CTN,
    R_SQT_ESC_SQT, BR_SQT_ESC_SQT,
    R_SQT_STR_END, BR_SQT_STR_END,

    /* single-line plain scalars; the family is selected by SGL_PLN_SYM */
    R_SGL_PLN_NUL_BLK, BR_SGL_PLN_NUL_BLK, B_SGL_PLN_NUL_BLK, R_SGL_PLN_NUL_FLW, BR_SGL_PLN_NUL_FLW,
    R_SGL_PLN_BOL_BLK, BR_SGL_PLN_BOL_BLK, B_SGL_PLN_BOL_BLK, R_SGL_PLN_BOL_FLW, BR_SGL_PLN_BOL_FLW,
    R_SGL_PLN_INT_BLK, BR_SGL_PLN_INT_BLK, B_SGL_PLN_INT_BLK, R_SGL_PLN_INT_FLW, BR_SGL_PLN_INT_FLW,
    R_SGL_PLN_FLT_BLK, BR_SGL_PLN_FLT_BLK, B_SGL_PLN_FLT_BLK, R_SGL_PLN_FLT_FLW, BR_SGL_PLN_FLT_FLW,
    R_SGL_PLN_STR_BLK, BR_SGL_PLN_STR_BLK, B_SGL_PLN_STR_BLK, R_SGL_PLN_STR_FLW, BR_SGL_PLN_STR_FLW,

    /* multi-line plain scalars */
    R_MTL_PLN_STR_BLK, BR_MTL_PLN_STR_BLK,
    R_MTL_PLN_STR_FLW, BR_MTL_PLN_STR_FLW,

    /* node properties and aliases */
    R_TAG, BR_TAG, B_TAG,
    R_ACR_BGN, BR_ACR_BGN, B_ACR_BGN, R_ACR_CTN,
    R_ALS_BGN, BR_ALS_BGN, B_ALS_BGN, R_ALS_CTN,

    BL,
    COMMENT,

    ERR_REC,
} TokenType;
66
+
67
+ // clang-format on
68
+
69
+ #define SCN_SUCC 1 /* tri-state result of the scn_* character helpers: input was consumed */
70
+ #define SCN_STOP 0 /* nothing matched at the lookahead (e.g. scn_uri_esc when it is not '%') */
71
+ #define SCN_FAIL (-1) /* input started to match but is malformed (e.g. '%' without two hex digits) */
72
+
73
+ #define IND_ROT 'r' /* indentation-stack entry kind: root entry, pushed by deserialize() */
74
+ #define IND_MAP 'm' /* entry kind pushed by MAY_PUSH_IMP_IND */
75
+ #define IND_SEQ 'q' /* entry kind pushed by MAY_PUSH_SPC_SEQ_IND */
76
+ #define IND_STR 's' /* entry kind pushed by scn_blk_str_bgn() for a block scalar */
77
+
78
+ #define RET_SYM(RESULT_SYMBOL) /* commit the marked end position, set the result token and return true from the enclosing function */ \
79
+ { \
80
+ flush(scanner); \
81
+ lexer->result_symbol = RESULT_SYMBOL; \
82
+ return true; \
83
+ }
84
+
85
+ #define POP_IND() /* pop one indentation entry; returns false from the enclosing function instead of popping the last remaining entry */ \
86
+ { \
87
+ /* only one entry is left on the stack: inconsistent state caused by error recovery */ \
88
+ if (scanner->ind_typ_stk.size == 1) { \
89
+ return false; \
90
+ } \
91
+ pop_ind(scanner); \
92
+ }
93
+
94
+ #define PUSH_IND(TYP, LEN) push_ind(scanner, TYP, LEN) /* shorthand that supplies the local `scanner` */
95
+
96
+ #define PUSH_BGN_IND(TYP) /* push an entry at the local `bgn_col`; returns false if the local `has_tab_ind` is set */ \
97
+ { \
98
+ if (has_tab_ind) \
99
+ return false; \
100
+ push_ind(scanner, TYP, bgn_col); \
101
+ }
102
+
103
+ #define MAY_PUSH_IMP_IND(TYP) /* NOTE(review): TYP is not used by the expansion; IND_MAP is always pushed */ \
104
+ { \
105
+ if (cur_ind != scanner->blk_imp_col) { /* push only when the recorded column differs from the local `cur_ind` */ \
106
+ if (scanner->blk_imp_tab) \
107
+ return false; \
108
+ push_ind(scanner, IND_MAP, scanner->blk_imp_col); \
109
+ } \
110
+ }
111
+
112
+ #define MAY_PUSH_SPC_SEQ_IND() /* push an IND_SEQ entry at `bgn_col` only when the local `cur_ind_typ` is IND_MAP */ \
113
+ { \
114
+ if (cur_ind_typ == IND_MAP) { \
115
+ push_ind(scanner, IND_SEQ, bgn_col); \
116
+ } \
117
+ }
118
+
119
+ #define MAY_UPD_IMP_COL() /* record row, column and tab flag of the current token start, once per row */ \
120
+ { \
121
+ if (scanner->blk_imp_row != bgn_row) { /* keep the values already recorded for this row */ \
122
+ scanner->blk_imp_row = bgn_row; \
123
+ scanner->blk_imp_col = bgn_col; \
124
+ scanner->blk_imp_tab = has_tab_ind; \
125
+ } \
126
+ }
127
+
128
+ #define SGL_PLN_SYM(POS, CTX) /* pick the POS##_SGL_PLN_<kind>_##CTX token matching scanner->rlt_sch; any other value selects the STR token */ \
129
+ (scanner->rlt_sch == RS_NULL ? POS##_SGL_PLN_NUL_##CTX \
130
+ : scanner->rlt_sch == RS_BOOL ? POS##_SGL_PLN_BOL_##CTX \
131
+ : scanner->rlt_sch == RS_INT ? POS##_SGL_PLN_INT_##CTX \
132
+ : scanner->rlt_sch == RS_FLOAT ? POS##_SGL_PLN_FLT_##CTX \
133
+ : POS##_SGL_PLN_STR_##CTX)
134
+
135
+ typedef struct { /* scanner state; the fields above "temp" are what serialize()/deserialize() persist */
136
+ int16_t row; /* row committed by flush(): where the last returned token ended */
137
+ int16_t col; /* column committed by flush() */
138
+ int16_t blk_imp_row; /* row last recorded by MAY_UPD_IMP_COL; -1 after a reset */
139
+ int16_t blk_imp_col; /* column recorded together with blk_imp_row; -1 after a reset */
140
+ int16_t blk_imp_tab; /* has_tab_ind value recorded together with blk_imp_row */
141
+ Array(int16_t) ind_typ_stk; /* indentation kinds (IND_*); entry 0 is always IND_ROT */
142
+ Array(int16_t) ind_len_stk; /* indentation columns, parallel to ind_typ_stk; entry 0 is -1 */
143
+
144
+ // temp: working state, re-established at the start of every scan() call
145
+ int16_t end_row; /* row at the last mrk_end() */
146
+ int16_t end_col; /* column at the last mrk_end() */
147
+ int16_t cur_row; /* row of the lookahead character */
148
+ int16_t cur_col; /* column of the lookahead character */
149
+ int32_t cur_chr; /* most recently consumed character; 0 before the first advance */
150
+ int8_t sch_stt; /* state threaded through adv_sch_stt() */
151
+ ResultSchema rlt_sch; /* classification written by adv_sch_stt(); RS_STR after init() */
152
+ } Scanner;
153
+
154
+ static unsigned serialize(Scanner *scanner, char *buffer) {
155
+ size_t size = 0;
156
+ *(int16_t *)&buffer[size] = scanner->row;
157
+ size += sizeof(int16_t);
158
+ *(int16_t *)&buffer[size] = scanner->col;
159
+ size += sizeof(int16_t);
160
+ *(int16_t *)&buffer[size] = scanner->blk_imp_row;
161
+ size += sizeof(int16_t);
162
+ *(int16_t *)&buffer[size] = scanner->blk_imp_col;
163
+ size += sizeof(int16_t);
164
+ *(int16_t *)&buffer[size] = scanner->blk_imp_tab;
165
+ size += sizeof(int16_t);
166
+ int16_t *typ_itr = scanner->ind_typ_stk.contents + 1;
167
+ int16_t *typ_end = scanner->ind_typ_stk.contents + scanner->ind_typ_stk.size;
168
+ int16_t *len_itr = scanner->ind_len_stk.contents + 1;
169
+ for (; typ_itr != typ_end && size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++typ_itr, ++len_itr) {
170
+ *(int16_t *)&buffer[size] = *typ_itr;
171
+ size += sizeof(int16_t);
172
+ *(int16_t *)&buffer[size] = *len_itr;
173
+ size += sizeof(int16_t);
174
+ }
175
+ return size;
176
+ }
177
+
178
+ static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
179
+ scanner->row = 0;
180
+ scanner->col = 0;
181
+ scanner->blk_imp_row = -1;
182
+ scanner->blk_imp_col = -1;
183
+ scanner->blk_imp_tab = 0;
184
+ array_delete(&scanner->ind_typ_stk);
185
+ array_push(&scanner->ind_typ_stk, IND_ROT);
186
+ array_delete(&scanner->ind_len_stk);
187
+ array_push(&scanner->ind_len_stk, -1);
188
+ if (length > 0) {
189
+ size_t size = 0;
190
+ scanner->row = *(int16_t *)&buffer[size];
191
+ size += sizeof(int16_t);
192
+ scanner->col = *(int16_t *)&buffer[size];
193
+ size += sizeof(int16_t);
194
+ scanner->blk_imp_row = *(int16_t *)&buffer[size];
195
+ size += sizeof(int16_t);
196
+ scanner->blk_imp_col = *(int16_t *)&buffer[size];
197
+ size += sizeof(int16_t);
198
+ scanner->blk_imp_tab = *(int16_t *)&buffer[size];
199
+ size += sizeof(int16_t);
200
+ while (size < length) {
201
+ array_push(&scanner->ind_typ_stk, *(int16_t *)&buffer[size]);
202
+ size += sizeof(int16_t);
203
+ array_push(&scanner->ind_len_stk, *(int16_t *)&buffer[size]);
204
+ size += sizeof(int16_t);
205
+ }
206
+ assert(size == length);
207
+ }
208
+ }
209
+
210
+ static inline void adv(Scanner *scanner, TSLexer *lexer) {
211
+ scanner->cur_col++;
212
+ scanner->cur_chr = lexer->lookahead;
213
+ lexer->advance(lexer, false);
214
+ }
215
+
216
+ static inline void adv_nwl(Scanner *scanner, TSLexer *lexer) {
217
+ scanner->cur_row++;
218
+ scanner->cur_col = 0;
219
+ scanner->cur_chr = lexer->lookahead;
220
+ lexer->advance(lexer, false);
221
+ }
222
+
223
+ static inline void skp(Scanner *scanner, TSLexer *lexer) {
224
+ scanner->cur_col++;
225
+ scanner->cur_chr = lexer->lookahead;
226
+ lexer->advance(lexer, true);
227
+ }
228
+
229
+ static inline void skp_nwl(Scanner *scanner, TSLexer *lexer) {
230
+ scanner->cur_row++;
231
+ scanner->cur_col = 0;
232
+ scanner->cur_chr = lexer->lookahead;
233
+ lexer->advance(lexer, true);
234
+ }
235
+
236
+ static inline void mrk_end(Scanner *scanner, TSLexer *lexer) {
237
+ scanner->end_row = scanner->cur_row;
238
+ scanner->end_col = scanner->cur_col;
239
+ lexer->mark_end(lexer);
240
+ }
241
+
242
+ static inline void init(Scanner *scanner) {
243
+ scanner->cur_row = scanner->row;
244
+ scanner->cur_col = scanner->col;
245
+ scanner->cur_chr = 0;
246
+ scanner->sch_stt = 0;
247
+ scanner->rlt_sch = RS_STR;
248
+ }
249
+
250
+ static inline void flush(Scanner *scanner) {
251
+ scanner->row = scanner->end_row;
252
+ scanner->col = scanner->end_col;
253
+ }
254
+
255
+ static inline void pop_ind(Scanner *scanner) {
256
+ array_pop(&scanner->ind_len_stk);
257
+ array_pop(&scanner->ind_typ_stk);
258
+ }
259
+
260
+ static inline void push_ind(Scanner *scanner, int16_t typ, int16_t len) {
261
+ array_push(&scanner->ind_len_stk, len);
262
+ array_push(&scanner->ind_typ_stk, typ);
263
+ }
264
+
265
/* True for a space or a horizontal tab. */
static inline bool is_wsp(int32_t c) {
    switch (c) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}
266
+
267
/* True for a carriage return or a line feed. */
static inline bool is_nwl(int32_t c) {
    switch (c) {
        case '\r':
        case '\n':
            return true;
        default:
            return false;
    }
}
268
+
269
/* True for blanks (space, tab), line breaks (CR, LF) and the 0 end-of-input lookahead. */
static inline bool is_wht(int32_t c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
        case 0:
            return true;
        default:
            return false;
    }
}
270
+
271
/* True for the ASCII decimal digits '0'..'9'. */
static inline bool is_ns_dec_digit(int32_t c) {
    if (c < '0' || c > '9') {
        return false;
    }
    return true;
}
272
+
273
/* True for ASCII hexadecimal digits: '0'..'9', 'a'..'f' and 'A'..'F'. */
static inline bool is_ns_hex_digit(int32_t c) {
    if (c >= '0' && c <= '9') {
        return true;
    }
    if (c >= 'a' && c <= 'f') {
        return true;
    }
    return c >= 'A' && c <= 'F';
}
276
+
277
/* True for '-', ASCII digits and ASCII letters. */
static inline bool is_ns_word_char(int32_t c) {
    if (c == '-') {
        return true;
    }
    if (c >= '0' && c <= '9') {
        return true;
    }
    if (c >= 'a' && c <= 'z') {
        return true;
    }
    return c >= 'A' && c <= 'Z';
}
280
+
281
/* True for a tab or any code point in 0x20..0x10ffff. */
static inline bool is_nb_json(int32_t c) {
    if (c == 0x09) {
        return true;
    }
    return c >= 0x20 && c <= 0x10ffff;
}
282
+
283
/*
 * True for characters that may appear literally inside a double-quoted
 * scalar line: a tab or 0x20..0x10ffff, except backslash and '"'.
 */
static inline bool is_nb_double_char(int32_t c) {
    if (c == '\\' || c == '"') {
        return false;
    }
    return c == 0x09 || (c >= 0x20 && c <= 0x10ffff);
}
284
+
285
/*
 * True for characters that may appear literally inside a single-quoted
 * scalar line: a tab or 0x20..0x10ffff, except the single quote itself.
 */
static inline bool is_nb_single_char(int32_t c) {
    if (c == '\'') {
        return false;
    }
    return c == 0x09 || (c >= 0x20 && c <= 0x10ffff);
}
286
+
287
/*
 * True when `c` lies in one of the accepted code-point ranges listed in the
 * table below.  Space, tab, line breaks, 0 and 0xfeff are outside all of
 * them.
 */
static inline bool is_ns_char(int32_t c) {
    static const struct {
        int32_t first;
        int32_t last;
    } ranges[] = {
        {0x21, 0x7e},     {0x85, 0x85},     {0xa0, 0xd7ff},
        {0xe000, 0xfefe}, {0xff00, 0xfffd}, {0x10000, 0x10ffff},
    };
    for (size_t i = 0; i < sizeof ranges / sizeof ranges[0]; i++) {
        if (c >= ranges[i].first && c <= ranges[i].last) {
            return true;
        }
    }
    return false;
}
291
+
292
/* True for the nineteen punctuation characters listed in the switch below. */
static inline bool is_c_indicator(int32_t c) {
    switch (c) {
        case '-':
        case '?':
        case ':':
        case ',':
        case '[':
        case ']':
        case '{':
        case '}':
        case '#':
        case '&':
        case '*':
        case '!':
        case '|':
        case '>':
        case '\'':
        case '"':
        case '%':
        case '@':
        case '`':
            return true;
        default:
            return false;
    }
}
297
+
298
/* True for the flow-collection punctuation: ',', '[', ']', '{' and '}'. */
static inline bool is_c_flow_indicator(int32_t c) {
    switch (c) {
        case ',':
        case '[':
        case ']':
        case '{':
        case '}':
            return true;
        default:
            return false;
    }
}
299
+
300
/* Plain-scalar "safe" predicate for block context: every is_ns_char() character qualifies. */
static inline bool is_plain_safe_in_block(int32_t c) {
    const bool safe = is_ns_char(c);
    return safe;
}
301
+
302
/*
 * Plain-scalar "safe" predicate for flow context: an is_ns_char() character
 * that is not one of the flow indicators (',', '[', ']', '{', '}').
 */
static inline bool is_plain_safe_in_flow(int32_t c) {
    if (is_c_flow_indicator(c)) {
        return false;
    }
    return is_ns_char(c);
}
303
+
304
/*
 * True for characters accepted literally in a URI: word characters plus the
 * punctuation listed below.  Percent escapes are handled separately by
 * scn_uri_esc().
 */
static inline bool is_ns_uri_char(int32_t c) {
    if (is_ns_word_char(c)) {
        return true;
    }
    switch (c) {
        case '#':
        case ';':
        case '/':
        case '?':
        case ':':
        case '@':
        case '&':
        case '=':
        case '+':
        case '$':
        case ',':
        case '_':
        case '.':
        case '!':
        case '~':
        case '*':
        case '\'':
        case '(':
        case ')':
        case '[':
        case ']':
            return true;
        default:
            return false;
    }
}
309
+
310
/*
 * True for characters accepted literally in a tag: the same set as
 * is_ns_uri_char() minus '!', ',', '[' and ']'.  Percent escapes are handled
 * separately by scn_uri_esc().
 */
static inline bool is_ns_tag_char(int32_t c) {
    if (is_ns_word_char(c)) {
        return true;
    }
    switch (c) {
        case '#':
        case ';':
        case '/':
        case '?':
        case ':':
        case '@':
        case '&':
        case '=':
        case '+':
        case '$':
        case '_':
        case '.':
        case '~':
        case '*':
        case '\'':
        case '(':
        case ')':
            return true;
        default:
            return false;
    }
}
315
+
316
/* True for characters allowed in an anchor or alias name: is_ns_char() minus the flow indicators. */
static inline bool is_ns_anchor_char(int32_t c) {
    if (is_c_flow_indicator(c)) {
        return false;
    }
    return is_ns_char(c);
}
317
+
318
+ static char scn_uri_esc(Scanner *scanner, TSLexer *lexer) {
319
+ if (lexer->lookahead != '%') {
320
+ return SCN_STOP;
321
+ }
322
+ mrk_end(scanner, lexer);
323
+ adv(scanner, lexer);
324
+ if (!is_ns_hex_digit(lexer->lookahead)) {
325
+ return SCN_FAIL;
326
+ }
327
+ adv(scanner, lexer);
328
+ if (!is_ns_hex_digit(lexer->lookahead)) {
329
+ return SCN_FAIL;
330
+ }
331
+ adv(scanner, lexer);
332
+ return SCN_SUCC;
333
+ }
334
+
335
+ static char scn_ns_uri_char(Scanner *scanner, TSLexer *lexer) {
336
+ if (is_ns_uri_char(lexer->lookahead)) {
337
+ adv(scanner, lexer);
338
+ return SCN_SUCC;
339
+ }
340
+ return scn_uri_esc(scanner, lexer);
341
+ }
342
+
343
+ static char scn_ns_tag_char(Scanner *scanner, TSLexer *lexer) {
344
+ if (is_ns_tag_char(lexer->lookahead)) {
345
+ adv(scanner, lexer);
346
+ return SCN_SUCC;
347
+ }
348
+ return scn_uri_esc(scanner, lexer);
349
+ }
350
+
351
+ static bool scn_dir_bgn(Scanner *scanner, TSLexer *lexer) {
352
+ adv(scanner, lexer);
353
+ if (lexer->lookahead == 'Y') {
354
+ adv(scanner, lexer);
355
+ if (lexer->lookahead == 'A') {
356
+ adv(scanner, lexer);
357
+ if (lexer->lookahead == 'M') {
358
+ adv(scanner, lexer);
359
+ if (lexer->lookahead == 'L') {
360
+ adv(scanner, lexer);
361
+ if (is_wht(lexer->lookahead)) {
362
+ mrk_end(scanner, lexer);
363
+ RET_SYM(S_DIR_YML_BGN);
364
+ }
365
+ }
366
+ }
367
+ }
368
+ } else if (lexer->lookahead == 'T') {
369
+ adv(scanner, lexer);
370
+ if (lexer->lookahead == 'A') {
371
+ adv(scanner, lexer);
372
+ if (lexer->lookahead == 'G') {
373
+ adv(scanner, lexer);
374
+ if (is_wht(lexer->lookahead)) {
375
+ mrk_end(scanner, lexer);
376
+ RET_SYM(S_DIR_TAG_BGN);
377
+ }
378
+ }
379
+ }
380
+ }
381
+ for (;;) {
382
+ if (!is_ns_char(lexer->lookahead)) {
383
+ break;
384
+ }
385
+ adv(scanner, lexer);
386
+ }
387
+ if (scanner->cur_col > 1 && is_wht(lexer->lookahead)) {
388
+ mrk_end(scanner, lexer);
389
+ RET_SYM(S_DIR_RSV_BGN);
390
+ }
391
+ return false;
392
+ }
393
+
394
+ static bool scn_dir_yml_ver(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
395
+ uint16_t n1 = 0;
396
+ uint16_t n2 = 0;
397
+ while (is_ns_dec_digit(lexer->lookahead)) {
398
+ adv(scanner, lexer);
399
+ n1++;
400
+ }
401
+ if (lexer->lookahead != '.') {
402
+ return false;
403
+ }
404
+ adv(scanner, lexer);
405
+ while (is_ns_dec_digit(lexer->lookahead)) {
406
+ adv(scanner, lexer);
407
+ n2++;
408
+ }
409
+ if (n1 == 0 || n2 == 0) {
410
+ return false;
411
+ }
412
+ mrk_end(scanner, lexer);
413
+ RET_SYM(result_symbol);
414
+ }
415
+
416
+ static bool scn_tag_hdl_tal(Scanner *scanner, TSLexer *lexer) {
417
+ if (lexer->lookahead == '!') {
418
+ adv(scanner, lexer);
419
+ return true;
420
+ }
421
+ uint16_t n = 0;
422
+ while (is_ns_word_char(lexer->lookahead)) {
423
+ adv(scanner, lexer);
424
+ n++;
425
+ }
426
+ if (n == 0) {
427
+ return true;
428
+ }
429
+ if (lexer->lookahead == '!') {
430
+ adv(scanner, lexer);
431
+ return true;
432
+ }
433
+ return false;
434
+ }
435
+
436
+ static bool scn_dir_tag_hdl(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
437
+ if (lexer->lookahead == '!') {
438
+ adv(scanner, lexer);
439
+ if (scn_tag_hdl_tal(scanner, lexer)) {
440
+ mrk_end(scanner, lexer);
441
+ RET_SYM(result_symbol);
442
+ }
443
+ }
444
+ return false;
445
+ }
446
+
447
+ static bool scn_dir_tag_pfx(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
448
+ if (lexer->lookahead == '!') {
449
+ adv(scanner, lexer);
450
+ } else if (scn_ns_tag_char(scanner, lexer) == SCN_SUCC) {
451
+ ;
452
+ } else {
453
+ return false;
454
+ }
455
+ for (;;) {
456
+ switch (scn_ns_uri_char(scanner, lexer)) {
457
+ case SCN_STOP:
458
+ mrk_end(scanner, lexer);
459
+ case SCN_FAIL:
460
+ RET_SYM(result_symbol);
461
+ default:
462
+ break;
463
+ }
464
+ }
465
+ }
466
+
467
+ static bool scn_dir_rsv_prm(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
468
+ if (!is_ns_char(lexer->lookahead)) {
469
+ return false;
470
+ }
471
+ adv(scanner, lexer);
472
+ while (is_ns_char(lexer->lookahead)) {
473
+ adv(scanner, lexer);
474
+ }
475
+ mrk_end(scanner, lexer);
476
+ RET_SYM(result_symbol);
477
+ }
478
+
479
+ static bool scn_tag(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
480
+ if (lexer->lookahead != '!') {
481
+ return false;
482
+ }
483
+ adv(scanner, lexer);
484
+ if (is_wht(lexer->lookahead)) {
485
+ mrk_end(scanner, lexer);
486
+ RET_SYM(result_symbol);
487
+ }
488
+ if (lexer->lookahead == '<') {
489
+ adv(scanner, lexer);
490
+ if (scn_ns_uri_char(scanner, lexer) != SCN_SUCC) {
491
+ return false;
492
+ }
493
+ for (;;) {
494
+ switch (scn_ns_uri_char(scanner, lexer)) {
495
+ case SCN_STOP:
496
+ if (lexer->lookahead == '>') {
497
+ adv(scanner, lexer);
498
+ mrk_end(scanner, lexer);
499
+ RET_SYM(result_symbol);
500
+ }
501
+ case SCN_FAIL:
502
+ return false;
503
+ default:
504
+ break;
505
+ }
506
+ }
507
+ } else {
508
+ if (scn_tag_hdl_tal(scanner, lexer) && scn_ns_tag_char(scanner, lexer) != SCN_SUCC) {
509
+ return false;
510
+ }
511
+ for (;;) {
512
+ switch (scn_ns_tag_char(scanner, lexer)) {
513
+ case SCN_STOP:
514
+ mrk_end(scanner, lexer);
515
+ case SCN_FAIL:
516
+ RET_SYM(result_symbol);
517
+ default:
518
+ break;
519
+ }
520
+ }
521
+ }
522
+ return false;
523
+ }
524
+
525
+ static bool scn_acr_bgn(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
526
+ if (lexer->lookahead != '&') {
527
+ return false;
528
+ }
529
+ adv(scanner, lexer);
530
+ if (!is_ns_anchor_char(lexer->lookahead)) {
531
+ return false;
532
+ }
533
+ mrk_end(scanner, lexer);
534
+ RET_SYM(result_symbol);
535
+ }
536
+
537
+ static bool scn_acr_ctn(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
538
+ while (is_ns_anchor_char(lexer->lookahead)) {
539
+ adv(scanner, lexer);
540
+ }
541
+ mrk_end(scanner, lexer);
542
+ RET_SYM(result_symbol);
543
+ }
544
+
545
+ static bool scn_als_bgn(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
546
+ if (lexer->lookahead != '*') {
547
+ return false;
548
+ }
549
+ adv(scanner, lexer);
550
+ if (!is_ns_anchor_char(lexer->lookahead)) {
551
+ return false;
552
+ }
553
+ mrk_end(scanner, lexer);
554
+ RET_SYM(result_symbol);
555
+ }
556
+
557
+ static bool scn_als_ctn(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
558
+ while (is_ns_anchor_char(lexer->lookahead)) {
559
+ adv(scanner, lexer);
560
+ }
561
+ mrk_end(scanner, lexer);
562
+ RET_SYM(result_symbol);
563
+ }
564
+
565
+ static bool scn_dqt_esc_seq(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
566
+ uint16_t i;
567
+ switch (lexer->lookahead) {
568
+ case '0':
569
+ case 'a':
570
+ case 'b':
571
+ case 't':
572
+ case '\t':
573
+ case 'n':
574
+ case 'v':
575
+ case 'r':
576
+ case 'e':
577
+ case 'f':
578
+ case ' ':
579
+ case '"':
580
+ case '/':
581
+ case '\\':
582
+ case 'N':
583
+ case '_':
584
+ case 'L':
585
+ case 'P':
586
+ adv(scanner, lexer);
587
+ break;
588
+ case 'U':
589
+ adv(scanner, lexer);
590
+ for (i = 0; i < 8; i++) {
591
+ if (is_ns_hex_digit(lexer->lookahead)) {
592
+ adv(scanner, lexer);
593
+ } else {
594
+ return false;
595
+ }
596
+ }
597
+ break;
598
+ case 'u':
599
+ adv(scanner, lexer);
600
+ for (i = 0; i < 4; i++) {
601
+ if (is_ns_hex_digit(lexer->lookahead)) {
602
+ adv(scanner, lexer);
603
+ } else {
604
+ return false;
605
+ }
606
+ }
607
+ break;
608
+ case 'x':
609
+ adv(scanner, lexer);
610
+ for (i = 0; i < 2; i++) {
611
+ if (is_ns_hex_digit(lexer->lookahead)) {
612
+ adv(scanner, lexer);
613
+ } else {
614
+ return false;
615
+ }
616
+ }
617
+ break;
618
+ default:
619
+ return false;
620
+ }
621
+ mrk_end(scanner, lexer);
622
+ RET_SYM(result_symbol);
623
+ }
624
+
625
+ static bool scn_drs_doc_end(Scanner *scanner, TSLexer *lexer) {
626
+ if (lexer->lookahead != '-' && lexer->lookahead != '.') {
627
+ return false;
628
+ }
629
+ int32_t delimeter = lexer->lookahead;
630
+ adv(scanner, lexer);
631
+ if (lexer->lookahead == delimeter) {
632
+ adv(scanner, lexer);
633
+ if (lexer->lookahead == delimeter) {
634
+ adv(scanner, lexer);
635
+ if (is_wht(lexer->lookahead)) {
636
+ return true;
637
+ }
638
+ }
639
+ }
640
+ mrk_end(scanner, lexer);
641
+ return false;
642
+ }
643
+
644
+ static bool scn_dqt_str_cnt(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
645
+ if (!is_nb_double_char(lexer->lookahead)) {
646
+ return false;
647
+ }
648
+ if (scanner->cur_col == 0 && scn_drs_doc_end(scanner, lexer)) {
649
+ mrk_end(scanner, lexer);
650
+ RET_SYM(scanner->cur_chr == '-' ? S_DRS_END : S_DOC_END);
651
+ } else {
652
+ adv(scanner, lexer);
653
+ }
654
+ while (is_nb_double_char(lexer->lookahead)) {
655
+ adv(scanner, lexer);
656
+ }
657
+ mrk_end(scanner, lexer);
658
+ RET_SYM(result_symbol);
659
+ }
660
+
661
+ static bool scn_sqt_str_cnt(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
662
+ if (!is_nb_single_char(lexer->lookahead)) {
663
+ return false;
664
+ }
665
+ if (scanner->cur_col == 0 && scn_drs_doc_end(scanner, lexer)) {
666
+ mrk_end(scanner, lexer);
667
+ RET_SYM(scanner->cur_chr == '-' ? S_DRS_END : S_DOC_END);
668
+ } else {
669
+ adv(scanner, lexer);
670
+ }
671
+ while (is_nb_single_char(lexer->lookahead)) {
672
+ adv(scanner, lexer);
673
+ }
674
+ mrk_end(scanner, lexer);
675
+ RET_SYM(result_symbol);
676
+ }
677
+
678
+ static bool scn_blk_str_bgn(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
679
+ if (lexer->lookahead != '|' && lexer->lookahead != '>') {
680
+ return false;
681
+ }
682
+ adv(scanner, lexer);
683
+ int16_t cur_ind = *array_back(&scanner->ind_len_stk);
684
+ int16_t ind = -1;
685
+ if (lexer->lookahead >= '1' && lexer->lookahead <= '9') {
686
+ ind = lexer->lookahead - '1';
687
+ adv(scanner, lexer);
688
+ if (lexer->lookahead == '+' || lexer->lookahead == '-') {
689
+ adv(scanner, lexer);
690
+ }
691
+ } else if (lexer->lookahead == '+' || lexer->lookahead == '-') {
692
+ adv(scanner, lexer);
693
+ if (lexer->lookahead >= '1' && lexer->lookahead <= '9') {
694
+ ind = lexer->lookahead - '1';
695
+ adv(scanner, lexer);
696
+ }
697
+ }
698
+ if (!is_wht(lexer->lookahead)) {
699
+ return false;
700
+ }
701
+ mrk_end(scanner, lexer);
702
+ if (ind != -1) {
703
+ ind += cur_ind;
704
+ } else {
705
+ ind = cur_ind;
706
+ while (is_wsp(lexer->lookahead)) {
707
+ adv(scanner, lexer);
708
+ }
709
+ if (lexer->lookahead == '#') {
710
+ adv(scanner, lexer);
711
+ while (!is_nwl(lexer->lookahead) && lexer->lookahead != 0) {
712
+ adv(scanner, lexer);
713
+ }
714
+ }
715
+ if (is_nwl(lexer->lookahead)) {
716
+ adv_nwl(scanner, lexer);
717
+ }
718
+ while (lexer->lookahead != 0) {
719
+ if (lexer->lookahead == ' ') {
720
+ adv(scanner, lexer);
721
+ } else if (is_nwl(lexer->lookahead)) {
722
+ if (scanner->cur_col - 1 < ind) {
723
+ break;
724
+ }
725
+ ind = scanner->cur_col - 1;
726
+ adv_nwl(scanner, lexer);
727
+ } else {
728
+ if (scanner->cur_col - 1 > ind) {
729
+ ind = scanner->cur_col - 1;
730
+ }
731
+ break;
732
+ }
733
+ }
734
+ }
735
+ PUSH_IND(IND_STR, ind);
736
+ RET_SYM(result_symbol);
737
+ }
738
+
739
+ static bool scn_blk_str_cnt(Scanner *scanner, TSLexer *lexer, TSSymbol result_symbol) {
740
+ if (!is_ns_char(lexer->lookahead)) {
741
+ return false;
742
+ }
743
+ if (scanner->cur_col == 0 && scn_drs_doc_end(scanner, lexer)) {
744
+ POP_IND();
745
+ RET_SYM(BL);
746
+ } else {
747
+ adv(scanner, lexer);
748
+ }
749
+ mrk_end(scanner, lexer);
750
+ for (;;) {
751
+ if (is_ns_char(lexer->lookahead)) {
752
+ adv(scanner, lexer);
753
+ while (is_ns_char(lexer->lookahead)) {
754
+ adv(scanner, lexer);
755
+ }
756
+ mrk_end(scanner, lexer);
757
+ }
758
+ if (is_wsp(lexer->lookahead)) {
759
+ adv(scanner, lexer);
760
+ while (is_wsp(lexer->lookahead)) {
761
+ adv(scanner, lexer);
762
+ }
763
+ } else {
764
+ break;
765
+ }
766
+ }
767
+ RET_SYM(result_symbol);
768
+ }
769
+
770
+ static char scn_pln_cnt(Scanner *scanner, TSLexer *lexer, bool (*is_plain_safe)(int32_t)) {
771
+ bool is_cur_wsp = is_wsp(scanner->cur_chr);
772
+ bool is_cur_saf = is_plain_safe(scanner->cur_chr);
773
+ bool is_lka_wsp = is_wsp(lexer->lookahead);
774
+ bool is_lka_saf = is_plain_safe(lexer->lookahead);
775
+ if (is_lka_saf || is_lka_wsp) {
776
+ for (;;) {
777
+ if (is_lka_saf && lexer->lookahead != '#' && lexer->lookahead != ':') {
778
+ adv(scanner, lexer);
779
+ mrk_end(scanner, lexer);
780
+ scanner->sch_stt = adv_sch_stt(scanner->sch_stt, scanner->cur_chr, &scanner->rlt_sch);
781
+ } else if (is_cur_saf && lexer->lookahead == '#') {
782
+ adv(scanner, lexer);
783
+ mrk_end(scanner, lexer);
784
+ scanner->sch_stt = adv_sch_stt(scanner->sch_stt, scanner->cur_chr, &scanner->rlt_sch);
785
+ } else if (is_lka_wsp) {
786
+ adv(scanner, lexer);
787
+ scanner->sch_stt = adv_sch_stt(scanner->sch_stt, scanner->cur_chr, &scanner->rlt_sch);
788
+ } else if (lexer->lookahead == ':') {
789
+ adv(scanner, lexer); // check later
790
+ } else {
791
+ break;
792
+ }
793
+
794
+ is_cur_wsp = is_lka_wsp;
795
+ is_cur_saf = is_lka_saf;
796
+ is_lka_wsp = is_wsp(lexer->lookahead);
797
+ is_lka_saf = is_plain_safe(lexer->lookahead);
798
+
799
+ if (scanner->cur_chr == ':') {
800
+ if (is_lka_saf) {
801
+ mrk_end(scanner, lexer);
802
+ scanner->sch_stt = adv_sch_stt(scanner->sch_stt, scanner->cur_chr, &scanner->rlt_sch);
803
+ } else {
804
+ return SCN_FAIL;
805
+ }
806
+ }
807
+ }
808
+ } else {
809
+ return SCN_STOP;
810
+ }
811
+ return SCN_SUCC;
812
+ }
813
+
814
+ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
815
+ init(scanner);
816
+ mrk_end(scanner, lexer);
817
+
818
+ bool allow_comment = !(valid_symbols[R_DQT_STR_CTN] || valid_symbols[BR_DQT_STR_CTN] ||
819
+ valid_symbols[R_SQT_STR_CTN] || valid_symbols[BR_SQT_STR_CTN]);
820
+ int16_t *ind_ptr = scanner->ind_len_stk.contents + scanner->ind_len_stk.size - 1;
821
+ int16_t *ind_end = scanner->ind_len_stk.contents - 1;
822
+ int16_t cur_ind = *ind_ptr--;
823
+ int16_t prt_ind = ind_ptr == ind_end ? -1 : *ind_ptr;
824
+ int16_t cur_ind_typ = *array_back(&scanner->ind_typ_stk);
825
+
826
+ bool has_tab_ind = false;
827
+ int16_t leading_spaces = 0;
828
+
829
+ for (;;) {
830
+ if (lexer->lookahead == ' ') {
831
+ if (!has_tab_ind) {
832
+ leading_spaces++;
833
+ }
834
+ skp(scanner, lexer);
835
+ } else if (lexer->lookahead == '\t') {
836
+ has_tab_ind = true;
837
+ skp(scanner, lexer);
838
+ } else if (is_nwl(lexer->lookahead)) {
839
+ has_tab_ind = false;
840
+ leading_spaces = 0;
841
+ skp_nwl(scanner, lexer);
842
+ } else if (allow_comment && lexer->lookahead == '#') {
843
+ if (valid_symbols[BR_BLK_STR_CTN] && valid_symbols[BL] && scanner->cur_col <= cur_ind) {
844
+ POP_IND();
845
+ RET_SYM(BL);
846
+ }
847
+ if (valid_symbols[BR_BLK_STR_CTN]
848
+ ? scanner->cur_row == scanner->row
849
+ : scanner->cur_col == 0 || scanner->cur_row != scanner->row || scanner->cur_col > scanner->col) {
850
+ adv(scanner, lexer);
851
+ while (!is_nwl(lexer->lookahead) && lexer->lookahead != 0) {
852
+ adv(scanner, lexer);
853
+ }
854
+ mrk_end(scanner, lexer);
855
+ RET_SYM(COMMENT);
856
+ } else {
857
+ break;
858
+ }
859
+ } else {
860
+ break;
861
+ }
862
+ }
863
+
864
+ if (lexer->lookahead == 0) {
865
+ if (valid_symbols[BL]) {
866
+ mrk_end(scanner, lexer);
867
+ POP_IND();
868
+ RET_SYM(BL)
869
+ }
870
+ if (valid_symbols[END_OF_FILE]) {
871
+ mrk_end(scanner, lexer);
872
+ RET_SYM(END_OF_FILE)
873
+ }
874
+ return false;
875
+ }
876
+
877
+ int16_t bgn_row = scanner->cur_row;
878
+ int16_t bgn_col = scanner->cur_col;
879
+ int32_t bgn_chr = lexer->lookahead;
880
+
881
+ if (valid_symbols[BL] && bgn_col <= cur_ind && !has_tab_ind) {
882
+ if (cur_ind == prt_ind && cur_ind_typ == IND_SEQ ? bgn_col < cur_ind || lexer->lookahead != '-'
883
+ : bgn_col <= prt_ind || cur_ind_typ == IND_STR) {
884
+ POP_IND();
885
+ RET_SYM(BL);
886
+ }
887
+ }
888
+
889
+ bool has_nwl = scanner->cur_row > scanner->row;
890
+ bool is_r = !has_nwl;
891
+ bool is_br = has_nwl && leading_spaces > cur_ind;
892
+ bool is_b = has_nwl && leading_spaces == cur_ind && !has_tab_ind;
893
+ bool is_s = bgn_col == 0;
894
+
895
+ if (valid_symbols[R_DIR_YML_VER] && is_r) {
896
+ return scn_dir_yml_ver(scanner, lexer, R_DIR_YML_VER);
897
+ }
898
+ if (valid_symbols[R_DIR_TAG_HDL] && is_r) {
899
+ return scn_dir_tag_hdl(scanner, lexer, R_DIR_TAG_HDL);
900
+ }
901
+ if (valid_symbols[R_DIR_TAG_PFX] && is_r) {
902
+ return scn_dir_tag_pfx(scanner, lexer, R_DIR_TAG_PFX);
903
+ }
904
+ if (valid_symbols[R_DIR_RSV_PRM] && is_r) {
905
+ return scn_dir_rsv_prm(scanner, lexer, R_DIR_RSV_PRM);
906
+ }
907
+ if (valid_symbols[BR_BLK_STR_CTN] && is_br && scn_blk_str_cnt(scanner, lexer, BR_BLK_STR_CTN)) {
908
+ return true;
909
+ }
910
+
911
+ if ((valid_symbols[R_DQT_STR_CTN] && is_r && scn_dqt_str_cnt(scanner, lexer, R_DQT_STR_CTN)) ||
912
+ (valid_symbols[BR_DQT_STR_CTN] && is_br && scn_dqt_str_cnt(scanner, lexer, BR_DQT_STR_CTN))) {
913
+ return true;
914
+ }
915
+
916
+ if ((valid_symbols[R_SQT_STR_CTN] && is_r && scn_sqt_str_cnt(scanner, lexer, R_SQT_STR_CTN)) ||
917
+ (valid_symbols[BR_SQT_STR_CTN] && is_br && scn_sqt_str_cnt(scanner, lexer, BR_SQT_STR_CTN))) {
918
+ return true;
919
+ }
920
+
921
+ if (valid_symbols[R_ACR_CTN] && is_r) {
922
+ return scn_acr_ctn(scanner, lexer, R_ACR_CTN);
923
+ }
924
+ if (valid_symbols[R_ALS_CTN] && is_r) {
925
+ return scn_als_ctn(scanner, lexer, R_ALS_CTN);
926
+ }
927
+
928
+ if (lexer->lookahead == '%') {
929
+ if (valid_symbols[S_DIR_YML_BGN] && is_s) {
930
+ return scn_dir_bgn(scanner, lexer);
931
+ }
932
+ } else if (lexer->lookahead == '*') {
933
+ if (valid_symbols[R_ALS_BGN] && is_r) {
934
+ MAY_UPD_IMP_COL();
935
+ return scn_als_bgn(scanner, lexer, R_ALS_BGN);
936
+ }
937
+ if (valid_symbols[BR_ALS_BGN] && is_br) {
938
+ MAY_UPD_IMP_COL();
939
+ return scn_als_bgn(scanner, lexer, BR_ALS_BGN);
940
+ }
941
+ if (valid_symbols[B_ALS_BGN] && is_b) {
942
+ MAY_UPD_IMP_COL();
943
+ return scn_als_bgn(scanner, lexer, B_ALS_BGN);
944
+ }
945
+ } else if (lexer->lookahead == '&') {
946
+ if (valid_symbols[R_ACR_BGN] && is_r) {
947
+ MAY_UPD_IMP_COL();
948
+ return scn_acr_bgn(scanner, lexer, R_ACR_BGN);
949
+ }
950
+ if (valid_symbols[BR_ACR_BGN] && is_br) {
951
+ MAY_UPD_IMP_COL();
952
+ return scn_acr_bgn(scanner, lexer, BR_ACR_BGN);
953
+ }
954
+ if (valid_symbols[B_ACR_BGN] && is_b) {
955
+ MAY_UPD_IMP_COL();
956
+ return scn_acr_bgn(scanner, lexer, B_ACR_BGN);
957
+ }
958
+ } else if (lexer->lookahead == '!') {
959
+ if (valid_symbols[R_TAG] && is_r) {
960
+ MAY_UPD_IMP_COL();
961
+ return scn_tag(scanner, lexer, R_TAG);
962
+ }
963
+ if (valid_symbols[BR_TAG] && is_br) {
964
+ MAY_UPD_IMP_COL();
965
+ return scn_tag(scanner, lexer, BR_TAG);
966
+ }
967
+ if (valid_symbols[B_TAG] && is_b) {
968
+ MAY_UPD_IMP_COL();
969
+ return scn_tag(scanner, lexer, B_TAG);
970
+ }
971
+ } else if (lexer->lookahead == '[') {
972
+ if (valid_symbols[R_FLW_SEQ_BGN] && is_r) {
973
+ MAY_UPD_IMP_COL();
974
+ adv(scanner, lexer);
975
+ mrk_end(scanner, lexer);
976
+ RET_SYM(R_FLW_SEQ_BGN)
977
+ }
978
+ if (valid_symbols[BR_FLW_SEQ_BGN] && is_br) {
979
+ MAY_UPD_IMP_COL();
980
+ adv(scanner, lexer);
981
+ mrk_end(scanner, lexer);
982
+ RET_SYM(BR_FLW_SEQ_BGN)
983
+ }
984
+ if (valid_symbols[B_FLW_SEQ_BGN] && is_b) {
985
+ MAY_UPD_IMP_COL();
986
+ adv(scanner, lexer);
987
+ mrk_end(scanner, lexer);
988
+ RET_SYM(B_FLW_SEQ_BGN)
989
+ }
990
+ } else if (lexer->lookahead == ']') {
991
+ if (valid_symbols[R_FLW_SEQ_END] && is_r) {
992
+ adv(scanner, lexer);
993
+ mrk_end(scanner, lexer);
994
+ RET_SYM(R_FLW_SEQ_END)
995
+ }
996
+ if (valid_symbols[BR_FLW_SEQ_END] && is_br) {
997
+ adv(scanner, lexer);
998
+ mrk_end(scanner, lexer);
999
+ RET_SYM(BR_FLW_SEQ_END)
1000
+ }
1001
+ if (valid_symbols[B_FLW_SEQ_END] && is_b) {
1002
+ adv(scanner, lexer);
1003
+ mrk_end(scanner, lexer);
1004
+ RET_SYM(BR_FLW_SEQ_END)
1005
+ }
1006
+ } else if (lexer->lookahead == '{') {
1007
+ if (valid_symbols[R_FLW_MAP_BGN] && is_r) {
1008
+ MAY_UPD_IMP_COL();
1009
+ adv(scanner, lexer);
1010
+ mrk_end(scanner, lexer);
1011
+ RET_SYM(R_FLW_MAP_BGN)
1012
+ }
1013
+ if (valid_symbols[BR_FLW_MAP_BGN] && is_br) {
1014
+ MAY_UPD_IMP_COL();
1015
+ adv(scanner, lexer);
1016
+ mrk_end(scanner, lexer);
1017
+ RET_SYM(BR_FLW_MAP_BGN)
1018
+ }
1019
+ if (valid_symbols[B_FLW_MAP_BGN] && is_b) {
1020
+ MAY_UPD_IMP_COL();
1021
+ adv(scanner, lexer);
1022
+ mrk_end(scanner, lexer);
1023
+ RET_SYM(B_FLW_MAP_BGN)
1024
+ }
1025
+ } else if (lexer->lookahead == '}') {
1026
+ if (valid_symbols[R_FLW_MAP_END] && is_r) {
1027
+ adv(scanner, lexer);
1028
+ mrk_end(scanner, lexer);
1029
+ RET_SYM(R_FLW_MAP_END)
1030
+ }
1031
+ if (valid_symbols[BR_FLW_MAP_END] && is_br) {
1032
+ adv(scanner, lexer);
1033
+ mrk_end(scanner, lexer);
1034
+ RET_SYM(BR_FLW_MAP_END)
1035
+ }
1036
+ if (valid_symbols[B_FLW_MAP_END] && is_b) {
1037
+ adv(scanner, lexer);
1038
+ mrk_end(scanner, lexer);
1039
+ RET_SYM(BR_FLW_MAP_END)
1040
+ }
1041
+ } else if (lexer->lookahead == ',') {
1042
+ if (valid_symbols[R_FLW_SEP_BGN] && is_r) {
1043
+ adv(scanner, lexer);
1044
+ mrk_end(scanner, lexer);
1045
+ RET_SYM(R_FLW_SEP_BGN)
1046
+ }
1047
+ if (valid_symbols[BR_FLW_SEP_BGN] && is_br) {
1048
+ adv(scanner, lexer);
1049
+ mrk_end(scanner, lexer);
1050
+ RET_SYM(BR_FLW_SEP_BGN)
1051
+ }
1052
+ } else if (lexer->lookahead == '"') {
1053
+ if (valid_symbols[R_DQT_STR_BGN] && is_r) {
1054
+ MAY_UPD_IMP_COL();
1055
+ adv(scanner, lexer);
1056
+ mrk_end(scanner, lexer);
1057
+ RET_SYM(R_DQT_STR_BGN)
1058
+ }
1059
+ if (valid_symbols[BR_DQT_STR_BGN] && is_br) {
1060
+ MAY_UPD_IMP_COL();
1061
+ adv(scanner, lexer);
1062
+ mrk_end(scanner, lexer);
1063
+ RET_SYM(BR_DQT_STR_BGN)
1064
+ }
1065
+ if (valid_symbols[B_DQT_STR_BGN] && is_b) {
1066
+ MAY_UPD_IMP_COL();
1067
+ adv(scanner, lexer);
1068
+ mrk_end(scanner, lexer);
1069
+ RET_SYM(B_DQT_STR_BGN)
1070
+ }
1071
+ if (valid_symbols[R_DQT_STR_END] && is_r) {
1072
+ adv(scanner, lexer);
1073
+ mrk_end(scanner, lexer);
1074
+ RET_SYM(R_DQT_STR_END)
1075
+ }
1076
+ if (valid_symbols[BR_DQT_STR_END] && is_br) {
1077
+ adv(scanner, lexer);
1078
+ mrk_end(scanner, lexer);
1079
+ RET_SYM(BR_DQT_STR_END)
1080
+ }
1081
+ } else if (lexer->lookahead == '\'') {
1082
+ if (valid_symbols[R_SQT_STR_BGN] && is_r) {
1083
+ MAY_UPD_IMP_COL();
1084
+ adv(scanner, lexer);
1085
+ mrk_end(scanner, lexer);
1086
+ RET_SYM(R_SQT_STR_BGN)
1087
+ }
1088
+ if (valid_symbols[BR_SQT_STR_BGN] && is_br) {
1089
+ MAY_UPD_IMP_COL();
1090
+ adv(scanner, lexer);
1091
+ mrk_end(scanner, lexer);
1092
+ RET_SYM(BR_SQT_STR_BGN)
1093
+ }
1094
+ if (valid_symbols[B_SQT_STR_BGN] && is_b) {
1095
+ MAY_UPD_IMP_COL();
1096
+ adv(scanner, lexer);
1097
+ mrk_end(scanner, lexer);
1098
+ RET_SYM(B_SQT_STR_BGN)
1099
+ }
1100
+ if (valid_symbols[R_SQT_STR_END] && is_r) {
1101
+ adv(scanner, lexer);
1102
+ if (lexer->lookahead == '\'') {
1103
+ adv(scanner, lexer);
1104
+ mrk_end(scanner, lexer);
1105
+ RET_SYM(R_SQT_ESC_SQT)
1106
+ } else {
1107
+ mrk_end(scanner, lexer);
1108
+ RET_SYM(R_SQT_STR_END)
1109
+ }
1110
+ }
1111
+ if (valid_symbols[BR_SQT_STR_END] && is_br) {
1112
+ adv(scanner, lexer);
1113
+ if (lexer->lookahead == '\'') {
1114
+ adv(scanner, lexer);
1115
+ mrk_end(scanner, lexer);
1116
+ RET_SYM(BR_SQT_ESC_SQT)
1117
+ } else {
1118
+ mrk_end(scanner, lexer);
1119
+ RET_SYM(BR_SQT_STR_END)
1120
+ }
1121
+ }
1122
+ } else if (lexer->lookahead == '?') {
1123
+ bool is_r_blk_key_bgn = valid_symbols[R_BLK_KEY_BGN] && is_r;
1124
+ bool is_br_blk_key_bgn = valid_symbols[BR_BLK_KEY_BGN] && is_br;
1125
+ bool is_b_blk_key_bgn = valid_symbols[B_BLK_KEY_BGN] && is_b;
1126
+ bool is_r_flw_key_bgn = valid_symbols[R_FLW_KEY_BGN] && is_r;
1127
+ bool is_br_flw_key_bgn = valid_symbols[BR_FLW_KEY_BGN] && is_br;
1128
+ if (is_r_blk_key_bgn || is_br_blk_key_bgn || is_b_blk_key_bgn || is_r_flw_key_bgn || is_br_flw_key_bgn) {
1129
+ adv(scanner, lexer);
1130
+ if (is_wht(lexer->lookahead)) {
1131
+ mrk_end(scanner, lexer);
1132
+ if (is_r_blk_key_bgn) {
1133
+ PUSH_BGN_IND(IND_MAP);
1134
+ RET_SYM(R_BLK_KEY_BGN);
1135
+ }
1136
+ if (is_br_blk_key_bgn) {
1137
+ PUSH_BGN_IND(IND_MAP);
1138
+ RET_SYM(BR_BLK_KEY_BGN);
1139
+ }
1140
+ if (is_b_blk_key_bgn)
1141
+ RET_SYM(B_BLK_KEY_BGN);
1142
+ if (is_r_flw_key_bgn)
1143
+ RET_SYM(R_FLW_KEY_BGN);
1144
+ if (is_br_flw_key_bgn)
1145
+ RET_SYM(BR_FLW_KEY_BGN);
1146
+ }
1147
+ }
1148
+ } else if (lexer->lookahead == ':') {
1149
+ if (valid_symbols[R_FLW_JSV_BGN] && is_r) {
1150
+ adv(scanner, lexer);
1151
+ mrk_end(scanner, lexer);
1152
+ RET_SYM(R_FLW_JSV_BGN);
1153
+ }
1154
+ if (valid_symbols[BR_FLW_JSV_BGN] && is_br) {
1155
+ adv(scanner, lexer);
1156
+ mrk_end(scanner, lexer);
1157
+ RET_SYM(BR_FLW_JSV_BGN);
1158
+ }
1159
+ bool is_r_blk_val_bgn = valid_symbols[R_BLK_VAL_BGN] && is_r;
1160
+ bool is_br_blk_val_bgn = valid_symbols[BR_BLK_VAL_BGN] && is_br;
1161
+ bool is_b_blk_val_bgn = valid_symbols[B_BLK_VAL_BGN] && is_b;
1162
+ bool is_r_blk_imp_bgn = valid_symbols[R_BLK_IMP_BGN] && is_r;
1163
+ bool is_r_flw_njv_bgn = valid_symbols[R_FLW_NJV_BGN] && is_r;
1164
+ bool is_br_flw_njv_bgn = valid_symbols[BR_FLW_NJV_BGN] && is_br;
1165
+ if (is_r_blk_val_bgn || is_br_blk_val_bgn || is_b_blk_val_bgn || is_r_blk_imp_bgn || is_r_flw_njv_bgn ||
1166
+ is_br_flw_njv_bgn) {
1167
+ adv(scanner, lexer);
1168
+ bool is_lka_wht = is_wht(lexer->lookahead);
1169
+ if (is_lka_wht) {
1170
+ if (is_r_blk_val_bgn) {
1171
+ PUSH_BGN_IND(IND_MAP);
1172
+ mrk_end(scanner, lexer);
1173
+ RET_SYM(R_BLK_VAL_BGN);
1174
+ }
1175
+ if (is_br_blk_val_bgn) {
1176
+ PUSH_BGN_IND(IND_MAP);
1177
+ mrk_end(scanner, lexer);
1178
+ RET_SYM(BR_BLK_VAL_BGN);
1179
+ }
1180
+ if (is_b_blk_val_bgn) {
1181
+ mrk_end(scanner, lexer);
1182
+ RET_SYM(B_BLK_VAL_BGN);
1183
+ }
1184
+ if (is_r_blk_imp_bgn) {
1185
+ MAY_PUSH_IMP_IND();
1186
+ mrk_end(scanner, lexer);
1187
+ RET_SYM(R_BLK_IMP_BGN);
1188
+ }
1189
+ }
1190
+ if (is_lka_wht || lexer->lookahead == ',' || lexer->lookahead == ']' || lexer->lookahead == '}') {
1191
+ if (is_r_flw_njv_bgn) {
1192
+ mrk_end(scanner, lexer);
1193
+ RET_SYM(R_FLW_NJV_BGN);
1194
+ }
1195
+ if (is_br_flw_njv_bgn) {
1196
+ mrk_end(scanner, lexer);
1197
+ RET_SYM(BR_FLW_NJV_BGN);
1198
+ }
1199
+ }
1200
+ }
1201
+ } else if (lexer->lookahead == '-') {
1202
+ bool is_r_blk_seq_bgn = valid_symbols[R_BLK_SEQ_BGN] && is_r;
1203
+ bool is_br_blk_seq_bgn = valid_symbols[BR_BLK_SEQ_BGN] && is_br;
1204
+ bool is_b_blk_seq_bgn = valid_symbols[B_BLK_SEQ_BGN] && is_b;
1205
+ bool is_s_drs_end = is_s;
1206
+ if (is_r_blk_seq_bgn || is_br_blk_seq_bgn || is_b_blk_seq_bgn || is_s_drs_end) {
1207
+ adv(scanner, lexer);
1208
+ if (is_wht(lexer->lookahead)) {
1209
+ if (is_r_blk_seq_bgn) {
1210
+ PUSH_BGN_IND(IND_SEQ);
1211
+ mrk_end(scanner, lexer);
1212
+ RET_SYM(R_BLK_SEQ_BGN)
1213
+ }
1214
+ if (is_br_blk_seq_bgn) {
1215
+ PUSH_BGN_IND(IND_SEQ);
1216
+ mrk_end(scanner, lexer);
1217
+ RET_SYM(BR_BLK_SEQ_BGN)
1218
+ }
1219
+ if (is_b_blk_seq_bgn) {
1220
+ MAY_PUSH_SPC_SEQ_IND();
1221
+ mrk_end(scanner, lexer);
1222
+ RET_SYM(B_BLK_SEQ_BGN)
1223
+ }
1224
+ } else if (lexer->lookahead == '-' && is_s_drs_end) {
1225
+ adv(scanner, lexer);
1226
+ if (lexer->lookahead == '-') {
1227
+ adv(scanner, lexer);
1228
+ if (is_wht(lexer->lookahead)) {
1229
+ if (valid_symbols[BL]) {
1230
+ POP_IND();
1231
+ RET_SYM(BL);
1232
+ }
1233
+ mrk_end(scanner, lexer);
1234
+ RET_SYM(S_DRS_END);
1235
+ }
1236
+ }
1237
+ }
1238
+ }
1239
+ } else if (lexer->lookahead == '.') {
1240
+ if (is_s) {
1241
+ adv(scanner, lexer);
1242
+ if (lexer->lookahead == '.') {
1243
+ adv(scanner, lexer);
1244
+ if (lexer->lookahead == '.') {
1245
+ adv(scanner, lexer);
1246
+ if (is_wht(lexer->lookahead)) {
1247
+ if (valid_symbols[BL]) {
1248
+ POP_IND();
1249
+ RET_SYM(BL);
1250
+ }
1251
+ mrk_end(scanner, lexer);
1252
+ RET_SYM(S_DOC_END);
1253
+ }
1254
+ }
1255
+ }
1256
+ }
1257
+ } else if (lexer->lookahead == '\\') {
1258
+ bool is_r_dqt_esc_nwl = valid_symbols[R_DQT_ESC_NWL] && is_r;
1259
+ bool is_br_dqt_esc_nwl = valid_symbols[BR_DQT_ESC_NWL] && is_br;
1260
+ bool is_r_dqt_esc_seq = valid_symbols[R_DQT_ESC_SEQ] && is_r;
1261
+ bool is_br_dqt_esc_seq = valid_symbols[BR_DQT_ESC_SEQ] && is_br;
1262
+ if (is_r_dqt_esc_nwl || is_br_dqt_esc_nwl || is_r_dqt_esc_seq || is_br_dqt_esc_seq) {
1263
+ adv(scanner, lexer);
1264
+ if (is_nwl(lexer->lookahead)) {
1265
+ if (is_r_dqt_esc_nwl) {
1266
+ mrk_end(scanner, lexer);
1267
+ RET_SYM(R_DQT_ESC_NWL)
1268
+ }
1269
+ if (is_br_dqt_esc_nwl) {
1270
+ mrk_end(scanner, lexer);
1271
+ RET_SYM(BR_DQT_ESC_NWL)
1272
+ }
1273
+ }
1274
+ if (is_r_dqt_esc_seq) {
1275
+ return scn_dqt_esc_seq(scanner, lexer, R_DQT_ESC_SEQ);
1276
+ }
1277
+ if (is_br_dqt_esc_seq) {
1278
+ return scn_dqt_esc_seq(scanner, lexer, BR_DQT_ESC_SEQ);
1279
+ }
1280
+ return false;
1281
+ }
1282
+ } else if (lexer->lookahead == '|') {
1283
+ if (valid_symbols[R_BLK_LIT_BGN] && is_r) {
1284
+ return scn_blk_str_bgn(scanner, lexer, R_BLK_LIT_BGN);
1285
+ }
1286
+ if (valid_symbols[BR_BLK_LIT_BGN] && is_br) {
1287
+ return scn_blk_str_bgn(scanner, lexer, BR_BLK_LIT_BGN);
1288
+ }
1289
+ } else if (lexer->lookahead == '>') {
1290
+ if (valid_symbols[R_BLK_FLD_BGN] && is_r) {
1291
+ return scn_blk_str_bgn(scanner, lexer, R_BLK_FLD_BGN);
1292
+ }
1293
+ if (valid_symbols[BR_BLK_FLD_BGN] && is_br) {
1294
+ return scn_blk_str_bgn(scanner, lexer, BR_BLK_FLD_BGN);
1295
+ }
1296
+ }
1297
+
1298
+ bool maybe_sgl_pln_blk = (valid_symbols[R_SGL_PLN_STR_BLK] && is_r) ||
1299
+ (valid_symbols[BR_SGL_PLN_STR_BLK] && is_br) || (valid_symbols[B_SGL_PLN_STR_BLK] && is_b);
1300
+ bool maybe_sgl_pln_flw = (valid_symbols[R_SGL_PLN_STR_FLW] && is_r) || (valid_symbols[BR_SGL_PLN_STR_FLW] && is_br);
1301
+ bool maybe_mtl_pln_blk = (valid_symbols[R_MTL_PLN_STR_BLK] && is_r) || (valid_symbols[BR_MTL_PLN_STR_BLK] && is_br);
1302
+ bool maybe_mtl_pln_flw = (valid_symbols[R_MTL_PLN_STR_FLW] && is_r) || (valid_symbols[BR_MTL_PLN_STR_FLW] && is_br);
1303
+
1304
+ if (maybe_sgl_pln_blk || maybe_sgl_pln_flw || maybe_mtl_pln_blk || maybe_mtl_pln_flw) {
1305
+ bool is_in_blk = maybe_sgl_pln_blk || maybe_mtl_pln_blk;
1306
+ bool (*is_plain_safe)(int32_t) = is_in_blk ? is_plain_safe_in_block : is_plain_safe_in_flow;
1307
+ if (scanner->cur_col - bgn_col == 0) {
1308
+ adv(scanner, lexer);
1309
+ }
1310
+ if (scanner->cur_col - bgn_col == 1) {
1311
+ bool is_plain_first =
1312
+ (is_ns_char(bgn_chr) && !is_c_indicator(bgn_chr)) ||
1313
+ ((bgn_chr == '-' || bgn_chr == '?' || bgn_chr == ':') && is_plain_safe(lexer->lookahead));
1314
+ if (!is_plain_first) {
1315
+ return false;
1316
+ }
1317
+ scanner->sch_stt = adv_sch_stt(scanner->sch_stt, scanner->cur_chr, &scanner->rlt_sch);
1318
+ } else {
1319
+ // no need to check the following cases:
1320
+ // ..X
1321
+ // ...X
1322
+ // --X
1323
+ // ---X
1324
+ // X: lookahead
1325
+ scanner->sch_stt = SCH_STT_FRZ; // must be RS_STR
1326
+ }
1327
+
1328
+ mrk_end(scanner, lexer);
1329
+
1330
+ for (;;) {
1331
+ if (!is_nwl(lexer->lookahead)) {
1332
+ if (scn_pln_cnt(scanner, lexer, is_plain_safe) != SCN_SUCC) {
1333
+ break;
1334
+ }
1335
+ }
1336
+ if (lexer->lookahead == 0 || !is_nwl(lexer->lookahead)) {
1337
+ break;
1338
+ }
1339
+ for (;;) {
1340
+ if (is_nwl(lexer->lookahead)) {
1341
+ adv_nwl(scanner, lexer);
1342
+ } else if (is_wsp(lexer->lookahead)) {
1343
+ adv(scanner, lexer);
1344
+ } else {
1345
+ break;
1346
+ }
1347
+ }
1348
+ if (lexer->lookahead == 0 || scanner->cur_col <= cur_ind) {
1349
+ break;
1350
+ }
1351
+ if (scanner->cur_col == 0 && scn_drs_doc_end(scanner, lexer)) {
1352
+ break;
1353
+ }
1354
+ }
1355
+
1356
+ if (scanner->end_row == bgn_row) {
1357
+ if (maybe_sgl_pln_blk) {
1358
+ MAY_UPD_IMP_COL();
1359
+ RET_SYM(is_r ? SGL_PLN_SYM(R, BLK) : is_br ? SGL_PLN_SYM(BR, BLK) : SGL_PLN_SYM(B, BLK));
1360
+ }
1361
+ if (maybe_sgl_pln_flw)
1362
+ RET_SYM(is_r ? SGL_PLN_SYM(R, FLW) : SGL_PLN_SYM(BR, FLW));
1363
+ } else {
1364
+ if (maybe_mtl_pln_blk) {
1365
+ MAY_UPD_IMP_COL();
1366
+ RET_SYM(is_r ? R_MTL_PLN_STR_BLK : BR_MTL_PLN_STR_BLK);
1367
+ }
1368
+ if (maybe_mtl_pln_flw)
1369
+ RET_SYM(is_r ? R_MTL_PLN_STR_FLW : BR_MTL_PLN_STR_FLW);
1370
+ }
1371
+
1372
+ return false;
1373
+ }
1374
+
1375
+ return !valid_symbols[ERR_REC];
1376
+ }
1377
+
1378
+ void *tree_sitter_yaml_external_scanner_create() {
1379
+ Scanner *scanner = ts_calloc(1, sizeof(Scanner));
1380
+ deserialize(scanner, NULL, 0);
1381
+ return scanner;
1382
+ }
1383
+
1384
+ void tree_sitter_yaml_external_scanner_destroy(void *payload) {
1385
+ Scanner *scanner = (Scanner *)payload;
1386
+ array_delete(&scanner->ind_len_stk);
1387
+ array_delete(&scanner->ind_typ_stk);
1388
+ ts_free(scanner);
1389
+ }
1390
+
1391
+ unsigned tree_sitter_yaml_external_scanner_serialize(void *payload, char *buffer) {
1392
+ Scanner *scanner = (Scanner *)payload;
1393
+ return serialize(scanner, buffer);
1394
+ }
1395
+
1396
+ void tree_sitter_yaml_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
1397
+ Scanner *scanner = (Scanner *)payload;
1398
+ deserialize(scanner, buffer, length);
1399
+ }
1400
+
1401
+ bool tree_sitter_yaml_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
1402
+ Scanner *scanner = (Scanner *)payload;
1403
+ return scan(scanner, lexer, valid_symbols);
1404
+ }