tree-sitter-ucode 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +32 -81
  2. package/grammar.js +214 -28
  3. package/markup/grammar.js +1057 -0
  4. package/markup/queries/folds.scm +20 -0
  5. package/markup/queries/highlights.scm +38 -0
  6. package/markup/queries/indents.scm +51 -0
  7. package/markup/queries/injections.scm +40 -0
  8. package/markup/queries/locals.scm +107 -0
  9. package/markup/queries/tags.scm +65 -0
  10. package/markup/queries/textobjects.scm +56 -0
  11. package/markup/src/grammar.json +5786 -0
  12. package/markup/src/node-types.json +3211 -0
  13. package/markup/src/parser.c +134461 -0
  14. package/markup/src/scanner.c +22 -0
  15. package/package.json +6 -5
  16. package/prebuilds/darwin-arm64/tree-sitter-ucode.node +0 -0
  17. package/prebuilds/linux-arm64/tree-sitter-ucode.node +0 -0
  18. package/prebuilds/linux-x64/tree-sitter-ucode.node +0 -0
  19. package/prebuilds/win32-x64/tree-sitter-ucode.node +0 -0
  20. package/queries/locals.scm +1 -0
  21. package/scripts/generate-markup-grammar.js +93 -0
  22. package/src/grammar.json +1069 -226
  23. package/src/node-types.json +662 -8
  24. package/src/parser.c +106401 -25117
  25. package/src/scanner.c +16 -193
  26. package/src/scanner_impl.h +494 -0
  27. package/tree-sitter-ucode.wasm +0 -0
  28. package/tree-sitter-ucode_markup.wasm +0 -0
  29. package/tree-sitter.json +33 -22
  30. package/tmpl/grammar.js +0 -68
  31. package/tmpl/queries/folds.scm +0 -4
  32. package/tmpl/queries/highlights.scm +0 -23
  33. package/tmpl/queries/indents.scm +0 -5
  34. package/tmpl/queries/injections.scm +0 -8
  35. package/tmpl/queries/locals.scm +0 -3
  36. package/tmpl/src/grammar.json +0 -251
  37. package/tmpl/src/node-types.json +0 -238
  38. package/tmpl/src/parser.c +0 -724
  39. package/tmpl/src/scanner.c +0 -174
  40. package/tree-sitter-ucode_tmpl.wasm +0 -0
  41. /package/{tmpl → markup}/src/tree_sitter/alloc.h +0 -0
  42. /package/{tmpl → markup}/src/tree_sitter/array.h +0 -0
  43. /package/{tmpl → markup}/src/tree_sitter/parser.h +0 -0
@@ -0,0 +1,494 @@
1
+ /*
2
+ * Shared external-scanner implementation for ucode and ucode_markup.
3
+ *
4
+ * Included by src/scanner.c and markup/src/scanner.c. Every function here
5
+ * is static so each compilation unit gets its own copy and no ucode_*
6
+ * symbols leak into the markup shared library.
7
+ *
8
+ * Token order MUST match the `externals` array in grammar.js:
9
+ * 0 AUTOMATIC_SEMICOLON $._automatic_semicolon
10
+ * 1 TEMPLATE_CHARS $._template_chars
11
+ * 2 TERNARY_QMARK $._ternary_qmark
12
+ * 3 RAW_TEXT $.raw_text
13
+ * 4 STATEMENT_TAG_OPEN $.statement_tag_open {%
14
+ * 5 STATEMENT_TAG_TRIM_OPEN $.statement_tag_trim_open {%-
15
+ * 6 STATEMENT_TAG_LSTRIP_OPEN $.statement_tag_lstrip_open {%+
16
+ * 7 STATEMENT_TAG_CLOSE $.statement_tag_close %}
17
+ * 8 STATEMENT_TAG_TRIM_CLOSE $.statement_tag_trim_close -%}
18
+ * 9 EXPRESSION_TAG_OPEN $.expression_tag_open {{
19
+ * 10 EXPRESSION_TAG_TRIM_OPEN $.expression_tag_trim_open {{-
20
+ * 11 EXPRESSION_TAG_CLOSE $.expression_tag_close }}
21
+ * 12 EXPRESSION_TAG_TRIM_CLOSE $.expression_tag_trim_close -}}
22
+ */
23
+
24
+ #ifndef UCODE_SCANNER_IMPL_H_
25
+ #define UCODE_SCANNER_IMPL_H_
26
+
27
+ #include "tree_sitter/parser.h"
28
+ #include <wctype.h>
29
+
30
/*
 * External token identifiers.
 *
 * Each enumerator is used both as an index into valid_symbols[] and as the
 * value stored in lexer->result_symbol.  The values are spelled out so the
 * correspondence with the `externals` array in grammar.js is visible at a
 * glance; they are the same values the implicit numbering would produce.
 */
enum TokenType {
    AUTOMATIC_SEMICOLON       = 0,
    TEMPLATE_CHARS            = 1,
    TERNARY_QMARK             = 2,
    RAW_TEXT                  = 3,
    STATEMENT_TAG_OPEN        = 4,  /* {%  */
    STATEMENT_TAG_TRIM_OPEN   = 5,  /* {%- */
    STATEMENT_TAG_LSTRIP_OPEN = 6,  /* {%+ */
    STATEMENT_TAG_CLOSE       = 7,  /* %}  */
    STATEMENT_TAG_TRIM_CLOSE  = 8,  /* -%} */
    EXPRESSION_TAG_OPEN       = 9,  /* {{  */
    EXPRESSION_TAG_TRIM_OPEN  = 10, /* {{- */
    EXPRESSION_TAG_CLOSE      = 11, /* }}  */
    EXPRESSION_TAG_TRIM_CLOSE = 12, /* -}} */
};
45
+
46
+ static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
47
+ static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
48
+
49
+ /* -------------------------------------------------------------------------
50
+ * Markup-mode tokens
51
+ * ---------------------------------------------------------------------- */
52
+
53
+ /*
54
+ * scan_raw_text_from(lexer, has_content)
55
+ *
56
+ * Core raw-text loop. Caller sets has_content=true when it has already
57
+ * consumed one or more characters (e.g. a lone '{' that turned out not to
58
+ * be a tag opener) so that the scanner returns true even if no additional
59
+ * characters follow.
60
+ *
61
+ * Stops BEFORE '{' that is followed by '%', '{', or '#' (tag/comment openers).
62
+ * A lone '{' is committed on the next iteration's mark_end.
63
+ */
64
+ static bool scan_raw_text_from(TSLexer *lexer, bool has_content) {
65
+ lexer->result_symbol = RAW_TEXT;
66
+ while (true) {
67
+ lexer->mark_end(lexer);
68
+ if (lexer->lookahead == '\0') return has_content;
69
+ if (lexer->lookahead == '{') {
70
+ advance(lexer);
71
+ if (lexer->lookahead == '%' ||
72
+ lexer->lookahead == '{' ||
73
+ lexer->lookahead == '#')
74
+ return has_content;
75
+ } else {
76
+ advance(lexer);
77
+ }
78
+ has_content = true;
79
+ }
80
+ }
81
+
82
+ /*
83
+ * scan_markup(lexer, valid_symbols)
84
+ *
85
+ * Unified handler for all three markup-opener tokens (RAW_TEXT,
86
+ * STATEMENT_TAG_OPEN, EXPRESSION_TAG_OPEN). Must be called when at
87
+ * least one of those three is valid.
88
+ *
89
+ * Problem with calling separate sub-scanners sequentially:
90
+ * scan_raw_text advances past '{' when it returns false (tag found),
91
+ * leaving the lexer at position+1. Subsequent sub-scanners then see
92
+ * the wrong character and also fail, so the whole scanner returns false
93
+ * and tree-sitter falls back to the internal '{' token — which has no
94
+ * valid action in the markup root state and triggers error recovery.
95
+ *
96
+ * Fix: handle '{' atomically here. Advance past '{' exactly once, inspect
97
+ * the second character, then dispatch without any further position skew.
98
+ */
99
+ static bool scan_markup(TSLexer *lexer, const bool *valid_symbols) {
100
+ /* Not at '{': only raw text is possible. */
101
+ if (lexer->lookahead != '{')
102
+ return valid_symbols[RAW_TEXT] ? scan_raw_text_from(lexer, false) : false;
103
+
104
+ /* Peek at the second character by advancing past '{'. */
105
+ advance(lexer);
106
+
107
+ /* {% {%- {%+ — statement tag open (emit the precise variant) */
108
+ if (lexer->lookahead == '%' &&
109
+ (valid_symbols[STATEMENT_TAG_OPEN] ||
110
+ valid_symbols[STATEMENT_TAG_TRIM_OPEN] ||
111
+ valid_symbols[STATEMENT_TAG_LSTRIP_OPEN])) {
112
+ advance(lexer);
113
+ if (lexer->lookahead == '-') {
114
+ advance(lexer);
115
+ lexer->mark_end(lexer);
116
+ lexer->result_symbol = STATEMENT_TAG_TRIM_OPEN;
117
+ return true;
118
+ }
119
+ if (lexer->lookahead == '+') {
120
+ advance(lexer);
121
+ lexer->mark_end(lexer);
122
+ lexer->result_symbol = STATEMENT_TAG_LSTRIP_OPEN;
123
+ return true;
124
+ }
125
+ lexer->mark_end(lexer);
126
+ lexer->result_symbol = STATEMENT_TAG_OPEN;
127
+ return true;
128
+ }
129
+
130
+ /* {{ {{- — expression tag open (emit the precise variant) */
131
+ if (lexer->lookahead == '{' &&
132
+ (valid_symbols[EXPRESSION_TAG_OPEN] || valid_symbols[EXPRESSION_TAG_TRIM_OPEN])) {
133
+ advance(lexer);
134
+ if (lexer->lookahead == '-') {
135
+ advance(lexer);
136
+ lexer->mark_end(lexer);
137
+ lexer->result_symbol = EXPRESSION_TAG_TRIM_OPEN;
138
+ return true;
139
+ }
140
+ lexer->mark_end(lexer);
141
+ lexer->result_symbol = EXPRESSION_TAG_OPEN;
142
+ return true;
143
+ }
144
+
145
+ /* {# {#- — comment tag; let the internal lexer match the literal '{#'. */
146
+ if (lexer->lookahead == '#') return false;
147
+
148
+ /* '{' followed by anything else: include it in raw text. */
149
+ return valid_symbols[RAW_TEXT] ? scan_raw_text_from(lexer, true) : false;
150
+ }
151
+
152
+
153
+ /*
154
+ * Scan statement tag close: %} -%}
155
+ * Skip leading whitespace — the scanner is responsible for consuming optional
156
+ * spaces/tabs between the last code token and the close marker.
157
+ */
158
+ static bool scan_statement_tag_close(TSLexer *lexer) {
159
+ while (lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
160
+ lexer->lookahead == '\r' || lexer->lookahead == '\n')
161
+ skip(lexer);
162
+
163
+ if (lexer->lookahead == '-') {
164
+ advance(lexer);
165
+ if (lexer->lookahead != '%') return false;
166
+ advance(lexer);
167
+ if (lexer->lookahead != '}') return false;
168
+ advance(lexer);
169
+ lexer->mark_end(lexer);
170
+ lexer->result_symbol = STATEMENT_TAG_TRIM_CLOSE;
171
+ return true;
172
+ }
173
+ if (lexer->lookahead == '%') {
174
+ advance(lexer);
175
+ if (lexer->lookahead != '}') return false;
176
+ advance(lexer);
177
+ lexer->mark_end(lexer);
178
+ lexer->result_symbol = STATEMENT_TAG_CLOSE;
179
+ return true;
180
+ }
181
+ return false;
182
+ }
183
+
184
+ /*
185
+ * Scan expression tag close: }} -}}
186
+ * Skip leading whitespace — spaces between the expression and }} are ignored.
187
+ */
188
+ static bool scan_expression_tag_close(TSLexer *lexer) {
189
+ while (lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
190
+ lexer->lookahead == '\r' || lexer->lookahead == '\n')
191
+ skip(lexer);
192
+
193
+ if (lexer->lookahead == '-') {
194
+ advance(lexer);
195
+ if (lexer->lookahead != '}') return false;
196
+ advance(lexer);
197
+ if (lexer->lookahead != '}') return false;
198
+ advance(lexer);
199
+ lexer->mark_end(lexer);
200
+ lexer->result_symbol = EXPRESSION_TAG_TRIM_CLOSE;
201
+ return true;
202
+ }
203
+ if (lexer->lookahead == '}') {
204
+ advance(lexer);
205
+ if (lexer->lookahead != '}') return false;
206
+ advance(lexer);
207
+ lexer->mark_end(lexer);
208
+ lexer->result_symbol = EXPRESSION_TAG_CLOSE;
209
+ return true;
210
+ }
211
+ return false;
212
+ }
213
+
214
+ /* -------------------------------------------------------------------------
215
+ * Code-mode tokens (carried over and extended from the original scanner)
216
+ * ---------------------------------------------------------------------- */
217
+
218
+ static bool scan_template_chars(TSLexer *lexer) {
219
+ lexer->result_symbol = TEMPLATE_CHARS;
220
+ for (bool has_content = false;; has_content = true) {
221
+ lexer->mark_end(lexer);
222
+ switch (lexer->lookahead) {
223
+ case '`': return has_content;
224
+ case '\0': return false;
225
+ case '$':
226
+ advance(lexer);
227
+ if (lexer->lookahead == '{') return has_content;
228
+ break;
229
+ case '\\': return has_content;
230
+ default: advance(lexer);
231
+ }
232
+ }
233
+ }
234
+
235
+ /*
236
+ * Return true if the lookahead is the start of %} or -%} (statement tag
237
+ * close). Used during ASI scanning to allow a zero-length semicolon to be
238
+ * inserted immediately before the tag close without consuming any characters.
239
+ */
240
+ static bool lookahead_is_stmt_close(TSLexer *lexer) {
241
+ if (lexer->lookahead == '%') {
242
+ advance(lexer);
243
+ return lexer->lookahead == '}';
244
+ }
245
+ if (lexer->lookahead == '-') {
246
+ advance(lexer);
247
+ if (lexer->lookahead == '%') {
248
+ advance(lexer);
249
+ return lexer->lookahead == '}';
250
+ }
251
+ }
252
+ return false;
253
+ }
254
+
255
+ /*
256
+ * Automatic Semicolon Insertion (ECMA-262 §12.10).
257
+ *
258
+ * Extended to allow ASI immediately before %} and -%} so that the last
259
+ * statement in a statement tag does not need an explicit trailing semicolon:
260
+ * {% let x = 1 %} — works without a semicolon
261
+ * {% return val %} — works without a semicolon
262
+ *
263
+ * The tag-close characters are never consumed; mark_end stays at position 0,
264
+ * so the zero-length semicolon token is emitted and the scanner is called
265
+ * again immediately at the same position for STATEMENT_TAG_CLOSE.
266
+ */
267
+ static bool scan_automatic_semicolon(TSLexer *lexer) {
268
+ lexer->result_symbol = AUTOMATIC_SEMICOLON;
269
+ lexer->mark_end(lexer);
270
+
271
+ /* ECMAScript rule 1: '}' → insert */
272
+ if (lexer->lookahead == '}') return true;
273
+ /* ECMAScript rule 3: EOF → insert */
274
+ if (lexer->lookahead == 0) return true;
275
+
276
+ /* Ucode extension: %} or -%} at end of statement tag → insert */
277
+ if (lexer->lookahead == '%' || lexer->lookahead == '-') {
278
+ if (lookahead_is_stmt_close(lexer)) return true;
279
+ /* lookahead_is_stmt_close advanced but mark_end is still at 0,
280
+ so the peek is harmless — return false to suppress ASI for
281
+ regular '-' or '%' continuations. */
282
+ return false;
283
+ }
284
+
285
+ /*
286
+ * ECMAScript rule 2: scan for a line terminator before the next token.
287
+ * Skip inline whitespace and comments; bail on anything else on the
288
+ * same line.
289
+ */
290
+ for (;;) {
291
+ if (lexer->lookahead == 0) return true;
292
+ if (lexer->lookahead == '}') return true;
293
+
294
+ if (lexer->lookahead == '\r' || lexer->lookahead == '\n' ||
295
+ lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
296
+ skip(lexer);
297
+ break; /* found line terminator */
298
+ }
299
+
300
+ if (iswspace(lexer->lookahead)) { skip(lexer); continue; }
301
+
302
+ /* Line comment — skip to end of line */
303
+ if (lexer->lookahead == '/') {
304
+ skip(lexer);
305
+ if (lexer->lookahead == '/') {
306
+ skip(lexer);
307
+ while (lexer->lookahead != 0 &&
308
+ lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
309
+ lexer->lookahead != 0x2028 && lexer->lookahead != 0x2029)
310
+ skip(lexer);
311
+ continue;
312
+ }
313
+ /* Block comment — check for embedded newline */
314
+ if (lexer->lookahead == '*') {
315
+ skip(lexer);
316
+ bool has_newline = false;
317
+ while (lexer->lookahead != 0) {
318
+ if (lexer->lookahead == '\r' || lexer->lookahead == '\n' ||
319
+ lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029)
320
+ has_newline = true;
321
+ if (lexer->lookahead == '*') {
322
+ skip(lexer);
323
+ if (lexer->lookahead == '/') { skip(lexer); break; }
324
+ } else {
325
+ skip(lexer);
326
+ }
327
+ }
328
+ if (has_newline) break;
329
+ continue;
330
+ }
331
+ /* Division slash — not a comment, no ASI */
332
+ return false;
333
+ }
334
+
335
+ /* %} / -%} on the same line as the expression: still allow ASI */
336
+ if (lexer->lookahead == '%' || lexer->lookahead == '-') {
337
+ if (lookahead_is_stmt_close(lexer)) return true;
338
+ return false;
339
+ }
340
+
341
+ /* Any other non-whitespace on the same line → no ASI */
342
+ return false;
343
+ }
344
+
345
+ /*
346
+ * Found a line terminator. Skip trailing whitespace/comments after it,
347
+ * then check whether the next real token would suppress ASI.
348
+ */
349
+ for (;;) {
350
+ if (lexer->lookahead == 0) return true;
351
+
352
+ if (iswspace(lexer->lookahead)) { skip(lexer); continue; }
353
+
354
+ if (lexer->lookahead == '/') {
355
+ skip(lexer);
356
+ if (lexer->lookahead == '/') {
357
+ skip(lexer);
358
+ while (lexer->lookahead != 0 &&
359
+ lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
360
+ lexer->lookahead != 0x2028 && lexer->lookahead != 0x2029)
361
+ skip(lexer);
362
+ continue;
363
+ }
364
+ if (lexer->lookahead == '*') {
365
+ skip(lexer);
366
+ while (lexer->lookahead != 0) {
367
+ if (lexer->lookahead == '*') {
368
+ skip(lexer);
369
+ if (lexer->lookahead == '/') { skip(lexer); break; }
370
+ } else {
371
+ skip(lexer);
372
+ }
373
+ }
374
+ continue;
375
+ }
376
+ return false; /* division slash after newline → no ASI */
377
+ }
378
+ break;
379
+ }
380
+
381
+ /*
382
+ * Tokens that can continue the prior expression suppress ASI.
383
+ * %} and -%} are tag closers and always allow ASI even though they
384
+ * start with '%' or '-'.
385
+ */
386
+ switch (lexer->lookahead) {
387
+ case '(': case '[': case '`':
388
+ case '.': case ',': case ';':
389
+ case '+': case '*':
390
+ case '=': case '<': case '>': case '!': case '~':
391
+ case '&': case '|': case '^': case '?':
392
+ return false;
393
+ case '-':
394
+ case '%':
395
+ if (lookahead_is_stmt_close(lexer)) return true;
396
+ return false;
397
+ default:
398
+ return true;
399
+ }
400
+ }
401
+
402
+ static bool scan_ternary_qmark(TSLexer *lexer) {
403
+ while (lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
404
+ lexer->lookahead != 0x2028 && lexer->lookahead != 0x2029 &&
405
+ iswspace(lexer->lookahead))
406
+ skip(lexer);
407
+
408
+ if (lexer->lookahead != '?') return false;
409
+ advance(lexer);
410
+
411
+ if (lexer->lookahead == '?') return false; /* nullish coalescing */
412
+
413
+ lexer->mark_end(lexer);
414
+ lexer->result_symbol = TERNARY_QMARK;
415
+
416
+ if (lexer->lookahead == '.') {
417
+ advance(lexer);
418
+ return iswdigit(lexer->lookahead); /* ?. followed by digit is ternary */
419
+ }
420
+
421
+ return true;
422
+ }
423
+
424
+ /* -------------------------------------------------------------------------
425
+ * Main dispatch
426
+ * ---------------------------------------------------------------------- */
427
+
428
+ static bool ucode_scanner_scan(
429
+ void *payload, TSLexer *lexer, const bool *valid_symbols
430
+ ) {
431
+ (void)payload;
432
+
433
+ /*
434
+ * Error-recovery guard.
435
+ *
436
+ * During error recovery tree-sitter sets every external token valid at
437
+ * once. AUTOMATIC_SEMICOLON (code context) and RAW_TEXT (markup context)
438
+ * are never simultaneously valid in a normal parse, so their co-presence
439
+ * signals error recovery. Return false so the parser uses its own grammar
440
+ * tokens for recovery instead of the scanner consuming raw_text.
441
+ */
442
+ if (valid_symbols[AUTOMATIC_SEMICOLON] && valid_symbols[RAW_TEXT])
443
+ return false;
444
+
445
+ /*
446
+ * Template chars: only when we are unambiguously inside a template
447
+ * literal body (not competing with ASI).
448
+ */
449
+ if (valid_symbols[TEMPLATE_CHARS] && !valid_symbols[AUTOMATIC_SEMICOLON])
450
+ return scan_template_chars(lexer);
451
+
452
+ /*
453
+ * Markup-mode tokens.
454
+ *
455
+ * All three markup openers are dispatched through scan_markup(), which
456
+ * handles the '{' character atomically — advancing past it once and then
457
+ * inspecting the second character — to avoid the position-skew bug that
458
+ * arises when sequential sub-scanners each try to advance past '{'.
459
+ */
460
+ if (valid_symbols[RAW_TEXT] ||
461
+ valid_symbols[STATEMENT_TAG_OPEN] ||
462
+ valid_symbols[STATEMENT_TAG_TRIM_OPEN] ||
463
+ valid_symbols[STATEMENT_TAG_LSTRIP_OPEN] ||
464
+ valid_symbols[EXPRESSION_TAG_OPEN] ||
465
+ valid_symbols[EXPRESSION_TAG_TRIM_OPEN]) {
466
+ if (scan_markup(lexer, valid_symbols)) return true;
467
+ }
468
+
469
+ /*
470
+ * Tag close tokens. Checked before ASI so that %} / -%} / }} / -}}
471
+ * are preferred over a zero-length semicolon when both are valid.
472
+ * When neither matches, fall through to ASI.
473
+ */
474
+ if (valid_symbols[STATEMENT_TAG_CLOSE] || valid_symbols[STATEMENT_TAG_TRIM_CLOSE]) {
475
+ if (scan_statement_tag_close(lexer)) return true;
476
+ }
477
+ if (valid_symbols[EXPRESSION_TAG_CLOSE] || valid_symbols[EXPRESSION_TAG_TRIM_CLOSE]) {
478
+ if (scan_expression_tag_close(lexer)) return true;
479
+ }
480
+
481
+ /* ASI and ternary */
482
+ if (valid_symbols[AUTOMATIC_SEMICOLON]) {
483
+ if (scan_automatic_semicolon(lexer)) return true;
484
+ if (valid_symbols[TERNARY_QMARK]) return scan_ternary_qmark(lexer);
485
+ return false;
486
+ }
487
+
488
+ if (valid_symbols[TERNARY_QMARK])
489
+ return scan_ternary_qmark(lexer);
490
+
491
+ return false;
492
+ }
493
+
494
+ #endif /* UCODE_SCANNER_IMPL_H_ */
Binary file
Binary file
package/tree-sitter.json CHANGED
@@ -1,48 +1,59 @@
1
1
  {
2
2
  "grammars": [
3
3
  {
4
- "name": "ucode_tmpl",
5
- "camelcase": "UcodeTmpl",
6
- "scope": "source.uc.tmpl",
7
- "path": "./tmpl",
4
+ "name": "ucode",
5
+ "camelcase": "Ucode",
6
+ "scope": "source.uc",
7
+ "path": ".",
8
8
  "file-types": [
9
- "uc",
10
- "utpl"
9
+ "ucode", "uc", "ut"
11
10
  ],
12
- "content-regex": "(?m)^\\s*\\{[%{#]",
13
11
  "highlights": [
14
- "tmpl/queries/highlights.scm"
12
+ "queries/highlights.scm"
15
13
  ],
16
14
  "locals": [
17
- "tmpl/queries/locals.scm"
15
+ "queries/locals.scm"
18
16
  ],
19
- "injections": [
20
- "tmpl/queries/injections.scm"
17
+ "tags": [
18
+ "queries/tags.scm"
21
19
  ],
22
- "injection-regex": "^ucode_tmpl$"
20
+ "injection-regex": "^ucode$"
23
21
  },
24
22
  {
25
- "name": "ucode",
26
- "camelcase": "Ucode",
27
- "scope": "source.uc",
28
- "path": ".",
23
+ "name": "ucode_markup",
24
+ "camelcase": "UcodeMarkup",
25
+ "scope": "source.ucode.markup",
26
+ "path": "./markup",
29
27
  "file-types": [
30
- "uc"
28
+ "ucode", "uc", "ut"
31
29
  ],
30
+ "content-regex": "(?m)^\\s*\\{[%{#]",
32
31
  "highlights": [
33
- "queries/highlights.scm"
32
+ "markup/queries/highlights.scm"
34
33
  ],
35
34
  "locals": [
36
- "queries/locals.scm"
35
+ "markup/queries/locals.scm"
36
+ ],
37
+ "injections": [
38
+ "markup/queries/injections.scm"
37
39
  ],
38
40
  "tags": [
39
- "queries/tags.scm"
41
+ "markup/queries/tags.scm"
40
42
  ],
41
- "injection-regex": "^ucode$"
43
+ "folds": [
44
+ "markup/queries/folds.scm"
45
+ ],
46
+ "indents": [
47
+ "markup/queries/indents.scm"
48
+ ],
49
+ "textobjects": [
50
+ "markup/queries/textobjects.scm"
51
+ ],
52
+ "injection-regex": "^ucode_markup$"
42
53
  }
43
54
  ],
44
55
  "metadata": {
45
- "version": "0.3.0",
56
+ "version": "0.4.0",
46
57
  "license": "MIT",
47
58
  "description": "Ucode grammar for tree-sitter",
48
59
  "links": {
package/tmpl/grammar.js DELETED
@@ -1,68 +0,0 @@
1
- /**
2
- * @file Ucode template grammar for tree-sitter
3
- * @license MIT
4
- *
5
- * Handles .uc.tmpl / .utpl files: raw text interspersed with ucode code
6
- * blocks ({%...%}), expression blocks ({{...}}), and comments ({#...#}).
7
- *
8
- * The grammar captures raw text verbatim and the inner content of code /
9
- * expression blocks as opaque `code` nodes. Editors use language injection
10
- * (see tmpl/queries/injections.scm) to parse those nodes as ucode.
11
- *
12
- * Whitespace-stripping markers are supported on both openers and closers:
13
- *
14
- * Opener variants Closer variants
15
- * ───────────── ──────────────
16
- * {% {%- {%+ %} -%} (statement)
17
- * {{ {{- }} -}} (expression)
18
- * {# {#- #} -#} (comment)
19
- *
20
- * {%- strips trailing whitespace from the preceding raw-text block.
21
- * -%} strips leading whitespace from the following raw-text block.
22
- * {%+ suppresses stripping even when lstrip_blocks is configured.
23
- */
24
-
25
- /// <reference types="tree-sitter-cli/dsl" />
26
- // @ts-check
27
-
28
- module.exports = grammar({
29
- name: 'ucode_tmpl',
30
-
31
- externals: $ => [
32
- $._raw_text, // literal text outside any tag
33
- $._stmt_code, // content between {% ... %} (before the closer)
34
- $._expr_code, // content between {{ ... }} (before the closer)
35
- $._comment_body, // content between {# ... #} (before the closer)
36
- $.eof_close, // emitted at EOF when inside a statement tag (implicit close)
37
- ],
38
-
39
- // The template scanner handles all whitespace explicitly; no implicit extras.
40
- extras: _ => [],
41
-
42
- rules: {
43
- document: $ => repeat(choice(
44
- alias($._raw_text, $.raw_text),
45
- $.statement_tag,
46
- $.expression_tag,
47
- $.comment_tag,
48
- )),
49
-
50
- statement_tag: $ => seq(
51
- field('open', choice('{%-', '{%+', '{%')),
52
- field('code', optional(alias($._stmt_code, $.code))),
53
- field('close', choice('-%}', '%}', $.eof_close)),
54
- ),
55
-
56
- expression_tag: $ => seq(
57
- field('open', choice('{{-', '{{')),
58
- field('code', optional(alias($._expr_code, $.code))),
59
- field('close', choice('-}}', '}}')),
60
- ),
61
-
62
- comment_tag: $ => seq(
63
- field('open', choice('{#-', '{#')),
64
- field('content', optional(alias($._comment_body, $.comment_content))),
65
- field('close', choice('-#}', '#}')),
66
- ),
67
- },
68
- });
@@ -1,4 +0,0 @@
1
- ; Fold queries for ucode_tmpl.
2
- ; Folds are not meaningful at the template document level since each tag is
3
- ; typically one or a few lines. Folding inside code blocks is handled by the
4
- ; injected ucode grammar.
@@ -1,23 +0,0 @@
1
- ; Highlight queries for ucode_tmpl.
2
- ; Template structure is styled here; ucode code inside tags is highlighted
3
- ; via language injection (see injections.scm).
4
-
5
- ; -------------------------------------------------------------------------
6
- ; Tag delimiters
7
- ; -------------------------------------------------------------------------
8
-
9
- [
10
- "{%" "%}"
11
- "{%-" "-%}"
12
- "{%+"
13
- "{{" "}}"
14
- "{{-" "-}}"
15
- "{#" "#}"
16
- "{#-" "-#}"
17
- ] @keyword
18
-
19
- ; -------------------------------------------------------------------------
20
- ; Template comments
21
- ; -------------------------------------------------------------------------
22
-
23
- (comment_tag) @comment @spell
@@ -1,5 +0,0 @@
1
- ; Indentation queries for ucode_tmpl.
2
- ; Raw text between tags is not indented by the template engine, so there are
3
- ; no template-level indent rules. Code inside statement_tag and expression_tag
4
- ; is parsed via language injection (see injections.scm) and indented according
5
- ; to the injected ucode grammar's indents.scm.
@@ -1,8 +0,0 @@
1
- ; Injection queries for ucode_tmpl.
2
- ; Instructs editors to parse code/expression block content as ucode.
3
-
4
- ((statement_tag (code) @injection.content)
5
- (#set! injection.language "ucode"))
6
-
7
- ((expression_tag (code) @injection.content)
8
- (#set! injection.language "ucode"))