tree-sitter-ucode 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +49 -58
  2. package/grammar.js +214 -28
  3. package/markup/grammar.js +1057 -0
  4. package/markup/queries/folds.scm +20 -0
  5. package/markup/queries/highlights.scm +38 -0
  6. package/markup/queries/indents.scm +51 -0
  7. package/markup/queries/injections.scm +40 -0
  8. package/markup/queries/locals.scm +107 -0
  9. package/markup/queries/tags.scm +65 -0
  10. package/markup/queries/textobjects.scm +56 -0
  11. package/markup/src/grammar.json +5786 -0
  12. package/markup/src/node-types.json +3211 -0
  13. package/markup/src/parser.c +134461 -0
  14. package/markup/src/scanner.c +22 -0
  15. package/package.json +8 -7
  16. package/prebuilds/darwin-arm64/tree-sitter-ucode.node +0 -0
  17. package/prebuilds/linux-arm64/tree-sitter-ucode.node +0 -0
  18. package/prebuilds/linux-x64/tree-sitter-ucode.node +0 -0
  19. package/prebuilds/win32-x64/tree-sitter-ucode.node +0 -0
  20. package/queries/folds.scm +38 -0
  21. package/queries/highlights.scm +6 -0
  22. package/queries/indents.scm +63 -0
  23. package/queries/locals.scm +1 -0
  24. package/queries/textobjects.scm +84 -0
  25. package/scripts/generate-markup-grammar.js +93 -0
  26. package/src/grammar.json +1069 -226
  27. package/src/node-types.json +662 -8
  28. package/src/parser.c +106401 -25117
  29. package/src/scanner.c +16 -193
  30. package/src/scanner_impl.h +494 -0
  31. package/tree-sitter-ucode.wasm +0 -0
  32. package/tree-sitter-ucode_markup.wasm +0 -0
  33. package/tree-sitter.json +24 -12
  34. package/tmpl/grammar.js +0 -67
  35. package/tmpl/queries/highlights.scm +0 -23
  36. package/tmpl/queries/injections.scm +0 -8
  37. package/tmpl/queries/locals.scm +0 -3
  38. package/tmpl/src/grammar.json +0 -243
  39. package/tmpl/src/node-types.json +0 -230
  40. package/tmpl/src/parser.c +0 -707
  41. package/tmpl/src/scanner.c +0 -169
  42. package/tree-sitter-ucode_tmpl.wasm +0 -0
  43. /package/{tmpl → markup}/src/tree_sitter/alloc.h +0 -0
  44. /package/{tmpl → markup}/src/tree_sitter/array.h +0 -0
  45. /package/{tmpl → markup}/src/tree_sitter/parser.h +0 -0
package/src/scanner.c CHANGED
@@ -1,199 +1,22 @@
1
- #include "tree_sitter/parser.h"
2
- #include <wctype.h>
1
+ /*
2
+ * External scanner for the ucode grammar.
3
+ *
4
+ * All implementation lives in scanner_impl.h (static functions). This file
5
+ * only exports the five tree_sitter_ucode_external_scanner_* entry points
6
+ * that the generated parser expects.
7
+ */
3
8
 
4
- // Must match the order of externals in grammar.js
5
- enum TokenType {
6
- AUTOMATIC_SEMICOLON,
7
- TEMPLATE_CHARS,
8
- TERNARY_QMARK,
9
- };
9
+ #include "scanner_impl.h"
10
10
 
11
11
  void *tree_sitter_ucode_external_scanner_create(void) { return NULL; }
12
- void tree_sitter_ucode_external_scanner_destroy(void *p) { (void)p; }
13
- unsigned tree_sitter_ucode_external_scanner_serialize(void *p, char *buf) { (void)p; (void)buf; return 0; }
14
- void tree_sitter_ucode_external_scanner_deserialize(void *p, const char *buf, unsigned n) { (void)p; (void)buf; (void)n; }
15
-
16
- static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
17
- static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
18
-
19
- static bool scan_template_chars(TSLexer *lexer) {
20
- lexer->result_symbol = TEMPLATE_CHARS;
21
- for (bool has_content = false;; has_content = true) {
22
- lexer->mark_end(lexer);
23
- switch (lexer->lookahead) {
24
- case '`': return has_content;
25
- case '\0': return false;
26
- case '$':
27
- advance(lexer);
28
- if (lexer->lookahead == '{') return has_content;
29
- break;
30
- case '\\': return has_content;
31
- default: advance(lexer);
32
- }
33
- }
34
- }
35
-
36
- // Implements the three ASI rules from ECMA-262 §12.10 (Automatic Semicolon
37
- // Insertion): insert before a token that the grammar doesn't allow if (1) a
38
- // line terminator precedes it, (2) it is `}`, or (3) the input is exhausted.
39
- static bool scan_automatic_semicolon(TSLexer *lexer) {
40
- lexer->result_symbol = AUTOMATIC_SEMICOLON;
41
- lexer->mark_end(lexer);
42
-
43
- // EOF is always a valid semicolon position
44
- if (lexer->lookahead == 0) return true;
45
-
46
- // `}` closes the current block — valid semicolon
47
- if (lexer->lookahead == '}') return true;
48
-
49
- // Skip whitespace and comments looking for a line terminator.
50
- // Per the ECMAScript spec, a line terminator inside a block comment
51
- // counts as a line terminator for ASI purposes.
52
- for (;;) {
53
- if (lexer->lookahead == 0) return true;
54
- if (lexer->lookahead == '}') return true;
55
-
56
- if (lexer->lookahead == '\r' || lexer->lookahead == '\n' ||
57
- lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
58
- skip(lexer);
59
- break;
60
- }
61
-
62
- // Skip inline whitespace (any Unicode space that is not a line terminator)
63
- if (iswspace(lexer->lookahead)) {
64
- skip(lexer);
65
- continue;
66
- }
67
-
68
- // Skip line comment or block comment
69
- if (lexer->lookahead == '/') {
70
- skip(lexer);
71
- if (lexer->lookahead == '/') {
72
- skip(lexer);
73
- while (lexer->lookahead != 0 && lexer->lookahead != '\r' &&
74
- lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
75
- lexer->lookahead != 0x2029) {
76
- skip(lexer);
77
- }
78
- continue;
79
- }
80
- if (lexer->lookahead == '*') {
81
- skip(lexer);
82
- bool has_newline = false;
83
- while (lexer->lookahead != 0) {
84
- if (lexer->lookahead == '\r' || lexer->lookahead == '\n' ||
85
- lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
86
- has_newline = true;
87
- skip(lexer);
88
- } else if (lexer->lookahead == '*') {
89
- skip(lexer);
90
- if (lexer->lookahead == '/') { skip(lexer); break; }
91
- } else {
92
- skip(lexer);
93
- }
94
- }
95
- if (has_newline) break;
96
- continue;
97
- }
98
- // Not a comment — division or start of something else, no ASI
99
- return false;
100
- }
101
-
102
- // Any other non-whitespace on same line: no ASI
103
- return false;
104
- }
105
-
106
- // We found a line terminator. Skip whitespace and comments before
107
- // checking whether the next token could continue the expression.
108
- for (;;) {
109
- if (lexer->lookahead == 0) return true;
110
-
111
- if (iswspace(lexer->lookahead)) {
112
- skip(lexer);
113
- continue;
114
- }
115
-
116
- if (lexer->lookahead == '/') {
117
- skip(lexer);
118
- if (lexer->lookahead == '/') {
119
- skip(lexer);
120
- while (lexer->lookahead != 0 && lexer->lookahead != '\r' &&
121
- lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
122
- lexer->lookahead != 0x2029) {
123
- skip(lexer);
124
- }
125
- continue;
126
- }
127
- if (lexer->lookahead == '*') {
128
- skip(lexer);
129
- while (lexer->lookahead != 0) {
130
- if (lexer->lookahead == '*') {
131
- skip(lexer);
132
- if (lexer->lookahead == '/') { skip(lexer); break; }
133
- } else {
134
- skip(lexer);
135
- }
136
- }
137
- continue;
138
- }
139
- // Not a comment — treat like any other '/'
140
- return false;
141
- }
142
-
143
- break;
144
- }
145
-
146
- // Tokens that can continue the prior expression suppress ASI;
147
- // anything else (identifier, keyword, number, …) gets one inserted.
148
- switch (lexer->lookahead) {
149
- case '(': case '[': case '`':
150
- case '.': case ',': case ';':
151
- case '+': case '-': case '*': case '%':
152
- case '=': case '<': case '>': case '!': case '~':
153
- case '&': case '|': case '^': case '?':
154
- return false;
155
- default:
156
- return true;
157
- }
12
+ void tree_sitter_ucode_external_scanner_destroy(void *p) { (void)p; }
13
+ unsigned tree_sitter_ucode_external_scanner_serialize(void *p, char *b) { (void)p; (void)b; return 0; }
14
+ void tree_sitter_ucode_external_scanner_deserialize(void *p, const char *b, unsigned n) {
15
+ (void)p; (void)b; (void)n;
158
16
  }
159
17
 
160
- static bool scan_ternary_qmark(TSLexer *lexer) {
161
- while (lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
162
- lexer->lookahead != 0x2028 && lexer->lookahead != 0x2029 &&
163
- iswspace(lexer->lookahead)) skip(lexer);
164
-
165
- if (lexer->lookahead != '?') return false;
166
- advance(lexer);
167
-
168
- // `??` is nullish coalescing, not ternary
169
- if (lexer->lookahead == '?') return false;
170
-
171
- lexer->mark_end(lexer);
172
- lexer->result_symbol = TERNARY_QMARK;
173
-
174
- // `?.` followed by digit is ternary (not optional chain)
175
- if (lexer->lookahead == '.') {
176
- advance(lexer);
177
- return iswdigit(lexer->lookahead);
178
- }
179
-
180
- return true;
181
- }
182
-
183
- bool tree_sitter_ucode_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
184
- if (valid_symbols[TEMPLATE_CHARS] && !valid_symbols[AUTOMATIC_SEMICOLON]) {
185
- return scan_template_chars(lexer);
186
- }
187
-
188
- if (valid_symbols[AUTOMATIC_SEMICOLON]) {
189
- if (scan_automatic_semicolon(lexer)) return true;
190
- if (valid_symbols[TERNARY_QMARK]) return scan_ternary_qmark(lexer);
191
- return false;
192
- }
193
-
194
- if (valid_symbols[TERNARY_QMARK]) {
195
- return scan_ternary_qmark(lexer);
196
- }
197
-
198
- return false;
18
+ bool tree_sitter_ucode_external_scanner_scan(
19
+ void *payload, TSLexer *lexer, const bool *valid_symbols
20
+ ) {
21
+ return ucode_scanner_scan(payload, lexer, valid_symbols);
199
22
  }