@stacksjs/zig-dtsx 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,908 @@
1
+ /// Core TypeScript declaration scanner.
2
+ /// Single-pass character-by-character scanner that extracts declarations
3
+ /// and builds DTS text inline. Port of scanner.ts (~3200 lines).
4
+ const std = @import("std");
5
+ const ch = @import("char_utils.zig");
6
+ const types = @import("types.zig");
7
+ const Declaration = types.Declaration;
8
+ const DeclarationKind = types.DeclarationKind;
9
+ const Allocator = std.mem.Allocator;
10
+
11
/// What `Scanner.scan` hands back to its caller: the declaration list and
/// the map of non-exported type declarations.
pub const ScanResult = struct {
    /// Declarations collected by the scanner.
    declarations: std.array_list.Managed(Declaration),
    /// Non-exported type declarations; the string key is treated as the
    /// type's name when references are resolved.
    non_exported_types: std.StringHashMap(Declaration),
};
16
+
17
+ /// The main scanner state
18
+ pub const Scanner = struct {
19
+ source: []const u8,
20
+ pos: usize,
21
+ len: usize,
22
+ allocator: Allocator,
23
+ declarations: std.array_list.Managed(Declaration),
24
+ non_exported_types: std.StringHashMap(Declaration),
25
+ func_body_indices: std.AutoHashMap(usize, void),
26
+ keep_comments: bool,
27
+ isolated_declarations: bool,
28
+
29
/// Build a scanner positioned at the start of `source`.
///
/// The declaration list is pre-sized on a best-effort basis: if the
/// reservation fails the error is ignored and the list grows on demand.
pub fn init(allocator: Allocator, source: []const u8, keep_comments: bool, isolated_declarations: bool) Scanner {
    // Heuristic: roughly one declaration per 200 source bytes, never fewer than 8 slots.
    const initial_capacity: usize = @max(source.len / 200, 8);

    var decl_list = std.array_list.Managed(Declaration).init(allocator);
    decl_list.ensureTotalCapacity(initial_capacity) catch {};

    return Scanner{
        .source = source,
        .pos = 0,
        .len = source.len,
        .allocator = allocator,
        .declarations = decl_list,
        .non_exported_types = std.StringHashMap(Declaration).init(allocator),
        .func_body_indices = std.AutoHashMap(usize, void).init(allocator),
        .keep_comments = keep_comments,
        .isolated_declarations = isolated_declarations,
    };
}
45
+
46
/// Free the three containers held by the scanner.
///
/// `scan` returns copies of the `declarations` and `non_exported_types`
/// container structs, so a `ScanResult` refers to the same storage that
/// is released here.
pub fn deinit(self: *Scanner) void {
    self.func_body_indices.deinit();
    self.non_exported_types.deinit();
    self.declarations.deinit();
}
51
+
52
+ // ========================================================================
53
+ // Primitive scanning helpers (port of scanner.ts lines 70-588)
54
+ // ========================================================================
55
+
56
/// Return the `[start, end)` range of the source with surrounding
/// whitespace removed. Thin wrapper over `ch.sliceTrimmed`.
pub fn sliceTrimmed(self: *const Scanner, start: usize, end: usize) []const u8 {
    const trimmed = ch.sliceTrimmed(self.source, start, end);
    return trimmed;
}
60
+
61
/// Advance `pos` past any run of whitespace, `//` line comments and
/// `/* */` block comments. An unterminated comment consumes the rest of
/// the input.
pub fn skipWhitespaceAndComments(self: *Scanner) void {
    if (self.pos >= self.len) return;

    // Cheap early exit: only whitespace or a slash can start something skippable.
    const head = self.source[self.pos];
    const may_skip = head == ch.CH_SPACE or head == ch.CH_TAB or head == ch.CH_LF or
        head == ch.CH_CR or head == ch.CH_SLASH;
    if (!may_skip) return;

    const Block = @Vector(16, u8);
    const whitespace_ceiling: Block = @splat(32);

    while (self.pos < self.len) {
        // Vector fast path: consume 16 bytes at a time while every byte is <= 32.
        // Space (32), tab (9), LF (10) and CR (13) all satisfy this; other
        // control bytes are assumed not to occur in TypeScript source.
        while (self.pos + 16 <= self.len) {
            const block: Block = self.source[self.pos..][0..16].*;
            if (!@reduce(.And, block <= whitespace_ceiling)) break;
            self.pos += 16;
        }
        if (self.pos >= self.len) return;

        const c = self.source[self.pos];
        if (c == ch.CH_SPACE or c == ch.CH_TAB or c == ch.CH_LF or c == ch.CH_CR) {
            self.pos += 1;
            continue;
        }

        // Anything other than a comment opener ends the skip.
        if (c != ch.CH_SLASH or self.pos + 1 >= self.len) return;

        const follower = self.source[self.pos + 1];
        if (follower == ch.CH_SLASH) {
            // Line comment: resume after the next LF, or at end of input.
            self.pos = if (ch.indexOfChar(self.source, ch.CH_LF, self.pos + 2)) |newline| newline + 1 else self.len;
        } else if (follower == ch.CH_STAR) {
            // Block comment: resume after the closing "*/", or at end of input.
            self.pos = if (ch.indexOf(self.source, "*/", self.pos + 2)) |closer| closer + 2 else self.len;
        } else {
            return;
        }
    }
}
104
+
105
/// Move `pos` just past the string literal that starts at the current
/// position. `quote` is the delimiter byte (single or double quote).
/// A backslash skips the byte after it. If no closing quote is found,
/// `pos` ends up at `len`.
pub fn skipString(self: *Scanner, quote: u8) void {
    const Block = @Vector(16, u8);
    const quote_splat: Block = @splat(quote);
    const backslash_splat: Block = @splat(ch.CH_BACKSLASH);

    const text = self.source;
    const end = self.len;

    // Step over the opening quote.
    self.pos += 1;
    var cursor = self.pos;

    while (cursor < end) {
        // Vector search: find the first quote or backslash in 16-byte blocks.
        while (cursor + 16 <= end) {
            const block: Block = text[cursor..][0..16].*;
            const hits = (block == quote_splat) | (block == backslash_splat);
            if (!@reduce(.Or, hits)) {
                cursor += 16;
                continue;
            }
            // Lowest set bit corresponds to the first matching byte.
            const hit_bits: u16 = @bitCast(hits);
            cursor += @ctz(hit_bits);
            break;
        }

        // Scalar handling for the located byte or the sub-16-byte tail.
        if (cursor >= end) break;
        const c = text[cursor];
        if (c == quote) {
            self.pos = cursor + 1;
            return;
        }
        // A backslash consumes itself and the escaped byte.
        const step: usize = if (c == ch.CH_BACKSLASH) 2 else 1;
        cursor += step;
    }

    // Unterminated string: park at end of input.
    self.pos = end;
}
145
+
146
/// Move `pos` past the template literal that opens at the current
/// position. `${` raises an interpolation depth and `}` lowers it; only a
/// backtick seen at depth 0 terminates the literal. Running out of input
/// simply stops the scan.
pub fn skipTemplateLiteral(self: *Scanner) void {
    // Step over the opening backtick.
    self.pos += 1;

    var interpolation_depth: usize = 0;
    while (self.pos < self.len) {
        const c = self.source[self.pos];

        if (c == ch.CH_BACKSLASH) {
            // Skip the backslash and, when present, the escaped byte.
            self.pos = @min(self.pos + 2, self.len);
            continue;
        }

        if (c == ch.CH_BACKTICK and interpolation_depth == 0) {
            self.pos += 1;
            return;
        }

        const opens_interpolation = c == ch.CH_DOLLAR and
            self.pos + 1 < self.len and
            self.source[self.pos + 1] == ch.CH_LBRACE;
        if (opens_interpolation) {
            interpolation_depth += 1;
            self.pos += 2;
            continue;
        }

        if (c == ch.CH_RBRACE and interpolation_depth > 0) interpolation_depth -= 1;
        self.pos += 1;
    }
}
174
+
175
/// Decide whether the `/` at `pos` opens a regex literal rather than a
/// division, by inspecting the nearest non-whitespace byte before it.
/// Returns true at start of input, after one of a fixed set of
/// punctuation bytes, or after one of a fixed set of keywords.
pub fn isRegexStart(self: *const Scanner) bool {
    // `before` is one past the previous significant byte.
    var before: usize = self.pos;
    while (before > 0 and ch.isWhitespace(self.source[before - 1])) before -= 1;
    if (before == 0) return true; // nothing but whitespace precedes the slash

    const prev = self.source[before - 1];

    // Punctuation after which an expression (and thus a regex) may begin.
    const expression_openers = [_]u8{
        ch.CH_EQUAL,  ch.CH_LPAREN,  ch.CH_LBRACKET, ch.CH_EXCL,
        ch.CH_AMP,    ch.CH_PIPE,    ch.CH_QUESTION, ch.CH_COLON,
        ch.CH_COMMA,  ch.CH_SEMI,    ch.CH_LBRACE,   ch.CH_RBRACE,
        ch.CH_CARET,  ch.CH_TILDE,   ch.CH_PLUS,     ch.CH_MINUS,
        ch.CH_STAR,   ch.CH_PERCENT, ch.CH_LANGLE,   ch.CH_RANGLE,
    };
    for (expression_openers) |opener| {
        if (prev == opener) return true;
    }

    if (!ch.isIdentChar(prev)) return false;

    // The previous token is a word: a regex follows only specific keywords.
    var word_start = before;
    while (word_start > 0 and ch.isIdentChar(self.source[word_start - 1])) word_start -= 1;
    const word = self.source[word_start..before];

    const regex_keywords = [_][]const u8{ "return", "typeof", "void", "delete", "throw", "new", "in", "of", "case", "instanceof", "yield", "await" };
    for (regex_keywords) |keyword| {
        if (std.mem.eql(u8, word, keyword)) return true;
    }
    return false;
}
206
+
207
/// Skip a regex literal `/.../flags` starting at the current position.
///
/// A `/` inside a `[...]` character class does not terminate the literal.
/// A line break always stops the body scan, because a regex literal cannot
/// span lines; this also keeps an unterminated `[` from swallowing the
/// rest of the file. On return `pos` never exceeds `len`.
pub fn skipRegex(self: *Scanner) void {
    self.pos += 1; // skip opening /
    var in_char_class = false;
    while (self.pos < self.len) {
        const c = self.source[self.pos];
        if (c == ch.CH_BACKSLASH) {
            // Skip the backslash and the escaped byte, but never step past
            // the end of input: later slices use `pos` as an upper bound.
            self.pos = @min(self.pos + 2, self.len);
            continue;
        }
        // Unterminated literal: stop at the line break, even inside a class.
        if (c == ch.CH_LF or c == ch.CH_CR) break;
        if (in_char_class) {
            if (c == ch.CH_RBRACKET) in_char_class = false;
            self.pos += 1;
            continue;
        }
        if (c == ch.CH_LBRACKET) {
            in_char_class = true;
            self.pos += 1;
            continue;
        }
        if (c == ch.CH_SLASH) {
            self.pos += 1;
            break;
        }
        self.pos += 1;
    }
    // Skip flags (any trailing identifier characters).
    while (self.pos < self.len and ch.isIdentChar(self.source[self.pos])) self.pos += 1;
}
237
+
238
/// If the cursor sits on a string, template literal, comment or regex
/// literal, skip it and return true. Otherwise leave `pos` untouched and
/// return false.
pub fn skipNonCode(self: *Scanner) bool {
    if (self.pos >= self.len) return false;
    const c = self.source[self.pos];

    if (c == ch.CH_BACKTICK) {
        self.skipTemplateLiteral();
        return true;
    }
    if (c == ch.CH_SQUOTE or c == ch.CH_DQUOTE) {
        self.skipString(c);
        return true;
    }

    // Everything else that is skippable starts with a slash plus one more byte.
    if (c != ch.CH_SLASH or self.pos + 1 >= self.len) return false;

    const follower = self.source[self.pos + 1];
    if (follower == ch.CH_SLASH) {
        // Line comment: continue after the next LF, or at end of input.
        const newline = ch.indexOfChar(self.source, ch.CH_LF, self.pos + 2);
        self.pos = if (newline) |at| at + 1 else self.len;
        return true;
    }
    if (follower == ch.CH_STAR) {
        // Block comment: continue after "*/", or at end of input.
        const closer = ch.indexOf(self.source, "*/", self.pos + 2);
        self.pos = if (closer) |at| at + 2 else self.len;
        return true;
    }

    if (!self.isRegexStart()) return false;
    self.skipRegex();
    return true;
}
269
+
270
/// Consume the run of identifier characters at the cursor and return it
/// as a slice of the source. Returns an empty slice when the cursor is
/// not on an identifier character.
pub fn readIdent(self: *Scanner) []const u8 {
    const begin = self.pos;
    var end = begin;
    while (end < self.len and ch.isIdentChar(self.source[end])) : (end += 1) {}
    self.pos = end;
    return self.source[begin..end];
}
276
+
277
/// Report whether `word` occurs at the cursor as a whole word, i.e. the
/// bytes match and the byte right after them (if any) is not an
/// identifier character. Does not move the cursor.
pub fn matchWord(self: *const Scanner, word: []const u8) bool {
    const word_end = self.pos + word.len;
    if (word_end > self.len) return false;
    if (!std.mem.eql(u8, self.source[self.pos..word_end], word)) return false;
    // End of input counts as a word boundary.
    return word_end == self.len or !ch.isIdentChar(self.source[word_end]);
}
284
+
285
/// Report whether the cursor is on one of the keywords that can begin a
/// top-level statement (matched as a whole word).
pub fn isTopLevelKeyword(self: *const Scanner) bool {
    if (self.pos >= self.len) return false;

    const statement_keywords = [_][]const u8{
        "export",   "enum",
        "import",   "interface",
        "function", "class",
        "const",    "type",
        "let",      "var",
        "declare",  "default",
        "module",   "namespace",
        "abstract", "async",
    };
    for (statement_keywords) |keyword| {
        if (self.matchWord(keyword)) return true;
    }
    return false;
}
304
+
305
/// Top-level automatic-semicolon check. With the cursor on a line break,
/// look past following whitespace and comments and return true when what
/// comes next is end of input, a top-level keyword, or `}`.
/// The cursor is restored before returning.
pub fn checkASITopLevel(self: *Scanner) bool {
    if (self.pos >= self.len) return false;
    const line_break = self.source[self.pos];
    if (line_break != ch.CH_LF and line_break != ch.CH_CR) return false;

    // Look ahead by moving the real cursor, then put it back on exit.
    const origin = self.pos;
    defer self.pos = origin;

    self.pos += 1;
    // Treat CR LF as a single line break.
    if (line_break == ch.CH_CR and self.pos < self.len and self.source[self.pos] == ch.CH_LF) self.pos += 1;

    while (self.pos < self.len) {
        const c = self.source[self.pos];
        const is_blank = c == ch.CH_SPACE or c == ch.CH_TAB or c == ch.CH_CR or c == ch.CH_LF;
        if (is_blank) {
            self.pos += 1;
            continue;
        }
        if (c != ch.CH_SLASH or self.pos + 1 >= self.len) break;

        const follower = self.source[self.pos + 1];
        if (follower == ch.CH_SLASH) {
            self.pos = if (ch.indexOfChar(self.source, ch.CH_LF, self.pos + 2)) |newline| newline + 1 else self.len;
        } else if (follower == ch.CH_STAR) {
            self.pos = if (ch.indexOf(self.source, "*/", self.pos + 2)) |closer| closer + 2 else self.len;
        } else {
            break;
        }
    }

    if (self.pos >= self.len) return true;
    if (self.isTopLevelKeyword()) return true;
    return self.source[self.pos] == ch.CH_RBRACE;
}
338
+
339
/// Class-member automatic-semicolon check. With the cursor on a line
/// break, look past following whitespace and `//` comments. Returns false
/// when the next token continues a type (`|`, `&`, `.`, `?`, or one of a
/// few keywords), true otherwise, including at end of input.
/// The cursor is restored before returning.
pub fn checkASIMember(self: *Scanner) bool {
    if (self.pos >= self.len) return false;
    const line_break = self.source[self.pos];
    if (line_break != ch.CH_LF and line_break != ch.CH_CR) return false;

    // Look ahead by moving the real cursor, then put it back on exit.
    const origin = self.pos;
    defer self.pos = origin;

    self.pos += 1;
    // Treat CR LF as a single line break.
    if (line_break == ch.CH_CR and self.pos < self.len and self.source[self.pos] == ch.CH_LF) self.pos += 1;

    while (self.pos < self.len) {
        const c = self.source[self.pos];
        const is_blank = c == ch.CH_SPACE or c == ch.CH_TAB or c == ch.CH_CR or c == ch.CH_LF;
        if (is_blank) {
            self.pos += 1;
            continue;
        }
        // Only line comments are skipped here.
        const at_line_comment = c == ch.CH_SLASH and
            self.pos + 1 < self.len and
            self.source[self.pos + 1] == ch.CH_SLASH;
        if (!at_line_comment) break;
        self.pos = if (ch.indexOfChar(self.source, ch.CH_LF, self.pos + 2)) |newline| newline + 1 else self.len;
    }

    if (self.pos >= self.len) return true;

    // Operators that continue a type expression: the member has not ended.
    const upcoming = self.source[self.pos];
    if (upcoming == ch.CH_PIPE or upcoming == ch.CH_AMP or upcoming == ch.CH_DOT or upcoming == ch.CH_QUESTION) {
        return false;
    }

    // Keywords that continue a type expression.
    const continuation_keywords = [_][]const u8{ "extends", "keyof", "typeof", "infer", "is", "as", "in" };
    for (continuation_keywords) |keyword| {
        if (self.matchWord(keyword)) return false;
    }
    return true;
}
380
+
381
/// Report whether the byte immediately before the cursor is `=`, which
/// makes a `>` at the cursor the tail of an `=>` arrow.
pub fn isArrowGT(self: *const Scanner) bool {
    if (self.pos == 0) return false;
    return self.source[self.pos - 1] == ch.CH_EQUAL;
}
385
+
386
/// With the cursor on an `open` byte, advance to just past the matching
/// `close` byte and return the new position. Nested pairs are counted,
/// and strings, template literals, comments and regex literals are
/// skipped. A `>` that directly follows `=` is not treated as a closer.
/// If no match is found the cursor stops when the input is exhausted.
pub fn findMatchingClose(self: *Scanner, open: u8, close: u8) usize {
    const Block = @Vector(16, u8);
    const open_splat: Block = @splat(open);
    const close_splat: Block = @splat(close);
    const squote_splat: Block = @splat(ch.CH_SQUOTE);
    const dquote_splat: Block = @splat(ch.CH_DQUOTE);
    const backtick_splat: Block = @splat(ch.CH_BACKTICK);
    const slash_splat: Block = @splat(ch.CH_SLASH);

    var nesting: usize = 1;
    self.pos += 1; // step over the opening byte

    while (self.pos < self.len) {
        // Vector fast path: jump over 16-byte blocks that contain no
        // bracket, quote, backtick or slash.
        while (self.pos + 16 <= self.len) {
            const block: Block = self.source[self.pos..][0..16].*;
            const structural = (block == open_splat) | (block == close_splat) |
                (block == squote_splat) | (block == dquote_splat) |
                (block == backtick_splat) | (block == slash_splat);
            if (@reduce(.Or, structural)) break;
            self.pos += 16;
        }
        if (self.pos >= self.len) break;

        const c = self.source[self.pos];

        if (c == ch.CH_SQUOTE or c == ch.CH_DQUOTE) {
            self.skipString(c);
            continue;
        }
        if (c == ch.CH_BACKTICK) {
            self.skipTemplateLiteral();
            continue;
        }
        if (c == ch.CH_SLASH and self.pos + 1 < self.len) {
            // Slash handling is inlined here rather than calling skipNonCode.
            const follower = self.source[self.pos + 1];
            if (follower == ch.CH_SLASH) {
                self.pos = if (ch.indexOfChar(self.source, ch.CH_LF, self.pos + 2)) |newline| newline + 1 else self.len;
                continue;
            }
            if (follower == ch.CH_STAR) {
                self.pos = if (ch.indexOf(self.source, "*/", self.pos + 2)) |closer| closer + 2 else self.len;
                continue;
            }
            if (self.isRegexStart()) {
                self.skipRegex();
                continue;
            }
        }

        if (c == open) {
            nesting += 1;
        } else if (c == close) {
            // The `>` of `=>` does not close an angle-bracket pair.
            const arrow_tail = close == ch.CH_RANGLE and
                self.pos > 0 and
                self.source[self.pos - 1] == ch.CH_EQUAL;
            if (!arrow_tail) {
                nesting -= 1;
                if (nesting == 0) {
                    self.pos += 1;
                    return self.pos;
                }
            }
        }
        self.pos += 1;
    }
    return self.pos;
}
461
+
462
/// Advance the cursor to the end of the current statement: just past a
/// `;` at brace depth 0, just past the `}` that brings the brace depth to
/// zero or below, or onto a line break that `checkASITopLevel` accepts.
/// Strings, template literals, comments and regex literals are skipped.
pub fn skipToStatementEnd(self: *Scanner) void {
    const Block = @Vector(16, u8);
    var nesting: isize = 0;

    while (self.pos < self.len) {
        // Vector fast path: jump over blocks with no structural byte.
        // Outside braces, `;`, LF and CR are structural as well.
        const inside_braces = nesting > 0;
        while (self.pos + 16 <= self.len) {
            const block: Block = self.source[self.pos..][0..16].*;
            var structural = (block == @as(Block, @splat(ch.CH_LBRACE))) |
                (block == @as(Block, @splat(ch.CH_RBRACE))) |
                (block == @as(Block, @splat(ch.CH_SQUOTE))) |
                (block == @as(Block, @splat(ch.CH_DQUOTE))) |
                (block == @as(Block, @splat(ch.CH_BACKTICK))) |
                (block == @as(Block, @splat(ch.CH_SLASH)));
            if (!inside_braces) {
                structural = structural |
                    (block == @as(Block, @splat(ch.CH_SEMI))) |
                    (block == @as(Block, @splat(ch.CH_LF))) |
                    (block == @as(Block, @splat(ch.CH_CR)));
            }
            if (@reduce(.Or, structural)) break;
            self.pos += 16;
        }
        if (self.pos >= self.len) break;

        const c = self.source[self.pos];

        if (c == ch.CH_SQUOTE or c == ch.CH_DQUOTE) {
            self.skipString(c);
            continue;
        }
        if (c == ch.CH_BACKTICK) {
            self.skipTemplateLiteral();
            continue;
        }
        if (c == ch.CH_SLASH) {
            // Comment or regex if skipNonCode says so; otherwise a plain slash.
            if (!self.skipNonCode()) self.pos += 1;
            continue;
        }
        if (c == ch.CH_LBRACE) {
            nesting += 1;
            self.pos += 1;
            continue;
        }
        if (c == ch.CH_RBRACE) {
            nesting -= 1;
            self.pos += 1;
            if (nesting <= 0) return;
            continue;
        }
        if (nesting == 0) {
            if (c == ch.CH_SEMI) {
                self.pos += 1;
                return;
            }
            // On an accepted ASI boundary the cursor stays on the line break.
            if ((c == ch.CH_LF or c == ch.CH_CR) and self.checkASITopLevel()) return;
        }
        self.pos += 1;
    }
}
545
+
546
/// Skip an export clause of the form `{ ... } [from '...'] [;]`, starting
/// with the cursor on the opening brace.
pub fn skipExportBraces(self: *Scanner) void {
    _ = self.findMatchingClose(ch.CH_LBRACE, ch.CH_RBRACE);

    while (self.pos < self.len and ch.isWhitespace(self.source[self.pos])) : (self.pos += 1) {}

    // Optional module specifier.
    if (self.matchWord("from")) {
        self.pos += "from".len;
        while (self.pos < self.len and ch.isWhitespace(self.source[self.pos])) : (self.pos += 1) {}
        if (self.pos < self.len) {
            const quote = self.source[self.pos];
            if (quote == ch.CH_SQUOTE or quote == ch.CH_DQUOTE) self.skipString(quote);
        }
    }

    // Optional trailing semicolon, allowing only spaces/tabs before it.
    while (self.pos < self.len) : (self.pos += 1) {
        const c = self.source[self.pos];
        if (c != ch.CH_SPACE and c != ch.CH_TAB) break;
    }
    if (self.pos < self.len and self.source[self.pos] == ch.CH_SEMI) self.pos += 1;
}
561
+
562
/// Skip a star export of the form `* [as name] from '...' [;]`, starting
/// with the cursor on the `*`.
pub fn skipExportStar(self: *Scanner) void {
    self.pos += 1; // step over '*'
    while (self.pos < self.len and ch.isWhitespace(self.source[self.pos])) : (self.pos += 1) {}

    // Optional namespace alias.
    if (self.matchWord("as")) {
        self.pos += "as".len;
        while (self.pos < self.len and ch.isWhitespace(self.source[self.pos])) : (self.pos += 1) {}
        _ = self.readIdent();
        while (self.pos < self.len and ch.isWhitespace(self.source[self.pos])) : (self.pos += 1) {}
    }

    // Optional module specifier.
    if (self.matchWord("from")) {
        self.pos += "from".len;
        while (self.pos < self.len and ch.isWhitespace(self.source[self.pos])) : (self.pos += 1) {}
        if (self.pos < self.len) {
            const quote = self.source[self.pos];
            if (quote == ch.CH_SQUOTE or quote == ch.CH_DQUOTE) self.skipString(quote);
        }
    }

    // Optional trailing semicolon, allowing only spaces/tabs before it.
    while (self.pos < self.len) : (self.pos += 1) {
        const c = self.source[self.pos];
        if (c != ch.CH_SPACE and c != ch.CH_TAB) break;
    }
    if (self.pos < self.len and self.source[self.pos] == ch.CH_SEMI) self.pos += 1;
}
583
+
584
/// Return the first non-whitespace byte found `word.len` bytes past the
/// cursor, or 0 when the input ends first. Does not move the cursor.
pub fn peekAfterWord(self: *const Scanner, word: []const u8) u8 {
    var idx = self.pos + word.len;
    while (idx < self.len and ch.isWhitespace(self.source[idx])) : (idx += 1) {}
    if (idx >= self.len) return 0;
    return self.source[idx];
}
590
+
591
/// Report whether `word2` appears as a whole word after skipping
/// `word1.len` bytes from the cursor plus any whitespace.
/// Does not move the cursor.
pub fn peekAfterKeyword(self: *const Scanner, word1: []const u8, word2: []const u8) bool {
    var idx = self.pos + word1.len;
    while (idx < self.len and ch.isWhitespace(self.source[idx])) : (idx += 1) {}

    const stop = idx + word2.len;
    if (stop > self.len) return false;
    if (!std.mem.eql(u8, self.source[idx..stop], word2)) return false;

    // End of input counts as a word boundary.
    if (stop >= self.len) return true;
    return !ch.isIdentChar(self.source[stop]);
}
599
+
600
/// Return the text of the brace-enclosed block that starts at the cursor,
/// from the opening `{` through its matching `}`, and leave the cursor
/// just past the block.
///
/// The returned slice is always within `source`: if the matching search
/// runs off the end of the input (unterminated block, or a call made at
/// end of input), the cursor and the slice end are clamped to `len`.
pub fn extractBraceBlock(self: *Scanner) []const u8 {
    const block_start = @min(self.pos, self.len);
    _ = self.findMatchingClose(ch.CH_LBRACE, ch.CH_RBRACE);
    // findMatchingClose only moves forward, but may step past `len`.
    if (self.pos > self.len) self.pos = self.len;
    return self.source[block_start..self.pos];
}
606
+
607
/// Read a member name at the cursor and return it as a slice of `source`.
///
/// Three forms are recognised:
/// - `[expr]` computed name: returned including the brackets;
/// - `#name` private name: returned including the leading `#`;
/// - plain identifier.
/// Returns "" at end of input. No memory is allocated; the result borrows
/// from `source` in every case.
pub fn readMemberName(self: *Scanner) []const u8 {
    if (self.pos >= self.len) return "";

    const name_start = self.pos;
    const c = self.source[name_start];

    if (c == ch.CH_LBRACKET) {
        _ = self.findMatchingClose(ch.CH_LBRACKET, ch.CH_RBRACKET);
        // Keep the slice in bounds if the search ran past the input.
        if (self.pos > self.len) self.pos = self.len;
        return self.source[name_start..self.pos];
    }

    if (c == ch.CH_HASH) {
        // '#' and the identifier are adjacent in the source, so the
        // combined "#name" can be returned directly as one slice.
        self.pos += 1;
        _ = self.readIdent();
        return self.source[name_start..self.pos];
    }

    return self.readIdent();
}
627
+
628
/// Advance past the current class member. Stops just after a `;` at
/// nesting depth 0, on an unmatched `}` or `)` (left unconsumed), or on a
/// line break that `checkASIMember` accepts. Braces and parentheses share
/// one depth counter; strings, comments, templates and regexes are skipped.
pub fn skipClassMember(self: *Scanner) void {
    var nesting: isize = 0;
    while (self.pos < self.len) {
        if (self.skipNonCode()) continue;

        const c = self.source[self.pos];
        const is_opener = c == ch.CH_LBRACE or c == ch.CH_LPAREN;
        const is_closer = c == ch.CH_RBRACE or c == ch.CH_RPAREN;

        if (is_opener) {
            nesting += 1;
        } else if (is_closer) {
            // A closer with nothing open belongs to the enclosing scope.
            if (nesting == 0) return;
            nesting -= 1;
        } else if (nesting == 0 and c == ch.CH_SEMI) {
            self.pos += 1;
            return;
        }

        if (nesting == 0 and self.checkASIMember()) return;
        self.pos += 1;
    }
}
647
+
648
+ // ========================================================================
649
+ // Public scan entry point
650
+ // ========================================================================
651
+
652
/// Scan the TypeScript source and return the extracted declarations.
///
/// Steps:
/// 1. skip a leading UTF-8 byte-order mark;
/// 2. run the main loop from `scan_loop.zig`, propagating its errors;
/// 3. drop implementation signatures of overloaded functions;
/// 4. pull in non-exported types referenced by the remaining declarations.
///
/// Step 3 must run before step 4: `func_body_indices` is keyed by position
/// in `declarations`, and step 4 inserts entries, which would shift those
/// positions and make the recorded indices point at the wrong declarations.
///
/// The returned `ScanResult` holds copies of the scanner's container
/// structs, so it refers to the same storage that `deinit` releases.
pub fn scan(self: *Scanner) !ScanResult {
    // Skip a UTF-8 BOM (EF BB BF) at the very start of the source.
    const has_bom = self.len >= 3 and
        self.source[0] == 0xEF and
        self.source[1] == 0xBB and
        self.source[2] == 0xBF;
    if (self.pos < self.len and has_bom) {
        self.pos = 3;
    }

    // Main scan loop, delegated to the scan_loop module.
    try @import("scan_loop.zig").scanMainLoop(self);

    // Post-process: remove implementation signatures of overloaded
    // functions while the recorded declaration indices are still valid.
    if (self.func_body_indices.count() > 0) {
        removeOverloadImplementations(self);
    }

    // Post-process: resolve referenced non-exported types. This may insert
    // declarations and therefore invalidates `func_body_indices`.
    if (self.non_exported_types.count() > 0) {
        resolveReferencedTypes(&self.declarations, &self.non_exported_types);
    }

    return .{
        .declarations = self.declarations,
        .non_exported_types = self.non_exported_types,
    };
}
680
+ };
681
+
682
+ // ========================================================================
683
+ // Post-processing helpers (outside Scanner for clarity)
684
+ // ========================================================================
685
+
686
/// Report whether `name` occurs in `text` as a whole word, meaning the
/// bytes on both sides of the match (if any) are not identifier
/// characters. Each substring hit is tested in turn until one qualifies.
pub fn isWordInText(name: []const u8, text: []const u8) bool {
    var from: usize = 0;
    while (from < text.len) {
        const hit = ch.indexOf(text, name, from) orelse break;
        const tail = hit + name.len;

        // Text edges are treated as if a space were there.
        const left: u8 = if (hit == 0) ' ' else text[hit - 1];
        const right: u8 = if (tail >= text.len) ' ' else text[tail];

        const left_is_boundary = !ch.isIdentChar(left);
        const right_is_boundary = !ch.isIdentChar(right);
        if (left_is_boundary and right_is_boundary) return true;

        // Retry one byte further so overlapping candidates are not missed.
        from = hit + 1;
    }
    return false;
}
700
+
701
/// Collect every identifier-shaped word in `text` into a set, in a single
/// left-to-right pass. Keys are slices of `text` (not copies). Insertion
/// failures are ignored, so the set may be incomplete if allocation fails.
/// The caller is responsible for calling `deinit` on the returned map.
fn extractWordsFromText(alloc: std.mem.Allocator, text: []const u8) std.StringHashMap(void) {
    var word_set = std.StringHashMap(void).init(alloc);

    var cursor: usize = 0;
    while (cursor < text.len) {
        if (!ch.isIdentStart(text[cursor])) {
            cursor += 1;
            continue;
        }
        // Found the first byte of a word; extend to its end.
        const word_begin = cursor;
        cursor += 1;
        while (cursor < text.len and ch.isIdentChar(text[cursor])) : (cursor += 1) {}
        word_set.put(text[word_begin..cursor], {}) catch {};
    }

    return word_set;
}
718
+
719
/// Insert into `declarations` every non-exported type whose name appears
/// as a word in the text of an already-included, non-import declaration.
///
/// The process repeats until a round adds nothing, so types referenced
/// only by other newly added types are picked up too. New entries are
/// merged in by `start`, which assumes `declarations` is already ordered
/// by `start`.
///
/// Allocation failures are tolerated rather than reported (the function
/// returns `void`): bookkeeping insertions are best-effort, and if the
/// merged list cannot be allocated the function stops and leaves
/// `declarations` exactly as it was, never truncated.
fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), non_exported_types: *std.StringHashMap(Declaration)) void {
    const allocator = declarations.allocator;

    // Names already handled in a previous round.
    var resolved = std.StringHashMap(void).init(allocator);
    defer resolved.deinit();

    // Names present in `declarations`, to avoid inserting duplicates.
    var decl_names = std.StringHashMap(void).init(allocator);
    defer decl_names.deinit();
    for (declarations.items) |d| {
        decl_names.put(d.name, {}) catch {};
    }

    // Declaration texts to search for references (imports are excluded).
    var text_parts = std.array_list.Managed([]const u8).init(allocator);
    defer text_parts.deinit();
    text_parts.ensureTotalCapacity(declarations.items.len) catch {};
    for (declarations.items) |d| {
        if (d.kind != .import_decl) {
            text_parts.append(d.text) catch {};
        }
    }

    // Every identifier word seen so far across `text_parts`.
    var word_set = std.StringHashMap(void).init(allocator);
    defer word_set.deinit();

    // Only texts at or after this index still need their words extracted.
    var words_extracted_up_to: usize = 0;

    while (true) {
        // Extract words from the texts added since the previous round.
        for (text_parts.items[words_extracted_up_to..]) |part| {
            var i: usize = 0;
            while (i < part.len) {
                if (!ch.isIdentStart(part[i])) {
                    i += 1;
                    continue;
                }
                const word_begin = i;
                i += 1;
                while (i < part.len and ch.isIdentChar(part[i])) i += 1;
                word_set.put(part[word_begin..i], {}) catch {};
            }
        }
        words_extracted_up_to = text_parts.items.len;

        // Gather the not-yet-resolved types that are now referenced.
        var to_insert = std.array_list.Managed(Declaration).init(allocator);
        defer to_insert.deinit();
        to_insert.ensureTotalCapacity(non_exported_types.count()) catch {};

        var it = non_exported_types.iterator();
        while (it.next()) |entry| {
            const name = entry.key_ptr.*;
            if (resolved.contains(name)) continue;
            if (!word_set.contains(name)) continue;

            if (!decl_names.contains(name)) {
                to_insert.append(entry.value_ptr.*) catch {};
                decl_names.put(name, {}) catch {};
            }
            resolved.put(name, {}) catch {};
        }

        if (to_insert.items.len == 0) break;

        // Order the additions by source position for the merge below.
        std.mem.sort(Declaration, to_insert.items, {}, struct {
            fn cmp(_: void, a: Declaration, b: Declaration) bool {
                return a.start < b.start;
            }
        }.cmp);

        // Reserve the full merged size up front. If that fails, stop here
        // with `declarations` untouched instead of producing a partial list.
        var merged = std.array_list.Managed(Declaration).init(allocator);
        merged.ensureTotalCapacity(declarations.items.len + to_insert.items.len) catch {
            merged.deinit();
            break;
        };

        // Two-way merge by `start`; capacity is guaranteed, so no append can fail.
        var ti: usize = 0;
        for (declarations.items) |d| {
            while (ti < to_insert.items.len and to_insert.items[ti].start <= d.start) : (ti += 1) {
                merged.appendAssumeCapacity(to_insert.items[ti]);
            }
            merged.appendAssumeCapacity(d);
        }
        merged.appendSliceAssumeCapacity(to_insert.items[ti..]);

        // Adopt the merged buffer directly rather than copying it back.
        const previous = declarations.*;
        declarations.* = merged;
        previous.deinit();

        // The new declarations' texts are searched in the next round.
        for (to_insert.items) |d| {
            if (d.kind != .import_decl) {
                text_parts.append(d.text) catch {};
            }
        }
    }
}
819
+
820
/// For every function name declared more than once, delete the last
/// declaration of that name whose index is recorded in
/// `func_body_indices` (i.e. the one that carried a body).
///
/// Allocation failures are ignored, in which case some implementations
/// may be left in place.
/// NOTE(review): `func_body_indices` is compared against current positions
/// in `declarations`, so this relies on those positions not having shifted
/// since the indices were recorded — confirm against the call order.
fn removeOverloadImplementations(scanner: *Scanner) void {
    // Tally how many function declarations share each name.
    var occurrences = std.StringHashMap(usize).init(scanner.allocator);
    defer occurrences.deinit();
    for (scanner.declarations.items) |d| {
        if (d.kind != .function_decl) continue;
        const slot = occurrences.getOrPut(d.name) catch continue;
        slot.value_ptr.* = if (slot.found_existing) slot.value_ptr.* + 1 else 1;
    }

    // Names that occur more than once are overloaded.
    var overloaded = std.StringHashMap(void).init(scanner.allocator);
    defer overloaded.deinit();
    var count_it = occurrences.iterator();
    while (count_it.next()) |entry| {
        if (entry.value_ptr.* <= 1) continue;
        overloaded.put(entry.key_ptr.*, {}) catch {};
    }
    if (overloaded.count() == 0) return;

    // For each overloaded name, mark its last body-bearing declaration.
    var doomed = std.AutoHashMap(usize, void).init(scanner.allocator);
    defer doomed.deinit();
    var name_it = overloaded.iterator();
    while (name_it.next()) |entry| {
        const name = entry.key_ptr.*;
        var idx: usize = scanner.declarations.items.len;
        while (idx > 0) {
            idx -= 1;
            const d = scanner.declarations.items[idx];
            const is_implementation = d.kind == .function_decl and
                std.mem.eql(u8, d.name, name) and
                scanner.func_body_indices.contains(idx);
            if (!is_implementation) continue;
            doomed.put(idx, {}) catch {};
            break;
        }
    }
    if (doomed.count() == 0) return;

    // Compact the list in place in one pass, keeping unmarked entries in order.
    const items = scanner.declarations.items;
    var kept: usize = 0;
    var read: usize = 0;
    while (read < items.len) : (read += 1) {
        if (doomed.contains(read)) continue;
        items[kept] = items[read];
        kept += 1;
    }
    scanner.declarations.shrinkRetainingCapacity(kept);
}
880
+
881
+ // Tests
882
// Whole-word matching: a name embedded in a longer identifier must not match.
test "isWordInText" {
    const Case = struct { name: []const u8, text: []const u8, expected: bool };
    const cases = [_]Case{
        .{ .name = "Foo", .text = "extends Foo {", .expected = true },
        .{ .name = "Foo", .text = "extends FooBar {", .expected = false },
        .{ .name = "Bar", .text = "type: Bar;", .expected = true },
        .{ .name = "Bar", .text = "type: BarBaz;", .expected = false },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.expected, isWordInText(case.name, case.text));
    }
}
888
+
889
// After skipping the 13-byte literal 'hello world', the cursor sits on
// the space that follows the closing quote.
test "scanner skipString" {
    var scanner = Scanner.init(std.testing.allocator, "'hello world' rest", true, false);
    defer scanner.deinit();

    scanner.skipString(ch.CH_SQUOTE);

    const expected_pos: usize = 13;
    try std.testing.expectEqual(expected_pos, scanner.pos);
}
895
+
896
// A full keyword matches; a prefix that runs into more identifier
// characters does not.
test "scanner matchWord" {
    var scanner = Scanner.init(std.testing.allocator, "export const x", true, false);
    defer scanner.deinit();

    const full_word = scanner.matchWord("export");
    const prefix_only = scanner.matchWord("expo");
    try std.testing.expect(full_word);
    try std.testing.expect(!prefix_only);
}
902
+
903
// Leading blanks, a line comment and the following indentation are all
// skipped, leaving the cursor on the first code byte.
test "scanner skipWhitespaceAndComments" {
    var scanner = Scanner.init(std.testing.allocator, " // comment\n hello", true, false);
    defer scanner.deinit();

    scanner.skipWhitespaceAndComments();

    const remainder = scanner.source[scanner.pos .. scanner.pos + 5];
    try std.testing.expectEqualStrings("hello", remainder);
}