@stacksjs/zig-dtsx 0.9.13 → 0.9.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -0
- package/build.zig +1 -1
- package/package.json +2 -2
- package/src/char_utils.zig +78 -12
- package/src/emitter.zig +324 -179
- package/src/extractors.zig +724 -404
- package/src/lib.zig +35 -8
- package/src/main.zig +108 -77
- package/src/scan_loop.zig +101 -65
- package/src/scanner.zig +293 -106
- package/src/type_inference.zig +215 -130
- package/test/zig-dtsx.test.ts +5 -1
- package/zig-out/bin/zig-dtsx +0 -0
- package/zig-out/bin/zig-dtsx.exe +0 -0
package/src/scanner.zig
CHANGED
|
@@ -8,6 +8,23 @@ const Declaration = types.Declaration;
|
|
|
8
8
|
const DeclarationKind = types.DeclarationKind;
|
|
9
9
|
const Allocator = std.mem.Allocator;
|
|
10
10
|
|
|
11
|
+
/// Compare the `N` bytes at `src` against the comptime-known `word`.
///
/// For widths of 1, 2, 4 or 8 bytes the comparison is a single integer
/// compare: `word` is folded into an integer constant at compile time and
/// `src` is read as one unaligned integer. Both sides are decoded with the
/// same explicit byte order, so the result does not depend on the target's
/// endianness. Every other width falls back to `std.mem.eql`.
///
/// Returns `false` when `word.len` differs from `N`; a shorter `word` is
/// never zero-padded into a match.
inline fn comptime_match(comptime N: usize, src: *const [N]u8, comptime word: []const u8) bool {
    if (comptime word.len != N) {
        // Length mismatch is known at compile time; nothing can match.
        return false;
    } else if (comptime (N == 1 or N == 2 or N == 4 or N == 8)) {
        const T = std.meta.Int(.unsigned, N * 8);
        // Fold the keyword into a constant once, at compile time.
        const expected: T = comptime std.mem.readInt(T, word[0..N], .little);
        // readInt performs an unaligned load and, on big-endian targets, the
        // byte swap needed to agree with `expected`.
        return std.mem.readInt(T, src, .little) == expected;
    } else {
        return std.mem.eql(u8, src, word);
    }
}
|
|
27
|
+
|
|
11
28
|
/// Result of scanning: declarations + non-exported types
|
|
12
29
|
pub const ScanResult = struct {
|
|
13
30
|
declarations: std.array_list.Managed(Declaration),
|
|
@@ -21,8 +38,12 @@ pub const Scanner = struct {
|
|
|
21
38
|
len: usize,
|
|
22
39
|
allocator: Allocator,
|
|
23
40
|
declarations: std.array_list.Managed(Declaration),
|
|
41
|
+
/// Lazy-initialized: only allocated on first insertion to avoid overhead for
|
|
42
|
+
/// files that have no non-exported types or overloaded functions.
|
|
24
43
|
non_exported_types: std.StringHashMap(Declaration),
|
|
25
44
|
func_body_indices: std.AutoHashMap(usize, void),
|
|
45
|
+
non_exported_types_inited: bool,
|
|
46
|
+
func_body_indices_inited: bool,
|
|
26
47
|
keep_comments: bool,
|
|
27
48
|
isolated_declarations: bool,
|
|
28
49
|
|
|
@@ -38,6 +59,8 @@ pub const Scanner = struct {
|
|
|
38
59
|
.declarations = declarations,
|
|
39
60
|
.non_exported_types = std.StringHashMap(Declaration).init(allocator),
|
|
40
61
|
.func_body_indices = std.AutoHashMap(usize, void).init(allocator),
|
|
62
|
+
.non_exported_types_inited = false,
|
|
63
|
+
.func_body_indices_inited = false,
|
|
41
64
|
.keep_comments = keep_comments,
|
|
42
65
|
.isolated_declarations = isolated_declarations,
|
|
43
66
|
};
|
|
@@ -45,8 +68,24 @@ pub const Scanner = struct {
|
|
|
45
68
|
|
|
46
69
|
/// Release the declaration list and both lookup maps owned by the scanner.
///
/// Both maps are constructed by the scanner's initializer, so they are
/// always in a valid state to deinit; for a map that never allocated this
/// frees nothing. They are released unconditionally rather than only when the
/// `*_inited` flags are set, so entries inserted directly through the
/// public fields (bypassing the put helpers) cannot leak.
pub fn deinit(self: *Scanner) void {
    self.declarations.deinit();
    self.non_exported_types.deinit();
    self.func_body_indices.deinit();
}
|
|
74
|
+
|
|
75
|
+
/// Store `decl` under `name` in `non_exported_types` and flag the map as
/// used. The flag is read by `scan` before its post-processing step.
/// A failed insertion is ignored (best effort).
pub fn putNonExportedType(self: *Scanner, name: []const u8, decl: Declaration) void {
    // Raising the flag is idempotent, so there is no need to test it first.
    self.non_exported_types_inited = true;
    self.non_exported_types.put(name, decl) catch return;
}
|
|
82
|
+
|
|
83
|
+
/// Record `idx` in `func_body_indices` and flag the set as used. The flag
/// is read by `scan` before its overload post-processing step.
/// A failed insertion is ignored (best effort).
pub fn putFuncBodyIndex(self: *Scanner, idx: usize) void {
    // Raising the flag is idempotent, so there is no need to test it first.
    self.func_body_indices_inited = true;
    self.func_body_indices.put(idx, {}) catch return;
}
|
|
51
90
|
|
|
52
91
|
// ========================================================================
|
|
@@ -78,21 +117,30 @@ pub const Scanner = struct {
|
|
|
78
117
|
}
|
|
79
118
|
if (self.pos >= self.len) break;
|
|
80
119
|
|
|
120
|
+
// Scalar: consume single whitespace bytes
|
|
81
121
|
const c = self.source[self.pos];
|
|
82
122
|
if (c == ch.CH_SPACE or c == ch.CH_TAB or c == ch.CH_LF or c == ch.CH_CR) {
|
|
83
123
|
self.pos += 1;
|
|
124
|
+
// Fast scalar drain: consume consecutive whitespace without re-entering the outer loop
|
|
125
|
+
while (self.pos < self.len) {
|
|
126
|
+
const c2 = self.source[self.pos];
|
|
127
|
+
if (c2 == ch.CH_SPACE or c2 == ch.CH_TAB or c2 == ch.CH_LF or c2 == ch.CH_CR) {
|
|
128
|
+
self.pos += 1;
|
|
129
|
+
} else break;
|
|
130
|
+
}
|
|
84
131
|
continue;
|
|
85
132
|
}
|
|
86
133
|
if (c == ch.CH_SLASH and self.pos + 1 < self.len) {
|
|
87
134
|
const next = self.source[self.pos + 1];
|
|
88
135
|
if (next == ch.CH_SLASH) {
|
|
89
|
-
// Line comment
|
|
136
|
+
// Line comment — SIMD scan via indexOfChar.
|
|
90
137
|
const nl = ch.indexOfChar(self.source, ch.CH_LF, self.pos + 2);
|
|
91
138
|
self.pos = if (nl) |n| n + 1 else self.len;
|
|
92
139
|
continue;
|
|
93
140
|
}
|
|
94
141
|
if (next == ch.CH_STAR) {
|
|
95
|
-
// Block comment
|
|
142
|
+
// Block comment — `*/` is two specific bytes; the SIMD
|
|
143
|
+
// first-byte scan in indexOf handles this efficiently.
|
|
96
144
|
const end_idx = ch.indexOf(self.source, "*/", self.pos + 2);
|
|
97
145
|
self.pos = if (end_idx) |e| e + 2 else self.len;
|
|
98
146
|
continue;
|
|
@@ -143,33 +191,70 @@ pub const Scanner = struct {
|
|
|
143
191
|
self.pos = len;
|
|
144
192
|
}
|
|
145
193
|
|
|
146
|
-
/// Skip past a template literal (backtick string with ${} interpolation)
|
|
194
|
+
/// Advance `self.pos` past a template literal (backtick string with `${}`
/// interpolation) whose opening backtick is at the current position.
///
/// Uninteresting bytes are skipped 16 at a time with vector compares; the
/// first structural byte found is then handled by the scalar logic below.
/// If the literal is never closed, `self.pos` ends up at `self.len`.
pub fn skipTemplateLiteral(self: *Scanner) void {
    const Lanes = @Vector(16, u8);
    const text = self.source;
    const end = self.len;
    var cursor = self.pos + 1; // step over the opening backtick
    var nesting: usize = 0; // number of `${` interpolations currently open

    while (cursor < end) {
        // Vector fast path: jump to the next backtick, backslash or `$`.
        // Inside an interpolation a `}` is also a stop byte.
        while (cursor + 16 <= end) : (cursor += 16) {
            const block: Lanes = text[cursor..][0..16].*;
            var hits = (block == @as(Lanes, @splat(ch.CH_BACKTICK))) |
                (block == @as(Lanes, @splat(ch.CH_BACKSLASH))) |
                (block == @as(Lanes, @splat(ch.CH_DOLLAR)));
            if (nesting != 0) {
                hits = hits | (block == @as(Lanes, @splat(ch.CH_RBRACE)));
            }
            if (@reduce(.Or, hits)) {
                // Lowest set bit = first matching lane in the block.
                const lane_bits: u16 = @bitCast(hits);
                cursor += @ctz(lane_bits);
                break;
            }
        }
        if (cursor >= end) break;

        const byte = text[cursor];
        if (byte == ch.CH_BACKSLASH) {
            // Skip the backslash together with the byte it escapes.
            cursor += 2;
        } else if (byte == ch.CH_BACKTICK and nesting == 0) {
            // Closing backtick of the literal itself.
            self.pos = cursor + 1;
            return;
        } else if (byte == ch.CH_DOLLAR and cursor + 1 < end and text[cursor + 1] == ch.CH_LBRACE) {
            // `${` opens an interpolation.
            cursor += 2;
            nesting += 1;
        } else {
            // A `}` closes the innermost interpolation; anything else is plain text.
            if (byte == ch.CH_RBRACE and nesting > 0) nesting -= 1;
            cursor += 1;
        }
    }
    // Ran off the end of the source without finding the closing backtick.
    self.pos = end;
}
|
|
174
259
|
|
|
175
260
|
/// Check if `/` at current pos starts a regex literal (not division)
|
|
@@ -196,10 +281,22 @@ pub const Scanner = struct {
|
|
|
196
281
|
const word_start: usize = @intCast(wp + 1);
|
|
197
282
|
const word_end: usize = @intCast(p + 1);
|
|
198
283
|
const word = self.source[word_start..word_end];
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
284
|
+
// First-char dispatch to avoid 12 std.mem.eql calls per slash. The
|
|
285
|
+
// hot path here runs on every '/' that might start division.
|
|
286
|
+
if (word.len < 2 or word.len > 10) return false;
|
|
287
|
+
return switch (word[0]) {
|
|
288
|
+
'r' => std.mem.eql(u8, word, "return"),
|
|
289
|
+
't' => std.mem.eql(u8, word, "typeof") or std.mem.eql(u8, word, "throw"),
|
|
290
|
+
'v' => std.mem.eql(u8, word, "void"),
|
|
291
|
+
'd' => std.mem.eql(u8, word, "delete"),
|
|
292
|
+
'n' => std.mem.eql(u8, word, "new"),
|
|
293
|
+
'i' => std.mem.eql(u8, word, "in") or std.mem.eql(u8, word, "instanceof"),
|
|
294
|
+
'o' => std.mem.eql(u8, word, "of"),
|
|
295
|
+
'c' => std.mem.eql(u8, word, "case"),
|
|
296
|
+
'y' => std.mem.eql(u8, word, "yield"),
|
|
297
|
+
'a' => std.mem.eql(u8, word, "await"),
|
|
298
|
+
else => false,
|
|
299
|
+
};
|
|
203
300
|
}
|
|
204
301
|
return false;
|
|
205
302
|
}
|
|
@@ -268,18 +365,21 @@ pub const Scanner = struct {
|
|
|
268
365
|
}
|
|
269
366
|
|
|
270
367
|
/// Consume the run of identifier characters starting at the current position
/// and return it as a slice of the source. The slice is empty (and the
/// position unchanged) when the current byte is not an identifier character.
pub inline fn readIdent(self: *Scanner) []const u8 {
    const begin = self.pos;
    var cursor = begin;
    while (cursor < self.len and ch.isIdentChar(self.source[cursor])) : (cursor += 1) {}
    self.pos = cursor;
    return self.source[begin..cursor];
}
|
|
276
373
|
|
|
277
|
-
/// Check if source matches a word at pos (followed by non-ident char)
|
|
278
|
-
|
|
374
|
+
/// Report whether the source at the current position spells `word` as a
/// whole word, i.e. the bytes match and the byte after them (if any) is not
/// an identifier character. The position is not advanced.
pub fn matchWord(self: *const Scanner, comptime word: []const u8) bool {
    const after = self.pos + word.len;
    // Not enough bytes left to hold the word at all.
    if (after > self.len) return false;

    // Byte comparison, delegated to the comptime-specialized matcher.
    const window = self.source[self.pos..][0..word.len];
    if (!comptime_match(word.len, window, word)) return false;

    // Word boundary: either end of source or a non-identifier byte follows.
    return after == self.len or !ch.isIdentChar(self.source[after]);
}
|
|
284
384
|
|
|
285
385
|
/// Check if current position is at a top-level statement-starting keyword
|
|
@@ -367,10 +467,17 @@ pub const Scanner = struct {
|
|
|
367
467
|
self.pos = saved;
|
|
368
468
|
return false;
|
|
369
469
|
}
|
|
370
|
-
// Type continuation keywords
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
470
|
+
// Type continuation keywords — first-byte dispatch avoids running 7
|
|
471
|
+
// sequential matchWord calls on every newline at member depth 0.
|
|
472
|
+
const is_type_kw = switch (nc) {
|
|
473
|
+
'e' => self.matchWord("extends"),
|
|
474
|
+
'k' => self.matchWord("keyof"),
|
|
475
|
+
't' => self.matchWord("typeof"),
|
|
476
|
+
'i' => self.matchWord("infer") or self.matchWord("is") or self.matchWord("in"),
|
|
477
|
+
'a' => self.matchWord("as"),
|
|
478
|
+
else => false,
|
|
479
|
+
};
|
|
480
|
+
if (is_type_kw) {
|
|
374
481
|
self.pos = saved;
|
|
375
482
|
return false;
|
|
376
483
|
}
|
|
@@ -581,8 +688,11 @@ pub const Scanner = struct {
|
|
|
581
688
|
if (self.pos < self.len and self.source[self.pos] == ch.CH_SEMI) self.pos += 1;
|
|
582
689
|
}
|
|
583
690
|
|
|
584
|
-
/// Peek at what char comes after a word (skipping whitespace)
|
|
585
|
-
|
|
691
|
+
/// Peek at what char comes after a word (skipping whitespace).
|
|
692
|
+
/// Defensive bounds check on pos+word.len handles the edge where the
|
|
693
|
+
/// caller positions at end-of-source.
|
|
694
|
+
pub inline fn peekAfterWord(self: *const Scanner, word: []const u8) u8 {
|
|
695
|
+
if (self.pos + word.len > self.len) return 0;
|
|
586
696
|
var p = self.pos + word.len;
|
|
587
697
|
while (p < self.len and ch.isWhitespace(self.source[p])) p += 1;
|
|
588
698
|
return if (p < self.len) self.source[p] else 0;
|
|
@@ -592,6 +702,9 @@ pub const Scanner = struct {
|
|
|
592
702
|
pub fn peekAfterKeyword(self: *const Scanner, word1: []const u8, word2: []const u8) bool {
|
|
593
703
|
var p = self.pos + word1.len;
|
|
594
704
|
while (p < self.len and ch.isWhitespace(self.source[p])) p += 1;
|
|
705
|
+
// First-byte fast-fail: avoid std.mem.eql call when first char doesn't match.
|
|
706
|
+
if (p >= self.len or word2.len == 0) return false;
|
|
707
|
+
if (self.source[p] != word2[0]) return false;
|
|
595
708
|
if (p + word2.len > self.len) return false;
|
|
596
709
|
if (!std.mem.eql(u8, self.source[p .. p + word2.len], word2)) return false;
|
|
597
710
|
return p + word2.len >= self.len or !ch.isIdentChar(self.source[p + word2.len]);
|
|
@@ -614,13 +727,12 @@ pub const Scanner = struct {
|
|
|
614
727
|
return self.source[start..self.pos];
|
|
615
728
|
}
|
|
616
729
|
if (c == ch.CH_HASH) {
|
|
730
|
+
// Zero-copy: keep the leading `#` as part of the source slice instead
|
|
731
|
+
// of allocating a new buffer to prepend it.
|
|
732
|
+
const start = self.pos;
|
|
617
733
|
self.pos += 1;
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
const result = self.allocator.alloc(u8, 1 + ident.len) catch return "";
|
|
621
|
-
result[0] = '#';
|
|
622
|
-
@memcpy(result[1..], ident);
|
|
623
|
-
return result;
|
|
734
|
+
_ = self.readIdent();
|
|
735
|
+
return self.source[start..self.pos];
|
|
624
736
|
}
|
|
625
737
|
return self.readIdent();
|
|
626
738
|
}
|
|
@@ -651,24 +763,23 @@ pub const Scanner = struct {
|
|
|
651
763
|
|
|
652
764
|
/// Scan TypeScript source and extract all declarations
|
|
653
765
|
pub fn scan(self: *Scanner) !ScanResult {
|
|
654
|
-
// Skip BOM
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
}
|
|
766
|
+
// Skip UTF-8 BOM (EF BB BF). Check exact bytes — the previous `>= 0xEF`
|
|
767
|
+
// guard was over-permissive, accepting non-BOM high bytes only to fall
|
|
768
|
+
// through the strict triple-check.
|
|
769
|
+
if (self.len >= 3 and self.source[0] == 0xEF and self.source[1] == 0xBB and self.source[2] == 0xBF) {
|
|
770
|
+
self.pos = 3;
|
|
660
771
|
}
|
|
661
772
|
|
|
662
773
|
// Main scan loop — delegate to scan_loop module
|
|
663
774
|
try @import("scan_loop.zig").scanMainLoop(self);
|
|
664
775
|
|
|
665
|
-
// Post-process: resolve referenced non-exported types
|
|
666
|
-
if (self.non_exported_types.count() > 0) {
|
|
776
|
+
// Post-process: resolve referenced non-exported types (only if map was used)
|
|
777
|
+
if (self.non_exported_types_inited and self.non_exported_types.count() > 0) {
|
|
667
778
|
resolveReferencedTypes(&self.declarations, &self.non_exported_types);
|
|
668
779
|
}
|
|
669
780
|
|
|
670
|
-
// Post-process: remove implementation signatures of overloaded functions
|
|
671
|
-
if (self.func_body_indices.count() > 0) {
|
|
781
|
+
// Post-process: remove implementation signatures of overloaded functions (only if map was used)
|
|
782
|
+
if (self.func_body_indices_inited and self.func_body_indices.count() > 0) {
|
|
672
783
|
removeOverloadImplementations(self);
|
|
673
784
|
}
|
|
674
785
|
|
|
@@ -685,6 +796,7 @@ pub const Scanner = struct {
|
|
|
685
796
|
|
|
686
797
|
/// Check if name appears as a whole word in text (fast indexOf + boundary check)
|
|
687
798
|
pub fn isWordInText(name: []const u8, text: []const u8) bool {
|
|
799
|
+
if (name.len == 0 or name.len > text.len) return false;
|
|
688
800
|
var search_from: usize = 0;
|
|
689
801
|
while (search_from < text.len) {
|
|
690
802
|
const idx = ch.indexOf(text, name, search_from) orelse return false;
|
|
@@ -693,29 +805,14 @@ pub fn isWordInText(name: []const u8, text: []const u8) bool {
|
|
|
693
805
|
const before_ok = !ch.isIdentChar(before);
|
|
694
806
|
const after_ok = !ch.isIdentChar(after);
|
|
695
807
|
if (before_ok and after_ok) return true;
|
|
696
|
-
|
|
808
|
+
// When the right boundary fails (we matched a longer identifier), skip past
|
|
809
|
+
// the entire identifier so we don't re-scan its prefix. When only the left
|
|
810
|
+
// boundary fails, advance just by 1 to allow overlapping self-similar names.
|
|
811
|
+
search_from = if (!after_ok) idx + name.len else idx + 1;
|
|
697
812
|
}
|
|
698
813
|
return false;
|
|
699
814
|
}
|
|
700
815
|
|
|
701
|
-
/// Extract all identifier words from text into a HashSet (single pass, O(n))
|
|
702
|
-
fn extractWordsFromText(alloc: std.mem.Allocator, text: []const u8) std.StringHashMap(void) {
|
|
703
|
-
var words = std.StringHashMap(void).init(alloc);
|
|
704
|
-
var i: usize = 0;
|
|
705
|
-
while (i < text.len) {
|
|
706
|
-
const c = text[i];
|
|
707
|
-
if (ch.isIdentStart(c)) {
|
|
708
|
-
const start = i;
|
|
709
|
-
i += 1;
|
|
710
|
-
while (i < text.len and ch.isIdentChar(text[i])) i += 1;
|
|
711
|
-
words.put(text[start..i], {}) catch {};
|
|
712
|
-
} else {
|
|
713
|
-
i += 1;
|
|
714
|
-
}
|
|
715
|
-
}
|
|
716
|
-
return words;
|
|
717
|
-
}
|
|
718
|
-
|
|
719
816
|
/// Resolve non-exported types that are referenced by exported declarations
|
|
720
817
|
fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), non_exported_types: *std.StringHashMap(Declaration)) void {
|
|
721
818
|
var resolved = std.StringHashMap(void).init(declarations.allocator);
|
|
@@ -743,6 +840,15 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
|
|
|
743
840
|
// Track how far we've extracted words — only process new text_parts each iteration
|
|
744
841
|
var words_extracted_up_to: usize = 0;
|
|
745
842
|
|
|
843
|
+
// Move outside loop — reuse across iterations
|
|
844
|
+
var to_insert = std.array_list.Managed(Declaration).init(declarations.allocator);
|
|
845
|
+
to_insert.ensureTotalCapacity(non_exported_types.count()) catch {};
|
|
846
|
+
defer to_insert.deinit();
|
|
847
|
+
|
|
848
|
+
var merged = std.array_list.Managed(Declaration).init(declarations.allocator);
|
|
849
|
+
merged.ensureTotalCapacity(declarations.items.len + non_exported_types.count()) catch {};
|
|
850
|
+
defer merged.deinit();
|
|
851
|
+
|
|
746
852
|
while (true) {
|
|
747
853
|
// Incrementally extract words from only the NEW text parts
|
|
748
854
|
for (text_parts.items[words_extracted_up_to..]) |part| {
|
|
@@ -760,9 +866,7 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
|
|
|
760
866
|
}
|
|
761
867
|
words_extracted_up_to = text_parts.items.len;
|
|
762
868
|
|
|
763
|
-
|
|
764
|
-
to_insert.ensureTotalCapacity(non_exported_types.count()) catch {};
|
|
765
|
-
defer to_insert.deinit();
|
|
869
|
+
to_insert.clearRetainingCapacity();
|
|
766
870
|
|
|
767
871
|
var it = non_exported_types.iterator();
|
|
768
872
|
while (it.next()) |entry| {
|
|
@@ -789,7 +893,7 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
|
|
|
789
893
|
}.cmp);
|
|
790
894
|
|
|
791
895
|
// Merge at correct source positions
|
|
792
|
-
|
|
896
|
+
merged.clearRetainingCapacity();
|
|
793
897
|
merged.ensureTotalCapacity(declarations.items.len + to_insert.items.len) catch {};
|
|
794
898
|
var ti: usize = 0;
|
|
795
899
|
for (declarations.items) |d| {
|
|
@@ -806,7 +910,6 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
|
|
|
806
910
|
|
|
807
911
|
declarations.clearRetainingCapacity();
|
|
808
912
|
declarations.appendSlice(merged.items) catch {};
|
|
809
|
-
merged.deinit();
|
|
810
913
|
|
|
811
914
|
// Add new texts to search
|
|
812
915
|
for (to_insert.items) |d| {
|
|
@@ -818,59 +921,71 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
|
|
|
818
921
|
}
|
|
819
922
|
|
|
820
923
|
/// Remove implementation signatures of overloaded functions
|
|
924
|
+
/// Remove implementation signatures of overloaded functions.
|
|
925
|
+
/// Single-HashMap approach: count function names, then find and remove body-bearing
|
|
926
|
+
/// implementations in one backward pass.
|
|
821
927
|
fn removeOverloadImplementations(scanner: *Scanner) void {
|
|
822
|
-
//
|
|
823
|
-
|
|
824
|
-
|
|
928
|
+
// Single HashMap: count function declarations by name. Track overloads inline
|
|
929
|
+
// so we don't need a second iteration pass to count them.
|
|
930
|
+
var func_counts = std.StringHashMap(usize).init(scanner.allocator);
|
|
931
|
+
defer func_counts.deinit();
|
|
932
|
+
var overload_count: usize = 0;
|
|
825
933
|
|
|
826
934
|
for (scanner.declarations.items) |d| {
|
|
827
935
|
if (d.kind == .function_decl) {
|
|
828
|
-
const entry =
|
|
936
|
+
const entry = func_counts.getOrPut(d.name) catch continue;
|
|
829
937
|
if (!entry.found_existing) {
|
|
830
938
|
entry.value_ptr.* = 1;
|
|
831
939
|
} else {
|
|
832
940
|
entry.value_ptr.* += 1;
|
|
941
|
+
if (entry.value_ptr.* == 2) overload_count += 1;
|
|
833
942
|
}
|
|
834
943
|
}
|
|
835
944
|
}
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
const
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
945
|
+
if (overload_count == 0) return;
|
|
946
|
+
|
|
947
|
+
// Single backward pass: for each overloaded name (count > 1),
|
|
948
|
+
// find and mark the last body-bearing declaration for removal.
|
|
949
|
+
// Use a bitset (array of bools) instead of a HashMap for to_remove.
|
|
950
|
+
const len = scanner.declarations.items.len;
|
|
951
|
+
const remove_flags = scanner.allocator.alloc(bool, len) catch return;
|
|
952
|
+
defer scanner.allocator.free(remove_flags);
|
|
953
|
+
@memset(remove_flags, false);
|
|
954
|
+
|
|
955
|
+
var found_count: usize = 0;
|
|
956
|
+
|
|
957
|
+
// Walk backward: for each overloaded function, mark its last body-bearing impl
|
|
958
|
+
var names_found = std.StringHashMap(void).init(scanner.allocator);
|
|
959
|
+
defer names_found.deinit();
|
|
960
|
+
|
|
961
|
+
var i: usize = len;
|
|
962
|
+
while (i > 0) {
|
|
963
|
+
i -= 1;
|
|
964
|
+
const d = scanner.declarations.items[i];
|
|
965
|
+
if (d.kind != .function_decl) continue;
|
|
966
|
+
|
|
967
|
+
// Check if this function is overloaded
|
|
968
|
+
const count_entry = func_counts.get(d.name) orelse continue;
|
|
969
|
+
if (count_entry <= 1) continue;
|
|
970
|
+
|
|
971
|
+
// Already found the impl for this name?
|
|
972
|
+
if (names_found.contains(d.name)) continue;
|
|
973
|
+
|
|
974
|
+
// Is this the implementation (has body)?
|
|
975
|
+
if (scanner.func_body_indices.contains(i)) {
|
|
976
|
+
remove_flags[i] = true;
|
|
977
|
+
found_count += 1;
|
|
978
|
+
names_found.put(d.name, {}) catch {};
|
|
979
|
+
if (found_count == overload_count) break; // All found
|
|
865
980
|
}
|
|
866
981
|
}
|
|
867
982
|
|
|
868
|
-
if (
|
|
983
|
+
if (found_count == 0) return;
|
|
869
984
|
|
|
870
|
-
// Filter in single pass
|
|
985
|
+
// Filter in single pass
|
|
871
986
|
var write: usize = 0;
|
|
872
|
-
for (scanner.declarations.items, 0..) |d,
|
|
873
|
-
if (!
|
|
987
|
+
for (scanner.declarations.items, 0..) |d, fi| {
|
|
988
|
+
if (!remove_flags[fi]) {
|
|
874
989
|
scanner.declarations.items[write] = d;
|
|
875
990
|
write += 1;
|
|
876
991
|
}
|
|
@@ -906,3 +1021,75 @@ test "scanner skipWhitespaceAndComments" {
|
|
|
906
1021
|
s.skipWhitespaceAndComments();
|
|
907
1022
|
try std.testing.expectEqualStrings("hello", s.source[s.pos .. s.pos + 5]);
|
|
908
1023
|
}
|
|
1024
|
+
|
|
1025
|
+
// --- Tests added for performance/fix patches ---
|
|
1026
|
+
|
|
1027
|
+
// isWordInText: when a hit fails the right-boundary check (e.g. searching for
|
|
1028
|
+
// "Foo" inside "FooBar"), the new implementation skips past the matched
|
|
1029
|
+
// substring instead of advancing one byte. These cases verify both the
|
|
1030
|
+
// short-circuit and the normal advancement paths still report correctly.
|
|
1031
|
+
test "isWordInText skips past failing right boundary" {
    const Case = struct { name: []const u8, text: []const u8, found: bool };
    const table = [_]Case{
        // Longer identifiers sharing the prefix come first; the standalone
        // occurrence after them must still be reported.
        .{ .name = "Foo", .text = "FooBar FooBaz Foo;", .found = true },
        // Only longer identifiers: no whole-word match exists.
        .{ .name = "Foo", .text = "FooBar FooBaz FooQuux", .found = false },
        // The first hit is glued to `xx` (left boundary fails); the search
        // must continue and find the standalone `aa`.
        .{ .name = "aa", .text = "xxaa aa", .found = true },
        // Empty needle is rejected.
        .{ .name = "", .text = "anything", .found = false },
        // Needle longer than the text is rejected.
        .{ .name = "longer", .text = "no", .found = false },
    };
    for (table) |row| {
        try std.testing.expectEqual(row.found, isWordInText(row.name, row.text));
    }
}
|
|
1046
|
+
|
|
1047
|
+
// peekAfterKeyword: the first-byte fast-fail must short-circuit cleanly when
|
|
1048
|
+
// the next ident byte after `word1` doesn't even match `word2[0]`.
|
|
1049
|
+
test "peekAfterKeyword first-byte fast-fail" {
    const Case = struct { src: []const u8, next: []const u8, want: bool };
    const table = [_]Case{
        // The second keyword follows the first: match.
        .{ .src = "async function foo", .next = "function", .want = true },
        // First byte after `async` is `c`, not `f`: rejected.
        .{ .src = "async const foo", .next = "function", .want = false },
        // An empty second word must never match.
        .{ .src = "async ", .next = "", .want = false },
    };
    for (table) |row| {
        var scanner = Scanner.init(std.testing.allocator, row.src, true, false);
        defer scanner.deinit();
        try std.testing.expectEqual(row.want, scanner.peekAfterKeyword("async", row.next));
    }
}
|
|
1063
|
+
|
|
1064
|
+
// isRegexStart: the first-char dispatch must still recognize every keyword
|
|
1065
|
+
// previously checked, and must reject identifiers that share a prefix with
|
|
1066
|
+
// one of the regex-introducing keywords.
|
|
1067
|
+
test "isRegexStart keyword dispatch covers all branches" {
    // Sources where the word before the slash is a regex-introducing keyword.
    const regex_contexts = [_][]const u8{
        "return /a/",
        "typeof /a/",
        "throw /a/",
        "void /a/",
        "delete /a/",
        "new /a/",
        "in /a/",
        "instanceof /a/",
        "of /a/",
        "case /a/",
        "yield /a/",
        "await /a/",
    };
    // Look-alike identifiers that must NOT switch the scanner into regex mode.
    const division_contexts = [_][]const u8{
        "myReturn /a/", // "myReturn" is not "return"
        "thrower /a/",
        "x /a/", // one-character identifier
    };

    // Place a fresh scanner on the first slash of `src` and query isRegexStart.
    const probe = struct {
        fn atSlash(src: []const u8) bool {
            var s = Scanner.init(std.testing.allocator, src, true, false);
            defer s.deinit();
            s.pos = std.mem.indexOfScalar(u8, src, '/').?;
            return s.isRegexStart();
        }
    };

    for (regex_contexts) |src| try std.testing.expect(probe.atSlash(src));
    for (division_contexts) |src| try std.testing.expect(!probe.atSlash(src));
}
|