@stacksjs/zig-dtsx 0.9.12 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/scanner.zig CHANGED
@@ -8,6 +8,23 @@ const Declaration = types.Declaration;
8
8
  const DeclarationKind = types.DeclarationKind;
9
9
  const Allocator = std.mem.Allocator;
10
10
 
11
/// Compare the `N` bytes at `src` with the comptime-known `word`.
///
/// For N of 1, 2, 4 or 8 the bytes are loaded as a single unsigned integer and
/// compared against a constant folded from `word` at compile time; every other
/// N falls back to `std.mem.eql`. Callers are expected to pass `N == word.len`,
/// as `matchWord` does.
inline fn comptime_match(comptime N: usize, src: *const [N]u8, comptime word: []const u8) bool {
    if (comptime (N == 1 or N == 2 or N == 4 or N == 8)) {
        const T = std.meta.Int(.unsigned, N * 8);
        // Pack `word` with byte 0 in the least-significant position.
        const expected: T = comptime blk: {
            var val: T = 0;
            for (word, 0..) |b, i| {
                val |= @as(T, b) << @intCast(i * 8);
            }
            break :blk val;
        };
        // Load with an explicit little-endian byte order so the value lines up
        // with `expected` on every target. A raw native-endian pointer load
        // would be byte-swapped on big-endian hosts and never compare equal.
        return std.mem.readInt(T, src, .little) == expected;
    }
    return std.mem.eql(u8, src, word);
}
27
+
11
28
  /// Result of scanning: declarations + non-exported types
12
29
  pub const ScanResult = struct {
13
30
  declarations: std.array_list.Managed(Declaration),
@@ -21,8 +38,12 @@ pub const Scanner = struct {
21
38
  len: usize,
22
39
  allocator: Allocator,
23
40
  declarations: std.array_list.Managed(Declaration),
41
+ /// Lazy-initialized: only allocated on first insertion to avoid overhead for
42
+ /// files that have no non-exported types or overloaded functions.
24
43
  non_exported_types: std.StringHashMap(Declaration),
25
44
  func_body_indices: std.AutoHashMap(usize, void),
45
+ non_exported_types_inited: bool,
46
+ func_body_indices_inited: bool,
26
47
  keep_comments: bool,
27
48
  isolated_declarations: bool,
28
49
 
@@ -38,6 +59,8 @@ pub const Scanner = struct {
38
59
  .declarations = declarations,
39
60
  .non_exported_types = std.StringHashMap(Declaration).init(allocator),
40
61
  .func_body_indices = std.AutoHashMap(usize, void).init(allocator),
62
+ .non_exported_types_inited = false,
63
+ .func_body_indices_inited = false,
41
64
  .keep_comments = keep_comments,
42
65
  .isolated_declarations = isolated_declarations,
43
66
  };
@@ -45,8 +68,24 @@ pub const Scanner = struct {
45
68
 
46
69
  pub fn deinit(self: *Scanner) void {
47
70
  self.declarations.deinit();
48
- self.non_exported_types.deinit();
49
- self.func_body_indices.deinit();
71
+ if (self.non_exported_types_inited) self.non_exported_types.deinit();
72
+ if (self.func_body_indices_inited) self.func_body_indices.deinit();
73
+ }
74
+
75
/// Store `decl` under `name` in `non_exported_types` and mark the map as used.
///
/// Best-effort: a failed insertion is silently dropped, and the used-flag is
/// raised regardless of whether the insertion succeeded.
pub fn putNonExportedType(self: *Scanner, name: []const u8, decl: Declaration) void {
    self.non_exported_types_inited = true;
    self.non_exported_types.put(name, decl) catch {};
}
82
+
83
/// Add `idx` to `func_body_indices` and mark the set as used.
///
/// Best-effort: a failed insertion is silently dropped, and the used-flag is
/// raised regardless of whether the insertion succeeded.
pub fn putFuncBodyIndex(self: *Scanner, idx: usize) void {
    self.func_body_indices_inited = true;
    self.func_body_indices.put(idx, {}) catch {};
}
51
90
 
52
91
  // ========================================================================
@@ -78,21 +117,30 @@ pub const Scanner = struct {
78
117
  }
79
118
  if (self.pos >= self.len) break;
80
119
 
120
+ // Scalar: consume single whitespace bytes
81
121
  const c = self.source[self.pos];
82
122
  if (c == ch.CH_SPACE or c == ch.CH_TAB or c == ch.CH_LF or c == ch.CH_CR) {
83
123
  self.pos += 1;
124
+ // Fast scalar drain: consume consecutive whitespace without re-entering the outer loop
125
+ while (self.pos < self.len) {
126
+ const c2 = self.source[self.pos];
127
+ if (c2 == ch.CH_SPACE or c2 == ch.CH_TAB or c2 == ch.CH_LF or c2 == ch.CH_CR) {
128
+ self.pos += 1;
129
+ } else break;
130
+ }
84
131
  continue;
85
132
  }
86
133
  if (c == ch.CH_SLASH and self.pos + 1 < self.len) {
87
134
  const next = self.source[self.pos + 1];
88
135
  if (next == ch.CH_SLASH) {
89
- // Line comment
136
+ // Line comment — SIMD scan via indexOfChar.
90
137
  const nl = ch.indexOfChar(self.source, ch.CH_LF, self.pos + 2);
91
138
  self.pos = if (nl) |n| n + 1 else self.len;
92
139
  continue;
93
140
  }
94
141
  if (next == ch.CH_STAR) {
95
- // Block comment
142
+ // Block comment — `*/` is two specific bytes; the SIMD
143
+ // first-byte scan in indexOf handles this efficiently.
96
144
  const end_idx = ch.indexOf(self.source, "*/", self.pos + 2);
97
145
  self.pos = if (end_idx) |e| e + 2 else self.len;
98
146
  continue;
@@ -143,33 +191,70 @@ pub const Scanner = struct {
143
191
  self.pos = len;
144
192
  }
145
193
 
146
/// Advance `self.pos` past a template literal, starting on its opening backtick.
///
/// `${` opens an interpolation and a `}` seen inside one closes it; only a
/// backtick outside every interpolation terminates the literal. A backslash
/// skips the byte that follows it. When the source ends before the closing
/// backtick, `self.pos` is left at `self.len`.
pub fn skipTemplateLiteral(self: *Scanner) void {
    const Lanes = @Vector(16, u8);
    const tick_lanes: Lanes = @splat(ch.CH_BACKTICK);
    const escape_lanes: Lanes = @splat(ch.CH_BACKSLASH);
    const dollar_lanes: Lanes = @splat(ch.CH_DOLLAR);
    const rbrace_lanes: Lanes = @splat(ch.CH_RBRACE);

    const text = self.source;
    const end = self.len;
    var cursor = self.pos + 1; // step over the opening backtick
    var nesting: usize = 0;

    while (cursor < end) {
        // Vector pre-scan: hop over 16-byte chunks that hold no byte of
        // interest. `}` only matters while inside an interpolation.
        const watch_rbrace = nesting > 0;
        while (cursor + 16 <= end) {
            const chunk: Lanes = text[cursor..][0..16].*;
            var hits = (chunk == tick_lanes) | (chunk == escape_lanes) | (chunk == dollar_lanes);
            if (watch_rbrace) hits = hits | (chunk == rbrace_lanes);
            if (@reduce(.Or, hits)) {
                const bits: u16 = @bitCast(hits);
                cursor += @ctz(bits);
                break;
            }
            cursor += 16;
        }
        if (cursor >= end) break;

        // Scalar step: classify the byte under the cursor.
        const byte = text[cursor];
        if (byte == ch.CH_BACKSLASH) {
            // Escape: skip the backslash together with the byte after it.
            cursor += 2;
        } else if (byte == ch.CH_BACKTICK and nesting == 0) {
            // Closing backtick outside any interpolation ends the literal.
            self.pos = cursor + 1;
            return;
        } else if (byte == ch.CH_DOLLAR and cursor + 1 < end and text[cursor + 1] == ch.CH_LBRACE) {
            nesting += 1;
            cursor += 2;
        } else if (byte == ch.CH_RBRACE and nesting > 0) {
            nesting -= 1;
            cursor += 1;
        } else {
            cursor += 1;
        }
    }

    // Unterminated literal: park the cursor at end of input.
    self.pos = end;
}
174
259
 
175
260
  /// Check if `/` at current pos starts a regex literal (not division)
@@ -196,10 +281,22 @@ pub const Scanner = struct {
196
281
  const word_start: usize = @intCast(wp + 1);
197
282
  const word_end: usize = @intCast(p + 1);
198
283
  const word = self.source[word_start..word_end];
199
- const keywords = [_][]const u8{ "return", "typeof", "void", "delete", "throw", "new", "in", "of", "case", "instanceof", "yield", "await" };
200
- for (keywords) |kw| {
201
- if (std.mem.eql(u8, word, kw)) return true;
202
- }
284
+ // First-char dispatch to avoid 12 std.mem.eql calls per slash. The
285
+ // hot path here runs on every '/' that might start division.
286
+ if (word.len < 2 or word.len > 10) return false;
287
+ return switch (word[0]) {
288
+ 'r' => std.mem.eql(u8, word, "return"),
289
+ 't' => std.mem.eql(u8, word, "typeof") or std.mem.eql(u8, word, "throw"),
290
+ 'v' => std.mem.eql(u8, word, "void"),
291
+ 'd' => std.mem.eql(u8, word, "delete"),
292
+ 'n' => std.mem.eql(u8, word, "new"),
293
+ 'i' => std.mem.eql(u8, word, "in") or std.mem.eql(u8, word, "instanceof"),
294
+ 'o' => std.mem.eql(u8, word, "of"),
295
+ 'c' => std.mem.eql(u8, word, "case"),
296
+ 'y' => std.mem.eql(u8, word, "yield"),
297
+ 'a' => std.mem.eql(u8, word, "await"),
298
+ else => false,
299
+ };
203
300
  }
204
301
  return false;
205
302
  }
@@ -268,18 +365,21 @@ pub const Scanner = struct {
268
365
  }
269
366
 
270
367
  /// Read an identifier at current position
271
- pub fn readIdent(self: *Scanner) []const u8 {
368
+ pub inline fn readIdent(self: *Scanner) []const u8 {
272
369
  const start = self.pos;
273
370
  while (self.pos < self.len and ch.isIdentChar(self.source[self.pos])) self.pos += 1;
274
371
  return self.source[start..self.pos];
275
372
  }
276
373
 
277
/// Report whether the comptime-known `word` sits at the cursor as a whole
/// word, i.e. it is followed by end of input or a non-identifier character.
///
/// The byte comparison is delegated to `comptime_match`, which compares
/// 1/2/4/8-byte words as a single integer and uses `std.mem.eql` otherwise.
pub fn matchWord(self: *const Scanner, comptime word: []const u8) bool {
    const stop = self.pos + word.len;
    if (stop > self.len) return false;

    // Right boundary first: reject when the word runs into more identifier bytes.
    const at_boundary = stop == self.len or !ch.isIdentChar(self.source[stop]);
    if (!at_boundary) return false;

    return comptime_match(word.len, self.source[self.pos..][0..word.len], word);
}
284
384
 
285
385
  /// Check if current position is at a top-level statement-starting keyword
@@ -367,10 +467,17 @@ pub const Scanner = struct {
367
467
  self.pos = saved;
368
468
  return false;
369
469
  }
370
- // Type continuation keywords
371
- if (self.matchWord("extends") or self.matchWord("keyof") or self.matchWord("typeof") or
372
- self.matchWord("infer") or self.matchWord("is") or self.matchWord("as") or self.matchWord("in"))
373
- {
470
+ // Type continuation keywords — first-byte dispatch avoids running 7
471
+ // sequential matchWord calls on every newline at member depth 0.
472
+ const is_type_kw = switch (nc) {
473
+ 'e' => self.matchWord("extends"),
474
+ 'k' => self.matchWord("keyof"),
475
+ 't' => self.matchWord("typeof"),
476
+ 'i' => self.matchWord("infer") or self.matchWord("is") or self.matchWord("in"),
477
+ 'a' => self.matchWord("as"),
478
+ else => false,
479
+ };
480
+ if (is_type_kw) {
374
481
  self.pos = saved;
375
482
  return false;
376
483
  }
@@ -581,8 +688,11 @@ pub const Scanner = struct {
581
688
  if (self.pos < self.len and self.source[self.pos] == ch.CH_SEMI) self.pos += 1;
582
689
  }
583
690
 
584
- /// Peek at what char comes after a word (skipping whitespace)
585
- pub fn peekAfterWord(self: *const Scanner, word: []const u8) u8 {
691
+ /// Peek at what char comes after a word (skipping whitespace).
692
+ /// Defensive bounds check on pos+word.len handles the edge where the
693
+ /// caller positions at end-of-source.
694
+ pub inline fn peekAfterWord(self: *const Scanner, word: []const u8) u8 {
695
+ if (self.pos + word.len > self.len) return 0;
586
696
  var p = self.pos + word.len;
587
697
  while (p < self.len and ch.isWhitespace(self.source[p])) p += 1;
588
698
  return if (p < self.len) self.source[p] else 0;
@@ -592,6 +702,9 @@ pub const Scanner = struct {
592
702
  pub fn peekAfterKeyword(self: *const Scanner, word1: []const u8, word2: []const u8) bool {
593
703
  var p = self.pos + word1.len;
594
704
  while (p < self.len and ch.isWhitespace(self.source[p])) p += 1;
705
+ // First-byte fast-fail: avoid std.mem.eql call when first char doesn't match.
706
+ if (p >= self.len or word2.len == 0) return false;
707
+ if (self.source[p] != word2[0]) return false;
595
708
  if (p + word2.len > self.len) return false;
596
709
  if (!std.mem.eql(u8, self.source[p .. p + word2.len], word2)) return false;
597
710
  return p + word2.len >= self.len or !ch.isIdentChar(self.source[p + word2.len]);
@@ -614,13 +727,12 @@ pub const Scanner = struct {
614
727
  return self.source[start..self.pos];
615
728
  }
616
729
  if (c == ch.CH_HASH) {
730
+ // Zero-copy: keep the leading `#` as part of the source slice instead
731
+ // of allocating a new buffer to prepend it.
732
+ const start = self.pos;
617
733
  self.pos += 1;
618
- const ident = self.readIdent();
619
- // Return "#name" — we need to allocate this
620
- const result = self.allocator.alloc(u8, 1 + ident.len) catch return "";
621
- result[0] = '#';
622
- @memcpy(result[1..], ident);
623
- return result;
734
+ _ = self.readIdent();
735
+ return self.source[start..self.pos];
624
736
  }
625
737
  return self.readIdent();
626
738
  }
@@ -651,24 +763,23 @@ pub const Scanner = struct {
651
763
 
652
764
  /// Scan TypeScript source and extract all declarations
653
765
  pub fn scan(self: *Scanner) !ScanResult {
654
- // Skip BOM
655
- if (self.pos < self.len and self.source[0] >= 0xEF) {
656
- // UTF-8 BOM is EF BB BF
657
- if (self.len >= 3 and self.source[0] == 0xEF and self.source[1] == 0xBB and self.source[2] == 0xBF) {
658
- self.pos = 3;
659
- }
766
+ // Skip UTF-8 BOM (EF BB BF). Check exact bytes — the previous `>= 0xEF`
767
+ // guard was over-permissive, accepting non-BOM high bytes only to fall
768
+ // through the strict triple-check.
769
+ if (self.len >= 3 and self.source[0] == 0xEF and self.source[1] == 0xBB and self.source[2] == 0xBF) {
770
+ self.pos = 3;
660
771
  }
661
772
 
662
773
  // Main scan loop — delegate to scan_loop module
663
774
  try @import("scan_loop.zig").scanMainLoop(self);
664
775
 
665
- // Post-process: resolve referenced non-exported types
666
- if (self.non_exported_types.count() > 0) {
776
+ // Post-process: resolve referenced non-exported types (only if map was used)
777
+ if (self.non_exported_types_inited and self.non_exported_types.count() > 0) {
667
778
  resolveReferencedTypes(&self.declarations, &self.non_exported_types);
668
779
  }
669
780
 
670
- // Post-process: remove implementation signatures of overloaded functions
671
- if (self.func_body_indices.count() > 0) {
781
+ // Post-process: remove implementation signatures of overloaded functions (only if map was used)
782
+ if (self.func_body_indices_inited and self.func_body_indices.count() > 0) {
672
783
  removeOverloadImplementations(self);
673
784
  }
674
785
 
@@ -685,6 +796,7 @@ pub const Scanner = struct {
685
796
 
686
797
  /// Check if name appears as a whole word in text (fast indexOf + boundary check)
687
798
  pub fn isWordInText(name: []const u8, text: []const u8) bool {
799
+ if (name.len == 0 or name.len > text.len) return false;
688
800
  var search_from: usize = 0;
689
801
  while (search_from < text.len) {
690
802
  const idx = ch.indexOf(text, name, search_from) orelse return false;
@@ -693,29 +805,14 @@ pub fn isWordInText(name: []const u8, text: []const u8) bool {
693
805
  const before_ok = !ch.isIdentChar(before);
694
806
  const after_ok = !ch.isIdentChar(after);
695
807
  if (before_ok and after_ok) return true;
696
- search_from = idx + 1;
808
+ // When the right boundary fails (we matched a longer identifier), skip past
809
+ // the entire identifier so we don't re-scan its prefix. When only the left
810
+ // boundary fails, advance just by 1 to allow overlapping self-similar names.
811
+ search_from = if (!after_ok) idx + name.len else idx + 1;
697
812
  }
698
813
  return false;
699
814
  }
700
815
 
701
- /// Extract all identifier words from text into a HashSet (single pass, O(n))
702
- fn extractWordsFromText(alloc: std.mem.Allocator, text: []const u8) std.StringHashMap(void) {
703
- var words = std.StringHashMap(void).init(alloc);
704
- var i: usize = 0;
705
- while (i < text.len) {
706
- const c = text[i];
707
- if (ch.isIdentStart(c)) {
708
- const start = i;
709
- i += 1;
710
- while (i < text.len and ch.isIdentChar(text[i])) i += 1;
711
- words.put(text[start..i], {}) catch {};
712
- } else {
713
- i += 1;
714
- }
715
- }
716
- return words;
717
- }
718
-
719
816
  /// Resolve non-exported types that are referenced by exported declarations
720
817
  fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), non_exported_types: *std.StringHashMap(Declaration)) void {
721
818
  var resolved = std.StringHashMap(void).init(declarations.allocator);
@@ -743,6 +840,15 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
743
840
  // Track how far we've extracted words — only process new text_parts each iteration
744
841
  var words_extracted_up_to: usize = 0;
745
842
 
843
+ // Move outside loop — reuse across iterations
844
+ var to_insert = std.array_list.Managed(Declaration).init(declarations.allocator);
845
+ to_insert.ensureTotalCapacity(non_exported_types.count()) catch {};
846
+ defer to_insert.deinit();
847
+
848
+ var merged = std.array_list.Managed(Declaration).init(declarations.allocator);
849
+ merged.ensureTotalCapacity(declarations.items.len + non_exported_types.count()) catch {};
850
+ defer merged.deinit();
851
+
746
852
  while (true) {
747
853
  // Incrementally extract words from only the NEW text parts
748
854
  for (text_parts.items[words_extracted_up_to..]) |part| {
@@ -760,9 +866,7 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
760
866
  }
761
867
  words_extracted_up_to = text_parts.items.len;
762
868
 
763
- var to_insert = std.array_list.Managed(Declaration).init(declarations.allocator);
764
- to_insert.ensureTotalCapacity(non_exported_types.count()) catch {};
765
- defer to_insert.deinit();
869
+ to_insert.clearRetainingCapacity();
766
870
 
767
871
  var it = non_exported_types.iterator();
768
872
  while (it.next()) |entry| {
@@ -789,7 +893,7 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
789
893
  }.cmp);
790
894
 
791
895
  // Merge at correct source positions
792
- var merged = std.array_list.Managed(Declaration).init(declarations.allocator);
896
+ merged.clearRetainingCapacity();
793
897
  merged.ensureTotalCapacity(declarations.items.len + to_insert.items.len) catch {};
794
898
  var ti: usize = 0;
795
899
  for (declarations.items) |d| {
@@ -806,7 +910,6 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
806
910
 
807
911
  declarations.clearRetainingCapacity();
808
912
  declarations.appendSlice(merged.items) catch {};
809
- merged.deinit();
810
913
 
811
914
  // Add new texts to search
812
915
  for (to_insert.items) |d| {
@@ -818,59 +921,71 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
818
921
  }
819
922
 
820
923
  /// Remove implementation signatures of overloaded functions
924
+ ///
925
+ /// Counts function declarations by name, then walks the list backward once to
926
+ /// flag the last body-bearing declaration of each overloaded name for removal.
821
927
  fn removeOverloadImplementations(scanner: *Scanner) void {
822
- // Count function names
823
- var func_name_counts = std.StringHashMap(usize).init(scanner.allocator);
824
- defer func_name_counts.deinit();
928
+ // Single HashMap: count function declarations by name. Track overloads inline
929
+ // so we don't need a second iteration pass to count them.
930
+ var func_counts = std.StringHashMap(usize).init(scanner.allocator);
931
+ defer func_counts.deinit();
932
+ var overload_count: usize = 0;
825
933
 
826
934
  for (scanner.declarations.items) |d| {
827
935
  if (d.kind == .function_decl) {
828
- const entry = func_name_counts.getOrPut(d.name) catch continue;
936
+ const entry = func_counts.getOrPut(d.name) catch continue;
829
937
  if (!entry.found_existing) {
830
938
  entry.value_ptr.* = 1;
831
939
  } else {
832
940
  entry.value_ptr.* += 1;
941
+ if (entry.value_ptr.* == 2) overload_count += 1;
833
942
  }
834
943
  }
835
944
  }
836
-
837
- // Find overloaded names (count > 1)
838
- var overloaded = std.StringHashMap(void).init(scanner.allocator);
839
- defer overloaded.deinit();
840
- var it = func_name_counts.iterator();
841
- while (it.next()) |entry| {
842
- if (entry.value_ptr.* > 1) {
843
- overloaded.put(entry.key_ptr.*, {}) catch {};
844
- }
845
- }
846
-
847
- if (overloaded.count() == 0) return;
848
-
849
- // Find last body-bearing index for each overloaded name and remove them
850
- var to_remove = std.AutoHashMap(usize, void).init(scanner.allocator);
851
- defer to_remove.deinit();
852
-
853
- var oit = overloaded.iterator();
854
- while (oit.next()) |entry| {
855
- const name = entry.key_ptr.*;
856
- // Walk backwards
857
- var i: usize = scanner.declarations.items.len;
858
- while (i > 0) {
859
- i -= 1;
860
- const d = scanner.declarations.items[i];
861
- if (d.kind == .function_decl and std.mem.eql(u8, d.name, name) and scanner.func_body_indices.contains(i)) {
862
- to_remove.put(i, {}) catch {};
863
- break;
864
- }
945
+ if (overload_count == 0) return;
946
+
947
+ // Single backward pass: for each overloaded name (count > 1),
948
+ // find and mark the last body-bearing declaration for removal.
949
+ // Use a bitset (array of bools) instead of a HashMap for to_remove.
950
+ const len = scanner.declarations.items.len;
951
+ const remove_flags = scanner.allocator.alloc(bool, len) catch return;
952
+ defer scanner.allocator.free(remove_flags);
953
+ @memset(remove_flags, false);
954
+
955
+ var found_count: usize = 0;
956
+
957
+ // Walk backward: for each overloaded function, mark its last body-bearing impl
958
+ var names_found = std.StringHashMap(void).init(scanner.allocator);
959
+ defer names_found.deinit();
960
+
961
+ var i: usize = len;
962
+ while (i > 0) {
963
+ i -= 1;
964
+ const d = scanner.declarations.items[i];
965
+ if (d.kind != .function_decl) continue;
966
+
967
+ // Check if this function is overloaded
968
+ const count_entry = func_counts.get(d.name) orelse continue;
969
+ if (count_entry <= 1) continue;
970
+
971
+ // Already found the impl for this name?
972
+ if (names_found.contains(d.name)) continue;
973
+
974
+ // Is this the implementation (has body)?
975
+ if (scanner.func_body_indices.contains(i)) {
976
+ remove_flags[i] = true;
977
+ found_count += 1;
978
+ names_found.put(d.name, {}) catch {};
979
+ if (found_count == overload_count) break; // All found
865
980
  }
866
981
  }
867
982
 
868
- if (to_remove.count() == 0) return;
983
+ if (found_count == 0) return;
869
984
 
870
- // Filter in single pass — O(n) instead of O(k*n)
985
+ // Filter in single pass
871
986
  var write: usize = 0;
872
- for (scanner.declarations.items, 0..) |d, i| {
873
- if (!to_remove.contains(i)) {
987
+ for (scanner.declarations.items, 0..) |d, fi| {
988
+ if (!remove_flags[fi]) {
874
989
  scanner.declarations.items[write] = d;
875
990
  write += 1;
876
991
  }
@@ -906,3 +1021,75 @@ test "scanner skipWhitespaceAndComments" {
906
1021
  s.skipWhitespaceAndComments();
907
1022
  try std.testing.expectEqualStrings("hello", s.source[s.pos .. s.pos + 5]);
908
1023
  }
1024
+
1025
+ // --- Tests added for performance/fix patches ---
1026
+
1027
// isWordInText: a hit that fails the right-boundary check (e.g. "Foo" inside
// "FooBar") makes the search resume after the matched substring rather than
// one byte later. The table covers that path, the advance-by-one path, and
// the early rejections.
test "isWordInText skips past failing right boundary" {
    const Case = struct { needle: []const u8, haystack: []const u8, found: bool };
    const table = [_]Case{
        // Longer identifiers come first; the genuine whole-word hit after
        // them must still be found.
        .{ .needle = "Foo", .haystack = "FooBar FooBaz Foo;", .found = true },
        .{ .needle = "Foo", .haystack = "FooBar FooBaz FooQuux", .found = false },
        // The first "aa" is glued to "xx" (left boundary fails), so the search
        // must advance by one and still reach the standalone "aa".
        .{ .needle = "aa", .haystack = "xxaa aa", .found = true },
        // Empty needle is rejected.
        .{ .needle = "", .haystack = "anything", .found = false },
        // Needle longer than the text is rejected up front.
        .{ .needle = "longer", .haystack = "no", .found = false },
    };
    for (table) |case| {
        try std.testing.expectEqual(case.found, isWordInText(case.needle, case.haystack));
    }
}
1046
+
1047
// peekAfterKeyword: the first-byte fast-fail must bail out cleanly when the
// byte following `word1` (after whitespace) differs from `word2[0]`.
test "peekAfterKeyword first-byte fast-fail" {
    const rows = [_]struct { source: []const u8, follow: []const u8, want: bool }{
        .{ .source = "async function foo", .follow = "function", .want = true },
        .{ .source = "async const foo", .follow = "function", .want = false },
        // Empty word2 must be rejected (avoids matching anything spuriously).
        .{ .source = "async ", .follow = "", .want = false },
    };
    for (rows) |row| {
        var scanner = Scanner.init(std.testing.allocator, row.source, true, false);
        defer scanner.deinit();
        try std.testing.expectEqual(row.want, scanner.peekAfterKeyword("async", row.follow));
    }
}
1063
+
1064
// isRegexStart: the first-char dispatch must keep recognizing every keyword
// that was checked before, and must reject identifiers that merely resemble
// one of the regex-introducing keywords.
test "isRegexStart keyword dispatch covers all branches" {
    const Case = struct { input: []const u8, want: bool };
    const table = [_]Case{
        .{ .input = "return /a/", .want = true },
        .{ .input = "typeof /a/", .want = true },
        .{ .input = "throw /a/", .want = true },
        .{ .input = "void /a/", .want = true },
        .{ .input = "delete /a/", .want = true },
        .{ .input = "new /a/", .want = true },
        .{ .input = "in /a/", .want = true },
        .{ .input = "instanceof /a/", .want = true },
        .{ .input = "of /a/", .want = true },
        .{ .input = "case /a/", .want = true },
        .{ .input = "yield /a/", .want = true },
        .{ .input = "await /a/", .want = true },
        // Look-alike identifiers must NOT switch to regex mode.
        .{ .input = "myReturn /a/", .want = false }, // "myReturn" is not "return"
        .{ .input = "thrower /a/", .want = false },
        .{ .input = "x /a/", .want = false }, // 1-char ident, rejected by the length check
    };
    for (table) |case| {
        var scanner = Scanner.init(std.testing.allocator, case.input, true, false);
        defer scanner.deinit();
        // Park the cursor on the first slash before asking.
        scanner.pos = std.mem.indexOfScalar(u8, case.input, '/') orelse unreachable;
        try std.testing.expectEqual(case.want, scanner.isRegexStart());
    }
}