npm - @stacksjs/zig-dtsx - Versions diffs - 0.9.13 → 0.9.16 - Mend

@stacksjs/zig-dtsx 0.9.13 → 0.9.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +28 -0
package/build.zig +1 -1
package/package.json +2 -2
package/src/char_utils.zig +78 -12
package/src/emitter.zig +324 -179
package/src/extractors.zig +724 -404
package/src/lib.zig +35 -8
package/src/main.zig +108 -77
package/src/scan_loop.zig +101 -65
package/src/scanner.zig +293 -106
package/src/type_inference.zig +215 -130
package/test/zig-dtsx.test.ts +5 -1
package/zig-out/bin/zig-dtsx +0 -0
package/zig-out/bin/zig-dtsx.exe +0 -0

package/src/scanner.zig CHANGED Viewed

@@ -8,6 +8,23 @@ const Declaration = types.Declaration;
 const DeclarationKind = types.DeclarationKind;
 const Allocator = std.mem.Allocator;
+/// Compare the N bytes at `src` with comptime `word`: N of 1/2/4/8 is checked with a single unaligned integer load, any other N with std.mem.eql.
+inline fn comptime_match(comptime N: usize, src: *const [N]u8, comptime word: []const u8) bool {
+    // Power-of-2 sizes map onto u8/u16/u32/u64. NOTE(review): `expected` places word[i] at bits 8*i and up, which equals the pointer-cast load below only on little-endian targets — confirm big-endian builds are out of scope.
+    if (N == 1 or N == 2 or N == 4 or N == 8) {
+        const T = std.meta.Int(.unsigned, N * 8);
+        const expected: T = comptime blk: {
+            var val: T = 0;
+            for (word, 0..) |b, i| {
+                val |= @as(T, b) << @intCast(i * 8);
+            }
+            break :blk val;
+        };
+        return @as(*align(1) const T, @ptrCast(src)).* == expected; // align(1) because `src` is only byte-aligned
+    }
+    return std.mem.eql(u8, src, word); // remaining lengths: plain byte-wise comparison
+}
 /// Result of scanning: declarations + non-exported types
 pub const ScanResult = struct {
     declarations: std.array_list.Managed(Declaration),
@@ -21,8 +38,12 @@ pub const Scanner = struct {
     len: usize,
     allocator: Allocator,
     declarations: std.array_list.Managed(Declaration),
+    /// Lazy-initialized: only allocated on first insertion to avoid overhead for
+    /// files that have no non-exported types or overloaded functions.
     non_exported_types: std.StringHashMap(Declaration),
     func_body_indices: std.AutoHashMap(usize, void),
+    non_exported_types_inited: bool,
+    func_body_indices_inited: bool,
     keep_comments: bool,
     isolated_declarations: bool,
@@ -38,6 +59,8 @@ pub const Scanner = struct {
             .declarations = declarations,
             .non_exported_types = std.StringHashMap(Declaration).init(allocator),
             .func_body_indices = std.AutoHashMap(usize, void).init(allocator),
+            .non_exported_types_inited = false,
+            .func_body_indices_inited = false,
             .keep_comments = keep_comments,
             .isolated_declarations = isolated_declarations,
         };
@@ -45,8 +68,24 @@ pub const Scanner = struct {
     pub fn deinit(self: *Scanner) void { // releases the declarations list plus each map whose "inited" flag is set
         self.declarations.deinit();
-        self.non_exported_types.deinit();
-        self.func_body_indices.deinit();
+        if (self.non_exported_types_inited) self.non_exported_types.deinit(); // flag is set by putNonExportedType; NOTE(review): entries inserted without setting it would not be freed here
+        if (self.func_body_indices_inited) self.func_body_indices.deinit(); // same gating; flag is set by putFuncBodyIndex
+    }
+    /// Store `decl` under `name` in non_exported_types and mark the map as used (the flag gates deinit and scan's post-processing). A failed put is silently dropped.
+    pub fn putNonExportedType(self: *Scanner, name: []const u8, decl: Declaration) void {
+        if (!self.non_exported_types_inited) {
+            self.non_exported_types_inited = true; // only the flag changes; the map value itself comes from the struct initializer
+        }
+        self.non_exported_types.put(name, decl) catch {}; // NOTE(review): any put error is swallowed and the entry is lost
+    }
+    /// Record `idx` in func_body_indices and mark the set as used (the flag gates deinit and scan's post-processing). A failed put is silently dropped.
+    pub fn putFuncBodyIndex(self: *Scanner, idx: usize) void {
+        if (!self.func_body_indices_inited) {
+            self.func_body_indices_inited = true; // only the flag changes; the map value itself comes from the struct initializer
+        }
+        self.func_body_indices.put(idx, {}) catch {}; // NOTE(review): any put error is swallowed and the index is lost
     }
     // ========================================================================
@@ -78,21 +117,30 @@ pub const Scanner = struct {
             }
             if (self.pos >= self.len) break;
+            // Scalar: consume single whitespace bytes
             const c = self.source[self.pos];
             if (c == ch.CH_SPACE or c == ch.CH_TAB or c == ch.CH_LF or c == ch.CH_CR) {
                 self.pos += 1;
+                // Fast scalar drain: consume consecutive whitespace without re-entering the outer loop
+                while (self.pos < self.len) {
+                    const c2 = self.source[self.pos];
+                    if (c2 == ch.CH_SPACE or c2 == ch.CH_TAB or c2 == ch.CH_LF or c2 == ch.CH_CR) {
+                        self.pos += 1;
+                    } else break;
+                }
                 continue;
             }
             if (c == ch.CH_SLASH and self.pos + 1 < self.len) {
                 const next = self.source[self.pos + 1];
                 if (next == ch.CH_SLASH) {
-                    // Line comment
+                    // Line comment — SIMD scan via indexOfChar.
                     const nl = ch.indexOfChar(self.source, ch.CH_LF, self.pos + 2);
                     self.pos = if (nl) |n| n + 1 else self.len;
                     continue;
                 }
                 if (next == ch.CH_STAR) {
-                    // Block comment
+                    // Block comment — `*/` is two specific bytes; the SIMD
+                    // first-byte scan in indexOf handles this efficiently.
                     const end_idx = ch.indexOf(self.source, "*/", self.pos + 2);
                     self.pos = if (end_idx) |e| e + 2 else self.len;
                     continue;
@@ -143,33 +191,70 @@ pub const Scanner = struct {
         self.pos = len;
     }
-    /// Skip past a template literal (backtick string with ${} interpolation)
+    /// Advance pos past a template literal (backtick string with ${} interpolation); assumes pos is on the opening backtick. An unterminated literal leaves pos at len.
+    /// 16-byte vector compares skip runs that contain no structural byte; when fewer than 16 bytes remain the per-byte checks below take over.
     pub fn skipTemplateLiteral(self: *Scanner) void {
         self.pos += 1; // skip opening backtick
+        const src = self.source;
+        const len = self.len;
+        var pos = self.pos;
         var depth: usize = 0;
-        while (self.pos < self.len) {
-            const c = self.source[self.pos];
+        while (pos < len) {
+            // Vector fast-skip. At depth 0 the structural bytes are backtick, backslash and $.
+            // At depth > 0 (inside an interpolation) } is structural as well.
+            if (depth == 0) {
+                while (pos + 16 <= len) {
+                    const chunk: @Vector(16, u8) = src[pos..][0..16].*;
+                    const match_mask = (chunk == @as(@Vector(16, u8), @splat(ch.CH_BACKTICK))) |
+                        (chunk == @as(@Vector(16, u8), @splat(ch.CH_BACKSLASH))) |
+                        (chunk == @as(@Vector(16, u8), @splat(ch.CH_DOLLAR)));
+                    if (@reduce(.Or, match_mask)) {
+                        const bits: u16 = @bitCast(match_mask);
+                        pos += @ctz(bits); // jump to the lane of the lowest set bit
+                        break;
+                    }
+                    pos += 16;
+                }
+            } else {
+                while (pos + 16 <= len) {
+                    const chunk: @Vector(16, u8) = src[pos..][0..16].*;
+                    const match_mask = (chunk == @as(@Vector(16, u8), @splat(ch.CH_BACKTICK))) |
+                        (chunk == @as(@Vector(16, u8), @splat(ch.CH_BACKSLASH))) |
+                        (chunk == @as(@Vector(16, u8), @splat(ch.CH_DOLLAR))) |
+                        (chunk == @as(@Vector(16, u8), @splat(ch.CH_RBRACE)));
+                    if (@reduce(.Or, match_mask)) {
+                        const bits: u16 = @bitCast(match_mask);
+                        pos += @ctz(bits); // jump to the lane of the lowest set bit
+                        break;
+                    }
+                    pos += 16;
+                }
+            }
+            if (pos >= len) break;
+            const c = src[pos];
             if (c == ch.CH_BACKSLASH) {
-                self.pos += 1;
-                if (self.pos < self.len) self.pos += 1;
+                pos += 2; // skip the backslash and the escaped byte; may step one past len, which simply ends the loop
                 continue;
             }
             if (c == ch.CH_BACKTICK and depth == 0) {
-                self.pos += 1;
+                self.pos = pos + 1; // closing backtick found: resume right after it
                 return;
             }
-            if (c == ch.CH_DOLLAR and self.pos + 1 < self.len and self.source[self.pos + 1] == ch.CH_LBRACE) {
-                self.pos += 2;
+            if (c == ch.CH_DOLLAR and pos + 1 < len and src[pos + 1] == ch.CH_LBRACE) {
+                pos += 2;
                 depth += 1;
                 continue;
             }
             if (c == ch.CH_RBRACE and depth > 0) {
                 depth -= 1; // NOTE(review): a plain '{' inside an interpolation is never counted, so its '}' also lands here — confirm intended
-                self.pos += 1;
+                pos += 1;
                 continue;
             }
-            self.pos += 1;
+            pos += 1;
         }
+        self.pos = len; // no closing backtick: consume the rest of the source
     }
     /// Check if `/` at current pos starts a regex literal (not division)
@@ -196,10 +281,22 @@ pub const Scanner = struct {
             const word_start: usize = @intCast(wp + 1);
             const word_end: usize = @intCast(p + 1);
             const word = self.source[word_start..word_end];
-            const keywords = [_][]const u8{ "return", "typeof", "void", "delete", "throw", "new", "in", "of", "case", "instanceof", "yield", "await" };
-            for (keywords) |kw| {
-                if (std.mem.eql(u8, word, kw)) return true;
-            }
+            // First-char dispatch to avoid 12 std.mem.eql calls per slash. The
+            // hot path here runs on every '/' that might start division.
+            if (word.len < 2 or word.len > 10) return false;
+            return switch (word[0]) {
+                'r' => std.mem.eql(u8, word, "return"),
+                't' => std.mem.eql(u8, word, "typeof") or std.mem.eql(u8, word, "throw"),
+                'v' => std.mem.eql(u8, word, "void"),
+                'd' => std.mem.eql(u8, word, "delete"),
+                'n' => std.mem.eql(u8, word, "new"),
+                'i' => std.mem.eql(u8, word, "in") or std.mem.eql(u8, word, "instanceof"),
+                'o' => std.mem.eql(u8, word, "of"),
+                'c' => std.mem.eql(u8, word, "case"),
+                'y' => std.mem.eql(u8, word, "yield"),
+                'a' => std.mem.eql(u8, word, "await"),
+                else => false,
+            };
         }
         return false;
     }
@@ -268,18 +365,21 @@ pub const Scanner = struct {
     }
     /// Advance pos over consecutive identifier characters and return the consumed slice of source (empty when the byte at pos is not an identifier char).
-    pub fn readIdent(self: *Scanner) []const u8 {
+    pub inline fn readIdent(self: *Scanner) []const u8 {
         const start = self.pos;
         while (self.pos < self.len and ch.isIdentChar(self.source[self.pos])) self.pos += 1;
         return self.source[start..self.pos]; // borrowed view into source, nothing is copied
     }
-    /// Check if source matches a word at pos (followed by non-ident char)
-    pub fn matchWord(self: *const Scanner, word: []const u8) bool {
+    /// Report whether `word` occurs at pos and is not immediately followed by an identifier char; pos is not moved.
+    /// `word` must be comptime-known: lengths 1/2/4/8 are compared as one integer in comptime_match, other lengths via std.mem.eql.
+    pub fn matchWord(self: *const Scanner, comptime word: []const u8) bool {
         if (self.pos + word.len > self.len) return false;
-        if (!std.mem.eql(u8, self.source[self.pos .. self.pos + word.len], word)) return false;
+        // Boundary check first: word must not be followed by an identifier char (end of source counts as a boundary)
         if (self.pos + word.len < self.len and ch.isIdentChar(self.source[self.pos + word.len])) return false;
-        return true;
+        // Fixed-length view of the candidate bytes; comptime_match selects the integer or std.mem.eql path from word.len
+        const src_slice = self.source[self.pos..][0..word.len];
+        return comptime_match(word.len, src_slice, word);
     }
     /// Check if current position is at a top-level statement-starting keyword
@@ -367,10 +467,17 @@ pub const Scanner = struct {
             self.pos = saved;
             return false;
         }
-        // Type continuation keywords
-        if (self.matchWord("extends") or self.matchWord("keyof") or self.matchWord("typeof") or
-            self.matchWord("infer") or self.matchWord("is") or self.matchWord("as") or self.matchWord("in"))
-        {
+        // Type continuation keywords — first-byte dispatch avoids running 7
+        // sequential matchWord calls on every newline at member depth 0.
+        const is_type_kw = switch (nc) {
+            'e' => self.matchWord("extends"),
+            'k' => self.matchWord("keyof"),
+            't' => self.matchWord("typeof"),
+            'i' => self.matchWord("infer") or self.matchWord("is") or self.matchWord("in"),
+            'a' => self.matchWord("as"),
+            else => false,
+        };
+        if (is_type_kw) {
             self.pos = saved;
             return false;
         }
@@ -581,8 +688,11 @@ pub const Scanner = struct {
         if (self.pos < self.len and self.source[self.pos] == ch.CH_SEMI) self.pos += 1;
     }
-    /// Peek at what char comes after a word (skipping whitespace)
-    pub fn peekAfterWord(self: *const Scanner, word: []const u8) u8 {
+    /// Peek at what char comes after a word (skipping whitespace).
+    /// Defensive bounds check on pos+word.len handles the edge where the
+    /// caller positions at end-of-source.
+    pub inline fn peekAfterWord(self: *const Scanner, word: []const u8) u8 {
+        if (self.pos + word.len > self.len) return 0;
         var p = self.pos + word.len;
         while (p < self.len and ch.isWhitespace(self.source[p])) p += 1;
         return if (p < self.len) self.source[p] else 0;
@@ -592,6 +702,9 @@ pub const Scanner = struct {
     pub fn peekAfterKeyword(self: *const Scanner, word1: []const u8, word2: []const u8) bool {
         var p = self.pos + word1.len;
         while (p < self.len and ch.isWhitespace(self.source[p])) p += 1;
+        // First-byte fast-fail: avoid std.mem.eql call when first char doesn't match.
+        if (p >= self.len or word2.len == 0) return false;
+        if (self.source[p] != word2[0]) return false;
         if (p + word2.len > self.len) return false;
         if (!std.mem.eql(u8, self.source[p .. p + word2.len], word2)) return false;
         return p + word2.len >= self.len or !ch.isIdentChar(self.source[p + word2.len]);
@@ -614,13 +727,12 @@ pub const Scanner = struct {
             return self.source[start..self.pos];
         }
         if (c == ch.CH_HASH) {
+            // Zero-copy: keep the leading `#` as part of the source slice instead
+            // of allocating a new buffer to prepend it.
+            const start = self.pos;
             self.pos += 1;
-            const ident = self.readIdent();
-            // Return "#name" — we need to allocate this
-            const result = self.allocator.alloc(u8, 1 + ident.len) catch return "";
-            result[0] = '#';
-            @memcpy(result[1..], ident);
-            return result;
+            _ = self.readIdent();
+            return self.source[start..self.pos];
         }
         return self.readIdent();
     }
@@ -651,24 +763,23 @@ pub const Scanner = struct {
     /// Scan TypeScript source and extract all declarations
     pub fn scan(self: *Scanner) !ScanResult {
-        // Skip BOM
-        if (self.pos < self.len and self.source[0] >= 0xEF) {
-            // UTF-8 BOM is EF BB BF
-            if (self.len >= 3 and self.source[0] == 0xEF and self.source[1] == 0xBB and self.source[2] == 0xBF) {
-                self.pos = 3;
-            }
+        // Skip UTF-8 BOM (EF BB BF). Check exact bytes — the previous `>= 0xEF`
+        // guard was over-permissive, accepting non-BOM high bytes only to fall
+        // through the strict triple-check.
+        if (self.len >= 3 and self.source[0] == 0xEF and self.source[1] == 0xBB and self.source[2] == 0xBF) {
+            self.pos = 3;
         }
         // Main scan loop — delegate to scan_loop module
         try @import("scan_loop.zig").scanMainLoop(self);
-        // Post-process: resolve referenced non-exported types
-        if (self.non_exported_types.count() > 0) {
+        // Post-process: resolve referenced non-exported types (only if map was used)
+        if (self.non_exported_types_inited and self.non_exported_types.count() > 0) {
             resolveReferencedTypes(&self.declarations, &self.non_exported_types);
         }
-        // Post-process: remove implementation signatures of overloaded functions
-        if (self.func_body_indices.count() > 0) {
+        // Post-process: remove implementation signatures of overloaded functions (only if map was used)
+        if (self.func_body_indices_inited and self.func_body_indices.count() > 0) {
             removeOverloadImplementations(self);
         }
@@ -685,6 +796,7 @@ pub const Scanner = struct {
 /// Check if name appears as a whole word in text (fast indexOf + boundary check)
 pub fn isWordInText(name: []const u8, text: []const u8) bool {
+    if (name.len == 0 or name.len > text.len) return false;
     var search_from: usize = 0;
     while (search_from < text.len) {
         const idx = ch.indexOf(text, name, search_from) orelse return false;
@@ -693,29 +805,14 @@ pub fn isWordInText(name: []const u8, text: []const u8) bool {
         const before_ok = !ch.isIdentChar(before);
         const after_ok = !ch.isIdentChar(after);
         if (before_ok and after_ok) return true;
-        search_from = idx + 1;
+        // When the right boundary fails (we matched a longer identifier), skip past
+        // the entire identifier so we don't re-scan its prefix. When only the left
+        // boundary fails, advance just by 1 to allow overlapping self-similar names.
+        search_from = if (!after_ok) idx + name.len else idx + 1;
     }
     return false;
 }
-/// Extract all identifier words from text into a HashSet (single pass, O(n))
-fn extractWordsFromText(alloc: std.mem.Allocator, text: []const u8) std.StringHashMap(void) {
-    var words = std.StringHashMap(void).init(alloc);
-    var i: usize = 0;
-    while (i < text.len) {
-        const c = text[i];
-        if (ch.isIdentStart(c)) {
-            const start = i;
-            i += 1;
-            while (i < text.len and ch.isIdentChar(text[i])) i += 1;
-            words.put(text[start..i], {}) catch {};
-        } else {
-            i += 1;
-        }
-    }
-    return words;
-}
 /// Resolve non-exported types that are referenced by exported declarations
 fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), non_exported_types: *std.StringHashMap(Declaration)) void {
     var resolved = std.StringHashMap(void).init(declarations.allocator);
@@ -743,6 +840,15 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
     // Track how far we've extracted words — only process new text_parts each iteration
     var words_extracted_up_to: usize = 0;
+    // Move outside loop — reuse across iterations
+    var to_insert = std.array_list.Managed(Declaration).init(declarations.allocator);
+    to_insert.ensureTotalCapacity(non_exported_types.count()) catch {};
+    defer to_insert.deinit();
+    var merged = std.array_list.Managed(Declaration).init(declarations.allocator);
+    merged.ensureTotalCapacity(declarations.items.len + non_exported_types.count()) catch {};
+    defer merged.deinit();
     while (true) {
         // Incrementally extract words from only the NEW text parts
         for (text_parts.items[words_extracted_up_to..]) |part| {
@@ -760,9 +866,7 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
         }
         words_extracted_up_to = text_parts.items.len;
-        var to_insert = std.array_list.Managed(Declaration).init(declarations.allocator);
-        to_insert.ensureTotalCapacity(non_exported_types.count()) catch {};
-        defer to_insert.deinit();
+        to_insert.clearRetainingCapacity();
         var it = non_exported_types.iterator();
         while (it.next()) |entry| {
@@ -789,7 +893,7 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
         }.cmp);
         // Merge at correct source positions
-        var merged = std.array_list.Managed(Declaration).init(declarations.allocator);
+        merged.clearRetainingCapacity();
         merged.ensureTotalCapacity(declarations.items.len + to_insert.items.len) catch {};
         var ti: usize = 0;
         for (declarations.items) |d| {
@@ -806,7 +910,6 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
         declarations.clearRetainingCapacity();
         declarations.appendSlice(merged.items) catch {};
-        merged.deinit();
         // Add new texts to search
         for (to_insert.items) |d| {
@@ -818,59 +921,71 @@ fn resolveReferencedTypes(declarations: *std.array_list.Managed(Declaration), no
 }
 /// Remove implementation signatures of overloaded functions
+/// For every function name declared more than once, the last body-bearing
+/// declaration is dropped: one pass counts names, a backward pass marks the
+/// implementations, and a forward pass filters out the marked entries.
 fn removeOverloadImplementations(scanner: *Scanner) void {
-    // Count function names
-    var func_name_counts = std.StringHashMap(usize).init(scanner.allocator);
-    defer func_name_counts.deinit();
+    // Single HashMap: count function declarations by name. Track overloads inline
+    // so we don't need a second iteration pass to count them.
+    var func_counts = std.StringHashMap(usize).init(scanner.allocator);
+    defer func_counts.deinit();
+    var overload_count: usize = 0;
     for (scanner.declarations.items) |d| {
         if (d.kind == .function_decl) {
-            const entry = func_name_counts.getOrPut(d.name) catch continue;
+            const entry = func_counts.getOrPut(d.name) catch continue;
             if (!entry.found_existing) {
                 entry.value_ptr.* = 1;
             } else {
                 entry.value_ptr.* += 1;
+                if (entry.value_ptr.* == 2) overload_count += 1;
             }
         }
     }
-    // Find overloaded names (count > 1)
-    var overloaded = std.StringHashMap(void).init(scanner.allocator);
-    defer overloaded.deinit();
-    var it = func_name_counts.iterator();
-    while (it.next()) |entry| {
-        if (entry.value_ptr.* > 1) {
-            overloaded.put(entry.key_ptr.*, {}) catch {};
-        }
-    }
-    if (overloaded.count() == 0) return;
-    // Find last body-bearing index for each overloaded name and remove them
-    var to_remove = std.AutoHashMap(usize, void).init(scanner.allocator);
-    defer to_remove.deinit();
-    var oit = overloaded.iterator();
-    while (oit.next()) |entry| {
-        const name = entry.key_ptr.*;
-        // Walk backwards
-        var i: usize = scanner.declarations.items.len;
-        while (i > 0) {
-            i -= 1;
-            const d = scanner.declarations.items[i];
-            if (d.kind == .function_decl and std.mem.eql(u8, d.name, name) and scanner.func_body_indices.contains(i)) {
-                to_remove.put(i, {}) catch {};
-                break;
-            }
+    if (overload_count == 0) return;
+    // Single backward pass: for each overloaded name (count > 1),
+    // find and mark the last body-bearing declaration for removal.
+    // Use a bitset (array of bools) instead of a HashMap for to_remove.
+    const len = scanner.declarations.items.len;
+    const remove_flags = scanner.allocator.alloc(bool, len) catch return;
+    defer scanner.allocator.free(remove_flags);
+    @memset(remove_flags, false);
+    var found_count: usize = 0;
+    // Walk backward: for each overloaded function, mark its last body-bearing impl
+    var names_found = std.StringHashMap(void).init(scanner.allocator);
+    defer names_found.deinit();
+    var i: usize = len;
+    while (i > 0) {
+        i -= 1;
+        const d = scanner.declarations.items[i];
+        if (d.kind != .function_decl) continue;
+        // Check if this function is overloaded
+        const count_entry = func_counts.get(d.name) orelse continue;
+        if (count_entry <= 1) continue;
+        // Already found the impl for this name?
+        if (names_found.contains(d.name)) continue;
+        // Is this the implementation (has body)?
+        if (scanner.func_body_indices.contains(i)) {
+            remove_flags[i] = true;
+            found_count += 1;
+            names_found.put(d.name, {}) catch {};
+            if (found_count == overload_count) break; // All found
         }
     }
-    if (to_remove.count() == 0) return;
+    if (found_count == 0) return;
-    // Filter in single pass — O(n) instead of O(k*n)
+    // Filter in single pass
     var write: usize = 0;
-    for (scanner.declarations.items, 0..) |d, i| {
-        if (!to_remove.contains(i)) {
+    for (scanner.declarations.items, 0..) |d, fi| {
+        if (!remove_flags[fi]) {
             scanner.declarations.items[write] = d;
             write += 1;
         }
@@ -906,3 +1021,75 @@ test "scanner skipWhitespaceAndComments" {
     s.skipWhitespaceAndComments();
     try std.testing.expectEqualStrings("hello", s.source[s.pos .. s.pos + 5]);
 }
+// --- Tests added for performance/fix patches ---
+// isWordInText: when a hit fails the right-boundary check (e.g. searching for
+// "Foo" inside "FooBar"), the new implementation skips past the matched
+// substring instead of advancing one byte. These cases verify both the
+// short-circuit and the normal advancement paths still report correctly.
+test "isWordInText skips past failing right boundary" {
+    // Two hits that are prefixes of longer identifiers, then a real whole-word
+    // hit — skipping past the failed hits must not skip the legitimate later match.
+    try std.testing.expect(isWordInText("Foo", "FooBar FooBaz Foo;"));
+    try std.testing.expect(!isWordInText("Foo", "FooBar FooBaz FooQuux")); // every hit is the prefix of a longer identifier
+    // Left-boundary failure at an early position (`xxaa`) must advance by 1
+    // and still find a real later match (`aa`) — exercises the advance-by-1
+    // branch. (Without the trailing standalone `aa`, this string contains
+    // no whole-word match: the only "aa" is glued to `xx`.)
+    try std.testing.expect(isWordInText("aa", "xxaa aa"));
+    // Empty needle is rejected.
+    try std.testing.expect(!isWordInText("", "anything"));
+    // Needle longer than text is rejected by the length guard, before any search.
+    try std.testing.expect(!isWordInText("longer", "no"));
+}
+// peekAfterKeyword: the first-byte fast-fail must short-circuit cleanly when
+// the next ident byte after `word1` doesn't even match `word2[0]`.
+test "peekAfterKeyword first-byte fast-fail" {
+    var s1 = Scanner.init(std.testing.allocator, "async function foo", true, false);
+    defer s1.deinit();
+    try std.testing.expect(s1.peekAfterKeyword("async", "function")); // first byte matches and the full comparison succeeds
+    var s2 = Scanner.init(std.testing.allocator, "async const foo", true, false);
+    defer s2.deinit();
+    try std.testing.expect(!s2.peekAfterKeyword("async", "function")); // 'c' != 'f': rejected on the first byte
+    // Empty word2 must be rejected (avoids matching anything spuriously).
+    var s3 = Scanner.init(std.testing.allocator, "async ", true, false);
+    defer s3.deinit();
+    try std.testing.expect(!s3.peekAfterKeyword("async", ""));
+}
+// isRegexStart: the first-char dispatch must still recognize every keyword
+// previously checked, and must reject identifiers that share a prefix with
+// one of the regex-introducing keywords.
+test "isRegexStart keyword dispatch covers all branches" {
+    // Case table: the loop below puts the scanner on the first '/' of each source and calls isRegexStart.
+    const cases = [_]struct { src: []const u8, expected: bool }{
+        .{ .src = "return /a/", .expected = true },
+        .{ .src = "typeof /a/", .expected = true },
+        .{ .src = "throw /a/", .expected = true },
+        .{ .src = "void /a/", .expected = true },
+        .{ .src = "delete /a/", .expected = true },
+        .{ .src = "new /a/", .expected = true },
+        .{ .src = "in /a/", .expected = true },
+        .{ .src = "instanceof /a/", .expected = true },
+        .{ .src = "of /a/", .expected = true },
+        .{ .src = "case /a/", .expected = true },
+        .{ .src = "yield /a/", .expected = true },
+        .{ .src = "await /a/", .expected = true },
+        // Look-alike identifiers must NOT trigger regex-mode.
+        .{ .src = "myReturn /a/", .expected = false }, // "myReturn" is not "return"
+        .{ .src = "thrower /a/", .expected = false },
+        .{ .src = "x /a/", .expected = false }, // 1-char ident, length-bucket reject
+    };
+    for (cases) |c| {
+        var s = Scanner.init(std.testing.allocator, c.src, true, false);
+        defer s.deinit();
+        // Every case contains a '/', so the unwrap below cannot hit null
+        const slash_idx = std.mem.indexOfScalar(u8, c.src, '/').?;
+        s.pos = slash_idx;
+        try std.testing.expectEqual(c.expected, s.isRegexStart());
+    }
+}