npm - querysub - Versions diffs - 0.447.0 → 0.449.0 - Mend

querysub 0.447.0 → 0.449.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/package.json +1 -1
package/src/-a-archives/archiveCache.ts +37 -10
package/src/0-path-value-core/pathValueArchives.ts +24 -3
package/src/deployManager/components/CommitModal.tsx +13 -2
package/src/diagnostics/MachineThreadInfo.tsx +0 -3
package/src/diagnostics/logs/IndexedLogs/BufferIndex.ts +2 -0
package/src/diagnostics/logs/IndexedLogs/BufferIndexHelpers.ts +12 -1
package/src/diagnostics/logs/IndexedLogs/IndexedLogs.ts +3 -1
package/src/diagnostics/logs/IndexedLogs/MCPIndexedLogs.ts +294 -102
package/src/diagnostics/logs/IndexedLogs/TimeFileTree.ts +58 -0
package/src/diagnostics/pathAuditer.ts +4 -3

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "querysub",
-  "version": "0.447.0",
+  "version": "0.449.0",
   "main": "index.js",
   "license": "MIT",
   "note1": "note on node-forge fork, see https://github.com/digitalbazaar/forge/issues/744 for details",

package/src/-a-archives/archiveCache.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { getStorageDir, getSubFolder } from "../fs";
 import { Archives, createArchivesOverride } from "./archives";
 import fs from "fs";
+import os from "os";
 import { list, nextId, timeInHour, timeInMinute } from "socket-function/src/misc";
 import { cache, lazy } from "socket-function/src/caching";
@@ -32,6 +33,10 @@ const LOCK_SUFFIX = ".lock";
 const TEMP_SUFFIX = ".tmp";
 const TEMP_THRESHOLD = timeInHour * 3;
+function getTempFilePath() {
+    return os.tmpdir() + "/" + nextId() + TEMP_SUFFIX;
+}
 const LARGE_FILE_CHUNK = 1024 * 1024 * 32;
 const CACHE_SUFFIX = ".cache";
 export function getArchiveCachePath(archives: Archives, key: string): string {
@@ -157,11 +162,17 @@ const getDiskMetricsBase = async () => {
         availableFiles--;
         try {
             await fs.promises.rename(sourceTempFile, path);
-        } catch (e) {
-            let destExists = fs.existsSync(path);
-            if (!destExists) {
-                console.error("Error renaming file", sourceTempFile, path, e);
+        } catch {
+            try {
+                await fs.promises.copyFile(sourceTempFile, path);
+            } catch (e) {
+                if (!fs.existsSync(path)) {
+                    console.error("Error caching file", sourceTempFile, path, e);
+                }
             }
+            try {
+                await fs.promises.unlink(sourceTempFile);
+            } catch { }
         }
     }
     async function getCacheFile(
@@ -252,7 +263,9 @@ let cacheArchivesSymbol = Symbol("cacheArchives");
 /** IMPORTANT! The cache assumes the files contents immutable, and they will only be created
  *      and deleted, never mutated.
  */
-export function wrapArchivesWithCache(archives: Archives): Archives & {
+export function wrapArchivesWithCache(archives: Archives, rootConfig?: {
+    immutable?: boolean;
+}): Archives & {
     // NOTE: lockRegion / path based functions are preferred for external accesses, as they ensure files
     //  won't be garbage collected, and uses paths, which will be required for external processes.
     //  - Locks only protect the local cache. The values can still be deleted explicitly.
@@ -267,7 +280,7 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
         return archives as any;
     }
     async function setLargeFile(config: Args<Archives["setLargeFile"]>[0]) {
-        const tempPath = cacheArchives2 + nextId() + TEMP_SUFFIX;
+        const tempPath = getTempFilePath();
         let handle: fs.promises.FileHandle | undefined;
         try {
             handle = await fs.promises.open(tempPath, "w");
@@ -393,7 +406,7 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
             };
             let size = 0;
-            const tempPath = cacheArchives2 + nextId() + TEMP_SUFFIX;
+            const tempPath = getTempFilePath();
             let handle: fs.promises.FileHandle | undefined;
             try {
                 handle = await fs.promises.open(tempPath, "w");
@@ -509,7 +522,7 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
             //  way to check)
             //  - TODO: Set hash in file metadata (maybe for all archive writes?), and use this to
             //      compare it against our cache file (which can have the hash in the file name).
-            if (!config?.fastRead) {
+            if (!(config?.fastRead || rootConfig?.immutable)) {
                 let info = await archives.getInfo(fileName);
                 if (!info) {
                     // If it is gone remotely, remove it from the cache, to save space.
@@ -521,7 +534,7 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
             if (buffer) return buffer;
             let result = await archives.get(fileName);
             if (result) {
-                const tempPath = cacheArchives2 + nextId() + TEMP_SUFFIX;
+                const tempPath = getTempFilePath();
                 await fs.promises.writeFile(tempPath, result);
                 await metrics.addCacheFile(archives, fileName, tempPath);
                 let range = config?.range;
@@ -531,9 +544,23 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
             }
             return result;
         },
+        getInfo: async (fileName: string) => {
+            // When the archives are immutable, the local cache file is a
+            // byte-for-byte copy, so we can answer getInfo from a local stat and
+            // skip the (slow, network) getInfo on the underlying archives.
+            if (rootConfig?.immutable) {
+                try {
+                    let stat = await fs.promises.stat(getArchiveCachePath(archives, fileName));
+                    return { writeTime: stat.mtimeMs, size: stat.size };
+                } catch {
+                    // Not in our cache yet — fall through to the source.
+                }
+            }
+            return archives.getInfo(fileName);
+        },
         set: async (fileName: string, data: Buffer) => {
             let metrics = await getDiskMetrics();
-            const tempPath = cacheArchives2 + nextId() + TEMP_SUFFIX;
+            const tempPath = getTempFilePath();
             await fs.promises.writeFile(tempPath, data);
             await archives.set(fileName, data);
             await metrics.addCacheFile(archives, fileName, tempPath);

package/src/0-path-value-core/pathValueArchives.ts CHANGED Viewed

@@ -14,8 +14,10 @@ import { devDebugbreak, isNoNetwork } from "../config";
 import { wrapArchivesWithCache } from "../-a-archives/archiveCache";
 import { AuthoritySpec, PathRouter, debugSpec } from "./PathRouter";
 import { authorityLookup } from "./AuthorityLookup";
-import { delay } from "socket-function/src/batching";
+import { delay, retryFunctional } from "socket-function/src/batching";
 import { safeLoop } from "socket-function/src/batching";
+import { errorToUndefined } from "../errors";
+import { shutdown } from "../diagnostics/periodic";
 export const archives = lazy(() => wrapArchivesWithCache(getArchives("path-values/")));
 export const archivesLocks = lazy(() => getArchives("path-values-locks/"));
@@ -197,14 +199,33 @@ export class PathValueArchives {
             let slowestFileWriteTime = oldestTime + ARCHIVE_FLUSH_LIMIT;
             let fullPath = pathIdentifier + "/" + file;
             console.log(`Write archive file ${fullPath}, with size ${formatNumber(data.byteLength)}B, and count ${formatNumber(values.length)}`);
-            await archives().set(fullPath, data);
+            try {
+                await retryFunctional(() => archives().set(fullPath, data), {
+                    maxRetries: 10,
+                    minDelay: 1000,
+                    maxDelay: 5000,
+                });
+            } catch (e) {
+                console.error(`Error writing archive file ${fullPath}. THIS IS BAD! WE ARE SHUTTING DOWN SO THIS ERROR IS LOUDER! WHAT'S THE POINT OF A DB THAT DOESN'T SAVE DATA!: ${(e as Error).stack ?? e}`);
+                await delay(5000);
+                try {
+                    await shutdown();
+                } catch {
+                    process.exit();
+                }
+            }
             let fileInfo = await archives().getInfo(fullPath);
             // NOTE: If no fileInfo... then our file was merged? Which... is BAD, as it means we took
             //      too long to read it, so we probably took too long to write it too!
             if (!fileInfo || fileInfo.writeTime > slowestFileWriteTime) {
                 console.error(red(`File ${fullPath} was written too slowly, ${fileInfo?.writeTime || "undefined"} < ${slowestFileWriteTime}. This means some values will be rejected by reads. Killing server, our state is irrecoverable. Our watches have invalid data, and we have to stop before we create more invalid dependencies.`));
-                process.exit();
+                await delay(5000);
+                try {
+                    await shutdown();
+                } catch {
+                    process.exit();
+                }
             }

package/src/deployManager/components/CommitModal.tsx CHANGED Viewed

@@ -67,6 +67,16 @@ function cleanCommitMessage(text: string): string {
     return text;
 }
+/** Extracts just the added/removed lines from a unified diff, dropping context
+ *      lines and diff/hunk headers — i.e. only the lines that actually changed. */
+function extractChangedLines(diff: string): string {
+    return diff.split("\n")
+        .filter(line =>
+            (line.startsWith("+") || line.startsWith("-"))
+            && !line.startsWith("+++") && !line.startsWith("---"))
+        .join("\n");
+}
 const rowButtonStyle = css.pad2(12, 8).button.bord2(0, 0, 20).fontWeight("bold");
 export class CommitModal extends qreact.Component<{
@@ -101,6 +111,7 @@ export class CommitModal extends qreact.Component<{
             let isQuerysub = Querysub.localRead(() => this.props.isQuerysub);
             let controller = MachineServiceController(SocketFunction.browserNodeId());
             let diff = await controller.getGitDiff.promise({ useQuerysub: isQuerysub });
+            console.log("Received commit diff, changed lines:\n" + extractChangedLines(diff));
             Querysub.localCommit(() => {
                 this.state.diff = diff;
                 this.state.diffLoading = false;
@@ -176,7 +187,6 @@ export class CommitModal extends qreact.Component<{
     render() {
         let { diff, diffLoading, summarizing, committing } = this.state;
-        let diffBytes = diff ? new TextEncoder().encode(diff).length : 0;
         let summarizeLabel: string;
         if (diffLoading) {
@@ -184,7 +194,7 @@ export class CommitModal extends qreact.Component<{
         } else if (summarizing) {
             summarizeLabel = "Summarizing…";
         } else {
-            summarizeLabel = `Summarize ${diffBytes.toLocaleString()} bytes of changes`;
+            summarizeLabel = `Summarize ${extractChangedLines(diff).length} changed characters`;
         }
         return <div className={css.vbox(12).minWidth(620)}>
@@ -211,6 +221,7 @@ export class CommitModal extends qreact.Component<{
                 <button
                     className={rowButtonStyle.hsl(45, 80, 85)}
                     disabled={diffLoading || summarizing}
+                    title={diff ? extractChangedLines(diff) : ""}
                     onClick={() => void this.doSummarize()}
                 >
                     🤖 {summarizeLabel}

package/src/diagnostics/MachineThreadInfo.tsx CHANGED Viewed

@@ -21,9 +21,6 @@ import { showFullscreenModal } from "../5-diagnostics/FullscreenModal";
 import { css } from "../4-dom/css";
 import { formatVeryNiceDateTime } from "socket-function/src/formatting/format";
-module.hotreload = true;
-module.noserverhotreload = false;
 type NodeSpecialInfo = {
     nodeId: string;
     machineId: string;

package/src/diagnostics/logs/IndexedLogs/BufferIndex.ts CHANGED Viewed

@@ -431,6 +431,7 @@ export class BufferIndex {
         }
         if (type === STREAM_TYPE) {
+            results.typeCounts.stream++;
             if (index.length === 0) {
                 index = await BufferIndex.rebuildLocalIndexFromData(dataReader);
                 if (index.length === 0) return [];
@@ -460,6 +461,7 @@ export class BufferIndex {
             }
             return matching;
         } else if (type === BULK_TYPE) {
+            results.typeCounts.bulk++;
             let candidateSet = new Set<number>();
             for (let or of allSearchUnits) {
                 let blocks = BufferUnitIndex.findBlocks({ units: or, index });

package/src/diagnostics/logs/IndexedLogs/BufferIndexHelpers.ts CHANGED Viewed

@@ -139,6 +139,13 @@ export type IndexedLogResults = {
     totalBlockCount: number;
     blockCheckedCount: number;
+    // Per-format file counts — one increment per file searched, classified by its
+    // index/data format (stream = pending/streaming files, bulk = promoted files).
+    typeCounts: {
+        stream: number;
+        bulk: number;
+    };
     remoteBlockCount: number;
     localBlockCount: number;
     remoteBlockCheckedCount: number;
@@ -167,7 +174,7 @@ export type IndexedLogResults = {
 };
 export function createEmptyIndexedLogResults(): IndexedLogResults {
     return {
-        matchCount: 0, reads: [], totalLocalFiles: 0, totalBackblazeFiles: 0, localFilesSearched: 0, backblazeFilesSearched: 0, totalBackblazeLogs: 0, backblazeLogsSearched: 0, totalBlockCount: 0, blockCheckedCount: 0, remoteBlockCount: 0, localBlockCount: 0, remoteBlockCheckedCount: 0, localBlockCheckedCount: 0, blocksCheckedCompressedSize: 0, blocksCheckedDecompressedSize: 0, backblazeUncompressedSize: 0, blockErrors: [], fileErrors: [], remoteIndexesSearched: 0, remoteIndexSize: 0, localIndexesSearched: 0, localIndexSize: 0, timeToFirstMatch: -1, fileFindTime: 0, indexSearchTime: 0, blockSearchTime: 0, totalSearchTime: 0, cancel: undefined, limitGroup: undefined,
+        matchCount: 0, reads: [], totalLocalFiles: 0, totalBackblazeFiles: 0, localFilesSearched: 0, backblazeFilesSearched: 0, totalBackblazeLogs: 0, backblazeLogsSearched: 0, totalBlockCount: 0, blockCheckedCount: 0, typeCounts: { stream: 0, bulk: 0 }, remoteBlockCount: 0, localBlockCount: 0, remoteBlockCheckedCount: 0, localBlockCheckedCount: 0, blocksCheckedCompressedSize: 0, blocksCheckedDecompressedSize: 0, backblazeUncompressedSize: 0, blockErrors: [], fileErrors: [], remoteIndexesSearched: 0, remoteIndexSize: 0, localIndexesSearched: 0, localIndexSize: 0, timeToFirstMatch: -1, fileFindTime: 0, indexSearchTime: 0, blockSearchTime: 0, totalSearchTime: 0, cancel: undefined, limitGroup: undefined,
     };
 }
@@ -211,6 +218,10 @@ export function mergeIndexedLogResults(existing: IndexedLogResults, incoming: In
         backblazeLogsSearched: existing.backblazeLogsSearched + incoming.backblazeLogsSearched,
         totalBlockCount: existing.totalBlockCount + incoming.totalBlockCount,
         blockCheckedCount: existing.blockCheckedCount + incoming.blockCheckedCount,
+        typeCounts: {
+            stream: existing.typeCounts.stream + incoming.typeCounts.stream,
+            bulk: existing.typeCounts.bulk + incoming.typeCounts.bulk,
+        },
         blocksCheckedCompressedSize: existing.blocksCheckedCompressedSize + incoming.blocksCheckedCompressedSize,
         blocksCheckedDecompressedSize: existing.blocksCheckedDecompressedSize + incoming.blocksCheckedDecompressedSize,
         backblazeUncompressedSize: existing.backblazeUncompressedSize + incoming.backblazeUncompressedSize,

package/src/diagnostics/logs/IndexedLogs/IndexedLogs.ts CHANGED Viewed

@@ -120,7 +120,9 @@ export class IndexedLogs<T> {
         let usePublic = config.type === "public";
         let archives = usePublic ? getArchivesBackblaze(getDomain()) : getArchivesHome(getDomain());
         archives = nestArchives("final-indexed-logs/" + this.config.name, archives);
-        archives = wrapArchivesWithCache(archives);
+        archives = wrapArchivesWithCache(archives, {
+            immutable: true,
+        });
         archives = createArchivesMemoryCache(archives, {
             maxSize: 1024 * 1024 * 1024 * 12,
             maxCount: 1000 * 500,

package/src/diagnostics/logs/IndexedLogs/MCPIndexedLogs.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { timeInHour, timeInMinute, timeoutToError, timeoutToUndefined, timeoutToUndefinedSilent } from "socket-function/src/misc";
+import { timeInHour, timeInMinute, timeInSecond, timeoutToError, timeoutToUndefined, timeoutToUndefinedSilent } from "socket-function/src/misc";
 import { lazy } from "socket-function/src/caching";
 import { getMachineId } from "../../../-a-auth/certs";
 import { getAllNodeIds, getOwnMachineId, isOwnNodeId } from "../../../-f-node-discovery/NodeDiscovery";
@@ -11,7 +11,7 @@ import { BufferIndex } from "./BufferIndex";
 import { LogStreamer } from "./LogStreamer";
 import { createMatchesPattern } from "./bufferSearchFindMatcher";
 import { IndexedLogs, IndexedLogShimController } from "./IndexedLogs";
-import { formatDateTime, formatTime } from "socket-function/src/formatting/format";
+import { formatDateTime, formatNumber, formatTime } from "socket-function/src/formatting/format";
 // endTime must be at least this far in the past — we never want to search the
 // most-recent slice (it's still being written to and not yet promoted to public).
@@ -20,16 +20,74 @@ const END_TIME_MIN_AGE = timeInMinute;
 // machine's logs are guaranteed to be in public storage up through (now - this).
 const MOVE_GRACE = timeInMinute;
 // TTL for the path-cache (the slow TimeFileTree.findAllPaths walk).
-const PATHS_CACHE_TTL = timeInMinute;
+const PATHS_CACHE_TTL = timeInMinute * 15;
+// During the read/scan loops, emit a progress line at most this often.
+const PROGRESS_LOG_INTERVAL = timeInSecond * 5;
 const LOGGER_NAMES = ["logs/log", "logs/info", "logs/warn", "logs/error"] as const;
 type LoggerName = typeof LOGGER_NAMES[number];
+// Per-logger accounting for one search. Byte counts are raw buffer sizes.
+export type LoggerStats = {
+    // Files in range matching the requested machine.
+    total: number;
+    // Of those, files we actually opened and ran the block matcher on
+    // (cutoff-pruning skips the rest once we've hit `limit`).
+    scanned: number;
+    // Combined byte size of the data + index files we opened.
+    scannedBytes: number;
+    // Candidate blocks the index matcher flagged across the scanned files.
+    blocksMatched: number;
+    // Of those, blocks we actually decoded (we stop early once `limit` is hit).
+    blocksRead: number;
+    // Combined byte size of the block buffers we decoded.
+    blockBytesRead: number;
+    // Result rows produced from this logger's files.
+    rows: number;
+    // Wall-clock time (ms) spent reading the data + index files from archives.
+    readFilesMs: number;
+    // Wall-clock time (ms) spent inside BufferIndex.findMatchingBlocks — the
+    // index scan that picks candidate blocks.
+    findMatchingBlocksMs: number;
+    // Wall-clock time (ms) spent inside BufferIndex.getBlockBuffers — decoding
+    // the candidate blocks' buffers.
+    getBlockBuffersMs: number;
+};
+function createEmptyLoggerStats(): LoggerStats {
+    return {
+        total: 0, scanned: 0, scannedBytes: 0, blocksMatched: 0, blocksRead: 0, blockBytesRead: 0, rows: 0,
+        readFilesMs: 0, findMatchingBlocksMs: 0, getBlockBuffersMs: 0,
+    };
+}
+function addLoggerStats(into: LoggerStats, from: LoggerStats): void {
+    into.total += from.total;
+    into.scanned += from.scanned;
+    into.scannedBytes += from.scannedBytes;
+    into.blocksMatched += from.blocksMatched;
+    into.blocksRead += from.blocksRead;
+    into.blockBytesRead += from.blockBytesRead;
+    into.rows += from.rows;
+    into.readFilesMs += from.readFilesMs;
+    into.findMatchingBlocksMs += from.findMatchingBlocksMs;
+    into.getBlockBuffersMs += from.getBlockBuffersMs;
+}
+// What the MCP `search` tool returns to the caller.
+//
+// DO NOT ADD MORE FIELDS HERE. The result is consumed by an LLM with a hard
+// token budget, and every extra field of metadata eats into that budget and
+// degrades the actual log output. `LoggerStats` (above) carries plenty of
+// byte/block/timing detail — that is for server-side console.log ONLY and must
+// never be surfaced here. The only per-logger numbers callers get are `total`
+// and `scanned`. If you think a new field "would be nice to return": it would
+// not. Leave this type alone.
 export type SearchResult = {
     allColumns: string[];
     results: Record<string, string>[];
-    // Per-logger file counts: how many we found in range vs how many we
-    // actually searched (cutoff-pruning skips the rest).
+    // Per-logger file counts only: total files in range, and how many we
+    // actually scanned. Nothing else — see the warning above.
     files: Record<string, { total: number; scanned: number }>;
     // Both fields are only present when we stopped early because we hit
     // `limit`. Their *presence* signals truncation; their absence means the
@@ -41,6 +99,21 @@ export type SearchResult = {
 type Direction = "fromStart" | "fromEnd";
+// One log file within a specific logger's archive, as gathered for searching.
+type FileEntry = {
+    path: TimeFilePath;
+    loggerName: LoggerName;
+    archives: Archives;
+};
+// Mutable accumulators the file/block walk appends into, instead of returning
+// values back up the call stack.
+type SearchSink = {
+    resultRows: Record<string, string>[];
+    allColumns: Set<string>;
+    loggerStats: LoggerStats;
+};
 // Accept epoch ms (number) or any string `new Date(...)` understands. String
 // inputs without a timezone designator are interpreted as local time, which is
 // what callers typically have on hand (e.g. "2026-05-09 03:00").
@@ -55,6 +128,19 @@ function normalizeTime(value: string | number, label: string): number {
     return parsed;
 }
+// Returns a logger that emits at most once per PROGRESS_LOG_INTERVAL, so a long
+// loop can report progress without printing a line per iteration.
+function createProgressLogger(): (message: string) => void {
+    let lastLog = Date.now();
+    return (message: string) => {
+        if (Date.now() - lastLog < PROGRESS_LOG_INTERVAL) {
+            return;
+        }
+        lastLog = Date.now();
+        console.log(message);
+    };
+}
 export class MCPIndexedLogs {
     // machineId -> latest timestamp guaranteed to already be moved-to-public.
     private movedThroughByMachine = new Map<string, number>();
@@ -96,17 +182,11 @@ export class MCPIndexedLogs {
         let matchesPattern = createMatchesPattern(queryBuffer, false);
         // Gather all files (across the 4 loggers) into a single time-ordered list.
-        type FileEntry = {
-            path: TimeFilePath;
-            loggerName: LoggerName;
-            archives: Archives;
-        };
         let allFiles: FileEntry[] = [];
-        // Per-logger counts: total = files in range matching this machine,
-        // scanned = files we actually opened and ran findMatchingBlocks on.
-        let fileCounts: Record<string, { total: number; scanned: number }> = {};
-        for (let name of LOGGER_NAMES) fileCounts[name] = { total: 0, scanned: 0 };
+        // Per-logger file/block/byte counts, accumulated as we walk the files.
+        let fileCounts: Record<string, LoggerStats> = {};
+        for (let name of LOGGER_NAMES) fileCounts[name] = createEmptyLoggerStats();
         let pathsStart = Date.now();
         let totalPathsSeen = 0;
@@ -138,119 +218,98 @@ export class MCPIndexedLogs {
             allFiles.sort((a, b) => b.path.startTime - a.path.startTime);
         }
-        // Sequentially walk files in time-order, applying a moving cutoff once
-        // we have `limit` rows: any unprocessed file whose entire range is past
-        // the cutoff cannot contribute results we'd keep.
-        // - fromStart: cutoff = min(processed.endTime); skip files with startTime >= cutoff
-        // - fromEnd:   cutoff = max(processed.startTime); skip files with endTime   <= cutoff
         let resultRows: Record<string, string>[] = [];
         let allColumnsSet = new Set<string>();
         let stats = createEmptyIndexedLogResults();
-        let cutoff: number | undefined;
-        let filesScanned = 0;
         let searchStart = Date.now();
-        outer: for (let entry of allFiles) {
+        // Phase 1: read every file's buffers up front. The files are cached, so
+        // reading them all — even ones the cutoff would later skip — is cheap, and
+        // it keeps the scan phase free of slow, interleaved reads.
+        let readCount = 0;
+        let logReadProgress = createProgressLogger();
+        let readFiles = await Promise.all(allFiles.map(async (entry) => {
+            let buffers = await this.readFile({ entry, loggerStats: fileCounts[entry.loggerName] });
+            readCount++;
+            logReadProgress(`[search] reading files ${readCount}/${allFiles.length}`);
+            return { entry, buffers };
+        }));
+        console.log(`[search] read ${allFiles.length} files in ${formatTime(Date.now() - searchStart)}`);
+        // Phase 2: scan the already-read files in time order, applying a moving
+        // cutoff once we have `limit` rows: any unprocessed file whose entire range
+        // is past the cutoff cannot contribute results we'd keep.
+        // - fromStart: cutoff = min(processed.endTime); skip files with startTime >= cutoff
+        // - fromEnd:   cutoff = max(processed.startTime); skip files with endTime   <= cutoff
+        let scanCount = 0;
+        let logScanProgress = createProgressLogger();
+        let cutoff: number | undefined;
+        for (let { entry, buffers } of readFiles) {
+            scanCount++;
+            logScanProgress(`[search] scanning files ${scanCount}/${readFiles.length}`);
             if (resultRows.length >= limit) break;
-            let p = entry.path;
+            if (buffers === undefined) continue;
             if (cutoff !== undefined) {
                 if (config.direction === "fromStart") {
-                    if (p.startTime >= cutoff) continue;
+                    if (entry.path.startTime >= cutoff) continue;
                 } else {
-                    if (p.endTime <= cutoff) continue;
+                    if (entry.path.endTime <= cutoff) continue;
                 }
             }
-            let indexBuf = await entry.archives.get(p.fullPath + INDEX_EXTENSION);
-            if (!indexBuf) continue;
-            let dataBuf = await entry.archives.get(p.fullPath);
-            if (!dataBuf) continue;
-            let dataReader = new BufferReader(dataBuf);
-            let blocks: number[];
-            try {
-                blocks = await BufferIndex.findMatchingBlocks({
-                    index: indexBuf,
-                    dataReader,
-                    query: queryBuffer,
-                    results: stats,
-                });
-                filesScanned++;
-                fileCounts[entry.loggerName].scanned++;
-            } catch (e) {
-                console.warn(`MCPIndexedLogs.search: error scanning ${p.fullPath + INDEX_EXTENSION}: ${(e as Error).stack ?? e}`);
-                continue;
-            }
-            if (config.direction === "fromStart") {
-                blocks.sort((a, b) => a - b);
-            } else {
-                blocks.sort((a, b) => b - a);
-            }
-            for (let block of blocks) {
-                if (resultRows.length >= limit) break outer;
-                let buffers: Buffer[];
-                try {
-                    buffers = await BufferIndex.getBlockBuffers({
-                        index: indexBuf,
-                        dataReader,
-                        blockIndex: block,
-                    });
-                } catch (e) {
-                    console.warn(`MCPIndexedLogs.search: error reading block ${block} of ${p.fullPath}: ${(e as Error).stack ?? e}`);
-                    continue;
-                }
-                let iterateForward = config.direction === "fromStart";
-                let bStart = iterateForward ? 0 : buffers.length - 1;
-                let bEnd = iterateForward ? buffers.length : -1;
-                let bStep = iterateForward ? 1 : -1;
-                for (let i = bStart; iterateForward ? i < bEnd : i > bEnd; i += bStep) {
-                    if (resultRows.length >= limit) break outer;
-                    let buf = buffers[i];
-                    if (!matchesPattern(buf)) continue;
-                    let datum: LogDatum;
-                    try {
-                        datum = LogStreamer.deserialize<LogDatum>(buf);
-                    } catch {
-                        continue;
-                    }
-                    if (typeof datum.time !== "number") continue;
-                    if (datum.time < startTime || datum.time > endTime) continue;
-                    let row: Record<string, string> = {};
-                    for (let col of config.columns) {
-                        if (col in datum) {
-                            row[col] = stringifyCell(datum[col]);
-                        }
-                    }
-                    for (let key of Object.keys(datum)) {
-                        allColumnsSet.add(key);
-                    }
-                    resultRows.push(row);
-                }
-            }
+            // scanFile appends straight into resultRows / allColumnsSet / the
+            // logger's stats.
+            await this.scanFile({
+                entry,
+                indexBuf: buffers.indexBuf,
+                dataBuf: buffers.dataBuf,
+                direction: config.direction,
+                limit,
+                queryBuffer,
+                matchesPattern,
+                columns: config.columns,
+                startTime,
+                endTime,
+                stats,
+                sink: {
+                    resultRows,
+                    allColumns: allColumnsSet,
+                    loggerStats: fileCounts[entry.loggerName],
+                },
+            });
             if (resultRows.length >= limit) {
                 if (config.direction === "fromStart") {
-                    cutoff = cutoff === undefined ? p.endTime : Math.min(cutoff, p.endTime);
+                    cutoff = cutoff === undefined ? entry.path.endTime : Math.min(cutoff, entry.path.endTime);
                 } else {
-                    cutoff = cutoff === undefined ? p.startTime : Math.max(cutoff, p.startTime);
+                    cutoff = cutoff === undefined ? entry.path.startTime : Math.max(cutoff, entry.path.startTime);
                 }
             }
         }
+        let totals = createEmptyLoggerStats();
+        for (let name of LOGGER_NAMES) addLoggerStats(totals, fileCounts[name]);
         let limitHit = resultRows.length >= limit;
-        console.log(`[search] done in ${formatTime(Date.now() - searchStart)} (filesScanned=${filesScanned}/${allFiles.length} results=${resultRows.length} limit=${limit}${limitHit ? " HIT" : ""})`);
+        console.log(`[search] done in ${formatTime(Date.now() - searchStart)} (filesScanned=${totals.scanned}/${allFiles.length} scannedBytes=${formatNumber(totals.scannedBytes)}B blocksMatched=${totals.blocksMatched} blocksRead=${totals.blocksRead} blockBytesRead=${formatNumber(totals.blockBytesRead)}B results=${resultRows.length} limit=${limit}${limitHit ? " HIT" : ""})`);
+        console.log(`[search] buffer types: stream=${stats.typeCounts.stream} bulk=${stats.typeCounts.bulk}`);
+        console.log(`[search] timing: readFiles=${formatTime(totals.readFilesMs)} findMatchingBlocks=${formatTime(totals.findMatchingBlocksMs)} getBlockBuffers=${formatTime(totals.getBlockBuffersMs)}`);
+        // Trim the internal LoggerStats down to just total + scanned. The rest
+        // (bytes/blocks/timing) stays in the console.log above and is NOT
+        // returned — see the warning on SearchResult.
+        let files: Record<string, { total: number; scanned: number }> = {};
+        for (let name of LOGGER_NAMES) {
+            files[name] = { total: fileCounts[name].total, scanned: fileCounts[name].scanned };
+        }
         return {
             allColumns: Array.from(allColumnsSet),
             results: resultRows,
-            files: fileCounts,
+            files,
             limitHit: limitHit ? true : undefined,
             note: limitHit
                 ? `Stopped at limit=${limit}. Results are truncated — there are likely more matches outside what's returned. This is NOT missing data; raise the limit or narrow the time range to see more.`
@@ -258,6 +317,139 @@ export class MCPIndexedLogs {
         };
     }
+    // Reads one file's index buffer, then its data buffer (the data read is skipped when the
+    // index is missing). Returns undefined if either is missing; the elapsed time is added to readFilesMs either way.
+    private async readFile(read: {
+        entry: FileEntry;
+        loggerStats: LoggerStats;
+    }): Promise<{ indexBuf: Buffer; dataBuf: Buffer } | undefined> {
+        let { entry, loggerStats } = read;
+        let p = entry.path;
+        let readFilesStart = Date.now();
+        let indexBuf = await entry.archives.get(p.fullPath + INDEX_EXTENSION);
+        let dataBuf = indexBuf === undefined ? undefined : await entry.archives.get(p.fullPath);
+        loggerStats.readFilesMs += Date.now() - readFilesStart;
+        if (indexBuf === undefined || dataBuf === undefined) return undefined;
+        return { indexBuf, dataBuf };
+    }
+    // Runs the block matcher over one already-read file and appends matching rows, plus the
+    // scan counters and timings, straight into `sink`. Index-scan or block-read errors are logged via console.warn and skipped.
+    private async scanFile(scan: {
+        entry: FileEntry;
+        indexBuf: Buffer;
+        dataBuf: Buffer;
+        direction: Direction;
+        limit: number;
+        queryBuffer: Buffer;
+        matchesPattern: (buf: Buffer) => boolean;
+        columns: string[];
+        startTime: number;
+        endTime: number;
+        stats: ReturnType<typeof createEmptyIndexedLogResults>;
+        sink: SearchSink;
+    }): Promise<void> {
+        let { entry, indexBuf, dataBuf, direction, limit, queryBuffer, matchesPattern, columns, startTime, endTime, stats, sink } = scan;
+        let { resultRows, loggerStats } = sink;
+        let p = entry.path;
+        // Region 1: the index scan that picks candidate blocks.
+        let findStart = Date.now();
+        let dataReader = new BufferReader(dataBuf);
+        let blocks: number[] | undefined;
+        try {
+            blocks = await BufferIndex.findMatchingBlocks({
+                index: indexBuf,
+                dataReader,
+                query: queryBuffer,
+                results: stats,
+            });
+        } catch (e) {
+            console.warn(`MCPIndexedLogs.search: error scanning ${p.fullPath + INDEX_EXTENSION}: ${(e as Error).stack ?? e}`);
+        }
+        loggerStats.findMatchingBlocksMs += Date.now() - findStart;
+        if (blocks === undefined) return;
+        let blockStart = Date.now();
+        // The file is now counted as scanned.
+        loggerStats.scanned++;
+        loggerStats.scannedBytes += indexBuf.length + dataBuf.length;
+        loggerStats.blocksMatched += blocks.length;
+        if (direction === "fromStart") {
+            blocks.sort((a, b) => a - b);
+        } else {
+            blocks.sort((a, b) => b - a);
+        }
+        for (let block of blocks) {
+            if (resultRows.length >= limit) break;
+            // Region 2: decoding the candidate block's buffers. Note the getBlockBuffersMs timer (blockStart) also covers the block sort and appendRow.
+            let buffers: Buffer[] | undefined;
+            try {
+                buffers = await BufferIndex.getBlockBuffers({
+                    index: indexBuf,
+                    dataReader,
+                    blockIndex: block,
+                });
+            } catch (e) {
+                console.warn(`MCPIndexedLogs.search: error reading block ${block} of ${p.fullPath}: ${(e as Error).stack ?? e}`);
+            }
+            if (buffers === undefined) continue;
+            loggerStats.blocksRead++;
+            for (let buf of buffers) loggerStats.blockBytesRead += buf.length;
+            let ordered = direction === "fromStart" ? buffers : [...buffers].reverse();
+            for (let buf of ordered) {
+                if (resultRows.length >= limit) break;
+                this.appendRow({ buf, matchesPattern, columns, startTime, endTime, sink });
+            }
+        }
+        loggerStats.getBlockBuffersMs += Date.now() - blockStart;
+    }
+    // Appends one result row for `buf` into `sink` if it matches the query, deserializes,
+    // and has a numeric time inside [startTime, endTime] (inclusive); otherwise does nothing.
+    // Only rows that are appended contribute their keys to sink.allColumns.
+    private appendRow(append: {
+        buf: Buffer;
+        matchesPattern: (buf: Buffer) => boolean;
+        columns: string[];
+        startTime: number;
+        endTime: number;
+        sink: SearchSink;
+    }): void {
+        let { buf, matchesPattern, columns, startTime, endTime, sink } = append;
+        if (!matchesPattern(buf)) return;
+        let datum: LogDatum;
+        try {
+            datum = LogStreamer.deserialize<LogDatum>(buf);
+        } catch {
+            return;
+        }
+        if (typeof datum.time !== "number") return;
+        if (datum.time < startTime || datum.time > endTime) return;
+        let row: Record<string, string> = {};
+        for (let col of columns) {
+            if (col in datum) {
+                row[col] = stringifyCell(datum[col]);
+            }
+        }
+        for (let key of Object.keys(datum)) {
+            sink.allColumns.add(key);
+        }
+        sink.resultRows.push(row);
+        sink.loggerStats.rows++;
+    }
     // For each logger, asks each remote node on the target machine whether it
     // has pending logs overlapping [0, endTime]. The first node that answers
     // without throwing wins; if it says yes, we ask the same node to flush.

package/src/diagnostics/logs/IndexedLogs/TimeFileTree.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import { timeInDay } from "socket-function/src/misc";
 import { Archives } from "../../../-a-archives/archives";
 import { getOwnThreadId, getOwnMachineId } from "../../../-f-node-discovery/NodeDiscovery";
@@ -23,6 +24,10 @@ export type TimeFilePath = {
 const LOG_FILE_EXTENSION = ".logfile";
+// When the requested range spans fewer than this many days, we skip the recursive folder
+//  scan and instead guess every day folder path directly, reading them all in parallel.
+const MAX_RANGE_DAYS_FOR_DIRECT_READ = 10;
 function encodeLogFilePath(path: Omit<TimeFilePath, "fullPath">): string {
     // Create folder structure: year/month/day/
     const date = new Date(path.startTime);
@@ -113,6 +118,59 @@ export class TimeFileTree {
     public async findAllPaths(config: {
         startTime: number;
         endTime: number;
+    }): Promise<TimeFilePath[]> {
+        // For short ranges, guessing the day folder paths and reading them all in parallel is
+        //  much faster than recursively listing year/month/day folders.
+        if (config.endTime - config.startTime < MAX_RANGE_DAYS_FOR_DIRECT_READ * timeInDay) {
+            return await this.findAllPathsByDayGuess(config);
+        }
+        return await this.findAllPathsByScan(config);
+    }
+    // Reads every day folder in the range directly, in parallel. Missing day folders simply
+    //  return no files (find on a non-existent prefix is empty), so we don't special-case them.
+    private async findAllPathsByDayGuess(config: {
+        startTime: number;
+        endTime: number;
+    }): Promise<TimeFilePath[]> {
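+        // Build the UTC day folder prefixes from startTime's day through endTime's day. NOTE(review): a file stored under an earlier day folder that runs into the range would not be listed here — confirm against findAllPathsByScan.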
+        const dayFolders: string[] = [];
+        const startDate = new Date(config.startTime);
+        let cursor = Date.UTC(startDate.getUTCFullYear(), startDate.getUTCMonth(), startDate.getUTCDate());
+        while (cursor <= config.endTime) {
+            const date = new Date(cursor);
+            const year = date.getUTCFullYear();
+            const month = String(date.getUTCMonth() + 1).padStart(2, "0");
+            const day = String(date.getUTCDate()).padStart(2, "0");
+            dayFolders.push(`${year}/${month}/${day}/`);
+            cursor += timeInDay;
+        }
+        const results: TimeFilePath[] = [];
+        await Promise.all(dayFolders.map(async (dayFolder) => {
+            const files = await this.archives.find(dayFolder, { shallow: true, type: "files" });
+            for (const file of files) {
+                const decoded = decodeLogFilePath(file);
+                if (decoded === undefined) {
+                    continue;
+                }
+                // Check if file's time range overlaps with requested range
+                if (decoded.endTime >= config.startTime && decoded.startTime <= config.endTime) {
+                    results.push(decoded);
+                }
+            }
+        }));
+        return results;
+    }
+    private async findAllPathsByScan(config: {
+        startTime: number;
+        endTime: number;
     }): Promise<TimeFilePath[]> {
         const results: TimeFilePath[] = [];

package/src/diagnostics/pathAuditer.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import { isClient } from "../config2";
 import { isLocal } from "../config";
 import { pathWatcher } from "../0-path-value-core/PathWatcher";
 import { debugNodeId } from "../-c-identity/IdentityController";
+import debugbreak from "debugbreak";
 if (!isClient()) {
     // Comment this line out to disable our functionality
@@ -332,7 +333,7 @@ async function auditAuthority(nodeId: string, pathsToAudit: { path: string }[],
         if (response.valid && response.time && compareTime(response.time, ourValue.time) > 0) {
             valuesToRequest.push({ path: response.path, time: response.time });
             let authorities = PathRouter.getAllAuthorities(response.path);
-            require("debugbreak")(2);
+            debugbreak(2);
             debugger;
             trackSyncAge({
                 path: response.path,
@@ -348,7 +349,7 @@ async function auditAuthority(nodeId: string, pathsToAudit: { path: string }[],
         //      - Send it our value
         else if (response.valid === undefined && (!response.time || compareTime(ourValue.time, response.time) > 0)) {
             valuesToSend.push(ourValue);
-            require("debugbreak")(2);
+            debugbreak(2);
             debugger;
             trackSyncAge({
                 path: response.path,
@@ -369,7 +370,7 @@ async function auditAuthority(nodeId: string, pathsToAudit: { path: string }[],
             let age = now - ourValue.time.time;
             if (age >= MAX_CHANGE_AGE) {
                 pathsToForceSync.add(response.path);
-                require("debugbreak")(2);
+                debugbreak(2);
                 debugger;
                 trackSyncAge({
                     path: response.path,