querysub 0.447.0 → 0.449.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "querysub",
3
- "version": "0.447.0",
3
+ "version": "0.449.0",
4
4
  "main": "index.js",
5
5
  "license": "MIT",
6
6
  "note1": "note on node-forge fork, see https://github.com/digitalbazaar/forge/issues/744 for details",
@@ -1,6 +1,7 @@
1
1
  import { getStorageDir, getSubFolder } from "../fs";
2
2
  import { Archives, createArchivesOverride } from "./archives";
3
3
  import fs from "fs";
4
+ import os from "os";
4
5
 
5
6
  import { list, nextId, timeInHour, timeInMinute } from "socket-function/src/misc";
6
7
  import { cache, lazy } from "socket-function/src/caching";
@@ -32,6 +33,10 @@ const LOCK_SUFFIX = ".lock";
32
33
  const TEMP_SUFFIX = ".tmp";
33
34
  const TEMP_THRESHOLD = timeInHour * 3;
34
35
 
36
+ function getTempFilePath() {
37
+ return os.tmpdir() + "/" + nextId() + TEMP_SUFFIX;
38
+ }
39
+
35
40
  const LARGE_FILE_CHUNK = 1024 * 1024 * 32;
36
41
  const CACHE_SUFFIX = ".cache";
37
42
  export function getArchiveCachePath(archives: Archives, key: string): string {
@@ -157,11 +162,17 @@ const getDiskMetricsBase = async () => {
157
162
  availableFiles--;
158
163
  try {
159
164
  await fs.promises.rename(sourceTempFile, path);
160
- } catch (e) {
161
- let destExists = fs.existsSync(path);
162
- if (!destExists) {
163
- console.error("Error renaming file", sourceTempFile, path, e);
165
+ } catch {
166
+ try {
167
+ await fs.promises.copyFile(sourceTempFile, path);
168
+ } catch (e) {
169
+ if (!fs.existsSync(path)) {
170
+ console.error("Error caching file", sourceTempFile, path, e);
171
+ }
164
172
  }
173
+ try {
174
+ await fs.promises.unlink(sourceTempFile);
175
+ } catch { }
165
176
  }
166
177
  }
167
178
  async function getCacheFile(
@@ -252,7 +263,9 @@ let cacheArchivesSymbol = Symbol("cacheArchives");
252
263
  /** IMPORTANT! The cache assumes the files contents immutable, and they will only be created
253
264
  * and deleted, never mutated.
254
265
  */
255
- export function wrapArchivesWithCache(archives: Archives): Archives & {
266
+ export function wrapArchivesWithCache(archives: Archives, rootConfig?: {
267
+ immutable?: boolean;
268
+ }): Archives & {
256
269
  // NOTE: lockRegion / path based functions are preferred for external accesses, as they ensure files
257
270
  // won't be garbage collected, and uses paths, which will be required for external processes.
258
271
  // - Locks only protect the local cache. The values can still be deleted explicitly.
@@ -267,7 +280,7 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
267
280
  return archives as any;
268
281
  }
269
282
  async function setLargeFile(config: Args<Archives["setLargeFile"]>[0]) {
270
- const tempPath = cacheArchives2 + nextId() + TEMP_SUFFIX;
283
+ const tempPath = getTempFilePath();
271
284
  let handle: fs.promises.FileHandle | undefined;
272
285
  try {
273
286
  handle = await fs.promises.open(tempPath, "w");
@@ -393,7 +406,7 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
393
406
  };
394
407
 
395
408
  let size = 0;
396
- const tempPath = cacheArchives2 + nextId() + TEMP_SUFFIX;
409
+ const tempPath = getTempFilePath();
397
410
  let handle: fs.promises.FileHandle | undefined;
398
411
  try {
399
412
  handle = await fs.promises.open(tempPath, "w");
@@ -509,7 +522,7 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
509
522
  // way to check)
510
523
  // - TODO: Set hash in file metadata (maybe for all archive writes?), and use this to
511
524
  // compare it against our cache file (which can have the hash in the file name).
512
- if (!config?.fastRead) {
525
+ if (!(config?.fastRead || rootConfig?.immutable)) {
513
526
  let info = await archives.getInfo(fileName);
514
527
  if (!info) {
515
528
  // If it is gone remotely, remove it from the cache, to save space.
@@ -521,7 +534,7 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
521
534
  if (buffer) return buffer;
522
535
  let result = await archives.get(fileName);
523
536
  if (result) {
524
- const tempPath = cacheArchives2 + nextId() + TEMP_SUFFIX;
537
+ const tempPath = getTempFilePath();
525
538
  await fs.promises.writeFile(tempPath, result);
526
539
  await metrics.addCacheFile(archives, fileName, tempPath);
527
540
  let range = config?.range;
@@ -531,9 +544,23 @@ export function wrapArchivesWithCache(archives: Archives): Archives & {
531
544
  }
532
545
  return result;
533
546
  },
547
+ getInfo: async (fileName: string) => {
548
+ // When the archives are immutable, the local cache file is a
549
+ // byte-for-byte copy, so we can answer getInfo from a local stat and
550
+ // skip the (slow, network) getInfo on the underlying archives.
551
+ if (rootConfig?.immutable) {
552
+ try {
553
+ let stat = await fs.promises.stat(getArchiveCachePath(archives, fileName));
554
+ return { writeTime: stat.mtimeMs, size: stat.size };
555
+ } catch {
556
+ // Not in our cache yet — fall through to the source.
557
+ }
558
+ }
559
+ return archives.getInfo(fileName);
560
+ },
534
561
  set: async (fileName: string, data: Buffer) => {
535
562
  let metrics = await getDiskMetrics();
536
- const tempPath = cacheArchives2 + nextId() + TEMP_SUFFIX;
563
+ const tempPath = getTempFilePath();
537
564
  await fs.promises.writeFile(tempPath, data);
538
565
  await archives.set(fileName, data);
539
566
  await metrics.addCacheFile(archives, fileName, tempPath);
@@ -14,8 +14,10 @@ import { devDebugbreak, isNoNetwork } from "../config";
14
14
  import { wrapArchivesWithCache } from "../-a-archives/archiveCache";
15
15
  import { AuthoritySpec, PathRouter, debugSpec } from "./PathRouter";
16
16
  import { authorityLookup } from "./AuthorityLookup";
17
- import { delay } from "socket-function/src/batching";
17
+ import { delay, retryFunctional } from "socket-function/src/batching";
18
18
  import { safeLoop } from "socket-function/src/batching";
19
+ import { errorToUndefined } from "../errors";
20
+ import { shutdown } from "../diagnostics/periodic";
19
21
 
20
22
  export const archives = lazy(() => wrapArchivesWithCache(getArchives("path-values/")));
21
23
  export const archivesLocks = lazy(() => getArchives("path-values-locks/"));
@@ -197,14 +199,33 @@ export class PathValueArchives {
197
199
  let slowestFileWriteTime = oldestTime + ARCHIVE_FLUSH_LIMIT;
198
200
  let fullPath = pathIdentifier + "/" + file;
199
201
  console.log(`Write archive file ${fullPath}, with size ${formatNumber(data.byteLength)}B, and count ${formatNumber(values.length)}`);
200
- await archives().set(fullPath, data);
202
+ try {
203
+ await retryFunctional(() => archives().set(fullPath, data), {
204
+ maxRetries: 10,
205
+ minDelay: 1000,
206
+ maxDelay: 5000,
207
+ });
208
+ } catch (e) {
209
+ console.error(`Error writing archive file ${fullPath}. THIS IS BAD! WE ARE SHUTTING DOWN SO THIS ERROR IS LOUDER! WHAT'S THE POINT OF A DB THAT DOESN'T SAVE DATA!: ${(e as Error).stack ?? e}`);
210
+ await delay(5000);
211
+ try {
212
+ await shutdown();
213
+ } catch {
214
+ process.exit();
215
+ }
216
+ }
201
217
 
202
218
  let fileInfo = await archives().getInfo(fullPath);
203
219
  // NOTE: If no fileInfo... then our file was merged? Which... is BAD, as it means we took
204
220
  // too long to read it, so we probably took too long to write it too!
205
221
  if (!fileInfo || fileInfo.writeTime > slowestFileWriteTime) {
206
222
  console.error(red(`File ${fullPath} was written too slowly, ${fileInfo?.writeTime || "undefined"} < ${slowestFileWriteTime}. This means some values will be rejected by reads. Killing server, our state is irrecoverable. Our watches have invalid data, and we have to stop before we create more invalid dependencies.`));
207
- process.exit();
223
+ await delay(5000);
224
+ try {
225
+ await shutdown();
226
+ } catch {
227
+ process.exit();
228
+ }
208
229
  }
209
230
 
210
231
 
@@ -67,6 +67,16 @@ function cleanCommitMessage(text: string): string {
67
67
  return text;
68
68
  }
69
69
 
70
+ /** Extracts just the added/removed lines from a unified diff, dropping context
71
+ * lines and diff/hunk headers — i.e. only the lines that actually changed. */
72
+ function extractChangedLines(diff: string): string {
73
+ return diff.split("\n")
74
+ .filter(line =>
75
+ (line.startsWith("+") || line.startsWith("-"))
76
+ && !line.startsWith("+++") && !line.startsWith("---"))
77
+ .join("\n");
78
+ }
79
+
70
80
  const rowButtonStyle = css.pad2(12, 8).button.bord2(0, 0, 20).fontWeight("bold");
71
81
 
72
82
  export class CommitModal extends qreact.Component<{
@@ -101,6 +111,7 @@ export class CommitModal extends qreact.Component<{
101
111
  let isQuerysub = Querysub.localRead(() => this.props.isQuerysub);
102
112
  let controller = MachineServiceController(SocketFunction.browserNodeId());
103
113
  let diff = await controller.getGitDiff.promise({ useQuerysub: isQuerysub });
114
+ console.log("Received commit diff, changed lines:\n" + extractChangedLines(diff));
104
115
  Querysub.localCommit(() => {
105
116
  this.state.diff = diff;
106
117
  this.state.diffLoading = false;
@@ -176,7 +187,6 @@ export class CommitModal extends qreact.Component<{
176
187
 
177
188
  render() {
178
189
  let { diff, diffLoading, summarizing, committing } = this.state;
179
- let diffBytes = diff ? new TextEncoder().encode(diff).length : 0;
180
190
 
181
191
  let summarizeLabel: string;
182
192
  if (diffLoading) {
@@ -184,7 +194,7 @@ export class CommitModal extends qreact.Component<{
184
194
  } else if (summarizing) {
185
195
  summarizeLabel = "Summarizing…";
186
196
  } else {
187
- summarizeLabel = `Summarize ${diffBytes.toLocaleString()} bytes of changes`;
197
+ summarizeLabel = `Summarize ${extractChangedLines(diff).length} changed characters`;
188
198
  }
189
199
 
190
200
  return <div className={css.vbox(12).minWidth(620)}>
@@ -211,6 +221,7 @@ export class CommitModal extends qreact.Component<{
211
221
  <button
212
222
  className={rowButtonStyle.hsl(45, 80, 85)}
213
223
  disabled={diffLoading || summarizing}
224
+ title={diff ? extractChangedLines(diff) : ""}
214
225
  onClick={() => void this.doSummarize()}
215
226
  >
216
227
  🤖 {summarizeLabel}
@@ -21,9 +21,6 @@ import { showFullscreenModal } from "../5-diagnostics/FullscreenModal";
21
21
  import { css } from "../4-dom/css";
22
22
  import { formatVeryNiceDateTime } from "socket-function/src/formatting/format";
23
23
 
24
- module.hotreload = true;
25
- module.noserverhotreload = false;
26
-
27
24
  type NodeSpecialInfo = {
28
25
  nodeId: string;
29
26
  machineId: string;
@@ -431,6 +431,7 @@ export class BufferIndex {
431
431
  }
432
432
 
433
433
  if (type === STREAM_TYPE) {
434
+ results.typeCounts.stream++;
434
435
  if (index.length === 0) {
435
436
  index = await BufferIndex.rebuildLocalIndexFromData(dataReader);
436
437
  if (index.length === 0) return [];
@@ -460,6 +461,7 @@ export class BufferIndex {
460
461
  }
461
462
  return matching;
462
463
  } else if (type === BULK_TYPE) {
464
+ results.typeCounts.bulk++;
463
465
  let candidateSet = new Set<number>();
464
466
  for (let or of allSearchUnits) {
465
467
  let blocks = BufferUnitIndex.findBlocks({ units: or, index });
@@ -139,6 +139,13 @@ export type IndexedLogResults = {
139
139
  totalBlockCount: number;
140
140
  blockCheckedCount: number;
141
141
 
142
+ // Per-format file counts — one increment per file searched, classified by its
143
+ // index/data format (stream = pending/streaming files, bulk = promoted files).
144
+ typeCounts: {
145
+ stream: number;
146
+ bulk: number;
147
+ };
148
+
142
149
  remoteBlockCount: number;
143
150
  localBlockCount: number;
144
151
  remoteBlockCheckedCount: number;
@@ -167,7 +174,7 @@ export type IndexedLogResults = {
167
174
  };
168
175
  export function createEmptyIndexedLogResults(): IndexedLogResults {
169
176
  return {
170
- matchCount: 0, reads: [], totalLocalFiles: 0, totalBackblazeFiles: 0, localFilesSearched: 0, backblazeFilesSearched: 0, totalBackblazeLogs: 0, backblazeLogsSearched: 0, totalBlockCount: 0, blockCheckedCount: 0, remoteBlockCount: 0, localBlockCount: 0, remoteBlockCheckedCount: 0, localBlockCheckedCount: 0, blocksCheckedCompressedSize: 0, blocksCheckedDecompressedSize: 0, backblazeUncompressedSize: 0, blockErrors: [], fileErrors: [], remoteIndexesSearched: 0, remoteIndexSize: 0, localIndexesSearched: 0, localIndexSize: 0, timeToFirstMatch: -1, fileFindTime: 0, indexSearchTime: 0, blockSearchTime: 0, totalSearchTime: 0, cancel: undefined, limitGroup: undefined,
177
+ matchCount: 0, reads: [], totalLocalFiles: 0, totalBackblazeFiles: 0, localFilesSearched: 0, backblazeFilesSearched: 0, totalBackblazeLogs: 0, backblazeLogsSearched: 0, totalBlockCount: 0, blockCheckedCount: 0, typeCounts: { stream: 0, bulk: 0 }, remoteBlockCount: 0, localBlockCount: 0, remoteBlockCheckedCount: 0, localBlockCheckedCount: 0, blocksCheckedCompressedSize: 0, blocksCheckedDecompressedSize: 0, backblazeUncompressedSize: 0, blockErrors: [], fileErrors: [], remoteIndexesSearched: 0, remoteIndexSize: 0, localIndexesSearched: 0, localIndexSize: 0, timeToFirstMatch: -1, fileFindTime: 0, indexSearchTime: 0, blockSearchTime: 0, totalSearchTime: 0, cancel: undefined, limitGroup: undefined,
171
178
  };
172
179
  }
173
180
 
@@ -211,6 +218,10 @@ export function mergeIndexedLogResults(existing: IndexedLogResults, incoming: In
211
218
  backblazeLogsSearched: existing.backblazeLogsSearched + incoming.backblazeLogsSearched,
212
219
  totalBlockCount: existing.totalBlockCount + incoming.totalBlockCount,
213
220
  blockCheckedCount: existing.blockCheckedCount + incoming.blockCheckedCount,
221
+ typeCounts: {
222
+ stream: existing.typeCounts.stream + incoming.typeCounts.stream,
223
+ bulk: existing.typeCounts.bulk + incoming.typeCounts.bulk,
224
+ },
214
225
  blocksCheckedCompressedSize: existing.blocksCheckedCompressedSize + incoming.blocksCheckedCompressedSize,
215
226
  blocksCheckedDecompressedSize: existing.blocksCheckedDecompressedSize + incoming.blocksCheckedDecompressedSize,
216
227
  backblazeUncompressedSize: existing.backblazeUncompressedSize + incoming.backblazeUncompressedSize,
@@ -120,7 +120,9 @@ export class IndexedLogs<T> {
120
120
  let usePublic = config.type === "public";
121
121
  let archives = usePublic ? getArchivesBackblaze(getDomain()) : getArchivesHome(getDomain());
122
122
  archives = nestArchives("final-indexed-logs/" + this.config.name, archives);
123
- archives = wrapArchivesWithCache(archives);
123
+ archives = wrapArchivesWithCache(archives, {
124
+ immutable: true,
125
+ });
124
126
  archives = createArchivesMemoryCache(archives, {
125
127
  maxSize: 1024 * 1024 * 1024 * 12,
126
128
  maxCount: 1000 * 500,
@@ -1,4 +1,4 @@
1
- import { timeInHour, timeInMinute, timeoutToError, timeoutToUndefined, timeoutToUndefinedSilent } from "socket-function/src/misc";
1
+ import { timeInHour, timeInMinute, timeInSecond, timeoutToError, timeoutToUndefined, timeoutToUndefinedSilent } from "socket-function/src/misc";
2
2
  import { lazy } from "socket-function/src/caching";
3
3
  import { getMachineId } from "../../../-a-auth/certs";
4
4
  import { getAllNodeIds, getOwnMachineId, isOwnNodeId } from "../../../-f-node-discovery/NodeDiscovery";
@@ -11,7 +11,7 @@ import { BufferIndex } from "./BufferIndex";
11
11
  import { LogStreamer } from "./LogStreamer";
12
12
  import { createMatchesPattern } from "./bufferSearchFindMatcher";
13
13
  import { IndexedLogs, IndexedLogShimController } from "./IndexedLogs";
14
- import { formatDateTime, formatTime } from "socket-function/src/formatting/format";
14
+ import { formatDateTime, formatNumber, formatTime } from "socket-function/src/formatting/format";
15
15
 
16
16
  // endTime must be at least this far in the past — we never want to search the
17
17
  // most-recent slice (it's still being written to and not yet promoted to public).
@@ -20,16 +20,74 @@ const END_TIME_MIN_AGE = timeInMinute;
20
20
  // machine's logs are guaranteed to be in public storage up through (now - this).
21
21
  const MOVE_GRACE = timeInMinute;
22
22
  // TTL for the path-cache (the slow TimeFileTree.findAllPaths walk).
23
- const PATHS_CACHE_TTL = timeInMinute;
23
+ const PATHS_CACHE_TTL = timeInMinute * 15;
24
+ // During the read/scan loops, emit a progress line at most this often.
25
+ const PROGRESS_LOG_INTERVAL = timeInSecond * 5;
24
26
 
25
27
  const LOGGER_NAMES = ["logs/log", "logs/info", "logs/warn", "logs/error"] as const;
26
28
  type LoggerName = typeof LOGGER_NAMES[number];
27
29
 
30
+ // Per-logger accounting for one search. Byte counts are raw buffer sizes.
31
+ export type LoggerStats = {
32
+ // Files in range matching the requested machine.
33
+ total: number;
34
+ // Of those, files we actually opened and ran the block matcher on
35
+ // (cutoff-pruning skips the rest once we've hit `limit`).
36
+ scanned: number;
37
+ // Combined byte size of the data + index files we opened.
38
+ scannedBytes: number;
39
+ // Candidate blocks the index matcher flagged across the scanned files.
40
+ blocksMatched: number;
41
+ // Of those, blocks we actually decoded (we stop early once `limit` is hit).
42
+ blocksRead: number;
43
+ // Combined byte size of the block buffers we decoded.
44
+ blockBytesRead: number;
45
+ // Result rows produced from this logger's files.
46
+ rows: number;
47
+ // Wall-clock time (ms) spent reading the data + index files from archives.
48
+ readFilesMs: number;
49
+ // Wall-clock time (ms) spent inside BufferIndex.findMatchingBlocks — the
50
+ // index scan that picks candidate blocks.
51
+ findMatchingBlocksMs: number;
52
+ // Wall-clock time (ms) spent inside BufferIndex.getBlockBuffers — decoding
53
+ // the candidate blocks' buffers.
54
+ getBlockBuffersMs: number;
55
+ };
56
+
57
+ function createEmptyLoggerStats(): LoggerStats {
58
+ return {
59
+ total: 0, scanned: 0, scannedBytes: 0, blocksMatched: 0, blocksRead: 0, blockBytesRead: 0, rows: 0,
60
+ readFilesMs: 0, findMatchingBlocksMs: 0, getBlockBuffersMs: 0,
61
+ };
62
+ }
63
+
64
+ function addLoggerStats(into: LoggerStats, from: LoggerStats): void {
65
+ into.total += from.total;
66
+ into.scanned += from.scanned;
67
+ into.scannedBytes += from.scannedBytes;
68
+ into.blocksMatched += from.blocksMatched;
69
+ into.blocksRead += from.blocksRead;
70
+ into.blockBytesRead += from.blockBytesRead;
71
+ into.rows += from.rows;
72
+ into.readFilesMs += from.readFilesMs;
73
+ into.findMatchingBlocksMs += from.findMatchingBlocksMs;
74
+ into.getBlockBuffersMs += from.getBlockBuffersMs;
75
+ }
76
+
77
+ // What the MCP `search` tool returns to the caller.
78
+ //
79
+ // DO NOT ADD MORE FIELDS HERE. The result is consumed by an LLM with a hard
80
+ // token budget, and every extra field of metadata eats into that budget and
81
+ // degrades the actual log output. `LoggerStats` (above) carries plenty of
82
+ // byte/block/timing detail — that is for server-side console.log ONLY and must
83
+ // never be surfaced here. The only per-logger numbers callers get are `total`
84
+ // and `scanned`. If you think a new field "would be nice to return": it would
85
+ // not. Leave this type alone.
28
86
  export type SearchResult = {
29
87
  allColumns: string[];
30
88
  results: Record<string, string>[];
31
- // Per-logger file counts: how many we found in range vs how many we
32
- // actually searched (cutoff-pruning skips the rest).
89
+ // Per-logger file counts only: total files in range, and how many we
90
+ // actually scanned. Nothing else — see the warning above.
33
91
  files: Record<string, { total: number; scanned: number }>;
34
92
  // Both fields are only present when we stopped early because we hit
35
93
  // `limit`. Their *presence* signals truncation; their absence means the
@@ -41,6 +99,21 @@ export type SearchResult = {
41
99
 
42
100
  type Direction = "fromStart" | "fromEnd";
43
101
 
102
+ // One log file within a specific logger's archive, as gathered for searching.
103
+ type FileEntry = {
104
+ path: TimeFilePath;
105
+ loggerName: LoggerName;
106
+ archives: Archives;
107
+ };
108
+
109
+ // Mutable accumulators the file/block walk appends into, instead of returning
110
+ // values back up the call stack.
111
+ type SearchSink = {
112
+ resultRows: Record<string, string>[];
113
+ allColumns: Set<string>;
114
+ loggerStats: LoggerStats;
115
+ };
116
+
44
117
  // Accept epoch ms (number) or any string `new Date(...)` understands. String
45
118
  // inputs without a timezone designator are interpreted as local time, which is
46
119
  // what callers typically have on hand (e.g. "2026-05-09 03:00").
@@ -55,6 +128,19 @@ function normalizeTime(value: string | number, label: string): number {
55
128
  return parsed;
56
129
  }
57
130
 
131
+ // Returns a logger that emits at most once per PROGRESS_LOG_INTERVAL, so a long
132
+ // loop can report progress without printing a line per iteration.
133
+ function createProgressLogger(): (message: string) => void {
134
+ let lastLog = Date.now();
135
+ return (message: string) => {
136
+ if (Date.now() - lastLog < PROGRESS_LOG_INTERVAL) {
137
+ return;
138
+ }
139
+ lastLog = Date.now();
140
+ console.log(message);
141
+ };
142
+ }
143
+
58
144
  export class MCPIndexedLogs {
59
145
  // machineId -> latest timestamp guaranteed to already be moved-to-public.
60
146
  private movedThroughByMachine = new Map<string, number>();
@@ -96,17 +182,11 @@ export class MCPIndexedLogs {
96
182
  let matchesPattern = createMatchesPattern(queryBuffer, false);
97
183
 
98
184
  // Gather all files (across the 4 loggers) into a single time-ordered list.
99
- type FileEntry = {
100
- path: TimeFilePath;
101
- loggerName: LoggerName;
102
- archives: Archives;
103
- };
104
185
  let allFiles: FileEntry[] = [];
105
186
 
106
- // Per-logger counts: total = files in range matching this machine,
107
- // scanned = files we actually opened and ran findMatchingBlocks on.
108
- let fileCounts: Record<string, { total: number; scanned: number }> = {};
109
- for (let name of LOGGER_NAMES) fileCounts[name] = { total: 0, scanned: 0 };
187
+ // Per-logger file/block/byte counts, accumulated as we walk the files.
188
+ let fileCounts: Record<string, LoggerStats> = {};
189
+ for (let name of LOGGER_NAMES) fileCounts[name] = createEmptyLoggerStats();
110
190
 
111
191
  let pathsStart = Date.now();
112
192
  let totalPathsSeen = 0;
@@ -138,119 +218,98 @@ export class MCPIndexedLogs {
138
218
  allFiles.sort((a, b) => b.path.startTime - a.path.startTime);
139
219
  }
140
220
 
141
- // Sequentially walk files in time-order, applying a moving cutoff once
142
- // we have `limit` rows: any unprocessed file whose entire range is past
143
- // the cutoff cannot contribute results we'd keep.
144
- // - fromStart: cutoff = min(processed.endTime); skip files with startTime >= cutoff
145
- // - fromEnd: cutoff = max(processed.startTime); skip files with endTime <= cutoff
146
221
  let resultRows: Record<string, string>[] = [];
147
222
  let allColumnsSet = new Set<string>();
148
223
  let stats = createEmptyIndexedLogResults();
149
- let cutoff: number | undefined;
150
- let filesScanned = 0;
151
224
 
152
225
  let searchStart = Date.now();
153
- outer: for (let entry of allFiles) {
226
+
227
+ // Phase 1: read every file's buffers up front. The files are cached, so
228
+ // reading them all — even ones the cutoff would later skip — is cheap, and
229
+ // it keeps the scan phase free of slow, interleaved reads.
230
+ let readCount = 0;
231
+ let logReadProgress = createProgressLogger();
232
+ let readFiles = await Promise.all(allFiles.map(async (entry) => {
233
+ let buffers = await this.readFile({ entry, loggerStats: fileCounts[entry.loggerName] });
234
+ readCount++;
235
+ logReadProgress(`[search] reading files ${readCount}/${allFiles.length}`);
236
+ return { entry, buffers };
237
+ }));
238
+ console.log(`[search] read ${allFiles.length} files in ${formatTime(Date.now() - searchStart)}`);
239
+
240
+ // Phase 2: scan the already-read files in time order, applying a moving
241
+ // cutoff once we have `limit` rows: any unprocessed file whose entire range
242
+ // is past the cutoff cannot contribute results we'd keep.
243
+ // - fromStart: cutoff = min(processed.endTime); skip files with startTime >= cutoff
244
+ // - fromEnd: cutoff = max(processed.startTime); skip files with endTime <= cutoff
245
+ let scanCount = 0;
246
+ let logScanProgress = createProgressLogger();
247
+ let cutoff: number | undefined;
248
+ for (let { entry, buffers } of readFiles) {
249
+ scanCount++;
250
+ logScanProgress(`[search] scanning files ${scanCount}/${readFiles.length}`);
251
+
154
252
  if (resultRows.length >= limit) break;
155
- let p = entry.path;
253
+ if (buffers === undefined) continue;
254
+
156
255
  if (cutoff !== undefined) {
157
256
  if (config.direction === "fromStart") {
158
- if (p.startTime >= cutoff) continue;
257
+ if (entry.path.startTime >= cutoff) continue;
159
258
  } else {
160
- if (p.endTime <= cutoff) continue;
259
+ if (entry.path.endTime <= cutoff) continue;
161
260
  }
162
261
  }
163
262
 
164
- let indexBuf = await entry.archives.get(p.fullPath + INDEX_EXTENSION);
165
- if (!indexBuf) continue;
166
- let dataBuf = await entry.archives.get(p.fullPath);
167
- if (!dataBuf) continue;
168
- let dataReader = new BufferReader(dataBuf);
169
-
170
- let blocks: number[];
171
- try {
172
- blocks = await BufferIndex.findMatchingBlocks({
173
- index: indexBuf,
174
- dataReader,
175
- query: queryBuffer,
176
- results: stats,
177
- });
178
- filesScanned++;
179
- fileCounts[entry.loggerName].scanned++;
180
- } catch (e) {
181
- console.warn(`MCPIndexedLogs.search: error scanning ${p.fullPath + INDEX_EXTENSION}: ${(e as Error).stack ?? e}`);
182
- continue;
183
- }
184
-
185
- if (config.direction === "fromStart") {
186
- blocks.sort((a, b) => a - b);
187
- } else {
188
- blocks.sort((a, b) => b - a);
189
- }
190
-
191
- for (let block of blocks) {
192
- if (resultRows.length >= limit) break outer;
193
-
194
- let buffers: Buffer[];
195
- try {
196
- buffers = await BufferIndex.getBlockBuffers({
197
- index: indexBuf,
198
- dataReader,
199
- blockIndex: block,
200
- });
201
- } catch (e) {
202
- console.warn(`MCPIndexedLogs.search: error reading block ${block} of ${p.fullPath}: ${(e as Error).stack ?? e}`);
203
- continue;
204
- }
205
-
206
- let iterateForward = config.direction === "fromStart";
207
- let bStart = iterateForward ? 0 : buffers.length - 1;
208
- let bEnd = iterateForward ? buffers.length : -1;
209
- let bStep = iterateForward ? 1 : -1;
210
- for (let i = bStart; iterateForward ? i < bEnd : i > bEnd; i += bStep) {
211
- if (resultRows.length >= limit) break outer;
212
- let buf = buffers[i];
213
- if (!matchesPattern(buf)) continue;
214
-
215
- let datum: LogDatum;
216
- try {
217
- datum = LogStreamer.deserialize<LogDatum>(buf);
218
- } catch {
219
- continue;
220
- }
221
-
222
- if (typeof datum.time !== "number") continue;
223
- if (datum.time < startTime || datum.time > endTime) continue;
224
-
225
- let row: Record<string, string> = {};
226
- for (let col of config.columns) {
227
- if (col in datum) {
228
- row[col] = stringifyCell(datum[col]);
229
- }
230
- }
231
- for (let key of Object.keys(datum)) {
232
- allColumnsSet.add(key);
233
- }
234
- resultRows.push(row);
235
- }
236
- }
263
+ // scanFile appends straight into resultRows / allColumnsSet / the
264
+ // logger's stats.
265
+ await this.scanFile({
266
+ entry,
267
+ indexBuf: buffers.indexBuf,
268
+ dataBuf: buffers.dataBuf,
269
+ direction: config.direction,
270
+ limit,
271
+ queryBuffer,
272
+ matchesPattern,
273
+ columns: config.columns,
274
+ startTime,
275
+ endTime,
276
+ stats,
277
+ sink: {
278
+ resultRows,
279
+ allColumns: allColumnsSet,
280
+ loggerStats: fileCounts[entry.loggerName],
281
+ },
282
+ });
237
283
 
238
284
  if (resultRows.length >= limit) {
239
285
  if (config.direction === "fromStart") {
240
- cutoff = cutoff === undefined ? p.endTime : Math.min(cutoff, p.endTime);
286
+ cutoff = cutoff === undefined ? entry.path.endTime : Math.min(cutoff, entry.path.endTime);
241
287
  } else {
242
- cutoff = cutoff === undefined ? p.startTime : Math.max(cutoff, p.startTime);
288
+ cutoff = cutoff === undefined ? entry.path.startTime : Math.max(cutoff, entry.path.startTime);
243
289
  }
244
290
  }
245
291
  }
246
292
 
293
+ let totals = createEmptyLoggerStats();
294
+ for (let name of LOGGER_NAMES) addLoggerStats(totals, fileCounts[name]);
295
+
247
296
  let limitHit = resultRows.length >= limit;
248
- console.log(`[search] done in ${formatTime(Date.now() - searchStart)} (filesScanned=${filesScanned}/${allFiles.length} results=${resultRows.length} limit=${limit}${limitHit ? " HIT" : ""})`);
297
+ console.log(`[search] done in ${formatTime(Date.now() - searchStart)} (filesScanned=${totals.scanned}/${allFiles.length} scannedBytes=${formatNumber(totals.scannedBytes)}B blocksMatched=${totals.blocksMatched} blocksRead=${totals.blocksRead} blockBytesRead=${formatNumber(totals.blockBytesRead)}B results=${resultRows.length} limit=${limit}${limitHit ? " HIT" : ""})`);
298
+ console.log(`[search] buffer types: stream=${stats.typeCounts.stream} bulk=${stats.typeCounts.bulk}`);
299
+ console.log(`[search] timing: readFiles=${formatTime(totals.readFilesMs)} findMatchingBlocks=${formatTime(totals.findMatchingBlocksMs)} getBlockBuffers=${formatTime(totals.getBlockBuffersMs)}`);
300
+
301
+ // Trim the internal LoggerStats down to just total + scanned. The rest
302
+ // (bytes/blocks/timing) stays in the console.log above and is NOT
303
+ // returned — see the warning on SearchResult.
304
+ let files: Record<string, { total: number; scanned: number }> = {};
305
+ for (let name of LOGGER_NAMES) {
306
+ files[name] = { total: fileCounts[name].total, scanned: fileCounts[name].scanned };
307
+ }
249
308
 
250
309
  return {
251
310
  allColumns: Array.from(allColumnsSet),
252
311
  results: resultRows,
253
- files: fileCounts,
312
+ files,
254
313
  limitHit: limitHit ? true : undefined,
255
314
  note: limitHit
256
315
  ? `Stopped at limit=${limit}. Results are truncated — there are likely more matches outside what's returned. This is NOT missing data; raise the limit or narrow the time range to see more.`
@@ -258,6 +317,139 @@ export class MCPIndexedLogs {
258
317
  };
259
318
  }
260
319
 
320
// Loads the index buffer and the data buffer for a single file. The data
// buffer is only requested once the index buffer is known to exist. The wall
// time spent on both reads is added to loggerStats.readFilesMs. Resolves to
// undefined when either buffer is missing.
private async readFile(read: {
    entry: FileEntry;
    loggerStats: LoggerStats;
}): Promise<{ indexBuf: Buffer; dataBuf: Buffer } | undefined> {
    let archives = read.entry.archives;
    let dataPath = read.entry.path.fullPath;

    let startedAt = Date.now();
    let indexBuf = await archives.get(dataPath + INDEX_EXTENSION);
    // No index means the file cannot be searched, so skip the data read.
    let dataBuf = indexBuf !== undefined ? await archives.get(dataPath) : undefined;
    read.loggerStats.readFilesMs += Date.now() - startedAt;

    if (indexBuf !== undefined && dataBuf !== undefined) {
        return { indexBuf, dataBuf };
    }
    return undefined;
}
337
+
338
// Searches one file whose index and data buffers are already in memory.
//
// Phase 1 asks BufferIndex for the candidate blocks (timed into
// findMatchingBlocksMs). If that throws, a warning is logged and the file is
// NOT counted as scanned. Phase 2 walks the candidate blocks in the requested
// direction, decodes each block's buffers and hands every buffer to appendRow
// until `limit` rows have been collected (timed into getBlockBuffersMs, which
// therefore also covers the sort and the row appending). A block that fails to
// decode is logged and skipped. All rows and counters land in `sink`.
private async scanFile(scan: {
    entry: FileEntry;
    indexBuf: Buffer;
    dataBuf: Buffer;
    direction: Direction;
    limit: number;
    queryBuffer: Buffer;
    matchesPattern: (buf: Buffer) => boolean;
    columns: string[];
    startTime: number;
    endTime: number;
    stats: ReturnType<typeof createEmptyIndexedLogResults>;
    sink: SearchSink;
}): Promise<void> {
    let { entry, indexBuf, dataBuf, direction, limit, queryBuffer, matchesPattern, columns, startTime, endTime, stats, sink } = scan;
    let rows = sink.resultRows;
    let loggerStats = sink.loggerStats;
    let filePath = entry.path;
    let forward = direction === "fromStart";

    // Phase 1: use the index to pick candidate blocks.
    let indexScanStart = Date.now();
    let dataReader = new BufferReader(dataBuf);

    let candidates: number[] | undefined;
    try {
        candidates = await BufferIndex.findMatchingBlocks({
            index: indexBuf,
            dataReader,
            query: queryBuffer,
            results: stats,
        });
    } catch (e) {
        console.warn(`MCPIndexedLogs.search: error scanning ${filePath.fullPath + INDEX_EXTENSION}: ${(e as Error).stack ?? e}`);
    }
    loggerStats.findMatchingBlocksMs += Date.now() - indexScanStart;
    if (candidates === undefined) return;

    // Phase 2: decode candidate blocks and collect rows.
    let decodeStart = Date.now();
    // Only files whose index scan succeeded count as scanned.
    loggerStats.scanned++;
    loggerStats.scannedBytes += indexBuf.length + dataBuf.length;
    loggerStats.blocksMatched += candidates.length;

    // Ascending block order when reading from the start, descending otherwise.
    candidates.sort(
        forward
            ? (a: number, b: number) => a - b
            : (a: number, b: number) => b - a
    );

    for (let blockIndex of candidates) {
        if (rows.length >= limit) break;

        let buffers: Buffer[] | undefined;
        try {
            buffers = await BufferIndex.getBlockBuffers({
                index: indexBuf,
                dataReader,
                blockIndex,
            });
        } catch (e) {
            console.warn(`MCPIndexedLogs.search: error reading block ${blockIndex} of ${filePath.fullPath}: ${(e as Error).stack ?? e}`);
        }
        if (buffers === undefined) continue;

        loggerStats.blocksRead++;
        loggerStats.blockBytesRead += buffers.reduce((sum, b) => sum + b.length, 0);

        // Walk a reversed copy when reading from the end so the block's own
        // array is left untouched.
        let ordered = forward ? buffers : buffers.slice().reverse();
        for (let buf of ordered) {
            if (rows.length >= limit) break;
            this.appendRow({ buf, matchesPattern, columns, startTime, endTime, sink });
        }
    }
    loggerStats.getBlockBuffersMs += Date.now() - decodeStart;
}
415
+
416
// Turns one raw log buffer into a result row and pushes it onto `sink`.
// The buffer is dropped silently when it fails `matchesPattern`, cannot be
// deserialized, has a non-numeric `time`, or has a `time` outside
// [startTime, endTime]. For an accepted buffer the row contains only the
// requested `columns` that exist on the datum, while every key of the datum is
// added to sink.allColumns and sink.loggerStats.rows is incremented.
private appendRow(append: {
    buf: Buffer;
    matchesPattern: (buf: Buffer) => boolean;
    columns: string[];
    startTime: number;
    endTime: number;
    sink: SearchSink;
}): void {
    let { buf, matchesPattern, columns, startTime, endTime, sink } = append;
    if (!matchesPattern(buf)) return;

    let datum: LogDatum;
    try {
        datum = LogStreamer.deserialize<LogDatum>(buf);
    } catch {
        // Undecodable entries are skipped rather than failing the search.
        return;
    }

    let time = datum.time;
    if (typeof time !== "number" || time < startTime || time > endTime) return;

    let row: Record<string, string> = {};
    for (let col of columns) {
        if (!(col in datum)) continue;
        row[col] = stringifyCell(datum[col]);
    }

    // Record every key seen, not just the requested columns.
    Object.keys(datum).forEach(key => {
        sink.allColumns.add(key);
    });

    sink.resultRows.push(row);
    sink.loggerStats.rows++;
}
452
+
261
453
  // For each logger, asks each remote node on the target machine whether it
262
454
  // has pending logs overlapping [0, endTime]. The first node that answers
263
455
  // without throwing wins; if it says yes, we ask the same node to flush.
@@ -1,3 +1,4 @@
1
+ import { timeInDay } from "socket-function/src/misc";
1
2
  import { Archives } from "../../../-a-archives/archives";
2
3
  import { getOwnThreadId, getOwnMachineId } from "../../../-f-node-discovery/NodeDiscovery";
3
4
 
@@ -23,6 +24,10 @@ export type TimeFilePath = {
23
24
 
24
25
  const LOG_FILE_EXTENSION = ".logfile";
25
26
 
27
+ // When the requested range spans fewer than this many days, we skip the recursive folder
28
+ // scan and instead guess every day folder path directly, reading them all in parallel.
29
+ const MAX_RANGE_DAYS_FOR_DIRECT_READ = 10;
30
+
26
31
  function encodeLogFilePath(path: Omit<TimeFilePath, "fullPath">): string {
27
32
  // Create folder structure: year/month/day/
28
33
  const date = new Date(path.startTime);
@@ -113,6 +118,59 @@ export class TimeFileTree {
113
118
// Returns the log files overlapping [startTime, endTime]. Ranges shorter than
// MAX_RANGE_DAYS_FOR_DIRECT_READ days are resolved by listing the guessed day
// folders directly; anything longer falls back to the folder scan.
public async findAllPaths(config: {
    startTime: number;
    endTime: number;
}): Promise<TimeFilePath[]> {
    const rangeMs = config.endTime - config.startTime;
    const directReadLimitMs = MAX_RANGE_DAYS_FOR_DIRECT_READ * timeInDay;
    const isShortRange = rangeMs < directReadLimitMs;

    return isShortRange
        ? await this.findAllPathsByDayGuess(config)
        : await this.findAllPathsByScan(config);
}
129
+
130
// Lists every UTC day folder touched by [startTime, endTime] directly, all in
// parallel, and returns the decoded files whose own time range overlaps the
// requested one. Missing day folders simply return no files (find on a
// non-existent prefix is empty), so they are not special-cased.
//
// Results are returned grouped by day folder in chronological order: each
// listing collects into its own array and the arrays are flattened in the
// order of `dayFolders`, so the output does not depend on which listing
// happens to resolve first.
//
// NOTE(review): only folders from the UTC day of startTime onward are listed.
// encodeLogFilePath appears to folder files by their startTime, so a file that
// began on the previous UTC day but still overlaps startTime would not be
// found here — confirm this matches findAllPathsByScan.
private async findAllPathsByDayGuess(config: {
    startTime: number;
    endTime: number;
}): Promise<TimeFilePath[]> {
    // Build the list of UTC day folder prefixes spanning the range, starting
    // at midnight UTC of the day containing startTime.
    const dayFolders: string[] = [];
    const startDate = new Date(config.startTime);
    let cursor = Date.UTC(startDate.getUTCFullYear(), startDate.getUTCMonth(), startDate.getUTCDate());
    while (cursor <= config.endTime) {
        const date = new Date(cursor);
        const year = date.getUTCFullYear();
        const month = String(date.getUTCMonth() + 1).padStart(2, "0");
        const day = String(date.getUTCDate()).padStart(2, "0");
        dayFolders.push(`${year}/${month}/${day}/`);
        cursor += timeInDay;
    }

    // Promise.all preserves input order, so perDay[i] belongs to dayFolders[i].
    const perDay = await Promise.all(dayFolders.map(async (dayFolder) => {
        const files = await this.archives.find(dayFolder, { shallow: true, type: "files" });

        const matches: TimeFilePath[] = [];
        for (const file of files) {
            const decoded = decodeLogFilePath(file);
            if (decoded === undefined) {
                continue;
            }

            // Keep the file only if its time range overlaps the requested range.
            if (decoded.endTime >= config.startTime && decoded.startTime <= config.endTime) {
                matches.push(decoded);
            }
        }
        return matches;
    }));

    return perDay.flat();
}
170
+
171
+ private async findAllPathsByScan(config: {
172
+ startTime: number;
173
+ endTime: number;
116
174
  }): Promise<TimeFilePath[]> {
117
175
  const results: TimeFilePath[] = [];
118
176
 
@@ -20,6 +20,7 @@ import { isClient } from "../config2";
20
20
  import { isLocal } from "../config";
21
21
  import { pathWatcher } from "../0-path-value-core/PathWatcher";
22
22
  import { debugNodeId } from "../-c-identity/IdentityController";
23
+ import debugbreak from "debugbreak";
23
24
 
24
25
  if (!isClient()) {
25
26
  // Comment this line out to disable our functionality
@@ -332,7 +333,7 @@ async function auditAuthority(nodeId: string, pathsToAudit: { path: string }[],
332
333
  if (response.valid && response.time && compareTime(response.time, ourValue.time) > 0) {
333
334
  valuesToRequest.push({ path: response.path, time: response.time });
334
335
  let authorities = PathRouter.getAllAuthorities(response.path);
335
- require("debugbreak")(2);
336
+ debugbreak(2);
336
337
  debugger;
337
338
  trackSyncAge({
338
339
  path: response.path,
@@ -348,7 +349,7 @@ async function auditAuthority(nodeId: string, pathsToAudit: { path: string }[],
348
349
  // - Send it our value
349
350
  else if (response.valid === undefined && (!response.time || compareTime(ourValue.time, response.time) > 0)) {
350
351
  valuesToSend.push(ourValue);
351
- require("debugbreak")(2);
352
+ debugbreak(2);
352
353
  debugger;
353
354
  trackSyncAge({
354
355
  path: response.path,
@@ -369,7 +370,7 @@ async function auditAuthority(nodeId: string, pathsToAudit: { path: string }[],
369
370
  let age = now - ourValue.time.time;
370
371
  if (age >= MAX_CHANGE_AGE) {
371
372
  pathsToForceSync.add(response.path);
372
- require("debugbreak")(2);
373
+ debugbreak(2);
373
374
  debugger;
374
375
  trackSyncAge({
375
376
  path: response.path,