querysub 0.459.0 → 0.460.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,7 +23,8 @@
23
23
  "mcp__node-debugger__resume",
24
24
  "mcp__node-debugger__listBreakpoints",
25
25
  "mcp__node-debugger__removeBreakpoint",
26
- "mcp__hottest__runTest"
26
+ "mcp__hottest__runTest",
27
+ "Bash(yarn test *)"
27
28
  ]
28
29
  }
29
30
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "querysub",
3
- "version": "0.459.0",
3
+ "version": "0.460.0",
4
4
  "main": "index.js",
5
5
  "license": "MIT",
6
6
  "note1": "note on node-forge fork, see https://github.com/digitalbazaar/forge/issues/744 for details",
package/src/config.ts CHANGED
@@ -134,6 +134,9 @@ export function isPublic() {
134
134
  }
135
135
  return !!yargObj.public;
136
136
  }
137
+ export function setIsPublic(value: boolean) {
138
+ yargObj.public = value;
139
+ }
137
140
 
138
141
  /** @deprecated Use !isPublic() instead */
139
142
  export function isLocal() {
@@ -164,6 +164,15 @@ async function main() {
164
164
  // Test command to verify ssh credentials work
165
165
  await runPromise(`ssh ${sshRemote} whoami`);
166
166
 
167
+ // Detect Hetzner rescue system — if we're still in rescue, installimage hasn't been run yet
168
+ let rescueProbe = await runPromise(`ssh ${sshRemote} "hostname; command -v installimage || true"`, { nothrow: true });
169
+ if (/(^|\n)rescue(\s|$)/i.test(rescueProbe) || /installimage/.test(rescueProbe)) {
170
+ console.error(`❌ Remote ${sshRemote} appears to be running the Hetzner rescue system (no OS installed yet).`);
171
+ console.error(` Run \`installimage\` on the remote first to provision the OS, reboot into the installed system, then re-run \`yarn setup-machine ${sshRemote}\`.`);
172
+ console.error(` Detected:\n${rescueProbe.trim()}`);
173
+ process.exit(1);
174
+ }
175
+
167
176
  // Setup swap space if not already configured
168
177
  console.log("Checking swap configuration...");
169
178
  const swapInfo = await runPromise(`ssh ${sshRemote} "free -m | grep Swap"`);
@@ -300,9 +300,6 @@ export class BufferIndex {
300
300
  dataReader: Reader;
301
301
  params: SearchParams;
302
302
  keepIterating: () => boolean;
303
- // Returns true iff the caller actually retained the value. We use that
304
- // to drive the per-file matchCount cap below: see the note at the
305
- // `matchesPattern(buffer)` call for why we can't blindly count emits.
306
303
  onResult: (match: Buffer) => boolean;
307
304
  results: IndexedLogResults;
308
305
  allSearchUnits: Unit[][];
@@ -331,7 +328,20 @@ export class BufferIndex {
331
328
  }, `BufferIndex|readLocalBlocks`);
332
329
  });
333
330
 
334
- let matchCount = 0;
331
+ // NOTE: The per-file matchCount cap (commented out below in both the
332
+ // block loop and the inner buffer loop) is intentionally disabled.
333
+ // We tested (see test.ts) and confirmed that blocks within a file
334
+ // aren't time-ordered — the move-to-public pipeline can leave a
335
+ // late-index block holding earlier-time entries than earlier-index
336
+ // blocks, and buffers within a single block aren't time-ordered
337
+ // either. The old `matchCount >= params.limit` short-circuit assumed
338
+ // ordering and was silently dropping blocks whose entries would have
339
+ // survived the top-K trim (broad queries lost their earliest
340
+ // matches). Without the cap we scan every candidate block per file,
341
+ // but the index pre-filter bounds the work and it isn't measurably
342
+ // slower in practice. If blocks/buffers ever get written in
343
+ // guaranteed time order this code can be re-enabled.
344
+ // let matchCount = 0;
335
345
  let blockSearchTimeStart = Date.now();
336
346
 
337
347
  results.totalBlockCount += indexEntries.length;
@@ -344,7 +354,8 @@ export class BufferIndex {
344
354
  const step = iterateForward ? 1 : -1;
345
355
 
346
356
  for (let i = startIdx; iterateForward ? i < endIdx : i > endIdx; i += step) {
347
- if (matchCount >= params.limit || !config.keepIterating()) break;
357
+ // if (matchCount >= params.limit || !config.keepIterating()) break;
358
+ if (!config.keepIterating()) break;
348
359
  await config.results.limitGroup?.wait();
349
360
  const blockIndex = i;
350
361
 
@@ -393,38 +404,16 @@ export class BufferIndex {
393
404
  const bufferStep = iterateForward ? 1 : -1;
394
405
 
395
406
  for (let bufferIndex = bufferStartIdx; iterateForward ? bufferIndex < bufferEndIdx : bufferIndex > bufferEndIdx; bufferIndex += bufferStep) {
396
- // No `matchCount >= params.limit` cap inside the block.
397
- // Buffer order within a block is not guaranteed to follow
398
- // the search direction (blocks are time-ordered, buffers
399
- // inside them are not), so stopping mid-block on a match
400
- // count would drop earlier-time buffers we haven't reached
401
- // yet. The block-level cap above is the only safe stop;
402
- // here we only honor cross-file `keepIterating` (which
403
- // applies to the whole file at once, so it's safe at any
404
- // granularity).
407
+ // See the note above the outer loop for why the
408
+ // matchCount-based stop is gone.
409
+ // if (matchCount >= params.limit || !config.keepIterating()) break;
405
410
  if (!config.keepIterating()) break;
406
411
  await config.results.limitGroup?.wait();
407
412
 
408
413
  let buffer = buffers[bufferIndex];
409
414
  if (matchesPattern(buffer)) {
410
- // Only count matches the caller actually kept. `onResult`
411
- // routes through `FindProgressTracker.addResult`, which
412
- // can reject for reasons we don't see from here — most
413
- // notably time-range filtering (an entry whose time is
414
- // outside the search window matched the text pattern
415
- // but isn't a real hit). Counting rejected emits would
416
- // let a stretch of out-of-window matches at the start
417
- // of a file blow the per-file cap and short-circuit the
418
- // scan before we reach the in-window region.
419
- //
420
- // The cost is that we keep matching and calling onResult
421
- // through those out-of-window blocks (mild inefficiency).
422
- // We can't skip ahead — buffers are scanned linearly and
423
- // we don't know up front which entries the caller will
424
- // reject — so this is the best we can do here.
425
- if (config.onResult(buffer)) {
426
- matchCount++;
427
- }
415
+ config.onResult(buffer);
416
+ // matchCount++;
428
417
  }
429
418
  }
430
419
  } catch (e: any) {
@@ -559,9 +548,7 @@ export class BufferIndex {
559
548
  params: SearchParams;
560
549
 
561
550
  keepIterating: () => boolean;
562
- // See the note on `findLocal.onResult` — return value drives the
563
- // per-file matchCount cap so out-of-window emits don't short-circuit
564
- // the scan.
551
+ // Return value is unused — it used to drive the per-file matchCount cap, which is now disabled.
565
552
  onResult: (match: Buffer) => boolean;
566
553
  results: IndexedLogResults;
567
554
  }): Promise<{
@@ -505,9 +505,6 @@ export class BufferUnitIndex {
505
505
  params: SearchParams;
506
506
  allSearchUnits: Unit[][];
507
507
  keepIterating: () => boolean;
508
- // Returns true iff the caller actually retained the value. Drives the
509
- // `matchCounts` cap below — see the comment at the `matchesPattern`
510
- // call for why we can't blindly count emits.
511
508
  onResult: (match: Buffer) => boolean;
512
509
  index: Buffer;
513
510
  reader: Reader;
@@ -534,26 +531,42 @@ export class BufferUnitIndex {
534
531
  // Read blocks and search for matches
535
532
  let blockSearchTimeStart = Date.now();
536
533
  await measureBlock(async () => {
537
- let matchCount = 0;
538
- let matchCounts = list(blockCount).fill(0);
534
+ // NOTE: The matchCount / matchCounts tracking and the
535
+ // `stopIterating` cap below are commented out, not deleted.
536
+ // We tested (see test.ts) and confirmed that blocks within a
537
+ // file aren't actually time-ordered — the move-to-public
538
+ // pipeline can leave a late-index block holding earlier-time
539
+ // entries than earlier-index blocks. The old `relevantCount
540
+ // >= params.limit` short-circuit assumed time-ordered blocks
541
+ // and was silently skipping blocks whose entries would have
542
+ // survived the top-K trim (broad queries lost their earliest
543
+ // matches). The same applied to the inner-buffer cap (buffers
544
+ // within a block aren't time-ordered either). Removing both
545
+ // caps means we scan every candidate block per file, but the
546
+ // index pre-filter bounds the work and it isn't measurably
547
+ // slower in practice. If blocks ever get written in
548
+ // guaranteed time order, this code can be re-enabled.
549
+ // let matchCount = 0;
550
+ // let matchCounts = list(blockCount).fill(0);
539
551
 
540
552
  const searchBlock = async (blockIndex: number) => {
541
553
  if (!candidateBlocksSet.has(blockIndex)) return;
542
- // Check if we should stop iterating based on match counts and direction
543
- let stopIterating = () => {
544
- let relevantCount = 0;
545
- if (params.searchFromStart) {
546
- for (let i = 0; i <= blockIndex; i++) {
547
- relevantCount += matchCounts[i];
548
- }
549
- } else {
550
- for (let i = blockIndex; i < blockCount; i++) {
551
- relevantCount += matchCounts[i];
552
- }
553
- }
554
- return relevantCount >= params.limit || !keepIterating();
555
- };
556
- if (stopIterating()) return;
554
+ // // Check if we should stop iterating based on match counts and direction
555
+ // let stopIterating = () => {
556
+ // let relevantCount = 0;
557
+ // if (params.searchFromStart) {
558
+ // for (let i = 0; i <= blockIndex; i++) {
559
+ // relevantCount += matchCounts[i];
560
+ // }
561
+ // } else {
562
+ // for (let i = blockIndex; i < blockCount; i++) {
563
+ // relevantCount += matchCounts[i];
564
+ // }
565
+ // }
566
+ // return relevantCount >= params.limit || !keepIterating();
567
+ // };
568
+ // if (stopIterating()) return;
569
+ if (!keepIterating()) return;
557
570
 
558
571
  let debugOffsets = {
559
572
  startOffset: 0,
@@ -580,38 +593,17 @@ export class BufferUnitIndex {
580
593
  const step = iterateForward ? 1 : -1;
581
594
 
582
595
  for (let i = startIdx; iterateForward ? i < endIdx : i > endIdx; i += step) {
583
- // No matchCount-based cap inside the block. Buffer
584
- // order within a block is not guaranteed to follow the
585
- // search direction (blocks are time-ordered, buffers
586
- // inside them are not), so a mid-block stop on
587
- // `relevantCount >= limit` would drop earlier-time
588
- // buffers we haven't reached yet. Block-level
589
- // `stopIterating` is the safe granularity; here we
590
- // only honor cross-file `keepIterating`, which applies
591
- // to the whole file at once.
596
+ // See the note at the top of this function for why
597
+ // the matchCount-based stop is gone (commented out).
598
+ // if (stopIterating()) break;
592
599
  if (!keepIterating()) break;
593
600
  await results.limitGroup?.wait();
594
601
 
595
602
  const buffer = await this.getBufferFromBlock(blockReader, i);
596
603
  if (matchesPattern(buffer)) {
597
- // Only count matches the caller actually kept.
598
- // `onResult` routes through
599
- // `FindProgressTracker.addResult`, which can reject
600
- // for reasons opaque to us — most notably
601
- // time-range filtering. Counting rejected emits
602
- // would let a stretch of out-of-window matches at
603
- // the start of a file blow the per-file cap and
604
- // short-circuit the scan before we reach the
605
- // in-window region. The cost is that we keep
606
- // matching and calling onResult through those
607
- // out-of-window blocks (mild inefficiency); we
608
- // can't skip ahead because buffers are scanned
609
- // linearly and we don't know up front which
610
- // entries the caller will reject.
611
- if (config.onResult(buffer)) {
612
- matchCount++;
613
- matchCounts[blockIndex]++;
614
- }
604
+ config.onResult(buffer);
605
+ // matchCount++;
606
+ // matchCounts[blockIndex]++;
615
607
  }
616
608
  }
617
609
  } catch (e: any) {
package/test.ts CHANGED
@@ -1,81 +1,198 @@
1
1
  import { chdir } from "process";
2
2
  chdir("D:/repos/qs-cyoa/");
3
3
 
4
+ import { isPublic, setIsPublic } from "./src/config";
5
+ setIsPublic(true);
6
+
4
7
  import "./inject";
5
- import { SocketFunction } from "socket-function/SocketFunction";
6
- import { NodeCapabilitiesController, getControllerNodeIdList } from "./src/-g-core-values/NodeCapabilities";
7
- import { delay } from "socket-function/src/batching";
8
- import { green, yellow } from "socket-function/src/formatting/logColors";
9
- import { Querysub, t } from "./src/4-querysub/Querysub";
10
- import { archives, pathValueArchives } from "./src/0-path-value-core/pathValueArchives";
11
- import { getAllAuthoritySpec } from "./src/0-path-value-core/PathRouterServerAuthoritySpec";
12
- import { deploySchema } from "./src/4-deploy/deploySchema";
13
- import { getDomain } from "./src/config";
14
- import { getProxyPath } from "./src/2-proxy/pathValueProxy";
15
- import { ClientWatcher } from "./src/1-path-client/pathValueClientWatcher";
16
- import { RemoteWatcher } from "./src/1-path-client/RemoteWatcher";
17
- import { PathRouter } from "./src/0-path-value-core/PathRouter";
18
- import { shutdown } from "./src/diagnostics/periodic";
19
- import { getShardPrefixes } from "./src/0-path-value-core/ShardPrefixes";
20
- import { PathValue, epochTime } from "./src/0-path-value-core/pathValueCore";
21
- import { pathValueSerializer } from "./src/-h-path-value-serialize/PathValueSerializer";
22
- import { getAllNodeIds } from "./src/-f-node-discovery/NodeDiscovery";
23
- import { errorToUndefinedSilent } from "./src/errors";
24
- import { timeoutToUndefinedSilent } from "socket-function/src/misc";
25
-
26
- let tempTestSchema = Querysub.createSchema({
27
- value: t.number,
28
- })({
29
- domainName: getDomain(),
30
- moduleId: "tempTest",
31
- module: module,
32
- functions: {},
33
- });
8
+ import { Querysub } from "./src/4-querysub/Querysub";
9
+ import { getLoggers2Async, LogDatum } from "./src/diagnostics/logs/diskLogger";
10
+ import { IndexedLogs } from "./src/diagnostics/logs/IndexedLogs/IndexedLogs";
11
+ import { SearchParams } from "./src/diagnostics/logs/IndexedLogs/BufferIndexHelpers";
12
+ import { formatDateTimeDetailed, formatNumber, formatTime } from "socket-function/src/formatting/format";
13
+
14
+ // Pulled verbatim from the URLs the user shared.
15
+ const START_TIME = 1779598800000;
16
+ const END_TIME = 1779604200000;
17
+ const LIMIT = 1600;
18
+ const SEARCH_BROAD = `wvupofthbgq & "__threadId":"1f72e0ea774fcc81"`;
19
+ const SEARCH_NARROW = `wvupofthbgq & "__threadId":"1f72e0ea774fcc81" & new`;
20
+
21
+ type Emit = {
22
+ time: number;
23
+ logger: string;
24
+ datum: LogDatum;
25
+ };
26
+
27
+ // Stable identity for cross-query comparison. `time` alone isn't unique (many
28
+ // entries share the same ms), so we fold in threadId + entry text + param0.
29
+ function emitKey(e: Emit): string {
30
+ return `${e.time}|${e.datum.__threadId ?? ""}|${e.datum.__entry ?? ""}|${e.datum.param0 ?? ""}`;
31
+ }
32
+
33
+ async function runQuery(label: string, searchText: string, limit: number = LIMIT): Promise<Emit[]> {
34
+ console.log(`\n=== ${label}: ${JSON.stringify(searchText)} (limit=${limit}) ===`);
35
+ let loggers = await getLoggers2Async();
36
+ let allLoggers: { name: string; logger: IndexedLogs<LogDatum> }[] = [
37
+ { name: "info", logger: loggers.infoLogs },
38
+ ];
39
+
40
+ let params: SearchParams = {
41
+ startTime: START_TIME,
42
+ endTime: END_TIME,
43
+ limit,
44
+ findBuffer: Buffer.from(searchText, "utf8"),
45
+ searchFromStart: true,
46
+ only: "public",
47
+ };
48
+
49
+ let allEmits: Emit[] = [];
50
+ let queryStart = Date.now();
51
+
52
+ await Promise.all(allLoggers.map(async ({ name, logger }) => {
53
+ let perLoggerEmits: Emit[] = [];
54
+ let loggerStart = Date.now();
55
+ let result = await logger.find({
56
+ params,
57
+ onResult: (match: LogDatum) => {
58
+ perLoggerEmits.push({ time: match.time, logger: name, datum: match });
59
+ },
60
+ });
61
+
62
+ let earliest = perLoggerEmits.length > 0 ? Math.min(...perLoggerEmits.map(e => e.time)) : undefined;
63
+ let latest = perLoggerEmits.length > 0 ? Math.max(...perLoggerEmits.map(e => e.time)) : undefined;
64
+ console.log(
65
+ ` [${name}] emits=${perLoggerEmits.length} ` +
66
+ `matchCount=${result.matchCount} ` +
67
+ `blocksChecked=${result.blockCheckedCount}/${result.totalBlockCount} ` +
68
+ `filesScanned=${result.backblazeFilesSearched}/${result.totalBackblazeFiles} ` +
69
+ `earliest=${earliest !== undefined ? formatDateTimeDetailed(earliest) : "—"} ` +
70
+ `latest=${latest !== undefined ? formatDateTimeDetailed(latest) : "—"} ` +
71
+ `time=${formatTime(Date.now() - loggerStart)}`
72
+ );
73
+ allEmits.push(...perLoggerEmits);
74
+ }));
75
+
76
+ console.log(` total emits=${allEmits.length} in ${formatTime(Date.now() - queryStart)}`);
77
+
78
+ // Sort + trim to limit (mirroring the client-side display).
79
+ allEmits.sort((a, b) => a.time - b.time);
80
+ if (allEmits.length > limit) allEmits.length = limit;
81
+
82
+ console.log(
83
+ ` kept top-${allEmits.length} ` +
84
+ `earliest=${allEmits.length > 0 ? formatDateTimeDetailed(allEmits[0].time) : "—"} ` +
85
+ `latest=${allEmits.length > 0 ? formatDateTimeDetailed(allEmits[allEmits.length - 1].time) : "—"}`
86
+ );
87
+ return allEmits;
88
+ }
34
89
 
35
90
  async function main() {
36
91
  await Querysub.hostService("test");
37
92
 
38
- // let testValues: PathValue[] = [];
39
- // let buffers = await pathValueSerializer.serialize(testValues);
40
- // let values = await pathValueSerializer.deserialize(buffers);
93
+ // Dump every info file in range — declared startTime / endTime — so we
94
+ // can spot files whose declared range disagrees with the entries inside.
95
+ let loggers = await getLoggers2Async();
96
+ let paths = await loggers.infoLogs.getPaths({
97
+ startTime: START_TIME,
98
+ endTime: END_TIME,
99
+ only: "public",
100
+ });
101
+ paths.sort((a, b) => a.startTime - b.startTime);
102
+ console.log(`\n=== INFO FILES IN RANGE (${paths.length}) — declared ranges ===`);
103
+ for (let p of paths) {
104
+ console.log(` start=${formatDateTimeDetailed(p.startTime)} end=${formatDateTimeDetailed(p.endTime)} logCount=${p.logCount ?? "?"} ${p.fullPath}`);
105
+ }
41
106
 
42
- //let values = await pathValueArchives.loadValues(await getAllAuthoritySpec());
107
+ let broad = await runQuery("BROAD", SEARCH_BROAD);
108
+ let narrow = await runQuery("NARROW", SEARCH_NARROW);
109
+ // Sanity: run broad again with a huge limit. If those 4 entries appear
110
+ // here but not in the limit=1600 run, the per-file `stopIterating` cap is
111
+ // skipping the block that contains them. If they're still missing, the
112
+ // bug is upstream (index pre-filter or block scanner missing them).
113
+ let broadHuge = await runQuery("BROAD_HUGE", SEARCH_BROAD, 1_000_000);
43
114
 
44
- let nodes = await getAllNodeIds();
45
- let values = await Promise.all(nodes.map(async node => {
46
- let metadata = await timeoutToUndefinedSilent(5000, NodeCapabilitiesController.nodes[node].getMetadata());
47
- return metadata?.entryPoint;
48
- }));
49
- console.log({ values });
50
-
51
- // let path = getProxyPath(() => tempTestSchema.data().value);
52
- // console.log({ path });
53
- // let authorities = PathRouter.getAllAuthorities(path);
54
- // for (let authority of authorities) {
55
- // console.log({ authority });
56
- // }
57
-
58
- // ClientWatcher.DEBUG_READS = true;
59
- // ClientWatcher.DEBUG_WRITES = true;
60
- // RemoteWatcher.DEBUG = true;
61
-
62
- // let value = await Querysub.commitAsync(() => tempTestSchema.data().value);
63
- // console.log({ value });
64
- // await Querysub.commitAsync(() => tempTestSchema.data().value++, { doNotStoreWritesAsPredictions: true });
65
- // await delay(3000);
66
- // let value2 = await Querysub.commitAsync(() => tempTestSchema.data().value);
67
- // console.log({ value2 });
68
-
69
- // await shutdown();
70
-
71
- // let test = await Querysub.commitAsync(() => {
72
- // let live = deploySchema()[getDomain()].deploy.live.hash;
73
- // console.log({ live });
74
- // return String(live);
75
- // });
76
- // console.log({ test });
115
+ // The diagnostic: narrow ⊂ broad by definition. So every narrow result
116
+ // whose time falls inside broad's kept window MUST appear in broad. If any
117
+ // are missing, the broad scan dropped them — that's the bug.
118
+ if (broad.length === 0 || narrow.length === 0) {
119
+ console.log(`\nSkipping comparison: broad=${broad.length} narrow=${narrow.length}`);
120
+ return;
121
+ }
122
+
123
+ let broadKeys = new Set(broad.map(emitKey));
124
+ let broadCutoff = broad[broad.length - 1].time;
125
+ let broadEarliest = broad[0].time;
126
+
127
+ console.log(`\n=== COMPARE ===`);
128
+ console.log(`broad window: [${formatDateTimeDetailed(broadEarliest)}, ${formatDateTimeDetailed(broadCutoff)}]`);
129
+
130
+ let narrowInWindow = narrow.filter(n => n.time <= broadCutoff);
131
+ let missing = narrowInWindow.filter(n => !broadKeys.has(emitKey(n)));
132
+
133
+ console.log(`narrow total: ${narrow.length}`);
134
+ console.log(`narrow within broad window (<= broad cutoff): ${narrowInWindow.length}`);
135
+ console.log(`narrow missing from broad kept top-K: ${missing.length}`);
136
+
137
+ if (missing.length > 0) {
138
+ console.log(`\nFirst ${Math.min(20, missing.length)} missing entries (these prove broad dropped them):`);
139
+ for (let m of missing.slice(0, 20)) {
140
+ console.log(
141
+ ` time=${formatDateTimeDetailed(m.time)} ` +
142
+ `logger=${m.logger} ` +
143
+ `entry=${(m.datum.__entry ?? "").slice(0, 80)} ` +
144
+ `param0=${String(m.datum.param0 ?? "").slice(0, 80)}`
145
+ );
146
+ }
147
+
148
+ // Group missing by logger so we can tell which scan dropped them.
149
+ let byLogger = new Map<string, number>();
150
+ for (let m of missing) byLogger.set(m.logger, (byLogger.get(m.logger) ?? 0) + 1);
151
+ console.log(`\nMissing by logger:`);
152
+ for (let [k, v] of byLogger) console.log(` ${k}: ${formatNumber(v)}`);
153
+ } else {
154
+ console.log(`\nNo missing entries — broad correctly contains all narrow results in its window.`);
155
+ }
156
+
157
+ // Take the overall time span of the missing entries and list the files
158
+ // whose declared range overlaps it (these *should* cover the entries), then
159
+ // list the files that would be pruned instead (the bug indicator).
160
+ if (missing.length > 0) {
161
+ let missingTimes = missing.map(m => m.time);
162
+ let minMissing = Math.min(...missingTimes);
163
+ let maxMissing = Math.max(...missingTimes);
164
+ console.log(`\nMissing entry times span: [${formatDateTimeDetailed(minMissing)}, ${formatDateTimeDetailed(maxMissing)}]`);
165
+ console.log(`Files whose declared range overlaps [${formatDateTimeDetailed(minMissing)}, ${formatDateTimeDetailed(maxMissing)}]:`);
166
+ for (let p of paths) {
167
+ if (p.endTime < minMissing || p.startTime > maxMissing) continue;
168
+ console.log(` start=${formatDateTimeDetailed(p.startTime)} end=${formatDateTimeDetailed(p.endTime)} ${p.fullPath}`);
169
+ }
170
+ // Also: any file whose declared startTime > broad cutoff (and so would
171
+ // be isSourceRelevant-pruned) — these are candidates for the bug.
172
+ console.log(`Files whose declared startTime > broad cutoff (${formatDateTimeDetailed(broadCutoff)}) — these would be pruned by isSourceRelevant once broad fills:`);
173
+ for (let p of paths) {
174
+ if (p.startTime > broadCutoff) {
175
+ let overlap = p.startTime <= maxMissing && p.endTime >= minMissing ? " <- OVERLAPS MISSING" : "";
176
+ console.log(` start=${formatDateTimeDetailed(p.startTime)} end=${formatDateTimeDetailed(p.endTime)} ${p.fullPath}${overlap}`);
177
+ }
178
+ }
179
+ }
180
+
181
+ // Check whether broad-with-huge-limit catches the missing entries.
182
+ let broadHugeKeys = new Set(broadHuge.map(emitKey));
183
+ let stillMissingFromHuge = narrow.filter(n => !broadHugeKeys.has(emitKey(n)));
184
+ console.log(`\n=== BROAD_HUGE check ===`);
185
+ console.log(`narrow missing from broad_huge: ${stillMissingFromHuge.length}`);
186
+ if (stillMissingFromHuge.length > 0) {
187
+ console.log(`First ${Math.min(20, stillMissingFromHuge.length)} still-missing:`);
188
+ for (let m of stillMissingFromHuge.slice(0, 20)) {
189
+ console.log(` time=${formatDateTimeDetailed(m.time)} param0=${String(m.datum.param0 ?? "").slice(0, 60)}`);
190
+ }
191
+ console.log(`-> bug is NOT (only) the per-file cap; the scanner / index pre-filter is dropping entries even without the cap.`);
192
+ } else {
193
+ console.log(`-> all narrow entries are in broad_huge; the per-file stopIterating cap is the culprit for the limit=1600 miss.`);
194
+ }
77
195
  }
78
196
 
79
- main().catch(console.error)
80
- .finally(() => process.exit(0))
81
- ;
197
+ main().catch(e => console.error((e as Error).stack ?? e))
198
+ .finally(() => process.exit(0));