querysub 0.459.0 → 0.460.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +2 -1
- package/package.json +1 -1
- package/src/config.ts +3 -0
- package/src/deployManager/setupMachineMain.ts +9 -0
- package/src/diagnostics/logs/IndexedLogs/BufferIndex.ts +22 -35
- package/src/diagnostics/logs/IndexedLogs/BufferUnitIndex.ts +39 -47
- package/test.ts +186 -69
package/package.json
CHANGED
package/src/config.ts
CHANGED
|
@@ -164,6 +164,15 @@ async function main() {
|
|
|
164
164
|
// Test command to verify ssh credentials work
|
|
165
165
|
await runPromise(`ssh ${sshRemote} whoami`);
|
|
166
166
|
|
|
167
|
+
// Detect Hetzner rescue system — if we're still in rescue, installimage hasn't been run yet
|
|
168
|
+
let rescueProbe = await runPromise(`ssh ${sshRemote} "hostname; command -v installimage || true"`, { nothrow: true });
|
|
169
|
+
if (/(^|\n)rescue(\s|$)/i.test(rescueProbe) || /installimage/.test(rescueProbe)) {
|
|
170
|
+
console.error(`❌ Remote ${sshRemote} appears to be running the Hetzner rescue system (no OS installed yet).`);
|
|
171
|
+
console.error(` Run \`installimage\` on the remote first to provision the OS, reboot into the installed system, then re-run \`yarn setup-machine ${sshRemote}\`.`);
|
|
172
|
+
console.error(` Detected:\n${rescueProbe.trim()}`);
|
|
173
|
+
process.exit(1);
|
|
174
|
+
}
|
|
175
|
+
|
|
167
176
|
// Setup swap space if not already configured
|
|
168
177
|
console.log("Checking swap configuration...");
|
|
169
178
|
const swapInfo = await runPromise(`ssh ${sshRemote} "free -m | grep Swap"`);
|
|
@@ -300,9 +300,6 @@ export class BufferIndex {
|
|
|
300
300
|
dataReader: Reader;
|
|
301
301
|
params: SearchParams;
|
|
302
302
|
keepIterating: () => boolean;
|
|
303
|
-
// Returns true iff the caller actually retained the value. We use that
|
|
304
|
-
// to drive the per-file matchCount cap below: see the note at the
|
|
305
|
-
// `matchesPattern(buffer)` call for why we can't blindly count emits.
|
|
306
303
|
onResult: (match: Buffer) => boolean;
|
|
307
304
|
results: IndexedLogResults;
|
|
308
305
|
allSearchUnits: Unit[][];
|
|
@@ -331,7 +328,20 @@ export class BufferIndex {
|
|
|
331
328
|
}, `BufferIndex|readLocalBlocks`);
|
|
332
329
|
});
|
|
333
330
|
|
|
334
|
-
|
|
331
|
+
// NOTE: The per-file matchCount cap (commented out below in both the
|
|
332
|
+
// block loop and the inner buffer loop) is intentionally disabled.
|
|
333
|
+
// We tested (see test.ts) and confirmed that blocks within a file
|
|
334
|
+
// aren't time-ordered — the move-to-public pipeline can leave a
|
|
335
|
+
// late-index block holding earlier-time entries than earlier-index
|
|
336
|
+
// blocks, and buffers within a single block aren't time-ordered
|
|
337
|
+
// either. The old `matchCount >= params.limit` short-circuit assumed
|
|
338
|
+
// ordering and was silently dropping blocks whose entries would have
|
|
339
|
+
// survived the top-K trim (broad queries lost their earliest
|
|
340
|
+
// matches). Without the cap we scan every candidate block per file,
|
|
341
|
+
// but the index pre-filter bounds the work and it isn't measurably
|
|
342
|
+
// slower in practice. If blocks/buffers ever get written in
|
|
343
|
+
// guaranteed time order this code can be re-enabled.
|
|
344
|
+
// let matchCount = 0;
|
|
335
345
|
let blockSearchTimeStart = Date.now();
|
|
336
346
|
|
|
337
347
|
results.totalBlockCount += indexEntries.length;
|
|
@@ -344,7 +354,8 @@ export class BufferIndex {
|
|
|
344
354
|
const step = iterateForward ? 1 : -1;
|
|
345
355
|
|
|
346
356
|
for (let i = startIdx; iterateForward ? i < endIdx : i > endIdx; i += step) {
|
|
347
|
-
if (matchCount >= params.limit || !config.keepIterating()) break;
|
|
357
|
+
// if (matchCount >= params.limit || !config.keepIterating()) break;
|
|
358
|
+
if (!config.keepIterating()) break;
|
|
348
359
|
await config.results.limitGroup?.wait();
|
|
349
360
|
const blockIndex = i;
|
|
350
361
|
|
|
@@ -393,38 +404,16 @@ export class BufferIndex {
|
|
|
393
404
|
const bufferStep = iterateForward ? 1 : -1;
|
|
394
405
|
|
|
395
406
|
for (let bufferIndex = bufferStartIdx; iterateForward ? bufferIndex < bufferEndIdx : bufferIndex > bufferEndIdx; bufferIndex += bufferStep) {
|
|
396
|
-
//
|
|
397
|
-
//
|
|
398
|
-
//
|
|
399
|
-
// inside them are not), so stopping mid-block on a match
|
|
400
|
-
// count would drop earlier-time buffers we haven't reached
|
|
401
|
-
// yet. The block-level cap above is the only safe stop;
|
|
402
|
-
// here we only honor cross-file `keepIterating` (which
|
|
403
|
-
// applies to the whole file at once, so it's safe at any
|
|
404
|
-
// granularity).
|
|
407
|
+
// See the note above the outer loop for why the
|
|
408
|
+
// matchCount-based stop is gone.
|
|
409
|
+
// if (matchCount >= params.limit || !config.keepIterating()) break;
|
|
405
410
|
if (!config.keepIterating()) break;
|
|
406
411
|
await config.results.limitGroup?.wait();
|
|
407
412
|
|
|
408
413
|
let buffer = buffers[bufferIndex];
|
|
409
414
|
if (matchesPattern(buffer)) {
|
|
410
|
-
|
|
411
|
-
//
|
|
412
|
-
// can reject for reasons we don't see from here — most
|
|
413
|
-
// notably time-range filtering (an entry whose time is
|
|
414
|
-
// outside the search window matched the text pattern
|
|
415
|
-
// but isn't a real hit). Counting rejected emits would
|
|
416
|
-
// let a stretch of out-of-window matches at the start
|
|
417
|
-
// of a file blow the per-file cap and short-circuit the
|
|
418
|
-
// scan before we reach the in-window region.
|
|
419
|
-
//
|
|
420
|
-
// The cost is that we keep matching and calling onResult
|
|
421
|
-
// through those out-of-window blocks (mild inefficiency).
|
|
422
|
-
// We can't skip ahead — buffers are scanned linearly and
|
|
423
|
-
// we don't know up front which entries the caller will
|
|
424
|
-
// reject — so this is the best we can do here.
|
|
425
|
-
if (config.onResult(buffer)) {
|
|
426
|
-
matchCount++;
|
|
427
|
-
}
|
|
415
|
+
config.onResult(buffer);
|
|
416
|
+
// matchCount++;
|
|
428
417
|
}
|
|
429
418
|
}
|
|
430
419
|
} catch (e: any) {
|
|
@@ -559,9 +548,7 @@ export class BufferIndex {
|
|
|
559
548
|
params: SearchParams;
|
|
560
549
|
|
|
561
550
|
keepIterating: () => boolean;
|
|
562
|
-
//
|
|
563
|
-
// per-file matchCount cap so out-of-window emits don't short-circuit
|
|
564
|
-
// the scan.
|
|
551
|
+
// Return value is unused — see `findLocal.onResult`.
|
|
565
552
|
onResult: (match: Buffer) => boolean;
|
|
566
553
|
results: IndexedLogResults;
|
|
567
554
|
}): Promise<{
|
|
@@ -505,9 +505,6 @@ export class BufferUnitIndex {
|
|
|
505
505
|
params: SearchParams;
|
|
506
506
|
allSearchUnits: Unit[][];
|
|
507
507
|
keepIterating: () => boolean;
|
|
508
|
-
// Returns true iff the caller actually retained the value. Drives the
|
|
509
|
-
// `matchCounts` cap below — see the comment at the `matchesPattern`
|
|
510
|
-
// call for why we can't blindly count emits.
|
|
511
508
|
onResult: (match: Buffer) => boolean;
|
|
512
509
|
index: Buffer;
|
|
513
510
|
reader: Reader;
|
|
@@ -534,26 +531,42 @@ export class BufferUnitIndex {
|
|
|
534
531
|
// Read blocks and search for matches
|
|
535
532
|
let blockSearchTimeStart = Date.now();
|
|
536
533
|
await measureBlock(async () => {
|
|
537
|
-
|
|
538
|
-
|
|
534
|
+
// NOTE: The matchCount / matchCounts tracking and the
|
|
535
|
+
// `stopIterating` cap below are commented out, not deleted.
|
|
536
|
+
// We tested (see test.ts) and confirmed that blocks within a
|
|
537
|
+
// file aren't actually time-ordered — the move-to-public
|
|
538
|
+
// pipeline can leave a late-index block holding earlier-time
|
|
539
|
+
// entries than earlier-index blocks. The old `relevantCount
|
|
540
|
+
// >= params.limit` short-circuit assumed time-ordered blocks
|
|
541
|
+
// and was silently skipping blocks whose entries would have
|
|
542
|
+
// survived the top-K trim (broad queries lost their earliest
|
|
543
|
+
// matches). The same applied to the inner-buffer cap (buffers
|
|
544
|
+
// within a block aren't time-ordered either). Removing both
|
|
545
|
+
// caps means we scan every candidate block per file, but the
|
|
546
|
+
// index pre-filter bounds the work and it isn't measurably
|
|
547
|
+
// slower in practice. If blocks ever get written in
|
|
548
|
+
// guaranteed time order, this code can be re-enabled.
|
|
549
|
+
// let matchCount = 0;
|
|
550
|
+
// let matchCounts = list(blockCount).fill(0);
|
|
539
551
|
|
|
540
552
|
const searchBlock = async (blockIndex: number) => {
|
|
541
553
|
if (!candidateBlocksSet.has(blockIndex)) return;
|
|
542
|
-
// Check if we should stop iterating based on match counts and direction
|
|
543
|
-
let stopIterating = () => {
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
};
|
|
556
|
-
if (stopIterating()) return;
|
|
554
|
+
// // Check if we should stop iterating based on match counts and direction
|
|
555
|
+
// let stopIterating = () => {
|
|
556
|
+
// let relevantCount = 0;
|
|
557
|
+
// if (params.searchFromStart) {
|
|
558
|
+
// for (let i = 0; i <= blockIndex; i++) {
|
|
559
|
+
// relevantCount += matchCounts[i];
|
|
560
|
+
// }
|
|
561
|
+
// } else {
|
|
562
|
+
// for (let i = blockIndex; i < blockCount; i++) {
|
|
563
|
+
// relevantCount += matchCounts[i];
|
|
564
|
+
// }
|
|
565
|
+
// }
|
|
566
|
+
// return relevantCount >= params.limit || !keepIterating();
|
|
567
|
+
// };
|
|
568
|
+
// if (stopIterating()) return;
|
|
569
|
+
if (!keepIterating()) return;
|
|
557
570
|
|
|
558
571
|
let debugOffsets = {
|
|
559
572
|
startOffset: 0,
|
|
@@ -580,38 +593,17 @@ export class BufferUnitIndex {
|
|
|
580
593
|
const step = iterateForward ? 1 : -1;
|
|
581
594
|
|
|
582
595
|
for (let i = startIdx; iterateForward ? i < endIdx : i > endIdx; i += step) {
|
|
583
|
-
//
|
|
584
|
-
//
|
|
585
|
-
//
|
|
586
|
-
// inside them are not), so a mid-block stop on
|
|
587
|
-
// `relevantCount >= limit` would drop earlier-time
|
|
588
|
-
// buffers we haven't reached yet. Block-level
|
|
589
|
-
// `stopIterating` is the safe granularity; here we
|
|
590
|
-
// only honor cross-file `keepIterating`, which applies
|
|
591
|
-
// to the whole file at once.
|
|
596
|
+
// See the note at the top of this function for why
|
|
597
|
+
// the matchCount-based stop is gone (commented out).
|
|
598
|
+
// if (stopIterating()) break;
|
|
592
599
|
if (!keepIterating()) break;
|
|
593
600
|
await results.limitGroup?.wait();
|
|
594
601
|
|
|
595
602
|
const buffer = await this.getBufferFromBlock(blockReader, i);
|
|
596
603
|
if (matchesPattern(buffer)) {
|
|
597
|
-
|
|
598
|
-
//
|
|
599
|
-
//
|
|
600
|
-
// for reasons opaque to us — most notably
|
|
601
|
-
// time-range filtering. Counting rejected emits
|
|
602
|
-
// would let a stretch of out-of-window matches at
|
|
603
|
-
// the start of a file blow the per-file cap and
|
|
604
|
-
// short-circuit the scan before we reach the
|
|
605
|
-
// in-window region. The cost is that we keep
|
|
606
|
-
// matching and calling onResult through those
|
|
607
|
-
// out-of-window blocks (mild inefficiency); we
|
|
608
|
-
// can't skip ahead because buffers are scanned
|
|
609
|
-
// linearly and we don't know up front which
|
|
610
|
-
// entries the caller will reject.
|
|
611
|
-
if (config.onResult(buffer)) {
|
|
612
|
-
matchCount++;
|
|
613
|
-
matchCounts[blockIndex]++;
|
|
614
|
-
}
|
|
604
|
+
config.onResult(buffer);
|
|
605
|
+
// matchCount++;
|
|
606
|
+
// matchCounts[blockIndex]++;
|
|
615
607
|
}
|
|
616
608
|
}
|
|
617
609
|
} catch (e: any) {
|
package/test.ts
CHANGED
|
@@ -1,81 +1,198 @@
|
|
|
1
1
|
import { chdir } from "process";
|
|
2
2
|
chdir("D:/repos/qs-cyoa/");
|
|
3
3
|
|
|
4
|
+
import { isPublic, setIsPublic } from "./src/config";
|
|
5
|
+
setIsPublic(true);
|
|
6
|
+
|
|
4
7
|
import "./inject";
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
}
|
|
8
|
+
import { Querysub } from "./src/4-querysub/Querysub";
|
|
9
|
+
import { getLoggers2Async, LogDatum } from "./src/diagnostics/logs/diskLogger";
|
|
10
|
+
import { IndexedLogs } from "./src/diagnostics/logs/IndexedLogs/IndexedLogs";
|
|
11
|
+
import { SearchParams } from "./src/diagnostics/logs/IndexedLogs/BufferIndexHelpers";
|
|
12
|
+
import { formatDateTimeDetailed, formatNumber, formatTime } from "socket-function/src/formatting/format";
|
|
13
|
+
|
|
14
|
+
// Pulled verbatim from the URLs the user shared.
|
|
15
|
+
const START_TIME = 1779598800000;
|
|
16
|
+
const END_TIME = 1779604200000;
|
|
17
|
+
const LIMIT = 1600;
|
|
18
|
+
const SEARCH_BROAD = `wvupofthbgq & "__threadId":"1f72e0ea774fcc81"`;
|
|
19
|
+
const SEARCH_NARROW = `wvupofthbgq & "__threadId":"1f72e0ea774fcc81" & new`;
|
|
20
|
+
|
|
21
|
+
type Emit = {
|
|
22
|
+
time: number;
|
|
23
|
+
logger: string;
|
|
24
|
+
datum: LogDatum;
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
// Stable identity for cross-query comparison. `time` alone isn't unique (many
|
|
28
|
+
// entries share the same ms), so we fold in threadId + entry text + param0.
|
|
29
|
+
function emitKey(e: Emit): string {
|
|
30
|
+
return `${e.time}|${e.datum.__threadId ?? ""}|${e.datum.__entry ?? ""}|${e.datum.param0 ?? ""}`;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
async function runQuery(label: string, searchText: string, limit: number = LIMIT): Promise<Emit[]> {
|
|
34
|
+
console.log(`\n=== ${label}: ${JSON.stringify(searchText)} (limit=${limit}) ===`);
|
|
35
|
+
let loggers = await getLoggers2Async();
|
|
36
|
+
let allLoggers: { name: string; logger: IndexedLogs<LogDatum> }[] = [
|
|
37
|
+
{ name: "info", logger: loggers.infoLogs },
|
|
38
|
+
];
|
|
39
|
+
|
|
40
|
+
let params: SearchParams = {
|
|
41
|
+
startTime: START_TIME,
|
|
42
|
+
endTime: END_TIME,
|
|
43
|
+
limit,
|
|
44
|
+
findBuffer: Buffer.from(searchText, "utf8"),
|
|
45
|
+
searchFromStart: true,
|
|
46
|
+
only: "public",
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
let allEmits: Emit[] = [];
|
|
50
|
+
let queryStart = Date.now();
|
|
51
|
+
|
|
52
|
+
await Promise.all(allLoggers.map(async ({ name, logger }) => {
|
|
53
|
+
let perLoggerEmits: Emit[] = [];
|
|
54
|
+
let loggerStart = Date.now();
|
|
55
|
+
let result = await logger.find({
|
|
56
|
+
params,
|
|
57
|
+
onResult: (match: LogDatum) => {
|
|
58
|
+
perLoggerEmits.push({ time: match.time, logger: name, datum: match });
|
|
59
|
+
},
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
let earliest = perLoggerEmits.length > 0 ? Math.min(...perLoggerEmits.map(e => e.time)) : undefined;
|
|
63
|
+
let latest = perLoggerEmits.length > 0 ? Math.max(...perLoggerEmits.map(e => e.time)) : undefined;
|
|
64
|
+
console.log(
|
|
65
|
+
` [${name}] emits=${perLoggerEmits.length} ` +
|
|
66
|
+
`matchCount=${result.matchCount} ` +
|
|
67
|
+
`blocksChecked=${result.blockCheckedCount}/${result.totalBlockCount} ` +
|
|
68
|
+
`filesScanned=${result.backblazeFilesSearched}/${result.totalBackblazeFiles} ` +
|
|
69
|
+
`earliest=${earliest !== undefined ? formatDateTimeDetailed(earliest) : "—"} ` +
|
|
70
|
+
`latest=${latest !== undefined ? formatDateTimeDetailed(latest) : "—"} ` +
|
|
71
|
+
`time=${formatTime(Date.now() - loggerStart)}`
|
|
72
|
+
);
|
|
73
|
+
allEmits.push(...perLoggerEmits);
|
|
74
|
+
}));
|
|
75
|
+
|
|
76
|
+
console.log(` total emits=${allEmits.length} in ${formatTime(Date.now() - queryStart)}`);
|
|
77
|
+
|
|
78
|
+
// Sort + trim to limit (mirroring the client-side display).
|
|
79
|
+
allEmits.sort((a, b) => a.time - b.time);
|
|
80
|
+
if (allEmits.length > limit) allEmits.length = limit;
|
|
81
|
+
|
|
82
|
+
console.log(
|
|
83
|
+
` kept top-${allEmits.length} ` +
|
|
84
|
+
`earliest=${allEmits.length > 0 ? formatDateTimeDetailed(allEmits[0].time) : "—"} ` +
|
|
85
|
+
`latest=${allEmits.length > 0 ? formatDateTimeDetailed(allEmits[allEmits.length - 1].time) : "—"}`
|
|
86
|
+
);
|
|
87
|
+
return allEmits;
|
|
88
|
+
}
|
|
34
89
|
|
|
35
90
|
async function main() {
|
|
36
91
|
await Querysub.hostService("test");
|
|
37
92
|
|
|
38
|
-
//
|
|
39
|
-
//
|
|
40
|
-
|
|
93
|
+
// Dump every info file in range — declared startTime / endTime — so we
|
|
94
|
+
// can spot files whose declared range disagrees with the entries inside.
|
|
95
|
+
let loggers = await getLoggers2Async();
|
|
96
|
+
let paths = await loggers.infoLogs.getPaths({
|
|
97
|
+
startTime: START_TIME,
|
|
98
|
+
endTime: END_TIME,
|
|
99
|
+
only: "public",
|
|
100
|
+
});
|
|
101
|
+
paths.sort((a, b) => a.startTime - b.startTime);
|
|
102
|
+
console.log(`\n=== INFO FILES IN RANGE (${paths.length}) — declared ranges ===`);
|
|
103
|
+
for (let p of paths) {
|
|
104
|
+
console.log(` start=${formatDateTimeDetailed(p.startTime)} end=${formatDateTimeDetailed(p.endTime)} logCount=${p.logCount ?? "?"} ${p.fullPath}`);
|
|
105
|
+
}
|
|
41
106
|
|
|
42
|
-
|
|
107
|
+
let broad = await runQuery("BROAD", SEARCH_BROAD);
|
|
108
|
+
let narrow = await runQuery("NARROW", SEARCH_NARROW);
|
|
109
|
+
// Sanity: run broad again with a huge limit. If the missing entries appear
|
|
110
|
+
// here but not in the limit=1600 run, the per-file `stopIterating` cap is
|
|
111
|
+
// skipping the block that contains them. If they're still missing, the
|
|
112
|
+
// bug is upstream (index pre-filter or block scanner missing them).
|
|
113
|
+
let broadHuge = await runQuery("BROAD_HUGE", SEARCH_BROAD, 1_000_000);
|
|
43
114
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
115
|
+
// The diagnostic: narrow ⊆ broad by definition (narrow only adds one more `&` term). So every narrow result
|
|
116
|
+
// whose time falls inside broad's kept window MUST appear in broad. If any
|
|
117
|
+
// are missing, the broad scan dropped them — that's the bug.
|
|
118
|
+
if (broad.length === 0 || narrow.length === 0) {
|
|
119
|
+
console.log(`\nSkipping comparison: broad=${broad.length} narrow=${narrow.length}`);
|
|
120
|
+
return;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
let broadKeys = new Set(broad.map(emitKey));
|
|
124
|
+
let broadCutoff = broad[broad.length - 1].time;
|
|
125
|
+
let broadEarliest = broad[0].time;
|
|
126
|
+
|
|
127
|
+
console.log(`\n=== COMPARE ===`);
|
|
128
|
+
console.log(`broad window: [${formatDateTimeDetailed(broadEarliest)}, ${formatDateTimeDetailed(broadCutoff)}]`);
|
|
129
|
+
|
|
130
|
+
let narrowInWindow = narrow.filter(n => n.time <= broadCutoff);
|
|
131
|
+
let missing = narrowInWindow.filter(n => !broadKeys.has(emitKey(n)));
|
|
132
|
+
|
|
133
|
+
console.log(`narrow total: ${narrow.length}`);
|
|
134
|
+
console.log(`narrow within broad window (<= broad cutoff): ${narrowInWindow.length}`);
|
|
135
|
+
console.log(`narrow missing from broad kept top-K: ${missing.length}`);
|
|
136
|
+
|
|
137
|
+
if (missing.length > 0) {
|
|
138
|
+
console.log(`\nFirst ${Math.min(20, missing.length)} missing entries (these prove broad dropped them):`);
|
|
139
|
+
for (let m of missing.slice(0, 20)) {
|
|
140
|
+
console.log(
|
|
141
|
+
` time=${formatDateTimeDetailed(m.time)} ` +
|
|
142
|
+
`logger=${m.logger} ` +
|
|
143
|
+
`entry=${(m.datum.__entry ?? "").slice(0, 80)} ` +
|
|
144
|
+
`param0=${String(m.datum.param0 ?? "").slice(0, 80)}`
|
|
145
|
+
);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Group missing by logger so we can tell which scan dropped them.
|
|
149
|
+
let byLogger = new Map<string, number>();
|
|
150
|
+
for (let m of missing) byLogger.set(m.logger, (byLogger.get(m.logger) ?? 0) + 1);
|
|
151
|
+
console.log(`\nMissing by logger:`);
|
|
152
|
+
for (let [k, v] of byLogger) console.log(` ${k}: ${formatNumber(v)}`);
|
|
153
|
+
} else {
|
|
154
|
+
console.log(`\nNo missing entries — broad correctly contains all narrow results in its window.`);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// List the files whose declared range overlaps the time span covered by
|
|
158
|
+
// the missing entries, then the files whose declared startTime is after
|
|
159
|
+
// broad's cutoff (the candidates for the bug).
|
|
160
|
+
if (missing.length > 0) {
|
|
161
|
+
let missingTimes = missing.map(m => m.time);
|
|
162
|
+
let minMissing = Math.min(...missingTimes);
|
|
163
|
+
let maxMissing = Math.max(...missingTimes);
|
|
164
|
+
console.log(`\nMissing entry times span: [${formatDateTimeDetailed(minMissing)}, ${formatDateTimeDetailed(maxMissing)}]`);
|
|
165
|
+
console.log(`Files whose declared range overlaps [${formatDateTimeDetailed(minMissing)}, ${formatDateTimeDetailed(maxMissing)}]:`);
|
|
166
|
+
for (let p of paths) {
|
|
167
|
+
if (p.endTime < minMissing || p.startTime > maxMissing) continue;
|
|
168
|
+
console.log(` start=${formatDateTimeDetailed(p.startTime)} end=${formatDateTimeDetailed(p.endTime)} ${p.fullPath}`);
|
|
169
|
+
}
|
|
170
|
+
// Also: any file whose declared startTime > broad cutoff (and so would
|
|
171
|
+
// be isSourceRelevant-pruned) — these are candidates for the bug.
|
|
172
|
+
console.log(`Files whose declared startTime > broad cutoff (${formatDateTimeDetailed(broadCutoff)}) — these would be pruned by isSourceRelevant once broad fills:`);
|
|
173
|
+
for (let p of paths) {
|
|
174
|
+
if (p.startTime > broadCutoff) {
|
|
175
|
+
let overlap = p.startTime <= maxMissing && p.endTime >= minMissing ? " <- OVERLAPS MISSING" : "";
|
|
176
|
+
console.log(` start=${formatDateTimeDetailed(p.startTime)} end=${formatDateTimeDetailed(p.endTime)} ${p.fullPath}${overlap}`);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
// Check whether broad-with-huge-limit catches the missing entries.
|
|
182
|
+
let broadHugeKeys = new Set(broadHuge.map(emitKey));
|
|
183
|
+
let stillMissingFromHuge = narrow.filter(n => !broadHugeKeys.has(emitKey(n)));
|
|
184
|
+
console.log(`\n=== BROAD_HUGE check ===`);
|
|
185
|
+
console.log(`narrow missing from broad_huge: ${stillMissingFromHuge.length}`);
|
|
186
|
+
if (stillMissingFromHuge.length > 0) {
|
|
187
|
+
console.log(`First ${Math.min(20, stillMissingFromHuge.length)} still-missing:`);
|
|
188
|
+
for (let m of stillMissingFromHuge.slice(0, 20)) {
|
|
189
|
+
console.log(` time=${formatDateTimeDetailed(m.time)} param0=${String(m.datum.param0 ?? "").slice(0, 60)}`);
|
|
190
|
+
}
|
|
191
|
+
console.log(`-> bug is NOT (only) the per-file cap; the scanner / index pre-filter is dropping entries even without the cap.`);
|
|
192
|
+
} else {
|
|
193
|
+
console.log(`-> all narrow entries are in broad_huge; the per-file stopIterating cap is the culprit for the limit=1600 miss.`);
|
|
194
|
+
}
|
|
77
195
|
}
|
|
78
196
|
|
|
79
|
-
main().catch(console.error)
|
|
80
|
-
.finally(() => process.exit(0))
|
|
81
|
-
;
|
|
197
|
+
main().catch(e => console.error((e as Error).stack ?? e))
|
|
198
|
+
.finally(() => process.exit(0));
|