querysub 0.458.0 → 0.460.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +2 -1
- package/package.json +1 -1
- package/src/config.ts +3 -0
- package/src/deployManager/setupMachineMain.ts +9 -0
- package/src/diagnostics/grossStats/GrossStatsPage.tsx +57 -19
- package/src/diagnostics/logs/IndexedLogs/BufferIndex.ts +23 -27
- package/src/diagnostics/logs/IndexedLogs/BufferUnitIndex.ts +40 -39
- package/test.ts +186 -69
package/package.json
CHANGED
package/src/config.ts
CHANGED
|
@@ -164,6 +164,15 @@ async function main() {
|
|
|
164
164
|
// Test command to verify ssh credentials work
|
|
165
165
|
await runPromise(`ssh ${sshRemote} whoami`);
|
|
166
166
|
|
|
167
|
+
// Detect Hetzner rescue system — if we're still in rescue, installimage hasn't been run yet
|
|
168
|
+
let rescueProbe = await runPromise(`ssh ${sshRemote} "hostname; command -v installimage || true"`, { nothrow: true });
|
|
169
|
+
if (/(^|\n)rescue(\s|$)/i.test(rescueProbe) || /installimage/.test(rescueProbe)) {
|
|
170
|
+
console.error(`❌ Remote ${sshRemote} appears to be running the Hetzner rescue system (no OS installed yet).`);
|
|
171
|
+
console.error(` Run \`installimage\` on the remote first to provision the OS, reboot into the installed system, then re-run \`yarn setup-machine ${sshRemote}\`.`);
|
|
172
|
+
console.error(` Detected:\n${rescueProbe.trim()}`);
|
|
173
|
+
process.exit(1);
|
|
174
|
+
}
|
|
175
|
+
|
|
167
176
|
// Setup swap space if not already configured
|
|
168
177
|
console.log("Checking swap configuration...");
|
|
169
178
|
const swapInfo = await runPromise(`ssh ${sshRemote} "free -m | grep Swap"`);
|
|
@@ -69,7 +69,7 @@ const renderChartPNG = cacheArgsEqual((
|
|
|
69
69
|
height: number,
|
|
70
70
|
nodeIds: readonly string[],
|
|
71
71
|
bucketsArrays: readonly GrossStatsBucket[][],
|
|
72
|
-
): string => {
|
|
72
|
+
): { pngUrl: string; maxTotal: number } => {
|
|
73
73
|
let canvas = document.createElement("canvas");
|
|
74
74
|
canvas.width = width;
|
|
75
75
|
canvas.height = height;
|
|
@@ -115,7 +115,7 @@ const renderChartPNG = cacheArgsEqual((
|
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
|
|
118
|
-
return canvas.toDataURL();
|
|
118
|
+
return { pngUrl: canvas.toDataURL(), maxTotal };
|
|
119
119
|
}, 5);
|
|
120
120
|
|
|
121
121
|
export class GrossStatsPage extends qreact.Component {
|
|
@@ -136,23 +136,31 @@ export class GrossStatsPage extends qreact.Component {
|
|
|
136
136
|
<div className={css.hbox(6)}>
|
|
137
137
|
<span>Range:</span>
|
|
138
138
|
{TIME_RANGES.map(r =>
|
|
139
|
-
<
|
|
140
|
-
className={css.
|
|
139
|
+
<button
|
|
140
|
+
className={css.pad2(8, 4) + (state.rangeMs === r.ms ? " " + css.hsl(210, 70, 60).hslcolor(0, 0, 100).bold : "")}
|
|
141
141
|
onClick={() => Querysub.commit(() => { state.rangeMs = r.ms; })}
|
|
142
|
-
>{r.label}</
|
|
142
|
+
>{r.label}</button>
|
|
143
143
|
)}
|
|
144
144
|
</div>
|
|
145
145
|
<div className={css.hbox(6).wrap}>
|
|
146
146
|
<span>Field:</span>
|
|
147
147
|
{GROSS_STATS_FIELDS.map(f =>
|
|
148
|
-
<
|
|
149
|
-
className={css.
|
|
148
|
+
<button
|
|
149
|
+
className={css.pad2(8, 4) + (state.selectedField === f ? " " + css.hsl(140, 60, 50).hslcolor(0, 0, 100).bold : "")}
|
|
150
150
|
onClick={() => Querysub.commit(() => { state.selectedField = f; })}
|
|
151
|
-
>{f}</
|
|
151
|
+
>{f}</button>
|
|
152
152
|
)}
|
|
153
153
|
</div>
|
|
154
154
|
<div className={css.hbox(8).wrap}>
|
|
155
155
|
<span>Servers:</span>
|
|
156
|
+
<button
|
|
157
|
+
className={css.pad2(8, 4)}
|
|
158
|
+
onClick={() => Querysub.commit(() => { state.excludedNodes = new Set(); })}
|
|
159
|
+
>Select all</button>
|
|
160
|
+
<button
|
|
161
|
+
className={css.pad2(8, 4)}
|
|
162
|
+
onClick={() => Querysub.commit(() => { state.excludedNodes = new Set(nodeIds); })}
|
|
163
|
+
>Select none</button>
|
|
156
164
|
{nodeIds.map((nodeId, i) =>
|
|
157
165
|
<label className={css.hbox(4)}>
|
|
158
166
|
<input
|
|
@@ -198,6 +206,15 @@ export class GrossStatsPage extends qreact.Component {
|
|
|
198
206
|
perNodeTotals.push(totals);
|
|
199
207
|
}
|
|
200
208
|
|
|
209
|
+
let maxPerField = {} as Record<GrossStatsField, number>;
|
|
210
|
+
for (let f of GROSS_STATS_FIELDS) {
|
|
211
|
+
maxPerField[f] = 0;
|
|
212
|
+
for (let totals of perNodeTotals) {
|
|
213
|
+
if (totals[f] > maxPerField[f]) maxPerField[f] = totals[f];
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
let highlightStyle = css.hsl(60, 90, 75);
|
|
217
|
+
|
|
201
218
|
return <table className={css.fillWidth}>
|
|
202
219
|
<thead>
|
|
203
220
|
<tr>
|
|
@@ -215,9 +232,12 @@ export class GrossStatsPage extends qreact.Component {
|
|
|
215
232
|
<td className={css.pad2(4)} style={{ borderLeft: `4px solid ${colorForNode(i)}` }}>
|
|
216
233
|
{shortNodeId(nodeId)}
|
|
217
234
|
</td>
|
|
218
|
-
{GROSS_STATS_FIELDS.map(f =>
|
|
219
|
-
|
|
220
|
-
|
|
235
|
+
{GROSS_STATS_FIELDS.map(f => {
|
|
236
|
+
let isMax = perNodeTotals[i][f] > 0 && perNodeTotals[i][f] === maxPerField[f];
|
|
237
|
+
return <td className={css.textAlign("right").pad2(4) + (isMax ? " " + highlightStyle : "")}>
|
|
238
|
+
{formatNumber(perNodeTotals[i][f])}
|
|
239
|
+
</td>;
|
|
240
|
+
})}
|
|
221
241
|
</tr>
|
|
222
242
|
)}
|
|
223
243
|
</tbody>
|
|
@@ -234,7 +254,7 @@ export class GrossStatsPage extends qreact.Component {
|
|
|
234
254
|
let bucketsArrays: GrossStatsBucket[][] = selectedNodeIds.map(n => bucketsByNode.get(n) ?? []);
|
|
235
255
|
let anyLoading = !result;
|
|
236
256
|
|
|
237
|
-
let
|
|
257
|
+
let chart = renderChartPNG(
|
|
238
258
|
state.selectedField,
|
|
239
259
|
state.rangeMs,
|
|
240
260
|
CHART_WIDTH,
|
|
@@ -242,18 +262,36 @@ export class GrossStatsPage extends qreact.Component {
|
|
|
242
262
|
selectedNodeIds,
|
|
243
263
|
bucketsArrays,
|
|
244
264
|
);
|
|
265
|
+
let now = Date.now();
|
|
266
|
+
let windowStart = now - state.rangeMs;
|
|
245
267
|
|
|
246
268
|
return <div className={css.vbox(8).pad2(8).fillWidth}>
|
|
247
269
|
<h2>Cluster Stats</h2>
|
|
248
270
|
{this.renderControls(allNodeIds)}
|
|
249
|
-
<
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
271
|
+
<div className={css.vbox(4)}>
|
|
272
|
+
<div className={css.hbox(8).bold}>
|
|
273
|
+
<span>peak: {formatNumber(chart.maxTotal)} {state.selectedField} / minute</span>
|
|
274
|
+
</div>
|
|
275
|
+
<img
|
|
276
|
+
src={chart.pngUrl}
|
|
277
|
+
width={CHART_WIDTH}
|
|
278
|
+
height={CHART_HEIGHT}
|
|
279
|
+
className={css.hsl(0, 0, 100)}
|
|
280
|
+
style={anyLoading ? { opacity: 0.5 } : undefined}
|
|
281
|
+
/>
|
|
282
|
+
<div className={css.hbox(0).fillWidth}>
|
|
283
|
+
<span className={css.flexShrink0}>{formatLocalTime(windowStart)}</span>
|
|
284
|
+
<span className={css.fillBoth}></span>
|
|
285
|
+
<span className={css.flexShrink0}>{formatLocalTime(now)}</span>
|
|
286
|
+
</div>
|
|
287
|
+
</div>
|
|
256
288
|
{this.renderTable(selectedNodeIds, bucketsArrays)}
|
|
257
289
|
</div>;
|
|
258
290
|
}
|
|
259
291
|
}
|
|
292
|
+
|
|
293
|
+
/**
 * Formats an epoch-millisecond timestamp as `YYYY-MM-DD HH:mm`, using the
 * local time zone of the runtime (the `Date` local-time getters).
 *
 * @param ms Milliseconds since the Unix epoch.
 * @returns The formatted local time, or "—" when `ms` does not describe a
 *          valid date (e.g. NaN), instead of a "NaN-NaN-NaN NaN:NaN" label.
 */
function formatLocalTime(ms: number): string {
    let date = new Date(ms);
    // An invalid Date reports NaN from every getter; bail out with a placeholder.
    if (Number.isNaN(date.getTime())) return "—";

    let twoDigits = (value: number): string => String(value).padStart(2, "0");
    // getMonth() is zero-based, hence the +1.
    let dayPart = `${date.getFullYear()}-${twoDigits(date.getMonth() + 1)}-${twoDigits(date.getDate())}`;
    let clockPart = `${twoDigits(date.getHours())}:${twoDigits(date.getMinutes())}`;
    return `${dayPart} ${clockPart}`;
}
|
|
@@ -300,9 +300,6 @@ export class BufferIndex {
|
|
|
300
300
|
dataReader: Reader;
|
|
301
301
|
params: SearchParams;
|
|
302
302
|
keepIterating: () => boolean;
|
|
303
|
-
// Returns true iff the caller actually retained the value. We use that
|
|
304
|
-
// to drive the per-file matchCount cap below: see the note at the
|
|
305
|
-
// `matchesPattern(buffer)` call for why we can't blindly count emits.
|
|
306
303
|
onResult: (match: Buffer) => boolean;
|
|
307
304
|
results: IndexedLogResults;
|
|
308
305
|
allSearchUnits: Unit[][];
|
|
@@ -331,7 +328,20 @@ export class BufferIndex {
|
|
|
331
328
|
}, `BufferIndex|readLocalBlocks`);
|
|
332
329
|
});
|
|
333
330
|
|
|
334
|
-
|
|
331
|
+
// NOTE: The per-file matchCount cap (commented out below in both the
|
|
332
|
+
// block loop and the inner buffer loop) is intentionally disabled.
|
|
333
|
+
// We tested (see test.ts) and confirmed that blocks within a file
|
|
334
|
+
// aren't time-ordered — the move-to-public pipeline can leave a
|
|
335
|
+
// late-index block holding earlier-time entries than earlier-index
|
|
336
|
+
// blocks, and buffers within a single block aren't time-ordered
|
|
337
|
+
// either. The old `matchCount >= params.limit` short-circuit assumed
|
|
338
|
+
// ordering and was silently dropping blocks whose entries would have
|
|
339
|
+
// survived the top-K trim (broad queries lost their earliest
|
|
340
|
+
// matches). Without the cap we scan every candidate block per file,
|
|
341
|
+
// but the index pre-filter bounds the work and it isn't measurably
|
|
342
|
+
// slower in practice. If blocks/buffers ever get written in
|
|
343
|
+
// guaranteed time order this code can be re-enabled.
|
|
344
|
+
// let matchCount = 0;
|
|
335
345
|
let blockSearchTimeStart = Date.now();
|
|
336
346
|
|
|
337
347
|
results.totalBlockCount += indexEntries.length;
|
|
@@ -344,7 +354,8 @@ export class BufferIndex {
|
|
|
344
354
|
const step = iterateForward ? 1 : -1;
|
|
345
355
|
|
|
346
356
|
for (let i = startIdx; iterateForward ? i < endIdx : i > endIdx; i += step) {
|
|
347
|
-
if (matchCount >= params.limit || !config.keepIterating()) break;
|
|
357
|
+
// if (matchCount >= params.limit || !config.keepIterating()) break;
|
|
358
|
+
if (!config.keepIterating()) break;
|
|
348
359
|
await config.results.limitGroup?.wait();
|
|
349
360
|
const blockIndex = i;
|
|
350
361
|
|
|
@@ -393,29 +404,16 @@ export class BufferIndex {
|
|
|
393
404
|
const bufferStep = iterateForward ? 1 : -1;
|
|
394
405
|
|
|
395
406
|
for (let bufferIndex = bufferStartIdx; iterateForward ? bufferIndex < bufferEndIdx : bufferIndex > bufferEndIdx; bufferIndex += bufferStep) {
|
|
396
|
-
|
|
407
|
+
// See the note above the outer loop for why the
|
|
408
|
+
// matchCount-based stop is gone.
|
|
409
|
+
// if (matchCount >= params.limit || !config.keepIterating()) break;
|
|
410
|
+
if (!config.keepIterating()) break;
|
|
397
411
|
await config.results.limitGroup?.wait();
|
|
398
412
|
|
|
399
413
|
let buffer = buffers[bufferIndex];
|
|
400
414
|
if (matchesPattern(buffer)) {
|
|
401
|
-
|
|
402
|
-
//
|
|
403
|
-
// can reject for reasons we don't see from here — most
|
|
404
|
-
// notably time-range filtering (an entry whose time is
|
|
405
|
-
// outside the search window matched the text pattern
|
|
406
|
-
// but isn't a real hit). Counting rejected emits would
|
|
407
|
-
// let a stretch of out-of-window matches at the start
|
|
408
|
-
// of a file blow the per-file cap and short-circuit the
|
|
409
|
-
// scan before we reach the in-window region.
|
|
410
|
-
//
|
|
411
|
-
// The cost is that we keep matching and calling onResult
|
|
412
|
-
// through those out-of-window blocks (mild inefficiency).
|
|
413
|
-
// We can't skip ahead — buffers are scanned linearly and
|
|
414
|
-
// we don't know up front which entries the caller will
|
|
415
|
-
// reject — so this is the best we can do here.
|
|
416
|
-
if (config.onResult(buffer)) {
|
|
417
|
-
matchCount++;
|
|
418
|
-
}
|
|
415
|
+
config.onResult(buffer);
|
|
416
|
+
// matchCount++;
|
|
419
417
|
}
|
|
420
418
|
}
|
|
421
419
|
} catch (e: any) {
|
|
@@ -550,9 +548,7 @@ export class BufferIndex {
|
|
|
550
548
|
params: SearchParams;
|
|
551
549
|
|
|
552
550
|
keepIterating: () => boolean;
|
|
553
|
-
//
|
|
554
|
-
// per-file matchCount cap so out-of-window emits don't short-circuit
|
|
555
|
-
// the scan.
|
|
551
|
+
// Return value is unused: the per-file matchCount cap that used to consume it is disabled.
|
|
556
552
|
onResult: (match: Buffer) => boolean;
|
|
557
553
|
results: IndexedLogResults;
|
|
558
554
|
}): Promise<{
|
|
@@ -505,9 +505,6 @@ export class BufferUnitIndex {
|
|
|
505
505
|
params: SearchParams;
|
|
506
506
|
allSearchUnits: Unit[][];
|
|
507
507
|
keepIterating: () => boolean;
|
|
508
|
-
// Returns true iff the caller actually retained the value. Drives the
|
|
509
|
-
// `matchCounts` cap below — see the comment at the `matchesPattern`
|
|
510
|
-
// call for why we can't blindly count emits.
|
|
511
508
|
onResult: (match: Buffer) => boolean;
|
|
512
509
|
index: Buffer;
|
|
513
510
|
reader: Reader;
|
|
@@ -534,26 +531,42 @@ export class BufferUnitIndex {
|
|
|
534
531
|
// Read blocks and search for matches
|
|
535
532
|
let blockSearchTimeStart = Date.now();
|
|
536
533
|
await measureBlock(async () => {
|
|
537
|
-
|
|
538
|
-
|
|
534
|
+
// NOTE: The matchCount / matchCounts tracking and the
|
|
535
|
+
// `stopIterating` cap below are commented out, not deleted.
|
|
536
|
+
// We tested (see test.ts) and confirmed that blocks within a
|
|
537
|
+
// file aren't actually time-ordered — the move-to-public
|
|
538
|
+
// pipeline can leave a late-index block holding earlier-time
|
|
539
|
+
// entries than earlier-index blocks. The old `relevantCount
|
|
540
|
+
// >= params.limit` short-circuit assumed time-ordered blocks
|
|
541
|
+
// and was silently skipping blocks whose entries would have
|
|
542
|
+
// survived the top-K trim (broad queries lost their earliest
|
|
543
|
+
// matches). The same applied to the inner-buffer cap (buffers
|
|
544
|
+
// within a block aren't time-ordered either). Removing both
|
|
545
|
+
// caps means we scan every candidate block per file, but the
|
|
546
|
+
// index pre-filter bounds the work and it isn't measurably
|
|
547
|
+
// slower in practice. If blocks ever get written in
|
|
548
|
+
// guaranteed time order, this code can be re-enabled.
|
|
549
|
+
// let matchCount = 0;
|
|
550
|
+
// let matchCounts = list(blockCount).fill(0);
|
|
539
551
|
|
|
540
552
|
const searchBlock = async (blockIndex: number) => {
|
|
541
553
|
if (!candidateBlocksSet.has(blockIndex)) return;
|
|
542
|
-
// Check if we should stop iterating based on match counts and direction
|
|
543
|
-
let stopIterating = () => {
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
};
|
|
556
|
-
if (stopIterating()) return;
|
|
554
|
+
// // Check if we should stop iterating based on match counts and direction
|
|
555
|
+
// let stopIterating = () => {
|
|
556
|
+
// let relevantCount = 0;
|
|
557
|
+
// if (params.searchFromStart) {
|
|
558
|
+
// for (let i = 0; i <= blockIndex; i++) {
|
|
559
|
+
// relevantCount += matchCounts[i];
|
|
560
|
+
// }
|
|
561
|
+
// } else {
|
|
562
|
+
// for (let i = blockIndex; i < blockCount; i++) {
|
|
563
|
+
// relevantCount += matchCounts[i];
|
|
564
|
+
// }
|
|
565
|
+
// }
|
|
566
|
+
// return relevantCount >= params.limit || !keepIterating();
|
|
567
|
+
// };
|
|
568
|
+
// if (stopIterating()) return;
|
|
569
|
+
if (!keepIterating()) return;
|
|
557
570
|
|
|
558
571
|
let debugOffsets = {
|
|
559
572
|
startOffset: 0,
|
|
@@ -580,29 +593,17 @@ export class BufferUnitIndex {
|
|
|
580
593
|
const step = iterateForward ? 1 : -1;
|
|
581
594
|
|
|
582
595
|
for (let i = startIdx; iterateForward ? i < endIdx : i > endIdx; i += step) {
|
|
583
|
-
|
|
596
|
+
// See the note at the top of this function for why
|
|
597
|
+
// the matchCount-based stop is gone (commented out).
|
|
598
|
+
// if (stopIterating()) break;
|
|
599
|
+
if (!keepIterating()) break;
|
|
584
600
|
await results.limitGroup?.wait();
|
|
585
601
|
|
|
586
602
|
const buffer = await this.getBufferFromBlock(blockReader, i);
|
|
587
603
|
if (matchesPattern(buffer)) {
|
|
588
|
-
|
|
589
|
-
//
|
|
590
|
-
//
|
|
591
|
-
// for reasons opaque to us — most notably
|
|
592
|
-
// time-range filtering. Counting rejected emits
|
|
593
|
-
// would let a stretch of out-of-window matches at
|
|
594
|
-
// the start of a file blow the per-file cap and
|
|
595
|
-
// short-circuit the scan before we reach the
|
|
596
|
-
// in-window region. The cost is that we keep
|
|
597
|
-
// matching and calling onResult through those
|
|
598
|
-
// out-of-window blocks (mild inefficiency); we
|
|
599
|
-
// can't skip ahead because buffers are scanned
|
|
600
|
-
// linearly and we don't know up front which
|
|
601
|
-
// entries the caller will reject.
|
|
602
|
-
if (config.onResult(buffer)) {
|
|
603
|
-
matchCount++;
|
|
604
|
-
matchCounts[blockIndex]++;
|
|
605
|
-
}
|
|
604
|
+
config.onResult(buffer);
|
|
605
|
+
// matchCount++;
|
|
606
|
+
// matchCounts[blockIndex]++;
|
|
606
607
|
}
|
|
607
608
|
}
|
|
608
609
|
} catch (e: any) {
|
package/test.ts
CHANGED
|
@@ -1,81 +1,198 @@
|
|
|
1
1
|
import { chdir } from "process";
|
|
2
2
|
chdir("D:/repos/qs-cyoa/");
|
|
3
3
|
|
|
4
|
+
import { isPublic, setIsPublic } from "./src/config";
|
|
5
|
+
setIsPublic(true);
|
|
6
|
+
|
|
4
7
|
import "./inject";
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
}
|
|
8
|
+
import { Querysub } from "./src/4-querysub/Querysub";
|
|
9
|
+
import { getLoggers2Async, LogDatum } from "./src/diagnostics/logs/diskLogger";
|
|
10
|
+
import { IndexedLogs } from "./src/diagnostics/logs/IndexedLogs/IndexedLogs";
|
|
11
|
+
import { SearchParams } from "./src/diagnostics/logs/IndexedLogs/BufferIndexHelpers";
|
|
12
|
+
import { formatDateTimeDetailed, formatNumber, formatTime } from "socket-function/src/formatting/format";
|
|
13
|
+
|
|
14
|
+
// Pulled verbatim from the URLs the user shared.
|
|
15
|
+
const START_TIME = 1779598800000;
|
|
16
|
+
const END_TIME = 1779604200000;
|
|
17
|
+
const LIMIT = 1600;
|
|
18
|
+
const SEARCH_BROAD = `wvupofthbgq & "__threadId":"1f72e0ea774fcc81"`;
|
|
19
|
+
const SEARCH_NARROW = `wvupofthbgq & "__threadId":"1f72e0ea774fcc81" & new`;
|
|
20
|
+
|
|
21
|
+
type Emit = {
|
|
22
|
+
time: number;
|
|
23
|
+
logger: string;
|
|
24
|
+
datum: LogDatum;
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
/**
 * Stable identity for cross-query comparison. `time` alone isn't unique (many
 * entries share the same ms), so we fold in threadId + entry text + param0.
 *
 * `param0` is serialized with JSON.stringify when it is an object, so that two
 * different object payloads don't both collapse to "[object Object]" and
 * collide. Non-object values keep their plain string form.
 *
 * @param e The emit to build a key for.
 * @returns A `|`-separated key: time, thread id, entry text, param0.
 */
function emitKey(e: Emit): string {
    let rawParam = e.datum.param0 ?? "";
    let paramText: string;
    if (typeof rawParam === "object") {
        try {
            paramText = JSON.stringify(rawParam);
        } catch {
            // Circular structures / BigInt members: fall back to the default string form.
            paramText = String(rawParam);
        }
    } else {
        paramText = String(rawParam);
    }
    return `${e.time}|${e.datum.__threadId ?? ""}|${e.datum.__entry ?? ""}|${paramText}`;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Runs one search against the info logger over [START_TIME, END_TIME], logs
 * per-logger and overall statistics, and returns the matches sorted by time
 * ascending and trimmed to `limit` (mirroring the client-side display).
 *
 * Min/max times and array concatenation are done with loops rather than
 * `Math.min(...arr)` / `push(...arr)`: a run with a very large limit can emit
 * enough entries for argument spreading to throw a RangeError.
 *
 * @param label      Human-readable tag printed in the log header.
 * @param searchText Query text; passed to the search as a UTF-8 buffer.
 * @param limit      Maximum number of results requested and kept.
 * @returns The kept emits, earliest first.
 */
async function runQuery(label: string, searchText: string, limit: number = LIMIT): Promise<Emit[]> {
    console.log(`\n=== ${label}: ${JSON.stringify(searchText)} (limit=${limit}) ===`);
    let loggers = await getLoggers2Async();
    let allLoggers: { name: string; logger: IndexedLogs<LogDatum> }[] = [
        { name: "info", logger: loggers.infoLogs },
    ];

    let params: SearchParams = {
        startTime: START_TIME,
        endTime: END_TIME,
        limit,
        findBuffer: Buffer.from(searchText, "utf8"),
        searchFromStart: true,
        only: "public",
    };

    let allEmits: Emit[] = [];
    let queryStart = Date.now();

    await Promise.all(allLoggers.map(async ({ name, logger }) => {
        let perLoggerEmits: Emit[] = [];
        let loggerStart = Date.now();
        let result = await logger.find({
            params,
            onResult: (match: LogDatum) => {
                perLoggerEmits.push({ time: match.time, logger: name, datum: match });
            },
        });

        // Single pass for both extremes; stays undefined when nothing was emitted.
        let earliest: number | undefined = undefined;
        let latest: number | undefined = undefined;
        for (let emit of perLoggerEmits) {
            if (earliest === undefined || emit.time < earliest) earliest = emit.time;
            if (latest === undefined || emit.time > latest) latest = emit.time;
        }

        console.log(
            ` [${name}] emits=${perLoggerEmits.length} ` +
            `matchCount=${result.matchCount} ` +
            `blocksChecked=${result.blockCheckedCount}/${result.totalBlockCount} ` +
            `filesScanned=${result.backblazeFilesSearched}/${result.totalBackblazeFiles} ` +
            `earliest=${earliest !== undefined ? formatDateTimeDetailed(earliest) : "—"} ` +
            `latest=${latest !== undefined ? formatDateTimeDetailed(latest) : "—"} ` +
            `time=${formatTime(Date.now() - loggerStart)}`
        );
        for (let emit of perLoggerEmits) {
            allEmits.push(emit);
        }
    }));

    console.log(` total emits=${allEmits.length} in ${formatTime(Date.now() - queryStart)}`);

    // Sort + trim to limit (mirroring the client-side display).
    allEmits.sort((a, b) => a.time - b.time);
    if (allEmits.length > limit) allEmits.length = limit;

    console.log(
        ` kept top-${allEmits.length} ` +
        `earliest=${allEmits.length > 0 ? formatDateTimeDetailed(allEmits[0].time) : "—"} ` +
        `latest=${allEmits.length > 0 ? formatDateTimeDetailed(allEmits[allEmits.length - 1].time) : "—"}`
    );
    return allEmits;
}
|
|
34
89
|
|
|
35
90
|
/**
 * Diagnostic script entry point.
 *
 * 1. Lists every public info-log file whose declared range intersects
 *    [START_TIME, END_TIME].
 * 2. Runs the broad query, the narrow query, and the broad query again with a
 *    huge limit.
 * 3. Reports narrow results that fall inside the broad result window but are
 *    absent from the broad results, plus the files that may explain them.
 * 4. Reports whether the huge-limit broad run contains every narrow result.
 *
 * Returns early (after logging) when either the broad or narrow query yields
 * no results, since there is nothing to compare.
 */
async function main() {
    await Querysub.hostService("test");

    // Dump every info file in range — declared startTime / endTime — so we
    // can spot files whose declared range disagrees with the entries inside.
    let loggers = await getLoggers2Async();
    let paths = await loggers.infoLogs.getPaths({
        startTime: START_TIME,
        endTime: END_TIME,
        only: "public",
    });
    paths.sort((a, b) => a.startTime - b.startTime);
    console.log(`\n=== INFO FILES IN RANGE (${paths.length}) — declared ranges ===`);
    for (let p of paths) {
        console.log(` start=${formatDateTimeDetailed(p.startTime)} end=${formatDateTimeDetailed(p.endTime)} logCount=${p.logCount ?? "?"} ${p.fullPath}`);
    }

    let broad = await runQuery("BROAD", SEARCH_BROAD);
    let narrow = await runQuery("NARROW", SEARCH_NARROW);
    // Sanity: run broad again with a huge limit. If the missing entries appear
    // here but not in the default-limit run, the per-file `stopIterating` cap is
    // skipping the block that contains them. If they're still missing, the
    // bug is upstream (index pre-filter or block scanner missing them).
    let broadHuge = await runQuery("BROAD_HUGE", SEARCH_BROAD, 1_000_000);

    // The diagnostic: narrow ⊂ broad by definition. So every narrow result
    // whose time falls inside broad's kept window MUST appear in broad. If any
    // are missing, the broad scan dropped them — that's the bug.
    if (broad.length === 0 || narrow.length === 0) {
        console.log(`\nSkipping comparison: broad=${broad.length} narrow=${narrow.length}`);
        return;
    }

    let broadKeys = new Set(broad.map(emitKey));
    let broadCutoff = broad[broad.length - 1].time;
    let broadEarliest = broad[0].time;

    console.log(`\n=== COMPARE ===`);
    console.log(`broad window: [${formatDateTimeDetailed(broadEarliest)}, ${formatDateTimeDetailed(broadCutoff)}]`);

    // When broad was trimmed to the limit, entries sharing the cutoff
    // millisecond may have been cut by the trim itself, so a narrow entry at
    // exactly that time can be absent legitimately. Only times strictly before
    // the cutoff are guaranteed to be fully represented in that case.
    let broadWasTrimmed = broad.length >= LIMIT;
    let cutoffOperator = broadWasTrimmed ? "<" : "<=";
    let narrowInWindow = narrow.filter(n => broadWasTrimmed ? n.time < broadCutoff : n.time <= broadCutoff);
    let missing = narrowInWindow.filter(n => !broadKeys.has(emitKey(n)));

    console.log(`narrow total: ${narrow.length}`);
    console.log(`narrow within broad window (${cutoffOperator} broad cutoff): ${narrowInWindow.length}`);
    console.log(`narrow missing from broad kept top-K: ${missing.length}`);

    if (missing.length > 0) {
        console.log(`\nFirst ${Math.min(20, missing.length)} missing entries (these prove broad dropped them):`);
        for (let m of missing.slice(0, 20)) {
            console.log(
                ` time=${formatDateTimeDetailed(m.time)} ` +
                `logger=${m.logger} ` +
                `entry=${(m.datum.__entry ?? "").slice(0, 80)} ` +
                `param0=${String(m.datum.param0 ?? "").slice(0, 80)}`
            );
        }

        // Group missing by logger so we can tell which scan dropped them.
        let byLogger = new Map<string, number>();
        for (let m of missing) byLogger.set(m.logger, (byLogger.get(m.logger) ?? 0) + 1);
        console.log(`\nMissing by logger:`);
        for (let [k, v] of byLogger) console.log(` ${k}: ${formatNumber(v)}`);
    } else {
        console.log(`\nNo missing entries — broad correctly contains all narrow results in its window.`);
    }

    // For each missing entry, identify the file whose declared range *should*
    // cover its time, and find the file whose declared range *doesn't* cover
    // it (the bug indicator).
    if (missing.length > 0) {
        let minMissing = missing[0].time;
        let maxMissing = missing[0].time;
        for (let m of missing) {
            if (m.time < minMissing) minMissing = m.time;
            if (m.time > maxMissing) maxMissing = m.time;
        }
        console.log(`\nMissing entry times span: [${formatDateTimeDetailed(minMissing)}, ${formatDateTimeDetailed(maxMissing)}]`);
        console.log(`Files whose declared range overlaps [${formatDateTimeDetailed(minMissing)}, ${formatDateTimeDetailed(maxMissing)}]:`);
        for (let p of paths) {
            if (p.endTime < minMissing || p.startTime > maxMissing) continue;
            console.log(` start=${formatDateTimeDetailed(p.startTime)} end=${formatDateTimeDetailed(p.endTime)} ${p.fullPath}`);
        }
        // Also: any file whose declared startTime > broad cutoff (and so would
        // be isSourceRelevant-pruned) — these are candidates for the bug.
        console.log(`Files whose declared startTime > broad cutoff (${formatDateTimeDetailed(broadCutoff)}) — these would be pruned by isSourceRelevant once broad fills:`);
        for (let p of paths) {
            if (p.startTime > broadCutoff) {
                let overlap = p.startTime <= maxMissing && p.endTime >= minMissing ? " <- OVERLAPS MISSING" : "";
                console.log(` start=${formatDateTimeDetailed(p.startTime)} end=${formatDateTimeDetailed(p.endTime)} ${p.fullPath}${overlap}`);
            }
        }
    }

    // Check whether broad-with-huge-limit catches the missing entries.
    let broadHugeKeys = new Set(broadHuge.map(emitKey));
    let stillMissingFromHuge = narrow.filter(n => !broadHugeKeys.has(emitKey(n)));
    console.log(`\n=== BROAD_HUGE check ===`);
    console.log(`narrow missing from broad_huge: ${stillMissingFromHuge.length}`);
    if (stillMissingFromHuge.length > 0) {
        console.log(`First ${Math.min(20, stillMissingFromHuge.length)} still-missing:`);
        for (let m of stillMissingFromHuge.slice(0, 20)) {
            console.log(` time=${formatDateTimeDetailed(m.time)} param0=${String(m.datum.param0 ?? "").slice(0, 60)}`);
        }
        console.log(`-> bug is NOT (only) the per-file cap; the scanner / index pre-filter is dropping entries even without the cap.`);
    } else {
        console.log(`-> all narrow entries are in broad_huge; the per-file stopIterating cap is the culprit for the limit=1600 miss.`);
    }
}
|
|
78
196
|
|
|
79
|
-
main().catch(console.error)
|
|
80
|
-
.finally(() => process.exit(0))
|
|
81
|
-
;
|
|
197
|
+
// Run the diagnostic, print any failure with its stack, and always terminate
// the process when done. A failure sets a non-zero exit code so the run does
// not report success; `process.exit()` without an argument honours
// `process.exitCode`.
main()
    .catch((e: unknown) => {
        console.error(e instanceof Error ? e.stack ?? e : e);
        process.exitCode = 1;
    })
    .finally(() => process.exit());
|