querysub 0.456.0 → 0.458.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { timeInHour, timeInMinute, timeInSecond, timeoutToError, timeoutToUndefined, timeoutToUndefinedSilent } from "socket-function/src/misc";
1
+ import { sort, timeInHour, timeInMinute, timeInSecond, timeoutToError, timeoutToUndefined, timeoutToUndefinedSilent } from "socket-function/src/misc";
2
2
  import { lazy } from "socket-function/src/caching";
3
3
  import { getMachineId } from "../../../-a-auth/certs";
4
4
  import { getAllNodeIds, getOwnMachineId, isOwnNodeId } from "../../../-f-node-discovery/NodeDiscovery";
@@ -27,32 +27,6 @@ const PROGRESS_LOG_INTERVAL = timeInSecond * 5;
27
27
  const LOGGER_NAMES = ["logs/log", "logs/info", "logs/warn", "logs/error"] as const;
28
28
  type LoggerName = typeof LOGGER_NAMES[number];
29
29
 
30
- // Public-facing short names callers pass in via the `logTypes` parameter, mapped
31
- // to the internal logger names above.
32
- const EXTERNAL_TO_INTERNAL_LOGGER: Record<string, LoggerName> = {
33
- "log": "logs/log",
34
- "info": "logs/info",
35
- "warn": "logs/warn",
36
- "error": "logs/error",
37
- };
38
-
39
- // Parses the caller's `logTypes` string (e.g. "warn|error") into the matching
40
- // internal logger names. Empty / undefined means "all four" (no restriction).
41
- function parseLogTypes(value: string | undefined): readonly LoggerName[] {
42
- if (!value) return LOGGER_NAMES;
43
- let parts = value.split("|").map(s => s.trim().toLowerCase()).filter(s => s);
44
- if (parts.length === 0) return LOGGER_NAMES;
45
- let result: LoggerName[] = [];
46
- for (let p of parts) {
47
- let internal = EXTERNAL_TO_INTERNAL_LOGGER[p];
48
- if (!internal) {
49
- throw new Error(`logTypes: unknown log type ${JSON.stringify(p)}; expected one of log, info, warn, error (separated by "|")`);
50
- }
51
- if (!result.includes(internal)) result.push(internal);
52
- }
53
- return result;
54
- }
55
-
56
30
  // Per-logger accounting for one search. Byte counts are raw buffer sizes.
57
31
  export type LoggerStats = {
58
32
  // Files in range matching the requested machine.
@@ -168,11 +142,8 @@ function createProgressLogger(): (message: string) => void {
168
142
  }
169
143
 
170
144
  export class MCPIndexedLogs {
171
- // `${machineId}|${loggerName}` -> latest timestamp guaranteed to already be
172
- // moved-to-public for that specific (machine, logger). Keyed per-logger so
173
- // a search scoped to only some loggers doesn't poison the cache for the
174
- // others.
175
- private movedThroughByMachineLogger = new Map<string, number>();
145
+ // machineId -> latest timestamp guaranteed to already be moved-to-public.
146
+ private movedThroughByMachine = new Map<string, number>();
176
147
 
177
148
  // Cache: `${type}|${loggerName}|${startBucket}|${endBucket}` -> { time, paths }.
178
149
  // Buckets are hour-aligned start/end so adjacent searches reuse work.
@@ -186,15 +157,14 @@ export class MCPIndexedLogs {
186
157
  direction: Direction;
187
158
  columns: string[];
188
159
  limit?: number;
189
- // Optional pipe-separated list restricting which log streams to scan
190
- // (e.g. "warn|error", "log"). Omit / empty = all four streams.
191
- logTypes?: string;
192
160
  }): Promise<SearchResult> {
193
161
  let limit = config.limit ?? 100;
194
162
  let startTime = normalizeTime(config.startTime, "startTime");
195
163
  let endTime = normalizeTime(config.endTime, "endTime");
196
- let enabledLoggers = parseLogTypes(config.logTypes);
197
- console.log(`[search] query=${JSON.stringify(config.query)} | machine=${config.machine} | startTime=${formatDateTime(startTime)} | endTime=${formatDateTime(endTime)} | direction=${config.direction} | columns=[${config.columns.join(",")}] | limit=${config.limit ?? "(default)"} | logTypes=${config.logTypes ?? "(all)"}`);
164
+ // `time` is always projected — the final sort needs it, and callers can't
165
+ // meaningfully read a log row without knowing when it happened.
166
+ let columns = config.columns.includes("time") ? config.columns : ["time", ...config.columns];
167
+ console.log(`[search] query=${JSON.stringify(config.query)} | machine=${config.machine} | startTime=${formatDateTime(startTime)} | endTime=${formatDateTime(endTime)} | direction=${config.direction} | columns=[${config.columns.join(",")}] | limit=${config.limit ?? "(default)"}`);
198
168
  let now = Date.now();
199
169
  if (endTime > now - END_TIME_MIN_AGE) {
200
170
  throw new Error(`endTime must be at least ${formatTime(END_TIME_MIN_AGE)} in the past (got ${formatTime(now - endTime)} ago)`);
@@ -206,7 +176,7 @@ export class MCPIndexedLogs {
206
176
  let machineId = config.machine === "local" ? getOwnMachineId() : config.machine;
207
177
 
208
178
  let moveStart = Date.now();
209
- let moveOutcome = await this.ensureMovedThrough(machineId, endTime, enabledLoggers);
179
+ let moveOutcome = await this.ensureMovedThrough(machineId, endTime);
210
180
  console.log(`[search] ensureMovedThrough ${moveOutcome} in ${formatTime(Date.now() - moveStart)}`);
211
181
 
212
182
  let loggers = await getLoggers2Async();
@@ -223,7 +193,7 @@ export class MCPIndexedLogs {
223
193
 
224
194
  let pathsStart = Date.now();
225
195
  let totalPathsSeen = 0;
226
- await Promise.all(enabledLoggers.map(async (loggerName) => {
196
+ await Promise.all(LOGGER_NAMES.map(async (loggerName) => {
227
197
  let logger = this.getLoggerByName(loggers, loggerName);
228
198
  let archives = logger.debugGetCachedLogs({ type: useType });
229
199
 
@@ -270,6 +240,9 @@ export class MCPIndexedLogs {
270
240
  }));
271
241
  console.log(`[search] read ${allFiles.length} files in ${formatTime(Date.now() - searchStart)}`);
272
242
 
243
+ let dir = config.direction === "fromStart" ? 1 : -1;
244
+ sort(readFiles, x => x.entry.path.startTime * dir);
245
+
273
246
  // Phase 2: scan the already-read files in time order, applying a moving
274
247
  // cutoff once we have `limit` rows: any unprocessed file whose entire range
275
248
  // is past the cutoff cannot contribute results we'd keep.
@@ -282,7 +255,6 @@ export class MCPIndexedLogs {
282
255
  scanCount++;
283
256
  logScanProgress(`[search] scanning files ${scanCount}/${readFiles.length}`);
284
257
 
285
- if (resultRows.length >= limit) break;
286
258
  if (buffers === undefined) continue;
287
259
 
288
260
  if (cutoff !== undefined) {
@@ -303,7 +275,7 @@ export class MCPIndexedLogs {
303
275
  limit,
304
276
  queryBuffer,
305
277
  matchesPattern,
306
- columns: config.columns,
278
+ columns,
307
279
  startTime,
308
280
  endTime,
309
281
  stats,
@@ -326,18 +298,26 @@ export class MCPIndexedLogs {
326
298
  let totals = createEmptyLoggerStats();
327
299
  for (let name of LOGGER_NAMES) addLoggerStats(totals, fileCounts[name]);
328
300
 
329
- let limitHit = resultRows.length >= limit;
330
- console.log(`[search] done in ${formatTime(Date.now() - searchStart)} (filesScanned=${totals.scanned}/${allFiles.length} scannedBytes=${formatNumber(totals.scannedBytes)}B blocksMatched=${totals.blocksMatched} blocksRead=${totals.blocksRead} blockBytesRead=${formatNumber(totals.blockBytesRead)}B results=${resultRows.length} limit=${limit}${limitHit ? " HIT" : ""})`);
301
+ // Files from different loggers can overlap in time, so rows come out of
302
+ // phase 2 only roughly time-ordered. Sort by row time in the scan
303
+ // direction and slice to `limit`. The slice is the *only* place the
304
+ // global limit is enforced on the returned set — per-file/per-block caps
305
+ // upstream are bounded by `limit` to keep memory sane but don't define
306
+ // truncation by themselves.
307
+ sort(resultRows, r => Number(r.time) * dir);
308
+ let totalMatched = resultRows.length;
309
+ if (totalMatched > limit) resultRows = resultRows.slice(0, limit);
310
+
311
+ let limitHit = totalMatched > limit;
312
+ console.log(`[search] done in ${formatTime(Date.now() - searchStart)} (filesScanned=${totals.scanned}/${allFiles.length} scannedBytes=${formatNumber(totals.scannedBytes)}B blocksMatched=${totals.blocksMatched} blocksRead=${totals.blocksRead} blockBytesRead=${formatNumber(totals.blockBytesRead)}B matched=${totalMatched} returned=${resultRows.length} limit=${limit}${limitHit ? " HIT" : ""})`);
331
313
  console.log(`[search] buffer types: stream=${stats.typeCounts.stream} bulk=${stats.typeCounts.bulk}`);
332
314
  console.log(`[search] timing: readFiles=${formatTime(totals.readFilesMs)} findMatchingBlocks=${formatTime(totals.findMatchingBlocksMs)} getBlockBuffers=${formatTime(totals.getBlockBuffersMs)}`);
333
315
 
334
316
  // Trim the internal LoggerStats down to just total + scanned. The rest
335
317
  // (bytes/blocks/timing) stays in the console.log above and is NOT
336
- // returned — see the warning on SearchResult. We only emit entries for
337
- // the loggers we actually searched, so a caller who scoped to
338
- // `warn|error` doesn't see misleading 0s for the loggers they skipped.
318
+ // returned — see the warning on SearchResult.
339
319
  let files: Record<string, { total: number; scanned: number }> = {};
340
- for (let name of enabledLoggers) {
320
+ for (let name of LOGGER_NAMES) {
341
321
  files[name] = { total: fileCounts[name].total, scanned: fileCounts[name].scanned };
342
322
  }
343
323
 
@@ -387,8 +367,13 @@ export class MCPIndexedLogs {
387
367
  sink: SearchSink;
388
368
  }): Promise<void> {
389
369
  let { entry, indexBuf, dataBuf, direction, limit, queryBuffer, matchesPattern, columns, startTime, endTime, stats, sink } = scan;
390
- let { resultRows, loggerStats } = sink;
370
+ let { loggerStats } = sink;
391
371
  let p = entry.path;
372
+ // Per-file cap. Blocks are scanned in time order, so anything past the
373
+ // first `limit` rows from this file would lose the caller's final
374
+ // sort+slice anyway. Tracked locally (not against the shared sink)
375
+ // so a noisy earlier file doesn't starve overlapping later files.
376
+ let rowsFromThisFile = 0;
392
377
 
393
378
  // Region 1: the index scan that picks candidate blocks.
394
379
  let findStart = Date.now();
@@ -421,7 +406,7 @@ export class MCPIndexedLogs {
421
406
  }
422
407
 
423
408
  for (let block of blocks) {
424
- if (resultRows.length >= limit) break;
409
+ if (rowsFromThisFile >= limit) break;
425
410
 
426
411
  // Region 2: decoding the candidate block's buffers.
427
412
  let buffers: Buffer[] | undefined;
@@ -441,8 +426,10 @@ export class MCPIndexedLogs {
441
426
 
442
427
  let ordered = direction === "fromStart" ? buffers : [...buffers].reverse();
443
428
  for (let buf of ordered) {
444
- if (resultRows.length >= limit) break;
429
+ if (rowsFromThisFile >= limit) break;
430
+ let before = sink.resultRows.length;
445
431
  this.appendRow({ buf, matchesPattern, columns, startTime, endTime, sink });
432
+ if (sink.resultRows.length > before) rowsFromThisFile++;
446
433
  }
447
434
  }
448
435
  loggerStats.getBlockBuffersMs += Date.now() - blockStart;
@@ -485,20 +472,16 @@ export class MCPIndexedLogs {
485
472
  sink.loggerStats.rows++;
486
473
  }
487
474
 
488
- // For each requested logger, asks each remote node on the target machine
489
- // whether it has pending logs overlapping [0, endTime]. The first node that
490
- // answers without throwing wins; if it says yes, we ask the same node to
491
- // flush. We iterate because not every node necessarily exposes the new
492
- // endpoints (e.g. older versions still running). Records moved-through up
493
- // to now - MOVE_GRACE per (machine, logger) so we skip this on subsequent
494
- // calls covering the same window. Only the loggers listed in `loggers` are
495
- // touched; the others aren't queried or flushed.
496
- private async ensureMovedThrough(machineId: string, endTime: number, loggers: readonly LoggerName[]): Promise<"cached" | "no-node" | "moved"> {
497
- let needed = loggers.filter(name => {
498
- let lastMoved = this.movedThroughByMachineLogger.get(`${machineId}|${name}`) ?? 0;
499
- return lastMoved < endTime;
500
- });
501
- if (needed.length === 0) return "cached";
475
+ // For each logger, asks each remote node on the target machine whether it
476
+ // has pending logs overlapping [0, endTime]. The first node that answers
477
+ // without throwing wins; if it says yes, we ask the same node to flush.
478
+ // We iterate because not every node necessarily exposes the new endpoints
479
+ // (e.g. older versions still running). Records moved-through up to
480
+ // now - MOVE_GRACE so we skip this on subsequent calls covering the same
481
+ // window.
482
+ private async ensureMovedThrough(machineId: string, endTime: number): Promise<"cached" | "no-node" | "moved"> {
483
+ let lastMoved = this.movedThroughByMachine.get(machineId) ?? 0;
484
+ if (lastMoved >= endTime) return "cached";
502
485
 
503
486
  let nodeIds = await this.findRemoteNodesOnMachine(machineId);
504
487
  if (nodeIds.length === 0) {
@@ -506,7 +489,7 @@ export class MCPIndexedLogs {
506
489
  return "no-node";
507
490
  }
508
491
 
509
- for (let loggerName of needed) {
492
+ for (let loggerName of LOGGER_NAMES) {
510
493
  let answered = false;
511
494
  for (let nodeId of nodeIds) {
512
495
  try {
@@ -534,10 +517,7 @@ export class MCPIndexedLogs {
534
517
  }
535
518
  }
536
519
 
537
- let recordTime = Date.now() - MOVE_GRACE;
538
- for (let loggerName of needed) {
539
- this.movedThroughByMachineLogger.set(`${machineId}|${loggerName}`, recordTime);
540
- }
520
+ this.movedThroughByMachine.set(machineId, Date.now() - MOVE_GRACE);
541
521
  return "moved";
542
522
  }
543
523
 
@@ -10,6 +10,9 @@ import { isPublic } from "../../config";
10
10
  import type { IndexedLogs } from "./IndexedLogs/IndexedLogs";
11
11
  // IMPORTANT! We can't have any real imports here, because we are depended on so early in startup!
12
12
 
13
+ let logWriteCount = 0;
14
+ export function getLogWriteCount() { return logWriteCount; }
15
+
13
16
  if (isNode()) {
14
17
  // Delayed setup, as we depend on diskLogger early, and we don't want to force high level
15
18
  // modules to be required before their level
@@ -150,6 +153,7 @@ export function logDisk(type: "log" | "warn" | "info" | "error", ...args: unknow
150
153
  } else {
151
154
  errorLogs.append(logObj);
152
155
  }
156
+ logWriteCount++;
153
157
  }
154
158
 
155
159
  } catch (e: any) {
@@ -167,6 +167,12 @@ export async function registerManagementPages2(config: {
167
167
  controllerName: "SyncTestController",
168
168
  getModule: () => import("./SyncTestPage"),
169
169
  });
170
+ inputPages.push({
171
+ title: "Gross Stats",
172
+ componentName: "GrossStatsPage",
173
+ controllerName: "GrossStatsController",
174
+ getModule: () => import("./grossStats/GrossStatsPage"),
175
+ });
170
176
  inputPages.push(...config.pages);
171
177
 
172
178
  // NOTE: We don't store the UI in the database (here, or anywhere else, at least
@@ -313,6 +319,7 @@ export function renderIsManagementUser() {
313
319
 
314
320
  const ErrorWarning = createLazyComponent(() => import("./logs/errorNotifications2/ErrorWarning"))("ErrorWarning");
315
321
  const LaunchTrackingHeader = createLazyComponent(() => import("../deployManager/LaunchTrackingHeader"))("LaunchTrackingHeader");
322
+ const GrossStatsInfo = createLazyComponent(() => import("./grossStats/GrossStatsInfo"))("GrossStatsInfo");
316
323
 
317
324
  class ManagementRoot extends qreact.Component {
318
325
  state = {
@@ -386,6 +393,7 @@ class ManagementRoot extends qreact.Component {
386
393
  <PathDistributionInfo />
387
394
  <ValuePathWarning />
388
395
  <LaunchTrackingHeader />
396
+ <GrossStatsInfo />
389
397
  </div>}
390
398
  </div>
391
399
  {currentPage &&