membot 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
1
  import { z } from "zod";
2
+ import { resolveEmbeddingWorkers } from "../context.ts";
3
+ import { withEmbedderPool } from "../ingest/embedder-pool.ts";
2
4
  import {
3
5
  countResolvedEntries,
4
6
  type IngestCallbacks,
@@ -105,101 +107,107 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
105
107
  return `${lines.join("\n")}\n${parts.join(", ")}`;
106
108
  },
107
109
  handler: async (input, ctx) => {
108
- const { sources, ...rest } = input;
109
- const followSymlinks = rest.follow_symlinks ?? true;
110
+ // Spin up an ephemeral embedder pool for the whole `add` command —
111
+ // `withEmbedderPool` handles the workers=1 short-circuit and disposes
112
+ // the children when the closure returns (see embedder-pool.ts).
113
+ const workers = resolveEmbeddingWorkers(ctx.config.embedding.workers);
114
+ return withEmbedderPool(workers, ctx.config.embedding_model, async () => {
115
+ const { sources, ...rest } = input;
116
+ const followSymlinks = rest.follow_symlinks ?? true;
110
117
 
111
- // Phase 1: resolve every source upfront so the shared progress bar
112
- // knows its total. A resolve failure (bad path, glob with no base) is
113
- // captured per-source so one bad arg doesn't abort the whole batch.
114
- type ResolveOutcome = { source: string; resolved: ResolvedSource } | { source: string; error: Error };
115
- const outcomes: ResolveOutcome[] = [];
116
- for (const source of sources) {
117
- try {
118
- const resolved = await resolveSource(source, {
119
- include: rest.include,
120
- exclude: rest.exclude,
121
- followSymlinks,
122
- });
123
- outcomes.push({ source, resolved });
124
- } catch (err) {
125
- outcomes.push({ source, error: err instanceof Error ? err : new Error(String(err)) });
118
+ // Phase 1: resolve every source upfront so the shared progress bar
119
+ // knows its total. A resolve failure (bad path, glob with no base) is
120
+ // captured per-source so one bad arg doesn't abort the whole batch.
121
+ type ResolveOutcome = { source: string; resolved: ResolvedSource } | { source: string; error: Error };
122
+ const outcomes: ResolveOutcome[] = [];
123
+ for (const source of sources) {
124
+ try {
125
+ const resolved = await resolveSource(source, {
126
+ include: rest.include,
127
+ exclude: rest.exclude,
128
+ followSymlinks,
129
+ });
130
+ outcomes.push({ source, resolved });
131
+ } catch (err) {
132
+ outcomes.push({ source, error: err instanceof Error ? err : new Error(String(err)) });
133
+ }
126
134
  }
127
- }
128
135
 
129
- const total = outcomes.reduce((n, o) => ("error" in o ? n + 1 : n + countResolvedEntries(o.resolved)), 0);
136
+ const total = outcomes.reduce((n, o) => ("error" in o ? n + 1 : n + countResolvedEntries(o.resolved)), 0);
130
137
 
131
- const aggregated: IngestResult = {
132
- ingested: [],
133
- total: 0,
134
- ok: 0,
135
- unchanged: 0,
136
- failed: 0,
137
- };
138
+ const aggregated: IngestResult = {
139
+ ingested: [],
140
+ total: 0,
141
+ ok: 0,
142
+ unchanged: 0,
143
+ failed: 0,
144
+ };
138
145
 
139
- ctx.progress.start(total, "ingest");
140
- const callbacks: IngestCallbacks = {
141
- onEntryStart: (label) => ctx.progress.tick(label),
142
- onEntryComplete: (entry) => ctx.progress.entry(formatEntryLine(entry)),
143
- onEntryProgress: (_label, sublabel) => ctx.progress.update(sublabel),
144
- };
146
+ ctx.progress.start(total, "ingest");
147
+ const callbacks: IngestCallbacks = {
148
+ onEntryStart: (label) => ctx.progress.tick(label),
149
+ onEntryComplete: (entry) => ctx.progress.entry(formatEntryLine(entry)),
150
+ onEntryProgress: (_label, sublabel) => ctx.progress.update(sublabel),
151
+ };
145
152
 
146
- for (const outcome of outcomes) {
147
- if ("error" in outcome) {
148
- const failed: IngestEntryResult = {
149
- source_path: outcome.source,
150
- logical_path: outcome.source,
151
- version_id: null,
152
- status: "failed",
153
- error: outcome.error.message,
154
- mime_type: null,
155
- size_bytes: 0,
156
- fetcher: "local",
157
- source_sha256: "",
158
- };
159
- callbacks.onEntryStart?.(outcome.source);
160
- callbacks.onEntryComplete?.(failed);
161
- aggregated.ingested.push(failed);
162
- aggregated.total += 1;
163
- aggregated.failed += 1;
164
- continue;
165
- }
153
+ for (const outcome of outcomes) {
154
+ if ("error" in outcome) {
155
+ const failed: IngestEntryResult = {
156
+ source_path: outcome.source,
157
+ logical_path: outcome.source,
158
+ version_id: null,
159
+ status: "failed",
160
+ error: outcome.error.message,
161
+ mime_type: null,
162
+ size_bytes: 0,
163
+ fetcher: "local",
164
+ source_sha256: "",
165
+ };
166
+ callbacks.onEntryStart?.(outcome.source);
167
+ callbacks.onEntryComplete?.(failed);
168
+ aggregated.ingested.push(failed);
169
+ aggregated.total += 1;
170
+ aggregated.failed += 1;
171
+ continue;
172
+ }
166
173
 
167
- try {
168
- const r = await ingestResolved(outcome.resolved, { ...rest, source: outcome.source }, ctx, callbacks);
169
- aggregated.ingested.push(...r.ingested);
170
- aggregated.total += r.total;
171
- aggregated.ok += r.ok;
172
- aggregated.unchanged += r.unchanged;
173
- aggregated.failed += r.failed;
174
- } catch (err) {
175
- const message = err instanceof Error ? err.message : String(err);
176
- const failed: IngestEntryResult = {
177
- source_path: outcome.source,
178
- logical_path: outcome.source,
179
- version_id: null,
180
- status: "failed",
181
- error: message,
182
- mime_type: null,
183
- size_bytes: 0,
184
- fetcher: "local",
185
- source_sha256: "",
186
- };
187
- callbacks.onEntryStart?.(outcome.source);
188
- callbacks.onEntryComplete?.(failed);
189
- aggregated.ingested.push(failed);
190
- aggregated.total += 1;
191
- aggregated.failed += 1;
192
- } finally {
193
- // Release the DB lock between sources so other consumers (a
194
- // concurrent CLI call, the daemon, or a separate MCP server)
195
- // can wedge in. The next source's first DB call reopens.
196
- await ctx.db.release();
174
+ try {
175
+ const r = await ingestResolved(outcome.resolved, { ...rest, source: outcome.source }, ctx, callbacks);
176
+ aggregated.ingested.push(...r.ingested);
177
+ aggregated.total += r.total;
178
+ aggregated.ok += r.ok;
179
+ aggregated.unchanged += r.unchanged;
180
+ aggregated.failed += r.failed;
181
+ } catch (err) {
182
+ const message = err instanceof Error ? err.message : String(err);
183
+ const failed: IngestEntryResult = {
184
+ source_path: outcome.source,
185
+ logical_path: outcome.source,
186
+ version_id: null,
187
+ status: "failed",
188
+ error: message,
189
+ mime_type: null,
190
+ size_bytes: 0,
191
+ fetcher: "local",
192
+ source_sha256: "",
193
+ };
194
+ callbacks.onEntryStart?.(outcome.source);
195
+ callbacks.onEntryComplete?.(failed);
196
+ aggregated.ingested.push(failed);
197
+ aggregated.total += 1;
198
+ aggregated.failed += 1;
199
+ } finally {
200
+ // Release the DB lock between sources so other consumers (a
201
+ // concurrent CLI call, the daemon, or a separate MCP server)
202
+ // can wedge in. The next source's first DB call reopens.
203
+ await ctx.db.release();
204
+ }
197
205
  }
198
- }
199
206
 
200
- const summary = formatSummary(aggregated);
201
- ctx.progress.done(summary);
202
- return aggregated;
207
+ const summary = formatSummary(aggregated);
208
+ ctx.progress.done(summary);
209
+ return aggregated;
210
+ });
203
211
  },
204
212
  });
205
213
 
@@ -8,6 +8,7 @@ import { readOperation } from "./read.ts";
8
8
  import { refreshOperation } from "./refresh.ts";
9
9
  import { removeOperation } from "./remove.ts";
10
10
  import { searchOperation } from "./search.ts";
11
+ import { statsOperation } from "./stats.ts";
11
12
  import { treeOperation } from "./tree.ts";
12
13
  import type { Operation } from "./types.ts";
13
14
  import { versionsOperation } from "./versions.ts";
@@ -28,6 +29,7 @@ export const OPERATIONS: Operation<any, any>[] = [
28
29
  readOperation,
29
30
  searchOperation,
30
31
  infoOperation,
32
+ statsOperation,
31
33
  versionsOperation,
32
34
  diffOperation,
33
35
  writeOperation,
@@ -1,5 +1,7 @@
1
1
  import { z } from "zod";
2
+ import { resolveEmbeddingWorkers } from "../context.ts";
2
3
  import { listDueRefreshes } from "../db/files.ts";
4
+ import { withEmbedderPool } from "../ingest/embedder-pool.ts";
3
5
  import { colors } from "../output/formatter.ts";
4
6
  import { refreshOne } from "../refresh/runner.ts";
5
7
  import { defineOperation } from "./types.ts";
@@ -47,26 +49,32 @@ export const refreshOperation = defineOperation({
47
49
  return `${lines.join("\n")}\n${parts.join(", ")}`;
48
50
  },
49
51
  handler: async (input, ctx) => {
50
- const targets = input.logical_path
51
- ? [input.logical_path]
52
- : (await listDueRefreshes(ctx.db)).map((r) => r.logical_path);
53
- const out: Array<{
54
- logical_path: string;
55
- status: "ok" | "unchanged" | "failed";
56
- new_version_id?: string;
57
- error?: string;
58
- }> = [];
59
- ctx.progress.start(targets.length, "refresh");
60
- for (const path of targets) {
61
- ctx.progress.tick(path);
62
- try {
63
- const r = await refreshOne(ctx, path, input.force, (sublabel) => ctx.progress.update(sublabel));
64
- out.push(r);
65
- } catch (err) {
66
- out.push({ logical_path: path, status: "failed", error: err instanceof Error ? err.message : String(err) });
52
+ // Per-command embedder pool: workers come up at the start of the
53
+ // refresh sweep and are killed before we return, so a manual
54
+ // `membot refresh` doesn't leave subprocesses around.
55
+ const workers = resolveEmbeddingWorkers(ctx.config.embedding.workers);
56
+ return withEmbedderPool(workers, ctx.config.embedding_model, async () => {
57
+ const targets = input.logical_path
58
+ ? [input.logical_path]
59
+ : (await listDueRefreshes(ctx.db)).map((r) => r.logical_path);
60
+ const out: Array<{
61
+ logical_path: string;
62
+ status: "ok" | "unchanged" | "failed";
63
+ new_version_id?: string;
64
+ error?: string;
65
+ }> = [];
66
+ ctx.progress.start(targets.length, "refresh");
67
+ for (const path of targets) {
68
+ ctx.progress.tick(path);
69
+ try {
70
+ const r = await refreshOne(ctx, path, input.force, (sublabel) => ctx.progress.update(sublabel));
71
+ out.push(r);
72
+ } catch (err) {
73
+ out.push({ logical_path: path, status: "failed", error: err instanceof Error ? err.message : String(err) });
74
+ }
67
75
  }
68
- }
69
- ctx.progress.done(`refresh: ${out.filter((r) => r.status === "ok").length}/${out.length} updated`);
70
- return { processed: out, count: out.length };
76
+ ctx.progress.done(`refresh: ${out.filter((r) => r.status === "ok").length}/${out.length} updated`);
77
+ return { processed: out, count: out.length };
78
+ });
71
79
  },
72
80
  });
@@ -0,0 +1,342 @@
1
+ import { z } from "zod";
2
+ import type { DbConnection, SqlParam } from "../db/connection.ts";
3
+ import { listDueRefreshes } from "../db/files.ts";
4
+ import { colors } from "../output/formatter.ts";
5
+ import { defineOperation } from "./types.ts";
6
+
7
/**
 * `membot_stats` (CLI: `stats`): read-only summary of the local index.
 *
 * The handler gathers file, content, chunk, blob and refresh aggregates —
 * optionally narrowed to logical paths under `prefix` — plus the database
 * file's path and size. `console_formatter` renders that result as an
 * aligned, sectioned key/value listing.
 */
export const statsOperation = defineOperation({
  name: "membot_stats",
  cliName: "stats",
  description: `Summarize the local membot index: file/version/chunk/blob counts, total content and on-disk size, refresh health, and breakdowns by source_type, downloader, and mime_type. Optional prefix narrows aggregates to a subtree (same semantics as 'membot tree <prefix>'). Read-only. Use this before membot_prune to gauge how much there is to drop, or as a first call to confirm the index has anything in it.`,
  inputSchema: z.object({
    prefix: z
      .string()
      .optional()
      .describe(
        "Restrict aggregates to logical paths starting with this prefix (e.g. 'docs/api/'). Omit to summarize the whole index.",
      ),
  }),
  outputSchema: z.object({
    prefix: z.string().nullable(),
    db_path: z.string(),
    db_size_bytes: z.number(),
    files: z.object({
      current: z.number(),
      tombstoned_paths: z.number(),
      total_versions: z.number(),
      distinct_paths: z.number(),
      by_source_type: z.record(z.string(), z.number()),
      by_downloader: z.record(z.string(), z.number()),
      by_mime_type: z.record(z.string(), z.number()),
    }),
    content: z.object({
      total_bytes: z.number(),
      total_versions_bytes: z.number(),
    }),
    chunks: z.object({
      current: z.number(),
      total: z.number(),
    }),
    blobs: z.object({
      count: z.number(),
      total_bytes: z.number(),
    }),
    refresh: z.object({
      scheduled: z.number(),
      due_now: z.number(),
      last_status: z.record(z.string(), z.number()),
    }),
  }),
  cli: { positional: ["prefix"] },
  console_formatter: (result) => {
    // One "key  value" row. The key column is padded to 22 characters minus
    // the indent, but never narrower than the key plus two spaces, so long
    // keys (e.g. mime types) still keep a gap before the value.
    const row = (key: string, value: string, indent = 0): string => {
      const width = Math.max(22 - indent, key.length + 2);
      return `${" ".repeat(indent)}${colors.dim(key.padEnd(width))}${value}`;
    };
    // A labelled breakdown: the label row followed by one row per bucket,
    // or a dimmed "(none)" placeholder when the record is empty.
    const breakdown = (label: string, counts: Record<string, number>): string[] => {
      const names = Object.keys(counts);
      const body =
        names.length === 0
          ? [` ${colors.dim("(none)")}`]
          : names.map((name) => row(name, String(counts[name]), 4));
      return [row(label, "", 2), ...body];
    };

    const title = colors.bold("membot index summary");
    const { files, content, chunks, blobs, refresh } = result;

    const output: string[] = [
      result.prefix ? `${title} ${colors.dim(`[prefix=${result.prefix}]`)}` : title,
      row("db_path", result.db_path),
      row("db_size_bytes", formatBytes(result.db_size_bytes)),

      "",
      colors.bold("files"),
      row("current", String(files.current), 2),
      row("tombstoned_paths", String(files.tombstoned_paths), 2),
      row("total_versions", String(files.total_versions), 2),
      row("distinct_paths", String(files.distinct_paths), 2),
      ...breakdown("by_source_type", files.by_source_type),
      ...breakdown("by_downloader", files.by_downloader),
      ...breakdown("by_mime_type", files.by_mime_type),

      "",
      colors.bold("content"),
      row("total_bytes", formatBytes(content.total_bytes), 2),
      row("total_versions_bytes", formatBytes(content.total_versions_bytes), 2),

      "",
      colors.bold("chunks"),
      row("current", String(chunks.current), 2),
      row("total", String(chunks.total), 2),

      "",
      colors.bold("blobs"),
      row("count", String(blobs.count), 2),
      row("total_bytes", formatBytes(blobs.total_bytes), 2),

      "",
      colors.bold("refresh"),
      row("scheduled", String(refresh.scheduled), 2),
      row("due_now", String(refresh.due_now), 2),
      ...breakdown("last_status", refresh.last_status),
    ];

    return output.join("\n");
  },
  handler: async (input, ctx) => {
    // A missing prefix is reported back as null and means "whole index".
    const prefix = input.prefix ?? null;
    const db_size_bytes = await dbFileSize(ctx.db.path);

    // Collected one after another, in this fixed order.
    const files = await collectFileStats(ctx.db, prefix);
    const content = await collectContentStats(ctx.db, prefix);
    const chunks = await collectChunkStats(ctx.db, prefix);
    const blobs = await collectBlobStats(ctx.db, prefix);
    const refresh = await collectRefreshStats(ctx.db, prefix);

    return {
      prefix,
      db_path: ctx.db.path,
      db_size_bytes,
      files,
      content,
      chunks,
      blobs,
      refresh,
    };
  },
});
131
+
132
/**
 * Size in bytes of the DuckDB file at `path`.
 *
 * Resolves to 0 when the file does not exist on disk (in-memory or freshly
 * opened database) and also when the lookup throws — this is best-effort
 * and never rejects.
 */
async function dbFileSize(path: string): Promise<number> {
  try {
    const file = Bun.file(path);
    if (!(await file.exists())) return 0;
    return file.size;
  } catch {
    return 0;
  }
}
142
+
143
/**
 * Build a `logical_path LIKE ?1` clause + params that match paths starting
 * with `prefix`, or an empty clause/params pair when `prefix` is null or "".
 *
 * The prefix is matched literally: `%`, `_` and `\` inside it are escaped
 * (and the clause declares `\` as the escape character), so a prefix such as
 * `my_docs/` does not also match `myXdocs/...`. This keeps the SQL filter in
 * agreement with plain `startsWith(prefix)` checks done in JavaScript.
 *
 * @param prefix Logical-path prefix, or null/empty for "no filter".
 * @returns `clause` is a WHERE fragment (without the `WHERE` keyword) that
 *          references positional parameter `?1`; `params` holds the single
 *          pattern value to bind to it.
 */
function prefixFilter(prefix: string | null): { clause: string; params: SqlParam[] } {
  if (!prefix) return { clause: "", params: [] };
  // Escape the LIKE metacharacters (and the escape character itself) so only
  // the trailing `%` we append acts as a wildcard.
  const literal = prefix.replace(/[\\%_]/g, "\\$&");
  return { clause: "logical_path LIKE ?1 ESCAPE '\\'", params: [`${literal}%`] };
}
148
+
149
/**
 * Join two WHERE fragments with ` AND `. An empty fragment is dropped, so
 * the result is the other fragment alone (or "" when both are empty).
 */
function and(base: string, extra: string): string {
  return [base, extra].filter((fragment) => fragment !== "").join(" AND ");
}
155
+
156
/** File-level aggregates returned by `collectFileStats`. */
interface FileStats {
  /** Row count of `current_files` (within the prefix, if any). */
  current: number;
  /** Logical paths whose latest (max `version_id`) row in `files` is a tombstone. */
  tombstoned_paths: number;
  /** Row count of `files`, i.e. every stored version. */
  total_versions: number;
  /** Number of distinct `logical_path` values in `files`. */
  distinct_paths: number;
  /** `current_files` row counts keyed by `source_type`. */
  by_source_type: Record<string, number>;
  /** `current_files` row counts keyed by `downloader`; NULL downloaders are left out. */
  by_downloader: Record<string, number>;
  /** `current_files` row counts keyed by `mime_type`; NULLs left out, capped at the top 10 plus "(other)". */
  by_mime_type: Record<string, number>;
}
165
+
166
/**
 * Gather file-level counts: current files, stored versions, distinct paths,
 * tombstoned paths, and `current_files` breakdowns by source type,
 * downloader and mime type.
 *
 * @param db     Connection the queries run on.
 * @param prefix Restrict every aggregate to logical paths under this prefix;
 *               null means the whole index.
 */
async function collectFileStats(db: DbConnection, prefix: string | null): Promise<FileStats> {
  const filter = prefixFilter(prefix);
  const whereSql = filter.clause ? `WHERE ${filter.clause}` : "";
  // Every scalar query below binds the same (possibly empty) prefix params.
  const count = (sql: string): Promise<number> => scalar(db, sql, ...filter.params);

  const current = await count(`SELECT COUNT(*) AS n FROM current_files ${whereSql}`);
  const total_versions = await count(`SELECT COUNT(*) AS n FROM files ${whereSql}`);
  const distinct_paths = await count(`SELECT COUNT(DISTINCT logical_path) AS n FROM files ${whereSql}`);

  // A path counts as tombstoned when its newest row (max version_id) is a
  // tombstone. The subquery picks the newest version per path (prefix applied
  // there); the join pulls that row back so its tombstone flag can be tested.
  const tombstoned_paths = await count(
    `SELECT COUNT(*) AS n
     FROM files f
     JOIN (
       SELECT logical_path, MAX(version_id) AS v FROM files ${whereSql} GROUP BY logical_path
     ) m ON f.logical_path = m.logical_path AND f.version_id = m.v
     WHERE f.tombstone = TRUE`,
  );

  return {
    current,
    tombstoned_paths,
    total_versions,
    distinct_paths,
    // Property initializers run top to bottom, so the three breakdown
    // queries still execute one after another in this order.
    by_source_type: await groupCount(db, "source_type", "current_files", filter),
    by_downloader: await groupCount(db, "downloader", "current_files", filter, { skipNull: true }),
    by_mime_type: await groupCount(db, "mime_type", "current_files", filter, { topN: 10, skipNull: true }),
  };
}
201
+
202
/**
 * Sum `size_bytes` over `current_files` (`total_bytes`) and over every row of
 * `files` (`total_versions_bytes`), optionally limited to a path prefix.
 * An empty match sums to 0 rather than NULL.
 */
async function collectContentStats(
  db: DbConnection,
  prefix: string | null,
): Promise<{ total_bytes: number; total_versions_bytes: number }> {
  const filter = prefixFilter(prefix);
  const whereSql = filter.clause ? `WHERE ${filter.clause}` : "";
  const sumSizes = (table: string): Promise<number> =>
    scalar(db, `SELECT COALESCE(SUM(size_bytes), 0) AS n FROM ${table} ${whereSql}`, ...filter.params);

  const total_bytes = await sumSizes("current_files");
  const total_versions_bytes = await sumSizes("files");
  return { total_bytes, total_versions_bytes };
}
220
+
221
/**
 * Count rows in `current_chunks` (`current`) and `chunks` (`total`),
 * optionally limited to logical paths under `prefix`.
 */
async function collectChunkStats(db: DbConnection, prefix: string | null): Promise<{ current: number; total: number }> {
  const filter = prefixFilter(prefix);
  // No prefix → no WHERE at all and no bound params.
  const whereSuffix = prefix ? ` WHERE ${filter.clause}` : "";
  const countRows = (table: string): Promise<number> =>
    scalar(db, `SELECT COUNT(*) AS n FROM ${table}${whereSuffix}`, ...filter.params);

  const current = await countRows("current_chunks");
  const total = await countRows("chunks");
  return { current, total };
}
232
+
233
/**
 * Count blobs and sum their `size_bytes`.
 *
 * Without a prefix this covers every row of `blobs`. With a prefix it covers
 * only blobs whose `sha256` is referenced (via a non-null `blob_sha256`) by a
 * `current_files` row under that prefix.
 */
async function collectBlobStats(
  db: DbConnection,
  prefix: string | null,
): Promise<{ count: number; total_bytes: number }> {
  const filter = prefixFilter(prefix);
  const sql = prefix
    ? `SELECT COUNT(*) AS count, COALESCE(SUM(size_bytes), 0) AS total
       FROM blobs
       WHERE sha256 IN (
         SELECT blob_sha256 FROM current_files
         WHERE ${filter.clause} AND blob_sha256 IS NOT NULL
       )`
    : `SELECT COUNT(*) AS count, COALESCE(SUM(size_bytes), 0) AS total FROM blobs`;

  const totals = await db.queryGet<{ count: number | bigint; total: number | bigint | null }>(
    sql,
    ...filter.params,
  );
  // Values may come back as bigint; a missing row or NULL becomes 0.
  return {
    count: Number(totals?.count ?? 0),
    total_bytes: Number(totals?.total ?? 0),
  };
}
255
+
256
/**
 * Refresh health for `current_files`, optionally limited to a path prefix:
 *  - `scheduled`: rows with a non-null `refresh_frequency_sec`;
 *  - `due_now`: entries returned by `listDueRefreshes` (filtered in JS by
 *    `startsWith(prefix)` when a prefix is given);
 *  - `last_status`: row counts keyed by non-null `last_refresh_status`.
 */
async function collectRefreshStats(
  db: DbConnection,
  prefix: string | null,
): Promise<{ scheduled: number; due_now: number; last_status: Record<string, number> }> {
  const filter = prefixFilter(prefix);

  const scheduled = await scalar(
    db,
    `SELECT COUNT(*) AS n FROM current_files WHERE ${and(filter.clause, "refresh_frequency_sec IS NOT NULL")}`,
    ...filter.params,
  );

  const dueRows = await listDueRefreshes(db);
  let due_now = 0;
  for (const dueRow of dueRows) {
    if (!prefix || dueRow.logical_path.startsWith(prefix)) due_now += 1;
  }

  const statusWhere = and("last_refresh_status IS NOT NULL", filter.clause);
  const statusRows = await db.queryAll<{ k: string | null; n: number | bigint }>(
    `SELECT last_refresh_status AS k, COUNT(*) AS n
     FROM current_files
     WHERE ${statusWhere}
     GROUP BY last_refresh_status
     ORDER BY n DESC`,
    ...filter.params,
  );
  const last_status: Record<string, number> = {};
  for (const { k: status, n: occurrences } of statusRows) {
    if (status === null) continue;
    last_status[status] = Number(occurrences);
  }

  return { scheduled, due_now, last_status };
}
282
+
283
/**
 * Run `sql` (with `params` bound) and return column `n` of the row that
 * `queryGet` yields, converted with `Number`. Returns 0 when there is no row
 * or when `n` is null.
 */
async function scalar(db: DbConnection, sql: string, ...params: SqlParam[]): Promise<number> {
  const first = await db.queryGet<{ n: number | bigint | null }>(sql, ...params);
  if (first == null || first.n == null) return 0;
  return Number(first.n);
}
288
+
289
/** Optional knobs for `groupCount`. */
interface GroupOptions {
  /** When true, rows whose grouped column is NULL are excluded by the query. */
  skipNull?: boolean;
  /** When set and there are more groups than this, the overflow is merged into an "(other)" bucket. */
  topN?: number;
}
293
+
294
/**
 * Count rows of `table` grouped by `column`, largest group first.
 *
 * @param db     Connection the query runs on.
 * @param column Column to group by; interpolated into the SQL as-is.
 * @param table  Table or view to read; interpolated into the SQL as-is.
 * @param pf     Optional prefix filter (clause + bound params) to apply.
 * @param opts   `skipNull` adds `<column> IS NOT NULL` to the filter; `topN`
 *               keeps only the first N groups and sums the rest under
 *               "(other)".
 * @returns Map from group value to row count. A NULL group value is keyed as
 *          "(null)".
 */
async function groupCount(
  db: DbConnection,
  column: string,
  table: string,
  pf: { clause: string; params: SqlParam[] },
  opts: GroupOptions = {},
): Promise<Record<string, number>> {
  const conditions = [pf.clause, opts.skipNull ? `${column} IS NOT NULL` : ""].filter(
    (condition) => condition !== "",
  );
  const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";

  const rows = await db.queryAll<{ k: string | null; n: number | bigint }>(
    `SELECT ${column} AS k, COUNT(*) AS n FROM ${table} ${where} GROUP BY ${column} ORDER BY n DESC`,
    ...pf.params,
  );

  // Roll-up only kicks in when topN is set (non-zero) and actually exceeded;
  // otherwise every row is kept under its own key.
  const keep = opts.topN && rows.length > opts.topN ? opts.topN : rows.length;

  const counts: Record<string, number> = {};
  let overflow = 0;
  rows.forEach((row, position) => {
    const rowCount = Number(row.n);
    if (position < keep) {
      counts[row.k ?? "(null)"] = rowCount;
    } else {
      overflow += rowCount;
    }
  });
  if (overflow > 0) counts["(other)"] = overflow;
  return counts;
}
330
+
331
/**
 * Render a byte count in human units using 1024 steps.
 *
 * Values below 1024 are printed verbatim with a " B" suffix. Larger values
 * are scaled through KB, MB, GB, TB (TB is the ceiling) and printed with one
 * decimal while the scaled value is below 100, and with no decimals from 100
 * upward.
 */
function formatBytes(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;

  const unitNames = ["KB", "MB", "GB", "TB"];
  const lastUnit = unitNames.length - 1;
  let scaled = bytes;
  let unitIndex = -1;
  // Divide down until the value fits its unit or we run out of units.
  for (; scaled >= 1024 && unitIndex < lastUnit; unitIndex += 1) {
    scaled /= 1024;
  }

  const decimals = scaled >= 100 ? 0 : 1;
  return `${scaled.toFixed(decimals)} ${unitNames[unitIndex]}`;
}