@desplega.ai/agent-swarm 1.92.1 → 1.93.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/openapi.json +63 -3
  2. package/package.json +5 -5
  3. package/src/be/db.ts +180 -6
  4. package/src/be/memory/boot-reembed.ts +84 -0
  5. package/src/be/memory/constants.ts +42 -1
  6. package/src/be/memory/providers/openai-embedding.ts +13 -0
  7. package/src/be/memory/providers/sqlite-store.ts +75 -26
  8. package/src/be/memory/raters/llm-client.ts +12 -5
  9. package/src/be/memory/reranker.ts +35 -17
  10. package/src/be/memory/types.ts +11 -0
  11. package/src/be/migrations/088_script_runs_list_indexes.sql +10 -0
  12. package/src/be/migrations/089_harness_variant.sql +2 -0
  13. package/src/be/modelsdev-cache.json +6478 -3099
  14. package/src/be/seed-pricing.ts +1 -0
  15. package/src/be/seed-scripts/catalog/boot-triage.inline.ts +221 -0
  16. package/src/be/seed-scripts/catalog/catalog-report.inline.ts +457 -0
  17. package/src/be/seed-scripts/catalog/compound-insights.inline.ts +863 -0
  18. package/src/be/seed-scripts/catalog/compound-insights.ts +371 -0
  19. package/src/be/seed-scripts/catalog/ops-catalog-audit.inline.ts +506 -0
  20. package/src/be/seed-scripts/index.ts +5 -5
  21. package/src/be/skill-sync.ts +28 -179
  22. package/src/commands/runner.ts +124 -7
  23. package/src/http/api-keys.ts +42 -0
  24. package/src/http/index.ts +9 -0
  25. package/src/http/mcp-bridge.ts +1 -1
  26. package/src/http/memory.ts +27 -24
  27. package/src/http/tasks.ts +10 -6
  28. package/src/providers/claude-adapter.ts +33 -1
  29. package/src/providers/claude-managed-adapter.ts +3 -0
  30. package/src/providers/claude-managed-models.ts +7 -0
  31. package/src/providers/codex-adapter.ts +8 -1
  32. package/src/providers/codex-models.ts +1 -0
  33. package/src/providers/codex-oauth/auth-json.ts +1 -0
  34. package/src/providers/harness-version.ts +7 -0
  35. package/src/providers/opencode-adapter.ts +11 -4
  36. package/src/providers/pi-mono-adapter.ts +12 -2
  37. package/src/providers/types.ts +2 -0
  38. package/src/scripts-runtime/egress-secrets.ts +83 -0
  39. package/src/scripts-runtime/eval-harness.ts +4 -0
  40. package/src/scripts-runtime/executors/types.ts +7 -0
  41. package/src/scripts-runtime/loader.ts +2 -0
  42. package/src/server-user.ts +2 -2
  43. package/src/slack/channel-join.ts +41 -0
  44. package/src/tasks/worker-follow-up.ts +12 -0
  45. package/src/tests/additive-buffer.test.ts +0 -1
  46. package/src/tests/api-key-tracking.test.ts +113 -0
  47. package/src/tests/approval-requests.test.ts +0 -6
  48. package/src/tests/claude-managed-setup.test.ts +0 -4
  49. package/src/tests/codex-pool.test.ts +2 -6
  50. package/src/tests/http-api-integration.test.ts +4 -6
  51. package/src/tests/memory-e2e.test.ts +6 -6
  52. package/src/tests/memory-edges.test.ts +0 -2
  53. package/src/tests/memory-rate-endpoint.test.ts +0 -2
  54. package/src/tests/memory-rater-e2e.test.ts +4 -7
  55. package/src/tests/memory-reranker.test.ts +135 -124
  56. package/src/tests/memory-store.test.ts +19 -1
  57. package/src/tests/memory.test.ts +64 -12
  58. package/src/tests/model-control.test.ts +1 -1
  59. package/src/tests/reload-config.test.ts +33 -17
  60. package/src/tests/runner-skills-refresh.test.ts +216 -46
  61. package/src/tests/script-runs-http.test.ts +7 -1
  62. package/src/tests/scripts-runtime-secret-egress.test.ts +129 -0
  63. package/src/tests/seed-scripts.test.ts +218 -1
  64. package/src/tests/session-attach.test.ts +6 -6
  65. package/src/tests/skill-fs-writer.test.ts +250 -0
  66. package/src/tests/slack-attachments-block.test.ts +0 -1
  67. package/src/tests/slack-blocks.test.ts +0 -1
  68. package/src/tests/slack-channel-join.test.ts +80 -0
  69. package/src/tests/slack-identity-resolution.test.ts +0 -1
  70. package/src/tests/structured-output.test.ts +0 -2
  71. package/src/tests/task-cascade-fail.test.ts +304 -0
  72. package/src/tests/use-dismissible-card.test.ts +0 -4
  73. package/src/tools/schedules/create-schedule.ts +2 -2
  74. package/src/tools/schedules/update-schedule.ts +1 -1
  75. package/src/tools/send-task.ts +2 -2
  76. package/src/tools/slack-post.ts +18 -15
  77. package/src/tools/slack-read.ts +9 -11
  78. package/src/tools/slack-reply.ts +18 -15
  79. package/src/tools/slack-start-thread.ts +17 -14
  80. package/src/tools/task-action.ts +2 -2
  81. package/src/types.ts +11 -0
  82. package/src/utils/context-window.ts +3 -0
  83. package/src/utils/credentials.ts +22 -2
  84. package/src/utils/skill-fs-writer.ts +220 -0
  85. package/src/utils/skills-refresh.ts +123 -40
  86. package/templates/workflows/llm-safe-release-context/config.json +13 -0
  87. package/templates/workflows/llm-safe-release-context/content.md +69 -0
@@ -0,0 +1,863 @@
1
+ import { z } from "zod";
2
+ import { publishCatalogReportPage } from "./catalog-report";
3
+
4
/** Builds an optional boolean toggle that carries the given description. */
const optionalFlag = (description: string) => z.boolean().optional().describe(description);

/**
 * Argument schema for the compound-insights script.
 *
 * Every field is optional and the schema itself declares no defaults; the
 * defaults quoted in the descriptions are applied by the script entry point
 * after parsing.
 */
export const argsSchema = z.object({
  days: z.number().int().positive().optional().describe("Look back this many days (default 3)"),
  includeToolUsage: optionalFlag("Include tool usage histogram (default true)"),
  includeScheduleHealth: optionalFlag("Include schedule health flags (default true)"),
  includeMemoryHealth: optionalFlag("Include memory health stats (default true)"),
  includeScriptCandidates: optionalFlag(
    "Include high-frequency tool-triplet candidates for future seed scripts (default true)",
  ),
  includeScriptUsage: optionalFlag(
    "Include actual script run, creation, and edit metrics (default true)",
  ),
  includeCostAndTokens: optionalFlag(
    "Include session cost and token metrics with honesty rails (default true)",
  ),
  includeByAgent: optionalFlag("Include per-agent task/completion/failure breakdown (default true)"),
  publishPage: optionalFlag("Publish an authed HTML page (default true)"),
});
35
+
36
/**
 * Failure reasons that represent swarm bookkeeping rather than genuine
 * failures (Lead Rule #16). The run engine collapses redundant sibling tasks
 * into these statuses, so counting them would create phantom failure spikes.
 * They are therefore left out of failureClusters, scheduleHealth and the
 * byAgent failure counts.
 */
const EXCLUDED_FAIL = [
  "superseded_workflow_task",
  "cancelled",
];
43
+
44
/**
 * Converts a `db_query` result into an array of plain objects.
 *
 * `db_query` answers with positional rows (`rows: unknown[][]`) and a separate
 * `columns` array rather than objects, so each positional row is zipped with
 * the column names. The payload is read from `res.data` when present,
 * otherwise from `res` itself. Rows that are not arrays are passed through
 * unchanged.
 *
 * @param res Raw query response (or its unwrapped payload); may be nullish.
 * @returns One object per row, keyed by column name; empty when no rows exist.
 */
function rowsToObjects(res: any): any[] {
  const payload = res?.data ?? res;
  const columnNames: string[] = payload?.columns ?? [];
  const rawRows = payload?.rows ?? [];
  return rawRows.map((row: any) => {
    if (!Array.isArray(row)) {
      return row;
    }
    const pairs = columnNames.map((columnName, position) => [columnName, row[position]]);
    return Object.fromEntries(pairs);
  });
}
56
+
57
/**
 * Coerces an arbitrary value to a finite number.
 *
 * @param value Anything; `null` and `undefined` are treated as 0.
 * @returns The numeric value, or 0 when the coercion is not finite (NaN, ±Infinity).
 */
function asNumber(value: any): number {
  const coerced = Number(value ?? 0);
  if (!Number.isFinite(coerced)) {
    return 0;
  }
  return coerced;
}
61
+
62
/**
 * Rounds a number to one decimal place using `Math.round`.
 *
 * @param value Number to round.
 * @returns `value` rounded to the nearest tenth.
 */
function round1(value: number): number {
  const tenths = Math.round(value * 10);
  return tenths / 10;
}
65
+
66
/**
 * Expresses `part` as a percentage of `total`, rounded to one decimal place.
 *
 * @param part Numerator.
 * @param total Denominator; anything that is not greater than 0 yields 0.
 * @returns The percentage, or 0 when `total` is not positive.
 */
function percent(part: number, total: number): number {
  if (!(total > 0)) {
    return 0;
  }
  return round1((part / total) * 100);
}
69
+
70
/**
 * Rounds a number to four decimal places using `Math.round`.
 *
 * @param value Number to round.
 * @returns `value` rounded to the nearest ten-thousandth.
 */
function round4(value: number): number {
  const tenThousandths = Math.round(value * 10000);
  return tenThousandths / 10000;
}
73
+
74
/**
 * Returns the nearest-rank percentile of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is left untouched.
 * The computed rank is clamped to the valid index range.
 *
 * @param values Samples to rank.
 * @param p Percentile in the 0-100 range.
 * @returns The selected sample, or `null` when `values` is empty.
 */
function percentile(values: number[], p: number): number | null {
  const sampleCount = values.length;
  if (sampleCount === 0) {
    return null;
  }
  const ascending = Array.from(values).sort((left, right) => left - right);
  const rank = Math.ceil((p / 100) * sampleCount) - 1;
  const upperBounded = Math.min(sampleCount - 1, rank);
  const position = Math.max(0, upperBounded);
  return ascending[position] ?? null;
}
80
+
81
/**
 * Pulls the first tool name out of a serialized log line.
 *
 * Looks for a `"type":"tool_use"` marker and captures the value of the first
 * `"name"` property that follows it (whitespace around the colons is allowed).
 *
 * @param content Raw log line text.
 * @returns The captured tool name, or `null` when no match is found.
 */
function extractToolName(content: string): string | null {
  const toolUsePattern = /"type"\s*:\s*"tool_use"[\s\S]*?"name"\s*:\s*"([^"]+)"/;
  const found = toolUsePattern.exec(content);
  if (found === null) {
    return null;
  }
  return found[1] ?? null;
}
85
+
86
/**
 * Turns a tool name into a lowercase, hyphen-separated slug.
 *
 * Steps, applied in order: drop a leading `mcp__`, turn double underscores and
 * then single underscores into hyphens, replace each run of characters outside
 * `[a-zA-Z0-9-]` with one hyphen, trim leading/trailing hyphens, lowercase.
 *
 * @param tool Tool name to slugify.
 * @returns The slug; may be empty when nothing alphanumeric remains.
 */
function toolSlug(tool: string): string {
  const rewrites: Array<[RegExp, string]> = [
    [/^mcp__/, ""],
    [/__/g, "-"],
    [/_/g, "-"],
    [/[^a-zA-Z0-9-]+/g, "-"],
    [/^-+|-+$/g, ""],
  ];
  let slug = tool;
  for (const [pattern, replacement] of rewrites) {
    slug = slug.replace(pattern, replacement);
  }
  return slug.toLowerCase();
}
95
+
96
/**
 * Decodes a binary embedding blob into a `Float32Array`.
 *
 * Accepted shapes for `value`:
 *  - `Uint8Array` (used as-is);
 *  - any other ArrayBuffer view (e.g. `Float32Array`, `DataView`) — its
 *    underlying bytes are reinterpreted rather than its element values;
 *  - a raw `ArrayBuffer`;
 *  - a plain array of byte values;
 *  - an object with a `data` array of byte values;
 *  - an object whose keys are all decimal digits, read as index → byte.
 *
 * @param value Candidate blob in one of the shapes above.
 * @returns The decoded floats, or `null` when `value` is falsy, has an
 *   unrecognized shape, or its byte length is below 4 or not a multiple of 4.
 */
function decodeFloat32Blob(value: any): Float32Array | null {
  if (!value) return null;
  let bytes: Uint8Array | null = null;
  if (value instanceof Uint8Array) {
    bytes = value;
  } else if (ArrayBuffer.isView(value)) {
    // Other typed arrays / DataView: take the raw bytes. Without this branch
    // they would match the digit-keyed object case below and have their
    // element values coerced to bytes.
    bytes = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
  } else if (value instanceof ArrayBuffer) {
    bytes = new Uint8Array(value);
  } else if (Array.isArray(value)) {
    bytes = Uint8Array.from(value);
  } else if (typeof value === "object" && Array.isArray(value.data)) {
    bytes = Uint8Array.from(value.data);
  } else if (typeof value === "object") {
    const keys = Object.keys(value);
    if (keys.length > 0 && keys.every((key) => /^\d+$/.test(key))) {
      bytes = Uint8Array.from(Object.values(value) as number[]);
    }
  }
  if (!bytes || bytes.byteLength < 4 || bytes.byteLength % 4 !== 0) return null;
  // Copy the byte range so the Float32Array starts on an aligned, standalone buffer.
  return new Float32Array(bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength));
}
111
+
112
/**
 * Computes the cosine similarity of two vectors.
 *
 * Only the overlapping prefix (the shorter of the two lengths) contributes to
 * the dot product and to both norms.
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns The similarity, or 0 when either vector has zero magnitude over
 *   the compared range.
 */
function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  const overlap = Math.min(a.length, b.length);
  let dotProduct = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  for (let idx = 0; idx < overlap; idx += 1) {
    const left = a[idx] ?? 0;
    const right = b[idx] ?? 0;
    dotProduct += left * right;
    squaredNormA += left * left;
    squaredNormB += right * right;
  }
  const hasMagnitude = squaredNormA !== 0 && squaredNormB !== 0;
  return hasMagnitude ? dotProduct / (Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB)) : 0;
}
127
+
128
/**
 * Builds the script-usage section from pre-fetched query rows.
 *
 * @param rows Script run rows; `scriptName`, `kind`, `status` and `durationMs` are read.
 * @param creationRows Creation counts; `scope`, `isScratch` and `count` are read.
 * @param editRows Edit counts; `scope` and `count` are read.
 * @param toolRows Tool-call counts; `tool` and `calls` are read.
 * @returns An object with `source` notes, overall and per-script `runs`
 *   statistics (per-script list is the top 20 by run count), `creations`
 *   (scratch counted separately), `edits`, and `mcpToolCalls`.
 */
function summarizeScriptUsage(rows: any[], creationRows: any[], editRows: any[], toolRows: any[]) {
  const FAILURE_STATUSES = new Set(["failed", "cancelled", "aborted_limit"]);
  const TERMINAL_STATUSES = new Set(["completed", "failed", "cancelled", "aborted_limit"]);

  const isInline = (run: any) => run.kind === "inline";
  const isWorkflow = (run: any) => run.kind === "workflow";
  const isCompleted = (run: any) => run.status === "completed";
  const isFailure = (run: any) => FAILURE_STATUSES.has(String(run.status));
  const isUnfinished = (run: any) => !TERMINAL_STATUSES.has(String(run.status));
  const scopeOf = (row: any) => String(row.scope || "unknown");

  type ScriptTally = {
    scriptName: string;
    runs: number;
    completed: number;
    failed: number;
    inline: number;
    workflow: number;
    durations: number[];
  };

  // Single pass: collect positive durations overall and tally per script name.
  const allDurations: number[] = [];
  const tallies = new Map<string, ScriptTally>();
  for (const run of rows) {
    const durationMs = asNumber(run.durationMs);
    if (durationMs > 0) allDurations.push(durationMs);

    const scriptName = String(run.scriptName || "(inline source)");
    let tally = tallies.get(scriptName);
    if (!tally) {
      tally = {
        scriptName,
        runs: 0,
        completed: 0,
        failed: 0,
        inline: 0,
        workflow: 0,
        durations: [],
      };
      tallies.set(scriptName, tally);
    }
    tally.runs += 1;
    if (isInline(run)) tally.inline += 1;
    if (isWorkflow(run)) tally.workflow += 1;
    if (isCompleted(run)) tally.completed += 1;
    if (isFailure(run)) tally.failed += 1;
    if (durationMs > 0) tally.durations.push(durationMs);
  }

  const perScript = Array.from(tallies.values(), (tally) => ({
    scriptName: tally.scriptName,
    runs: tally.runs,
    completed: tally.completed,
    failed: tally.failed,
    successRate: percent(tally.completed, tally.runs),
    durationP50Ms: percentile(tally.durations, 50),
    durationP95Ms: percentile(tally.durations, 95),
    inline: tally.inline,
    workflow: tally.workflow,
  }))
    .sort((first, second) => second.runs - first.runs)
    .slice(0, 20);

  // Scratch scripts are counted on their own and kept out of the per-scope totals.
  const creationsByScope: Record<string, number> = {};
  let nonScratchCreations = 0;
  let scratchCreations = 0;
  for (const created of creationRows) {
    const count = asNumber(created.count);
    if (asNumber(created.isScratch) === 1) {
      scratchCreations += count;
      continue;
    }
    nonScratchCreations += count;
    const scope = scopeOf(created);
    creationsByScope[scope] = (creationsByScope[scope] ?? 0) + count;
  }

  const editsByScope: Record<string, number> = {};
  let totalEdits = 0;
  for (const edited of editRows) {
    const count = asNumber(edited.count);
    totalEdits += count;
    const scope = scopeOf(edited);
    editsByScope[scope] = (editsByScope[scope] ?? 0) + count;
  }

  const totalRuns = rows.length;
  const completedRuns = rows.filter(isCompleted).length;

  return {
    source: {
      authoritativeRuns: "script_runs",
      mcpCallSignal: "session_logs tool_use for script tools",
      reconciliation:
        "`script-run` via MCP calls /api/scripts/run, which records kind='inline' rows in script_runs; launch-script-run/workflows record kind='workflow'. session_logs counts agent tool calls and must not be added to script_runs totals.",
    },
    runs: {
      total: totalRuns,
      inline: rows.filter(isInline).length,
      workflow: rows.filter(isWorkflow).length,
      completed: completedRuns,
      failed: rows.filter(isFailure).length,
      runningOrPaused: rows.filter(isUnfinished).length,
      successRate: percent(completedRuns, totalRuns),
      durationP50Ms: percentile(allDurations, 50),
      durationP95Ms: percentile(allDurations, 95),
      perScript,
    },
    creations: {
      totalNonScratch: nonScratchCreations,
      scratch: scratchCreations,
      byScope: creationsByScope,
    },
    edits: {
      total: totalEdits,
      byScope: editsByScope,
    },
    mcpToolCalls: toolRows.map((toolRow) => ({ tool: toolRow.tool, calls: asNumber(toolRow.calls) })),
  };
}
248
+
249
/**
 * Builds the cost-and-token section from session cost rows.
 *
 * Rows whose `costSource` is "harness" or "pricing-table" are treated as
 * trusted. The headline per-task average uses only trusted rows that carry a
 * `taskId`, divided by the number of distinct task ids; rows without a
 * `taskId` and rows marked "unpriced" are reported in their own fields.
 *
 * @param rows Cost rows; `totalCostUsd`, `costSource`, `taskId`, the token
 *   columns, `numTurns`, `model`, `agentName` and `provider` are read.
 * @returns Spend totals, token totals, unknown-value counts, and spend
 *   breakdowns by model, agent, provider and cost source (each sorted by
 *   rounded spend, highest first).
 */
function summarizeCostAndTokens(rows: any[]) {
  const TRUSTED_SOURCES = new Set(["harness", "pricing-table"]);

  const isTrusted = (row: any) => TRUSTED_SOURCES.has(String(row.costSource));
  const isUnpriced = (row: any) => String(row.costSource) === "unpriced";
  const isMissing = (value: any) => value === null || value === undefined;
  const costOf = (row: any) => asNumber(row.totalCostUsd);
  const spendOf = (subset: any[]) => subset.reduce((sum, row) => sum + costOf(row), 0);

  const trustedRows = rows.filter(isTrusted);
  const unpricedRows = rows.filter(isUnpriced);
  const nonTaskRows = rows.filter((row) => !row.taskId);
  const trustedTaskRows = trustedRows.filter((row) => row.taskId);
  const trustedTaskIds = new Set(trustedTaskRows.map((row) => String(row.taskId)));
  const trustedTaskSpend = spendOf(trustedTaskRows);

  // Token totals skip rows where the column is null/undefined instead of counting them as 0.
  const sumToken = (field: string) => {
    let total = 0;
    for (const row of rows) {
      if (isMissing(row[field])) continue;
      total += asNumber(row[field]);
    }
    return total;
  };
  const unknownCount = (field: string) => rows.filter((row) => isMissing(row[field])).length;

  type SpendBucket = {
    key: string;
    rows: number;
    spendUsd: number;
    trustedSpendUsd: number;
    unpricedRows: number;
  };

  const groupBy = (field: string) => {
    const buckets = new Map<string, SpendBucket>();
    for (const row of rows) {
      const key = String(row[field] || "unknown");
      let bucket = buckets.get(key);
      if (!bucket) {
        bucket = { key, rows: 0, spendUsd: 0, trustedSpendUsd: 0, unpricedRows: 0 };
        buckets.set(key, bucket);
      }
      bucket.rows += 1;
      bucket.spendUsd += costOf(row);
      if (isTrusted(row)) bucket.trustedSpendUsd += costOf(row);
      if (isUnpriced(row)) bucket.unpricedRows += 1;
    }
    // Round first, then order by the rounded spend.
    return Array.from(buckets.values(), (bucket) => ({
      ...bucket,
      spendUsd: round4(bucket.spendUsd),
      trustedSpendUsd: round4(bucket.trustedSpendUsd),
    })).sort((first, second) => second.spendUsd - first.spendUsd);
  };

  const headlineTaskCount = trustedTaskIds.size;

  return {
    source: {
      table: "session_costs",
      providerDerivation:
        "provider is derived from agents.harness_provider, then agents.provider, because session_costs does not carry a provider column",
      headlineAvgCostRule:
        "avgCostPerTaskUsd excludes unpriced rows and rows with null taskId; null-task sessions are reported separately",
    },
    rows: rows.length,
    taskCountForHeadlineAvg: headlineTaskCount,
    avgCostPerTaskUsd: headlineTaskCount > 0 ? round4(trustedTaskSpend / headlineTaskCount) : null,
    totalSpendUsd: round4(spendOf(rows)),
    trustedSpendUsd: round4(spendOf(trustedRows)),
    trustedRows: trustedRows.length,
    trustedRowPercent: percent(trustedRows.length, rows.length),
    unpricedRows: unpricedRows.length,
    unpricedSpendUsd: round4(spendOf(unpricedRows)),
    nonTaskSessionRows: nonTaskRows.length,
    nonTaskSessionSpendUsd: round4(spendOf(nonTaskRows)),
    tokenTotals: {
      inputTokens: sumToken("inputTokens"),
      outputTokens: sumToken("outputTokens"),
      cacheReadTokens: sumToken("cacheReadTokens"),
      cacheWriteTokens: sumToken("cacheWriteTokens"),
      reasoningOutputTokens: sumToken("reasoningOutputTokens"),
      thinkingTokens: sumToken("thinkingTokens"),
    },
    unknownCounts: {
      cacheWriteTokens: unknownCount("cacheWriteTokens"),
      numTurns: unknownCount("numTurns"),
    },
    byModel: groupBy("model"),
    byAgent: groupBy("agentName"),
    byProvider: groupBy("provider"),
    byCostSource: groupBy("costSource"),
  };
}
341
+
342
+ /**
343
+ * Daily compounding insights — compressed JSON for Phase 0 evolution.
344
+ *
345
+ * Swarm-wide by design: every section aggregates across ALL agents via direct
346
+ * read-only SQL (no per-agent scoping), so a single call replaces ~25 raw tool
347
+ * roundtrips. Parametric via `days` + the `include*` flags.
348
+ */
349
+ export default async function compoundInsights(args: any, ctx: any) {
350
+ const parsed = argsSchema.safeParse(args || {});
351
+ if (!parsed.success) return { error: "invalid args: " + parsed.error.message };
352
+ const days = parsed.data.days || 3;
353
+ const includeToolUsage = parsed.data.includeToolUsage !== false;
354
+ const includeScheduleHealth = parsed.data.includeScheduleHealth !== false;
355
+ const includeMemoryHealth = parsed.data.includeMemoryHealth !== false;
356
+ const includeScriptCandidates = parsed.data.includeScriptCandidates !== false;
357
+ const includeScriptUsage = parsed.data.includeScriptUsage !== false;
358
+ const includeCostAndTokens = parsed.data.includeCostAndTokens !== false;
359
+ const includeByAgent = parsed.data.includeByAgent !== false;
360
+ const publishPage = parsed.data.publishPage !== false;
361
+
362
+ // `days` is a validated positive int, so it is safe to interpolate into the
363
+ // SQLite datetime modifier. EXCLUDED_FAIL is a fixed constant list.
364
+ const w = `datetime('now','-${days} days')`;
365
+ const exclList = EXCLUDED_FAIL.map((r) => `'${r}'`).join(",");
366
+ // A "real" failure = status failed AND not one of the bookkeeping reasons.
367
+ const realFail = `t.status='failed' AND (t.failureReason IS NULL OR t.failureReason NOT IN (${exclList}))`;
368
+
369
+ const insights: any = { days, generatedAt: new Date().toISOString() };
370
+
371
+ // Task summary (all agents, direct SQL).
372
+ const statusRows = rowsToObjects(
373
+ await ctx.swarm.db_query({
374
+ sql: `SELECT status, count(*) as cnt FROM agent_tasks t WHERE t.createdAt > ${w} GROUP BY status`,
375
+ }),
376
+ );
377
+ const statusCounts: Record<string, number> = {};
378
+ let total = 0;
379
+ for (const r of statusRows) {
380
+ statusCounts[r.status] = r.cnt;
381
+ total += r.cnt;
382
+ }
383
+ const completed = statusCounts.completed ?? 0;
384
+ const failed = statusCounts.failed ?? 0;
385
+ insights.taskSummary = {
386
+ total,
387
+ completed,
388
+ failed,
389
+ completionRate: total > 0 ? Math.round((completed / total) * 1000) / 10 : 0,
390
+ failureRate: total > 0 ? Math.round((failed / total) * 1000) / 10 : 0,
391
+ statusCounts,
392
+ };
393
+
394
+ // Failure clusters (real failures only, normalized to a 60-char lowercased prefix).
395
+ insights.failureClusters = rowsToObjects(
396
+ await ctx.swarm.db_query({
397
+ sql: `SELECT substr(lower(t.failureReason),1,60) as reason, count(*) as count
398
+ FROM agent_tasks t
399
+ WHERE ${realFail} AND t.failureReason IS NOT NULL AND t.createdAt > ${w}
400
+ GROUP BY reason ORDER BY count DESC LIMIT 10`,
401
+ }),
402
+ );
403
+
404
+ // Schedule health (>= 2 runs, > 20% real-failure rate).
405
+ if (includeScheduleHealth) {
406
+ const sh = rowsToObjects(
407
+ await ctx.swarm.db_query({
408
+ sql: `SELECT s.name as name, s.id as id, count(t.id) as runs,
409
+ sum(case when ${realFail} then 1 else 0 end) as failed
410
+ FROM scheduled_tasks s
411
+ JOIN agent_tasks t ON t.scheduleId = s.id
412
+ WHERE t.createdAt > ${w} AND t.status != 'cancelled'
413
+ GROUP BY s.id, s.name HAVING runs >= 2`,
414
+ }),
415
+ );
416
+ insights.scheduleHealth = sh
417
+ .map((r: any) => ({
418
+ name: r.name,
419
+ id: r.id,
420
+ runs: r.runs,
421
+ failureRate: r.runs > 0 ? Math.round((r.failed / r.runs) * 100) : 0,
422
+ }))
423
+ .filter((r: any) => r.failureRate > 20)
424
+ .sort((a: any, b: any) => b.failureRate - a.failureRate);
425
+ }
426
+
427
+ // Tool usage (top 25). Tool names live inside the `content` JSON of
428
+ // session_logs (no dedicated column), so extract the name SQL-side: the
429
+ // `'%"type":"tool_use"%'` filter excludes tool_result rows (which only carry
430
+ // `tool_use_id`), and instr/substr pull the first tool name per log line.
431
+ // Approximate: a log line with parallel tool_use blocks counts only its first.
432
+ if (includeToolUsage) {
433
+ insights.toolUsage = rowsToObjects(
434
+ await ctx.swarm.db_query({
435
+ sql: `WITH tu AS (
436
+ SELECT substr(content, instr(content,'"type":"tool_use"')) AS tail
437
+ FROM session_logs
438
+ WHERE content LIKE '%"type":"tool_use"%' AND createdAt > ${w}
439
+ ),
440
+ nm AS (
441
+ SELECT substr(tail, instr(tail,'"name":"')+8) AS rest
442
+ FROM tu WHERE instr(tail,'"name":"') > 0
443
+ )
444
+ SELECT substr(rest,1,instr(rest,'"')-1) AS tool, count(*) AS calls
445
+ FROM nm GROUP BY tool ORDER BY calls DESC LIMIT 25`,
446
+ }),
447
+ ).map((r: any) => ({ tool: r.tool, calls: r.calls }));
448
+ }
449
+
450
+ // Memory health (whole store, by scope + source). Pollution markers are
451
+ // SQL-light counts plus JS-side embedding similarity where available; prod
452
+ // SQLite does not expose a scalar cosine_similarity() function.
453
+ if (includeMemoryHealth) {
454
+ const memRows = rowsToObjects(
455
+ await ctx.swarm.db_query({
456
+ sql: `SELECT scope, source, count(*) as cnt,
457
+ sum(case when accessCount = 0 then 1 else 0 end) as zeroAccess,
458
+ sum(case when sourceTaskId IS NOT NULL OR sourcePath IS NOT NULL then 1 else 0 end) as referenced
459
+ FROM agent_memory GROUP BY scope, source`,
460
+ }),
461
+ );
462
+ const totalMem = memRows.reduce((s: number, r: any) => s + (r.cnt ?? 0), 0);
463
+ const bySource: any = {};
464
+ for (const r of memRows) {
465
+ bySource[r.source] ??= {
466
+ total: 0,
467
+ percentOfStore: 0,
468
+ zeroAccess: 0,
469
+ zeroAccessPercent: 0,
470
+ referenced: 0,
471
+ };
472
+ bySource[r.source].total += asNumber(r.cnt);
473
+ bySource[r.source].zeroAccess += asNumber(r.zeroAccess);
474
+ bySource[r.source].referenced += asNumber(r.referenced);
475
+ }
476
+ for (const source of Object.keys(bySource)) {
477
+ bySource[source].percentOfStore = percent(bySource[source].total, totalMem);
478
+ bySource[source].zeroAccessPercent = percent(bySource[source].zeroAccess, bySource[source].total);
479
+ }
480
+
481
+ const autoSnapshotSources = ["session_summary", "task_completion"];
482
+ const autoSnapshotTotal = autoSnapshotSources.reduce(
483
+ (sum, source) => sum + (bySource[source]?.total ?? 0),
484
+ 0,
485
+ );
486
+ const popularButUseless = rowsToObjects(
487
+ await ctx.swarm.db_query({
488
+ sql: `SELECT id, name, source, accessCount, alpha, beta,
489
+ round(alpha / nullif(alpha + beta, 0), 3) as usefulness,
490
+ substr(content, 1, 180) as preview
491
+ FROM agent_memory
492
+ WHERE source IN ('session_summary','task_completion')
493
+ AND accessCount >= 5
494
+ AND alpha <= beta
495
+ ORDER BY accessCount DESC, beta DESC LIMIT 10`,
496
+ }),
497
+ ).map((r: any) => ({
498
+ id: r.id,
499
+ name: r.name,
500
+ source: r.source,
501
+ accessCount: asNumber(r.accessCount),
502
+ usefulness: Number(r.usefulness ?? 0),
503
+ preview: r.preview,
504
+ }));
505
+ const zeroAccessStaleRefRows = rowsToObjects(
506
+ await ctx.swarm.db_query({
507
+ sql: `SELECT source, count(*) as count
508
+ FROM agent_memory
509
+ WHERE accessCount = 0
510
+ AND (sourceTaskId IS NOT NULL OR sourcePath IS NOT NULL)
511
+ AND createdAt < datetime('now','-${days} days')
512
+ GROUP BY source ORDER BY count DESC`,
513
+ }),
514
+ );
515
+
516
+ const similarityRows = rowsToObjects(
517
+ await ctx.swarm.db_query({
518
+ sql: `SELECT id, name, source, accessCount, embedding
519
+ FROM agent_memory
520
+ WHERE source IN ('session_summary','task_completion')
521
+ AND embedding IS NOT NULL
522
+ ORDER BY accessCount DESC LIMIT 30`,
523
+ }),
524
+ );
525
+ let strongestAutoSnapshotPair: any = null;
526
+ const vectors = similarityRows
527
+ .map((r: any) => ({ ...r, vector: decodeFloat32Blob(r.embedding) }))
528
+ .filter((r: any) => r.vector);
529
+ for (let i = 0; i < vectors.length; i++) {
530
+ for (let j = i + 1; j < vectors.length; j++) {
531
+ const similarity = cosineSimilarity(vectors[i].vector, vectors[j].vector);
532
+ if (!strongestAutoSnapshotPair || similarity > strongestAutoSnapshotPair.similarity) {
533
+ strongestAutoSnapshotPair = {
534
+ similarity: round1(similarity * 100) / 100,
535
+ a: { id: vectors[i].id, name: vectors[i].name, source: vectors[i].source },
536
+ b: { id: vectors[j].id, name: vectors[j].name, source: vectors[j].source },
537
+ };
538
+ }
539
+ }
540
+ }
541
+
542
+ insights.memoryHealth = {
543
+ total: totalMem,
544
+ byScope: memRows.reduce((m: any, r: any) => {
545
+ m[r.scope] = (m[r.scope] ?? 0) + r.cnt;
546
+ return m;
547
+ }, {}),
548
+ bySource,
549
+ pollution: {
550
+ autoSnapshotSources,
551
+ autoSnapshotTotal,
552
+ autoSnapshotPercent: percent(autoSnapshotTotal, totalMem),
553
+ popularButUselessAutoSnapshots: popularButUseless,
554
+ zeroAccessStaleRefs: {
555
+ total: zeroAccessStaleRefRows.reduce((sum: number, r: any) => sum + asNumber(r.count), 0),
556
+ bySource: zeroAccessStaleRefRows.reduce((m: any, r: any) => {
557
+ m[r.source] = asNumber(r.count);
558
+ return m;
559
+ }, {}),
560
+ },
561
+ similarityCheck: {
562
+ sqliteCosineSimilarityAvailable: false,
563
+ path: "js",
564
+ sampledAutoSnapshots: vectors.length,
565
+ strongestAutoSnapshotPair,
566
+ },
567
+ },
568
+ };
569
+ }
570
+
571
+ // Evolution/self-scripting candidates: high-frequency consecutive tool
572
+ // triplets are good prompts for a future seed script.
573
+ if (includeScriptCandidates) {
574
+ const rows = rowsToObjects(
575
+ await ctx.swarm.db_query({
576
+ sql: `WITH raw AS (
577
+ SELECT sessionId, iteration, lineNumber, content,
578
+ json_extract(content, '$.tool_name') as jsonToolName
579
+ FROM session_logs
580
+ WHERE createdAt > ${w}
581
+ AND (content LIKE '%"type":"tool_use"%' OR json_extract(content, '$.tool_name') IS NOT NULL)
582
+ )
583
+ SELECT sessionId, iteration, lineNumber, jsonToolName, content
584
+ FROM raw ORDER BY sessionId, iteration, lineNumber LIMIT 100`,
585
+ }),
586
+ );
587
+ const bySession = new Map<string, string[]>();
588
+ for (const row of rows) {
589
+ const tool = row.jsonToolName || extractToolName(String(row.content ?? ""));
590
+ if (!tool) continue;
591
+ const key = String(row.sessionId ?? "unknown");
592
+ const tools = bySession.get(key) ?? [];
593
+ tools.push(tool);
594
+ bySession.set(key, tools);
595
+ }
596
+ const counts = new Map<string, { tools: string[]; count: number }>();
597
+ for (const tools of bySession.values()) {
598
+ for (let i = 0; i <= tools.length - 3; i++) {
599
+ const triplet = tools.slice(i, i + 3);
600
+ const key = triplet.join(" -> ");
601
+ const current = counts.get(key) ?? { tools: triplet, count: 0 };
602
+ current.count += 1;
603
+ counts.set(key, current);
604
+ }
605
+ }
606
+ insights.scriptCandidates = [...counts.values()]
607
+ .sort((a, b) => b.count - a.count)
608
+ .slice(0, 10)
609
+ .map((r) => ({
610
+ tools: r.tools,
611
+ count: r.count,
612
+ suggestedName: r.tools.map(toolSlug).filter(Boolean).slice(0, 3).join("-").slice(0, 80),
613
+ }));
614
+ }
615
+
616
+ // Actual script usage. Authoritative run counts come from `script_runs`;
617
+ // session_logs tool_use rows are a separate MCP-call signal for reconciliation
618
+ // and are intentionally not added to run totals.
619
+ if (includeScriptUsage) {
620
+ const runRows = rowsToObjects(
621
+ await ctx.swarm.db_query({
622
+ sql: `WITH journal_durations AS (
623
+ SELECT runId, sum(durationMs) AS journalDurationMs
624
+ FROM script_run_journal
625
+ WHERE durationMs IS NOT NULL
626
+ GROUP BY runId
627
+ )
628
+ SELECT sr.scriptName, sr.kind, sr.status, sr.startedAt, sr.finishedAt,
629
+ COALESCE(
630
+ jd.journalDurationMs,
631
+ CASE
632
+ WHEN sr.finishedAt IS NOT NULL
633
+ THEN CAST((julianday(sr.finishedAt) - julianday(sr.startedAt)) * 86400000 AS INTEGER)
634
+ ELSE NULL
635
+ END
636
+ ) AS durationMs
637
+ FROM script_runs sr
638
+ LEFT JOIN journal_durations jd ON jd.runId = sr.id
639
+ WHERE sr.startedAt > ${w}
640
+ ORDER BY sr.startedAt DESC`,
641
+ }),
642
+ );
643
+ const creationRows = rowsToObjects(
644
+ await ctx.swarm.db_query({
645
+ sql: `SELECT scope, isScratch, count(*) AS count
646
+ FROM scripts
647
+ WHERE createdAt > ${w}
648
+ GROUP BY scope, isScratch`,
649
+ }),
650
+ );
651
+ const editRows = rowsToObjects(
652
+ await ctx.swarm.db_query({
653
+ sql: `SELECT s.scope, count(*) AS count
654
+ FROM script_versions sv
655
+ JOIN scripts s ON s.id = sv.scriptId
656
+ WHERE sv.changedAt > ${w} AND sv.version > 1
657
+ GROUP BY s.scope`,
658
+ }),
659
+ );
660
+ const scriptToolRows = rowsToObjects(
661
+ await ctx.swarm.db_query({
662
+ sql: `WITH tu AS (
663
+ SELECT substr(content, instr(content,'"type":"tool_use"')) AS tail,
664
+ json_extract(content, '$.tool_name') as jsonToolName
665
+ FROM session_logs
666
+ WHERE createdAt > ${w}
667
+ AND (content LIKE '%script-run%'
668
+ OR content LIKE '%launch-script-run%'
669
+ OR content LIKE '%get-script-run%'
670
+ OR content LIKE '%list-script-runs%')
671
+ ),
672
+ nm AS (
673
+ SELECT COALESCE(
674
+ jsonToolName,
675
+ CASE
676
+ WHEN instr(tail,'"name":"') > 0
677
+ THEN substr(substr(tail, instr(tail,'"name":"')+8), 1, instr(substr(tail, instr(tail,'"name":"')+8), '"')-1)
678
+ ELSE NULL
679
+ END
680
+ ) AS tool
681
+ FROM tu
682
+ )
683
+ SELECT tool, count(*) AS calls
684
+ FROM nm
685
+ WHERE tool IS NOT NULL AND tool LIKE '%script%'
686
+ GROUP BY tool
687
+ ORDER BY calls DESC`,
688
+ }),
689
+ );
690
+ insights.scriptUsage = summarizeScriptUsage(runRows, creationRows, editRows, scriptToolRows);
691
+ }
692
+
693
+ // Cost and token accounting. `costSource='unpriced'` rows are excluded from
694
+ // the headline per-task average, and null taskId rows are reported separately.
695
+ if (includeCostAndTokens) {
696
+ const costRows = rowsToObjects(
697
+ await ctx.swarm.db_query({
698
+ sql: `SELECT sc.taskId, sc.agentId, COALESCE(a.name, sc.agentId, 'unknown') AS agentName,
699
+ COALESCE(a.harness_provider, a.provider, 'unknown') AS provider,
700
+ sc.totalCostUsd, sc.inputTokens, sc.outputTokens, sc.cacheReadTokens,
701
+ sc.cacheWriteTokens, sc.reasoningOutputTokens, sc.thinkingTokens,
702
+ sc.numTurns, sc.model, sc.costSource
703
+ FROM session_costs sc
704
+ LEFT JOIN agents a ON a.id = sc.agentId
705
+ WHERE sc.createdAt > ${w}`,
706
+ }),
707
+ );
708
+ insights.costAndTokens = summarizeCostAndTokens(costRows);
709
+ }
710
+
711
+ // Per-agent breakdown — the top 30 agents by number of assigned tasks created in the window.
712
+ if (includeByAgent) {
713
+ insights.byAgent = rowsToObjects(
714
+ await ctx.swarm.db_query({
715
+ sql: `SELECT a.name as agent, count(*) as total,
716
+ sum(case when t.status='completed' then 1 else 0 end) as completed,
717
+ sum(case when ${realFail} then 1 else 0 end) as failed
718
+ FROM agent_tasks t LEFT JOIN agents a ON a.id = t.agentId
719
+ WHERE t.createdAt > ${w} AND t.agentId IS NOT NULL
720
+ GROUP BY t.agentId, a.name ORDER BY total DESC LIMIT 30`,
721
+ }),
722
+ ).map((r: any) => ({
723
+ agent: r.agent,
724
+ total: r.total,
725
+ completed: r.completed,
726
+ failed: r.failed,
727
+ }));
728
+ }
729
+
730
+ if (publishPage) {
731
+ const failureFindings = (insights.failureClusters || []).map((cluster: any) => ({
732
+ id: `failure.${String(cluster.reason || "unknown").slice(0, 48)}`,
733
+ severity: cluster.count >= 5 ? "high" : cluster.count >= 2 ? "medium" : "low",
734
+ summary: `${cluster.count} real failure(s): ${cluster.reason}`,
735
+ action: "Review the repeated failure mode and decide whether to fix, retry, or add a temporary watch item.",
736
+ samples: [cluster],
737
+ }));
738
+ const scheduleFindings = (insights.scheduleHealth || []).map((schedule: any) => ({
739
+ id: `schedule.${schedule.id}`,
740
+ severity: schedule.failureRate >= 50 ? "high" : "medium",
741
+ summary: `${schedule.name} has ${schedule.failureRate}% real-failure rate.`,
742
+ action: "Inspect recent schedule tasks and repair, retarget, or disable the schedule.",
743
+ samples: [schedule],
744
+ }));
745
+ const memoryPollution = insights.memoryHealth?.pollution;
746
+ const memoryFindings = memoryPollution?.autoSnapshotPercent
747
+ ? [
748
+ {
749
+ id: "memory.auto-snapshot-share",
750
+ severity: memoryPollution.autoSnapshotPercent >= 40 ? "high" : "medium",
751
+ summary: `Automatic snapshots are ${memoryPollution.autoSnapshotPercent}% of memory.`,
752
+ action: "Review memory gates and prune low-use automatic snapshots before adding more.",
753
+ samples: [memoryPollution],
754
+ },
755
+ ]
756
+ : [];
757
+ const scriptFindings = (insights.scriptCandidates || []).map((candidate: any) => ({
758
+ id: `script-candidate.${candidate.suggestedName || "unnamed"}`,
759
+ severity: candidate.count >= 3 ? "medium" : "low",
760
+ summary: `${candidate.count} repeated tool triplet(s): ${candidate.tools.join(" -> ")}`,
761
+ action: "Consider turning this repeated workflow into a reusable seeded script.",
762
+ samples: [candidate],
763
+ }));
764
+ const scriptUsageFindings = insights.scriptUsage
765
+ ? [
766
+ {
767
+ id: "script-usage.actual-runs",
768
+ severity: "low",
769
+ summary: `${insights.scriptUsage.runs.total} actual script run(s): ${insights.scriptUsage.runs.inline} one-off, ${insights.scriptUsage.runs.workflow} recurring/workflow.`,
770
+ action: "Use script_runs as the authoritative run count; use session_logs only as an MCP-call reconciliation signal.",
771
+ samples: [insights.scriptUsage],
772
+ },
773
+ ]
774
+ : [];
775
+ const costFindings = insights.costAndTokens
776
+ ? [
777
+ {
778
+ id: "cost-and-tokens.headline",
779
+ severity:
780
+ insights.costAndTokens.unpricedRows > 0 || insights.costAndTokens.nonTaskSessionRows > 0
781
+ ? "medium"
782
+ : "low",
783
+ summary: `$${insights.costAndTokens.totalSpendUsd} total session spend; avg task cost $${insights.costAndTokens.avgCostPerTaskUsd ?? "n/a"} over trusted task rows.`,
784
+ action: "Keep unpriced and null-task session spend separate from the headline per-task average.",
785
+ samples: [insights.costAndTokens],
786
+ },
787
+ ]
788
+ : [];
789
+
790
+ insights.page = await publishCatalogReportPage(
791
+ {
792
+ title: "Compound Insights Audit",
793
+ slug: "compound-insights",
794
+ description: "Swarm-wide daily ops snapshot for compounding and reliability review.",
795
+ generatedAt: insights.generatedAt,
796
+ lede: `Swarm-wide ${days}-day snapshot: ${insights.taskSummary.total} task(s), ${insights.taskSummary.completionRate}% completion rate, ${insights.taskSummary.failureRate}% failure rate.`,
797
+ metrics: [
798
+ ["Tasks", insights.taskSummary.total],
799
+ ["Completed", insights.taskSummary.completed],
800
+ ["Failed", insights.taskSummary.failed],
801
+ ["Failure clusters", insights.failureClusters?.length || 0],
802
+ ["Script runs", insights.scriptUsage?.runs?.total ?? 0],
803
+ ["Total spend", insights.costAndTokens?.totalSpendUsd ?? 0],
804
+ ],
805
+ sections: [
806
+ {
807
+ key: "failures",
808
+ goal: "Expose repeated real failure modes without counting bookkeeping noise.",
809
+ findingCount: failureFindings.length,
810
+ checks: insights.taskSummary,
811
+ findings: failureFindings,
812
+ },
813
+ {
814
+ key: "schedules",
815
+ goal: "Keep schedule failures visible before daily work compounds stale assumptions.",
816
+ findingCount: scheduleFindings.length,
817
+ checks: { unhealthySchedules: scheduleFindings.length },
818
+ findings: scheduleFindings,
819
+ },
820
+ {
821
+ key: "memory",
822
+ goal: "Detect memory bloat and low-use automatic snapshots.",
823
+ findingCount: memoryFindings.length,
824
+ checks: insights.memoryHealth
825
+ ? {
826
+ total: insights.memoryHealth.total,
827
+ autoSnapshotPercent: memoryPollution?.autoSnapshotPercent ?? 0,
828
+ sampledAutoSnapshots:
829
+ memoryPollution?.similarityCheck?.sampledAutoSnapshots ?? 0,
830
+ }
831
+ : {},
832
+ findings: memoryFindings,
833
+ },
834
+ {
835
+ key: "script-candidates",
836
+ goal: "Find repeated tool chains worth compressing into reusable scripts.",
837
+ findingCount: scriptFindings.length,
838
+ checks: { candidates: scriptFindings.length },
839
+ findings: scriptFindings,
840
+ },
841
+ {
842
+ key: "script-usage",
843
+ goal: "Track actual one-off and recurring script execution without double-counting MCP tool-use logs.",
844
+ findingCount: scriptUsageFindings.length,
845
+ checks: insights.scriptUsage ?? {},
846
+ findings: scriptUsageFindings,
847
+ },
848
+ {
849
+ key: "cost-and-tokens",
850
+ goal: "Track per-task cost and token consumption while separating unpriced and non-task sessions.",
851
+ findingCount: costFindings.length,
852
+ checks: insights.costAndTokens ?? {},
853
+ findings: costFindings,
854
+ },
855
+ ],
856
+ appendix: insights,
857
+ },
858
+ ctx,
859
+ );
860
+ }
861
+
862
+ return insights;
863
+ }