@desplega.ai/agent-swarm 1.92.0 → 1.92.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/openapi.json +276 -3
- package/package.json +6 -6
- package/plugin/skills/pages/SKILL.md +5 -2
- package/src/be/db.ts +416 -20
- package/src/be/memory/boot-reembed.ts +85 -0
- package/src/be/memory/constants.ts +44 -2
- package/src/be/memory/providers/openai-embedding.ts +15 -5
- package/src/be/memory/providers/sqlite-store.ts +325 -76
- package/src/be/memory/reranker.ts +35 -17
- package/src/be/memory/types.ts +43 -0
- package/src/be/migrations/084_script_run_journal_duration.sql +5 -0
- package/src/be/migrations/085_script_runs_kind.sql +9 -0
- package/src/be/migrations/086_pages_default_authed.sql +64 -0
- package/src/be/migrations/087_skill_files.sql +19 -0
- package/src/be/modelsdev-cache.json +5622 -2543
- package/src/be/seed-scripts/catalog/boot-triage.ts +221 -0
- package/src/be/seed-scripts/catalog/catalog-report.ts +457 -0
- package/src/be/seed-scripts/catalog/compound-insights.ts +465 -0
- package/src/be/seed-scripts/catalog/gh-pr-snapshot.ts +1 -1
- package/src/be/seed-scripts/catalog/memory-eval.ts +1059 -0
- package/src/be/seed-scripts/catalog/ops-catalog-audit.ts +34 -439
- package/src/be/seed-scripts/catalog/schedule-health.ts +78 -2
- package/src/be/seed-scripts/catalog/task-failure-audit.ts +48 -1
- package/src/be/seed-scripts/index.ts +32 -4
- package/src/be/seed-skills/index.ts +0 -7
- package/src/be/skill-sync.ts +91 -7
- package/src/commands/runner.ts +6 -2
- package/src/heartbeat/templates.ts +20 -16
- package/src/http/index.ts +50 -7
- package/src/http/mcp-user.ts +23 -0
- package/src/http/mcp.ts +58 -0
- package/src/http/memory.ts +62 -0
- package/src/http/pages.ts +1 -1
- package/src/http/script-runs.ts +2 -0
- package/src/http/scripts.ts +39 -2
- package/src/http/skills.ts +225 -0
- package/src/providers/claude-adapter.ts +56 -24
- package/src/script-workflows/workflow-ctx.ts +7 -3
- package/src/scripts-runtime/sdk-allowlist.ts +1 -0
- package/src/scripts-runtime/swarm-sdk.ts +13 -0
- package/src/scripts-runtime/types/stdlib.d.ts +1 -0
- package/src/scripts-runtime/types/swarm-sdk.d.ts +1 -0
- package/src/server.ts +2 -0
- package/src/tasks/worker-follow-up.ts +12 -0
- package/src/tests/claude-adapter-binary.test.ts +135 -81
- package/src/tests/create-page-tool.test.ts +19 -2
- package/src/tests/heartbeat-checklist.test.ts +36 -0
- package/src/tests/mcp-transport-gc.test.ts +58 -0
- package/src/tests/memory-e2e.test.ts +6 -6
- package/src/tests/memory-health-endpoint.test.ts +78 -0
- package/src/tests/memory-rater-e2e.test.ts +4 -5
- package/src/tests/memory-reranker.test.ts +135 -124
- package/src/tests/memory-store.test.ts +221 -1
- package/src/tests/memory.test.ts +13 -12
- package/src/tests/pages-http.test.ts +20 -2
- package/src/tests/pages-storage.test.ts +26 -0
- package/src/tests/scripts-mcp-e2e.test.ts +53 -0
- package/src/tests/seed-scripts.test.ts +328 -3
- package/src/tests/skill-files-http.test.ts +171 -0
- package/src/tests/skill-files.test.ts +162 -0
- package/src/tests/skill-get-file-tool.test.ts +110 -0
- package/src/tests/skill-sync.test.ts +125 -6
- package/src/tests/task-cascade-fail.test.ts +304 -0
- package/src/tools/create-page.ts +2 -2
- package/src/tools/skills/index.ts +1 -0
- package/src/tools/skills/skill-get-file.ts +80 -0
- package/src/tools/tool-config.ts +2 -1
- package/src/types.ts +20 -0
- package/src/utils/internal-ai/complete-structured.ts +2 -2
- package/templates/schedules/daily-blocker-digest/content.md +68 -54
- package/templates/schedules/daily-compounding-reflection/content.md +4 -4
- package/templates/schedules/daily-hn-briefing/content.md +5 -5
- package/templates/schedules/daily-workflow-health-audit/content.md +6 -6
- package/templates/schedules/gtm-weekly-review/content.md +9 -9
- package/templates/schedules/weekly-dependabot-triage/content.md +24 -20
- package/templates/skills/agentmail-sending/content.md +6 -7
- package/templates/skills/desloppify/content.md +8 -9
- package/templates/skills/jira-interaction/content.md +25 -33
- package/templates/skills/kapso-whatsapp/content.md +29 -30
- package/templates/skills/linear-interaction/content.md +8 -9
- package/templates/skills/profile-corruption-escalation/content.md +44 -85
- package/templates/skills/sprite-cli/content.md +4 -5
- package/templates/skills/turso-interaction/content.md +14 -17
- package/templates/skills/workflow-iterate/content.md +38 -391
- package/templates/skills/x-api-interactions/content.md +4 -6
- package/templates/workflows/llm-safe-release-context/config.json +13 -0
- package/templates/workflows/llm-safe-release-context/content.md +69 -0
- package/templates/skills/scheduled-task-resilience/config.json +0 -14
- package/templates/skills/scheduled-task-resilience/content.md +0 -95
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
+
import { publishCatalogReportPage } from "./catalog-report";
|
|
2
3
|
|
|
3
4
|
export const argsSchema = z.object({
|
|
4
5
|
days: z
|
|
@@ -17,10 +18,19 @@ export const argsSchema = z.object({
|
|
|
17
18
|
.boolean()
|
|
18
19
|
.optional()
|
|
19
20
|
.describe("Include high-frequency tool-triplet candidates for future seed scripts (default true)"),
|
|
21
|
+
includeScriptUsage: z
|
|
22
|
+
.boolean()
|
|
23
|
+
.optional()
|
|
24
|
+
.describe("Include actual script run, creation, and edit metrics (default true)"),
|
|
25
|
+
includeCostAndTokens: z
|
|
26
|
+
.boolean()
|
|
27
|
+
.optional()
|
|
28
|
+
.describe("Include session cost and token metrics with honesty rails (default true)"),
|
|
20
29
|
includeByAgent: z
|
|
21
30
|
.boolean()
|
|
22
31
|
.optional()
|
|
23
32
|
.describe("Include per-agent task/completion/failure breakdown (default true)"),
|
|
33
|
+
publishPage: z.boolean().optional().describe("Publish an authed HTML page (default true)"),
|
|
24
34
|
});
|
|
25
35
|
|
|
26
36
|
/**
|
|
@@ -57,6 +67,17 @@ function percent(part: number, total: number): number {
|
|
|
57
67
|
return total > 0 ? round1((part / total) * 100) : 0;
|
|
58
68
|
}
|
|
59
69
|
|
|
70
|
+
/**
 * Rounds a number to four decimal places.
 *
 * @param value - Number to round.
 * @returns `value` multiplied by 10000, passed through `Math.round`, then divided by 10000.
 */
function round4(value: number): number {
  const scale = 10000;
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Picks the nearest-rank percentile from a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is left untouched.
 *
 * @param values - Samples to rank; may be empty.
 * @param p - Percentile on a 0-100 scale.
 * @returns The sample at rank `ceil(p / 100 * n) - 1` of the ascending copy, with the
 *   rank clamped into `[0, n - 1]`, or `null` when `values` is empty.
 */
function percentile(values: number[], p: number): number | null {
  const count = values.length;
  if (count === 0) {
    return null;
  }

  const ascending = Array.from(values);
  ascending.sort((left, right) => left - right);

  // Nearest-rank position; clamped so p <= 0 maps to the first sample and p >= 100 to the last.
  const rank = Math.ceil((p / 100) * count) - 1;
  const clamped = Math.max(0, Math.min(count - 1, rank));

  return ascending[clamped] ?? null;
}
|
|
80
|
+
|
|
60
81
|
function extractToolName(content: string): string | null {
|
|
61
82
|
const match = content.match(/"type"\s*:\s*"tool_use"[\s\S]*?"name"\s*:\s*"([^"]+)"/);
|
|
62
83
|
return match?.[1] ?? null;
|
|
@@ -104,6 +125,220 @@ function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
|
|
104
125
|
return dot / (Math.sqrt(na) * Math.sqrt(nb));
|
|
105
126
|
}
|
|
106
127
|
|
|
128
|
+
/**
 * Condenses pre-fetched script telemetry rows into the `scriptUsage` summary object.
 *
 * Numeric fields are read through `asNumber`, and rates/percentiles through `percent`
 * and `percentile`; `asNumber` and `percent` are defined outside this block
 * (NOTE(review): presumably a numeric coercion and a 0-100 ratio helper — confirm).
 *
 * @param rows - One entry per script run; reads `scriptName`, `kind`, `status`, `durationMs`.
 * @param creationRows - Grouped creation counts; reads `scope`, `isScratch`, `count`.
 * @param editRows - Grouped edit counts; reads `scope`, `count`.
 * @param toolRows - Grouped tool-call counts; reads `tool`, `calls`.
 * @returns An object with `source` notes, `runs` totals (including the 20 most-run
 *   scripts as `perScript`), `creations`, `edits`, and `mcpToolCalls`.
 */
function summarizeScriptUsage(rows: any[], creationRows: any[], editRows: any[], toolRows: any[]) {
  type ScriptTally = {
    scriptName: string;
    runs: number;
    completed: number;
    failed: number;
    inline: number;
    workflow: number;
    durations: number[];
  };

  const failureStatuses = new Set(["failed", "cancelled", "aborted_limit"]);
  const terminalStatuses = new Set(["completed", "failed", "cancelled", "aborted_limit"]);

  const scopeKey = (row: any): string => String(row.scope || "unknown");
  const addTo = (bucket: Record<string, number>, key: string, amount: number): void => {
    bucket[key] = (bucket[key] ?? 0) + amount;
  };

  // Single pass over the runs: per-script tallies and the overall totals together.
  const tallies = new Map<string, ScriptTally>();
  const allDurations: number[] = [];
  let inlineRuns = 0;
  let workflowRuns = 0;
  let completedRuns = 0;
  let failedRuns = 0;
  let openRuns = 0;

  for (const run of rows) {
    const scriptName = String(run.scriptName || "(inline source)");
    let tally = tallies.get(scriptName);
    if (tally === undefined) {
      tally = {
        scriptName,
        runs: 0,
        completed: 0,
        failed: 0,
        inline: 0,
        workflow: 0,
        durations: [],
      };
      tallies.set(scriptName, tally);
    }
    tally.runs += 1;

    if (run.kind === "inline") {
      tally.inline += 1;
      inlineRuns += 1;
    } else if (run.kind === "workflow") {
      tally.workflow += 1;
      workflowRuns += 1;
    }

    const status = String(run.status);
    if (run.status === "completed") {
      tally.completed += 1;
      completedRuns += 1;
    }
    if (failureStatuses.has(status)) {
      tally.failed += 1;
      failedRuns += 1;
    }
    // Anything that is not a terminal status counts as still running or paused.
    if (!terminalStatuses.has(status)) {
      openRuns += 1;
    }

    // Only strictly positive durations feed the percentiles.
    const elapsed = asNumber(run.durationMs);
    if (elapsed > 0) {
      tally.durations.push(elapsed);
      allDurations.push(elapsed);
    }
  }

  const rankedScripts = Array.from(tallies.values(), (tally) => ({
    scriptName: tally.scriptName,
    runs: tally.runs,
    completed: tally.completed,
    failed: tally.failed,
    successRate: percent(tally.completed, tally.runs),
    durationP50Ms: percentile(tally.durations, 50),
    durationP95Ms: percentile(tally.durations, 95),
    inline: tally.inline,
    workflow: tally.workflow,
  }));
  rankedScripts.sort((left, right) => right.runs - left.runs);
  const perScript = rankedScripts.slice(0, 20);

  // Scratch scripts are counted on their own and kept out of the per-scope breakdown.
  const creationsByScope: Record<string, number> = {};
  let nonScratchCreations = 0;
  let scratchCreations = 0;
  for (const created of creationRows) {
    const amount = asNumber(created.count);
    if (asNumber(created.isScratch) === 1) {
      scratchCreations += amount;
      continue;
    }
    nonScratchCreations += amount;
    addTo(creationsByScope, scopeKey(created), amount);
  }

  const editsByScope: Record<string, number> = {};
  let totalEdits = 0;
  for (const edited of editRows) {
    const amount = asNumber(edited.count);
    totalEdits += amount;
    addTo(editsByScope, scopeKey(edited), amount);
  }

  const mcpToolCalls = toolRows.map((entry) => ({
    tool: entry.tool,
    calls: asNumber(entry.calls),
  }));

  return {
    source: {
      authoritativeRuns: "script_runs",
      mcpCallSignal: "session_logs tool_use for script tools",
      reconciliation:
        "`script-run` via MCP calls /api/scripts/run, which records kind='inline' rows in script_runs; launch-script-run/workflows record kind='workflow'. session_logs counts agent tool calls and must not be added to script_runs totals.",
    },
    runs: {
      total: rows.length,
      inline: inlineRuns,
      workflow: workflowRuns,
      completed: completedRuns,
      failed: failedRuns,
      runningOrPaused: openRuns,
      successRate: percent(completedRuns, rows.length),
      durationP50Ms: percentile(allDurations, 50),
      durationP95Ms: percentile(allDurations, 95),
      perScript,
    },
    creations: {
      totalNonScratch: nonScratchCreations,
      scratch: scratchCreations,
      byScope: creationsByScope,
    },
    edits: {
      total: totalEdits,
      byScope: editsByScope,
    },
    mcpToolCalls,
  };
}
|
|
248
|
+
|
|
249
|
+
/**
 * Aggregates session cost rows into the `costAndTokens` summary object.
 *
 * Rows whose `costSource` is "harness" or "pricing-table" are treated as trusted.
 * The headline per-task average uses only trusted rows with a truthy `taskId`,
 * divided by the number of distinct task ids among them. Costs are read through
 * `asNumber`, a helper defined outside this block (NOTE(review): presumably a
 * numeric coercion — confirm).
 *
 * @param rows - One entry per session cost record; reads `taskId`, `totalCostUsd`,
 *   `costSource`, the token count fields, `numTurns`, `model`, `agentName`, `provider`.
 * @returns An object with source notes, spend totals rounded via `round4`, token
 *   totals, counts of null/undefined fields, and spend grouped by model, agent,
 *   provider, and cost source.
 */
function summarizeCostAndTokens(rows: any[]) {
  type CostBucket = {
    key: string;
    rows: number;
    spendUsd: number;
    trustedSpendUsd: number;
    unpricedRows: number;
  };

  const trustedSources = new Set(["harness", "pricing-table"]);

  const isTrusted = (row: any): boolean => trustedSources.has(String(row.costSource));
  const isUnpriced = (row: any): boolean => String(row.costSource) === "unpriced";
  const isMissing = (value: unknown): boolean => value === null || value === undefined;
  const costOf = (row: any) => asNumber(row.totalCostUsd);
  // Sums in row order so floating-point results do not depend on grouping.
  const spendOf = (subset: any[]) => subset.reduce((acc, row) => acc + costOf(row), 0);

  const trustedRows = rows.filter(isTrusted);
  const unpricedRows = rows.filter(isUnpriced);
  const trustedTaskRows = trustedRows.filter((row) => row.taskId);
  const nonTaskRows = rows.filter((row) => !row.taskId);

  const trustedTaskIds = new Set(trustedTaskRows.map((row) => String(row.taskId)));
  const trustedTaskSpend = spendOf(trustedTaskRows);
  const totalSpend = spendOf(rows);
  const trustedSpend = spendOf(trustedRows);

  // Null/undefined token fields are skipped here and surfaced via unknownCount instead.
  const sumToken = (field: string) => {
    let total = 0;
    for (const row of rows) {
      const raw = row[field];
      if (!isMissing(raw)) {
        total = total + asNumber(raw);
      }
    }
    return total;
  };

  const unknownCount = (field: string) => {
    let missing = 0;
    for (const row of rows) {
      if (isMissing(row[field])) {
        missing += 1;
      }
    }
    return missing;
  };

  const groupBy = (field: string) => {
    const buckets = new Map<string, CostBucket>();
    for (const row of rows) {
      const key = String(row[field] || "unknown");
      let bucket = buckets.get(key);
      if (bucket === undefined) {
        bucket = { key, rows: 0, spendUsd: 0, trustedSpendUsd: 0, unpricedRows: 0 };
        buckets.set(key, bucket);
      }
      const cost = costOf(row);
      bucket.rows += 1;
      bucket.spendUsd += cost;
      if (isTrusted(row)) {
        bucket.trustedSpendUsd += cost;
      }
      if (isUnpriced(row)) {
        bucket.unpricedRows += 1;
      }
    }

    // Rounding happens before sorting, so ordering is by the rounded spend.
    const rounded = Array.from(buckets.values(), (bucket) => ({
      key: bucket.key,
      rows: bucket.rows,
      spendUsd: round4(bucket.spendUsd),
      trustedSpendUsd: round4(bucket.trustedSpendUsd),
      unpricedRows: bucket.unpricedRows,
    }));
    rounded.sort((left, right) => right.spendUsd - left.spendUsd);
    return rounded;
  };

  const headlineTaskCount = trustedTaskIds.size;
  const avgCostPerTaskUsd =
    headlineTaskCount > 0 ? round4(trustedTaskSpend / headlineTaskCount) : null;

  return {
    source: {
      table: "session_costs",
      providerDerivation:
        "provider is derived from agents.harness_provider, then agents.provider, because session_costs does not carry a provider column",
      headlineAvgCostRule:
        "avgCostPerTaskUsd excludes unpriced rows and rows with null taskId; null-task sessions are reported separately",
    },
    rows: rows.length,
    taskCountForHeadlineAvg: headlineTaskCount,
    avgCostPerTaskUsd,
    totalSpendUsd: round4(totalSpend),
    trustedSpendUsd: round4(trustedSpend),
    trustedRows: trustedRows.length,
    trustedRowPercent: percent(trustedRows.length, rows.length),
    unpricedRows: unpricedRows.length,
    unpricedSpendUsd: round4(spendOf(unpricedRows)),
    nonTaskSessionRows: nonTaskRows.length,
    nonTaskSessionSpendUsd: round4(spendOf(nonTaskRows)),
    tokenTotals: {
      inputTokens: sumToken("inputTokens"),
      outputTokens: sumToken("outputTokens"),
      cacheReadTokens: sumToken("cacheReadTokens"),
      cacheWriteTokens: sumToken("cacheWriteTokens"),
      reasoningOutputTokens: sumToken("reasoningOutputTokens"),
      thinkingTokens: sumToken("thinkingTokens"),
    },
    unknownCounts: {
      cacheWriteTokens: unknownCount("cacheWriteTokens"),
      numTurns: unknownCount("numTurns"),
    },
    byModel: groupBy("model"),
    byAgent: groupBy("agentName"),
    byProvider: groupBy("provider"),
    byCostSource: groupBy("costSource"),
  };
}
|
|
341
|
+
|
|
107
342
|
/**
|
|
108
343
|
* Daily compounding insights — compressed JSON for Phase 0 evolution.
|
|
109
344
|
*
|
|
@@ -119,7 +354,10 @@ export default async function compoundInsights(args: any, ctx: any) {
|
|
|
119
354
|
const includeScheduleHealth = parsed.data.includeScheduleHealth !== false;
|
|
120
355
|
const includeMemoryHealth = parsed.data.includeMemoryHealth !== false;
|
|
121
356
|
const includeScriptCandidates = parsed.data.includeScriptCandidates !== false;
|
|
357
|
+
const includeScriptUsage = parsed.data.includeScriptUsage !== false;
|
|
358
|
+
const includeCostAndTokens = parsed.data.includeCostAndTokens !== false;
|
|
122
359
|
const includeByAgent = parsed.data.includeByAgent !== false;
|
|
360
|
+
const publishPage = parsed.data.publishPage !== false;
|
|
123
361
|
|
|
124
362
|
// `days` is a validated positive int, so it is safe to interpolate into the
|
|
125
363
|
// SQLite datetime modifier. EXCLUDED_FAIL is a fixed constant list.
|
|
@@ -375,6 +613,101 @@ export default async function compoundInsights(args: any, ctx: any) {
|
|
|
375
613
|
}));
|
|
376
614
|
}
|
|
377
615
|
|
|
616
|
+
// Actual script usage. Authoritative run counts come from `script_runs`;
|
|
617
|
+
// session_logs tool_use rows are a separate MCP-call signal for reconciliation
|
|
618
|
+
// and are intentionally not added to run totals.
|
|
619
|
+
if (includeScriptUsage) {
|
|
620
|
+
const runRows = rowsToObjects(
|
|
621
|
+
await ctx.swarm.db_query({
|
|
622
|
+
sql: `WITH journal_durations AS (
|
|
623
|
+
SELECT runId, sum(durationMs) AS journalDurationMs
|
|
624
|
+
FROM script_run_journal
|
|
625
|
+
WHERE durationMs IS NOT NULL
|
|
626
|
+
GROUP BY runId
|
|
627
|
+
)
|
|
628
|
+
SELECT sr.scriptName, sr.kind, sr.status, sr.startedAt, sr.finishedAt,
|
|
629
|
+
COALESCE(
|
|
630
|
+
jd.journalDurationMs,
|
|
631
|
+
CASE
|
|
632
|
+
WHEN sr.finishedAt IS NOT NULL
|
|
633
|
+
THEN CAST((julianday(sr.finishedAt) - julianday(sr.startedAt)) * 86400000 AS INTEGER)
|
|
634
|
+
ELSE NULL
|
|
635
|
+
END
|
|
636
|
+
) AS durationMs
|
|
637
|
+
FROM script_runs sr
|
|
638
|
+
LEFT JOIN journal_durations jd ON jd.runId = sr.id
|
|
639
|
+
WHERE sr.startedAt > ${w}
|
|
640
|
+
ORDER BY sr.startedAt DESC`,
|
|
641
|
+
}),
|
|
642
|
+
);
|
|
643
|
+
const creationRows = rowsToObjects(
|
|
644
|
+
await ctx.swarm.db_query({
|
|
645
|
+
sql: `SELECT scope, isScratch, count(*) AS count
|
|
646
|
+
FROM scripts
|
|
647
|
+
WHERE createdAt > ${w}
|
|
648
|
+
GROUP BY scope, isScratch`,
|
|
649
|
+
}),
|
|
650
|
+
);
|
|
651
|
+
const editRows = rowsToObjects(
|
|
652
|
+
await ctx.swarm.db_query({
|
|
653
|
+
sql: `SELECT s.scope, count(*) AS count
|
|
654
|
+
FROM script_versions sv
|
|
655
|
+
JOIN scripts s ON s.id = sv.scriptId
|
|
656
|
+
WHERE sv.changedAt > ${w} AND sv.version > 1
|
|
657
|
+
GROUP BY s.scope`,
|
|
658
|
+
}),
|
|
659
|
+
);
|
|
660
|
+
const scriptToolRows = rowsToObjects(
|
|
661
|
+
await ctx.swarm.db_query({
|
|
662
|
+
sql: `WITH tu AS (
|
|
663
|
+
SELECT substr(content, instr(content,'"type":"tool_use"')) AS tail,
|
|
664
|
+
json_extract(content, '$.tool_name') as jsonToolName
|
|
665
|
+
FROM session_logs
|
|
666
|
+
WHERE createdAt > ${w}
|
|
667
|
+
AND (content LIKE '%script-run%'
|
|
668
|
+
OR content LIKE '%launch-script-run%'
|
|
669
|
+
OR content LIKE '%get-script-run%'
|
|
670
|
+
OR content LIKE '%list-script-runs%')
|
|
671
|
+
),
|
|
672
|
+
nm AS (
|
|
673
|
+
SELECT COALESCE(
|
|
674
|
+
jsonToolName,
|
|
675
|
+
CASE
|
|
676
|
+
WHEN instr(tail,'"name":"') > 0
|
|
677
|
+
THEN substr(substr(tail, instr(tail,'"name":"')+8), 1, instr(substr(tail, instr(tail,'"name":"')+8), '"')-1)
|
|
678
|
+
ELSE NULL
|
|
679
|
+
END
|
|
680
|
+
) AS tool
|
|
681
|
+
FROM tu
|
|
682
|
+
)
|
|
683
|
+
SELECT tool, count(*) AS calls
|
|
684
|
+
FROM nm
|
|
685
|
+
WHERE tool IS NOT NULL AND tool LIKE '%script%'
|
|
686
|
+
GROUP BY tool
|
|
687
|
+
ORDER BY calls DESC`,
|
|
688
|
+
}),
|
|
689
|
+
);
|
|
690
|
+
insights.scriptUsage = summarizeScriptUsage(runRows, creationRows, editRows, scriptToolRows);
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
// Cost and token accounting. `costSource='unpriced'` rows are excluded from
|
|
694
|
+
// the headline per-task average, and null taskId rows are reported separately.
|
|
695
|
+
if (includeCostAndTokens) {
|
|
696
|
+
const costRows = rowsToObjects(
|
|
697
|
+
await ctx.swarm.db_query({
|
|
698
|
+
sql: `SELECT sc.taskId, sc.agentId, COALESCE(a.name, sc.agentId, 'unknown') AS agentName,
|
|
699
|
+
COALESCE(a.harness_provider, a.provider, 'unknown') AS provider,
|
|
700
|
+
sc.totalCostUsd, sc.inputTokens, sc.outputTokens, sc.cacheReadTokens,
|
|
701
|
+
sc.cacheWriteTokens, sc.reasoningOutputTokens, sc.thinkingTokens,
|
|
702
|
+
sc.numTurns, sc.model, sc.costSource
|
|
703
|
+
FROM session_costs sc
|
|
704
|
+
LEFT JOIN agents a ON a.id = sc.agentId
|
|
705
|
+
WHERE sc.createdAt > ${w}`,
|
|
706
|
+
}),
|
|
707
|
+
);
|
|
708
|
+
insights.costAndTokens = summarizeCostAndTokens(costRows);
|
|
709
|
+
}
|
|
710
|
+
|
|
378
711
|
// Per-agent breakdown — covers every agent that ran a task in the window.
|
|
379
712
|
if (includeByAgent) {
|
|
380
713
|
insights.byAgent = rowsToObjects(
|
|
@@ -394,5 +727,137 @@ export default async function compoundInsights(args: any, ctx: any) {
|
|
|
394
727
|
}));
|
|
395
728
|
}
|
|
396
729
|
|
|
730
|
+
if (publishPage) {
|
|
731
|
+
const failureFindings = (insights.failureClusters || []).map((cluster: any) => ({
|
|
732
|
+
id: `failure.${String(cluster.reason || "unknown").slice(0, 48)}`,
|
|
733
|
+
severity: cluster.count >= 5 ? "high" : cluster.count >= 2 ? "medium" : "low",
|
|
734
|
+
summary: `${cluster.count} real failure(s): ${cluster.reason}`,
|
|
735
|
+
action: "Review the repeated failure mode and decide whether to fix, retry, or add a temporary watch item.",
|
|
736
|
+
samples: [cluster],
|
|
737
|
+
}));
|
|
738
|
+
const scheduleFindings = (insights.scheduleHealth || []).map((schedule: any) => ({
|
|
739
|
+
id: `schedule.${schedule.id}`,
|
|
740
|
+
severity: schedule.failureRate >= 50 ? "high" : "medium",
|
|
741
|
+
summary: `${schedule.name} has ${schedule.failureRate}% real-failure rate.`,
|
|
742
|
+
action: "Inspect recent schedule tasks and repair, retarget, or disable the schedule.",
|
|
743
|
+
samples: [schedule],
|
|
744
|
+
}));
|
|
745
|
+
const memoryPollution = insights.memoryHealth?.pollution;
|
|
746
|
+
const memoryFindings = memoryPollution?.autoSnapshotPercent
|
|
747
|
+
? [
|
|
748
|
+
{
|
|
749
|
+
id: "memory.auto-snapshot-share",
|
|
750
|
+
severity: memoryPollution.autoSnapshotPercent >= 40 ? "high" : "medium",
|
|
751
|
+
summary: `Automatic snapshots are ${memoryPollution.autoSnapshotPercent}% of memory.`,
|
|
752
|
+
action: "Review memory gates and prune low-use automatic snapshots before adding more.",
|
|
753
|
+
samples: [memoryPollution],
|
|
754
|
+
},
|
|
755
|
+
]
|
|
756
|
+
: [];
|
|
757
|
+
const scriptFindings = (insights.scriptCandidates || []).map((candidate: any) => ({
|
|
758
|
+
id: `script-candidate.${candidate.suggestedName || "unnamed"}`,
|
|
759
|
+
severity: candidate.count >= 3 ? "medium" : "low",
|
|
760
|
+
summary: `${candidate.count} repeated tool triplet(s): ${candidate.tools.join(" -> ")}`,
|
|
761
|
+
action: "Consider turning this repeated workflow into a reusable seeded script.",
|
|
762
|
+
samples: [candidate],
|
|
763
|
+
}));
|
|
764
|
+
const scriptUsageFindings = insights.scriptUsage
|
|
765
|
+
? [
|
|
766
|
+
{
|
|
767
|
+
id: "script-usage.actual-runs",
|
|
768
|
+
severity: "low",
|
|
769
|
+
summary: `${insights.scriptUsage.runs.total} actual script run(s): ${insights.scriptUsage.runs.inline} one-off, ${insights.scriptUsage.runs.workflow} recurring/workflow.`,
|
|
770
|
+
action: "Use script_runs as the authoritative run count; use session_logs only as an MCP-call reconciliation signal.",
|
|
771
|
+
samples: [insights.scriptUsage],
|
|
772
|
+
},
|
|
773
|
+
]
|
|
774
|
+
: [];
|
|
775
|
+
const costFindings = insights.costAndTokens
|
|
776
|
+
? [
|
|
777
|
+
{
|
|
778
|
+
id: "cost-and-tokens.headline",
|
|
779
|
+
severity:
|
|
780
|
+
insights.costAndTokens.unpricedRows > 0 || insights.costAndTokens.nonTaskSessionRows > 0
|
|
781
|
+
? "medium"
|
|
782
|
+
: "low",
|
|
783
|
+
summary: `$${insights.costAndTokens.totalSpendUsd} total session spend; avg task cost $${insights.costAndTokens.avgCostPerTaskUsd ?? "n/a"} over trusted task rows.`,
|
|
784
|
+
action: "Keep unpriced and null-task session spend separate from the headline per-task average.",
|
|
785
|
+
samples: [insights.costAndTokens],
|
|
786
|
+
},
|
|
787
|
+
]
|
|
788
|
+
: [];
|
|
789
|
+
|
|
790
|
+
insights.page = await publishCatalogReportPage(
|
|
791
|
+
{
|
|
792
|
+
title: "Compound Insights Audit",
|
|
793
|
+
slug: "compound-insights",
|
|
794
|
+
description: "Swarm-wide daily ops snapshot for compounding and reliability review.",
|
|
795
|
+
generatedAt: insights.generatedAt,
|
|
796
|
+
lede: `Swarm-wide ${days}-day snapshot: ${insights.taskSummary.total} task(s), ${insights.taskSummary.completionRate}% completion rate, ${insights.taskSummary.failureRate}% failure rate.`,
|
|
797
|
+
metrics: [
|
|
798
|
+
["Tasks", insights.taskSummary.total],
|
|
799
|
+
["Completed", insights.taskSummary.completed],
|
|
800
|
+
["Failed", insights.taskSummary.failed],
|
|
801
|
+
["Failure clusters", insights.failureClusters?.length || 0],
|
|
802
|
+
["Script runs", insights.scriptUsage?.runs?.total ?? 0],
|
|
803
|
+
["Total spend", insights.costAndTokens?.totalSpendUsd ?? 0],
|
|
804
|
+
],
|
|
805
|
+
sections: [
|
|
806
|
+
{
|
|
807
|
+
key: "failures",
|
|
808
|
+
goal: "Expose repeated real failure modes without counting bookkeeping noise.",
|
|
809
|
+
findingCount: failureFindings.length,
|
|
810
|
+
checks: insights.taskSummary,
|
|
811
|
+
findings: failureFindings,
|
|
812
|
+
},
|
|
813
|
+
{
|
|
814
|
+
key: "schedules",
|
|
815
|
+
goal: "Keep schedule failures visible before daily work compounds stale assumptions.",
|
|
816
|
+
findingCount: scheduleFindings.length,
|
|
817
|
+
checks: { unhealthySchedules: scheduleFindings.length },
|
|
818
|
+
findings: scheduleFindings,
|
|
819
|
+
},
|
|
820
|
+
{
|
|
821
|
+
key: "memory",
|
|
822
|
+
goal: "Detect memory bloat and low-use automatic snapshots.",
|
|
823
|
+
findingCount: memoryFindings.length,
|
|
824
|
+
checks: insights.memoryHealth
|
|
825
|
+
? {
|
|
826
|
+
total: insights.memoryHealth.total,
|
|
827
|
+
autoSnapshotPercent: memoryPollution?.autoSnapshotPercent ?? 0,
|
|
828
|
+
sampledAutoSnapshots:
|
|
829
|
+
memoryPollution?.similarityCheck?.sampledAutoSnapshots ?? 0,
|
|
830
|
+
}
|
|
831
|
+
: {},
|
|
832
|
+
findings: memoryFindings,
|
|
833
|
+
},
|
|
834
|
+
{
|
|
835
|
+
key: "script-candidates",
|
|
836
|
+
goal: "Find repeated tool chains worth compressing into reusable scripts.",
|
|
837
|
+
findingCount: scriptFindings.length,
|
|
838
|
+
checks: { candidates: scriptFindings.length },
|
|
839
|
+
findings: scriptFindings,
|
|
840
|
+
},
|
|
841
|
+
{
|
|
842
|
+
key: "script-usage",
|
|
843
|
+
goal: "Track actual one-off and recurring script execution without double-counting MCP tool-use logs.",
|
|
844
|
+
findingCount: scriptUsageFindings.length,
|
|
845
|
+
checks: insights.scriptUsage ?? {},
|
|
846
|
+
findings: scriptUsageFindings,
|
|
847
|
+
},
|
|
848
|
+
{
|
|
849
|
+
key: "cost-and-tokens",
|
|
850
|
+
goal: "Track per-task cost and token consumption while separating unpriced and non-task sessions.",
|
|
851
|
+
findingCount: costFindings.length,
|
|
852
|
+
checks: insights.costAndTokens ?? {},
|
|
853
|
+
findings: costFindings,
|
|
854
|
+
},
|
|
855
|
+
],
|
|
856
|
+
appendix: insights,
|
|
857
|
+
},
|
|
858
|
+
ctx,
|
|
859
|
+
);
|
|
860
|
+
}
|
|
861
|
+
|
|
397
862
|
return insights;
|
|
398
863
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
|
|
3
3
|
export const argsSchema = z.object({
|
|
4
|
-
repo: z.string().describe("Repository in 'owner/name' form, e.g. '
|
|
4
|
+
repo: z.string().describe("Repository in 'owner/name' form, e.g. 'owner/name'"),
|
|
5
5
|
number: z.number().int().positive().describe("Pull request number"),
|
|
6
6
|
token: z
|
|
7
7
|
.string()
|