@desplega.ai/agent-swarm 1.91.0 → 1.92.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +2 -1
  2. package/openapi.json +585 -5
  3. package/package.json +1 -1
  4. package/src/be/db.ts +337 -1
  5. package/src/be/migrations/083_script_workflows.sql +51 -0
  6. package/src/be/modelsdev-cache.json +42352 -38595
  7. package/src/be/scripts/typecheck.ts +49 -0
  8. package/src/be/seed-scripts/catalog/compound-insights.ts +216 -6
  9. package/src/be/seed-scripts/catalog/ops-catalog-audit.ts +911 -0
  10. package/src/be/seed-scripts/catalog/task-context-gathering.ts +92 -0
  11. package/src/be/seed-scripts/catalog/tool-usage.ts +6 -3
  12. package/src/be/seed-scripts/index.ts +20 -2
  13. package/src/be/seed-skills/index.ts +7 -0
  14. package/src/be/swarm-config-guard.ts +17 -0
  15. package/src/commands/runner.ts +43 -2
  16. package/src/http/db-query.ts +20 -5
  17. package/src/http/index.ts +10 -0
  18. package/src/http/script-runs.ts +555 -0
  19. package/src/prompts/session-templates.ts +24 -4
  20. package/src/providers/claude-adapter.ts +60 -13
  21. package/src/script-workflows/executor.ts +110 -0
  22. package/src/script-workflows/harness.ts +73 -0
  23. package/src/script-workflows/label-lint.ts +51 -0
  24. package/src/script-workflows/limits.ts +22 -0
  25. package/src/script-workflows/supervisor.ts +139 -0
  26. package/src/script-workflows/workflow-ctx.ts +205 -0
  27. package/src/scripts-runtime/sdk-allowlist.ts +3 -0
  28. package/src/scripts-runtime/types/stdlib.d.ts +60 -0
  29. package/src/scripts-runtime/types/swarm-sdk.d.ts +60 -0
  30. package/src/server.ts +2 -0
  31. package/src/slack/handlers.ts +11 -4
  32. package/src/slack/message-text.ts +98 -0
  33. package/src/slack/thread-buffer.ts +5 -3
  34. package/src/tests/claude-adapter-binary.test.ts +147 -4
  35. package/src/tests/db-query.test.ts +28 -0
  36. package/src/tests/error-tracker.test.ts +121 -0
  37. package/src/tests/harness-provider-resolution.test.ts +33 -0
  38. package/src/tests/mcp-tools.test.ts +6 -0
  39. package/src/tests/prompt-template-session.test.ts +34 -5
  40. package/src/tests/script-runs-http.test.ts +278 -0
  41. package/src/tests/script-workflows-label-lint.test.ts +43 -0
  42. package/src/tests/script-workflows-runtime-e2e.test.ts +170 -0
  43. package/src/tests/scripts-mcp-e2e.test.ts +49 -2
  44. package/src/tests/seed-scripts.test.ts +347 -2
  45. package/src/tests/slack-message-text.test.ts +250 -0
  46. package/src/tests/system-default-skills.test.ts +40 -0
  47. package/src/tools/db-query.ts +16 -6
  48. package/src/tools/script-runs.ts +123 -0
  49. package/src/tools/slack-read.ts +12 -3
  50. package/src/tools/tool-config.ts +4 -1
  51. package/src/types.ts +52 -0
  52. package/src/utils/error-tracker.ts +40 -1
  53. package/src/utils/internal-ai/complete-structured.ts +10 -4
  54. package/src/workflows/executors/raw-llm.ts +76 -59
  55. package/templates/skills/pages/content.md +205 -55
  56. package/templates/skills/script-workflows/config.json +14 -0
  57. package/templates/skills/script-workflows/content.md +68 -0
  58. package/templates/skills/swarm-scripts/content.md +2 -3
@@ -162,6 +162,9 @@ export interface SwarmSdk {
162
162
  script_upsert(args: { name: string; source: string; description?: string; intent?: string; scope?: ScriptScope; fsMode?: ScriptFsMode }): Promise<unknown>;
163
163
  script_delete(args: { name: string; scope?: ScriptScope }): Promise<unknown>;
164
164
  script_queryTypes(args: { name: string; scope?: ScriptScope }): Promise<unknown>;
165
+ script_launchRun(args: { source: string; args?: unknown; idempotencyKey?: string; scriptName?: string; requestedByUserId?: string }): Promise<unknown>;
166
+ script_getRun(args: { id: string }): Promise<unknown>;
167
+ script_listRuns(args?: { status?: "running" | "paused" | "completed" | "failed" | "cancelled" | "aborted_limit"; agentId?: string; limit?: number; offset?: number }): Promise<unknown>;
165
168
 
166
169
  // --- write: repos ---
167
170
  repo_update(args: Record<string, unknown>): Promise<unknown>;
@@ -209,7 +212,53 @@ export interface ScriptStdlib {
209
212
 
210
213
  export interface ScriptLogger extends Console {}
211
214
 
215
/**
 * Metadata about a script run, exposed to scripts through the optional
 * `run` member of the script context.
 */
export interface ScriptRunContext {
  /** Identifier of the run. */
  id: string;
  /** Identifier of the agent associated with the run (presumably the launching agent; confirm against the runtime). */
  agentId: string;
  /** Arguments attached to the run; untyped, so narrow before use. */
  args: unknown;
}
220
+
221
/**
 * Step helpers exposed to scripts through the optional `step` member of the
 * script context. Every step that accepts input takes a `label` string
 * followed by a step-specific config object and resolves to an untyped value.
 *
 * NOTE(review): the runtime semantics of each step live in the workflow
 * context implementation, not in this declaration; the per-member notes below
 * describe only the declared shape.
 */
export interface ScriptWorkflowSteps {
  /**
   * Step configured by a required `prompt`, with an optional `model` and an
   * optional `schema` object (presumably describing structured output; confirm).
   */
  rawLlm(
    label: string,
    config: { prompt: string; model?: string; schema?: Record<string, unknown> },
  ): Promise<unknown>;

  /**
   * Step configured entirely by optional task fields (template, task text,
   * target agent, tags, priority, and so on).
   */
  agentTask(
    label: string,
    config: {
      template?: string;
      task?: string;
      agentId?: string;
      tags?: string[];
      priority?: number;
      offerMode?: boolean;
      dir?: string;
      vcsRepo?: string;
      model?: string;
      parentTaskId?: string;
      requestedByUserId?: string;
      outputSchema?: Record<string, unknown>;
    },
  ): Promise<unknown>;

  /**
   * Step configured by an optional script reference (`name` / `scriptName`)
   * or inline `source`, plus optional args, scope, fs mode, intent, and
   * idempotency key.
   */
  swarmScript(
    label: string,
    config: {
      name?: string;
      scriptName?: string;
      source?: string;
      args?: unknown;
      scope?: ScriptScope;
      fsMode?: ScriptFsMode;
      intent?: string;
      idempotencyKey?: string;
    },
  ): Promise<unknown>;

  /** Takes no arguments; typed `Promise<never>`, i.e. it never resolves with a value. */
  humanInTheLoop(): Promise<never>;
}
258
+
212
259
  export interface ScriptContext {
260
+ run?: ScriptRunContext;
261
+ step?: ScriptWorkflowSteps;
213
262
  swarm: SwarmSdk & { config: SwarmConfig };
214
263
  stdlib: ScriptStdlib;
215
264
  logger: ScriptLogger;
@@ -13,6 +13,10 @@ export const argsSchema = z.object({
13
13
  .optional()
14
14
  .describe("Include schedule health flags (default true)"),
15
15
  includeMemoryHealth: z.boolean().optional().describe("Include memory health stats (default true)"),
16
+ includeScriptCandidates: z
17
+ .boolean()
18
+ .optional()
19
+ .describe("Include high-frequency tool-triplet candidates for future seed scripts (default true)"),
16
20
  includeByAgent: z
17
21
  .boolean()
18
22
  .optional()
@@ -40,6 +44,66 @@ function rowsToObjects(res: any): any[] {
40
44
  );
41
45
  }
42
46
 
47
/**
 * Coerces an arbitrary value to a finite number.
 *
 * `null` and `undefined` are treated as 0, and any value whose numeric
 * coercion is not finite (NaN, ±Infinity) also yields 0.
 *
 * @param value - Value to coerce.
 * @returns The coerced finite number, or 0.
 */
function asNumber(value: any): number {
  const coerced = Number(value == null ? 0 : value);
  if (!Number.isFinite(coerced)) return 0;
  return coerced;
}
51
+
52
/**
 * Rounds a number to one decimal place using `Math.round` on the value
 * scaled by ten.
 *
 * @param value - Number to round.
 * @returns `value` rounded to the nearest tenth.
 */
function round1(value: number): number {
  const tenths = Math.round(value * 10);
  return tenths / 10;
}
55
+
56
/**
 * Expresses `part` as a percentage of `total`, rounded to one decimal place.
 *
 * @param part - Numerator.
 * @param total - Denominator; anything not greater than zero yields 0.
 * @returns The percentage, or 0 when `total` is not positive.
 */
function percent(part: number, total: number): number {
  if (!(total > 0)) return 0;
  const ratio = part / total;
  return round1(ratio * 100);
}
59
+
60
/**
 * Pulls a tool name out of raw log content by locating a
 * `"type": "tool_use"` marker and capturing the first `"name": "..."` value
 * that follows it.
 *
 * @param content - Raw text to scan.
 * @returns The captured name, or `null` when no match is found.
 */
function extractToolName(content: string): string | null {
  const toolUsePattern = /"type"\s*:\s*"tool_use"[\s\S]*?"name"\s*:\s*"([^"]+)"/;
  const found = toolUsePattern.exec(content);
  if (found === null) return null;
  return found[1] ?? null;
}
64
+
65
/**
 * Converts a tool identifier into a lowercase, hyphen-separated slug.
 *
 * A leading `mcp__` prefix is dropped, every run of characters outside
 * `[a-zA-Z0-9]` collapses to a single hyphen, and leading/trailing hyphens
 * are trimmed. Collapsing whole runs (rather than replacing separators one
 * by one) guarantees the slug never contains doubled hyphens, so slugs stay
 * unambiguous when several are joined with `-`.
 *
 * @param tool - Raw tool name, e.g. `mcp__server__do_thing`.
 * @returns The slug; an empty string when nothing alphanumeric remains.
 */
function toolSlug(tool: string): string {
  return tool
    .replace(/^mcp__/, "")
    .replace(/[^a-zA-Z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .toLowerCase();
}
74
+
75
/**
 * Decodes a binary blob into a `Float32Array`, accepting the several shapes
 * a byte sequence can arrive in:
 *
 * - a `Uint8Array` (including subclasses),
 * - an `ArrayBuffer`, or any other `ArrayBufferView` (its underlying bytes
 *   are reinterpreted, never its element values),
 * - a plain array of byte values,
 * - an object with a `data` array of byte values,
 * - an object whose keys are all decimal indices mapping to byte values.
 *
 * @param value - Candidate blob in any of the shapes above.
 * @returns A freshly allocated `Float32Array` over a copy of the bytes, or
 *   `null` when the value is falsy, has an unrecognized shape, is shorter
 *   than four bytes, or is not a whole multiple of four bytes long.
 */
function decodeFloat32Blob(value: any): Float32Array | null {
  if (!value) return null;
  let bytes: Uint8Array | null = null;
  if (value instanceof Uint8Array) {
    bytes = value;
  } else if (value instanceof ArrayBuffer) {
    bytes = new Uint8Array(value);
  } else if (ArrayBuffer.isView(value)) {
    // Other typed arrays / DataView: view the raw bytes instead of letting the
    // index-keyed branch below coerce element values (e.g. floats) into bytes.
    bytes = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
  } else if (Array.isArray(value)) {
    bytes = Uint8Array.from(value);
  } else if (typeof value === "object" && Array.isArray(value.data)) {
    bytes = Uint8Array.from(value.data);
  } else if (typeof value === "object") {
    const keys = Object.keys(value);
    if (keys.length > 0 && keys.every((key) => /^\d+$/.test(key))) {
      bytes = Uint8Array.from(Object.values(value) as number[]);
    }
  }
  if (!bytes || bytes.byteLength < 4 || bytes.byteLength % 4 !== 0) return null;
  // Copy the exact byte window: the source view may sit at an offset that is
  // not 4-byte aligned, which a direct Float32Array view would reject.
  return new Float32Array(bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength));
}
90
+
91
/**
 * Computes the cosine similarity of two vectors.
 *
 * Only the overlapping prefix (the shorter of the two lengths) contributes
 * to the dot product and to both norms.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns The cosine of the angle between the compared prefixes, or 0 when
 *   either prefix has zero magnitude (including empty input).
 */
function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  const overlap = Math.min(a.length, b.length);
  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let idx = 0;
  while (idx < overlap) {
    const x = a[idx] ?? 0;
    const y = b[idx] ?? 0;
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    idx += 1;
  }
  const hasZeroNorm = sumSquaresA === 0 || sumSquaresB === 0;
  return hasZeroNorm ? 0 : dotProduct / (Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB));
}
106
+
43
107
  /**
44
108
  * Daily compounding insights — compressed JSON for Phase 0 evolution.
45
109
  *
@@ -54,6 +118,7 @@ export default async function compoundInsights(args: any, ctx: any) {
54
118
  const includeToolUsage = parsed.data.includeToolUsage !== false;
55
119
  const includeScheduleHealth = parsed.data.includeScheduleHealth !== false;
56
120
  const includeMemoryHealth = parsed.data.includeMemoryHealth !== false;
121
+ const includeScriptCandidates = parsed.data.includeScriptCandidates !== false;
57
122
  const includeByAgent = parsed.data.includeByAgent !== false;
58
123
 
59
124
  // `days` is a validated positive int, so it is safe to interpolate into the
@@ -144,27 +209,172 @@ export default async function compoundInsights(args: any, ctx: any) {
144
209
  ).map((r: any) => ({ tool: r.tool, calls: r.calls }));
145
210
  }
146
211
 
147
- // Memory health (whole store, by scope + source).
212
+ // Memory health (whole store, by scope + source). Pollution markers are
213
+ // SQL-light counts plus JS-side embedding similarity where available; prod
214
+ // SQLite does not expose a scalar cosine_similarity() function.
148
215
  if (includeMemoryHealth) {
149
216
  const memRows = rowsToObjects(
150
217
  await ctx.swarm.db_query({
151
- sql: `SELECT scope, source, count(*) as cnt FROM agent_memory GROUP BY scope, source`,
218
+ sql: `SELECT scope, source, count(*) as cnt,
219
+ sum(case when accessCount = 0 then 1 else 0 end) as zeroAccess,
220
+ sum(case when sourceTaskId IS NOT NULL OR sourcePath IS NOT NULL then 1 else 0 end) as referenced
221
+ FROM agent_memory GROUP BY scope, source`,
152
222
  }),
153
223
  );
154
224
  const totalMem = memRows.reduce((s: number, r: any) => s + (r.cnt ?? 0), 0);
225
+ const bySource: any = {};
226
+ for (const r of memRows) {
227
+ bySource[r.source] ??= {
228
+ total: 0,
229
+ percentOfStore: 0,
230
+ zeroAccess: 0,
231
+ zeroAccessPercent: 0,
232
+ referenced: 0,
233
+ };
234
+ bySource[r.source].total += asNumber(r.cnt);
235
+ bySource[r.source].zeroAccess += asNumber(r.zeroAccess);
236
+ bySource[r.source].referenced += asNumber(r.referenced);
237
+ }
238
+ for (const source of Object.keys(bySource)) {
239
+ bySource[source].percentOfStore = percent(bySource[source].total, totalMem);
240
+ bySource[source].zeroAccessPercent = percent(bySource[source].zeroAccess, bySource[source].total);
241
+ }
242
+
243
+ const autoSnapshotSources = ["session_summary", "task_completion"];
244
+ const autoSnapshotTotal = autoSnapshotSources.reduce(
245
+ (sum, source) => sum + (bySource[source]?.total ?? 0),
246
+ 0,
247
+ );
248
+ const popularButUseless = rowsToObjects(
249
+ await ctx.swarm.db_query({
250
+ sql: `SELECT id, name, source, accessCount, alpha, beta,
251
+ round(alpha / nullif(alpha + beta, 0), 3) as usefulness,
252
+ substr(content, 1, 180) as preview
253
+ FROM agent_memory
254
+ WHERE source IN ('session_summary','task_completion')
255
+ AND accessCount >= 5
256
+ AND alpha <= beta
257
+ ORDER BY accessCount DESC, beta DESC LIMIT 10`,
258
+ }),
259
+ ).map((r: any) => ({
260
+ id: r.id,
261
+ name: r.name,
262
+ source: r.source,
263
+ accessCount: asNumber(r.accessCount),
264
+ usefulness: Number(r.usefulness ?? 0),
265
+ preview: r.preview,
266
+ }));
267
+ const zeroAccessStaleRefRows = rowsToObjects(
268
+ await ctx.swarm.db_query({
269
+ sql: `SELECT source, count(*) as count
270
+ FROM agent_memory
271
+ WHERE accessCount = 0
272
+ AND (sourceTaskId IS NOT NULL OR sourcePath IS NOT NULL)
273
+ AND createdAt < datetime('now','-${days} days')
274
+ GROUP BY source ORDER BY count DESC`,
275
+ }),
276
+ );
277
+
278
+ const similarityRows = rowsToObjects(
279
+ await ctx.swarm.db_query({
280
+ sql: `SELECT id, name, source, accessCount, embedding
281
+ FROM agent_memory
282
+ WHERE source IN ('session_summary','task_completion')
283
+ AND embedding IS NOT NULL
284
+ ORDER BY accessCount DESC LIMIT 30`,
285
+ }),
286
+ );
287
+ let strongestAutoSnapshotPair: any = null;
288
+ const vectors = similarityRows
289
+ .map((r: any) => ({ ...r, vector: decodeFloat32Blob(r.embedding) }))
290
+ .filter((r: any) => r.vector);
291
+ for (let i = 0; i < vectors.length; i++) {
292
+ for (let j = i + 1; j < vectors.length; j++) {
293
+ const similarity = cosineSimilarity(vectors[i].vector, vectors[j].vector);
294
+ if (!strongestAutoSnapshotPair || similarity > strongestAutoSnapshotPair.similarity) {
295
+ strongestAutoSnapshotPair = {
296
+ similarity: round1(similarity * 100) / 100,
297
+ a: { id: vectors[i].id, name: vectors[i].name, source: vectors[i].source },
298
+ b: { id: vectors[j].id, name: vectors[j].name, source: vectors[j].source },
299
+ };
300
+ }
301
+ }
302
+ }
303
+
155
304
  insights.memoryHealth = {
156
305
  total: totalMem,
157
306
  byScope: memRows.reduce((m: any, r: any) => {
158
307
  m[r.scope] = (m[r.scope] ?? 0) + r.cnt;
159
308
  return m;
160
309
  }, {}),
161
- bySource: memRows.reduce((m: any, r: any) => {
162
- m[r.source] = (m[r.source] ?? 0) + r.cnt;
163
- return m;
164
- }, {}),
310
+ bySource,
311
+ pollution: {
312
+ autoSnapshotSources,
313
+ autoSnapshotTotal,
314
+ autoSnapshotPercent: percent(autoSnapshotTotal, totalMem),
315
+ popularButUselessAutoSnapshots: popularButUseless,
316
+ zeroAccessStaleRefs: {
317
+ total: zeroAccessStaleRefRows.reduce((sum: number, r: any) => sum + asNumber(r.count), 0),
318
+ bySource: zeroAccessStaleRefRows.reduce((m: any, r: any) => {
319
+ m[r.source] = asNumber(r.count);
320
+ return m;
321
+ }, {}),
322
+ },
323
+ similarityCheck: {
324
+ sqliteCosineSimilarityAvailable: false,
325
+ path: "js",
326
+ sampledAutoSnapshots: vectors.length,
327
+ strongestAutoSnapshotPair,
328
+ },
329
+ },
165
330
  };
166
331
  }
167
332
 
333
+ // Evolution/self-scripting candidates: high-frequency consecutive tool
334
+ // triplets are good prompts for a future seed script.
335
+ if (includeScriptCandidates) {
336
+ const rows = rowsToObjects(
337
+ await ctx.swarm.db_query({
338
+ sql: `WITH raw AS (
339
+ SELECT sessionId, iteration, lineNumber, content,
340
+ json_extract(content, '$.tool_name') as jsonToolName
341
+ FROM session_logs
342
+ WHERE createdAt > ${w}
343
+ AND (content LIKE '%"type":"tool_use"%' OR json_extract(content, '$.tool_name') IS NOT NULL)
344
+ )
345
+ SELECT sessionId, iteration, lineNumber, jsonToolName, content
346
+ FROM raw ORDER BY sessionId, iteration, lineNumber LIMIT 100`,
347
+ }),
348
+ );
349
+ const bySession = new Map<string, string[]>();
350
+ for (const row of rows) {
351
+ const tool = row.jsonToolName || extractToolName(String(row.content ?? ""));
352
+ if (!tool) continue;
353
+ const key = String(row.sessionId ?? "unknown");
354
+ const tools = bySession.get(key) ?? [];
355
+ tools.push(tool);
356
+ bySession.set(key, tools);
357
+ }
358
+ const counts = new Map<string, { tools: string[]; count: number }>();
359
+ for (const tools of bySession.values()) {
360
+ for (let i = 0; i <= tools.length - 3; i++) {
361
+ const triplet = tools.slice(i, i + 3);
362
+ const key = triplet.join(" -> ");
363
+ const current = counts.get(key) ?? { tools: triplet, count: 0 };
364
+ current.count += 1;
365
+ counts.set(key, current);
366
+ }
367
+ }
368
+ insights.scriptCandidates = [...counts.values()]
369
+ .sort((a, b) => b.count - a.count)
370
+ .slice(0, 10)
371
+ .map((r) => ({
372
+ tools: r.tools,
373
+ count: r.count,
374
+ suggestedName: r.tools.map(toolSlug).filter(Boolean).slice(0, 3).join("-").slice(0, 80),
375
+ }));
376
+ }
377
+
168
378
  // Per-agent breakdown — covers every agent that ran a task in the window.
169
379
  if (includeByAgent) {
170
380
  insights.byAgent = rowsToObjects(