@desplega.ai/agent-swarm 1.76.2 → 1.77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/hooks/hook.ts CHANGED
@@ -3,15 +3,14 @@
3
3
  import pkg from "../../package.json";
4
4
  import {
5
5
  buildRatingsFromLlm,
6
- buildSummaryWithRatingsPrompt,
7
6
  dedupeRetrievalsForRater,
8
7
  fetchRetrievalsForTask,
9
8
  isLlmRaterEnabled,
10
9
  postRatings,
11
10
  type RetrievalRow,
12
11
  } from "../be/memory/raters/llm";
13
- import { runMemoryRater } from "../be/memory/raters/llm-summarizer";
14
12
  import type { Agent } from "../types";
13
+ import { summarizeSession as runSummarize } from "../utils/internal-ai";
15
14
  import { checkToolLoop, clearToolHistory } from "./tool-loop-detection";
16
15
 
17
16
  const SERVER_NAME = pkg.config?.name ?? "agent-swarm";
@@ -231,6 +230,168 @@ export async function resolveStopHookTaskContext(
231
230
  return { taskContext, taskId };
232
231
  }
233
232
 
233
/**
 * Optional collaborator overrides accepted by `runStopHookSessionSummary`.
 *
 * Intended for tests only: production callers leave `deps` out, in which case
 * the claude Stop hook falls back to the implementations imported by this
 * module.
 *
 * Explicit injection is used instead of `bun:test`'s `mock.module()` because
 * module mocks are process-wide and leak into other test files, which would
 * break sibling suites that rely on the real `runSummarize` / `postRatings`.
 * The same approach is used by `summarizeSessionForPi` in
 * `src/providers/pi-mono-extension.ts`.
 *
 * Members are listed in the order the Stop hook invokes them.
 */
export interface RunStopHookSessionSummaryDeps {
  /** Override for the retrieval lookup performed before summarizing. */
  fetchRetrievalsForTask?: typeof fetchRetrievalsForTask;
  /** Override for the `internal-ai` session summarizer. */
  runSummarize?: typeof runSummarize;
  /** Override for converting LLM ratings into rating events. */
  buildRatingsFromLlm?: typeof buildRatingsFromLlm;
  /** Override for posting the rating events. */
  postRatings?: typeof postRatings;
}
249
+
250
/** Inputs for a single `runStopHookSessionSummary` invocation. */
export interface RunStopHookSessionSummaryOpts {
  /** Id of the agent whose session is being summarized. */
  agentId: string;
  /** Path of the session transcript file to read. */
  transcriptPath: string;
  /** Environment to read settings from. Falls back to `process.env`; tests may inject their own. */
  env?: NodeJS.ProcessEnv;
}
256
+
257
/**
 * Run session summarization for the claude Stop hook via the shared
 * `internal-ai` abstraction. Replaces the previous `runMemoryRater` call so
 * `CLAUDE_CODE_OAUTH_TOKEN`-only environments (Pro/Max OAuth users without
 * OpenRouter) keep working via the `claude -p` fallback inside the wrapper.
 *
 * Flow:
 *   1. Read the tail of the transcript file (last 20,000 characters; the cut
 *      is made on the decoded string, not on bytes). Transcripts of 100
 *      characters or fewer are skipped.
 *   2. Resolve task context + (optionally) memory retrievals for ratings.
 *   3. Call `runSummarize` from `src/utils/internal-ai`, which returns
 *      structured `{summary, ratings}` or `null` when no credentials resolve.
 *   4. Apply the length/quality gate; POST the summary to `/api/memory/index`.
 *   5. If the LLM rater is enabled AND ratings came back, POST them via
 *      `postRatings`.
 *
 * Steps 4 and 5 are independent best-effort steps: a failure while indexing
 * the summary is logged and does not prevent the ratings from being posted.
 *
 * @param opts Agent id, transcript path and (optionally) an injected env.
 * @param deps Test-only overrides for the collaborators; defaults to the
 *   module-level imports.
 * @returns Resolves once all best-effort work has finished. Never rejects —
 *   every error is caught and logged so session shutdown is never blocked by
 *   a thrown error.
 */
export async function runStopHookSessionSummary(
  opts: RunStopHookSessionSummaryOpts,
  deps: RunStopHookSessionSummaryDeps = {},
): Promise<void> {
  const env = opts.env ?? process.env;
  // Any non-empty value disables the summary (plain truthiness check).
  if (env.SKIP_SESSION_SUMMARY) return;

  const _runSummarize = deps.runSummarize ?? runSummarize;
  const _fetchRetrievals = deps.fetchRetrievalsForTask ?? fetchRetrievalsForTask;
  const _postRatings = deps.postRatings ?? postRatings;
  const _buildRatings = deps.buildRatingsFromLlm ?? buildRatingsFromLlm;

  // Only the end of the transcript is sent to the summarizer.
  const transcriptTailChars = 20000;
  // Transcripts at or below this length are treated as "nothing happened".
  const minTranscriptChars = 100;
  // Summaries at or below this length are not worth indexing.
  const minSummaryChars = 20;

  // Caught values are `unknown`; narrow instead of asserting `Error`.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  try {
    let transcript = "";
    try {
      const fullTranscript = await Bun.file(opts.transcriptPath).text();
      // A negative slice start longer than the string returns the whole string.
      transcript = fullTranscript.slice(-transcriptTailChars);
    } catch {
      /* no transcript */
    }

    if (transcript.length <= minTranscriptChars) return;

    // Prefer AGENT_SWARM_TASK_ID env var; fall back to TASK_FILE on
    // disk. PR #444 gate-trace showed the file disappears mid-session
    // and the silent catch dropped every LLM rater piggyback.
    const { taskContext, taskId } = await resolveStopHookTaskContext(env);

    const apiUrl = env.MCP_BASE_URL || `http://localhost:${env.PORT || "3013"}`;
    const apiKey = env.API_KEY || "";

    // Memory-rater v1.5 step-4: piggyback per-memory ratings on the
    // existing summary call when MEMORY_RATERS includes `llm`.
    // NOTE(review): called without the injected `env`, so it presumably reads
    // `process.env` directly — confirm before relying on `opts.env` to toggle it.
    const llmRaterEnabled = isLlmRaterEnabled();
    let retrievals: RetrievalRow[] = [];
    if (llmRaterEnabled && taskId) {
      const rawRetrievals = await _fetchRetrievals({
        apiUrl,
        apiKey,
        agentId: opts.agentId,
        taskId,
      });
      // Dedup self-similar cron-task memories before sending to the
      // rater — see `dedupeRetrievalsForRater` doc for the why.
      retrievals = dedupeRetrievalsForRater(rawRetrievals);
    }

    const result = await _runSummarize({
      harness: "claude",
      transcript,
      retrievals,
      taskContext: {
        sourceTaskId: taskId ?? "",
        agentId: opts.agentId,
        // claude's path doesn't pass the user prompt here today — leave undefined.
        prompt: undefined,
      },
      apiUrl,
      apiKey,
    });
    // null = no auth resolved (no OPENROUTER, ANTHROPIC, OPENAI, codex OAuth,
    // or CLAUDE_CODE_OAUTH_TOKEN) — silent skip, same as today's no-key behavior.
    if (!result) return;

    const summary = result.summary.trim();
    const ratings = result.ratings ?? [];

    // Skip indexing if the session had no significant learnings.
    if (
      summary.length > minSummaryChars &&
      !summary.toLowerCase().includes("no significant learnings")
    ) {
      // Isolated so that an indexing failure cannot skip the rating emission below.
      try {
        const response = await fetch(`${apiUrl}/api/memory/index`, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
            "X-Agent-ID": opts.agentId,
          },
          body: JSON.stringify({
            agentId: opts.agentId,
            content: summary,
            name: taskContext
              ? `Session: ${taskContext.slice(0, 80)}`
              : `Session: ${new Date().toISOString().slice(0, 16)}`,
            scope: "agent",
            source: "session_summary",
            ...(taskId ? { sourceTaskId: taskId } : {}),
          }),
        });
        // `fetch` only rejects on network errors; surface HTTP-level failures too.
        if (!response.ok) {
          console.error("[session-summary] memory index POST returned non-ok", {
            status: response.status,
          });
        }
      } catch (err) {
        console.error("[session-summary] memory index POST failed:", describeError(err));
      }
    }

    // Best-effort: post LLM ratings. Never blocks summary indexing.
    if (llmRaterEnabled && taskId && retrievals.length > 0 && ratings.length === 0) {
      console.error("[memory-rater:llm] piggyback produced no ratings", {
        retrievalsLen: retrievals.length,
        ratingsLen: 0,
      });
    }
    if (llmRaterEnabled && taskId && ratings.length > 0) {
      try {
        const events = _buildRatings(ratings, retrievals);
        if (events.length > 0) {
          await _postRatings({
            apiUrl,
            apiKey,
            agentId: opts.agentId,
            taskId,
            events,
          });
        }
      } catch (err) {
        console.error("[memory-rater:llm] piggyback rating emission failed:", describeError(err));
      }
    }
  } catch (err) {
    // Non-blocking — session summarization failure should never block shutdown.
    // Log it so a broken summarizer or unreachable API is diagnosable.
    console.error("[session-summary] stop-hook summarization failed:", describeError(err));
  }
}
394
+
234
395
  /**
235
396
  * Main hook handler - processes Claude Code hook events
236
397
  */
@@ -1079,151 +1240,17 @@ ${hasAgentIdHeader() ? `You have a pre-defined agent ID via header: ${mcpConfig?
1079
1240
  }
1080
1241
  }
1081
1242
 
1082
- // Session summarization + LLM rater piggyback via OpenRouter (Vercel AI SDK).
1083
- //
1084
- // No-op when OPENROUTER_API_KEY is unset — self-hosters / OSS users
1085
- // without OpenRouter skip session summary + LLM ratings entirely. The
1086
- // previous `claude -p` path silently produced "Not logged in · Please
1087
- // run /login" rows after the 2026-05-05 CLAUDE_CODE_VERSION bump
1088
- // stopped propagating CLAUDE_CODE_OAUTH_TOKEN to hook subprocesses.
1089
- if (
1090
- agentInfo?.id &&
1091
- msg.transcript_path &&
1092
- !process.env.SKIP_SESSION_SUMMARY &&
1093
- process.env.OPENROUTER_API_KEY
1094
- ) {
1095
- try {
1096
- let transcript = "";
1097
- try {
1098
- const fullTranscript = await Bun.file(msg.transcript_path).text();
1099
- transcript =
1100
- fullTranscript.length > 20000 ? fullTranscript.slice(-20000) : fullTranscript;
1101
- } catch {
1102
- /* no transcript */
1103
- }
1104
-
1105
- if (transcript.length > 100) {
1106
- // Prefer AGENT_SWARM_TASK_ID env var; fall back to TASK_FILE on
1107
- // disk. PR #444 gate-trace showed the file disappears mid-session
1108
- // and the silent catch dropped every LLM rater piggyback.
1109
- const { taskContext, taskId } = await resolveStopHookTaskContext();
1110
-
1111
- const apiUrl =
1112
- process.env.MCP_BASE_URL || `http://localhost:${process.env.PORT || "3013"}`;
1113
- const apiKey = process.env.API_KEY || "";
1114
-
1115
- // Memory-rater v1.5 step-4: piggyback per-memory ratings on the
1116
- // existing summary call when MEMORY_RATERS includes `llm`.
1117
- // Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-4.md §3
1118
- const llmRaterEnabled = isLlmRaterEnabled();
1119
- let retrievals: RetrievalRow[] = [];
1120
- if (llmRaterEnabled && taskId) {
1121
- const rawRetrievals = await fetchRetrievalsForTask({
1122
- apiUrl,
1123
- apiKey,
1124
- agentId: agentInfo.id,
1125
- taskId,
1126
- });
1127
- // Dedup self-similar cron-task memories before sending to the
1128
- // rater — see `dedupeRetrievalsForRater` doc for the why.
1129
- retrievals = dedupeRetrievalsForRater(rawRetrievals);
1130
- }
1131
-
1132
- const baseSummarizePrompt = `You are summarizing an AI agent's work session. Extract ONLY high-value learnings.
1133
-
1134
- DO NOT include:
1135
- - Generic descriptions of what was done ("worked on task X")
1136
- - Tool calls or file reads
1137
- - Routine progress updates
1138
-
1139
- DO include (if present):
1140
- - **Mistakes made and corrections** — what went wrong and what fixed it
1141
- - **Discovered patterns** — reusable approaches, APIs, or codebase conventions
1142
- - **Codebase knowledge** — important file paths, architecture decisions, gotchas
1143
- - **Environment knowledge** — service URLs, config details, tool quirks
1144
- - **Failed approaches** — what was tried and didn't work (and why)
1145
-
1146
- Format as a bulleted list of concrete, reusable facts. If the session was routine with no significant learnings, respond with exactly: "No significant learnings."
1147
- ${taskContext ? `\nTask context: ${taskContext}` : ""}
1148
- Transcript:
1149
- ${transcript}`;
1150
-
1151
- // Always ask for the structured (summary + ratings) payload — same
1152
- // cost as the unstructured path. Empty retrievals → empty memory
1153
- // block → ratings: []; the postRatings gate below still skips the
1154
- // POST when there's nothing to send.
1155
- const summarizePrompt = buildSummaryWithRatingsPrompt(baseSummarizePrompt, retrievals);
1156
-
1157
- const raterResult = await runMemoryRater({
1158
- prompt: summarizePrompt,
1159
- apiKey: process.env.OPENROUTER_API_KEY,
1160
- });
1161
- if (!raterResult.ok) {
1162
- console.error("[memory-rater:llm] runMemoryRater returned non-ok", {
1163
- reason: raterResult.reason,
1164
- ...(raterResult.status !== undefined ? { status: raterResult.status } : {}),
1165
- });
1166
- } else {
1167
- const summary = raterResult.data.summary;
1168
- const ratings = raterResult.data.ratings;
1169
-
1170
- // Skip indexing if the session had no significant learnings
1171
- if (
1172
- summary &&
1173
- summary.length > 20 &&
1174
- !summary.trim().toLowerCase().includes("no significant learnings")
1175
- ) {
1176
- await fetch(`${apiUrl}/api/memory/index`, {
1177
- method: "POST",
1178
- headers: {
1179
- "Content-Type": "application/json",
1180
- ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
1181
- "X-Agent-ID": agentInfo.id,
1182
- },
1183
- body: JSON.stringify({
1184
- agentId: agentInfo.id,
1185
- content: summary,
1186
- name: taskContext
1187
- ? `Session: ${taskContext.slice(0, 80)}`
1188
- : `Session: ${new Date().toISOString().slice(0, 16)}`,
1189
- scope: "agent",
1190
- source: "session_summary",
1191
- ...(taskId ? { sourceTaskId: taskId } : {}),
1192
- }),
1193
- });
1194
- }
1195
-
1196
- // Best-effort: post LLM ratings. Never blocks summary indexing.
1197
- if (llmRaterEnabled && taskId && retrievals.length > 0 && ratings.length === 0) {
1198
- console.error("[memory-rater:llm] piggyback produced no ratings", {
1199
- retrievalsLen: retrievals.length,
1200
- ratingsLen: 0,
1201
- });
1202
- }
1203
- if (llmRaterEnabled && taskId && ratings.length > 0) {
1204
- try {
1205
- const events = buildRatingsFromLlm(ratings, retrievals);
1206
- if (events.length > 0) {
1207
- await postRatings({
1208
- apiUrl,
1209
- apiKey,
1210
- agentId: agentInfo.id,
1211
- taskId,
1212
- events,
1213
- });
1214
- }
1215
- } catch (err) {
1216
- console.error(
1217
- "[memory-rater:llm] piggyback rating emission failed:",
1218
- (err as Error).message,
1219
- );
1220
- }
1221
- }
1222
- }
1223
- }
1224
- } catch {
1225
- // Non-blocking — session summarization failure should never block shutdown
1226
- }
1243
+ // Session summarization + LLM rater piggyback via the shared internal-ai
1244
+ // wrapper (OpenRouter → Anthropic → OpenAI → codex OAuth →
1245
+ // CLAUDE_CODE_OAUTH_TOKEN claude -p fallback). The wrapper handles
1246
+ // credential resolution and returns null when nothing resolves, so Pro/Max
1247
+ // OAuth users keep working without OpenRouter (the working path PR #450
1248
+ // restored). Non-blocking failures never block shutdown.
1249
+ if (agentInfo?.id && msg.transcript_path) {
1250
+ await runStopHookSessionSummary({
1251
+ agentId: agentInfo.id,
1252
+ transcriptPath: msg.transcript_path,
1253
+ });
1227
1254
  }
1228
1255
 
1229
1256
  // Mark the agent as offline
@@ -87,6 +87,13 @@ export const StatusIdentitySchema = z.object({
87
87
  is_cloud: z.boolean(),
88
88
  marketing_url: z.string().nullable(),
89
89
  hide_cloud_promo: z.boolean(),
90
+ /**
91
+ * Stable identifier for the org/tenant this swarm belongs to. Set by the
92
+ * orchestrator on cloud deployments via `SWARM_ORG_ID`; null on self-host
93
+ * unless the operator opts in. Threaded into telemetry events so multi-org
94
+ * cloud installs can be sliced server-side.
95
+ */
96
+ org_id: z.string().nullable(),
90
97
  });
91
98
  export type StatusIdentity = z.infer<typeof StatusIdentitySchema>;
92
99
 
@@ -240,6 +247,7 @@ function buildIdentity(): StatusIdentity {
240
247
  is_cloud: cloudRaw === "true" || cloudRaw === "1",
241
248
  marketing_url: process.env.SWARM_MARKETING_URL?.trim() || null,
242
249
  hide_cloud_promo: hideRaw === "true" || hideRaw === "1",
250
+ org_id: process.env.SWARM_ORG_ID?.trim() || null,
243
251
  };
244
252
  }
245
253
 
@@ -188,17 +188,25 @@ class ClaudeSession implements ProviderSession {
188
188
  `\x1b[2m[${config.role}]\x1b[0m \x1b[36m▸\x1b[0m Spawning Claude (model: ${model}) for task ${config.taskId.slice(0, 8)}`,
189
189
  );
190
190
 
191
+ const sourceEnv = config.env || process.env;
191
192
  this.proc = Bun.spawn(cmd, {
192
193
  cwd: this.config.cwd,
193
194
  env: {
194
195
  ENABLE_PROMPT_CACHING_1H: "1",
195
- ...(config.env || process.env),
196
+ ...sourceEnv,
196
197
  TASK_FILE: taskFilePath,
197
198
  // Belt-and-braces: TASK_FILE on disk can disappear mid-session (race
198
199
  // with task lifecycle), which silently drops the Stop-hook memory
199
200
  // rater. The hook prefers these env vars when present. See PR #444.
200
201
  AGENT_SWARM_TASK_ID: config.taskId,
201
202
  AGENT_SWARM_AGENT_ID: config.agentId,
203
+ // claude CLI strips CLAUDE_CODE_OAUTH_TOKEN from hook subprocess env
204
+ // (security: prevents OAuth-token leakage to user-written hooks).
205
+ // Mirror it under a name claude doesn't recognize so the Stop hook
206
+ // can resolve the claude-cli fallback in internal-ai/credentials.ts.
207
+ ...(sourceEnv.CLAUDE_CODE_OAUTH_TOKEN
208
+ ? { AGENT_SWARM_CLAUDE_OAUTH_TOKEN: sourceEnv.CLAUDE_CODE_OAUTH_TOKEN }
209
+ : {}),
202
210
  } as Record<string, string>,
203
211
  stdout: "pipe",
204
212
  stderr: "pipe",