@desplega.ai/agent-swarm 1.76.2 → 1.76.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,6 +65,8 @@ import {
65
65
  type Usage,
66
66
  type WebSearchItem,
67
67
  } from "@openai/codex-sdk";
68
+ import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
69
+ import { summarizeSession as runSummarize } from "../utils/internal-ai";
68
70
  import { scrubSecrets } from "../utils/secret-scrubber";
69
71
  import { type CodexAgentsMdHandle, writeCodexAgentsMd } from "./codex-agents-md";
70
72
  import { computeCodexCostUsd, getCodexContextWindow, resolveCodexModel } from "./codex-models";
@@ -340,6 +342,26 @@ export async function buildCodexConfig(
340
342
  };
341
343
  }
342
344
 
345
+ /**
346
+ * Test-injection points for the codex session-end summarization path.
347
+ *
348
+ * Production callers omit `deps` entirely — the `CodexSession.summarizeAtEnd`
349
+ * helper falls back to the symbols imported at the top of this file. Tests
350
+ * override each function so we can exercise the summarize/index/rate flow
351
+ * without standing up a real API server or LLM.
352
+ *
353
+ * Why this exists (mirrors `SummarizeSessionForPiDeps`): `bun:test`'s
354
+ * `mock.module()` is process-wide and leaks across test files in the same
355
+ * `bun test` run, breaking siblings that import the real symbols. Explicit DI
356
+ * keeps the boundary local to this adapter.
357
+ */
358
+ export interface SummarizeSessionForCodexDeps {
359
+ runSummarize?: typeof runSummarize;
360
+ fetchRetrievalsForTask?: typeof fetchRetrievalsForTask;
361
+ postRatings?: typeof postRatings;
362
+ buildRatingsFromLlm?: typeof buildRatingsFromLlm;
363
+ }
364
+
343
365
  /** Running session backed by a Codex `Thread`. */
344
366
  class CodexSession implements ProviderSession {
345
367
  private readonly thread: Thread;
@@ -348,6 +370,7 @@ class CodexSession implements ProviderSession {
348
370
  private readonly resolvedModel: string;
349
371
  private readonly contextWindow: number;
350
372
  private readonly skillsDir: string;
373
+ private readonly summarizeDeps: SummarizeSessionForCodexDeps;
351
374
  private readonly listeners: Array<(event: ProviderEvent) => void> = [];
352
375
  private readonly eventQueue: ProviderEvent[] = [];
353
376
  private readonly logFileHandle: ReturnType<ReturnType<typeof Bun.file>["writer"]>;
@@ -355,6 +378,11 @@ class CodexSession implements ProviderSession {
355
378
  private readonly completionPromise: Promise<ProviderResult>;
356
379
  private resolveCompletion!: (result: ProviderResult) => void;
357
380
  private abortController: AbortController | null = null;
381
+ /**
382
+ * Per-session transcript buffer used to feed the session-end summarizer.
383
+ * Reset at the start of `runSession` and appended to in `handleEvent`.
384
+ */
385
+ private transcript: string[] = [];
358
386
  /**
359
387
  * Mutable holder for the current turn's `AbortController`. Shared with the
360
388
  * swarm event handler so it can trigger an abort from outside `runSession`
@@ -366,6 +394,14 @@ class CodexSession implements ProviderSession {
366
394
  private lastUsage: Usage | null = null;
367
395
  private aborted = false;
368
396
  private settled = false;
397
+ /**
398
+ * Result captured by `settle` but held back from `resolveCompletion` until
399
+ * `runSession`'s `finally` block has fully cleaned up (log writer flush,
400
+ * AGENTS.md cleanup, session summary). Without this, callers awaiting
401
+ * `waitForCompletion` would race the cleanup. Phase 3 added the session
402
+ * summary path which materially relies on this ordering for testability.
403
+ */
404
+ private pendingResult: ProviderResult | null = null;
369
405
 
370
406
  constructor(
371
407
  thread: Thread,
@@ -374,6 +410,7 @@ class CodexSession implements ProviderSession {
374
410
  resolvedModel: string,
375
411
  initialEvents: ProviderEvent[] = [],
376
412
  skillsDir?: string,
413
+ summarizeDeps: SummarizeSessionForCodexDeps = {},
377
414
  ) {
378
415
  this.thread = thread;
379
416
  this.config = config;
@@ -385,6 +422,7 @@ class CodexSession implements ProviderSession {
385
422
  // runtime default of `${HOME}/.codex/skills`.
386
423
  this.skillsDir =
387
424
  skillsDir ?? process.env.CODEX_SKILLS_DIR ?? join(os.homedir(), ".codex", "skills");
425
+ this.summarizeDeps = summarizeDeps;
388
426
  this.logFileHandle = Bun.file(config.logFile).writer();
389
427
 
390
428
  this.completionPromise = new Promise<ProviderResult>((resolve) => {
@@ -474,7 +512,10 @@ class CodexSession implements ProviderSession {
474
512
  private settle(result: ProviderResult): void {
475
513
  if (this.settled) return;
476
514
  this.settled = true;
477
- this.resolveCompletion(result);
515
+ // Resolution deferred until `runSession`'s finally-block fully cleans up
516
+ // (see `pendingResult` rationale on the field above). Caller-visible
517
+ // ordering: cleanup → resolve waitForCompletion.
518
+ this.pendingResult = result;
478
519
  }
479
520
 
480
521
  /** Build CostData from the most recent turn usage. */
@@ -559,6 +600,41 @@ class CodexSession implements ProviderSession {
559
600
  );
560
601
  }
561
602
 
603
+ /**
604
+ * Render a completed tool item as a short, signal-dense one-liner for the
605
+ * session-end summarization transcript. Picks tool-type-specific fields so
606
+ * the transcript doesn't get drowned in raw JSON (a single `command_execution`
607
+ * can carry 100KB+ of `aggregated_output`). Free-text fields are truncated
608
+ * (stdout to 500 chars, search query to 200 chars, file changes to the first 5 entries); unknown types fall back to `JSON.stringify` cut to 500 chars.
609
+ */
610
+ private shortenItemResult(item: ThreadItem): string {
611
+ switch (item.type) {
612
+ case "command_execution": {
613
+ const cmd = item as CommandExecutionItem;
614
+ const stdout = (cmd.aggregated_output ?? "").slice(0, 500);
615
+ return `exit=${cmd.exit_code ?? "?"} status=${cmd.status ?? "?"} stdout=${stdout}`;
616
+ }
617
+ case "file_change": {
618
+ const fc = item as FileChangeItem;
619
+ const summarised = (fc.changes ?? [])
620
+ .slice(0, 5)
621
+ .map((c) => `${c.kind}:${c.path}`)
622
+ .join(",");
623
+ return `changes=[${summarised}]`;
624
+ }
625
+ case "mcp_tool_call": {
626
+ const mcp = item as McpToolCallItem;
627
+ return `server=${mcp.server} tool=${mcp.tool} status=${mcp.status ?? "?"}`;
628
+ }
629
+ case "web_search": {
630
+ const ws = item as WebSearchItem;
631
+ return `query=${(ws.query ?? "").slice(0, 200)}`;
632
+ }
633
+ default:
634
+ return JSON.stringify(item).slice(0, 500);
635
+ }
636
+ }
637
+
562
638
  private handleEvent(event: ThreadEvent): void {
563
639
  // Mirror every raw SDK event into the log as raw_log for debugability —
564
640
  // parity with Claude's JSONL envelope.
@@ -582,6 +658,14 @@ class CodexSession implements ProviderSession {
582
658
  toolName: this.toolNameForItem(event.item),
583
659
  args: this.toolArgsForItem(event.item),
584
660
  });
661
+ // Mirror into the transcript buffer for session-end summarization.
662
+ // Tools are the bulk of useful signal in a codex session, so we
663
+ // capture both the start (args) and the completion (result digest).
664
+ this.transcript.push(
665
+ `Tool[${this.toolNameForItem(event.item)}] started: ${JSON.stringify(
666
+ this.toolArgsForItem(event.item),
667
+ ).slice(0, 500)}`,
668
+ );
585
669
  }
586
670
  break;
587
671
  }
@@ -615,6 +699,9 @@ class CodexSession implements ProviderSession {
615
699
  toolName: this.toolNameForItem(item),
616
700
  result: item,
617
701
  });
702
+ this.transcript.push(
703
+ `Tool[${this.toolNameForItem(item)}] completed: ${this.shortenItemResult(item)}`,
704
+ );
618
705
  break;
619
706
  }
620
707
  switch (item.type) {
@@ -622,6 +709,7 @@ class CodexSession implements ProviderSession {
622
709
  const msg = item as AgentMessageItem;
623
710
  if (msg.text) {
624
711
  this.emit({ type: "message", role: "assistant", content: msg.text });
712
+ this.transcript.push(`Assistant: ${msg.text}`);
625
713
  }
626
714
  break;
627
715
  }
@@ -764,6 +852,11 @@ class CodexSession implements ProviderSession {
764
852
  this.emit(event),
765
853
  );
766
854
 
855
+ // Reset + seed the transcript buffer so the session-end summarizer has
856
+ // the user's prompt as anchor context. Subsequent appends happen in
857
+ // `handleEvent` (tool start/end, agent_message).
858
+ this.transcript = [`User: ${resolvedPrompt}`];
859
+
767
860
  const streamed = await this.thread.runStreamed(resolvedPrompt, {
768
861
  signal: this.abortController.signal,
769
862
  });
@@ -827,6 +920,18 @@ class CodexSession implements ProviderSession {
827
920
  failureReason: message,
828
921
  });
829
922
  } finally {
923
+ // Session-end summarization. Pure addition for codex — no behavior to
924
+ // preserve. Wrapped in its own try/catch so that a summary failure can NOT
925
+ // block the existing log/AGENTS.md cleanup below. Gate `SKIP_SESSION_SUMMARY=1`
926
+ // matches the parity convention used by the claude Stop hook + pi/opencode.
927
+ if (process.env.SKIP_SESSION_SUMMARY !== "1") {
928
+ try {
929
+ await this.summarizeAtEnd();
930
+ } catch (err) {
931
+ console.error("session_summary failed (codex):", err);
932
+ }
933
+ }
934
+
830
935
  // Detach the abort controller now that the turn has settled.
831
936
  this.abortRef.current = null;
832
937
  try {
@@ -835,6 +940,121 @@ class CodexSession implements ProviderSession {
835
940
  // Ignore log writer cleanup failures.
836
941
  }
837
942
  await this.agentsMdHandle.cleanup();
943
+
944
+ // Resolve `waitForCompletion()` only AFTER all cleanup has finished so
945
+ // downstream observers (tests + the runner's `.then(...)` chain) don't
946
+ // race the finally-block side effects. Fallback to an error result if
947
+ // we somehow never called `settle` (defensive — every codepath in the
948
+ // try/catch above calls settle exactly once).
949
+ const finalResult =
950
+ this.pendingResult ??
951
+ ({
952
+ exitCode: 1,
953
+ sessionId: this._sessionId,
954
+ cost: this.buildCostData(this.lastUsage, true),
955
+ isError: true,
956
+ failureReason: "session did not settle",
957
+ } as ProviderResult);
958
+ this.resolveCompletion(finalResult);
959
+ }
960
+ }
961
+
962
+ /**
963
+ * Index a session summary into agent memory at the end of a codex turn.
964
+ *
965
+ * Mirrors `summarizeSessionForPi` and the claude Stop hook:
966
+ * 1. Truncate the in-memory transcript buffer to its last 20,000 characters.
967
+ * 2. Bail when the transcript is too short or the swarm context is
968
+ * missing (no agentId / no taskId / no apiUrl / no apiKey).
969
+ * 3. (Optional) Pre-fetch retrievals when `MEMORY_RATERS` includes `llm`
970
+ * so the LLM can score them alongside the summary.
971
+ * 4. Call `runSummarize` from `src/utils/internal-ai` (Phase 0). Returns
972
+ * `null` when no credential resolves — silent skip.
973
+ * 5. Apply length/quality gate; POST to `/api/memory/index`.
974
+ * 6. POST ratings (`events:` key, NOT `ratings:`) via `postRatings` when
975
+ * `MEMORY_RATERS=llm` and the LLM returned per-memory scores.
976
+ *
977
+ * Failures are reported via `console.error(..., err)` rather than swallowed,
978
+ * except the optional retrieval pre-fetch, which falls back to `[]` without logging. The outer try in `runSession`'s finally-block is the final safety
979
+ * net guaranteeing existing cleanup runs regardless.
980
+ */
981
+ private async summarizeAtEnd(): Promise<void> {
982
+ const transcriptStr = this.transcript.join("\n").slice(-20_000);
983
+ const { agentId, taskId, apiUrl, apiKey } = this.config;
984
+ if (!agentId || !taskId || !apiUrl || !apiKey) return;
985
+ if (transcriptStr.length <= 100) return;
986
+
987
+ const _runSummarize = this.summarizeDeps.runSummarize ?? runSummarize;
988
+ const _fetchRetrievals = this.summarizeDeps.fetchRetrievalsForTask ?? fetchRetrievalsForTask;
989
+ const _postRatings = this.summarizeDeps.postRatings ?? postRatings;
990
+ const _buildRatings = this.summarizeDeps.buildRatingsFromLlm ?? buildRatingsFromLlm;
991
+
992
+ const memoryRaters = (process.env.MEMORY_RATERS ?? "")
993
+ .split(",")
994
+ .map((s) => s.trim())
995
+ .filter(Boolean);
996
+ const wantRatings = memoryRaters.includes("llm");
997
+ const retrievals = wantRatings
998
+ ? await _fetchRetrievals({ apiUrl, apiKey, agentId, taskId }).catch(() => [])
999
+ : [];
1000
+
1001
+ const result = await _runSummarize({
1002
+ harness: "codex",
1003
+ transcript: transcriptStr,
1004
+ retrievals,
1005
+ taskContext: {
1006
+ sourceTaskId: taskId,
1007
+ agentId,
1008
+ prompt: this.config.prompt,
1009
+ },
1010
+ apiUrl,
1011
+ apiKey,
1012
+ });
1013
+ // null = no auth resolved or wrapper exhausted retries (already logged inside)
1014
+ if (!result) return;
1015
+
1016
+ const summary = result.summary.trim();
1017
+ if (summary.length <= 20 || summary.toLowerCase().includes("no significant learnings")) {
1018
+ return;
1019
+ }
1020
+
1021
+ const indexResp = await fetch(`${apiUrl}/api/memory/index`, {
1022
+ method: "POST",
1023
+ headers: {
1024
+ "Content-Type": "application/json",
1025
+ Authorization: `Bearer ${apiKey}`,
1026
+ "X-Agent-ID": agentId,
1027
+ },
1028
+ body: JSON.stringify({
1029
+ scope: "agent",
1030
+ source: "session_summary",
1031
+ sourceTaskId: taskId,
1032
+ content: summary,
1033
+ name: "session-summary",
1034
+ agentId,
1035
+ }),
1036
+ });
1037
+ if (!indexResp.ok) {
1038
+ const text = await indexResp.text().catch(() => "");
1039
+ console.error(
1040
+ "session_summary: /api/memory/index POST failed (codex):",
1041
+ indexResp.status,
1042
+ text,
1043
+ );
1044
+ return;
1045
+ }
1046
+
1047
+ if (wantRatings && result.ratings && result.ratings.length > 0) {
1048
+ const ratingEvents = _buildRatings(result.ratings, retrievals);
1049
+ if (ratingEvents.length > 0) {
1050
+ await _postRatings({
1051
+ apiUrl,
1052
+ apiKey,
1053
+ agentId,
1054
+ taskId,
1055
+ events: ratingEvents,
1056
+ }).catch((err) => console.error("session_summary: postRatings failed (codex):", err));
1057
+ }
838
1058
  }
839
1059
  }
840
1060
  }
@@ -851,8 +1071,17 @@ export class CodexAdapter implements ProviderAdapter {
851
1071
  */
852
1072
  private readonly skillsDir?: string;
853
1073
 
854
- constructor(opts: { skillsDir?: string } = {}) {
1074
+ /**
1075
+ * Optional dependency-injection points for session-end summarization. Tests
1076
+ * pass stubs in here to exercise the summarize → index → rate flow without
1077
+ * standing up a real API server or LLM. Production callers omit this and the
1078
+ * `CodexSession` falls back to the module-level imports.
1079
+ */
1080
+ private readonly summarizeDeps: SummarizeSessionForCodexDeps;
1081
+
1082
+ constructor(opts: { skillsDir?: string; summarizeDeps?: SummarizeSessionForCodexDeps } = {}) {
855
1083
  this.skillsDir = opts.skillsDir;
1084
+ this.summarizeDeps = opts.summarizeDeps ?? {};
856
1085
  }
857
1086
 
858
1087
  async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
@@ -937,6 +1166,7 @@ export class CodexAdapter implements ProviderAdapter {
937
1166
  resolvedModel,
938
1167
  preSessionEvents,
939
1168
  this.skillsDir,
1169
+ this.summarizeDeps,
940
1170
  );
941
1171
  } catch (err) {
942
1172
  // If we failed to construct the thread, clean up the managed AGENTS.md
@@ -85,6 +85,27 @@ export async function deleteCodexOAuth(apiUrl: string, apiKey: string): Promise<
85
85
  });
86
86
  }
87
87
 
88
+ /**
89
+ * Best-effort persistence of refreshed OAuth credentials back to the config
90
+ * store. Wraps {@link storeCodexOAuth} with a try/catch + `console.error` —
91
+ * a write failure MUST NOT block the current caller from using the refreshed
92
+ * `apiKey` (the rotation will just have to retry on the next call). This is
93
+ * called from `src/utils/internal-ai/credentials.ts` after pi-ai's
94
+ * `getOAuthApiKey` returns `{ newCredentials, apiKey }` so the rotated refresh
95
+ * token isn't lost in-memory.
96
+ */
97
+ export async function persistCodexOAuth(
98
+ apiUrl: string,
99
+ apiKey: string,
100
+ creds: CodexOAuthCredentials,
101
+ ): Promise<void> {
102
+ try {
103
+ await storeCodexOAuth(apiUrl, apiKey, creds);
104
+ } catch (err) {
105
+ console.error("[codex-oauth] persistCodexOAuth failed (non-fatal):", err);
106
+ }
107
+ }
108
+
88
109
  export async function getValidCodexOAuth(
89
110
  apiUrl: string,
90
111
  apiKey: string,
@@ -7,9 +7,11 @@
7
7
  */
8
8
 
9
9
  import type { ExtensionFactory } from "@mariozechner/pi-coding-agent";
10
+ import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
10
11
  import { checkToolLoop, clearToolHistory } from "../hooks/tool-loop-detection";
12
+ import { summarizeSession as runSummarize } from "../utils/internal-ai";
11
13
 
12
- interface SwarmHooksConfig {
14
+ export interface SwarmHooksConfig {
13
15
  apiUrl: string;
14
16
  apiKey: string;
15
17
  agentId: string;
@@ -276,13 +278,54 @@ async function fetchConcurrentContext(config: SwarmHooksConfig): Promise<string
276
278
  }
277
279
  }
278
280
 
279
- /** Run session summarization via Claude Haiku on shutdown */
280
- async function summarizeSession(
281
+ /**
282
+ * Test-injection points for `summarizeSessionForPi`. Production callers omit
283
+ * `deps` entirely — pi-mono-extension's `session_shutdown` handler uses the
284
+ * default implementations bound via `import` at the top of this file.
285
+ *
286
+ * Why this exists: `bun:test`'s `mock.module()` is process-wide and leaks
287
+ * across test files, so the pi-mono-extension test cannot stub out
288
+ * `runSummarize`/`postRatings` via module mocking without breaking siblings
289
+ * that import the real symbols (e.g. `buildRatingsFromLlm` step-6 tests,
290
+ * `summarize-session.test.ts`). Explicit DI is the safer pattern.
291
+ */
292
+ export interface SummarizeSessionForPiDeps {
293
+ runSummarize?: typeof runSummarize;
294
+ fetchRetrievalsForTask?: typeof fetchRetrievalsForTask;
295
+ postRatings?: typeof postRatings;
296
+ buildRatingsFromLlm?: typeof buildRatingsFromLlm;
297
+ }
298
+
299
+ /**
300
+ * Run session summarization via the shared `internal-ai` abstraction on
301
+ * shutdown. Replaces the previous `Bun.spawn(claude -p ...)` shellout
302
+ * (which silently failed in production because pi sessions typically don't
303
+ * have Anthropic CLI auth).
304
+ *
305
+ * Flow:
306
+ * 1. Read tail of session transcript file.
307
+ * 2. Fetch task details + (optionally) memory retrievals for ratings.
308
+ * 3. Call `runSummarize` from `src/utils/internal-ai` — picks credentials
309
+ * out of env / codex OAuth, returns structured `{summary, ratings}`.
310
+ * 4. Apply length/quality gate; POST summary to `/api/memory/index`.
311
+ * 5. If `MEMORY_RATERS` includes `llm` AND ratings came back, POST them
312
+ * via `postRatings` (events-based; mirrors the claude Stop hook).
313
+ *
314
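+ * Failures are reported via `console.error(..., err)` rather than swallowed,
315
+ * except the best-effort task-details lookup and retrieval pre-fetch, which fall back to `null` / `[]` without logging.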
316
+ */
317
+ export async function summarizeSessionForPi(
281
318
  config: SwarmHooksConfig,
282
319
  sessionFile: string | undefined,
320
+ deps: SummarizeSessionForPiDeps = {},
283
321
  ): Promise<void> {
284
322
  if (!sessionFile) return;
285
323
 
324
+ const _runSummarize = deps.runSummarize ?? runSummarize;
325
+ const _fetchRetrievals = deps.fetchRetrievalsForTask ?? fetchRetrievalsForTask;
326
+ const _postRatings = deps.postRatings ?? postRatings;
327
+ const _buildRatings = deps.buildRatingsFromLlm ?? buildRatingsFromLlm;
328
+
286
329
  try {
287
330
  let transcript = "";
288
331
  try {
@@ -294,84 +337,78 @@ async function summarizeSession(
294
337
 
295
338
  if (transcript.length <= 100) return;
296
339
 
297
- let taskContext = "";
298
- try {
299
- const taskDetails = await fetchTaskDetails(config);
300
- if (taskDetails) {
301
- taskContext = `Task: ${taskDetails.task}`;
302
- }
303
- } catch {
304
- /* no task context */
305
- }
306
-
307
- const summarizePrompt = `You are summarizing an AI agent's work session. Extract ONLY high-value learnings.
308
-
309
- DO NOT include:
310
- - Generic descriptions of what was done ("worked on task X")
311
- - Tool calls or file reads
312
- - Routine progress updates
313
-
314
- DO include (if present):
315
- - **Mistakes made and corrections** — what went wrong and what fixed it
316
- - **Discovered patterns** — reusable approaches, APIs, or codebase conventions
317
- - **Codebase knowledge** important file paths, architecture decisions, gotchas
318
- - **Environment knowledge** — service URLs, config details, tool quirks
319
- - **Failed approaches** — what was tried and didn't work (and why)
320
-
321
- Format as a bulleted list of concrete, reusable facts. If the session was routine with no significant learnings, respond with exactly: "No significant learnings."
322
- ${taskContext ? `\nTask context: ${taskContext}` : ""}
323
- Transcript:
324
- ${transcript}`;
325
-
326
- const tmpFile = `/tmp/session-summary-${Date.now()}.txt`;
327
- await Bun.write(tmpFile, summarizePrompt);
328
- const proc = Bun.spawn(
329
- [
330
- "bash",
331
- "-c",
332
- `cat "${tmpFile}" | ${process.env.CLAUDE_BINARY || "claude"} -p --model haiku --output-format json`,
333
- ],
334
- {
335
- stdout: "pipe",
336
- stderr: "pipe",
337
- env: { ...process.env, SKIP_SESSION_SUMMARY: "1" },
340
+ const sourceTaskId = config.taskId;
341
+ const agentId = config.agentId;
342
+ if (!sourceTaskId || !agentId) return;
343
+
344
+ const taskDetails = await fetchTaskDetails(config).catch(() => null);
345
+
346
+ const memoryRaters = (process.env.MEMORY_RATERS ?? "")
347
+ .split(",")
348
+ .map((s) => s.trim())
349
+ .filter(Boolean);
350
+ const wantRatings = memoryRaters.includes("llm");
351
+ const retrievals = wantRatings
352
+ ? await _fetchRetrievals({
353
+ apiUrl: config.apiUrl,
354
+ apiKey: config.apiKey,
355
+ agentId,
356
+ taskId: sourceTaskId,
357
+ }).catch(() => [])
358
+ : [];
359
+
360
+ const result = await _runSummarize({
361
+ harness: "pi",
362
+ transcript,
363
+ retrievals,
364
+ taskContext: {
365
+ sourceTaskId,
366
+ agentId,
367
+ prompt: taskDetails?.task,
338
368
  },
339
- );
340
- const timeoutId = setTimeout(() => proc.kill(), 30000);
341
- const result = { stdout: await new Response(proc.stdout).text() };
342
- clearTimeout(timeoutId);
343
- await Bun.$`rm -f ${tmpFile}`.quiet();
369
+ apiUrl: config.apiUrl,
370
+ apiKey: config.apiKey,
371
+ });
372
+ // null = no auth resolved or wrapper exhausted retries (already logged inside)
373
+ if (!result) return;
344
374
 
345
- let summary: string;
346
- try {
347
- const summaryOutput = JSON.parse(result.stdout);
348
- summary = summaryOutput.result ?? result.stdout;
349
- } catch {
350
- summary = result.stdout;
375
+ const summary = result.summary.trim();
376
+ if (summary.length <= 20 || summary.toLowerCase().includes("no significant learnings")) {
377
+ return;
351
378
  }
352
379
 
353
- if (
354
- summary &&
355
- summary.length > 20 &&
356
- !summary.trim().toLowerCase().includes("no significant learnings")
357
- ) {
358
- await fetch(`${config.apiUrl}/api/memory/index`, {
359
- method: "POST",
360
- headers: apiHeaders(config),
361
- body: JSON.stringify({
362
- agentId: config.agentId,
363
- content: summary,
364
- name: taskContext
365
- ? `Session: ${taskContext.slice(0, 80)}`
366
- : `Session: ${new Date().toISOString().slice(0, 16)}`,
367
- scope: "agent",
368
- source: "session_summary",
369
- sourceTaskId: config.taskId,
370
- }),
371
- });
380
+ const indexResp = await fetch(`${config.apiUrl}/api/memory/index`, {
381
+ method: "POST",
382
+ headers: apiHeaders(config),
383
+ body: JSON.stringify({
384
+ scope: "agent",
385
+ source: "session_summary",
386
+ sourceTaskId,
387
+ content: summary,
388
+ name: "session-summary",
389
+ agentId,
390
+ }),
391
+ });
392
+ if (!indexResp.ok) {
393
+ const text = await indexResp.text().catch(() => "");
394
+ console.error("session_summary: /api/memory/index POST failed (pi):", indexResp.status, text);
395
+ return;
372
396
  }
373
- } catch {
374
- /* non-blocking */
397
+
398
+ if (wantRatings && result.ratings && result.ratings.length > 0) {
399
+ const ratingEvents = _buildRatings(result.ratings, retrievals);
400
+ if (ratingEvents.length > 0) {
401
+ await _postRatings({
402
+ apiUrl: config.apiUrl,
403
+ apiKey: config.apiKey,
404
+ agentId,
405
+ taskId: sourceTaskId,
406
+ events: ratingEvents,
407
+ }).catch((err) => console.error("session_summary: postRatings failed (pi):", err));
408
+ }
409
+ }
410
+ } catch (err) {
411
+ console.error("session_summary failed (pi):", err);
375
412
  }
376
413
  }
377
414
 
@@ -661,7 +698,7 @@ export function createSwarmHooksExtension(config: SwarmHooksConfig): ExtensionFa
661
698
  // Session summarization — get session file from context's session manager
662
699
  const sessionFile = ctx.sessionManager.getSessionFile?.();
663
700
  if (!process.env.SKIP_SESSION_SUMMARY) {
664
- await summarizeSession(config, sessionFile);
701
+ await summarizeSessionForPi(config, sessionFile);
665
702
  }
666
703
 
667
704
  // Mark agent offline
package/src/telemetry.ts CHANGED
@@ -73,6 +73,32 @@ interface TrackOptions {
73
73
  metadata?: Record<string, unknown>;
74
74
  }
75
75
 
76
+ /**
77
+ * Read SWARM_ORG_ID / SWARM_ORG_NAME from process.env at call time. Reading
78
+ * fresh each track() lets reloaded swarm_config values land in telemetry
79
+ * without restarting (loadGlobalConfigsIntoEnv mutates process.env on
80
+ * `POST /api/config/reload` with override=true). Returns only the keys that
81
+ * are set, so the spread below stays a clean no-op on self-host.
82
+ */
83
+ function getOrgIdentity(): { organization_id?: string; organization_name?: string } {
84
+ const out: { organization_id?: string; organization_name?: string } = {};
85
+ const orgId = process.env.SWARM_ORG_ID?.trim();
86
+ if (orgId) out.organization_id = orgId;
87
+ const orgName = process.env.SWARM_ORG_NAME?.trim();
88
+ if (orgName) out.organization_name = orgName;
89
+ return out;
90
+ }
91
+
92
+ /**
93
+ * Mirror of `buildIdentity()`'s SWARM_CLOUD parsing — accepts "true" or "1".
94
+ * Always emitted (not optional) so consumers can split cloud vs self-host
95
+ * cohorts without ambiguity between "false" and "unset".
96
+ */
97
+ function isCloudDeployment(): boolean {
98
+ const raw = process.env.SWARM_CLOUD;
99
+ return raw === "true" || raw === "1";
100
+ }
101
+
76
102
  /** Fire-and-forget telemetry event. Never throws, never blocks. */
77
103
  export function track(options: TrackOptions): void {
78
104
  if (!isEnabled() || !installationId) return;
@@ -89,6 +115,8 @@ export function track(options: TrackOptions): void {
89
115
  transport: "https",
90
116
  schema_version: 1,
91
117
  environment: process.env.NODE_ENV ?? "production",
118
+ is_cloud: isCloudDeployment(),
119
+ ...getOrgIdentity(),
92
120
  ...options.metadata,
93
121
  },
94
122
  };