npm - @desplega.ai/agent-swarm - Versions diffs - 1.76.2 → 1.76.3 - Mend

@desplega.ai/agent-swarm 1.76.2 → 1.76.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/openapi.json +9 -2
package/package.json +1 -1
package/src/be/memory/raters/llm.ts +26 -0
package/src/hooks/hook.ts +174 -147
package/src/http/status.ts +8 -0
package/src/providers/claude-adapter.ts +9 -1
package/src/providers/codex-adapter.ts +232 -2
package/src/providers/codex-oauth/storage.ts +21 -0
package/src/providers/pi-mono-extension.ts +114 -77
package/src/telemetry.ts +28 -0
package/src/tests/claude-stop-hook.test.ts +432 -0
package/src/tests/codex-adapter.test.ts +436 -1
package/src/tests/internal-ai/complete-structured.test.ts +276 -0
package/src/tests/internal-ai/credentials.test.ts +264 -0
package/src/tests/internal-ai/schema-parity.test.ts +103 -0
package/src/tests/internal-ai/summarize-session.test.ts +105 -0
package/src/tests/opencode-plugin.test.ts +496 -0
package/src/tests/pi-mono-extension.test.ts +347 -0
package/src/tests/reload-config.test.ts +9 -1
package/src/tests/status.test.ts +4 -0
package/src/tests/telemetry-init.test.ts +137 -1
package/src/tests/template-recommendations.test.ts +1 -0
package/src/utils/internal-ai/complete-structured.ts +296 -0
package/src/utils/internal-ai/credentials.ts +175 -0
package/src/utils/internal-ai/index.ts +31 -0
package/src/utils/internal-ai/models.ts +46 -0
package/src/utils/internal-ai/summarize-session.ts +101 -0

package/src/providers/codex-adapter.ts CHANGED Viewed

@@ -65,6 +65,8 @@ import {
   type Usage,
   type WebSearchItem,
 } from "@openai/codex-sdk";
+import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
+import { summarizeSession as runSummarize } from "../utils/internal-ai";
 import { scrubSecrets } from "../utils/secret-scrubber";
 import { type CodexAgentsMdHandle, writeCodexAgentsMd } from "./codex-agents-md";
 import { computeCodexCostUsd, getCodexContextWindow, resolveCodexModel } from "./codex-models";
@@ -340,6 +342,26 @@ export async function buildCodexConfig(
   };
 }
+/**
+ * Test-injection points for the codex session-end summarization path.
+ *
+ * Production callers omit `deps` entirely — the `CodexSession.summarizeAtEnd`
+ * helper falls back to the symbols imported at the top of this file. Tests
+ * override each function so we can exercise the summarize/index/rate flow
+ * without standing up a real API server or LLM.
+ *
+ * Why this exists (mirrors `SummarizeSessionForPiDeps`): `bun:test`'s
+ * `mock.module()` is process-wide and leaks across test files in the same
+ * `bun test` run, breaking siblings that import the real symbols. Explicit DI
+ * keeps the boundary local to this adapter.
+ */
+export interface SummarizeSessionForCodexDeps {
+  runSummarize?: typeof runSummarize;
+  fetchRetrievalsForTask?: typeof fetchRetrievalsForTask;
+  postRatings?: typeof postRatings;
+  buildRatingsFromLlm?: typeof buildRatingsFromLlm;
+}
 /** Running session backed by a Codex `Thread`. */
 class CodexSession implements ProviderSession {
   private readonly thread: Thread;
@@ -348,6 +370,7 @@ class CodexSession implements ProviderSession {
   private readonly resolvedModel: string;
   private readonly contextWindow: number;
   private readonly skillsDir: string;
+  private readonly summarizeDeps: SummarizeSessionForCodexDeps;
   private readonly listeners: Array<(event: ProviderEvent) => void> = [];
   private readonly eventQueue: ProviderEvent[] = [];
   private readonly logFileHandle: ReturnType<ReturnType<typeof Bun.file>["writer"]>;
@@ -355,6 +378,11 @@ class CodexSession implements ProviderSession {
   private readonly completionPromise: Promise<ProviderResult>;
   private resolveCompletion!: (result: ProviderResult) => void;
   private abortController: AbortController | null = null;
+  /**
+   * Per-session transcript buffer used to feed the session-end summarizer.
+   * Reset at the start of `runSession` and appended in `handleEvent`.
+   */
+  private transcript: string[] = [];
   /**
    * Mutable holder for the current turn's `AbortController`. Shared with the
    * swarm event handler so it can trigger an abort from outside `runSession`
@@ -366,6 +394,14 @@ class CodexSession implements ProviderSession {
   private lastUsage: Usage | null = null;
   private aborted = false;
   private settled = false;
+  /**
+   * Result captured by `settle` but held back from `resolveCompletion` until
+   * `runSession`'s `finally` block has fully cleaned up (log writer flush,
+   * AGENTS.md cleanup, session summary). Without this, callers awaiting
+   * `waitForCompletion` would race the cleanup. Phase 3 added the session
+   * summary path which materially relies on this ordering for testability.
+   */
+  private pendingResult: ProviderResult | null = null;
   constructor(
     thread: Thread,
@@ -374,6 +410,7 @@ class CodexSession implements ProviderSession {
     resolvedModel: string,
     initialEvents: ProviderEvent[] = [],
     skillsDir?: string,
+    summarizeDeps: SummarizeSessionForCodexDeps = {},
   ) {
     this.thread = thread;
     this.config = config;
@@ -385,6 +422,7 @@ class CodexSession implements ProviderSession {
     // runtime default of `${HOME}/.codex/skills`.
     this.skillsDir =
       skillsDir ?? process.env.CODEX_SKILLS_DIR ?? join(os.homedir(), ".codex", "skills");
+    this.summarizeDeps = summarizeDeps;
     this.logFileHandle = Bun.file(config.logFile).writer();
     this.completionPromise = new Promise<ProviderResult>((resolve) => {
@@ -474,7 +512,10 @@ class CodexSession implements ProviderSession {
   private settle(result: ProviderResult): void {
     if (this.settled) return;
     this.settled = true;
-    this.resolveCompletion(result);
+    // Resolution deferred until `runSession`'s finally-block fully cleans up
+    // (see `pendingResult` rationale on the field above). Caller-visible
+    // ordering: cleanup → resolve waitForCompletion.
+    this.pendingResult = result;
   }
   /** Build CostData from the most recent turn usage. */
@@ -559,6 +600,41 @@ class CodexSession implements ProviderSession {
     );
   }
+  /**
+   * Render a completed tool item as a short, signal-dense one-liner for the
+   * session-end summarization transcript. Picks tool-type-specific fields so
+   * the transcript doesn't get drowned in raw JSON (a single `command_execution`
+   * can carry 100KB+ of `aggregated_output`). Each branch is capped at 500
+   * chars; unknown types fall back to a trimmed `JSON.stringify`.
+   */
+  private shortenItemResult(item: ThreadItem): string {
+    switch (item.type) {
+      case "command_execution": {
+        const cmd = item as CommandExecutionItem;
+        const stdout = (cmd.aggregated_output ?? "").slice(0, 500);
+        return `exit=${cmd.exit_code ?? "?"} status=${cmd.status ?? "?"} stdout=${stdout}`;
+      }
+      case "file_change": {
+        const fc = item as FileChangeItem;
+        const summarised = (fc.changes ?? [])
+          .slice(0, 5)
+          .map((c) => `${c.kind}:${c.path}`)
+          .join(",");
+        return `changes=[${summarised}]`;
+      }
+      case "mcp_tool_call": {
+        const mcp = item as McpToolCallItem;
+        return `server=${mcp.server} tool=${mcp.tool} status=${mcp.status ?? "?"}`;
+      }
+      case "web_search": {
+        const ws = item as WebSearchItem;
+        return `query=${(ws.query ?? "").slice(0, 200)}`;
+      }
+      default:
+        return JSON.stringify(item).slice(0, 500);
+    }
+  }
   private handleEvent(event: ThreadEvent): void {
     // Mirror every raw SDK event into the log as raw_log for debugability —
     // parity with Claude's JSONL envelope.
@@ -582,6 +658,14 @@ class CodexSession implements ProviderSession {
             toolName: this.toolNameForItem(event.item),
             args: this.toolArgsForItem(event.item),
           });
+          // Mirror into the transcript buffer for session-end summarization.
+          // Tools are the bulk of useful signal in a codex session, so we
+          // capture both the start (args) and the completion (result digest).
+          this.transcript.push(
+            `Tool[${this.toolNameForItem(event.item)}] started: ${JSON.stringify(
+              this.toolArgsForItem(event.item),
+            ).slice(0, 500)}`,
+          );
         }
         break;
       }
@@ -615,6 +699,9 @@ class CodexSession implements ProviderSession {
             toolName: this.toolNameForItem(item),
             result: item,
           });
+          this.transcript.push(
+            `Tool[${this.toolNameForItem(item)}] completed: ${this.shortenItemResult(item)}`,
+          );
           break;
         }
         switch (item.type) {
@@ -622,6 +709,7 @@ class CodexSession implements ProviderSession {
             const msg = item as AgentMessageItem;
             if (msg.text) {
               this.emit({ type: "message", role: "assistant", content: msg.text });
+              this.transcript.push(`Assistant: ${msg.text}`);
             }
             break;
           }
@@ -764,6 +852,11 @@ class CodexSession implements ProviderSession {
         this.emit(event),
       );
+      // Reset + seed the transcript buffer so the session-end summarizer has
+      // the user's prompt as anchor context. Subsequent appends happen in
+      // `handleEvent` (tool start/end, agent_message).
+      this.transcript = [`User: ${resolvedPrompt}`];
       const streamed = await this.thread.runStreamed(resolvedPrompt, {
         signal: this.abortController.signal,
       });
@@ -827,6 +920,18 @@ class CodexSession implements ProviderSession {
         failureReason: message,
       });
     } finally {
+      // Session-end summarization. Pure addition for codex — no behavior to
+      // preserve. Wrapped in its own try/catch so summary failure must NOT
+      // block the existing log/AGENTS.md cleanup below. Gate `SKIP_SESSION_SUMMARY=1`
+      // matches the parity convention used by the claude Stop hook + pi/opencode.
+      if (process.env.SKIP_SESSION_SUMMARY !== "1") {
+        try {
+          await this.summarizeAtEnd();
+        } catch (err) {
+          console.error("session_summary failed (codex):", err);
+        }
+      }
       // Detach the abort controller now that the turn has settled.
       this.abortRef.current = null;
       try {
@@ -835,6 +940,121 @@ class CodexSession implements ProviderSession {
         // Ignore log writer cleanup failures.
       }
       await this.agentsMdHandle.cleanup();
+      // Resolve `waitForCompletion()` only AFTER all cleanup has finished so
+      // downstream observers (tests + the runner's `.then(...)` chain) don't
+      // race the finally-block side effects. Fallback to an error result if
+      // we somehow never called `settle` (defensive — every codepath in the
+      // try/catch above calls settle exactly once).
+      const finalResult =
+        this.pendingResult ??
+        ({
+          exitCode: 1,
+          sessionId: this._sessionId,
+          cost: this.buildCostData(this.lastUsage, true),
+          isError: true,
+          failureReason: "session did not settle",
+        } as ProviderResult);
+      this.resolveCompletion(finalResult);
+    }
+  }
+  /**
+   * Index a session summary into agent memory at the end of a codex turn.
+   *
+   * Mirrors `summarizeSessionForPi` and the claude Stop hook:
+   *   1. Truncate the in-memory transcript buffer to the last 20 KB.
+   *   2. Bail when the transcript is too short or the swarm context is
+   *      missing (no agentId / no taskId / no apiUrl / no apiKey).
+   *   3. (Optional) Pre-fetch retrievals when `MEMORY_RATERS` includes `llm`
+   *      so the LLM can score them alongside the summary.
+   *   4. Call `runSummarize` from `src/utils/internal-ai` (Phase 0). Returns
+   *      `null` when no credential resolves — silent skip.
+   *   5. Apply length/quality gate; POST to `/api/memory/index`.
+   *   6. POST ratings (`events:` key, NOT `ratings:`) via `postRatings` when
+   *      `MEMORY_RATERS=llm` and the LLM returned per-memory scores.
+   *
+   * All catches log via `console.error(..., err)` — silent-fail behavior is
+   * gone. The outer try in `runSession`'s finally-block is the final safety
+   * net guaranteeing existing cleanup runs regardless.
+   */
+  private async summarizeAtEnd(): Promise<void> {
+    const transcriptStr = this.transcript.join("\n").slice(-20_000);
+    const { agentId, taskId, apiUrl, apiKey } = this.config;
+    if (!agentId || !taskId || !apiUrl || !apiKey) return;
+    if (transcriptStr.length <= 100) return;
+    const _runSummarize = this.summarizeDeps.runSummarize ?? runSummarize;
+    const _fetchRetrievals = this.summarizeDeps.fetchRetrievalsForTask ?? fetchRetrievalsForTask;
+    const _postRatings = this.summarizeDeps.postRatings ?? postRatings;
+    const _buildRatings = this.summarizeDeps.buildRatingsFromLlm ?? buildRatingsFromLlm;
+    const memoryRaters = (process.env.MEMORY_RATERS ?? "")
+      .split(",")
+      .map((s) => s.trim())
+      .filter(Boolean);
+    const wantRatings = memoryRaters.includes("llm");
+    const retrievals = wantRatings
+      ? await _fetchRetrievals({ apiUrl, apiKey, agentId, taskId }).catch(() => [])
+      : [];
+    const result = await _runSummarize({
+      harness: "codex",
+      transcript: transcriptStr,
+      retrievals,
+      taskContext: {
+        sourceTaskId: taskId,
+        agentId,
+        prompt: this.config.prompt,
+      },
+      apiUrl,
+      apiKey,
+    });
+    // null = no auth resolved or wrapper exhausted retries (already logged inside)
+    if (!result) return;
+    const summary = result.summary.trim();
+    if (summary.length <= 20 || summary.toLowerCase().includes("no significant learnings")) {
+      return;
+    }
+    const indexResp = await fetch(`${apiUrl}/api/memory/index`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${apiKey}`,
+        "X-Agent-ID": agentId,
+      },
+      body: JSON.stringify({
+        scope: "agent",
+        source: "session_summary",
+        sourceTaskId: taskId,
+        content: summary,
+        name: "session-summary",
+        agentId,
+      }),
+    });
+    if (!indexResp.ok) {
+      const text = await indexResp.text().catch(() => "");
+      console.error(
+        "session_summary: /api/memory/index POST failed (codex):",
+        indexResp.status,
+        text,
+      );
+      return;
+    }
+    if (wantRatings && result.ratings && result.ratings.length > 0) {
+      const ratingEvents = _buildRatings(result.ratings, retrievals);
+      if (ratingEvents.length > 0) {
+        await _postRatings({
+          apiUrl,
+          apiKey,
+          agentId,
+          taskId,
+          events: ratingEvents,
+        }).catch((err) => console.error("session_summary: postRatings failed (codex):", err));
+      }
     }
   }
 }
@@ -851,8 +1071,17 @@ export class CodexAdapter implements ProviderAdapter {
    */
   private readonly skillsDir?: string;
-  constructor(opts: { skillsDir?: string } = {}) {
+  /**
+   * Optional dependency-injection points for session-end summarization. Tests
+   * pass stubs in here to exercise the summarize → index → rate flow without
+   * standing up a real API server or LLM. Production callers omit this and the
+   * `CodexSession` falls back to the module-level imports.
+   */
+  private readonly summarizeDeps: SummarizeSessionForCodexDeps;
+  constructor(opts: { skillsDir?: string; summarizeDeps?: SummarizeSessionForCodexDeps } = {}) {
     this.skillsDir = opts.skillsDir;
+    this.summarizeDeps = opts.summarizeDeps ?? {};
   }
   async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
@@ -937,6 +1166,7 @@ export class CodexAdapter implements ProviderAdapter {
         resolvedModel,
         preSessionEvents,
         this.skillsDir,
+        this.summarizeDeps,
       );
     } catch (err) {
       // If we failed to construct the thread, clean up the managed AGENTS.md

package/src/providers/codex-oauth/storage.ts CHANGED Viewed

@@ -85,6 +85,27 @@ export async function deleteCodexOAuth(apiUrl: string, apiKey: string): Promise<
   });
 }
+/**
+ * Best-effort persistence of refreshed OAuth credentials back to the config
+ * store. Wraps {@link storeCodexOAuth} with a try/catch + `console.error` —
+ * a write failure MUST NOT block the current caller from using the refreshed
+ * `apiKey` (the rotation will just have to retry on the next call). This is
+ * called from `src/utils/internal-ai/credentials.ts` after pi-ai's
+ * `getOAuthApiKey` returns `{ newCredentials, apiKey }` so the rotated refresh
+ * token isn't lost in-memory.
+ */
+export async function persistCodexOAuth(
+  apiUrl: string,
+  apiKey: string,
+  creds: CodexOAuthCredentials,
+): Promise<void> {
+  try {
+    await storeCodexOAuth(apiUrl, apiKey, creds);
+  } catch (err) {
+    console.error("[codex-oauth] persistCodexOAuth failed (non-fatal):", err);
+  }
+}
 export async function getValidCodexOAuth(
   apiUrl: string,
   apiKey: string,

package/src/providers/pi-mono-extension.ts CHANGED Viewed

@@ -7,9 +7,11 @@
  */
 import type { ExtensionFactory } from "@mariozechner/pi-coding-agent";
+import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
 import { checkToolLoop, clearToolHistory } from "../hooks/tool-loop-detection";
+import { summarizeSession as runSummarize } from "../utils/internal-ai";
-interface SwarmHooksConfig {
+export interface SwarmHooksConfig {
   apiUrl: string;
   apiKey: string;
   agentId: string;
@@ -276,13 +278,54 @@ async function fetchConcurrentContext(config: SwarmHooksConfig): Promise<string
   }
 }
-/** Run session summarization via Claude Haiku on shutdown */
-async function summarizeSession(
+/**
+ * Test-injection points for `summarizeSessionForPi`. Production callers omit
+ * `deps` entirely — pi-mono-extension's `session_shutdown` handler uses the
+ * default implementations bound via `import` at the top of this file.
+ *
+ * Why this exists: `bun:test`'s `mock.module()` is process-wide and leaks
+ * across test files, so the pi-mono-extension test cannot stub out
+ * `runSummarize`/`postRatings` via module mocking without breaking siblings
+ * that import the real symbols (e.g. `buildRatingsFromLlm` step-6 tests,
+ * `summarize-session.test.ts`). Explicit DI is the safer pattern.
+ */
+export interface SummarizeSessionForPiDeps {
+  runSummarize?: typeof runSummarize;
+  fetchRetrievalsForTask?: typeof fetchRetrievalsForTask;
+  postRatings?: typeof postRatings;
+  buildRatingsFromLlm?: typeof buildRatingsFromLlm;
+}
+/**
+ * Run session summarization via the shared `internal-ai` abstraction on
+ * shutdown. Replaces the previous `Bun.spawn(claude -p ...)` shellout
+ * (which silently failed in production because pi sessions typically don't
+ * have Anthropic CLI auth).
+ *
+ * Flow:
+ *   1. Read tail of session transcript file.
+ *   2. Fetch task details + (optionally) memory retrievals for ratings.
+ *   3. Call `runSummarize` from `src/utils/internal-ai` — picks credentials
+ *      out of env / codex OAuth, returns structured `{summary, ratings}`.
+ *   4. Apply length/quality gate; POST summary to `/api/memory/index`.
+ *   5. If `MEMORY_RATERS` includes `llm` AND ratings came back, POST them
+ *      via `postRatings` (events-based; mirrors the claude Stop hook).
+ *
+ * All catches log via `console.error(..., err)` — silent-fail behavior is
+ * gone.
+ */
+export async function summarizeSessionForPi(
   config: SwarmHooksConfig,
   sessionFile: string | undefined,
+  deps: SummarizeSessionForPiDeps = {},
 ): Promise<void> {
   if (!sessionFile) return;
+  const _runSummarize = deps.runSummarize ?? runSummarize;
+  const _fetchRetrievals = deps.fetchRetrievalsForTask ?? fetchRetrievalsForTask;
+  const _postRatings = deps.postRatings ?? postRatings;
+  const _buildRatings = deps.buildRatingsFromLlm ?? buildRatingsFromLlm;
   try {
     let transcript = "";
     try {
@@ -294,84 +337,78 @@ async function summarizeSession(
     if (transcript.length <= 100) return;
-    let taskContext = "";
-    try {
-      const taskDetails = await fetchTaskDetails(config);
-      if (taskDetails) {
-        taskContext = `Task: ${taskDetails.task}`;
-      }
-    } catch {
-      /* no task context */
-    }
-    const summarizePrompt = `You are summarizing an AI agent's work session. Extract ONLY high-value learnings.
-DO NOT include:
-- Generic descriptions of what was done ("worked on task X")
-- Tool calls or file reads
-- Routine progress updates
-DO include (if present):
-- **Mistakes made and corrections** — what went wrong and what fixed it
-- **Discovered patterns** — reusable approaches, APIs, or codebase conventions
-- **Codebase knowledge** — important file paths, architecture decisions, gotchas
-- **Environment knowledge** — service URLs, config details, tool quirks
-- **Failed approaches** — what was tried and didn't work (and why)
-Format as a bulleted list of concrete, reusable facts. If the session was routine with no significant learnings, respond with exactly: "No significant learnings."
-${taskContext ? `\nTask context: ${taskContext}` : ""}
-Transcript:
-${transcript}`;
-    const tmpFile = `/tmp/session-summary-${Date.now()}.txt`;
-    await Bun.write(tmpFile, summarizePrompt);
-    const proc = Bun.spawn(
-      [
-        "bash",
-        "-c",
-        `cat "${tmpFile}" | ${process.env.CLAUDE_BINARY || "claude"} -p --model haiku --output-format json`,
-      ],
-      {
-        stdout: "pipe",
-        stderr: "pipe",
-        env: { ...process.env, SKIP_SESSION_SUMMARY: "1" },
+    const sourceTaskId = config.taskId;
+    const agentId = config.agentId;
+    if (!sourceTaskId || !agentId) return;
+    const taskDetails = await fetchTaskDetails(config).catch(() => null);
+    const memoryRaters = (process.env.MEMORY_RATERS ?? "")
+      .split(",")
+      .map((s) => s.trim())
+      .filter(Boolean);
+    const wantRatings = memoryRaters.includes("llm");
+    const retrievals = wantRatings
+      ? await _fetchRetrievals({
+          apiUrl: config.apiUrl,
+          apiKey: config.apiKey,
+          agentId,
+          taskId: sourceTaskId,
+        }).catch(() => [])
+      : [];
+    const result = await _runSummarize({
+      harness: "pi",
+      transcript,
+      retrievals,
+      taskContext: {
+        sourceTaskId,
+        agentId,
+        prompt: taskDetails?.task,
       },
-    );
-    const timeoutId = setTimeout(() => proc.kill(), 30000);
-    const result = { stdout: await new Response(proc.stdout).text() };
-    clearTimeout(timeoutId);
-    await Bun.$`rm -f ${tmpFile}`.quiet();
+      apiUrl: config.apiUrl,
+      apiKey: config.apiKey,
+    });
+    // null = no auth resolved or wrapper exhausted retries (already logged inside)
+    if (!result) return;
-    let summary: string;
-    try {
-      const summaryOutput = JSON.parse(result.stdout);
-      summary = summaryOutput.result ?? result.stdout;
-    } catch {
-      summary = result.stdout;
+    const summary = result.summary.trim();
+    if (summary.length <= 20 || summary.toLowerCase().includes("no significant learnings")) {
+      return;
     }
-    if (
-      summary &&
-      summary.length > 20 &&
-      !summary.trim().toLowerCase().includes("no significant learnings")
-    ) {
-      await fetch(`${config.apiUrl}/api/memory/index`, {
-        method: "POST",
-        headers: apiHeaders(config),
-        body: JSON.stringify({
-          agentId: config.agentId,
-          content: summary,
-          name: taskContext
-            ? `Session: ${taskContext.slice(0, 80)}`
-            : `Session: ${new Date().toISOString().slice(0, 16)}`,
-          scope: "agent",
-          source: "session_summary",
-          sourceTaskId: config.taskId,
-        }),
-      });
+    const indexResp = await fetch(`${config.apiUrl}/api/memory/index`, {
+      method: "POST",
+      headers: apiHeaders(config),
+      body: JSON.stringify({
+        scope: "agent",
+        source: "session_summary",
+        sourceTaskId,
+        content: summary,
+        name: "session-summary",
+        agentId,
+      }),
+    });
+    if (!indexResp.ok) {
+      const text = await indexResp.text().catch(() => "");
+      console.error("session_summary: /api/memory/index POST failed (pi):", indexResp.status, text);
+      return;
     }
-  } catch {
-    /* non-blocking */
+    if (wantRatings && result.ratings && result.ratings.length > 0) {
+      const ratingEvents = _buildRatings(result.ratings, retrievals);
+      if (ratingEvents.length > 0) {
+        await _postRatings({
+          apiUrl: config.apiUrl,
+          apiKey: config.apiKey,
+          agentId,
+          taskId: sourceTaskId,
+          events: ratingEvents,
+        }).catch((err) => console.error("session_summary: postRatings failed (pi):", err));
+      }
+    }
+  } catch (err) {
+    console.error("session_summary failed (pi):", err);
   }
 }
@@ -661,7 +698,7 @@ export function createSwarmHooksExtension(config: SwarmHooksConfig): ExtensionFa
       // Session summarization — get session file from context's session manager
       const sessionFile = ctx.sessionManager.getSessionFile?.();
       if (!process.env.SKIP_SESSION_SUMMARY) {
-        await summarizeSession(config, sessionFile);
+        await summarizeSessionForPi(config, sessionFile);
       }
       // Mark agent offline

package/src/telemetry.ts CHANGED Viewed

@@ -73,6 +73,32 @@ interface TrackOptions {
   metadata?: Record<string, unknown>;
 }
+/**
+ * Read SWARM_ORG_ID / SWARM_ORG_NAME from process.env at call time. Reading
+ * fresh each track() lets reloaded swarm_config values land in telemetry
+ * without restarting (loadGlobalConfigsIntoEnv mutates process.env on
+ * `POST /api/config/reload` with override=true). Returns only the keys that
+ * are set, so the spread below stays a clean noop on self-host.
+ */
+function getOrgIdentity(): { organization_id?: string; organization_name?: string } {
+  const out: { organization_id?: string; organization_name?: string } = {};
+  const orgId = process.env.SWARM_ORG_ID?.trim();
+  if (orgId) out.organization_id = orgId;
+  const orgName = process.env.SWARM_ORG_NAME?.trim();
+  if (orgName) out.organization_name = orgName;
+  return out;
+}
+/**
+ * Mirror of `buildIdentity()`'s SWARM_CLOUD parsing — accepts "true" or "1".
+ * Always emitted (not optional) so consumers can split cloud vs self-host
+ * cohorts without ambiguity between "false" and "unset".
+ */
+function isCloudDeployment(): boolean {
+  const raw = process.env.SWARM_CLOUD;
+  return raw === "true" || raw === "1";
+}
 /** Fire-and-forget telemetry event. Never throws, never blocks. */
 export function track(options: TrackOptions): void {
   if (!isEnabled() || !installationId) return;
@@ -89,6 +115,8 @@ export function track(options: TrackOptions): void {
         transport: "https",
         schema_version: 1,
         environment: process.env.NODE_ENV ?? "production",
+        is_cloud: isCloudDeployment(),
+        ...getOrgIdentity(),
         ...options.metadata,
       },
     };