npm - @desplega.ai/agent-swarm - Versions diffs - 1.76.2 → 1.77.0 - Mend

@desplega.ai/agent-swarm 1.76.2 → 1.77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/openapi.json +9 -2
package/package.json +1 -1
package/src/be/memory/raters/llm.ts +26 -0
package/src/cli.tsx +3 -24
package/src/commands/credential-wait.ts +31 -6
package/src/commands/runner.ts +1045 -1059
package/src/hooks/hook.ts +174 -147
package/src/http/status.ts +8 -0
package/src/providers/claude-adapter.ts +9 -1
package/src/providers/codex-adapter.ts +232 -2
package/src/providers/codex-oauth/storage.ts +21 -0
package/src/providers/pi-mono-extension.ts +114 -77
package/src/telemetry.ts +28 -0
package/src/tests/claude-stop-hook.test.ts +432 -0
package/src/tests/codex-adapter.test.ts +436 -1
package/src/tests/internal-ai/complete-structured.test.ts +276 -0
package/src/tests/internal-ai/credentials.test.ts +264 -0
package/src/tests/internal-ai/schema-parity.test.ts +103 -0
package/src/tests/internal-ai/summarize-session.test.ts +105 -0
package/src/tests/opencode-plugin.test.ts +496 -0
package/src/tests/pi-mono-extension.test.ts +347 -0
package/src/tests/reload-config.test.ts +9 -1
package/src/tests/status.test.ts +4 -0
package/src/tests/telemetry-init.test.ts +137 -1
package/src/tests/template-recommendations.test.ts +1 -0
package/src/utils/internal-ai/complete-structured.ts +296 -0
package/src/utils/internal-ai/credentials.ts +175 -0
package/src/utils/internal-ai/index.ts +31 -0
package/src/utils/internal-ai/models.ts +46 -0
package/src/utils/internal-ai/summarize-session.ts +101 -0

package/src/hooks/hook.ts CHANGED

@@ -3,15 +3,14 @@
 import pkg from "../../package.json";
 import {
   buildRatingsFromLlm,
-  buildSummaryWithRatingsPrompt,
   dedupeRetrievalsForRater,
   fetchRetrievalsForTask,
   isLlmRaterEnabled,
   postRatings,
   type RetrievalRow,
 } from "../be/memory/raters/llm";
-import { runMemoryRater } from "../be/memory/raters/llm-summarizer";
 import type { Agent } from "../types";
+import { summarizeSession as runSummarize } from "../utils/internal-ai";
 import { checkToolLoop, clearToolHistory } from "./tool-loop-detection";
 const SERVER_NAME = pkg.config?.name ?? "agent-swarm";
@@ -231,6 +230,168 @@ export async function resolveStopHookTaskContext(
   return { taskContext, taskId };
 }
+/**
+ * Test-injection points for `runStopHookSessionSummary`. Production callers
+ * omit `deps` entirely — the claude Stop hook uses the default implementations
+ * bound at module-import time.
+ *
+ * Why explicit DI: `bun:test`'s `mock.module()` is process-wide and leaks
+ * across test files, so the Stop-hook test cannot stub out `runSummarize` /
+ * `postRatings` via module mocking without breaking siblings. Mirrors the
+ * `summarizeSessionForPi` pattern in `src/providers/pi-mono-extension.ts`.
+ */
+export interface RunStopHookSessionSummaryDeps {
+  runSummarize?: typeof runSummarize;
+  fetchRetrievalsForTask?: typeof fetchRetrievalsForTask;
+  postRatings?: typeof postRatings;
+  buildRatingsFromLlm?: typeof buildRatingsFromLlm;
+}
+export interface RunStopHookSessionSummaryOpts {
+  agentId: string;
+  transcriptPath: string;
+  /** Defaulted to `process.env`; injectable for tests. */
+  env?: NodeJS.ProcessEnv;
+}
+/**
+ * Run session summarization for the claude Stop hook via the shared
+ * `internal-ai` abstraction. Replaces the previous `runMemoryRater` call so
+ * `CLAUDE_CODE_OAUTH_TOKEN`-only environments (Pro/Max OAuth users without
+ * OpenRouter) keep working via the `claude -p` fallback inside the wrapper.
+ *
+ * Flow:
+ *   1. Read tail of transcript file (last 20 KB).
+ *   2. Resolve task context + (optionally) memory retrievals for ratings.
+ *   3. Call `runSummarize` from `src/utils/internal-ai` — picks credentials
+ *      out of env / codex OAuth / CLAUDE_CODE_OAUTH_TOKEN, returns structured
+ *      `{summary, ratings}`.
+ *   4. Apply length/quality gate; POST summary to `/api/memory/index`.
+ *   5. If `MEMORY_RATERS` includes `llm` AND ratings came back, POST them
+ *      via `postRatings` (events-based).
+ *
+ * Non-blocking — any thrown error is swallowed so session shutdown never blocks.
+ */
+export async function runStopHookSessionSummary(
+  opts: RunStopHookSessionSummaryOpts,
+  deps: RunStopHookSessionSummaryDeps = {},
+): Promise<void> {
+  const env = opts.env ?? process.env;
+  if (env.SKIP_SESSION_SUMMARY) return;
+  const _runSummarize = deps.runSummarize ?? runSummarize;
+  const _fetchRetrievals = deps.fetchRetrievalsForTask ?? fetchRetrievalsForTask;
+  const _postRatings = deps.postRatings ?? postRatings;
+  const _buildRatings = deps.buildRatingsFromLlm ?? buildRatingsFromLlm;
+  try {
+    let transcript = "";
+    try {
+      const fullTranscript = await Bun.file(opts.transcriptPath).text();
+      transcript = fullTranscript.length > 20000 ? fullTranscript.slice(-20000) : fullTranscript;
+    } catch {
+      /* no transcript */
+    }
+    if (transcript.length <= 100) return;
+    // Prefer AGENT_SWARM_TASK_ID env var; fall back to TASK_FILE on
+    // disk. PR #444 gate-trace showed the file disappears mid-session
+    // and the silent catch dropped every LLM rater piggyback.
+    const { taskContext, taskId } = await resolveStopHookTaskContext(env);
+    const apiUrl = env.MCP_BASE_URL || `http://localhost:${env.PORT || "3013"}`;
+    const apiKey = env.API_KEY || "";
+    // Memory-rater v1.5 step-4: piggyback per-memory ratings on the
+    // existing summary call when MEMORY_RATERS includes `llm`.
+    const llmRaterEnabled = isLlmRaterEnabled();
+    let retrievals: RetrievalRow[] = [];
+    if (llmRaterEnabled && taskId) {
+      const rawRetrievals = await _fetchRetrievals({
+        apiUrl,
+        apiKey,
+        agentId: opts.agentId,
+        taskId,
+      });
+      // Dedup self-similar cron-task memories before sending to the
+      // rater — see `dedupeRetrievalsForRater` doc for the why.
+      retrievals = dedupeRetrievalsForRater(rawRetrievals);
+    }
+    const result = await _runSummarize({
+      harness: "claude",
+      transcript,
+      retrievals,
+      taskContext: {
+        sourceTaskId: taskId ?? "",
+        agentId: opts.agentId,
+        // claude's path doesn't pass the user prompt here today — leave undefined.
+        prompt: undefined,
+      },
+      apiUrl,
+      apiKey,
+    });
+    // null = no auth resolved (no OPENROUTER, ANTHROPIC, OPENAI, codex OAuth,
+    // or CLAUDE_CODE_OAUTH_TOKEN) — silent skip, same as today's no-key behavior.
+    if (!result) return;
+    const summary = result.summary.trim();
+    const ratings = result.ratings ?? [];
+    // Skip indexing if the session had no significant learnings.
+    if (summary.length > 20 && !summary.toLowerCase().includes("no significant learnings")) {
+      await fetch(`${apiUrl}/api/memory/index`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
+          "X-Agent-ID": opts.agentId,
+        },
+        body: JSON.stringify({
+          agentId: opts.agentId,
+          content: summary,
+          name: taskContext
+            ? `Session: ${taskContext.slice(0, 80)}`
+            : `Session: ${new Date().toISOString().slice(0, 16)}`,
+          scope: "agent",
+          source: "session_summary",
+          ...(taskId ? { sourceTaskId: taskId } : {}),
+        }),
+      });
+    }
+    // Best-effort: post LLM ratings. Never blocks summary indexing.
+    if (llmRaterEnabled && taskId && retrievals.length > 0 && ratings.length === 0) {
+      console.error("[memory-rater:llm] piggyback produced no ratings", {
+        retrievalsLen: retrievals.length,
+        ratingsLen: 0,
+      });
+    }
+    if (llmRaterEnabled && taskId && ratings.length > 0) {
+      try {
+        const events = _buildRatings(ratings, retrievals);
+        if (events.length > 0) {
+          await _postRatings({
+            apiUrl,
+            apiKey,
+            agentId: opts.agentId,
+            taskId,
+            events,
+          });
+        }
+      } catch (err) {
+        console.error(
+          "[memory-rater:llm] piggyback rating emission failed:",
+          (err as Error).message,
+        );
+      }
+    }
+  } catch {
+    // Non-blocking — session summarization failure should never block shutdown
+  }
+}
 /**
  * Main hook handler - processes Claude Code hook events
  */
@@ -1079,151 +1240,17 @@ ${hasAgentIdHeader() ? `You have a pre-defined agent ID via header: ${mcpConfig?
         }
       }
-      // Session summarization + LLM rater piggyback via OpenRouter (Vercel AI SDK).
-      //
-      // No-op when OPENROUTER_API_KEY is unset — self-hosters / OSS users
-      // without OpenRouter skip session summary + LLM ratings entirely. The
-      // previous `claude -p` path silently produced "Not logged in · Please
-      // run /login" rows after the 2026-05-05 CLAUDE_CODE_VERSION bump
-      // stopped propagating CLAUDE_CODE_OAUTH_TOKEN to hook subprocesses.
-      if (
-        agentInfo?.id &&
-        msg.transcript_path &&
-        !process.env.SKIP_SESSION_SUMMARY &&
-        process.env.OPENROUTER_API_KEY
-      ) {
-        try {
-          let transcript = "";
-          try {
-            const fullTranscript = await Bun.file(msg.transcript_path).text();
-            transcript =
-              fullTranscript.length > 20000 ? fullTranscript.slice(-20000) : fullTranscript;
-          } catch {
-            /* no transcript */
-          }
-          if (transcript.length > 100) {
-            // Prefer AGENT_SWARM_TASK_ID env var; fall back to TASK_FILE on
-            // disk. PR #444 gate-trace showed the file disappears mid-session
-            // and the silent catch dropped every LLM rater piggyback.
-            const { taskContext, taskId } = await resolveStopHookTaskContext();
-            const apiUrl =
-              process.env.MCP_BASE_URL || `http://localhost:${process.env.PORT || "3013"}`;
-            const apiKey = process.env.API_KEY || "";
-            // Memory-rater v1.5 step-4: piggyback per-memory ratings on the
-            // existing summary call when MEMORY_RATERS includes `llm`.
-            // Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-4.md §3
-            const llmRaterEnabled = isLlmRaterEnabled();
-            let retrievals: RetrievalRow[] = [];
-            if (llmRaterEnabled && taskId) {
-              const rawRetrievals = await fetchRetrievalsForTask({
-                apiUrl,
-                apiKey,
-                agentId: agentInfo.id,
-                taskId,
-              });
-              // Dedup self-similar cron-task memories before sending to the
-              // rater — see `dedupeRetrievalsForRater` doc for the why.
-              retrievals = dedupeRetrievalsForRater(rawRetrievals);
-            }
-            const baseSummarizePrompt = `You are summarizing an AI agent's work session. Extract ONLY high-value learnings.
-DO NOT include:
-- Generic descriptions of what was done ("worked on task X")
-- Tool calls or file reads
-- Routine progress updates
-DO include (if present):
-- **Mistakes made and corrections** — what went wrong and what fixed it
-- **Discovered patterns** — reusable approaches, APIs, or codebase conventions
-- **Codebase knowledge** — important file paths, architecture decisions, gotchas
-- **Environment knowledge** — service URLs, config details, tool quirks
-- **Failed approaches** — what was tried and didn't work (and why)
-Format as a bulleted list of concrete, reusable facts. If the session was routine with no significant learnings, respond with exactly: "No significant learnings."
-${taskContext ? `\nTask context: ${taskContext}` : ""}
-Transcript:
-${transcript}`;
-            // Always ask for the structured (summary + ratings) payload — same
-            // cost as the unstructured path. Empty retrievals → empty memory
-            // block → ratings: []; the postRatings gate below still skips the
-            // POST when there's nothing to send.
-            const summarizePrompt = buildSummaryWithRatingsPrompt(baseSummarizePrompt, retrievals);
-            const raterResult = await runMemoryRater({
-              prompt: summarizePrompt,
-              apiKey: process.env.OPENROUTER_API_KEY,
-            });
-            if (!raterResult.ok) {
-              console.error("[memory-rater:llm] runMemoryRater returned non-ok", {
-                reason: raterResult.reason,
-                ...(raterResult.status !== undefined ? { status: raterResult.status } : {}),
-              });
-            } else {
-              const summary = raterResult.data.summary;
-              const ratings = raterResult.data.ratings;
-              // Skip indexing if the session had no significant learnings
-              if (
-                summary &&
-                summary.length > 20 &&
-                !summary.trim().toLowerCase().includes("no significant learnings")
-              ) {
-                await fetch(`${apiUrl}/api/memory/index`, {
-                  method: "POST",
-                  headers: {
-                    "Content-Type": "application/json",
-                    ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
-                    "X-Agent-ID": agentInfo.id,
-                  },
-                  body: JSON.stringify({
-                    agentId: agentInfo.id,
-                    content: summary,
-                    name: taskContext
-                      ? `Session: ${taskContext.slice(0, 80)}`
-                      : `Session: ${new Date().toISOString().slice(0, 16)}`,
-                    scope: "agent",
-                    source: "session_summary",
-                    ...(taskId ? { sourceTaskId: taskId } : {}),
-                  }),
-                });
-              }
-              // Best-effort: post LLM ratings. Never blocks summary indexing.
-              if (llmRaterEnabled && taskId && retrievals.length > 0 && ratings.length === 0) {
-                console.error("[memory-rater:llm] piggyback produced no ratings", {
-                  retrievalsLen: retrievals.length,
-                  ratingsLen: 0,
-                });
-              }
-              if (llmRaterEnabled && taskId && ratings.length > 0) {
-                try {
-                  const events = buildRatingsFromLlm(ratings, retrievals);
-                  if (events.length > 0) {
-                    await postRatings({
-                      apiUrl,
-                      apiKey,
-                      agentId: agentInfo.id,
-                      taskId,
-                      events,
-                    });
-                  }
-                } catch (err) {
-                  console.error(
-                    "[memory-rater:llm] piggyback rating emission failed:",
-                    (err as Error).message,
-                  );
-                }
-              }
-            }
-          }
-        } catch {
-          // Non-blocking — session summarization failure should never block shutdown
-        }
+      // Session summarization + LLM rater piggyback via the shared internal-ai
+      // wrapper (OpenRouter → Anthropic → OpenAI → codex OAuth →
+      // CLAUDE_CODE_OAUTH_TOKEN → claude -p fallback). The wrapper handles
+      // credential resolution and returns null when nothing resolves, so Pro/Max
+      // OAuth users keep working without OpenRouter (the working path PR #450
+      // restored). Non-blocking — failures never block shutdown.
+      if (agentInfo?.id && msg.transcript_path) {
+        await runStopHookSessionSummary({
+          agentId: agentInfo.id,
+          transcriptPath: msg.transcript_path,
+        });
       }
       // Mark the agent as offline

package/src/http/status.ts CHANGED

@@ -87,6 +87,13 @@ export const StatusIdentitySchema = z.object({
   is_cloud: z.boolean(),
   marketing_url: z.string().nullable(),
   hide_cloud_promo: z.boolean(),
+  /**
+   * Stable identifier for the org/tenant this swarm belongs to. Set by the
+   * orchestrator on cloud deployments via `SWARM_ORG_ID`; null on self-host
+   * unless the operator opts in. Threaded into telemetry events so multi-org
+   * cloud installs can be sliced server-side.
+   */
+  org_id: z.string().nullable(),
 });
 export type StatusIdentity = z.infer<typeof StatusIdentitySchema>;
@@ -240,6 +247,7 @@ function buildIdentity(): StatusIdentity {
     is_cloud: cloudRaw === "true" || cloudRaw === "1",
     marketing_url: process.env.SWARM_MARKETING_URL?.trim() || null,
     hide_cloud_promo: hideRaw === "true" || hideRaw === "1",
+    org_id: process.env.SWARM_ORG_ID?.trim() || null,
   };
 }

package/src/providers/claude-adapter.ts CHANGED

@@ -188,17 +188,25 @@ class ClaudeSession implements ProviderSession {
       `\x1b[2m[${config.role}]\x1b[0m \x1b[36m▸\x1b[0m Spawning Claude (model: ${model}) for task ${config.taskId.slice(0, 8)}`,
     );
+    const sourceEnv = config.env || process.env;
     this.proc = Bun.spawn(cmd, {
       cwd: this.config.cwd,
       env: {
         ENABLE_PROMPT_CACHING_1H: "1",
-        ...(config.env || process.env),
+        ...sourceEnv,
         TASK_FILE: taskFilePath,
         // Belt-and-braces: TASK_FILE on disk can disappear mid-session (race
         // with task lifecycle), which silently drops the Stop-hook memory
         // rater. The hook prefers these env vars when present. See PR #444.
         AGENT_SWARM_TASK_ID: config.taskId,
         AGENT_SWARM_AGENT_ID: config.agentId,
+        // claude CLI strips CLAUDE_CODE_OAUTH_TOKEN from hook subprocess env
+        // (security: prevents OAuth-token leakage to user-written hooks).
+        // Mirror it under a name claude doesn't recognize so the Stop hook
+        // can resolve the claude-cli fallback in internal-ai/credentials.ts.
+        ...(sourceEnv.CLAUDE_CODE_OAUTH_TOKEN
+          ? { AGENT_SWARM_CLAUDE_OAUTH_TOKEN: sourceEnv.CLAUDE_CODE_OAUTH_TOKEN }
+          : {}),
       } as Record<string, string>,
       stdout: "pipe",
       stderr: "pipe",