npm - @desplega.ai/agent-swarm - Versions diffs - 1.86.0 → 1.87.0 - Mend

@desplega.ai/agent-swarm 1.86.0 → 1.87.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/openapi.json +72 -1
package/package.json +3 -1
package/src/be/db-queries/tracker.ts +21 -0
package/src/be/db.ts +235 -14
package/src/be/migrations/079_task_followup_config.sql +1 -0
package/src/be/modelsdev-cache.json +77663 -74073
package/src/cli.tsx +26 -0
package/src/commands/context-preamble.ts +272 -0
package/src/commands/e2b.ts +728 -0
package/src/commands/resume-session.ts +35 -78
package/src/commands/runner.ts +125 -13
package/src/e2b/dispatch.ts +429 -0
package/src/e2b/env.ts +206 -0
package/src/heartbeat/heartbeat.ts +145 -30
package/src/heartbeat/templates.ts +11 -7
package/src/http/session-data.ts +8 -1
package/src/http/tasks.ts +152 -3
package/src/jira/sync.ts +4 -4
package/src/linear/sync.ts +6 -5
package/src/providers/claude-adapter.ts +10 -76
package/src/providers/claude-managed-adapter.ts +61 -75
package/src/providers/codex-adapter.ts +15 -18
package/src/providers/codex-oauth/auth-json.ts +18 -1
package/src/providers/codex-oauth/flow.ts +24 -1
package/src/providers/types.ts +6 -0
package/src/tasks/worker-follow-up.ts +162 -2
package/src/telemetry.ts +11 -1
package/src/tests/claude-adapter.test.ts +5 -27
package/src/tests/claude-managed-adapter.test.ts +38 -52
package/src/tests/codex-adapter.test.ts +6 -31
package/src/tests/codex-oauth.test.ts +149 -3
package/src/tests/codex-pool.test.ts +14 -3
package/src/tests/e2b-dispatch.test.ts +330 -0
package/src/tests/heartbeat-supersede-resume.test.ts +285 -0
package/src/tests/heartbeat.test.ts +26 -16
package/src/tests/prompt-template-remaining.test.ts +4 -0
package/src/tests/resume-session.test.ts +42 -50
package/src/tests/structured-output.test.ts +69 -0
package/src/tests/task-completion-idempotency.test.ts +185 -2
package/src/tests/task-supersede-resume.test.ts +722 -0
package/src/tests/telemetry-init.test.ts +69 -0
package/src/tests/vcs-tracking.test.ts +39 -0
package/src/tools/send-task.ts +12 -1
package/src/tools/store-progress.ts +2 -2
package/src/tools/templates.ts +14 -2
package/src/types.ts +46 -1
package/src/workflows/executors/agent-task.ts +3 -0

package/src/cli.tsx CHANGED Viewed

@@ -292,6 +292,27 @@ const COMMAND_HELP: Record<
       `  ${binName} claude-managed-setup --api-url https://swarm.example.com`,
     ].join("\n"),
   },
+  e2b: {
+    usage: `${binName} e2b <subcommand> [options]`,
+    description:
+      "Build Agent Swarm E2B templates and start API/worker sandboxes on demand for CI or Dockerless environments.",
+    options: [
+      "  build-template --role api|worker    Build or rebuild an E2B template",
+      "  delete-template <template...>        Delete E2B templates",
+      "  publish-template <template...>       Publish E2B templates",
+      "  unpublish-template <template...>     Make E2B templates private",
+      "  start-api --template <name>          Start the API in an E2B sandbox",
+      "  start-worker --api-url <url>         Start a worker against a public API URL",
+      "  start-stack                         Start API plus one or more workers",
+      "  list | kill <sandbox-id...>          Inspect or clean up sandboxes",
+      "  -h, --help                          Show this help",
+    ].join("\n"),
+    examples: [
+      `  ${binName} e2b build-template --role worker`,
+      `  ${binName} e2b start-worker --api-url https://swarm.example.com --api-key "$SWARM_API_KEY"`,
+      `  ${binName} e2b start-stack --workers 2 --api-key "$SWARM_API_KEY"`,
+    ].join("\n"),
+  },
 };
 function printHelp(command?: string) {
@@ -323,6 +344,7 @@ function printHelp(command?: string) {
     ["docs", "Open documentation (--open to launch in browser)"],
     ["codex-login", "Authenticate Codex via ChatGPT OAuth"],
     ["claude-managed-setup", "Bootstrap Anthropic Managed Agents (agent + env + skills)"],
+    ["e2b", "Build templates and start E2B API/worker sandboxes"],
     ["version", "Show version number"],
     ["help", "Show this help message"],
   ];
@@ -584,6 +606,10 @@ if (args.showHelp || args.command === "help" || args.command === undefined) {
   const { runClaudeManagedSetup } = await import("./commands/claude-managed-setup");
   const setupArgs = process.argv.slice(process.argv.indexOf("claude-managed-setup") + 1);
   await runClaudeManagedSetup(setupArgs);
+} else if (args.command === "e2b") {
+  const { runE2BCommand } = await import("./commands/e2b");
+  const e2bArgs = process.argv.slice(process.argv.indexOf("e2b") + 1);
+  await runE2BCommand(e2bArgs);
 } else {
   render(<App args={args} />);
 }

package/src/commands/context-preamble.ts CHANGED Viewed

@@ -11,6 +11,8 @@
  * resumes (see swarm memory sigterm-143-resumed-session-context-saturation-2026-05-13).
  */
+import { scrubSecrets } from "../utils/secret-scrubber";
 export const CONTEXT_PREAMBLE_MAX_TOKENS = Number(
   process.env.CONTEXT_PREAMBLE_MAX_TOKENS || "2000",
 );
@@ -18,12 +20,25 @@ export const CONTEXT_PREAMBLE_MAX_TOKENS = Number(
 export const CONTEXT_PREAMBLE_MAX_CHARS = CONTEXT_PREAMBLE_MAX_TOKENS * 4;
 export const CONTEXT_PREAMBLE_MAX_ANCESTORS = 5;
+/**
+ * Token budget for the resume-task preamble. Default 4000 = 2× the regular
+ * preamble, since the resume agent needs the original task brief verbatim
+ * plus a tool-call summary to avoid redoing completed work.
+ *
+ * Overridable via the `CONTEXT_PREAMBLE_RESUME_MAX_TOKENS` environment
+ * variable. An unset, empty, non-numeric, non-finite, or negative value falls
+ * back to the default: a NaN budget would make every `length > budget`
+ * comparison false and silently disable all truncation.
+ */
+export const CONTEXT_PREAMBLE_RESUME_MAX_TOKENS = ((): number => {
+  const raw = process.env.CONTEXT_PREAMBLE_RESUME_MAX_TOKENS;
+  const parsed = raw ? Number(raw) : Number.NaN;
+  return Number.isFinite(parsed) && parsed >= 0 ? parsed : 4000;
+})();
+/** Character budget for the resume preamble: the token budget × 4. */
+export const CONTEXT_PREAMBLE_RESUME_MAX_CHARS = CONTEXT_PREAMBLE_RESUME_MAX_TOKENS * 4;
+/** How many of the most recent session_logs rows to inspect for tool-call summary. */
+export const CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT = 50;
 export interface TaskContextForPreamble {
   id: string;
   task: string;
   output?: string;
   progress?: string;
   status?: string;
+  taskType?: string;
   parentTaskId?: string;
   attachments?: Array<{
     kind: string;
@@ -57,6 +72,7 @@ export async function fetchTaskContextForPreamble(
       output: data.output,
       progress: data.progress,
       status: data.status,
+      taskType: data.taskType,
       parentTaskId: data.parentTaskId,
       attachments: data.attachments,
     };
@@ -176,3 +192,259 @@ export async function buildContextPreamble(
   return preamble;
 }
+// ─── Resume Preamble ───────────────────────────────────────────────────────────
+interface SessionLogForPreamble { // One entry of the `logs` array returned by the task session-logs endpoint, as consumed by the resume preamble.
+  id: string;
+  taskId?: string;
+  sessionId: string;
+  iteration: number;
+  cli: string;
+  content: string; // Raw log line; parsed as JSON when possible, otherwise used as plain text.
+  lineNumber: number;
+  createdAt: string; // Timestamp; chars 11-19 are read as HH:MM:SS and values are ordered lexically — presumably ISO-8601, TODO confirm.
+}
+/**
+ * Best-effort fetch of a task's session logs for the resume preamble.
+ *
+ * Requests `/api/tasks/{taskId}/session-logs` with `?limit=` set to
+ * CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT, sending a Bearer token when
+ * `apiKey` is non-empty. Never throws: a non-OK response, a network or parse
+ * error, or a payload without a `logs` array all yield an empty list.
+ */
+async function fetchSessionLogsForResume(
+  apiUrl: string,
+  apiKey: string,
+  taskId: string,
+): Promise<SessionLogForPreamble[]> {
+  const headers: Record<string, string> = apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
+  // The limit is applied server-side: long-running parents can accumulate large
+  // `session_logs`, and the preamble only consumes a bounded window, so this
+  // keeps dispatch fast and memory-flat regardless of run length.
+  const endpoint = `${apiUrl}/api/tasks/${taskId}/session-logs?limit=${CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT}`;
+  try {
+    const res = await fetch(endpoint, { headers });
+    if (!res.ok) {
+      return [];
+    }
+    const payload = (await res.json()) as { logs?: SessionLogForPreamble[] };
+    if (Array.isArray(payload.logs)) {
+      return payload.logs;
+    }
+    return [];
+  } catch {
+    return [];
+  }
+}
+/** Max characters of free-form text (snippet or tool target) kept in one summary line. */
+const SESSION_LOG_SUMMARY_TEXT_MAX_CHARS = 120;
+/**
+ * Render a `{ type: "tool_use", name, input }` record as `[ts] name target`.
+ * The target is the first of `input.file_path`, `input.path`, `input.command`
+ * that is present; it is only appended when it is a string. Whitespace runs
+ * (including newlines in multi-line shell commands) are collapsed and the
+ * target is capped, so the result is always a single bounded line. Returns
+ * `null` when the record is not a named tool_use.
+ */
+function formatToolUseSummary(ts: string, record: Record<string, unknown>): string | null {
+  if (record.type !== "tool_use" || typeof record.name !== "string") return null;
+  const input = record.input as Record<string, unknown> | undefined;
+  const target = input?.file_path ?? input?.path ?? input?.command;
+  if (typeof target !== "string") return `[${ts}] ${record.name}`;
+  const compact = target.replace(/\s+/g, " ").slice(0, SESSION_LOG_SUMMARY_TEXT_MAX_CHARS);
+  return `[${ts}] ${record.name} ${compact}`;
+}
+/**
+ * Format a single session_log line as a one-line tool-call summary. Falls back
+ * to a truncated content snippet when the line isn't recognizable as a
+ * tool call. The returned text is passed through `scrubSecrets` by the caller
+ * before insertion into the preamble (no secrets in /workspace/logs/*.jsonl).
+ *
+ * @param line Session log row; `content` may be JSON or plain text.
+ * @returns `[HH:MM:SS] <summary>`, or `null` when there is nothing to show
+ *   (empty text, or JSON that is not an object/array).
+ */
+function summarizeSessionLogLine(line: SessionLogForPreamble): string | null {
+  const ts = line.createdAt.slice(11, 19); // HH:MM:SS — assumes an ISO-8601 timestamp
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(line.content);
+  } catch {
+    // Not JSON: show the raw text, flattened to one line and capped.
+    const snippet = line.content
+      .replace(/\s+/g, " ")
+      .slice(0, SESSION_LOG_SUMMARY_TEXT_MAX_CHARS);
+    return snippet ? `[${ts}] ${snippet}` : null;
+  }
+  if (!parsed || typeof parsed !== "object") return null;
+  const obj = parsed as Record<string, unknown>;
+  // Anthropic / claude message-style tool calls: first tool_use block in message.content.
+  const message = obj.message as Record<string, unknown> | undefined;
+  const content = message?.content;
+  if (Array.isArray(content)) {
+    for (const block of content) {
+      if (!block || typeof block !== "object") continue;
+      const summary = formatToolUseSummary(ts, block as Record<string, unknown>);
+      if (summary !== null) return summary;
+    }
+  }
+  // Codex / generic event-style: { type: 'tool_use', name: '...', input: {...} }
+  const direct = formatToolUseSummary(ts, obj);
+  if (direct !== null) return direct;
+  // Fallback: short content snippet (still useful for diff/insight)
+  const snippet = JSON.stringify(parsed)
+    .replace(/\s+/g, " ")
+    .slice(0, SESSION_LOG_SUMMARY_TEXT_MAX_CHARS);
+  return snippet ? `[${ts}] ${snippet}` : null;
+}
+/**
+ * Documentation for `buildResumeContextPreamble` (defined below, after
+ * `walkResumeChain`): builds a resume-task preamble.
+ *
+ * Reads the resume chain's tasks and their recent session_logs over HTTP
+ * (never touches `bun:sqlite` worker-side). Allocates the
+ * CONTEXT_PREAMBLE_RESUME_MAX_TOKENS budget (default 4000 tokens):
+ *
+ *   - 40% — full original task description (exempt from per-section truncation)
+ *   - 35% — last-N session_logs summary (tool-call one-liners; scrubbed)
+ *   - 15% — artifacts/attachments index (names + pointers only)
+ *   - 10% — fixed framing (header + continuation instructions)
+ *
+ * Truncation order: session-log summary (oldest first), then artifacts.
+ * The task description is not truncated by the per-section budgets; only the
+ * final hard cap on the whole preamble can cut it.
+ */
+/** Upper bound on the number of ancestors fetched while walking a resume chain. */
+const MAX_RESUME_CHAIN_DEPTH = 10;
+/**
+ * Walk up the parentTaskId chain through `taskType === "resume"` ancestors
+ * to find the original (non-resume) task. Returns the chain in order
+ * [immediateParent, ..., original]. Caps at MAX_RESUME_CHAIN_DEPTH to
+ * defend against runaway chains, and stops as soon as an ID repeats so a
+ * cyclic chain never yields duplicate members (which would duplicate the
+ * session logs fetched per member and inflate the reported depth).
+ *
+ * PR #594 review: cascading resumes (original → resume1 → resume2) had
+ * `buildResumeContextPreamble` fetching only the immediate parent — whose
+ * `task` text is the synthetic "Resume interrupted task..." prompt rather
+ * than the original work brief. Walking the chain restores the original
+ * description and lets us merge session logs from all resume attempts.
+ *
+ * @param apiUrl Passed through to `fetchTaskContextForPreamble`.
+ * @param apiKey Passed through to `fetchTaskContextForPreamble`.
+ * @param immediateParentId ID of the task to start from.
+ * @returns The tasks reached, nearest first; empty when the first fetch fails.
+ *   The walk ends early if any fetch returns `null`.
+ */
+async function walkResumeChain(
+  apiUrl: string,
+  apiKey: string,
+  immediateParentId: string,
+): Promise<TaskContextForPreamble[]> {
+  const chain: TaskContextForPreamble[] = [];
+  const visited = new Set<string>();
+  let currentId: string | undefined = immediateParentId;
+  while (currentId && chain.length < MAX_RESUME_CHAIN_DEPTH && !visited.has(currentId)) {
+    visited.add(currentId);
+    const ctx: TaskContextForPreamble | null = await fetchTaskContextForPreamble(
+      apiUrl,
+      apiKey,
+      currentId,
+    );
+    if (!ctx) break;
+    chain.push(ctx);
+    // Stop once we hit a non-resume ancestor — that's the original work.
+    if (ctx.taskType !== "resume") break;
+    currentId = ctx.parentTaskId;
+  }
+  return chain;
+}
+/**
+ * Build the preamble for a task that resumes an interrupted task.
+ *
+ * Walks the resume chain upward from `parentTaskId`, takes the task text from
+ * the last task reached (the non-resume original when the walk completes),
+ * merges session logs from every chain member, and lists the immediate
+ * parent's attachments.
+ *
+ * Budget, as shares of CONTEXT_PREAMBLE_RESUME_MAX_CHARS:
+ *   - 40% — original task description (exempt from per-section truncation)
+ *   - 35% — recent tool-call summary (secret-scrubbed; oldest lines dropped first)
+ *   - 15% — attachment index (names + pointers; last entries dropped first)
+ *   - remainder — fixed framing
+ * When the description exceeds its share, the overflow is deducted
+ * half-and-half from the log and attachment budgets. If the assembled text
+ * still exceeds the total, it is sliced to the cap and a truncation notice is
+ * appended.
+ *
+ * @param apiUrl Base URL of the API (requests go to `${apiUrl}/api/...`).
+ * @param apiKey API key forwarded to the fetch helpers.
+ * @param parentTaskId ID of the task being resumed (the immediate parent).
+ * @returns The preamble text, or `null` when no task in the chain could be loaded.
+ */
+export async function buildResumeContextPreamble(
+  apiUrl: string,
+  apiKey: string,
+  parentTaskId: string,
+): Promise<string | null> {
+  const chain = await walkResumeChain(apiUrl, apiKey, parentTaskId);
+  if (chain.length === 0) return null;
+  // Original = last entry (non-resume ancestor, or the deepest reachable
+  // if the chain exceeds the depth cap or hits a fetch failure).
+  const original = chain[chain.length - 1] ?? chain[0];
+  if (!original) return null;
+  // Immediate parent — its attachments are the most recent "in flight" set.
+  const parent = chain[0] ?? original;
+  // Fetch session logs from EVERY chain member so a re-superseded resume
+  // still surfaces tool-call history from earlier attempts. Merge, sort by
+  // createdAt ASC, then keep the most recent N.
+  const logsBatches = await Promise.all(
+    chain.map((c) => fetchSessionLogsForResume(apiUrl, apiKey, c.id)),
+  );
+  const merged = logsBatches.flat();
+  merged.sort((a, b) => a.createdAt.localeCompare(b.createdAt));
+  const recentLogs = merged.slice(-CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT);
+  const descBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.4);
+  let logsBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.35);
+  let artBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.15);
+  const attemptOrdinal =
+    chain.length === 2 ? "2nd" : chain.length === 3 ? "3rd" : `${chain.length}th`;
+  // The chain-depth line is spliced in conditionally rather than filtering out
+  // empty strings afterwards: filtering would also strip the intentional blank
+  // lines, leaving text directly above `---` (a Markdown setext heading).
+  const header = [
+    "\n---",
+    "## Resuming Interrupted Task",
+    "",
+    "This task is a fresh-session continuation of an interrupted task (graceful",
+    "shutdown / context-limit / operator action). The block below summarizes the",
+    "original task, what was done so far, and the artifacts in flight.",
+    "",
+    "**Do not redo work already completed below — extend it.**",
+    "",
+    `Original task ID: \`${original.id}\``,
+    ...(chain.length > 1
+      ? [
+          `Resume chain depth: ${chain.length} (this is at least the ${attemptOrdinal} resume attempt).`,
+        ]
+      : []),
+    "",
+    "---",
+    "",
+    "### Original Task Description",
+    "",
+  ].join("\n");
+  // 40% — full description (exempt from section truncation). Pulled from the
+  // ORIGINAL (non-resume) ancestor so cascading resumes don't read each other's
+  // synthetic "Resume interrupted task..." preamble bodies (PR #594 review).
+  const descSection = original.task;
+  // 35% — session-log summary (tool-call lines)
+  const summaryLines: string[] = [];
+  for (const line of recentLogs) {
+    const summary = summarizeSessionLogLine(line);
+    if (!summary) continue;
+    summaryLines.push(summary);
+  }
+  // Scrub secrets BEFORE budget enforcement so secret strings don't get
+  // sliced into half-redactions mid-truncate.
+  const scrubbedSummary = summaryLines.map((s) => scrubSecrets(s));
+  let logsSection = scrubbedSummary.join("\n");
+  // An oversized description eats into the other sections: its overflow is
+  // split between the log budget (rounded up) and the artifact budget
+  // (rounded down), each floored at 0.
+  if (descSection.length > descBudget) {
+    const overflow = descSection.length - descBudget;
+    logsBudget = Math.max(0, logsBudget - Math.ceil(overflow / 2));
+    artBudget = Math.max(0, artBudget - Math.floor(overflow / 2));
+  }
+  // FIFO truncate (drop oldest first) until under budget.
+  while (logsSection.length > logsBudget && scrubbedSummary.length > 0) {
+    scrubbedSummary.shift();
+    logsSection = scrubbedSummary.join("\n");
+  }
+  // 15% — artifacts (names + pointers only)
+  const atts = parent.attachments?.filter((a) => a.name && (a.url || a.path || a.pageId)) ?? [];
+  const artLines: string[] = [];
+  for (const att of atts) {
+    const pointer = formatAttachmentPointer(att);
+    artLines.push(`  - **${att.name}**: \`${pointer}\``);
+  }
+  let artSection = artLines.join("\n");
+  // Drop entries from the end of the list until the index fits its budget.
+  while (artSection.length > artBudget && artLines.length > 0) {
+    artLines.pop();
+    artSection = artLines.join("\n");
+  }
+  const sections: string[] = [header, descSection, ""];
+  if (logsSection) {
+    sections.push("### Recent Tool Calls", "", logsSection, "");
+  }
+  if (artSection) {
+    sections.push("### Artifacts In Flight", "", artSection, "");
+  }
+  sections.push(
+    "---",
+    "",
+    `To review the full prior session call \`get-task-details\` with taskId \`${original.id}\`.`,
+    "",
+    "---",
+    "",
+  );
+  let preamble = sections.join("\n");
+  // Final hard cap — should rarely trip given the per-section budgets above,
+  // but provides a safety net for very long descriptions.
+  if (preamble.length > CONTEXT_PREAMBLE_RESUME_MAX_CHARS) {
+    preamble = `${preamble.slice(0, CONTEXT_PREAMBLE_RESUME_MAX_CHARS)}\n\n[resume preamble truncated to ${CONTEXT_PREAMBLE_RESUME_MAX_TOKENS}-token budget]\n\n---\n`;
+  }
+  return preamble;
+}