npm - @desplega.ai/agent-swarm - Versions diffs - 1.84.0 → 1.85.0 - Mend

@desplega.ai/agent-swarm 1.84.0 → 1.85.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

package/README.md +48 -8
package/openapi.json +5 -3
package/package.json +1 -1
package/src/be/db-queries/oauth.ts +33 -0
package/src/be/db.ts +7 -1
package/src/be/migrations/076_kapso_sender_user_backfill.sql +43 -0
package/src/be/migrations/077_oauth_refresh_locks.sql +8 -0
package/src/commands/context-preamble.ts +178 -0
package/src/commands/runner.ts +87 -7
package/src/http/index.ts +11 -3
package/src/http/tasks.ts +17 -0
package/src/http/users.ts +11 -3
package/src/http/utils.ts +17 -0
package/src/integrations/kapso/inbound.ts +36 -0
package/src/oauth/ensure-token.ts +97 -11
package/src/prompts/base-prompt.ts +15 -2
package/src/prompts/session-templates.ts +26 -12
package/src/providers/pi-mono-adapter.ts +44 -25
package/src/server.ts +2 -0
package/src/tasks/worker-follow-up.ts +82 -0
package/src/tests/agentmail-sending-skill.test.ts +75 -0
package/src/tests/agents-list-model-display.test.ts +45 -0
package/src/tests/base-prompt.test.ts +90 -1
package/src/tests/db-queries-oauth.test.ts +27 -0
package/src/tests/ensure-token.test.ts +71 -0
package/src/tests/http-log-scrubbing.test.ts +24 -0
package/src/tests/http-users.test.ts +53 -0
package/src/tests/kapso-inbound.test.ts +60 -1
package/src/tests/kv-page-proxy.test.ts +1 -0
package/src/tests/list-endpoint-slimming.test.ts +22 -1
package/src/tests/oauth-access-token-tool.test.ts +138 -0
package/src/tests/pagination-metrics.test.ts +4 -4
package/src/tests/pi-mono-adapter.test.ts +37 -1
package/src/tests/prompt-template-session.test.ts +13 -3
package/src/tests/runner-context-preamble.test.ts +202 -0
package/src/tests/runner-fallback-output.test.ts +118 -39
package/src/tests/task-completion-idempotency.test.ts +89 -0
package/src/tools/cancel-task.ts +13 -5
package/src/tools/get-task-details.ts +18 -10
package/src/tools/get-tasks.ts +9 -4
package/src/tools/oauth-access-token.ts +118 -0
package/src/tools/send-task.ts +9 -5
package/src/tools/store-progress.ts +12 -77
package/src/tools/task-action.ts +20 -10
package/src/tools/tool-config.ts +2 -1
package/src/types.ts +5 -0
package/src/utils/secret-scrubber.ts +23 -0
package/templates/skills/agentmail-sending/SKILL.md +148 -28

package/README.md CHANGED Viewed

@@ -2,12 +2,10 @@
   <a href="https://github.com/desplega-ai/agent-swarm/stargazers"><img src="https://img.shields.io/github/stars/desplega-ai/agent-swarm?style=flat-square&color=yellow" alt="GitHub Stars"></a>
   <a href="https://github.com/desplega-ai/agent-swarm/blob/main/LICENSE"><img src="https://img.shields.io/github/license/desplega-ai/agent-swarm?style=flat-square" alt="MIT License"></a>
   <a href="https://github.com/desplega-ai/agent-swarm/pulls"><img src="https://img.shields.io/badge/PRs-welcome-brightgreen?style=flat-square" alt="PRs Welcome"></a>
-  <a href="https://discord.gg/KZgfyyDVZa"><img src="https://img.shields.io/badge/Discord-Join%20us-5865F2?style=flat-square&logo=discord&logoColor=white" alt="Discord"></a>
-  <a href="https://docs.agent-swarm.dev"><img src="https://img.shields.io/badge/docs-agent--swarm.dev-blue?style=flat-square" alt="Docs"></a>
 </p>
 <p align="center">
-  <b>Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants.</b><br/>
+  <b>An engine to make your company AI Native</b><br/>
   <sub>Built by <a href="https://desplega.sh">desplega.sh</a> — by builders, for builders.</sub>
 </p>
@@ -39,11 +37,19 @@
   </a>
 </p>
-> **What if your AI agents remembered everything, learned from every mistake, and got better with every task?**
+> **Agent Swarm is your Company's Compounding Intelligence Layer. A system of AI agents that remember, reason, act and get better with every task.**
+> AI-Native · Compounds · Presence · Harness & LLM-Agnostic · Your Infra · Your Memory
 ## What it does
-Agent Swarm runs a team of AI coding agents that coordinate autonomously. A **lead agent** receives tasks — from Slack, GitHub, GitLab, email, or the API — breaks them down, and delegates to **worker agents** running in Docker containers. Workers execute tasks, ship code, and write their learnings back to a shared memory so the whole swarm gets smarter every session.
+Agent Swarm runs a team of AI agents that coordinate autonomously. A **lead agent** receives tasks (from Slack, GitHub, GitLab, Linear, Jira, email, or the API), breaks them down, and delegates to **worker agents** running in isolated environments (Docker). Workers execute tasks, ship solutions, and write their learnings back to a shared memory so the whole swarm gets smarter every session.
+You can run agents for Marketing, Product, UX, Engineering, Support, Operations, HR, Finance, or any role you can think of. A centralized Lead coordinates them, and they share the learnings horizontally. That's the true difference between [*AI First*](https://www.pleasedontdeploy.com/i/197193364/ai-first) and [*AI Native*](https://www.pleasedontdeploy.com/i/197193364/third-the-ai-native-metamorphosis).
+Agent Swarm is the shared cloud brain and muscle that makes your whole company better every day.
+Sometimes humans are the blocker. We can help you. Contact us at [contact@desplega.sh](mailto:contact@desplega.sh).
 Learn more in the [architecture overview](https://docs.agent-swarm.dev/docs/architecture/overview).
@@ -85,14 +91,39 @@ flowchart LR
     WORKERS --> OUT
 ```
+## Known Use Cases
+Use cases that we and others run daily.
+Each playbook contains: the agents, the tools & skills, and workflows & schedules behind it. **[Browse all playbooks →](https://docs.agent-swarm.dev/docs/playbooks)**
+- **[Feature Development](https://docs.agent-swarm.dev/docs/playbooks/feature-development)** — Integrated with Linear and GitHub to take feature requests from Slack and turn them into pull requests.
+- **[Lead Prospecting](https://docs.agent-swarm.dev/docs/playbooks/lead-prospecting)** — Integrate your prospecting tools with the swarm and let agents handle outreach, scheduling, and follow-up.
+- **[Content Generation](https://docs.agent-swarm.dev/docs/playbooks/content-generation)** — Generate engagement tools, blog posts, manage social media presence, update your website, and more.
+- **[UX Command Center](https://docs.agent-swarm.dev/docs/playbooks/ux-command-center)** — Agents that keep your product usable: record agentic sessions, enforce your design system, and mine user logs to detect and propose UX improvements.
+- **[Proactive Customer Support](https://docs.agent-swarm.dev/docs/playbooks/proactive-customer-support)** — Agents that oversee your top accounts, prepare scheduled reports, and leverage everything they know about your platform to keep those accounts up to date.
+- **[Code Health & Alert Management](https://docs.agent-swarm.dev/docs/playbooks/code-health-alert-management)** — Datadog, New Relic, Sentry, or any alerting tool can kick off fixes or new proposals. Monitor code health and propose improvements weekly, daily, or hourly.
+- **[Reports from Multiple Sources](https://docs.agent-swarm.dev/docs/playbooks/reports-multiple-sources)** — Integrate your data warehouse to generate tailored reports and answer the key questions your team has, with fresh data. Your BI tool may be a thing of the past.
+- **[Self-Documenting & Release Reports](https://docs.agent-swarm.dev/docs/playbooks/self-documenting-release-reports)** — Update your docs and use frameworks like [Remotion](https://www.remotion.dev/), [qa-use](https://github.com/qa-use/qa-use), and [browser-use](https://github.com/browser-use/browser-use) to generate release videos and rich documentation in seconds, at the cadence you need.
+- Do you have a cool playbook to share? Send us a PR!
+> **The patterns that compound.** Five recipes show up in nearly every playbook — they're how the swarm stays reliable as it scales:
+> **[Litmus Tests](https://docs.agent-swarm.dev/docs/playbooks/patterns/litmus-tests)** (LLM-as-judge quality gates) ·
+> **[Drain Loops](https://docs.agent-swarm.dev/docs/playbooks/patterns/drain-loops)** (one ticket → a chain of reviewable PRs) ·
+> **[HITL Gates](https://docs.agent-swarm.dev/docs/playbooks/patterns/hitl-gates)** (pause for human approval on irreversible steps) ·
+> **[Per-Customer Working Directories](https://docs.agent-swarm.dev/docs/playbooks/patterns/per-customer-working-directories)** (context that compounds per account) ·
+> **[No-op Workflows](https://docs.agent-swarm.dev/docs/playbooks/patterns/no-op-workflows)** (skip silently when nothing changed).
+> **[See all patterns →](https://docs.agent-swarm.dev/docs/playbooks/patterns)**
+Check [our templates](https://templates.agent-swarm.dev) for a quick start.
 ## Highlights
 - **Lead/worker orchestration in Docker** — isolated dev environments, priority queues, pause/resume across deploys. [Architecture →](https://docs.agent-swarm.dev/docs/architecture/overview)
 - **Compounding memory & persistent identity** — agents remember past sessions and evolve their own persona, expertise, and notes. [Memory →](https://docs.agent-swarm.dev/docs/architecture/memory) · [Agents →](https://docs.agent-swarm.dev/docs/architecture/agents)
-- **Multi-channel inputs** — Slack, GitHub, GitLab, email, Linear, Jira, and the HTTP API all create tasks. [Integrations](#integrations)
+- **Multi-channel inputs** — Slack, GitHub, GitLab, email, WhatsApp, Linear, Jira, and the HTTP API all create tasks. [Integrations](#integrations)
 - **Workflow engine with Human-in-the-Loop** — DAG-based automation with approval gates, retries, and structured I/O. [Workflows →](https://docs.agent-swarm.dev/docs/concepts/workflows)
 - **Scheduled & recurring tasks** — cron-based automation for standing work. [Scheduling →](https://docs.agent-swarm.dev/docs/concepts/scheduling)
-- **Multi-provider** — run with Claude Code, OpenAI Codex, pi-mono, Devin, Claude Managed Agents, or opencode. [Harness config →](https://docs.agent-swarm.dev/docs/guides/harness-configuration) · [Add a new provider →](https://docs.agent-swarm.dev/docs/guides/harness-providers)
+- **Harness & LLM agnostic** — run with Claude Code, OpenAI Codex, pi-mono, Devin, Claude Managed Agents, raw LLMs, or opencode. [Harness config →](https://docs.agent-swarm.dev/docs/guides/harness-configuration) · [Add a new provider →](https://docs.agent-swarm.dev/docs/guides/harness-providers)
 - **Skills & MCP servers** — reusable procedural knowledge and per-agent MCP servers with scope cascade. [MCP tools →](https://docs.agent-swarm.dev/docs/reference/mcp-tools)
 - **DB-backed pages** — agents publish HTML or JSON pages (reports, dashboards, action specs) via the `create_page` MCP tool with public / authed / password modes, version history, view counters, diff helpers, and PDF export. [MCP tools → Pages](https://docs.agent-swarm.dev/docs/reference/mcp-tools#pages-tools)
 - **KV store** — Redis-like namespaced key/value store with auto-scoped context (Slack thread / PR / Linear issue / page). [MCP tools → KV](https://docs.agent-swarm.dev/docs/reference/mcp-tools#kv-tools)
@@ -100,6 +131,8 @@ flowchart LR
 ## Quick Start
+Need help? Contact us at [contact@desplega.sh](mailto:contact@desplega.sh).
 **Prerequisites:** [Docker](https://docker.com) and a [Claude Code](https://docs.anthropic.com/en/docs/claude-code) OAuth token (`claude setup-token`).
 The fastest way is the onboarding wizard — it collects credentials, picks presets, and generates a working `docker-compose.yml`:
@@ -144,22 +177,26 @@ Worker  Worker  Worker
 2. The lead plans and delegates subtasks to workers.
 3. Workers execute in isolated Docker containers (git, Node.js, Python, etc.).
 4. Progress streams to the dashboard, Slack threads, or the API.
-5. Results ship back out as PRs, issue replies, or Slack messages.
+5. Results ship back out as PRs, custom pages, issue replies, or Slack messages.
 6. Session learnings are extracted and become memory for future tasks.
 More detail in the [task lifecycle docs](https://docs.agent-swarm.dev/docs/concepts/task-lifecycle).
 ## Integrations
+Missing one? Ask the swarm to build it.
 | Integration | What it does | Setup |
 |---|---|---|
 | **Slack** | DM or @mention the bot to create tasks; workers reply in threads | [Guide](https://docs.agent-swarm.dev/docs/guides/slack-integration) |
 | **GitHub App** | @mention or assign the bot on issues/PRs; CI failures create follow-up tasks | [Guide](https://docs.agent-swarm.dev/docs/guides/github-integration) |
 | **GitLab** | Same model as GitHub — webhooks on issues/MRs, `glab` preinstalled in workers | [Guide](https://docs.agent-swarm.dev/docs/guides/gitlab-integration) |
 | **AgentMail** | Give each agent an inbox; emails become tasks or lead messages | [Guide](https://docs.agent-swarm.dev/docs/guides/agentmail-integration) |
+| **Kapso (WhatsApp)** | Native inbound WhatsApp webhook routing; agents reply over WhatsApp with MCP tools or the `kapso-whatsapp` skill | [Guide](https://docs.agent-swarm.dev/docs/integrations/kapso) |
 | **Linear** | Bidirectional ticket sync via OAuth + webhooks | [Guide](https://docs.agent-swarm.dev/docs/guides/linear-integration) |
 | **Jira Cloud** | OAuth 3LO ticket sync — assignee/comment events create tasks; lifecycle posts comments back | [Guide](https://docs.agent-swarm.dev/docs/guides/jira-integration) |
 | **Sentry** | Workers can triage Sentry issues with the `/investigate-sentry-issue` command | [Guide](https://docs.agent-swarm.dev/docs/guides/sentry-integration) |
+| **Devin** | Devin can be a node in your swarm — keep your existing configuration | [Guide](https://docs.agent-swarm.dev/docs/guides/harness-configuration#supported-providers) |
 ## Dashboard
@@ -196,6 +233,7 @@ Everything lives at **[docs.agent-swarm.dev](https://docs.agent-swarm.dev)**. Go
 - [Getting Started](https://docs.agent-swarm.dev/docs/getting-started) — install, configure, and run your first task
 - [Architecture overview](https://docs.agent-swarm.dev/docs/architecture/overview) — how the swarm is wired together
+- [Playbooks](https://docs.agent-swarm.dev/docs/playbooks) — eight production flows we use to run Desplega, plus the patterns behind them
 - [CLI reference](https://docs.agent-swarm.dev/docs/reference/cli) and [Environment variables](https://docs.agent-swarm.dev/docs/reference/environment-variables)
 - [API reference](https://docs.agent-swarm.dev/docs/api-reference) — every HTTP endpoint
@@ -212,6 +250,8 @@ See [CONTRIBUTING.md](./CONTRIBUTING.md) to get started. The quickest way to con
 Join our [Discord](https://discord.gg/KZgfyyDVZa) if you have questions or want to discuss ideas.
+> Are you an agent? Go to [agent-swarm.dev/skill.md](https://www.agent-swarm.dev/skill.md).
 ## Star History
 <a href="https://star-history.com/#desplega-ai/agent-swarm&Date">

package/openapi.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "openapi": "3.1.0",
   "info": {
     "title": "Agent Swarm API",
-    "version": "1.84.0",
+    "version": "1.85.0",
     "description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
   },
   "servers": [
@@ -10839,11 +10839,13 @@
                       "email": {
                         "type": "string",
                         "format": "email"
+                      },
+                      "notes": {
+                        "type": "string"
                       }
                     },
                     "required": [
-                      "name",
-                      "email"
+                      "name"
                     ]
                   }
                 ]

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@desplega.ai/agent-swarm",
-  "version": "1.84.0",
+  "version": "1.85.0",
   "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
   "license": "MIT",
   "author": "desplega.sh <contact@desplega.sh>",

package/src/be/db-queries/oauth.ts CHANGED Viewed

@@ -180,3 +180,36 @@ export function isTokenExpiringSoon(provider: string, bufferMs = 5 * 60 * 1000):
   const expiresAt = new Date(tokens.expiresAt).getTime();
   return expiresAt - Date.now() < bufferMs;
 }
+// ── OAuth Refresh Locks ──
+export function acquireOAuthRefreshLock(provider: string, ttlMs: number): string | null { // Try to take the refresh lock for `provider` for ttlMs milliseconds; returns the new owner token, or null when the lock was not obtained.
+  const owner = crypto.randomUUID(); // fresh token identifying this acquisition attempt
+  const now = Date.now();
+  const expiresAt = new Date(now + ttlMs).toISOString(); // moment after which another caller may take the row over
+  const nowIso = new Date(now).toISOString();
+  getDb() // single upsert: insert the row, or overwrite an existing row only when it has already expired
+    .query( // createdAt is not in the SET list, so a takeover keeps the original row's createdAt
+      `INSERT INTO oauth_refresh_locks (provider, owner, expiresAt, createdAt, updatedAt)
+       VALUES (?, ?, ?, ?, ?)
+       ON CONFLICT(provider) DO UPDATE SET
+         owner = excluded.owner,
+         expiresAt = excluded.expiresAt,
+         updatedAt = excluded.updatedAt
+       WHERE oauth_refresh_locks.expiresAt <= ?`,
+    )
+    .run(provider, owner, expiresAt, nowIso, nowIso, nowIso); // binds: provider, owner, expiresAt, createdAt, updatedAt, then "now" for the WHERE expiry check
+  const row = getDb() // read the stored owner back to learn whether the upsert took effect for this attempt
+    .query("SELECT owner FROM oauth_refresh_locks WHERE provider = ?")
+    .get(provider) as { owner: string } | null;
+  return row?.owner === owner ? owner : null; // any other stored owner (or a missing row) means this attempt did not get the lock
+}
+export function releaseOAuthRefreshLock(provider: string, owner: string): void { // Release the refresh lock for `provider`, but only if it is still held under the given owner token.
+  getDb()
+    .query("DELETE FROM oauth_refresh_locks WHERE provider = ? AND owner = ?") // matching on owner makes this a no-op when the row now carries a different token
+    .run(provider, owner);
+}

package/src/be/db.ts CHANGED Viewed

@@ -1012,6 +1012,7 @@ type AgentTaskRow = {
   swarmVersion: string | null;
   provider: string | null;
   providerMeta: string | null;
+  totalCostUsd?: number | null;
 };
 function rowToAgentTask(row: AgentTaskRow): AgentTask {
@@ -1075,6 +1076,7 @@ function rowToAgentTask(row: AgentTaskRow): AgentTask {
     swarmVersion: row.swarmVersion ?? undefined,
     provider: (row.provider as ProviderName | null) ?? undefined,
     providerMeta: parseProviderMeta(row.provider as ProviderName | null, row.providerMeta),
+    totalCostUsd: row.totalCostUsd ?? undefined,
   };
 }
@@ -1110,6 +1112,7 @@ function rowToAgentTaskSummary(row: AgentTaskRow): AgentTaskSummary {
     lastUpdatedAt: t.lastUpdatedAt,
     finishedAt: t.finishedAt,
     peakContextPercent: t.peakContextPercent,
+    totalCostUsd: t.totalCostUsd,
   };
 }
@@ -1504,7 +1507,10 @@ export function getAllTasks(
   const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
   const limit = filters?.limit ?? 25;
   const offset = filters?.offset ?? 0;
-  const query = `SELECT * FROM agent_tasks ${whereClause} ORDER BY lastUpdatedAt DESC, priority DESC LIMIT ${limit} OFFSET ${offset}`;
+  const query = `SELECT agent_tasks.*,
+    (SELECT SUM(totalCostUsd) FROM session_costs WHERE session_costs.taskId = agent_tasks.id) AS totalCostUsd
+    FROM agent_tasks ${whereClause}
+    ORDER BY lastUpdatedAt DESC, priority DESC LIMIT ${limit} OFFSET ${offset}`;
   const rows = getDb()
     .prepare<AgentTaskRow, (string | AgentTaskStatus)[]>(query)

package/src/be/migrations/076_kapso_sender_user_backfill.sql ADDED Viewed

@@ -0,0 +1,43 @@
+-- Backfill Kapso/WhatsApp sender identities into the canonical user registry.
+--
+-- Native Kapso inbound messages resolve their sender through user_external_ids
+-- using kind='kapso' and the normalized WhatsApp phone number from
+-- message.from/conversation.phone_number. Existing user profiles can already
+-- carry WhatsApp numbers in notes in the documented form:
+--
+--   WhatsApp: +34 ... (E.164: 346...)
+--
+-- Link those existing, human-curated profile rows instead of leaving inbound
+-- Kapso sender rows unmapped. This is idempotent and preserves any existing
+-- mapping for a phone number.
+INSERT OR IGNORE INTO user_external_ids (kind, externalId, userId) -- NOTE(review): skipping already-mapped numbers presumably relies on a uniqueness constraint on user_external_ids, which is not visible here — confirm
+WITH raw_notes AS ( -- step 1: take everything in notes that follows the 'E.164:' marker
+  SELECT
+    id AS userId,
+    substr(notes, instr(notes, 'E.164:') + length('E.164:')) AS e164_suffix
+  FROM users
+  WHERE notes LIKE '%WhatsApp:%' -- only profiles whose notes mention both markers are considered
+    AND notes LIKE '%E.164:%' -- NOTE(review): LIKE is case-insensitive for ASCII by default while instr() is case-sensitive, so a differently-cased marker passes this filter but is not located by instr() above — confirm that is acceptable
+),
+parsed AS ( -- step 2: cut the suffix at the first ')' when there is one, then trim surrounding whitespace
+  SELECT
+    userId,
+    trim(
+      CASE
+        WHEN instr(e164_suffix, ')') > 0 THEN substr(e164_suffix, 1, instr(e164_suffix, ')') - 1)
+        ELSE e164_suffix
+      END
+    ) AS e164_value
+  FROM raw_notes
+),
+normalized AS ( -- step 3: strip '+', spaces, '-' and '.' from the candidate number
+  SELECT
+    userId,
+    replace(replace(replace(replace(e164_value, '+', ''), ' ', ''), '-', ''), '.', '') AS externalId
+  FROM parsed
+)
+SELECT 'kapso', externalId, userId -- every inserted row gets kind = 'kapso'
+FROM normalized
+WHERE externalId <> '' -- drop candidates that are empty after normalization
+  AND externalId NOT GLOB '*[^0-9]*'; -- keep only values made up entirely of digits

package/src/be/migrations/077_oauth_refresh_locks.sql ADDED Viewed

@@ -0,0 +1,8 @@
+-- Cross-process mutex for OAuth refresh-token rotation.
+CREATE TABLE IF NOT EXISTS oauth_refresh_locks ( -- IF NOT EXISTS keeps the statement safe to run when the table is already present
+  provider  TEXT PRIMARY KEY, -- primary key, so there is at most one lock row per provider
+  owner     TEXT NOT NULL, -- token of the current lock holder
+  expiresAt TEXT NOT NULL, -- expiry timestamp stored as text
+  createdAt TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')), -- defaults to the current UTC time with fractional seconds
+  updatedAt TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')) -- same default as createdAt
+);

package/src/commands/context-preamble.ts ADDED Viewed

@@ -0,0 +1,178 @@
+/**
+ * Universal context preamble for follow-up task continuity.
+ *
+ * Builds a bounded text summary of prior task context (parent → ancestor chain)
+ * and prepends it to the child task's prompt. This makes follow-up continuity
+ * uniform across ALL harness providers — not just those that support native
+ * session resume (claude/codex).
+ *
+ * Token budget (CONTEXT_PREAMBLE_MAX_TOKENS, default 2000) prevents the
+ * SIGTERM-143 context-saturation failure mode seen with unbounded session
+ * resumes (see swarm memory sigterm-143-resumed-session-context-saturation-2026-05-13).
+ */
+export const CONTEXT_PREAMBLE_MAX_TOKENS = Number( // token budget for the preamble; read once at module load
+  process.env.CONTEXT_PREAMBLE_MAX_TOKENS || "2000", // unset or empty env value falls back to 2000
+); // NOTE(review): a non-numeric env value makes this NaN; NaN propagates into CONTEXT_PREAMBLE_MAX_CHARS and any `length > NaN` check is false, so truncation would silently never trigger — consider validating
+// ~4 chars per token (conservative approximation for mixed code/prose)
+export const CONTEXT_PREAMBLE_MAX_CHARS = CONTEXT_PREAMBLE_MAX_TOKENS * 4; // character budget derived from the token budget
+export const CONTEXT_PREAMBLE_MAX_ANCESTORS = 5; // upper bound on how many tasks of the parent chain are collected
+export interface TaskContextForPreamble { // Subset of a task record that the preamble builder reads.
+  id: string; // task identifier, echoed into the preamble so the reader can look the task up
+  task: string; // task text; only a sliced, single-line preview is placed in the preamble
+  output?: string; // preferred source for the "Outcome" section
+  progress?: string; // used for "Outcome" when output is absent or empty
+  status?: string;
+  parentTaskId?: string; // next link followed when walking up the ancestor chain
+  attachments?: Array<{ // artifacts listed under "Artifacts from prior task"
+    kind: string; // the pointer formatter handles "agent-fs", "url", "page" and "shared-fs"
+    name: string; // attachments without a name are filtered out of the preamble
+    url?: string; // pointer for kind "url"
+    path?: string; // pointer for kinds "agent-fs" and "shared-fs"
+    pageId?: string; // pointer for kind "page"
+    orgId?: string; // with driveId, lets an "agent-fs" path be rendered as a live URL
+    driveId?: string;
+    description?: string; // shown as a note after the pointer; takes precedence over intent
+    intent?: string; // fallback note when description is empty
+    isPrimary?: boolean;
+  }>;
+}
+/**
+ * Fetch minimal task context for preamble generation (worker-side, via HTTP).
+ *
+ * Issues a GET to `${apiUrl}/api/tasks/${taskId}` and copies only the fields
+ * declared on TaskContextForPreamble out of the JSON response. The response
+ * body is cast to that shape, not validated.
+ *
+ * @param apiUrl Base URL of the API; the task path is appended to it as-is.
+ * @param apiKey Sent as a Bearer token when non-empty; no Authorization header otherwise.
+ * @param taskId Task to load; interpolated into the URL without encoding.
+ * @returns The trimmed task context, or null when the response is not OK or
+ *   when the request or JSON parsing throws (the error itself is discarded).
+ */
+export async function fetchTaskContextForPreamble(
+  apiUrl: string,
+  apiKey: string,
+  taskId: string,
+): Promise<TaskContextForPreamble | null> {
+  const headers: Record<string, string> = {};
+  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
+  try {
+    const response = await fetch(`${apiUrl}/api/tasks/${taskId}`, { headers });
+    if (!response.ok) return null; // any non-2xx status is treated as "no context available"
+    const data = (await response.json()) as TaskContextForPreamble;
+    return { // copy field by field so extra properties of the task record are dropped
+      id: data.id,
+      task: data.task,
+      output: data.output,
+      progress: data.progress,
+      status: data.status,
+      parentTaskId: data.parentTaskId,
+      attachments: data.attachments,
+    };
+  } catch { // best-effort: callers only see null, never the underlying error
+    return null;
+  }
+}
+function formatAttachmentPointer( // Render one attachment as a short pointer string (URL, path or page reference).
+  att: NonNullable<TaskContextForPreamble["attachments"]>[number], // a single element of the attachments array
+): string {
+  if (att.kind === "agent-fs" && att.path) {
+    const liveHost = process.env.AGENT_FS_LIVE_URL ?? "https://live.agent-fs.dev"; // `??` only falls back when the variable is undefined; an empty string is used as-is
+    if (att.orgId && att.driveId) {
+      return `${liveHost}/file/~/${att.orgId}/${att.driveId}/${att.path}`; // full live URL when both org and drive are known
+    }
+    return att.path; // otherwise just the bare path
+  }
+  if (att.kind === "url" && att.url) return att.url;
+  if (att.kind === "page" && att.pageId) return `(page:${att.pageId})`;
+  if (att.kind === "shared-fs" && att.path) return att.path;
+  return "(no pointer)"; // unknown kind, or the field that kind requires is missing
+}
+/**
+ * Build a bounded context preamble for a follow-up task.
+ *
+ * Walks the ancestor chain (up to CONTEXT_PREAMBLE_MAX_ANCESTORS) via the API
+ * and returns a formatted markdown block that is prepended to the child prompt.
+ * Ancestors are fetched one at a time, and the walk stops at the first task
+ * that cannot be fetched.
+ *
+ * - Immediate parent: inline detail (subject + output + attachments)
+ * - Older ancestors: pointer-only (taskId + one-line subject)
+ *
+ * Bounded by CONTEXT_PREAMBLE_MAX_CHARS (~CONTEXT_PREAMBLE_MAX_TOKENS
+ * tokens) to prevent context saturation.
+ *
+ * NOTE(review): when the assembled text exceeds the budget it is sliced to
+ * CONTEXT_PREAMBLE_MAX_CHARS and a truncation notice is appended afterwards,
+ * so the returned string can be longer than the budget by the length of that
+ * notice. The slice is positional and may cut through markdown structure.
+ *
+ * @param apiUrl Base URL passed through to fetchTaskContextForPreamble.
+ * @param apiKey API key passed through to fetchTaskContextForPreamble.
+ * @param parentTaskId Id of the immediate parent task, where the walk starts.
+ * @returns The preamble text, or null when the immediate parent could not be fetched.
+ */
+export async function buildContextPreamble(
+  apiUrl: string,
+  apiKey: string,
+  parentTaskId: string,
+): Promise<string | null> {
+  const ancestors: TaskContextForPreamble[] = []; // index 0 is the immediate parent, later entries are progressively older
+  let currentId: string | undefined = parentTaskId;
+  while (currentId && ancestors.length < CONTEXT_PREAMBLE_MAX_ANCESTORS) { // the count limit also bounds the loop if the chain is cyclic
+    const ctx = await fetchTaskContextForPreamble(apiUrl, apiKey, currentId);
+    if (!ctx) break;
+    ancestors.push(ctx);
+    currentId = ctx.parentTaskId;
+  }
+  if (ancestors.length === 0) return null;
+  // ancestors[0] is guaranteed by the length check above; TypeScript needs the guard.
+  const parent = ancestors[0];
+  if (!parent) return null;
+  const lines: string[] = [
+    "\n---",
+    "## Prior Conversation Context",
+    "",
+    "This task is a follow-up in an ongoing thread. Here is a summary of prior work to maintain continuity.",
+    "",
+  ];
+  const subjectPreview = parent.task.slice(0, 600).replace(/\n/g, " "); // first 600 chars of the parent task, flattened to one line
+  lines.push(`### Immediate Prior Task (ID: \`${parent.id}\`)`);
+  lines.push(`**Task:** ${subjectPreview}`);
+  lines.push("");
+  const rawResult = parent.output || parent.progress; // `||` means an empty output string also falls through to progress
+  if (rawResult) {
+    // Reserve ~55% of budget for the output content; rest for structure + older ancestors
+    const outputBudget = Math.floor(CONTEXT_PREAMBLE_MAX_CHARS * 0.55);
+    const truncated =
+      rawResult.length > outputBudget
+        ? `${rawResult.slice(0, outputBudget)}\n\n[output truncated — full history via \`get-task-details\` with taskId \`${parent.id}\`]`
+        : rawResult;
+    lines.push("**Outcome:**");
+    lines.push(truncated);
+    lines.push("");
+  } else {
+    lines.push("**Outcome:** (no output recorded yet — task may still be in progress)");
+    lines.push("");
+  }
+  const atts = parent.attachments?.filter((a) => a.name && (a.url || a.path || a.pageId)); // keep only named attachments that carry at least one pointer field
+  if (atts && atts.length > 0) {
+    lines.push("**Artifacts from prior task:**");
+    for (const att of atts.slice(0, 10)) { // at most the first 10 artifacts are listed
+      const pointer = formatAttachmentPointer(att);
+      const note = att.description || att.intent || "";
+      lines.push(`  - **${att.name}**: \`${pointer}\`${note ? ` — ${note}` : ""}`);
+    }
+    lines.push("");
+  }
+  lines.push(
+    `To review the full prior conversation call \`get-task-details\` with taskId \`${parent.id}\`.`,
+  );
+  if (ancestors.length > 1) {
+    lines.push("");
+    lines.push(
+      "### Older Ancestor Tasks (pointers only — call `get-task-details` for full details)",
+    );
+    for (const ancestor of ancestors.slice(1)) { // everything after the immediate parent
+      const brief = ancestor.task.slice(0, 200).replace(/\n/g, " "); // 200-char single-line subject
+      lines.push(`- \`${ancestor.id}\` — ${brief}`);
+    }
+  }
+  lines.push("", "---", "");
+  let preamble = lines.join("\n");
+  if (preamble.length > CONTEXT_PREAMBLE_MAX_CHARS) { // final guard over the whole assembled text
+    preamble = `${preamble.slice(0, CONTEXT_PREAMBLE_MAX_CHARS)}\n\n[context preamble truncated to ${CONTEXT_PREAMBLE_MAX_TOKENS}-token budget]\n\n---\n`;
+  }
+  return preamble;
+}

package/src/commands/runner.ts CHANGED Viewed

@@ -48,7 +48,9 @@ import { prettyPrintLine, prettyPrintStderr } from "../utils/pretty-print.ts";
 import { scrubSecrets } from "../utils/secret-scrubber.ts";
 import { refreshSkillsIfChanged } from "../utils/skills-refresh.ts";
 import { detectVcsProvider } from "../vcs/index.ts";
+import { validateJsonSchema } from "../workflows/json-schema-validator.ts";
 import { interpolate } from "../workflows/template.ts";
+import { buildContextPreamble } from "./context-preamble.ts";
 import { awaitCredentials, BootMaxWaitExceededError, EX_CONFIG } from "./credential-wait.ts";
 import {
   buildCredStatusReport,
@@ -703,6 +705,56 @@ Extract the structured data from the progress updates above. Return ONLY valid J
   }
 }
+async function validateProviderOutputIfNeeded( // Check provider-supplied output against the task's outputSchema, when the task has one.
+  config: ApiConfig, // supplies apiUrl and the optional apiKey sent as a Bearer token
+  taskId: string, // task whose record is fetched from /api/tasks/{taskId}
+  providerOutput: string, // raw provider output; must parse as JSON when a schema is present
+): Promise<{ ok: true } | { ok: false; failReason: string }> { // ok:false carries a human-readable reason
+  const headers: Record<string, string> = {
+    "Content-Type": "application/json",
+  };
+  if (config.apiKey) {
+    headers.Authorization = `Bearer ${config.apiKey}`;
+  }
+  try {
+    const taskRes = await fetch(`${config.apiUrl}/api/tasks/${taskId}`, { headers });
+    if (!taskRes.ok) {
+      return { ok: true }; // task record unavailable: accept the output rather than fail the task
+    }
+    const taskData = (await taskRes.json()) as {
+      outputSchema?: Record<string, unknown>;
+    };
+    if (!taskData.outputSchema || typeof taskData.outputSchema !== "object") {
+      return { ok: true }; // no usable schema on the task, so there is nothing to validate against
+    }
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(providerOutput);
+    } catch {
+      return { // a schema exists but the output is not JSON at all
+        ok: false,
+        failReason:
+          "Structured output required by outputSchema but provider output was not valid JSON",
+      };
+    }
+    const validationErrors = validateJsonSchema(taskData.outputSchema, parsed); // result is used as a list of error messages
+    if (validationErrors.length > 0) {
+      return {
+        ok: false,
+        failReason: `Structured output did not match outputSchema: ${validationErrors.join("; ")}`,
+      };
+    }
+  } catch { // fails open: any error thrown inside the try block (fetch, JSON body, validator) is discarded and the output is accepted
+    return { ok: true };
+  }
+  return { ok: true }; // schema present and no validation errors reported
+}
 export async function ensureTaskFinished(
   config: ApiConfig,
   role: string,
@@ -733,12 +785,14 @@ export async function ensureTaskFinished(
   if (status === "failed") {
     body.failureReason = failureReason || `Claude process exited with code ${exitCode}`;
   } else if (providerOutput) {
-    // Provider already supplied structured output (e.g. Devin) — use directly.
-    // NOTE: providerOutput is NOT validated against task.outputSchema here.
-    // Known gap for default-mode Devin; see runbooks/harness-providers.md
-    // ("Per-task outputSchema support"). Schema enforcement only happens on
-    // the MCP path via store-progress.
-    body.output = providerOutput;
+    const validation = await validateProviderOutputIfNeeded(config, taskId, providerOutput);
+    if (validation.ok) {
+      body.output = providerOutput;
+    } else {
+      status = "failed";
+      body.status = "failed";
+      body.failureReason = validation.failReason;
+    }
   } else {
     // Try structured output fallback if the task has an outputSchema
     const adapterType = provider ?? process.env.HARNESS_PROVIDER ?? "claude";
@@ -3752,6 +3806,19 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
           console.log(`[${role}] Injected relevant memories into resumed task prompt`);
         }
+        // Universal context preamble: inject for all providers when task is a follow-up.
+        // Gives non-resumable providers (opencode/pi/devin) prior-task context; also
+        // acts as a bounded safety net for resumable ones (claude/codex).
+        if (task.parentTaskId && apiUrl) {
+          const contextPreamble = await buildContextPreamble(apiUrl, apiKey, task.parentTaskId);
+          if (contextPreamble) {
+            resumePrompt = contextPreamble + resumePrompt;
+            console.log(
+              `[${role}] Injected context preamble into resumed follow-up task prompt (parent: ${task.parentTaskId.slice(0, 8)})`,
+            );
+          }
+        }
         // Resolve provider-aware resume: prefer own session, then parent.
         const resumeCandidates: ResumeSessionCandidate[] = [
           {
@@ -4107,9 +4174,22 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
           }
         }
+        // Universal context preamble: inject for all providers when task is a follow-up.
+        // Gives non-resumable providers (opencode/pi/devin) prior-task context; also
+        // acts as a bounded safety net for resumable ones (claude/codex).
+        const taskObj = trigger.task as { parentTaskId?: string } | undefined;
+        if (taskObj?.parentTaskId && apiUrl) {
+          const contextPreamble = await buildContextPreamble(apiUrl, apiKey, taskObj.parentTaskId);
+          if (contextPreamble) {
+            triggerPrompt = contextPreamble + triggerPrompt;
+            console.log(
+              `[${role}] Injected context preamble for follow-up task (parent: ${taskObj.parentTaskId.slice(0, 8)})`,
+            );
+          }
+        }
         // Resolve provider-aware resume for child tasks with parentTaskId.
         let resumeSessionId: string | undefined;
-        const taskObj = trigger.task as { parentTaskId?: string } | undefined;
         if (taskObj?.parentTaskId) {
           const parentSession = await fetchProviderSessionInfo(
             apiUrl,

package/src/http/index.ts CHANGED Viewed

@@ -24,6 +24,7 @@ import {
 import { startSlackApp, stopSlackApp } from "../slack";
 import { initTelemetry, telemetry } from "../telemetry";
 import { getApiKey } from "../utils/api-key";
+import { scrubSecrets } from "../utils/secret-scrubber";
 import { initWorkflows } from "../workflows";
 import { handleActiveSessions } from "./active-sessions";
 import { handleAgentRegister, handleAgentsRest } from "./agents";
@@ -68,6 +69,7 @@ import {
   getPathSegments,
   httpServerSemconvAttributes,
   parseQueryParams,
+  safeRequestUrlForLog,
   setCorsHeaders,
 } from "./utils";
 import { handleWebhooks } from "./webhooks";
@@ -124,7 +126,9 @@ const httpServer = createHttpServer(async (req, res) => {
   const logRequest = () => {
     const elapsed = (performance.now() - startTime).toFixed(1);
     const statusEmoji = statusCode >= 400 ? "⚠️" : "✓";
-    console.log(`[HTTP] ${statusEmoji} ${req.method} ${req.url} → ${statusCode} (${elapsed}ms)`);
+    console.log(
+      `[HTTP] ${statusEmoji} ${req.method} ${safeRequestUrlForLog(req.url)} → ${statusCode} (${elapsed}ms)`,
+    );
   };
   // Ensure we log on response finish
@@ -132,7 +136,9 @@ const httpServer = createHttpServer(async (req, res) => {
   // Log errors
   res.on("error", (err) => {
-    console.error(`[HTTP] ❌ ${req.method} ${req.url} → Error: ${err.message}`);
+    console.error(
+      `[HTTP] ❌ ${req.method} ${safeRequestUrlForLog(req.url)} → Error: ${scrubSecrets(err.message)}`,
+    );
   });
   await withRemoteContext(req.headers as Record<string, unknown>, async () => {
@@ -257,7 +263,9 @@ const httpServer = createHttpServer(async (req, res) => {
           span.setStatus({ code: 2, message: err instanceof Error ? err.message : String(err) });
         }
         const message = err instanceof Error ? err.message : String(err);
-        console.error(`[HTTP] ❌ ${req.method} ${req.url} → ${message}`);
+        console.error(
+          `[HTTP] ❌ ${req.method} ${safeRequestUrlForLog(req.url)} → ${scrubSecrets(message)}`,
+        );
         if (!res.headersSent) {
           res.writeHead(500, { "Content-Type": "application/json" });
           res.end(JSON.stringify({ error: message }));