@desplega.ai/agent-swarm 1.84.0 → 1.85.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +48 -8
  2. package/openapi.json +5 -3
  3. package/package.json +1 -1
  4. package/src/be/db-queries/oauth.ts +33 -0
  5. package/src/be/db.ts +7 -1
  6. package/src/be/migrations/076_kapso_sender_user_backfill.sql +43 -0
  7. package/src/be/migrations/077_oauth_refresh_locks.sql +8 -0
  8. package/src/commands/context-preamble.ts +178 -0
  9. package/src/commands/runner.ts +87 -7
  10. package/src/http/index.ts +11 -3
  11. package/src/http/tasks.ts +17 -0
  12. package/src/http/users.ts +11 -3
  13. package/src/http/utils.ts +17 -0
  14. package/src/integrations/kapso/inbound.ts +36 -0
  15. package/src/oauth/ensure-token.ts +97 -11
  16. package/src/prompts/base-prompt.ts +15 -2
  17. package/src/prompts/session-templates.ts +26 -12
  18. package/src/providers/pi-mono-adapter.ts +44 -25
  19. package/src/server.ts +2 -0
  20. package/src/tasks/worker-follow-up.ts +82 -0
  21. package/src/tests/agentmail-sending-skill.test.ts +75 -0
  22. package/src/tests/agents-list-model-display.test.ts +45 -0
  23. package/src/tests/base-prompt.test.ts +90 -1
  24. package/src/tests/db-queries-oauth.test.ts +27 -0
  25. package/src/tests/ensure-token.test.ts +71 -0
  26. package/src/tests/http-log-scrubbing.test.ts +24 -0
  27. package/src/tests/http-users.test.ts +53 -0
  28. package/src/tests/kapso-inbound.test.ts +60 -1
  29. package/src/tests/kv-page-proxy.test.ts +1 -0
  30. package/src/tests/list-endpoint-slimming.test.ts +22 -1
  31. package/src/tests/oauth-access-token-tool.test.ts +138 -0
  32. package/src/tests/pagination-metrics.test.ts +4 -4
  33. package/src/tests/pi-mono-adapter.test.ts +37 -1
  34. package/src/tests/prompt-template-session.test.ts +13 -3
  35. package/src/tests/runner-context-preamble.test.ts +202 -0
  36. package/src/tests/runner-fallback-output.test.ts +118 -39
  37. package/src/tests/task-completion-idempotency.test.ts +89 -0
  38. package/src/tools/cancel-task.ts +13 -5
  39. package/src/tools/get-task-details.ts +18 -10
  40. package/src/tools/get-tasks.ts +9 -4
  41. package/src/tools/oauth-access-token.ts +118 -0
  42. package/src/tools/send-task.ts +9 -5
  43. package/src/tools/store-progress.ts +12 -77
  44. package/src/tools/task-action.ts +20 -10
  45. package/src/tools/tool-config.ts +2 -1
  46. package/src/types.ts +5 -0
  47. package/src/utils/secret-scrubber.ts +23 -0
  48. package/templates/skills/agentmail-sending/SKILL.md +148 -28
package/README.md CHANGED
@@ -2,12 +2,10 @@
2
2
  <a href="https://github.com/desplega-ai/agent-swarm/stargazers"><img src="https://img.shields.io/github/stars/desplega-ai/agent-swarm?style=flat-square&color=yellow" alt="GitHub Stars"></a>
3
3
  <a href="https://github.com/desplega-ai/agent-swarm/blob/main/LICENSE"><img src="https://img.shields.io/github/license/desplega-ai/agent-swarm?style=flat-square" alt="MIT License"></a>
4
4
  <a href="https://github.com/desplega-ai/agent-swarm/pulls"><img src="https://img.shields.io/badge/PRs-welcome-brightgreen?style=flat-square" alt="PRs Welcome"></a>
5
- <a href="https://discord.gg/KZgfyyDVZa"><img src="https://img.shields.io/badge/Discord-Join%20us-5865F2?style=flat-square&logo=discord&logoColor=white" alt="Discord"></a>
6
- <a href="https://docs.agent-swarm.dev"><img src="https://img.shields.io/badge/docs-agent--swarm.dev-blue?style=flat-square" alt="Docs"></a>
7
5
  </p>
8
6
 
9
7
  <p align="center">
10
- <b>Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants.</b><br/>
8
+ <b>An engine to make your company AI Native</b><br/>
11
9
  <sub>Built by <a href="https://desplega.sh">desplega.sh</a> — by builders, for builders.</sub>
12
10
  </p>
13
11
 
@@ -39,11 +37,19 @@
39
37
  </a>
40
38
  </p>
41
39
 
42
- > **What if your AI agents remembered everything, learned from every mistake, and got better with every task?**
40
+ > **Agent Swarm is your Company's Compounding Intelligence Layer. A system of AI agents that remember, reason, act and get better with every task.**
41
+
42
+ > AI-Native · Compounds · Presence · Harness & LLM-Agnostic · Your Infra · Your Memory
43
43
 
44
44
  ## What it does
45
45
 
46
- Agent Swarm runs a team of AI coding agents that coordinate autonomously. A **lead agent** receives tasks from Slack, GitHub, GitLab, email, or the API breaks them down, and delegates to **worker agents** running in Docker containers. Workers execute tasks, ship code, and write their learnings back to a shared memory so the whole swarm gets smarter every session.
46
+ Agent Swarm runs a team of AI agents that coordinate autonomously. A **lead agent** receives tasks (from Slack, GitHub, GitLab, Linear, Jira, email, or the API), breaks them down, and delegates to **worker agents** running in isolated environments (Docker). Workers execute tasks, ship solutions, and write their learnings back to a shared memory so the whole swarm gets smarter every session.
47
+
48
+ You can run agents for Marketing, Product, UX, Engineering, Support, Operations, HR, Finance, or any role you can think of. A centralized Lead coordinates them, and they share the learnings horizontally. That's the true difference between [*AI First*](https://www.pleasedontdeploy.com/i/197193364/ai-first) and [*AI Native*](https://www.pleasedontdeploy.com/i/197193364/third-the-ai-native-metamorphosis).
49
+
50
+ Agent Swarm is the shared cloud brain and muscle that makes your whole company better every day.
51
+
52
+ Sometimes humans are the blocker. We can help you. Contact us at [contact@desplega.sh](mailto:contact@desplega.sh).
47
53
 
48
54
  Learn more in the [architecture overview](https://docs.agent-swarm.dev/docs/architecture/overview).
49
55
 
@@ -85,14 +91,39 @@ flowchart LR
85
91
  WORKERS --> OUT
86
92
  ```
87
93
 
94
+ ## Known Use Cases
95
+
96
+ Use cases that are used daily by ourselves and others.
97
+ Each playbook contains: the agents, the tools & skills, and workflows & schedules behind it. **[Browse all playbooks →](https://docs.agent-swarm.dev/docs/playbooks)**
98
+
99
+ - **[Feature Development](https://docs.agent-swarm.dev/docs/playbooks/feature-development)** — Integrated with Linear and GitHub to take feature requests from Slack and turn them into pull requests.
100
+ - **[Lead Prospecting](https://docs.agent-swarm.dev/docs/playbooks/lead-prospecting)** — Integrate your prospecting tools with the swarm and let agents handle outreach, scheduling, and follow-up.
101
+ - **[Content Generation](https://docs.agent-swarm.dev/docs/playbooks/content-generation)** — Generate engagement tools, blog posts, manage social media presence, update your website, and more.
102
+ - **[UX Command Center](https://docs.agent-swarm.dev/docs/playbooks/ux-command-center)** — Agents that keep your product usable: record agentic sessions, enforce your design system, and mine user logs to detect and propose UX improvements.
103
+ - **[Proactive Customer Support](https://docs.agent-swarm.dev/docs/playbooks/proactive-customer-support)** — Agents that oversee your top accounts, prepare scheduled reports, and leverage everything they know about your platform to keep those accounts up to date.
104
+ - **[Code Health & Alert Management](https://docs.agent-swarm.dev/docs/playbooks/code-health-alert-management)** — Datadog, New Relic, Sentry, or any alerting tool can kick off fixes or new proposals. Monitor code health and propose improvements weekly, daily, or hourly.
105
+ - **[Reports from Multiple Sources](https://docs.agent-swarm.dev/docs/playbooks/reports-multiple-sources)** — Integrate your data warehouse to generate tailored reports and answer the key questions your team has, with fresh data. Your BI tool may be a thing of the past.
106
+ - **[Self-Documenting & Release Reports](https://docs.agent-swarm.dev/docs/playbooks/self-documenting-release-reports)** — Update your docs and use frameworks like [Remotion](https://www.remotion.dev/), [qa-use](https://github.com/qa-use/qa-use), and [browser-use](https://github.com/browser-use/browser-use) to generate release videos and rich documentation in seconds, at the cadence you need.
107
+ - Do you have a cool playbook to share? Send us a PR!
108
+
109
+ > **The patterns that compound.** Five recipes show up in nearly every playbook — they're how the swarm stays reliable as it scales:
110
+ > **[Litmus Tests](https://docs.agent-swarm.dev/docs/playbooks/patterns/litmus-tests)** (LLM-as-judge quality gates) ·
111
+ > **[Drain Loops](https://docs.agent-swarm.dev/docs/playbooks/patterns/drain-loops)** (one ticket → a chain of reviewable PRs) ·
112
+ > **[HITL Gates](https://docs.agent-swarm.dev/docs/playbooks/patterns/hitl-gates)** (pause for human approval on irreversible steps) ·
113
+ > **[Per-Customer Working Directories](https://docs.agent-swarm.dev/docs/playbooks/patterns/per-customer-working-directories)** (context that compounds per account) ·
114
+ > **[No-op Workflows](https://docs.agent-swarm.dev/docs/playbooks/patterns/no-op-workflows)** (skip silently when nothing changed).
115
+ > **[See all patterns →](https://docs.agent-swarm.dev/docs/playbooks/patterns)**
116
+
117
+ Check [our templates](https://templates.agent-swarm.dev) for a quick start.
118
+
88
119
  ## Highlights
89
120
 
90
121
  - **Lead/worker orchestration in Docker** — isolated dev environments, priority queues, pause/resume across deploys. [Architecture →](https://docs.agent-swarm.dev/docs/architecture/overview)
91
122
  - **Compounding memory & persistent identity** — agents remember past sessions and evolve their own persona, expertise, and notes. [Memory →](https://docs.agent-swarm.dev/docs/architecture/memory) · [Agents →](https://docs.agent-swarm.dev/docs/architecture/agents)
92
- - **Multi-channel inputs** — Slack, GitHub, GitLab, email, Linear, Jira, and the HTTP API all create tasks. [Integrations](#integrations)
123
+ - **Multi-channel inputs** — Slack, GitHub, GitLab, email, WhatsApp, Linear, Jira, and the HTTP API all create tasks. [Integrations](#integrations)
93
124
  - **Workflow engine with Human-in-the-Loop** — DAG-based automation with approval gates, retries, and structured I/O. [Workflows →](https://docs.agent-swarm.dev/docs/concepts/workflows)
94
125
  - **Scheduled & recurring tasks** — cron-based automation for standing work. [Scheduling →](https://docs.agent-swarm.dev/docs/concepts/scheduling)
95
- - **Multi-provider** — run with Claude Code, OpenAI Codex, pi-mono, Devin, Claude Managed Agents, or opencode. [Harness config →](https://docs.agent-swarm.dev/docs/guides/harness-configuration) · [Add a new provider →](https://docs.agent-swarm.dev/docs/guides/harness-providers)
126
+ - **Harness & LLM agnostic** — run with Claude Code, OpenAI Codex, pi-mono, Devin, Claude Managed Agents, raw LLMs, or opencode. [Harness config →](https://docs.agent-swarm.dev/docs/guides/harness-configuration) · [Add a new provider →](https://docs.agent-swarm.dev/docs/guides/harness-providers)
96
127
  - **Skills & MCP servers** — reusable procedural knowledge and per-agent MCP servers with scope cascade. [MCP tools →](https://docs.agent-swarm.dev/docs/reference/mcp-tools)
97
128
  - **DB-backed pages** — agents publish HTML or JSON pages (reports, dashboards, action specs) via the `create_page` MCP tool with public / authed / password modes, version history, view counters, diff helpers, and PDF export. [MCP tools → Pages](https://docs.agent-swarm.dev/docs/reference/mcp-tools#pages-tools)
98
129
  - **KV store** — Redis-like namespaced key/value store with auto-scoped context (Slack thread / PR / Linear issue / page). [MCP tools → KV](https://docs.agent-swarm.dev/docs/reference/mcp-tools#kv-tools)
@@ -100,6 +131,8 @@ flowchart LR
100
131
 
101
132
  ## Quick Start
102
133
 
134
+ Need help? Contact us at [contact@desplega.sh](mailto:contact@desplega.sh).
135
+
103
136
  **Prerequisites:** [Docker](https://docker.com) and a [Claude Code](https://docs.anthropic.com/en/docs/claude-code) OAuth token (`claude setup-token`).
104
137
 
105
138
  The fastest way is the onboarding wizard — it collects credentials, picks presets, and generates a working `docker-compose.yml`:
@@ -144,22 +177,26 @@ Worker Worker Worker
144
177
  2. The lead plans and delegates subtasks to workers.
145
178
  3. Workers execute in isolated Docker containers (git, Node.js, Python, etc.).
146
179
  4. Progress streams to the dashboard, Slack threads, or the API.
147
- 5. Results ship back out as PRs, issue replies, or Slack messages.
180
+ 5. Results ship back out as PRs, custom pages, issue replies, or Slack messages.
148
181
  6. Session learnings are extracted and become memory for future tasks.
149
182
 
150
183
  More detail in the [task lifecycle docs](https://docs.agent-swarm.dev/docs/concepts/task-lifecycle).
151
184
 
152
185
  ## Integrations
153
186
 
187
+ Missing one? Ask the swarm to build it.
188
+
154
189
  | Integration | What it does | Setup |
155
190
  |---|---|---|
156
191
  | **Slack** | DM or @mention the bot to create tasks; workers reply in threads | [Guide](https://docs.agent-swarm.dev/docs/guides/slack-integration) |
157
192
  | **GitHub App** | @mention or assign the bot on issues/PRs; CI failures create follow-up tasks | [Guide](https://docs.agent-swarm.dev/docs/guides/github-integration) |
158
193
  | **GitLab** | Same model as GitHub — webhooks on issues/MRs, `glab` preinstalled in workers | [Guide](https://docs.agent-swarm.dev/docs/guides/gitlab-integration) |
159
194
  | **AgentMail** | Give each agent an inbox; emails become tasks or lead messages | [Guide](https://docs.agent-swarm.dev/docs/guides/agentmail-integration) |
195
+ | **Kapso (WhatsApp)** | Native inbound WhatsApp webhook routing; agents reply over WhatsApp with MCP tools or the `kapso-whatsapp` skill | [Guide](https://docs.agent-swarm.dev/docs/integrations/kapso) |
160
196
  | **Linear** | Bidirectional ticket sync via OAuth + webhooks | [Guide](https://docs.agent-swarm.dev/docs/guides/linear-integration) |
161
197
  | **Jira Cloud** | OAuth 3LO ticket sync — assignee/comment events create tasks; lifecycle posts comments back | [Guide](https://docs.agent-swarm.dev/docs/guides/jira-integration) |
162
198
  | **Sentry** | Workers can triage Sentry issues with the `/investigate-sentry-issue` command | [Guide](https://docs.agent-swarm.dev/docs/guides/sentry-integration) |
199
+ | **Devin** | Devin can be a node in your swarm — keep your existing configuration | [Guide](https://docs.agent-swarm.dev/docs/guides/harness-configuration#supported-providers) |
163
200
 
164
201
  ## Dashboard
165
202
 
@@ -196,6 +233,7 @@ Everything lives at **[docs.agent-swarm.dev](https://docs.agent-swarm.dev)**. Go
196
233
 
197
234
  - [Getting Started](https://docs.agent-swarm.dev/docs/getting-started) — install, configure, and run your first task
198
235
  - [Architecture overview](https://docs.agent-swarm.dev/docs/architecture/overview) — how the swarm is wired together
236
+ - [Playbooks](https://docs.agent-swarm.dev/docs/playbooks) — eight production flows we use to run Desplega, plus the patterns behind them
199
237
  - [CLI reference](https://docs.agent-swarm.dev/docs/reference/cli) and [Environment variables](https://docs.agent-swarm.dev/docs/reference/environment-variables)
200
238
  - [API reference](https://docs.agent-swarm.dev/docs/api-reference) — every HTTP endpoint
201
239
 
@@ -212,6 +250,8 @@ See [CONTRIBUTING.md](./CONTRIBUTING.md) to get started. The quickest way to con
212
250
 
213
251
  Join our [Discord](https://discord.gg/KZgfyyDVZa) if you have questions or want to discuss ideas.
214
252
 
253
+ > Are you an agent? Go to [agent-swarm.dev/skill.md](https://www.agent-swarm.dev/skill.md).
254
+
215
255
  ## Star History
216
256
 
217
257
  <a href="https://star-history.com/#desplega-ai/agent-swarm&Date">
package/openapi.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "openapi": "3.1.0",
3
3
  "info": {
4
4
  "title": "Agent Swarm API",
5
- "version": "1.84.0",
5
+ "version": "1.85.0",
6
6
  "description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
7
7
  },
8
8
  "servers": [
@@ -10839,11 +10839,13 @@
10839
10839
  "email": {
10840
10840
  "type": "string",
10841
10841
  "format": "email"
10842
+ },
10843
+ "notes": {
10844
+ "type": "string"
10842
10845
  }
10843
10846
  },
10844
10847
  "required": [
10845
- "name",
10846
- "email"
10848
+ "name"
10847
10849
  ]
10848
10850
  }
10849
10851
  ]
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@desplega.ai/agent-swarm",
3
- "version": "1.84.0",
3
+ "version": "1.85.0",
4
4
  "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
5
5
  "license": "MIT",
6
6
  "author": "desplega.sh <contact@desplega.sh>",
@@ -180,3 +180,36 @@ export function isTokenExpiringSoon(provider: string, bufferMs = 5 * 60 * 1000):
180
180
  const expiresAt = new Date(tokens.expiresAt).getTime();
181
181
  return expiresAt - Date.now() < bufferMs;
182
182
  }
183
+
184
+ // ── OAuth Refresh Locks ──
185
+
186
/**
 * Try to take the refresh lock row for `provider`, valid for `ttlMs` milliseconds.
 *
 * A fresh random owner token is written with an upsert: the row is inserted when
 * none exists for the provider, and an existing row is overwritten only when its
 * stored `expiresAt` is not later than the current time. The row is then read
 * back, and the token is handed out only if it is the one now stored.
 *
 * @param provider - Lock key (one row per provider).
 * @param ttlMs - How long, from now, the lock should be considered held.
 * @returns The owner token when this caller holds the lock, otherwise `null`.
 */
export function acquireOAuthRefreshLock(provider: string, ttlMs: number): string | null {
  const token = crypto.randomUUID();
  const startedAtMs = Date.now();
  const startedAt = new Date(startedAtMs).toISOString();
  const lockedUntil = new Date(startedAtMs + ttlMs).toISOString();

  // The trailing WHERE makes the conflict branch a no-op while a live lock exists.
  const upsert = getDb().query(
    `INSERT INTO oauth_refresh_locks (provider, owner, expiresAt, createdAt, updatedAt)
       VALUES (?, ?, ?, ?, ?)
       ON CONFLICT(provider) DO UPDATE SET
         owner = excluded.owner,
         expiresAt = excluded.expiresAt,
         updatedAt = excluded.updatedAt
       WHERE oauth_refresh_locks.expiresAt <= ?`,
  );
  upsert.run(provider, token, lockedUntil, startedAt, startedAt, startedAt);

  // Whoever's token is stored after the upsert is the holder.
  const current = getDb()
    .query("SELECT owner FROM oauth_refresh_locks WHERE provider = ?")
    .get(provider) as { owner: string } | null;

  if (current?.owner !== token) return null;
  return token;
}
210
+
211
/**
 * Delete the refresh lock row for `provider`, but only while it is still stored
 * under `owner`. A row held under a different owner token is left in place.
 *
 * @param provider - Lock key to release.
 * @param owner - Token previously returned by `acquireOAuthRefreshLock`.
 */
export function releaseOAuthRefreshLock(provider: string, owner: string): void {
  const removeOwned = getDb().query(
    "DELETE FROM oauth_refresh_locks WHERE provider = ? AND owner = ?",
  );
  removeOwned.run(provider, owner);
}
package/src/be/db.ts CHANGED
@@ -1012,6 +1012,7 @@ type AgentTaskRow = {
1012
1012
  swarmVersion: string | null;
1013
1013
  provider: string | null;
1014
1014
  providerMeta: string | null;
1015
+ totalCostUsd?: number | null;
1015
1016
  };
1016
1017
 
1017
1018
  function rowToAgentTask(row: AgentTaskRow): AgentTask {
@@ -1075,6 +1076,7 @@ function rowToAgentTask(row: AgentTaskRow): AgentTask {
1075
1076
  swarmVersion: row.swarmVersion ?? undefined,
1076
1077
  provider: (row.provider as ProviderName | null) ?? undefined,
1077
1078
  providerMeta: parseProviderMeta(row.provider as ProviderName | null, row.providerMeta),
1079
+ totalCostUsd: row.totalCostUsd ?? undefined,
1078
1080
  };
1079
1081
  }
1080
1082
 
@@ -1110,6 +1112,7 @@ function rowToAgentTaskSummary(row: AgentTaskRow): AgentTaskSummary {
1110
1112
  lastUpdatedAt: t.lastUpdatedAt,
1111
1113
  finishedAt: t.finishedAt,
1112
1114
  peakContextPercent: t.peakContextPercent,
1115
+ totalCostUsd: t.totalCostUsd,
1113
1116
  };
1114
1117
  }
1115
1118
 
@@ -1504,7 +1507,10 @@ export function getAllTasks(
1504
1507
  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
1505
1508
  const limit = filters?.limit ?? 25;
1506
1509
  const offset = filters?.offset ?? 0;
1507
- const query = `SELECT * FROM agent_tasks ${whereClause} ORDER BY lastUpdatedAt DESC, priority DESC LIMIT ${limit} OFFSET ${offset}`;
1510
+ const query = `SELECT agent_tasks.*,
1511
+ (SELECT SUM(totalCostUsd) FROM session_costs WHERE session_costs.taskId = agent_tasks.id) AS totalCostUsd
1512
+ FROM agent_tasks ${whereClause}
1513
+ ORDER BY lastUpdatedAt DESC, priority DESC LIMIT ${limit} OFFSET ${offset}`;
1508
1514
 
1509
1515
  const rows = getDb()
1510
1516
  .prepare<AgentTaskRow, (string | AgentTaskStatus)[]>(query)
@@ -0,0 +1,43 @@
1
+ -- Backfill Kapso/WhatsApp sender identities into the canonical user registry.
2
+ --
3
+ -- Native Kapso inbound messages resolve their sender through user_external_ids
4
+ -- using kind='kapso' and the normalized WhatsApp phone number from
5
+ -- message.from/conversation.phone_number. Existing user profiles can already
6
+ -- carry WhatsApp numbers in notes in the documented form:
7
+ --
8
+ -- WhatsApp: +34 ... (E.164: 346...)
9
+ --
10
+ -- Link those existing, human-curated profile rows instead of leaving inbound
11
+ -- Kapso sender rows unmapped. This is idempotent and preserves any existing
12
+ -- mapping for a phone number.
13
+
14
+ INSERT OR IGNORE INTO user_external_ids (kind, externalId, userId)
15
+ WITH raw_notes AS (
16
+ SELECT
17
+ id AS userId,
18
+ substr(notes, instr(notes, 'E.164:') + length('E.164:')) AS e164_suffix
19
+ FROM users
20
+ WHERE notes LIKE '%WhatsApp:%'
21
+ AND notes LIKE '%E.164:%'
22
+ ),
23
+ parsed AS (
24
+ SELECT
25
+ userId,
26
+ trim(
27
+ CASE
28
+ WHEN instr(e164_suffix, ')') > 0 THEN substr(e164_suffix, 1, instr(e164_suffix, ')') - 1)
29
+ ELSE e164_suffix
30
+ END
31
+ ) AS e164_value
32
+ FROM raw_notes
33
+ ),
34
+ normalized AS (
35
+ SELECT
36
+ userId,
37
+ replace(replace(replace(replace(e164_value, '+', ''), ' ', ''), '-', ''), '.', '') AS externalId
38
+ FROM parsed
39
+ )
40
+ SELECT 'kapso', externalId, userId
41
+ FROM normalized
42
+ WHERE externalId <> ''
43
+ AND externalId NOT GLOB '*[^0-9]*';
@@ -0,0 +1,8 @@
1
+ -- Cross-process mutex for OAuth refresh-token rotation.
2
+ CREATE TABLE IF NOT EXISTS oauth_refresh_locks (
3
+ provider TEXT PRIMARY KEY,
4
+ owner TEXT NOT NULL,
5
+ expiresAt TEXT NOT NULL,
6
+ createdAt TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
7
+ updatedAt TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
8
+ );
@@ -0,0 +1,178 @@
1
+ /**
2
+ * Universal context preamble for follow-up task continuity.
3
+ *
4
+ * Builds a bounded text summary of prior task context (parent → ancestor chain)
5
+ * and prepends it to the child task's prompt. This makes follow-up continuity
6
+ * uniform across ALL harness providers — not just those that support native
7
+ * session resume (claude/codex).
8
+ *
9
+ * Token budget (CONTEXT_PREAMBLE_MAX_TOKENS, default 2000) prevents the
10
+ * SIGTERM-143 context-saturation failure mode seen with unbounded session
11
+ * resumes (see swarm memory sigterm-143-resumed-session-context-saturation-2026-05-13).
12
+ */
13
+
14
/** Token budget used when CONTEXT_PREAMBLE_MAX_TOKENS is unset or unusable. */
const DEFAULT_CONTEXT_PREAMBLE_MAX_TOKENS = 2000;

/**
 * Parse the CONTEXT_PREAMBLE_MAX_TOKENS override.
 *
 * Anything that is not a finite number >= 1 (unset, empty, non-numeric, zero,
 * negative, Infinity) falls back to the default. Without this guard a malformed
 * value becomes NaN, every `length > budget` comparison is false, and the
 * preamble is silently left unbounded.
 *
 * @param raw - Raw environment value.
 * @returns A whole, positive token budget.
 */
function resolveContextPreambleMaxTokens(raw: string | undefined): number {
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 1
    ? Math.floor(parsed)
    : DEFAULT_CONTEXT_PREAMBLE_MAX_TOKENS;
}

export const CONTEXT_PREAMBLE_MAX_TOKENS = resolveContextPreambleMaxTokens(
  process.env.CONTEXT_PREAMBLE_MAX_TOKENS,
);
// ~4 chars per token (conservative approximation for mixed code/prose)
export const CONTEXT_PREAMBLE_MAX_CHARS = CONTEXT_PREAMBLE_MAX_TOKENS * 4;
export const CONTEXT_PREAMBLE_MAX_ANCESTORS = 5;
20
+
21
/** One artifact attached to a task, in the shape the preamble builder reads. */
interface TaskAttachmentForPreamble {
  /** Attachment type; the pointer formatter recognises "agent-fs", "url", "page" and "shared-fs". */
  kind: string;
  /** Display name shown in the artifact list. */
  name: string;
  /** Target for `kind: "url"`. */
  url?: string;
  /** File path for `kind: "agent-fs"` and `kind: "shared-fs"`. */
  path?: string;
  /** Page identifier for `kind: "page"`. */
  pageId?: string;
  /** With `driveId`, lets an agent-fs path be rendered as a live URL. */
  orgId?: string;
  driveId?: string;
  /** Optional note shown next to the pointer; preferred over `intent`. */
  description?: string;
  intent?: string;
  isPrimary?: boolean;
}

/** The subset of a task record that the context preamble is built from. */
export interface TaskContextForPreamble {
  id: string;
  /** Task prompt/subject text. */
  task: string;
  /** Final output; used as the outcome when present. */
  output?: string;
  /** Latest progress text; used as the outcome when there is no output. */
  progress?: string;
  status?: string;
  /** Next task up the ancestor chain, if any. */
  parentTaskId?: string;
  attachments?: TaskAttachmentForPreamble[];
}
41
+
42
/**
 * Fetch minimal task context for preamble generation (worker-side, via HTTP).
 *
 * Issues `GET {apiUrl}/api/tasks/{taskId}` (with a Bearer header when `apiKey`
 * is non-empty) and copies only the fields the preamble needs. The response is
 * validated rather than trusted: callers dereference `id` and `task` directly,
 * so a body without them must not be handed back as a task.
 *
 * @param apiUrl - Base URL of the swarm API.
 * @param apiKey - API key; an empty string sends no Authorization header.
 * @param taskId - Task to load; URL-encoded before being placed in the path.
 * @returns The trimmed task context, or `null` when the request fails, the
 *   response is not OK, the body is not a JSON object, or `id`/`task` are not strings.
 */
export async function fetchTaskContextForPreamble(
  apiUrl: string,
  apiKey: string,
  taskId: string,
): Promise<TaskContextForPreamble | null> {
  const headers: Record<string, string> = {};
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;

  // Optional text fields are only kept when they really are strings.
  const asString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  try {
    const response = await fetch(`${apiUrl}/api/tasks/${encodeURIComponent(taskId)}`, {
      headers,
    });
    if (!response.ok) return null;

    const data: unknown = await response.json();
    if (typeof data !== "object" || data === null) return null;
    const record = data as Record<string, unknown>;
    if (typeof record.id !== "string" || typeof record.task !== "string") return null;

    return {
      id: record.id,
      task: record.task,
      output: asString(record.output),
      progress: asString(record.progress),
      status: asString(record.status),
      parentTaskId: asString(record.parentTaskId),
      attachments: Array.isArray(record.attachments)
        ? (record.attachments as TaskContextForPreamble["attachments"])
        : undefined,
    };
  } catch {
    // Best-effort: network and JSON errors simply mean "no context available".
    return null;
  }
}
67
+
68
/**
 * Render the one-line pointer shown for an attachment in the preamble.
 *
 * - `agent-fs` with a path: a live URL under AGENT_FS_LIVE_URL (default
 *   `https://live.agent-fs.dev`) when both `orgId` and `driveId` are present,
 *   otherwise the bare path.
 * - `url`: the URL. `page`: `(page:<pageId>)`. `shared-fs`: the path.
 * - Anything else, or a known kind missing its field: `(no pointer)`.
 *
 * A blank AGENT_FS_LIVE_URL is treated as unset and trailing slashes are
 * stripped, so the result is never host-less (`/file/~/…`) or double-slashed.
 *
 * @param att - Attachment taken from a task's `attachments` list.
 * @returns The pointer text.
 */
function formatAttachmentPointer(
  att: NonNullable<TaskContextForPreamble["attachments"]>[number],
): string {
  switch (att.kind) {
    case "agent-fs": {
      if (!att.path) break;
      if (!att.orgId || !att.driveId) return att.path;
      const configured = process.env.AGENT_FS_LIVE_URL?.trim().replace(/\/+$/, "");
      const liveHost = configured || "https://live.agent-fs.dev";
      return `${liveHost}/file/~/${att.orgId}/${att.driveId}/${att.path}`;
    }
    case "url":
      if (att.url) return att.url;
      break;
    case "page":
      if (att.pageId) return `(page:${att.pageId})`;
      break;
    case "shared-fs":
      if (att.path) return att.path;
      break;
    default:
      break;
  }
  return "(no pointer)";
}
83
+
84
/** Collapse `text` to a single line of at most `maxChars`; non-strings become "". */
function previewLine(text: unknown, maxChars: number): string {
  return typeof text === "string" ? text.slice(0, maxChars).replace(/\n/g, " ") : "";
}

/**
 * Build a bounded context preamble for a follow-up task.
 *
 * Walks the ancestor chain (up to CONTEXT_PREAMBLE_MAX_ANCESTORS) via the API
 * and returns a formatted markdown block that is prepended to the child prompt.
 *
 * - Immediate parent: inline detail (subject + output + attachments)
 * - Older ancestors: pointer-only (taskId + one-line subject)
 *
 * Hard-capped at CONTEXT_PREAMBLE_MAX_CHARS (~CONTEXT_PREAMBLE_MAX_TOKENS
 * tokens) to prevent context saturation. The truncation notice is counted
 * inside the cap, so the cap holds whenever the budget is at least as long as
 * the notice itself.
 *
 * @param apiUrl - Base URL of the swarm API.
 * @param apiKey - API key forwarded to the task fetches.
 * @param parentTaskId - Immediate parent of the task being started.
 * @returns The preamble text, or `null` when the parent cannot be loaded.
 */
export async function buildContextPreamble(
  apiUrl: string,
  apiKey: string,
  parentTaskId: string,
): Promise<string | null> {
  const ancestors: TaskContextForPreamble[] = [];
  // Guards against a cyclic parent chain re-fetching and re-listing the same tasks.
  const visited = new Set<string>();
  let currentId: string | undefined = parentTaskId;
  while (
    currentId &&
    !visited.has(currentId) &&
    ancestors.length < CONTEXT_PREAMBLE_MAX_ANCESTORS
  ) {
    visited.add(currentId);
    const ctx = await fetchTaskContextForPreamble(apiUrl, apiKey, currentId);
    if (!ctx) break;
    ancestors.push(ctx);
    currentId = ctx.parentTaskId;
  }

  const parent = ancestors[0];
  if (!parent) return null;

  const lines: string[] = [
    "\n---",
    "## Prior Conversation Context",
    "",
    "This task is a follow-up in an ongoing thread. Here is a summary of prior work to maintain continuity.",
    "",
  ];

  lines.push(`### Immediate Prior Task (ID: \`${parent.id}\`)`);
  lines.push(`**Task:** ${previewLine(parent.task, 600)}`);
  lines.push("");

  // Prefer the final output, then the latest progress; ignore non-text values.
  const rawResult = [parent.output, parent.progress].find(
    (value): value is string => typeof value === "string" && value.length > 0,
  );
  if (rawResult) {
    // Reserve ~55% of budget for the output content; rest for structure + older ancestors
    const outputBudget = Math.floor(CONTEXT_PREAMBLE_MAX_CHARS * 0.55);
    const truncated =
      rawResult.length > outputBudget
        ? `${rawResult.slice(0, outputBudget)}\n\n[output truncated — full history via \`get-task-details\` with taskId \`${parent.id}\`]`
        : rawResult;
    lines.push("**Outcome:**");
    lines.push(truncated);
    lines.push("");
  } else {
    lines.push("**Outcome:** (no output recorded yet — task may still be in progress)");
    lines.push("");
  }

  // Only attachments that have a name and something to point at are listed.
  const atts = Array.isArray(parent.attachments)
    ? parent.attachments.filter((a) => a.name && (a.url || a.path || a.pageId))
    : [];
  if (atts.length > 0) {
    lines.push("**Artifacts from prior task:**");
    for (const att of atts.slice(0, 10)) {
      const pointer = formatAttachmentPointer(att);
      const note = att.description || att.intent || "";
      lines.push(` - **${att.name}**: \`${pointer}\`${note ? ` — ${note}` : ""}`);
    }
    lines.push("");
  }

  lines.push(
    `To review the full prior conversation call \`get-task-details\` with taskId \`${parent.id}\`.`,
  );

  if (ancestors.length > 1) {
    lines.push("");
    lines.push(
      "### Older Ancestor Tasks (pointers only — call `get-task-details` for full details)",
    );
    for (const ancestor of ancestors.slice(1)) {
      lines.push(`- \`${ancestor.id}\` — ${previewLine(ancestor.task, 200)}`);
    }
  }

  lines.push("", "---", "");

  let preamble = lines.join("\n");

  if (preamble.length > CONTEXT_PREAMBLE_MAX_CHARS) {
    const notice = `\n\n[context preamble truncated to ${CONTEXT_PREAMBLE_MAX_TOKENS}-token budget]\n\n---\n`;
    // Make room for the notice so the final text stays within the cap.
    const keep = Math.max(0, CONTEXT_PREAMBLE_MAX_CHARS - notice.length);
    preamble = `${preamble.slice(0, keep)}${notice}`;
  }

  return preamble;
}
@@ -48,7 +48,9 @@ import { prettyPrintLine, prettyPrintStderr } from "../utils/pretty-print.ts";
48
48
  import { scrubSecrets } from "../utils/secret-scrubber.ts";
49
49
  import { refreshSkillsIfChanged } from "../utils/skills-refresh.ts";
50
50
  import { detectVcsProvider } from "../vcs/index.ts";
51
+ import { validateJsonSchema } from "../workflows/json-schema-validator.ts";
51
52
  import { interpolate } from "../workflows/template.ts";
53
+ import { buildContextPreamble } from "./context-preamble.ts";
52
54
  import { awaitCredentials, BootMaxWaitExceededError, EX_CONFIG } from "./credential-wait.ts";
53
55
  import {
54
56
  buildCredStatusReport,
@@ -703,6 +705,56 @@ Extract the structured data from the progress updates above. Return ONLY valid J
703
705
  }
704
706
  }
705
707
 
708
/**
 * Check a provider-supplied output string against the task's `outputSchema`,
 * when the task has one.
 *
 * The task is loaded from `GET {apiUrl}/api/tasks/{taskId}`. The check is
 * fail-open: a non-OK response, a task without an object `outputSchema`, or any
 * error thrown while fetching or validating all count as a pass. Only two
 * outcomes fail: the output is not valid JSON, or the schema validator reports
 * errors.
 *
 * @param config - API location and optional key.
 * @param taskId - Task whose schema should be applied.
 * @param providerOutput - Raw output text from the provider.
 * @returns `{ ok: true }`, or `{ ok: false, failReason }` describing the mismatch.
 */
async function validateProviderOutputIfNeeded(
  config: ApiConfig,
  taskId: string,
  providerOutput: string,
): Promise<{ ok: true } | { ok: false; failReason: string }> {
  const requestHeaders: Record<string, string> = { "Content-Type": "application/json" };
  if (config.apiKey) requestHeaders.Authorization = `Bearer ${config.apiKey}`;

  try {
    const res = await fetch(`${config.apiUrl}/api/tasks/${taskId}`, {
      headers: requestHeaders,
    });
    if (!res.ok) return { ok: true };

    const { outputSchema } = (await res.json()) as {
      outputSchema?: Record<string, unknown>;
    };
    const hasSchema = Boolean(outputSchema) && typeof outputSchema === "object";
    if (!hasSchema || !outputSchema) return { ok: true };

    // Malformed JSON is a definite failure, not something to fail open on.
    let candidate: unknown;
    try {
      candidate = JSON.parse(providerOutput);
    } catch {
      return {
        ok: false,
        failReason:
          "Structured output required by outputSchema but provider output was not valid JSON",
      };
    }

    const problems = validateJsonSchema(outputSchema, candidate);
    if (problems.length === 0) return { ok: true };
    return {
      ok: false,
      failReason: `Structured output did not match outputSchema: ${problems.join("; ")}`,
    };
  } catch {
    // Fetch/validator trouble must not fail an otherwise finished task.
    return { ok: true };
  }
}
757
+
706
758
  export async function ensureTaskFinished(
707
759
  config: ApiConfig,
708
760
  role: string,
@@ -733,12 +785,14 @@ export async function ensureTaskFinished(
733
785
  if (status === "failed") {
734
786
  body.failureReason = failureReason || `Claude process exited with code ${exitCode}`;
735
787
  } else if (providerOutput) {
736
- // Provider already supplied structured output (e.g. Devin) — use directly.
737
- // NOTE: providerOutput is NOT validated against task.outputSchema here.
738
- // Known gap for default-mode Devin; see runbooks/harness-providers.md
739
- // ("Per-task outputSchema support"). Schema enforcement only happens on
740
- // the MCP path via store-progress.
741
- body.output = providerOutput;
788
+ const validation = await validateProviderOutputIfNeeded(config, taskId, providerOutput);
789
+ if (validation.ok) {
790
+ body.output = providerOutput;
791
+ } else {
792
+ status = "failed";
793
+ body.status = "failed";
794
+ body.failureReason = validation.failReason;
795
+ }
742
796
  } else {
743
797
  // Try structured output fallback if the task has an outputSchema
744
798
  const adapterType = provider ?? process.env.HARNESS_PROVIDER ?? "claude";
@@ -3752,6 +3806,19 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
3752
3806
  console.log(`[${role}] Injected relevant memories into resumed task prompt`);
3753
3807
  }
3754
3808
 
3809
+ // Universal context preamble: inject for all providers when task is a follow-up.
3810
+ // Gives non-resumable providers (opencode/pi/devin) prior-task context; also
3811
+ // acts as a bounded safety net for resumable ones (claude/codex).
3812
+ if (task.parentTaskId && apiUrl) {
3813
+ const contextPreamble = await buildContextPreamble(apiUrl, apiKey, task.parentTaskId);
3814
+ if (contextPreamble) {
3815
+ resumePrompt = contextPreamble + resumePrompt;
3816
+ console.log(
3817
+ `[${role}] Injected context preamble into resumed follow-up task prompt (parent: ${task.parentTaskId.slice(0, 8)})`,
3818
+ );
3819
+ }
3820
+ }
3821
+
3755
3822
  // Resolve provider-aware resume: prefer own session, then parent.
3756
3823
  const resumeCandidates: ResumeSessionCandidate[] = [
3757
3824
  {
@@ -4107,9 +4174,22 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
4107
4174
  }
4108
4175
  }
4109
4176
 
4177
+ // Universal context preamble: inject for all providers when task is a follow-up.
4178
+ // Gives non-resumable providers (opencode/pi/devin) prior-task context; also
4179
+ // acts as a bounded safety net for resumable ones (claude/codex).
4180
+ const taskObj = trigger.task as { parentTaskId?: string } | undefined;
4181
+ if (taskObj?.parentTaskId && apiUrl) {
4182
+ const contextPreamble = await buildContextPreamble(apiUrl, apiKey, taskObj.parentTaskId);
4183
+ if (contextPreamble) {
4184
+ triggerPrompt = contextPreamble + triggerPrompt;
4185
+ console.log(
4186
+ `[${role}] Injected context preamble for follow-up task (parent: ${taskObj.parentTaskId.slice(0, 8)})`,
4187
+ );
4188
+ }
4189
+ }
4190
+
4110
4191
  // Resolve provider-aware resume for child tasks with parentTaskId.
4111
4192
  let resumeSessionId: string | undefined;
4112
- const taskObj = trigger.task as { parentTaskId?: string } | undefined;
4113
4193
  if (taskObj?.parentTaskId) {
4114
4194
  const parentSession = await fetchProviderSessionInfo(
4115
4195
  apiUrl,
package/src/http/index.ts CHANGED
@@ -24,6 +24,7 @@ import {
24
24
  import { startSlackApp, stopSlackApp } from "../slack";
25
25
  import { initTelemetry, telemetry } from "../telemetry";
26
26
  import { getApiKey } from "../utils/api-key";
27
+ import { scrubSecrets } from "../utils/secret-scrubber";
27
28
  import { initWorkflows } from "../workflows";
28
29
  import { handleActiveSessions } from "./active-sessions";
29
30
  import { handleAgentRegister, handleAgentsRest } from "./agents";
@@ -68,6 +69,7 @@ import {
68
69
  getPathSegments,
69
70
  httpServerSemconvAttributes,
70
71
  parseQueryParams,
72
+ safeRequestUrlForLog,
71
73
  setCorsHeaders,
72
74
  } from "./utils";
73
75
  import { handleWebhooks } from "./webhooks";
@@ -124,7 +126,9 @@ const httpServer = createHttpServer(async (req, res) => {
124
126
  const logRequest = () => {
125
127
  const elapsed = (performance.now() - startTime).toFixed(1);
126
128
  const statusEmoji = statusCode >= 400 ? "⚠️" : "✓";
127
- console.log(`[HTTP] ${statusEmoji} ${req.method} ${req.url} → ${statusCode} (${elapsed}ms)`);
129
+ console.log(
130
+ `[HTTP] ${statusEmoji} ${req.method} ${safeRequestUrlForLog(req.url)} → ${statusCode} (${elapsed}ms)`,
131
+ );
128
132
  };
129
133
 
130
134
  // Ensure we log on response finish
@@ -132,7 +136,9 @@ const httpServer = createHttpServer(async (req, res) => {
132
136
 
133
137
  // Log errors
134
138
  res.on("error", (err) => {
135
- console.error(`[HTTP] ❌ ${req.method} ${req.url} → Error: ${err.message}`);
139
+ console.error(
140
+ `[HTTP] ❌ ${req.method} ${safeRequestUrlForLog(req.url)} → Error: ${scrubSecrets(err.message)}`,
141
+ );
136
142
  });
137
143
 
138
144
  await withRemoteContext(req.headers as Record<string, unknown>, async () => {
@@ -257,7 +263,9 @@ const httpServer = createHttpServer(async (req, res) => {
257
263
  span.setStatus({ code: 2, message: err instanceof Error ? err.message : String(err) });
258
264
  }
259
265
  const message = err instanceof Error ? err.message : String(err);
260
- console.error(`[HTTP] ❌ ${req.method} ${req.url} → ${message}`);
266
+ console.error(
267
+ `[HTTP] ❌ ${req.method} ${safeRequestUrlForLog(req.url)} → ${scrubSecrets(message)}`,
268
+ );
261
269
  if (!res.headersSent) {
262
270
  res.writeHead(500, { "Content-Type": "application/json" });
263
271
  res.end(JSON.stringify({ error: message }));