@desplega.ai/agent-swarm 1.87.0 → 1.89.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/README.md +5 -1
  2. package/openapi.json +53 -1
  3. package/package.json +6 -5
  4. package/plugin/skills/composio/SKILL.md +98 -0
  5. package/src/be/db.ts +374 -9
  6. package/src/be/migrations/080_skill_system_defaults.sql +8 -0
  7. package/src/be/migrations/081_metrics.sql +39 -0
  8. package/src/be/migrations/082_user_audit_fields.sql +120 -0
  9. package/src/be/modelsdev-cache.json +3825 -2417
  10. package/src/be/seed/registry.ts +3 -2
  11. package/src/be/seed-skills/index.ts +179 -0
  12. package/src/cli.tsx +51 -4
  13. package/src/commands/e2b-stack-wizard.tsx +394 -0
  14. package/src/commands/e2b.ts +1352 -53
  15. package/src/commands/onboard/dashboard-url.ts +29 -0
  16. package/src/commands/onboard/steps/post-dashboard.tsx +3 -1
  17. package/src/commands/onboard.tsx +3 -1
  18. package/src/commands/runner.ts +154 -22
  19. package/src/commands/x.ts +118 -0
  20. package/src/e2b/dispatch.ts +234 -18
  21. package/src/github/handlers.ts +40 -1
  22. package/src/heartbeat/heartbeat.ts +26 -5
  23. package/src/http/active-sessions.ts +32 -1
  24. package/src/http/auth.ts +36 -0
  25. package/src/http/core.ts +20 -16
  26. package/src/http/db-query.ts +20 -0
  27. package/src/http/index.ts +2 -0
  28. package/src/http/memory.ts +13 -1
  29. package/src/http/metrics.ts +447 -0
  30. package/src/http/operator-actor.ts +9 -0
  31. package/src/http/poll.ts +11 -1
  32. package/src/http/skills.ts +53 -0
  33. package/src/http/tasks.ts +4 -1
  34. package/src/http/webhooks.ts +75 -0
  35. package/src/http/workflows.ts +5 -1
  36. package/src/integrations/kapso/client.ts +82 -0
  37. package/src/memory/automatic-task-gate.ts +47 -0
  38. package/src/metrics/version.ts +26 -0
  39. package/src/prompts/base-prompt.ts +24 -1
  40. package/src/prompts/session-templates.ts +74 -0
  41. package/src/providers/claude-adapter.ts +19 -0
  42. package/src/providers/codex-adapter.ts +22 -0
  43. package/src/providers/ctx-mode-env.ts +10 -0
  44. package/src/providers/opencode-adapter.ts +72 -7
  45. package/src/server.ts +10 -1
  46. package/src/slack/blocks.ts +12 -4
  47. package/src/slack/watcher.ts +3 -3
  48. package/src/telemetry.ts +14 -1
  49. package/src/templates.d.ts +4 -0
  50. package/src/tests/base-prompt.test.ts +76 -0
  51. package/src/tests/budget-claim-gate.test.ts +26 -0
  52. package/src/tests/claude-adapter.test.ts +86 -1
  53. package/src/tests/codex-adapter.test.ts +89 -0
  54. package/src/tests/core-auth.test.ts +8 -1
  55. package/src/tests/e2b-dispatch.test.ts +603 -11
  56. package/src/tests/events-http.test.ts +6 -2
  57. package/src/tests/github-handlers-cancel-config.test.ts +262 -0
  58. package/src/tests/heartbeat.test.ts +84 -3
  59. package/src/tests/http-api-integration.test.ts +116 -1
  60. package/src/tests/kapso-client.test.ts +74 -1
  61. package/src/tests/kapso-inbound.test.ts +60 -2
  62. package/src/tests/metrics-http.test.ts +247 -0
  63. package/src/tests/opencode-adapter.test.ts +185 -30
  64. package/src/tests/prompt-template-session.test.ts +4 -2
  65. package/src/tests/runner-repo-autostash.test.ts +117 -0
  66. package/src/tests/runner-requester-profile.test.ts +25 -0
  67. package/src/tests/runner-skills-refresh.test.ts +1 -1
  68. package/src/tests/self-improvement.test.ts +89 -0
  69. package/src/tests/skill-update-scope.test.ts +88 -1
  70. package/src/tests/slack-blocks.test.ts +15 -0
  71. package/src/tests/swarm-x-tool.test.ts +90 -0
  72. package/src/tests/system-default-skills.test.ts +122 -0
  73. package/src/tests/telemetry-init.test.ts +86 -0
  74. package/src/tests/ui-logs-parser.test.ts +271 -0
  75. package/src/tests/user-token-rest-auth.test.ts +129 -0
  76. package/src/tests/workflow-async-v2.test.ts +23 -0
  77. package/src/tests/x-composio.test.ts +122 -0
  78. package/src/tools/create-metric.ts +191 -0
  79. package/src/tools/skills/skill-delete.ts +14 -0
  80. package/src/tools/skills/skill-update.ts +14 -0
  81. package/src/tools/store-progress.ts +19 -5
  82. package/src/tools/swarm-x.ts +116 -0
  83. package/src/tools/tool-config.ts +6 -0
  84. package/src/types.ts +121 -0
  85. package/src/utils/request-auth-context.ts +28 -0
  86. package/src/utils/skills-refresh.ts +2 -2
  87. package/src/workflows/engine.ts +24 -2
  88. package/src/workflows/executors/agent-task.ts +2 -0
  89. package/src/x/composio.ts +295 -0
  90. package/templates/skills/artifacts/config.json +1 -0
  91. package/templates/skills/attio-interaction/SKILL.md +279 -0
  92. package/templates/skills/attio-interaction/config.json +14 -0
  93. package/templates/skills/attio-interaction/content.md +272 -0
  94. package/templates/skills/kv-storage/config.json +1 -0
  95. package/templates/skills/pages/config.json +1 -0
  96. package/templates/skills/scheduled-task-resilience/config.json +1 -0
  97. package/templates/skills/swarm-scripts/SKILL.md +91 -0
  98. package/templates/skills/swarm-scripts/config.json +14 -0
  99. package/templates/skills/swarm-scripts/content.md +86 -0
  100. package/templates/skills/workflow-iterate/config.json +1 -0
  101. package/templates/skills/workflow-structured-output/config.json +1 -0
  102. package/tsconfig.json +2 -1
@@ -41,7 +41,14 @@ import {
41
41
  isGitLabEnabled,
42
42
  verifyGitLabWebhook,
43
43
  } from "../gitlab";
44
+ import {
45
+ type KapsoMessageActionResult,
46
+ markKapsoMessageRead,
47
+ sendKapsoReaction,
48
+ } from "../integrations/kapso/client";
49
+ import type { KapsoConfig } from "../integrations/kapso/config";
44
50
  import { getKapsoConfig } from "../integrations/kapso/config";
51
+ import type { KapsoWebhookPayload } from "../integrations/kapso/inbound";
45
52
  import { routeKapsoInbound } from "../integrations/kapso/inbound";
46
53
  import { getExecutorRegistry } from "../workflows";
47
54
  import { workflowEventBus } from "../workflows/event-bus";
@@ -108,6 +115,72 @@ const kapsoWebhook = route({
108
115
 
109
116
  // ─── Handler ─────────────────────────────────────────────────────────────────
110
117
 
118
/**
 * Emits a warning when a Kapso acknowledgement action did not succeed.
 *
 * Successful results are ignored. For failures the warning carries the
 * result's `errorMessage` when present, otherwise the numeric status.
 *
 * @param action - Short label of the acknowledgement step, used in the log line.
 * @param result - Outcome returned by the Kapso client call.
 */
function logKapsoAckResult(action: string, result: KapsoMessageActionResult): void {
  if (!result.ok) {
    const detail = result.errorMessage ?? `status ${result.status}`;
    console.warn(`[Kapso] Inbound acknowledgement ${action} failed: ${detail}`);
  }
}
126
+
127
/**
 * Best-effort acknowledgement of an inbound Kapso (WhatsApp) message: marks it
 * as read with a typing indicator and reacts to it with 👀.
 *
 * Nothing is sent for test payloads, for messages whose Kapso direction is not
 * "inbound", or when the phone-number id or message id is missing. Without an
 * API key a warning is logged and the function returns.
 *
 * Both client calls are started before either is awaited and are collected
 * with `Promise.allSettled`, so a rejected call is logged rather than thrown.
 *
 * @param payload - Parsed Kapso webhook payload.
 * @param config - Kapso configuration providing `apiKey` and `apiBaseUrl`.
 */
async function acknowledgeKapsoInboundMessage(
  payload: KapsoWebhookPayload,
  config: KapsoConfig,
): Promise<void> {
  const inbound = payload.message;
  const phoneNumberId = payload.phone_number_id;
  const messageId = inbound?.id;

  // Guard clauses: only real inbound messages with full identifiers are acknowledged.
  if (payload.test) return;
  if (inbound?.kapso?.direction !== "inbound") return;
  if (!phoneNumberId || !messageId) return;

  const { apiKey, apiBaseUrl } = config;
  if (!apiKey) {
    console.warn("[Kapso] Cannot acknowledge inbound message: KAPSO_API_KEY is not configured");
    return;
  }

  const readCall = markKapsoMessageRead({
    apiBaseUrl,
    apiKey,
    phoneNumberId,
    messageId,
    typingIndicatorType: "text",
  });

  // The reaction needs a recipient; fall back to the conversation's number.
  const recipient = inbound?.from ?? payload.conversation?.phone_number;
  let reactionCall: Promise<KapsoMessageActionResult>;
  if (recipient && recipient.length > 0) {
    reactionCall = sendKapsoReaction({
      apiBaseUrl,
      apiKey,
      phoneNumberId,
      to: recipient,
      messageId,
      emoji: "👀",
    });
  } else {
    // No request is made; a synthetic failure keeps the logging path uniform.
    reactionCall = Promise.resolve<KapsoMessageActionResult>({
      ok: false,
      status: 0,
      raw: null,
      errorMessage: "missing sender phone",
    });
  }

  const [readOutcome, reactionOutcome] = await Promise.allSettled([readCall, reactionCall]);
  const labelled = [
    ["mark-as-read/typing", readOutcome],
    ["reaction", reactionOutcome],
  ] as const;

  for (const [label, outcome] of labelled) {
    if (outcome.status === "fulfilled") {
      logKapsoAckResult(label, outcome.value);
    } else {
      console.warn(`[Kapso] Inbound acknowledgement ${label} failed: ${outcome.reason}`);
    }
  }
}
183
+
111
184
  export async function handleWebhooks(
112
185
  req: IncomingMessage,
113
186
  res: ServerResponse,
@@ -494,6 +567,8 @@ export async function handleWebhooks(
494
567
  return true;
495
568
  }
496
569
 
570
+ void acknowledgeKapsoInboundMessage(payload, config);
571
+
497
572
  try {
498
573
  const routing = routeKapsoInbound(payload);
499
574
  switch (routing.kind) {
@@ -21,6 +21,7 @@ import {
21
21
  WorkflowPatchSchema,
22
22
  WorkflowRunStatusSchema,
23
23
  } from "../types";
24
+ import { getRequestAuth } from "../utils/request-auth-context";
24
25
  import { getExecutorRegistry, startWorkflowExecution } from "../workflows";
25
26
  import { applyDefinitionPatch, generateEdges, validateDefinition } from "../workflows/definition";
26
27
  import { TriggerSchemaError } from "../workflows/engine";
@@ -645,10 +646,13 @@ export async function handleWorkflows(
645
646
  return true;
646
647
  }
647
648
  const body = await parseBody<Record<string, unknown>>(req);
649
+ const auth = getRequestAuth(req);
648
650
 
649
651
  let runId: string;
650
652
  try {
651
- runId = await startWorkflowExecution(workflow, body, getExecutorRegistry());
653
+ runId = await startWorkflowExecution(workflow, body, getExecutorRegistry(), {
654
+ requestedByUserId: auth?.kind === "user" ? auth.userId : undefined,
655
+ });
652
656
  } catch (err) {
653
657
  if (err instanceof TriggerSchemaError) {
654
658
  triggerSchemaErrorResponse(res, err.message, err.validationErrors);
@@ -20,6 +20,14 @@ export interface KapsoSendResult {
20
20
  errorMessage?: string;
21
21
  }
22
22
 
23
+ /** Result of a lightweight message action through the Meta proxy. */
24
+ export interface KapsoMessageActionResult {
25
// True when the action succeeded; the client functions in this file set it from the HTTP response's `ok` flag.
+ ok: boolean;
26
// HTTP status code of the proxy response. The webhook handler also uses 0 for a synthetic failure where no request was sent.
+ status: number;
27
// Response body as returned by `parseJsonSafe` (exact shape not visible here), or null for synthetic results.
+ raw: unknown;
28
// Failure description: the Meta error message when one can be extracted, otherwise a fallback text. Omitted on success.
+ errorMessage?: string;
29
+ }
30
+
23
31
  /** Meta error codes that mean "outside the 24h customer-service window". */
24
32
  const SESSION_WINDOW_ERROR_CODES = new Set([131047, 131051, 470]);
25
33
 
@@ -98,6 +106,80 @@ export async function sendKapsoText(params: {
98
106
  return { ok: true, status: res.status, messageId, raw, sessionWindowExpired: false };
99
107
  }
100
108
 
109
/**
 * Marks an inbound WhatsApp message as read through the Kapso Meta proxy,
 * optionally asking for the typing indicator to be shown.
 *
 * @param params.apiBaseUrl - Base URL of the Kapso API.
 * @param params.apiKey - Value sent in the `X-API-Key` header.
 * @param params.phoneNumberId - Phone-number id used in the request path.
 * @param params.messageId - Id of the message to mark as read.
 * @param params.typingIndicatorType - When set, a `typing_indicator` object of
 *   this type is added to the request body.
 * @returns `ok: true` for a successful response. Otherwise `ok: false` with
 *   the extracted Meta error message or a status-based fallback. Errors from
 *   `fetch` itself are not caught here.
 */
export async function markKapsoMessageRead(params: {
  apiBaseUrl: string;
  apiKey: string;
  phoneNumberId: string;
  messageId: string;
  typingIndicatorType?: "text";
}): Promise<KapsoMessageActionResult> {
  const { apiBaseUrl, apiKey, phoneNumberId, messageId, typingIndicatorType } = params;
  const endpoint = `${apiBaseUrl}/meta/whatsapp/v24.0/${phoneNumberId}/messages`;

  const requestBody: Record<string, unknown> = {
    messaging_product: "whatsapp",
    status: "read",
    message_id: messageId,
    // Only included when the caller asked for a typing indicator.
    ...(typingIndicatorType ? { typing_indicator: { type: typingIndicatorType } } : {}),
  };

  const response = await fetch(endpoint, {
    method: "POST",
    headers: { "X-API-Key": apiKey, "Content-Type": "application/json" },
    body: JSON.stringify(requestBody),
  });
  const raw = await parseJsonSafe(response);

  if (response.ok) {
    return { ok: true, status: response.status, raw };
  }

  const metaError = extractMetaError(raw);
  return {
    ok: false,
    status: response.status,
    raw,
    errorMessage: metaError.message ?? `Kapso mark-as-read failed with status ${response.status}`,
  };
}
146
+
147
/**
 * Sends an emoji reaction to an inbound WhatsApp message through the Kapso
 * Meta proxy. Pass an empty emoji to clear an existing reaction.
 *
 * @param params.apiBaseUrl - Base URL of the Kapso API.
 * @param params.apiKey - Value sent in the `X-API-Key` header.
 * @param params.phoneNumberId - Phone-number id used in the request path.
 * @param params.to - Recipient phone number of the reaction message.
 * @param params.messageId - Id of the message being reacted to.
 * @param params.emoji - Emoji to apply.
 * @returns `ok: true` for a successful response. Otherwise `ok: false` with
 *   the extracted Meta error message or a status-based fallback. Errors from
 *   `fetch` itself are not caught here.
 */
export async function sendKapsoReaction(params: {
  apiBaseUrl: string;
  apiKey: string;
  phoneNumberId: string;
  to: string;
  messageId: string;
  emoji: string;
}): Promise<KapsoMessageActionResult> {
  const { apiBaseUrl, apiKey, phoneNumberId, to, messageId, emoji } = params;
  const endpoint = `${apiBaseUrl}/meta/whatsapp/v24.0/${phoneNumberId}/messages`;

  const requestBody = {
    messaging_product: "whatsapp",
    recipient_type: "individual",
    to,
    type: "reaction",
    reaction: { message_id: messageId, emoji },
  };

  const response = await fetch(endpoint, {
    method: "POST",
    headers: { "X-API-Key": apiKey, "Content-Type": "application/json" },
    body: JSON.stringify(requestBody),
  });
  const raw = await parseJsonSafe(response);

  if (response.ok) {
    return { ok: true, status: response.status, raw };
  }

  const metaError = extractMetaError(raw);
  return {
    ok: false,
    status: response.status,
    raw,
    errorMessage: metaError.message ?? `Kapso reaction failed with status ${response.status}`,
  };
}
182
+
101
183
  /** Result of configuring a webhook on a phone number. */
102
184
  export interface KapsoWebhookResult {
103
185
  ok: boolean;
@@ -0,0 +1,47 @@
1
/** Tags starting with this prefix mark a task as produced by a schedule. */
const SCHEDULE_TAG_PREFIX = "schedule:";

/** Lower-cased task types that are always treated as automatic. */
const AUTOMATIC_TASK_TYPES = new Set([
  "boot-triage",
  "heartbeat",
  "heartbeat-checklist",
  "health-check",
  "health-probe",
  "monitor",
  "monitoring",
]);

/** The subset of task fields consulted by the memory gate. */
export interface MemoryGateTask {
  source?: string | null;
  taskType?: string | null;
  tags?: string[] | null;
}

/** True when at least one tag carries the schedule prefix. */
function hasSchedulePrefixedTag(tags: readonly string[]): boolean {
  for (const tag of tags) {
    if (tag.startsWith(SCHEDULE_TAG_PREFIX)) return true;
  }
  return false;
}

/**
 * Reports whether the task carries a `schedule:`-prefixed tag.
 * Missing or null tags count as "not scheduled".
 */
export function isScheduledTaskCompletion(task: { tags?: string[] | null }): boolean {
  return hasSchedulePrefixedTag(task.tags ?? []);
}

/**
 * Reports whether a task looks automatic or recurring. Any one signal suffices:
 * - source is "schedule" or "system";
 * - tags contain "scheduled", "auto-generated", or a `schedule:`-prefixed tag;
 * - the lower-cased task type is a known automatic type, or ends with
 *   "-monitor" or "-digest".
 */
export function isAutomaticOrRecurringTaskCompletion(task: MemoryGateTask): boolean {
  if (task.source === "schedule" || task.source === "system") return true;

  const tags = task.tags ?? [];
  if (tags.includes("scheduled") || tags.includes("auto-generated")) return true;
  if (hasSchedulePrefixedTag(tags)) return true;

  const normalizedType = task.taskType?.toLowerCase();
  if (normalizedType === undefined) return false;

  return (
    AUTOMATIC_TASK_TYPES.has(normalizedType) ||
    normalizedType.endsWith("-monitor") ||
    normalizedType.endsWith("-digest")
  );
}

/**
 * Decides whether a task completion should be persisted to memory.
 * A truthy `persistMemory` always wins; otherwise only tasks that are not
 * automatic or recurring are persisted.
 */
export function shouldPersistAutomaticTaskMemory(
  task: MemoryGateTask,
  persistMemory?: boolean,
): boolean {
  return persistMemory ? true : !isAutomaticOrRecurringTaskCompletion(task);
}

/** Alias of {@link shouldPersistAutomaticTaskMemory}. */
export const shouldPersistTaskCompletionMemory = shouldPersistAutomaticTaskMemory;
@@ -0,0 +1,26 @@
1
+ import { createMetricVersion, getMetric, getMetricVersions } from "../be/db";
2
+ import type { MetricSnapshot, MetricVersion } from "../types";
3
+
4
/**
 * Stores a new version record capturing the metric's current title,
 * description and definition.
 *
 * The new version number is one above the highest version already stored for
 * the metric, or 1 when there are none.
 *
 * @param metricId - Id of the metric to snapshot.
 * @param changedByAgentId - Optional id of the agent responsible for the change.
 * @returns The version record returned by `createMetricVersion`.
 * @throws Error when no metric with `metricId` exists.
 */
export function snapshotMetric(metricId: string, changedByAgentId?: string): MetricVersion {
  const metric = getMetric(metricId);
  if (!metric) {
    throw new Error(`Metric ${metricId} not found — cannot create snapshot`);
  }

  // Scan every stored version for the maximum instead of trusting element 0:
  // this does not depend on the ordering of `getMetricVersions` and needs no
  // non-null assertion on an indexed access.
  // NOTE(review): the read and the insert below are two separate calls; confirm
  // whether concurrent snapshots of the same metric need a transaction.
  const highestVersion = getMetricVersions(metricId).reduce(
    (highest, entry) => Math.max(highest, entry.version),
    0,
  );

  const snapshot: MetricSnapshot = {
    title: metric.title,
    description: metric.description,
    definition: metric.definition,
  };

  return createMetricVersion({
    metricId,
    version: highestVersion + 1,
    snapshot,
    changedByAgentId,
  });
}
@@ -8,6 +8,7 @@
8
8
  */
9
9
 
10
10
  import type { ProviderTraits } from "../providers/types";
11
+ import type { ProviderName } from "../types";
11
12
  import { resolveTemplateAsync } from "./resolver";
12
13
 
13
14
  // Side-effect import: register all system + session templates
@@ -55,6 +56,12 @@ export type BasePromptArgs = {
55
56
  swarmUrl: string;
56
57
  capabilities?: string[];
57
58
  traits?: ProviderTraits;
59
+ /**
60
+ * Harness provider for this session. Gates provider-specific prompt blocks
61
+ * (e.g. the context-mode block is excluded for `pi`, which has no
62
+ * context-mode MCP wiring yet — deferred to DES-514).
63
+ */
64
+ provider?: ProviderName;
58
65
  name?: string;
59
66
  description?: string;
60
67
  soulMd?: string;
@@ -65,6 +72,7 @@ export type BasePromptArgs = {
65
72
  claudeMd?: string | null;
66
73
  clonePath: string;
67
74
  warning?: string | null;
75
+ autoStashes?: { ref: string; message: string }[];
68
76
  guidelines?: {
69
77
  prChecks: string[];
70
78
  mergeChecks: string[];
@@ -91,8 +99,16 @@ export const getBasePrompt = async (args: BasePromptArgs): Promise<string> => {
91
99
  if (!hasMcp) {
92
100
  // If no MCP, role cannot be lead
93
101
  compositeEventType = "system.session.worker.remote";
102
+ } else if (role === "lead") {
103
+ compositeEventType = "system.session.lead";
104
+ } else if (args.provider === "pi") {
105
+ // Pi has no context-mode MCP wiring yet (deferred to DES-514), so it uses a
106
+ // worker composite that omits the context_mode block to avoid advertising
107
+ // phantom `ctx_*` tools. All other local providers (claude, codex, opencode)
108
+ // keep the block via the standard worker composite.
109
+ compositeEventType = "system.session.worker.pi";
94
110
  } else {
95
- compositeEventType = role === "lead" ? "system.session.lead" : "system.session.worker";
111
+ compositeEventType = "system.session.worker";
96
112
  }
97
113
  const compositeResult = await resolveTemplateAsync(compositeEventType, vars);
98
114
  let prompt = compositeResult.text;
@@ -182,6 +198,13 @@ export const getBasePrompt = async (args: BasePromptArgs): Promise<string> => {
182
198
  } else if (!args.repoContext.warning) {
183
199
  prompt += `Repository is cloned at \`${args.repoContext.clonePath}\` but has no CLAUDE.md file.\n`;
184
200
  }
201
+
202
+ if (args.repoContext.autoStashes && args.repoContext.autoStashes.length > 0) {
203
+ const stashes = args.repoContext.autoStashes
204
+ .map((stash) => `- ${stash.ref}: ${stash.message}`)
205
+ .join("\n");
206
+ prompt += `\nPending auto-stashed work exists in this repo:\n${stashes}\nRestore if relevant with \`git stash apply <ref>\` or \`git stash pop <ref>\`.\n`;
207
+ }
185
208
  }
186
209
 
187
210
  // Inject repo guidelines
@@ -377,6 +377,31 @@ registerTemplate({
377
377
  ### Context Window Management
378
378
 
379
379
  You have access to the \`context-mode\` MCP tools (\`batch_execute\`, \`execute\`, \`execute_file\`, \`search\`, \`fetch_and_index\`, \`index\`) which compress tool output to save context window space. For data-heavy operations (web fetches, large file reads, CLI output processing), prefer these over raw Bash/WebFetch to avoid flooding your context window with raw output.
380
+
381
+ When a tool returns more than a few dozen lines — JSON payloads, log tails, search results, API responses — route it through \`ctx_execute\` or \`ctx_batch_execute\` so only the derived answer enters your conversation. This is especially important for tasks that make many Bash/Read/MCP calls in sequence; each raw response compounds context pressure.
382
+
383
+ ### Agent Scripts — for bulk, repetitive, or data-heavy work
384
+
385
+ Use **scripts** (\`script-upsert\` + \`script-run\`) when a task involves repetitive SDK calls, large data processing, or deterministic multi-step pipelines. Scripts run out-of-process and return only their final result — none of the intermediate output floods your context window.
386
+
387
+ **Decision rubric — when to use scripts vs. other approaches:**
388
+
389
+ | Situation | Preferred approach |
390
+ |---|---|
391
+ | 1–10 SDK calls, result fits in context | Direct tool call |
392
+ | 10+ items, bulk/fan-out SDK ops | **Script** (\`script-run\` with inline source or named) |
393
+ | Heavy data (fetch + parse + transform) | **Script** or \`ctx_*\` (context-mode) |
394
+ | Single expensive web fetch | \`ctx_fetch_and_index\` (context-mode) |
395
+ | Multi-agent fan-out, parallel work, deterministic pipeline | **Workflow** |
396
+ | One-off bash/TS with no reuse needed | \`code-mode run\` (Bash) |
397
+ | Same logic needed across sessions/agents | **Named script** (\`script-upsert\` + reuse) |
398
+
399
+ The 5 script tools (\`script-search\`, \`script-run\`, \`script-upsert\`, \`script-delete\`, \`script-query-types\`) are deferred tools. Call ToolSearch to load \`script-upsert\`, \`script-run\`, and \`script-query-types\` before using them.
400
+
401
+ **Key gotchas:**
402
+ - \`agentId\` IS propagated to scripts via the \`X-Agent-ID\` header.
403
+ - \`taskId\` is NOT propagated to scripts — there is no ambient task context. Pass \`taskId\` explicitly via \`args\` if the script needs to call \`ctx.swarm.task_storeProgress\`.
404
+ - Use \`script-query-types\` to inspect the live \`swarm-sdk.d.ts\` before authoring a complex script.
380
405
  `,
381
406
  variables: [],
382
407
  category: "system",
@@ -540,6 +565,29 @@ When working in a repository, your system prompt may include a **Repository Guid
540
565
  category: "system",
541
566
  });
542
567
 
568
+ // ============================================================================
569
+ // Per-task prompt templates (category: "task_lifecycle")
570
+ // ============================================================================
571
+
572
// Registers the `task.requester.profile` template (category "task_lifecycle"): a "Requester Profile"
// section built from the requester's display name plus the already-formatted role suffix and notes section.
+ registerTemplate({
573
+ eventType: "task.requester.profile",
574
+ header: "",
575
+ defaultBody: `
576
+ ## Requester Profile
577
+ This task was requested by {{requester_name}}{{requester_role_suffix}}.{{requester_notes_section}}
578
+ Honor this requester profile in tone, depth, and format where it doesn't conflict with correctness or your operating rules.
579
+ `,
580
+ variables: [
581
+ { name: "requester_name", description: "The requesting user's display name" },
582
+ { name: "requester_role_suffix", description: "Formatted role suffix, including parentheses" },
583
+ {
584
+ name: "requester_notes_section",
585
+ description: "Formatted notes section sourced from users.notes, or empty string",
586
+ },
587
+ ],
588
+ category: "task_lifecycle",
589
+ });
590
+
543
591
  // ============================================================================
544
592
  // Composite session templates (category: "session")
545
593
  // ============================================================================
@@ -586,6 +634,32 @@ registerTemplate({
586
634
  category: "session",
587
635
  });
588
636
 
637
+ // Pi-specific worker composite. Identical to `system.session.worker` except it
638
+ // OMITS the `system.agent.context_mode` block — pi has no context-mode MCP
639
+ // wiring yet (deferred to DES-514), so advertising the `ctx_*` tools to pi
640
+ // workers would point at phantom tools. `getBasePrompt` selects this composite
641
+ // when `provider === 'pi'`; all other local providers (claude, codex, opencode)
642
+ // keep the context_mode block via `system.session.worker`.
643
// Composite body: eight nested `system.agent.*` blocks (role, register, worker, filesystem,
// self_awareness, system, share_urls, code_quality) with no `system.agent.context_mode` entry.
+ registerTemplate({
644
+ eventType: "system.session.worker.pi",
645
+ header: "",
646
+ defaultBody: `{{@template[system.agent.role]}}
647
+
648
+ {{@template[system.agent.register]}}
649
+ {{@template[system.agent.worker]}}
650
+ {{@template[system.agent.filesystem]}}
651
+ {{@template[system.agent.self_awareness]}}
652
+
653
+ {{@template[system.agent.system]}}
654
+ {{@template[system.agent.share_urls]}}
655
+ {{@template[system.agent.code_quality]}}`,
656
+ variables: [
657
+ { name: "role", description: "The agent's role" },
658
+ { name: "agentId", description: "The agent's unique identifier" },
659
+ ],
660
+ category: "session",
661
+ });
662
+
589
663
  // ============================================================================
590
664
  // Remote provider templates (no MCP, no Docker container)
591
665
  // ============================================================================
@@ -16,6 +16,7 @@ import {
16
16
  } from "../utils/error-tracker";
17
17
  import { fetchInstalledMcpServers } from "../utils/mcp-server-fetcher";
18
18
  import { scrubSecrets } from "../utils/secret-scrubber";
19
+ import { CTX_MODE_NUDGE_EVERY } from "./ctx-mode-env";
19
20
  import { buildOtelTraceparentEnv, isHarnessOtelEnabled } from "./otel-env";
20
21
  import type {
21
22
  CostData,
@@ -256,6 +257,23 @@ export async function createSessionMcpConfig(
256
257
 
257
258
  if (Object.keys(mergedServers).length === 0 && !installedServers) return null;
258
259
 
260
+ // Inject the context-mode stdio MCP server so its `ctx_*` tools survive
261
+ // `--strict-mcp-config` (which restricts Claude to this file and structurally
262
+ // excludes plugin-provided MCP servers). The plugin's hooks still fire via the
263
+ // installed Claude plugin — strict-mcp-config only suppresses MCP servers, not
264
+ // hooks. Placed BEFORE mergeMcpConfig so an API-installed server can still
265
+ // override it (unlikely, but safe). Gated by CONTEXT_MODE_DISABLED so builds
266
+ // and deploys without context-mode don't break.
267
+ //
268
+ // Server key uses the plugin naming convention (`plugin_context-mode_context-mode`)
269
+ // so that the resulting tool names (`mcp__plugin_context-mode_context-mode__ctx_*`)
270
+ // match the names the plugin's hooks reference in guidance text. With the bare
271
+ // key `context-mode`, the tools would be `mcp__context-mode__ctx_*` — callable,
272
+ // but invisible to the hook nudges that point agents at the plugin-prefixed name.
273
+ if (process.env.CONTEXT_MODE_DISABLED !== "true") {
274
+ mergedServers["plugin_context-mode_context-mode"] = { command: "context-mode" };
275
+ }
276
+
259
277
  try {
260
278
  const config = mergeMcpConfig({ mcpServers: mergedServers }, installedServers ?? null, taskId);
261
279
  const sessionConfigPath = `/tmp/mcp-${taskId}.json`;
@@ -399,6 +417,7 @@ class ClaudeSession implements ProviderSession {
399
417
  ...(sourceEnv.CLAUDE_CODE_OAUTH_TOKEN
400
418
  ? { AGENT_SWARM_CLAUDE_OAUTH_TOKEN: sourceEnv.CLAUDE_CODE_OAUTH_TOKEN }
401
419
  : {}),
420
+ CONTEXT_MODE_EXTERNAL_MCP_NUDGE_EVERY: CTX_MODE_NUDGE_EVERY,
402
421
  } as Record<string, string>,
403
422
  stdout: "pipe",
404
423
  stderr: "pipe",
@@ -82,6 +82,7 @@ import { credentialsToAuthJson } from "./codex-oauth/auth-json.js";
82
82
  import { getValidCodexOAuth } from "./codex-oauth/storage.js";
83
83
  import { resolveCodexPrompt } from "./codex-skill-resolver";
84
84
  import { createCodexSwarmEventHandler } from "./codex-swarm-events";
85
+ import { CTX_MODE_NUDGE_EVERY } from "./ctx-mode-env";
85
86
  import { buildOtelTraceparentEnv } from "./otel-env";
86
87
  import type {
87
88
  CostData,
@@ -351,15 +352,34 @@ export async function buildCodexConfig(
351
352
  }
352
353
  }
353
354
 
355
+ // (4) context-mode — pre-installed stdio MCP server providing the `ctx_*`
356
+ // context-compression tools. Gated by `CONTEXT_MODE_DISABLED` so builds /
357
+ // deploys without the `context-mode` binary on PATH don't break the session.
358
+ // Same entry shape as the swarm + installed-server stdio entries above.
359
+ if (process.env.CONTEXT_MODE_DISABLED !== "true") {
360
+ mcpServers["context-mode"] = {
361
+ command: "context-mode",
362
+ enabled: true,
363
+ startup_timeout_sec: 30,
364
+ tool_timeout_sec: 120,
365
+ };
366
+ }
367
+
354
368
  // (1) Baseline overrides. Keep these aligned with the Dockerfile baseline
355
369
  // at `~/.codex/config.toml` (Phase 6). Repeating them here makes local dev
356
370
  // (no baseline file) behave identically to the Docker worker.
371
+ //
372
+ // `features.hooks` / `features.plugin_hooks` enable Codex's hook system and
373
+ // the hooks contributed by installed Codex plugins (context-mode's plugin:
374
+ // routing injection, PreToolUse safety blocks, output capture). The SDK
375
+ // flattens these to `--config features.hooks=true` / `features.plugin_hooks=true`.
357
376
  return {
358
377
  model,
359
378
  approval_policy: "never",
360
379
  sandbox_mode: "danger-full-access",
361
380
  skip_git_repo_check: true,
362
381
  show_raw_agent_reasoning: false,
382
+ features: { hooks: true, plugin_hooks: true },
363
383
  mcp_servers: mcpServers as CodexConfig,
364
384
  };
365
385
  }
@@ -1246,6 +1266,7 @@ export async function createInProcessCodexSession(
1246
1266
  ...(process.env.NODE_EXTRA_CA_CERTS
1247
1267
  ? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
1248
1268
  : {}),
1269
+ CONTEXT_MODE_EXTERNAL_MCP_NUDGE_EVERY: CTX_MODE_NUDGE_EVERY,
1249
1270
  ...(config.env ?? {}),
1250
1271
  // Gated cross-service OTel linking: when SWARM_ENABLE_HARNESS_OTEL (or
1251
1272
  // the deprecated SWARM_ENABLE_CLAUDE_CODE_OTEL alias) is on, inject
@@ -1420,6 +1441,7 @@ class CodexSubprocessSession implements ProviderSession {
1420
1441
  ? { CODEX_PATH_OVERRIDE: process.env.CODEX_PATH_OVERRIDE }
1421
1442
  : {}),
1422
1443
  ...(process.env.CODEX_SKILLS_DIR ? { CODEX_SKILLS_DIR: process.env.CODEX_SKILLS_DIR } : {}),
1444
+ CONTEXT_MODE_EXTERNAL_MCP_NUDGE_EVERY: CTX_MODE_NUDGE_EVERY,
1423
1445
  ...(process.env.SKIP_SESSION_SUMMARY
1424
1446
  ? { SKIP_SESSION_SUMMARY: process.env.SKIP_SESSION_SUMMARY }
1425
1447
  : {}),
@@ -0,0 +1,10 @@
1
/**
 * Shared context-mode plugin env config for harness subprocesses.
 *
 * The `context-mode` MCP plugin reads `CONTEXT_MODE_EXTERNAL_MCP_NUDGE_EVERY`
 * to decide how often to surface its external-MCP guidance nudge (default 10).
 * We lower it to 3 to increase adoption. All three adapters (claude, codex,
 * opencode) inject this into the subprocess env.
 */

/** Value used when the operator has not provided a usable override. */
const DEFAULT_CTX_MODE_NUDGE_EVERY = "3";

/**
 * Picks the nudge interval to forward to harness subprocesses.
 *
 * An unset, empty, or whitespace-only value falls back to the default, so a
 * blank env entry (e.g. `VAR=` in an env file) is not forwarded as an empty
 * string. Any other value is forwarded trimmed.
 *
 * @param raw - Raw value of `CONTEXT_MODE_EXTERNAL_MCP_NUDGE_EVERY`, if any.
 * @returns The string to place in the subprocess environment.
 */
function resolveCtxModeNudgeEvery(raw: string | undefined): string {
  const trimmed = raw?.trim();
  return trimmed ? trimmed : DEFAULT_CTX_MODE_NUDGE_EVERY;
}

/** Nudge interval injected as `CONTEXT_MODE_EXTERNAL_MCP_NUDGE_EVERY`; read once at module load. */
export const CTX_MODE_NUDGE_EVERY = resolveCtxModeNudgeEvery(
  process.env.CONTEXT_MODE_EXTERNAL_MCP_NUDGE_EVERY,
);