mercury-agent 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +438 -0
  3. package/container/Dockerfile +127 -0
  4. package/container/Dockerfile.base +109 -0
  5. package/container/Dockerfile.power +17 -0
  6. package/container/agent-package.json +8 -0
  7. package/container/build.sh +54 -0
  8. package/docs/TODOS.md +147 -0
  9. package/docs/auth/dashboard.md +28 -0
  10. package/docs/auth/overview.md +109 -0
  11. package/docs/auth/whatsapp.md +173 -0
  12. package/docs/configuration.md +54 -0
  13. package/docs/container-lifecycle.md +349 -0
  14. package/docs/context-architecture.md +87 -0
  15. package/docs/deployment.md +199 -0
  16. package/docs/extensions.md +375 -0
  17. package/docs/graceful-shutdown.md +62 -0
  18. package/docs/kb-distillation.md +77 -0
  19. package/docs/media/overview.md +140 -0
  20. package/docs/media/whatsapp.md +171 -0
  21. package/docs/memory.md +137 -0
  22. package/docs/permissions.md +217 -0
  23. package/docs/pipeline.md +228 -0
  24. package/docs/prd-chat-memory.md +76 -0
  25. package/docs/prd-config-load.md +82 -0
  26. package/docs/rate-limiting.md +166 -0
  27. package/docs/scheduler.md +288 -0
  28. package/docs/setup-discord.md +100 -0
  29. package/docs/setup-slack.md +119 -0
  30. package/docs/setup-whatsapp.md +94 -0
  31. package/docs/subagents.md +166 -0
  32. package/docs/web-search.md +62 -0
  33. package/examples/extensions/README.md +12 -0
  34. package/examples/extensions/charts/index.ts +13 -0
  35. package/examples/extensions/charts/skill/SKILL.md +98 -0
  36. package/examples/extensions/gws/README.md +52 -0
  37. package/examples/extensions/gws/index.ts +106 -0
  38. package/examples/extensions/gws/skill/SKILL.md +57 -0
  39. package/examples/extensions/gws/skill/references/calendar.md +101 -0
  40. package/examples/extensions/gws/skill/references/docs.md +65 -0
  41. package/examples/extensions/gws/skill/references/drive.md +79 -0
  42. package/examples/extensions/gws/skill/references/gmail.md +85 -0
  43. package/examples/extensions/gws/skill/references/sheets.md +60 -0
  44. package/examples/extensions/napkin/index.ts +821 -0
  45. package/examples/extensions/napkin/prompts/consolidation-monthly.md +73 -0
  46. package/examples/extensions/napkin/prompts/consolidation-weekly.md +67 -0
  47. package/examples/extensions/napkin/prompts/kb-distillation.md +176 -0
  48. package/examples/extensions/napkin/skill/SKILL.md +728 -0
  49. package/examples/extensions/pdf/index.ts +23 -0
  50. package/examples/extensions/pdf/skill/LICENSE.txt +30 -0
  51. package/examples/extensions/pdf/skill/SKILL.md +314 -0
  52. package/examples/extensions/pdf/skill/forms.md +294 -0
  53. package/examples/extensions/pdf/skill/reference.md +612 -0
  54. package/examples/extensions/pdf/skill/scripts/check_bounding_boxes.py +65 -0
  55. package/examples/extensions/pdf/skill/scripts/check_fillable_fields.py +11 -0
  56. package/examples/extensions/pdf/skill/scripts/convert_pdf_to_images.py +33 -0
  57. package/examples/extensions/pdf/skill/scripts/create_validation_image.py +37 -0
  58. package/examples/extensions/pdf/skill/scripts/extract_form_field_info.py +122 -0
  59. package/examples/extensions/pdf/skill/scripts/extract_form_structure.py +115 -0
  60. package/examples/extensions/pdf/skill/scripts/fill_fillable_fields.py +98 -0
  61. package/examples/extensions/pdf/skill/scripts/fill_pdf_form_with_annotations.py +107 -0
  62. package/examples/extensions/permission-guard/index.ts +65 -0
  63. package/examples/extensions/pinchtab/index.ts +199 -0
  64. package/examples/extensions/pinchtab/lib/session-injector.ts +144 -0
  65. package/examples/extensions/pinchtab/skill/SKILL.md +224 -0
  66. package/examples/extensions/pinchtab/skill/TRUST.md +69 -0
  67. package/examples/extensions/pinchtab/skill/references/api.md +297 -0
  68. package/examples/extensions/pinchtab/skill/references/env.md +45 -0
  69. package/examples/extensions/pinchtab/skill/references/profiles.md +107 -0
  70. package/examples/extensions/tradestation/host/refresh.ts +102 -0
  71. package/examples/extensions/tradestation/index.ts +153 -0
  72. package/examples/extensions/tradestation/skill/SKILL.md +67 -0
  73. package/examples/extensions/tradestation/skill/scripts/ts-cli.ts +111 -0
  74. package/examples/extensions/voice-synth/index.ts +94 -0
  75. package/examples/extensions/voice-synth/skill/SKILL.md +38 -0
  76. package/examples/extensions/voice-transcribe/index.ts +381 -0
  77. package/examples/extensions/voice-transcribe/requirements.txt +8 -0
  78. package/examples/extensions/voice-transcribe/scripts/transcribe.py +179 -0
  79. package/examples/extensions/voice-transcribe/skill/SKILL.md +53 -0
  80. package/examples/extensions/web-search/index.ts +22 -0
  81. package/examples/extensions/web-search/skill/SKILL.md +114 -0
  82. package/examples/extensions/web-search/skill/references/apartments.md +178 -0
  83. package/examples/extensions/web-search/skill/references/car-purchase.md +132 -0
  84. package/examples/extensions/web-search/skill/references/car-rental.md +113 -0
  85. package/examples/extensions/web-search/skill/references/flights.md +133 -0
  86. package/examples/extensions/web-search/skill/references/hotels.md +148 -0
  87. package/examples/extensions/yahoo-mail/cli/bun.lock +66 -0
  88. package/examples/extensions/yahoo-mail/cli/package.json +13 -0
  89. package/examples/extensions/yahoo-mail/cli/ymail.mjs +353 -0
  90. package/examples/extensions/yahoo-mail/index.ts +57 -0
  91. package/examples/extensions/yahoo-mail/skill/SKILL.md +78 -0
  92. package/package.json +106 -0
  93. package/resources/agents/explore.md +50 -0
  94. package/resources/agents/worker.md +24 -0
  95. package/resources/builtin-extensions.txt +3 -0
  96. package/resources/connection-env-vars.json +25 -0
  97. package/resources/extensions/.gitkeep +0 -0
  98. package/resources/pi-extensions/subagent/agents.ts +126 -0
  99. package/resources/pi-extensions/subagent/index.ts +964 -0
  100. package/resources/profiles/coding/AGENTS.md +43 -0
  101. package/resources/profiles/coding/mercury-profile.yaml +15 -0
  102. package/resources/profiles/general/AGENTS.md +31 -0
  103. package/resources/profiles/general/mercury-profile.yaml +15 -0
  104. package/resources/profiles/research/AGENTS.md +40 -0
  105. package/resources/profiles/research/mercury-profile.yaml +15 -0
  106. package/resources/skills/config/SKILL.md +25 -0
  107. package/resources/skills/context/SKILL.md +33 -0
  108. package/resources/skills/conversation-recap/SKILL.md +19 -0
  109. package/resources/skills/media/SKILL.md +27 -0
  110. package/resources/skills/mutes/SKILL.md +31 -0
  111. package/resources/skills/permissions/SKILL.md +19 -0
  112. package/resources/skills/preferences/SKILL.md +31 -0
  113. package/resources/skills/recall/SKILL.md +24 -0
  114. package/resources/skills/roles/SKILL.md +18 -0
  115. package/resources/skills/spaces/SKILL.md +18 -0
  116. package/resources/skills/tasks/SKILL.md +45 -0
  117. package/resources/templates/AGENTS.md +157 -0
  118. package/resources/templates/env.template +34 -0
  119. package/resources/templates/mercury.example.yaml +75 -0
  120. package/src/adapters/discord-native.ts +534 -0
  121. package/src/adapters/discord.ts +38 -0
  122. package/src/adapters/setup.ts +89 -0
  123. package/src/adapters/slack.ts +9 -0
  124. package/src/adapters/whatsapp-media.ts +337 -0
  125. package/src/adapters/whatsapp.ts +629 -0
  126. package/src/agent/api-socket.ts +127 -0
  127. package/src/agent/container-entry.ts +967 -0
  128. package/src/agent/container-error.ts +49 -0
  129. package/src/agent/container-runner.ts +1272 -0
  130. package/src/agent/model-capabilities-core.ts +23 -0
  131. package/src/agent/model-capabilities.ts +231 -0
  132. package/src/agent/pi-failure-class.ts +83 -0
  133. package/src/agent/pi-jsonl-parser.ts +306 -0
  134. package/src/agent/preferences-prompt.ts +20 -0
  135. package/src/agent/user-error-messages.ts +78 -0
  136. package/src/bridges/discord.ts +171 -0
  137. package/src/bridges/slack.ts +177 -0
  138. package/src/bridges/teams.ts +160 -0
  139. package/src/bridges/telegram.ts +571 -0
  140. package/src/bridges/whatsapp.ts +290 -0
  141. package/src/chat-shim.ts +259 -0
  142. package/src/cli/mercury.ts +2508 -0
  143. package/src/cli/mrctl-http.ts +27 -0
  144. package/src/cli/mrctl.ts +611 -0
  145. package/src/cli/whatsapp-auth.ts +260 -0
  146. package/src/config-file.ts +397 -0
  147. package/src/config-model-chain.ts +30 -0
  148. package/src/config.ts +316 -0
  149. package/src/core/api-types.ts +58 -0
  150. package/src/core/api.ts +105 -0
  151. package/src/core/commands.ts +76 -0
  152. package/src/core/conversation.ts +47 -0
  153. package/src/core/handler.ts +206 -0
  154. package/src/core/media.ts +200 -0
  155. package/src/core/mute-duration.ts +22 -0
  156. package/src/core/outbox.ts +76 -0
  157. package/src/core/permissions.ts +192 -0
  158. package/src/core/profiles.ts +245 -0
  159. package/src/core/rate-limiter.ts +127 -0
  160. package/src/core/router.ts +191 -0
  161. package/src/core/routes/chat.ts +172 -0
  162. package/src/core/routes/config-builtin.ts +107 -0
  163. package/src/core/routes/config.ts +81 -0
  164. package/src/core/routes/connections.ts +190 -0
  165. package/src/core/routes/console.ts +668 -0
  166. package/src/core/routes/control.ts +46 -0
  167. package/src/core/routes/conversations.ts +66 -0
  168. package/src/core/routes/dashboard.ts +2491 -0
  169. package/src/core/routes/extensions.ts +37 -0
  170. package/src/core/routes/index.ts +14 -0
  171. package/src/core/routes/media.ts +72 -0
  172. package/src/core/routes/messages.ts +37 -0
  173. package/src/core/routes/mutes.ts +89 -0
  174. package/src/core/routes/prefs.ts +95 -0
  175. package/src/core/routes/roles.ts +125 -0
  176. package/src/core/routes/spaces.ts +60 -0
  177. package/src/core/routes/storage.ts +126 -0
  178. package/src/core/routes/tasks.ts +189 -0
  179. package/src/core/routes/tradestation.ts +268 -0
  180. package/src/core/routes/tts.ts +51 -0
  181. package/src/core/runtime.ts +1140 -0
  182. package/src/core/space-queue.ts +103 -0
  183. package/src/core/storage-cleanup.ts +140 -0
  184. package/src/core/storage-guard.ts +24 -0
  185. package/src/core/task-scheduler.ts +132 -0
  186. package/src/core/telegram-format.ts +178 -0
  187. package/src/core/trigger.ts +142 -0
  188. package/src/dashboard/index.html +729 -0
  189. package/src/dashboard/tokens.css +53 -0
  190. package/src/extensions/api.ts +252 -0
  191. package/src/extensions/catalog.ts +117 -0
  192. package/src/extensions/config-registry.ts +83 -0
  193. package/src/extensions/context.ts +36 -0
  194. package/src/extensions/hooks.ts +156 -0
  195. package/src/extensions/image-builder.ts +617 -0
  196. package/src/extensions/installer.ts +306 -0
  197. package/src/extensions/jobs.ts +122 -0
  198. package/src/extensions/loader.ts +271 -0
  199. package/src/extensions/permission-guard.ts +52 -0
  200. package/src/extensions/reserved.ts +28 -0
  201. package/src/extensions/skills.ts +123 -0
  202. package/src/extensions/types.ts +462 -0
  203. package/src/logger.ts +174 -0
  204. package/src/main.ts +586 -0
  205. package/src/server.ts +391 -0
  206. package/src/storage/db.ts +1624 -0
  207. package/src/storage/memory.ts +45 -0
  208. package/src/storage/pi-auth.ts +95 -0
  209. package/src/text/markdown.ts +117 -0
  210. package/src/text/rtl.ts +38 -0
  211. package/src/tradestation/host-api.ts +77 -0
  212. package/src/tradestation/pending-orders.ts +69 -0
  213. package/src/tts/azure.ts +52 -0
  214. package/src/tts/google.ts +128 -0
  215. package/src/tts/index.ts +8 -0
  216. package/src/tts/language.ts +20 -0
  217. package/src/tts/synthesize.ts +133 -0
  218. package/src/types.ts +295 -0
@@ -0,0 +1,1272 @@
1
+ import { execFileSync, execSync, spawn } from "node:child_process";
2
+ import { randomBytes } from "node:crypto";
3
+ import fs from "node:fs";
4
+ import path, { dirname } from "node:path";
5
+ import { fileURLToPath } from "node:url";
6
+ import { type AppConfig, resolveProjectPath } from "../config.js";
7
+ import { scanOutbox } from "../core/outbox.js";
8
+ import type { ExtImageBuildState } from "../extensions/image-builder.js";
9
+ import { type Logger, logger } from "../logger.js";
10
+ import { getApiKeyFromPiAuthFile } from "../storage/pi-auth.js";
11
+ import type {
12
+ ContainerResult,
13
+ MessageAttachment,
14
+ StoredMessage,
15
+ TokenUsage,
16
+ } from "../types.js";
17
+ import {
18
+ apiSocketDir,
19
+ INNER_RUN_DIR,
20
+ innerApiSocketPath,
21
+ } from "./api-socket.js";
22
+ import { ContainerError } from "./container-error.js";
23
+
/**
 * Mountpoint, inside the inner container, of the per-message IO directory.
 *
 * The host drops the request payload there as `input.json`, and the inner
 * container answers by writing `result.json` into the same directory. This file
 * exchange is the reply channel that stands in for the inner-container attach
 * stream: the inner container is launched detached (`docker create` +
 * `docker start`, no attach) because that is the only pattern that works through
 * the Bun `fetch()`-based body-proxy used by the cloud agent lane. The proxy
 * cannot carry Docker's hijacked attach connection, so an attached run hangs
 * until its idleTimeout (see
 * docs/debug/major/2026-05-25-agent-lane-docker-run-wait-hang-no-chat-response.md).
 */
const INNER_IO_DIR = "/run/mercury-io";

/** Delay (ms) between two checks for the inner container's result file. */
const RESULT_POLL_MS = 150;

/**
 * Number of polls between two `docker inspect` liveness probes. With the poll
 * interval above this is roughly every 2s, so a crashed container fails fast.
 */
const LIVENESS_EVERY = 14;

/** Default time budget (ms) for short Docker CLI commands (create, start, inspect, kill). */
const EXEC_DOCKER_TIMEOUT_MS = 20 * 1000;
42
+
/**
 * Pause for `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires; it never rejects.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
46
+
/**
 * Run a short, non-streaming `docker` command and capture its result. Used for
 * `create` / `start` / `inspect` / `kill` — all plain request/response Docker API
 * calls (no `/wait`, no attach), which the body-proxy forwards cleanly.
 *
 * Never rejects: a spawn error is reported as exit code `1` with the error text
 * in `stderr`, so callers branch on a single result shape.
 *
 * @param args Arguments passed to the `docker` binary (no shell is involved).
 * @param timeoutMs Time budget; when it elapses the child is sent SIGKILL. Guards
 *   against a wedged daemon/proxy connection.
 * @returns `code` (exit code, or `1` when the process ended without one),
 *   the captured `stdout` / `stderr` text, and `timedOut`, which is true only
 *   when the timeout fired.
 */
function execDocker(
  args: string[],
  timeoutMs = EXEC_DOCKER_TIMEOUT_MS,
): Promise<{
  code: number;
  stdout: string;
  stderr: string;
  timedOut: boolean;
}> {
  return new Promise((resolve) => {
    const proc = spawn("docker", args, { stdio: ["ignore", "pipe", "pipe"] });

    // Decode through the stream's own decoder: it buffers incomplete multi-byte
    // UTF-8 sequences across chunk boundaries, whereas converting each Buffer
    // chunk independently would corrupt a character split between two chunks.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";
    let killed = false;
    let timer: ReturnType<typeof setTimeout> | null = setTimeout(() => {
      killed = true;
      try {
        proc.kill("SIGKILL");
      } catch {
        // already exited
      }
    }, timeoutMs);

    // May run twice (`error` followed by `close`); only the first call settles
    // the promise, later calls are harmless no-ops.
    const done = (code: number, errOverride?: string) => {
      if (timer) {
        clearTimeout(timer);
        timer = null;
      }
      resolve({
        code,
        stdout,
        stderr: errOverride ?? stderr,
        timedOut: killed,
      });
    };

    proc.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    proc.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });
    // Spawn failure (e.g. binary missing): prefer captured stderr, else the error text.
    proc.on("error", (error) => done(1, stderr || String(error)));
    // `code` is null when the child was terminated by a signal; report that as 1.
    proc.on("close", (code) => done(code ?? 1));
  });
}
98
+
// Anthropic OAuth endpoint and client id. These are copies of the values in
// console/src/lib/oauth.ts, duplicated here to avoid a cross-package import.
const ANTHROPIC_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token";
const ANTHROPIC_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e";

/** Anthropic OAuth credential blob as handled by this module. */
type AnthropicOAuthCreds = {
  /** Bearer access token. */
  access: string;
  /** Refresh token used to mint a new access token. */
  refresh: string;
  /** Expiry as a millisecond timestamp (compared against `Date.now()`-style values). */
  expires: number;
};

// Throttle for the token refresh endpoint: at most one refresh attempt per
// minute across all spawns in this process lifetime, so 429 responses do not
// turn into a retry storm.
const OAUTH_REFRESH_COOLDOWN_MS = 60 * 1000;
let lastOAuthRefreshAttemptAt = 0;
109
+
/**
 * Write a freshly refreshed Anthropic OAuth credential back to the console DB so
 * that the next rolling deploy starts from a valid token.
 *
 * Best-effort: an HTTP error status or a network failure is logged as a warning
 * and swallowed, so this function never throws. The request is bounded by a 3s
 * timeout, so a caller that awaits it learns the outcome without being blocked
 * indefinitely.
 *
 * @param consoleUrl Base URL of the console; `/api/agent/oauth-token` is appended.
 * @param internalSecret Sent as the `Authorization: Bearer` credential.
 * @param agentId Agent the credential belongs to.
 * @param creds Credential blob to persist.
 */
async function pushOAuthTokenToConsole(
  consoleUrl: string,
  internalSecret: string,
  agentId: string,
  creds: AnthropicOAuthCreds,
): Promise<void> {
  try {
    const response = await fetch(`${consoleUrl}/api/agent/oauth-token`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${internalSecret}`,
      },
      body: JSON.stringify({
        agentId,
        provider: "anthropic",
        access: creds.access,
        refresh: creds.refresh,
        expires: creds.expires,
      }),
      signal: AbortSignal.timeout(3_000),
    });

    if (response.ok) {
      logger.debug("OAuth token written back to console DB");
      return;
    }

    // Include a bounded excerpt of the response body to aid diagnosis.
    const responseText = await response.text().catch(() => "");
    logger.warn("OAuth token write-back to console failed", {
      status: response.status,
      body: responseText.slice(0, 200),
    });
  } catch (err) {
    logger.warn(
      "OAuth token write-back to console failed (network error)",
      err instanceof Error ? err : undefined,
    );
  }
}
154
+
/**
 * Read the current Anthropic OAuth credential blob from the console DB.
 *
 * Intended for use after an `invalid_grant` refresh failure: the user may have
 * reconnected in the console already, in which case the DB holds a newer token
 * than this container knows about.
 *
 * @param consoleUrl Base URL of the console; `/api/agent/oauth-token` is appended.
 * @param internalSecret Sent as the `Authorization: Bearer` credential.
 * @param agentId Agent whose credential is requested (sent as a query parameter).
 * @returns The credential blob, or `null` on any failure: network error,
 *   timeout (3s), non-2xx status, unparsable body, or a body that lacks a
 *   non-empty string `access`, a non-empty string `refresh`, or a numeric `expires`.
 */
async function fetchOAuthTokenFromConsole(
  consoleUrl: string,
  internalSecret: string,
  agentId: string,
): Promise<AnthropicOAuthCreds | null> {
  try {
    const endpoint = new URL(`${consoleUrl}/api/agent/oauth-token`);
    endpoint.searchParams.set("agentId", agentId);
    endpoint.searchParams.set("provider", "anthropic");

    const response = await fetch(endpoint.toString(), {
      headers: { Authorization: `Bearer ${internalSecret}` },
      signal: AbortSignal.timeout(3_000),
    });
    if (!response.ok) return null;

    // The payload is untrusted until each field has been checked below.
    const { access, refresh, expires } = (await response.json()) as {
      access?: unknown;
      refresh?: unknown;
      expires?: unknown;
    };
    if (typeof access !== "string" || access === "") return null;
    if (typeof refresh !== "string" || refresh === "") return null;
    if (typeof expires !== "number") return null;

    return { access, refresh, expires };
  } catch {
    return null;
  }
}
198
+
/**
 * Upper bound (ms) for the token-refresh round trip. Without it a hung
 * connection to the token endpoint would stall the awaiting caller indefinitely.
 */
const ANTHROPIC_REFRESH_TIMEOUT_MS = 10_000;

/** Access-token lifetime (seconds) assumed when the response omits a usable `expires_in`. */
const ANTHROPIC_DEFAULT_EXPIRES_IN_S = 3600;

/**
 * Exchange a refresh token for a new Anthropic access token.
 *
 * @param creds Current credential blob; only `refresh` is sent to the endpoint.
 * @returns A new blob: the returned access token, the rotated refresh token (or
 *   the previous one when the response carries none), and an absolute expiry in
 *   milliseconds computed from `expires_in`.
 * @throws Error when the request times out or fails, when the endpoint answers
 *   with a non-2xx status (the message embeds the status and response body, so
 *   an `invalid_grant` error code remains detectable by callers), or when the
 *   response has no access token.
 */
async function refreshAnthropicOAuth(
  creds: AnthropicOAuthCreds,
): Promise<AnthropicOAuthCreds> {
  const res = await fetch(ANTHROPIC_TOKEN_URL, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      grant_type: "refresh_token",
      refresh_token: creds.refresh,
      client_id: ANTHROPIC_CLIENT_ID,
    }),
    signal: AbortSignal.timeout(ANTHROPIC_REFRESH_TIMEOUT_MS),
  });
  if (!res.ok) {
    const body = await res.text().catch(() => "");
    throw new Error(`Anthropic OAuth refresh failed (${res.status}): ${body}`);
  }

  const data = (await res.json()) as {
    access_token?: unknown;
    refresh_token?: unknown;
    expires_in?: unknown;
  } | null;
  const accessToken = data?.access_token;
  if (typeof accessToken !== "string" || accessToken === "") {
    throw new Error("Anthropic refresh response missing access_token");
  }

  // A non-numeric or non-finite `expires_in` would produce a NaN expiry, which
  // makes every later "is it about to expire?" comparison false; use the default.
  const rawExpiresIn = data?.expires_in;
  const expiresInSeconds =
    typeof rawExpiresIn === "number" && Number.isFinite(rawExpiresIn)
      ? rawExpiresIn
      : ANTHROPIC_DEFAULT_EXPIRES_IN_S;

  const rotatedRefresh = data?.refresh_token;
  return {
    access: accessToken,
    refresh:
      typeof rotatedRefresh === "string" && rotatedRefresh !== ""
        ? rotatedRefresh
        : creds.refresh,
    expires: Date.now() + expiresInSeconds * 1000,
  };
}
228
+
/**
 * Side-effecting collaborators of {@link resolveOAuthCredentialForSpawn}.
 * Grouped into one injectable object so tests can substitute fakes.
 */
export type OAuthSpawnDeps = {
  /** Exchange the refresh token for a new credential blob. */
  refresh: (creds: AnthropicOAuthCreds) => Promise<AnthropicOAuthCreds>;
  /** Persist a refreshed credential blob to the console DB. */
  pushToConsole: (
    consoleUrl: string,
    internalSecret: string,
    agentId: string,
    creds: AnthropicOAuthCreds,
  ) => Promise<void>;
  /** Read the console DB's current credential blob; `null` when unavailable. */
  fetchFromConsole: (
    consoleUrl: string,
    internalSecret: string,
    agentId: string,
  ) => Promise<AnthropicOAuthCreds | null>;
  /** Current time as a millisecond timestamp. */
  now: () => number;
};

/** Production wiring: the real network helpers of this module and the system clock. */
const defaultOAuthSpawnDeps: OAuthSpawnDeps = {
  refresh: refreshAnthropicOAuth,
  pushToConsole: pushOAuthTokenToConsole,
  fetchFromConsole: fetchOAuthTokenFromConsole,
  now: Date.now,
};

/** Outcome of resolving an OAuth credential at container-spawn time. */
export type OAuthSpawnResolution = {
  /** Bare access token to inject as ANTHROPIC_OAUTH_TOKEN into the inner container. */
  access: string;
  /**
   * Serialized replacement credential blob for the in-process env, or `null`
   * when the credential did not change.
   */
  updatedBlob: string | null;
};
259
+
/**
 * Turn an Anthropic OAuth credential blob into a usable bare access token at
 * container-spawn time. This is the single chokepoint for the container-side
 * OAuth credential lifecycle; it covers:
 *  - refreshing the access token when it expires within the next 60s,
 *  - writing the refreshed blob back to the console DB,
 *  - on `invalid_grant`, pulling the current blob from the console DB.
 *
 * Refresh attempts are throttled by the module-level cooldown: when a refresh
 * is due but the cooldown is still active, the current access token is returned
 * unchanged and a warning is logged.
 *
 * @param parsedCreds Credential blob currently held by this process.
 * @param opts Console connection details; write-back needs all three fields, and
 *   so does the `invalid_grant` recovery fetch.
 * @param deps Injectable collaborators (network calls and clock).
 * @returns The access token to use, plus the serialized new blob when the
 *   credential changed (`null` otherwise).
 * @throws Error only when the refresh failed with `invalid_grant` and no newer
 *   credential could be obtained from the console, i.e. the user must reconnect.
 *   Other refresh failures (network, 429, …) fall back to the current access
 *   token so the spawn is not blocked.
 */
export async function resolveOAuthCredentialForSpawn(
  parsedCreds: AnthropicOAuthCreds,
  opts: {
    consoleUrl?: string;
    consoleInternalSecret?: string;
    agentId?: string;
  },
  deps: OAuthSpawnDeps = defaultOAuthSpawnDeps,
): Promise<OAuthSpawnResolution> {
  let access = parsedCreds.access;
  let blob: string | null = null;

  const expiringSoon = deps.now() + 60_000 > parsedCreds.expires;
  const cooledDown =
    deps.now() - lastOAuthRefreshAttemptAt > OAUTH_REFRESH_COOLDOWN_MS;

  // Token still comfortably valid: nothing to do.
  if (!expiringSoon) {
    return { access, updatedBlob: blob };
  }

  // Refresh is due but a recent attempt is still inside the cooldown window.
  if (!cooledDown) {
    logger.warn(
      "Anthropic OAuth token expired; skipping refresh (rate-limit cooldown active)",
    );
    return { access, updatedBlob: blob };
  }

  try {
    // Stamp the attempt before the network call so failures are throttled too.
    lastOAuthRefreshAttemptAt = deps.now();
    const renewed = await deps.refresh(parsedCreds);
    blob = JSON.stringify(renewed);
    access = renewed.access;

    // Persist to console DB so the next rolling deploy reads a valid token.
    if (opts.consoleUrl && opts.consoleInternalSecret && opts.agentId) {
      await deps.pushToConsole(
        opts.consoleUrl,
        opts.consoleInternalSecret,
        opts.agentId,
        renewed,
      );
    } else if (opts.consoleUrl && !opts.consoleInternalSecret) {
      logger.warn(
        "Anthropic OAuth token refreshed but write-back to console is disabled — MERCURY_CONSOLE_INTERNAL_SECRET is not set; refreshed token will be lost on next container restart",
      );
    }
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);

    if (!reason.includes("invalid_grant")) {
      // Transient error (network, 429, etc.) — current access token may still be valid.
      logger.warn(
        "Anthropic OAuth refresh failed at spawn time; using current access token",
        err instanceof Error ? err : undefined,
      );
    } else if (!opts.consoleUrl || !opts.consoleInternalSecret) {
      // No console configured — cannot recover; surface the failure.
      logger.error(
        "Anthropic OAuth refresh failed with invalid_grant; re-authentication required",
      );
      throw new Error(
        "Anthropic OAuth token is invalid (invalid_grant). Please reconnect your Anthropic account.",
      );
    } else if (!opts.agentId) {
      logger.error(
        "Anthropic OAuth refresh failed with invalid_grant; no MERCURY_AGENT_ID set for console fetch",
      );
      throw new Error(
        "Anthropic OAuth token is invalid (invalid_grant). Please reconnect your Anthropic account in the console.",
      );
    } else {
      // The refresh token has been invalidated (user likely reconnected in the
      // console). Ask the console DB for its current blob — it may already hold
      // fresh tokens this container has not seen.
      const fromConsole = await deps.fetchFromConsole(
        opts.consoleUrl,
        opts.consoleInternalSecret,
        opts.agentId,
      );

      if (fromConsole && fromConsole.refresh !== parsedCreds.refresh) {
        // Console has a different (newer) refresh token — user already reconnected.
        blob = JSON.stringify(fromConsole);
        access = fromConsole.access;
        logger.info(
          "Anthropic OAuth invalid_grant recovered from console DB — using fresh token",
        );
      } else if (fromConsole === null) {
        // Console was unreachable — cannot determine if the user has reconnected.
        logger.error(
          "Anthropic OAuth refresh failed with invalid_grant and console credential fetch failed — please reconnect or check connectivity",
          { agentId: opts.agentId },
        );
        throw new Error(
          "Anthropic OAuth token is invalid (invalid_grant) and fresh credentials could not be fetched from the console. Please reconnect your Anthropic account or check the console is reachable.",
        );
      } else {
        // Same token in console — user has not reconnected yet.
        logger.error(
          "Anthropic OAuth refresh failed with invalid_grant and no fresh token is available — user must reconnect in the console",
          { agentId: opts.agentId },
        );
        throw new Error(
          "Anthropic OAuth token is invalid (invalid_grant). Please reconnect your Anthropic account in the console.",
        );
      }
    }
  }

  return { access, updatedBlob: blob };
}
378
+
/** Docker label (`key=value`) attached to containers managed by this runner. */
const CONTAINER_LABEL = "mercury.managed=true";
/** Docker label key whose value is the owning agent's id. */
const AGENT_ID_LABEL_KEY = "mercury.agent-id";

// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
/** Two directory levels above this module's directory. */
const PACKAGE_ROOT = path.join(__dirname, "..", "..");

/** Exit code 137 = 128 + 9 (SIGKILL), typically from the OOM killer. */
const OOM_EXIT_CODE = 128 + 9;
387
+
388
+ export class AgentContainerRunner {
/**
 * Running inner container per space, keyed by space id. Inner containers run
 * detached, so there is no long-lived `docker` child process to keep a handle
 * on: only the container name is tracked. Termination goes through
 * `docker kill <name>`, and the per-message poll loop owns cleanup.
 */
private readonly runningBySpace = new Map<string, { containerName: string }>();
/** Spaces whose in-flight run was aborted (filled by `abort()` and `killAll()`). */
private readonly abortedSpaces = new Set<string>();
/** NOTE(review): presumably spaces whose run hit its timeout — the writer is outside this excerpt. */
private readonly timedOutSpaces = new Set<string>();
/** NOTE(review): presumably used to derive unique container names — usage is outside this excerpt. */
private containerCounter = 0;
/** Background image-build state; when set it decides which image is spawned. */
private buildState: ExtImageBuildState | undefined = undefined;
/** API host handed to inner containers, resolved once at construction. */
private readonly resolvedApiHost: string;

/**
 * @param config Application configuration. The configured image is checked
 *   first (a warning is logged for custom images), then the API host is resolved.
 */
constructor(private readonly config: AppConfig) {
  this.validateImage();
  this.resolvedApiHost = this.resolveApiHost();
}
406
+
/**
 * Work out which API host inner containers are told to use.
 *
 * Background: gVisor (runsc) cannot use Docker's embedded DNS (127.0.0.11 is
 * unreachable from the gVisor network sandbox), so container-name hostnames do
 * not resolve there. An earlier design had the outer container join the shared
 * default bridge (docker0) and hand out its bridge IP, but that made the outer
 * reachable by every neighbor on docker0 and weakened per-agent network
 * isolation.
 *
 * Inner containers now reach the API over a per-agent unix socket (see
 * api-socket.ts), so the outer no longer joins docker0. Under gVisor, API_URL is
 * therefore only a dummy (`http://localhost:<port>`) that mrctl ignores once
 * API_SOCKET is set; it stays non-empty solely so mrctl's presence assertion
 * passes. runc/local keep using the real configured hostname.
 *
 * @returns `"host.docker.internal"` when no host is configured (regardless of
 *   runtime), `"localhost"` for a configured host under `runsc`, otherwise the
 *   configured host.
 */
private resolveApiHost(): string {
  const { containerApiHost, containerRuntime } = this.config;
  if (!containerApiHost) {
    return "host.docker.internal";
  }
  // gVisor: dummy host — the real transport is the unix socket (API_SOCKET).
  return containerRuntime === "runsc" ? "localhost" : containerApiHost;
}
429
+
/**
 * Attach a background image-build state. Its `currentImage()` is consulted on
 * every read of {@link image}, so the image is resolved afresh at each spawn.
 *
 * @param state Build state to consult from now on.
 */
setBuildState(state: ExtImageBuildState): void {
  this.buildState = state;
}
434
+
/**
 * Image used for container spawns: the build state's current image when a
 * build state has been attached, otherwise the configured agent image.
 */
get image(): string {
  return this.buildState
    ? this.buildState.currentImage()
    : this.config.agentContainerImage;
}
440
+
/**
 * Log a warning when the configured image is a custom one that might lack
 * required tools. Known presets — `mercury-agent:*`, optionally behind a
 * registry/namespace prefix — are assumed valid and produce no output.
 * This only warns; it never inspects the image or throws.
 */
private validateImage(): void {
  const image = this.config.agentContainerImage;

  const isKnownPreset =
    image.startsWith("mercury-agent:") || image.includes("/mercury-agent:");
  if (isKnownPreset) return;

  // bubblewrap is only listed as a requirement outside gVisor (runsc).
  const bubblewrapNote =
    this.config.containerRuntime === "runsc" ? "" : ", bubblewrap (runc mode)";

  logger.warn("Using custom agent image", {
    image,
    note: `Ensure image has: bun, pi, mrctl${bubblewrapNote}`,
    docs: "See docs/container-lifecycle.md for custom image requirements",
  });
}
463
+
/**
 * Make sure the agent image exists locally, pulling it when it does not.
 * Should be called on startup before accepting work.
 *
 * Docker is invoked with an argument vector (no shell), so an image reference
 * containing shell metacharacters is passed through verbatim instead of being
 * interpreted. Both calls are synchronous and block until docker finishes or
 * the timeout (10s for inspect, 5min for pull) elapses.
 *
 * @throws Error when the image is not available locally and the pull fails.
 */
async ensureImage(): Promise<void> {
  const image = this.image;
  try {
    execFileSync("docker", ["image", "inspect", image], {
      stdio: "ignore",
      timeout: 10_000,
    });
    logger.debug("Agent image found locally", { image });
  } catch {
    // Any inspect failure is treated as "not present" and answered with a pull.
    logger.info("Agent image not found locally, pulling...", { image });
    try {
      // Inherit stdio so pull progress and docker's own error output are visible.
      execFileSync("docker", ["pull", image], {
        stdio: "inherit",
        timeout: 300_000,
      });
      logger.info("Agent image pulled successfully", { image });
    } catch {
      throw new Error(
        `Failed to pull agent image: ${image}\nRun manually: docker pull ${image}`,
      );
    }
  }
}
491
+
/**
 * @param spaceId Space to look up.
 * @returns Whether a container is currently tracked as running for that space.
 */
isRunning(spaceId: string): boolean {
  const tracked = this.runningBySpace.has(spaceId);
  return tracked;
}
495
+
/**
 * Remove containers left behind by previous runs.
 * Should be called on startup before accepting new work.
 *
 * Containers are selected by the managed label and, when `MERCURY_AGENT_ID` is
 * set, additionally by that agent's id label; both running and stopped
 * containers match. Docker is invoked with an argument vector (no shell), so
 * the environment-provided agent id cannot alter the command line, and a
 * missing `docker` binary surfaces as an `ENOENT` spawn error.
 *
 * @returns Number of containers removed; `0` when there were none or when any
 *   docker command failed (failures are logged, never thrown).
 */
async cleanupOrphans(): Promise<number> {
  try {
    const agentId = process.env.MERCURY_AGENT_ID;
    const filters = ["--filter", `label=${CONTAINER_LABEL}`];
    if (agentId) {
      filters.push("--filter", `label=${AGENT_ID_LABEL_KEY}=${agentId}`);
    }

    // Find containers with our labels (running or stopped).
    const listing = execFileSync(
      "docker",
      ["ps", "-a", ...filters, "--format", "{{.ID}}"],
      { encoding: "utf8", timeout: 10_000 },
    ).trim();
    if (!listing) return 0;

    const containerIds = listing
      .split(/\r?\n/)
      .map((id) => id.trim())
      .filter(Boolean);
    if (containerIds.length === 0) return 0;

    logger.info("Found orphaned containers, cleaning up", {
      count: containerIds.length,
    });

    // Force remove all orphaned containers in one call.
    execFileSync("docker", ["rm", "-f", ...containerIds], {
      encoding: "utf8",
      timeout: 30_000,
    });

    logger.info("Cleaned up orphaned containers", {
      count: containerIds.length,
    });
    return containerIds.length;
  } catch (error) {
    // If the docker command fails (e.g., docker not installed), log and continue.
    const dockerMissing =
      error instanceof Error &&
      ((error as NodeJS.ErrnoException).code === "ENOENT" ||
        error.message.includes("ENOENT"));
    if (dockerMissing) {
      logger.warn("Docker not found, skipping orphan cleanup");
    } else {
      logger.warn(
        "Failed to cleanup orphaned containers",
        error instanceof Error ? error : undefined,
      );
    }
    return 0;
  }
}
544
+
545
/**
 * Kill all running containers using docker kill for reliable termination.
 * Each tracked space is flagged in abortedSpaces before its container is
 * killed, so the corresponding reply() poll loop rejects as aborted.
 *
 * Note: runningBySpace entries are cleaned up by each reply()'s poll loop.
 * During shutdown the loop may not run before exit, but that's fine —
 * Docker cleans up --rm containers regardless once killed.
 */
killAll(): void {
  for (const [spaceId, { containerName }] of this.runningBySpace) {
    this.abortedSpaces.add(spaceId);
    try {
      // Argument vector (no shell): the container name is passed verbatim.
      execFileSync("docker", ["kill", containerName], { timeout: 5000 });
    } catch {
      // docker kill can fail (container already exited/reaped) — the poll loop
      // observes abortedSpaces and unwinds either way.
    }
  }
}
562
+
563
/** Number of spaces that currently have a tracked running container. */
get activeCount(): number {
  const { size } = this.runningBySpace;
  return size;
}
566
+
567
/** Snapshot of the space ids that currently have a tracked running container. */
getActiveSpaces(): string[] {
  return Array.from(this.runningBySpace.keys());
}
570
+
571
/**
 * Abort the in-flight container run for a space.
 *
 * @param spaceId Space whose running container should be terminated.
 * @returns `false` when no container is tracked for the space; `true` once the
 *   abort has been flagged (even if `docker kill` itself failed).
 */
abort(spaceId: string): boolean {
  const entry = this.runningBySpace.get(spaceId);
  if (!entry) return false;

  this.abortedSpaces.add(spaceId);

  // Use docker kill for reliable container termination; the poll loop observes
  // abortedSpaces and rejects the in-flight reply().
  try {
    // Argument vector (no shell): the container name is passed verbatim.
    execFileSync("docker", ["kill", entry.containerName], { timeout: 5000 });
  } catch {
    // docker kill can fail (container already exited/reaped) — abortedSpaces
    // still unwinds the poll loop.
  }
  return true;
}
587
+
588
/**
 * Build a container name of the form `mercury[-<agentId>]-<timestamp>-<counter>`.
 * The per-instance counter is incremented on every call; the agent id segment
 * is included only when MERCURY_AGENT_ID is set.
 */
private generateContainerName(): string {
  this.containerCounter += 1;
  const sequence = this.containerCounter;
  const createdAt = Date.now();

  const segments = ["mercury"];
  const agentId = process.env.MERCURY_AGENT_ID;
  if (agentId) {
    segments.push(agentId);
  }
  segments.push(String(createdAt), String(sequence));
  return segments.join("-");
}
596
+
597
/**
 * Run one agent turn in a fresh, detached inner container and return its result.
 *
 * Steps, in order:
 *  1. Ensure the global / space / per-run IO directories exist and are usable
 *     by uid 1000 (chown, with chmod fallbacks where coded).
 *  2. Assemble the container environment: fixed internal vars, MERCURY_* host
 *     vars with the prefix stripped (minus blocked and claimed ones), Anthropic
 *     credential normalisation, and the host-resolved model chain settings.
 *  3. `docker create` the container, write `input.json` into the IO dir, then
 *     `docker start` it detached.
 *  4. Poll the IO dir for `result.json`, honouring the timeout, abort requests
 *     and a periodic liveness probe.
 *
 * @param input Request for this turn; `spaceId` selects the mounted space
 *   directory, `extraEnv` is appended as additional `-e` flags, and
 *   `claimedEnvSources` names MERCURY_* vars that must not be passed through.
 * @returns The parsed container result (see consumeResult).
 * @throws ContainerError when create/start fails, on timeout, on abort, or
 *   when the liveness probe reports the container died without a result.
 */
async reply(input: {
  spaceId: string;
  spaceWorkspace: string;
  messages: StoredMessage[];
  anchorMessages?: StoredMessage[];
  prompt: string;
  callerId: string;
  callerRole?: string;
  authorName?: string;
  attachments?: MessageAttachment[];
  preferences?: Array<{ key: string; value: string }>;
  extraEnv?: Record<string, string>;
  claimedEnvSources?: Set<string>;
}): Promise<ContainerResult> {
  const globalDir = path.resolve(this.config.globalDir);
  const spacesRoot = path.resolve(this.config.spacesDir);

  fs.mkdirSync(globalDir, { recursive: true });
  fs.mkdirSync(spacesRoot, { recursive: true });
  try {
    execFileSync("chown", ["-R", "1000:1000", globalDir], { stdio: "pipe" });
  } catch {
    // CAP_CHOWN may be unavailable (--cap-drop=ALL without --cap-add=CHOWN).
    // Skills are installed world-readable, so the inner container (uid 1000)
    // can still read them. New containers should have --cap-add=CHOWN.
    logger.warn(
      "chown globalDir failed (CAP_CHOWN unavailable), continuing",
      { globalDir },
    );
  }

  const authFromPi = await getApiKeyFromPiAuthFile({
    provider: this.config.modelProvider,
    authPath: this.config.authPath ?? path.join(globalDir, "auth.json"),
  });

  // Env vars that should never be passed to containers
  const BLOCKED_ENV_VARS = new Set([
    "MERCURY_API_SECRET",
    // Host-only: the inner→outer API socket path is set by code per spawn;
    // never let an agent override which socket mrctl targets.
    "MERCURY_API_SOCKET",
    "MERCURY_CHAT_API_KEY",
    "MERCURY_ADMINS",
    // Host-only: affects `docker run` flags, not the agent process inside the container
    "MERCURY_CONTAINER_BWRAP_DOCKER_COMPAT",
    // Host-only: selects the OCI runtime for `docker run --runtime`; not meaningful inside the container
    "MERCURY_CONTAINER_RUNTIME",
    // Host-only: resolved volume mountpoint on the host; inner containers don't need it
    "MERCURY_HOST_DATA_DIR",
    "MERCURY_SLACK_BOT_TOKEN",
    "MERCURY_SLACK_SIGNING_SECRET",
    "MERCURY_DISCORD_BOT_TOKEN",
    "MERCURY_DISCORD_GATEWAY_SECRET",
    "MERCURY_TELEGRAM_BOT_TOKEN",
    "MERCURY_TELEGRAM_WEBHOOK_SECRET_TOKEN",
    "MERCURY_TEAMS_APP_ID",
    "MERCURY_TEAMS_APP_PASSWORD",
    "MERCURY_WHATSAPP_AUTH_DIR",
  ]);

  // Pass MERCURY_* vars to container with prefix stripped, excluding blocked vars
  const claimed = input.claimedEnvSources;
  const passthroughEnvPairs = Object.entries(process.env)
    .filter(
      (entry): entry is [string, string] =>
        entry[0].startsWith("MERCURY_") &&
        entry[1] !== undefined &&
        !BLOCKED_ENV_VARS.has(entry[0]) &&
        !claimed?.has(entry[0]),
    )
    .map(([key, value]) => ({
      key: key.replace("MERCURY_", ""),
      value: value,
    }));

  // Legacy path: older console versions stored the OAuth credential blob in
  // MERCURY_ANTHROPIC_API_KEY instead of MERCURY_ANTHROPIC_OAUTH_TOKEN.
  // Current console uses MERCURY_ANTHROPIC_OAUTH_TOKEN (handled below), but
  // keep this guard so agents provisioned before the migration don't break.
  const anthApiKeyIdx = passthroughEnvPairs.findIndex(
    (p) =>
      p.key === "ANTHROPIC_API_KEY" && p.value.trimStart().startsWith("{"),
  );
  if (anthApiKeyIdx !== -1) {
    try {
      const raw = passthroughEnvPairs[anthApiKeyIdx]?.value ?? "";
      const parsed = JSON.parse(raw) as Record<string, unknown>;
      const access =
        typeof parsed.access === "string" ? parsed.access : undefined;
      if (access) {
        passthroughEnvPairs.splice(anthApiKeyIdx, 1);
        passthroughEnvPairs.push({
          key: "ANTHROPIC_OAUTH_TOKEN",
          value: access,
        });
      }
    } catch {
      // Not valid JSON — leave as-is and let pi handle/reject it
    }
  }

  // MERCURY_ANTHROPIC_OAUTH_TOKEN now carries a full credential blob
  // ({"access":"...","refresh":"...","expires":...}) so the fork can refresh
  // the access token at each spawn instead of relying on the frozen value
  // injected when the outer container started. Remove the blob unconditionally
  // to ensure raw JSON never leaks into the inner container, then push a fresh
  // bare token (or the current token if refresh fails / is rate-limited).
  const anthOauthIdx = passthroughEnvPairs.findIndex(
    (p) =>
      p.key === "ANTHROPIC_OAUTH_TOKEN" &&
      p.value.trimStart().startsWith("{"),
  );
  if (anthOauthIdx !== -1) {
    const raw = passthroughEnvPairs[anthOauthIdx]?.value ?? "";
    passthroughEnvPairs.splice(anthOauthIdx, 1);
    let parsedCreds: AnthropicOAuthCreds | undefined;
    try {
      parsedCreds = JSON.parse(raw) as AnthropicOAuthCreds;
    } catch {
      logger.warn("Anthropic OAuth blob corrupt; skipping token injection");
    }
    if (parsedCreds) {
      const { access, updatedBlob } = await resolveOAuthCredentialForSpawn(
        parsedCreds,
        {
          consoleUrl: this.config.consoleUrl,
          consoleInternalSecret: this.config.consoleInternalSecret,
          agentId: process.env.MERCURY_AGENT_ID,
        },
      );
      // Update outer container's in-process env so subsequent spawns within
      // this process lifetime start with the fresh blob (avoids re-refreshing
      // a token that was just fetched). Not persisted across process restarts.
      if (updatedBlob) {
        process.env.MERCURY_ANTHROPIC_OAUTH_TOKEN = updatedBlob;
      }
      passthroughEnvPairs.push({
        key: "ANTHROPIC_OAUTH_TOKEN",
        value: access,
      });
    }
  }

  // Check for pi auth file fallback for Anthropic
  const hasAnthropicKey = passthroughEnvPairs.some(
    (p) => p.key === "ANTHROPIC_API_KEY" || p.key === "ANTHROPIC_OAUTH_TOKEN",
  );
  if (
    !hasAnthropicKey &&
    this.config.modelProvider === "anthropic" &&
    authFromPi
  ) {
    passthroughEnvPairs.push({
      key: "ANTHROPIC_OAUTH_TOKEN",
      value: authFromPi,
    });
  }

  // Entries with an empty value are dropped by the trailing filter.
  const envPairs = [
    // Internal vars (set by code, not from env)
    { key: "HOME", value: "/home/mercury" },
    {
      key: "PATH",
      value:
        "/home/mercury/.local/bin:/home/mercury/.bun/bin:/usr/local/go/bin:/usr/local/bin:/usr/bin:/bin",
    },
    { key: "PI_CODING_AGENT_DIR", value: "/home/mercury/.pi/agent" },
    { key: "CALLER_ID", value: input.callerId },
    { key: "SPACE_ID", value: input.spaceId },
    {
      key: "API_URL",
      value: `http://${this.resolvedApiHost}:${this.config.port}`,
    },
    // API secret for mrctl auth from inside containers
    { key: "API_SECRET", value: this.config.apiSecret ?? "" },
    // gVisor: inner containers reach the API over a per-agent unix socket
    // (the outer is off docker0). mrctl uses this transport when set; API_URL
    // host/port above are then ignored. Absent for runc/local.
    ...(this.config.containerRuntime === "runsc"
      ? [{ key: "API_SOCKET", value: innerApiSocketPath() }]
      : []),
    // Passthrough vars (MERCURY_* with prefix stripped)
    ...passthroughEnvPairs,
    // Host-resolved model chain (overrides any stale MODEL_CHAIN from passthrough)
    {
      key: "MODEL_CHAIN",
      value: JSON.stringify(this.config.resolvedModelChain),
    },
    {
      key: "MODEL_RETRY_MAX_PER_LEG",
      value: String(this.config.modelMaxRetriesPerLeg),
    },
    {
      key: "MODEL_CHAIN_BUDGET_MS",
      value: String(this.config.effectiveModelChainBudgetMs),
    },
    {
      key: "MODEL_CHAIN_CAPABILITIES",
      value: JSON.stringify(this.config.resolvedModelChainCapabilities),
    },
    {
      key: "OVERRIDE_PI_SYSTEM_PROMPT",
      value: this.config.overridePiSystemPrompt ? "true" : "false",
    },
  ].filter((x): x is { key: string; value: string } => Boolean(x.value));

  const containerName = this.generateContainerName();

  // Resolve docs paths for self-documenting agent
  const docsDir = path.resolve(PACKAGE_ROOT, "docs");
  const readmePath = path.resolve(PACKAGE_ROOT, "README.md");

  // In cloud deployments the outer container runs with a Docker named volume at
  // config.globalDir / config.spacesDir. When those paths are passed as bind-mount
  // sources to the host Docker daemon (via the Docker socket), the daemon treats them
  // as HOST filesystem paths — a different directory from the volume. Setting
  // MERCURY_HOST_DATA_DIR to the volume's actual host-side mountpoint
  // (/var/lib/docker/volumes/<name>/_data) lets inner containers mount the same data
  // the outer container reads and writes. Falls back to config paths for local dev
  // where no named volume is in use.
  const hostDataDir = process.env.MERCURY_HOST_DATA_DIR;
  const innerGlobalDir = hostDataDir
    ? path.join(hostDataDir, "global")
    : globalDir;
  const innerSpacesRoot = hostDataDir
    ? path.join(hostDataDir, "spaces")
    : spacesRoot;

  // Mount only the specific space directory for isolation
  const spaceDir = path.resolve(spacesRoot, input.spaceId);
  const innerSpaceDir = path.join(innerSpacesRoot, input.spaceId);
  fs.mkdirSync(spaceDir, { recursive: true });
  try {
    execFileSync("chown", ["-R", "1000:1000", spaceDir], { stdio: "pipe" });
  } catch {
    logger.warn(
      "chown spaceDir failed (CAP_CHOWN unavailable), falling back to chmod 777",
      { spaceDir },
    );
    try {
      fs.chmodSync(spaceDir, 0o777);
    } catch {
      logger.warn(
        "chmod spaceDir also failed, inner container may lack write access",
        { spaceDir },
      );
    }
  }

  const agentId = process.env.MERCURY_AGENT_ID;
  // `docker create` (not `run`) and no `-i`: the container is started detached
  // and communicates over the mounted IO dir, never the attach stream. This is
  // the only launch shape that survives the Bun body-proxy on the cloud agent
  // lane (it cannot proxy Docker's hijacked attach connection).
  const args = [
    "create",
    "--rm",
    "--name",
    containerName,
    "--label",
    CONTAINER_LABEL,
    ...(agentId ? ["--label", `${AGENT_ID_LABEL_KEY}=${agentId}`] : []),
  ];

  if (
    this.config.containerNetwork &&
    this.config.containerRuntime !== "runsc"
  ) {
    // runc: join the shared network so inner containers can resolve the
    // outer container by DNS name and reach external APIs.
    args.push("--network", this.config.containerNetwork);
  } else {
    // Default bridge (no --network flag). gVisor always lands here because
    // user-defined Docker networks break gVisor's outbound DNS (Docker's
    // embedded resolver at 127.0.0.11 is unreachable from gVisor). Inner
    // containers keep docker0 for outbound DNS/HTTPS; the inner→outer API
    // callback rides the per-agent unix socket (API_SOCKET), not docker0.
    args.push("--add-host", "host.docker.internal:host-gateway");
  }

  // Per-message IO dir — the detached reply channel. Mirrors the global/spaces
  // host-path translation: the host bind source must live under the agent's own
  // data volume (`hostDataDir`) so it satisfies the body-proxy's RW-bind
  // allowlist (`/var/lib/docker/volumes/mercury-<agentId>-data/...`). The outer
  // writes input.json here; the inner writes result.json back.
  const ioLocalDir = path.join(
    resolveProjectPath(this.config.dataDir),
    "io",
    containerName,
  );
  const ioHostDir = hostDataDir
    ? path.join(hostDataDir, "io", containerName)
    : ioLocalDir;

  args.push(
    "-v",
    `${innerSpaceDir}:/spaces/${input.spaceId}`,
    "-v",
    `${innerGlobalDir}:/home/mercury/.pi/agent`,
    "-v",
    `${readmePath}:/docs/mercury/README.md:ro`,
    "-v",
    `${docsDir}:/docs/mercury/docs:ro`,
    "-v",
    `${ioHostDir}:${INNER_IO_DIR}`,
    "-e",
    `IO_DIR=${INNER_IO_DIR}`,
  );

  if (this.config.containerRuntime === "runsc") {
    // Mount the per-agent run dir so the inner container can reach the outer's
    // API unix socket (api-<hostname>.sock, created in main.ts). Mirrors the
    // global/spaces host-path translation: the host-side source is the data
    // volume's run dir, exposed at /run/mercury inside the inner container.
    // Resolve dataDir with the same helper main.ts uses to create the socket,
    // so the bind source and the listener never disagree on the run-dir path.
    const localRunDir = apiSocketDir(resolveProjectPath(this.config.dataDir));
    const innerRunDir = hostDataDir ? apiSocketDir(hostDataDir) : localRunDir;
    // Ensure the host bind source exists (main.ts created it at startup; this
    // guards against config drift). Created in the in-container data dir, which
    // is the same volume the host path resolves to.
    fs.mkdirSync(localRunDir, { recursive: true });
    args.push("-v", `${innerRunDir}:${INNER_RUN_DIR}`);
    // gVisor: intercepts all syscalls at a user-space kernel boundary — no bwrap needed.
    // Restores full Docker hardening (SYS_ADMIN relaxation not required).
    // CONTAINER_RUNTIME=runsc is passed explicitly (stripped prefix) so container-entry
    // skips the bwrap spawn path.
    args.push(
      "--runtime=runsc",
      "--cap-drop=ALL",
      "--security-opt=no-new-privileges",
      "--memory=2g",
      "--cpus=2",
      "--pids-limit=512",
      "-e",
      "CONTAINER_RUNTIME=runsc",
    );
  } else if (this.config.containerBwrapDockerCompat) {
    // runc + bwrap: bubblewrap needs extra namespace syscalls that Docker's default
    // seccomp/caps/AppArmor block. seccomp=unconfined allows unshare; apparmor=unconfined
    // allows mount(MS_SLAVE); SYS_ADMIN grants the mount capability. Bwrap remains active
    // inside the container; only the outer Docker layer is relaxed.
    args.push(
      "--security-opt",
      "seccomp=unconfined",
      "--security-opt",
      "apparmor=unconfined",
      "--cap-add",
      "SYS_ADMIN",
    );
  }

  for (const { key, value } of envPairs) {
    args.push("-e", `${key}=${value}`);
  }

  // Extension env vars from before_container hooks
  if (input.extraEnv) {
    for (const [key, value] of Object.entries(input.extraEnv)) {
      args.push("-e", `${key}=${value}`);
    }
  }

  const buildingNow = this.buildState?.building ?? false;
  const spawnImage = this.image;
  if (buildingNow) {
    logger.info("Ext image still building, spawning with base image", {
      image: spawnImage,
    });
  }
  args.push(spawnImage);

  // Per-run nonce — retained in the payload for the inner container's legacy
  // stdout-marker fallback (used only for direct/manual attach against a real
  // daemon; the detached cloud path reads result.json instead).
  const nonce = randomBytes(8).toString("hex");

  const payload = {
    ...input,
    messages: input.messages,
    anchorMessages: input.anchorMessages,
    spaceWorkspace: input.spaceWorkspace
      .replace(spacesRoot, "/spaces")
      .replaceAll("\\", "/"),
    callerRole: input.callerRole ?? "member",
    authorName: input.authorName,
    nonce,
  };

  // Create child logger with context for this container run
  const log: Logger = logger.child({
    spaceId: input.spaceId,
    container: containerName,
  });

  const startTime = Date.now();

  // Stage the request payload where the inner container will read it, and make
  // the dir writable by the inner uid (1000) so it can drop result.json back.
  fs.mkdirSync(ioLocalDir, { recursive: true });
  fs.writeFileSync(
    path.join(ioLocalDir, "input.json"),
    JSON.stringify(payload),
  );
  try {
    execFileSync("chown", ["-R", "1000:1000", ioLocalDir], { stdio: "pipe" });
  } catch {
    try {
      fs.chmodSync(ioLocalDir, 0o777);
    } catch {
      logger.warn("chown/chmod ioDir failed; inner may not write result", {
        ioLocalDir,
      });
    }
  }

  const resultPath = path.join(ioLocalDir, "result.json");
  const cleanupIo = () => {
    try {
      fs.rmSync(ioLocalDir, { recursive: true, force: true });
    } catch {
      // best effort — orphaned IO dirs are harmless and small
    }
  };

  // Create the container (detached). `docker create` is where a pruned/missing
  // image surfaces (exit 125, "No such image"/"Unable to find image"), so this
  // error shape feeds replyWithRetry's rebuild-and-retry path unchanged.
  const created = await execDocker(args);
  if (created.code !== 0) {
    cleanupIo();
    const output = created.timedOut
      ? `docker create timed out after ${Math.round(EXEC_DOCKER_TIMEOUT_MS / 1000)}s — Docker daemon may be unresponsive`
      : created.stderr ||
        created.stdout ||
        `docker create exited with code ${created.code} (no output)`;
    log.error("docker create failed", {
      exitCode: created.code,
      timedOut: created.timedOut,
      output,
    });
    throw ContainerError.error(created.code, output);
  }

  this.runningBySpace.set(input.spaceId, { containerName });
  const deadline = startTime + this.config.containerTimeoutMs;

  try {
    // Start detached — no `-a`/attach, so the body-proxy only sees
    // POST /containers/<id>/start (plain request/response). Returns immediately.
    const started = await execDocker(["start", containerName]);
    if (started.code !== 0) {
      const output = started.timedOut
        ? `docker start timed out after ${Math.round(EXEC_DOCKER_TIMEOUT_MS / 1000)}s — Docker daemon may be unresponsive`
        : started.stderr ||
          started.stdout ||
          `docker start exited with code ${started.code} (no output)`;
      log.error("docker start failed", {
        exitCode: started.code,
        timedOut: started.timedOut,
        containerName,
        output,
      });
      // A container that was created but never started does not exit, so
      // `--rm` will not reap it. Remove it explicitly (best effort) instead
      // of leaving it behind until the next orphan cleanup.
      try {
        await execDocker(["rm", "-f", containerName]);
      } catch {
        // best effort — the start failure below is the error that matters
      }
      throw ContainerError.error(started.code, output);
    }
    log.info("Container started", { event: "container.start" });

    // Poll the mounted result file. The inner container writes result.json
    // atomically (tmp + rename) on every outcome, so its presence means a
    // complete payload. A periodic `docker inspect` fails fast if the container
    // died without writing one (hard crash / OOM) instead of waiting out the
    // full timeout.
    let iter = 0;
    while (true) {
      if (fs.existsSync(resultPath)) {
        return this.consumeResult(resultPath, input, startTime, log);
      }

      if (this.timedOutSpaces.has(input.spaceId) || Date.now() >= deadline) {
        this.timedOutSpaces.delete(input.spaceId);
        await execDocker(["kill", containerName]);
        // The kill loses the race against a just-written result occasionally.
        if (fs.existsSync(resultPath)) {
          return this.consumeResult(resultPath, input, startTime, log);
        }
        log.warn("Container exited", {
          event: "container.end",
          durationMs: Date.now() - startTime,
          reason: "timeout",
        });
        throw ContainerError.timeout(input.spaceId);
      }

      if (this.abortedSpaces.has(input.spaceId)) {
        this.abortedSpaces.delete(input.spaceId);
        await execDocker(["kill", containerName]);
        log.info("Container exited", {
          event: "container.end",
          durationMs: Date.now() - startTime,
          reason: "aborted",
        });
        throw ContainerError.aborted(input.spaceId);
      }

      if (++iter % LIVENESS_EVERY === 0) {
        const crash = await this.detectCrash(
          containerName,
          resultPath,
          input.spaceId,
        );
        if (crash) {
          log.error("Container exited", {
            event: "container.end",
            exitCode: crash.exitCode,
            durationMs: Date.now() - startTime,
            reason: crash.reason,
          });
          throw crash;
        }
        // detectCrash may have observed result.json appear during its grace wait
        if (fs.existsSync(resultPath)) {
          return this.consumeResult(resultPath, input, startTime, log);
        }
      }

      await sleep(RESULT_POLL_MS);
    }
  } finally {
    this.runningBySpace.delete(input.spaceId);
    // abort()/killAll() flag a space only while it is tracked as running. If
    // this run ended through another path (result landed first, timeout, crash),
    // the flag would otherwise linger and abort the next reply for this space
    // on its first poll iteration.
    this.abortedSpaces.delete(input.spaceId);
    cleanupIo();
  }
}
1130
+
1131
/**
 * Parse the inner container's result file and build the ContainerResult.
 * `{ ok: false }` means the container caught its own failure and reported it;
 * surface it as an error so callers see the real message rather than a generic
 * crash.
 *
 * @param resultPath Path of the result JSON file to read.
 * @param input Space id and workspace of the run; the workspace is scanned for
 *   outbox files.
 * @param startTime Epoch milliseconds when the run started; used for the logged
 *   duration and passed to the outbox scan.
 * @param log Logger scoped to this container run.
 * @returns Reply text (defaulting to "Done."), outbox files and token usage.
 * @throws Error when the file is unreadable, is not valid JSON, or does not
 *   contain a JSON object.
 * @throws ContainerError when the payload has `ok: false`.
 */
private consumeResult(
  resultPath: string,
  input: { spaceId: string; spaceWorkspace: string },
  startTime: number,
  log: Logger,
): ContainerResult {
  type ResultPayload = {
    ok?: boolean;
    reply?: string;
    usage?: TokenUsage;
    error?: string;
  };

  let parsed: ResultPayload;
  try {
    const raw: unknown = JSON.parse(fs.readFileSync(resultPath, "utf8"));
    // JSON.parse also accepts `null`, arrays and scalars; reading fields off
    // those would throw an unrelated TypeError (or silently yield nothing).
    if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
      throw new Error("expected a JSON object");
    }
    parsed = raw as ResultPayload;
  } catch (e) {
    throw new Error(
      `Malformed container result: ${e instanceof Error ? e.message : String(e)}`,
    );
  }

  if (parsed.ok === false) {
    throw ContainerError.error(
      1,
      parsed.error ?? "container reported failure",
    );
  }

  log.info("Container exited", {
    event: "container.end",
    exitCode: 0,
    durationMs: Date.now() - startTime,
  });

  // Fall back to the default text when the reply is absent or not a string.
  const replyText = typeof parsed.reply === "string" ? parsed.reply : "Done.";
  const files = scanOutbox(input.spaceWorkspace, startTime);
  return { reply: replyText, files, usage: parsed.usage };
}
1174
+
1175
/**
 * Liveness probe run periodically while waiting for the result file.
 *
 * Inspects the container once. If it is still running, or if the result file
 * shows up within one extra poll interval, nothing is wrong and `null` is
 * returned. Otherwise a ContainerError describes the death: a generic error
 * when the container can no longer be inspected, an OOM error when Docker
 * reports an OOM kill (or the OOM exit code), or an error carrying the
 * container's exit code.
 */
private async detectCrash(
  containerName: string,
  resultPath: string,
  spaceId: string,
): Promise<ContainerError | null> {
  const inspection = await execDocker([
    "inspect",
    "-f",
    "{{.State.Status}}|{{.State.ExitCode}}|{{.State.OOMKilled}}",
    containerName,
  ]);
  const inspectSucceeded = inspection.code === 0;

  // Still present and reporting "running": nothing to report.
  if (inspectSucceeded && inspection.stdout.trim().startsWith("running")) {
    return null;
  }

  // The container is either gone (inspect failed, e.g. already reaped by --rm)
  // or in a terminal state. Wait one poll interval for the result file to land.
  await sleep(RESULT_POLL_MS);
  if (fs.existsSync(resultPath)) {
    return null;
  }

  if (!inspectSucceeded) {
    // No state to read any more, so the real exit code cannot be recovered.
    return ContainerError.error(
      1,
      "inner container exited without producing a result (possible crash)",
    );
  }

  const fields = inspection.stdout.trim().split("|");
  const exitField = fields[1];
  const oomField = fields[2];
  // Unparseable or zero exit codes are reported as 1: exiting without a
  // result is a failure regardless of the status code.
  const exitCode = Number.parseInt(exitField ?? "1", 10) || 1;

  const killedByOom = oomField === "true" || exitCode === OOM_EXIT_CODE;
  return killedByOom
    ? ContainerError.oom(spaceId, exitCode)
    : ContainerError.error(
        exitCode,
        "inner container exited without producing a result",
      );
}
1222
+
1223
/**
 * Run reply(), recovering once when the derived ext image has disappeared
 * (for example, pruned by a rolling deploy).
 *
 * The recovery path is taken only for a ContainerError with reason "error",
 * exit code 125 and a message containing "No such image" or "Unable to find
 * image", and only when a build state exists. In that case a rebuild is
 * kicked off in the background and reply() is retried immediately.
 *
 * rebuild() synchronously resets resolvedImage → baseImage before its first
 * await, so the retry already spawns with the base image; later spawns pick up
 * the fresh derived image once the background build finishes.
 *
 * Any other error — including a failure of the retry itself — propagates.
 */
async replyWithRetry(
  input: Parameters<AgentContainerRunner["reply"]>[0],
): Promise<ContainerResult> {
  try {
    return await this.reply(input);
  } catch (err) {
    const imageWasPruned =
      err instanceof ContainerError &&
      err.reason === "error" &&
      err.exitCode === 125 &&
      (err.message.includes("No such image") ||
        err.message.includes("Unable to find image"));
    const buildState = this.buildState;
    if (!imageWasPruned || !buildState) {
      throw err;
    }

    // Read the image name first: rebuild() resets resolvedImage to baseImage.
    const missingImage = buildState.currentImage();

    // Deliberately not awaited — waiting for the full (~4-minute Playwright)
    // build would block the retry and time out the connection.
    void buildState.rebuild().catch((rebuildErr) => {
      logger.error(
        "Unexpected error in background ext image rebuild",
        rebuildErr instanceof Error ? rebuildErr : undefined,
      );
    });
    logger.warn(
      "Ext image missing (pruned by rolling deploy?), triggering background rebuild and retrying with base image",
      { image: missingImage },
    );
    return await this.reply(input);
  }
}
1272
+ }