@agentstep/agent-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/package.json +45 -0
  2. package/src/auth/middleware.ts +38 -0
  3. package/src/backends/claude/args.ts +88 -0
  4. package/src/backends/claude/index.ts +193 -0
  5. package/src/backends/claude/permission-hook.ts +152 -0
  6. package/src/backends/claude/tool-bridge.ts +211 -0
  7. package/src/backends/claude/translator.ts +209 -0
  8. package/src/backends/claude/wrapper-script.ts +45 -0
  9. package/src/backends/codex/args.ts +69 -0
  10. package/src/backends/codex/auth.ts +35 -0
  11. package/src/backends/codex/index.ts +57 -0
  12. package/src/backends/codex/setup.ts +37 -0
  13. package/src/backends/codex/translator.ts +223 -0
  14. package/src/backends/codex/wrapper-script.ts +26 -0
  15. package/src/backends/factory/args.ts +45 -0
  16. package/src/backends/factory/auth.ts +30 -0
  17. package/src/backends/factory/index.ts +56 -0
  18. package/src/backends/factory/setup.ts +34 -0
  19. package/src/backends/factory/translator.ts +139 -0
  20. package/src/backends/factory/wrapper-script.ts +33 -0
  21. package/src/backends/gemini/args.ts +44 -0
  22. package/src/backends/gemini/auth.ts +30 -0
  23. package/src/backends/gemini/index.ts +53 -0
  24. package/src/backends/gemini/setup.ts +34 -0
  25. package/src/backends/gemini/translator.ts +139 -0
  26. package/src/backends/gemini/wrapper-script.ts +26 -0
  27. package/src/backends/opencode/args.ts +53 -0
  28. package/src/backends/opencode/auth.ts +53 -0
  29. package/src/backends/opencode/index.ts +70 -0
  30. package/src/backends/opencode/mcp.ts +67 -0
  31. package/src/backends/opencode/setup.ts +54 -0
  32. package/src/backends/opencode/translator.ts +168 -0
  33. package/src/backends/opencode/wrapper-script.ts +46 -0
  34. package/src/backends/registry.ts +38 -0
  35. package/src/backends/shared/ndjson.ts +29 -0
  36. package/src/backends/shared/translator-types.ts +69 -0
  37. package/src/backends/shared/wrap-prompt.ts +17 -0
  38. package/src/backends/types.ts +85 -0
  39. package/src/config/index.ts +95 -0
  40. package/src/db/agents.ts +185 -0
  41. package/src/db/api_keys.ts +78 -0
  42. package/src/db/batch.ts +142 -0
  43. package/src/db/client.ts +81 -0
  44. package/src/db/environments.ts +127 -0
  45. package/src/db/events.ts +208 -0
  46. package/src/db/memory.ts +143 -0
  47. package/src/db/migrations.ts +295 -0
  48. package/src/db/proxy.ts +37 -0
  49. package/src/db/sessions.ts +295 -0
  50. package/src/db/vaults.ts +110 -0
  51. package/src/errors.ts +53 -0
  52. package/src/handlers/agents.ts +194 -0
  53. package/src/handlers/batch.ts +41 -0
  54. package/src/handlers/docs.ts +87 -0
  55. package/src/handlers/environments.ts +154 -0
  56. package/src/handlers/events.ts +234 -0
  57. package/src/handlers/index.ts +12 -0
  58. package/src/handlers/memory.ts +141 -0
  59. package/src/handlers/openapi.ts +14 -0
  60. package/src/handlers/sessions.ts +223 -0
  61. package/src/handlers/stream.ts +76 -0
  62. package/src/handlers/threads.ts +26 -0
  63. package/src/handlers/ui/app.js +984 -0
  64. package/src/handlers/ui/index.html +112 -0
  65. package/src/handlers/ui/style.css +164 -0
  66. package/src/handlers/ui.ts +1281 -0
  67. package/src/handlers/vaults.ts +99 -0
  68. package/src/http.ts +35 -0
  69. package/src/index.ts +104 -0
  70. package/src/init.ts +227 -0
  71. package/src/openapi/registry.ts +8 -0
  72. package/src/openapi/schemas.ts +625 -0
  73. package/src/openapi/spec.ts +691 -0
  74. package/src/providers/apple.ts +220 -0
  75. package/src/providers/daytona.ts +217 -0
  76. package/src/providers/docker.ts +264 -0
  77. package/src/providers/e2b.ts +203 -0
  78. package/src/providers/fly.ts +276 -0
  79. package/src/providers/modal.ts +222 -0
  80. package/src/providers/podman.ts +206 -0
  81. package/src/providers/registry.ts +28 -0
  82. package/src/providers/shared.ts +11 -0
  83. package/src/providers/sprites.ts +55 -0
  84. package/src/providers/types.ts +73 -0
  85. package/src/providers/vercel.ts +208 -0
  86. package/src/proxy/forward.ts +111 -0
  87. package/src/queue/index.ts +111 -0
  88. package/src/sessions/actor.ts +53 -0
  89. package/src/sessions/bus.ts +155 -0
  90. package/src/sessions/driver.ts +818 -0
  91. package/src/sessions/grader.ts +120 -0
  92. package/src/sessions/interrupt.ts +14 -0
  93. package/src/sessions/sweeper.ts +136 -0
  94. package/src/sessions/threads.ts +126 -0
  95. package/src/sessions/tools.ts +50 -0
  96. package/src/shutdown.ts +78 -0
  97. package/src/sprite/client.ts +294 -0
  98. package/src/sprite/exec.ts +161 -0
  99. package/src/sprite/lifecycle.ts +339 -0
  100. package/src/sprite/pool.ts +65 -0
  101. package/src/sprite/setup.ts +159 -0
  102. package/src/state.ts +61 -0
  103. package/src/types.ts +339 -0
  104. package/src/util/clock.ts +7 -0
  105. package/src/util/ids.ts +11 -0
@@ -0,0 +1,818 @@
1
+ /**
2
+ * Turn driver — orchestrates one turn of a backend CLI against a sprite.
3
+ *
4
+ * Backend-agnostic: resolves the agent's `backend` field via the registry
5
+ * and delegates argv/env/stdin construction + stream translation to the
6
+ * concrete backend (claude, opencode, ...). The driver owns sprite
7
+ * acquisition, the stream loop, event bus appends, and usage persistence.
8
+ *
9
+ * Flow:
10
+ * 1. Validate runtime config for the backend (fail fast before sprite).
11
+ * 2. Lazy-acquire the sprite if the session has none yet.
12
+ * 3. Mark each pending user.message as processed (processed_at = now).
13
+ * 4. Flip session status to "running", append `session.status_running`
14
+ * and `span.model_request_start`.
15
+ * 5. Call backend.buildTurn → argv + env + stdin. Compose the wrapper
16
+ * stdin body as `envLines \n\n stdin`.
17
+ * 6. Spawn exec on the sprite.
18
+ * 7. Stream NDJSON through the translator, batch-append events per chunk.
19
+ * 8. On successful exit: append `span.model_request_end`, update columnar
20
+ * usage, persist the latest backend session id, append
21
+ * `session.status_idle`.
22
+ * 9. On abort: append `session.status_idle{stop_reason:"interrupted"}`.
23
+ * 10. On error: append `session.error` + flip to idle/error.
24
+ * 11. Drain any `pendingUserInputs` accumulated during the turn — they
25
+ * are enqueued as the next turn.
26
+ */
27
+ import { appendEventsBatch, appendEvent } from "./bus";
28
+ import { getRuntime, drainPendingUserInputs, type TurnInput } from "../state";
29
+ import { getSession, setBackendSessionId, updateSessionStatus, updateSessionMutable, bumpSessionStats, setIdleSince, getSessionRow, getOutcomeCriteria, setOutcomeCriteria } from "../db/sessions";
30
+ import { getAgent } from "../db/agents";
31
+ import { getEnvironment } from "../db/environments";
32
+ import { markUserEventProcessed, listEvents } from "../db/events";
33
+ import { acquireForFirstTurn } from "../sprite/lifecycle";
34
+ import { resolveBackend } from "../backends/registry";
35
+ import { resolveContainerProvider } from "../providers/registry";
36
+ import { listEntries as listVaultEntries } from "../db/vaults";
37
+ import { parseNDJSONLines } from "../backends/shared/ndjson";
38
+ import type { TranslatedEvent } from "../backends/shared/translator-types";
39
+ import type { Agent } from "../types";
40
+ import type { ContainerProvider } from "../providers/types";
41
+ import { resolveToolset } from "./tools";
42
+ import { ApiError } from "../errors";
43
+ import { nowMs } from "../util/clock";
44
+ import {
45
+ PERMISSION_BRIDGE_PENDING_PATH,
46
+ PERMISSION_BRIDGE_REQUEST_PATH,
47
+ PERMISSION_BRIDGE_RESPONSE_PATH,
48
+ } from "../backends/claude/permission-hook";
49
+
/** Deepest chain of automatic re-entries (spawn_agent results, grader feedback) that runTurn follows. */
const MAX_TURN_DEPTH = 25;

/** Environment variable names that vault entries are never allowed to set. */
const BLOCKED_ENV_KEYS = new Set([
  "PATH", "HOME", "USER", "SHELL", "LD_PRELOAD", "LD_LIBRARY_PATH",
  "NODE_PATH", "NODE_OPTIONS",
]);

/**
 * Control characters removed from exec output when the provider sets
 * `stripControlChars`: 0x00-0x1F except tab, LF and CR. JSON-escaped control
 * chars (like `\u0001` inside a payload) are printable ASCII and unaffected.
 */
const CONTROL_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F]/g;

/**
 * Record a failed turn: append `session.error`, append
 * `session.status_idle{stop_reason:"error"}`, and flip the session to idle/error.
 */
function failTurn(sessionId: string, errorType: string, message: string): void {
  appendEvent(sessionId, {
    type: "session.error",
    payload: { error: { type: errorType, message } },
    origin: "server",
    processedAt: nowMs(),
  });
  appendEvent(sessionId, {
    type: "session.status_idle",
    payload: { stop_reason: "error" },
    origin: "server",
    processedAt: nowMs(),
  });
  updateSessionStatus(sessionId, "idle", "error");
}

/**
 * Whether a vault entry can be written as a single `KEY=value` line of the
 * wrapper's stdin header. The header is line-based and split on the first
 * `=`, so a line break in the value (or `=` / a line break in the key) would
 * inject additional variables — including ones listed in BLOCKED_ENV_KEYS.
 */
function isSafeVaultEnvEntry(key: string, value: string): boolean {
  if (key.length === 0 || /[=\r\n]/.test(key)) return false;
  return !/[\r\n]/.test(String(value));
}

/**
 * Write `{ content }` as the claude tool-bridge response file inside the
 * container and remove the pending sentinel. Both steps are best-effort: a
 * failed write is logged with `what` in the message, a failed `rm` is ignored.
 */
async function writeToolBridgeResponse(
  provider: ContainerProvider,
  spriteName: string,
  content: unknown,
  what: string,
): Promise<void> {
  const { TOOL_BRIDGE_RESPONSE_PATH, TOOL_BRIDGE_PENDING_PATH } = await import("../backends/claude/tool-bridge");
  await provider.exec(
    spriteName,
    ["bash", "-c", `cat > ${TOOL_BRIDGE_RESPONSE_PATH}`],
    { stdin: JSON.stringify({ content }) },
  ).catch((err: unknown) => {
    console.warn(`[driver] failed to write ${what}:`, err);
  });
  await provider.exec(
    spriteName,
    ["rm", "-f", TOOL_BRIDGE_PENDING_PATH],
  ).catch(() => {});
}

/**
 * Run one turn of the session's backend CLI inside its container and record
 * the outcome on the event bus.
 *
 * Every failure path appends `session.error` followed by
 * `session.status_idle{stop_reason:"error"}` and flips the session to
 * idle/error. An abort (via the controller registered in
 * `runtime.inFlightRuns`) ends the turn with stop reason "interrupted".
 * After a completed turn the function may call itself again: once a
 * server-side tool (spawn_agent) has produced a result, or when the grader
 * asks for a revision.
 *
 * @param sessionId Session to run the turn for; a missing session is a no-op.
 * @param inputs Text and/or tool_result inputs for this turn; empty is a no-op.
 * @param _depth Internal re-entry counter; beyond MAX_TURN_DEPTH the turn fails.
 */
export async function runTurn(
  sessionId: string,
  inputs: TurnInput[],
  _depth = 0,
): Promise<void> {
  if (_depth > MAX_TURN_DEPTH) {
    failTurn(sessionId, "server_error", "max recursion depth exceeded");
    return;
  }
  const session = getSession(sessionId);
  if (!session) return; // session was deleted between enqueue and run
  if (inputs.length === 0) return;

  const agent = getAgent(session.agent.id, session.agent.version);
  if (!agent) {
    failTurn(sessionId, "server_error", "agent not found");
    return;
  }

  const backend = resolveBackend(agent.backend);

  // Belt-and-braces runtime validation. Config may have changed since the
  // agent was created (env vars cleared, settings table mutated). Fail fast
  // BEFORE sprite acquire / install so a long install isn't wasted on a
  // misconfigured backend.
  // Skipped when the session has vault entries — they provide keys at
  // container level.
  let hasVaultKeys = false;
  if (session.vault_ids && session.vault_ids.length > 0) {
    console.log(`[driver] session ${sessionId} has vault_ids:`, session.vault_ids);
    for (const vid of session.vault_ids) {
      const entries = listVaultEntries(vid);
      console.log(`[driver] vault ${vid} has ${entries.length} entries`);
      if (entries.length > 0) { hasVaultKeys = true; break; }
    }
  } else {
    console.log(`[driver] session ${sessionId} has no vault_ids`);
  }
  if (!hasVaultKeys) {
    const runtimeErr = backend.validateRuntime?.();
    if (runtimeErr) {
      failTurn(sessionId, "invalid_request_error", runtimeErr);
      return;
    }
  }

  // Budget check: if max_budget_usd is set and usage has reached it, refuse the turn.
  const budgetRow = getSessionRow(sessionId);
  if (budgetRow?.max_budget_usd != null && budgetRow.usage_cost_usd >= budgetRow.max_budget_usd) {
    failTurn(
      sessionId,
      "budget_exceeded",
      `usage $${budgetRow.usage_cost_usd.toFixed(4)} >= budget $${budgetRow.max_budget_usd.toFixed(4)}`,
    );
    return;
  }

  // Mark each pending input as processed-now.
  for (const p of inputs) markUserEventProcessed(p.eventId, nowMs());

  // Acquire sprite if needed.
  let spriteName: string;
  try {
    spriteName = await acquireForFirstTurn(sessionId);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    failTurn(sessionId, "server_error", `container creation failed: ${msg}`);
    return;
  }

  // Flip running + emit status_running + span start.
  updateSessionStatus(sessionId, "running");
  const turnStartMs = nowMs();
  appendEvent(sessionId, {
    type: "session.status_running",
    payload: {},
    origin: "server",
    processedAt: turnStartMs,
  });
  appendEvent(sessionId, {
    type: "span.model_request_start",
    payload: { model: agent.model },
    origin: "server",
    processedAt: turnStartMs,
  });

  // Build argv + env + stdin via the backend. buildTurn may throw an
  // ApiError (e.g. opencode rejects tool_result re-entry) — catch and surface
  // as session.error + status_idle{error}.
  const promptText = inputs
    .filter((i): i is Extract<TurnInput, { kind: "text" }> => i.kind === "text")
    .map((i) => i.text)
    .join("\n\n");
  const toolResults = inputs
    .filter((i): i is Extract<TurnInput, { kind: "tool_result" }> => i.kind === "tool_result")
    .map((i) => ({
      custom_tool_use_id: i.custom_tool_use_id,
      content: i.content,
    }));

  let turnBuild;
  try {
    turnBuild = backend.buildTurn({
      agent,
      backendSessionId: getSessionRow(sessionId)?.claude_session_id ?? null,
      promptText,
      toolResults,
    });
  } catch (err) {
    const msg =
      err instanceof ApiError
        ? err.message
        : err instanceof Error
          ? err.message
          : String(err);
    const type = err instanceof ApiError ? err.type : "server_error";
    failTurn(sessionId, type, msg);
    return;
  }

  const argv = [backend.wrapperPath, ...turnBuild.argv];

  // Inject RESOURCES_DIR if the session has resources.
  const freshSession = getSession(sessionId);
  if (freshSession?.resources && freshSession.resources.length > 0) {
    turnBuild.env.RESOURCES_DIR = "/tmp/resources";
  }

  // Inject vault entries as env vars (overriding server defaults) and set
  // VAULT_DIR for JSON file access. Entries that are blocked, or that cannot
  // be represented as one header line, are not injected.
  if (freshSession?.vault_ids && freshSession.vault_ids.length > 0) {
    turnBuild.env.VAULT_DIR = "/tmp/vaults";
    for (const vaultId of freshSession.vault_ids) {
      for (const entry of listVaultEntries(vaultId)) {
        if (BLOCKED_ENV_KEYS.has(entry.key)) continue;
        if (!isSafeVaultEnvEntry(entry.key, entry.value)) {
          console.warn(`[driver] skipping vault entry in ${vaultId}: key or value cannot be passed as a single env line`);
          continue;
        }
        turnBuild.env[entry.key] = entry.value;
      }
    }
  }

  // Compose the wrapper stdin: env KEY=value lines, blank line, prompt body.
  const envLines = Object.entries(turnBuild.env)
    .map(([k, v]) => `${k}=${v}`)
    .join("\n");
  const stdin = `${envLines}\n\n${turnBuild.stdin}`;

  // Resolve the container provider for this session's environment.
  const env = getEnvironment(session.environment_id);
  const provider = await resolveContainerProvider(env?.config?.provider);

  const tools = resolveToolset(agent.tools);
  // If threads are enabled, add spawn_agent to the custom tool names so the
  // translator classifies it as a custom tool and emits custom_tool_use events.
  if (agent.threads_enabled) {
    tools.customToolNames.add("spawn_agent");
  }
  const translator = backend.createTranslator({
    customToolNames: tools.customToolNames,
    isFirstTurn: getSessionRow(sessionId)?.claude_session_id == null,
  });

  // Tool bridge: on a custom tool result re-entry for the claude backend,
  // write response.json and remove the pending sentinel before resuming.
  if (agent.backend === "claude" && toolResults.length > 0) {
    const bridgeSprite = getSessionRow(sessionId)?.sprite_name;
    if (bridgeSprite) {
      for (const r of toolResults) {
        await writeToolBridgeResponse(provider, bridgeSprite, r.content, "tool bridge response");
      }
    }
  }

  const runtime = getRuntime();
  const controller = new AbortController();
  runtime.inFlightRuns.set(sessionId, {
    sessionId,
    controller,
    startedAt: turnStartMs,
  });

  let exec;
  try {
    exec = await provider.startExec(spriteName, {
      argv,
      stdin,
      signal: controller.signal,
    });
  } catch (err) {
    runtime.inFlightRuns.delete(sessionId);
    const msg = err instanceof Error ? err.message : String(err);
    failTurn(sessionId, "server_error", `exec failed: ${msg}`);
    return;
  }

  let buffer = "";
  let aborted = false;
  let toolCallsInTurn = 0;

  // Permission confirmation: if the agent has confirmation_mode, poll the
  // container every 2 seconds during the stream loop for a pending
  // permission request (see checkPermissionSentinel).
  let permissionPollTimer: ReturnType<typeof setInterval> | null = null;
  if (agent.confirmation_mode) {
    permissionPollTimer = setInterval(() => {
      void checkPermissionSentinel(sessionId, spriteName, provider).catch(
        (err: unknown) => {
          console.warn(`[driver] permission sentinel check failed:`, err);
        },
      );
    }, 2000);
  }

  try {
    const reader = exec.stdout.getReader();
    const decoder = new TextDecoder();
    // NOTE(review): a final line that arrives without a trailing newline stays
    // in `buffer` and is never translated — confirm the backends always
    // newline-terminate their last NDJSON record.
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      const raw = decoder.decode(value, { stream: true });
      // Stripping is conditional: only providers that flag it add framing bytes.
      buffer += provider.stripControlChars ? raw.replace(CONTROL_CHARS, "") : raw;

      const batch: TranslatedEvent[] = [];
      buffer = parseNDJSONLines(buffer, (line) => {
        for (const t of translator.translate(line)) batch.push(t);
      });
      if (batch.length === 0) continue;

      // "mcp_tool_use" and "custom_tool_use" also end with "tool_use".
      for (const t of batch) {
        if (t.type.endsWith("tool_use")) toolCallsInTurn++;
      }
      appendEventsBatch(
        sessionId,
        batch.map((t) => ({
          type: t.type,
          payload: t.payload,
          origin: "server" as const,
          processedAt: nowMs(),
        })),
      );

      // Auto-generate the session title from the first agent.message text.
      for (const t of batch) {
        if (t.type === "agent.message") {
          const row = getSessionRow(sessionId);
          if (row && row.title == null) {
            const content = t.payload.content as Array<{ type: string; text?: string }> | undefined;
            const text = content?.find((c) => c.type === "text" && c.text)?.text;
            if (text) {
              updateSessionMutable(sessionId, { title: text.slice(0, 60) });
            }
          }
          break; // only need the first agent.message
        }
      }
    }
    await exec.exit;
  } catch (err) {
    if (controller.signal.aborted) {
      aborted = true;
    } else {
      const msg = err instanceof Error ? err.message : String(err);
      failTurn(sessionId, "server_error", msg);
      return; // `finally` below still runs and unregisters the in-flight run
    }
  } finally {
    if (permissionPollTimer) {
      clearInterval(permissionPollTimer);
      // The turn is over, so no confirmation can still be outstanding. Drop
      // the flag; otherwise a request left unanswered (e.g. the turn was
      // interrupted) would suppress every later request for this session.
      getPendingConfirmations().delete(sessionId);
    }
    runtime.inFlightRuns.delete(sessionId);
  }

  if (aborted) {
    appendEvent(sessionId, {
      type: "session.status_idle",
      payload: { stop_reason: "interrupted" },
      origin: "server",
      processedAt: nowMs(),
    });
    updateSessionStatus(sessionId, "idle", "interrupted");
    setIdleSince(sessionId, nowMs());
    scheduleDrain(sessionId);
    return;
  }

  // Finish turn: persist backend session id + stats, then span end + status_idle.
  const result = translator.getTurnResult();
  const backendSid = translator.getBackendSessionId();
  if (backendSid) setBackendSessionId(sessionId, backendSid);

  const turnDurationSec = Math.max(0, (nowMs() - turnStartMs) / 1000);
  bumpSessionStats(
    sessionId,
    {
      turn_count: 1,
      tool_calls_count: toolCallsInTurn,
      duration_seconds: turnDurationSec,
      active_seconds: turnDurationSec,
    },
    result?.usage,
  );

  const now = nowMs();
  const stopReason = result?.stopReason ?? "end_turn";

  // Multi-agent threads: if the turn stopped on a custom tool call that the
  // server handles itself (spawn_agent), write the result back into the
  // container and re-run the turn with it as a tool result.
  if (stopReason === "custom_tool_call") {
    const serverToolResult = await handleServerSideTool(sessionId, agent);
    if (serverToolResult) {
      appendEvent(sessionId, {
        type: "span.model_request_end",
        payload: { model: agent.model, model_usage: result?.usage ?? null },
        origin: "server",
        processedAt: now,
      });
      appendEvent(sessionId, {
        type: "session.status_idle",
        payload: { stop_reason: "custom_tool_call" },
        origin: "server",
        processedAt: now,
      });
      updateSessionStatus(sessionId, "idle", "custom_tool_call");

      const resultContent = [{ type: "text", text: serverToolResult.text }];

      // Write the spawn result as response.json into the container.
      const sprName = getSessionRow(sessionId)?.sprite_name;
      if (sprName) {
        await writeToolBridgeResponse(provider, sprName, resultContent, "spawn_agent response");
      }

      // Re-run the turn with tool result re-entry.
      await runTurn(sessionId, [{
        kind: "tool_result",
        eventId: `server_tool_${nowMs()}`,
        custom_tool_use_id: serverToolResult.toolUseId,
        content: [{ type: "text", text: serverToolResult.text }],
      }], _depth + 1);
      return;
    }
  }

  appendEvent(sessionId, {
    type: "span.model_request_end",
    payload: {
      model: agent.model,
      model_usage: result?.usage ?? null,
    },
    origin: "server",
    processedAt: now,
  });

  appendEvent(sessionId, {
    type: "session.status_idle",
    payload: { stop_reason: stopReason },
    origin: "server",
    processedAt: now,
  });
  updateSessionStatus(sessionId, "idle", stopReason);
  setIdleSince(sessionId, now);

  // Outcome evaluation: if the session has outcome criteria with a rubric,
  // run the grader on the last agent message. A grader failure is logged and
  // does not fail the turn.
  if (stopReason === "end_turn") {
    const criteria = getOutcomeCriteria(sessionId) as {
      description?: string;
      rubric?: string;
      max_iterations?: number;
      grader_iteration?: number;
    } | null;

    if (criteria?.rubric) {
      try {
        const { runGraderEvaluation } = await import("./grader");
        const maxIter = criteria.max_iterations ?? 3;
        const iteration = criteria.grader_iteration ?? 0;

        // Extract the text of the most recent non-empty agent.message.
        const recentEvents = listEvents(sessionId, { limit: 50, order: "desc" });
        let lastAgentText = "";
        for (const evt of recentEvents) {
          if (evt.type === "agent.message") {
            const payload = JSON.parse(evt.payload_json) as { content?: Array<{ type: string; text?: string }> };
            const text = (payload.content ?? [])
              .filter((b) => b.type === "text" && b.text)
              .map((b) => b.text!)
              .join("");
            if (text) { lastAgentText = text; break; }
          }
        }

        appendEvent(sessionId, {
          type: "span.outcome_evaluation_start",
          payload: { iteration },
          origin: "server",
          processedAt: nowMs(),
        });

        const evaluation = await runGraderEvaluation(
          criteria.rubric,
          lastAgentText,
          agent.model,
        );

        // Track grader token usage in session stats.
        if (evaluation.usage.input_tokens || evaluation.usage.output_tokens) {
          bumpSessionStats(sessionId, {}, {
            input_tokens: evaluation.usage.input_tokens,
            output_tokens: evaluation.usage.output_tokens,
            cost_usd: 0,
          });
        }

        // Persist the incremented iteration counter.
        setOutcomeCriteria(sessionId, {
          ...criteria,
          grader_iteration: iteration + 1,
        });

        const finalResult = iteration + 1 >= maxIter && evaluation.result === "needs_revision"
          ? "max_iterations_reached"
          : evaluation.result;

        appendEvent(sessionId, {
          type: "span.outcome_evaluation_end",
          payload: {
            result: finalResult,
            iteration,
            feedback: evaluation.feedback,
          },
          origin: "server",
          processedAt: nowMs(),
        });

        // Re-run if the grader wants a revision and the iteration cap allows it.
        if (evaluation.result === "needs_revision" && iteration + 1 < maxIter) {
          await runTurn(sessionId, [{
            kind: "text",
            eventId: `grader_feedback_${nowMs()}`,
            text: `[Grader feedback — iteration ${iteration + 1}/${maxIter}]\n\n${evaluation.feedback}`,
          }], _depth + 1);
          return; // recursive runTurn handles the rest
        }
      } catch (err) {
        console.warn(`[driver] outcome evaluation failed for ${sessionId}:`, err);
      }
    }
  }

  scheduleDrain(sessionId);
}
626
+
/** Outcome of a tool call that the server answered itself. */
export interface ServerToolResult {
  /** Id of the tool use being answered: the payload's `tool_use_id`, else the event id. */
  toolUseId: string;
  /** Text to hand back to the agent; failures are reported as `Error: ...`. */
  text: string;
}

/**
 * General server-side tool dispatcher. Inspects the most recent
 * `agent.custom_tool_use` event (within the last 20 events) and delegates to
 * the matching handler:
 *   - `spawn_agent` → thread orchestrator (only when the agent has threads enabled)
 *   - `memory_*`    → memory tool handler (Phase 3, not implemented yet)
 *
 * @returns The tool result, or null when the latest custom tool call is not a
 *   server-side tool, lacks `agent_id`/`prompt`, or has an unreadable payload.
 */
async function handleServerSideTool(
  sessionId: string,
  agent: Agent,
): Promise<ServerToolResult | null> {
  const recentEvents = listEvents(sessionId, { limit: 20, order: "desc" });
  for (const evt of recentEvents) {
    if (evt.type !== "agent.custom_tool_use") continue;

    // A payload that cannot be read must not throw out of the caller: treat
    // it as "not a server-side tool" so the turn still ends normally.
    let payload: {
      name?: string;
      tool_use_id?: string;
      input?: Record<string, unknown>;
    };
    try {
      const parsed: unknown = JSON.parse(evt.payload_json);
      if (parsed === null || typeof parsed !== "object") {
        throw new Error("payload is not a JSON object");
      }
      payload = parsed as typeof payload;
    } catch (err) {
      console.warn(`[driver] unreadable custom_tool_use payload for ${sessionId}:`, err);
      return null;
    }

    const toolName = payload.name;
    const toolUseId = payload.tool_use_id ?? evt.id;

    // ── spawn_agent ──
    if (toolName === "spawn_agent" && agent.threads_enabled) {
      const input = payload.input as { agent_id?: string; prompt?: string } | undefined;
      if (input?.agent_id && input?.prompt) {
        // If the agent declares a callable_agents list, the spawned agent
        // must be on it; an empty list places no restriction.
        if (agent.callable_agents.length > 0) {
          const allowed = agent.callable_agents.some((ca) => ca.id === input.agent_id);
          if (!allowed) {
            return { toolUseId, text: `Error: agent ${input.agent_id} is not in callable_agents list` };
          }
        }
        const { handleSpawnAgent } = await import("./threads");
        const depth = getSessionRow(sessionId)?.thread_depth ?? 0;
        try {
          const text = await handleSpawnAgent(
            sessionId,
            input.agent_id,
            input.prompt,
            depth,
          );
          return { toolUseId, text };
        } catch (err) {
          // Report the failure to the agent as the tool's result.
          const msg = err instanceof Error ? err.message : String(err);
          return { toolUseId, text: `Error: ${msg}` };
        }
      }
    }

    // ── memory_* tools (Phase 3 — placeholder) ──
    // if (toolName?.startsWith("memory_")) { ... }

    break; // Only check the most recent custom_tool_use
  }
  return null;
}
694
+
/**
 * Start a follow-up turn for any user inputs that queued up while the
 * previous turn was running. Does nothing when the queue is empty.
 *
 * The follow-up turn is fire-and-forget: it is not awaited, and a rejection
 * is only logged.
 *
 * NOTE(review): concurrent turns for one session are said to be prevented by
 * the session status flag, but runTurn itself does not check that flag before
 * starting — confirm the guard lives in the caller/actor.
 */
function scheduleDrain(sessionId: string): void {
  const queued = drainPendingUserInputs(sessionId);
  if (queued.length > 0) {
    const onFailure = (err: unknown): void => {
      console.error(`[driver] scheduleDrain runTurn failed:`, err);
    };
    void runTurn(sessionId, queued).catch(onFailure);
  }
}
710
+
711
+ // ---------------------------------------------------------------------------
712
+ // Permission confirmation support
713
+ // ---------------------------------------------------------------------------
714
+
/**
 * Session IDs for which a confirmation request has already been emitted and
 * not yet answered. Used to avoid emitting the same request again when the
 * poller fires repeatedly while the sentinel is still present.
 *
 * The set is kept on globalThis (created lazily on first access) so that it
 * outlives re-evaluation of this module, e.g. HMR reloads during development.
 */
type GlobalDriverState = typeof globalThis & { __caPendingConfirmations?: Set<string> };
const gd = globalThis as GlobalDriverState;
function getPendingConfirmations(): Set<string> {
  let registry = gd.__caPendingConfirmations;
  if (!registry) {
    registry = new Set();
    gd.__caPendingConfirmations = registry;
  }
  return registry;
}
728
+
/**
 * One poll of the permission bridge. Checks whether the pending sentinel file
 * exists in the container; if so, reads the request file and emits
 * `agent.tool_confirmation_request` on the event bus, then marks the session
 * as having a pending confirmation so later polls do not emit it again.
 * The flag is cleared by `writePermissionResponse`.
 *
 * Failures are non-fatal: a failed existence probe returns silently, and an
 * unreadable or non-object request is logged and skipped (the next poll
 * retries).
 *
 * @param sessionId Session the request belongs to.
 * @param spriteName Container to inspect.
 * @param provider Container provider used to exec inside the container.
 */
async function checkPermissionSentinel(
  sessionId: string,
  spriteName: string,
  provider: ContainerProvider,
): Promise<void> {
  const pending = getPendingConfirmations();
  if (pending.has(sessionId)) return;

  try {
    const probe = await provider.exec(
      spriteName,
      ["test", "-f", PERMISSION_BRIDGE_PENDING_PATH],
    );
    if (probe.exit_code !== 0) return; // No pending sentinel
  } catch {
    return; // Container exec failed — not fatal
  }

  // Sentinel exists — read the request.
  let request: { tool_name?: string; tool_input?: unknown; tool_use_id?: string };
  try {
    const read = await provider.exec(
      spriteName,
      ["cat", PERMISSION_BRIDGE_REQUEST_PATH],
    );
    const parsed: unknown = JSON.parse(read.stdout);
    // e.g. the literal `null` parses fine but has no fields to read.
    if (parsed === null || typeof parsed !== "object") {
      throw new Error("permission request is not a JSON object");
    }
    request = parsed as typeof request;
  } catch (err) {
    console.warn(`[driver] failed to read permission request for ${sessionId}:`, err);
    return;
  }

  // Re-check after the awaits: another poll may have started before this one
  // finished and already emitted the request. The check and the add run
  // without an await in between, so only one poll gets past this point.
  if (pending.has(sessionId)) return;
  pending.add(sessionId);

  // Emit the confirmation request event.
  appendEvent(sessionId, {
    type: "agent.tool_confirmation_request",
    payload: {
      tool_name: request.tool_name ?? "unknown",
      tool_input: request.tool_input ?? {},
      tool_use_id: request.tool_use_id ?? "",
    },
    origin: "server",
    processedAt: nowMs(),
  });
}
781
+
/**
 * Deliver the client's allow/deny decision to the permission hook by writing
 * the response file inside the session's container, then clear the session's
 * pending-confirmation flag so the poller can report later requests.
 *
 * If the session has no container, a warning is logged and nothing else
 * happens (the flag is left untouched). A failed write is logged as a
 * warning; the flag is cleared regardless.
 *
 * @param sessionId Session whose container receives the response.
 * @param result The client's decision.
 * @param denyMessage Optional explanation; omitted from the JSON when not given.
 */
export async function writePermissionResponse(
  sessionId: string,
  result: "allow" | "deny",
  denyMessage?: string,
): Promise<void> {
  const sessionRow = getSessionRow(sessionId);
  if (!sessionRow?.sprite_name) {
    console.warn(`[driver] no sprite for session ${sessionId}, cannot write permission response`);
    return;
  }

  const environment = getEnvironment(sessionRow.environment_id);
  const containers = await resolveContainerProvider(environment?.config?.provider);

  // An undefined deny_message is dropped by JSON.stringify.
  const decision = { result, deny_message: denyMessage ?? undefined };
  const writeCommand = ["bash", "-c", `cat > ${PERMISSION_BRIDGE_RESPONSE_PATH}`];

  try {
    await containers.exec(sessionRow.sprite_name, writeCommand, {
      stdin: JSON.stringify(decision),
    });
  } catch (err) {
    console.warn(`[driver] failed to write permission response for ${sessionId}:`, err);
  }

  // Allow the poller to surface the next request for this session.
  getPendingConfirmations().delete(sessionId);
}