@agentstep/agent-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/package.json +45 -0
  2. package/src/auth/middleware.ts +38 -0
  3. package/src/backends/claude/args.ts +88 -0
  4. package/src/backends/claude/index.ts +193 -0
  5. package/src/backends/claude/permission-hook.ts +152 -0
  6. package/src/backends/claude/tool-bridge.ts +211 -0
  7. package/src/backends/claude/translator.ts +209 -0
  8. package/src/backends/claude/wrapper-script.ts +45 -0
  9. package/src/backends/codex/args.ts +69 -0
  10. package/src/backends/codex/auth.ts +35 -0
  11. package/src/backends/codex/index.ts +57 -0
  12. package/src/backends/codex/setup.ts +37 -0
  13. package/src/backends/codex/translator.ts +223 -0
  14. package/src/backends/codex/wrapper-script.ts +26 -0
  15. package/src/backends/factory/args.ts +45 -0
  16. package/src/backends/factory/auth.ts +30 -0
  17. package/src/backends/factory/index.ts +56 -0
  18. package/src/backends/factory/setup.ts +34 -0
  19. package/src/backends/factory/translator.ts +139 -0
  20. package/src/backends/factory/wrapper-script.ts +33 -0
  21. package/src/backends/gemini/args.ts +44 -0
  22. package/src/backends/gemini/auth.ts +30 -0
  23. package/src/backends/gemini/index.ts +53 -0
  24. package/src/backends/gemini/setup.ts +34 -0
  25. package/src/backends/gemini/translator.ts +139 -0
  26. package/src/backends/gemini/wrapper-script.ts +26 -0
  27. package/src/backends/opencode/args.ts +53 -0
  28. package/src/backends/opencode/auth.ts +53 -0
  29. package/src/backends/opencode/index.ts +70 -0
  30. package/src/backends/opencode/mcp.ts +67 -0
  31. package/src/backends/opencode/setup.ts +54 -0
  32. package/src/backends/opencode/translator.ts +168 -0
  33. package/src/backends/opencode/wrapper-script.ts +46 -0
  34. package/src/backends/registry.ts +38 -0
  35. package/src/backends/shared/ndjson.ts +29 -0
  36. package/src/backends/shared/translator-types.ts +69 -0
  37. package/src/backends/shared/wrap-prompt.ts +17 -0
  38. package/src/backends/types.ts +85 -0
  39. package/src/config/index.ts +95 -0
  40. package/src/db/agents.ts +185 -0
  41. package/src/db/api_keys.ts +78 -0
  42. package/src/db/batch.ts +142 -0
  43. package/src/db/client.ts +81 -0
  44. package/src/db/environments.ts +127 -0
  45. package/src/db/events.ts +208 -0
  46. package/src/db/memory.ts +143 -0
  47. package/src/db/migrations.ts +295 -0
  48. package/src/db/proxy.ts +37 -0
  49. package/src/db/sessions.ts +295 -0
  50. package/src/db/vaults.ts +110 -0
  51. package/src/errors.ts +53 -0
  52. package/src/handlers/agents.ts +194 -0
  53. package/src/handlers/batch.ts +41 -0
  54. package/src/handlers/docs.ts +87 -0
  55. package/src/handlers/environments.ts +154 -0
  56. package/src/handlers/events.ts +234 -0
  57. package/src/handlers/index.ts +12 -0
  58. package/src/handlers/memory.ts +141 -0
  59. package/src/handlers/openapi.ts +14 -0
  60. package/src/handlers/sessions.ts +223 -0
  61. package/src/handlers/stream.ts +76 -0
  62. package/src/handlers/threads.ts +26 -0
  63. package/src/handlers/ui/app.js +984 -0
  64. package/src/handlers/ui/index.html +112 -0
  65. package/src/handlers/ui/style.css +164 -0
  66. package/src/handlers/ui.ts +1281 -0
  67. package/src/handlers/vaults.ts +99 -0
  68. package/src/http.ts +35 -0
  69. package/src/index.ts +104 -0
  70. package/src/init.ts +227 -0
  71. package/src/openapi/registry.ts +8 -0
  72. package/src/openapi/schemas.ts +625 -0
  73. package/src/openapi/spec.ts +691 -0
  74. package/src/providers/apple.ts +220 -0
  75. package/src/providers/daytona.ts +217 -0
  76. package/src/providers/docker.ts +264 -0
  77. package/src/providers/e2b.ts +203 -0
  78. package/src/providers/fly.ts +276 -0
  79. package/src/providers/modal.ts +222 -0
  80. package/src/providers/podman.ts +206 -0
  81. package/src/providers/registry.ts +28 -0
  82. package/src/providers/shared.ts +11 -0
  83. package/src/providers/sprites.ts +55 -0
  84. package/src/providers/types.ts +73 -0
  85. package/src/providers/vercel.ts +208 -0
  86. package/src/proxy/forward.ts +111 -0
  87. package/src/queue/index.ts +111 -0
  88. package/src/sessions/actor.ts +53 -0
  89. package/src/sessions/bus.ts +155 -0
  90. package/src/sessions/driver.ts +818 -0
  91. package/src/sessions/grader.ts +120 -0
  92. package/src/sessions/interrupt.ts +14 -0
  93. package/src/sessions/sweeper.ts +136 -0
  94. package/src/sessions/threads.ts +126 -0
  95. package/src/sessions/tools.ts +50 -0
  96. package/src/shutdown.ts +78 -0
  97. package/src/sprite/client.ts +294 -0
  98. package/src/sprite/exec.ts +161 -0
  99. package/src/sprite/lifecycle.ts +339 -0
  100. package/src/sprite/pool.ts +65 -0
  101. package/src/sprite/setup.ts +159 -0
  102. package/src/state.ts +61 -0
  103. package/src/types.ts +339 -0
  104. package/src/util/clock.ts +7 -0
  105. package/src/util/ids.ts +11 -0
@@ -0,0 +1,223 @@
1
+ /**
2
+ * Stateful translator: codex NDJSON → Managed Agents events.
3
+ *
4
+ * Ported from
5
+ *
6
+ *
7
+ * Codex's event model (emitted by `codex exec --json`):
8
+ * - `thread.started` — session init with `thread_id`
9
+ * - `item.completed` — work item finished; `item.type` determines the shape:
10
+ * - `agent_message` → text response → `agent.message`
11
+ * - `command_execution` → shell command + output → [tool_use, tool_result]
12
+ * - `mcp_tool_call` → MCP tool call + result → [tool_use, tool_result]
13
+ * - `file_change` → file edit/add/delete → tool_use
14
+ * - `turn.completed` — end of an internal reasoning turn; accumulates usage
15
+ * - `error` — stream-level error
16
+ *
17
+ * Key differences from opencode:
18
+ * - translate() can return ARRAYS of events (for command_execution and
19
+ * mcp_tool_call where we split the codex "completed item" into both a
20
+ * tool_use and a tool_result in our Managed Agents timeline)
21
+ * - No inline "result" event from codex — usage is accumulated across
22
+ * turn.completed events and the final TurnResult is synthesized by
23
+ * getTurnResult() when the stream ends. The driver calls this after the
24
+ * stream loop exits, which is equivalent to a dedicated finalize()
25
+ * hook without requiring a new interface method.
26
+ */
27
+ import type {
28
+ ToolClass,
29
+ TranslatedEvent,
30
+ Translator,
31
+ TranslatorOptions,
32
+ TurnResult,
33
+ TurnUsage,
34
+ } from "../shared/translator-types";
35
+
36
/**
 * Subset of the `item` payload carried by codex `item.completed` events.
 *
 * Every field is optional: the payload arrives as untyped JSON and each
 * `item.type` populates a different subset of these keys.
 */
interface CodexItem {
  type?: string;
  id?: string;
  text?: string;
  command?: string;
  output?: string;
  result?: string;
  name?: string;
  input?: unknown;
  path?: string;
  action?: string;
  // NOTE(review): some codex builds appear to report command output as
  // `aggregated_output` and the process status as `exit_code` — confirm
  // against the codex CLI version this backend targets. Both are only used
  // as fallbacks, so their absence leaves behavior unchanged.
  aggregated_output?: string;
  exit_code?: number | null;
}

/**
 * Create a stateful translator that turns codex NDJSON events into Managed
 * Agents events.
 *
 * The returned object accumulates state across `translate()` calls: the
 * codex thread id, the number of `turn.completed` events, summed token/cost
 * usage, the last assistant text, and whether any custom tool was invoked.
 *
 * @param opts Translator options; `opts.customToolNames` decides which tool
 *   names are classified as "custom" rather than "builtin".
 * @returns A Translator exposing `translate`, `getBackendSessionId`,
 *   `getTurnResult` and `sawCustomToolUse`.
 */
export function createCodexTranslator(opts: TranslatorOptions): Translator {
  let sessionId: string | null = null;
  let turnCount = 0;
  let lastText = "";
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let totalCostUsd = 0;
  let sawCustom = false;

  /** A tool is "custom" iff its name is in `opts.customToolNames`. */
  function classify(name: string): ToolClass {
    return opts.customToolNames.has(name) ? "custom" : "builtin";
  }

  /**
   * Append a tool_use event for `name` and, for builtin tools only, the
   * matching tool_result when `result` is supplied. Custom tools never get a
   * tool_result here; they only flip the `sawCustom` flag.
   */
  function pushToolCall(
    out: TranslatedEvent[],
    id: string,
    name: string,
    input: unknown,
    result?: { content: string; isError: boolean },
  ): void {
    const cls = classify(name);
    if (cls === "custom") sawCustom = true;

    out.push({
      type: cls === "custom" ? "agent.custom_tool_use" : "agent.tool_use",
      payload: { tool_use_id: id, name, input },
    });

    if (result && cls !== "custom") {
      out.push({
        type: "agent.tool_result",
        payload: {
          tool_use_id: id,
          content: result.content,
          is_error: result.isError,
        },
      });
    }
  }

  /** Translate the `item` of one `item.completed` event into `out`. */
  function translateItem(item: CodexItem, out: TranslatedEvent[]): void {
    if (item.type === "reasoning" && typeof item.text === "string") {
      out.push({
        type: "agent.thinking",
        payload: { content: [{ type: "thinking", thinking: item.text }] },
      });
      return;
    }

    if (item.type === "agent_message" && typeof item.text === "string") {
      lastText = item.text;
      out.push({
        type: "agent.message",
        payload: { content: [{ type: "text", text: item.text }] },
      });
      return;
    }

    // All remaining item types are tool-like and need an id to correlate
    // the tool_use with its tool_result.
    if (typeof item.id !== "string") return;

    switch (item.type) {
      case "command_execution":
        pushToolCall(out, item.id, "command", { command: item.command ?? "" }, {
          content: item.output ?? item.result ?? item.aggregated_output ?? "",
          // Only flag an error when an exit code is present and non-zero;
          // without one the result is reported as successful, as before.
          isError: typeof item.exit_code === "number" && item.exit_code !== 0,
        });
        return;

      case "mcp_tool_call":
        pushToolCall(out, item.id, item.name ?? "mcp_tool", item.input ?? {}, {
          // Fall back to `result` the same way command_execution does, so an
          // MCP result delivered under that key is not dropped.
          content: item.output ?? item.result ?? "",
          isError: false,
        });
        return;

      case "file_change":
        // File changes surface as a tool_use only; no tool_result is emitted.
        pushToolCall(out, item.id, "file_edit", {
          path: item.path ?? "",
          action: item.action ?? "edit",
        });
        return;

      default:
        // Unknown item type — dropped silently for forward compatibility.
        return;
    }
  }

  /**
   * Translate one parsed NDJSON record. Returns zero or more events; records
   * that only update internal state (or are unrecognised) return `[]`.
   */
  function translate(raw: Record<string, unknown>): TranslatedEvent[] {
    const out: TranslatedEvent[] = [];
    if (!raw || typeof raw !== "object") return out;
    const type = String(raw.type ?? "");

    if (type === "thread.started") {
      // Only tracks the thread id; no event is emitted for session start.
      if (typeof raw.thread_id === "string") sessionId = raw.thread_id;
      return out;
    }

    if (type === "item.completed" && raw.item && typeof raw.item === "object") {
      translateItem(raw.item as CodexItem, out);
      return out;
    }

    if (type === "turn.completed") {
      turnCount++;
      const usage = raw.usage;
      if (usage && typeof usage === "object") {
        // Only the known numeric fields are accumulated; any other keys in
        // the usage object are ignored.
        const fields = usage as Record<string, unknown>;
        if (typeof fields.input_tokens === "number") {
          totalInputTokens += fields.input_tokens;
        }
        if (typeof fields.output_tokens === "number") {
          totalOutputTokens += fields.output_tokens;
        }
        if (typeof fields.cost_usd === "number") {
          totalCostUsd += fields.cost_usd;
        }
      }
      return out;
    }

    // `error` records and unknown event types produce no events here.
    return out;
  }

  /**
   * Synthesize the turn result from accumulated state. Returns null when no
   * `turn.completed` was seen and no assistant text was produced.
   */
  function getTurnResult(): TurnResult | null {
    if (turnCount === 0 && !lastText) return null;
    return {
      stopReason: sawCustom ? "custom_tool_call" : "end_turn",
      usage: {
        input_tokens: totalInputTokens,
        output_tokens: totalOutputTokens,
        cache_read_input_tokens: 0,
        cache_creation_input_tokens: 0,
        cost_usd: totalCostUsd,
      },
      // Report at least one turn when text was seen without turn.completed.
      num_turns: turnCount || 1,
    };
  }

  return {
    translate,
    getBackendSessionId: () => sessionId,
    getTurnResult,
    sawCustomToolUse: () => sawCustom,
  };
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Sprite wrapper script for codex.
3
+ *
4
+ * Identical structure to the claude wrapper because codex has
5
+ * promptViaStdin: true — the wrapper reads env vars from stdin until a
6
+ * blank line, then execs codex with the remaining stdin piped through as
7
+ * the prompt. The trailing `-` in argv tells codex to read from stdin.
8
+ */
9
+ import type { ContainerProvider } from "../../providers/types";
10
+
11
/** Path inside the sprite where the codex wrapper script is written. */
export const CODEX_WRAPPER_PATH = "/tmp/.codex-wrapper";

// Wrapper body: export each stdin line as an env var until the first blank
// line, then exec codex with the wrapper's argv; the rest of stdin is left
// for codex to read.
const SPRITE_WRAPPER_SCRIPT = [
  "#!/bin/bash",
  'while IFS= read -r line; do [ -z "$line" ] && break; export "$line"; done',
  'exec codex "$@"',
].join("\n");

/**
 * Write the codex wrapper script to CODEX_WRAPPER_PATH on the given sprite
 * and mark it executable.
 *
 * @param spriteName Name of the sprite to run the install command on.
 * @param provider Container provider used to execute the command.
 * @throws Error when the install command exits with a non-zero code, so a
 *   missing wrapper is reported at install time rather than on first use.
 */
export async function installCodexWrapper(spriteName: string, provider: ContainerProvider): Promise<void> {
  // The script is embedded in a single-quoted shell string, so every single
  // quote must be closed, escaped, and reopened.
  const escaped = SPRITE_WRAPPER_SCRIPT.replace(/'/g, "'\\''");
  const result = await provider.exec(spriteName, [
    "bash",
    "-c",
    `printf '%s' '${escaped}' > ${CODEX_WRAPPER_PATH} && chmod +x ${CODEX_WRAPPER_PATH}`,
  ]);
  if (result.exit_code !== 0) {
    throw new Error(
      `codex wrapper install failed (${result.exit_code}): ${result.stderr.slice(0, 500)}`,
    );
  }
}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Build the `droid exec` argv for one turn.
3
+ *
4
+ * Factory CLI constraints:
5
+ * - Uses `exec` subcommand (not `-p`)
6
+ * - `--output-format stream-json` for NDJSON streaming
7
+ * - `--auto high` for headless non-interactive execution
8
+ * - `--session-id <session_id>` on turn >= 2
9
+ * - `--model <model>` if specified on the agent
10
+ * - No --system-prompt flag — system prompt is wrapped into the user
11
+ * prompt text via the shared wrapPromptWithSystem utility
12
+ * - The prompt is positional (last arg), NOT a flag. The wrapper script
13
+ * captures it from stdin and passes it as `"$PROMPT"` at the end.
14
+ */
15
+ import type { Agent } from "../../types";
16
+
17
/** Inputs needed to assemble the argv for one `droid exec` turn. */
export interface BuildFactoryArgsInput {
  agent: Agent;
  /** Session id from the previous factory turn (passed as --session-id), or null. */
  backendSessionId: string | null;
}

/**
 * Assemble the argv for a single `droid exec` turn.
 *
 * The fixed flags come first, followed by `--session-id` when a prior
 * session id is supplied and `--model` when the agent specifies one. The
 * prompt itself is never part of the returned array.
 *
 * @param input Agent plus optional backend session id.
 * @returns The argument list, starting with the `exec` subcommand.
 */
export function buildFactoryArgs(input: BuildFactoryArgsInput): string[] {
  const { agent, backendSessionId } = input;

  const resumeFlags: string[] = backendSessionId ? ["--session-id", backendSessionId] : [];
  const modelFlags: string[] = agent.model ? ["--model", agent.model] : [];

  return [
    "exec",
    "--output-format",
    "stream-json",
    "--auto",
    "high",
    ...resumeFlags,
    ...modelFlags,
  ];
}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Auth env + create-time validation for the factory backend.
3
+ *
4
+ * Factory CLI reads FACTORY_API_KEY from the environment. We forward it
5
+ * from config.factoryApiKey (which cascades from process.env.FACTORY_API_KEY
6
+ * or the settings table).
7
+ */
8
+ import { getConfig } from "../../config";
9
+
10
/**
 * Build the environment variables that carry factory credentials.
 *
 * @returns `{ FACTORY_API_KEY }` when `factoryApiKey` is set in the config,
 *   otherwise an empty object.
 */
export function buildFactoryAuthEnv(): Record<string, string> {
  const { factoryApiKey } = getConfig();
  return factoryApiKey ? { FACTORY_API_KEY: factoryApiKey } : {};
}
18
+
19
/**
 * Check whether the factory backend has the configuration it needs to run.
 *
 * @returns null when `factoryApiKey` is configured, otherwise a
 *   human-readable error message.
 */
export function validateFactoryRuntime(): string | null {
  return getConfig().factoryApiKey
    ? null
    : "factory backend requires FACTORY_API_KEY to be set";
}
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Factory backend: drives Factory's `droid exec` on sprites.dev containers.
3
+ *
4
+ * Factory CLI uses `exec` subcommand with the prompt as a positional arg
5
+ * (like opencode). The wrapper script captures stdin and re-passes it as
6
+ * the trailing positional argv.
7
+ *
8
+ * Custom tool re-entry is NOT supported by factory — droid exec has no
9
+ * equivalent of claude's --input-format stream-json. buildTurn rejects
10
+ * toolResults.length > 0 with an invalid_request_error.
11
+ */
12
+ import { ApiError } from "../../errors";
13
+ import type { Backend, BuildTurnInput, BuildTurnResult } from "../types";
14
+ import type { TranslatorOptions } from "../shared/translator-types";
15
+ import { wrapPromptWithSystem } from "../shared/wrap-prompt";
16
+ import { buildFactoryArgs } from "./args";
17
+ import { buildFactoryAuthEnv, validateFactoryRuntime } from "./auth";
18
+ import { createFactoryTranslator } from "./translator";
19
+ import { FACTORY_WRAPPER_PATH } from "./wrapper-script";
20
+ import { prepareFactoryOnSprite } from "./setup";
21
+
22
/**
 * Build the argv, env and stdin for one factory turn.
 *
 * @param input Turn description: agent, prior backend session id, prompt
 *   text and any pending custom tool results.
 * @returns argv from buildFactoryArgs, env from buildFactoryAuthEnv, and the
 *   system-wrapped prompt as stdin.
 * @throws ApiError (400, invalid_request_error) when `toolResults` is
 *   non-empty, since this backend rejects custom tool result re-entry.
 */
function buildTurn(input: BuildTurnInput): BuildTurnResult {
  const { agent, backendSessionId, promptText, toolResults } = input;

  const hasToolResults = toolResults.length > 0;
  if (hasToolResults) {
    throw new ApiError(
      400,
      "invalid_request_error",
      "factory backend does not support user.custom_tool_result re-entry in v1",
    );
  }

  // stdin carries only the wrapped prompt; the prompt never appears in argv.
  return {
    argv: buildFactoryArgs({ agent, backendSessionId }),
    env: buildFactoryAuthEnv(),
    stdin: wrapPromptWithSystem(promptText, agent.system),
  };
}
39
+
40
/**
 * Backend descriptor for factory: bundles the wrapper path, turn builder,
 * translator factory, sprite preparation step and runtime validation.
 */
export const factoryBackend: Backend = {
  name: "factory" as Backend["name"],
  wrapperPath: FACTORY_WRAPPER_PATH,
  buildTurn,
  createTranslator: (translatorOpts: TranslatorOptions) => {
    return createFactoryTranslator(translatorOpts);
  },
  prepareOnSprite: (spriteName, containerProvider) => {
    return prepareFactoryOnSprite(spriteName, containerProvider);
  },

  validateRuntime: validateFactoryRuntime,
};
49
+
50
+ export {
51
+ buildFactoryArgs,
52
+ buildFactoryAuthEnv,
53
+ createFactoryTranslator,
54
+ prepareFactoryOnSprite,
55
+ FACTORY_WRAPPER_PATH,
56
+ };
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Install factory (droid) CLI on a freshly-created sprite.
3
+ *
4
+ * Mirrors codex/setup.ts with the same sentinel + symlink pattern.
5
+ * Factory CLI is installed via npm from the @factory/cli package.
6
+ */
7
+ import type { ContainerProvider } from "../../providers/types";
8
+ import { installFactoryWrapper } from "./wrapper-script";
9
+
10
/** File name (under $HOME) that marks a completed factory CLI install. */
const SENTINEL_NAME = ".claude-agents-factory-installed";

/**
 * Install the factory wrapper script and the `droid` CLI on a sprite.
 *
 * The CLI install is skipped when the sentinel file already exists. On a
 * fresh sprite the script installs `@factory/cli` globally via npm, tries to
 * symlink `droid` into /usr/local/bin when npm's prefix is elsewhere,
 * verifies that one of the two `droid` locations runs, and only then
 * creates the sentinel.
 *
 * @param spriteName Name of the sprite to prepare.
 * @param provider Container provider used to run commands on the sprite.
 * @throws Error when the install script exits with a non-zero code.
 */
export async function prepareFactoryOnSprite(spriteName: string, provider: ContainerProvider): Promise<void> {
  await installFactoryWrapper(spriteName, provider);

  // The steps are joined with `&&`, and in shell `&&` / `||` share one
  // precedence level. A bare `a || b` step would therefore also act as the
  // fallback for every EARLIER failure (e.g. a failed `npm install`) and
  // could let the sentinel be written anyway. The version check is grouped
  // in braces so its `||` only covers the two droid locations, and the
  // symlink is made explicitly best-effort to keep that fallback meaningful.
  const script = [
    "set -euo pipefail",
    `SENTINEL="$HOME/${SENTINEL_NAME}"`,
    'if [ -f "$SENTINEL" ]; then exit 0; fi',
    "npm install -g @factory/cli",
    "PREFIX=$(npm config get prefix)",
    'if [ "$PREFIX" != "/usr/local" ]; then ln -sf "$PREFIX/bin/droid" /usr/local/bin/droid || true; fi',
    '{ /usr/local/bin/droid --version || "$PREFIX/bin/droid" --version; }',
    'touch "$SENTINEL"',
  ].join(" && ");

  const result = await provider.exec(spriteName, ["bash", "-c", script], {
    timeoutMs: 5 * 60_000,
  });
  if (result.exit_code !== 0) {
    throw new Error(
      `factory install failed (${result.exit_code}): ${result.stderr.slice(0, 500)}`,
    );
  }
}
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Stateful translator: Factory CLI stream-json NDJSON -> Managed Agents events.
3
+ *
4
+ * Factory CLI event model (emitted by `droid exec --output-format stream-json`):
5
+ * - `system` (subtype: "init") — session init with `session_id` and `model`
6
+ * - `message` — assistant text (role: "assistant", text: string) NOTE: `text` not `content`
7
+ * - `tool_call` — tool invocation (toolName, id, parameters)
8
+ * - `tool_result` — tool output (id, value, isError)
9
+ * - `completion` — end of turn (finalText, numTurns, durationMs)
10
+ *
11
+ * Maps to Managed Agents events using the same Translator interface as other
12
+ * backends.
13
+ */
14
+ import type {
15
+ ToolClass,
16
+ TranslatedEvent,
17
+ Translator,
18
+ TranslatorOptions,
19
+ TurnResult,
20
+ } from "../shared/translator-types";
21
+
22
/**
 * Create a stateful translator that turns Factory CLI stream-json records
 * into Managed Agents events.
 *
 * State kept across `translate()` calls: the backend session id, the class
 * (custom/builtin) of each tool call by id, the last assistant text, whether
 * a custom tool was invoked, and whether a `completion` record was seen.
 *
 * @param opts Translator options; `opts.customToolNames` decides which tool
 *   names are classified as "custom".
 * @returns A Translator exposing `translate`, `getBackendSessionId`,
 *   `getTurnResult` and `sawCustomToolUse`.
 */
export function createFactoryTranslator(opts: TranslatorOptions): Translator {
  const toolClass = new Map<string, ToolClass>();
  let sessionId: string | null = null;
  let lastText = "";
  let sawCustom = false;
  let sawCompletion = false;
  let numTurns = 0;

  /** A tool is "custom" iff its name is in `opts.customToolNames`. */
  function classify(name: string): ToolClass {
    return opts.customToolNames.has(name) ? "custom" : "builtin";
  }

  /**
   * Render a tool result value as text. Strings pass through, a missing
   * value becomes the empty string (not the JSON literal `""`), and anything
   * else is JSON-encoded.
   */
  function stringifyToolValue(value: unknown): string {
    if (value === undefined || value === null) return "";
    if (typeof value === "string") return value;
    return JSON.stringify(value);
  }

  /**
   * Translate one parsed NDJSON record. Returns zero or more events; records
   * that only update internal state (or are unrecognised) return `[]`.
   */
  function translate(raw: Record<string, unknown>): TranslatedEvent[] {
    const out: TranslatedEvent[] = [];
    if (!raw || typeof raw !== "object") return out;
    const type = String(raw.type ?? "");

    // system init — only records the session id.
    if (type === "system" && raw.subtype === "init") {
      if (typeof raw.session_id === "string") sessionId = raw.session_id;
      return out;
    }

    // assistant message — the text lives under `text`, not `content`.
    if (type === "message" && raw.role === "assistant") {
      const text = typeof raw.text === "string" ? raw.text : "";
      if (text) {
        lastText = text;
        out.push({
          type: "agent.message",
          payload: { content: [{ type: "text", text }] },
        });
      }
      return out;
    }

    // tool call — fields are `toolName`, `id`, `parameters`.
    if (type === "tool_call") {
      const toolName = String(raw.toolName ?? "unknown");
      const toolId = String(raw.id ?? "");
      const parameters = (raw.parameters ?? {}) as Record<string, unknown>;

      const cls = classify(toolName);
      // Remember the class so the matching tool_result can be filtered.
      toolClass.set(toolId, cls);
      if (cls === "custom") sawCustom = true;

      out.push({
        type: cls === "custom" ? "agent.custom_tool_use" : "agent.tool_use",
        payload: {
          tool_use_id: toolId,
          name: toolName,
          input: parameters,
        },
      });
      return out;
    }

    // tool result — fields are `id`, `value`, `isError`.
    if (type === "tool_result") {
      const toolId = String(raw.id ?? "");

      // Results for custom tools are suppressed; results whose id was never
      // seen in a tool_call are still emitted.
      if (toolClass.get(toolId) !== "custom") {
        out.push({
          type: "agent.tool_result",
          payload: {
            tool_use_id: toolId,
            content: stringifyToolValue(raw.value),
            is_error: raw.isError === true,
          },
        });
      }
      return out;
    }

    // completion — end of turn; only updates state.
    if (type === "completion") {
      sawCompletion = true;
      if (typeof raw.finalText === "string") lastText = raw.finalText;
      if (typeof raw.numTurns === "number") numTurns = raw.numTurns;
      return out;
    }

    // Unknown event type — dropped silently for forward compatibility.
    return out;
  }

  /**
   * Synthesize the turn result. Returns null when neither a `completion`
   * record nor any assistant text was seen. Usage is always reported as
   * zeros because no usage fields are read from the stream.
   */
  function getTurnResult(): TurnResult | null {
    if (!sawCompletion && !lastText) return null;
    return {
      stopReason: sawCustom ? "custom_tool_call" : "end_turn",
      usage: {
        input_tokens: 0,
        output_tokens: 0,
        cache_read_input_tokens: 0,
        cache_creation_input_tokens: 0,
        cost_usd: 0,
      },
      // Fall back to one turn when the completion record gave no count.
      num_turns: numTurns || 1,
    };
  }

  return {
    translate,
    getBackendSessionId: () => sessionId,
    getTurnResult,
    sawCustomToolUse: () => sawCustom,
  };
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Sprite wrapper script for factory (droid).
3
+ *
4
+ * Factory's `droid exec` takes the prompt as a positional argument (like
5
+ * opencode), NOT from stdin. The wrapper:
6
+ *
7
+ * 1. Reads env vars from stdin until a blank line
8
+ * 2. Captures the remaining stdin into $PROMPT
9
+ * 3. Execs `droid exec "$@" "$PROMPT"` — the prompt becomes the last
10
+ * positional arg after any flags from argv
11
+ *
12
+ * This mirrors the opencode wrapper pattern exactly.
13
+ */
14
+ import type { ContainerProvider } from "../../providers/types";
15
+
16
/** Path inside the sprite where the factory wrapper script is written. */
export const FACTORY_WRAPPER_PATH = "/tmp/.factory-wrapper";

// Wrapper body: export each stdin line as an env var until the first blank
// line, capture the rest of stdin as the prompt, then exec droid with the
// wrapper's argv followed by the prompt as the final positional argument.
const SPRITE_WRAPPER_SCRIPT = [
  "#!/bin/bash",
  "set -e",
  'while IFS= read -r line; do [ -z "$line" ] && break; export "$line"; done',
  "PROMPT=$(cat)",
  'exec droid "$@" "$PROMPT"',
].join("\n");

/**
 * Write the factory wrapper script to FACTORY_WRAPPER_PATH on the given
 * sprite and mark it executable.
 *
 * @param spriteName Name of the sprite to run the install command on.
 * @param provider Container provider used to execute the command.
 * @throws Error when the install command exits with a non-zero code, so a
 *   missing wrapper is reported at install time rather than on first use.
 */
export async function installFactoryWrapper(spriteName: string, provider: ContainerProvider): Promise<void> {
  // The script is embedded in a single-quoted shell string, so every single
  // quote must be closed, escaped, and reopened.
  const escaped = SPRITE_WRAPPER_SCRIPT.replace(/'/g, "'\\''");
  const result = await provider.exec(spriteName, [
    "bash",
    "-c",
    `printf '%s' '${escaped}' > ${FACTORY_WRAPPER_PATH} && chmod +x ${FACTORY_WRAPPER_PATH}`,
  ]);
  if (result.exit_code !== 0) {
    throw new Error(
      `factory wrapper install failed (${result.exit_code}): ${result.stderr.slice(0, 500)}`,
    );
  }
}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Build the `gemini -p` argv for one turn.
3
+ *
4
+ * Gemini CLI constraints:
5
+ * - `-p` flag for prompt mode (reads prompt from stdin)
6
+ * - `--output-format stream-json` for NDJSON streaming
7
+ * - `--yolo` to bypass permission prompts in headless mode
8
+ * - `--max-turns <N>` to cap reasoning turns
9
+ * - `--resume <session_id>` on turn >= 2
10
+ * - `--model <model>` if specified on the agent
11
+ * - No --system-prompt flag — system prompt is wrapped into the user
12
+ * prompt text via the shared wrapPromptWithSystem utility
13
+ */
14
+ import { getConfig } from "../../config";
15
+ import type { Agent } from "../../types";
16
+
17
/** Inputs needed to assemble the argv for one `gemini -p` turn. */
export interface BuildGeminiArgsInput {
  agent: Agent;
  /** Session id from the previous gemini turn (passed as --resume), or null. */
  backendSessionId: string | null;
  /** Optional turn cap; when omitted the configured `agentMaxTurns` is used. */
  maxTurns?: number;
}

/**
 * Assemble the argv for a single gemini turn.
 *
 * The fixed flags and the `--max-turns` value come first, followed by
 * `--resume` when a prior session id is supplied and `--model` when the
 * agent specifies one.
 *
 * @param input Agent, optional backend session id and optional turn cap.
 * @returns The argument list, starting with `-p`.
 */
export function buildGeminiArgs(input: BuildGeminiArgsInput): string[] {
  const cfg = getConfig();
  const turnCap = String(input.maxTurns ?? cfg.agentMaxTurns);

  const baseFlags: string[] = [
    "-p",
    "--output-format",
    "stream-json",
    "--yolo",
    "--max-turns",
    turnCap,
  ];
  const resumeFlags: string[] = input.backendSessionId ? ["--resume", input.backendSessionId] : [];
  const modelFlags: string[] = input.agent.model ? ["--model", input.agent.model] : [];

  return baseFlags.concat(resumeFlags, modelFlags);
}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Auth env + create-time validation for the gemini backend.
3
+ *
4
+ * Gemini CLI reads GEMINI_API_KEY from the environment. We forward it
5
+ * from config.geminiApiKey (which cascades from process.env.GEMINI_API_KEY
6
+ * or the settings table).
7
+ */
8
+ import { getConfig } from "../../config";
9
+
10
/**
 * Build the environment variables that carry gemini credentials.
 *
 * @returns `{ GEMINI_API_KEY }` when `geminiApiKey` is set in the config,
 *   otherwise an empty object.
 */
export function buildGeminiAuthEnv(): Record<string, string> {
  const { geminiApiKey } = getConfig();
  return geminiApiKey ? { GEMINI_API_KEY: geminiApiKey } : {};
}
18
+
19
/**
 * Check whether the gemini backend has the configuration it needs to run.
 *
 * @returns null when `geminiApiKey` is configured, otherwise a
 *   human-readable error message.
 */
export function validateGeminiRuntime(): string | null {
  return getConfig().geminiApiKey
    ? null
    : "gemini backend requires GEMINI_API_KEY to be set";
}