@dtechvision/fabrik-runtime 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,182 @@
1
+ # @dtechvision/fabrik-runtime
2
+
3
+ Shared TypeScript utilities for Fabrik workflow pods.
4
+
5
+ - **Credential pool** — read from mounted `/etc/fabrik/credentials`, rotate on failure, notify operators
6
+ - **Codex auth rotation** — rotate among `auth.json` / `*.auth.json` credentials for Codex-backed workflows
7
+ - **K8s jobs** — dispatch child verification jobs from a running workflow
8
+ - **JJ shell** — deterministic JJ/Git snapshot, bookmark push, workspace prep
9
+
10
+ ## Import Surface
11
+
12
+ Workflows should import from `@dtechvision/fabrik-runtime/...`.
13
+
14
+ - `@dtechvision/fabrik-runtime/credential-pool`
15
+ - `@dtechvision/fabrik-runtime/codex-auth`
16
+ - `@dtechvision/fabrik-runtime/jj-shell`
17
+ - `@dtechvision/fabrik-runtime/k8s-jobs`
18
+
19
+ For in-cluster Fabrik runs, the Smithers runtime image ships this package in its `node_modules`.
20
+ For local workflow development in another repo, add the package as a dependency from a release or local path.
21
+
22
+ ## Installation
23
+
24
+ Install from npm:
25
+
26
+ ```bash
27
+ bun add @dtechvision/fabrik-runtime
28
+ ```
29
+
30
+ or:
31
+
32
+ ```bash
33
+ npm install @dtechvision/fabrik-runtime
34
+ ```
35
+
36
+ Smithers workflows also need their normal workflow dependencies in the consuming repo:
37
+
38
+ ```bash
39
+ bun add smithers-orchestrator zod
40
+ ```
41
+
42
+ or:
43
+
44
+ ```bash
45
+ npm install smithers-orchestrator zod
46
+ ```
47
+
48
+ Package releases follow the same `v*` tag version as the Fabrik CLI release flow.
49
+
50
+ ## Smithers Integration
51
+
52
+ Use the package from ordinary Smithers workflows:
53
+
54
+ ```ts
55
+ /** @jsxImportSource smithers-orchestrator */
56
+ import { createSmithers, Task, Workflow } from "smithers-orchestrator";
57
+ import { z } from "zod";
58
+ import { withCodexAuthPoolEnv } from "@dtechvision/fabrik-runtime/codex-auth";
59
+ import { prepareWorkspaces } from "@dtechvision/fabrik-runtime/jj-shell";
60
+
61
+ const { smithers, outputs } = createSmithers(
62
+ {
63
+ report: z.object({
64
+ codexHomeSet: z.boolean(),
65
+ jjHelpersLoaded: z.boolean(),
66
+ }),
67
+ },
68
+ { dbPath: process.env.SMITHERS_DB_PATH ?? ".smithers/runtime-check.db" },
69
+ );
70
+
71
+ export default smithers(() => (
72
+ <Workflow name="runtime-package-check">
73
+ <Task id="verify" output={outputs.report}>
74
+ {async () => {
75
+ const env = withCodexAuthPoolEnv({});
76
+ return {
77
+ codexHomeSet: typeof env.CODEX_HOME === "string" && env.CODEX_HOME.length > 0,
78
+ jjHelpersLoaded: typeof prepareWorkspaces === "function",
79
+ };
80
+ }}
81
+ </Task>
82
+ </Workflow>
83
+ ));
84
+ ```
85
+
86
+ Run it locally with Smithers from a repo that has the following dependencies installed:
87
+
88
+ - `@dtechvision/fabrik-runtime`
89
+ - `smithers-orchestrator`
90
+ - `zod`
91
+
92
+ Then:
93
+
94
+ ```bash
95
+ bunx smithers run path/to/workflow.tsx --run-id runtime-package-check
96
+ ```
97
+
98
+ The workflow file should live in the consuming project tree so normal Node/Bun package resolution can find the installed dependencies.
99
+
100
+ ## Credentials
101
+
102
+ Operators manage `fabrik-credentials` in `fabrik-system` via kubectl. The CLI mirrors it into the run namespace at dispatch time. The secret is directory-mounted (no subPath) at `/etc/fabrik/credentials/` so running pods observe file replacements.
103
+
104
+ ```ts
105
+ import { injectCredentialEnv } from "@dtechvision/fabrik-runtime/credential-pool";
106
+
107
+ // Reads /etc/fabrik/credentials/ANTHROPIC_API_KEY → process.env.ANTHROPIC_API_KEY
108
+ injectCredentialEnv("ANTHROPIC_API_KEY");
109
+ ```
110
+
111
+ For file-pool rotation (e.g. multiple Codex auth files):
112
+
113
+ ```ts
114
+ import { CredentialFilePool } from "@dtechvision/fabrik-runtime/credential-pool";
115
+
116
+ const pool = new CredentialFilePool({
117
+ prefix: "codex-auth",
118
+ extension: ".json",
119
+ activeDir: "/tmp/codex-active",
120
+ activeFilename: "auth.json",
121
+ agent: "codex",
122
+ });
123
+ pool.init();
124
+
125
+ // On auth failure:
126
+ const rotated = await pool.handleError(err);
127
+ ```
128
+
129
+ For Codex-specific rotation, use the higher-level helper:
130
+
131
+ ```ts
132
+ import { createCodexAgentWithPool } from "@dtechvision/fabrik-runtime/codex-auth";
133
+
134
+ const codex = createCodexAgentWithPool({
135
+ model: "gpt-5",
136
+ cwd: process.cwd(),
137
+ env: {},
138
+ });
139
+ ```
140
+
141
+ ## Local Verification
142
+
143
+ Runtime package tests:
144
+
145
+ ```bash
146
+ cd src/fabrik-runtime
147
+ bun test ./src
148
+ ```
149
+
150
+ Repo-wide CLI and workflow verification:
151
+
152
+ ```bash
153
+ make verify-cli
154
+ make verify-cli-k3d
155
+ ```
156
+
157
+ Focused runtime-package k3d import verification:
158
+
159
+ ```bash
160
+ cd src/fabrik-cli
161
+ FABRIK_K3D_E2E=1 FABRIK_K3D_CLUSTER=dev-single \
162
+ go test ./internal/run -run TestK3dWorkflowRuntimePackageImports -timeout 10m -v
163
+ ```
164
+
165
+ The complex sample in [examples/complex/README.md](https://github.com/SamuelLHuber/local-isolated-ralph/blob/main/examples/complex/README.md) shows how workflow code consumes the package surface in practice.
166
+
167
+ Local Smithers CLI verification:
168
+
169
+ ```bash
170
+ bunx smithers run path/to/workflow.tsx --run-id runtime-package-check
171
+ ```
172
+
173
+ The expected result is a successful run whose output reports:
174
+
175
+ - `codexHomeSet: true`
176
+ - `jjHelpersLoaded: true`
177
+
178
+ ## Precedence
179
+
180
+ 1. Fabrik runtime metadata (`SMITHERS_*`, `FABRIK_*`, `KUBERNETES_*`)
181
+ 2. Project env (`fabrik-env-<project>-<env>`) via `envFrom`
182
+ 3. Shared credentials (`fabrik-credentials`) via file mount
package/package.json ADDED
@@ -0,0 +1,40 @@
1
+ {
2
+ "name": "@dtechvision/fabrik-runtime",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "main": "src/index.ts",
6
+ "types": "src/index.ts",
7
+ "exports": {
8
+ ".": "./src/index.ts",
9
+ "./credential-pool": "./src/credential-pool.ts",
10
+ "./codex-auth": "./src/codex-auth.ts",
11
+ "./k8s-jobs": "./src/k8s-jobs.ts",
12
+ "./jj-shell": "./src/jj-shell.ts"
13
+ },
14
+ "files": [
15
+ "src/index.ts",
16
+ "src/credential-pool.ts",
17
+ "src/codex-auth.ts",
18
+ "src/k8s-jobs.ts",
19
+ "src/jj-shell.ts",
20
+ "README.md"
21
+ ],
22
+ "publishConfig": {
23
+ "access": "public"
24
+ },
25
+ "repository": {
26
+ "type": "git",
27
+ "url": "git+https://github.com/SamuelLHuber/local-isolated-ralph.git",
28
+ "directory": "src/fabrik-runtime"
29
+ },
30
+ "license": "MIT",
31
+ "scripts": {
32
+ "test": "bun test ./src"
33
+ },
34
+ "dependencies": {
35
+ "smithers-orchestrator": "0.9.1"
36
+ },
37
+ "devDependencies": {
38
+ "bun-types": "1.2.12"
39
+ }
40
+ }
package/src/codex-auth.ts ADDED
@@ -0,0 +1,231 @@
1
+ import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
2
+ import { tmpdir } from "node:os";
3
+ import { basename, resolve } from "node:path";
4
+ import { CodexAgent } from "smithers-orchestrator";
5
+ import {
6
+ classifyFailure,
7
+ getCredentialMountPath,
8
+ type FailureKind,
9
+ } from "./credential-pool";
10
+
11
+ const DEFAULT_CODEX_DIR = resolve(process.env.HOME ?? "", ".codex");
12
+
13
+ function getCodexAuthSourceDir(): string {
14
+ const sourceDir =
15
+ process.env.CODEX_AUTH_SOURCE_DIR ??
16
+ process.env.FABRIK_SHARED_CREDENTIALS_DIR ??
17
+ (existsSync(getCredentialMountPath()) ? getCredentialMountPath() : DEFAULT_CODEX_DIR);
18
+ return resolve(sourceDir);
19
+ }
20
+
21
+ export const CODEX_AUTH_HOME = resolve(
22
+ process.env.CODEX_AUTH_HOME ?? resolve(tmpdir(), "codex-auth-pool"),
23
+ );
24
+
25
+ const NOTIFY_WEBHOOK_URL = process.env.CODEX_AUTH_NOTIFY_WEBHOOK_URL?.trim() ?? "";
26
+ const NOTIFY_CLUSTER = process.env.KUBERNETES_NAMESPACE?.trim() ?? "";
27
+ const NOTIFY_RUN_ID = process.env.SMITHERS_RUN_ID?.trim() ?? "";
28
+
29
+ const AUTH_ROTATE_PATTERN =
30
+ /no last agent message|usage limit|quota|rate limit|insufficient (?:credits|balance|quota)|payment required|billing|exceeded.*(quota|limit)|not signed in|please run 'codex login'|unauthorized|authentication required|authentication failed|forbidden|invalid (?:api key|token|credentials)|expired (?:token|credentials)/i;
31
+ const AUTH_REFRESH_REUSED_PATTERN =
32
+ /refresh_token_reused|refresh token has already been used|could not be refreshed because your refresh token was already used/i;
33
+
34
+ const listAuthFiles = (): string[] => {
35
+ const sourceDir = getCodexAuthSourceDir();
36
+ if (!existsSync(sourceDir)) return [];
37
+ return readdirSync(sourceDir)
38
+ .filter((name) => name.endsWith(".auth.json") || name === "auth.json")
39
+ .map((name) => resolve(sourceDir, name))
40
+ .sort();
41
+ };
42
+
43
+ const ensureCodexHome = () => {
44
+ if (!existsSync(CODEX_AUTH_HOME)) {
45
+ mkdirSync(CODEX_AUTH_HOME, { recursive: true });
46
+ }
47
+ };
48
+
49
+ let authPool = listAuthFiles();
50
+ let authIndex = 0;
51
+ let activeAuth = "";
52
+ const authFailures = new Map<string, FailureKind>();
53
+
54
+ export function resetCodexAuthStateForTests(): void {
55
+ authPool = [];
56
+ authIndex = 0;
57
+ activeAuth = "";
58
+ authFailures.clear();
59
+ }
60
+
61
+ const setActiveAuth = (authPath: string, reason: string) => {
62
+ ensureCodexHome();
63
+ const authContents = readFileSync(authPath, "utf8");
64
+ writeFileSync(resolve(CODEX_AUTH_HOME, "auth.json"), authContents, "utf8");
65
+ const previous = activeAuth ? ` from ${basename(activeAuth)}` : "";
66
+ activeAuth = authPath;
67
+ console.error(
68
+ `[fabrik-runtime] codex auth rotation${previous} -> ${basename(authPath)} (${reason})`,
69
+ );
70
+ };
71
+
72
+ const initAuthPool = () => {
73
+ ensureCodexHome();
74
+ authPool = listAuthFiles();
75
+ if (authPool.length === 0 || activeAuth) return;
76
+ const defaultAuth = resolve(getCodexAuthSourceDir(), "auth.json");
77
+ if (existsSync(defaultAuth)) {
78
+ setActiveAuth(defaultAuth, "initial");
79
+ return;
80
+ }
81
+ setActiveAuth(authPool[0]!, "initial");
82
+ };
83
+
84
+ const logAuthSummary = () => {
85
+ const total = authPool.length;
86
+ const failed = [...authFailures.entries()].map(
87
+ ([path, status]) => `${basename(path)}:${status}`,
88
+ );
89
+ const failedCount = authFailures.size;
90
+ const remaining = Math.max(total - failedCount, 0);
91
+ const active = activeAuth ? basename(activeAuth) : "none";
92
+ console.error(
93
+ `[fabrik-runtime] codex auth pool summary: total=${total} failed=${failedCount} remaining=${remaining} active=${active}`,
94
+ );
95
+ if (failed.length > 0) {
96
+ console.error(`[fabrik-runtime] failed auths: ${failed.join(", ")}`);
97
+ }
98
+ };
99
+
100
+ const rotateAuth = (reason: string): boolean => {
101
+ authPool = listAuthFiles();
102
+ if (authPool.length === 0) return false;
103
+ for (let i = 0; i < authPool.length; i += 1) {
104
+ const next = authPool[authIndex % authPool.length];
105
+ authIndex += 1;
106
+ if (next && next !== activeAuth && !authFailures.has(next)) {
107
+ setActiveAuth(next, reason);
108
+ logAuthSummary();
109
+ return true;
110
+ }
111
+ }
112
+ console.error("[fabrik-runtime] no codex auth left to rotate to");
113
+ logAuthSummary();
114
+ return false;
115
+ };
116
+
117
+ export const withCodexAuthPoolEnv = (env: Record<string, string>) => ({
118
+ ...env,
119
+ CODEX_HOME: CODEX_AUTH_HOME,
120
+ });
121
+
122
+ export type AuthFailureKind = FailureKind;
123
+
124
+ export type AuthFailureEvent = {
125
+ authPath: string;
126
+ authName: string;
127
+ reason: string;
128
+ kind: AuthFailureKind;
129
+ message: string;
130
+ clusterNamespace?: string;
131
+ runId?: string;
132
+ };
133
+
134
+ export type RotatingCodexAgentOptions = {
135
+ onAuthFailure?: (event: AuthFailureEvent) => void | Promise<void>;
136
+ };
137
+
138
+ const notifyAuthFailure = async (
139
+ event: AuthFailureEvent,
140
+ onAuthFailure?: RotatingCodexAgentOptions["onAuthFailure"],
141
+ ) => {
142
+ if (onAuthFailure) {
143
+ await onAuthFailure(event);
144
+ }
145
+ if (!NOTIFY_WEBHOOK_URL) return;
146
+ try {
147
+ const response = await fetch(NOTIFY_WEBHOOK_URL, {
148
+ method: "POST",
149
+ headers: { "content-type": "application/json" },
150
+ body: JSON.stringify(event),
151
+ });
152
+ if (!response.ok) {
153
+ console.error(
154
+ `[fabrik-runtime] codex auth notification failed: webhook status ${response.status}`,
155
+ );
156
+ }
157
+ } catch (err) {
158
+ const message = err instanceof Error ? err.message : String(err);
159
+ console.error(`[fabrik-runtime] codex auth notification failed: ${message}`);
160
+ }
161
+ };
162
+
163
+ export const createCodexAgentWithPool = (
164
+ opts: ConstructorParameters<typeof CodexAgent>[0],
165
+ rotationOpts: RotatingCodexAgentOptions = {},
166
+ ) =>
167
+ new RotatingCodexAgent(
168
+ new CodexAgent({
169
+ ...opts,
170
+ env: withCodexAuthPoolEnv(opts.env ?? {}),
171
+ }),
172
+ rotationOpts,
173
+ );
174
+
175
+ export class RotatingCodexAgent {
176
+ private readonly inner: CodexAgent;
177
+ private readonly onAuthFailure?: RotatingCodexAgentOptions["onAuthFailure"];
178
+
179
+ constructor(inner: CodexAgent, opts: RotatingCodexAgentOptions = {}) {
180
+ this.inner = inner;
181
+ this.onAuthFailure = opts.onAuthFailure;
182
+ }
183
+
184
+ get id() {
185
+ return this.inner.id;
186
+ }
187
+
188
+ get tools() {
189
+ return this.inner.tools;
190
+ }
191
+
192
+ async generate(args: Parameters<CodexAgent["generate"]>[0]) {
193
+ initAuthPool();
194
+ const attempts = Math.max(authPool.length, 1);
195
+ let lastError: unknown = null;
196
+ for (let i = 0; i < attempts; i += 1) {
197
+ try {
198
+ return await this.inner.generate(args);
199
+ } catch (err) {
200
+ lastError = err;
201
+ const message = err instanceof Error ? err.message : String(err);
202
+ if (!AUTH_ROTATE_PATTERN.test(message)) {
203
+ throw err;
204
+ }
205
+ if (activeAuth) {
206
+ const kind = classifyFailure(message);
207
+ authFailures.set(activeAuth, kind);
208
+ if (AUTH_REFRESH_REUSED_PATTERN.test(message)) {
209
+ console.error("[fabrik-runtime] codex refresh token reused; re-auth required");
210
+ }
211
+ await notifyAuthFailure(
212
+ {
213
+ authPath: activeAuth,
214
+ authName: basename(activeAuth),
215
+ reason: "codex generate failed and rotation was requested",
216
+ kind,
217
+ message,
218
+ clusterNamespace: NOTIFY_CLUSTER || undefined,
219
+ runId: NOTIFY_RUN_ID || undefined,
220
+ },
221
+ this.onAuthFailure,
222
+ );
223
+ }
224
+ if (!rotateAuth("codex auth / usage failure")) {
225
+ break;
226
+ }
227
+ }
228
+ }
229
+ throw lastError ?? new Error("Codex auth pool exhausted");
230
+ }
231
+ }
package/src/credential-pool.ts ADDED
@@ -0,0 +1,266 @@
1
+ /**
2
+ * Generic credential pool for Fabrik workflow pods.
3
+ *
4
+ * Credentials are managed by operators via kubectl and mounted into pods
5
+ * at /etc/fabrik/credentials as a Kubernetes Secret directory mount.
6
+ * This module reads from that mount, provides pool rotation for agents
7
+ * that support multiple credential files, and emits structured failure
8
+ * notifications without exposing secret contents.
9
+ *
10
+ * Architecture:
11
+ * - Operators create/update `fabrik-credentials` secret in `fabrik-system`
12
+ * via kubectl (e.g. `kubectl create secret generic fabrik-credentials
13
+ * --from-file=ANTHROPIC_API_KEY=./key.txt --from-literal=OPENAI_API_KEY=sk-...`)
14
+ * - Fabrik CLI mirrors the secret into the run namespace at dispatch time
15
+ * - The secret is directory-mounted (no subPath) at CREDENTIAL_MOUNT_PATH
16
+ * so running pods observe file replacements for rotation
17
+ * - This module reads credential files from that mount directory
18
+ *
19
+ * Supported credential layouts:
20
+ * - Flat env-var keys: /etc/fabrik/credentials/ANTHROPIC_API_KEY (file contains the value)
21
+ * - Codex auth pool: /etc/fabrik/credentials/codex-auth.json,
22
+ * /etc/fabrik/credentials/codex-auth-2.json, etc.
23
+ * - Claude Code: /etc/fabrik/credentials/ANTHROPIC_API_KEY
24
+ * - Pi: /etc/fabrik/credentials/FIREWORKS_API_KEY or provider config files
25
+ */
26
+ import { existsSync, readFileSync, readdirSync, writeFileSync, mkdirSync } from "node:fs";
27
+ import { basename, resolve } from "node:path";
28
+
29
+ /** Default mount path for the fabrik-credentials directory. */
30
+ export function getCredentialMountPath(): string {
31
+ return process.env.FABRIK_CREDENTIAL_PATH ?? "/etc/fabrik/credentials";
32
+ }
33
+
34
+ /** @deprecated Use getCredentialMountPath() for dynamic resolution. */
35
+ export const CREDENTIAL_MOUNT_PATH = "/etc/fabrik/credentials";
36
+
37
+ // ---------------------------------------------------------------------------
38
+ // Failure classification
39
+ // ---------------------------------------------------------------------------
40
+
41
+ export type FailureKind =
42
+ | "refresh_token_reused"
43
+ | "usage_limit"
44
+ | "auth_invalid"
45
+ | "unknown";
46
+
47
+ export type FailureEvent = {
48
+ credentialName: string;
49
+ kind: FailureKind;
50
+ message: string;
51
+ agent: string;
52
+ namespace?: string;
53
+ runId?: string;
54
+ };
55
+
56
+ const REFRESH_REUSED =
57
+ /refresh_token_reused|refresh token has already been used|could not be refreshed because your refresh token was already used/i;
58
+ const USAGE_LIMIT =
59
+ /no last agent message|usage limit|quota|rate limit|insufficient (?:credits|balance|quota)|payment required|billing|exceeded.*(quota|limit)/i;
60
+ const AUTH_INVALID =
61
+ /not signed in|please run.*login|unauthorized|authentication required|authentication failed|forbidden|invalid (?:api key|token|credentials)|expired (?:token|credentials)|Not logged in/i;
62
+
63
+ export function classifyFailure(message: string): FailureKind {
64
+ if (REFRESH_REUSED.test(message)) return "refresh_token_reused";
65
+ if (AUTH_INVALID.test(message)) return "auth_invalid";
66
+ if (USAGE_LIMIT.test(message)) return "usage_limit";
67
+ return "unknown";
68
+ }
69
+
70
+ /** Returns true if the error message indicates an auth/credential problem
71
+ * that credential rotation might fix. */
72
+ export function isRotatableFailure(message: string): boolean {
73
+ return classifyFailure(message) !== "unknown";
74
+ }
75
+
76
+ // ---------------------------------------------------------------------------
77
+ // Notification
78
+ // ---------------------------------------------------------------------------
79
+
80
+ const NOTIFY_WEBHOOK = process.env.FABRIK_CREDENTIAL_NOTIFY_WEBHOOK?.trim() ?? "";
81
+
82
+ export async function notifyFailure(event: FailureEvent): Promise<void> {
83
+ console.error(
84
+ `[fabrik-runtime] credential failure: ${event.credentialName} kind=${event.kind} agent=${event.agent}`,
85
+ );
86
+ if (!NOTIFY_WEBHOOK) return;
87
+ try {
88
+ const resp = await fetch(NOTIFY_WEBHOOK, {
89
+ method: "POST",
90
+ headers: { "content-type": "application/json" },
91
+ body: JSON.stringify(event),
92
+ });
93
+ if (!resp.ok) {
94
+ console.error(`[fabrik-runtime] notification webhook returned ${resp.status}`);
95
+ }
96
+ } catch (err) {
97
+ console.error(`[fabrik-runtime] notification failed: ${err instanceof Error ? err.message : err}`);
98
+ }
99
+ }
100
+
101
+ // ---------------------------------------------------------------------------
102
+ // Credential reading from mounted directory
103
+ // ---------------------------------------------------------------------------
104
+
105
+ /** Read a single credential value from the mounted directory. */
106
+ export function readCredential(name: string): string | null {
107
+ const mountPath = getCredentialMountPath();
108
+ const path = resolve(mountPath, name);
109
+ if (!existsSync(path)) return null;
110
+ return readFileSync(path, "utf8").trim();
111
+ }
112
+
113
+ /** List all credential file names in the mounted directory. */
114
+ export function listCredentials(): string[] {
115
+ const mountPath = getCredentialMountPath();
116
+ if (!existsSync(mountPath)) return [];
117
+ return readdirSync(mountPath)
118
+ .filter((name) => !name.startsWith(".") && name !== "..timestamp_of_last_update")
119
+ .sort();
120
+ }
121
+
122
+ /** Read all credentials as a key→value map. */
123
+ export function readAllCredentials(): Record<string, string> {
124
+ const result: Record<string, string> = {};
125
+ for (const name of listCredentials()) {
126
+ const value = readCredential(name);
127
+ if (value !== null) result[name] = value;
128
+ }
129
+ return result;
130
+ }
131
+
132
+ // ---------------------------------------------------------------------------
133
+ // File pool rotation (for agents that use auth files like Codex)
134
+ // ---------------------------------------------------------------------------
135
+
136
+ export type PoolOptions = {
137
+ /** Glob pattern to match pool files, e.g. "codex-auth" matches codex-auth*.json */
138
+ prefix: string;
139
+ /** Extension to match, e.g. ".json" */
140
+ extension?: string;
141
+ /** Directory to write the active credential file to */
142
+ activeDir: string;
143
+ /** Filename for the active credential, e.g. "auth.json" */
144
+ activeFilename: string;
145
+ /** Agent name for failure events */
146
+ agent: string;
147
+ };
148
+
149
+ export class CredentialFilePool {
150
+ private pool: string[] = [];
151
+ private index = 0;
152
+ private active = "";
153
+ private failures = new Map<string, FailureKind>();
154
+ private readonly opts: PoolOptions;
155
+
156
+ constructor(opts: PoolOptions) {
157
+ this.opts = opts;
158
+ }
159
+
160
+ /** Scan the credential mount for pool files and activate the first one. */
161
+ init(): void {
162
+ this.pool = this.scanPool();
163
+ if (this.pool.length === 0) return;
164
+ if (this.active && this.pool.includes(this.active)) return;
165
+ this.activate(this.pool[0]!, "initial");
166
+ }
167
+
168
+ /** Number of available (non-failed) credentials. */
169
+ get available(): number {
170
+ return this.pool.filter((p) => !this.failures.has(p)).length;
171
+ }
172
+
173
+ get activeName(): string {
174
+ return this.active ? basename(this.active) : "";
175
+ }
176
+
177
+ /** Try to rotate to the next unfailed credential. Returns false if exhausted. */
178
+ rotate(reason: string): boolean {
179
+ this.pool = this.scanPool();
180
+ if (this.pool.length === 0) return false;
181
+ for (let i = 0; i < this.pool.length; i++) {
182
+ const next = this.pool[this.index % this.pool.length]!;
183
+ this.index++;
184
+ if (next !== this.active && !this.failures.has(next)) {
185
+ this.activate(next, reason);
186
+ return true;
187
+ }
188
+ }
189
+ console.error(`[fabrik-runtime] ${this.opts.agent} credential pool exhausted`);
190
+ return false;
191
+ }
192
+
193
+ /** Mark the current credential as failed and optionally notify. */
194
+ async markFailed(message: string): Promise<void> {
195
+ if (!this.active) return;
196
+ const kind = classifyFailure(message);
197
+ this.failures.set(this.active, kind);
198
+ await notifyFailure({
199
+ credentialName: basename(this.active),
200
+ kind,
201
+ message,
202
+ agent: this.opts.agent,
203
+ namespace: process.env.KUBERNETES_NAMESPACE?.trim(),
204
+ runId: process.env.SMITHERS_RUN_ID?.trim(),
205
+ });
206
+ }
207
+
208
+ /** Handle an agent error: mark failed, try rotate, throw if exhausted. */
209
+ async handleError(err: unknown): Promise<boolean> {
210
+ const message = err instanceof Error ? err.message : String(err);
211
+ if (!isRotatableFailure(message)) return false;
212
+ await this.markFailed(message);
213
+ return this.rotate("credential failure");
214
+ }
215
+
216
+ private scanPool(): string[] {
217
+ const mountPath = getCredentialMountPath();
218
+ if (!existsSync(mountPath)) return [];
219
+ const ext = this.opts.extension ?? ".json";
220
+ return readdirSync(mountPath)
221
+ .filter((name) => name.startsWith(this.opts.prefix) && name.endsWith(ext))
222
+ .map((name) => resolve(mountPath, name))
223
+ .sort();
224
+ }
225
+
226
+ private activate(path: string, reason: string): void {
227
+ mkdirSync(this.opts.activeDir, { recursive: true });
228
+ const contents = readFileSync(path, "utf8");
229
+ writeFileSync(resolve(this.opts.activeDir, this.opts.activeFilename), contents, "utf8");
230
+ const prev = this.active ? ` from ${basename(this.active)}` : "";
231
+ this.active = path;
232
+ console.error(
233
+ `[fabrik-runtime] ${this.opts.agent} credential${prev} -> ${basename(path)} (${reason})`,
234
+ );
235
+ }
236
+ }
237
+
238
+ // ---------------------------------------------------------------------------
239
+ // Env-var credential helpers for agents that use env vars
240
+ // ---------------------------------------------------------------------------
241
+
242
+ /**
243
+ * Read a credential from the mounted directory and set it as an env var.
244
+ * This is the standard pattern for agents that use env vars for auth
245
+ * (Claude Code ANTHROPIC_API_KEY, Pi FIREWORKS_API_KEY, etc.)
246
+ */
247
+ export function injectCredentialEnv(credentialName: string, envVar?: string): boolean {
248
+ const value = readCredential(credentialName);
249
+ if (value === null) return false;
250
+ process.env[envVar ?? credentialName] = value;
251
+ return true;
252
+ }
253
+
254
+ /**
255
+ * Inject all credentials from the mounted directory as env vars.
256
+ * File names become env var names, file contents become values.
257
+ */
258
+ export function injectAllCredentialEnvs(): string[] {
259
+ const injected: string[] = [];
260
+ for (const name of listCredentials()) {
261
+ if (injectCredentialEnv(name)) {
262
+ injected.push(name);
263
+ }
264
+ }
265
+ return injected;
266
+ }
package/src/index.ts ADDED
@@ -0,0 +1,48 @@
1
+ /**
2
+ * fabrik-runtime — shared TypeScript utilities for Fabrik workflow pods.
3
+ *
4
+ * This package provides:
5
+ * - Credential pool management (read from mounted K8s secrets, rotate, notify)
6
+ * - K8s job helpers (dispatch child verification jobs)
7
+ * - Deterministic JJ/Git shell operations
8
+ */
9
+
10
+ export {
11
+ CREDENTIAL_MOUNT_PATH,
12
+ getCredentialMountPath,
13
+ classifyFailure,
14
+ isRotatableFailure,
15
+ notifyFailure,
16
+ readCredential,
17
+ listCredentials,
18
+ readAllCredentials,
19
+ injectCredentialEnv,
20
+ injectAllCredentialEnvs,
21
+ CredentialFilePool,
22
+ type FailureKind,
23
+ type FailureEvent,
24
+ type PoolOptions,
25
+ } from "./credential-pool";
26
+
27
+ export {
28
+ CODEX_AUTH_HOME,
29
+ withCodexAuthPoolEnv,
30
+ createCodexAgentWithPool,
31
+ RotatingCodexAgent,
32
+ type AuthFailureKind,
33
+ type AuthFailureEvent,
34
+ type RotatingCodexAgentOptions,
35
+ } from "./codex-auth";
36
+
37
+ export {
38
+ runVerificationJob,
39
+ buildVerificationJobManifest,
40
+ type VerificationResult,
41
+ } from "./k8s-jobs";
42
+
43
+ export {
44
+ prepareWorkspaces,
45
+ snapshotChange,
46
+ pushBookmark,
47
+ type ReportOutput,
48
+ } from "./jj-shell";
package/src/jj-shell.ts ADDED
@@ -0,0 +1,212 @@
1
+ /**
2
+ * Deterministic JJ shell operations for workflow-owned progress tracking.
3
+ *
4
+ * These commands have fixed semantics and should not be delegated to the
5
+ * coding agent. Keeping them here makes workspace creation, snapshotting, and
6
+ * bookmark pushes reproducible across runs.
7
+ */
8
+ import { $ } from "bun";
9
+ import { existsSync } from "node:fs";
10
+ import { resolve } from "node:path";
11
+
12
+ export type ReportOutput = {
13
+ ticketId: string;
14
+ status: "done" | "partial" | "blocked";
15
+ summary: string;
16
+ };
17
+
18
+ type JjResult = {
19
+ ok: boolean;
20
+ stdout: string;
21
+ stderr: string;
22
+ exitCode: number;
23
+ };
24
+
25
+ async function jj(args: string[], cwd: string): Promise<JjResult> {
26
+ const result = await $`jj ${args}`.cwd(cwd).nothrow().quiet();
27
+ return {
28
+ ok: result.exitCode === 0,
29
+ stdout: result.stdout.toString().trim(),
30
+ stderr: result.stderr.toString().trim(),
31
+ exitCode: result.exitCode,
32
+ };
33
+ }
34
+
35
+ export async function prepareWorkspaces(
36
+ repoRoot: string,
37
+ workspacesDir: string,
38
+ ticketIds: readonly string[],
39
+ ): Promise<ReportOutput> {
40
+ await $`mkdir -p ${workspacesDir}`.quiet();
41
+
42
+ const created: string[] = [];
43
+ const skipped: string[] = [];
44
+ const errors: string[] = [];
45
+
46
+ for (const ticketId of ticketIds) {
47
+ const wsPath = resolve(workspacesDir, ticketId);
48
+
49
+ if (existsSync(wsPath)) {
50
+ const check = await jj(["status"], wsPath);
51
+ if (check.ok) {
52
+ skipped.push(ticketId);
53
+ continue;
54
+ }
55
+ }
56
+
57
+ const result = await jj(
58
+ ["workspace", "add", wsPath, "--name", ticketId],
59
+ repoRoot,
60
+ );
61
+ if (result.ok) {
62
+ created.push(ticketId);
63
+ continue;
64
+ }
65
+
66
+ const fallback = await jj(
67
+ ["workspace", "add", ticketId, wsPath],
68
+ repoRoot,
69
+ );
70
+ if (fallback.ok) {
71
+ created.push(ticketId);
72
+ continue;
73
+ }
74
+
75
+ errors.push(`${ticketId}: ${result.stderr || fallback.stderr}`);
76
+ }
77
+
78
+ const parts: string[] = [];
79
+ if (created.length > 0) parts.push(`Created: ${created.join(", ")}`);
80
+ if (skipped.length > 0) parts.push(`Existing: ${skipped.join(", ")}`);
81
+ if (errors.length > 0) parts.push(`Errors: ${errors.join("; ")}`);
82
+
83
+ return {
84
+ ticketId: "prepare-workspaces",
85
+ status: errors.length > 0 ? "partial" : "done",
86
+ summary: parts.join(". ") || "No workspaces to prepare.",
87
+ };
88
+ }
89
+
90
+ export async function snapshotChange(
91
+ workspacePath: string,
92
+ ticketId: string,
93
+ phase: string,
94
+ ): Promise<ReportOutput> {
95
+ const status = await jj(["status"], workspacePath);
96
+ if (!status.ok) {
97
+ return {
98
+ ticketId,
99
+ status: "blocked",
100
+ summary: `jj status failed: ${status.stderr}`,
101
+ };
102
+ }
103
+
104
+ const hasChanges = !status.stdout.includes("The working copy is clean");
105
+ const message = `${ticketId}: ${phase}`;
106
+
107
+ const describe = await jj(["describe", "-m", message], workspacePath);
108
+ if (!describe.ok) {
109
+ return {
110
+ ticketId,
111
+ status: "blocked",
112
+ summary: `jj describe failed: ${describe.stderr}`,
113
+ };
114
+ }
115
+
116
+ const newChange = await jj(["new"], workspacePath);
117
+ if (!newChange.ok) {
118
+ return {
119
+ ticketId,
120
+ status: "blocked",
121
+ summary: `jj new failed: ${newChange.stderr}`,
122
+ };
123
+ }
124
+
125
+ return {
126
+ ticketId,
127
+ status: "done",
128
+ summary: hasChanges
129
+ ? `Snapshotted: "${message}"`
130
+ : `Described (no file changes): "${message}"`,
131
+ };
132
+ }
133
+
134
+ export async function pushBookmark(
135
+ workspacePath: string,
136
+ bookmarkName: string,
137
+ ticketId: string,
138
+ ): Promise<ReportOutput> {
139
+ const targetRev = "@-";
140
+ const track = await jj(
141
+ ["bookmark", "track", bookmarkName, "--remote", "origin"],
142
+ workspacePath,
143
+ );
144
+ const trackSummary =
145
+ track.ok || track.stderr === ""
146
+ ? ""
147
+ : ` Tracking remote bookmark reported: ${track.stderr}`;
148
+
149
+ const targetCommit = await jj(
150
+ ["log", "-r", targetRev, "--no-graph", "-T", "commit_id"],
151
+ workspacePath,
152
+ );
153
+ if (!targetCommit.ok || !targetCommit.stdout) {
154
+ return {
155
+ ticketId,
156
+ status: "blocked",
157
+ summary: `Failed to resolve target revision for bookmark push: ${targetCommit.stderr}`,
158
+ };
159
+ }
160
+
161
+ const move = await jj(
162
+ ["bookmark", "set", bookmarkName, "-r", targetRev, "--allow-backwards"],
163
+ workspacePath,
164
+ );
165
+
166
+ if (!move.ok) {
167
+ const create = await jj(
168
+ ["bookmark", "create", "-r", targetRev, bookmarkName],
169
+ workspacePath,
170
+ );
171
+ if (!create.ok) {
172
+ return {
173
+ ticketId,
174
+ status: "blocked",
175
+ summary: `Failed to set bookmark '${bookmarkName}': ${create.stderr}`,
176
+ };
177
+ }
178
+ }
179
+
180
+ const push = await jj(
181
+ ["git", "push", "--bookmark", bookmarkName],
182
+ workspacePath,
183
+ );
184
+ if (!push.ok) {
185
+ return {
186
+ ticketId,
187
+ status: "blocked",
188
+ summary: `Bookmark set but push failed: ${push.stderr}${trackSummary}`,
189
+ };
190
+ }
191
+
192
+ const remote = await $`git ls-remote origin refs/heads/${bookmarkName}`
193
+ .cwd(workspacePath)
194
+ .nothrow()
195
+ .quiet();
196
+ const remoteCommit = remote.stdout.toString().trim().split(/\s+/)[0] ?? "";
197
+ if (remote.exitCode !== 0 || remoteCommit !== targetCommit.stdout) {
198
+ return {
199
+ ticketId,
200
+ status: "blocked",
201
+ summary:
202
+ `Bookmark push returned success but remote ${bookmarkName} is ${remoteCommit || "missing"} instead of ${targetCommit.stdout}.` +
203
+ trackSummary,
204
+ };
205
+ }
206
+
207
+ return {
208
+ ticketId,
209
+ status: "done",
210
+ summary: `Pushed bookmark '${bookmarkName}' to origin at ${targetCommit.stdout}.${trackSummary}`,
211
+ };
212
+ }
@@ -0,0 +1,313 @@
1
+ import https from "node:https";
2
+ import { readFileSync } from "node:fs";
3
+
4
// Standard paths where Kubernetes mounts the pod's service-account credentials.
// Read fresh on every API request (see k8sRequest) so rotated tokens are picked up.
const SERVICE_ACCOUNT_TOKEN = "/var/run/secrets/kubernetes.io/serviceaccount/token";
const SERVICE_ACCOUNT_CA = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt";
6
+
7
/** Outcome of a dispatched verification job, returned by runVerificationJob. */
export type VerificationResult = {
  /** True when the Job reported at least one succeeded pod. */
  passed: boolean;
  /** Name of the created Job object. */
  jobName: string;
  /** First pod observed for the job; "" if none was ever listed. */
  podName: string;
  /** Copy of the verification commands that were executed. */
  commands: string[];
  /** Logs from the "fabrik" container; "" when no pod was found. */
  logs: string;
  /** One-line human-readable result description. */
  summary: string;
};
15
+
16
/** Inputs for building and running a verification Job. */
type VerificationJobOptions = {
  /** Job name; also used as the fabrik.sh/task label value. */
  name: string;
  /** Container image to run the verification commands in. */
  image: string;
  /** Namespace the Job is created in. */
  namespace: string;
  /** Service account assigned to the verifier pod. */
  serviceAccountName: string;
  /** PVC mounted at /workspace inside the verifier pod. */
  pvcName: string;
  /** Node the pod is pinned to (spec.nodeName), so it can share the PVC's node. */
  nodeName: string;
  /** Working directory for the verifier container. */
  workspacePath: string;
  /** Shell commands executed under `set -euo pipefail`. */
  commands: string[];
  /** Optional commands run from an EXIT trap, regardless of success/failure. */
  cleanupCommands?: string[];
  /** Extra labels merged over the default fabrik.sh/* labels. */
  labels?: Record<string, string>;
  /** Polling timeout; defaults to 900 seconds in runVerificationJob. */
  timeoutSeconds?: number;
};
29
+
30
/** Minimal slice of Kubernetes object metadata used by this module. */
type K8sObjectMeta = {
  name: string;
  namespace: string;
};
34
+
35
/** Minimal slice of a v1 PodList response — only fields this module reads. */
type PodList = {
  items?: Array<{
    metadata?: {
      name?: string;
      labels?: Record<string, string>;
    };
  }>;
};
43
+
44
/** Minimal slice of a batch/v1 Job status — pod completion counters only. */
type JobStatus = {
  status?: {
    succeeded?: number;
    failed?: number;
  };
};
50
+
51
+ function requiredEnv(name: string): string {
52
+ const value = process.env[name]?.trim();
53
+ if (!value) {
54
+ throw new Error(`Missing required Kubernetes environment variable ${name}.`);
55
+ }
56
+ return value;
57
+ }
58
+
59
+ function k8sRequest(
60
+ method: string,
61
+ path: string,
62
+ body?: string,
63
+ contentType = "application/json",
64
+ ): Promise<string> {
65
+ const token = readFileSync(SERVICE_ACCOUNT_TOKEN, "utf8").trim();
66
+ const ca = readFileSync(SERVICE_ACCOUNT_CA);
67
+ const host = requiredEnv("KUBERNETES_SERVICE_HOST");
68
+ const port = process.env.KUBERNETES_SERVICE_PORT_HTTPS?.trim() || "443";
69
+
70
+ return new Promise((resolve, reject) => {
71
+ const req = https.request(
72
+ {
73
+ host,
74
+ port,
75
+ method,
76
+ path,
77
+ ca,
78
+ headers: {
79
+ Authorization: `Bearer ${token}`,
80
+ Accept: "application/json",
81
+ ...(body
82
+ ? {
83
+ "Content-Type": contentType,
84
+ "Content-Length": Buffer.byteLength(body),
85
+ }
86
+ : {}),
87
+ },
88
+ },
89
+ (res) => {
90
+ let data = "";
91
+ res.setEncoding("utf8");
92
+ res.on("data", (chunk) => {
93
+ data += chunk;
94
+ });
95
+ res.on("end", () => {
96
+ const statusCode = res.statusCode ?? 500;
97
+ if (statusCode >= 200 && statusCode < 300) {
98
+ resolve(data);
99
+ return;
100
+ }
101
+ reject(
102
+ new Error(
103
+ `Kubernetes API ${method} ${path} failed with ${statusCode}: ${data}`,
104
+ ),
105
+ );
106
+ });
107
+ },
108
+ );
109
+
110
+ req.on("error", reject);
111
+ if (body) req.write(body);
112
+ req.end();
113
+ });
114
+ }
115
+
116
+ async function createJob(namespace: string, manifest: unknown): Promise<K8sObjectMeta> {
117
+ const response = await k8sRequest(
118
+ "POST",
119
+ `/apis/batch/v1/namespaces/${namespace}/jobs`,
120
+ JSON.stringify(manifest),
121
+ );
122
+ const parsed = JSON.parse(response) as { metadata?: K8sObjectMeta };
123
+ if (!parsed.metadata?.name || !parsed.metadata?.namespace) {
124
+ throw new Error("Kubernetes API create job response did not include metadata.");
125
+ }
126
+ return parsed.metadata;
127
+ }
128
+
129
+ async function getJob(namespace: string, jobName: string): Promise<JobStatus> {
130
+ const response = await k8sRequest(
131
+ "GET",
132
+ `/apis/batch/v1/namespaces/${namespace}/jobs/${jobName}`,
133
+ );
134
+ return JSON.parse(response) as JobStatus;
135
+ }
136
+
137
+ async function listPodsForJob(namespace: string, jobName: string): Promise<string[]> {
138
+ const response = await k8sRequest(
139
+ "GET",
140
+ `/api/v1/namespaces/${namespace}/pods?labelSelector=${encodeURIComponent(`job-name=${jobName}`)}`,
141
+ );
142
+ const parsed = JSON.parse(response) as PodList;
143
+ return (parsed.items ?? [])
144
+ .map((item) => item.metadata?.name?.trim() ?? "")
145
+ .filter((name) => name !== "");
146
+ }
147
+
148
+ async function getPodLogs(namespace: string, podName: string): Promise<string> {
149
+ return await k8sRequest(
150
+ "GET",
151
+ `/api/v1/namespaces/${namespace}/pods/${podName}/log?container=fabrik`,
152
+ undefined,
153
+ "text/plain",
154
+ );
155
+ }
156
+
157
+ async function deleteJob(namespace: string, jobName: string): Promise<void> {
158
+ await k8sRequest(
159
+ "DELETE",
160
+ `/apis/batch/v1/namespaces/${namespace}/jobs/${jobName}?propagationPolicy=Background`,
161
+ );
162
+ }
163
+
164
+ function sleep(ms: number): Promise<void> {
165
+ return new Promise((resolve) => setTimeout(resolve, ms));
166
+ }
167
+
168
+ function buildVerifierScript(
169
+ commands: readonly string[],
170
+ cleanupCommands: readonly string[] = [],
171
+ ): string {
172
+ const lines = ["set -euo pipefail"];
173
+ if (cleanupCommands.length > 0) {
174
+ lines.push("cleanup() {");
175
+ for (const command of cleanupCommands) {
176
+ lines.push(` ${command}`);
177
+ }
178
+ lines.push("}");
179
+ lines.push("trap cleanup EXIT");
180
+ }
181
+ lines.push(...commands);
182
+ return lines.join("\n");
183
+ }
184
+
185
+ export function buildVerificationJobManifest(options: VerificationJobOptions) {
186
+ return {
187
+ apiVersion: "batch/v1",
188
+ kind: "Job",
189
+ metadata: {
190
+ name: options.name,
191
+ namespace: options.namespace,
192
+ labels: {
193
+ "fabrik.sh/managed-by": "fabrik",
194
+ "fabrik.sh/phase": "verify",
195
+ "fabrik.sh/task": options.name,
196
+ ...(options.labels ?? {}),
197
+ },
198
+ },
199
+ spec: {
200
+ ttlSecondsAfterFinished: 3600,
201
+ backoffLimit: 0,
202
+ template: {
203
+ metadata: {
204
+ labels: {
205
+ "fabrik.sh/managed-by": "fabrik",
206
+ "fabrik.sh/phase": "verify",
207
+ "fabrik.sh/task": options.name,
208
+ ...(options.labels ?? {}),
209
+ },
210
+ },
211
+ spec: {
212
+ serviceAccountName: options.serviceAccountName,
213
+ restartPolicy: "Never",
214
+ nodeName: options.nodeName,
215
+ containers: [
216
+ {
217
+ name: "fabrik",
218
+ image: options.image,
219
+ imagePullPolicy: "IfNotPresent",
220
+ command: [
221
+ "sh",
222
+ "-lc",
223
+ buildVerifierScript(
224
+ options.commands,
225
+ options.cleanupCommands ?? [],
226
+ ),
227
+ ],
228
+ env: [
229
+ { name: "FABRIK_RUN_IMAGE", value: options.image },
230
+ { name: "KUBERNETES_NAMESPACE", value: options.namespace },
231
+ { name: "FABRIK_WORKSPACE_PVC", value: options.pvcName },
232
+ { name: "KUBERNETES_NODE_NAME", value: options.nodeName },
233
+ ],
234
+ workingDir: options.workspacePath,
235
+ volumeMounts: [
236
+ {
237
+ name: "workspace",
238
+ mountPath: "/workspace",
239
+ },
240
+ ],
241
+ },
242
+ ],
243
+ volumes: [
244
+ {
245
+ name: "workspace",
246
+ persistentVolumeClaim: {
247
+ claimName: options.pvcName,
248
+ },
249
+ },
250
+ ],
251
+ },
252
+ },
253
+ },
254
+ };
255
+ }
256
+
257
+ export async function runVerificationJob(
258
+ options: VerificationJobOptions,
259
+ ): Promise<VerificationResult> {
260
+ const timeoutMs = (options.timeoutSeconds ?? 900) * 1000;
261
+ const manifest = buildVerificationJobManifest(options);
262
+
263
+ const created = await createJob(options.namespace, manifest);
264
+ const startedAt = Date.now();
265
+ let podName = "";
266
+ let logs = "";
267
+
268
+ try {
269
+ for (;;) {
270
+ if (Date.now()-startedAt > timeoutMs) {
271
+ throw new Error(`Timed out waiting for verification job ${created.name}.`);
272
+ }
273
+
274
+ const job = await getJob(options.namespace, created.name);
275
+ const pods = await listPodsForJob(options.namespace, created.name);
276
+ if (pods.length > 0) {
277
+ podName = pods[0]!;
278
+ }
279
+
280
+ if ((job.status?.succeeded ?? 0) > 0) {
281
+ if (podName) {
282
+ logs = await getPodLogs(options.namespace, podName);
283
+ }
284
+ return {
285
+ passed: true,
286
+ jobName: created.name,
287
+ podName,
288
+ commands: [...options.commands],
289
+ logs,
290
+ summary: `Verification job ${created.name} succeeded.`,
291
+ };
292
+ }
293
+
294
+ if ((job.status?.failed ?? 0) > 0) {
295
+ if (podName) {
296
+ logs = await getPodLogs(options.namespace, podName);
297
+ }
298
+ return {
299
+ passed: false,
300
+ jobName: created.name,
301
+ podName,
302
+ commands: [...options.commands],
303
+ logs,
304
+ summary: `Verification job ${created.name} failed.`,
305
+ };
306
+ }
307
+
308
+ await sleep(2000);
309
+ }
310
+ } finally {
311
+ await deleteJob(options.namespace, created.name).catch(() => undefined);
312
+ }
313
+ }