@dtechvision/fabrik-runtime 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +182 -0
- package/package.json +40 -0
- package/src/codex-auth.ts +231 -0
- package/src/credential-pool.ts +266 -0
- package/src/index.ts +48 -0
- package/src/jj-shell.ts +212 -0
- package/src/k8s-jobs.ts +313 -0
package/README.md
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# @dtechvision/fabrik-runtime
|
|
2
|
+
|
|
3
|
+
Shared TypeScript utilities for Fabrik workflow pods.
|
|
4
|
+
|
|
5
|
+
- **Credential pool** — read from mounted `/etc/fabrik/credentials`, rotate on failure, notify operators
|
|
6
|
+
- **Codex auth rotation** — rotate among `auth.json` / `*.auth.json` credentials for Codex-backed workflows
|
|
7
|
+
- **K8s jobs** — dispatch child verification jobs from a running workflow
|
|
8
|
+
- **JJ shell** — deterministic JJ/Git snapshot, bookmark push, workspace prep
|
|
9
|
+
|
|
10
|
+
## Import Surface
|
|
11
|
+
|
|
12
|
+
Workflows should import from `@dtechvision/fabrik-runtime/...`.
|
|
13
|
+
|
|
14
|
+
- `@dtechvision/fabrik-runtime/credential-pool`
|
|
15
|
+
- `@dtechvision/fabrik-runtime/codex-auth`
|
|
16
|
+
- `@dtechvision/fabrik-runtime/jj-shell`
|
|
17
|
+
- `@dtechvision/fabrik-runtime/k8s-jobs`
|
|
18
|
+
|
|
19
|
+
For in-cluster Fabrik runs, the Smithers runtime image ships this package in its `node_modules`.
|
|
20
|
+
For local workflow development in another repo, add the package as a dependency from a release or local path.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
Install from npm:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
bun add @dtechvision/fabrik-runtime
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
or:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
npm install @dtechvision/fabrik-runtime
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Smithers workflows also need their normal workflow dependencies in the consuming repo:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
bun add smithers-orchestrator zod
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
or:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
npm install smithers-orchestrator zod
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Package releases follow the same `v*` tag version as the Fabrik CLI release flow.
|
|
49
|
+
|
|
50
|
+
## Smithers Integration
|
|
51
|
+
|
|
52
|
+
Use the package from ordinary Smithers workflows:
|
|
53
|
+
|
|
54
|
+
```ts
|
|
55
|
+
/** @jsxImportSource smithers-orchestrator */
|
|
56
|
+
import { createSmithers, Task, Workflow } from "smithers-orchestrator";
|
|
57
|
+
import { z } from "zod";
|
|
58
|
+
import { withCodexAuthPoolEnv } from "@dtechvision/fabrik-runtime/codex-auth";
|
|
59
|
+
import { prepareWorkspaces } from "@dtechvision/fabrik-runtime/jj-shell";
|
|
60
|
+
|
|
61
|
+
const { smithers, outputs } = createSmithers(
|
|
62
|
+
{
|
|
63
|
+
report: z.object({
|
|
64
|
+
codexHomeSet: z.boolean(),
|
|
65
|
+
jjHelpersLoaded: z.boolean(),
|
|
66
|
+
}),
|
|
67
|
+
},
|
|
68
|
+
{ dbPath: process.env.SMITHERS_DB_PATH ?? ".smithers/runtime-check.db" },
|
|
69
|
+
);
|
|
70
|
+
|
|
71
|
+
export default smithers(() => (
|
|
72
|
+
<Workflow name="runtime-package-check">
|
|
73
|
+
<Task id="verify" output={outputs.report}>
|
|
74
|
+
{async () => {
|
|
75
|
+
const env = withCodexAuthPoolEnv({});
|
|
76
|
+
return {
|
|
77
|
+
codexHomeSet: typeof env.CODEX_HOME === "string" && env.CODEX_HOME.length > 0,
|
|
78
|
+
jjHelpersLoaded: typeof prepareWorkspaces === "function",
|
|
79
|
+
};
|
|
80
|
+
}}
|
|
81
|
+
</Task>
|
|
82
|
+
</Workflow>
|
|
83
|
+
));
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Run it locally with Smithers from a repo that has installed:
|
|
87
|
+
|
|
88
|
+
- `@dtechvision/fabrik-runtime`
|
|
89
|
+
- `smithers-orchestrator`
|
|
90
|
+
- `zod`
|
|
91
|
+
|
|
92
|
+
Then:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
bunx smithers run path/to/workflow.tsx --run-id runtime-package-check
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
The workflow file should live in the consuming project tree so normal Node/Bun package resolution can find the installed dependencies.
|
|
99
|
+
|
|
100
|
+
## Credentials
|
|
101
|
+
|
|
102
|
+
Operators manage `fabrik-credentials` in `fabrik-system` via kubectl. The CLI mirrors it into the run namespace at dispatch time. The secret is directory-mounted (no subPath) at `/etc/fabrik/credentials/` so running pods observe file replacements.
|
|
103
|
+
|
|
104
|
+
```ts
|
|
105
|
+
import { injectCredentialEnv } from "@dtechvision/fabrik-runtime/credential-pool";
|
|
106
|
+
|
|
107
|
+
// Reads /etc/fabrik/credentials/ANTHROPIC_API_KEY → process.env.ANTHROPIC_API_KEY
|
|
108
|
+
injectCredentialEnv("ANTHROPIC_API_KEY");
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
For file-pool rotation (e.g. multiple Codex auth files):
|
|
112
|
+
|
|
113
|
+
```ts
|
|
114
|
+
import { CredentialFilePool } from "@dtechvision/fabrik-runtime/credential-pool";
|
|
115
|
+
|
|
116
|
+
const pool = new CredentialFilePool({
|
|
117
|
+
prefix: "codex-auth",
|
|
118
|
+
extension: ".json",
|
|
119
|
+
activeDir: "/tmp/codex-active",
|
|
120
|
+
activeFilename: "auth.json",
|
|
121
|
+
agent: "codex",
|
|
122
|
+
});
|
|
123
|
+
pool.init();
|
|
124
|
+
|
|
125
|
+
// On auth failure:
|
|
126
|
+
const rotated = await pool.handleError(err);
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
For Codex-specific rotation, use the higher-level helper:
|
|
130
|
+
|
|
131
|
+
```ts
|
|
132
|
+
import { createCodexAgentWithPool } from "@dtechvision/fabrik-runtime/codex-auth";
|
|
133
|
+
|
|
134
|
+
const codex = createCodexAgentWithPool({
|
|
135
|
+
model: "gpt-5",
|
|
136
|
+
cwd: process.cwd(),
|
|
137
|
+
env: {},
|
|
138
|
+
});
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Local Verification
|
|
142
|
+
|
|
143
|
+
Runtime package tests:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
cd src/fabrik-runtime
|
|
147
|
+
bun test ./src
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Repo-wide CLI and workflow verification:
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
make verify-cli
|
|
154
|
+
make verify-cli-k3d
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Focused runtime-package k3d import verification:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
cd src/fabrik-cli
|
|
161
|
+
FABRIK_K3D_E2E=1 FABRIK_K3D_CLUSTER=dev-single \
|
|
162
|
+
go test ./internal/run -run TestK3dWorkflowRuntimePackageImports -timeout 10m -v
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
The complex sample in [examples/complex/README.md](examples/complex/README.md) shows how workflow code consumes the package surface in practice.
|
|
166
|
+
|
|
167
|
+
Local Smithers CLI verification:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
bunx smithers run path/to/workflow.tsx --run-id runtime-package-check
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
The expected result is a successful run whose output reports:
|
|
174
|
+
|
|
175
|
+
- `codexHomeSet: true`
|
|
176
|
+
- `jjHelpersLoaded: true`
|
|
177
|
+
|
|
178
|
+
## Precedence
|
|
179
|
+
|
|
180
|
+
1. Fabrik runtime metadata (`SMITHERS_*`, `FABRIK_*`, `KUBERNETES_*`)
|
|
181
|
+
2. Project env (`fabrik-env-<project>-<env>`) via `envFrom`
|
|
182
|
+
3. Shared credentials (`fabrik-credentials`) via file mount
|
package/package.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@dtechvision/fabrik-runtime",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"main": "src/index.ts",
|
|
6
|
+
"types": "src/index.ts",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/index.ts",
|
|
9
|
+
"./credential-pool": "./src/credential-pool.ts",
|
|
10
|
+
"./codex-auth": "./src/codex-auth.ts",
|
|
11
|
+
"./k8s-jobs": "./src/k8s-jobs.ts",
|
|
12
|
+
"./jj-shell": "./src/jj-shell.ts"
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"src/index.ts",
|
|
16
|
+
"src/credential-pool.ts",
|
|
17
|
+
"src/codex-auth.ts",
|
|
18
|
+
"src/k8s-jobs.ts",
|
|
19
|
+
"src/jj-shell.ts",
|
|
20
|
+
"README.md"
|
|
21
|
+
],
|
|
22
|
+
"publishConfig": {
|
|
23
|
+
"access": "public"
|
|
24
|
+
},
|
|
25
|
+
"repository": {
|
|
26
|
+
"type": "git",
|
|
27
|
+
"url": "git+https://github.com/SamuelLHuber/local-isolated-ralph.git",
|
|
28
|
+
"directory": "src/fabrik-runtime"
|
|
29
|
+
},
|
|
30
|
+
"license": "MIT",
|
|
31
|
+
"scripts": {
|
|
32
|
+
"test": "bun test ./src"
|
|
33
|
+
},
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"smithers-orchestrator": "0.9.1"
|
|
36
|
+
},
|
|
37
|
+
"devDependencies": {
|
|
38
|
+
"bun-types": "1.2.12"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { basename, resolve } from "node:path";
|
|
4
|
+
import { CodexAgent } from "smithers-orchestrator";
|
|
5
|
+
import {
|
|
6
|
+
classifyFailure,
|
|
7
|
+
getCredentialMountPath,
|
|
8
|
+
type FailureKind,
|
|
9
|
+
} from "./credential-pool";
|
|
10
|
+
|
|
11
|
+
const DEFAULT_CODEX_DIR = resolve(process.env.HOME ?? "", ".codex");
|
|
12
|
+
|
|
13
|
+
function getCodexAuthSourceDir(): string {
|
|
14
|
+
const sourceDir =
|
|
15
|
+
process.env.CODEX_AUTH_SOURCE_DIR ??
|
|
16
|
+
process.env.FABRIK_SHARED_CREDENTIALS_DIR ??
|
|
17
|
+
(existsSync(getCredentialMountPath()) ? getCredentialMountPath() : DEFAULT_CODEX_DIR);
|
|
18
|
+
return resolve(sourceDir);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
/**
 * Directory used as CODEX_HOME for the rotating auth pool. Overridable via
 * the CODEX_AUTH_HOME env var; defaults to "codex-auth-pool" under tmpdir.
 */
export const CODEX_AUTH_HOME = resolve(
  process.env.CODEX_AUTH_HOME ?? resolve(tmpdir(), "codex-auth-pool"),
);

// Optional operator-notification settings, read once at module load.
const NOTIFY_WEBHOOK_URL = process.env.CODEX_AUTH_NOTIFY_WEBHOOK_URL?.trim() ?? "";
const NOTIFY_CLUSTER = process.env.KUBERNETES_NAMESPACE?.trim() ?? "";
const NOTIFY_RUN_ID = process.env.SMITHERS_RUN_ID?.trim() ?? "";

// Error messages that should trigger rotation to another auth file —
// covers both hard auth failures and usage/quota exhaustion.
const AUTH_ROTATE_PATTERN =
  /no last agent message|usage limit|quota|rate limit|insufficient (?:credits|balance|quota)|payment required|billing|exceeded.*(quota|limit)|not signed in|please run 'codex login'|unauthorized|authentication required|authentication failed|forbidden|invalid (?:api key|token|credentials)|expired (?:token|credentials)/i;
// Messages indicating the refresh token was already consumed — rotation
// alone cannot fix this; the credential needs a manual re-login.
const AUTH_REFRESH_REUSED_PATTERN =
  /refresh_token_reused|refresh token has already been used|could not be refreshed because your refresh token was already used/i;
|
|
33
|
+
|
|
34
|
+
const listAuthFiles = (): string[] => {
|
|
35
|
+
const sourceDir = getCodexAuthSourceDir();
|
|
36
|
+
if (!existsSync(sourceDir)) return [];
|
|
37
|
+
return readdirSync(sourceDir)
|
|
38
|
+
.filter((name) => name.endsWith(".auth.json") || name === "auth.json")
|
|
39
|
+
.map((name) => resolve(sourceDir, name))
|
|
40
|
+
.sort();
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
const ensureCodexHome = () => {
|
|
44
|
+
if (!existsSync(CODEX_AUTH_HOME)) {
|
|
45
|
+
mkdirSync(CODEX_AUTH_HOME, { recursive: true });
|
|
46
|
+
}
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
// Module-level rotation state shared by every RotatingCodexAgent instance.
let authPool = listAuthFiles(); // absolute paths of candidate auth files
let authIndex = 0; // round-robin cursor into authPool
let activeAuth = ""; // path of the currently active auth file ("" = none yet)
const authFailures = new Map<string, FailureKind>(); // auth path -> failure kind
|
|
53
|
+
|
|
54
|
+
export function resetCodexAuthStateForTests(): void {
|
|
55
|
+
authPool = [];
|
|
56
|
+
authIndex = 0;
|
|
57
|
+
activeAuth = "";
|
|
58
|
+
authFailures.clear();
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const setActiveAuth = (authPath: string, reason: string) => {
|
|
62
|
+
ensureCodexHome();
|
|
63
|
+
const authContents = readFileSync(authPath, "utf8");
|
|
64
|
+
writeFileSync(resolve(CODEX_AUTH_HOME, "auth.json"), authContents, "utf8");
|
|
65
|
+
const previous = activeAuth ? ` from ${basename(activeAuth)}` : "";
|
|
66
|
+
activeAuth = authPath;
|
|
67
|
+
console.error(
|
|
68
|
+
`[fabrik-runtime] codex auth rotation${previous} -> ${basename(authPath)} (${reason})`,
|
|
69
|
+
);
|
|
70
|
+
};
|
|
71
|
+
|
|
72
|
+
const initAuthPool = () => {
|
|
73
|
+
ensureCodexHome();
|
|
74
|
+
authPool = listAuthFiles();
|
|
75
|
+
if (authPool.length === 0 || activeAuth) return;
|
|
76
|
+
const defaultAuth = resolve(getCodexAuthSourceDir(), "auth.json");
|
|
77
|
+
if (existsSync(defaultAuth)) {
|
|
78
|
+
setActiveAuth(defaultAuth, "initial");
|
|
79
|
+
return;
|
|
80
|
+
}
|
|
81
|
+
setActiveAuth(authPool[0]!, "initial");
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
const logAuthSummary = () => {
|
|
85
|
+
const total = authPool.length;
|
|
86
|
+
const failed = [...authFailures.entries()].map(
|
|
87
|
+
([path, status]) => `${basename(path)}:${status}`,
|
|
88
|
+
);
|
|
89
|
+
const failedCount = authFailures.size;
|
|
90
|
+
const remaining = Math.max(total - failedCount, 0);
|
|
91
|
+
const active = activeAuth ? basename(activeAuth) : "none";
|
|
92
|
+
console.error(
|
|
93
|
+
`[fabrik-runtime] codex auth pool summary: total=${total} failed=${failedCount} remaining=${remaining} active=${active}`,
|
|
94
|
+
);
|
|
95
|
+
if (failed.length > 0) {
|
|
96
|
+
console.error(`[fabrik-runtime] failed auths: ${failed.join(", ")}`);
|
|
97
|
+
}
|
|
98
|
+
};
|
|
99
|
+
|
|
100
|
+
const rotateAuth = (reason: string): boolean => {
|
|
101
|
+
authPool = listAuthFiles();
|
|
102
|
+
if (authPool.length === 0) return false;
|
|
103
|
+
for (let i = 0; i < authPool.length; i += 1) {
|
|
104
|
+
const next = authPool[authIndex % authPool.length];
|
|
105
|
+
authIndex += 1;
|
|
106
|
+
if (next && next !== activeAuth && !authFailures.has(next)) {
|
|
107
|
+
setActiveAuth(next, reason);
|
|
108
|
+
logAuthSummary();
|
|
109
|
+
return true;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
console.error("[fabrik-runtime] no codex auth left to rotate to");
|
|
113
|
+
logAuthSummary();
|
|
114
|
+
return false;
|
|
115
|
+
};
|
|
116
|
+
|
|
117
|
+
export const withCodexAuthPoolEnv = (env: Record<string, string>) => ({
|
|
118
|
+
...env,
|
|
119
|
+
CODEX_HOME: CODEX_AUTH_HOME,
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
/** Codex-facing alias of the shared credential failure classification. */
export type AuthFailureKind = FailureKind;

/** Payload describing a failed codex credential; sent to webhook/callback. */
export type AuthFailureEvent = {
  authPath: string; // absolute path of the failed auth file
  authName: string; // basename of authPath (safe to log)
  reason: string; // human-readable rotation trigger
  kind: AuthFailureKind;
  message: string; // raw error message from the agent
  clusterNamespace?: string; // KUBERNETES_NAMESPACE, when running in-cluster
  runId?: string; // SMITHERS_RUN_ID, when available
};

/** Options for RotatingCodexAgent / createCodexAgentWithPool. */
export type RotatingCodexAgentOptions = {
  // Invoked before the webhook notification on every auth failure.
  onAuthFailure?: (event: AuthFailureEvent) => void | Promise<void>;
};
|
|
137
|
+
|
|
138
|
+
const notifyAuthFailure = async (
|
|
139
|
+
event: AuthFailureEvent,
|
|
140
|
+
onAuthFailure?: RotatingCodexAgentOptions["onAuthFailure"],
|
|
141
|
+
) => {
|
|
142
|
+
if (onAuthFailure) {
|
|
143
|
+
await onAuthFailure(event);
|
|
144
|
+
}
|
|
145
|
+
if (!NOTIFY_WEBHOOK_URL) return;
|
|
146
|
+
try {
|
|
147
|
+
const response = await fetch(NOTIFY_WEBHOOK_URL, {
|
|
148
|
+
method: "POST",
|
|
149
|
+
headers: { "content-type": "application/json" },
|
|
150
|
+
body: JSON.stringify(event),
|
|
151
|
+
});
|
|
152
|
+
if (!response.ok) {
|
|
153
|
+
console.error(
|
|
154
|
+
`[fabrik-runtime] codex auth notification failed: webhook status ${response.status}`,
|
|
155
|
+
);
|
|
156
|
+
}
|
|
157
|
+
} catch (err) {
|
|
158
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
159
|
+
console.error(`[fabrik-runtime] codex auth notification failed: ${message}`);
|
|
160
|
+
}
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
export const createCodexAgentWithPool = (
|
|
164
|
+
opts: ConstructorParameters<typeof CodexAgent>[0],
|
|
165
|
+
rotationOpts: RotatingCodexAgentOptions = {},
|
|
166
|
+
) =>
|
|
167
|
+
new RotatingCodexAgent(
|
|
168
|
+
new CodexAgent({
|
|
169
|
+
...opts,
|
|
170
|
+
env: withCodexAuthPoolEnv(opts.env ?? {}),
|
|
171
|
+
}),
|
|
172
|
+
rotationOpts,
|
|
173
|
+
);
|
|
174
|
+
|
|
175
|
+
/**
 * Wraps a CodexAgent and retries `generate` across the module-level auth
 * pool: when a call fails with an auth/usage error, the active auth file is
 * marked failed, operators are notified, and the next pool entry is
 * activated before retrying.
 */
export class RotatingCodexAgent {
  private readonly inner: CodexAgent;
  private readonly onAuthFailure?: RotatingCodexAgentOptions["onAuthFailure"];

  constructor(inner: CodexAgent, opts: RotatingCodexAgentOptions = {}) {
    this.inner = inner;
    this.onAuthFailure = opts.onAuthFailure;
  }

  /** Delegates to the wrapped agent's id. */
  get id() {
    return this.inner.id;
  }

  /** Delegates to the wrapped agent's tools. */
  get tools() {
    return this.inner.tools;
  }

  /**
   * Generate with rotation. Makes at most one attempt per pool entry (at
   * least one even when the pool is empty). Errors that do not match
   * AUTH_ROTATE_PATTERN are rethrown immediately; when rotation is
   * exhausted, the last error (or a generic exhaustion error) is thrown.
   */
  async generate(args: Parameters<CodexAgent["generate"]>[0]) {
    initAuthPool();
    const attempts = Math.max(authPool.length, 1);
    let lastError: unknown = null;
    for (let i = 0; i < attempts; i += 1) {
      try {
        return await this.inner.generate(args);
      } catch (err) {
        lastError = err;
        const message = err instanceof Error ? err.message : String(err);
        // Only rotate for auth/usage failures; anything else is fatal.
        if (!AUTH_ROTATE_PATTERN.test(message)) {
          throw err;
        }
        if (activeAuth) {
          const kind = classifyFailure(message);
          // Remember the failure so rotation skips this file from now on.
          authFailures.set(activeAuth, kind);
          if (AUTH_REFRESH_REUSED_PATTERN.test(message)) {
            console.error("[fabrik-runtime] codex refresh token reused; re-auth required");
          }
          await notifyAuthFailure(
            {
              authPath: activeAuth,
              authName: basename(activeAuth),
              reason: "codex generate failed and rotation was requested",
              kind,
              message,
              clusterNamespace: NOTIFY_CLUSTER || undefined,
              runId: NOTIFY_RUN_ID || undefined,
            },
            this.onAuthFailure,
          );
        }
        // No fresh credential left to switch to: stop retrying.
        if (!rotateAuth("codex auth / usage failure")) {
          break;
        }
      }
    }
    throw lastError ?? new Error("Codex auth pool exhausted");
  }
}
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generic credential pool for Fabrik workflow pods.
|
|
3
|
+
*
|
|
4
|
+
* Credentials are managed by operators via kubectl and mounted into pods
|
|
5
|
+
* at /etc/fabrik/credentials as a Kubernetes Secret directory mount.
|
|
6
|
+
* This module reads from that mount, provides pool rotation for agents
|
|
7
|
+
* that support multiple credential files, and emits structured failure
|
|
8
|
+
* notifications without exposing secret contents.
|
|
9
|
+
*
|
|
10
|
+
* Architecture:
|
|
11
|
+
* - Operators create/update `fabrik-credentials` secret in `fabrik-system`
|
|
12
|
+
* via kubectl (e.g. `kubectl create secret generic fabrik-credentials
|
|
13
|
+
* --from-file=ANTHROPIC_API_KEY=./key.txt --from-literal=OPENAI_API_KEY=sk-...`)
|
|
14
|
+
* - Fabrik CLI mirrors the secret into the run namespace at dispatch time
|
|
15
|
+
* - The secret is directory-mounted (no subPath) at CREDENTIAL_MOUNT_PATH
|
|
16
|
+
* so running pods observe file replacements for rotation
|
|
17
|
+
* - This module reads credential files from that mount directory
|
|
18
|
+
*
|
|
19
|
+
* Supported credential layouts:
|
|
20
|
+
* - Flat env-var keys: /etc/fabrik/credentials/ANTHROPIC_API_KEY (file contains the value)
|
|
21
|
+
* - Codex auth pool: /etc/fabrik/credentials/codex-auth.json,
|
|
22
|
+
* /etc/fabrik/credentials/codex-auth-2.json, etc.
|
|
23
|
+
* - Claude Code: /etc/fabrik/credentials/ANTHROPIC_API_KEY
|
|
24
|
+
* - Pi: /etc/fabrik/credentials/FIREWORKS_API_KEY or provider config files
|
|
25
|
+
*/
|
|
26
|
+
import { existsSync, readFileSync, readdirSync, writeFileSync, mkdirSync } from "node:fs";
|
|
27
|
+
import { basename, resolve } from "node:path";
|
|
28
|
+
|
|
29
|
+
/** Default mount path for the fabrik-credentials directory. */
|
|
30
|
+
export function getCredentialMountPath(): string {
|
|
31
|
+
return process.env.FABRIK_CREDENTIAL_PATH ?? "/etc/fabrik/credentials";
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/** @deprecated Use getCredentialMountPath() for dynamic resolution. */
|
|
35
|
+
export const CREDENTIAL_MOUNT_PATH = "/etc/fabrik/credentials";
|
|
36
|
+
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
// Failure classification
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
export type FailureKind =
|
|
42
|
+
| "refresh_token_reused"
|
|
43
|
+
| "usage_limit"
|
|
44
|
+
| "auth_invalid"
|
|
45
|
+
| "unknown";
|
|
46
|
+
|
|
47
|
+
export type FailureEvent = {
|
|
48
|
+
credentialName: string;
|
|
49
|
+
kind: FailureKind;
|
|
50
|
+
message: string;
|
|
51
|
+
agent: string;
|
|
52
|
+
namespace?: string;
|
|
53
|
+
runId?: string;
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
const REFRESH_REUSED =
|
|
57
|
+
/refresh_token_reused|refresh token has already been used|could not be refreshed because your refresh token was already used/i;
|
|
58
|
+
const USAGE_LIMIT =
|
|
59
|
+
/no last agent message|usage limit|quota|rate limit|insufficient (?:credits|balance|quota)|payment required|billing|exceeded.*(quota|limit)/i;
|
|
60
|
+
const AUTH_INVALID =
|
|
61
|
+
/not signed in|please run.*login|unauthorized|authentication required|authentication failed|forbidden|invalid (?:api key|token|credentials)|expired (?:token|credentials)|Not logged in/i;
|
|
62
|
+
|
|
63
|
+
export function classifyFailure(message: string): FailureKind {
|
|
64
|
+
if (REFRESH_REUSED.test(message)) return "refresh_token_reused";
|
|
65
|
+
if (AUTH_INVALID.test(message)) return "auth_invalid";
|
|
66
|
+
if (USAGE_LIMIT.test(message)) return "usage_limit";
|
|
67
|
+
return "unknown";
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/** Returns true if the error message indicates an auth/credential problem
|
|
71
|
+
* that credential rotation might fix. */
|
|
72
|
+
export function isRotatableFailure(message: string): boolean {
|
|
73
|
+
return classifyFailure(message) !== "unknown";
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// Notification
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
const NOTIFY_WEBHOOK = process.env.FABRIK_CREDENTIAL_NOTIFY_WEBHOOK?.trim() ?? "";
|
|
81
|
+
|
|
82
|
+
export async function notifyFailure(event: FailureEvent): Promise<void> {
|
|
83
|
+
console.error(
|
|
84
|
+
`[fabrik-runtime] credential failure: ${event.credentialName} kind=${event.kind} agent=${event.agent}`,
|
|
85
|
+
);
|
|
86
|
+
if (!NOTIFY_WEBHOOK) return;
|
|
87
|
+
try {
|
|
88
|
+
const resp = await fetch(NOTIFY_WEBHOOK, {
|
|
89
|
+
method: "POST",
|
|
90
|
+
headers: { "content-type": "application/json" },
|
|
91
|
+
body: JSON.stringify(event),
|
|
92
|
+
});
|
|
93
|
+
if (!resp.ok) {
|
|
94
|
+
console.error(`[fabrik-runtime] notification webhook returned ${resp.status}`);
|
|
95
|
+
}
|
|
96
|
+
} catch (err) {
|
|
97
|
+
console.error(`[fabrik-runtime] notification failed: ${err instanceof Error ? err.message : err}`);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
// Credential reading from mounted directory
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
/** Read a single credential value from the mounted directory. */
|
|
106
|
+
export function readCredential(name: string): string | null {
|
|
107
|
+
const mountPath = getCredentialMountPath();
|
|
108
|
+
const path = resolve(mountPath, name);
|
|
109
|
+
if (!existsSync(path)) return null;
|
|
110
|
+
return readFileSync(path, "utf8").trim();
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/** List all credential file names in the mounted directory. */
|
|
114
|
+
export function listCredentials(): string[] {
|
|
115
|
+
const mountPath = getCredentialMountPath();
|
|
116
|
+
if (!existsSync(mountPath)) return [];
|
|
117
|
+
return readdirSync(mountPath)
|
|
118
|
+
.filter((name) => !name.startsWith(".") && name !== "..timestamp_of_last_update")
|
|
119
|
+
.sort();
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/** Read all credentials as a key→value map. */
|
|
123
|
+
export function readAllCredentials(): Record<string, string> {
|
|
124
|
+
const result: Record<string, string> = {};
|
|
125
|
+
for (const name of listCredentials()) {
|
|
126
|
+
const value = readCredential(name);
|
|
127
|
+
if (value !== null) result[name] = value;
|
|
128
|
+
}
|
|
129
|
+
return result;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
// File pool rotation (for agents that use auth files like Codex)
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
/** Configuration for a CredentialFilePool. */
export type PoolOptions = {
  /** Filename prefix to match, e.g. "codex-auth" matches codex-auth*.json */
  prefix: string;
  /** Extension to match, e.g. ".json" (defaults to ".json") */
  extension?: string;
  /** Directory to write the active credential file to */
  activeDir: string;
  /** Filename for the active credential, e.g. "auth.json" */
  activeFilename: string;
  /** Agent name for failure events */
  agent: string;
};
|
|
148
|
+
|
|
149
|
+
/**
 * Round-robin pool over credential files in the mounted secret directory.
 * The active file is copied into opts.activeDir/opts.activeFilename so the
 * consuming agent always reads a stable path; failed files are remembered
 * and skipped on subsequent rotations.
 */
export class CredentialFilePool {
  private pool: string[] = []; // absolute paths of matching pool files
  private index = 0; // round-robin cursor into pool
  private active = ""; // absolute path of the active file ("" = none)
  private failures = new Map<string, FailureKind>(); // path -> failure kind
  private readonly opts: PoolOptions;

  constructor(opts: PoolOptions) {
    this.opts = opts;
  }

  /** Scan the credential mount for pool files and activate the first one. */
  init(): void {
    this.pool = this.scanPool();
    if (this.pool.length === 0) return;
    // Keep the current credential if it still exists after a rescan.
    if (this.active && this.pool.includes(this.active)) return;
    this.activate(this.pool[0]!, "initial");
  }

  /** Number of available (non-failed) credentials. */
  get available(): number {
    return this.pool.filter((p) => !this.failures.has(p)).length;
  }

  /** Basename of the active credential file, or "" when none is active. */
  get activeName(): string {
    return this.active ? basename(this.active) : "";
  }

  /** Try to rotate to the next unfailed credential. Returns false if exhausted. */
  rotate(reason: string): boolean {
    // Rescan first so newly mounted files become rotation candidates.
    this.pool = this.scanPool();
    if (this.pool.length === 0) return false;
    for (let i = 0; i < this.pool.length; i++) {
      const next = this.pool[this.index % this.pool.length]!;
      this.index++;
      if (next !== this.active && !this.failures.has(next)) {
        this.activate(next, reason);
        return true;
      }
    }
    console.error(`[fabrik-runtime] ${this.opts.agent} credential pool exhausted`);
    return false;
  }

  /** Mark the current credential as failed and optionally notify. */
  async markFailed(message: string): Promise<void> {
    if (!this.active) return;
    const kind = classifyFailure(message);
    this.failures.set(this.active, kind);
    await notifyFailure({
      credentialName: basename(this.active),
      kind,
      message,
      agent: this.opts.agent,
      namespace: process.env.KUBERNETES_NAMESPACE?.trim(),
      runId: process.env.SMITHERS_RUN_ID?.trim(),
    });
  }

  /**
   * Handle an agent error: returns false for non-credential errors, else
   * marks the active file failed and reports whether rotation succeeded.
   */
  async handleError(err: unknown): Promise<boolean> {
    const message = err instanceof Error ? err.message : String(err);
    if (!isRotatableFailure(message)) return false;
    await this.markFailed(message);
    return this.rotate("credential failure");
  }

  // List files in the mount matching prefix + extension, sorted for
  // deterministic rotation order.
  private scanPool(): string[] {
    const mountPath = getCredentialMountPath();
    if (!existsSync(mountPath)) return [];
    const ext = this.opts.extension ?? ".json";
    return readdirSync(mountPath)
      .filter((name) => name.startsWith(this.opts.prefix) && name.endsWith(ext))
      .map((name) => resolve(mountPath, name))
      .sort();
  }

  // Copy the chosen file into the stable active path, record it, and log
  // the transition (basenames only — never the secret contents).
  private activate(path: string, reason: string): void {
    mkdirSync(this.opts.activeDir, { recursive: true });
    const contents = readFileSync(path, "utf8");
    writeFileSync(resolve(this.opts.activeDir, this.opts.activeFilename), contents, "utf8");
    const prev = this.active ? ` from ${basename(this.active)}` : "";
    this.active = path;
    console.error(
      `[fabrik-runtime] ${this.opts.agent} credential${prev} -> ${basename(path)} (${reason})`,
    );
  }
}
|
|
237
|
+
|
|
238
|
+
// ---------------------------------------------------------------------------
|
|
239
|
+
// Env-var credential helpers for agents that use env vars
|
|
240
|
+
// ---------------------------------------------------------------------------
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* Read a credential from the mounted directory and set it as an env var.
|
|
244
|
+
* This is the standard pattern for agents that use env vars for auth
|
|
245
|
+
* (Claude Code ANTHROPIC_API_KEY, Pi FIREWORKS_API_KEY, etc.)
|
|
246
|
+
*/
|
|
247
|
+
export function injectCredentialEnv(credentialName: string, envVar?: string): boolean {
|
|
248
|
+
const value = readCredential(credentialName);
|
|
249
|
+
if (value === null) return false;
|
|
250
|
+
process.env[envVar ?? credentialName] = value;
|
|
251
|
+
return true;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
/**
|
|
255
|
+
* Inject all credentials from the mounted directory as env vars.
|
|
256
|
+
* File names become env var names, file contents become values.
|
|
257
|
+
*/
|
|
258
|
+
export function injectAllCredentialEnvs(): string[] {
|
|
259
|
+
const injected: string[] = [];
|
|
260
|
+
for (const name of listCredentials()) {
|
|
261
|
+
if (injectCredentialEnv(name)) {
|
|
262
|
+
injected.push(name);
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
return injected;
|
|
266
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * fabrik-runtime — shared TypeScript utilities for Fabrik workflow pods.
 *
 * This package provides:
 * - Credential pool management (read from mounted K8s secrets, rotate, notify)
 * - K8s job helpers (dispatch child verification jobs)
 * - Deterministic JJ/Git shell operations
 */

// Credential pool: read credentials mounted as files, rotate on failure,
// and surface failure events to operators.
export {
  CREDENTIAL_MOUNT_PATH,
  getCredentialMountPath,
  classifyFailure,
  isRotatableFailure,
  notifyFailure,
  readCredential,
  listCredentials,
  readAllCredentials,
  injectCredentialEnv,
  injectAllCredentialEnvs,
  CredentialFilePool,
  type FailureKind,
  type FailureEvent,
  type PoolOptions,
} from "./credential-pool";

// Codex auth rotation: cycle among auth.json credentials for Codex-backed agents.
export {
  CODEX_AUTH_HOME,
  withCodexAuthPoolEnv,
  createCodexAgentWithPool,
  RotatingCodexAgent,
  type AuthFailureKind,
  type AuthFailureEvent,
  type RotatingCodexAgentOptions,
} from "./codex-auth";

// Kubernetes job helpers: build and run child verification Jobs.
export {
  runVerificationJob,
  buildVerificationJobManifest,
  type VerificationResult,
} from "./k8s-jobs";

// Deterministic JJ/Git shell operations: workspace prep, snapshot, bookmark push.
export {
  prepareWorkspaces,
  snapshotChange,
  pushBookmark,
  type ReportOutput,
} from "./jj-shell";
|
package/src/jj-shell.ts
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic JJ shell operations for workflow-owned progress tracking.
|
|
3
|
+
*
|
|
4
|
+
* These commands have fixed semantics and should not be delegated to the
|
|
5
|
+
* coding agent. Keeping them here makes workspace creation, snapshotting, and
|
|
6
|
+
* bookmark pushes reproducible across runs.
|
|
7
|
+
*/
|
|
8
|
+
import { $ } from "bun";
|
|
9
|
+
import { existsSync } from "node:fs";
|
|
10
|
+
import { resolve } from "node:path";
|
|
11
|
+
|
|
12
|
+
/**
 * Structured report returned by every deterministic JJ operation in this module.
 * `ticketId` identifies the unit of work, `status` is the coarse outcome,
 * and `summary` is a human-readable explanation.
 */
export type ReportOutput = {
  ticketId: string;
  status: "done" | "partial" | "blocked";
  summary: string;
};

/** Captured outcome of a single `jj` CLI invocation (never throws). */
type JjResult = {
  ok: boolean; // true iff exitCode === 0
  stdout: string; // trimmed
  stderr: string; // trimmed
  exitCode: number;
};
|
|
24
|
+
|
|
25
|
+
async function jj(args: string[], cwd: string): Promise<JjResult> {
|
|
26
|
+
const result = await $`jj ${args}`.cwd(cwd).nothrow().quiet();
|
|
27
|
+
return {
|
|
28
|
+
ok: result.exitCode === 0,
|
|
29
|
+
stdout: result.stdout.toString().trim(),
|
|
30
|
+
stderr: result.stderr.toString().trim(),
|
|
31
|
+
exitCode: result.exitCode,
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export async function prepareWorkspaces(
|
|
36
|
+
repoRoot: string,
|
|
37
|
+
workspacesDir: string,
|
|
38
|
+
ticketIds: readonly string[],
|
|
39
|
+
): Promise<ReportOutput> {
|
|
40
|
+
await $`mkdir -p ${workspacesDir}`.quiet();
|
|
41
|
+
|
|
42
|
+
const created: string[] = [];
|
|
43
|
+
const skipped: string[] = [];
|
|
44
|
+
const errors: string[] = [];
|
|
45
|
+
|
|
46
|
+
for (const ticketId of ticketIds) {
|
|
47
|
+
const wsPath = resolve(workspacesDir, ticketId);
|
|
48
|
+
|
|
49
|
+
if (existsSync(wsPath)) {
|
|
50
|
+
const check = await jj(["status"], wsPath);
|
|
51
|
+
if (check.ok) {
|
|
52
|
+
skipped.push(ticketId);
|
|
53
|
+
continue;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const result = await jj(
|
|
58
|
+
["workspace", "add", wsPath, "--name", ticketId],
|
|
59
|
+
repoRoot,
|
|
60
|
+
);
|
|
61
|
+
if (result.ok) {
|
|
62
|
+
created.push(ticketId);
|
|
63
|
+
continue;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
const fallback = await jj(
|
|
67
|
+
["workspace", "add", ticketId, wsPath],
|
|
68
|
+
repoRoot,
|
|
69
|
+
);
|
|
70
|
+
if (fallback.ok) {
|
|
71
|
+
created.push(ticketId);
|
|
72
|
+
continue;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
errors.push(`${ticketId}: ${result.stderr || fallback.stderr}`);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const parts: string[] = [];
|
|
79
|
+
if (created.length > 0) parts.push(`Created: ${created.join(", ")}`);
|
|
80
|
+
if (skipped.length > 0) parts.push(`Existing: ${skipped.join(", ")}`);
|
|
81
|
+
if (errors.length > 0) parts.push(`Errors: ${errors.join("; ")}`);
|
|
82
|
+
|
|
83
|
+
return {
|
|
84
|
+
ticketId: "prepare-workspaces",
|
|
85
|
+
status: errors.length > 0 ? "partial" : "done",
|
|
86
|
+
summary: parts.join(". ") || "No workspaces to prepare.",
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
export async function snapshotChange(
|
|
91
|
+
workspacePath: string,
|
|
92
|
+
ticketId: string,
|
|
93
|
+
phase: string,
|
|
94
|
+
): Promise<ReportOutput> {
|
|
95
|
+
const status = await jj(["status"], workspacePath);
|
|
96
|
+
if (!status.ok) {
|
|
97
|
+
return {
|
|
98
|
+
ticketId,
|
|
99
|
+
status: "blocked",
|
|
100
|
+
summary: `jj status failed: ${status.stderr}`,
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
const hasChanges = !status.stdout.includes("The working copy is clean");
|
|
105
|
+
const message = `${ticketId}: ${phase}`;
|
|
106
|
+
|
|
107
|
+
const describe = await jj(["describe", "-m", message], workspacePath);
|
|
108
|
+
if (!describe.ok) {
|
|
109
|
+
return {
|
|
110
|
+
ticketId,
|
|
111
|
+
status: "blocked",
|
|
112
|
+
summary: `jj describe failed: ${describe.stderr}`,
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
const newChange = await jj(["new"], workspacePath);
|
|
117
|
+
if (!newChange.ok) {
|
|
118
|
+
return {
|
|
119
|
+
ticketId,
|
|
120
|
+
status: "blocked",
|
|
121
|
+
summary: `jj new failed: ${newChange.stderr}`,
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
return {
|
|
126
|
+
ticketId,
|
|
127
|
+
status: "done",
|
|
128
|
+
summary: hasChanges
|
|
129
|
+
? `Snapshotted: "${message}"`
|
|
130
|
+
: `Described (no file changes): "${message}"`,
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/**
 * Move (or create) a bookmark at the parent of the working copy (`@-`) and
 * push it to the `origin` remote, then verify the remote ref actually landed
 * on the expected commit.
 *
 * The steps are strictly ordered: track remote bookmark (best effort) →
 * resolve target commit → set/create bookmark → push → verify via
 * `git ls-remote`. Any hard failure returns a "blocked" report; only a
 * verified push returns "done".
 */
export async function pushBookmark(
  workspacePath: string,
  bookmarkName: string,
  ticketId: string,
): Promise<ReportOutput> {
  // "@-" = the parent of the working-copy change (the snapshotted work).
  const targetRev = "@-";
  // Best-effort: tracking may fail if the remote bookmark doesn't exist yet.
  const track = await jj(
    ["bookmark", "track", bookmarkName, "--remote", "origin"],
    workspacePath,
  );
  // Carry any tracking complaint into later summaries instead of failing here.
  const trackSummary =
    track.ok || track.stderr === ""
      ? ""
      : ` Tracking remote bookmark reported: ${track.stderr}`;

  // Resolve the full commit id of the target revision for later verification.
  const targetCommit = await jj(
    ["log", "-r", targetRev, "--no-graph", "-T", "commit_id"],
    workspacePath,
  );
  if (!targetCommit.ok || !targetCommit.stdout) {
    return {
      ticketId,
      status: "blocked",
      summary: `Failed to resolve target revision for bookmark push: ${targetCommit.stderr}`,
    };
  }

  // Move the existing bookmark; --allow-backwards permits non-fast-forward moves.
  const move = await jj(
    ["bookmark", "set", bookmarkName, "-r", targetRev, "--allow-backwards"],
    workspacePath,
  );

  if (!move.ok) {
    // "set" fails when the bookmark doesn't exist yet — create it instead.
    const create = await jj(
      ["bookmark", "create", "-r", targetRev, bookmarkName],
      workspacePath,
    );
    if (!create.ok) {
      return {
        ticketId,
        status: "blocked",
        summary: `Failed to set bookmark '${bookmarkName}': ${create.stderr}`,
      };
    }
  }

  const push = await jj(
    ["git", "push", "--bookmark", bookmarkName],
    workspacePath,
  );
  if (!push.ok) {
    return {
      ticketId,
      status: "blocked",
      summary: `Bookmark set but push failed: ${push.stderr}${trackSummary}`,
    };
  }

  // Verify independently via git that the remote ref points at the target
  // commit; a "successful" push that didn't land is still reported as blocked.
  const remote = await $`git ls-remote origin refs/heads/${bookmarkName}`
    .cwd(workspacePath)
    .nothrow()
    .quiet();
  // ls-remote output is "<sha>\t<ref>"; take the sha column.
  const remoteCommit = remote.stdout.toString().trim().split(/\s+/)[0] ?? "";
  if (remote.exitCode !== 0 || remoteCommit !== targetCommit.stdout) {
    return {
      ticketId,
      status: "blocked",
      summary:
        `Bookmark push returned success but remote ${bookmarkName} is ${remoteCommit || "missing"} instead of ${targetCommit.stdout}.` +
        trackSummary,
    };
  }

  return {
    ticketId,
    status: "done",
    summary: `Pushed bookmark '${bookmarkName}' to origin at ${targetCommit.stdout}.${trackSummary}`,
  };
}
|
package/src/k8s-jobs.ts
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
import https from "node:https";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
|
|
4
|
+
// Standard in-cluster service-account credential paths mounted by Kubernetes.
const SERVICE_ACCOUNT_TOKEN = "/var/run/secrets/kubernetes.io/serviceaccount/token";
const SERVICE_ACCOUNT_CA = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt";

/** Outcome of a dispatched verification Job, including captured pod logs. */
export type VerificationResult = {
  passed: boolean; // true iff the Job's status reported succeeded > 0
  jobName: string;
  podName: string; // last pod observed for the job; "" if none was seen
  commands: string[]; // copy of the commands that were executed
  logs: string;
  summary: string;
};

/** Parameters for building and dispatching a verification Job. */
type VerificationJobOptions = {
  name: string; // also used as the fabrik.sh/task label
  image: string;
  namespace: string;
  serviceAccountName: string;
  pvcName: string; // workspace PVC mounted at /workspace
  // NOTE(review): jobs are pinned to a specific node — presumably so the
  // workspace PVC can be co-mounted; confirm against cluster storage class.
  nodeName: string;
  workspacePath: string; // container workingDir
  commands: string[];
  cleanupCommands?: string[]; // run via a shell EXIT trap
  labels?: Record<string, string>;
  timeoutSeconds?: number; // defaults to 900 in runVerificationJob
};

/** Minimal object metadata returned by the Kubernetes API. */
type K8sObjectMeta = {
  name: string;
  namespace: string;
};

/** Shape of the pod-list response we consume (names and labels only). */
type PodList = {
  items?: Array<{
    metadata?: {
      name?: string;
      labels?: Record<string, string>;
    };
  }>;
};

/** Shape of the Job status fields we poll. */
type JobStatus = {
  status?: {
    succeeded?: number;
    failed?: number;
  };
};
|
|
50
|
+
|
|
51
|
+
function requiredEnv(name: string): string {
|
|
52
|
+
const value = process.env[name]?.trim();
|
|
53
|
+
if (!value) {
|
|
54
|
+
throw new Error(`Missing required Kubernetes environment variable ${name}.`);
|
|
55
|
+
}
|
|
56
|
+
return value;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
 * Perform a single HTTPS request against the in-cluster Kubernetes API
 * server, authenticated with the pod's mounted service-account token and
 * verified against the mounted cluster CA.
 *
 * Resolves with the raw response body on any 2xx status; rejects with an
 * Error carrying method, path, status, and body otherwise.
 */
function k8sRequest(
  method: string,
  path: string,
  body?: string,
  contentType = "application/json",
): Promise<string> {
  // Credentials are re-read per request; tokens may be rotated by kubelet.
  const token = readFileSync(SERVICE_ACCOUNT_TOKEN, "utf8").trim();
  const ca = readFileSync(SERVICE_ACCOUNT_CA);
  const host = requiredEnv("KUBERNETES_SERVICE_HOST");
  const port = process.env.KUBERNETES_SERVICE_PORT_HTTPS?.trim() || "443";

  return new Promise((resolve, reject) => {
    const req = https.request(
      {
        host,
        port,
        method,
        path,
        ca,
        headers: {
          Authorization: `Bearer ${token}`,
          Accept: "application/json",
          // Content headers only when a body is actually sent.
          ...(body
            ? {
                "Content-Type": contentType,
                "Content-Length": Buffer.byteLength(body),
              }
            : {}),
        },
      },
      (res) => {
        let data = "";
        res.setEncoding("utf8");
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("end", () => {
          const statusCode = res.statusCode ?? 500;
          if (statusCode >= 200 && statusCode < 300) {
            resolve(data);
            return;
          }
          reject(
            new Error(
              `Kubernetes API ${method} ${path} failed with ${statusCode}: ${data}`,
            ),
          );
        });
      },
    );

    req.on("error", reject);
    if (body) req.write(body);
    req.end();
  });
}
|
|
115
|
+
|
|
116
|
+
async function createJob(namespace: string, manifest: unknown): Promise<K8sObjectMeta> {
|
|
117
|
+
const response = await k8sRequest(
|
|
118
|
+
"POST",
|
|
119
|
+
`/apis/batch/v1/namespaces/${namespace}/jobs`,
|
|
120
|
+
JSON.stringify(manifest),
|
|
121
|
+
);
|
|
122
|
+
const parsed = JSON.parse(response) as { metadata?: K8sObjectMeta };
|
|
123
|
+
if (!parsed.metadata?.name || !parsed.metadata?.namespace) {
|
|
124
|
+
throw new Error("Kubernetes API create job response did not include metadata.");
|
|
125
|
+
}
|
|
126
|
+
return parsed.metadata;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
async function getJob(namespace: string, jobName: string): Promise<JobStatus> {
|
|
130
|
+
const response = await k8sRequest(
|
|
131
|
+
"GET",
|
|
132
|
+
`/apis/batch/v1/namespaces/${namespace}/jobs/${jobName}`,
|
|
133
|
+
);
|
|
134
|
+
return JSON.parse(response) as JobStatus;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
async function listPodsForJob(namespace: string, jobName: string): Promise<string[]> {
|
|
138
|
+
const response = await k8sRequest(
|
|
139
|
+
"GET",
|
|
140
|
+
`/api/v1/namespaces/${namespace}/pods?labelSelector=${encodeURIComponent(`job-name=${jobName}`)}`,
|
|
141
|
+
);
|
|
142
|
+
const parsed = JSON.parse(response) as PodList;
|
|
143
|
+
return (parsed.items ?? [])
|
|
144
|
+
.map((item) => item.metadata?.name?.trim() ?? "")
|
|
145
|
+
.filter((name) => name !== "");
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
async function getPodLogs(namespace: string, podName: string): Promise<string> {
|
|
149
|
+
return await k8sRequest(
|
|
150
|
+
"GET",
|
|
151
|
+
`/api/v1/namespaces/${namespace}/pods/${podName}/log?container=fabrik`,
|
|
152
|
+
undefined,
|
|
153
|
+
"text/plain",
|
|
154
|
+
);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
async function deleteJob(namespace: string, jobName: string): Promise<void> {
|
|
158
|
+
await k8sRequest(
|
|
159
|
+
"DELETE",
|
|
160
|
+
`/apis/batch/v1/namespaces/${namespace}/jobs/${jobName}?propagationPolicy=Background`,
|
|
161
|
+
);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
function sleep(ms: number): Promise<void> {
|
|
165
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
function buildVerifierScript(
|
|
169
|
+
commands: readonly string[],
|
|
170
|
+
cleanupCommands: readonly string[] = [],
|
|
171
|
+
): string {
|
|
172
|
+
const lines = ["set -euo pipefail"];
|
|
173
|
+
if (cleanupCommands.length > 0) {
|
|
174
|
+
lines.push("cleanup() {");
|
|
175
|
+
for (const command of cleanupCommands) {
|
|
176
|
+
lines.push(` ${command}`);
|
|
177
|
+
}
|
|
178
|
+
lines.push("}");
|
|
179
|
+
lines.push("trap cleanup EXIT");
|
|
180
|
+
}
|
|
181
|
+
lines.push(...commands);
|
|
182
|
+
return lines.join("\n");
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
export function buildVerificationJobManifest(options: VerificationJobOptions) {
|
|
186
|
+
return {
|
|
187
|
+
apiVersion: "batch/v1",
|
|
188
|
+
kind: "Job",
|
|
189
|
+
metadata: {
|
|
190
|
+
name: options.name,
|
|
191
|
+
namespace: options.namespace,
|
|
192
|
+
labels: {
|
|
193
|
+
"fabrik.sh/managed-by": "fabrik",
|
|
194
|
+
"fabrik.sh/phase": "verify",
|
|
195
|
+
"fabrik.sh/task": options.name,
|
|
196
|
+
...(options.labels ?? {}),
|
|
197
|
+
},
|
|
198
|
+
},
|
|
199
|
+
spec: {
|
|
200
|
+
ttlSecondsAfterFinished: 3600,
|
|
201
|
+
backoffLimit: 0,
|
|
202
|
+
template: {
|
|
203
|
+
metadata: {
|
|
204
|
+
labels: {
|
|
205
|
+
"fabrik.sh/managed-by": "fabrik",
|
|
206
|
+
"fabrik.sh/phase": "verify",
|
|
207
|
+
"fabrik.sh/task": options.name,
|
|
208
|
+
...(options.labels ?? {}),
|
|
209
|
+
},
|
|
210
|
+
},
|
|
211
|
+
spec: {
|
|
212
|
+
serviceAccountName: options.serviceAccountName,
|
|
213
|
+
restartPolicy: "Never",
|
|
214
|
+
nodeName: options.nodeName,
|
|
215
|
+
containers: [
|
|
216
|
+
{
|
|
217
|
+
name: "fabrik",
|
|
218
|
+
image: options.image,
|
|
219
|
+
imagePullPolicy: "IfNotPresent",
|
|
220
|
+
command: [
|
|
221
|
+
"sh",
|
|
222
|
+
"-lc",
|
|
223
|
+
buildVerifierScript(
|
|
224
|
+
options.commands,
|
|
225
|
+
options.cleanupCommands ?? [],
|
|
226
|
+
),
|
|
227
|
+
],
|
|
228
|
+
env: [
|
|
229
|
+
{ name: "FABRIK_RUN_IMAGE", value: options.image },
|
|
230
|
+
{ name: "KUBERNETES_NAMESPACE", value: options.namespace },
|
|
231
|
+
{ name: "FABRIK_WORKSPACE_PVC", value: options.pvcName },
|
|
232
|
+
{ name: "KUBERNETES_NODE_NAME", value: options.nodeName },
|
|
233
|
+
],
|
|
234
|
+
workingDir: options.workspacePath,
|
|
235
|
+
volumeMounts: [
|
|
236
|
+
{
|
|
237
|
+
name: "workspace",
|
|
238
|
+
mountPath: "/workspace",
|
|
239
|
+
},
|
|
240
|
+
],
|
|
241
|
+
},
|
|
242
|
+
],
|
|
243
|
+
volumes: [
|
|
244
|
+
{
|
|
245
|
+
name: "workspace",
|
|
246
|
+
persistentVolumeClaim: {
|
|
247
|
+
claimName: options.pvcName,
|
|
248
|
+
},
|
|
249
|
+
},
|
|
250
|
+
],
|
|
251
|
+
},
|
|
252
|
+
},
|
|
253
|
+
},
|
|
254
|
+
};
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
export async function runVerificationJob(
|
|
258
|
+
options: VerificationJobOptions,
|
|
259
|
+
): Promise<VerificationResult> {
|
|
260
|
+
const timeoutMs = (options.timeoutSeconds ?? 900) * 1000;
|
|
261
|
+
const manifest = buildVerificationJobManifest(options);
|
|
262
|
+
|
|
263
|
+
const created = await createJob(options.namespace, manifest);
|
|
264
|
+
const startedAt = Date.now();
|
|
265
|
+
let podName = "";
|
|
266
|
+
let logs = "";
|
|
267
|
+
|
|
268
|
+
try {
|
|
269
|
+
for (;;) {
|
|
270
|
+
if (Date.now()-startedAt > timeoutMs) {
|
|
271
|
+
throw new Error(`Timed out waiting for verification job ${created.name}.`);
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
const job = await getJob(options.namespace, created.name);
|
|
275
|
+
const pods = await listPodsForJob(options.namespace, created.name);
|
|
276
|
+
if (pods.length > 0) {
|
|
277
|
+
podName = pods[0]!;
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
if ((job.status?.succeeded ?? 0) > 0) {
|
|
281
|
+
if (podName) {
|
|
282
|
+
logs = await getPodLogs(options.namespace, podName);
|
|
283
|
+
}
|
|
284
|
+
return {
|
|
285
|
+
passed: true,
|
|
286
|
+
jobName: created.name,
|
|
287
|
+
podName,
|
|
288
|
+
commands: [...options.commands],
|
|
289
|
+
logs,
|
|
290
|
+
summary: `Verification job ${created.name} succeeded.`,
|
|
291
|
+
};
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
if ((job.status?.failed ?? 0) > 0) {
|
|
295
|
+
if (podName) {
|
|
296
|
+
logs = await getPodLogs(options.namespace, podName);
|
|
297
|
+
}
|
|
298
|
+
return {
|
|
299
|
+
passed: false,
|
|
300
|
+
jobName: created.name,
|
|
301
|
+
podName,
|
|
302
|
+
commands: [...options.commands],
|
|
303
|
+
logs,
|
|
304
|
+
summary: `Verification job ${created.name} failed.`,
|
|
305
|
+
};
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
await sleep(2000);
|
|
309
|
+
}
|
|
310
|
+
} finally {
|
|
311
|
+
await deleteJob(options.namespace, created.name).catch(() => undefined);
|
|
312
|
+
}
|
|
313
|
+
}
|