cowork-harness 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +16 -0
- package/CHANGELOG.md +190 -0
- package/LICENSE +21 -0
- package/README.md +470 -0
- package/baselines/desktop-1.11847.5.json +78 -0
- package/baselines/desktop-1.12603.1.json +140 -0
- package/baselines/prompts/desktop-1.12603.1/host-loop-append.md +8 -0
- package/baselines/prompts/desktop-1.12603.1/subagent-append-vm.md +3 -0
- package/baselines/prompts/desktop-1.12603.1/system-prompt-append.md +18 -0
- package/dist/agent/session.js +465 -0
- package/dist/assert.js +159 -0
- package/dist/baseline.js +87 -0
- package/dist/boundary.js +114 -0
- package/dist/canary/grants.js +37 -0
- package/dist/cli.js +1107 -0
- package/dist/decide/decider.js +521 -0
- package/dist/decide/external-channel.js +262 -0
- package/dist/decide/llm-transport.js +52 -0
- package/dist/dotenv.js +52 -0
- package/dist/egress/proxy.js +138 -0
- package/dist/egress/sidecar.js +125 -0
- package/dist/hostloop/provenance.js +110 -0
- package/dist/hostloop/workspace-handler.js +226 -0
- package/dist/loop-decision.js +62 -0
- package/dist/prompt.js +43 -0
- package/dist/run/cassette.js +420 -0
- package/dist/run/chat.js +194 -0
- package/dist/run/envelope.js +31 -0
- package/dist/run/execute.js +533 -0
- package/dist/run/renderer.js +179 -0
- package/dist/run/run.js +347 -0
- package/dist/run/trace-view.js +227 -0
- package/dist/runtime/argv.js +126 -0
- package/dist/runtime/container.js +76 -0
- package/dist/runtime/host-env.js +28 -0
- package/dist/runtime/hostloop.js +129 -0
- package/dist/runtime/lima.js +177 -0
- package/dist/runtime/microvm.js +151 -0
- package/dist/runtime/protocol.js +79 -0
- package/dist/runtime/stage.js +52 -0
- package/dist/secrets.js +42 -0
- package/dist/session.js +315 -0
- package/dist/sync/cowork-sync.js +215 -0
- package/dist/types.js +127 -0
- package/docker/Dockerfile.agent +31 -0
- package/docker/Dockerfile.proxy +12 -0
- package/docker/compose.yml +31 -0
- package/fixtures/subagent-grants.json +5 -0
- package/package.json +70 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
import { mkdirSync, readdirSync, existsSync, readFileSync, writeFileSync, renameSync, rmSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import readline from "node:readline";
|
|
4
|
+
import { spawn } from "node:child_process";
|
|
5
|
+
/**
 * Wrap a readable stream in a pull-style reader that hands out one complete line per call.
 * Chunk reassembly is delegated to `readline`, so a line split across chunks arrives whole.
 *
 * @param input readable stream to consume
 * @returns `next()` resolves with the next line, or `null` once the stream has closed and no
 *   buffered line is left; `close()` closes the underlying readline interface.
 */
function lineReader(input) {
    const iface = readline.createInterface({ input, crlfDelay: Infinity });
    const buffered = []; // lines that arrived before anyone asked for them
    const pending = []; // resolvers of next() calls still waiting for a line
    let closed = false;
    iface.on("line", (text) => {
        if (pending.length > 0) {
            pending.shift()(text);
        }
        else {
            buffered.push(text);
        }
    });
    iface.on("close", () => {
        closed = true;
        // Release every outstanding next() with the end-of-stream sentinel.
        for (let release = pending.shift(); release; release = pending.shift()) {
            release(null);
        }
    });
    const next = () => {
        if (buffered.length > 0) {
            return Promise.resolve(buffered.shift());
        }
        if (closed) {
            return Promise.resolve(null);
        }
        return new Promise((resolve) => {
            pending.push(resolve);
        });
    };
    return { next, close: () => iface.close() };
}
|
|
28
|
+
// File-name shapes used in the rendezvous directory: `req-<seq>.json` and `resp-<seq>.json`.
const REQ = /^req-\d+\.json$/;
const RESP = /^resp-\d+\.json$/;
// Numeric sequence embedded in a gate file name; 0 when the name carries none.
const seqOf = (f) => {
    const hit = f.match(/-(\d+)\.json/);
    return hit ? Number(hit[1]) : 0;
};
|
|
31
|
+
/**
 * Drop the run-complete marker (`done.json`) into `dir`. An existing marker is left untouched,
 * and every failure is swallowed, so the call is safe to repeat and safe from a process exit
 * handler (it is fully synchronous).
 *
 * @param dir directory that receives `done.json`
 */
export function writeDoneMarker(dir) {
    try {
        const marker = join(dir, "done.json");
        if (existsSync(marker)) {
            return;
        }
        writeFileSync(marker, JSON.stringify({ done: true }) + "\n");
    }
    catch {
        /* dir may be gone */
    }
}
|
|
42
|
+
/**
 * Gate stream behind `cowork-harness gates <dir> --follow`. Polls `dir` and calls `write` with one
 * single-line JSON string per newly seen request file (`{seq, ...decision_request}`), in ascending
 * sequence order, then with `{"done":true}` once `done.json` exists.
 *
 * @param dir   rendezvous directory to watch (may not exist yet)
 * @param write sink receiving each JSON line
 * @param opts  `pollMs` overrides the poll interval (default: COWORK_HARNESS_DECIDER_DIR_POLL_MS or 500);
 *              `once` resolves after a single pass
 * @returns a promise that resolves when the done marker is seen, or after one pass when `opts.once`
 */
export function streamGates(dir, write, opts = {}) {
    const pollMs = opts.pollMs ?? (Number(process.env.COWORK_HARNESS_DECIDER_DIR_POLL_MS) || 500);
    const emitted = new Set();
    const failures = new Map(); // per-file failed attempts, so a corrupt file is eventually dropped
    const listRequests = () => {
        let names = [];
        try {
            names = readdirSync(dir);
        }
        catch {
            /* not created yet */
        }
        return names.filter((x) => REQ.test(x)).sort((a, b) => seqOf(a) - seqOf(b));
    };
    const emit = (name) => {
        try {
            const text = readFileSync(join(dir, name), "utf8").trim();
            const request = JSON.parse(text);
            // Mark consumed only after a clean parse; a half-written file is retried on the next pass.
            emitted.add(name);
            write(JSON.stringify({ seq: seqOf(name), ...request }));
        }
        catch {
            // Retry a few times (the file may be mid-write), then give up visibly instead of looping forever.
            const attempts = (failures.get(name) ?? 0) + 1;
            failures.set(name, attempts);
            if (attempts < 3) {
                return;
            }
            emitted.add(name);
            process.stderr.write(`::warning:: [gates] ${name} is unreadable/malformed after ${attempts} tries — dropping\n`);
        }
    };
    return new Promise((resolve) => {
        const poll = () => {
            for (const name of listRequests()) {
                if (!emitted.has(name)) {
                    emit(name);
                }
            }
            if (existsSync(join(dir, "done.json"))) {
                write(JSON.stringify({ done: true }));
                resolve();
                return;
            }
            if (opts.once) {
                resolve();
                return;
            }
            setTimeout(poll, pollMs);
        };
        poll();
    });
}
|
|
92
|
+
/**
 * Write the answer for gate `seq` as `<dir>/resp-<seq>.json`, atomically (temp file + rename) and
 * with the `{id, answers}` wire shape — the implementation behind `cowork-harness answer`.
 *
 * The `id` is copied from `<dir>/req-<seq>.json` when that file can be read and carries one. Any
 * non-null id is echoed, including falsy ones such as `0` or `""`; only a missing/null id is omitted.
 *
 * @param dir     rendezvous directory
 * @param seq     gate sequence number
 * @param answers answer payload, stored under `answers`
 * @throws whatever `writeFileSync`/`renameSync` throw; the temp file is removed when the rename fails
 */
export function answerGate(dir, seq, answers) {
    let id;
    try {
        id = JSON.parse(readFileSync(join(dir, `req-${seq}.json`), "utf8")).id;
    }
    catch {
        /* req may already be consumed; id is optional */
    }
    const tmp = join(dir, `.resp-${seq}.json.tmp`);
    // `!= null` rather than truthiness: an id of 0 or "" is still an id and must be echoed back.
    const body = JSON.stringify({ ...(id != null ? { id } : {}), answers });
    writeFileSync(tmp, body, { mode: 0o600 });
    try {
        renameSync(tmp, join(dir, `resp-${seq}.json`));
    }
    catch (err) {
        // Do not leave a stray temp file behind when the publish step fails.
        try {
            rmSync(tmp, { force: true });
        }
        catch {
            /* best-effort cleanup */
        }
        throw err;
    }
}
|
|
106
|
+
/**
 * Load and parse the request file of gate `seq` (`<dir>/req-<seq>.json`); `answer` uses it to map a
 * `--choose` to the question text.
 *
 * @throws if the file cannot be read or does not contain valid JSON
 */
export function readGate(dir, seq) {
    const requestPath = join(dir, `req-${seq}.json`);
    const raw = readFileSync(requestPath, "utf8");
    return JSON.parse(raw);
}
|
|
110
|
+
/**
 * Channel C: file rendezvous (`--decider-dir <dir>`). Each decision_request is written to
 * `<dir>/req-N.json` and the harness then polls for `<dir>/resp-N.json`. Strictly serial: write req-N,
 * block for resp-N, then req-(N+1) — one outstanding gate at a time. The wire protocol is the same as
 * the other channels; only the transport differs.
 *
 * Environment: COWORK_HARNESS_DECIDER_DIR_POLL_MS (default 300) sets the poll interval and
 * COWORK_HARNESS_DECIDER_DIR_TIMEOUT_MS (default 600000) the per-gate deadline.
 *
 * @param dir rendezvous directory; created if missing
 * @returns `{ write(line), readLine(), snapshot(destDir), close() }`
 * @throws if `dir` already contains request/response gate files
 */
export function fileChannel(dir) {
    mkdirSync(dir, { recursive: true });
    // H3: do NOT silently clear — fail loud if the dir already holds gate files (forces a fresh dir per run).
    const stale = readdirSync(dir).filter((f) => REQ.test(f) || RESP.test(f));
    if (stale.length)
        throw new Error(`--decider-dir ${dir} already has gate files (${stale.slice(0, 3).join(", ")}…) — use a fresh, empty directory per run`);
    // close() leaves done.json behind on purpose, so a directory reused after an earlier run passes the
    // gate-file check above while still holding the old marker. A watcher would then report {done:true}
    // before this run emits a single gate, and the idempotent marker write would never refresh it.
    // Remove the leftover marker, and say so.
    const doneMarker = join(dir, "done.json");
    if (existsSync(doneMarker)) {
        process.stderr.write(`::warning:: [gate] ${dir} holds a done.json from an earlier run — removing it so watchers do not exit early\n`);
        try {
            rmSync(doneMarker, { force: true });
        }
        catch {
            /* best-effort */
        }
    }
    const pollMs = Number(process.env.COWORK_HARNESS_DECIDER_DIR_POLL_MS) || 300;
    const timeoutMs = Number(process.env.COWORK_HARNESS_DECIDER_DIR_TIMEOUT_MS) || 600_000; // 10-min backstop → loud UnansweredError
    let seq = 0;
    let lastSnapshotSeq = 0; // watermark so a per-scenario snapshot() copies ONLY that scenario's new gates
    // #49: keep a reference to the handler so close() can remove it — otherwise repeated fileChannel()
    // calls in one process accumulate "exit" listeners (MaxListenersExceededWarning after >10 channels).
    // The handler guarantees a completion marker on every exit path so a `gates --follow` watcher
    // always gets its terminal {done:true}.
    const exitHandler = () => writeDoneMarker(dir);
    process.on("exit", exitHandler);
    return {
        /** Publish the next request as `req-<seq>.json` (atomic temp+rename, mode 0600). */
        write: (line) => {
            seq++;
            // H1: newlines are flattened so the file holds exactly one line. M2: 0600 (it's on disk).
            const tmp = join(dir, `.req-${seq}.json.tmp`);
            writeFileSync(tmp, line.replace(/\n/g, " ") + "\n", { mode: 0o600 });
            renameSync(tmp, join(dir, `req-${seq}.json`)); // atomic — the watcher never sees a partial file
            process.stderr.write(`[gate] req-${seq} emitted → waiting for resp-${seq}.json\n`); // O2: lifecycle on stderr (even under --output-format json)
        },
        /** Poll for `resp-<seq>.json`; resolves with its raw text, or `null` when the deadline passes. */
        readLine: async () => {
            const resp = join(dir, `resp-${seq}.json`);
            const deadline = Date.now() + timeoutMs;
            while (Date.now() < deadline) {
                // M3: the answer is expected to arrive via temp+rename, so an existing file is read
                // exactly once here; parsing (and failing on bad content) is left to the caller.
                if (existsSync(resp)) {
                    const body = readFileSync(resp, "utf8");
                    // O4: mark the gate consumed — rename `req-N.json` out of the `req-*.json` pattern so
                    // a watcher can't re-emit it and the consumed state is visible mid-run.
                    try {
                        renameSync(join(dir, `req-${seq}.json`), join(dir, `req-${seq}.json.done`));
                    }
                    catch {
                        /* best-effort */
                    }
                    process.stderr.write(`[gate] resp-${seq} consumed (gate answered)\n`);
                    return body;
                }
                await new Promise((r) => setTimeout(r, pollMs));
            }
            return null; // timeout — the caller is expected to turn this into a loud error
        },
        /** Copy the gate files created since the previous snapshot into `destDir` (best-effort). */
        snapshot: (destDir) => {
            // The sequence counter is shared across everything this channel writes, so only files with
            // a sequence above the watermark are copied — earlier gates belong to an earlier snapshot.
            try {
                const files = readdirSync(dir).filter((f) => (REQ.test(f) || RESP.test(f) || f.endsWith(".json.done")) && seqOf(f) > lastSnapshotSeq);
                if (files.length) {
                    mkdirSync(destDir, { recursive: true });
                    for (const f of files)
                        writeFileSync(join(destDir, f), readFileSync(join(dir, f)));
                }
                lastSnapshotSeq = seq; // advance past the gates just copied
            }
            catch {
                /* dir may be gone / nothing to snapshot */
            }
        },
        /** Detach the exit listener, write the done marker, and remove this channel's gate files. */
        close: () => {
            // #49: remove the exit listener registered for this channel so repeated fileChannel() calls
            // in one process don't accumulate listeners.
            process.removeListener("exit", exitHandler);
            // With the listener gone nothing else would write done.json for a process that keeps
            // running, so write it here (idempotent; not matched by the cleanup below, so it survives).
            writeDoneMarker(dir);
            // Best-effort removal of processed files: req/resp, `.done` markers, and leftover temp files
            // from either side (`.req-*` from write(), `.resp-*` from an interrupted answer).
            try {
                for (const f of readdirSync(dir))
                    if (REQ.test(f) || RESP.test(f) || f.startsWith(".req-") || f.startsWith(".resp-") || f.endsWith(".json.done"))
                        rmSync(join(dir, f), { force: true });
            }
            catch {
                /* dir may be gone */
            }
        },
    };
}
|
|
204
|
+
/**
 * Channel backed by a helper process started once through the shell (`shell: true`, so a command
 * string such as `'python answerer.py'` works). Requests go to the helper's stdin, answers are read
 * line by line from its stdout; its stderr is inherited.
 *
 * Environment: COWORK_HARNESS_DECIDER_CMD_TIMEOUT_MS (default 600000) bounds each `readLine()`.
 *
 * @param cmd command line handed to the shell
 * @returns `{ write(line), readLine(), close() }`
 */
export function spawnChannel(cmd) {
    // #8: `shell: true` is INTENTIONAL. `--decider-cmd` is operator-supplied — the same trust class as
    // the harness process itself — and shell interpretation is what lets pipelines and env-var prefixes
    // work as written, so the string is deliberately not parsed into argv.
    const child = spawn(cmd, { shell: true, stdio: ["pipe", "pipe", "inherit"] });
    const reader = lineReader(child.stdout);
    // #53: cap the wait for an answer so a hung-but-alive helper cannot block forever.
    const timeoutMs = Number(process.env.COWORK_HARNESS_DECIDER_CMD_TIMEOUT_MS) || 600_000;
    let dead = false;
    const markDead = () => {
        dead = true;
    };
    child.on("exit", markDead);
    child.on("error", markDead);
    // A broken-pipe write surfaces asynchronously as an `error` event on stdin, not as a synchronous
    // throw. Listening here keeps it from becoming an uncaught exception and flags the helper as dead
    // so the next write() fails with the clean message.
    child.stdin?.on("error", markDead);
    // Signal the helper unless it is already known to be gone; `undefined` means the default signal.
    const terminate = (signal) => {
        if (dead) {
            return;
        }
        try {
            if (signal === undefined) {
                child.kill();
            }
            else {
                child.kill(signal);
            }
        }
        catch {
            /* already gone */
        }
    };
    const write = (line) => {
        if (dead) {
            throw new Error(`--decider-cmd helper exited before answering`);
        }
        try {
            child.stdin.write(line + "\n");
        }
        catch {
            throw new Error(`--decider-cmd helper closed its input (EPIPE) before answering`);
        }
    };
    const readLine = () => {
        let timer;
        const expiry = new Promise((_resolve, reject) => {
            timer = setTimeout(() => {
                // Kill first so a wedged helper cannot linger, then fail loudly.
                terminate("SIGKILL");
                reject(new Error(`--decider-cmd helper timed out before answering after ${timeoutMs}ms`));
            }, timeoutMs);
        });
        return Promise.race([reader.next(), expiry]).finally(() => clearTimeout(timer));
    };
    const close = () => {
        reader.close();
        terminate();
    };
    return { write, readLine, close };
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
/**
 * The default `LlmDecider` transport: shell out to the host `claude -p` (one-shot, headless) instead
 * of calling the API directly. Per the original design note, the harness process itself is not behind
 * the egress proxy, so a direct API call would bypass the allowlist the harness enforces.
 *
 * Environment: COWORK_HARNESS_CLAUDE_BIN (default "claude"), COWORK_HARNESS_LLM_TIMEOUT_MS (default
 * 600000) and COWORK_HARNESS_LLM_MAX_BYTES (default 8 MiB of stdout).
 *
 * @param prompt text passed to `-p`
 * @param model  value passed to `--model`
 * @returns promise resolving with the child's complete stdout decoded as UTF-8; rejects on spawn
 *   failure, non-zero exit, timeout, or when stdout exceeds the byte cap
 */
export const claudeCliComplete = (prompt, model) => new Promise((resolve, reject) => {
    const bin = process.env.COWORK_HARNESS_CLAUDE_BIN || "claude";
    // #53: bound the spawn — a hung-but-alive child would otherwise block the harness forever.
    // On expiry SIGKILL the child and reject; the timer is cleared on close/error.
    const timeoutMs = Number(process.env.COWORK_HARNESS_LLM_TIMEOUT_MS) || 600_000;
    const child = spawn(bin, ["-p", prompt, "--model", model], { stdio: ["ignore", "pipe", "ignore"] });
    const timer = setTimeout(() => {
        try {
            child.kill("SIGKILL");
        }
        catch {
            /* already gone */
        }
        reject(new Error(`LLM decider transport (${bin} -p) timed out after ${timeoutMs}ms`));
    }, timeoutMs);
    // Bound stdout too — the wall-clock timeout caps a silent child, not one that keeps printing.
    const maxBytes = Number(process.env.COWORK_HARNESS_LLM_MAX_BYTES) || 8 * 1024 * 1024;
    // Keep the raw chunks and decode once at the end: decoding chunk by chunk would corrupt a
    // multi-byte UTF-8 character that happens to straddle two chunks.
    const chunks = [];
    let bytes = 0;
    child.stdout.on("data", (d) => {
        bytes += d.length;
        if (bytes > maxBytes) {
            try {
                child.kill("SIGKILL");
            }
            catch {
                /* already gone */
            }
            clearTimeout(timer);
            reject(new Error(`LLM decider transport (${bin} -p) exceeded ${maxBytes} bytes — aborting`));
            return;
        }
        chunks.push(d);
    });
    child.on("error", (e) => {
        clearTimeout(timer);
        reject(new Error(`LLM decider transport (${bin} -p) failed to spawn: ${e.message}`));
    });
    child.on("close", (code) => {
        clearTimeout(timer);
        if (code === 0) {
            resolve(Buffer.concat(chunks).toString("utf8"));
        }
        else {
            // Also reached after a kill above; the promise is already rejected then, so this is a no-op.
            reject(new Error(`LLM decider transport (${bin} -p) exited ${code}`));
        }
    });
});
|
package/dist/dotenv.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
/**
 * Minimal `.env` loader (no dependency). Loads `KEY=VALUE` lines from `file` into `process.env`.
 *
 * Supported syntax: `#` comment lines, an optional `export ` prefix, values wrapped in matching
 * single or double quotes, and a trailing ` # comment` after either an unquoted or a quoted value
 * (`KEY="a b" # note` yields `a b`). No escape sequences are interpreted.
 *
 * Values already present in `process.env` are never overwritten, and an empty value (`KEY=`) is
 * treated as "not provided" and skipped.
 *
 * SECURITY (from the original design note): `.env` is a host-side credential store — keep it out of
 * any folder that is mounted into the sandbox.
 *
 * @param file path of the env file; defaults to `.env` in the current working directory
 * @returns the keys that were actually set by this call (empty if the file is missing or unreadable)
 */
export function loadDotenv(file = resolve(process.cwd(), ".env")) {
    if (!existsSync(file))
        return [];
    const loaded = [];
    let text;
    try {
        text = readFileSync(file, "utf8");
    }
    catch {
        return [];
    }
    for (const raw of text.split("\n")) {
        const line = raw.trim(); // also strips the `\r` of CRLF files
        if (!line || line.startsWith("#"))
            continue;
        const m = line.match(/^(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)$/);
        if (!m)
            continue;
        const key = m[1];
        let val = m[2];
        const quoted = /^["']/.test(val);
        // A quoted value followed by an inline comment: `"value" # note`. Without this case the
        // comment would be stripped but the surrounding quotes would stay in the value.
        const quotedWithComment = quoted ? val.match(/^(["'])(.*?)\1\s+#.*$/) : null;
        if (quoted && val.length >= 2 && val[0] === val[val.length - 1]) {
            val = val.slice(1, -1);
        }
        else if (quotedWithComment) {
            val = quotedWithComment[2];
        }
        else {
            // strip a trailing inline comment from an unquoted value
            val = val.replace(/\s+#.*$/, "").trim();
        }
        // An empty value means "not provided" — skip it so a blank template placeholder is harmless
        // and a later non-empty line (or an exported var) still wins.
        if (val === "")
            continue;
        if (process.env[key] === undefined) {
            process.env[key] = val;
            loaded.push(key);
        }
    }
    return loaded;
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import http from "node:http";
|
|
2
|
+
import net from "node:net";
|
|
3
|
+
import { appendFileSync } from "node:fs";
|
|
4
|
+
/**
 * Start an allowlisting forward proxy: plain HTTP requests are forwarded, HTTPS is tunnelled via
 * CONNECT, and every request whose host is not matched by `opts.allow` is refused with 403.
 *
 * @param opts `allow` — host patterns handed to `compile`; `onDecision(host, decision)` — optional
 *   callback per allow/deny; `logPath` — optional file that receives one JSON line per decision;
 *   `port` — listen port (default 8080)
 * @returns the listening `http.Server`; its `close()` also removes the process-level
 *   `uncaughtException` handler installed here
 */
export function startEgressProxy(opts) {
    const allow = compile(opts.allow);
    const log = (host, decision) => {
        opts.onDecision?.(host, decision);
        if (opts.logPath)
            appendFileSync(opts.logPath, JSON.stringify({ ts: Date.now(), host, decision }) + "\n");
    };
    const server = http.createServer((req, res) => {
        const host = hostOf(req.url ?? "", req.headers.host).toLowerCase();
        if (!allow(host)) {
            log(host, "deny");
            res.writeHead(403, { "content-type": "text/plain" });
            res.end(`egress denied: ${host} not on allowlist`);
            return;
        }
        // Minimal HTTP forward (CONNECT covers HTTPS below; this handles plain HTTP).
        // #33: `hostOf` falls back to the Host header for a relative/malformed req.url, so the allow
        // check can pass while `new URL(req.url)` still throws. Answer with a clean 400 instead of
        // letting the throw escape the request callback.
        let target;
        try {
            target = new URL(req.url);
        }
        catch {
            res.writeHead(400, { "content-type": "text/plain" });
            res.end("bad request: malformed proxy URL");
            return;
        }
        // Log `allow` only once the request is valid and about to be forwarded, so a malformed URL
        // that never reached an upstream is not recorded as allowed.
        log(host, "allow");
        const proxyReq = http.request({ host: target.hostname, port: target.port || 80, path: target.pathname + target.search, method: req.method, headers: req.headers }, (proxyRes) => {
            // If the upstream connection dies mid-body the error is reported on the response stream,
            // and pipe() does not finish `res` in that case — drop the client connection so it does
            // not wait forever on a truncated response.
            proxyRes.on("error", () => res.destroy());
            res.writeHead(proxyRes.statusCode ?? 502, proxyRes.headers);
            proxyRes.pipe(res);
        });
        proxyReq.on("error", () => {
            // writeHead() throws once headers are out; at that point all that is left is to abort.
            if (res.headersSent) {
                res.destroy();
                return;
            }
            res.writeHead(502);
            res.end("upstream error");
        });
        req.pipe(proxyReq);
    });
    // HTTPS via CONNECT tunnelling — allow/deny by the host in the CONNECT authority, then blind-pipe.
    server.on("connect", (req, clientSocket, head) => {
        let upstream;
        // A reset on either side must never crash the proxy (ECONNRESET is normal at connection
        // teardown). Attach the handler before any I/O, and take the upstream leg down with the
        // client so a failed client does not leave an open upstream socket behind.
        clientSocket.on("error", () => {
            clientSocket.destroy();
            upstream?.destroy();
        });
        // Bracket-aware parse so `[2001:db8::1]:443` yields the right host/port.
        const { host, port } = parseAuthority(req.url ?? "");
        if (!allow(host)) {
            log(host, "deny");
            clientSocket.write("HTTP/1.1 403 Forbidden\r\n\r\n");
            clientSocket.end();
            return;
        }
        log(host, "allow");
        upstream = net.connect(port, host, () => {
            clientSocket.write("HTTP/1.1 200 Connection Established\r\n\r\n");
            upstream.write(head);
            upstream.pipe(clientSocket);
            clientSocket.pipe(upstream);
        });
        upstream.on("error", () => clientSocket.destroy());
    });
    // Last-resort guards so a single bad socket can never take the proxy down.
    server.on("clientError", (_e, sock) => {
        try {
            sock.destroy();
        }
        catch {
            /* already gone */
        }
    });
    // #50: keep a reference to the handler so close() can remove it — otherwise every
    // startEgressProxy() call in one process stacks another uncaughtException handler.
    const uncaughtHandler = (e) => {
        if (e?.code === "ECONNRESET" || e?.code === "EPIPE")
            return; // benign socket teardown
        throw e;
    };
    process.on("uncaughtException", uncaughtHandler);
    server.listen(opts.port ?? 8080);
    // Wrap close() so the uncaughtException handler is cleaned up when the server stops.
    const origClose = server.close.bind(server);
    server.close = (cb) => {
        process.removeListener("uncaughtException", uncaughtHandler);
        return origClose(cb);
    };
    return server;
}
|
|
97
|
+
/**
 * Build a host matcher from allowlist patterns. Matching is case-insensitive. A pattern is either
 * `*` (everything is allowed), `*.suffix` (any host ending in `.suffix`), or an exact host name.
 *
 * @param patterns iterable of pattern strings
 * @returns predicate `(host) => boolean`
 */
export function compile(patterns) {
    const exactHosts = new Set();
    const dottedSuffixes = [];
    for (const pattern of patterns) {
        const lowered = pattern.toLowerCase();
        if (lowered === "*") {
            // A bare wildcard makes every other pattern irrelevant.
            return () => true;
        }
        if (lowered.startsWith("*.")) {
            dottedSuffixes.push(lowered.slice(1)); // keep the leading dot: "*.claude.ai" -> ".claude.ai"
            continue;
        }
        exactHosts.add(lowered);
    }
    return function matches(host) {
        const candidate = host.toLowerCase();
        if (exactHosts.has(candidate)) {
            return true;
        }
        return dottedSuffixes.some((suffix) => candidate.endsWith(suffix));
    };
}
|
|
116
|
+
/**
 * Host name of a proxied request: the hostname of `url` when it parses as an absolute URL with a
 * non-empty host, otherwise the part of the Host header before the first `:` ("" if there is none).
 */
function hostOf(url, hostHeader) {
    const fromHeader = () => (hostHeader ?? "").split(":")[0];
    let parsedHost;
    try {
        parsedHost = new URL(url).hostname;
    }
    catch {
        return fromHeader();
    }
    return parsedHost || fromHeader();
}
|
|
124
|
+
/**
 * Parse a CONNECT authority (`host:port`, or `[ipv6]:port`) into a bare host (IPv6 brackets
 * stripped, lowercased) and a numeric port (443 when none is given). The WHATWG URL parser does the
 * host parsing so bracketed IPv6 literals are handled; if it rejects the input, the text before the
 * first `:` is used as the host with port 443.
 *
 * @param authority request target of the CONNECT request
 * @returns `{ host, port }`
 */
function parseAuthority(authority) {
    try {
        const u = new URL("http://" + authority);
        const host = u.hostname.replace(/^\[|\]$/g, "").toLowerCase();
        if (u.port) {
            return { host, port: Number(u.port) };
        }
        // The URL parser reports "" for the scheme's default port, so with the `http://` prefix an
        // explicit `:80` would be indistinguishable from "no port" and end up as 443. Recover an
        // explicitly written port from the raw authority.
        const explicit = /:(\d+)$/.exec(authority);
        return { host, port: explicit ? Number(explicit[1]) : 443 };
    }
    catch {
        return { host: authority.split(":")[0].toLowerCase(), port: 443 };
    }
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import { spawnSync } from "node:child_process";
|
|
2
|
+
import { mkdirSync, readFileSync, existsSync } from "node:fs";
|
|
3
|
+
import { join, resolve } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
// Image tag used for the egress proxy container. COWORK_PROXY_IMAGE overrides it; because of `??`
// only an unset variable falls back to the default (an empty string is kept as-is).
const PROXY_IMAGE = process.env.COWORK_PROXY_IMAGE ?? "cowork-egress-proxy:1";
|
|
6
|
+
/**
 * Start the egress proxy as a container sidecar: create an internal network and an outbound network,
 * run the proxy container on the internal one, then connect it to the outbound one. The container
 * runtime is taken from COWORK_CONTAINER_RUNTIME (default "docker").
 *
 * @param allow  allowlist patterns, passed to the proxy as a comma-joined COWORK_ALLOW value
 * @param outDir run output directory; the proxy log is mounted at `<outDir>/proxy`
 * @param runId  suffix that makes the network and container names unique per run
 * @returns `{ proxyUrl, network, collect(), teardown() }` — `collect()` parses the proxy log into
 *   decisions, `teardown()` removes the container and both networks (errors ignored)
 * @throws if any setup command fails; resources created so far are removed first
 */
export function startEgressSidecar(allow, outDir, runId) {
    const runner = process.env.COWORK_CONTAINER_RUNTIME ?? "docker";
    const intNet = `cowork-int-${runId}`;
    const outNet = `cowork-out-${runId}`;
    const proxyName = `cowork-proxy-${runId}`;
    const logDir = join(resolve(outDir), "proxy");
    mkdirSync(logDir, { recursive: true });
    const logFileHost = join(logDir, "egress.log");
    ensureProxyImage(runner);
    // #37: track each created resource so a mid-sequence failure rolls back what exists instead of
    // orphaning networks/containers. Undo runs in reverse (container before networks).
    const rollback = [];
    try {
        d(runner, ["network", "create", "--internal", intNet]);
        rollback.push(() => d(runner, ["network", "rm", intNet], true));
        d(runner, ["network", "create", outNet]);
        rollback.push(() => d(runner, ["network", "rm", outNet], true));
        // Register the container undo BEFORE `run`: a `run -d` that fails while starting can still
        // leave a created container behind, which would also keep the internal network from being
        // removed. `rm -f` on a container that was never created is harmless (errors are ignored).
        rollback.push(() => d(runner, ["rm", "-f", proxyName], true));
        // Proxy on the internal net first, then also wired to the outbound net so it alone can reach
        // the outside.
        d(runner, [
            "run",
            "-d",
            "--name",
            proxyName,
            "--network",
            intNet,
            "-e",
            `COWORK_ALLOW=${allow.join(",")}`,
            "-e",
            "COWORK_PROXY_LOG=/log/egress.log",
            "-v",
            `${logDir}:/log`,
            PROXY_IMAGE,
        ]);
        d(runner, ["network", "connect", outNet, proxyName]);
    }
    catch (e) {
        for (const undo of rollback.reverse())
            undo();
        throw e;
    }
    return {
        proxyUrl: `http://${proxyName}:8080`,
        network: intNet,
        collect() {
            if (!existsSync(logFileHost))
                return [];
            return readFileSync(logFileHost, "utf8")
                .trim()
                .split("\n")
                .filter(Boolean)
                .map(parseEgressLine)
                .filter((x) => x !== null);
        },
        teardown() {
            d(runner, ["rm", "-f", proxyName], true);
            d(runner, ["network", "rm", intNet], true);
            d(runner, ["network", "rm", outNet], true);
        },
    };
}
|
|
69
|
+
/**
 * Turn one line of the proxy's egress log into `{ host, decision }`, or `null` when the line has to
 * be discarded.
 *
 * #43: a line is discarded — with a `::warning::` on stderr — when it is not valid JSON, when it is
 * valid JSON but not a plain object, or when its `decision` is anything other than "allow"/"deny".
 * An unknown decision is never coerced to "allow".
 *
 * @param line one raw log line
 * @returns the parsed decision, or `null`
 */
export function parseEgressLine(line) {
    let entry;
    try {
        entry = JSON.parse(line);
    }
    catch {
        process.stderr.write(`::warning:: [egress] proxy log line is not valid JSON — dropping: ${line.slice(0, 200)}\n`);
        return null;
    }
    // `null`, numbers, arrays and the like parse fine but have no fields to read.
    const isRecord = typeof entry === "object" && entry !== null && !Array.isArray(entry);
    if (!isRecord) {
        process.stderr.write(`::warning:: [egress] proxy log line is not a JSON object — dropping: ${line.slice(0, 200)}\n`);
        return null;
    }
    const host = String(entry.host);
    const { decision } = entry;
    switch (decision) {
        case "allow":
        case "deny":
            return { host, decision };
        default:
            process.stderr.write(`::warning:: [egress] unknown decision "${decision}" for host ${host} — dropping (not coercing to allow)\n`);
            return null;
    }
}
|
|
99
|
+
/**
 * Make sure the proxy image exists locally, building it from `docker/Dockerfile.proxy` when
 * `image inspect` does not find it. If `dist/egress/proxy.js` is missing, `npm run build` is run
 * first.
 *
 * @param runner container runtime executable
 * @throws if `npm run build` or the image build exits non-zero
 */
function ensureProxyImage(runner) {
    const inspect = spawnSync(runner, ["image", "inspect", PROXY_IMAGE], { stdio: "ignore" });
    const imagePresent = inspect.status === 0;
    if (imagePresent) {
        return;
    }
    // Build context is two levels above this module. #39: fileURLToPath (not `.pathname`) keeps
    // install paths with spaces / URL-escaped characters valid.
    const repoRoot = resolve(fileURLToPath(new URL("../..", import.meta.url)));
    // #38: build dist/ first when the compiled proxy entry point is absent.
    const compiledProxy = join(repoRoot, "dist", "egress", "proxy.js");
    if (!existsSync(compiledProxy)) {
        process.stderr.write(`::warning:: [egress] dist/egress missing — running \`npm run build\` before the proxy image build\n`);
        const npmBuild = spawnSync("npm", ["run", "build"], { cwd: repoRoot, stdio: "inherit" });
        if (npmBuild.status !== 0) {
            throw new Error("failed to build dist/ for the egress proxy image (npm run build)");
        }
    }
    const dockerfile = join(repoRoot, "docker", "Dockerfile.proxy");
    const imageBuild = spawnSync(runner, ["build", "-t", PROXY_IMAGE, "-f", dockerfile, repoRoot], {
        stdio: "inherit",
    });
    if (imageBuild.status !== 0) {
        throw new Error(`failed to build ${PROXY_IMAGE}`);
    }
}
|
|
120
|
+
/**
 * Run one container-runtime command synchronously.
 *
 * @param runner      runtime executable
 * @param args        argument list
 * @param ignoreError when true, a failing command is silently accepted (used for cleanup)
 * @throws Error naming the runtime, the first two arguments, and up to 200 characters of detail:
 *   the command's stderr or stdout, else the spawn error (e.g. the binary is not installed), else
 *   the terminating signal
 */
function d(runner, args, ignoreError = false) {
    const r = spawnSync(runner, args, { encoding: "utf8" });
    if (r.status === 0 || ignoreError) {
        return;
    }
    // When the process could not be started there is no stderr/stdout at all; fall back to the spawn
    // error (or the signal) so the message still says why it failed.
    const detail = r.stderr || r.stdout || r.error?.message || (r.signal ? `killed by ${r.signal}` : "");
    throw new Error(`${runner} ${args.slice(0, 2).join(" ")} failed: ${detail.trim().slice(0, 200)}`);
}
|