cowork-harness 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/.env.example +16 -0
  2. package/CHANGELOG.md +190 -0
  3. package/LICENSE +21 -0
  4. package/README.md +470 -0
  5. package/baselines/desktop-1.11847.5.json +78 -0
  6. package/baselines/desktop-1.12603.1.json +140 -0
  7. package/baselines/prompts/desktop-1.12603.1/host-loop-append.md +8 -0
  8. package/baselines/prompts/desktop-1.12603.1/subagent-append-vm.md +3 -0
  9. package/baselines/prompts/desktop-1.12603.1/system-prompt-append.md +18 -0
  10. package/dist/agent/session.js +465 -0
  11. package/dist/assert.js +159 -0
  12. package/dist/baseline.js +87 -0
  13. package/dist/boundary.js +114 -0
  14. package/dist/canary/grants.js +37 -0
  15. package/dist/cli.js +1107 -0
  16. package/dist/decide/decider.js +521 -0
  17. package/dist/decide/external-channel.js +262 -0
  18. package/dist/decide/llm-transport.js +52 -0
  19. package/dist/dotenv.js +52 -0
  20. package/dist/egress/proxy.js +138 -0
  21. package/dist/egress/sidecar.js +125 -0
  22. package/dist/hostloop/provenance.js +110 -0
  23. package/dist/hostloop/workspace-handler.js +226 -0
  24. package/dist/loop-decision.js +62 -0
  25. package/dist/prompt.js +43 -0
  26. package/dist/run/cassette.js +420 -0
  27. package/dist/run/chat.js +194 -0
  28. package/dist/run/envelope.js +31 -0
  29. package/dist/run/execute.js +533 -0
  30. package/dist/run/renderer.js +179 -0
  31. package/dist/run/run.js +347 -0
  32. package/dist/run/trace-view.js +227 -0
  33. package/dist/runtime/argv.js +126 -0
  34. package/dist/runtime/container.js +76 -0
  35. package/dist/runtime/host-env.js +28 -0
  36. package/dist/runtime/hostloop.js +129 -0
  37. package/dist/runtime/lima.js +177 -0
  38. package/dist/runtime/microvm.js +151 -0
  39. package/dist/runtime/protocol.js +79 -0
  40. package/dist/runtime/stage.js +52 -0
  41. package/dist/secrets.js +42 -0
  42. package/dist/session.js +315 -0
  43. package/dist/sync/cowork-sync.js +215 -0
  44. package/dist/types.js +127 -0
  45. package/docker/Dockerfile.agent +31 -0
  46. package/docker/Dockerfile.proxy +12 -0
  47. package/docker/compose.yml +31 -0
  48. package/fixtures/subagent-grants.json +5 -0
  49. package/package.json +70 -0
@@ -0,0 +1,262 @@
1
+ import { mkdirSync, readdirSync, existsSync, readFileSync, writeFileSync, renameSync, rmSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import readline from "node:readline";
4
+ import { spawn } from "node:child_process";
5
/**
 * Pull-style, one-line-at-a-time reader over a stream. readline handles the buffering across
 * chunk boundaries; this adds a queue so lines can be awaited sequentially.
 *
 * @param {import("node:stream").Readable} input stream to split into lines
 * @returns {{ next: () => Promise<string|null>, close: () => void }} `next()` resolves with the
 *   next line, or `null` once the interface has closed and every buffered line was handed out.
 */
function lineReader(input) {
    const bufferedLines = [];
    const pendingResolvers = [];
    let closed = false;
    const iface = readline.createInterface({ input, crlfDelay: Infinity });
    iface.on("line", (text) => {
        // Hand the line straight to the oldest waiting caller, otherwise park it.
        if (pendingResolvers.length > 0) {
            pendingResolvers.shift()(text);
        }
        else {
            bufferedLines.push(text);
        }
    });
    iface.on("close", () => {
        closed = true;
        // Release every caller still waiting: there will be no more lines.
        for (let wake = pendingResolvers.shift(); wake; wake = pendingResolvers.shift()) {
            wake(null);
        }
    });
    const next = () => {
        if (bufferedLines.length > 0) {
            return Promise.resolve(bufferedLines.shift());
        }
        if (closed) {
            return Promise.resolve(null);
        }
        return new Promise((resolve) => {
            pendingResolvers.push(resolve);
        });
    };
    const close = () => iface.close();
    return { next, close };
}
28
// Gate file names in a decider dir: requests are `req-<N>.json`, answers are `resp-<N>.json`.
const REQ = /^req-\d+\.json$/;
const RESP = /^resp-\d+\.json$/;
/** Sequence number embedded in a gate file name (`…-<N>.json…`); 0 when the name carries none. */
const seqOf = (f) => {
    const hit = f.match(/-(\d+)\.json/);
    return hit ? Number(hit[1]) : 0;
};
31
/**
 * Drop the run-complete marker (`done.json`) into `dir` so a `gates --follow` watcher emits an
 * explicit `{done:true}` and exits instead of leaving silence ambiguous.
 *
 * Idempotent (an existing marker is left untouched) and fully synchronous, so it is safe to call
 * from a process "exit" handler. Any filesystem failure is ignored.
 *
 * @param {string} dir decider directory that receives the marker
 */
export function writeDoneMarker(dir) {
    try {
        const markerPath = join(dir, "done.json");
        if (existsSync(markerPath)) {
            return;
        }
        const payload = JSON.stringify({ done: true });
        writeFileSync(markerPath, payload + "\n");
    }
    catch {
        /* dir may be gone */
    }
}
42
/**
 * The gate stream behind `cowork-harness gates <dir> --follow` — the harness OWNS the watcher so the
 * driving agent points ONE Monitor at this instead of hand-writing a zsh-safe find/seen-set/poll loop.
 * Emits one clean single-line JSON per NEW pending gate (`{seq, ...decision_request}`) and a terminal
 * `{"done":true}` when the run finishes.
 *
 * @param {string} dir decider directory to poll for `req-N.json` files and `done.json`
 * @param {(line: string) => void} write sink that receives each emitted JSON line
 * @param {{ pollMs?: number, once?: boolean }} [opts] `pollMs` overrides the poll interval (default:
 *   `COWORK_HARNESS_DECIDER_DIR_POLL_MS` or 500); `once` resolves after a single pass
 * @returns {Promise<void>} resolves when `done.json` is seen (or after one pass if `once`); rejects
 *   if `write` throws, so a broken sink is reported instead of being mistaken for a corrupt gate file
 */
export function streamGates(dir, write, opts = {}) {
    const pollMs = opts.pollMs ?? (Number(process.env.COWORK_HARNESS_DECIDER_DIR_POLL_MS) || 500);
    const maxTries = 3;
    const donePath = join(dir, "done.json");
    const seen = new Set();
    const tries = new Map(); // per-file parse attempts — bound retries so a corrupt file drops loud
    return new Promise((resolve, reject) => {
        const tick = () => {
            try {
                // Sample the marker BEFORE listing: any gate written before the marker is then
                // guaranteed to be in the listing below, so the terminal line cannot overtake it.
                const finished = existsSync(donePath);
                let files = [];
                try {
                    files = readdirSync(dir);
                }
                catch {
                    /* not created yet */
                }
                const pending = files.filter((x) => REQ.test(x) && !seen.has(x)).sort((a, b) => seqOf(a) - seqOf(b));
                for (const f of pending) {
                    let parsed;
                    try {
                        parsed = JSON.parse(readFileSync(join(dir, f), "utf8").trim());
                    }
                    catch {
                        // A transient mid-write is retried next tick; a PERSISTENTLY corrupt file would otherwise be
                        // retried forever — bound it, then drop loud so the gap is visible (not a silent false-negative).
                        const n = (tries.get(f) ?? 0) + 1;
                        tries.set(f, n);
                        if (n >= maxTries) {
                            seen.add(f);
                            process.stderr.write(`::warning:: [gates] ${f} is unreadable/malformed after ${n} tries — dropping\n`);
                        }
                        continue;
                    }
                    // Only mark consumed AFTER a clean parse. The emit sits outside the parse guard so a
                    // throwing sink is not miscounted as a malformed file and silently swallowed.
                    seen.add(f);
                    write(JSON.stringify({ seq: seqOf(f), ...parsed }));
                }
                if (finished) {
                    write(JSON.stringify({ done: true }));
                    return resolve();
                }
                if (opts.once)
                    return resolve();
                setTimeout(tick, pollMs);
            }
            catch (err) {
                // Later ticks run from a timer; without this a sink failure there would be an
                // uncaught exception instead of a rejection of the returned promise.
                reject(err);
            }
        };
        tick();
    });
}
92
/**
 * Write the answer for gate `seq` as `<dir>/resp-<seq>.json` — behind `cowork-harness answer`.
 * The payload is `{id, answers}`, where `id` is copied from the matching `req-<seq>.json` when that
 * file can still be read and carries a truthy id; otherwise just `{answers}`. The file is written
 * to a temp name (mode 0600) and renamed into place so a reader never sees a partial answer.
 *
 * @param {string} dir decider directory
 * @param {number|string} seq gate sequence number
 * @param {unknown} answers answers payload, serialised as-is
 */
export function answerGate(dir, seq, answers) {
    const requestPath = join(dir, `req-${seq}.json`);
    let id;
    try {
        const request = JSON.parse(readFileSync(requestPath, "utf8"));
        id = request.id;
    }
    catch {
        /* req may already be consumed; id is optional */
    }
    const payload = id ? { id, answers } : { answers };
    const tempPath = join(dir, `.resp-${seq}.json.tmp`);
    const finalPath = join(dir, `resp-${seq}.json`);
    writeFileSync(tempPath, JSON.stringify(payload), { mode: 0o600 });
    renameSync(tempPath, finalPath);
}
106
/**
 * Load and parse the request file of gate `seq` (used by `answer` to map a `--choose` to the
 * question text). Throws if `req-<seq>.json` is missing or is not valid JSON.
 *
 * @param {string} dir decider directory
 * @param {number|string} seq gate sequence number
 * @returns {unknown} the parsed request
 */
export function readGate(dir, seq) {
    const requestPath = join(dir, `req-${seq}.json`);
    const raw = readFileSync(requestPath, "utf8");
    return JSON.parse(raw);
}
110
/**
 * Channel C: file rendezvous (`--decider-dir <dir>`). The decision_request is written to `<dir>/req-N.json`
 * and the harness blocks polling for `<dir>/resp-N.json`. The DRIVING Claude agent arms a Monitor on the
 * dir (each new req file → a task-notification that wakes it) and writes the answer file — answering the
 * LIVE AskUserQuestion in-band (no resume, no re-worded question). Strictly serial: write req-N, block for
 * resp-N, then req-(N+1) — one outstanding gate at a time. The wire protocol is identical to the other
 * channels (the same ExternalDecider drives it); only the transport differs.
 *
 * @param {string} dir rendezvous directory (created if missing)
 * @returns {{ write: (line: string) => void, readLine: () => Promise<string|null>,
 *   snapshot: (destDir: string) => void, close: () => void }} the channel
 * @throws {Error} if `dir` already contains req/resp gate files
 */
export function fileChannel(dir) {
    mkdirSync(dir, { recursive: true });
    const existing = readdirSync(dir);
    // H3: do NOT silently clear — fail loud if the dir already holds gate files (forces a fresh dir per run).
    const stale = existing.filter((f) => REQ.test(f) || RESP.test(f));
    if (stale.length)
        throw new Error(`--decider-dir ${dir} already has gate files (${stale.slice(0, 3).join(", ")}…) — use a fresh, empty directory per run`);
    // close() removes the gate files but deliberately leaves done.json behind, so a directory reused
    // after a clean close() passes the check above while still holding the PREVIOUS run's marker. Left
    // in place it would make a `gates --follow` watcher report {done:true} on its first tick and turn
    // this run's writeDoneMarker() into a no-op. Remove it, and say so.
    if (existing.includes("done.json")) {
        rmSync(join(dir, "done.json"), { force: true });
        process.stderr.write(`::warning:: [gates] removed a stale done.json from ${dir} (left by a previous run)\n`);
    }
    const pollMs = Number(process.env.COWORK_HARNESS_DECIDER_DIR_POLL_MS) || 300;
    const timeoutMs = Number(process.env.COWORK_HARNESS_DECIDER_DIR_TIMEOUT_MS) || 600_000; // 10-min backstop → loud UnansweredError
    let seq = 0;
    let lastSnapshotSeq = 0; // watermark so a per-scenario snapshot() copies ONLY that scenario's new gates
    // #49: store the handler so it can be removed on close() — otherwise repeated fileChannel() calls in
    // one process accumulate "exit" listeners (MaxListenersExceededWarning after >10 channels).
    // Guarantee a completion marker on EVERY exit path (success, fail()/process.exit, throw) so a
    // `gates --follow` watcher always gets its terminal {done:true} and never hangs.
    const exitHandler = () => writeDoneMarker(dir);
    process.on("exit", exitHandler);
    return {
        write: (line) => {
            seq++;
            // H1: `line` is single-line JSON (ExternalDecider) — one `cat` = one Monitor event. M2: 0600 (it's on disk).
            const tmp = join(dir, `.req-${seq}.json.tmp`);
            writeFileSync(tmp, line.replace(/\n/g, " ") + "\n", { mode: 0o600 });
            renameSync(tmp, join(dir, `req-${seq}.json`)); // atomic — the watcher never sees a partial file
            process.stderr.write(`[gate] req-${seq} emitted → waiting for resp-${seq}.json\n`); // O2: lifecycle on stderr (even under --output-format json)
        },
        readLine: async () => {
            const resp = join(dir, `resp-${seq}.json`);
            const deadline = Date.now() + timeoutMs;
            while (Date.now() < deadline) {
                // M3: the agent writes resp via temp+rename (atomic) → if it exists, it's complete. Read+parse ONCE
                // (a bad parse fails loud in ExternalDecider — no retry-then-hang).
                if (existsSync(resp)) {
                    const body = readFileSync(resp, "utf8");
                    // O4: mark the gate consumed — rename `req-N.json` out of the `req-*.json` glob so the watcher
                    // can't re-emit it, and the consumed signal is visible mid-run (distinguishes a re-emit from a
                    // genuine agent re-ask, O3).
                    try {
                        renameSync(join(dir, `req-${seq}.json`), join(dir, `req-${seq}.json.done`));
                    }
                    catch {
                        /* best-effort */
                    }
                    process.stderr.write(`[gate] resp-${seq} consumed (gate answered)\n`);
                    return body;
                }
                await new Promise((r) => setTimeout(r, pollMs));
            }
            return null; // timeout → ExternalDecider throws UnansweredError (loud, never silent)
        },
        snapshot: (destDir) => {
            // Copy THIS scenario's gate wire shapes into the run dir so they survive close()'s cleanup (Part 4).
            // The channel is reused across scenarios in a `run <dir/>` loop (one monotonic seq), so copy only
            // files newer than the last snapshot — otherwise scenario N's snapshot would also contain 1..N-1's.
            try {
                const files = readdirSync(dir).filter((f) => (REQ.test(f) || RESP.test(f) || f.endsWith(".json.done")) && seqOf(f) > lastSnapshotSeq);
                if (files.length) {
                    mkdirSync(destDir, { recursive: true });
                    for (const f of files)
                        writeFileSync(join(destDir, f), readFileSync(join(dir, f)));
                }
                lastSnapshotSeq = seq; // advance past this scenario's gates
            }
            catch {
                /* dir may be gone / nothing to snapshot */
            }
        },
        close: () => {
            // #49: remove the exit listener registered for this channel so repeated fileChannel() calls
            // in one process don't accumulate listeners past the MaxListenersExceededWarning threshold.
            process.removeListener("exit", exitHandler);
            // That exit listener was the only writer of done.json — a long-lived embedder that close()s but
            // keeps running must still release a `gates --follow` watcher. Write the marker here too (idempotent,
            // and not matched by the cleanup globs below, so it survives).
            writeDoneMarker(dir);
            // Best-effort remove processed files on close (req/resp + .done markers + tmp). The tmp match
            // covers both sides: `.req-N.json.tmp` written here and `.resp-N.json.tmp` written by answerGate.
            try {
                for (const f of readdirSync(dir))
                    if (REQ.test(f) || RESP.test(f) || f.startsWith(".req-") || f.startsWith(".resp-") || f.endsWith(".json.done"))
                        rmSync(join(dir, f), { force: true });
            }
            catch {
                /* dir may be gone */
            }
        },
    };
}
204
/**
 * Decider channel backed by a helper process that is spawned once. Each request line is written to
 * the helper's stdin and each answer is read as one line from its stdout; stderr is inherited.
 *
 * `shell: true` is INTENTIONAL (#8): `--decider-cmd` is OPERATOR-supplied, the same trust class as the
 * harness process itself, and shell interpretation is what lets `'python answerer.py'`, pipelines and
 * env-var prefixes work as written. There is no untrusted input to escape, so the string is not
 * parsed into argv.
 *
 * @param {string} cmd shell command line for the helper
 * @returns {{ write: (line: string) => void, readLine: () => Promise<string|null>, close: () => void }}
 */
export function spawnChannel(cmd) {
    const child = spawn(cmd, { shell: true, stdio: ["pipe", "pipe", "inherit"] });
    const reader = lineReader(child.stdout);
    // #53: upper bound on one answer (COWORK_HARNESS_DECIDER_CMD_TIMEOUT_MS, default 10 min) so a
    // hung-but-alive helper cannot block the harness forever.
    const timeoutMs = Number(process.env.COWORK_HARNESS_DECIDER_CMD_TIMEOUT_MS) || 600_000;
    let dead = false;
    const markDead = () => {
        dead = true;
    };
    child.on("exit", markDead);
    child.on("error", markDead);
    // A broken pipe surfaces ASYNCHRONOUSLY as an `error` event on stdin, not as a throw from write().
    // Without a listener Node would escalate it to an uncaughtException; record it so the next write()
    // reports the helper as gone.
    child.stdin?.on("error", markDead);
    /** Signal the helper unless it is already known to be gone; a failing kill is ignored. */
    const killQuietly = (signal) => {
        if (dead) {
            return;
        }
        try {
            child.kill(signal);
        }
        catch {
            /* already gone */
        }
    };
    const write = (line) => {
        if (dead) {
            throw new Error(`--decider-cmd helper exited before answering`);
        }
        try {
            child.stdin.write(line + "\n");
        }
        catch {
            throw new Error(`--decider-cmd helper closed its input (EPIPE) before answering`);
        }
    };
    const readLine = () => {
        let timer;
        const expiry = new Promise((_resolve, reject) => {
            timer = setTimeout(() => {
                // On expiry make sure a wedged helper cannot linger, then fail loud.
                killQuietly("SIGKILL");
                reject(new Error(`--decider-cmd helper timed out before answering after ${timeoutMs}ms`));
            }, timeoutMs);
        });
        const firstSettled = Promise.race([reader.next(), expiry]);
        return firstSettled.finally(() => clearTimeout(timer));
    };
    const close = () => {
        reader.close();
        killQuietly();
    };
    return { write, readLine, close };
}
@@ -0,0 +1,52 @@
1
+ import { spawn } from "node:child_process";
2
/**
 * The default `LlmDecider` transport: shell out to the host `claude -p` (one-shot, headless). Chosen
 * over a direct `POST /v1/messages` (Opus H1): the harness PROCESS is not behind the egress proxy
 * (only the spawned agent child is), so a direct API call would bypass the very allowlist the harness
 * enforces. `claude -p` reuses the run's own auth path and is dogfood-consistent. One short, tool-less
 * call per gate on a small model — bounded cost/latency, no recursion into the harness.
 *
 * Environment: `COWORK_HARNESS_CLAUDE_BIN` (binary, default `claude`), `COWORK_HARNESS_LLM_TIMEOUT_MS`
 * (wall-clock cap, default 600000) and `COWORK_HARNESS_LLM_MAX_BYTES` (stdout cap, default 8 MiB).
 *
 * @param {string} prompt prompt passed as the `-p` argument
 * @param {string} model model name passed as the `--model` argument
 * @returns {Promise<string>} the child's complete stdout decoded as UTF-8; rejects on spawn failure,
 *   non-zero exit, timeout, or when stdout exceeds the byte cap
 */
export const claudeCliComplete = (prompt, model) => new Promise((resolve, reject) => {
    const bin = process.env.COWORK_HARNESS_CLAUDE_BIN || "claude";
    // #53: bound the `claude -p` spawn — a hung-but-alive child would otherwise block the harness forever.
    const timeoutMs = Number(process.env.COWORK_HARNESS_LLM_TIMEOUT_MS) || 600_000;
    // Bound stdout too — the wall-clock timeout caps a fully-hung child, but not one that is actively spewing.
    const maxBytes = Number(process.env.COWORK_HARNESS_LLM_MAX_BYTES) || 8 * 1024 * 1024;
    const child = spawn(bin, ["-p", prompt, "--model", model], { stdio: ["ignore", "pipe", "ignore"] });
    let settled = false;
    let timer;
    // Single exit point: the first outcome wins, the timer is always cleared, later events are no-ops.
    const settle = (finish, value) => {
        if (settled)
            return;
        settled = true;
        clearTimeout(timer);
        finish(value);
    };
    // SIGKILL the child and reject LOUD.
    const abort = (message) => {
        try {
            child.kill("SIGKILL");
        }
        catch {
            /* already gone */
        }
        settle(reject, new Error(message));
    };
    timer = setTimeout(() => abort(`LLM decider transport (${bin} -p) timed out after ${timeoutMs}ms`), timeoutMs);
    // Keep the raw Buffers and decode ONCE at the end: decoding chunk by chunk (string concatenation)
    // corrupts any multi-byte UTF-8 character that straddles a chunk boundary.
    const chunks = [];
    let bytes = 0;
    child.stdout.on("data", (d) => {
        if (settled)
            return; // already aborted — don't keep buffering output from a dying child
        bytes += d.length;
        if (bytes > maxBytes) {
            chunks.length = 0;
            abort(`LLM decider transport (${bin} -p) exceeded ${maxBytes} bytes — aborting`);
            return;
        }
        chunks.push(d);
    });
    child.on("error", (e) => {
        settle(reject, new Error(`LLM decider transport (${bin} -p) failed to spawn: ${e.message}`));
    });
    child.on("close", (code) => {
        if (code === 0)
            settle(resolve, Buffer.concat(chunks).toString("utf8"));
        else
            settle(reject, new Error(`LLM decider transport (${bin} -p) exited ${code}`));
    });
});
package/dist/dotenv.js ADDED
@@ -0,0 +1,52 @@
1
+ import { existsSync, readFileSync } from "node:fs";
2
+ import { resolve } from "node:path";
3
/**
 * Minimal `.env` loader (no dependency). Loads `KEY=VALUE` lines from `./.env` into `process.env`
 * at CLI startup so credentials (e.g. `CLAUDE_CODE_OAUTH_TOKEN`) don't have to be `export`ed each
 * run. Standard dotenv semantics: comments (`#`), surrounding quotes, an optional `export ` prefix,
 * trailing inline comments after both unquoted AND quoted values, and — importantly —
 * **existing `process.env` values win** (an exported var is never overwritten).
 *
 * SECURITY: `.env` is a HOST-side credential store. It is read into this process's env and is NEVER
 * mounted into the sandbox. Keep it at the repo/working-dir root — do NOT place a `.env` inside a
 * mounted skill/project folder, or its contents would be copied into the agent's filesystem. The
 * token value is also scrubbed from all persisted run logs regardless of source.
 *
 * @param {string} [file] path of the env file (default: `.env` in the current working directory)
 * @returns {string[]} the keys that were actually set, in file order; `[]` if the file is missing
 *   or unreadable
 */
export function loadDotenv(file = resolve(process.cwd(), ".env")) {
    if (!existsSync(file))
        return [];
    let text;
    try {
        text = readFileSync(file, "utf8");
    }
    catch {
        return [];
    }
    const loaded = [];
    for (const raw of text.split("\n")) {
        const line = raw.trim();
        if (!line || line.startsWith("#"))
            continue;
        const m = line.match(/^(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)$/);
        if (!m)
            continue;
        const key = m[1];
        let val = m[2];
        const quote = val[0];
        const quoted = quote === '"' || quote === "'";
        // `KEY="value" # note`: the value is what sits between the opening quote and the first
        // matching quote that is followed by a comment. Only consulted when the value does not
        // simply end with its own quote.
        const quotedWithComment = quoted ? val.match(/^(["'])(.*?)\1\s+#.*$/) : null;
        if (quoted && val.length >= 2 && val.endsWith(quote)) {
            val = val.slice(1, -1);
        }
        else if (quotedWithComment) {
            // Without this branch the comment stripper below would leave the quotes in the value
            // (`"value"`) or cut at a `#` INSIDE the quotes.
            val = quotedWithComment[2];
        }
        else {
            // strip a trailing inline comment from an unquoted value
            val = val.replace(/\s+#.*$/, "").trim();
        }
        // An empty value (`KEY=`) means "not provided" — skip it, so a blank template placeholder is
        // harmless and a later non-empty line (or an exported var) still wins.
        if (val === "")
            continue;
        if (process.env[key] === undefined) {
            process.env[key] = val;
            loaded.push(key);
        }
    }
    return loaded;
}
@@ -0,0 +1,138 @@
1
+ import http from "node:http";
2
+ import net from "node:net";
3
+ import { appendFileSync } from "node:fs";
4
/**
 * Start the allowlisting egress proxy: plain HTTP requests are forwarded, HTTPS is tunnelled via
 * CONNECT, and every allow/deny decision is reported through `opts.onDecision` and, when
 * `opts.logPath` is set, appended to that file as one JSON line (`{ts, host, decision}`).
 *
 * @param {{ allow: string[], port?: number, logPath?: string,
 *   onDecision?: (host: string, decision: "allow"|"deny") => void }} opts allowlist patterns (see
 *   `compile`), listen port (default 8080), optional decision log file and decision callback
 * @returns {import("node:http").Server} the listening server; its `close()` also unregisters the
 *   process-level `uncaughtException` guard installed here
 */
export function startEgressProxy(opts) {
    const allow = compile(opts.allow);
    const log = (host, decision) => {
        opts.onDecision?.(host, decision);
        if (opts.logPath)
            appendFileSync(opts.logPath, JSON.stringify({ ts: Date.now(), host, decision }) + "\n");
    };
    const server = http.createServer((req, res) => {
        const host = hostOf(req.url ?? "", req.headers.host).toLowerCase();
        if (!allow(host)) {
            log(host, "deny");
            res.writeHead(403, { "content-type": "text/plain" });
            res.end(`egress denied: ${host} not on allowlist`);
            return;
        }
        // Minimal HTTP forward (CONNECT covers HTTPS below; this handles plain HTTP).
        // #33: `hostOf` falls back to the Host header for a relative/malformed req.url, so the
        // allow check can pass while `new URL(req.url)` still throws. Fail loud with a clean 400
        // instead of letting the uncaught throw take the callback (and the proxy) down.
        let target;
        try {
            target = new URL(req.url);
        }
        catch {
            res.writeHead(400, { "content-type": "text/plain" });
            res.end("bad request: malformed proxy URL");
            return;
        }
        // Only plain-HTTP absolute URLs can be forwarded here. Any other scheme would be dialled as
        // HTTP on port 80 anyway, and a scheme whose URL has an EMPTY hostname is allow-checked
        // against the Host header (the `hostOf` fallback) while the dial below would use the empty
        // host — i.e. a different destination than the one that was approved.
        if (target.protocol !== "http:" || !target.hostname) {
            res.writeHead(400, { "content-type": "text/plain" });
            res.end("bad request: only http:// URLs can be forwarded (use CONNECT for TLS)");
            return;
        }
        // Log `allow` only once the request is valid and we're about to forward. Logging before
        // the parse would record an `allow` for a malformed URL that never reached an upstream,
        // false-passing `egress_allowed` assertions.
        log(host, "allow");
        const proxyReq = http.request({ host: target.hostname, port: target.port || 80, path: target.pathname + target.search, method: req.method, headers: req.headers }, (proxyRes) => {
            // An upstream reset mid-body is emitted on the response stream; without a listener it
            // would be thrown as an unhandled `error` event and the client would be left hanging.
            proxyRes.on("error", () => res.destroy());
            res.writeHead(proxyRes.statusCode ?? 502, proxyRes.headers);
            proxyRes.pipe(res);
        });
        proxyReq.on("error", () => {
            // Once the status line has gone out a second writeHead() throws ERR_HTTP_HEADERS_SENT
            // from inside this handler; all that is left to do is cut the response short.
            if (res.headersSent) {
                res.destroy();
                return;
            }
            res.writeHead(502);
            res.end("upstream error");
        });
        req.pipe(proxyReq);
    });
    // HTTPS via CONNECT tunneling — allow/deny by SNI host, then blind-pipe.
    server.on("connect", (req, clientSocket, head) => {
        let upstream;
        // A reset on either side must never crash the proxy (ECONNRESET is normal at
        // connection teardown). Attach error handlers before any I/O. A failed client also takes
        // its upstream leg down, otherwise that socket lingers until the remote end closes it.
        clientSocket.on("error", () => {
            clientSocket.destroy();
            upstream?.destroy();
        });
        // Parse the CONNECT authority bracket-aware so `[2001:db8::1]:443` yields the right
        // host/port — a bare `split(":")` reads `[` as the host and `2001` as the port. The
        // matcher lowercases, so DNS-case variants of the SNI host match the allowlist too.
        const { host, port } = parseAuthority(req.url ?? "");
        if (!allow(host)) {
            log(host, "deny");
            clientSocket.write("HTTP/1.1 403 Forbidden\r\n\r\n");
            clientSocket.end();
            return;
        }
        log(host, "allow");
        upstream = net.connect(port, host, () => {
            clientSocket.write("HTTP/1.1 200 Connection Established\r\n\r\n");
            upstream.write(head);
            upstream.pipe(clientSocket);
            clientSocket.pipe(upstream);
        });
        upstream.on("error", () => clientSocket.destroy());
    });
    // Last-resort guards so a single bad socket can never take the proxy down.
    server.on("clientError", (_e, sock) => {
        try {
            sock.destroy();
        }
        catch {
            /* already gone */
        }
    });
    // #50: store the handler so it can be removed when the server is closed — otherwise each
    // startEgressProxy() call in one process stacks another uncaughtException handler, causing
    // benign ECONNRESET/EPIPE to be swallowed N times by stale handlers after their server is gone.
    const uncaughtHandler = (e) => {
        if (e?.code === "ECONNRESET" || e?.code === "EPIPE")
            return; // benign socket teardown
        throw e;
    };
    process.on("uncaughtException", uncaughtHandler);
    server.listen(opts.port ?? 8080);
    // Wrap close() so the uncaughtException handler is cleaned up when the server stops.
    const origClose = server.close.bind(server);
    server.close = (cb) => {
        process.removeListener("uncaughtException", uncaughtHandler);
        return origClose(cb);
    };
    return server;
}
97
/**
 * Turn allowlist patterns into a host predicate. Supported forms: `*` (everything is allowed),
 * `*.example.com` (any host ending in `.example.com`) and a plain host name (exact match).
 * DNS names are case-insensitive, so patterns and candidate hosts are both compared lowercased.
 *
 * @param {Iterable<string>} patterns allowlist entries
 * @returns {(host: string) => boolean} true when `host` is allowed
 */
export function compile(patterns) {
    const exactHosts = new Set();
    const domainSuffixes = [];
    for (const rawPattern of patterns) {
        const pattern = rawPattern.toLowerCase();
        if (pattern === "*") {
            // unrestricted — nothing else in the list matters
            return () => true;
        }
        if (!pattern.startsWith("*.")) {
            exactHosts.add(pattern);
            continue;
        }
        // keep the leading dot: "*.claude.ai" is stored as ".claude.ai"
        domainSuffixes.push(pattern.slice(1));
    }
    return function isAllowed(host) {
        const candidate = host.toLowerCase();
        if (exactHosts.has(candidate)) {
            return true;
        }
        for (const suffix of domainSuffixes) {
            if (candidate.endsWith(suffix)) {
                return true;
            }
        }
        return false;
    };
}
116
/**
 * Host name a plain-HTTP proxy request is addressed to: the hostname of the absolute request URL,
 * or — when the URL is relative/malformed or carries no hostname — the host part of the `Host`
 * header. An IPv6 literal is returned in its bracketed form (`[::1]`) from either source.
 *
 * @param {string} url request target as received
 * @param {string} [hostHeader] value of the `Host` header, if any
 * @returns {string} the host without a port, or "" when neither source provides one
 */
function hostOf(url, hostHeader) {
    let fromUrl = "";
    try {
        fromUrl = new URL(url).hostname;
    }
    catch {
        /* relative or malformed URL — fall back to the Host header */
    }
    if (fromUrl)
        return fromUrl;
    const header = hostHeader ?? "";
    // A bare split(":") on `[::1]:8080` would return "[" — cut a bracketed literal at its `]`.
    if (header.startsWith("[")) {
        const closing = header.indexOf("]");
        if (closing !== -1)
            return header.slice(0, closing + 1);
    }
    return header.split(":")[0];
}
124
/**
 * Parse a CONNECT authority (`host:port`, or `[ipv6]:port`) into a bare host (IPv6 brackets
 * stripped, lowercased) and a numeric port (default 443). Uses the WHATWG URL parser so
 * bracketed IPv6 literals are handled correctly; falls back to a best-effort split.
 *
 * @param {string} authority the CONNECT request target
 * @returns {{ host: string, port: number }}
 */
function parseAuthority(authority) {
    try {
        const u = new URL("http://" + authority);
        const host = u.hostname.replace(/^\[|\]$/g, "").toLowerCase();
        let port = 443;
        if (u.port) {
            port = Number(u.port);
        }
        else if (/:0*80$/.test(authority)) {
            // The URL parser reports the scheme's default port as "", so with the `http://` prefix
            // used above an explicit `:80` is indistinguishable from "no port" and would be dialled
            // on 443. Recover it from the raw authority.
            port = 80;
        }
        return { host, port };
    }
    catch {
        return { host: authority.split(":")[0].toLowerCase(), port: 443 };
    }
}
@@ -0,0 +1,125 @@
1
+ import { spawnSync } from "node:child_process";
2
+ import { mkdirSync, readFileSync, existsSync } from "node:fs";
3
+ import { join, resolve } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
/** Image tag of the egress proxy container; override with `COWORK_PROXY_IMAGE`. */
const PROXY_IMAGE = process.env.COWORK_PROXY_IMAGE ?? "cowork-egress-proxy:1";
/**
 * Bring up the egress proxy as a sidecar container: an `--internal` network for the agent, a second
 * network with outside access, and a proxy container attached to both. The container runtime is
 * `COWORK_CONTAINER_RUNTIME` (default `docker`).
 *
 * @param {string[]} allow allowlist patterns, passed to the proxy as `COWORK_ALLOW` (comma-joined)
 * @param {string} outDir run output directory; the proxy log is written to `<outDir>/proxy/egress.log`
 * @param {string} runId suffix that makes the network and container names unique per run
 * @returns {{ proxyUrl: string, network: string,
 *   collect: () => Array<{ host: string, decision: "allow"|"deny" }>, teardown: () => void }}
 *   `collect()` parses the proxy log (malformed lines are dropped); `teardown()` removes the
 *   container and both networks, ignoring failures
 * @throws {Error} if the image cannot be built or any runtime command fails (after rolling back
 *   whatever had been created)
 */
export function startEgressSidecar(allow, outDir, runId) {
    const runner = process.env.COWORK_CONTAINER_RUNTIME ?? "docker";
    const intNet = `cowork-int-${runId}`;
    const outNet = `cowork-out-${runId}`;
    const proxyName = `cowork-proxy-${runId}`;
    const logDir = join(resolve(outDir), "proxy");
    mkdirSync(logDir, { recursive: true });
    const logFileHost = join(logDir, "egress.log");
    ensureProxyImage(runner);
    // #37: create the two networks and the proxy container in sequence, tracking each created
    // resource so a mid-sequence failure (image start, network connect) rolls back the rest
    // instead of orphaning networks/containers. Undo runs in reverse (container before networks).
    const rollback = [];
    try {
        d(runner, ["network", "create", "--internal", intNet]);
        rollback.push(() => d(runner, ["network", "rm", intNet], true));
        d(runner, ["network", "create", outNet]);
        rollback.push(() => d(runner, ["network", "rm", outNet], true));
        // Register the container undo BEFORE `run`: `run` is create + start, so a start failure
        // can leave a created container behind that keeps the name and blocks removal of the
        // internal network. `rm -f` with errors ignored is harmless when nothing was created.
        rollback.push(() => d(runner, ["rm", "-f", proxyName], true));
        // Proxy on the internal net first (so the agent can resolve it), then also wire
        // it to the external net so it alone can reach allowlisted hosts.
        d(runner, [
            "run",
            "-d",
            "--name",
            proxyName,
            "--network",
            intNet,
            "-e",
            `COWORK_ALLOW=${allow.join(",")}`,
            "-e",
            "COWORK_PROXY_LOG=/log/egress.log",
            "-v",
            `${logDir}:/log`,
            PROXY_IMAGE,
        ]);
        d(runner, ["network", "connect", outNet, proxyName]);
    }
    catch (e) {
        for (const undo of rollback.reverse())
            undo();
        throw e;
    }
    return {
        proxyUrl: `http://${proxyName}:8080`,
        network: intNet,
        collect() {
            if (!existsSync(logFileHost))
                return [];
            return readFileSync(logFileHost, "utf8")
                .trim()
                .split("\n")
                .filter(Boolean)
                .map(parseEgressLine)
                .filter((x) => x !== null);
        },
        teardown() {
            d(runner, ["rm", "-f", proxyName], true);
            d(runner, ["network", "rm", intNet], true);
            d(runner, ["network", "rm", outNet], true);
        },
    };
}
69
/**
 * Parse one egress log line into a typed decision, or `null` if it must be dropped.
 *
 * #43: previously this (a) silently swallowed an unparseable line and (b) coerced any
 * unknown/missing `decision` to "allow" via `o.decision === "deny" ? "deny" : "allow"` —
 * a silent false-green that could mask a real deny. Now every failure mode emits a
 * `::warning::` on stderr and DROPS the line; nothing is invented from corrupt input. That
 * includes the host: a record whose `host` is not a string is dropped rather than reported
 * under a stringified placeholder such as "undefined".
 *
 * @param {string} line one line of the proxy log
 * @returns {{ host: string, decision: "allow"|"deny" } | null}
 */
export function parseEgressLine(line) {
    let o;
    try {
        o = JSON.parse(line);
    }
    catch {
        process.stderr.write(`::warning:: [egress] proxy log line is not valid JSON — dropping: ${line.slice(0, 200)}\n`);
        return null;
    }
    // Valid JSON that isn't a non-null object (e.g. `null`, a number, an array) would throw on the
    // field reads below, OUTSIDE the parse catch — crashing collect() at teardown. Drop it loudly.
    if (o === null || typeof o !== "object" || Array.isArray(o)) {
        process.stderr.write(`::warning:: [egress] proxy log line is not a JSON object — dropping: ${line.slice(0, 200)}\n`);
        return null;
    }
    if (o.decision !== "allow" && o.decision !== "deny") {
        process.stderr.write(`::warning:: [egress] unknown decision "${o.decision}" for host ${String(o.host)} — dropping (not coercing to allow)\n`);
        return null;
    }
    // An empty string is kept (a request may carry no host); only a non-string host is rejected.
    if (typeof o.host !== "string") {
        process.stderr.write(`::warning:: [egress] proxy log line has no string host — dropping: ${line.slice(0, 200)}\n`);
        return null;
    }
    return { host: o.host, decision: o.decision };
}
99
/**
 * Make sure the proxy image (`PROXY_IMAGE`) exists for the given container runtime, building it
 * from `docker/Dockerfile.proxy` with the package root as build context when it does not.
 *
 * @param {string} runner container runtime executable
 * @throws {Error} if `npm run build` or the image build exits non-zero
 */
function ensureProxyImage(runner) {
    const inspected = spawnSync(runner, ["image", "inspect", PROXY_IMAGE], { stdio: "ignore" });
    const imagePresent = inspected.status === 0;
    if (imagePresent) {
        return;
    }
    // #39: resolve the package root through fileURLToPath rather than `.pathname`, so an install
    // path containing spaces / URL-escaped characters still yields a usable build context.
    const twoLevelsUp = new URL("../..", import.meta.url);
    const repoRoot = resolve(fileURLToPath(twoLevelsUp));
    // #38: Dockerfile.proxy COPYs the SHIPPED dist/egress. When running from source before
    // `npm run build` that directory is absent, so produce dist/ first.
    const shippedProxy = join(repoRoot, "dist", "egress", "proxy.js");
    if (!existsSync(shippedProxy)) {
        process.stderr.write(`::warning:: [egress] dist/egress missing — running \`npm run build\` before the proxy image build\n`);
        const npmBuild = spawnSync("npm", ["run", "build"], { cwd: repoRoot, stdio: "inherit" });
        if (npmBuild.status !== 0) {
            throw new Error("failed to build dist/ for the egress proxy image (npm run build)");
        }
    }
    const dockerfile = join(repoRoot, "docker", "Dockerfile.proxy");
    const buildArgs = ["build", "-t", PROXY_IMAGE, "-f", dockerfile, repoRoot];
    const imageBuild = spawnSync(runner, buildArgs, { stdio: "inherit" });
    if (imageBuild.status !== 0) {
        throw new Error(`failed to build ${PROXY_IMAGE}`);
    }
}
120
/**
 * Run one container-runtime command synchronously.
 *
 * @param {string} runner container runtime executable (e.g. `docker`)
 * @param {string[]} args arguments for the runtime
 * @param {boolean} [ignoreError=false] when true a failure is ignored (used for best-effort cleanup)
 * @throws {Error} when the command could not be started, was killed, or exited non-zero — unless
 *   `ignoreError` is set. The message names the first two arguments and carries the reason.
 */
function d(runner, args, ignoreError = false) {
    const r = spawnSync(runner, args, { encoding: "utf8" });
    if (ignoreError || r.status === 0)
        return;
    // A command that never started (e.g. the runtime is not installed) has no stderr/stdout at
    // all — the reason is only in `r.error`. Surface it instead of an empty "failed: ".
    const output = (r.stderr || r.stdout || "").trim().slice(0, 200);
    const detail = r.error ? r.error.message : output || (r.signal ? `killed by ${r.signal}` : "");
    throw new Error(`${runner} ${args.slice(0, 2).join(" ")} failed: ${detail}`);
}