cowork-harness 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/.env.example +16 -0
  2. package/CHANGELOG.md +190 -0
  3. package/LICENSE +21 -0
  4. package/README.md +470 -0
  5. package/baselines/desktop-1.11847.5.json +78 -0
  6. package/baselines/desktop-1.12603.1.json +140 -0
  7. package/baselines/prompts/desktop-1.12603.1/host-loop-append.md +8 -0
  8. package/baselines/prompts/desktop-1.12603.1/subagent-append-vm.md +3 -0
  9. package/baselines/prompts/desktop-1.12603.1/system-prompt-append.md +18 -0
  10. package/dist/agent/session.js +465 -0
  11. package/dist/assert.js +159 -0
  12. package/dist/baseline.js +87 -0
  13. package/dist/boundary.js +114 -0
  14. package/dist/canary/grants.js +37 -0
  15. package/dist/cli.js +1107 -0
  16. package/dist/decide/decider.js +521 -0
  17. package/dist/decide/external-channel.js +262 -0
  18. package/dist/decide/llm-transport.js +52 -0
  19. package/dist/dotenv.js +52 -0
  20. package/dist/egress/proxy.js +138 -0
  21. package/dist/egress/sidecar.js +125 -0
  22. package/dist/hostloop/provenance.js +110 -0
  23. package/dist/hostloop/workspace-handler.js +226 -0
  24. package/dist/loop-decision.js +62 -0
  25. package/dist/prompt.js +43 -0
  26. package/dist/run/cassette.js +420 -0
  27. package/dist/run/chat.js +194 -0
  28. package/dist/run/envelope.js +31 -0
  29. package/dist/run/execute.js +533 -0
  30. package/dist/run/renderer.js +179 -0
  31. package/dist/run/run.js +347 -0
  32. package/dist/run/trace-view.js +227 -0
  33. package/dist/runtime/argv.js +126 -0
  34. package/dist/runtime/container.js +76 -0
  35. package/dist/runtime/host-env.js +28 -0
  36. package/dist/runtime/hostloop.js +129 -0
  37. package/dist/runtime/lima.js +177 -0
  38. package/dist/runtime/microvm.js +151 -0
  39. package/dist/runtime/protocol.js +79 -0
  40. package/dist/runtime/stage.js +52 -0
  41. package/dist/secrets.js +42 -0
  42. package/dist/session.js +315 -0
  43. package/dist/sync/cowork-sync.js +215 -0
  44. package/dist/types.js +127 -0
  45. package/docker/Dockerfile.agent +31 -0
  46. package/docker/Dockerfile.proxy +12 -0
  47. package/docker/compose.yml +31 -0
  48. package/fixtures/subagent-grants.json +5 -0
  49. package/package.json +70 -0
package/dist/assert.js ADDED
@@ -0,0 +1,159 @@
1
+ import { existsSync } from "node:fs";
2
+ import { join } from "node:path";
3
/**
 * Label-boundary-aware domain test.
 *
 * `host` matches when it is identical to `needle` or ends with `"." + needle`, so
 * `x.anthropic.com` matches `anthropic.com` while `evilanthropic.com` does not.
 *
 * @param {string} host - Host name being tested.
 * @param {string} needle - Domain the host must equal or be a subdomain of.
 * @returns {boolean} True on an exact or subdomain match.
 */
export function hostMatches(host, needle) {
    if (host === needle) {
        return true;
    }
    return host.endsWith(`.${needle}`);
}
10
/**
 * Check each assertion against the run context.
 *
 * @param {object[]} assertions - Assertion objects to evaluate.
 * @param {object} ctx - Run context handed unchanged to `check`.
 * @returns {object[]} One `check` result per assertion, in input order.
 */
export function evaluate(assertions, ctx) {
    const runOne = (assertion) => check(assertion, ctx);
    return assertions.map(runOne);
}
13
/**
 * Evaluate ONE assertion object against the run context with AND semantics (#5): every key that
 * is present is evaluated, the assertion passes iff all of them pass, and the first failing key
 * (in the fixed order below) supplies the surfaced message. An assertion with no recognised key
 * fails with "empty assertion".
 *
 * Per the original note, replay-lane callers strip keys that cannot be evaluated there before
 * calling this (see replayCassette), so AND never straddles replay classes.
 *
 * Regex-valued keys are compiled case-insensitively ("i"). A malformed pattern is recorded as a
 * clean assertion failure rather than thrown, because `evaluate()` has no error boundary.
 *
 * @param {object} a - Assertion object; each recognised key is one condition.
 * @param {object} ctx - Run context. Fields read here: transcript, workRoot, userVisiblePrefixes,
 *   toolsCalled, subagentTools, subagents, egress, outputsDeletes, selfHealRan, hostPathLeaked,
 *   questions, gateDeliveries, result.
 * @returns {{assertion: object, pass: boolean, message?: string}} Outcome for this assertion.
 */
function check(a, ctx) {
    const results = [];
    const ok = () => ({ pass: true });
    const fail = (message) => ({ pass: false, message });
    // Shared compile step for every regex-valued key: returns the RegExp, or records a
    // "bad regex" failure for `key` and returns undefined so the caller skips the match.
    const compile = (key, source) => {
        try {
            return new RegExp(source, "i");
        }
        catch (e) {
            results.push(fail(`${key}: bad regex "${source}": ${String(e.message)}`));
            return undefined;
        }
    };
    if (a.transcript_contains !== undefined) {
        results.push(ctx.transcript.includes(a.transcript_contains) ? ok() : fail(`transcript missing "${a.transcript_contains}"`));
    }
    if (a.transcript_not_contains !== undefined) {
        results.push(!ctx.transcript.includes(a.transcript_not_contains) ? ok() : fail(`transcript unexpectedly contains "${a.transcript_not_contains}"`));
    }
    // Fuzzy content for stochastic prose.
    if (a.transcript_matches !== undefined) {
        const re = compile("transcript_matches", a.transcript_matches);
        if (re) {
            results.push(re.test(ctx.transcript) ? ok() : fail(`transcript did not match /${a.transcript_matches}/i`));
        }
    }
    if (a.transcript_not_matches !== undefined) {
        const re = compile("transcript_not_matches", a.transcript_not_matches);
        if (re) {
            results.push(!re.test(ctx.transcript) ? ok() : fail(`transcript unexpectedly matched /${a.transcript_not_matches}/i`));
        }
    }
    if (a.file_exists !== undefined) {
        results.push(existsSync(join(ctx.workRoot, a.file_exists)) ? ok() : fail(`file not found: ${a.file_exists} (under ${ctx.workRoot})`));
    }
    if (a.user_visible_artifact !== undefined) {
        const p = a.user_visible_artifact;
        // The path must sit under a user-visible prefix BEFORE its existence is even considered.
        const visible = ctx.userVisiblePrefixes.some((pre) => p === pre || p.startsWith(pre + "/"));
        if (!visible) {
            results.push(fail(`"${p}" is not under a user-visible prefix (${ctx.userVisiblePrefixes.join(", ")}) — invisible to the user in Cowork`));
        }
        else {
            results.push(existsSync(join(ctx.workRoot, p)) ? ok() : fail(`user-visible artifact not found: ${p}`));
        }
    }
    if (a.tool_called !== undefined) {
        results.push(ctx.toolsCalled.has(a.tool_called) ? ok() : fail(`tool not called: ${a.tool_called}`));
    }
    if (a.tool_not_called !== undefined) {
        results.push(!ctx.toolsCalled.has(a.tool_not_called) ? ok() : fail(`tool unexpectedly called: ${a.tool_not_called}`));
    }
    if (a.subagent_tool_used !== undefined) {
        results.push(ctx.subagentTools.has(a.subagent_tool_used) ? ok() : fail(`sub-agent did not use: ${a.subagent_tool_used}`));
    }
    if (a.subagent_tool_absent !== undefined) {
        results.push(!ctx.subagentTools.has(a.subagent_tool_absent) ? ok() : fail(`sub-agent unexpectedly used: ${a.subagent_tool_absent}`));
    }
    if (a.subagent_dispatched !== undefined) {
        // Match the agentType OR the description — skills often dispatch with only a `description`
        // (no subagent_type → agentType "unknown"), so name-matching alone would miss those (O1).
        const rx = compile("subagent_dispatched", a.subagent_dispatched);
        if (rx) {
            results.push(ctx.subagents.some((s) => rx.test(s.agentType) || rx.test(s.description ?? ""))
                ? ok()
                : fail(`no sub-agent matching "${a.subagent_dispatched}" was dispatched (by type or description)`));
        }
    }
    if (a.subagent_declared_but_unused !== undefined) {
        const t = a.subagent_declared_but_unused;
        // #25 / B2: declared a tool but never USED it. Deliberately does not require
        // `toolsUsed.length === 0`, so "declared Bash, used Read" is caught too.
        const culprit = ctx.subagents.find((s) => s.declaredTools.includes(t) && !s.toolsUsed.includes(t));
        results.push(culprit
            ? fail(`sub-agent "${culprit.agentType}" declared "${t}" but never used it (used: ${culprit.toolsUsed.join(", ") || "none"})`)
            : ok());
    }
    if (a.dispatch_count_max !== undefined) {
        results.push(ctx.subagents.length <= a.dispatch_count_max
            ? ok()
            : fail(`dispatched ${ctx.subagents.length} sub-agents, max ${a.dispatch_count_max} (SPEC §10 cap {global:3})`));
    }
    if (a.egress_denied !== undefined) {
        results.push(ctx.egress.some((e) => hostMatches(e.host, a.egress_denied) && e.decision === "deny")
            ? ok()
            : fail(`expected egress denied: ${a.egress_denied}`));
    }
    if (a.egress_allowed !== undefined) {
        results.push(ctx.egress.some((e) => hostMatches(e.host, a.egress_allowed) && e.decision === "allow")
            ? ok()
            : fail(`expected egress allowed: ${a.egress_allowed}`));
    }
    // NOTE(review): the VALUE of no_delete_in_outputs is not consulted — `false` is checked exactly
    // like `true`. Confirm the assertion schema only admits `true`.
    if (a.no_delete_in_outputs !== undefined) {
        results.push(ctx.outputsDeletes.length === 0
            ? ok()
            : fail(`delete op(s) touched outputs (forbidden in Cowork): ${ctx.outputsDeletes.slice(0, 3).join("; ")}`));
    }
    if (a.self_heal_ran !== undefined) {
        results.push(ctx.selfHealRan === a.self_heal_ran ? ok() : fail(`self_heal_ran was ${ctx.selfHealRan}, expected ${a.self_heal_ran}`));
    }
    if (a.transcript_no_host_path !== undefined) {
        const clean = !ctx.hostPathLeaked;
        if (clean === a.transcript_no_host_path) {
            results.push(ok());
        }
        else if (!clean) {
            results.push(fail(`host path leaked into model-visible text: ${ctx.hostPathLeaked}`));
        }
        else {
            // Inverse expectation (`false`) on a clean transcript: nothing leaked, so the old
            // "host path leaked…" text would have described the opposite of what happened.
            results.push(fail(`expected a host path leak into model-visible text but none was observed`));
        }
    }
    if (a.question_asked !== undefined) {
        const rx = compile("question_asked", a.question_asked);
        if (rx) {
            results.push(ctx.questions.some((q) => rx.test(q)) ? ok() : fail(`no question matched: ${a.question_asked}`));
        }
    }
    if (a.questions_count_max !== undefined) {
        results.push(ctx.questions.length <= a.questions_count_max ? ok() : fail(`asked ${ctx.questions.length} questions, max ${a.questions_count_max}`));
    }
    if (a.gate_answers_delivered !== undefined) {
        // #19: passes iff every answered gate's tool_result was OBSERVED and non-error. An
        // unobserved delivery (delivered=null) is absence of the required evidence, so it fails
        // loud. `delivered: false` is a real errored tool_result; `null` is "none observed".
        if (a.gate_answers_delivered) {
            const bad = ctx.gateDeliveries.filter((g) => g.delivered !== true);
            results.push(bad.length === 0
                ? ok()
                : fail(`gate answer(s) not confirmed delivered to the model: ${bad
                    .map((g) => `"${g.question}" (${g.delivered === false
                    ? (g.error ?? "tool error")
                    : g.reason === "no-pairing-metadata"
                        ? "no pairing metadata — gate had no toolUseId"
                        : "delivery unobserved — no tool_result for this gate"})`)
                    .join("; ")}`));
        }
        else {
            // Inverse: expect a CONFIRMED delivery failure (a real errored tool_result), not merely unobserved.
            const failedConfirmed = ctx.gateDeliveries.filter((g) => g.delivered === false);
            results.push(failedConfirmed.length > 0 ? ok() : fail(`expected a confirmed gate-delivery failure but none was observed`));
        }
    }
    if (a.result !== undefined) {
        results.push(ctx.result === a.result ? ok() : fail(`result was ${ctx.result}, expected ${a.result}`));
    }
    if (results.length === 0) {
        return { assertion: a, pass: false, message: "empty assertion" };
    }
    const firstFail = results.find((r) => !r.pass);
    return firstFail ? { assertion: a, pass: false, message: firstFail.message } : { assertion: a, pass: true };
}
@@ -0,0 +1,87 @@
1
+ import { readFileSync, readdirSync, existsSync } from "node:fs";
2
+ import { join, resolve, isAbsolute } from "node:path";
3
+ import { homedir } from "node:os";
4
+ import { fileURLToPath } from "node:url";
5
+ import { PlatformBaseline } from "./types.js";
6
+ export const BASELINES_DIR = join(fileURLToPath(new URL("..", import.meta.url)), "baselines"); // `baselines/` one level above this module's directory, resolved from import.meta.url (not from cwd)
7
/**
 * Resolve the host path to the staged agent binary.
 *
 * Precedence: the `COWORK_AGENT_BINARY` environment variable, then `baseline.agentBinary.stagedPath`
 * (a leading `~` or `~/` is expanded to the current user's home directory).
 *
 * @param {object} baseline - Platform baseline; only `agentBinary?.stagedPath` is read.
 * @returns {string} Absolute path of an existing binary.
 * @throws {Error} When the override path, or the staged path, does not exist.
 */
export function resolveAgentBinary(baseline) {
    const override = process.env.COWORK_AGENT_BINARY;
    if (override) {
        if (!existsSync(override)) {
            throw new Error(`COWORK_AGENT_BINARY not found: ${override}`);
        }
        return resolve(override);
    }
    // Replacer FUNCTION rather than a replacement string: in a string, sequences such as `$&` or
    // `$1` are substitution patterns, so a home directory containing `$` would be mangled.
    const staged = (baseline.agentBinary?.stagedPath ?? "").replace(/^~(?=$|\/)/, () => homedir());
    if (!staged || !existsSync(staged)) {
        throw new Error(`Staged agent binary not found at "${staged}". It is extracted from your Claude Desktop install ` +
            `(claude-code-vm/<ver>/claude). Open Cowork once to stage it, or set COWORK_AGENT_BINARY to its path.`);
    }
    return resolve(staged);
}
22
/**
 * Load and validate a platform baseline.
 *
 * `name` may be:
 *  - `"latest"` — the newest `desktop-*.json` under BASELINES_DIR;
 *  - an absolute path — used as-is (the way to load an out-of-tree baseline);
 *  - anything else — treated as a bare file name under BASELINES_DIR, with `.json` appended when
 *    missing, so `desktop-x` and `desktop-x.json` load the same file regardless of cwd.
 *
 * @param {string} name - Baseline selector as described above.
 * @returns {object} The result of `PlatformBaseline.parse` on the file's JSON.
 */
export function loadBaseline(name) {
    let file;
    if (name === "latest") {
        file = latestBaselineFile();
    }
    else if (isAbsolute(name)) {
        file = name;
    }
    else {
        const fileName = name.endsWith(".json") ? name : `${name}.json`;
        file = join(BASELINES_DIR, fileName);
    }
    const text = readFileSync(file, "utf8");
    return PlatformBaseline.parse(JSON.parse(text));
}
36
/**
 * Comparator for `desktop-<version>.json` file names that orders them by numeric version segment
 * rather than lexically, e.g. `compareBaselineVersions("desktop-1.9.json", "desktop-1.10.json") < 0`.
 *
 * @param {string} a - First file name.
 * @param {string} b - Second file name.
 * @returns {number} Negative when a < b, zero when equal, positive when a > b.
 */
export function compareBaselineVersions(a, b) {
    // File name → numeric segments. A segment parseInt cannot read becomes 0, which keeps the
    // subtraction below free of NaN so the ordering stays total.
    const toSegments = (fileName) => {
        const version = fileName.replace(/^desktop-/, "").replace(/\.json$/, "");
        const numbers = [];
        for (const part of version.split(".")) {
            const value = parseInt(part, 10);
            numbers.push(Number.isNaN(value) ? 0 : value);
        }
        return numbers;
    };
    const left = toSegments(a);
    const right = toSegments(b);
    const count = Math.max(left.length, right.length);
    let index = 0;
    while (index < count) {
        // A missing trailing segment counts as 0, so "1.2" and "1.2.0" compare equal.
        const delta = (left[index] ?? 0) - (right[index] ?? 0);
        if (delta !== 0) {
            return delta;
        }
        index += 1;
    }
    return 0;
}
62
/**
 * Find the highest-versioned `desktop-*.json` file in BASELINES_DIR.
 *
 * @returns {string} Path of the newest baseline file, joined onto BASELINES_DIR.
 * @throws {Error} When the directory holds no `desktop-*.json` file.
 */
function latestBaselineFile() {
    const candidates = [];
    for (const entry of readdirSync(BASELINES_DIR)) {
        if (entry.startsWith("desktop-") && entry.endsWith(".json")) {
            candidates.push(entry);
        }
    }
    if (candidates.length === 0) {
        throw new Error(`No baselines in ${BASELINES_DIR}; run \`cowork-harness sync\` first.`);
    }
    // Version-aware ordering, so desktop-1.10.json ranks above desktop-1.9.json.
    candidates.sort(compareBaselineVersions);
    const newest = candidates[candidates.length - 1];
    return join(BASELINES_DIR, newest);
}
70
/**
 * Expand the baseline's mount layout for a concrete session.
 *
 * Every `{sessionId}` and `{projectId}` placeholder in `cwd`, `sessionRoot`, `mntRoot` and each
 * mount's `mountPath` is substituted. `mntRoot` defaults to `<sessionRoot>/mnt` when the baseline
 * does not set it. Mount paths are returned prefixed with `mntRoot`.
 *
 * @param {object} baseline - Platform baseline; only `mountLayout` is read.
 * @param {string} sessionId - Value substituted for `{sessionId}`.
 * @param {string} [projectId="proj1"] - Value substituted for `{projectId}`.
 * @returns {{cwd: string, sessionRoot: string, mntRoot: string, configDir: string, mounts: object[]}}
 *   The expanded layout; `configDir` is `<mntRoot>/.claude`.
 */
export function resolveMounts(baseline, sessionId, projectId = "proj1") {
    // replaceAll + replacer functions: substitutes EVERY occurrence of a placeholder (plain
    // `replace` with a string pattern stops after the first) and inserts the ids verbatim
    // (a replacement STRING would interpret `$&`, `$1`, … inside an id).
    const subst = (s) => s
        .replaceAll("{sessionId}", () => sessionId)
        .replaceAll("{projectId}", () => projectId);
    const layout = baseline.mountLayout;
    const cwd = subst(layout.cwd);
    const sessionRoot = subst(layout.sessionRoot);
    const mntRoot = subst(layout.mntRoot ?? `${layout.sessionRoot}/mnt`);
    return {
        cwd,
        sessionRoot,
        mntRoot,
        configDir: `${mntRoot}/.claude`,
        mounts: layout.mounts.map((m) => ({ ...m, mountPath: `${mntRoot}/${subst(m.mountPath)}` })),
    };
}
@@ -0,0 +1,114 @@
1
+ import { spawnSync } from "node:child_process";
2
+ import { mkdtempSync } from "node:fs";
3
+ import { tmpdir, userInfo, homedir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { startEgressSidecar } from "./egress/sidecar.js";
6
/**
 * Compute the egress allowlist the boundary sidecar is seeded with.
 *
 * An `unrestricted` session widens the list to `["*"]`. Otherwise the result is the baseline's
 * `network.allowDomains` followed by the session's `extraAllow` entries (if any). Pure, so it is
 * unit-testable without Docker.
 *
 * @param {object} baseline - Platform baseline; `network.allowDomains` is read.
 * @param {object} [session] - Optional session; `unrestricted` and `extraAllow` are read.
 * @returns {string[]} A fresh allowlist array.
 */
export function boundaryAllowList(baseline, session) {
    if (session?.unrestricted) {
        return ["*"];
    }
    const allowed = [...baseline.network.allowDomains];
    allowed.push(...(session?.extraAllow ?? []));
    return allowed;
}
16
/**
 * Run the boundary self-test: start the per-run egress sidecar, launch four short-lived probe
 * containers on its network, and report whether each isolation constraint held.
 *
 * Checks, in order: `host-fs-sealed`, `direct-egress-denied`, `allowlist-enforced`,
 * `allowlist-permits`.
 *
 * Environment: `COWORK_CONTAINER_RUNTIME` (default "docker") selects the container CLI and
 * `COWORK_AGENT_IMAGE` (default "cowork-agent-base:1") the probe image.
 *
 * @param {object} baseline - Platform baseline, forwarded to `boundaryAllowList`.
 * @param {object} [session] - Optional session, forwarded to `boundaryAllowList`.
 * @returns {{check: string, expectation: string, pass: boolean, detail: string}[]} One entry per check.
 */
export function runBoundaryChecks(baseline, session) {
    const runtime = process.env.COWORK_CONTAINER_RUNTIME ?? "docker";
    const image = process.env.COWORK_AGENT_IMAGE ?? "cowork-agent-base:1";
    const results = [];
    // Stand up the real per-run boundary (internal network + allowlist proxy), exactly
    // what a container-fidelity scenario uses.
    const runId = `bchk${process.hrtime.bigint().toString(36)}`;
    const sidecar = startEgressSidecar(boundaryAllowList(baseline, session), mkdtempSync(join(tmpdir(), "cowork-bchk-")), runId);
    // try/finally: the sidecar is torn down even if a probe step throws, so a failed self-test
    // does not leave its boundary resources behind.
    try {
        const network = sidecar.network;
        const proxy = sidecar.proxyUrl;
        // NOTE(review): the probe platform is fixed to linux/arm64 — confirm this is intended on other hosts.
        const probe = (shell, withProxy = false) => spawnSync(runtime, [
            "run",
            "--rm",
            "--platform",
            "linux/arm64",
            "--network",
            network,
            ...(withProxy ? ["-e", `HTTPS_PROXY=${proxy}`, "-e", `HTTP_PROXY=${proxy}`] : []),
            "--entrypoint",
            "sh",
            image,
            "-c",
            shell,
        ], { encoding: "utf8", timeout: 30_000 });
        // When the probe produced no output because it could not be spawned or timed out,
        // surface spawnSync's error instead of an empty "got:" line. The pass/fail verdict is
        // still computed from the probe's stdout/stderr only.
        const detailOf = (r, text) => (text === "" && r.error ? `probe did not run: ${r.error.message}` : text);
        // 1. Host filesystem is NOT visible (no /Users, no host home bind).
        {
            const r = probe(`ls /Users 2>&1 || true; ls /host 2>&1 || true`);
            const out = (r.stdout ?? "") + (r.stderr ?? "");
            const blocked = isHostFsSealed(out);
            results.push({
                check: "host-fs-sealed",
                expectation: "host paths (/Users, /host) invisible",
                pass: blocked,
                detail: detailOf(r, out.trim().slice(0, 200)),
            });
        }
        // 2. Direct (non-proxied) egress is impossible — no route off the internal net.
        {
            const r = probe(`curl -sS -m 5 -o /dev/null http://example.com && echo REACHED || echo BLOCKED`);
            const out = ((r.stdout ?? "") + (r.stderr ?? "")).trim();
            results.push({
                check: "direct-egress-denied",
                expectation: "no route to internet without proxy",
                pass: /BLOCKED/.test(out) && !/REACHED/.test(out),
                detail: detailOf(r, out),
            });
        }
        // 3. Non-allowlisted egress via the proxy is refused (403).
        {
            const r = probe(`curl -sS -m 5 -o /dev/null https://example.com && echo REACHED || echo BLOCKED`, true);
            const out = ((r.stdout ?? "") + (r.stderr ?? "")).trim();
            results.push({
                check: "allowlist-enforced",
                expectation: "off-list host refused by proxy",
                pass: /BLOCKED|403/.test(out) && !/REACHED/.test(out),
                detail: detailOf(r, out.slice(0, 200)),
            });
        }
        // 4. Allowlisted egress via the proxy works (so the agent can reach inference).
        {
            const r = probe(`curl -sS -m 8 -o /dev/null https://api.anthropic.com && echo OK || echo FAIL`, true);
            const out = ((r.stdout ?? "") + (r.stderr ?? "")).trim();
            results.push({
                check: "allowlist-permits",
                expectation: "allowlisted host reachable via proxy",
                pass: /OK/.test(out),
                detail: detailOf(r, out.slice(0, 200)),
            });
        }
    }
    finally {
        sidecar.teardown();
    }
    return results;
}
88
/**
 * Backslash-escape every regex metacharacter in `s` so the result can be embedded in a RegExp
 * source and match `s` literally.
 *
 * @param {string} s - Literal text.
 * @returns {string} `s` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegex(s) {
    return s.replace(/[.*+?^${}()|[\]\\]/g, (metachar) => `\\${metachar}`);
}
92
/**
 * #35: pass criterion for the host-fs-sealed probe, built from the actual environment instead of
 * a hard-coded username.
 *
 * Sealed (true) ⇔ the probe output looks like a denial ("No such file", "cannot access" or
 * "not found", case-insensitive) AND contains none of the host markers. The markers are the
 * current username, the home directory, and the literal roots `/Users/` and `/opt/cowork/`.
 * The dynamic markers are regex-escaped and matched as plain, case-sensitive substrings.
 *
 * @param {string} probeOutput - Combined output captured from the probe.
 * @param {{username?: string, homedir?: string}} [env] - Overrides for the markers; when a field
 *   is absent, `os.userInfo().username` / `os.homedir()` is used.
 * @returns {boolean} True when the host filesystem appears sealed.
 */
export function isHostFsSealed(probeOutput, env) {
    const username = env?.username ?? userInfo().username;
    const home = env?.homedir ?? homedir();
    // Drop empty markers: an empty alternative would make the guard match every output.
    const alternatives = [];
    for (const candidate of [escapeRegex(username), escapeRegex(home), "/Users/", "/opt/cowork/"]) {
        if (candidate) {
            alternatives.push(candidate);
        }
    }
    const hostMarker = new RegExp(alternatives.join("|"));
    if (!/No such file|cannot access|not found/i.test(probeOutput)) {
        return false;
    }
    return !hostMarker.test(probeOutput);
}
110
/**
 * Render boundary-check results as a plain-text report: a headline stating whether every
 * constraint held, then one line per check. A failing check gets an extra "got:" line
 * carrying its `detail`.
 *
 * @param {{check: string, expectation: string, pass: boolean, detail: string}[]} results
 * @returns {string} The report text.
 */
export function formatBoundary(results) {
    const rendered = results.map((entry) => {
        const status = entry.pass ? "PASS" : "FAIL";
        const head = `${status} ${entry.check.padEnd(22)} — ${entry.expectation}`;
        return entry.pass ? head : `${head}\n got: ${entry.detail}`;
    });
    const hasGap = results.some((entry) => !entry.pass);
    const headline = hasGap ? "GAPS FOUND" : "ALL CONSTRAINTS ENFORCED";
    return [`Boundary parity: ${headline}`, rendered.join("\n")].join("\n");
}
@@ -0,0 +1,37 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { fileURLToPath } from "node:url";
3
+ import { join } from "node:path";
4
/**
 * Load the committed sub-agent grant map (`agentType → declared tools`).
 *
 * @param {string} [path] - Fixture file to read. Defaults to `fixtures/subagent-grants.json`,
 *   located two directory levels above this module.
 * @returns {object} The fixture's `.grants` object.
 * @throws {Error} When the file cannot be read or parsed (the underlying error is attached as
 *   `cause`), or when `.grants` is missing or not a plain (non-array) object.
 */
export function loadGrantMap(path) {
    const p = path ?? join(fileURLToPath(new URL("../..", import.meta.url)), "fixtures", "subagent-grants.json");
    // #44: a read/parse failure must be LOUD — silently returning an empty map would disable
    // drift detection at exactly the moment the fixture is corrupt.
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(p, "utf8"));
    }
    catch (e) {
        // Keep the original error (ENOENT, SyntaxError, …) reachable via `cause` for debugging.
        throw new Error(`corrupt subagent-grants fixture at ${p}: ${e.message} — run 'cowork-harness sync' to regenerate`, { cause: e });
    }
    // #44: validate `.grants` rather than coercing a missing/non-object value to `{}`, so a
    // malformed fixture is an error instead of a silent "no drift".
    const grants = parsed?.grants;
    if (grants === null || typeof grants !== "object" || Array.isArray(grants)) {
        throw new Error(`corrupt subagent-grants fixture at ${p}: missing or non-object ".grants" — run 'cowork-harness sync' to regenerate`);
    }
    return grants;
}
23
/**
 * Verify dispatched sub-agents against the committed grant map.
 *
 * For each sub-agent whose `agentType` is an OWN key of `map`, the declared tools are compared
 * with the expected tools as sorted lists; any difference is reported as drift. Agent types the
 * map does not list are NOT asserted, so no false invariant is ever claimed.
 *
 * @param {{agentType: string, declaredTools: string[]}[]} subagents - Dispatched sub-agents.
 * @param {object} map - `agentType → expected tool names`.
 * @returns {{agentType: string, expected: string[], actual: string[]}[]} One entry per drifting
 *   sub-agent, with both lists sorted.
 */
export function verifyGrants(subagents, map) {
    const drift = [];
    for (const s of subagents) {
        // Own-property lookup only: a plain `map[agentType]` also finds inherited members, so an
        // agentType such as "toString" or "constructor" would yield a function and crash the
        // spread below.
        if (!Object.prototype.hasOwnProperty.call(map, s.agentType)) {
            continue;
        }
        const expected = map[s.agentType];
        if (expected === undefined) {
            continue;
        }
        // Copy before sorting so neither the caller's array nor the map entry is mutated.
        const a = [...s.declaredTools].sort();
        const e = [...expected].sort();
        if (JSON.stringify(a) !== JSON.stringify(e)) {
            drift.push({ agentType: s.agentType, expected: e, actual: a });
        }
    }
    return drift;
}