cowork-harness 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -6,6 +6,28 @@ All notable changes to this project are documented here. The format is based on
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.4.2] — 2026-06-18
10
+
11
+ ### Added
12
+
13
+ - **Platform baseline `desktop-1.13576.1`** — synced from the updated Claude Desktop (the app moved
14
+ `1.12603.1` → `1.13576.1`). `loadBaseline("latest")` now resolves to it. The embedded agent binary is
15
+ unchanged at `2.1.177` (the update changed the app shell + gate states, not the agent ELF); this baseline
16
+ also corrects the prior baselines' stale `2.1.170` agent pin to the actually-staged `2.1.177`. Egress
17
+ allowlist unchanged.
18
+
19
+ ## [0.4.1] — 2026-06-18
20
+
21
+ ### Fixed
22
+
23
+ - **Agent-binary newest-staged fallback now applies on the real runtime paths** (container / hostloop, and
24
+ thus `skill` / `run` / `chat`), not just `sync`/tests. `resolveAgentBinary` had two private duplicates
25
+ (`container.ts`, `hostloop.ts`) **without** the 0.4.0 fallback, so a host with a newer staged
26
+ `claude-code-vm/<ver>` than the baseline expects still hard-failed with "Staged agent binary not found".
27
+ The duplicates were consolidated into the single exported resolver; a host that has staged a newer build
28
+ now falls back to it (with a warning) instead of failing. A structural test + CI guard prevent the
29
+ resolver from being re-duplicated.
30
+
9
31
  ## [0.4.0] — 2026-06-18
10
32
 
11
33
  The parsing/validation hardening + safety release: a current-tree code-review sweep plus fidelity and
@@ -0,0 +1,220 @@
1
+ {
2
+ "baselineVersion": 1,
3
+ "appVersion": "1.13576.1",
4
+ "agentVersion": "2.1.177",
5
+ "agentBinary": {
6
+ "stagedPath": "~/Library/Application Support/Claude/claude-code-vm/2.1.177/claude",
7
+ "format": "elf-aarch64"
8
+ },
9
+ "guest": {
10
+ "os": "linux",
11
+ "arch": "arm64",
12
+ "baseImage": "ubuntu:22.04"
13
+ },
14
+ "spawn": {
15
+ "configDirInGuest": "mnt/.claude",
16
+ "settingSources": [
17
+ "user"
18
+ ],
19
+ "permissionMode": "default",
20
+ "maxThinkingTokens": 31999,
21
+ "effortDefault": "medium",
22
+ "tools": [
23
+ "Task",
24
+ "Bash",
25
+ "Glob",
26
+ "Grep",
27
+ "Read",
28
+ "Edit",
29
+ "Write",
30
+ "NotebookEdit",
31
+ "WebFetch",
32
+ "TaskCreate",
33
+ "TaskUpdate",
34
+ "TaskGet",
35
+ "TaskList",
36
+ "TaskStop",
37
+ "WebSearch",
38
+ "Skill",
39
+ "REPL",
40
+ "JavaScript",
41
+ "AskUserQuestion",
42
+ "ToolSearch"
43
+ ],
44
+ "allowedTools": [
45
+ "Task",
46
+ "Bash",
47
+ "Glob",
48
+ "Grep",
49
+ "Read",
50
+ "Edit",
51
+ "Write",
52
+ "NotebookEdit",
53
+ "WebFetch",
54
+ "TaskCreate",
55
+ "TaskUpdate",
56
+ "TaskGet",
57
+ "TaskList",
58
+ "TaskStop",
59
+ "WebSearch",
60
+ "Skill",
61
+ "REPL",
62
+ "JavaScript",
63
+ "ToolSearch"
64
+ ],
65
+ "env": {
66
+ "CLAUDE_CODE_IS_COWORK": "1",
67
+ "CLAUDE_CODE_ENTRYPOINT": "local-agent",
68
+ "CLAUDE_CODE_TAGS": "lam_session_type:chat",
69
+ "CLAUDE_CODE_PROVIDER_MANAGED_BY_HOST": "1",
70
+ "CLAUDE_CODE_ENABLE_ASK_USER_QUESTION_TOOL": "true",
71
+ "CLAUDE_CODE_DISABLE_CRON": "1",
72
+ "CLAUDE_CODE_DISABLE_BACKGROUND_TASKS": "1",
73
+ "CLAUDE_CODE_DISABLE_AGENTS_FLEET": "1",
74
+ "CLAUDE_CODE_ENABLE_APPEND_SUBAGENT_PROMPT": "1",
75
+ "CLAUDE_CODE_ENABLE_TASKS": "true",
76
+ "CLAUDE_CODE_DISABLE_TERMINAL_TITLE": "1",
77
+ "ENABLE_PROMPT_CACHING_1H": "1",
78
+ "DISABLE_MICROCOMPACT": "1",
79
+ "MCP_CONNECTION_NONBLOCKING": "true"
80
+ },
81
+ "promptTemplate": "prompts/desktop-1.12603.1/system-prompt-append.md",
82
+ "subagentAppend": "prompts/desktop-1.12603.1/subagent-append-vm.md",
83
+ "$comment": "Binary-verified Desktop->agent spawn contract (asar 1.12603.1). See docs/cowork-spawn-contract-1.12603.1.md.",
84
+ "$comment_notSet": "Deliberately NOT set: CLAUDE_CODE_USE_COWORK_PLUGINS (Desktop never sets it; would flip the agent to cowork_settings.json/cowork_plugins). Host-derived (TZ, account UUIDs, WORKSPACE_HOST_PATHS, OTEL) injected at runtime, not pinned.",
85
+ "$comment_prompts": "Reconstructed cowork-specific sections (not the full base prompt — not cleanly extractable). The main append is delivered via the --append-system-prompt CLI flag (layered on the agent's built-in base prompt), NOT the initialize handshake; only the subagent append goes over initialize (appendSubagentSystemPrompt), gated on CLAUDE_CODE_ENABLE_APPEND_SUBAGENT_PROMPT."
86
+ },
87
+ "mountLayout": {
88
+ "sessionRoot": "/sessions/{sessionId}",
89
+ "cwd": "/sessions/{sessionId}",
90
+ "mntRoot": "/sessions/{sessionId}/mnt",
91
+ "mounts": [
92
+ {
93
+ "name": "uploads",
94
+ "mountPath": "uploads",
95
+ "mode": "r",
96
+ "purpose": "user-uploaded files (read-only — asar 'ro')"
97
+ },
98
+ {
99
+ "name": "projects",
100
+ "mountPath": ".projects/{projectId}",
101
+ "mode": "rw",
102
+ "purpose": "selected work folders (a Space) — delete denied by default (asar IX)"
103
+ },
104
+ {
105
+ "name": "local-plugins",
106
+ "mountPath": ".local-plugins/cache",
107
+ "mode": "r",
108
+ "purpose": "marketplace skills/plugins, runtime-discovered"
109
+ },
110
+ {
111
+ "name": "remote-plugins",
112
+ "mountPath": ".remote-plugins",
113
+ "mode": "r",
114
+ "purpose": "org-remote plugins, runtime-discovered"
115
+ },
116
+ {
117
+ "name": "outputs",
118
+ "mountPath": "outputs",
119
+ "mode": "rw",
120
+ "purpose": "session outputs/artifacts — delete denied by default (asar IX); rwd only when approved"
121
+ }
122
+ ]
123
+ },
124
+ "network": {
125
+ "mode": "gvisor",
126
+ "allowKind": "allowlist",
127
+ "allowDomains": [
128
+ "sentry.io",
129
+ "preview.claude.ai",
130
+ "downloads.claude.ai",
131
+ "api.anthropic.com",
132
+ "a-cdn.anthropic.com",
133
+ "a-api.anthropic.com",
134
+ "console.anthropic.com",
135
+ "api-staging.anthropic.com",
136
+ "www.anthropic.com",
137
+ "docs.anthropic.com",
138
+ "mcp-proxy.anthropic.com",
139
+ "pivot.claude.ai",
140
+ "support.anthropic.com",
141
+ "assets.claude.ai"
142
+ ]
143
+ },
144
+ "bgEnvStrip": {
145
+ "knownVars": [
146
+ "CLAUDE_CODE_OAUTH_TOKEN",
147
+ "CLAUDE_CODE_SESSION_KIND",
148
+ "CLAUDE_CODE_SESSION_ID",
149
+ "CLAUDE_CODE_SESSION_NAME",
150
+ "CLAUDE_CODE_SESSION_LOG"
151
+ ]
152
+ },
153
+ "$comment": "Platform baseline auto-derived by `cowork-harness sync` from a live Claude Desktop install + app.asar. VOLATILE per-release facts only. Regenerate per release; review the diff. Captured 2026-06-17 on macOS arm64.",
154
+ "capturedAt": "2026-06-17",
155
+ "platform": "darwin-arm64",
156
+ "settings": {
157
+ "autoMountFolders": {
158
+ "key": "autoMountFolders",
159
+ "default": false
160
+ },
161
+ "localAgentModeTrustedFolders": {
162
+ "key": "localAgentModeTrustedFolders",
163
+ "default": []
164
+ }
165
+ },
166
+ "provenance": {
167
+ "asarPath": "/Applications/Claude.app/Contents/Resources/app.asar",
168
+ "asarFingerprint": "88b6968a8a249dbf",
169
+ "gates": {
170
+ "$comment": "Production GrowthBook gate states decoded from ~/Library/Application Support/Claude/fcache (standard interactive Anthropic account, 2026-06-13; binary-verified app.asar 1.12603.1). Pin per release. Behavior-affecting gates the harness models: 1143815894 (loop), 1648655587 (dispatch cap), 1978029737 (web_fetch routing). Telemetry/auth-internal gates omitted.",
171
+ "bridgeSdkTransport:583857784": {
172
+ "on": true,
173
+ "source": "force",
174
+ "value": true,
175
+ "note": "— Cowork uses the SDK-based transport (control protocol), confirming the harness's sdkMcpServers/mcp_message path is the production transport."
176
+ },
177
+ "hostLoop:1143815894": {
178
+ "on": true,
179
+ "source": "force",
180
+ "value": true
181
+ },
182
+ "taskDispatchLimiter:1648655587": {
183
+ "on": true,
184
+ "source": "force",
185
+ "value": {
186
+ "perTask": 1,
187
+ "global": 3
188
+ },
189
+ "note": "perTask=1 global=3 (host-side SKIP: recordSkipAndEmit/GCA.PerTaskLimit — NOT queue/deny). A dispatch session launches <=1 sub-task; <=3 concurrent globally."
190
+ },
191
+ "coworkRuntimeConfig:1978029737": {
192
+ "on": true,
193
+ "source": "force",
194
+ "value": {
195
+ "sessionsBridgePollBlockMs": 30,
196
+ "coworkNativeFilePreview": true,
197
+ "coworkWebFetchViaApi": true,
198
+ "coworkWebFetchPrompt": true,
199
+ "workspaceBashWaitLonger": true
200
+ },
201
+ "note": "coworkWebFetchViaApi=true coworkWebFetchPrompt=true workspaceBashWaitLonger=true sessionsBridgePollBlockMs=30 — web_fetch is host/API-routed (POST /api/organizations/<org>/cowork/web_fetch), NOT container egress; gated by a separate web-fetch hostname allowlist + URL provenance."
202
+ },
203
+ "cliPlugin:2307090146": {
204
+ "on": false,
205
+ "source": "defaultValue",
206
+ "value": false,
207
+ "note": "— the CLI-plugin credential broker is dark-launched off for standard interactive accounts (Ch23/L106)."
208
+ },
209
+ "pluginSyncSparkplug:2340532315": {
210
+ "on": true,
211
+ "source": "force",
212
+ "value": true,
213
+ "note": "— startup syncPlugins(); plugins load via --plugin-dir (registry inert in-VM)."
214
+ }
215
+ },
216
+ "eipcChannelUuid": "4f426349-8d6f-45f3-ae22-280fef323564",
217
+ "$comment": "eipcChannelUuid is per-build; recorded for provenance only — the harness does not use Desktop IPC."
218
+ },
219
+ "requireFullVmSandbox": null
220
+ }
@@ -1,9 +1,7 @@
1
1
  import { spawn } from "node:child_process";
2
- import { existsSync } from "node:fs";
3
2
  import { resolve, join } from "node:path";
4
- import { homedir } from "node:os";
5
3
  import { DEFAULT_MAX_THINKING_TOKENS } from "../types.js";
6
- import { resolveMounts } from "../baseline.js";
4
+ import { resolveMounts, resolveAgentBinary } from "../baseline.js";
7
5
  import { agentArgs, spawnEnv, dockerRunArgv, resolveMaxThinkingTokens } from "./argv.js";
8
6
  import { runtimeAuthEnv } from "./host-env.js";
9
7
  import { stageWorkspace } from "./stage.js";
@@ -60,18 +58,3 @@ export function spawnContainer(scenario, baseline, plan, outDir, sessionId, opts
60
58
  });
61
59
  return spawn(runner, dockerArgs, { stdio: ["pipe", "pipe", "pipe"] });
62
60
  }
63
- /** Resolve the staged Linux agent binary on the host (override: COWORK_AGENT_BINARY). */
64
- function resolveAgentBinary(baseline) {
65
- const override = process.env.COWORK_AGENT_BINARY;
66
- if (override) {
67
- if (!existsSync(override))
68
- throw new Error(`COWORK_AGENT_BINARY not found: ${override}`);
69
- return resolve(override);
70
- }
71
- const staged = (baseline.agentBinary?.stagedPath ?? "").replace(/^~(?=$|\/)/, homedir());
72
- if (!staged || !existsSync(staged)) {
73
- throw new Error(`Staged agent binary not found at "${staged}". It is extracted from your Claude Desktop install ` +
74
- `(claude-code-vm/<ver>/claude). Open Cowork once to stage it, or set COWORK_AGENT_BINARY to its path.`);
75
- }
76
- return resolve(staged);
77
- }
@@ -1,11 +1,10 @@
1
1
  import { warn } from "../io.js";
2
2
  import { spawn } from "node:child_process";
3
- import { existsSync, readFileSync } from "node:fs";
3
+ import { readFileSync } from "node:fs";
4
4
  import { resolve, join } from "node:path";
5
- import { homedir } from "node:os";
6
5
  import { fileURLToPath } from "node:url";
7
6
  import { DEFAULT_MAX_THINKING_TOKENS } from "../types.js";
8
- import { resolveMounts } from "../baseline.js";
7
+ import { resolveMounts, resolveAgentBinary } from "../baseline.js";
9
8
  import { makeWorkspaceHandler } from "../hostloop/workspace-handler.js";
10
9
  import { agentArgs, spawnEnv, dockerRunArgv, resolveMaxThinkingTokens } from "./argv.js";
11
10
  import { runtimeAuthEnv } from "./host-env.js";
@@ -116,16 +115,3 @@ function hostLoopShellSection(vmMnt, appVersion) {
116
115
  .join(vmMnt)
117
116
  .trim();
118
117
  }
119
- function resolveAgentBinary(baseline) {
120
- const override = process.env.COWORK_AGENT_BINARY;
121
- if (override) {
122
- if (!existsSync(override))
123
- throw new Error(`COWORK_AGENT_BINARY not found: ${override}`);
124
- return resolve(override);
125
- }
126
- const staged = (baseline.agentBinary?.stagedPath ?? "").replace(/^~(?=$|\/)/, homedir());
127
- if (!staged || !existsSync(staged)) {
128
- throw new Error(`Staged agent binary not found at "${staged}". Open Cowork once to stage it, or set COWORK_AGENT_BINARY.`);
129
- }
130
- return resolve(staged);
131
- }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cowork-harness",
3
- "version": "0.4.0",
3
+ "version": "0.4.2",
4
4
  "description": "Scriptable, CI-friendly harness for Claude Cowork's runtime contract for testing skills across scenarios — same agent, mounts, egress allowlist, permission protocol, and sandbox limitations.",
5
5
  "license": "MIT",
6
6
  "type": "module",