cowork-harness 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/baselines/desktop-1.13576.1.json +220 -0
- package/dist/runtime/container.js +1 -18
- package/dist/runtime/hostloop.js +2 -16
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,28 @@ All notable changes to this project are documented here. The format is based on
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [0.4.2] — 2026-06-18
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- **Platform baseline `desktop-1.13576.1`** — synced from the updated Claude Desktop (the app moved
|
|
14
|
+
`1.12603.1` → `1.13576.1`). `loadBaseline("latest")` now resolves to it. The embedded agent binary is
|
|
15
|
+
unchanged at `2.1.177` (the update changed the app shell + gate states, not the agent ELF); this baseline
|
|
16
|
+
also corrects the prior baselines' stale `2.1.170` agent pin to the actually-staged `2.1.177`. Egress
|
|
17
|
+
allowlist unchanged.
|
|
18
|
+
|
|
19
|
+
## [0.4.1] — 2026-06-18
|
|
20
|
+
|
|
21
|
+
### Fixed
|
|
22
|
+
|
|
23
|
+
- **Agent-binary newest-staged fallback now applies on the real runtime paths** (container / hostloop, and
|
|
24
|
+
thus `skill` / `run` / `chat`), not just `sync`/tests. `resolveAgentBinary` had two private duplicates
|
|
25
|
+
(`container.ts`, `hostloop.ts`) **without** the 0.4.0 fallback, so a host with a newer staged
|
|
26
|
+
`claude-code-vm/<ver>` than the baseline expects still hard-failed with "Staged agent binary not found".
|
|
27
|
+
The duplicates were consolidated into the single exported resolver; a host that has staged a newer build
|
|
28
|
+
now falls back to it (with a warning) instead of failing. A structural test + CI guard prevent the
|
|
29
|
+
resolver from being re-duplicated.
|
|
30
|
+
|
|
9
31
|
## [0.4.0] — 2026-06-18
|
|
10
32
|
|
|
11
33
|
The parsing/validation hardening + safety release: a current-tree code-review sweep plus fidelity and
|
|
package/baselines/desktop-1.13576.1.json
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
{
|
|
2
|
+
"baselineVersion": 1,
|
|
3
|
+
"appVersion": "1.13576.1",
|
|
4
|
+
"agentVersion": "2.1.177",
|
|
5
|
+
"agentBinary": {
|
|
6
|
+
"stagedPath": "~/Library/Application Support/Claude/claude-code-vm/2.1.177/claude",
|
|
7
|
+
"format": "elf-aarch64"
|
|
8
|
+
},
|
|
9
|
+
"guest": {
|
|
10
|
+
"os": "linux",
|
|
11
|
+
"arch": "arm64",
|
|
12
|
+
"baseImage": "ubuntu:22.04"
|
|
13
|
+
},
|
|
14
|
+
"spawn": {
|
|
15
|
+
"configDirInGuest": "mnt/.claude",
|
|
16
|
+
"settingSources": [
|
|
17
|
+
"user"
|
|
18
|
+
],
|
|
19
|
+
"permissionMode": "default",
|
|
20
|
+
"maxThinkingTokens": 31999,
|
|
21
|
+
"effortDefault": "medium",
|
|
22
|
+
"tools": [
|
|
23
|
+
"Task",
|
|
24
|
+
"Bash",
|
|
25
|
+
"Glob",
|
|
26
|
+
"Grep",
|
|
27
|
+
"Read",
|
|
28
|
+
"Edit",
|
|
29
|
+
"Write",
|
|
30
|
+
"NotebookEdit",
|
|
31
|
+
"WebFetch",
|
|
32
|
+
"TaskCreate",
|
|
33
|
+
"TaskUpdate",
|
|
34
|
+
"TaskGet",
|
|
35
|
+
"TaskList",
|
|
36
|
+
"TaskStop",
|
|
37
|
+
"WebSearch",
|
|
38
|
+
"Skill",
|
|
39
|
+
"REPL",
|
|
40
|
+
"JavaScript",
|
|
41
|
+
"AskUserQuestion",
|
|
42
|
+
"ToolSearch"
|
|
43
|
+
],
|
|
44
|
+
"allowedTools": [
|
|
45
|
+
"Task",
|
|
46
|
+
"Bash",
|
|
47
|
+
"Glob",
|
|
48
|
+
"Grep",
|
|
49
|
+
"Read",
|
|
50
|
+
"Edit",
|
|
51
|
+
"Write",
|
|
52
|
+
"NotebookEdit",
|
|
53
|
+
"WebFetch",
|
|
54
|
+
"TaskCreate",
|
|
55
|
+
"TaskUpdate",
|
|
56
|
+
"TaskGet",
|
|
57
|
+
"TaskList",
|
|
58
|
+
"TaskStop",
|
|
59
|
+
"WebSearch",
|
|
60
|
+
"Skill",
|
|
61
|
+
"REPL",
|
|
62
|
+
"JavaScript",
|
|
63
|
+
"ToolSearch"
|
|
64
|
+
],
|
|
65
|
+
"env": {
|
|
66
|
+
"CLAUDE_CODE_IS_COWORK": "1",
|
|
67
|
+
"CLAUDE_CODE_ENTRYPOINT": "local-agent",
|
|
68
|
+
"CLAUDE_CODE_TAGS": "lam_session_type:chat",
|
|
69
|
+
"CLAUDE_CODE_PROVIDER_MANAGED_BY_HOST": "1",
|
|
70
|
+
"CLAUDE_CODE_ENABLE_ASK_USER_QUESTION_TOOL": "true",
|
|
71
|
+
"CLAUDE_CODE_DISABLE_CRON": "1",
|
|
72
|
+
"CLAUDE_CODE_DISABLE_BACKGROUND_TASKS": "1",
|
|
73
|
+
"CLAUDE_CODE_DISABLE_AGENTS_FLEET": "1",
|
|
74
|
+
"CLAUDE_CODE_ENABLE_APPEND_SUBAGENT_PROMPT": "1",
|
|
75
|
+
"CLAUDE_CODE_ENABLE_TASKS": "true",
|
|
76
|
+
"CLAUDE_CODE_DISABLE_TERMINAL_TITLE": "1",
|
|
77
|
+
"ENABLE_PROMPT_CACHING_1H": "1",
|
|
78
|
+
"DISABLE_MICROCOMPACT": "1",
|
|
79
|
+
"MCP_CONNECTION_NONBLOCKING": "true"
|
|
80
|
+
},
|
|
81
|
+
"promptTemplate": "prompts/desktop-1.12603.1/system-prompt-append.md",
|
|
82
|
+
"subagentAppend": "prompts/desktop-1.12603.1/subagent-append-vm.md",
|
|
83
|
+
"$comment": "Binary-verified Desktop->agent spawn contract (asar 1.12603.1). See docs/cowork-spawn-contract-1.12603.1.md.",
|
|
84
|
+
"$comment_notSet": "Deliberately NOT set: CLAUDE_CODE_USE_COWORK_PLUGINS (Desktop never sets it; would flip the agent to cowork_settings.json/cowork_plugins). Host-derived (TZ, account UUIDs, WORKSPACE_HOST_PATHS, OTEL) injected at runtime, not pinned.",
|
|
85
|
+
"$comment_prompts": "Reconstructed cowork-specific sections (not the full base prompt — not cleanly extractable). The main append is delivered via the --append-system-prompt CLI flag (layered on the agent's built-in base prompt), NOT the initialize handshake; only the subagent append goes over initialize (appendSubagentSystemPrompt), gated on CLAUDE_CODE_ENABLE_APPEND_SUBAGENT_PROMPT."
|
|
86
|
+
},
|
|
87
|
+
"mountLayout": {
|
|
88
|
+
"sessionRoot": "/sessions/{sessionId}",
|
|
89
|
+
"cwd": "/sessions/{sessionId}",
|
|
90
|
+
"mntRoot": "/sessions/{sessionId}/mnt",
|
|
91
|
+
"mounts": [
|
|
92
|
+
{
|
|
93
|
+
"name": "uploads",
|
|
94
|
+
"mountPath": "uploads",
|
|
95
|
+
"mode": "r",
|
|
96
|
+
"purpose": "user-uploaded files (read-only — asar 'ro')"
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"name": "projects",
|
|
100
|
+
"mountPath": ".projects/{projectId}",
|
|
101
|
+
"mode": "rw",
|
|
102
|
+
"purpose": "selected work folders (a Space) — delete denied by default (asar IX)"
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
"name": "local-plugins",
|
|
106
|
+
"mountPath": ".local-plugins/cache",
|
|
107
|
+
"mode": "r",
|
|
108
|
+
"purpose": "marketplace skills/plugins, runtime-discovered"
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
"name": "remote-plugins",
|
|
112
|
+
"mountPath": ".remote-plugins",
|
|
113
|
+
"mode": "r",
|
|
114
|
+
"purpose": "org-remote plugins, runtime-discovered"
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
"name": "outputs",
|
|
118
|
+
"mountPath": "outputs",
|
|
119
|
+
"mode": "rw",
|
|
120
|
+
"purpose": "session outputs/artifacts — delete denied by default (asar IX); rwd only when approved"
|
|
121
|
+
}
|
|
122
|
+
]
|
|
123
|
+
},
|
|
124
|
+
"network": {
|
|
125
|
+
"mode": "gvisor",
|
|
126
|
+
"allowKind": "allowlist",
|
|
127
|
+
"allowDomains": [
|
|
128
|
+
"sentry.io",
|
|
129
|
+
"preview.claude.ai",
|
|
130
|
+
"downloads.claude.ai",
|
|
131
|
+
"api.anthropic.com",
|
|
132
|
+
"a-cdn.anthropic.com",
|
|
133
|
+
"a-api.anthropic.com",
|
|
134
|
+
"console.anthropic.com",
|
|
135
|
+
"api-staging.anthropic.com",
|
|
136
|
+
"www.anthropic.com",
|
|
137
|
+
"docs.anthropic.com",
|
|
138
|
+
"mcp-proxy.anthropic.com",
|
|
139
|
+
"pivot.claude.ai",
|
|
140
|
+
"support.anthropic.com",
|
|
141
|
+
"assets.claude.ai"
|
|
142
|
+
]
|
|
143
|
+
},
|
|
144
|
+
"bgEnvStrip": {
|
|
145
|
+
"knownVars": [
|
|
146
|
+
"CLAUDE_CODE_OAUTH_TOKEN",
|
|
147
|
+
"CLAUDE_CODE_SESSION_KIND",
|
|
148
|
+
"CLAUDE_CODE_SESSION_ID",
|
|
149
|
+
"CLAUDE_CODE_SESSION_NAME",
|
|
150
|
+
"CLAUDE_CODE_SESSION_LOG"
|
|
151
|
+
]
|
|
152
|
+
},
|
|
153
|
+
"$comment": "Platform baseline auto-derived by `cowork-harness sync` from a live Claude Desktop install + app.asar. VOLATILE per-release facts only. Regenerate per release; review the diff. Captured 2026-06-12 on macOS arm64.",
|
|
154
|
+
"capturedAt": "2026-06-17",
|
|
155
|
+
"platform": "darwin-arm64",
|
|
156
|
+
"settings": {
|
|
157
|
+
"autoMountFolders": {
|
|
158
|
+
"key": "autoMountFolders",
|
|
159
|
+
"default": false
|
|
160
|
+
},
|
|
161
|
+
"localAgentModeTrustedFolders": {
|
|
162
|
+
"key": "localAgentModeTrustedFolders",
|
|
163
|
+
"default": []
|
|
164
|
+
}
|
|
165
|
+
},
|
|
166
|
+
"provenance": {
|
|
167
|
+
"asarPath": "/Applications/Claude.app/Contents/Resources/app.asar",
|
|
168
|
+
"asarFingerprint": "88b6968a8a249dbf",
|
|
169
|
+
"gates": {
|
|
170
|
+
"$comment": "Production GrowthBook gate states decoded from ~/Library/Application Support/Claude/fcache (standard interactive Anthropic account, 2026-06-13; binary-verified app.asar 1.12603.1). Pin per release. Behavior-affecting gates the harness models: 1143815894 (loop), 1648655587 (dispatch cap), 1978029737 (web_fetch routing). Telemetry/auth-internal gates omitted.",
|
|
171
|
+
"bridgeSdkTransport:583857784": {
|
|
172
|
+
"on": true,
|
|
173
|
+
"source": "force",
|
|
174
|
+
"value": true,
|
|
175
|
+
"note": "— Cowork uses the SDK-based transport (control protocol), confirming the harness's sdkMcpServers/mcp_message path is the production transport."
|
|
176
|
+
},
|
|
177
|
+
"hostLoop:1143815894": {
|
|
178
|
+
"on": true,
|
|
179
|
+
"source": "force",
|
|
180
|
+
"value": true
|
|
181
|
+
},
|
|
182
|
+
"taskDispatchLimiter:1648655587": {
|
|
183
|
+
"on": true,
|
|
184
|
+
"source": "force",
|
|
185
|
+
"value": {
|
|
186
|
+
"perTask": 1,
|
|
187
|
+
"global": 3
|
|
188
|
+
},
|
|
189
|
+
"note": "perTask=1 global=3 (host-side SKIP: recordSkipAndEmit/GCA.PerTaskLimit — NOT queue/deny). A dispatch session launches <=1 sub-task; <=3 concurrent globally."
|
|
190
|
+
},
|
|
191
|
+
"coworkRuntimeConfig:1978029737": {
|
|
192
|
+
"on": true,
|
|
193
|
+
"source": "force",
|
|
194
|
+
"value": {
|
|
195
|
+
"sessionsBridgePollBlockMs": 30,
|
|
196
|
+
"coworkNativeFilePreview": true,
|
|
197
|
+
"coworkWebFetchViaApi": true,
|
|
198
|
+
"coworkWebFetchPrompt": true,
|
|
199
|
+
"workspaceBashWaitLonger": true
|
|
200
|
+
},
|
|
201
|
+
"note": "coworkWebFetchViaApi=true coworkWebFetchPrompt=true workspaceBashWaitLonger=true sessionsBridgePollBlockMs=30 — web_fetch is host/API-routed (POST /api/organizations/<org>/cowork/web_fetch), NOT container egress; gated by a separate web-fetch hostname allowlist + URL provenance."
|
|
202
|
+
},
|
|
203
|
+
"cliPlugin:2307090146": {
|
|
204
|
+
"on": false,
|
|
205
|
+
"source": "defaultValue",
|
|
206
|
+
"value": false,
|
|
207
|
+
"note": "— the CLI-plugin credential broker is dark-launched off for standard interactive accounts (Ch23/L106)."
|
|
208
|
+
},
|
|
209
|
+
"pluginSyncSparkplug:2340532315": {
|
|
210
|
+
"on": true,
|
|
211
|
+
"source": "force",
|
|
212
|
+
"value": true,
|
|
213
|
+
"note": "— startup syncPlugins(); plugins load via --plugin-dir (registry inert in-VM)."
|
|
214
|
+
}
|
|
215
|
+
},
|
|
216
|
+
"eipcChannelUuid": "4f426349-8d6f-45f3-ae22-280fef323564",
|
|
217
|
+
"$comment": "eipcChannelUuid is per-build; recorded for provenance only — the harness does not use Desktop IPC."
|
|
218
|
+
},
|
|
219
|
+
"requireFullVmSandbox": null
|
|
220
|
+
}
|
|
package/dist/runtime/container.js
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
|
-
import { existsSync } from "node:fs";
|
|
3
2
|
import { resolve, join } from "node:path";
|
|
4
|
-
import { homedir } from "node:os";
|
|
5
3
|
import { DEFAULT_MAX_THINKING_TOKENS } from "../types.js";
|
|
6
|
-
import { resolveMounts } from "../baseline.js";
|
|
4
|
+
import { resolveMounts, resolveAgentBinary } from "../baseline.js";
|
|
7
5
|
import { agentArgs, spawnEnv, dockerRunArgv, resolveMaxThinkingTokens } from "./argv.js";
|
|
8
6
|
import { runtimeAuthEnv } from "./host-env.js";
|
|
9
7
|
import { stageWorkspace } from "./stage.js";
|
|
@@ -60,18 +58,3 @@ export function spawnContainer(scenario, baseline, plan, outDir, sessionId, opts
|
|
|
60
58
|
});
|
|
61
59
|
return spawn(runner, dockerArgs, { stdio: ["pipe", "pipe", "pipe"] });
|
|
62
60
|
}
|
|
63
|
-
/** Resolve the staged Linux agent binary on the host (override: COWORK_AGENT_BINARY). */
|
|
64
|
-
function resolveAgentBinary(baseline) {
|
|
65
|
-
const override = process.env.COWORK_AGENT_BINARY;
|
|
66
|
-
if (override) {
|
|
67
|
-
if (!existsSync(override))
|
|
68
|
-
throw new Error(`COWORK_AGENT_BINARY not found: ${override}`);
|
|
69
|
-
return resolve(override);
|
|
70
|
-
}
|
|
71
|
-
const staged = (baseline.agentBinary?.stagedPath ?? "").replace(/^~(?=$|\/)/, homedir());
|
|
72
|
-
if (!staged || !existsSync(staged)) {
|
|
73
|
-
throw new Error(`Staged agent binary not found at "${staged}". It is extracted from your Claude Desktop install ` +
|
|
74
|
-
`(claude-code-vm/<ver>/claude). Open Cowork once to stage it, or set COWORK_AGENT_BINARY to its path.`);
|
|
75
|
-
}
|
|
76
|
-
return resolve(staged);
|
|
77
|
-
}
|
package/dist/runtime/hostloop.js
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import { warn } from "../io.js";
|
|
2
2
|
import { spawn } from "node:child_process";
|
|
3
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
4
|
import { resolve, join } from "node:path";
|
|
5
|
-
import { homedir } from "node:os";
|
|
6
5
|
import { fileURLToPath } from "node:url";
|
|
7
6
|
import { DEFAULT_MAX_THINKING_TOKENS } from "../types.js";
|
|
8
|
-
import { resolveMounts } from "../baseline.js";
|
|
7
|
+
import { resolveMounts, resolveAgentBinary } from "../baseline.js";
|
|
9
8
|
import { makeWorkspaceHandler } from "../hostloop/workspace-handler.js";
|
|
10
9
|
import { agentArgs, spawnEnv, dockerRunArgv, resolveMaxThinkingTokens } from "./argv.js";
|
|
11
10
|
import { runtimeAuthEnv } from "./host-env.js";
|
|
@@ -116,16 +115,3 @@ function hostLoopShellSection(vmMnt, appVersion) {
|
|
|
116
115
|
.join(vmMnt)
|
|
117
116
|
.trim();
|
|
118
117
|
}
|
|
119
|
-
function resolveAgentBinary(baseline) {
|
|
120
|
-
const override = process.env.COWORK_AGENT_BINARY;
|
|
121
|
-
if (override) {
|
|
122
|
-
if (!existsSync(override))
|
|
123
|
-
throw new Error(`COWORK_AGENT_BINARY not found: ${override}`);
|
|
124
|
-
return resolve(override);
|
|
125
|
-
}
|
|
126
|
-
const staged = (baseline.agentBinary?.stagedPath ?? "").replace(/^~(?=$|\/)/, homedir());
|
|
127
|
-
if (!staged || !existsSync(staged)) {
|
|
128
|
-
throw new Error(`Staged agent binary not found at "${staged}". Open Cowork once to stage it, or set COWORK_AGENT_BINARY.`);
|
|
129
|
-
}
|
|
130
|
-
return resolve(staged);
|
|
131
|
-
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cowork-harness",
|
|
3
|
-
"version": "0.4.0",
|
|
3
|
+
"version": "0.4.2",
|
|
4
4
|
"description": "Scriptable, CI-friendly harness for Claude Cowork's runtime contract for testing skills across scenarios — same agent, mounts, egress allowlist, permission protocol, and sandbox limitations.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|