claude-overnight 1.25.38 → 1.25.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_version.d.ts +1 -1
- package/dist/_version.js +1 -1
- package/dist/coach.js +23 -9
- package/dist/index.js +35 -6
- package/dist/planner-query.js +42 -0
- package/dist/providers.js +5 -0
- package/dist/steering.js +15 -2
- package/dist/swarm.js +25 -2
- package/docs/PROXIED_FAST_MODEL_RESEARCH.md +403 -0
- package/package.json +2 -2
- package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
- package/plugins/claude-overnight/skills/coach/SKILL.md +11 -10
package/dist/_version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION = "1.25.38";
|
|
1
|
+
export declare const VERSION = "1.25.41";
|
package/dist/_version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Auto-generated by build — do not edit manually.
|
|
2
|
-
export const VERSION = "1.25.38";
|
|
2
|
+
export const VERSION = "1.25.41";
|
package/dist/coach.js
CHANGED
|
@@ -5,9 +5,10 @@ import { execSync } from "child_process";
|
|
|
5
5
|
import { homedir } from "os";
|
|
6
6
|
import chalk from "chalk";
|
|
7
7
|
import { runPlannerQuery, attemptJsonParse } from "./planner-query.js";
|
|
8
|
+
import { renderWaitingIndicator } from "./render.js";
|
|
8
9
|
import { createTurn, beginTurn, endTurn } from "./turns.js";
|
|
9
10
|
import { selectKey, ask } from "./cli.js";
|
|
10
|
-
import { envFor } from "./providers.js";
|
|
11
|
+
import { envFor, isCursorProxyProvider, ensureCursorProxyRunning, PROXY_DEFAULT_URL } from "./providers.js";
|
|
11
12
|
// ── URL fetching for plan links in the objective ──
|
|
12
13
|
const URL_REGEX = /https?:\/\/[^\s<>"{}|\\^`\[\]]+/g;
|
|
13
14
|
async function fetchUrlContent(url, timeoutMs = 5_000) {
|
|
@@ -56,7 +57,7 @@ export function saveUserSettings(s) {
|
|
|
56
57
|
}
|
|
57
58
|
// ── Coach model (separate from DEFAULT_MODEL so the coach can stay cheap) ──
|
|
58
59
|
export const COACH_MODEL = "claude-haiku-4-5";
|
|
59
|
-
const COACH_TIMEOUT_MS =
|
|
60
|
+
const COACH_TIMEOUT_MS = 60_000;
|
|
60
61
|
const COACH_SOFT_STATUS_MS = 5_000;
|
|
61
62
|
// ── Raw schema matching the SKILL.md invocation contract ──
|
|
62
63
|
const COACH_SCHEMA = {
|
|
@@ -342,18 +343,31 @@ export async function runSetupCoach(rawObjective, cwd, ctx) {
|
|
|
342
343
|
}
|
|
343
344
|
const userMessage = renderRepoFacts(facts, rawObjective, ctx.providers, ctx.cliFlags, planContent);
|
|
344
345
|
const prompt = `${skill}\n\n---\n\n${userMessage}\n\nRespond with the JSON object defined in "Invocation contract" only.`;
|
|
345
|
-
|
|
346
|
+
// cursor "auto" maps to a slow thinking-class model for large prompts (182s observed).
|
|
347
|
+
// composer-2-fast gives the same quality for structured JSON at ~8s.
|
|
348
|
+
const CURSOR_FAST_MODEL = "composer-2-fast";
|
|
349
|
+
let model = ctx.coachModel ?? COACH_MODEL;
|
|
346
350
|
const startedAt = Date.now();
|
|
347
351
|
const spinner = setInterval(() => {
|
|
348
|
-
const
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
+
const indicator = renderWaitingIndicator("coach", startedAt, { style: "thinking" });
|
|
353
|
+
process.stdout.write(`\x1B[2K\r ${indicator}`);
|
|
354
|
+
}, 120);
|
|
355
|
+
if (ctx.coachProvider && isCursorProxyProvider(ctx.coachProvider)) {
|
|
356
|
+
const proxyUrl = ctx.coachProvider.baseURL || PROXY_DEFAULT_URL;
|
|
357
|
+
const proxyUp = await ensureCursorProxyRunning(proxyUrl);
|
|
358
|
+
if (!proxyUp) {
|
|
359
|
+
clearInterval(spinner);
|
|
360
|
+
process.stdout.write(`\x1B[2K\r`);
|
|
361
|
+
console.log(chalk.dim(" coach skipped: proxy failed to start"));
|
|
362
|
+
return null;
|
|
363
|
+
}
|
|
364
|
+
if (model === "auto")
|
|
365
|
+
model = CURSOR_FAST_MODEL;
|
|
366
|
+
}
|
|
352
367
|
let raw;
|
|
353
368
|
const turn = createTurn("coach", "Coach", "coach-0", model);
|
|
354
369
|
beginTurn(turn);
|
|
355
370
|
try {
|
|
356
|
-
const coachEnv = ctx.coachProvider ? envFor(ctx.coachProvider) : undefined;
|
|
357
371
|
const queryPromise = runPlannerQuery(prompt, {
|
|
358
372
|
cwd,
|
|
359
373
|
model,
|
|
@@ -362,7 +376,7 @@ export async function runSetupCoach(rawObjective, cwd, ctx) {
|
|
|
362
376
|
transcriptName: "coach",
|
|
363
377
|
maxTurns: 3,
|
|
364
378
|
tools: [],
|
|
365
|
-
env:
|
|
379
|
+
env: ctx.coachProvider ? envFor(ctx.coachProvider) : undefined,
|
|
366
380
|
turnId: turn.id,
|
|
367
381
|
}, () => { });
|
|
368
382
|
const timeout = new Promise((_, reject) => {
|
package/dist/index.js
CHANGED
|
@@ -843,20 +843,36 @@ async function main() {
|
|
|
843
843
|
* preflight now also runs a write-capability probe (see probeCursorWriteCapability) that
|
|
844
844
|
* asks cursor to Bash a marker file — so the total budget must cover auth ping + write turn. */
|
|
845
845
|
const preflightMs = (p) => isCursorProxyProvider(p) ? 90_000 : 20_000;
|
|
846
|
-
|
|
846
|
+
// Cursor's composer-2 pipeline intermittently stalls for 100s+ on a write-tool turn
|
|
847
|
+
// even though the tool succeeded (proxy logs it as "SLOW response"). A single retry
|
|
848
|
+
// almost always clears it — so we retry once on timeout-style failures for cursor
|
|
849
|
+
// proxy providers before giving up.
|
|
850
|
+
const isTimeoutError = (err) => /^timeout after /.test(err) || /: timeout after /.test(err);
|
|
851
|
+
const runPreflight = async (role, p) => {
|
|
847
852
|
statuses.set(role, "connecting…");
|
|
848
853
|
renderStatus();
|
|
849
|
-
|
|
854
|
+
let result = await preflightProvider(p, cwd, preflightMs(p), {
|
|
850
855
|
onProgress: (msg) => { statuses.set(role, msg); renderStatus(); },
|
|
851
856
|
});
|
|
857
|
+
if (!result.ok && isCursorProxyProvider(p) && isTimeoutError(result.error)) {
|
|
858
|
+
statuses.set(role, "retrying after timeout…");
|
|
859
|
+
renderStatus();
|
|
860
|
+
result = await preflightProvider(p, cwd, preflightMs(p), {
|
|
861
|
+
onProgress: (msg) => { statuses.set(role, `retry: ${msg}`); renderStatus(); },
|
|
862
|
+
});
|
|
863
|
+
}
|
|
852
864
|
statuses.delete(role);
|
|
853
865
|
renderStatus();
|
|
854
866
|
return { role, provider: p, result };
|
|
855
|
-
}
|
|
867
|
+
};
|
|
868
|
+
const results = await Promise.all(pending.map(([role, p]) => runPreflight(role, p)));
|
|
856
869
|
clearStatusLine();
|
|
870
|
+
let fastDegraded = false;
|
|
857
871
|
for (const { role, provider, result } of results) {
|
|
858
872
|
if (!result.ok) {
|
|
859
|
-
|
|
873
|
+
const degradable = role === "fast";
|
|
874
|
+
const prefix = degradable ? chalk.yellow(` ⚠ ${role} preflight failed`) : chalk.red(` ✗ ${role} preflight failed`);
|
|
875
|
+
console.error(`${prefix}: ${chalk.dim(result.error)}`);
|
|
860
876
|
if (isCursorProxyProvider(provider)) {
|
|
861
877
|
const tail = readCursorProxyLogTail(25);
|
|
862
878
|
if (tail) {
|
|
@@ -865,16 +881,29 @@ async function main() {
|
|
|
865
881
|
console.error(chalk.dim(` ${line}`));
|
|
866
882
|
}
|
|
867
883
|
const cmd = bundledComposerProxyShellCommand();
|
|
868
|
-
|
|
884
|
+
const proxyUrl = provider.baseURL || PROXY_DEFAULT_URL;
|
|
885
|
+
console.error(chalk.yellow(` The proxy at ${proxyUrl} may have crashed or timed out (e.g. keychain/UI). Retry, or start the bundled proxy: ${cmd ?? "npm install in the claude-overnight package, then re-run"}`));
|
|
869
886
|
}
|
|
870
|
-
else {
|
|
887
|
+
else if (!degradable) {
|
|
871
888
|
console.error(chalk.red(` Fix the provider at ~/.claude/claude-overnight/providers.json and retry.`));
|
|
872
889
|
}
|
|
890
|
+
if (degradable) {
|
|
891
|
+
console.error(chalk.yellow(` Continuing without fast — fast-eligible tasks will run on the worker model instead.`));
|
|
892
|
+
console.error("");
|
|
893
|
+
fastDegraded = true;
|
|
894
|
+
continue;
|
|
895
|
+
}
|
|
873
896
|
console.error("");
|
|
874
897
|
process.exit(1);
|
|
875
898
|
}
|
|
876
899
|
console.log(` ${chalk.green(`✓ ${role} ready`)} ${chalk.dim(`· ${provider.displayName} · ${provider.model}`)}`);
|
|
877
900
|
}
|
|
901
|
+
if (fastDegraded) {
|
|
902
|
+
fastModel = undefined;
|
|
903
|
+
fastProvider = undefined;
|
|
904
|
+
const rebuilt = buildEnvResolver({ plannerModel, plannerProvider, workerModel, workerProvider, fastModel, fastProvider });
|
|
905
|
+
setPlannerEnvResolver(rebuilt);
|
|
906
|
+
}
|
|
878
907
|
}
|
|
879
908
|
if (nonInteractive) {
|
|
880
909
|
const capStr = usageCap != null ? ` cap=${Math.round(usageCap * 100)}%` : "";
|
package/dist/planner-query.js
CHANGED
|
@@ -81,7 +81,49 @@ async function throttlePlanner(onLog, aborted) {
|
|
|
81
81
|
const NUDGE_MS = 15 * 60 * 1000;
|
|
82
82
|
const HARD_TIMEOUT_MS = 30 * 60 * 1000;
|
|
83
83
|
const WALL_CLOCK_LIMIT_MS = 45 * 60 * 1000;
|
|
84
|
+
// ── Cursor proxy: direct HTTP bypass ──
|
|
85
|
+
//
|
|
86
|
+
// When the env routes to a cursor proxy (CURSOR_API_KEY present, no ANTHROPIC_API_KEY),
|
|
87
|
+
// the claude-agent-sdk wrapper is harmful, not helpful:
|
|
88
|
+
// - The SDK spawns a `claude` subprocess that makes 4+ sequential HTTP calls to the proxy.
|
|
89
|
+
// - Each call spawns a fresh cursor-agent subprocess (~15s overhead each).
|
|
90
|
+
// - Total: 4 × 15s = 60s for what should be a single 10-15s completion.
|
|
91
|
+
// The SDK features (local tool loop, session resume, rate-limit headers) do not apply:
|
|
92
|
+
// - cursor-agent runs its own internal tool loop; local tool_use never fires.
|
|
93
|
+
// - cursor proxy doesn't expose session IDs or rate-limit headers.
|
|
94
|
+
// One direct POST is always correct and 4-10× faster.
|
|
95
|
+
function isCursorProxyEnv(env) {
|
|
96
|
+
return !!env?.CURSOR_API_KEY && !env?.ANTHROPIC_API_KEY;
|
|
97
|
+
}
|
|
98
|
+
async function runViaDirectFetch(prompt, opts, onLog) {
|
|
99
|
+
const env = opts.env ?? _envResolver?.(opts.model);
|
|
100
|
+
const baseUrl = (env?.ANTHROPIC_BASE_URL ?? "http://127.0.0.1:8765").replace(/\/$/, "");
|
|
101
|
+
const authToken = env?.ANTHROPIC_AUTH_TOKEN ?? "";
|
|
102
|
+
const MAX_RETRIES = 3;
|
|
103
|
+
const BACKOFF = [30_000, 60_000, 120_000];
|
|
104
|
+
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
105
|
+
const res = await fetch(`${baseUrl}/v1/messages`, {
|
|
106
|
+
method: "POST",
|
|
107
|
+
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${authToken}` },
|
|
108
|
+
body: JSON.stringify({ model: opts.model, max_tokens: 8192, messages: [{ role: "user", content: prompt }] }),
|
|
109
|
+
});
|
|
110
|
+
if (res.status === 429 && attempt < MAX_RETRIES) {
|
|
111
|
+
const waitMs = BACKOFF[attempt];
|
|
112
|
+
onLog(`Cursor proxy rate limited — waiting ${Math.round(waitMs / 1000)}s`, "event");
|
|
113
|
+
await new Promise(r => setTimeout(r, waitMs));
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
if (!res.ok)
|
|
117
|
+
throw new Error(`Cursor proxy ${res.status}: ${(await res.text().catch(() => ""))}`);
|
|
118
|
+
const data = await res.json();
|
|
119
|
+
return data.content?.[0]?.text ?? "";
|
|
120
|
+
}
|
|
121
|
+
throw new Error("Cursor proxy direct fetch failed after retries");
|
|
122
|
+
}
|
|
84
123
|
export async function runPlannerQuery(prompt, opts, onLog) {
|
|
124
|
+
const env = opts.env ?? _envResolver?.(opts.model);
|
|
125
|
+
if (isCursorProxyEnv(env))
|
|
126
|
+
return runViaDirectFetch(prompt, opts, onLog);
|
|
85
127
|
const MAX_RETRIES = 3;
|
|
86
128
|
const BACKOFF = [30_000, 60_000, 120_000];
|
|
87
129
|
let currentPrompt = prompt;
|
package/dist/providers.js
CHANGED
|
@@ -1011,6 +1011,11 @@ async function startProxyProcess(baseUrl, url, port) {
|
|
|
1011
1011
|
// cursor-composer chat-only mode fakes HOME to a temp dir; on macOS the agent still waits on
|
|
1012
1012
|
// Keychain (~30s) for `cursor-user` despite CURSOR_API_KEY. Use the real workspace profile.
|
|
1013
1013
|
CURSOR_BRIDGE_CHAT_ONLY_WORKSPACE: "false",
|
|
1014
|
+
// Broad base so per-request `X-Cursor-Workspace` headers (set from each
|
|
1015
|
+
// agent's cwd in swarm.ts) validate under the proxy's `resolveWorkspace`
|
|
1016
|
+
// check. Without this, proxied agents in worktrees all resolve to the
|
|
1017
|
+
// proxy's startup cwd.
|
|
1018
|
+
CURSOR_BRIDGE_WORKSPACE: "/",
|
|
1014
1019
|
};
|
|
1015
1020
|
if (sysNode && agentJs) {
|
|
1016
1021
|
proxyEnv.CURSOR_AGENT_NODE = sysNode;
|
package/dist/steering.js
CHANGED
|
@@ -89,6 +89,9 @@ You have full creative freedom. Design the wave that will have the highest impac
|
|
|
89
89
|
**Polish** -- Agents focus purely on feel: loading states, error messages, micro-interactions, empty states, responsiveness. Not features -- the texture that makes users trust the product.
|
|
90
90
|
Example: 2 agents, one on happy paths, one on error/edge states
|
|
91
91
|
|
|
92
|
+
**Simplify** -- A fast model agent reviews recent changes and cleans them up. Set "model": "fast".
|
|
93
|
+
Example: 1 fast agent reviews files changed in the last wave, runs git diff, removes bloat
|
|
94
|
+
|
|
92
95
|
You can combine these. A wave can have 3 execute agents + 1 verification agent. Or 2 divergent explorers. Whatever the situation calls for.
|
|
93
96
|
|
|
94
97
|
For non-execute tasks (critique, verify, user-test, synthesize), tell agents to write their output to files in the run directory so findings persist for future waves. Use paths like: .claude-overnight/latest/reflections/wave-N-{topic}.md or .claude-overnight/latest/verifications/wave-N-{topic}.md.
|
|
@@ -111,8 +114,18 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
111
114
|
|
|
112
115
|
"estimatedSessionsRemaining" is REQUIRED. Your best honest estimate of how many MORE agent sessions (beyond the wave you just composed above) are needed to reach 'amazing' -- include follow-up fixes, polish, verification, and anything else you'd want before shipping. Be realistic, not optimistic. Use 0 only if truly done.
|
|
113
116
|
|
|
114
|
-
The "model" field on each task
|
|
115
|
-
|
|
117
|
+
The "model" field on each task — pick based on the task's scope and risk:
|
|
118
|
+
|
|
119
|
+
**Use "fast" (${fastModel ?? "not set"})** for well-scoped, mechanical tasks where speed matters more than deep reasoning. The next wave's worker pass will catch and fix any issues:
|
|
120
|
+
- Single-file edits, refactors, renames
|
|
121
|
+
- Read/research: scan files, summarize findings
|
|
122
|
+
- Build checks, postcondition verification
|
|
123
|
+
- E2E test runs with concrete steps
|
|
124
|
+
- Simple critiques, polish tweaks
|
|
125
|
+
|
|
126
|
+
**Use "worker" (${workerModel})** for multi-file features, complex logic, architectural changes, and any task where model quality matters.
|
|
127
|
+
|
|
128
|
+
Set "noWorktree": true for verify/user-test tasks -- they need the real project directory with env files, dependencies, and local config.
|
|
116
129
|
|
|
117
130
|
OPTIONAL "postcondition": a single shell one-liner that exits 0 when the task is truly done. The framework runs it after merge; if it fails, the agent's "no-op" claim is rejected and the task is retried with the failure output as context. Use it whenever the task has a concrete, machine-checkable outcome. Examples: \`test -f src/tracking/watchlist-poller.ts && grep -q "runWatchlistPoll" src/tracking/watchlist-poller.ts\`, \`grep -q "watchlistPollerTask" src/scraper/scheduler.ts\`, \`pnpm run build\`, \`diff -q src/public/index.html frontend/dist/index.html\`. Keep it cheap (sub-second, no network). Omit for exploratory/research tasks where there is no crisp check.
|
|
118
131
|
|
package/dist/swarm.js
CHANGED
|
@@ -6,6 +6,27 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
|
6
6
|
import { NudgeError, RATE_LIMIT_WINDOW_SHORT, extractToolTarget, sumUsageTokens } from "./types.js";
|
|
7
7
|
import { gitExec, autoCommit, mergeAllBranches, warnDirtyTree, cleanStaleWorktrees, writeSwarmLog } from "./merge.js";
|
|
8
8
|
import { ensureCursorProxyRunning, PROXY_DEFAULT_URL } from "./providers.js";
|
|
9
|
+
/**
|
|
10
|
+
* Proxied Cursor models ignore SDK `cwd` and use their own workspace
|
|
11
|
+
* resolution. Inject `X-Cursor-Workspace` via ANTHROPIC_CUSTOM_HEADERS so the
|
|
12
|
+
* proxy's per-request workspace override points at this agent's cwd.
|
|
13
|
+
* Requires the proxy to run with `CURSOR_BRIDGE_WORKSPACE=/` (or a parent of
|
|
14
|
+
* all worktree paths) so the header value passes the safety check.
|
|
15
|
+
*/
|
|
16
|
+
function withCursorWorkspaceHeader(env, cwd) {
|
|
17
|
+
if (!env)
|
|
18
|
+
return undefined;
|
|
19
|
+
if (env.ANTHROPIC_BASE_URL !== PROXY_DEFAULT_URL)
|
|
20
|
+
return env;
|
|
21
|
+
const hdr = `X-Cursor-Workspace: ${cwd}`;
|
|
22
|
+
const existing = env.ANTHROPIC_CUSTOM_HEADERS?.trim();
|
|
23
|
+
return {
|
|
24
|
+
...env,
|
|
25
|
+
ANTHROPIC_CUSTOM_HEADERS: existing
|
|
26
|
+
? `${existing}\n${hdr}`
|
|
27
|
+
: hdr,
|
|
28
|
+
};
|
|
29
|
+
}
|
|
9
30
|
import { getModelCapability } from "./models.js";
|
|
10
31
|
import { createTurn, beginTurn, endTurn, updateTurn } from "./turns.js";
|
|
11
32
|
const SIMPLIFY_PROMPT = `You just finished your task. Now review and simplify your changes.
|
|
@@ -561,7 +582,7 @@ export class Swarm {
|
|
|
561
582
|
? `You are working in an isolated git worktree. Focus only on this task. Do NOT commit your changes -- the framework handles that.\n\n${preamble}${task.prompt}${postBlock}`
|
|
562
583
|
: `${preamble}${task.prompt}${postBlock}`;
|
|
563
584
|
const effectiveModel = task.model || this.config.model;
|
|
564
|
-
const envOverride = this.config.envForModel?.(effectiveModel);
|
|
585
|
+
const envOverride = withCursorWorkspaceHeader(this.config.envForModel?.(effectiveModel), agentCwd);
|
|
565
586
|
const agentQuery = query({
|
|
566
587
|
prompt: agentPrompt,
|
|
567
588
|
options: {
|
|
@@ -786,7 +807,9 @@ Respond with JSON: {"keep": true/false, "reason": "brief explanation"}`;
|
|
|
786
807
|
allowDangerouslySkipPermissions: true,
|
|
787
808
|
maxTurns: 1,
|
|
788
809
|
persistSession: false,
|
|
789
|
-
...(envFor?.(evalModel) && {
|
|
810
|
+
...(envFor?.(evalModel) && {
|
|
811
|
+
env: withCursorWorkspaceHeader(envFor(evalModel), this.config.cwd),
|
|
812
|
+
}),
|
|
790
813
|
},
|
|
791
814
|
});
|
|
792
815
|
this.activeQueries.add(eq);
|
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
# Proxied fast-model research — Skills, tool_use, workspace, and cursor-native translation
|
|
2
|
+
|
|
3
|
+
Session date: 2026-04-18. Status: **research notes, no code changes yet.** Picks up where `CURSOR_PROXY_MACOS_DISCOVERY.md` left off.
|
|
4
|
+
|
|
5
|
+
Goal: understand what happens when a proxied Cursor model (composer-2-fast via cursor-composer-in-claude) is dispatched through the Agent SDK's `query()` — specifically whether Anthropic skills and tool-use introspection work, and what would be needed to make proxied fast models feel "just like another endpoint" (qwen-style).
|
|
6
|
+
|
|
7
|
+
## TL;DR findings
|
|
8
|
+
|
|
9
|
+
1. **Proxied fast models cannot invoke the Skill tool.** Not a phrasing issue — cursor-agent has its own hardcoded tool loop and treats SDK-provided tools (Skill, Task, sub-Agent, etc.) as text context only.
|
|
10
|
+
2. **Zero `tool_use` content blocks surface to the SDK.** cursor-agent emits rich `tool_call` events in its `stream-json` output, but the proxy's `cli-stream-parser.ts` only parses `type:"assistant"` blocks with nested `part.type==="tool_use"`. It drops every `tool_call` event on the floor. ~30 LOC fix.
|
|
11
|
+
3. **SDK `cwd` option is ignored** by cursor-agent. Worktree isolation with proxied agents needs a per-request `X-Cursor-Workspace` header (already supported by the proxy) plus `CURSOR_BRIDGE_WORKSPACE=/` (or a broad enough base directory).
|
|
12
|
+
4. **Proxy version floor is 0.9.4.** v0.9.2 forced `--mode ask` (read-only); fixed in 0.9.3 but 0.9.3 was never published. `npm install cursor-composer-in-claude@0.9.4` gets agent-mode default.
|
|
13
|
+
5. **The cloud endpoint is `https://agentn.global.api5.cursor.sh/agent.v1.AgentService/Run`** — HTTP/2 + protobuf, not JSON. It's an *agent* endpoint, not a pure model endpoint.
|
|
14
|
+
6. **Cursor-native rules work perfectly as skill equivalents.** `.cursor/rules/*.mdc` files with frontmatter are discovered, read, and followed by cursor-agent verbatim — including slash-command invocation like `/simplify`.
|
|
15
|
+
|
|
16
|
+
## Baseline: what works vs doesn't
|
|
17
|
+
|
|
18
|
+
| | Haiku 4.5 direct | composer-2-fast via proxy (0.9.4) |
|
|
19
|
+
|---|---|---|
|
|
20
|
+
| `/simplify` Skill invocation | ✅ 12 tool calls, follows skill recipe (3 parallel review agents) | ❌ model says "Skill tool isn't wired up in this session" |
|
|
21
|
+
| File actually simplified | ✅ | ✅ (done inline via cursor-agent's internal tools) |
|
|
22
|
+
| `tool_use` blocks surface to SDK | ✅ Read, Edit, Bash, Agent visible | ❌ zero — everything is invisible |
|
|
23
|
+
| `cwd: <path>` option | ✅ respected | ❌ cursor-agent uses its own workspace resolution |
|
|
24
|
+
| Cost | $0.21 | $0.068 (≈3× cheaper) |
|
|
25
|
+
| Duration | 41s | 24–43s |
|
|
26
|
+
|
|
27
|
+
## How I tested
|
|
28
|
+
|
|
29
|
+
All probes in `/tmp/simplify-probe/` (scratch dir, not committed). Created a trivial messy TypeScript file:
|
|
30
|
+
|
|
31
|
+
```ts
|
|
32
|
+
export function add(a: number, b: number): number {
|
|
33
|
+
const result: number = a + b;
|
|
34
|
+
return result;
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Then spawned `query()` from `@anthropic-ai/claude-agent-sdk` with different model/env combinations, each time asking it to simplify the file.
|
|
39
|
+
|
|
40
|
+
### 1. Haiku 4.5 direct (baseline)
|
|
41
|
+
|
|
42
|
+
```js
|
|
43
|
+
const agent = query({
|
|
44
|
+
prompt: "Please run /simplify on messy.ts in the current directory.",
|
|
45
|
+
options: { cwd: "/tmp/simplify-probe", model: "claude-haiku-4-5-20251001", permissionMode: "bypassPermissions" },
|
|
46
|
+
});
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
- **Result:** invoked `Skill({skill:"simplify", args:"messy.ts"})` on turn 1, then launched 3 parallel `general-purpose` subagents (reuse/quality/efficiency), then edited.
|
|
50
|
+
- **Tool calls surfaced:** Skill, Read (×2), Bash (×3), Agent (×3), Edit (×1).
|
|
51
|
+
- **Cost / time:** $0.21 / 41s.
|
|
52
|
+
|
|
53
|
+
### 2. composer-2-fast via cursor-composer-in-claude (v0.9.2 — broken)
|
|
54
|
+
|
|
55
|
+
Symptoms that led us to debug:
|
|
56
|
+
- Text reply: *"### Ask mode — I can't run `/simplify` or change messy.ts from here. That needs Agent mode."*
|
|
57
|
+
- Zero tool calls.
|
|
58
|
+
- File not modified.
|
|
59
|
+
- Looked for the file in the proxy's startup cwd, not the SDK's.
|
|
60
|
+
|
|
61
|
+
Root cause (from `cursor-composer-in-claude/CHANGELOG.md` 0.9.3):
|
|
62
|
+
|
|
63
|
+
> `--mode agent` is now the default — Previously the proxy always appended `--mode <plan|ask>` to every cursor-agent invocation. Current cursor-agent treats both as strictly read-only (Write/Bash calls are silently dropped, exit 0 with empty stdout).
|
|
64
|
+
|
|
65
|
+
Fix: `npm install cursor-composer-in-claude@0.9.4`. The package.json already pins `^0.9.4` but our `node_modules` had stale 0.9.2.
|
|
66
|
+
|
|
67
|
+
### 3. composer-2-fast via v0.9.4 (now agent-mode default)
|
|
68
|
+
|
|
69
|
+
Model now does real work but:
|
|
70
|
+
- Edits `src/__tests__/simplify-target.ts` in the claude-overnight repo instead of `/tmp/simplify-probe/messy.ts`, because it resolves cwd from the proxy's startup dir, not the SDK's `cwd: "/tmp/simplify-probe"` option. **Real bug for claude-overnight worktree isolation.**
|
|
71
|
+
- Still zero `tool_use` blocks surfaced. File changes happen through cursor-agent's internal Write tool and don't bubble up.
|
|
72
|
+
|
|
73
|
+
### 4. composer-2-fast with workspace header (the fix)
|
|
74
|
+
|
|
75
|
+
```js
|
|
76
|
+
const env = envFor(p);
|
|
77
|
+
env.ANTHROPIC_CUSTOM_HEADERS = "X-Cursor-Workspace: /tmp/simplify-probe";
|
|
78
|
+
// and start proxy with CURSOR_BRIDGE_WORKSPACE=/
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
- Agent SDK honors `ANTHROPIC_CUSTOM_HEADERS` env var (newline-separated `Key: Value` pairs — confirmed in `cli.js` string `ANTHROPIC_CUSTOM_HEADERS`).
|
|
82
|
+
- Proxy's `resolveWorkspace()` in `workspace.ts:50` reads `x-cursor-workspace` header; validates that the requested path is under `config.workspace` (the proxy's base). Setting base to `/` (or a broad parent) lets arbitrary worktree paths validate.
|
|
83
|
+
- Three prompt variants (`/simplify`, "use the simplify skill", concrete instructions) all simplified correctly now. Still 0 tool_use blocks.
|
|
84
|
+
|
|
85
|
+
### 5. Forcing the Skill tool explicitly (confirmation test)
|
|
86
|
+
|
|
87
|
+
Prompt: *"You have a tool named Skill. Invoke it now with parameters {skill: \"simplify\", args: \"messy.ts\"}. Do not do any work yourself — your only job is to emit that one Skill tool call."*
|
|
88
|
+
|
|
89
|
+
Response: *"I don't have a `Skill` tool in this Cursor session, so I can't emit that call here."*
|
|
90
|
+
|
|
91
|
+
Confirmed: the model is correctly reporting that the Skill tool isn't actually callable from its vantage point. Not a prompting issue.
|
|
92
|
+
|
|
93
|
+
## Why tool_use doesn't surface
|
|
94
|
+
|
|
95
|
+
Ran cursor-agent directly, bypassing the proxy:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
CI=true CURSOR_SKIP_KEYCHAIN=1 CURSOR_API_KEY="..." \
|
|
99
|
+
/opt/homebrew/bin/node /Users/francesco/.local/share/cursor-agent/versions/2026.04.17-479fd04/index.js \
|
|
100
|
+
-p --output-format stream-json --stream-partial-output \
|
|
101
|
+
--trust --workspace /tmp/simplify-probe --model composer-2-fast \
|
|
102
|
+
"read messy.ts then edit it to remove the intermediate result variable"
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
**Cursor-agent emits rich `tool_call` events** (not `tool_use`):
|
|
106
|
+
|
|
107
|
+
```json
|
|
108
|
+
{"type":"tool_call","subtype":"started","call_id":"tool_…","tool_call":{"readToolCall":{"args":{"path":"/tmp/simplify-probe/messy.ts"}}}}
|
|
109
|
+
{"type":"tool_call","subtype":"completed","call_id":"tool_…","tool_call":{"readToolCall":{"args":{…},"result":{"success":{"content":"…","totalLines":5,"fileSize":103,"path":"…","readRange":{"startLine":1,"endLine":5}}}}}}
|
|
110
|
+
{"type":"tool_call","subtype":"started","tool_call":{"editToolCall":{"args":{"path":"/tmp/simplify-probe/messy.ts","streamContent":"export function add(a: number, b: number): number {\n return a + b;\n}"}}}}
|
|
111
|
+
{"type":"tool_call","subtype":"completed","tool_call":{"editToolCall":{"args":{…},"result":{"success":{"linesAdded":1,"linesRemoved":2,"diffString":"--- a//tmp/simplify-probe/messy.ts\n+++ …"}}}}}
|
|
112
|
+
{"type":"tool_call","subtype":"started","tool_call":{"readLintsToolCall":{"args":{"paths":["/tmp/simplify-probe/messy.ts"]}}}}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Tool taxonomy observed (there are more — this is just what I triggered):
|
|
116
|
+
|
|
117
|
+
| cursor-agent event | Mapping to Anthropic standard |
|
|
118
|
+
|---|---|
|
|
119
|
+
| `readToolCall` | `Read` |
|
|
120
|
+
| `editToolCall` | `Edit` (also `Write` when streamContent is full file) |
|
|
121
|
+
| `readLintsToolCall` | (no direct equivalent — could be "LSP diagnostics") |
|
|
122
|
+
| `globToolCall` | `Glob` |
|
|
123
|
+
| `grepToolCall` | `Grep` |
|
|
124
|
+
| `shellToolCall` | `Bash` |
|
|
125
|
+
| `taskToolCall` | `Task` / `Agent` (parallel sub-agents — confirmed working) |
|
|
126
|
+
| `webFetchToolCall` | `WebFetch` |
|
|
127
|
+
| `webSearchToolCall` | `WebSearch` |
|
|
128
|
+
|
|
129
|
+
The proxy's `cli-stream-parser.ts` only handles:
|
|
130
|
+
|
|
131
|
+
```ts
|
|
132
|
+
if (obj.type === "assistant" && obj.message?.content) {
|
|
133
|
+
for (const part of obj.message.content) {
|
|
134
|
+
if (part.type === "text") …
|
|
135
|
+
else if (part.type === "thinking") …
|
|
136
|
+
else if (part.type === "tool_use" && part.id && part.name) …
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
if (obj.type === "result" && obj.subtype === "success") { done = true; onDone(); }
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**It never matches `obj.type === "tool_call"`.** That's the bug. The `anthropic-sse-writer.ts` at lines 59–82 already has a full `kind: "tool_use"` → SSE `content_block_start` path. We just don't feed it.
|
|
143
|
+
|
|
144
|
+
Fix sketch (~30 LOC in `cli-stream-parser.ts`):
|
|
145
|
+
|
|
146
|
+
```ts
|
|
147
|
+
if (obj.type === "tool_call" && obj.subtype === "started") {
|
|
148
|
+
const [kind, body] = Object.entries(obj.tool_call)[0]; // e.g. ["readToolCall", {args, ...}]
|
|
149
|
+
const name = mapToolName(kind); // readToolCall → Read
|
|
150
|
+
const input = translateArgs(kind, body.args); // keep args shape the Anthropic SDK expects
|
|
151
|
+
onEvent({ kind: "tool_use", id: obj.call_id, name, input });
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
(May also need to buffer tool results and forward them as `tool_result` content blocks in the next turn, depending on how the Agent SDK wants to correlate them.)
|
|
156
|
+
|
|
157
|
+
## The cloud endpoint — what Cursor actually talks to
|
|
158
|
+
|
|
159
|
+
Instrumented cursor-agent with a `NODE_OPTIONS=--require` preload (`/tmp/simplify-probe/fetch-logger.cjs`) that hooks `global.fetch`, `http.request`, `https.request`, and `http2.connect`. Only http2 captured the real chat traffic — cursor-agent uses undici under the hood, but the chat RPC goes through node:http2.
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
HTTP/2 POST https://agentn.global.api5.cursor.sh/agent.v1.AgentService/Run
|
|
163
|
+
Authorization: Bearer <JWT>
|
|
164
|
+
Content-Type: (protobuf, inferred — body is binary)
|
|
165
|
+
Body: 153 KB for a "what is 2+2" prompt (!!)
|
|
166
|
+
Response: streaming, ~9 KB+ rolling
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Plus many auxiliary JSON HTTP/1.1 calls to `https://api2.cursor.sh/aiserver.v1.*Service/*`:
|
|
170
|
+
- `AnalyticsService/BootstrapStatsig`
|
|
171
|
+
- `DashboardService/GetMe`, `GetTeamAdminSettings…`, `GetTeamHooks`, `GetManagedSkills`
|
|
172
|
+
- `ServerConfigService/GetServerConfig`
|
|
173
|
+
- `AiService/GetUsableModels`, `GetDefaultModelForCli`
|
|
174
|
+
- `AnalyticsService/SubmitLogs`, `TrackEvents`
|
|
175
|
+
- `DashboardService/GetCliDownloadUrl`
|
|
176
|
+
- `/v1/traces` (OTEL)
|
|
177
|
+
|
|
178
|
+
The chat endpoint `agent.v1.AgentService/Run` is revealing: **it's an agent-loop RPC, not a model-completion endpoint**. It expects the client to hold conversational state, execute tools locally, and feed tool results back for the next step. The 153 KB initial payload carries the whole context (prompt + tool defs + workspace hints + history).
|
|
179
|
+
|
|
180
|
+
So composer-2-fast's *only* public interface is the agent loop. There's no bare "generate text from this prompt" endpoint to call qwen-style.
|
|
181
|
+
|
|
182
|
+
## Full path A: bypass cursor-agent (the qwen dream) — not recommended
|
|
183
|
+
|
|
184
|
+
What it would take:
|
|
185
|
+
1. Extract `agent.v1.*` proto schema from `cursor-agent-svc.js` (contains hundreds of message type definitions — looks doable but tedious).
|
|
186
|
+
2. Implement protobuf codec for request + streaming response.
|
|
187
|
+
3. Handle JWT refresh (observed short-lived tokens ~1h expiry).
|
|
188
|
+
4. Translate Anthropic tool_use ↔ cursor tool_call format bidirectionally.
|
|
189
|
+
5. Handle all the auxiliary RPCs (`BootstrapStatsig`, `GetUsableModels`, etc.) that cursor-agent fires on startup.
|
|
190
|
+
6. Maintain against Cursor's API churn indefinitely.
|
|
191
|
+
|
|
192
|
+
**Weeks of work, permanent maintenance tax, can break any time.** Probably also violates Cursor's TOS.
|
|
193
|
+
|
|
194
|
+
Also: even if we do this, SDK-provided tools like Skill wouldn't automatically "just work" — we'd need to map them to cursor's native tool concepts anyway, which we can do without the protobuf spike.
|
|
195
|
+
|
|
196
|
+
## Full path B+C: fix the parser + expose cursor tools as Anthropic names (recommended)
|
|
197
|
+
|
|
198
|
+
Scope:
|
|
199
|
+
|
|
200
|
+
1. **`cli-stream-parser.ts` — translate `tool_call` events to `tool_use` events.** ~30 LOC. Gives the SDK full tool visibility: progress UI, budget tracking, nudge-on-silence, logs.
|
|
201
|
+
2. **Tool-name mapping** (tiny table in the proxy): `readToolCall → Read`, `editToolCall → Edit`, `globToolCall → Glob`, `shellToolCall → Bash`, etc.
|
|
202
|
+
3. **Rewrite `toolsToSystemText`**: drop SDK-provided tools that cursor-agent can't honor (e.g. Skill) from the system text. Keep `Task` / sub-Agent — it maps to cursor-agent's native `taskToolCall` (see "Parallel sub-agents — confirmed" below). Advertise only the cursor-native tools that actually execute, under Anthropic-standard names.
|
|
203
|
+
|
|
204
|
+
After this, the SDK sees: `assistant → tool_use(Read) → tool_result → tool_use(Edit) → …` exactly like a direct Anthropic session.
|
|
205
|
+
|
|
206
|
+
## Path D — **skill translation via `.cursor/rules/*.mdc`** (the killer unlock)
|
|
207
|
+
|
|
208
|
+
cursor-agent supports `.cursor/rules/<name>.mdc` files natively (confirmed: `cursor-agent rule` subcommand, `generate-rule`, rules auto-discovered). Shape:
|
|
209
|
+
|
|
210
|
+
```markdown
|
|
211
|
+
---
|
|
212
|
+
description: Short description for the model to decide when to apply
|
|
213
|
+
alwaysApply: false
|
|
214
|
+
# globs: optional
|
|
215
|
+
---
|
|
216
|
+
# Rule body
|
|
217
|
+
|
|
218
|
+
Instructions the agent follows…
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
**Proof that cursor-agent resolves them autonomously** — wrote `/tmp/skilltest/.cursor/rules/simplify.mdc` with a description matching Anthropic's simplify skill, then ran:
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
cursor-agent -p --workspace /tmp/skilltest --model composer-2-fast "/simplify messy.ts"
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
First emitted tool call:
|
|
228
|
+
|
|
229
|
+
```json
|
|
230
|
+
{"tool_call":{"readToolCall":{"args":{"path":"/tmp/skilltest/.cursor/rules/simplify.mdc"}}}}
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
**Cursor-agent autonomously discovered, read, and followed the rule.** File was simplified according to the rule body. Full tool stream: read rule → glob for target → read target → edit → lint.
|
|
234
|
+
|
|
235
|
+
### Translation map
|
|
236
|
+
|
|
237
|
+
| Anthropic | Cursor |
|
|
238
|
+
|---|---|
|
|
239
|
+
| `SKILL.md` frontmatter `name`, `description`, `type` | `.mdc` frontmatter `description`, `alwaysApply`, `globs` |
|
|
240
|
+
| Skill body | Rule body |
|
|
241
|
+
| Skill lives in plugin/user dir | Rule lives in `.cursor/rules/` or `~/.cursor/rules/` |
|
|
242
|
+
| Slash invocation `/simplify` | Slash invocation `/simplify` (identical UX — model resolves from description) |
|
|
243
|
+
| Model-selected based on task | Model-selected based on task (identical) |
|
|
244
|
+
| MCP tools | `.cursor/mcp.json` MCP tools (universal MCP protocol — no translation) |
|
|
245
|
+
| `CLAUDE.md` | `.cursor/rules/_always.mdc` with `alwaysApply: true` |
|
|
246
|
+
|
|
247
|
+
### Proxy behavior after adding skill translation
|
|
248
|
+
|
|
249
|
+
Per request:
|
|
250
|
+
1. Receive Anthropic `/v1/messages` with tools + system + user prompt.
|
|
251
|
+
2. Extract skill metadata (names + descriptions). Full bodies either:
|
|
252
|
+
- (a) bundled in the proxy for well-known Anthropic skills, OR
|
|
253
|
+
- (b) sent by claude-overnight as custom headers / system-prompt extra blocks, OR
|
|
254
|
+
- (c) the Agent SDK exposes them via a mechanism TBD.
|
|
255
|
+
3. Materialize each advertised skill as `.cursor/rules/<name>.mdc` in the workspace (or per-request temp dir if `chatOnlyWorkspace`).
|
|
256
|
+
4. Strip Skill from `toolsToSystemText` (it's unneeded now — skills live on disk as rules). Keep Task / sub-Agent: it maps to cursor-agent's native `taskToolCall`.
|
|
257
|
+
5. Run cursor-agent.
|
|
258
|
+
6. `tool_call` → `tool_use` translation streams back (from B).
|
|
259
|
+
|
|
260
|
+
**Result:** from the SDK's view, proxied fast models now honor skills. From cursor-agent's view, it's a normal Cursor session.
|
|
261
|
+
|
|
262
|
+
### Caveats
|
|
263
|
+
|
|
264
|
+
- **Skill bodies need to travel** — simplest path: bundle the common ones (simplify, security-review, etc.) with the proxy. Less clean but works day one.
|
|
265
|
+
- **Rule-file writes need per-request workspace isolation** — tie-in with the `X-Cursor-Workspace` fix. Don't stomp on parallel agents.
|
|
266
|
+
- **`alwaysApply: false`** rules are model-selected based on description — works well in practice (test confirmed composer-2-fast picked up the rule on `/simplify`). For stronger guarantees use `alwaysApply: true` or matching `globs`.
|
|
267
|
+
- **Sub-skill chains** (skill A invokes skill B) — Cursor rules can reference other rules (`@ruleName`). Needs a naming convention.
|
|
268
|
+
- **Parallel sub-agents DO work.** Earlier version of this doc claimed cursor-agent was single-agent — that was wrong. cursor-agent ships a first-class `TaskToolCall` (proto `agent.v1.TaskToolCallArgsProto`, fields `description`/`prompt`/`model`/`subagent_type`/`resume`/`readonly`/`run_in_background`/`attachments` — identical shape to Anthropic's Task tool). Runtime creates `kind: "subagent"` sessions with their own `agentId`, and the UI explicitly groups parallel `taskToolCall`s. See "Parallel sub-agents — confirmed" below for the empirical test. `/simplify`'s 3-reviewer fan-out replicates directly.
|
|
269
|
+
|
|
270
|
+
## Parallel sub-agents — confirmed (2026-04-18)
|
|
271
|
+
|
|
272
|
+
Empirical test that cursor-agent runs sub-agents concurrently, not sequentially.
|
|
273
|
+
|
|
274
|
+
Setup: `/tmp/subagent-probe/` with `messy.ts` and `.cursor/rules/fanout.mdc`. Rule body instructs the model to spawn three Task sub-agents in a single turn (count lines / count exports / find inline candidates).
|
|
275
|
+
|
|
276
|
+
Invocation:
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
CI=true CURSOR_SKIP_KEYCHAIN=1 CURSOR_API_KEY=… \
|
|
280
|
+
/opt/homebrew/bin/node /Users/francesco/.local/share/cursor-agent/versions/2026.04.17-479fd04/index.js \
|
|
281
|
+
-p --output-format stream-json --trust \
|
|
282
|
+
--workspace /tmp/subagent-probe --model composer-2-fast \
|
|
283
|
+
"/fanout messy.ts"
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
Observed in `stream-json`:
|
|
287
|
+
|
|
288
|
+
```
|
|
289
|
+
other started: readToolCall # rule discovery
|
|
290
|
+
other started: readToolCall # target file
|
|
291
|
+
task started id=tool_171e… desc=Count lines in messy.ts
|
|
292
|
+
task started id=tool_d2ab… desc=Count exports in messy.ts
|
|
293
|
+
task started id=tool_da0d… desc=Inline candidates in messy.ts
|
|
294
|
+
task completed id=tool_d2ab…
|
|
295
|
+
task completed id=tool_da0d…
|
|
296
|
+
task completed id=tool_171e…
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
Three `taskToolCall`s dispatched in the same assistant turn. **Start order (171e, d2ab, da0d) differs from completion order (d2ab, da0d, 171e) — proves concurrent execution.** Each sub-agent got its own `agentId` and ran its own internal tools independently (one used `shellToolCall` for `wc -l`, the others used `readToolCall`).
|
|
300
|
+
|
|
301
|
+
Task call payload shape (what the SDK must encode when surfacing):
|
|
302
|
+
|
|
303
|
+
```json
|
|
304
|
+
{
|
|
305
|
+
"taskToolCall": {
|
|
306
|
+
"args": {
|
|
307
|
+
"description": "Count lines in messy.ts",
|
|
308
|
+
"prompt": "Read the file at absolute path /tmp/subagent-probe/messy.ts. Report ONLY the total number of lines…",
|
|
309
|
+
"subagentType": {"unspecified": {}},
|
|
310
|
+
"model": "composer-2-fast",
|
|
311
|
+
"agentId": "0b2fd6e9-9e3f-406a-92b6-8c87072303be",
|
|
312
|
+
"attachments": [],
|
|
313
|
+
"mode": "TASK_MODE_UNSPECIFIED",
|
|
314
|
+
"respondingToMessageIds": []
|
|
315
|
+
},
|
|
316
|
+
"result": {"success": {"conversationSteps": [ /* nested tool calls executed by the subagent */ ]}}
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
Totals: 12.1s, 9.8k input / 826 output tokens for the full fan-out including parent aggregation.
|
|
322
|
+
|
|
323
|
+
**Implications for Path B/D:**
|
|
324
|
+
|
|
325
|
+
1. `cli-stream-parser.ts` tool-name table must include `taskToolCall → Task` (or `Agent`, whichever name the SDK expects for the parent-visible sub-agent tool).
|
|
326
|
+
2. Subagent inner events live inside `result.success.conversationSteps`. Decide whether to flatten them into the outer event stream (so the SDK sees `tool_use(Task) → tool_use(Read) inside → tool_result(Task)` as a nested tree) or collapse them into just the outer Task tool_use/tool_result pair. The latter is simpler and matches Anthropic's Task-tool UX, where sub-agent internals are opaque to the caller.
|
|
327
|
+
3. `subagent_type` can be left unspecified; cursor-agent accepts it. `model` defaults to the parent's model (inherited), which is the right default.
|
|
328
|
+
|
|
329
|
+
Raw stream preserved at `/tmp/subagent-probe/run.jsonl` for later inspection.
|
|
330
|
+
|
|
331
|
+
## Per-workspace isolation — the adjacent bug
|
|
332
|
+
|
|
333
|
+
Independent of skills, claude-overnight currently has a real correctness issue for proxied agents in worktrees:
|
|
334
|
+
|
|
335
|
+
```ts
|
|
336
|
+
// src/swarm.ts:578 — current spawn
|
|
337
|
+
const agentQuery = query({
|
|
338
|
+
prompt: agentPrompt,
|
|
339
|
+
options: {
|
|
340
|
+
cwd: agentCwd, model: effectiveModel, permissionMode: perm,
|
|
341
|
+
allowedTools: this.config.allowedTools,
|
|
342
|
+
…
|
|
343
|
+
},
|
|
344
|
+
});
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
For proxied agents, `cwd: agentCwd` has no effect. Two agents in separate worktrees would both execute in the proxy's startup cwd. Fix:
|
|
348
|
+
|
|
349
|
+
```ts
|
|
350
|
+
const env = this.config.envForModel?.(effectiveModel);
|
|
351
|
+
if (env && isCursorProxiedModel(effectiveModel)) {
|
|
352
|
+
env.ANTHROPIC_CUSTOM_HEADERS = `X-Cursor-Workspace: ${agentCwd}`;
|
|
353
|
+
}
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
Plus ensure the proxy is started with `CURSOR_BRIDGE_WORKSPACE=/` (or a common parent of all worktree dirs).
|
|
357
|
+
|
|
358
|
+
This is a separate fix that should land regardless of the skill-translation work.
|
|
359
|
+
|
|
360
|
+
## Code locations for reference
|
|
361
|
+
|
|
362
|
+
### `cursor-composer-in-claude` (sibling repo, Francesco's fork at ../cursor-composer-in-claude)
|
|
363
|
+
|
|
364
|
+
- `src/lib/agent-cmd-args.ts` — builds `--mode` / `--workspace` / `--model` flags. 0.9.3 made `agent` default.
|
|
365
|
+
- `src/lib/env.ts:276–281` — `CURSOR_BRIDGE_MODE` parsing (`plan` | `ask` | `agent`).
|
|
366
|
+
- `src/lib/env.ts:256–258` — `workspace` config (defaults to proxy's `process.cwd()`).
|
|
367
|
+
- `src/lib/workspace.ts:50–106` — `resolveWorkspace()`: reads `x-cursor-workspace` header, validates path is under base.
|
|
368
|
+
- `src/lib/handlers/anthropic-messages.ts:147–159` — per-request header-based workspace resolution.
|
|
369
|
+
- `src/lib/openai.ts:58–87` — `toolsToSystemText()`: how SDK tool defs get serialized to system-prompt text (this is where to rewrite when exposing cursor tools under Anthropic names).
|
|
370
|
+
- `src/lib/cli-stream-parser.ts:41–75` — the parser that needs the `tool_call` case added.
|
|
371
|
+
- `src/lib/anthropic-sse-writer.ts:59–82` — already-wired SSE emitter for `tool_use` events.
|
|
372
|
+
|
|
373
|
+
### `claude-overnight`
|
|
374
|
+
|
|
375
|
+
- `src/providers.ts:160–215` — `envFor()`: where per-model env (including proxy auth + bridge settings) is built. Add `X-Cursor-Workspace` injection here, driven by the agent's `cwd`.
|
|
376
|
+
- `src/swarm.ts:563–584` — agent spawn. `env` is already passed via `envForModel(effectiveModel)`; just needs per-agent cwd propagation.
|
|
377
|
+
|
|
378
|
+
### Agent SDK (`@anthropic-ai/claude-agent-sdk`)
|
|
379
|
+
|
|
380
|
+
- `cli.js` — honors `ANTHROPIC_CUSTOM_HEADERS` env var (newline-separated `Key: Value`), string confirmed present.
|
|
381
|
+
- `sdk.d.ts:700–710` — `headers` field on McpHttpServerConfig (not the right one for our use — the env var is the right path).
|
|
382
|
+
|
|
383
|
+
### Cursor
|
|
384
|
+
|
|
385
|
+
- `https://agentn.global.api5.cursor.sh/agent.v1.AgentService/Run` — the chat RPC (HTTP/2 + protobuf).
|
|
386
|
+
- `https://api2.cursor.sh/aiserver.v1.*Service/*` — auxiliary REST/JSON endpoints.
|
|
387
|
+
- Proto schema lives in `/Users/francesco/.local/share/cursor-agent/versions/<ver>/cursor-agent-svc.js` (bundled, minified) — contains hundreds of `aiserver.v1.*` / `agent.v1.*` message type definitions.
|
|
388
|
+
|
|
389
|
+
## Quick artifacts for picking this up later
|
|
390
|
+
|
|
391
|
+
- Scratch test dir: `/tmp/simplify-probe/` — has all probe scripts (probe.mjs, probe-proxy.mjs, probe-proxy-v2.mjs, probe-proxy-v3.mjs, probe-skill-direct.mjs, fetch-logger.cjs).
|
|
392
|
+
- Cursor-rule test dir: `/tmp/skilltest/` — has the `.cursor/rules/simplify.mdc` demo.
|
|
393
|
+
- Proxy logs: `/Users/francesco/.cursor-api-proxy/proxy.out.log` and `sessions.log`.
|
|
394
|
+
- Cursor-agent CLI: `/Users/francesco/.local/bin/cursor-agent` (avoid — segfaults with bundled Node on macOS); use `/opt/homebrew/bin/node <cursor-agent-install>/index.js` instead.
|
|
395
|
+
|
|
396
|
+
## Recommended next steps (in order)
|
|
397
|
+
|
|
398
|
+
1. **Land the `X-Cursor-Workspace` fix in claude-overnight** — independent, fixes a real worktree-isolation bug. Small patch in `providers.ts:envFor()` + start proxy with `CURSOR_BRIDGE_WORKSPACE=/`.
|
|
399
|
+
2. **Patch the proxy's `cli-stream-parser.ts`** to translate `tool_call` → `tool_use`. ~30 LOC. Gives full tool visibility in claude-overnight's UI/logs for proxied agents.
|
|
400
|
+
3. **Update `toolsToSystemText`** to drop non-executable SDK tools (e.g. Skill) for proxied sessions and list cursor-native tools under Anthropic names — including `Task`, which maps to the native `taskToolCall`.
|
|
401
|
+
4. **Bundle skill → rule translation** in the proxy. Start with `/simplify`, `/review`, `/security-review`, `/init`. Materialize into workspace on request. Confirm end-to-end.
|
|
402
|
+
5. **Update steering/planner prompts** to give concrete operational briefs instead of skill invocations (works for both direct and proxied models — concrete is the common denominator).
|
|
403
|
+
6. **Optional/far future:** Path A (bypass cursor-agent entirely) only if the ceiling of B+C+skill-translation turns out to be too low — which seems unlikely given the experiments so far.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.25.
|
|
3
|
+
"version": "1.25.41",
|
|
4
4
|
"description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"dependencies": {
|
|
18
18
|
"@anthropic-ai/claude-agent-sdk": "^0.2.92",
|
|
19
19
|
"chalk": "^5.4.1",
|
|
20
|
-
"cursor-composer-in-claude": "^0.
|
|
20
|
+
"cursor-composer-in-claude": "^0.10.0",
|
|
21
21
|
"jsonwebtoken": "^9.0.2"
|
|
22
22
|
},
|
|
23
23
|
"devDependencies": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.25.
|
|
3
|
+
"version": "1.25.41",
|
|
4
4
|
"description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Francesco Fornace"
|
|
@@ -95,17 +95,18 @@ Rows: scope. Each cell is a starting point — adjust by one step when repo fact
|
|
|
95
95
|
|
|
96
96
|
| scope | tight ≤ 10 | standard 11–25 | wide 26–60 | saturated > 60 |
|
|
97
97
|
| ------------------------ | -------------------------------------------- | --------------------------------------------- | --------------------------------------------- | ----------------------------------------------- |
|
|
98
|
-
| bugfix | conc=2, flex=false, fast=null, cap=0.75 | conc=3, flex=true, fast=null, cap=0.75 | conc=4, flex=true, fast=
|
|
99
|
-
| feature-add | conc=2, flex=true, fast=null, cap=0.75 | conc=4, flex=true, fast=null, cap=0.75 | conc=6, flex=true, fast=
|
|
100
|
-
| refactor | conc=2, flex=false, fast=null, cap=0.75 | conc=4, flex=false, fast=null, cap=0.75 | conc=6, flex=true, fast=null, cap=0.9
|
|
101
|
-
| audit-and-fix | conc=3, flex=true, fast=
|
|
102
|
-
| migration | conc=2, flex=true, fast=null, cap=0.75 | conc=4, flex=true, fast=null, cap=0.9 | conc=6, flex=true, fast=null, cap=0.9
|
|
103
|
-
| research-and-implement | conc=2, flex=true, fast=null, cap=0.75 | conc=3, flex=true, fast=null, cap=0.75 | conc=4, flex=true, fast=null, cap=0.9
|
|
104
|
-
| polish-and-verify | conc=3, flex=false, fast=
|
|
98
|
+
| bugfix | conc=2, flex=false, fast=null, cap=0.75 | conc=3, flex=true, fast=null, cap=0.75 | conc=4, flex=true, fast=true, cap=0.9 | conc=5, flex=true, fast=true, cap=null |
|
|
99
|
+
| feature-add | conc=2, flex=true, fast=null, cap=0.75 | conc=4, flex=true, fast=null, cap=0.75 | conc=6, flex=true, fast=true, cap=0.9 | conc=8, flex=true, fast=true, cap=null |
|
|
100
|
+
| refactor | conc=2, flex=false, fast=null, cap=0.75 | conc=4, flex=false, fast=null, cap=0.75 | conc=6, flex=true, fast=null, cap=0.9 | conc=8, flex=true, fast=true, cap=null |
|
|
101
|
+
| audit-and-fix | conc=3, flex=true, fast=true, cap=0.75 | conc=5, flex=true, fast=true, cap=0.9 | conc=8, flex=true, fast=true, cap=0.9 | conc=10, flex=true, fast=true, cap=null |
|
|
102
|
+
| migration | conc=2, flex=true, fast=null, cap=0.75 | conc=4, flex=true, fast=null, cap=0.9 | conc=6, flex=true, fast=null, cap=0.9 | conc=8, flex=true, fast=null, cap=null |
|
|
103
|
+
| research-and-implement | conc=2, flex=true, fast=null, cap=0.75 | conc=3, flex=true, fast=null, cap=0.75 | conc=4, flex=true, fast=null, cap=0.9 | conc=5, flex=true, fast=true, cap=null |
|
|
104
|
+
| polish-and-verify | conc=3, flex=false, fast=true, cap=0.75 | conc=5, flex=false, fast=true, cap=0.75 | conc=8, flex=true, fast=true, cap=0.9 | conc=10, flex=true, fast=true, cap=null |
|
|
105
105
|
|
|
106
106
|
`conc` ⇒ `recommended.concurrency` (clamp to ≤ budget).
|
|
107
107
|
`flex` ⇒ `recommended.flex`.
|
|
108
|
-
`fast=
|
|
108
|
+
`fast=true` ⇒ recommend a fast model **if the user has one configured and reachable** from their available providers. Pick whatever the cheapest fast model is among their providers (e.g. `claude-haiku-4-5`, `composer-2-fast`, `qwen3` variants). If no fast model is reachable, set `null`.
|
|
109
|
+
`fast=null` ⇒ do not recommend a fast model (scope too complex or no suitable fast model available).
|
|
109
110
|
`cap=null` ⇒ unlimited (`recommended.usageCap = null`).
|
|
110
111
|
|
|
111
112
|
## Planner / worker model selection
|
|
@@ -117,14 +118,14 @@ Decision order (stop at the first row whose providers are present):
|
|
|
117
118
|
1. **Anthropic direct available**
|
|
118
119
|
- planner: `claude-opus-4-7` (or its `-thinking-high` variant when scope is `audit-and-fix` / `research-and-implement` / `migration`).
|
|
119
120
|
- worker: `claude-sonnet-4-6` for normal work; `claude-opus-4-7` for `wide`/`saturated` migrations or research.
|
|
120
|
-
- fastModel:
|
|
121
|
+
- fastModel: recommend the cheapest fast model available among the user's reachable providers when the matrix says `fast=true`.
|
|
121
122
|
2. **Custom Anthropic-compatible provider with a strong model** (e.g. `qwen3.6-plus`, `qwen3-coder-plus`)
|
|
122
123
|
- planner: the strongest such model the user has.
|
|
123
124
|
- worker: same model, or a cheaper sibling if the user has one.
|
|
124
125
|
3. **Cursor proxy is the only reachable provider**
|
|
125
126
|
- planner: `claude-opus-4-7` via Cursor (only if the proxy exposes it).
|
|
126
127
|
- worker: `claude-sonnet-4-6` via Cursor, or `composer-2` for the cheapest path.
|
|
127
|
-
- fastModel: `composer-2-fast` when the matrix says `fast=
|
|
128
|
+
- fastModel: recommend a Cursor fast model (e.g. `composer-2-fast`) when the matrix says `fast=true`.
|
|
128
129
|
4. **No reachable provider** — leave `plannerModel` and `workerModel` as `claude-sonnet-4-6` and emit a `blocking` checklist item titled "No reachable provider".
|
|
129
130
|
|
|
130
131
|
Never recommend Cursor models when the input does not list a `cursor proxy` provider, and never recommend stock Anthropic IDs when the input does not say "Anthropic direct: available". `fastModel` MUST be `null` rather than guessed.
|