claude-overnight 1.60.0 → 1.60.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,6 +44,7 @@ export async function runDownload(runIdArg, ...rest) {
44
44
  let baseUrl;
45
45
  let token;
46
46
  let projectId;
47
+ let watch = false;
47
48
  for (let i = 0; i < rest.length; i++) {
48
49
  if (rest[i] === "--base-url" && rest[i + 1]) {
49
50
  baseUrl = rest[i + 1];
@@ -57,6 +58,9 @@ export async function runDownload(runIdArg, ...rest) {
57
58
  projectId = rest[i + 1];
58
59
  i++;
59
60
  }
61
+ else if (rest[i] === "--watch") {
62
+ watch = true;
63
+ }
60
64
  }
61
65
  if (!baseUrl) {
62
66
  console.error("--base-url is required (e.g. https://fornace.net or http://localhost:8787)");
@@ -68,15 +72,29 @@ export async function runDownload(runIdArg, ...rest) {
68
72
  const prefix = projectId
69
73
  ? `${baseUrl.replace(/\/$/, "")}/api/projects/${projectId}/prompt-evolution/${runId}`
70
74
  : `${baseUrl.replace(/\/$/, "")}/runs/${runId}`;
71
- const metaRes = await fetch(prefix, { headers: authHeaders });
72
- if (!metaRes.ok) {
73
- console.error(`Failed to fetch run metadata: HTTP ${metaRes.status}`);
74
- process.exit(1);
75
- }
76
- const metaBody = (await metaRes.json());
77
- const remoteMeta = typeof metaBody.meta === "object" && metaBody.meta
78
- ? metaBody.meta
79
- : metaBody;
75
+ let remoteMeta = null;
76
+ let metaBody = null;
77
+ while (true) {
78
+ const metaRes = await fetch(prefix, { headers: authHeaders });
79
+ if (!metaRes.ok) {
80
+ console.error(`Failed to fetch run metadata: HTTP ${metaRes.status}`);
81
+ process.exit(1);
82
+ }
83
+ metaBody = (await metaRes.json());
84
+ remoteMeta = typeof metaBody.meta === "object" && metaBody.meta
85
+ ? metaBody.meta
86
+ : metaBody;
87
+ const status = remoteMeta.status;
88
+ if (watch && (status === "running" || status === "queued" || status === "pending" || !status)) {
89
+ process.stdout.write(`\r[${new Date().toLocaleTimeString()}] Run ${runId} is ${status || "running"}... waiting... `);
90
+ await new Promise(r => setTimeout(r, 10000));
91
+ }
92
+ else {
93
+ if (watch)
94
+ console.log(`\nRun finished with status: ${status}`);
95
+ break;
96
+ }
97
+ }
80
98
  const { runDir } = await import("../prompt-evolution/persistence.js");
81
99
  const { mkdirSync, writeFileSync } = await import("node:fs");
82
100
  const { dirname, join } = await import("node:path");
@@ -17,6 +17,8 @@
17
17
  */
18
18
  import { evolvePrompt } from "../prompt-evolution/index.js";
19
19
  import { PLAN_CASES } from "../prompt-evolution/fixtures/plan-cases.js";
20
+ import { STEER_CASES } from "../prompt-evolution/fixtures/steer-cases.js";
21
+ import { COACH_CASES } from "../prompt-evolution/fixtures/coach-cases.js";
20
22
  import { harvestRealCases } from "../prompt-evolution/fixtures/harvest.js";
21
23
  import { generateCases } from "../prompt-evolution/fixtures/generate.js";
22
24
  import { runDiff, runDownload, runPromote } from "./evolve-subcommands.js";
@@ -45,8 +47,8 @@ Options:
45
47
  --judge Use llm-judge for content scoring (costs extra API calls)
46
48
  --judge-model <model> Model to use for the judge (default: same as eval-model)
47
49
  --judge-top-n <n> Judge only the top-N variants per generation (default: 4)
48
- --cases <suite> Benchmark suite: plan | mcp-planning | mcp-review |
49
- mcp-supervision | mcp-stuck (default: plan)
50
+ --cases <suite> Benchmark suite: plan | steer | coach | mcp-planning |
51
+ mcp-review | mcp-supervision | mcp-stuck (default: plan)
50
52
  --harvest Append cases harvested from <cwd>/.claude-overnight/runs/*
51
53
  --harvest-only Use ONLY harvested real objectives (fails if none found)
52
54
  --harvest-limit <n> Max harvested cases (default: 10)
@@ -59,11 +61,12 @@ Options:
59
61
 
60
62
  Subcommands:
61
63
  claude-overnight-evolve download <runId> --base-url <url> [--token <token>]
62
- [--project <id>]
64
+ [--project <id>] [--watch]
63
65
  Pull a remote run (fornace or self-host) into the local
64
66
  ~/.claude-overnight/prompt-evolution/<runId>/ directory
65
67
  so you can audit, diff, or promote it offline. Use
66
- --project for fornace; omit for self-host.
68
+ --project for fornace; omit for self-host. If --watch
69
+ is set, it will poll until the run finishes before downloading.
67
70
  claude-overnight-evolve promote <runId> [--variant <id>] [--into <block>]
68
71
  Write a run's winning variant back into the source
69
72
  prompt file's <!-- BLOCK --> marker. If --variant is
@@ -277,6 +280,10 @@ async function evolveOne(opts) {
277
280
  else {
278
281
  if (opts.cases === "plan")
279
282
  cases = opts.harvestOnly ? [] : [...PLAN_CASES];
283
+ else if (opts.cases === "steer")
284
+ cases = opts.harvestOnly ? [] : [...STEER_CASES];
285
+ else if (opts.cases === "coach")
286
+ cases = opts.harvestOnly ? [] : [...COACH_CASES];
280
287
  else
281
288
  throw new Error(`Unknown case suite: ${opts.cases}`);
282
289
  if (opts.harvest) {
@@ -1 +1 @@
1
- export declare const VERSION = "1.60.0";
1
+ export declare const VERSION = "1.60.1";
@@ -1,2 +1,2 @@
1
1
  // Auto-generated by build — do not edit manually.
2
- export const VERSION = "1.60.0";
2
+ export const VERSION = "1.60.1";
@@ -0,0 +1,2 @@
1
+ import type { BenchmarkCase } from "../types.js";
2
+ export declare const COACH_CASES: BenchmarkCase[];
@@ -0,0 +1,64 @@
1
+ export const COACH_CASES = [
2
+ {
3
+ name: "simple-todo",
4
+ hash: "",
5
+ promptPath: "00_setup/00-1_coach",
6
+ vars: {
7
+ objective: "Build a simple todo app using vanilla JS.",
8
+ tree: "index.html\nstyle.css\napp.js",
9
+ readme: "# Todo app",
10
+ providers: "anthropic, openai",
11
+ isInitialCoach: true,
12
+ },
13
+ criteria: {
14
+ independentTasks: false,
15
+ specificTasks: false,
16
+ requiredJsonFields: ["objective", "scope", "recommended", "checklist", "remediation"],
17
+ },
18
+ },
19
+ {
20
+ name: "vague-objective",
21
+ hash: "",
22
+ promptPath: "00_setup/00-1_coach",
23
+ vars: {
24
+ objective: "Make it better.",
25
+ tree: "src/main.ts",
26
+ readme: "Project",
27
+ providers: "anthropic",
28
+ isInitialCoach: true,
29
+ },
30
+ criteria: {
31
+ independentTasks: false,
32
+ specificTasks: false,
33
+ requiredJsonFields: ["objective", "scope", "recommended", "checklist", "remediation"],
34
+ },
35
+ },
36
+ {
37
+ name: "massive-refactor",
38
+ hash: "",
39
+ promptPath: "00_setup/00-1_coach",
40
+ vars: {
41
+ objective: "Migrate the entire backend from Express to NestJS.",
42
+ tree: "src/app.ts\nsrc/routes/api.ts\nsrc/models/user.ts",
43
+ readme: "Express backend",
44
+ providers: "anthropic, google",
45
+ isInitialCoach: true,
46
+ },
47
+ criteria: {
48
+ independentTasks: false,
49
+ specificTasks: false,
50
+ requiredJsonFields: ["objective", "scope", "recommended", "checklist", "remediation"],
51
+ },
52
+ },
53
+ ];
54
+ function hashCase(c) {
55
+ const key = `${c.promptPath}:${c.variant ?? "default"}:${JSON.stringify(c.vars)}`;
56
+ let h = 0;
57
+ for (let i = 0; i < key.length; i++) {
58
+ h = ((h << 5) - h + key.charCodeAt(i)) | 0;
59
+ }
60
+ return Math.abs(h).toString(36).slice(0, 8);
61
+ }
62
+ for (const c of COACH_CASES) {
63
+ c.hash = hashCase(c);
64
+ }
@@ -0,0 +1,2 @@
1
+ import type { BenchmarkCase } from "../types.js";
2
+ export declare const STEER_CASES: BenchmarkCase[];
@@ -0,0 +1,85 @@
1
+ export const STEER_CASES = [
2
+ {
3
+ name: "idle-needs-verify",
4
+ hash: "",
5
+ promptPath: "30_wave/30-1_steer",
6
+ vars: {
7
+ objective: "Fix the pagination bug and ensure it works on mobile.",
8
+ status: "Pagination logic has been rewritten. Tests pass. We have not run the app to check mobile responsiveness yet.",
9
+ recentText: "Wave 2 finished: 1 feature agent refactored the logic.",
10
+ fastModel: "qwen",
11
+ workerModel: "sonnet",
12
+ longArchetypes: true,
13
+ },
14
+ criteria: {
15
+ independentTasks: true,
16
+ specificTasks: false,
17
+ requiredJsonFields: ["done", "reasoning", "statusUpdate", "estimatedSessionsRemaining", "tasks"],
18
+ },
19
+ },
20
+ {
21
+ name: "infinite-loop-stuck",
22
+ hash: "",
23
+ promptPath: "30_wave/30-1_steer",
24
+ vars: {
25
+ objective: "Migrate the database to PostgreSQL.",
26
+ status: "Agent keeps failing to connect to the database. It has tried 5 times with different credentials.",
27
+ recentText: "Wave 4 finished: Agent failed to run migrations due to 'Connection refused'.",
28
+ fastModel: "qwen",
29
+ workerModel: "sonnet",
30
+ longArchetypes: true,
31
+ },
32
+ criteria: {
33
+ independentTasks: true,
34
+ specificTasks: false,
35
+ requiredJsonFields: ["done", "reasoning", "statusUpdate", "estimatedSessionsRemaining", "tasks"],
36
+ },
37
+ },
38
+ {
39
+ name: "completed-all-requirements",
40
+ hash: "",
41
+ promptPath: "30_wave/30-1_steer",
42
+ vars: {
43
+ objective: "Add a dark mode toggle to the header.",
44
+ status: "Toggle component created, state management added, CSS updated. Verification agent confirmed the toggle works and persists across reloads.",
45
+ recentText: "Wave 3 finished: Verification agent reported full success.",
46
+ fastModel: "qwen",
47
+ workerModel: "sonnet",
48
+ longArchetypes: true,
49
+ },
50
+ criteria: {
51
+ independentTasks: true,
52
+ specificTasks: false,
53
+ requiredJsonFields: ["done", "reasoning", "statusUpdate", "estimatedSessionsRemaining", "tasks"],
54
+ },
55
+ },
56
+ {
57
+ name: "mid-feature-split",
58
+ hash: "",
59
+ promptPath: "30_wave/30-1_steer",
60
+ vars: {
61
+ objective: "Build a new analytics dashboard with 3 charts.",
62
+ status: "Database queries are written. We need to build the UI components and wire them up.",
63
+ recentText: "Wave 1 finished: Backend agent successfully added the SQL views.",
64
+ fastModel: "qwen",
65
+ workerModel: "sonnet",
66
+ longArchetypes: true,
67
+ },
68
+ criteria: {
69
+ independentTasks: true,
70
+ specificTasks: true,
71
+ requiredJsonFields: ["done", "reasoning", "statusUpdate", "estimatedSessionsRemaining", "tasks"],
72
+ },
73
+ }
74
+ ];
75
+ function hashCase(c) {
76
+ const key = `${c.promptPath}:${c.variant ?? "default"}:${JSON.stringify(c.vars)}`;
77
+ let h = 0;
78
+ for (let i = 0; i < key.length; i++) {
79
+ h = ((h << 5) - h + key.charCodeAt(i)) | 0;
80
+ }
81
+ return Math.abs(h).toString(36).slice(0, 8);
82
+ }
83
+ for (const c of STEER_CASES) {
84
+ c.hash = hashCase(c);
85
+ }
@@ -43,11 +43,10 @@ export async function judgeOutput(rawOutput, c, opts) {
43
43
  const headers = {
44
44
  "Content-Type": "application/json",
45
45
  "Authorization": `Bearer ${authToken}`,
46
+ "User-Agent": "Claude-Code/0.1.0",
46
47
  };
47
48
  if (isAnthropic)
48
49
  headers["anthropic-version"] = "2023-06-01";
49
- if (isKimi)
50
- headers["User-Agent"] = "Kilo-Code/1.0";
51
50
  const res = await fetch(endpoint, {
52
51
  method: "POST",
53
52
  headers,
@@ -15,19 +15,36 @@ export async function mutate(request, opts) {
15
15
  const baseUrl = (opts.baseUrl ?? process.env.ANTHROPIC_BASE_URL ?? "https://api.anthropic.com").replace(/\/$/, "");
16
16
  const authToken = opts.authToken ?? process.env.ANTHROPIC_AUTH_TOKEN ?? process.env.ANTHROPIC_API_KEY ?? "";
17
17
  const isKimi = /kimi\.com/i.test(baseUrl);
18
- const body = JSON.stringify({
19
- model: opts.model,
20
- max_tokens: opts.maxTokens ?? 4096,
21
- messages: [{ role: "user", content: prompt }],
22
- });
18
+ let body;
19
+ if (baseUrl.includes("generativelanguage")) {
20
+ body = JSON.stringify({
21
+ model: opts.model,
22
+ max_completion_tokens: opts.maxTokens ?? 4096,
23
+ messages: [{ role: "user", content: prompt }],
24
+ });
25
+ }
26
+ else {
27
+ body = JSON.stringify({
28
+ model: opts.model,
29
+ max_tokens: opts.maxTokens ?? 4096,
30
+ messages: [{ role: "user", content: prompt }],
31
+ });
32
+ }
23
33
  const headers = {
24
34
  "Content-Type": "application/json",
25
35
  "Authorization": `Bearer ${authToken}`,
26
36
  "anthropic-version": "2023-06-01",
37
+ "User-Agent": "Claude-Code/0.1.0",
27
38
  };
28
- if (isKimi)
29
- headers["User-Agent"] = "Kilo-Code/1.0";
30
- const res = await fetch(`${baseUrl}/v1/messages`, {
39
+ let endpoint = `${baseUrl}/v1/messages`;
40
+ if (baseUrl.includes("generativelanguage")) {
41
+ endpoint = `${baseUrl}/v1/chat/completions`;
42
+ }
43
+ else if (!/^https?:\/\/(api\.)?anthropic\.com/i.test(baseUrl) && !baseUrl.includes("/v1/messages")) {
44
+ // A lot of OpenAI compatible endpoints use `/v1/chat/completions` natively
45
+ endpoint = `${baseUrl}/v1/chat/completions`;
46
+ }
47
+ const res = await fetch(endpoint, {
31
48
  method: "POST",
32
49
  headers,
33
50
  body,
@@ -38,7 +55,15 @@ export async function mutate(request, opts) {
38
55
  throw new Error(`Mutator HTTP ${res.status}: ${text.slice(0, 200)}`);
39
56
  }
40
57
  const data = await res.json();
41
- const raw = data.content?.map((c) => c.text ?? "").join("") ?? "";
58
+ let raw = "";
59
+ if (endpoint.includes("chat/completions")) {
60
+ const chatData = data;
61
+ raw = chatData.choices?.[0]?.message?.content ?? "";
62
+ }
63
+ else {
64
+ const msgData = data;
65
+ raw = msgData.content?.map((c) => c.text ?? "").join("") ?? "";
66
+ }
42
67
  return parseMutantOutput(raw, request);
43
68
  }
44
69
  function buildMutatorPrompt(req) {
@@ -8,8 +8,7 @@
8
8
  * Supports both Anthropic-native and OpenAI-compatible endpoints so we can
9
9
  * run the same eval against Haiku, Kimi, and OpenRouter without a rewrite.
10
10
  */
11
- import { VERSION } from "../core/_version.js";
12
- const USER_AGENT = `claude-overnight-evolve/${VERSION}`;
11
+ const USER_AGENT = `Claude-Code/0.1.0`;
13
12
  export async function defaultCallModel(userText, systemText, opts) {
14
13
  const baseUrl = (opts.baseUrl ?? process.env.ANTHROPIC_BASE_URL ?? "https://api.anthropic.com").replace(/\/$/, "");
15
14
  const authToken = opts.authToken ?? process.env.ANTHROPIC_AUTH_TOKEN ?? process.env.ANTHROPIC_API_KEY ?? "";
@@ -44,16 +43,21 @@ export async function defaultCallModel(userText, systemText, opts) {
44
43
  if (systemText)
45
44
  messages.push({ role: "system", content: systemText });
46
45
  messages.push({ role: "user", content: userText });
47
- // Platform.moonshot.ai marks max_tokens deprecated in favor of
48
- // max_completion_tokens. Kimi's coding endpoint still accepts max_tokens.
49
- // Sending both is safe — OpenAI, Moonshot, DeepSeek, and Kimi all tolerate
50
- // the extra field, and we're future-proof against the deprecation.
51
- body = JSON.stringify({
46
+ const payload = {
52
47
  model: opts.model,
53
- max_tokens: maxOut,
54
- max_completion_tokens: maxOut,
55
48
  messages,
56
- });
49
+ };
50
+ // Platform.moonshot.ai marks max_tokens deprecated in favor of max_completion_tokens.
51
+ // Kimi's coding endpoint accepts max_tokens.
52
+ // Gemini's OpenAI wrapper strictly rejects having BOTH set.
53
+ if (baseUrl.includes("generativelanguage")) {
54
+ payload.max_completion_tokens = maxOut;
55
+ }
56
+ else {
57
+ payload.max_tokens = maxOut;
58
+ payload.max_completion_tokens = maxOut;
59
+ }
60
+ body = JSON.stringify(payload);
57
61
  }
58
62
  const res = await fetch(endpoint, {
59
63
  method: "POST",
package/dist/ui/input.js CHANGED
@@ -169,19 +169,35 @@ export function InputLayer({ store, callbacks, onToast }) {
169
169
  const lc = s.liveConfig;
170
170
  if (key.rightArrow || key.downArrow) {
171
171
  callbacks.cycleAgent(1);
172
+ const nextId = store.get().selectedAgentId;
173
+ if (nextId != null && s.viewMode.startsWith("stream:agent-")) {
174
+ store.patch({ viewMode: `stream:agent-${nextId}` });
175
+ }
172
176
  return;
173
177
  }
174
178
  if (key.upArrow) {
175
179
  callbacks.cycleAgent(-1);
180
+ const nextId = store.get().selectedAgentId;
181
+ if (nextId != null && s.viewMode.startsWith("stream:agent-")) {
182
+ store.patch({ viewMode: `stream:agent-${nextId}` });
183
+ }
176
184
  return;
177
185
  }
178
186
  if (key.leftArrow) {
179
187
  callbacks.clearSelectedAgent();
188
+ if (s.viewMode.startsWith("stream:agent-"))
189
+ store.patch({ viewMode: "events" });
180
190
  return;
181
191
  }
182
192
  if (key.escape) {
183
193
  if (s.selectedAgentId != null) {
184
194
  callbacks.clearSelectedAgent();
195
+ if (s.viewMode.startsWith("stream:agent-"))
196
+ store.patch({ viewMode: "events" });
197
+ return;
198
+ }
199
+ if (s.viewMode !== "events") {
200
+ store.patch({ viewMode: "events" });
185
201
  return;
186
202
  }
187
203
  if (s.ask && !s.ask.streaming) {
@@ -202,10 +218,18 @@ export function InputLayer({ store, callbacks, onToast }) {
202
218
  callbacks.openAskTempFile();
203
219
  return;
204
220
  }
221
+ if (key.tab) {
222
+ const modes = ["stream:planner", "stream:steerer", "stream:verifier"];
223
+ const current = s.viewMode;
224
+ const idx = modes.indexOf(current);
225
+ const next = modes[(idx + 1) % modes.length];
226
+ store.patch({ viewMode: next });
227
+ return;
228
+ }
205
229
  if (!raw || raw.length !== 1)
206
230
  return;
207
231
  const code = raw.charCodeAt(0);
208
- if (code < 0x20 || code > 0x7E)
232
+ if (code !== 9 && (code < 0x20 || code > 0x7E))
209
233
  return;
210
234
  if (key.ctrl || key.meta)
211
235
  return;
@@ -273,8 +297,11 @@ export function InputLayer({ store, callbacks, onToast }) {
273
297
  if (/^[0-9]$/.test(raw) && swarm) {
274
298
  const n = parseInt(raw, 10);
275
299
  const running = swarm.agents.filter(a => a.status === "running");
276
- if (n < running.length)
277
- callbacks.selectAgent(running[n].id);
300
+ if (n < running.length) {
301
+ const id = running[n].id;
302
+ callbacks.selectAgent(id);
303
+ store.patch({ viewMode: `stream:agent-${id}` });
304
+ }
278
305
  }
279
306
  }, { isActive: !textEntry });
280
307
  if (state.input.mode === "none")
package/dist/ui/ui.js CHANGED
@@ -8,7 +8,7 @@ import { UiStore, makeInitialState } from "./store.js";
8
8
  import { App } from "./shell.js";
9
9
  const MAX_STEERING_EVENTS = 60;
10
10
  const MAX_ASK_LINES = 40;
11
- const MAX_DEBRIEF_HISTORY = 20;
11
+ const MAX_DEBRIEF_HISTORY = 50;
12
12
  function askDisplayCap() {
13
13
  return Math.max(3, Math.min(MAX_ASK_LINES, (process.stdout.rows || 40) - 20));
14
14
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.60.0",
3
+ "version": "1.60.2",
4
4
  "description": "Overnight parallel coding agents in git worktrees, with a self-curating skill memory that improves while the run is going. Mix Claude Opus as planner, Kimi 2.6 or Cursor composer-2 as cheap fast worker, Gemini or Qwen for bulk implementation. Multi-wave autonomous loop that plans, executes, reviews, and steers itself until the objective is met. Crash-safe resume, rate-limit aware, usage cap preserves headroom for your interactive Claude Code.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -14,6 +14,7 @@
14
14
  "test": "node --test dist/__tests__/*.test.js",
15
15
  "matrix:cursor-proxy": "node scripts/cursor-proxy-keychain-matrix.mjs",
16
16
  "evolve": "node dist/bin/evolve.js",
17
+ "evolve:favorite": "node dist/bin/evolve.js --prompt 10_planning/10-3_plan --eval-model gemini-3.1-flash-lite-preview --mutate-model gemini-3.1-pro-preview --generations 10 --population 8 --plateau 3 --reps 3",
17
18
  "prepublishOnly": "node scripts/sync-plugin-version.js"
18
19
  },
19
20
  "dependencies": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.60.0",
3
+ "version": "1.60.2",
4
4
  "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs: overnight parallel coding agents in git worktrees with a self-curating skill memory, multi-wave steering, three-layer review, and crash-safe resume. Mix Opus planner with Kimi 2.6, Cursor composer-2, Gemini, Qwen, or any Anthropic-compatible worker.",
5
5
  "author": {
6
6
  "name": "Francesco Fornace"
@@ -30,23 +30,20 @@ Respond with ONLY a JSON object (no markdown fences):
30
30
  <!-- @@@ -->
31
31
 
32
32
  <!-- STANDARD -->
33
+ <!-- LARGE -->
33
34
 
34
- You are a task coordinator for a parallel agent system with {{budget}} agent sessions available.
35
+ You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
35
36
 
36
37
  Objective: {{objective}}
37
38
 
38
39
  {{contextConstraintNote}}
39
40
 
40
- Do NOT over-specify. Give each agent a MISSION, not step-by-step instructions. Let agents make their own decisions about implementation details.
41
-
42
41
  Requirements:
43
42
  - Target exactly ~{{budget}} tasks
44
- - Each task should be a substantial piece of work
45
43
  - Each task MUST be independent -- no task depends on another
46
- - Tasks that run concurrently must touch DIFFERENT files/areas to avoid merge conflicts
47
- - Give agents scope and autonomy: "Design and implement X" not "In file Y, add function Z"
48
- - Include research/exploration tasks, design tasks, implementation tasks, testing tasks, and polish tasks
49
- - Think in terms of workstreams: architecture, features, tests, docs, UX, performance, etc.{{#if concurrency}}
44
+ - Each task should target specific files/areas to avoid merge conflicts
45
+ - Be specific: mention exact file paths, function names, what to change
46
+ - Keep tasks focused: one concrete change per task{{#if concurrency}}
50
47
  - {{concurrency}} agents run in parallel -- tasks that run concurrently must touch DIFFERENT files to avoid merge conflicts{{/if}}{{#if flexNote}}
51
48
 
52
49
  {{flexNote}}{{/if}}
@@ -54,8 +51,8 @@ Requirements:
54
51
  Respond with ONLY a JSON object (no markdown fences):
55
52
  {
56
53
  "tasks": [
57
- { "prompt": "Design and implement the complete user favorites system: database schema, API routes, client hooks, and error handling. Research existing patterns in the codebase first." },
58
- { "prompt": "Audit all existing API routes for consistency, error handling, and input validation. Fix any issues found." }
54
+ { "prompt": "In src/foo.ts, refactor the bar() function to..." },
55
+ { "prompt": "Add unit tests for the baz module in test/baz.test.ts..." }
59
56
  ]
60
57
  }
61
58