claude-overnight 1.60.1 → 1.60.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/evolve.js +8 -2
- package/dist/core/_version.d.ts +1 -1
- package/dist/core/_version.js +1 -1
- package/dist/prompt-evolution/fixtures/coach-cases.d.ts +2 -0
- package/dist/prompt-evolution/fixtures/coach-cases.js +64 -0
- package/dist/prompt-evolution/fixtures/steer-cases.d.ts +2 -0
- package/dist/prompt-evolution/fixtures/steer-cases.js +85 -0
- package/package.json +1 -1
- package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
package/dist/bin/evolve.js
CHANGED
|
@@ -17,6 +17,8 @@
|
|
|
17
17
|
*/
|
|
18
18
|
import { evolvePrompt } from "../prompt-evolution/index.js";
|
|
19
19
|
import { PLAN_CASES } from "../prompt-evolution/fixtures/plan-cases.js";
|
|
20
|
+
import { STEER_CASES } from "../prompt-evolution/fixtures/steer-cases.js";
|
|
21
|
+
import { COACH_CASES } from "../prompt-evolution/fixtures/coach-cases.js";
|
|
20
22
|
import { harvestRealCases } from "../prompt-evolution/fixtures/harvest.js";
|
|
21
23
|
import { generateCases } from "../prompt-evolution/fixtures/generate.js";
|
|
22
24
|
import { runDiff, runDownload, runPromote } from "./evolve-subcommands.js";
|
|
@@ -45,8 +47,8 @@ Options:
|
|
|
45
47
|
--judge Use llm-judge for content scoring (costs extra API calls)
|
|
46
48
|
--judge-model <model> Model to use for the judge (default: same as eval-model)
|
|
47
49
|
--judge-top-n <n> Judge only the top-N variants per generation (default: 4)
|
|
48
|
-
--cases <suite> Benchmark suite: plan |
|
|
49
|
-
mcp-supervision | mcp-stuck (default: plan)
|
|
50
|
+
--cases <suite> Benchmark suite: plan | steer | coach | mcp-planning |
|
|
51
|
+
mcp-review | mcp-supervision | mcp-stuck (default: plan)
|
|
50
52
|
--harvest Append cases harvested from <cwd>/.claude-overnight/runs/*
|
|
51
53
|
--harvest-only Use ONLY harvested real objectives (fails if none found)
|
|
52
54
|
--harvest-limit <n> Max harvested cases (default: 10)
|
|
@@ -278,6 +280,10 @@ async function evolveOne(opts) {
|
|
|
278
280
|
else {
|
|
279
281
|
if (opts.cases === "plan")
|
|
280
282
|
cases = opts.harvestOnly ? [] : [...PLAN_CASES];
|
|
283
|
+
else if (opts.cases === "steer")
|
|
284
|
+
cases = opts.harvestOnly ? [] : [...STEER_CASES];
|
|
285
|
+
else if (opts.cases === "coach")
|
|
286
|
+
cases = opts.harvestOnly ? [] : [...COACH_CASES];
|
|
281
287
|
else
|
|
282
288
|
throw new Error(`Unknown case suite: ${opts.cases}`);
|
|
283
289
|
if (opts.harvest) {
|
package/dist/core/_version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION = "1.60.0";
|
|
1
|
+
export declare const VERSION = "1.60.1";
|
package/dist/core/_version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Auto-generated by build — do not edit manually.
|
|
2
|
-
export const VERSION = "1.60.0";
|
|
2
|
+
export const VERSION = "1.60.1";
|
|
package/dist/prompt-evolution/fixtures/coach-cases.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
 * Built-in benchmark cases for the `00_setup/00-1_coach` prompt
 * (selected with `--cases coach`).
 *
 * Each case has a `name`, the `promptPath` it exercises, the `vars` object
 * that goes with that prompt, and a `criteria` object listing the JSON fields
 * a response is required to contain. `hash` is declared empty here and is
 * filled in once, at module load, after the array has been defined.
 *
 * NOTE(review): `independentTasks` / `specificTasks` are presumably scoring
 * switches read by the evaluator — confirm against the scorer.
 */
export const COACH_CASES = [
  {
    name: "simple-todo",
    hash: "",
    promptPath: "00_setup/00-1_coach",
    vars: {
      objective: "Build a simple todo app using vanilla JS.",
      tree: "index.html\nstyle.css\napp.js",
      readme: "# Todo app",
      providers: "anthropic, openai",
      isInitialCoach: true,
    },
    criteria: {
      independentTasks: false,
      specificTasks: false,
      requiredJsonFields: ["objective", "scope", "recommended", "checklist", "remediation"],
    },
  },
  {
    name: "vague-objective",
    hash: "",
    promptPath: "00_setup/00-1_coach",
    vars: {
      objective: "Make it better.",
      tree: "src/main.ts",
      readme: "Project",
      providers: "anthropic",
      isInitialCoach: true,
    },
    criteria: {
      independentTasks: false,
      specificTasks: false,
      requiredJsonFields: ["objective", "scope", "recommended", "checklist", "remediation"],
    },
  },
  {
    name: "massive-refactor",
    hash: "",
    promptPath: "00_setup/00-1_coach",
    vars: {
      objective: "Migrate the entire backend from Express to NestJS.",
      tree: "src/app.ts\nsrc/routes/api.ts\nsrc/models/user.ts",
      readme: "Express backend",
      providers: "anthropic, google",
      isInitialCoach: true,
    },
    criteria: {
      independentTasks: false,
      specificTasks: false,
      requiredJsonFields: ["objective", "scope", "recommended", "checklist", "remediation"],
    },
  },
];
|
|
54
|
+
/**
 * Derive a short, deterministic identifier for a fixture case.
 *
 * The identifier depends only on the case's `promptPath`, its `variant`
 * (the literal "default" is used when `variant` is null or undefined) and
 * the JSON serialization of its `vars`.
 *
 * @param {{ promptPath: string, variant?: string | null, vars: unknown }} c
 *   Case to hash.
 * @returns {string} Base-36 string, at most eight characters long.
 */
function hashCase(c) {
  const key = `${c.promptPath}:${c.variant ?? "default"}:${JSON.stringify(c.vars)}`;
  let h = 0;
  for (let i = 0; i < key.length; i++) {
    // (h << 5) - h is h * 31; `| 0` wraps the running value to a signed 32-bit integer.
    h = ((h << 5) - h + key.charCodeAt(i)) | 0;
  }
  // Drop the sign so the base-36 text never starts with "-".
  return Math.abs(h).toString(36).slice(0, 8);
}
|
|
62
|
+
// Replace each case's empty `hash` placeholder with the value computed by hashCase.
for (const c of COACH_CASES) {
  c.hash = hashCase(c);
}
|
|
package/dist/prompt-evolution/fixtures/steer-cases.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
 * Built-in benchmark cases for the `30_wave/30-1_steer` prompt
 * (selected with `--cases steer`).
 *
 * Each case has a `name`, the `promptPath` it exercises, the `vars` object
 * that goes with that prompt (objective, current status, recent wave text and
 * model names), and a `criteria` object listing the JSON fields a response is
 * required to contain. `hash` is declared empty here and is filled in once,
 * at module load, after the array has been defined.
 *
 * NOTE(review): `independentTasks` / `specificTasks` are presumably scoring
 * switches read by the evaluator — confirm against the scorer.
 */
export const STEER_CASES = [
  {
    name: "idle-needs-verify",
    hash: "",
    promptPath: "30_wave/30-1_steer",
    vars: {
      objective: "Fix the pagination bug and ensure it works on mobile.",
      status: "Pagination logic has been rewritten. Tests pass. We have not run the app to check mobile responsiveness yet.",
      recentText: "Wave 2 finished: 1 feature agent refactored the logic.",
      fastModel: "qwen",
      workerModel: "sonnet",
      longArchetypes: true,
    },
    criteria: {
      independentTasks: true,
      specificTasks: false,
      requiredJsonFields: ["done", "reasoning", "statusUpdate", "estimatedSessionsRemaining", "tasks"],
    },
  },
  {
    name: "infinite-loop-stuck",
    hash: "",
    promptPath: "30_wave/30-1_steer",
    vars: {
      objective: "Migrate the database to PostgreSQL.",
      status: "Agent keeps failing to connect to the database. It has tried 5 times with different credentials.",
      recentText: "Wave 4 finished: Agent failed to run migrations due to 'Connection refused'.",
      fastModel: "qwen",
      workerModel: "sonnet",
      longArchetypes: true,
    },
    criteria: {
      independentTasks: true,
      specificTasks: false,
      requiredJsonFields: ["done", "reasoning", "statusUpdate", "estimatedSessionsRemaining", "tasks"],
    },
  },
  {
    name: "completed-all-requirements",
    hash: "",
    promptPath: "30_wave/30-1_steer",
    vars: {
      objective: "Add a dark mode toggle to the header.",
      status: "Toggle component created, state management added, CSS updated. Verification agent confirmed the toggle works and persists across reloads.",
      recentText: "Wave 3 finished: Verification agent reported full success.",
      fastModel: "qwen",
      workerModel: "sonnet",
      longArchetypes: true,
    },
    criteria: {
      independentTasks: true,
      specificTasks: false,
      requiredJsonFields: ["done", "reasoning", "statusUpdate", "estimatedSessionsRemaining", "tasks"],
    },
  },
  {
    name: "mid-feature-split",
    hash: "",
    promptPath: "30_wave/30-1_steer",
    vars: {
      objective: "Build a new analytics dashboard with 3 charts.",
      status: "Database queries are written. We need to build the UI components and wire them up.",
      recentText: "Wave 1 finished: Backend agent successfully added the SQL views.",
      fastModel: "qwen",
      workerModel: "sonnet",
      longArchetypes: true,
    },
    criteria: {
      independentTasks: true,
      specificTasks: true,
      requiredJsonFields: ["done", "reasoning", "statusUpdate", "estimatedSessionsRemaining", "tasks"],
    },
  }
];
|
|
75
|
+
/**
 * Derive a short, deterministic identifier for a fixture case.
 *
 * The identifier depends only on the case's `promptPath`, its `variant`
 * (the literal "default" is used when `variant` is null or undefined) and
 * the JSON serialization of its `vars`.
 *
 * @param {{ promptPath: string, variant?: string | null, vars: unknown }} c
 *   Case to hash.
 * @returns {string} Base-36 string, at most eight characters long.
 */
function hashCase(c) {
  const key = `${c.promptPath}:${c.variant ?? "default"}:${JSON.stringify(c.vars)}`;
  let h = 0;
  for (let i = 0; i < key.length; i++) {
    // (h << 5) - h is h * 31; `| 0` wraps the running value to a signed 32-bit integer.
    h = ((h << 5) - h + key.charCodeAt(i)) | 0;
  }
  // Drop the sign so the base-36 text never starts with "-".
  return Math.abs(h).toString(36).slice(0, 8);
}
|
|
83
|
+
// Replace each case's empty `hash` placeholder with the value computed by hashCase.
for (const c of STEER_CASES) {
  c.hash = hashCase(c);
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.60.1",
|
|
3
|
+
"version": "1.60.2",
|
|
4
4
|
"description": "Overnight parallel coding agents in git worktrees, with a self-curating skill memory that improves while the run is going. Mix Claude Opus as planner, Kimi 2.6 or Cursor composer-2 as cheap fast worker, Gemini or Qwen for bulk implementation. Multi-wave autonomous loop that plans, executes, reviews, and steers itself until the objective is met. Crash-safe resume, rate-limit aware, usage cap preserves headroom for your interactive Claude Code.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
package/plugins/claude-overnight/.claude-plugin/plugin.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.60.1",
|
|
3
|
+
"version": "1.60.2",
|
|
4
4
|
"description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs: overnight parallel coding agents in git worktrees with a self-curating skill memory, multi-wave steering, three-layer review, and crash-safe resume. Mix Opus planner with Kimi 2.6, Cursor composer-2, Gemini, Qwen, or any Anthropic-compatible worker.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Francesco Fornace"
|