claude-overnight 1.59.0 → 1.60.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/evolve-subcommands.d.ts +3 -0
- package/dist/bin/evolve-subcommands.js +252 -0
- package/dist/bin/evolve.js +24 -37
- package/dist/core/_version.d.ts +1 -1
- package/dist/core/_version.js +1 -1
- package/dist/prompt-evolution/llm-judge.js +1 -2
- package/dist/prompt-evolution/mutator.js +34 -9
- package/dist/prompt-evolution/transport.js +14 -10
- package/dist/prompts/load.d.ts +1 -0
- package/dist/prompts/load.js +1 -1
- package/dist/ui/input.js +30 -3
- package/dist/ui/ui.js +1 -1
- package/package.json +2 -1
- package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
- package/prompts/10_planning/10-3_plan.md +7 -10
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
/**
 * Print a Markdown table comparing the per-variant gmean of two persisted runs.
 * Prints usage and exits with status 2 when either run id is missing.
 *
 * @param runIdA id of the run shown in the "A" column
 * @param runIdB id of the run shown in the "B" column
 */
export declare function runDiff(runIdA: string | undefined, runIdB: string | undefined): Promise<void>;
|
|
2
|
+
/**
 * Download a remote run's metadata and files into the local run directory.
 *
 * @param runIdArg id of the remote run; usage is printed and the process exits with status 2 when missing
 * @param rest remaining CLI arguments: `--base-url <url>` (required), `--token <token>`, `--project <id>`, `--watch`
 */
export declare function runDownload(runIdArg?: string, ...rest: string[]): Promise<void>;
|
|
3
|
+
/**
 * Write a variant of a persisted run back into its marker block of the source prompt file.
 *
 * @param runIdArg id of the local run; usage is printed and the process exits with status 2 when missing
 * @param rest remaining CLI arguments: `--variant <id>` and `--into <block>`
 */
export declare function runPromote(runIdArg?: string, ...rest: string[]): Promise<void>;
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
/**
 * Print a Markdown table comparing the per-variant gmean of two persisted runs.
 *
 * For each run, only the highest-generation matrix row of every variantId is
 * kept, so the table compares the final state of each variant. When either
 * run id is missing, usage is written to stderr and the process exits with
 * status 2.
 *
 * @param runIdA id of the run shown in the "A" column
 * @param runIdB id of the run shown in the "B" column
 */
export async function runDiff(runIdA, runIdB) {
    if (!(runIdA && runIdB)) {
        console.error("usage: claude-overnight-evolve diff <runIdA> <runIdB>");
        process.exit(2);
    }
    const { loadRun } = await import("../prompt-evolution/persistence.js");
    const runA = loadRun(runIdA);
    const runB = loadRun(runIdB);

    // Reduce a run's matrix to one record per variant: the latest generation wins.
    const latestByVariant = (run) => {
        const latest = new Map();
        for (const { variantId, generation, gmean } of run.matrix) {
            const seen = latest.get(variantId);
            if (seen && !(generation > seen.generation)) {
                continue;
            }
            latest.set(variantId, { generation, variantId, gmean });
        }
        return latest;
    };
    // gmean is rendered as a percentage with one decimal.
    const asPercent = (value) => (value * 100).toFixed(1);

    const rowsA = latestByVariant(runA);
    const rowsB = latestByVariant(runB);
    const variantIds = new Set([...rowsA.keys(), ...rowsB.keys()]);

    console.log(`# Diff: ${runIdA} → ${runIdB}`);
    console.log("");
    console.log(`| Variant | A gmean | B gmean | Δ | note |`);
    console.log(`|-----------|-----------|-----------|-------|--------|`);

    for (const id of [...variantIds].sort()) {
        const left = rowsA.get(id);
        const right = rowsB.get(id);
        const ga = left ? asPercent(left.gmean) : "—";
        const gb = right ? asPercent(right.gmean) : "—";
        const delta = left && right ? asPercent(right.gmean - left.gmean) : "—";
        let note;
        if (!left) {
            note = "new in B";
        }
        else if (!right) {
            note = "missing in B";
        }
        else if (left.gmean < right.gmean) {
            note = "↑";
        }
        else if (left.gmean > right.gmean) {
            note = "↓";
        }
        else {
            note = "=";
        }
        console.log(`| ${id.padEnd(10)}| ${ga.padStart(9)} | ${gb.padStart(9)} | ${delta.padStart(5)} | ${note} |`);
    }
}
|
|
38
|
+
/**
 * Download a remote run into the local run directory.
 *
 * Steps, in order:
 *  1. Parse `--base-url`, `--token`, `--project` and `--watch` from `rest`.
 *  2. Fetch the run metadata; with `--watch`, poll every 10 s while the
 *     status is running/queued/pending (or absent).
 *  3. Write a normalized `meta.json`, plus `report.md` when the metadata
 *     response carries the report inline.
 *  4. Fetch every file named by `<prefix>/files` (or a fixed list of known
 *     files when the listing endpoint is unavailable).
 *  5. Fetch `prompts/<variant>.md` for every variantId found in the
 *     downloaded `matrix.jsonl` that is not already present locally.
 *
 * File names come from the remote server, so each one is resolved against the
 * run directory and skipped when it would land outside of it.
 *
 * Exits with status 2 on bad usage and with status 1 when the metadata
 * request fails.
 *
 * @param runIdArg id of the remote run to download
 * @param rest remaining CLI arguments (flags listed above)
 */
export async function runDownload(runIdArg, ...rest) {
    if (!runIdArg) {
        console.error("usage: claude-overnight-evolve download <runId> --base-url <url> [--token <token>] [--project <id>] [--watch]");
        process.exit(2);
    }
    const runId = runIdArg;
    let baseUrl;
    let token;
    let projectId;
    let watch = false;
    for (let i = 0; i < rest.length; i++) {
        if (rest[i] === "--base-url" && rest[i + 1]) {
            baseUrl = rest[i + 1];
            i++;
        }
        else if (rest[i] === "--token" && rest[i + 1]) {
            token = rest[i + 1];
            i++;
        }
        else if (rest[i] === "--project" && rest[i + 1]) {
            projectId = rest[i + 1];
            i++;
        }
        else if (rest[i] === "--watch") {
            watch = true;
        }
    }
    if (!baseUrl) {
        console.error("--base-url is required (e.g. https://fornace.net or http://localhost:8787)");
        process.exit(2);
    }
    const authHeaders = {};
    if (token)
        authHeaders.Authorization = `Bearer ${token}`;
    // With --project the project-scoped API route is used; otherwise the plain /runs route.
    const prefix = projectId
        ? `${baseUrl.replace(/\/$/, "")}/api/projects/${projectId}/prompt-evolution/${runId}`
        : `${baseUrl.replace(/\/$/, "")}/runs/${runId}`;
    let remoteMeta = null;
    let metaBody = null;
    while (true) {
        const metaRes = await fetch(prefix, { headers: authHeaders });
        if (!metaRes.ok) {
            console.error(`Failed to fetch run metadata: HTTP ${metaRes.status}`);
            process.exit(1);
        }
        metaBody = (await metaRes.json());
        // The metadata may be nested under `meta` or be the response body itself.
        remoteMeta = typeof metaBody.meta === "object" && metaBody.meta
            ? metaBody.meta
            : metaBody;
        const status = remoteMeta.status;
        if (watch && (status === "running" || status === "queued" || status === "pending" || !status)) {
            process.stdout.write(`\r[${new Date().toLocaleTimeString()}] Run ${runId} is ${status || "running"}... waiting... `);
            await new Promise(r => setTimeout(r, 10000));
        }
        else {
            if (watch)
                console.log(`\nRun finished with status: ${status}`);
            break;
        }
    }
    const { runDir } = await import("../prompt-evolution/persistence.js");
    const { mkdirSync, writeFileSync, existsSync, readFileSync } = await import("node:fs");
    const { dirname, join, resolve, sep } = await import("node:path");
    const localDir = runDir(runId);
    const localRoot = resolve(localDir);
    // Map a server-supplied relative name to an absolute path inside the run
    // directory, or null when the name is unusable or escapes the directory.
    const containedPath = (name) => {
        if (typeof name !== "string" || name.length === 0)
            return null;
        const target = resolve(join(localDir, name));
        return target.startsWith(localRoot + sep) ? target : null;
    };
    mkdirSync(localDir, { recursive: true });
    mkdirSync(join(localDir, "prompts"), { recursive: true });
    // Normalize the remote metadata, accepting alternate field names and filling defaults.
    const meta = {
        runId,
        promptPath: (remoteMeta.promptPath ?? remoteMeta.prompt ?? ""),
        target: (remoteMeta.target ?? "claude-overnight"),
        evalModel: (remoteMeta.evalModel ?? ""),
        mutateModel: (remoteMeta.mutateModel ?? remoteMeta.evalModel ?? ""),
        generations: (remoteMeta.generations ?? 10),
        populationCap: (remoteMeta.populationCap ?? remoteMeta.population ?? 8),
        startedAt: (remoteMeta.startedAt ?? remoteMeta.queuedAt ?? new Date().toISOString()),
        status: (remoteMeta.status ?? "done"),
        caseNames: [],
    };
    writeFileSync(join(localDir, "meta.json"), JSON.stringify(meta, null, 2) + "\n");
    const inlineReport = typeof metaBody.report === "string" ? metaBody.report : metaBody.report_md;
    if (typeof inlineReport === "string") {
        writeFileSync(join(localDir, "report.md"), inlineReport);
        console.log(" ✓ report.md (inline)");
    }
    const listRes = await fetch(`${prefix}/files`, { headers: authHeaders });
    let files = [];
    if (listRes.ok) {
        const listBody = (await listRes.json());
        // Anything other than an array is treated as "no files listed".
        files = Array.isArray(listBody?.files) ? listBody.files : [];
    }
    else {
        console.log(` ⚠ File listing not available (HTTP ${listRes.status}); trying known files...`);
        files = ["report.md", "best.md", "matrix.jsonl", "learning.jsonl"];
    }
    for (const file of files) {
        const localPath = containedPath(file);
        if (!localPath) {
            // Never let a remote listing write outside the run directory.
            console.error(` ⚠ ${String(file)}: skipped (path escapes run directory)`);
            continue;
        }
        const fileRes = await fetch(`${prefix}/files/${encodeURIComponent(file)}`, { headers: authHeaders });
        if (!fileRes.ok) {
            console.error(` ⚠ ${file}: HTTP ${fileRes.status}`);
            continue;
        }
        const data = Buffer.from(await fileRes.arrayBuffer());
        mkdirSync(dirname(localPath), { recursive: true });
        writeFileSync(localPath, data);
        console.log(` ✓ ${file}`);
    }
    const matrixPath = join(localDir, "matrix.jsonl");
    if (existsSync(matrixPath)) {
        const variantIds = new Set();
        for (const line of readFileSync(matrixPath, "utf-8").trim().split("\n")) {
            if (!line)
                continue;
            try {
                const row = JSON.parse(line);
                if (row.variantId)
                    variantIds.add(row.variantId);
            }
            catch { /* best effort: lines that are not valid JSON are skipped */ }
        }
        for (const vid of variantIds) {
            // Sanitized ids contain only [a-zA-Z0-9_-], so this path stays inside prompts/.
            const safeId = String(vid).replace(/[^a-zA-Z0-9_-]/g, "_");
            const promptFile = `prompts/${safeId}.md`;
            if (existsSync(join(localDir, promptFile)))
                continue;
            const fileRes = await fetch(`${prefix}/files/${encodeURIComponent(promptFile)}`, { headers: authHeaders });
            if (!fileRes.ok)
                continue;
            const data = Buffer.from(await fileRes.arrayBuffer());
            mkdirSync(dirname(join(localDir, promptFile)), { recursive: true });
            writeFileSync(join(localDir, promptFile), data);
            console.log(` ✓ ${promptFile} (from matrix)`);
        }
    }
    console.log(`\nDownloaded to ${localDir}`);
}
|
|
174
|
+
/**
 * Write a variant of a persisted run back into the source prompt file.
 *
 * The variant is taken from `--variant <id>` when given; otherwise from the
 * variantId cell of the run's best.md, and failing that from the matrix row
 * with the highest gmean. The target marker block is `--into <block>` when
 * given, or the variant's own name when it is one of the named seed variants.
 *
 * Every failure path (missing run id, undeterminable variant, missing variant
 * file, no target block, missing prompt file) prints to stderr and exits with
 * status 2.
 *
 * @param runIdArg id of the local run to promote from
 * @param rest remaining CLI arguments (`--variant <id>`, `--into <block>`)
 */
export async function runPromote(runIdArg, ...rest) {
    if (!runIdArg) {
        console.error("usage: claude-overnight-evolve promote <runId> [--variant <id>] [--into <block>]");
        process.exit(2);
    }
    const runId = runIdArg;
    let variantId;
    let intoBlock;
    // A flag is only consumed together with a non-empty value following it.
    let cursor = 0;
    while (cursor < rest.length) {
        const flag = rest[cursor];
        const value = rest[cursor + 1];
        if (value && flag === "--variant") {
            variantId = value;
            cursor += 2;
        }
        else if (value && flag === "--into") {
            intoBlock = value;
            cursor += 2;
        }
        else {
            cursor += 1;
        }
    }
    const { loadRun, runDir } = await import("../prompt-evolution/persistence.js");
    const { PROMPTS_ROOT } = await import("../prompts/load.js");
    const { readFileSync, writeFileSync, existsSync } = await import("node:fs");
    const { join } = await import("node:path");
    const run = loadRun(runId);
    const promptPath = run.meta.promptPath;

    let sourceVariant = variantId;
    if (!sourceVariant) {
        // First choice: the variantId recorded in best.md.
        sourceVariant = run.bestMd.match(/variantId\s*\|\s*`([^`]+)`/)?.[1];
        if (!sourceVariant) {
            // Fallback: the matrix row with the highest gmean.
            const rows = run.matrix;
            if (rows.length) {
                const ranked = [...rows].sort((a, b) => b.gmean - a.gmean);
                sourceVariant = ranked[0].variantId;
            }
        }
    }
    if (!sourceVariant) {
        console.error("Could not determine best variant for run. Use --variant <id>.");
        process.exit(2);
    }

    const safeId = sourceVariant.replace(/[^a-zA-Z0-9_-]/g, "_");
    const variantFile = join(runDir(runId), "prompts", `${safeId}.md`);
    if (!existsSync(variantFile)) {
        console.error(`Variant file not found: ${variantFile}`);
        process.exit(2);
    }
    // Drop the leading "<!-- generation=... -->" header comment from the variant file, if present.
    const rawVariant = readFileSync(variantFile, "utf-8");
    const variantText = rawVariant.replace(/^<!--\s*generation=[\s\S]*?-->\n\n?/, "");

    const namedVariants = ["tight", "standard", "large", "wrap", "amend", "wave", "run", "file", "all", "postfailed", "nofiles"];
    let targetBlock = intoBlock;
    if (targetBlock == null && namedVariants.includes(sourceVariant.toLowerCase())) {
        targetBlock = sourceVariant;
    }
    if (!targetBlock) {
        console.error(`Variant "${sourceVariant}" is not a named seed variant. Use --into <block> to specify which marker block to overwrite.`);
        process.exit(2);
    }

    const promptFile = join(PROMPTS_ROOT, promptPath + ".md");
    if (!existsSync(promptFile)) {
        console.error(`Prompt file not found: ${promptFile}`);
        process.exit(2);
    }
    const currentText = readFileSync(promptFile, "utf-8");
    const newText = replaceVariantBlock(currentText, targetBlock, variantText);
    writeFileSync(promptFile, newText);
    console.log(`Promoted ${sourceVariant} → ${promptPath} (<!-- ${targetBlock.toUpperCase()} -->)`);
    console.log(` file: ${promptFile}`);
}
|
|
235
|
+
/**
 * Replace the body of a named marker block inside a prompt file.
 *
 * The file is split into sections on the "<!-- @@@ -->" separator line. In
 * every section containing a line that matches the block marker
 * (`<!-- NAME -->`, optionally preceded by a run of dashes, matched
 * case-insensitively), everything after the marker line is replaced by a
 * blank line, the trimmed `newText`, and a single trailing newline. Sections
 * without the marker are returned untouched.
 *
 * The block name is escaped before being placed in the pattern, so names
 * containing regular-expression metacharacters match literally.
 *
 * @param fileText full text of the prompt file
 * @param blockName marker name to look for (case-insensitive)
 * @param newText replacement body for the block
 * @returns the file text with the block body replaced
 * @throws Error when no section contains the marker
 */
function replaceVariantBlock(fileText, blockName, newText) {
    const separator = "\n<!-- @@@ -->\n";
    const sections = fileText.split(separator);
    // Escape metacharacters so the name is matched as literal text.
    const escapedName = blockName.toUpperCase().replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const markerRegex = new RegExp(`<!--\\s*(?:[─\\-]+\\s*)?${escapedName}\\s*-->`, "i");
    let found = false;
    const newSections = sections.map((section) => {
        const lines = section.split("\n");
        const markerIndex = lines.findIndex((line) => markerRegex.test(line));
        if (markerIndex === -1)
            return section;
        found = true;
        // Keep everything up to and including the marker line; drop the old body.
        const before = lines.slice(0, markerIndex + 1);
        return [...before, "", newText.trim(), ""].join("\n").trimEnd() + "\n";
    });
    if (!found)
        throw new Error(`Variant block "${blockName.toUpperCase()}" not found in prompt file`);
    return newSections.join(separator);
}
|
package/dist/bin/evolve.js
CHANGED
|
@@ -19,6 +19,7 @@ import { evolvePrompt } from "../prompt-evolution/index.js";
|
|
|
19
19
|
import { PLAN_CASES } from "../prompt-evolution/fixtures/plan-cases.js";
|
|
20
20
|
import { harvestRealCases } from "../prompt-evolution/fixtures/harvest.js";
|
|
21
21
|
import { generateCases } from "../prompt-evolution/fixtures/generate.js";
|
|
22
|
+
import { runDiff, runDownload, runPromote } from "./evolve-subcommands.js";
|
|
22
23
|
import { scenariosToCases, PLANNING_SCENARIOS, REVIEW_SCENARIOS, SUPERVISION_SCENARIOS, STUCK_SCENARIOS, hydrateCases, extractPrompt, } from "../prompt-evolution/adapters/mcp-browser.js";
|
|
23
24
|
function help() {
|
|
24
25
|
process.stdout.write(`Usage: claude-overnight-evolve [options]
|
|
@@ -57,6 +58,19 @@ Options:
|
|
|
57
58
|
--gen-model <model> Model used by the case generator (default: eval-model)
|
|
58
59
|
|
|
59
60
|
Subcommands:
|
|
61
|
+
claude-overnight-evolve download <runId> --base-url <url> [--token <token>]
|
|
62
|
+
[--project <id>] [--watch]
|
|
63
|
+
Pull a remote run (fornace or self-host) into the local
|
|
64
|
+
~/.claude-overnight/prompt-evolution/<runId>/ directory
|
|
65
|
+
so you can audit, diff, or promote it offline. Use
|
|
66
|
+
--project for fornace; omit for self-host. If --watch
|
|
67
|
+
is set, it will poll until the run finishes before downloading.
|
|
68
|
+
claude-overnight-evolve promote <runId> [--variant <id>] [--into <block>]
|
|
69
|
+
Write a run's winning variant back into the source
|
|
70
|
+
prompt file's <!-- BLOCK --> marker. If --variant is
|
|
71
|
+
omitted, uses the run's best variant. If the variant is
|
|
72
|
+
a seed (tight/standard/large) --into defaults to its
|
|
73
|
+
name; evo-* or default variants require --into.
|
|
60
74
|
claude-overnight-evolve diff <runIdA> <runIdB>
|
|
61
75
|
Print a per-variant diff of two persisted runs
|
|
62
76
|
--base-url <url> API base URL override
|
|
@@ -208,6 +222,16 @@ function parseArgs() {
|
|
|
208
222
|
return opts;
|
|
209
223
|
}
|
|
210
224
|
async function main() {
|
|
225
|
+
// Subcommand: download a remote run for local audit/promote.
|
|
226
|
+
if (process.argv[2] === "download") {
|
|
227
|
+
await runDownload(process.argv[3], ...process.argv.slice(4));
|
|
228
|
+
return;
|
|
229
|
+
}
|
|
230
|
+
// Subcommand: promote a run variant back into the source prompt file.
|
|
231
|
+
if (process.argv[2] === "promote") {
|
|
232
|
+
await runPromote(process.argv[3], ...process.argv.slice(4));
|
|
233
|
+
return;
|
|
234
|
+
}
|
|
211
235
|
// Subcommand: diff two persisted runs.
|
|
212
236
|
if (process.argv[2] === "diff") {
|
|
213
237
|
await runDiff(process.argv[3], process.argv[4]);
|
|
@@ -357,43 +381,6 @@ async function evolveOne(opts) {
|
|
|
357
381
|
console.log(result.bestVariant.text);
|
|
358
382
|
return result;
|
|
359
383
|
}
|
|
360
|
-
async function runDiff(runIdA, runIdB) {
|
|
361
|
-
if (!runIdA || !runIdB) {
|
|
362
|
-
console.error("usage: claude-overnight-evolve diff <runIdA> <runIdB>");
|
|
363
|
-
process.exit(2);
|
|
364
|
-
}
|
|
365
|
-
const { loadRun } = await import("../prompt-evolution/persistence.js");
|
|
366
|
-
const a = loadRun(runIdA);
|
|
367
|
-
const b = loadRun(runIdB);
|
|
368
|
-
const collect = (run) => {
|
|
369
|
-
const out = new Map();
|
|
370
|
-
for (const rec of run.matrix) {
|
|
371
|
-
// Keep the latest-generation row per variantId so diff compares final state.
|
|
372
|
-
const existing = out.get(rec.variantId);
|
|
373
|
-
if (!existing || rec.generation > existing.generation) {
|
|
374
|
-
out.set(rec.variantId, { generation: rec.generation, variantId: rec.variantId, gmean: rec.gmean });
|
|
375
|
-
}
|
|
376
|
-
}
|
|
377
|
-
return out;
|
|
378
|
-
};
|
|
379
|
-
const rowsA = collect(a);
|
|
380
|
-
const rowsB = collect(b);
|
|
381
|
-
const ids = new Set([...rowsA.keys(), ...rowsB.keys()]);
|
|
382
|
-
console.log(`# Diff: ${runIdA} → ${runIdB}`);
|
|
383
|
-
console.log("");
|
|
384
|
-
console.log(`| Variant | A gmean | B gmean | Δ | note |`);
|
|
385
|
-
console.log(`|-----------|-----------|-----------|-------|--------|`);
|
|
386
|
-
const sorted = [...ids].sort();
|
|
387
|
-
for (const id of sorted) {
|
|
388
|
-
const ra = rowsA.get(id);
|
|
389
|
-
const rb = rowsB.get(id);
|
|
390
|
-
const ga = ra ? (ra.gmean * 100).toFixed(1) : "—";
|
|
391
|
-
const gb = rb ? (rb.gmean * 100).toFixed(1) : "—";
|
|
392
|
-
const delta = ra && rb ? ((rb.gmean - ra.gmean) * 100).toFixed(1) : "—";
|
|
393
|
-
const note = !ra ? "new in B" : !rb ? "missing in B" : ra.gmean < rb.gmean ? "↑" : ra.gmean > rb.gmean ? "↓" : "=";
|
|
394
|
-
console.log(`| ${id.padEnd(10)}| ${ga.padStart(9)} | ${gb.padStart(9)} | ${delta.padStart(5)} | ${note} |`);
|
|
395
|
-
}
|
|
396
|
-
}
|
|
397
384
|
main().catch((err) => {
|
|
398
385
|
console.error(err);
|
|
399
386
|
process.exit(1);
|
package/dist/core/_version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION = "1.
|
|
1
|
+
export declare const VERSION = "1.60.0";
|
package/dist/core/_version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Auto-generated by build — do not edit manually.
|
|
2
|
-
export const VERSION = "1.
|
|
2
|
+
export const VERSION = "1.60.0";
|
|
@@ -43,11 +43,10 @@ export async function judgeOutput(rawOutput, c, opts) {
|
|
|
43
43
|
const headers = {
|
|
44
44
|
"Content-Type": "application/json",
|
|
45
45
|
"Authorization": `Bearer ${authToken}`,
|
|
46
|
+
"User-Agent": "Claude-Code/0.1.0",
|
|
46
47
|
};
|
|
47
48
|
if (isAnthropic)
|
|
48
49
|
headers["anthropic-version"] = "2023-06-01";
|
|
49
|
-
if (isKimi)
|
|
50
|
-
headers["User-Agent"] = "Kilo-Code/1.0";
|
|
51
50
|
const res = await fetch(endpoint, {
|
|
52
51
|
method: "POST",
|
|
53
52
|
headers,
|
|
@@ -15,19 +15,36 @@ export async function mutate(request, opts) {
|
|
|
15
15
|
const baseUrl = (opts.baseUrl ?? process.env.ANTHROPIC_BASE_URL ?? "https://api.anthropic.com").replace(/\/$/, "");
|
|
16
16
|
const authToken = opts.authToken ?? process.env.ANTHROPIC_AUTH_TOKEN ?? process.env.ANTHROPIC_API_KEY ?? "";
|
|
17
17
|
const isKimi = /kimi\.com/i.test(baseUrl);
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
let body;
|
|
19
|
+
if (baseUrl.includes("generativelanguage")) {
|
|
20
|
+
body = JSON.stringify({
|
|
21
|
+
model: opts.model,
|
|
22
|
+
max_completion_tokens: opts.maxTokens ?? 4096,
|
|
23
|
+
messages: [{ role: "user", content: prompt }],
|
|
24
|
+
});
|
|
25
|
+
}
|
|
26
|
+
else {
|
|
27
|
+
body = JSON.stringify({
|
|
28
|
+
model: opts.model,
|
|
29
|
+
max_tokens: opts.maxTokens ?? 4096,
|
|
30
|
+
messages: [{ role: "user", content: prompt }],
|
|
31
|
+
});
|
|
32
|
+
}
|
|
23
33
|
const headers = {
|
|
24
34
|
"Content-Type": "application/json",
|
|
25
35
|
"Authorization": `Bearer ${authToken}`,
|
|
26
36
|
"anthropic-version": "2023-06-01",
|
|
37
|
+
"User-Agent": "Claude-Code/0.1.0",
|
|
27
38
|
};
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
39
|
+
let endpoint = `${baseUrl}/v1/messages`;
|
|
40
|
+
if (baseUrl.includes("generativelanguage")) {
|
|
41
|
+
endpoint = `${baseUrl}/v1/chat/completions`;
|
|
42
|
+
}
|
|
43
|
+
else if (!/^https?:\/\/(api\.)?anthropic\.com/i.test(baseUrl) && !baseUrl.includes("/v1/messages")) {
|
|
44
|
+
// A lot of OpenAI compatible endpoints use `/v1/chat/completions` natively
|
|
45
|
+
endpoint = `${baseUrl}/v1/chat/completions`;
|
|
46
|
+
}
|
|
47
|
+
const res = await fetch(endpoint, {
|
|
31
48
|
method: "POST",
|
|
32
49
|
headers,
|
|
33
50
|
body,
|
|
@@ -38,7 +55,15 @@ export async function mutate(request, opts) {
|
|
|
38
55
|
throw new Error(`Mutator HTTP ${res.status}: ${text.slice(0, 200)}`);
|
|
39
56
|
}
|
|
40
57
|
const data = await res.json();
|
|
41
|
-
|
|
58
|
+
let raw = "";
|
|
59
|
+
if (endpoint.includes("chat/completions")) {
|
|
60
|
+
const chatData = data;
|
|
61
|
+
raw = chatData.choices?.[0]?.message?.content ?? "";
|
|
62
|
+
}
|
|
63
|
+
else {
|
|
64
|
+
const msgData = data;
|
|
65
|
+
raw = msgData.content?.map((c) => c.text ?? "").join("") ?? "";
|
|
66
|
+
}
|
|
42
67
|
return parseMutantOutput(raw, request);
|
|
43
68
|
}
|
|
44
69
|
function buildMutatorPrompt(req) {
|
|
@@ -8,8 +8,7 @@
|
|
|
8
8
|
* Supports both Anthropic-native and OpenAI-compatible endpoints so we can
|
|
9
9
|
* run the same eval against Haiku, Kimi, and OpenRouter without a rewrite.
|
|
10
10
|
*/
|
|
11
|
-
|
|
12
|
-
const USER_AGENT = `claude-overnight-evolve/${VERSION}`;
|
|
11
|
+
const USER_AGENT = `Claude-Code/0.1.0`;
|
|
13
12
|
export async function defaultCallModel(userText, systemText, opts) {
|
|
14
13
|
const baseUrl = (opts.baseUrl ?? process.env.ANTHROPIC_BASE_URL ?? "https://api.anthropic.com").replace(/\/$/, "");
|
|
15
14
|
const authToken = opts.authToken ?? process.env.ANTHROPIC_AUTH_TOKEN ?? process.env.ANTHROPIC_API_KEY ?? "";
|
|
@@ -44,16 +43,21 @@ export async function defaultCallModel(userText, systemText, opts) {
|
|
|
44
43
|
if (systemText)
|
|
45
44
|
messages.push({ role: "system", content: systemText });
|
|
46
45
|
messages.push({ role: "user", content: userText });
|
|
47
|
-
|
|
48
|
-
// max_completion_tokens. Kimi's coding endpoint still accepts max_tokens.
|
|
49
|
-
// Sending both is safe — OpenAI, Moonshot, DeepSeek, and Kimi all tolerate
|
|
50
|
-
// the extra field, and we're future-proof against the deprecation.
|
|
51
|
-
body = JSON.stringify({
|
|
46
|
+
const payload = {
|
|
52
47
|
model: opts.model,
|
|
53
|
-
max_tokens: maxOut,
|
|
54
|
-
max_completion_tokens: maxOut,
|
|
55
48
|
messages,
|
|
56
|
-
}
|
|
49
|
+
};
|
|
50
|
+
// Platform.moonshot.ai marks max_tokens deprecated in favor of max_completion_tokens.
|
|
51
|
+
// Kimi's coding endpoint accepts max_tokens.
|
|
52
|
+
// Gemini's OpenAI wrapper strictly rejects having BOTH set.
|
|
53
|
+
if (baseUrl.includes("generativelanguage")) {
|
|
54
|
+
payload.max_completion_tokens = maxOut;
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
payload.max_tokens = maxOut;
|
|
58
|
+
payload.max_completion_tokens = maxOut;
|
|
59
|
+
}
|
|
60
|
+
body = JSON.stringify(payload);
|
|
57
61
|
}
|
|
58
62
|
const res = await fetch(endpoint, {
|
|
59
63
|
method: "POST",
|
package/dist/prompts/load.d.ts
CHANGED
package/dist/prompts/load.js
CHANGED
|
@@ -2,7 +2,7 @@ import { readFileSync } from "node:fs";
|
|
|
2
2
|
import { dirname, join } from "node:path";
|
|
3
3
|
import { fileURLToPath } from "node:url";
|
|
4
4
|
// Resolve <pkg>/prompts whether running from dist/ (installed) or src/ (dev).
|
|
5
|
-
const PROMPTS_ROOT = (() => {
|
|
5
|
+
export const PROMPTS_ROOT = (() => {
|
|
6
6
|
const here = dirname(fileURLToPath(import.meta.url));
|
|
7
7
|
for (const depth of [2, 3, 4]) {
|
|
8
8
|
const candidate = join(here, ...Array(depth).fill(".."), "prompts");
|
package/dist/ui/input.js
CHANGED
|
@@ -169,19 +169,35 @@ export function InputLayer({ store, callbacks, onToast }) {
|
|
|
169
169
|
const lc = s.liveConfig;
|
|
170
170
|
if (key.rightArrow || key.downArrow) {
|
|
171
171
|
callbacks.cycleAgent(1);
|
|
172
|
+
const nextId = store.get().selectedAgentId;
|
|
173
|
+
if (nextId != null && s.viewMode.startsWith("stream:agent-")) {
|
|
174
|
+
store.patch({ viewMode: `stream:agent-${nextId}` });
|
|
175
|
+
}
|
|
172
176
|
return;
|
|
173
177
|
}
|
|
174
178
|
if (key.upArrow) {
|
|
175
179
|
callbacks.cycleAgent(-1);
|
|
180
|
+
const nextId = store.get().selectedAgentId;
|
|
181
|
+
if (nextId != null && s.viewMode.startsWith("stream:agent-")) {
|
|
182
|
+
store.patch({ viewMode: `stream:agent-${nextId}` });
|
|
183
|
+
}
|
|
176
184
|
return;
|
|
177
185
|
}
|
|
178
186
|
if (key.leftArrow) {
|
|
179
187
|
callbacks.clearSelectedAgent();
|
|
188
|
+
if (s.viewMode.startsWith("stream:agent-"))
|
|
189
|
+
store.patch({ viewMode: "events" });
|
|
180
190
|
return;
|
|
181
191
|
}
|
|
182
192
|
if (key.escape) {
|
|
183
193
|
if (s.selectedAgentId != null) {
|
|
184
194
|
callbacks.clearSelectedAgent();
|
|
195
|
+
if (s.viewMode.startsWith("stream:agent-"))
|
|
196
|
+
store.patch({ viewMode: "events" });
|
|
197
|
+
return;
|
|
198
|
+
}
|
|
199
|
+
if (s.viewMode !== "events") {
|
|
200
|
+
store.patch({ viewMode: "events" });
|
|
185
201
|
return;
|
|
186
202
|
}
|
|
187
203
|
if (s.ask && !s.ask.streaming) {
|
|
@@ -202,10 +218,18 @@ export function InputLayer({ store, callbacks, onToast }) {
|
|
|
202
218
|
callbacks.openAskTempFile();
|
|
203
219
|
return;
|
|
204
220
|
}
|
|
221
|
+
if (key.tab) {
|
|
222
|
+
const modes = ["stream:planner", "stream:steerer", "stream:verifier"];
|
|
223
|
+
const current = s.viewMode;
|
|
224
|
+
const idx = modes.indexOf(current);
|
|
225
|
+
const next = modes[(idx + 1) % modes.length];
|
|
226
|
+
store.patch({ viewMode: next });
|
|
227
|
+
return;
|
|
228
|
+
}
|
|
205
229
|
if (!raw || raw.length !== 1)
|
|
206
230
|
return;
|
|
207
231
|
const code = raw.charCodeAt(0);
|
|
208
|
-
if (code < 0x20 || code > 0x7E)
|
|
232
|
+
if (code !== 9 && (code < 0x20 || code > 0x7E))
|
|
209
233
|
return;
|
|
210
234
|
if (key.ctrl || key.meta)
|
|
211
235
|
return;
|
|
@@ -273,8 +297,11 @@ export function InputLayer({ store, callbacks, onToast }) {
|
|
|
273
297
|
if (/^[0-9]$/.test(raw) && swarm) {
|
|
274
298
|
const n = parseInt(raw, 10);
|
|
275
299
|
const running = swarm.agents.filter(a => a.status === "running");
|
|
276
|
-
if (n < running.length)
|
|
277
|
-
|
|
300
|
+
if (n < running.length) {
|
|
301
|
+
const id = running[n].id;
|
|
302
|
+
callbacks.selectAgent(id);
|
|
303
|
+
store.patch({ viewMode: `stream:agent-${id}` });
|
|
304
|
+
}
|
|
278
305
|
}
|
|
279
306
|
}, { isActive: !textEntry });
|
|
280
307
|
if (state.input.mode === "none")
|
package/dist/ui/ui.js
CHANGED
|
@@ -8,7 +8,7 @@ import { UiStore, makeInitialState } from "./store.js";
|
|
|
8
8
|
import { App } from "./shell.js";
|
|
9
9
|
const MAX_STEERING_EVENTS = 60;
|
|
10
10
|
const MAX_ASK_LINES = 40;
|
|
11
|
-
const MAX_DEBRIEF_HISTORY =
|
|
11
|
+
const MAX_DEBRIEF_HISTORY = 50;
|
|
12
12
|
function askDisplayCap() {
|
|
13
13
|
return Math.max(3, Math.min(MAX_ASK_LINES, (process.stdout.rows || 40) - 20));
|
|
14
14
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.59.0",
|
|
3
|
+
"version": "1.60.1",
|
|
4
4
|
"description": "Overnight parallel coding agents in git worktrees, with a self-curating skill memory that improves while the run is going. Mix Claude Opus as planner, Kimi 2.6 or Cursor composer-2 as cheap fast worker, Gemini or Qwen for bulk implementation. Multi-wave autonomous loop that plans, executes, reviews, and steers itself until the objective is met. Crash-safe resume, rate-limit aware, usage cap preserves headroom for your interactive Claude Code.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
"test": "node --test dist/__tests__/*.test.js",
|
|
15
15
|
"matrix:cursor-proxy": "node scripts/cursor-proxy-keychain-matrix.mjs",
|
|
16
16
|
"evolve": "node dist/bin/evolve.js",
|
|
17
|
+
"evolve:favorite": "node dist/bin/evolve.js --prompt 10_planning/10-3_plan --eval-model gemini-3.1-flash-lite-preview --mutate-model gemini-3.1-pro-preview --generations 10 --population 8 --plateau 3 --reps 3",
|
|
17
18
|
"prepublishOnly": "node scripts/sync-plugin-version.js"
|
|
18
19
|
},
|
|
19
20
|
"dependencies": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.60.1",
|
|
4
4
|
"description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs: overnight parallel coding agents in git worktrees with a self-curating skill memory, multi-wave steering, three-layer review, and crash-safe resume. Mix Opus planner with Kimi 2.6, Cursor composer-2, Gemini, Qwen, or any Anthropic-compatible worker.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Francesco Fornace"
|
|
@@ -30,23 +30,20 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
30
30
|
<!-- @@@ -->
|
|
31
31
|
|
|
32
32
|
<!-- STANDARD -->
|
|
33
|
+
<!-- LARGE -->
|
|
33
34
|
|
|
34
|
-
You are a task coordinator for a parallel agent system
|
|
35
|
+
You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
|
|
35
36
|
|
|
36
37
|
Objective: {{objective}}
|
|
37
38
|
|
|
38
39
|
{{contextConstraintNote}}
|
|
39
40
|
|
|
40
|
-
Do NOT over-specify. Give each agent a MISSION, not step-by-step instructions. Let agents make their own decisions about implementation details.
|
|
41
|
-
|
|
42
41
|
Requirements:
|
|
43
42
|
- Target exactly ~{{budget}} tasks
|
|
44
|
-
- Each task should be a substantial piece of work
|
|
45
43
|
- Each task MUST be independent -- no task depends on another
|
|
46
|
-
-
|
|
47
|
-
-
|
|
48
|
-
-
|
|
49
|
-
- Think in terms of workstreams: architecture, features, tests, docs, UX, performance, etc.{{#if concurrency}}
|
|
44
|
+
- Each task should target specific files/areas to avoid merge conflicts
|
|
45
|
+
- Be specific: mention exact file paths, function names, what to change
|
|
46
|
+
- Keep tasks focused: one concrete change per task{{#if concurrency}}
|
|
50
47
|
- {{concurrency}} agents run in parallel -- tasks that run concurrently must touch DIFFERENT files to avoid merge conflicts{{/if}}{{#if flexNote}}
|
|
51
48
|
|
|
52
49
|
{{flexNote}}{{/if}}
|
|
@@ -54,8 +51,8 @@ Requirements:
|
|
|
54
51
|
Respond with ONLY a JSON object (no markdown fences):
|
|
55
52
|
{
|
|
56
53
|
"tasks": [
|
|
57
|
-
{ "prompt": "
|
|
58
|
-
{ "prompt": "
|
|
54
|
+
{ "prompt": "In src/foo.ts, refactor the bar() function to..." },
|
|
55
|
+
{ "prompt": "Add unit tests for the baz module in test/baz.test.ts..." }
|
|
59
56
|
]
|
|
60
57
|
}
|
|
61
58
|
|