claude-overnight 1.57.2 → 1.57.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/evolve.js
CHANGED
|
@@ -294,8 +294,15 @@ async function evolveOne(opts) {
|
|
|
294
294
|
}
|
|
295
295
|
catch (err) {
|
|
296
296
|
const msg = err.message ?? String(err);
|
|
297
|
-
|
|
298
|
-
|
|
297
|
+
// When the user explicitly asked for --case-pool, fall-back to the
|
|
298
|
+
// 10-case default silently would be dishonest — the statistical
|
|
299
|
+
// power they expected won't be there. Fail loud so the error lands
|
|
300
|
+
// in the fornace `error` field (only surfaced on non-zero exit).
|
|
301
|
+
throw new Error(`Case generation failed: ${msg.slice(0, 1500)}\n\n` +
|
|
302
|
+
`You asked for --case-pool ${opts.casePool}, but the generator ` +
|
|
303
|
+
`couldn't produce valid cases via ${opts.genModel ?? opts.evalModel}. ` +
|
|
304
|
+
`Try --gen-model claude-haiku-4-5 (or another JSON-reliable model), ` +
|
|
305
|
+
`or drop --case-pool to proceed with the synthetic fixture alone.`);
|
|
299
306
|
}
|
|
300
307
|
}
|
|
301
308
|
}
|
package/dist/core/_version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION = "1.57.2";
|
|
1
|
+
export declare const VERSION = "1.57.4";
|
package/dist/core/_version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Auto-generated by build — do not edit manually.
|
|
2
|
-
export const VERSION = "1.57.2";
|
|
2
|
+
export const VERSION = "1.57.4";
|
|
@@ -41,34 +41,73 @@ export async function generateCases(opts) {
|
|
|
41
41
|
model: opts.model,
|
|
42
42
|
baseUrl: opts.baseUrl,
|
|
43
43
|
authToken: opts.authToken,
|
|
44
|
-
maxTokens: Math.max(
|
|
44
|
+
maxTokens: Math.max(8192, askFor * 200),
|
|
45
45
|
timeoutMs: 120_000,
|
|
46
46
|
};
|
|
47
47
|
const { raw } = await defaultCallModel(prompt, undefined, callOpts);
|
|
48
48
|
const parsed = attemptJsonParse(raw);
|
|
49
|
-
const items =
|
|
49
|
+
const items = coerceArray(parsed);
|
|
50
50
|
if (!Array.isArray(items)) {
|
|
51
|
-
throw new Error(`Generator returned
|
|
51
|
+
throw new Error(`Generator returned no JSON array. First 500 chars of response:\n${raw.slice(0, 500)}`);
|
|
52
52
|
}
|
|
53
53
|
const newCases = [];
|
|
54
54
|
const seen = new Set([
|
|
55
55
|
...existingSigs,
|
|
56
56
|
...cached.map((c) => signatureOf(String(c.vars.objective))),
|
|
57
57
|
]);
|
|
58
|
+
let rejected = 0;
|
|
58
59
|
for (const it of items) {
|
|
59
60
|
const parsed = parseGenerated(it);
|
|
60
|
-
if (!parsed)
|
|
61
|
+
if (!parsed) {
|
|
62
|
+
rejected++;
|
|
61
63
|
continue;
|
|
64
|
+
}
|
|
62
65
|
const sig = signatureOf(parsed.objective);
|
|
63
66
|
if (seen.has(sig))
|
|
64
67
|
continue;
|
|
65
68
|
seen.add(sig);
|
|
66
69
|
newCases.push(toCase(parsed, opts.promptPath));
|
|
67
70
|
}
|
|
71
|
+
if (newCases.length === 0) {
|
|
72
|
+
// Silent fall-through hid this in 1.57.0-1.57.2. Throw with diagnostics
|
|
73
|
+
// so the CLI's loud error handler surfaces the real reason.
|
|
74
|
+
throw new Error(`Generator returned ${items.length} item(s) but none passed validation ` +
|
|
75
|
+
`(${rejected} rejected). Check the tier/objective/budget schema. ` +
|
|
76
|
+
`First 500 chars of raw:\n${raw.slice(0, 500)}`);
|
|
77
|
+
}
|
|
68
78
|
const combined = cached.concat(newCases);
|
|
69
79
|
writeCache(cachePath, combined);
|
|
70
80
|
return combined.slice(0, opts.targetCount);
|
|
71
81
|
}
|
|
82
|
+
/**
|
|
83
|
+
* Coerce a parsed JSON value into an array of objectives. Accepts:
|
|
84
|
+
* - Top-level array: [ {tier, objective, budget}, … ]
|
|
85
|
+
* - Wrapper object under a known key: {cases: […]} | {tasks: […]} |
|
|
86
|
+
* {items: […]} | {data: […]} | {objectives: […]}
|
|
87
|
+
* - Array of strings: treat each string as an objective with budget
|
|
88
|
+
* inferred from length (TIGHT < 80 chars, LARGE > 160 chars).
|
|
89
|
+
*/
|
|
90
|
+
function coerceArray(parsed) {
|
|
91
|
+
if (Array.isArray(parsed))
|
|
92
|
+
return parsed.map(stringToRecord);
|
|
93
|
+
if (parsed && typeof parsed === "object") {
|
|
94
|
+
const wrapperKeys = ["cases", "tasks", "items", "data", "objectives"];
|
|
95
|
+
const obj = parsed;
|
|
96
|
+
for (const k of wrapperKeys) {
|
|
97
|
+
if (Array.isArray(obj[k]))
|
|
98
|
+
return obj[k].map(stringToRecord);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
return null;
|
|
102
|
+
}
|
|
103
|
+
function stringToRecord(it) {
|
|
104
|
+
if (typeof it !== "string")
|
|
105
|
+
return it;
|
|
106
|
+
const len = it.length;
|
|
107
|
+
const tier = len < 80 ? "TIGHT" : len > 160 ? "LARGE" : "STANDARD";
|
|
108
|
+
const budget = tier === "TIGHT" ? 4 : tier === "LARGE" ? 30 : 10;
|
|
109
|
+
return { tier, objective: it, budget };
|
|
110
|
+
}
|
|
72
111
|
function buildGeneratorPrompt(count) {
|
|
73
112
|
return `You are generating benchmark test cases for a planner prompt evaluation.
|
|
74
113
|
|
|
@@ -98,14 +137,21 @@ function parseGenerated(raw) {
|
|
|
98
137
|
if (typeof raw !== "object" || raw == null)
|
|
99
138
|
return null;
|
|
100
139
|
const obj = raw;
|
|
101
|
-
const
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
140
|
+
const tierRaw = typeof obj.tier === "string" ? obj.tier.toUpperCase() : "";
|
|
141
|
+
const tier = tierRaw === "TIGHT" || tierRaw === "SMALL" ? "TIGHT"
|
|
142
|
+
: tierRaw === "LARGE" || tierRaw === "BIG" ? "LARGE"
|
|
143
|
+
: tierRaw === "STANDARD" || tierRaw === "MEDIUM" ? "STANDARD"
|
|
144
|
+
: null;
|
|
145
|
+
const objective = typeof obj.objective === "string" ? obj.objective
|
|
146
|
+
: typeof obj.prompt === "string" ? obj.prompt
|
|
147
|
+
: typeof obj.task === "string" ? obj.task
|
|
148
|
+
: null;
|
|
149
|
+
const budget = typeof obj.budget === "number" ? obj.budget
|
|
150
|
+
: typeof obj.size === "number" ? obj.size
|
|
151
|
+
: tier === "TIGHT" ? 4 : tier === "LARGE" ? 30 : 10;
|
|
152
|
+
if (!tier || !objective || objective.length < 10)
|
|
107
153
|
return null;
|
|
108
|
-
if (
|
|
154
|
+
if (budget < 1 || budget > 100)
|
|
109
155
|
return null;
|
|
110
156
|
return { tier, objective: objective.trim(), budget };
|
|
111
157
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.57.2",
|
|
3
|
+
"version": "1.57.4",
|
|
4
4
|
"description": "Overnight parallel coding agents in git worktrees, with a self-curating skill memory that improves while the run is going. Mix Claude Opus as planner, Kimi 2.6 or Cursor composer-2 as cheap fast worker, Gemini or Qwen for bulk implementation. Multi-wave autonomous loop that plans, executes, reviews, and steers itself until the objective is met. Crash-safe resume, rate-limit aware, usage cap preserves headroom for your interactive Claude Code.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.57.2",
|
|
3
|
+
"version": "1.57.4",
|
|
4
4
|
"description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs: overnight parallel coding agents in git worktrees with a self-curating skill memory, multi-wave steering, three-layer review, and crash-safe resume. Mix Opus planner with Kimi 2.6, Cursor composer-2, Gemini, Qwen, or any Anthropic-compatible worker.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Francesco Fornace"
|