claude-overnight 1.55.1 → 1.55.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/evolve.js +6 -0
- package/dist/core/_version.d.ts +1 -1
- package/dist/core/_version.js +1 -1
- package/dist/prompt-evolution/evaluator.js +27 -12
- package/dist/prompt-evolution/index.d.ts +2 -0
- package/dist/prompt-evolution/index.js +2 -2
- package/package.json +1 -1
- package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
package/dist/bin/evolve.js
CHANGED
|
@@ -34,6 +34,7 @@ Options:
|
|
|
34
34
|
--population <n> Max population size (default: 8)
|
|
35
35
|
--plateau <n> Stop early if no improvement for N generations (default: 3)
|
|
36
36
|
--reps <n> Repetitions per (variant, case, model) for noise floor (default: 1)
|
|
37
|
+
--concurrency <n> Max in-flight eval calls (default: 8; bump for slow endpoints)
|
|
37
38
|
--judge Use llm-judge for content scoring (costs extra API calls)
|
|
38
39
|
--judge-model <model> Model to use for the judge (default: same as eval-model)
|
|
39
40
|
--judge-top-n <n> Judge only the top-N variants per generation (default: 4)
|
|
@@ -112,6 +113,10 @@ function parseArgs() {
|
|
|
112
113
|
opts.reps = parseInt(v, 10);
|
|
113
114
|
i++;
|
|
114
115
|
break;
|
|
116
|
+
case "--concurrency":
|
|
117
|
+
opts.concurrency = parseInt(v, 10);
|
|
118
|
+
i++;
|
|
119
|
+
break;
|
|
115
120
|
case "--judge":
|
|
116
121
|
opts.useJudge = true;
|
|
117
122
|
break;
|
|
@@ -215,6 +220,7 @@ async function main() {
|
|
|
215
220
|
populationCap: opts.population,
|
|
216
221
|
plateauGenerations: opts.plateau,
|
|
217
222
|
repetitions: opts.reps > 1 ? opts.reps : undefined,
|
|
223
|
+
concurrency: opts.concurrency,
|
|
218
224
|
judge: opts.useJudge
|
|
219
225
|
? {
|
|
220
226
|
model: opts.judgeModel ?? opts.evalModel,
|
package/dist/core/_version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION = "1.55.1";
|
|
1
|
+
export declare const VERSION = "1.55.2";
|
package/dist/core/_version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Auto-generated by build — do not edit manually.
|
|
2
|
-
export const VERSION = "1.55.1";
|
|
2
|
+
export const VERSION = "1.55.2";
|
package/dist/prompt-evolution/evaluator.js
CHANGED
|
@@ -22,7 +22,7 @@ import { defaultCallModel, attemptJsonParse, } from "./transport.js";
|
|
|
22
22
|
export async function buildMatrix(variants, cases, opts) {
|
|
23
23
|
const models = opts.models && opts.models.length > 0 ? opts.models : [opts.model];
|
|
24
24
|
const reps = Math.max(1, opts.repetitions ?? 1);
|
|
25
|
-
const concurrency = opts.concurrency ??
|
|
25
|
+
const concurrency = opts.concurrency ?? 8;
|
|
26
26
|
const transport = opts.callModel ?? defaultCallModel;
|
|
27
27
|
// Build the full job list: (variant × case × model × rep).
|
|
28
28
|
const jobs = [];
|
|
@@ -35,13 +35,19 @@ export async function buildMatrix(variants, cases, opts) {
|
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
}
|
|
38
|
-
//
|
|
38
|
+
// Work-stealing pool: keep `concurrency` jobs in flight at all times so a
|
|
39
|
+
// slow call (Kimi at 4 min/call is typical) doesn't block the others in its
|
|
40
|
+
// slice. Previous batch-and-wait loop serialized the slowest job in every
|
|
41
|
+
// window of `concurrency`.
|
|
39
42
|
const rawByKey = new Map();
|
|
40
43
|
let done = 0;
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
44
|
+
let next = 0;
|
|
45
|
+
const worker = async () => {
|
|
46
|
+
while (true) {
|
|
47
|
+
const i = next++;
|
|
48
|
+
if (i >= jobs.length)
|
|
49
|
+
return;
|
|
50
|
+
const r = await runSingle(jobs[i], opts, transport);
|
|
45
51
|
const key = `${r.variantId}:${r.caseHash}:${r.model ?? ""}`;
|
|
46
52
|
const arr = rawByKey.get(key) ?? [];
|
|
47
53
|
arr.push(r);
|
|
@@ -49,7 +55,8 @@ export async function buildMatrix(variants, cases, opts) {
|
|
|
49
55
|
done++;
|
|
50
56
|
opts.onProgress?.(done, jobs.length, r.caseName, r.variantId);
|
|
51
57
|
}
|
|
52
|
-
}
|
|
58
|
+
};
|
|
59
|
+
await Promise.all(Array.from({ length: Math.min(concurrency, jobs.length) }, worker));
|
|
53
60
|
// Collapse reps: one aggregated EvaluationResult per (variant, case, model).
|
|
54
61
|
const aggregated = new Map();
|
|
55
62
|
for (const [key, runs] of rawByKey) {
|
|
@@ -198,11 +205,19 @@ async function runJudge(variants, cases, models, aggregated, judge) {
|
|
|
198
205
|
}
|
|
199
206
|
}
|
|
200
207
|
}
|
|
201
|
-
//
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
208
|
+
// Work-stealing pool for judge calls — modest concurrency to stay under
|
|
209
|
+
// provider rate limits, but no slice-blocking.
|
|
210
|
+
const judgeConcurrency = 3;
|
|
211
|
+
let nextJob = 0;
|
|
212
|
+
const judgeWorker = async () => {
|
|
213
|
+
while (true) {
|
|
214
|
+
const i = nextJob++;
|
|
215
|
+
if (i >= jobs.length)
|
|
216
|
+
return;
|
|
217
|
+
await jobs[i]();
|
|
218
|
+
}
|
|
219
|
+
};
|
|
220
|
+
await Promise.all(Array.from({ length: Math.min(judgeConcurrency, jobs.length) }, judgeWorker));
|
|
206
221
|
}
|
|
207
222
|
function averageDimensions(scores) {
|
|
208
223
|
if (scores.length === 0)
|
package/dist/prompt-evolution/index.d.ts
CHANGED
|
@@ -52,6 +52,8 @@ export interface EvolveOpts {
|
|
|
52
52
|
evalModels?: string[];
|
|
53
53
|
/** Repetitions per (variant, case, model). Default 1. Recommended ≥3 for noise floor. */
|
|
54
54
|
repetitions?: number;
|
|
55
|
+
/** Max in-flight eval calls. Default 8. Raise for slow endpoints, lower for strict rate limits. */
|
|
56
|
+
concurrency?: number;
|
|
55
57
|
/** Optional llm-judge — replaces the heuristic content score for top-N variants each gen. */
|
|
56
58
|
judge?: JudgeOpts & {
|
|
57
59
|
topN?: number;
|
package/dist/prompt-evolution/index.js
CHANGED
|
@@ -63,7 +63,7 @@ export async function evolvePrompt(opts) {
|
|
|
63
63
|
models: opts.evalModels,
|
|
64
64
|
baseUrl: opts.baseUrl,
|
|
65
65
|
authToken: opts.authToken,
|
|
66
|
-
concurrency:
|
|
66
|
+
concurrency: opts.concurrency ?? 8,
|
|
67
67
|
repetitions: opts.repetitions,
|
|
68
68
|
judge: opts.judge,
|
|
69
69
|
onProgress: (done, total, caseName, variantId) => {
|
|
@@ -178,7 +178,7 @@ export async function evolvePrompt(opts) {
|
|
|
178
178
|
models: opts.evalModels,
|
|
179
179
|
baseUrl: opts.baseUrl,
|
|
180
180
|
authToken: opts.authToken,
|
|
181
|
-
concurrency:
|
|
181
|
+
concurrency: opts.concurrency ?? 8,
|
|
182
182
|
repetitions: opts.repetitions,
|
|
183
183
|
judge: opts.judge,
|
|
184
184
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.55.1",
|
|
3
|
+
"version": "1.55.2",
|
|
4
4
|
"description": "Overnight parallel coding agents in git worktrees, with a self-curating skill memory that improves while the run is going. Mix Claude Opus as planner, Kimi 2.6 or Cursor composer-2 as cheap fast worker, Gemini or Qwen for bulk implementation. Multi-wave autonomous loop that plans, executes, reviews, and steers itself until the objective is met. Crash-safe resume, rate-limit aware, usage cap preserves headroom for your interactive Claude Code.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
package/plugins/claude-overnight/.claude-plugin/plugin.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.55.1",
|
|
3
|
+
"version": "1.55.2",
|
|
4
4
|
"description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs: overnight parallel coding agents in git worktrees with a self-curating skill memory, multi-wave steering, three-layer review, and crash-safe resume. Mix Opus planner with Kimi 2.6, Cursor composer-2, Gemini, Qwen, or any Anthropic-compatible worker.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Francesco Fornace"
|