claude-overnight 1.57.4 → 1.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@
8
8
  *
9
9
  * Examples:
10
10
  * claude-overnight-evolve --prompt 10_planning/10-3_plan --eval-model claude-haiku-4-5 --generations 3
11
- * claude-overnight-evolve --target mcp-browser --prompt-kind plan-supervision --eval-model kimi-k2-6
11
+ * claude-overnight-evolve --target mcp-browser --prompt-kind plan-supervision --eval-model kimi-for-coding
12
12
  *
13
13
  * Requires ANTHROPIC_API_KEY (or ANTHROPIC_AUTH_TOKEN) in env. When `--target
14
14
  * mcp-browser` is used the cwd must be the MCP-browser repo root (so
@@ -8,7 +8,7 @@
8
8
  *
9
9
  * Examples:
10
10
  * claude-overnight-evolve --prompt 10_planning/10-3_plan --eval-model claude-haiku-4-5 --generations 3
11
- * claude-overnight-evolve --target mcp-browser --prompt-kind plan-supervision --eval-model kimi-k2-6
11
+ * claude-overnight-evolve --target mcp-browser --prompt-kind plan-supervision --eval-model kimi-for-coding
12
12
  *
13
13
  * Requires ANTHROPIC_API_KEY (or ANTHROPIC_AUTH_TOKEN) in env. When `--target
14
14
  * mcp-browser` is used the cwd must be the MCP-browser repo root (so
@@ -28,7 +28,10 @@ Options:
28
28
  --prompt <path> Prompt file path (claude-overnight)
29
29
  --prompt-kind <kind> MCP-browser prompt kind: planning | review | evolution |
30
30
  goal-refinement | plan-supervision | simple-supervision | stuck-analysis
31
- --eval-model <model> Fast model for evaluation (default: claude-haiku-4-5)
31
+ --eval-model <model> Fast model for evaluation (default: claude-haiku-4-5).
32
+ For Kimi endpoints use "kimi-for-coding" (stable alias,
33
+ auto-upgrades as flagship revs). For Moonshot platform
34
+ API use "kimi-k2.6" (dot, not dash).
32
35
  --eval-models <list> Comma-separated list to run cross-model (overrides --eval-model)
33
36
  --mutate-model <model> Smarter model for mutation (defaults to eval-model)
34
37
  --generations <n> Number of evolution generations (default: 10)
@@ -37,6 +40,11 @@ Options:
37
40
  --reps <n> Repetitions per (variant, case, model) for noise floor (default: 1)
38
41
  --concurrency <n> Max in-flight eval calls (default: 8; bump for slow endpoints)
39
42
  --batch Use provider batch API (50% cheaper, slower wall-clock)
43
+ --batch-base-url <url> Override base URL for batch only (e.g. https://api.moonshot.ai/v1
44
+ when online uses api.kimi.com/coding)
45
+ --batch-auth-token <t> Override auth token for batch only
46
+ --batch-model <model> Override model for batch only (e.g. "kimi-k2.6" for
47
+ Moonshot platform when online uses "kimi-for-coding")
40
48
  --adaptive-cap <n> Adaptive sampling: extend reps up to N when σ > threshold (default: off)
41
49
  --adaptive-threshold <x> σ threshold that triggers an extra rep (default: 0.1)
42
50
  --judge Use llm-judge for content scoring (costs extra API calls)
@@ -138,6 +146,18 @@ function parseArgs() {
138
146
  case "--batch":
139
147
  opts.batch = true;
140
148
  break;
149
+ case "--batch-base-url":
150
+ opts.batchBaseUrl = v;
151
+ i++;
152
+ break;
153
+ case "--batch-auth-token":
154
+ opts.batchAuthToken = v;
155
+ i++;
156
+ break;
157
+ case "--batch-model":
158
+ opts.batchModel = v;
159
+ i++;
160
+ break;
141
161
  case "--adaptive-cap":
142
162
  opts.adaptiveCap = parseInt(v, 10);
143
163
  i++;
@@ -328,6 +348,9 @@ async function evolveOne(opts) {
328
348
  repetitions: opts.reps > 1 ? opts.reps : undefined,
329
349
  concurrency: opts.concurrency,
330
350
  batch: opts.batch || undefined,
351
+ batchBaseUrl: opts.batchBaseUrl,
352
+ batchAuthToken: opts.batchAuthToken,
353
+ batchModel: opts.batchModel,
331
354
  adaptiveReps: opts.adaptiveCap
332
355
  ? { cap: opts.adaptiveCap, threshold: opts.adaptiveThreshold }
333
356
  : undefined,
@@ -1 +1 @@
1
- export declare const VERSION = "1.57.4";
1
+ export declare const VERSION = "1.58.0";
@@ -1,2 +1,2 @@
1
1
  // Auto-generated by build — do not edit manually.
2
- export const VERSION = "1.57.4";
2
+ export const VERSION = "1.58.0";
@@ -82,8 +82,12 @@ async function runJudgeBatch(cells, judge, opts) {
82
82
  : null;
83
83
  const transport = opts.batchCallModel ?? batchCallModel;
84
84
  const results = await transport(batchJobs, {
85
- baseUrl: judge.baseUrl ?? opts.baseUrl,
86
- authToken: judge.authToken ?? opts.authToken,
85
+ // Judge batch follows the same override hierarchy as eval batch: if a
86
+ // dedicated batch endpoint is set on EvalOpts, use it; else fall back
87
+ // to the judge's own endpoint or the main one.
88
+ baseUrl: opts.batchBaseUrl ?? judge.baseUrl ?? opts.baseUrl,
89
+ authToken: opts.batchAuthToken ?? judge.authToken ?? opts.authToken,
90
+ modelOverride: opts.batchModel,
87
91
  maxTokens: judge.maxTokens ?? 2048,
88
92
  resumeBatchId: existing?.batchId,
89
93
  onSubmitted: (batchId, p) => {
@@ -53,6 +53,17 @@ export interface EvalOpts {
53
53
  callModel?: CallModel;
54
54
  /** Use provider batch API instead of online calls (50% cheaper, slower wall-clock). */
55
55
  batch?: boolean;
56
+ /**
57
+ * Override base URL for batch submissions only — lets batch hit a
58
+ * different endpoint than online. Key use-case: Kimi users whose online
59
+ * traffic runs through api.kimi.com/coding (which has no batch) but
60
+ * whose batch traffic should go to api.moonshot.ai/v1.
61
+ */
62
+ batchBaseUrl?: string;
63
+ /** Override auth token for batch when batchBaseUrl needs a different key. */
64
+ batchAuthToken?: string;
65
+ /** Override model for batch submissions (e.g., kimi-k2.6 when online uses kimi-for-coding). */
66
+ batchModel?: string;
56
67
  /** Run id — required when batch=true so state is crash-resumable. */
57
68
  runId?: string;
58
69
  /** Current generation number — used to key batch state. */
@@ -268,8 +268,9 @@ async function runBatchPath(jobs, opts, rawByKey) {
268
268
  : null;
269
269
  const transport = opts.batchCallModel ?? batchCallModel;
270
270
  const results = await transport(batchJobs, {
271
- baseUrl: opts.baseUrl,
272
- authToken: opts.authToken,
271
+ baseUrl: opts.batchBaseUrl ?? opts.baseUrl,
272
+ authToken: opts.batchAuthToken ?? opts.authToken,
273
+ modelOverride: opts.batchModel,
273
274
  maxTokens: opts.maxTokens,
274
275
  resumeBatchId: existing?.batchId,
275
276
  onSubmitted: (batchId, p) => {
@@ -56,6 +56,12 @@ export interface EvolveOpts {
56
56
  concurrency?: number;
57
57
  /** Use provider batch API instead of online calls. 50% cheaper, slower wall-clock. */
58
58
  batch?: boolean;
59
+ /** Override base URL for batch submissions only. */
60
+ batchBaseUrl?: string;
61
+ /** Override auth token for batch submissions only. */
62
+ batchAuthToken?: string;
63
+ /** Override model for batch submissions (e.g. kimi-k2.6 when online uses kimi-for-coding). */
64
+ batchModel?: string;
59
65
  /** Adaptive sampling cap (opt-in). Keeps adding reps to noisy cells up to this count. */
60
66
  adaptiveReps?: {
61
67
  cap: number;
@@ -74,6 +74,9 @@ export async function evolvePrompt(opts) {
74
74
  repetitions: opts.repetitions,
75
75
  judge: opts.judge,
76
76
  batch: opts.batch,
77
+ batchBaseUrl: opts.batchBaseUrl,
78
+ batchAuthToken: opts.batchAuthToken,
79
+ batchModel: opts.batchModel,
77
80
  adaptiveReps: opts.adaptiveReps,
78
81
  runId,
79
82
  generation: gen,
@@ -220,6 +223,9 @@ export async function evolvePrompt(opts) {
220
223
  concurrency: opts.concurrency ?? 8,
221
224
  repetitions: opts.repetitions,
222
225
  batch: opts.batch,
226
+ batchBaseUrl: opts.batchBaseUrl,
227
+ batchAuthToken: opts.batchAuthToken,
228
+ batchModel: opts.batchModel,
223
229
  adaptiveReps: opts.adaptiveReps,
224
230
  runId,
225
231
  generation: generations + 1,
@@ -12,7 +12,7 @@
12
12
  * - openrouter → NO batch support; throws (caller must fall back to online)
13
13
  *
14
14
  * Custom IDs route results back to the right (variant, case, model, rep)
15
- * cell. The evaluator builds ids like `v0:h_abc:kimi-k2-6:r0`.
15
+ * cell. The evaluator builds ids like `v0:h_abc:kimi-for-coding:r0`.
16
16
  *
17
17
  * Poll state is persisted via `persistBatchState` so a crashed or
18
18
  * restarted run can resume without resubmitting.
@@ -27,6 +27,14 @@ export interface BatchJob {
27
27
  export interface BatchOpts {
28
28
  baseUrl?: string;
29
29
  authToken?: string;
30
+ /**
31
+ * Override model for the batch submission. Moonshot's batch API only
32
+ * accepts kimi-k2.5 or kimi-k2.6 — NOT the kimi-for-coding alias that the
33
+ * coding endpoint uses. When batch is enabled against a Kimi stack, set
34
+ * this so online eval keeps using kimi-for-coding while batch uses the
35
+ * concrete version.
36
+ */
37
+ modelOverride?: string;
30
38
  maxTokens?: number;
31
39
  /** Poll interval starts here and doubles to `pollMaxMs`. Defaults 30s → 5min. */
32
40
  pollStartMs?: number;
@@ -12,19 +12,30 @@
12
12
  * - openrouter → NO batch support; throws (caller must fall back to online)
13
13
  *
14
14
  * Custom IDs route results back to the right (variant, case, model, rep)
15
- * cell. The evaluator builds ids like `v0:h_abc:kimi-k2-6:r0`.
15
+ * cell. The evaluator builds ids like `v0:h_abc:kimi-for-coding:r0`.
16
16
  *
17
17
  * Poll state is persisted via `persistBatchState` so a crashed or
18
18
  * restarted run can resume without resubmitting.
19
19
  */
20
+ import { VERSION } from "../core/_version.js";
21
+ const USER_AGENT = `claude-overnight-evolve/${VERSION}`;
20
22
  export function detectBatchProvider(baseUrl) {
21
23
  const url = (baseUrl ?? "https://api.anthropic.com").toLowerCase();
22
24
  if (/(^|\/\/)(api\.)?anthropic\.com/.test(url))
23
25
  return "anthropic";
26
+ // Providers with no batch support — batchCallModel throws; caller must fall back to online.
27
+ // - OpenRouter: no batch API at all.
28
+ // - api.kimi.com/coding: Moonshot's coding-specific endpoint; synchronous
29
+ // only (30 concurrent, 300-1200 req/5hr) with no /v1/files upload flow.
30
+ // Moonshot's platform API (api.moonshot.ai/v1) is the batch-capable one; this one
31
+ // doesn't.
24
32
  if (/openrouter/.test(url))
25
33
  return "unsupported";
26
- // Everything else that speaks /v1/chat/completions — OpenAI, Kimi, Moonshot,
27
- // DeepSeek — exposes an OpenAI-compatible batch endpoint.
34
+ if (/(api\.)?kimi\.com\/coding/.test(url))
35
+ return "unsupported";
36
+ // Everything else that speaks /v1/chat/completions — OpenAI, DeepSeek,
37
+ // DashScope in OpenAI-compat mode — exposes an OpenAI-compatible batch
38
+ // endpoint we can ride.
28
39
  return "openai-compatible";
29
40
  }
30
41
  export async function batchCallModel(jobs, opts) {
@@ -32,7 +43,11 @@ export async function batchCallModel(jobs, opts) {
32
43
  return new Map();
33
44
  const provider = detectBatchProvider(opts.baseUrl);
34
45
  if (provider === "unsupported") {
35
- throw new Error(`Batch API not supported for baseUrl=${opts.baseUrl}; use online transport`);
46
+ throw new Error(`Batch API not supported for baseUrl=${opts.baseUrl}. ` +
47
+ `Options: (1) omit --batch and use online transport, or (2) point ` +
48
+ `the batch call at a provider with batch support (e.g. set --batch-base-url ` +
49
+ `https://api.moonshot.ai/v1 --batch-model kimi-k2.6 for Kimi users whose ` +
50
+ `online endpoint is api.kimi.com/coding).`);
36
51
  }
37
52
  if (provider === "anthropic")
38
53
  return runAnthropicBatch(jobs, opts);
@@ -45,6 +60,7 @@ async function runAnthropicBatch(jobs, opts) {
45
60
  const headers = {
46
61
  "Content-Type": "application/json",
47
62
  "Authorization": `Bearer ${authToken}`,
63
+ "User-Agent": USER_AGENT,
48
64
  "anthropic-version": "2023-06-01",
49
65
  "anthropic-beta": "message-batches-2024-09-24",
50
66
  };
@@ -53,7 +69,7 @@ async function runAnthropicBatch(jobs, opts) {
53
69
  const body = JSON.stringify({
54
70
  requests: jobs.map((j) => {
55
71
  const params = {
56
- model: j.model,
72
+ model: opts.modelOverride ?? j.model,
57
73
  max_tokens: opts.maxTokens ?? 4096,
58
74
  messages: [{ role: "user", content: j.userText }],
59
75
  };
@@ -115,7 +131,10 @@ async function runAnthropicBatch(jobs, opts) {
115
131
  async function runOpenAIBatch(jobs, opts) {
116
132
  const baseUrl = (opts.baseUrl ?? "https://api.openai.com").replace(/\/$/, "");
117
133
  const authToken = opts.authToken ?? process.env.ANTHROPIC_AUTH_TOKEN ?? process.env.ANTHROPIC_API_KEY ?? "";
118
- const authHeaders = { "Authorization": `Bearer ${authToken}` };
134
+ const authHeaders = {
135
+ "Authorization": `Bearer ${authToken}`,
136
+ "User-Agent": USER_AGENT,
137
+ };
119
138
  let batchId = opts.resumeBatchId;
120
139
  let outputFileId;
121
140
  if (!batchId) {
@@ -129,7 +148,7 @@ async function runOpenAIBatch(jobs, opts) {
129
148
  custom_id: j.customId,
130
149
  method: "POST",
131
150
  url: "/v1/chat/completions",
132
- body: { model: j.model, max_tokens: opts.maxTokens ?? 4096, messages },
151
+ body: { model: opts.modelOverride ?? j.model, max_tokens: opts.maxTokens ?? 4096, max_completion_tokens: opts.maxTokens ?? 4096, messages },
133
152
  });
134
153
  }).join("\n");
135
154
  const form = new FormData();
@@ -8,17 +8,22 @@
8
8
  * Supports both Anthropic-native and OpenAI-compatible endpoints so we can
9
9
  * run the same eval against Haiku, Kimi, and OpenRouter without a rewrite.
10
10
  */
11
+ import { VERSION } from "../core/_version.js";
12
+ const USER_AGENT = `claude-overnight-evolve/${VERSION}`;
11
13
  export async function defaultCallModel(userText, systemText, opts) {
12
14
  const baseUrl = (opts.baseUrl ?? process.env.ANTHROPIC_BASE_URL ?? "https://api.anthropic.com").replace(/\/$/, "");
13
15
  const authToken = opts.authToken ?? process.env.ANTHROPIC_AUTH_TOKEN ?? process.env.ANTHROPIC_API_KEY ?? "";
14
16
  const isAnthropic = /^https?:\/\/(api\.)?anthropic\.com/i.test(baseUrl);
15
- const isKimi = /kimi\.com/i.test(baseUrl);
17
+ // Identify ourselves honestly. Kimi's coding-endpoint docs explicitly say
18
+ // "Tampering with the client identifier (User-Agent) is considered a
19
+ // violation." The previous "Kilo-Code/1.0" was impersonating a third-party
20
+ // tool; we now send our real binary name + version.
16
21
  const headers = {
17
22
  "Content-Type": "application/json",
18
23
  "Authorization": `Bearer ${authToken}`,
24
+ "User-Agent": USER_AGENT,
19
25
  };
20
- if (isKimi)
21
- headers["User-Agent"] = "Kilo-Code/1.0";
26
+ const maxOut = opts.maxTokens ?? 4096;
22
27
  let endpoint;
23
28
  let body;
24
29
  if (isAnthropic) {
@@ -26,7 +31,7 @@ export async function defaultCallModel(userText, systemText, opts) {
26
31
  headers["anthropic-version"] = "2023-06-01";
27
32
  const payload = {
28
33
  model: opts.model,
29
- max_tokens: opts.maxTokens ?? 4096,
34
+ max_tokens: maxOut, // Anthropic uses max_tokens, not max_completion_tokens.
30
35
  messages: [{ role: "user", content: userText }],
31
36
  };
32
37
  if (systemText)
@@ -39,9 +44,14 @@ export async function defaultCallModel(userText, systemText, opts) {
39
44
  if (systemText)
40
45
  messages.push({ role: "system", content: systemText });
41
46
  messages.push({ role: "user", content: userText });
47
+ // Platform.moonshot.ai marks max_tokens deprecated in favor of
48
+ // max_completion_tokens. Kimi's coding endpoint still accepts max_tokens.
49
+ // Sending both is safe — OpenAI, Moonshot, DeepSeek, and Kimi all tolerate
50
+ // the extra field, and we're future-proof against the deprecation.
42
51
  body = JSON.stringify({
43
52
  model: opts.model,
44
- max_tokens: opts.maxTokens ?? 4096,
53
+ max_tokens: maxOut,
54
+ max_completion_tokens: maxOut,
45
55
  messages,
46
56
  });
47
57
  }
@@ -183,7 +183,7 @@ Your laptop can be off the whole time.
183
183
  npm run evolve -- --prompt 10_planning/10-3_plan --eval-model claude-haiku-4-5 --generations 10
184
184
 
185
185
  # Evolve an MCP-browser supervision prompt
186
- npm run evolve -- --target mcp-browser --prompt-kind plan-supervision --eval-model kimi-k2-6 --generations 10
186
+ npm run evolve -- --target mcp-browser --prompt-kind plan-supervision --eval-model kimi-for-coding --generations 10
187
187
  ```
188
188
 
189
189
  ### Via Platform API (runs on server)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.57.4",
3
+ "version": "1.58.0",
4
4
  "description": "Overnight parallel coding agents in git worktrees, with a self-curating skill memory that improves while the run is going. Mix Claude Opus as planner, Kimi 2.6 or Cursor composer-2 as cheap fast worker, Gemini or Qwen for bulk implementation. Multi-wave autonomous loop that plans, executes, reviews, and steers itself until the objective is met. Crash-safe resume, rate-limit aware, usage cap preserves headroom for your interactive Claude Code.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.57.4",
3
+ "version": "1.58.0",
4
4
  "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs: overnight parallel coding agents in git worktrees with a self-curating skill memory, multi-wave steering, three-layer review, and crash-safe resume. Mix Opus planner with Kimi 2.6, Cursor composer-2, Gemini, Qwen, or any Anthropic-compatible worker.",
5
5
  "author": {
6
6
  "name": "Francesco Fornace"