claude-overnight 1.57.1 → 1.57.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- export declare const VERSION = "1.57.1";
1
+ export declare const VERSION = "1.57.3";
@@ -1,2 +1,2 @@
1
1
  // Auto-generated by build — do not edit manually.
2
- export const VERSION = "1.57.1";
2
+ export const VERSION = "1.57.3";
@@ -41,34 +41,73 @@ export async function generateCases(opts) {
41
41
  model: opts.model,
42
42
  baseUrl: opts.baseUrl,
43
43
  authToken: opts.authToken,
44
- maxTokens: Math.max(4096, askFor * 120),
44
+ maxTokens: Math.max(8192, askFor * 200),
45
45
  timeoutMs: 120_000,
46
46
  };
47
47
  const { raw } = await defaultCallModel(prompt, undefined, callOpts);
48
48
  const parsed = attemptJsonParse(raw);
49
- const items = Array.isArray(parsed) ? parsed : parsed?.cases;
49
+ const items = coerceArray(parsed);
50
50
  if (!Array.isArray(items)) {
51
- throw new Error(`Generator returned non-array response (${raw.slice(0, 200)})`);
51
+ throw new Error(`Generator returned no JSON array. First 500 chars of response:\n${raw.slice(0, 500)}`);
52
52
  }
53
53
  const newCases = [];
54
54
  const seen = new Set([
55
55
  ...existingSigs,
56
56
  ...cached.map((c) => signatureOf(String(c.vars.objective))),
57
57
  ]);
58
+ let rejected = 0;
58
59
  for (const it of items) {
59
60
  const parsed = parseGenerated(it);
60
- if (!parsed)
61
+ if (!parsed) {
62
+ rejected++;
61
63
  continue;
64
+ }
62
65
  const sig = signatureOf(parsed.objective);
63
66
  if (seen.has(sig))
64
67
  continue;
65
68
  seen.add(sig);
66
69
  newCases.push(toCase(parsed, opts.promptPath));
67
70
  }
71
+ if (newCases.length === 0) {
72
+ // Silent fall-through hid this in 1.57.0-1.57.2. Throw with diagnostics
73
+ // so the CLI's loud error handler surfaces the real reason.
74
+ throw new Error(`Generator returned ${items.length} item(s) but none passed validation ` +
75
+ `(${rejected} rejected). Check the tier/objective/budget schema. ` +
76
+ `First 500 chars of raw:\n${raw.slice(0, 500)}`);
77
+ }
68
78
  const combined = cached.concat(newCases);
69
79
  writeCache(cachePath, combined);
70
80
  return combined.slice(0, opts.targetCount);
71
81
  }
82
+ /**
83
+ * Coerce a parsed JSON value into an array of objectives. Accepts:
84
+ * - Top-level array: [ {tier, objective, budget}, … ]
85
+ * - Wrapper object under a known key: {cases: […]} | {tasks: […]} |
86
+ * {items: […]} | {data: […]} | {objectives: […]}
87
+ * - Array of strings: treat each string as an objective with budget
88
+ * inferred from length (TIGHT < 80 chars, LARGE > 160 chars).
89
+ */
90
+ function coerceArray(parsed) {
91
+ if (Array.isArray(parsed))
92
+ return parsed.map(stringToRecord);
93
+ if (parsed && typeof parsed === "object") {
94
+ const wrapperKeys = ["cases", "tasks", "items", "data", "objectives"];
95
+ const obj = parsed;
96
+ for (const k of wrapperKeys) {
97
+ if (Array.isArray(obj[k]))
98
+ return obj[k].map(stringToRecord);
99
+ }
100
+ }
101
+ return null;
102
+ }
103
+ function stringToRecord(it) {
104
+ if (typeof it !== "string")
105
+ return it;
106
+ const len = it.length;
107
+ const tier = len < 80 ? "TIGHT" : len > 160 ? "LARGE" : "STANDARD";
108
+ const budget = tier === "TIGHT" ? 4 : tier === "LARGE" ? 30 : 10;
109
+ return { tier, objective: it, budget };
110
+ }
72
111
  function buildGeneratorPrompt(count) {
73
112
  return `You are generating benchmark test cases for a planner prompt evaluation.
74
113
 
@@ -98,14 +137,21 @@ function parseGenerated(raw) {
98
137
  if (typeof raw !== "object" || raw == null)
99
138
  return null;
100
139
  const obj = raw;
101
- const tier = obj.tier;
102
- const objective = obj.objective;
103
- const budget = obj.budget;
104
- if (tier !== "TIGHT" && tier !== "STANDARD" && tier !== "LARGE")
105
- return null;
106
- if (typeof objective !== "string" || objective.length < 10)
140
+ const tierRaw = typeof obj.tier === "string" ? obj.tier.toUpperCase() : "";
141
+ const tier = tierRaw === "TIGHT" || tierRaw === "SMALL" ? "TIGHT"
142
+ : tierRaw === "LARGE" || tierRaw === "BIG" ? "LARGE"
143
+ : tierRaw === "STANDARD" || tierRaw === "MEDIUM" ? "STANDARD"
144
+ : null;
145
+ const objective = typeof obj.objective === "string" ? obj.objective
146
+ : typeof obj.prompt === "string" ? obj.prompt
147
+ : typeof obj.task === "string" ? obj.task
148
+ : null;
149
+ const budget = typeof obj.budget === "number" ? obj.budget
150
+ : typeof obj.size === "number" ? obj.size
151
+ : tier === "TIGHT" ? 4 : tier === "LARGE" ? 30 : 10;
152
+ if (!tier || !objective || objective.length < 10)
107
153
  return null;
108
- if (typeof budget !== "number" || budget < 1 || budget > 100)
154
+ if (budget < 1 || budget > 100)
109
155
  return null;
110
156
  return { tier, objective: objective.trim(), budget };
111
157
  }
@@ -24,5 +24,12 @@ export interface CallModelResult {
24
24
  /** Injectable model call — default is `defaultCallModel`; tests pass a mock. */
25
25
  export type CallModel = (userText: string, systemText: string | undefined, opts: CallModelOpts) => Promise<CallModelResult>;
26
26
  export declare function defaultCallModel(userText: string, systemText: string | undefined, opts: CallModelOpts): Promise<CallModelResult>;
27
- /** Strip markdown fences and try hard to find a JSON object in a model output. */
27
+ /**
28
+ * Strip markdown fences, strip preamble, and try to find a JSON value.
29
+ *
30
+ * Handles both `{…}` objects and `[…]` arrays — the previous implementation
31
+ * missed arrays entirely, which broke the case generator (Kimi returns the
32
+ * case list as a top-level array that's often preceded by a one-line
33
+ * preamble even when instructed otherwise).
34
+ */
28
35
  export declare function attemptJsonParse(text: string): unknown;
@@ -75,7 +75,14 @@ export async function defaultCallModel(userText, systemText, opts) {
75
75
  const costUsd = inputTokens * 0.000003 + outputTokens * 0.000015;
76
76
  return { raw, costUsd, inputTokens, outputTokens };
77
77
  }
78
- /** Strip markdown fences and try hard to find a JSON object in a model output. */
78
+ /**
79
+ * Strip markdown fences, strip preamble, and try to find a JSON value.
80
+ *
81
+ * Handles both `{…}` objects and `[…]` arrays — the previous implementation
82
+ * missed arrays entirely, which broke the case generator (Kimi returns the
83
+ * case list as a top-level array that's often preceded by a one-line
84
+ * preamble even when instructed otherwise).
85
+ */
79
86
  export function attemptJsonParse(text) {
80
87
  const cleaned = text
81
88
  .replace(/^```(?:json)?\s*\n?/i, "")
@@ -85,14 +92,21 @@ export function attemptJsonParse(text) {
85
92
  return JSON.parse(cleaned);
86
93
  }
87
94
  catch {
88
- const m = cleaned.match(/\{[\s\S]*\}/);
89
- if (m) {
95
+ // Try the first plausible JSON value — object OR array, whichever comes
96
+ // first in the text. We run one regex per shape, then try the matches in order of position.
97
+ const objMatch = cleaned.match(/\{[\s\S]*\}/);
98
+ const arrMatch = cleaned.match(/\[[\s\S]*\]/);
99
+ const candidates = [];
100
+ if (objMatch && objMatch.index != null)
101
+ candidates.push({ idx: objMatch.index, text: objMatch[0] });
102
+ if (arrMatch && arrMatch.index != null)
103
+ candidates.push({ idx: arrMatch.index, text: arrMatch[0] });
104
+ candidates.sort((a, b) => a.idx - b.idx);
105
+ for (const c of candidates) {
90
106
  try {
91
- return JSON.parse(m[0]);
92
- }
93
- catch {
94
- return null;
107
+ return JSON.parse(c.text);
95
108
  }
109
+ catch { /* try next */ }
96
110
  }
97
111
  return null;
98
112
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.57.1",
3
+ "version": "1.57.3",
4
4
  "description": "Overnight parallel coding agents in git worktrees, with a self-curating skill memory that improves while the run is going. Mix Claude Opus as planner, Kimi 2.6 or Cursor composer-2 as cheap fast worker, Gemini or Qwen for bulk implementation. Multi-wave autonomous loop that plans, executes, reviews, and steers itself until the objective is met. Crash-safe resume, rate-limit aware, usage cap preserves headroom for your interactive Claude Code.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.57.1",
3
+ "version": "1.57.3",
4
4
  "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs: overnight parallel coding agents in git worktrees with a self-curating skill memory, multi-wave steering, three-layer review, and crash-safe resume. Mix Opus planner with Kimi 2.6, Cursor composer-2, Gemini, Qwen, or any Anthropic-compatible worker.",
5
5
  "author": {
6
6
  "name": "Francesco Fornace"