claude-overnight 1.25.48 → 1.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/dist/cli/plan-phase.js +23 -4
  2. package/dist/cli/resume.js +2 -1
  3. package/dist/core/_version.d.ts +1 -1
  4. package/dist/core/_version.js +1 -1
  5. package/dist/core/fingerprint.d.ts +2 -0
  6. package/dist/core/fingerprint.js +20 -0
  7. package/dist/core/stall-guard.d.ts +71 -0
  8. package/dist/core/stall-guard.js +188 -0
  9. package/dist/core/transcripts.d.ts +26 -1
  10. package/dist/core/transcripts.js +75 -15
  11. package/dist/core/types.d.ts +8 -0
  12. package/dist/planner/coach/coach.js +5 -2
  13. package/dist/planner/planner.d.ts +0 -1
  14. package/dist/planner/planner.js +30 -190
  15. package/dist/planner/query.d.ts +7 -4
  16. package/dist/planner/query.js +87 -41
  17. package/dist/planner/steering.js +53 -130
  18. package/dist/planner/verifier.js +10 -23
  19. package/dist/prompts/load.d.ts +6 -0
  20. package/dist/prompts/load.js +64 -0
  21. package/dist/run/health.js +3 -2
  22. package/dist/run/review.js +6 -9
  23. package/dist/run/run.js +29 -8
  24. package/dist/run/summary.js +12 -10
  25. package/dist/run/wave-loop.d.ts +4 -0
  26. package/dist/run/wave-loop.js +103 -4
  27. package/dist/skills/ab.d.ts +32 -0
  28. package/dist/skills/ab.js +95 -0
  29. package/dist/skills/index-db.d.ts +44 -0
  30. package/dist/skills/index-db.js +139 -0
  31. package/dist/skills/injection.d.ts +19 -0
  32. package/dist/skills/injection.js +63 -0
  33. package/dist/skills/librarian.d.ts +22 -0
  34. package/dist/skills/librarian.js +319 -0
  35. package/dist/skills/paths.d.ts +11 -0
  36. package/dist/skills/paths.js +37 -0
  37. package/dist/skills/scribe.d.ts +14 -0
  38. package/dist/skills/scribe.js +55 -0
  39. package/dist/skills/skills.d.ts +8 -0
  40. package/dist/skills/skills.js +9 -0
  41. package/dist/skills/tools.d.ts +15 -0
  42. package/dist/skills/tools.js +54 -0
  43. package/dist/state/state.js +1 -0
  44. package/dist/swarm/agent-run.d.ts +5 -9
  45. package/dist/swarm/agent-run.js +116 -148
  46. package/dist/swarm/branch-evaluator.d.ts +11 -0
  47. package/dist/swarm/branch-evaluator.js +68 -0
  48. package/dist/swarm/config.d.ts +8 -3
  49. package/dist/swarm/config.js +0 -5
  50. package/dist/swarm/message-handler.d.ts +5 -2
  51. package/dist/swarm/message-handler.js +13 -2
  52. package/dist/swarm/swarm.d.ts +6 -0
  53. package/dist/swarm/swarm.js +9 -0
  54. package/dist/ui/bars.js +5 -3
  55. package/dist/ui/hooks/use-scroll-buffer.d.ts +9 -0
  56. package/dist/ui/hooks/use-scroll-buffer.js +53 -0
  57. package/dist/ui/hooks/use-transcript-tail.d.ts +2 -0
  58. package/dist/ui/hooks/use-transcript-tail.js +94 -0
  59. package/dist/ui/run-body.d.ts +4 -1
  60. package/dist/ui/run-body.js +7 -4
  61. package/dist/ui/settings.d.ts +1 -1
  62. package/dist/ui/shell.js +1 -1
  63. package/dist/ui/store.d.ts +2 -0
  64. package/dist/ui/store.js +1 -0
  65. package/dist/ui/widgets/stream-pane.d.ts +8 -0
  66. package/dist/ui/widgets/stream-pane.js +117 -0
  67. package/docs/STREAMING_UI_AND_STALL_RESILIENCE.md +216 -0
  68. package/docs/skills-evolution/PLAN.md +128 -0
  69. package/docs/skills-evolution/integration-map.md +107 -0
  70. package/docs/skills-evolution/librarian-prompt.md +104 -0
  71. package/docs/skills-evolution/phase-1-scribe.md +216 -0
  72. package/docs/skills-evolution/phase-2-librarian.md +168 -0
  73. package/docs/skills-evolution/phase-3-ab-waves.md +151 -0
  74. package/docs/skills-evolution/phase-4-tool-recipes.md +137 -0
  75. package/docs/skills-evolution/schemas.md +277 -0
  76. package/package.json +4 -1
  77. package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
  78. package/plugins/claude-overnight/skills/claude-overnight/SKILL.md +15 -5
  79. package/plugins/claude-overnight/skills/claude-overnight/authoring.md +107 -0
  80. package/plugins/claude-overnight/skills/claude-overnight/recipes.md +48 -0
  81. package/prompts/00_setup/00-1_coach.md +232 -0
  82. package/prompts/00_setup/00-2_coach-wrapper.md +23 -0
  83. package/prompts/10_planning/10-1_identify-themes.md +15 -0
  84. package/prompts/10_planning/10-2_thinking-tasks.md +39 -0
  85. package/prompts/10_planning/10-3_plan.md +105 -0
  86. package/prompts/10_planning/10-4_orchestrate.md +32 -0
  87. package/prompts/10_planning/10-5_refine.md +22 -0
  88. package/prompts/20_execution/20-1_simplify.md +6 -0
  89. package/prompts/20_execution/20-2_skill-proposal.md +12 -0
  90. package/prompts/20_execution/20-3_agent-wrap.md +18 -0
  91. package/prompts/30_wave/30-1_steer.md +120 -0
  92. package/prompts/30_wave/30-2_verify.md +26 -0
  93. package/prompts/30_wave/30-3_branch-retry.md +6 -0
  94. package/prompts/30_wave/30-4_decomposer-minimal.md +9 -0
  95. package/prompts/30_wave/30-5_auto-verify.md +13 -0
  96. package/prompts/30_wave/30-6_retry-suffix.md +19 -0
  97. package/prompts/30_wave/30-7_steer-retry.md +12 -0
  98. package/prompts/40_skills/40-1_librarian.md +50 -0
  99. package/prompts/40_skills/40-2_branch-evaluator.md +18 -0
  100. package/prompts/40_skills/40-3_librarian-wrap.md +10 -0
  101. package/prompts/50_review/50-1_review.md +18 -0
  102. package/prompts/50_review/50-2_summary.md +34 -0
  103. package/prompts/60_runtime/60-1_ask.md +9 -0
  104. package/prompts/60_runtime/60-2_debrief.md +8 -0
  105. package/prompts/60_runtime/60-3_plan-chat.md +20 -0
  106. package/prompts/60_runtime/60-4_build-fix.md +20 -0
  107. package/prompts/_shared/design-thinking.md +9 -0
  108. package/prompts/_shared/flex-note.md +3 -0
  109. package/prompts/_shared/non-claude-json-wrap.md +8 -0
  110. package/prompts/_shared/retry-json.md +5 -0
@@ -8,9 +8,12 @@ import { RunDisplay } from "../ui/ui.js";
8
8
  import { renderSummary } from "../ui/summary.js";
9
9
  import { isCursorProxyProvider } from "../providers/index.js";
10
10
  import { readMdDir, saveRunState } from "../state/state.js";
11
+ import { computeRepoFingerprint } from "../skills/scribe.js";
11
12
  import { selectKey, ask, showPlan, makeProgressLog, isJWTAuthError } from "./cli.js";
13
+ import { renderPrompt } from "../prompts/load.js";
12
14
  export async function runPlanPhase(input) {
13
15
  const { objective, noTTY, flex, budget, concurrency, cwd, plannerModel, workerModel, fastModel, plannerProvider, workerProvider, fastProvider, usageCap, allowExtraUsage, extraUsageBudget, useWorktrees, mergeStrategy, agentTimeoutMs, runDir, designDir, previousKnowledge, envForModel, coachedOriginal, coachedAt, } = input;
16
+ const repoFingerprint = computeRepoFingerprint(cwd);
14
17
  let tasks = [];
15
18
  let thinkingHistory;
16
19
  let thinkingUsed = 0, thinkingCost = 0, thinkingIn = 0, thinkingOut = 0, thinkingTools = 0;
@@ -36,6 +39,7 @@ export async function runPlanPhase(input) {
36
39
  phase: "planning",
37
40
  startedAt: new Date().toISOString(),
38
41
  cwd,
42
+ repoFingerprint,
39
43
  });
40
44
  }
41
45
  catch { }
@@ -96,7 +100,14 @@ export async function runPlanPhase(input) {
96
100
  let answer = "";
97
101
  const plannerEnv = envForModel(plannerModel);
98
102
  for await (const msg of query({
99
- prompt: `You're planning work for: "${objective}"\n\nThemes identified:\n${themes.map((t, i) => `${i + 1}. ${t}`).join("\n")}\n\nUser question: ${question}`,
103
+ prompt: renderPrompt("60_runtime/60-3_plan-chat", {
104
+ variant: "THEMES",
105
+ vars: {
106
+ objective,
107
+ themesList: themes.map((t, i) => `${i + 1}. ${t}`).join("\n"),
108
+ question,
109
+ },
110
+ }),
100
111
  options: { cwd, model: plannerModel, permissionMode: "bypassPermissions", allowDangerouslySkipPermissions: true, persistSession: false, ...(plannerEnv && { env: plannerEnv }) },
101
112
  })) {
102
113
  if (msg.type === "result" && msg.subtype === "success")
@@ -169,6 +180,7 @@ export async function runPlanPhase(input) {
169
180
  phase: "planning",
170
181
  startedAt: new Date().toISOString(),
171
182
  cwd,
183
+ repoFingerprint,
172
184
  coachedObjective: coachedOriginal,
173
185
  coachedAt,
174
186
  });
@@ -204,7 +216,7 @@ export async function runPlanPhase(input) {
204
216
  const taskFile = join(runDir, "tasks.json");
205
217
  if (designs) {
206
218
  const orchBudget = Math.min(50, Math.max(concurrency, Math.ceil(((budget ?? 10) - thinkingUsed) * 0.5)));
207
- const flexNote = `This is wave 1 of an adaptive multi-wave run (total budget: ${(budget ?? 10) - thinkingUsed}). Plan the highest-impact foundational work first. Future waves will iterate based on what's learned.`;
219
+ const flexNote = renderPrompt("_shared/flex-note", { vars: { remainingBudget: (budget ?? 10) - thinkingUsed } });
208
220
  console.log(chalk.cyan(`\n ◆ Orchestrating plan...\n`));
209
221
  tasks = await orchestrate(objective, designs, cwd, plannerModel, workerModel, orchBudget, concurrency, makeProgressLog(), flexNote, taskFile);
210
222
  process.stdout.write(`\x1B[2K\r ${chalk.green(`✓ ${tasks.length} tasks`)}\n\n`);
@@ -218,7 +230,7 @@ export async function runPlanPhase(input) {
218
230
  }
219
231
  else {
220
232
  const waveBudget = flex ? Math.min(50, Math.max(concurrency, Math.ceil((budget ?? 10) * 0.5))) : budget;
221
- const flexNote = flex ? `This is wave 1 of an adaptive multi-wave run (total budget: ${budget}). Plan the highest-impact foundational work first. Future waves will iterate, polish, and expand based on what's learned.` : undefined;
233
+ const flexNote = flex ? renderPrompt("_shared/flex-note", { vars: { remainingBudget: budget ?? 10 } }) : undefined;
222
234
  console.log(chalk.cyan(`\n ◆ Planning${flex ? " wave 1" : ""}...\n`));
223
235
  tasks = await planTasks(objective, cwd, plannerModel, workerModel, waveBudget, concurrency, makeProgressLog(), flexNote);
224
236
  process.stdout.write(`\x1B[2K\r ${chalk.green(`✓ ${tasks.length} tasks`)}${flex ? chalk.dim(` · wave 1`) : ""}\n\n`);
@@ -256,7 +268,14 @@ export async function runPlanPhase(input) {
256
268
  let answer = "";
257
269
  const plannerEnv = envForModel(plannerModel);
258
270
  for await (const msg of query({
259
- prompt: `You planned these tasks for the objective "${objective}":\n${tasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n")}\n\nUser question: ${question}`,
271
+ prompt: renderPrompt("60_runtime/60-3_plan-chat", {
272
+ variant: "TASKS",
273
+ vars: {
274
+ objective,
275
+ tasksList: tasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n"),
276
+ question,
277
+ },
278
+ }),
260
279
  options: { cwd, model: plannerModel, permissionMode: "bypassPermissions", allowDangerouslySkipPermissions: true, persistSession: false, ...(plannerEnv && { env: plannerEnv }) },
261
280
  })) {
262
281
  if (msg.type === "result" && msg.subtype === "success")
@@ -8,6 +8,7 @@ import { setTranscriptRunDir } from "../core/transcripts.js";
8
8
  import { wrap } from "../ui/primitives.js";
9
9
  import { makeProgressLog, selectKey } from "./cli.js";
10
10
  import { editRunSettings } from "./settings.js";
11
+ import { renderPrompt } from "../prompts/load.js";
11
12
  export function countTasksInFile(path) {
12
13
  try {
13
14
  const parsed = JSON.parse(readFileSync(path, "utf-8"));
@@ -276,7 +277,7 @@ export async function detectResume(input) {
276
277
  else {
277
278
  const remainingBudget = Math.max(resumeState.concurrency, resumeState.budget - resumeState.accCompleted);
278
279
  const orchBudget = Math.min(50, Math.max(resumeState.concurrency, Math.ceil(remainingBudget * 0.5)));
279
- const flexNote = `This is wave 1 of an adaptive multi-wave run (total budget: ${remainingBudget}). Plan the highest-impact foundational work first. Future waves will iterate based on what's learned.`;
280
+ const flexNote = renderPrompt("_shared/flex-note", { vars: { remainingBudget } });
280
281
  console.log(chalk.cyan(`\n ◆ Re-orchestrating plan from existing designs...\n`));
281
282
  process.stdout.write("\x1B[?25l");
282
283
  // Route transcripts into the resumed run so this call's events
@@ -1 +1 @@
1
- export declare const VERSION = "1.25.48";
1
+ export declare const VERSION = "1.50.0";
@@ -1,2 +1,2 @@
1
1
  // Auto-generated by build — do not edit manually.
2
- export const VERSION = "1.25.48";
2
+ export const VERSION = "1.50.0";
@@ -0,0 +1,2 @@
1
+ /** SHA-256 of git remote URL (or realpath fallback), first 12 chars. */
2
+ export declare function computeRepoFingerprint(cwd: string): string;
@@ -0,0 +1,20 @@
1
+ import { createHash } from "node:crypto";
2
+ import { realpathSync } from "node:fs";
3
+ import { execSync } from "node:child_process";
4
/**
 * Derive a short identifier for the repository located at `cwd`.
 *
 * The SHA-256 of the `remote.origin.url` git config value is preferred. When
 * git fails or prints nothing, the resolved real path of `cwd` is hashed
 * instead.
 *
 * @param cwd Directory to fingerprint.
 * @returns The first 12 hex characters of the digest, or "000000000000" when
 *   neither the remote URL nor the real path can be read.
 */
export function computeRepoFingerprint(cwd) {
    const shortHash = (value) => createHash("sha256").update(value).digest("hex").slice(0, 12);
    try {
        // Run git with `cwd` as the child's working directory rather than
        // splicing the path into the command line: JSON.stringify quoting is
        // not shell quoting, so a path containing `$(...)` or backticks would
        // be expanded by the shell.
        const remote = execSync("git config --get remote.origin.url", {
            cwd,
            stdio: ["ignore", "pipe", "ignore"],
        }).toString().trim();
        if (remote)
            return shortHash(remote);
    }
    catch {
        // Best effort: git missing, not a repository, or no origin remote.
        // Fall through to the path-based fingerprint.
    }
    try {
        return shortHash(realpathSync(cwd));
    }
    catch {
        // `cwd` cannot be resolved at all; return the fixed placeholder.
        return "000000000000";
    }
}
@@ -0,0 +1,71 @@
1
+ import { EventEmitter } from "events";
2
+ export type StallType = "thinking" | "action" | "betweenTool";
3
+ export interface StallThresholds {
4
+ thinking: number;
5
+ action: number;
6
+ betweenTool: number;
7
+ }
8
+ /** Minimal sink — StreamSink satisfies this; planner passes an inline stub. */
9
+ export interface StallGuardSink {
10
+ lastByteAt: number;
11
+ /** Empty string disables the transcript-bus subscription (planner mode). */
12
+ streamId: string;
13
+ finished: boolean;
14
+ path?: string;
15
+ }
16
+ export type FallbackProviderFn = (model: string | undefined) => Record<string, string> | undefined;
17
+ export declare class StallError extends Error {
18
+ readonly elapsedMs: number;
19
+ readonly thresholdMs: number;
20
+ readonly activityType: StallType;
21
+ readonly salvagedText?: string | undefined;
22
+ constructor(elapsedMs: number, thresholdMs: number, activityType: StallType, salvagedText?: string | undefined);
23
+ }
24
+ export declare function isStallError(err: unknown): err is StallError;
25
+ export declare class StallGuard extends EventEmitter {
26
+ readonly thresholds: StallThresholds;
27
+ private _sink;
28
+ private _abort;
29
+ private _interval?;
30
+ private _stopped;
31
+ private _unsub?;
32
+ private _salvagedText?;
33
+ private _assembledDelta;
34
+ private _lastActivityType;
35
+ constructor(sink: StallGuardSink, abortController: AbortController, thresholds?: Partial<StallThresholds>);
36
+ private _onEvent;
37
+ private _tick;
38
+ get salvagedText(): string | undefined;
39
+ stop(): void;
40
+ }
41
+ export declare class StallMonitor extends EventEmitter {
42
+ private static _instance?;
43
+ private _active;
44
+ private _peakActive;
45
+ private _outageActive;
46
+ private _fallbackProvider?;
47
+ static get instance(): StallMonitor;
48
+ static reset(): void;
49
+ setFallbackProvider(fn: FallbackProviderFn): void;
50
+ getFallbackEnv(model: string | undefined): Record<string, string> | undefined;
51
+ get outageActive(): boolean;
52
+ resetOutage(): void;
53
+ get activeCount(): number;
54
+ register(guard: StallGuard): void;
55
+ unregister(guard: StallGuard): void;
56
+ private _onStall;
57
+ requestRotation(guard?: StallGuard): void;
58
+ }
59
+ export declare function buildStallResumePrompt(salvaged: string | undefined, fallback?: string): string;
60
+ export interface RunWithStallRotationOpts {
61
+ run: (isResume: boolean, prompt: string, env: Record<string, string> | undefined) => Promise<void>;
62
+ initialPrompt: string;
63
+ initialIsResume: boolean;
64
+ initialEnv: Record<string, string> | undefined;
65
+ resolveFallbackEnv: () => Record<string, string> | undefined;
66
+ log: (text: string) => void;
67
+ defaultResumePrompt?: string;
68
+ isAborted?: () => boolean;
69
+ maxRetries?: number;
70
+ }
71
+ export declare function runWithStallRotation(opts: RunWithStallRotationOpts): Promise<void>;
@@ -0,0 +1,188 @@
1
+ import { EventEmitter } from "events";
2
+ import { onStreamEvent } from "./transcripts.js";
3
+ const DEFAULT_THRESHOLDS = {
4
+ thinking: 30_000,
5
+ action: 60_000,
6
+ betweenTool: 45_000,
7
+ };
8
+ const TICK_MS = 5_000;
9
+ const PROVIDER_WINDOW_MS = 15_000;
10
+ /** ~300 tokens @ 4 chars/token. Below this, salvage isn't worth a resume prompt. */
11
+ const MIN_SALVAGE_CHARS = 1200;
12
/**
 * Error describing a stream that stayed silent past its threshold.
 *
 * Carries the observed silence (`elapsedMs`), the limit that was exceeded
 * (`thresholdMs`), the activity type in effect (`activityType`) and, when
 * available, the text salvaged before the stall (`salvagedText`).
 */
export class StallError extends Error {
    elapsedMs;
    thresholdMs;
    activityType;
    salvagedText;
    constructor(elapsedMs, thresholdMs, activityType, salvagedText) {
        // The message reports both durations in whole seconds.
        const toSeconds = (ms) => Math.round(ms / 1000);
        super(`StallGuard: ${activityType} stalled after ${toSeconds(elapsedMs)}s (threshold ${toSeconds(thresholdMs)}s)`);
        Object.assign(this, { elapsedMs, thresholdMs, activityType, salvagedText });
        this.name = "StallError";
    }
}
26
+ export function isStallError(err) {
27
+ return err instanceof StallError;
28
+ }
29
/**
 * Map a transcript event type to the stall category used to pick a threshold.
 * "user" and "tool_result" map to "betweenTool", "tool_use" and "tool_call"
 * map to "action", and every other value maps to "thinking".
 */
function classifyType(t) {
    switch (t) {
        case "user":
        case "tool_result":
            return "betweenTool";
        case "tool_use":
        case "tool_call":
            return "action";
        default:
            return "thinking";
    }
}
36
/**
 * Watchdog for one stream. Every TICK_MS it compares the time since the
 * sink's `lastByteAt` against the threshold for the most recently observed
 * activity type. Once the threshold is reached it emits "stall" with a
 * StallError, aborts the supplied AbortController with that error, and stops.
 *
 * A guard registers itself with the StallMonitor singleton on construction
 * and unregisters in `stop()`.
 */
export class StallGuard extends EventEmitter {
    thresholds;
    _sink;
    _abort;
    _interval;
    _stopped = false;
    _unsub;
    _salvagedText;
    _assembledDelta = "";
    _lastActivityType = "thinking";
    /**
     * @param sink Object exposing `lastByteAt`, `streamId` and `finished`.
     * @param abortController Controller aborted when a stall is detected.
     * @param thresholds Per-activity overrides merged over DEFAULT_THRESHOLDS.
     */
    constructor(sink, abortController, thresholds = {}) {
        super();
        this.thresholds = { ...DEFAULT_THRESHOLDS, ...thresholds };
        this._sink = sink;
        this._abort = abortController;
        // An empty streamId skips the event subscription; the guard then
        // relies on `lastByteAt` alone and keeps the initial activity type.
        if (sink.streamId) {
            this._unsub = onStreamEvent(sink.streamId, (evt) => this._onEvent(evt));
        }
        this._interval = setInterval(() => this._tick(), TICK_MS);
        StallMonitor.instance.register(this);
    }
    /** Track the latest activity type and accumulate assistant text deltas. */
    _onEvent(evt) {
        this._lastActivityType = classifyType(evt.type);
        const payload = evt.payload;
        if (payload?.type === "assistant" && typeof payload.delta === "string") {
            this._assembledDelta += payload.delta;
        }
    }
    /** Periodic check; fires the stall sequence at most once. */
    _tick() {
        if (this._stopped || this._sink.finished)
            return;
        const kind = this._lastActivityType;
        const threshold = this.thresholds[kind];
        const elapsed = Date.now() - this._sink.lastByteAt;
        if (elapsed < threshold)
            return;
        // Only keep accumulated text when there is enough of it to be worth
        // feeding back into a resume prompt.
        if (this._assembledDelta.length >= MIN_SALVAGE_CHARS)
            this._salvagedText = this._assembledDelta;
        const error = new StallError(elapsed, threshold, kind, this._salvagedText);
        try {
            this.emit("stall", error);
        }
        finally {
            // A throwing "stall" listener must not leave the stalled request
            // running or this timer ticking, so abort and stop regardless.
            this._abort.abort(error);
            this.stop();
        }
    }
    /** Text captured at stall time, or undefined when none was kept. */
    get salvagedText() { return this._salvagedText; }
    /** Cancel the timer, drop the event subscription and unregister. Safe to call repeatedly. */
    stop() {
        if (this._stopped)
            return;
        this._stopped = true;
        if (this._interval)
            clearInterval(this._interval);
        this._unsub?.();
        StallMonitor.instance.unregister(this);
    }
}
90
+ export class StallMonitor extends EventEmitter {
91
+ static _instance;
92
+ _active = new Map();
93
+ _peakActive = 0;
94
+ _outageActive = false;
95
+ _fallbackProvider;
96
+ static get instance() {
97
+ if (!StallMonitor._instance)
98
+ StallMonitor._instance = new StallMonitor();
99
+ return StallMonitor._instance;
100
+ }
101
+ static reset() { StallMonitor._instance = undefined; }
102
+ setFallbackProvider(fn) { this._fallbackProvider = fn; }
103
+ getFallbackEnv(model) {
104
+ return this._fallbackProvider?.(model);
105
+ }
106
+ get outageActive() { return this._outageActive; }
107
+ resetOutage() { this._outageActive = false; }
108
+ get activeCount() { return this._active.size; }
109
+ register(guard) {
110
+ guard.on("stall", () => this._onStall(guard));
111
+ this._active.set(guard, 0);
112
+ if (this._active.size > this._peakActive)
113
+ this._peakActive = this._active.size;
114
+ }
115
+ unregister(guard) { this._active.delete(guard); }
116
+ _onStall(guard) {
117
+ this._active.set(guard, Date.now());
118
+ if (this._outageActive || this._peakActive === 0)
119
+ return;
120
+ const now = Date.now();
121
+ let stalled = 0;
122
+ for (const t of this._active.values())
123
+ if (t > 0 && now - t <= PROVIDER_WINDOW_MS)
124
+ stalled++;
125
+ if (stalled >= Math.ceil(this._peakActive / 2)) {
126
+ this._outageActive = true;
127
+ this.emit("provider_outage", { count: stalled, total: this._peakActive });
128
+ this.emit("rotate_provider", { reason: "outage", stallCount: stalled, totalGuards: this._peakActive });
129
+ }
130
+ }
131
+ requestRotation(guard) {
132
+ const stalled = guard && this._active.get(guard) ? 1 : 0;
133
+ this.emit("rotate_provider", { reason: "exhausted", stallCount: stalled, totalGuards: this._peakActive });
134
+ }
135
+ }
136
/**
 * Build the prompt used to resume after a stall.
 *
 * @param salvaged Text recovered from the stalled attempt, if any.
 * @param fallback Prompt returned when nothing was salvaged.
 * @returns `fallback` when `salvaged` is empty or missing; otherwise a
 *   continuation prompt embedding at most the first 4000 characters of it.
 */
export function buildStallResumePrompt(salvaged, fallback = "Continue. Complete the task.") {
    if (!salvaged) {
        return fallback;
    }
    const excerpt = salvaged.slice(0, 4000);
    return `Continue from where you left off. Here is what was already written:\n\n${excerpt}`;
}
141
/**
 * Run `opts.run` and re-run it after a stall, in this order of preference:
 *
 *  1. While the StallMonitor reports an outage, switch to the fallback
 *     environment and re-run immediately.
 *  2. Otherwise retry with jittered exponential backoff, up to `maxRetries`
 *     (default 2) attempts.
 *  3. Once retries are used up, switch to the fallback environment if one
 *     exists and differs from the current one, and reset the retry counter.
 *  4. Otherwise rethrow the stall error.
 *
 * Errors that are not StallErrors, and any error seen while
 * `opts.isAborted()` is true, are rethrown untouched. Every re-run uses a
 * resume prompt built from the stall's salvaged text.
 */
export async function runWithStallRotation(opts) {
    let nextPrompt = opts.initialPrompt;
    let resuming = opts.initialIsResume;
    let activeEnv = opts.initialEnv;
    let retriesUsed = 0;
    const retryLimit = opts.maxRetries ?? 2;
    const monitor = StallMonitor.instance;
    // Shared by all three recovery paths: the next run resumes from salvage.
    const switchToResume = (stall) => {
        nextPrompt = buildStallResumePrompt(stall.salvagedText, opts.defaultResumePrompt);
        resuming = true;
    };
    for (;;) {
        let stall;
        try {
            await opts.run(resuming, nextPrompt, activeEnv);
            return;
        }
        catch (err) {
            const recoverable = isStallError(err) && !opts.isAborted?.();
            if (!recoverable)
                throw err;
            stall = err;
        }
        const elapsedS = Math.round(stall.elapsedMs / 1000);
        // NOTE(review): this branch consumes no retry attempt, so it repeats
        // for as long as `outageActive` stays true and runs keep stalling.
        // Confirm that something outside this function clears the flag.
        if (monitor.outageActive) {
            activeEnv = opts.resolveFallbackEnv();
            opts.log(`Provider outage — rotating to fallback (stall at ${elapsedS}s)`);
            switchToResume(stall);
            continue;
        }
        if (retriesUsed < retryLimit) {
            retriesUsed += 1;
            // 2s, 8s, then capped at 30s, each scaled by a random factor in [0.5, 1.5).
            const backoffMs = Math.min(30_000, 2000 * 4 ** (retriesUsed - 1)) * (0.5 + Math.random());
            opts.log(`Stall at ${elapsedS}s — retry ${retriesUsed}/${retryLimit} in ${Math.round(backoffMs / 1000)}s`);
            await new Promise((resolve) => setTimeout(resolve, backoffMs));
            switchToResume(stall);
            monitor.emit("retry", { attempt: retriesUsed, maxAttempts: retryLimit, elapsed: stall.elapsedMs });
            continue;
        }
        const fallbackEnv = opts.resolveFallbackEnv();
        const canRotate = Boolean(fallbackEnv) && activeEnv !== fallbackEnv;
        if (!canRotate)
            throw stall;
        monitor.requestRotation();
        activeEnv = fallbackEnv;
        opts.log(`Stall retries exhausted — rotating to fallback provider`);
        switchToResume(stall);
        retriesUsed = 0;
    }
}
@@ -1,5 +1,30 @@
1
1
  export declare function setTranscriptRunDir(dir: string | undefined): void;
2
2
  export declare function getTranscriptRunDir(): string | undefined;
3
3
  export declare function transcriptPath(name: string): string | undefined;
4
- /** Append a single event; log to stderr once per name on failure (C5). */
5
4
  export declare function writeTranscriptEvent(name: string, event: Record<string, unknown>): void;
5
+ export interface TranscriptEvent {
6
+ t: number;
7
+ type: string;
8
+ payload: Record<string, unknown>;
9
+ meta?: {
10
+ streamId?: string;
11
+ agentId?: number;
12
+ };
13
+ }
14
+ type StreamListener = (evt: TranscriptEvent) => void;
15
+ export declare function onStreamEvent(streamId: string, fn: StreamListener): () => void;
16
+ export declare class StreamSink {
17
+ readonly streamId: string;
18
+ readonly agentId?: number;
19
+ lastByteAt: number;
20
+ eventCount: number;
21
+ finished: boolean;
22
+ private _path;
23
+ constructor(streamId: string, agentId?: number);
24
+ append(msg: {
25
+ type: string;
26
+ } & Record<string, unknown>): void;
27
+ markFinished(): void;
28
+ get path(): string | undefined;
29
+ }
30
+ export {};
@@ -1,19 +1,9 @@
1
1
  import { appendFileSync, mkdirSync } from "fs";
2
2
  import { dirname, join } from "path";
3
3
  /**
4
- * Crash-safe NDJSON transcripts for planner/steering queries.
5
- *
6
- * Each query writes to `<runDir>/transcripts/<name>.ndjson` -- one JSON object
7
- * per line, so partial writes survive crashes. Multiple invocations of the same
8
- * name append with a `session_start` marker separating them.
9
- *
10
- * Why NDJSON:
11
- * - append-only → no read-modify-write race under parallel waves
12
- * - one line per event → `tail -f` works; a killed process never leaves
13
- * the file in an unparseable state
14
- * - machine-readable → this assistant and future tools can `jq` through it
15
- *
16
- * Consumed by: planner-query.ts (stream_event, rate_limit_event, result, error).
4
+ * Crash-safe NDJSON transcripts. One JSON object per line survives partial writes.
5
+ * Planner/steering live at `<runDir>/transcripts/<name>.ndjson`; per-stream agent
6
+ * transcripts at `<runDir>/transcripts/streams/<streamId>.ndjson`.
17
7
  */
18
8
  let _runDir;
19
9
  export function setTranscriptRunDir(dir) {
@@ -25,9 +15,7 @@ export function getTranscriptRunDir() {
25
15
  export function transcriptPath(name) {
26
16
  return _runDir ? join(_runDir, "transcripts", `${name}.ndjson`) : undefined;
27
17
  }
28
- /** Names that already errored — guard against repeated stderr spam. */
29
18
  const _seenErrors = new Set();
30
- /** Append a single event; log to stderr once per name on failure (C5). */
31
19
  export function writeTranscriptEvent(name, event) {
32
20
  const path = transcriptPath(name);
33
21
  if (!path)
@@ -44,3 +32,75 @@ export function writeTranscriptEvent(name, event) {
44
32
  }
45
33
  }
46
34
  }
35
+ // Keyed subscription: avoids O(N) fanout when many guards each filter for one stream.
36
+ const _listeners = new Map();
37
+ export function onStreamEvent(streamId, fn) {
38
+ let set = _listeners.get(streamId);
39
+ if (!set) {
40
+ set = new Set();
41
+ _listeners.set(streamId, set);
42
+ }
43
+ set.add(fn);
44
+ return () => {
45
+ const s = _listeners.get(streamId);
46
+ if (!s)
47
+ return;
48
+ s.delete(fn);
49
+ if (s.size === 0)
50
+ _listeners.delete(streamId);
51
+ };
52
+ }
53
+ function dispatchStreamEvent(streamId, evt) {
54
+ const set = _listeners.get(streamId);
55
+ if (!set)
56
+ return;
57
+ for (const fn of set) {
58
+ try {
59
+ fn(evt);
60
+ }
61
+ catch { /* listener errors must not break the sink */ }
62
+ }
63
+ }
64
+ export class StreamSink {
65
+ streamId;
66
+ agentId;
67
+ lastByteAt;
68
+ eventCount = 0;
69
+ finished = false;
70
+ _path;
71
+ constructor(streamId, agentId) {
72
+ this.streamId = streamId;
73
+ this.agentId = agentId;
74
+ this.lastByteAt = Date.now();
75
+ if (_runDir) {
76
+ this._path = join(_runDir, "transcripts", "streams", `${streamId}.ndjson`);
77
+ mkdirSync(dirname(this._path), { recursive: true });
78
+ }
79
+ }
80
+ append(msg) {
81
+ const evt = {
82
+ t: Date.now(),
83
+ type: msg.type,
84
+ payload: msg,
85
+ meta: { streamId: this.streamId, agentId: this.agentId },
86
+ };
87
+ this.lastByteAt = evt.t;
88
+ this.eventCount++;
89
+ if (this._path) {
90
+ try {
91
+ appendFileSync(this._path, JSON.stringify(evt) + "\n");
92
+ }
93
+ catch (err) {
94
+ const msg = err instanceof Error ? err.message : String(err);
95
+ process.stderr.write(`[StreamSink] append("${this.streamId}") failed: ${msg}\n`);
96
+ }
97
+ }
98
+ dispatchStreamEvent(this.streamId, evt);
99
+ }
100
+ markFinished() {
101
+ this.finished = true;
102
+ }
103
+ get path() {
104
+ return this._path;
105
+ }
106
+ }
@@ -18,6 +18,12 @@ export interface Task {
18
18
  type?: string;
19
19
  /** Shell command that must exit 0 for the task to be considered done. Runs in cwd after merge. Failed postconditions trigger the same retry path as filesChanged=0. */
20
20
  postcondition?: string;
21
+ /** Groups sibling tasks for A/B pairing — tasks with the same groupId are considered comparable. */
22
+ groupId?: string;
23
+ /** A/B arm assignment: "treatment" includes the test skill, "control" explicitly excludes it. */
24
+ abArm?: "treatment" | "control";
25
+ /** Skill name to exclude from L0 stub for control arm agents. */
26
+ abExcludeSkill?: string;
21
27
  }
22
28
  /** Schema for a JSON task file that defines a batch of work for the swarm. */
23
29
  export interface TaskFile {
@@ -289,6 +295,8 @@ export interface RunState extends RunConfigBase {
289
295
  startedAt: string;
290
296
  /** Working directory for the run. */
291
297
  cwd: string;
298
+ /** 12-char repo fingerprint for skill storage. */
299
+ repoFingerprint: string;
292
300
  /** Original raw objective the user typed, before the setup coach rewrote it. Optional — only present when the coach ran and was accepted. */
293
301
  coachedObjective?: string;
294
302
  /** Unix timestamp (ms) when the coach produced the accepted rewrite. */
@@ -10,6 +10,7 @@ import { envFor, isCursorProxyProvider, ensureCursorProxyRunning, PROXY_DEFAULT_
10
10
  import { COACH_SCHEMA, validateCoachOutput } from "./schema.js";
11
11
  import { URL_REGEX, fetchUrlContent, collectRepoFacts, renderRepoFacts } from "./context.js";
12
12
  import { loadUserSettings, saveUserSettings } from "./settings.js";
13
+ import { renderPrompt } from "../../prompts/load.js";
13
14
  export { loadUserSettings, saveUserSettings } from "./settings.js";
14
15
  export { validateCoachOutput, } from "./schema.js";
15
16
  export const COACH_MODEL = "claude-haiku-4-5";
@@ -59,7 +60,7 @@ export async function runSetupCoach(rawObjective, cwd, ctx) {
59
60
  }
60
61
  }
61
62
  const userMessage = renderRepoFacts(facts, rawObjective, ctx.providers, ctx.cliFlags, planContent);
62
- const prompt = `${skill}\n\n---\n\n${userMessage}\n\nRespond with the JSON object defined in "Invocation contract" only.`;
63
+ const prompt = renderPrompt("00_setup/00-2_coach-wrapper", { variant: "WRAP", vars: { skill, userMessage } });
63
64
  // cursor "auto" maps to a slow thinking-class model for large prompts (182s observed).
64
65
  // composer-2-fast gives the same quality for structured JSON at ~8s.
65
66
  const CURSOR_FAST_MODEL = "composer-2-fast";
@@ -142,7 +143,9 @@ export async function runSetupCoach(rawObjective, cwd, ctx) {
142
143
  const amend = (await ask(`\n ${chalk.cyan(">")} what would you change? `)).trim();
143
144
  if (!amend)
144
145
  return null;
145
- const amendedPrompt = `${prompt}\n\n---\n\nUser amendment (apply and return a revised JSON object):\n${amend}`;
146
+ const amendedPrompt = renderPrompt("00_setup/00-2_coach-wrapper", {
147
+ variant: "AMEND", vars: { previousPrompt: prompt, amendment: amend },
148
+ });
146
149
  const amendTurn = createTurn("coach", "Coach (amended)", "coach-amend-0", model);
147
150
  beginTurn(amendTurn);
148
151
  try {
@@ -1,6 +1,5 @@
1
1
  import type { Task } from "../core/types.js";
2
2
  export declare function salvageFromFile(outFile: string | undefined, budget: number | undefined, onLog: (text: string, kind?: "status" | "event") => void, why: string): Task[] | null;
3
- export declare const DESIGN_THINKING = "\nHOW TO THINK ABOUT EVERY TASK:\n\nStart from the user's job. What is someone hiring this product to do? \"I need to send money abroad cheaply\" -- not \"I need a currency conversion API.\" Every decision -- what to build, how fast it needs to respond, what happens on error -- flows from the job.\n\nThe experience IS the product. A 200ms server response is not a \"performance metric\" -- it's the difference between an app that feels alive and one that feels broken. A loading state is not \"polish\" -- it's the user knowing the app heard them. An error message is not \"error handling\" -- it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render -- they're all one experience the user either trusts or doesn't.\n\nBuild the core, verify it works, learn, iterate. Don't plan 20 features and build them all. Build the ONE thing that matters most, run it, see if it actually works from a user's chair. What you learn from seeing it run will change what you build next. Each wave should make what exists better before adding what doesn't exist yet.\n\nConsistency is what makes complex things feel simple. One design system, rigid rules, no exceptions. This is how Revolut ships a super-app with 30+ features that doesn't feel like chaos.\n";
4
3
  export declare function planTasks(objective: string, cwd: string, plannerModel: string, workerModel: string, budget: number | undefined, concurrency: number, onLog: (text: string) => void, flexNote?: string, outFile?: string, transcriptName?: string): Promise<Task[]>;
5
4
  export declare function identifyThemes(objective: string, count: number, cwd: string, model: string, onLog?: (text: string) => void, transcriptName?: string): Promise<string[]>;
6
5
  export declare function buildThinkingTasks(objective: string, themes: string[], designDir: string, plannerModel: string, previousKnowledge?: string): Task[];