smithers-orchestrator 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,6 +11,7 @@
11
11
  * Re-renders the workflow after each step
12
12
  * Resumes exactly where it left off after crashes
13
13
  * Supports subscriptions
14
+ * Hot-reloads workflow code on file save (prompts, config, components) without restarting
14
15
 
15
16
  There is no hidden in-memory state. Every task result is stored as:
16
17
 
@@ -226,6 +227,36 @@ smithers list workflow.tsx
226
227
  smithers approve workflow.tsx --run-id abc123 --node-id review
227
228
  ```
228
229
 
230
+ ## Hot Module Replacement
231
+
232
+ Edit your workflow files while a run is executing. Smithers watches your source tree and hot-reloads changes on save — prompts, config, agent settings, and component structure — without restarting the process or losing run state.
233
+
234
+ ```bash
235
+ smithers run workflow.tsx --hot
236
+ ```
237
+
238
+ In-flight tasks continue with their original code. Only newly scheduled tasks pick up the changes.
239
+
240
+ ```
241
+ [00:05:12] ⟳ File change detected: 1 file(s)
242
+ [00:05:12] ⟳ Workflow reloaded (generation 1)
243
+ [00:05:13] → implement-cat-12 (attempt 1, iteration 0)
244
+ ```
245
+
246
+ **What you can change live:**
247
+
248
+ * Prompt strings and `.md`/`.mdx` prompt files
249
+ * Focus lists, config values, concurrency settings
250
+ * Agent models, timeouts, system prompts
251
+ * JSX tree structure (add/remove/reorder tasks)
252
+
253
+ **What requires a restart:**
254
+
255
+ * Output schema changes (Zod shapes)
256
+ * Database path changes
257
+
258
+ See the [Hot Reload Guide](/guides/hot-reload) for details.
259
+
229
260
  ---
230
261
 
231
262
  ## Built-in Tools
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smithers-orchestrator",
3
- "version": "0.8.4",
3
+ "version": "0.9.0",
4
4
  "description": "AI workflow orchestration with JSX",
5
5
  "author": "William Cory",
6
6
  "license": "MIT",
package/src/RunOptions.ts CHANGED
@@ -1,5 +1,18 @@
1
1
  import type { SmithersEvent } from "./SmithersEvent";
2
2
 
3
/**
 * Tuning options for hot reload. Passing `hot: true` in `RunOptions` is
 * equivalent to passing an empty options object (all defaults).
 */
export type HotReloadOptions = {
  /**
   * Root directory to watch and mirror into overlays
   * (default: the directory containing the workflow entry file).
   */
  rootDir?: string;
  /**
   * Directory that receives the `gen-<n>` overlay directories
   * (default: `<rootDir>/.smithers/hmr`).
   */
  outDir?: string;
  /** Max overlay generations to keep on disk (default: 3) */
  maxGenerations?: number;
  /**
   * Whether to cancel tasks that become unmounted after hot reload (default: false).
   * NOTE(review): no consumer of this flag is visible alongside this type — confirm it is wired up.
   */
  cancelUnmounted?: boolean;
  /** Debounce interval in ms for file change events (default: 100) */
  debounceMs?: number;
};
15
+
3
16
  export type RunOptions = {
4
17
  runId?: string;
5
18
  input: Record<string, unknown>;
@@ -13,4 +26,5 @@ export type RunOptions = {
13
26
  allowNetwork?: boolean;
14
27
  maxOutputBytes?: number;
15
28
  toolTimeoutMs?: number;
29
+ hot?: boolean | HotReloadOptions;
16
30
  };
@@ -152,4 +152,31 @@ export type SmithersEvent =
152
152
  success: boolean;
153
153
  error?: string;
154
154
  timestampMs: number;
155
+ }
156
+ | {
157
+ type: "WorkflowReloadDetected";
158
+ runId: string;
159
+ changedFiles: string[];
160
+ timestampMs: number;
161
+ }
162
+ | {
163
+ type: "WorkflowReloaded";
164
+ runId: string;
165
+ generation: number;
166
+ changedFiles: string[];
167
+ timestampMs: number;
168
+ }
169
+ | {
170
+ type: "WorkflowReloadFailed";
171
+ runId: string;
172
+ error: unknown;
173
+ changedFiles: string[];
174
+ timestampMs: number;
175
+ }
176
+ | {
177
+ type: "WorkflowReloadUnsafe";
178
+ runId: string;
179
+ reason: string;
180
+ changedFiles: string[];
181
+ timestampMs: number;
155
182
  };
package/src/cli/index.ts CHANGED
@@ -112,6 +112,7 @@ Run options:
112
112
  --no-log Disable event log file output
113
113
  --allow-network Allow network access for bash tool
114
114
  --max-output-bytes N Max tool output bytes (default: 200000)
115
+ --hot Enable hot module replacement (watch & reload workflow on file changes)
115
116
  --tool-timeout-ms N Tool timeout in ms (default: 60000)
116
117
  --version, -v Print version
117
118
  `);
@@ -130,8 +131,32 @@ Run options:
130
131
  process.exit(4);
131
132
  }
132
133
  const resolvedWorkflowPath = resolve(process.cwd(), workflowPath);
134
+ if (args.hot) {
135
+ process.env.SMITHERS_HOT = "1";
136
+ }
133
137
  const workflow = await loadWorkflow(workflowPath);
134
138
  ensureSmithersTables(workflow.db as any);
139
+ if (args.hot) {
140
+ process.stderr.write(`[hot] Hot reload enabled\n`);
141
+ }
142
+
143
+ // Register cleanup to close the SQLite connection before process exit.
144
+ // This prevents libsqlite3 fatal "unfinalized statement" errors that
145
+ // crash the process when bun's GC tries to close the Database while
146
+ // Drizzle's prepared statements are still alive.
147
+ const closeSqlite = () => {
148
+ try {
149
+ const client: any = (workflow.db as any)?.$client;
150
+ if (client && typeof client.close === "function") {
151
+ client.close();
152
+ }
153
+ } catch {
154
+ // Best-effort — ignore errors during cleanup
155
+ }
156
+ };
157
+ process.on("exit", closeSqlite);
158
+ process.on("SIGINT", () => { closeSqlite(); process.exit(130); });
159
+ process.on("SIGTERM", () => { closeSqlite(); process.exit(143); });
135
160
  const input = args.input ? parseJsonOrExit(args.input, "input") : {};
136
161
  const runId = args["run-id"];
137
162
  const resume = cmd === "resume" || Boolean(args.resume);
@@ -227,6 +252,26 @@ Run options:
227
252
  case "FrameCommitted":
228
253
  // Don't print frame commits - too noisy
229
254
  break;
255
+ case "WorkflowReloadDetected":
256
+ process.stderr.write(
257
+ `[${ts}] ⟳ File change detected: ${(event as any).changedFiles?.length ?? 0} file(s)\n`,
258
+ );
259
+ break;
260
+ case "WorkflowReloaded":
261
+ process.stderr.write(
262
+ `[${ts}] ⟳ Workflow reloaded (generation ${(event as any).generation})\n`,
263
+ );
264
+ break;
265
+ case "WorkflowReloadFailed":
266
+ process.stderr.write(
267
+ `[${ts}] ⚠ Workflow reload failed: ${typeof (event as any).error === "string" ? (event as any).error : ((event as any).error?.message ?? "unknown")}\n`,
268
+ );
269
+ break;
270
+ case "WorkflowReloadUnsafe":
271
+ process.stderr.write(
272
+ `[${ts}] ⚠ Workflow reload blocked: ${(event as any).reason}\n`,
273
+ );
274
+ break;
230
275
  }
231
276
  };
232
277
  const result = await runWorkflow(workflow, {
@@ -240,6 +285,7 @@ Run options:
240
285
  allowNetwork: Boolean(args["allow-network"]),
241
286
  maxOutputBytes,
242
287
  toolTimeoutMs,
288
+ hot: Boolean(args["hot"]),
243
289
  onProgress,
244
290
  });
245
291
  console.log(JSON.stringify(result, null, 2));
package/src/create.ts CHANGED
@@ -13,8 +13,28 @@ import type { WorkflowProps, TaskProps } from "./components";
13
13
  import { zodToTable } from "./zodToTable";
14
14
  import { zodToCreateTableSQL } from "./zodToCreateTableSQL";
15
15
  import { camelToSnake } from "./camelToSnake";
16
+ import { resolve } from "node:path";
16
17
  import type { z } from "zod";
17
18
 
19
+ type HotCacheEntry = {
20
+ api: CreateSmithersApi<any>;
21
+ schemaSig: string;
22
+ };
23
+ const hotCache = new Map<string, HotCacheEntry>();
24
+
25
/**
 * Build a stable signature for a set of output schemas bound to a database.
 *
 * The signature is the database path followed by one `name:DDL` line per
 * schema, with schema names visited in sorted order so that key ordering in
 * `schemas` does not affect the result.
 */
function computeSchemaSig(
  schemas: Record<string, any>,
  dbPath: string,
): string {
  const schemaLines = Object.keys(schemas)
    .sort()
    .map((name) => {
      const createTableSql = zodToCreateTableSQL(camelToSnake(name), schemas[name]);
      return `${name}:${createTableSql}`;
    });
  return [dbPath, ...schemaLines].join("\n");
}
37
+
18
38
  export type CreateSmithersApi<Schema = any> = {
19
39
  Workflow: (props: WorkflowProps) => React.ReactElement;
20
40
  Task: <Row>(props: TaskProps<Row>) => React.ReactElement;
@@ -51,6 +71,23 @@ export function createSmithers<
51
71
  schemas: Schemas,
52
72
  opts?: { dbPath?: string; journalMode?: string },
53
73
  ): CreateSmithersApi<Schemas> {
74
+ const dbPath = opts?.dbPath ?? "./smithers.db";
75
+ const absDbPath = resolve(process.cwd(), dbPath);
76
+
77
+ if (process.env.SMITHERS_HOT === "1") {
78
+ const sig = computeSchemaSig(schemas as Record<string, any>, absDbPath);
79
+ const cached = hotCache.get(absDbPath);
80
+ if (cached) {
81
+ if (cached.schemaSig !== sig) {
82
+ throw new Error(
83
+ "[smithers hot] Schema change detected; restart required to apply schema changes.",
84
+ );
85
+ }
86
+ return cached.api as any;
87
+ }
88
+ // Will cache after creating the API below
89
+ }
90
+
54
91
  // 1. Generate Drizzle tables from Zod schemas
55
92
  const tables: Record<string, any> = {};
56
93
  const inputTable = sqliteTable("input", {
@@ -64,11 +101,23 @@ export function createSmithers<
64
101
  }
65
102
 
66
103
  // 2. Create SQLite db
67
- const dbPath = opts?.dbPath ?? "./smithers.db";
68
104
  const sqlite = new Database(dbPath);
69
105
  sqlite.exec(`PRAGMA journal_mode = ${opts?.journalMode ?? "WAL"}`);
106
+ sqlite.exec("PRAGMA busy_timeout = 5000");
70
107
  sqlite.exec("PRAGMA foreign_keys = ON");
71
108
 
109
+ // Register a process-exit hook to explicitly close the Database.
110
+ // bun:sqlite's GC finalizer calls sqlite3_close() which fatally aborts if
111
+ // Drizzle's cached prepared statements haven't been finalized first.
112
+ // Calling close() ourselves lets sqlite3 finalize everything gracefully.
113
+ let dbClosed = false;
114
+ const closeDb = () => {
115
+ if (dbClosed) return;
116
+ dbClosed = true;
117
+ try { sqlite.close(); } catch {}
118
+ };
119
+ process.on("exit", closeDb);
120
+
72
121
  // 3. Auto-create tables using CREATE TABLE IF NOT EXISTS
73
122
  sqlite.exec(
74
123
  `CREATE TABLE IF NOT EXISTS "input" (run_id TEXT PRIMARY KEY, payload TEXT)`,
@@ -146,13 +195,20 @@ export function createSmithers<
146
195
  } as SmithersWorkflow<any>;
147
196
  }
148
197
 
149
- return {
198
+ const api = {
150
199
  Workflow,
151
200
  Task,
152
201
  useCtx,
153
202
  smithers: boundSmithers,
154
203
  db,
155
- tables: tables as { [K in keyof Schemas]: any },
156
- outputs: schemas as { [K in keyof Schemas]: Schemas[K] },
204
+ tables: tables as any,
205
+ outputs: schemas as any,
157
206
  };
207
+
208
+ if (process.env.SMITHERS_HOT === "1") {
209
+ const sig = computeSchemaSig(schemas as Record<string, any>, absDbPath);
210
+ hotCache.set(absDbPath, { api: api as any, schemaSig: sig });
211
+ }
212
+
213
+ return api;
158
214
  }
package/src/db/adapter.ts CHANGED
@@ -155,6 +155,13 @@ export class SmithersDb {
155
155
  );
156
156
  }
157
157
 
158
/** Select every attempt row whose state is "in-progress", regardless of run. */
async listAllInProgressAttempts() {
  const stillRunning = eq(smithersAttempts.state, "in-progress");
  const query = this.db.select().from(smithersAttempts).where(stillRunning);
  return query;
}
164
+
158
165
  async insertFrame(row: any) {
159
166
  await this.db
160
167
  .insert(smithersFrames)
@@ -39,6 +39,8 @@ import { eq, getTableName } from "drizzle-orm";
39
39
  import { getTableColumns } from "drizzle-orm/utils";
40
40
  import { dirname, resolve } from "node:path";
41
41
  import { existsSync } from "node:fs";
42
+ import { HotWorkflowController } from "../hot/HotWorkflowController";
43
+ import type { HotReloadOptions } from "../RunOptions";
42
44
  import { spawn as nodeSpawn } from "node:child_process";
43
45
  import { platform } from "node:os";
44
46
 
@@ -123,15 +125,23 @@ async function ensureWorktree(
123
125
  worktreePath: string,
124
126
  branch?: string,
125
127
  ): Promise<void> {
126
- if (createdWorktrees.has(worktreePath)) {
127
- if (existsSync(worktreePath)) return;
128
- // Process-global cache can become stale if the path is later deleted.
129
- createdWorktrees.delete(worktreePath);
130
- }
131
128
  if (existsSync(worktreePath)) {
129
+ // Worktree exists — rebase onto latest main so work starts from tip.
130
+ const vcs = findVcsRoot(rootDir);
131
+ if (vcs?.type === "jj") {
132
+ const { runJj } = await import("../vcs/jj");
133
+ await runJj(["git", "fetch"], { cwd: worktreePath });
134
+ await runJj(["rebase", "-d", "main"], { cwd: worktreePath });
135
+ } else if (vcs?.type === "git") {
136
+ await runGitCommand(worktreePath, ["fetch", "origin"]);
137
+ await runGitCommand(worktreePath, ["rebase", "origin/main"]);
138
+ }
132
139
  createdWorktrees.add(worktreePath);
133
140
  return;
134
141
  }
142
+ if (createdWorktrees.has(worktreePath)) {
143
+ createdWorktrees.delete(worktreePath);
144
+ }
135
145
 
136
146
  // Walk up from rootDir to find the actual VCS root
137
147
  const vcs = findVcsRoot(rootDir);
@@ -296,6 +306,12 @@ function resolveLogDir(
296
306
  return resolve(rootDir, ".smithers", "executions", runId, "logs");
297
307
  }
298
308
 
309
/**
 * Collapse the `hot` run option (`boolean | HotReloadOptions | undefined`)
 * into a single options object carrying an explicit `enabled` flag.
 * Falsy values disable hot reload; `true` enables it with no overrides;
 * an options object enables it and contributes its own fields.
 */
function normalizeHotOptions(hot: boolean | HotReloadOptions | undefined): HotReloadOptions & { enabled: boolean } {
  if (!hot) {
    return { enabled: false };
  }
  const overrides: HotReloadOptions = hot === true ? {} : hot;
  return { enabled: true, ...overrides };
}
314
+
299
315
  function assertInputObject(input: unknown) {
300
316
  if (!input || typeof input !== "object" || Array.isArray(input)) {
301
317
  throw new SmithersError("INVALID_INPUT", "Run input must be a JSON object");
@@ -722,6 +738,7 @@ async function executeTask(
722
738
  workflowName: string,
723
739
  cacheEnabled: boolean,
724
740
  signal?: AbortSignal,
741
+ disabledAgents?: Set<any>,
725
742
  ) {
726
743
  const attempts = await adapter.listAttempts(
727
744
  runId,
@@ -777,6 +794,7 @@ async function executeTask(
777
794
  let cacheKey: string | null = null;
778
795
  let cacheJjBase: string | null = null;
779
796
  let responseText: string | null = null;
797
+ let effectiveAgent: any = null;
780
798
  // Resolve effective root once so both caching and execution share it.
781
799
  const taskRoot = desc.worktreePath ?? toolConfig.rootDir;
782
800
 
@@ -823,8 +841,11 @@ async function executeTask(
823
841
  }
824
842
 
825
843
  if (!payload) {
826
- const agents = Array.isArray(desc.agent) ? desc.agent : (desc.agent ? [desc.agent] : []);
827
- const effectiveAgent = agents[Math.min(attemptNo - 1, agents.length - 1)];
844
+ const allAgents = Array.isArray(desc.agent) ? desc.agent : (desc.agent ? [desc.agent] : []);
845
+ const agents = disabledAgents ? allAgents.filter((a: any) => !disabledAgents.has(a)) : allAgents;
846
+ effectiveAgent = agents.length > 0
847
+ ? agents[Math.min(attemptNo - 1, agents.length - 1)]
848
+ : allAgents[Math.min(attemptNo - 1, allAgents.length - 1)]; // fallback to disabled agent if all disabled
828
849
  if (effectiveAgent) {
829
850
  // Use fallback agent on retry attempts when available
830
851
  const result = await runWithToolContext(
@@ -1365,6 +1386,17 @@ async function executeTask(
1365
1386
  label: desc.label ?? null,
1366
1387
  });
1367
1388
 
1389
+ // Circuit-breaker: disable agents that fail with auth errors
1390
+ if (disabledAgents && effectiveAgent) {
1391
+ const errStr = String((err as any)?.message ?? err ?? "") + (responseText ?? "");
1392
+ const isAuthError = /invalid_authentication|401|api.key.*invalid|expired.*credentials|authentication.*failed/i.test(errStr);
1393
+ if (isAuthError) {
1394
+ disabledAgents.add(effectiveAgent);
1395
+ const agentName = effectiveAgent?.model ?? effectiveAgent?.id ?? "unknown";
1396
+ console.log(`[smithers] Circuit-breaker: disabled agent ${agentName} due to auth failure`);
1397
+ }
1398
+ }
1399
+
1368
1400
  await eventBus.emitEventWithPersist({
1369
1401
  type: "NodeFailed",
1370
1402
  runId,
@@ -1464,6 +1496,11 @@ export async function runWorkflow<Schema>(
1464
1496
  eventBus.on("event", (e: SmithersEvent) => opts.onProgress?.(e));
1465
1497
  }
1466
1498
 
1499
+ const hotOpts = normalizeHotOptions(opts.hot);
1500
+ let hotController: HotWorkflowController | null = null;
1501
+ let hotPendingFiles: string[] | null = null;
1502
+ let workflowRef = workflow;
1503
+
1467
1504
  const wakeLock = acquireCaffeinate();
1468
1505
  try {
1469
1506
  const existingRun = await adapter.getRun(runId);
@@ -1540,6 +1577,25 @@ export async function runWorkflow<Schema>(
1540
1577
 
1541
1578
  await cancelStaleAttempts(adapter, runId);
1542
1579
 
1580
+ // Cancel orphaned in-progress attempts from previous runs (killed processes)
1581
+ {
1582
+ const allInProgress = await adapter.listAllInProgressAttempts();
1583
+ const now = nowMs();
1584
+ for (const attempt of allInProgress) {
1585
+ if (attempt.runId === runId) continue;
1586
+ await adapter.updateAttempt(
1587
+ attempt.runId,
1588
+ attempt.nodeId,
1589
+ attempt.iteration,
1590
+ attempt.attempt,
1591
+ {
1592
+ state: "cancelled",
1593
+ finishedAtMs: now,
1594
+ },
1595
+ );
1596
+ }
1597
+ }
1598
+
1543
1599
  if (opts.resume) {
1544
1600
  // On resume, cancel ALL in-progress attempts since the previous process is dead
1545
1601
  const staleInProgress = await adapter.listInProgressAttempts(runId);
@@ -1568,9 +1624,13 @@ export async function runWorkflow<Schema>(
1568
1624
  }
1569
1625
  }
1570
1626
 
1627
+ const disabledAgents = new Set<any>();
1571
1628
  const renderer = new SmithersRenderer();
1572
1629
  let frameNo = (await adapter.getLastFrame(runId))?.frameNo ?? 0;
1573
1630
  let defaultIteration = 0;
1631
+ // Track in-flight task promises across loop iterations so we
1632
+ // wait for them before declaring the run finished.
1633
+ const inflight = new Set<Promise<void>>();
1574
1634
  if (opts.resume) {
1575
1635
  const nodes = await adapter.listNodes(runId);
1576
1636
  const maxIteration = nodes.reduce(
@@ -1583,6 +1643,15 @@ export async function runWorkflow<Schema>(
1583
1643
  await adapter.listRalph(runId),
1584
1644
  );
1585
1645
 
1646
+ if (hotOpts.enabled && (resolvedWorkflowPath ?? opts.workflowPath)) {
1647
+ process.env.SMITHERS_HOT = "1";
1648
+ hotController = new HotWorkflowController(
1649
+ resolvedWorkflowPath ?? opts.workflowPath!,
1650
+ hotOpts,
1651
+ );
1652
+ await hotController.init();
1653
+ }
1654
+
1586
1655
  while (true) {
1587
1656
  if (opts.signal?.aborted) {
1588
1657
  await adapter.updateRun(runId, {
@@ -1597,6 +1666,64 @@ export async function runWorkflow<Schema>(
1597
1666
  return { runId, status: "cancelled" };
1598
1667
  }
1599
1668
 
1669
+ // Process pending hot reload
1670
+ if (hotController && hotPendingFiles) {
1671
+ const result = await hotController.reload(hotPendingFiles);
1672
+ hotPendingFiles = null;
1673
+
1674
+ switch (result.type) {
1675
+ case "reloaded":
1676
+ workflowRef = { ...workflowRef, build: result.newBuild };
1677
+ await eventBus.emitEventWithPersist({
1678
+ type: "WorkflowReloaded",
1679
+ runId,
1680
+ generation: result.generation,
1681
+ changedFiles: result.changedFiles,
1682
+ timestampMs: nowMs(),
1683
+ });
1684
+ opts.onProgress?.({
1685
+ type: "WorkflowReloaded",
1686
+ runId,
1687
+ generation: result.generation,
1688
+ changedFiles: result.changedFiles,
1689
+ timestampMs: nowMs(),
1690
+ });
1691
+ break;
1692
+ case "failed":
1693
+ await eventBus.emitEventWithPersist({
1694
+ type: "WorkflowReloadFailed",
1695
+ runId,
1696
+ error: result.error instanceof Error ? result.error.message : String(result.error),
1697
+ changedFiles: result.changedFiles,
1698
+ timestampMs: nowMs(),
1699
+ });
1700
+ opts.onProgress?.({
1701
+ type: "WorkflowReloadFailed",
1702
+ runId,
1703
+ error: result.error instanceof Error ? result.error.message : String(result.error),
1704
+ changedFiles: result.changedFiles,
1705
+ timestampMs: nowMs(),
1706
+ });
1707
+ break;
1708
+ case "unsafe":
1709
+ await eventBus.emitEventWithPersist({
1710
+ type: "WorkflowReloadUnsafe",
1711
+ runId,
1712
+ reason: result.reason,
1713
+ changedFiles: result.changedFiles,
1714
+ timestampMs: nowMs(),
1715
+ });
1716
+ opts.onProgress?.({
1717
+ type: "WorkflowReloadUnsafe",
1718
+ runId,
1719
+ reason: result.reason,
1720
+ changedFiles: result.changedFiles,
1721
+ timestampMs: nowMs(),
1722
+ });
1723
+ break;
1724
+ }
1725
+ }
1726
+
1600
1727
  const inputRow = await loadInput(db, inputTable, runId);
1601
1728
  const outputs = await loadOutputs(db, schema, runId);
1602
1729
  const ralphIterations = ralphIterationsFromState(ralphState);
@@ -1611,7 +1738,7 @@ export async function runWorkflow<Schema>(
1611
1738
  });
1612
1739
 
1613
1740
  const { xml, tasks, mountedTaskIds } = await renderer.render(
1614
- workflow.build(ctx),
1741
+ workflowRef.build(ctx),
1615
1742
  {
1616
1743
  ralphIterations,
1617
1744
  defaultIteration,
@@ -1662,6 +1789,7 @@ export async function runWorkflow<Schema>(
1662
1789
  const inProgress = await adapter.listInProgressAttempts(runId);
1663
1790
  const mountedSet = new Set(mountedTaskIds);
1664
1791
  if (
1792
+ !hotOpts.enabled &&
1665
1793
  inProgress.some(
1666
1794
  (a: any) => !mountedSet.has(`${a.nodeId}::${a.iteration ?? 0}`),
1667
1795
  )
@@ -1711,6 +1839,25 @@ export async function runWorkflow<Schema>(
1711
1839
  );
1712
1840
 
1713
1841
  if (runnable.length === 0) {
1842
+ // If tasks are still in-flight, wait for one to finish then
1843
+ // loop back to re-evaluate instead of declaring the run done.
1844
+ if (inflight.size > 0) {
1845
+ {
1846
+ const waitables: Promise<any>[] = [...inflight];
1847
+ if (hotController) {
1848
+ waitables.push(
1849
+ hotController.wait().then((files) => {
1850
+ hotPendingFiles = files;
1851
+ }),
1852
+ );
1853
+ }
1854
+ if (waitables.length > 0) {
1855
+ await Promise.race(waitables);
1856
+ }
1857
+ }
1858
+ continue;
1859
+ }
1860
+
1714
1861
  if (schedule.waitingApprovalExists) {
1715
1862
  await adapter.updateRun(runId, { status: "waiting-approval" });
1716
1863
  await eventBus.emitEventWithPersist({
@@ -1834,21 +1981,37 @@ export async function runWorkflow<Schema>(
1834
1981
  toolTimeoutMs,
1835
1982
  };
1836
1983
 
1837
- await Promise.all(
1838
- runnable.map((task) =>
1839
- executeTask(
1840
- adapter,
1841
- db,
1842
- runId,
1843
- task,
1844
- eventBus,
1845
- toolConfig,
1846
- workflowName,
1847
- cacheEnabled,
1848
- opts.signal,
1849
- ),
1850
- ),
1851
- );
1984
+ // Launch new tasks and track them in the persistent inflight set.
1985
+ for (const task of runnable) {
1986
+ const p = executeTask(
1987
+ adapter,
1988
+ db,
1989
+ runId,
1990
+ task,
1991
+ eventBus,
1992
+ toolConfig,
1993
+ workflowName,
1994
+ cacheEnabled,
1995
+ opts.signal,
1996
+ disabledAgents,
1997
+ ).finally(() => inflight.delete(p));
1998
+ inflight.add(p);
1999
+ }
2000
+ // Wait for at least one task to finish, then loop back to
2001
+ // re-render and schedule newly runnable tasks.
2002
+ {
2003
+ const waitables: Promise<any>[] = [...inflight];
2004
+ if (hotController) {
2005
+ waitables.push(
2006
+ hotController.wait().then((files) => {
2007
+ hotPendingFiles = files;
2008
+ }),
2009
+ );
2010
+ }
2011
+ if (waitables.length > 0) {
2012
+ await Promise.race(waitables);
2013
+ }
2014
+ }
1852
2015
  }
1853
2016
  } catch (err) {
1854
2017
  if (process.env.SMITHERS_DEBUG) {
@@ -1868,6 +2031,7 @@ export async function runWorkflow<Schema>(
1868
2031
  });
1869
2032
  return { runId, status: "failed", error: errorInfo };
1870
2033
  } finally {
2034
+ await hotController?.close();
1871
2035
  wakeLock.release();
1872
2036
  }
1873
2037
  }
@@ -0,0 +1,142 @@
1
+ import { resolve, dirname } from "node:path";
2
+ import { mkdir, rm } from "node:fs/promises";
3
+ import { pathToFileURL } from "node:url";
4
+ import { WatchTree } from "./watch";
5
+ import { buildOverlay, cleanupGenerations, resolveOverlayEntry } from "./overlay";
6
+ import type { SmithersWorkflow } from "../SmithersWorkflow";
7
+ import type { HotReloadOptions } from "../RunOptions";
8
+
9
+ export type HotReloadEvent =
10
+ | { type: "reloaded"; generation: number; changedFiles: string[]; newBuild: SmithersWorkflow<any>["build"] }
11
+ | { type: "failed"; generation: number; changedFiles: string[]; error: unknown }
12
+ | { type: "unsafe"; generation: number; changedFiles: string[]; reason: string };
13
+
14
+ const DEFAULT_MAX_GENERATIONS = 3;
15
+ const DEFAULT_DEBOUNCE_MS = 100;
16
+
17
/**
 * Coordinates hot reload for a single workflow entry file: watches the source
 * tree, mirrors it into numbered generation overlays, and imports the entry
 * from each fresh overlay so that a new `build` function can be swapped in.
 */
export class HotWorkflowController {
  private entryPath: string;
  private hotRoot: string;
  private outDir: string;
  /** True when `outDir` was defaulted by this class rather than supplied by the caller. */
  private ownsOutDir: boolean;
  private maxGenerations: number;
  private watcher: WatchTree;
  private generation = 0;
  private closed = false;

  /**
   * @param entryPath Path to the workflow entry module (resolved to absolute).
   * @param opts Optional overrides; `rootDir` defaults to the entry's directory
   *   and `outDir` defaults to `<rootDir>/.smithers/hmr`.
   */
  constructor(entryPath: string, opts?: HotReloadOptions) {
    this.entryPath = resolve(entryPath);
    this.hotRoot = opts?.rootDir
      ? resolve(opts.rootDir)
      : dirname(this.entryPath);
    this.ownsOutDir = !opts?.outDir;
    this.outDir = opts?.outDir
      ? resolve(opts.outDir)
      : resolve(this.hotRoot, ".smithers", "hmr");
    this.maxGenerations = opts?.maxGenerations ?? DEFAULT_MAX_GENERATIONS;
    this.watcher = new WatchTree(this.hotRoot, {
      debounceMs: opts?.debounceMs ?? DEFAULT_DEBOUNCE_MS,
    });
  }

  /** Initialize: create the overlay directory and start file watchers. Call once before using wait/reload. */
  async init(): Promise<void> {
    await mkdir(this.outDir, { recursive: true });
    await this.watcher.start();
  }

  /** Current generation number (incremented on every reload attempt). */
  get gen(): number {
    return this.generation;
  }

  /**
   * Wait for the next file change event.
   * Returns the list of changed file paths.
   * Use this in Promise.race with inflight tasks to wake the engine loop.
   */
  async wait(): Promise<string[]> {
    return this.watcher.wait();
  }

  /**
   * Perform a hot reload:
   * 1. Build a new generation overlay
   * 2. Import the workflow module from the overlay
   * 3. Validate the module
   * 4. Return the result (reloaded, failed, or unsafe)
   *
   * Never throws: every error, including one raised while the overlay module
   * is being evaluated, is classified and returned as an event.
   * The caller is responsible for swapping workflow.build on success.
   */
  async reload(changedFiles: string[]): Promise<HotReloadEvent> {
    this.generation += 1;
    const gen = this.generation;

    try {
      // 1. Build overlay and locate the entry module inside it.
      const genDir = await buildOverlay(this.hotRoot, this.outDir, gen);
      const overlayEntry = resolveOverlayEntry(this.entryPath, this.hotRoot, genDir);

      // 2. Import fresh module. Each generation has a distinct URL, so the
      //    module cache cannot hand back a previous generation.
      const mod = await import(pathToFileURL(overlayEntry).href);

      // 3. Validate module shape.
      const workflow = mod.default as SmithersWorkflow<any> | undefined;
      if (!workflow) {
        throw new Error("Reloaded module does not export default");
      }
      if (typeof workflow.build !== "function") {
        throw new Error("Reloaded module default does not have a build function");
      }

      // 4. Prune old generations. Best-effort: a cleanup problem must not
      //    turn a successfully loaded module into a failed reload.
      try {
        await cleanupGenerations(this.outDir, this.maxGenerations);
      } catch {
        // Ignore cleanup errors
      }

      return {
        type: "reloaded",
        generation: gen,
        changedFiles,
        newBuild: workflow.build,
      };
    } catch (err: unknown) {
      // Schema-change errors are raised while the overlay module is evaluated
      // (i.e. inside import()), so classification has to happen for every
      // error path, not only for errors raised outside the import.
      const message = err instanceof Error ? err.message : String(err);
      if (message.includes("Schema change detected")) {
        return {
          type: "unsafe",
          generation: gen,
          changedFiles,
          reason: message,
        };
      }
      return { type: "failed", generation: gen, changedFiles, error: err };
    }
  }

  /**
   * Stop watchers and remove overlay output. Idempotent.
   *
   * When the overlay directory was defaulted by this class it is removed
   * entirely; when the caller supplied `outDir`, only the `gen-*` generation
   * directories inside it are removed so unrelated content is left alone.
   */
  async close(): Promise<void> {
    if (this.closed) return;
    this.closed = true;
    this.watcher.close();
    try {
      if (this.ownsOutDir) {
        await rm(this.outDir, { recursive: true, force: true });
      } else {
        await cleanupGenerations(this.outDir, 0);
      }
    } catch {
      // Ignore cleanup errors
    }
  }
}
@@ -0,0 +1,6 @@
1
+ export { WatchTree } from "./watch";
2
+ export type { WatchTreeOptions } from "./watch";
3
+ export { buildOverlay, cleanupGenerations, resolveOverlayEntry } from "./overlay";
4
+ export type { OverlayOptions } from "./overlay";
5
+ export { HotWorkflowController } from "./HotWorkflowController";
6
+ export type { HotReloadEvent } from "./HotWorkflowController";
@@ -0,0 +1,113 @@
1
+ import { readdir, mkdir, link, copyFile, rm, stat } from "node:fs/promises";
2
+ import { resolve, relative, join, dirname } from "node:path";
3
+ import { existsSync } from "node:fs";
4
+
5
/** Directory/file basenames that are never mirrored into an overlay. */
const DEFAULT_EXCLUDE = [
  "node_modules",
  ".git",
  ".jj",
  ".smithers",
  ".DS_Store",
];

export type OverlayOptions = {
  /** Directory basenames to exclude from overlay */
  exclude?: string[];
};

/**
 * Build a generation overlay by hardlinking (or copying) the hot root
 * tree into a new generation directory `<outDir>/gen-<generation>`.
 *
 * The overlay output directory itself is never mirrored, even when it lives
 * inside `hotRoot` under a name that is not in the exclude list; otherwise the
 * mirror would descend into the tree it is currently writing.
 *
 * @returns The path to the generation directory.
 */
export async function buildOverlay(
  hotRoot: string,
  outDir: string,
  generation: number,
  opts?: OverlayOptions,
): Promise<string> {
  const exclude = new Set(opts?.exclude ?? DEFAULT_EXCLUDE);
  const genDir = join(outDir, `gen-${generation}`);
  await mkdir(genDir, { recursive: true });
  await mirrorTree(hotRoot, genDir, exclude, resolve(outDir));
  return genDir;
}

/**
 * Recursively mirror `src` into `dest`, using hardlinks where possible
 * and falling back to copy. Skips excluded basenames, dotfiles, anything
 * that is neither a regular file nor a directory, and the directory whose
 * resolved path equals `skipDir` (when given).
 */
async function mirrorTree(
  src: string,
  dest: string,
  exclude: Set<string>,
  skipDir?: string,
): Promise<void> {
  const entries = await readdir(src, { withFileTypes: true });

  for (const entry of entries) {
    if (exclude.has(entry.name)) continue;
    // Skip hidden files/dirs (dotfiles)
    if (entry.name.startsWith(".")) continue;

    const srcPath = join(src, entry.name);
    const destPath = join(dest, entry.name);

    if (entry.isDirectory()) {
      // Do not mirror the overlay output into itself.
      if (skipDir !== undefined && resolve(srcPath) === skipDir) continue;
      await mkdir(destPath, { recursive: true });
      await mirrorTree(srcPath, destPath, exclude, skipDir);
    } else if (entry.isFile()) {
      try {
        await link(srcPath, destPath);
      } catch {
        // Hardlink failed (cross-device, permissions, etc.) — fall back to copy
        await mkdir(dirname(destPath), { recursive: true });
        await copyFile(srcPath, destPath);
      }
    }
    // Skip symlinks, sockets, etc.
  }
}
71
+
72
/**
 * Prune generation overlays under `outDir`, retaining the `keepLast`
 * highest-numbered `gen-<n>` directories and deleting the rest.
 * Entries that are not directories, or whose suffix does not parse to a
 * non-negative integer, are left untouched. Deletion is best-effort.
 */
export async function cleanupGenerations(
  outDir: string,
  keepLast: number,
): Promise<void> {
  if (!existsSync(outDir)) return;

  const generations: { name: string; num: number }[] = [];
  for (const entry of await readdir(outDir, { withFileTypes: true })) {
    if (!entry.isDirectory() || !entry.name.startsWith("gen-")) continue;
    const parsed = parseInt(entry.name.slice(4), 10);
    if (isNaN(parsed) || parsed < 0) continue;
    generations.push({ name: entry.name, num: parsed });
  }
  generations.sort((left, right) => left.num - right.num);

  // Everything before the final `keepLast` entries is surplus.
  const surplusCount = Math.max(0, generations.length - keepLast);
  for (const { name } of generations.slice(0, surplusCount)) {
    try {
      await rm(join(outDir, name), { recursive: true, force: true });
    } catch {
      // Best effort
    }
  }
}
101
+
102
/**
 * Resolve the overlay entry path given the original entry path,
 * the hot root, and the overlay generation directory.
 *
 * @throws Error when `entryPath` is not located inside `hotRoot`. Such an
 *   entry is never mirrored into the overlay, so mapping it would produce a
 *   path outside the generation directory.
 */
export function resolveOverlayEntry(
  entryPath: string,
  hotRoot: string,
  genDir: string,
): string {
  const rel = relative(hotRoot, entryPath);
  const climbsOut =
    rel === ".." || rel.startsWith("../") || rel.startsWith("..\\");
  // relative() yields an absolute path when no relative route exists
  // (e.g. different Windows drives); resolve() leaves such a path unchanged.
  const isAbsoluteResult = rel !== "" && resolve(rel) === rel;
  if (climbsOut || isAbsoluteResult) {
    throw new Error(
      `[smithers hot] Workflow entry ${entryPath} is outside the hot reload root ${hotRoot}`,
    );
  }
  return resolve(genDir, rel);
}
@@ -0,0 +1,126 @@
1
+ import { watch, type FSWatcher } from "node:fs";
2
+ import { readdir, stat } from "node:fs/promises";
3
+ import { resolve, relative } from "node:path";
4
+
5
/** Directory basenames that are never watched. */
const DEFAULT_IGNORE = [
  "node_modules",
  ".git",
  ".jj",
  ".smithers",
];

export type WatchTreeOptions = {
  /** Patterns to ignore (directory basenames) */
  ignore?: string[];
  /** Debounce interval in ms (default: 100) */
  debounceMs?: number;
};

/**
 * Watches a directory tree (one non-recursive watcher per directory) and
 * hands out debounced batches of changed file paths through `wait()`.
 *
 * Changes that arrive while nobody is waiting are buffered and delivered by
 * the next `wait()` call rather than being dropped.
 */
export class WatchTree {
  /** Active watchers keyed by the absolute directory they observe. */
  private watchers = new Map<string, FSWatcher>();
  private rootDir: string;
  private ignore: string[];
  private debounceMs: number;
  private changedFiles = new Set<string>();
  private debounceTimer: ReturnType<typeof setTimeout> | null = null;
  /** The single outstanding wait promise, shared by all concurrent callers. */
  private waitPromise: Promise<string[]> | null = null;
  private waitResolve: ((files: string[]) => void) | null = null;
  private closed = false;

  constructor(rootDir: string, opts?: WatchTreeOptions) {
    this.rootDir = resolve(rootDir);
    this.ignore = opts?.ignore ?? DEFAULT_IGNORE;
    this.debounceMs = opts?.debounceMs ?? 100;
  }

  /** Start watching. Call once. */
  async start(): Promise<void> {
    await this.watchDir(this.rootDir);
  }

  /**
   * Returns a promise that resolves with changed file paths
   * the next time file changes are detected (after debounce).
   * Can be called repeatedly; callers that overlap share one promise, so a
   * later call never orphans an earlier one.
   */
  wait(): Promise<string[]> {
    if (this.waitPromise) return this.waitPromise;

    // Changes buffered while nobody was waiting, with no burst in progress:
    // deliver them right away.
    if (this.changedFiles.size > 0 && this.debounceTimer === null) {
      const files = [...this.changedFiles];
      this.changedFiles.clear();
      return Promise.resolve(files);
    }

    // Otherwise wait for the (possibly already running) debounce to flush.
    this.waitPromise = new Promise<string[]>((settle) => {
      this.waitResolve = settle;
    });
    return this.waitPromise;
  }

  /** Stop all watchers and clean up. A pending wait resolves with an empty array. */
  close(): void {
    this.closed = true;
    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
      this.debounceTimer = null;
    }
    for (const w of this.watchers.values()) {
      try { w.close(); } catch {}
    }
    this.watchers.clear();
    // Resolve any pending wait with empty array
    this.settleWait([]);
  }

  private shouldIgnore(name: string): boolean {
    return this.ignore.includes(name) || name.startsWith(".");
  }

  /** Resolve and clear the outstanding wait promise, if there is one. */
  private settleWait(files: string[]): void {
    const settle = this.waitResolve;
    this.waitResolve = null;
    this.waitPromise = null;
    if (settle) settle(files);
  }

  /** Attach a watcher to `dir` (unless ignored or already watched) and recurse into its subdirectories. */
  private async watchDir(dir: string): Promise<void> {
    if (this.closed || this.watchers.has(dir)) return;

    // Split on both separators so basenames are correct on Windows too.
    const baseName = dir.split(/[\\/]/).pop() ?? "";
    if (baseName && this.shouldIgnore(baseName) && dir !== this.rootDir) return;

    try {
      const watcher = watch(dir, (_eventType, filename) => {
        if (!filename || this.closed) return;
        // Ignore hidden files and ignored dirs
        const parts = filename.split(/[\\/]/);
        if (parts.some((p) => this.shouldIgnore(p))) return;

        const fullPath = resolve(dir, filename);
        this.onFileChange(fullPath);
        // Keep the watcher set in sync with directories created or removed
        // after start().
        void this.syncDirectoryWatcher(fullPath);
      });
      // An FSWatcher 'error' with no listener would surface as an uncaught
      // exception; retire the watcher instead.
      watcher.on("error", () => {
        this.dropWatcher(dir);
      });
      this.watchers.set(dir, watcher);

      // Recursively watch subdirectories
      const entries = await readdir(dir, { withFileTypes: true });
      for (const entry of entries) {
        if (entry.isDirectory() && !this.shouldIgnore(entry.name)) {
          await this.watchDir(resolve(dir, entry.name));
        }
      }
    } catch {
      // Directory may have been deleted; ignore
    }
  }

  /** Begin watching `path` if it is now a directory; retire its watcher if it no longer exists. */
  private async syncDirectoryWatcher(path: string): Promise<void> {
    try {
      const info = await stat(path);
      if (info.isDirectory()) await this.watchDir(path);
    } catch {
      // Path vanished: a stale watcher would block re-watching if the
      // directory is created again later.
      this.dropWatcher(path);
    }
  }

  /** Close and forget the watcher registered for `dir`, if any. */
  private dropWatcher(dir: string): void {
    const stale = this.watchers.get(dir);
    if (!stale) return;
    this.watchers.delete(dir);
    try { stale.close(); } catch {}
  }

  private onFileChange(filePath: string): void {
    this.changedFiles.add(filePath);

    // Debounce: reset timer on each change
    if (this.debounceTimer) clearTimeout(this.debounceTimer);
    this.debounceTimer = setTimeout(() => {
      this.flush();
    }, this.debounceMs);
  }

  /**
   * Deliver the buffered batch to the pending waiter. When nobody is
   * waiting the batch stays buffered so the next wait() receives it.
   */
  private flush(): void {
    this.debounceTimer = null;
    if (this.changedFiles.size === 0) return;
    if (!this.waitResolve) return;

    const files = [...this.changedFiles];
    this.changedFiles.clear();
    this.settleWait(files);
  }
}