@wrongstack/core 0.1.10 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/{agent-bridge-6KPqsFx6.d.ts → agent-bridge-C3DUGjSb.d.ts} +1 -1
  2. package/dist/{compactor-B4mQZXf2.d.ts → compactor-BUU6Zm_3.d.ts} +1 -1
  3. package/dist/{config-BU9f_5yH.d.ts → config-CKLYPkCi.d.ts} +1 -1
  4. package/dist/{context-BmM2xGUZ.d.ts → context-IovtuTf8.d.ts} +10 -0
  5. package/dist/coordination/index.d.ts +211 -13
  6. package/dist/coordination/index.js +964 -67
  7. package/dist/coordination/index.js.map +1 -1
  8. package/dist/defaults/index.d.ts +33 -18
  9. package/dist/defaults/index.js +1273 -42
  10. package/dist/defaults/index.js.map +1 -1
  11. package/dist/{events-BMNaEFZl.d.ts → events-CNB9PALO.d.ts} +99 -1
  12. package/dist/execution/index.d.ts +12 -12
  13. package/dist/extension/index.d.ts +9 -0
  14. package/dist/extension/index.js +234 -0
  15. package/dist/extension/index.js.map +1 -0
  16. package/dist/index-BDb0cAMP.d.ts +806 -0
  17. package/dist/index.d.ts +112 -29
  18. package/dist/index.js +2036 -490
  19. package/dist/index.js.map +1 -1
  20. package/dist/infrastructure/index.d.ts +6 -6
  21. package/dist/kernel/index.d.ts +12 -9
  22. package/dist/kernel/index.js +73 -7
  23. package/dist/kernel/index.js.map +1 -1
  24. package/dist/{mcp-servers-Dzgg4x1w.d.ts → mcp-servers-DR35ojJZ.d.ts} +3 -3
  25. package/dist/models/index.d.ts +2 -2
  26. package/dist/models/index.js +24 -1
  27. package/dist/models/index.js.map +1 -1
  28. package/dist/{multi-agent-fmkRHtof.d.ts → multi-agent-B9a6sflH.d.ts} +71 -3
  29. package/dist/observability/index.d.ts +2 -2
  30. package/dist/{path-resolver-DBjaoXFq.d.ts → path-resolver-Cl_q0u-R.d.ts} +2 -2
  31. package/dist/provider-runner-BXuADQqQ.d.ts +36 -0
  32. package/dist/sdd/index.d.ts +3 -3
  33. package/dist/{secret-scrubber-CicHLN4G.d.ts → secret-scrubber-CgG2tV2B.d.ts} +1 -1
  34. package/dist/{secret-scrubber-DF88luOe.d.ts → secret-scrubber-Cuy5afaQ.d.ts} +1 -1
  35. package/dist/security/index.d.ts +20 -4
  36. package/dist/security/index.js +37 -2
  37. package/dist/security/index.js.map +1 -1
  38. package/dist/{selector-BbJqiEP4.d.ts → selector-wT2fv9Fg.d.ts} +1 -1
  39. package/dist/{session-reader-Drq8RvJu.d.ts → session-reader-CcPi4BQ8.d.ts} +1 -1
  40. package/dist/{skill-DhfSizKv.d.ts → skill-C_7znCIC.d.ts} +2 -2
  41. package/dist/storage/index.d.ts +164 -6
  42. package/dist/storage/index.js +297 -2
  43. package/dist/storage/index.js.map +1 -1
  44. package/dist/{renderer-rk_1Swwc.d.ts → system-prompt-Dk1qm8ey.d.ts} +30 -2
  45. package/dist/{tool-executor-CpuJPYm9.d.ts → tool-executor-DKu4A6nB.d.ts} +5 -5
  46. package/dist/types/index.d.ts +16 -16
  47. package/dist/types/index.js +24 -1
  48. package/dist/types/index.js.map +1 -1
  49. package/dist/utils/index.d.ts +1 -1
  50. package/dist/utils/index.js +24 -1
  51. package/dist/utils/index.js.map +1 -1
  52. package/package.json +5 -1
  53. package/dist/plugin-DJk6LL8B.d.ts +0 -434
  54. package/dist/system-prompt-BC_8ypCG.d.ts +0 -23
@@ -1,9 +1,175 @@
1
1
  import { randomUUID, randomBytes } from 'crypto';
2
- import * as fsp2 from 'fs/promises';
3
- import * as path3 from 'path';
2
+ import * as fsp4 from 'fs/promises';
3
+ import * as path4 from 'path';
4
4
  import { EventEmitter } from 'events';
5
5
 
6
6
  // src/coordination/director.ts
7
/**
 * Atomically replace `targetPath` with `content`.
 *
 * The data is written to a uniquely named hidden temp file in the same
 * directory, fsync'ed (best-effort), given its final permissions and then
 * renamed over the target, so readers never observe a half-written file.
 *
 * Permissions: when the target already exists its permission bits are
 * preserved; otherwise `opts.mode` is used (if provided). The temp file is
 * created with that mode up front so restricted content is never briefly
 * exposed with default permissions.
 *
 * @param {string} targetPath - Destination file path; parent directories are created.
 * @param {string|Uint8Array} content - Data to write.
 * @param {{ encoding?: string, mode?: number }} [opts] - `encoding` applies to
 *   string content (default "utf8"); `mode` applies when the target is new.
 * @returns {Promise<void>}
 * @throws Re-throws any write/chmod/rename failure after removing the temp file.
 */
async function atomicWrite(targetPath, content, opts = {}) {
  const dir = path4.dirname(targetPath);
  await fsp4.mkdir(dir, { recursive: true });
  const tmp = path4.join(dir, `.${path4.basename(targetPath)}.${randomBytes(6).toString("hex")}.tmp`);

  // Decide the final permission bits before creating the temp file.
  let mode;
  try {
    const existing = await fsp4.stat(targetPath);
    mode = existing.mode & 0o777;
  } catch {
    // Target missing (or not stat-able): fall back to the caller's mode.
    mode = opts.mode;
  }

  try {
    // "wx" fails if the temp name already exists; `mode` undefined means
    // the platform default is used.
    const fh = await fsp4.open(tmp, "wx", mode);
    try {
      if (typeof content === "string") {
        await fh.writeFile(content, { encoding: opts.encoding ?? "utf8" });
      } else {
        await fh.writeFile(content);
      }
      try {
        await fh.sync();
      } catch {
        // fsync is best-effort: some filesystems do not support it.
      }
    } finally {
      await fh.close();
    }
    if (mode !== void 0) {
      // The creation mode is filtered by the umask; enforce the exact bits.
      await fsp4.chmod(tmp, mode);
    }
    await renameWithRetry(tmp, targetPath);
  } catch (err) {
    try {
      await fsp4.unlink(tmp);
    } catch {
      // Temp file may never have been created; nothing to clean up.
    }
    throw err;
  }
}
45
/**
 * Create `dir` together with any missing parent directories.
 * Resolves with no value; an already existing directory is not an error.
 */
async function ensureDir(dir) {
  const mkdirOptions = { recursive: true };
  await fsp4.mkdir(dir, mkdirOptions);
}
48
/** Error codes that are treated as transient when a rename fails on Windows. */
var TRANSIENT_RENAME_CODES = /* @__PURE__ */ new Set(["EPERM", "EBUSY", "EACCES", "ENOTEMPTY"]);

/**
 * Rename `from` to `to`.
 *
 * On non-Windows platforms this is a single `rename` call. On win32 a rename
 * that fails with one of `TRANSIENT_RENAME_CODES` is retried after
 * 10, 25, 60, 120 and 250 ms (six attempts in total).
 *
 * @param {string} from - Existing path.
 * @param {string} to - Destination path.
 * @returns {Promise<void>}
 * @throws The rename error, immediately for non-transient codes or once the
 *   retry schedule is exhausted.
 */
async function renameWithRetry(from, to) {
  if (process.platform !== "win32") {
    await fsp4.rename(from, to);
    return;
  }
  const delays = [10, 25, 60, 120, 250];
  // Every iteration either returns or throws, so no post-loop fallback is needed.
  for (let attempt = 0; ; attempt++) {
    try {
      await fsp4.rename(from, to);
      return;
    } catch (err) {
      const code = err?.code;
      const exhausted = attempt >= delays.length;
      if (exhausted || !code || !TRANSIENT_RENAME_CODES.has(code)) {
        throw err;
      }
      await new Promise((resolve) => setTimeout(resolve, delays[attempt]));
    }
  }
}
71
+
72
+ // src/storage/director-state.ts
73
/**
 * Debounced on-disk mirror of a director run: spawn counters, the subagent
 * roster, the task list and (optionally) usage.
 *
 * Every `record*` / `setUsage` call replaces the in-memory snapshot with an
 * updated copy and schedules a write after `debounceMs`. Writes go through
 * `atomicWrite` and never overlap: a write requested while another one is in
 * progress is re-scheduled once the current one finishes. Write failures are
 * logged with `console.warn` and never thrown.
 */
var DirectorStateCheckpoint = class {
  snapshot;
  filePath;
  timer = null;
  debounceMs;
  writing = false;
  rewriteRequested = false;
  /** Promise of the write currently in progress, or null when idle. */
  inFlight = null;
  /**
   * @param {string} filePath - Where the checkpoint JSON is written.
   * @param {object} init - Supplies `directorRunId`, `maxSpawns`, `spawnDepth`, `maxSpawnDepth`.
   * @param {number} [debounceMs=250] - Delay between a change and its write.
   */
  constructor(filePath, init, debounceMs = 250) {
    this.filePath = filePath;
    this.debounceMs = debounceMs;
    this.snapshot = {
      version: 1,
      directorRunId: init.directorRunId,
      updatedAt: (/* @__PURE__ */ new Date()).toISOString(),
      spawnCount: 0,
      maxSpawns: init.maxSpawns,
      spawnDepth: init.spawnDepth,
      maxSpawnDepth: init.maxSpawnDepth,
      subagents: [],
      tasks: []
    };
  }
  /** Return the current in-memory snapshot (not a copy). */
  current() {
    return this.snapshot;
  }
  /** Add or replace the roster entry with `sub.id` and store the new spawn count. */
  recordSpawn(sub, spawnCount) {
    this.snapshot = {
      ...this.snapshot,
      spawnCount,
      subagents: [...this.snapshot.subagents.filter((s) => s.id !== sub.id), sub]
    };
    this.bumpUpdatedAt();
    this.schedule();
  }
  /** Append `task`, or merge it into the existing entry with the same `taskId`. */
  recordTaskAssigned(task) {
    const exists = this.snapshot.tasks.some((t) => t.taskId === task.taskId);
    this.snapshot = {
      ...this.snapshot,
      tasks: exists ? this.snapshot.tasks.map((t) => t.taskId === task.taskId ? { ...t, ...task } : t) : [...this.snapshot.tasks, task]
    };
    this.bumpUpdatedAt();
    this.schedule();
  }
  /** Merge `patch` into the task with `taskId`; unknown ids leave the task list unchanged. */
  recordTaskStatus(taskId, patch) {
    this.snapshot = {
      ...this.snapshot,
      tasks: this.snapshot.tasks.map(
        (t) => t.taskId === taskId ? { ...t, ...patch } : t
      )
    };
    this.bumpUpdatedAt();
    this.schedule();
  }
  /** Replace the usage section of the snapshot. */
  setUsage(usage) {
    this.snapshot = { ...this.snapshot, usage };
    this.bumpUpdatedAt();
    this.schedule();
  }
  /**
   * Force an immediate write of the latest snapshot — used by Director.shutdown().
   *
   * Waits for any write already in progress before writing again, so the
   * state on disk reflects the snapshot at the time of the call once the
   * returned promise resolves, and no debounce timer is left armed.
   */
  async flush() {
    this.cancelTimer();
    while (this.writing) {
      // The in-flight promise never rejects; loop until persist() has
      // finished its bookkeeping for that write.
      await this.inFlight;
    }
    // The finished write may have re-armed the timer for a queued rewrite;
    // the write below supersedes it.
    this.cancelTimer();
    this.rewriteRequested = false;
    await this.persist();
  }
  /** Clear the pending debounce timer, if any. */
  cancelTimer() {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }
  bumpUpdatedAt() {
    this.snapshot = { ...this.snapshot, updatedAt: (/* @__PURE__ */ new Date()).toISOString() };
  }
  /** Arm the debounce timer unless one is already pending. */
  schedule() {
    if (this.timer) return;
    this.timer = setTimeout(() => {
      this.timer = null;
      void this.persist();
    }, this.debounceMs);
  }
  /**
   * Write the snapshot unless a write is already running, in which case a
   * follow-up write is requested and scheduled when the current one ends.
   */
  async persist() {
    if (this.writing) {
      this.rewriteRequested = true;
      return;
    }
    this.writing = true;
    this.inFlight = this.writeSnapshot();
    try {
      await this.inFlight;
    } finally {
      this.inFlight = null;
      this.writing = false;
      if (this.rewriteRequested) {
        this.rewriteRequested = false;
        this.schedule();
      }
    }
  }
  /** Serialize and write the snapshot (mode 0600); failures are logged, never thrown. */
  async writeSnapshot() {
    try {
      await atomicWrite(this.filePath, JSON.stringify(this.snapshot, null, 2), {
        mode: 0o600
      });
    } catch (err) {
      console.warn(
        "[director-state] checkpoint write failed:",
        err instanceof Error ? err.message : String(err)
      );
    }
  }
};
7
173
 
8
174
  // src/coordination/in-memory-transport.ts
9
175
  var InMemoryBridgeTransport = class {
@@ -260,6 +426,10 @@ var FleetBus = class {
260
426
  "iteration.started",
261
427
  "iteration.completed",
262
428
  "provider.text_delta",
429
+ // Subagent extended-thinking output. Forwarded so the FleetPanel /
430
+ // /fleet log can surface "the planner is thinking…" instead of a
431
+ // silent gap between iteration.started and the first text_delta.
432
+ "provider.thinking_delta",
263
433
  "provider.response",
264
434
  "provider.retry",
265
435
  "provider.error",
@@ -416,6 +586,108 @@ var FleetUsageAggregator = class {
416
586
  }
417
587
  };
418
588
 
589
+ // src/types/errors.ts
590
/**
 * Base error type carrying a machine-readable `code`, the originating
 * `subsystem`, a `severity` (default "error"), a `recoverable` flag
 * (default false) and an optional free-form `context` object.
 */
var WrongStackError = class extends Error {
  code;
  subsystem;
  severity;
  recoverable;
  context;
  /**
   * @param {object} opts - `message`, `code`, `subsystem`, and optionally
   *   `severity`, `recoverable`, `context`, `cause`.
   */
  constructor(opts) {
    super(opts.message, { cause: opts.cause });
    this.name = "WrongStackError";
    this.code = opts.code;
    this.subsystem = opts.subsystem;
    this.severity = opts.severity ?? "error";
    this.recoverable = opts.recoverable ?? false;
    this.context = opts.context;
  }
  /**
   * Render a one-line user-facing description.
   * Subclasses should override for domain-specific formatting.
   *
   * The context suffix is appended only when it renders to something, so an
   * empty context never leaves a trailing space.
   */
  describe() {
    const head = `${this.code}: ${this.message}`;
    const ctx = this.context ? formatContext(this.context) : "";
    return ctx ? `${head} ${ctx}` : head;
  }
};

/**
 * Format up to three defined entries of `ctx` as `[k=v k=v k=v]`.
 * Returns an empty string when no entry has a defined value.
 */
function formatContext(ctx) {
  const parts = Object.entries(ctx).filter(([, v]) => v !== void 0).slice(0, 3).map(([k, v]) => `${k}=${String(v)}`);
  return parts.length > 0 ? `[${parts.join(" ")}]` : "";
}
618
+
619
+ // src/types/provider.ts
620
/**
 * Error raised for a failed provider call. Keeps the HTTP-like `status`,
 * the `retryable` flag, the `providerId` and the optional parsed `body`,
 * and derives the base-class code/severity/context from them.
 */
var ProviderError = class extends WrongStackError {
  status;
  retryable;
  providerId;
  body;
  constructor(message, status, retryable, providerId, opts = {}) {
    const code = providerStatusToCode(status, opts.body?.type);
    // 5xx responses are errors; everything else is reported as a warning.
    const severity = status >= 500 ? "error" : "warning";
    super({
      message,
      code,
      subsystem: "provider",
      severity,
      recoverable: retryable,
      context: { providerId, status },
      cause: opts.cause
    });
    this.name = "ProviderError";
    this.status = status;
    this.retryable = retryable;
    this.providerId = providerId;
    this.body = opts.body;
  }
  /**
   * Render a one-line, user-facing description. Designed for the CLI/TUI
   * status line and the agent's retry warning. Avoids dumping raw JSON
   * (which is what users see today when a 529 lands and the log message
   * includes the full `{"type":"error",...}` body).
   *
   * Examples:
   * "minimax-coding-plan overloaded (529): High traffic detected. Upgrade for highspeed model. [req 06534785201de9c0…]"
   * "openai rate limited (429): Retry after 12s"
   * "anthropic invalid request (400): messages.0.role must be one of 'user'|'assistant'"
   * "groq HTTP 500 (server error)"
   */
  describe() {
    const head = `${this.providerId} ${describeStatus(this.status, this.body?.type)}`;
    // Request ids are shortened to 16 characters, with an ellipsis when cut.
    let reqId = "";
    if (this.body?.requestId) {
      const ellipsis = this.body.requestId.length > 16 ? "\u2026" : "";
      reqId = ` [req ${this.body.requestId.slice(0, 16)}${ellipsis}]`;
    }
    const detail = this.body?.message?.trim();
    if (!detail || detail.length === 0) {
      return `${head}${reqId}`;
    }
    return `${head}: ${truncate(detail, 240)}${reqId}`;
  }
};
664
/**
 * Turn a provider status code and optional error `type` into a short human
 * label such as "rate limited (429)". Status 0 means a network error; the
 * typed rules are checked in order and match on either the type or the status.
 */
function describeStatus(status, type) {
  if (status === 0) return "network error";
  const typedRules = [
    ["overloaded_error", 529, "overloaded"],
    ["rate_limit_error", 429, "rate limited"],
    ["authentication_error", 401, "auth failed"],
    ["permission_error", 403, "forbidden"],
    ["not_found_error", 404, "not found"],
    ["invalid_request_error", 400, "invalid request"]
  ];
  const matched = typedRules.find(([ruleType, ruleStatus]) => type === ruleType || status === ruleStatus);
  if (matched) {
    return `${matched[2]} (${status})`;
  }
  if (status === 408) {
    return `timeout (${status})`;
  }
  const isServerError = status >= 500 && status < 600;
  if (isServerError) {
    return `HTTP ${status} (server error)`;
  }
  return type ? `${type} (${status})` : `HTTP ${status}`;
}
677
/**
 * Shorten `s` to at most `n` characters; when it is cut, the last kept
 * position is replaced by an ellipsis character.
 */
function truncate(s, n) {
  if (s.length <= n) {
    return s;
  }
  const kept = s.slice(0, n - 1);
  return `${kept}\u2026`;
}
680
/**
 * Map a provider status code and optional error `type` to an error code
 * string. Status 0 and 408 are network errors; typed rules match on either
 * the type or the status; remaining statuses of 500 and above are server
 * errors, and everything else is reported as an invalid request.
 */
function providerStatusToCode(status, type) {
  if (status === 0) return "PROVIDER_NETWORK_ERROR";
  const typedRules = [
    ["rate_limit_error", 429, "PROVIDER_RATE_LIMITED"],
    ["authentication_error", 401, "PROVIDER_AUTH_FAILED"],
    ["overloaded_error", 529, "PROVIDER_OVERLOADED"],
    ["invalid_request_error", 400, "PROVIDER_INVALID_REQUEST"]
  ];
  for (const [ruleType, ruleStatus, code] of typedRules) {
    if (type === ruleType || status === ruleStatus) {
      return code;
    }
  }
  if (status === 408) {
    return "PROVIDER_NETWORK_ERROR";
  }
  return status >= 500 ? "PROVIDER_SERVER_ERROR" : "PROVIDER_INVALID_REQUEST";
}
690
+
419
691
  // src/coordination/subagent-budget.ts
420
692
  var BudgetExceededError = class extends Error {
421
693
  kind;
@@ -508,14 +780,34 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
508
780
  completedResults = [];
509
781
  totalIterations = 0;
510
782
  inFlight = 0;
783
+ /**
784
+ * Subagents currently being stopped. Set on entry to `stop()`, cleared
785
+ * once `recordCompletion` lands the terminal TaskResult. Used by
786
+ * `runDispatched` and `findIdleSubagent` to refuse mid-flight dispatch
787
+ * to a subagent the caller has already asked to terminate — closes the
788
+ * assign+terminate race where a fresh task could land on a worker that
789
+ * was about to be killed.
790
+ */
791
+ terminating = /* @__PURE__ */ new Set();
511
792
  constructor(config, options = {}) {
512
793
  super();
513
794
  this.coordinatorId = config.coordinatorId;
514
795
  this.config = config;
515
796
  this.runner = options.runner;
516
797
  }
798
+ /**
799
+ * Replace the runner after construction. Used when the runner depends
800
+ * on infrastructure (e.g. FleetBus) that isn't available until after
801
+ * the coordinator's owning Director is built.
802
+ */
803
+ setRunner(runner) {
804
+ this.runner = runner;
805
+ }
517
806
  async spawn(subagent) {
518
807
  const id = subagent.id || randomUUID();
808
+ if (this.subagents.has(id)) {
809
+ throw new Error(`Subagent id "${id}" already exists \u2014 refusing to overwrite`);
810
+ }
519
811
  const context = {
520
812
  subagentId: id,
521
813
  tasks: [],
@@ -559,6 +851,7 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
559
851
  async stop(subagentId) {
560
852
  const subagent = this.subagents.get(subagentId);
561
853
  if (!subagent) return;
854
+ this.terminating.add(subagentId);
562
855
  subagent.abortController.abort();
563
856
  subagent.status = "stopped";
564
857
  subagent.currentTask = void 0;
@@ -566,6 +859,7 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
566
859
  this.emit("subagent.stopped", { subagentId, reason: "stopped by coordinator" });
567
860
  }
568
861
  async stopAll() {
862
+ this.drainPendingAsAborted("Coordinator stopAll() drained the pending queue");
569
863
  await Promise.allSettled([...this.subagents.keys()].map((id) => this.stop(id)));
570
864
  }
571
865
  getStatus() {
@@ -599,7 +893,14 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
599
893
  tryDispatchNext() {
600
894
  while (this.canDispatch()) {
601
895
  const subagentId = this.findIdleSubagent();
602
- if (!subagentId) return;
896
+ if (!subagentId) {
897
+ if (this.pendingTasks.length > 0 && !this.hasLiveSubagent()) {
898
+ this.drainPendingAsAborted(
899
+ "No live subagent available \u2014 all stopped or mid-termination"
900
+ );
901
+ }
902
+ return;
903
+ }
603
904
  const task = this.pendingTasks.shift();
604
905
  if (!task) return;
605
906
  this.runDispatched(subagentId, task).catch((err) => {
@@ -607,7 +908,7 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
607
908
  subagentId,
608
909
  taskId: task.id,
609
910
  status: "failed",
610
- error: err instanceof Error ? err.message : String(err),
911
+ error: classifySubagentError(err),
611
912
  iterations: 0,
612
913
  toolCalls: 0,
613
914
  durationMs: 0
@@ -621,13 +922,76 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
621
922
  }
622
923
  findIdleSubagent() {
623
924
  for (const [id, s] of this.subagents) {
624
- if (s.status === "idle") return id;
925
+ if (s.status === "idle" && !this.terminating.has(id)) return id;
625
926
  }
626
927
  return null;
627
928
  }
929
+ /**
930
+ * Returns true iff at least one spawned subagent could still
931
+ * process a task. A "live" subagent is one that is not stopped
932
+ * AND not mid-termination — `running` workers count because they
933
+ * will eventually finish and become idle.
934
+ *
935
+ * When no subagent has ever been spawned, returns `true` so a
936
+ * pre-spawn `assign()` simply queues (legacy behaviour). The
937
+ * dead-end detection only fires after `stop()` has retired every
938
+ * spawned worker.
939
+ *
940
+ * Used by `tryDispatchNext` to detect a dead-end pending queue.
941
+ */
942
+ hasLiveSubagent() {
943
+ if (this.subagents.size === 0) return true;
944
+ for (const [id, s] of this.subagents) {
945
+ if (s.status !== "stopped" && !this.terminating.has(id)) return true;
946
+ }
947
+ return false;
948
+ }
949
+ /**
950
+ * Drain every pending task with a synthetic `aborted_by_parent`
951
+ * completion event. Same shape as the `stopAll()` drain — we go
952
+ * around `recordCompletion` because pending tasks were never
953
+ * counted in `inFlight` and routing them through would trip the
954
+ * underflow guard on every task after the first.
955
+ */
956
+ drainPendingAsAborted(message) {
957
+ const dropped = this.pendingTasks.splice(0, this.pendingTasks.length);
958
+ for (const t of dropped) {
959
+ const synthetic = {
960
+ subagentId: t.subagentId ?? "unassigned",
961
+ taskId: t.id,
962
+ status: "stopped",
963
+ error: {
964
+ kind: "aborted_by_parent",
965
+ message,
966
+ retryable: false
967
+ },
968
+ iterations: 0,
969
+ toolCalls: 0,
970
+ durationMs: 0
971
+ };
972
+ this.completedResults.push(synthetic);
973
+ this.emit("task.completed", { task: t, result: synthetic });
974
+ }
975
+ }
628
976
  async runDispatched(subagentId, task) {
629
977
  const subagent = this.subagents.get(subagentId);
630
978
  if (!subagent) return;
979
+ if (this.terminating.has(subagentId) || subagent.status === "stopped") {
980
+ this.recordCompletion({
981
+ subagentId,
982
+ taskId: task.id,
983
+ status: "stopped",
984
+ error: {
985
+ kind: "aborted_by_parent",
986
+ message: "Subagent was terminated before task could start",
987
+ retryable: false
988
+ },
989
+ iterations: 0,
990
+ toolCalls: 0,
991
+ durationMs: 0
992
+ });
993
+ return;
994
+ }
631
995
  subagent.status = "running";
632
996
  subagent.currentTask = task.id;
633
997
  task.subagentId = subagentId;
@@ -673,7 +1037,9 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
673
1037
  subagentId,
674
1038
  taskId: task.id,
675
1039
  status,
676
- error: err instanceof Error ? err.message : String(err),
1040
+ error: classifySubagentError(err, {
1041
+ parentAborted: subagent.abortController.signal.aborted
1042
+ }),
677
1043
  iterations: usage.iterations,
678
1044
  toolCalls: usage.toolCalls,
679
1045
  durationMs: Date.now() - startTime
@@ -712,19 +1078,14 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
712
1078
  }
713
1079
  const subagent = this.subagents.get(result.subagentId);
714
1080
  if (subagent && subagent.status !== "stopped") {
715
- const failed = result.status === "failed" || result.status === "timeout";
716
- subagent.status = failed ? "error" : "idle";
1081
+ result.status === "failed" || result.status === "timeout";
1082
+ subagent.status = "idle";
717
1083
  subagent.currentTask = void 0;
718
1084
  if (subagent.abortController.signal.aborted) {
719
1085
  subagent.abortController = new AbortController();
720
1086
  }
721
- if (subagent.status === "error") {
722
- queueMicrotask(() => {
723
- if (subagent.status === "error") subagent.status = "idle";
724
- this.tryDispatchNext();
725
- });
726
- }
727
1087
  }
1088
+ this.terminating.delete(result.subagentId);
728
1089
  this.emit("task.completed", {
729
1090
  task: subagent?.context.tasks.find((t) => t.id === result.taskId) ?? { id: result.taskId },
730
1091
  result
@@ -747,6 +1108,99 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
747
1108
  return false;
748
1109
  }
749
1110
  };
1111
/**
 * Convert an arbitrary thrown value into a structured subagent error:
 * `{ kind, message, retryable, cause }` (plus `backoffMs` for some provider
 * errors).
 *
 * Precedence: ProviderError → BudgetExceededError → `hints.parentAborted`
 * → message patterns → "unknown".
 *
 * @param {unknown} err - The thrown value.
 * @param {{ parentAborted?: boolean }} [hints] - Set `parentAborted` when the
 *   subagent's abort signal had fired, so the failure is attributed to the parent.
 * @returns {object} Structured error; `cause` carries name/message/stack when
 *   `err` is an Error, otherwise it is undefined.
 */
function classifySubagentError(err, hints = {}) {
  const cause = err instanceof Error ? { name: err.name, message: err.message, stack: err.stack } : void 0;
  const baseMessage = err instanceof Error ? err.message : String(err);
  if (err instanceof ProviderError) {
    return providerErrorToSubagentError(err, baseMessage, cause);
  }
  if (err instanceof BudgetExceededError) {
    // A Map avoids accidental hits on Object.prototype keys; an unmapped
    // budget kind falls back to "unknown" instead of an undefined kind.
    const budgetKinds = /* @__PURE__ */ new Map([
      ["iterations", "budget_iterations"],
      ["tool_calls", "budget_tool_calls"],
      ["tokens", "budget_tokens"],
      ["cost", "budget_cost"],
      ["timeout", "budget_timeout"]
    ]);
    return {
      kind: budgetKinds.get(err.kind) ?? "unknown",
      message: baseMessage,
      // Budgets are user-configured ceilings, not transient failures —
      // retrying with the same budget will hit the same ceiling. The
      // orchestrator must raise the budget or narrow the task first.
      retryable: false,
      cause
    };
  }
  if (hints.parentAborted) {
    return {
      kind: "aborted_by_parent",
      message: baseMessage,
      retryable: false,
      cause
    };
  }
  // Message-based classification, checked in order; none of these are retryable.
  const patterns = [
    [/agent aborted$/i, "aborted_by_parent"],
    [/agent exhausted iteration limit$/i, "budget_iterations"],
    [/empty response$/i, "empty_response"],
    [/^tool failed: /i, "tool_failed"],
    [/bridge transport|bridge.*(closed|disconnect)/i, "bridge_failed"],
    [/context length|max.*tokens?.*exceeded|prompt is too long/i, "context_overflow"]
  ];
  const matched = patterns.find(([pattern]) => pattern.test(baseMessage));
  return {
    kind: matched ? matched[1] : "unknown",
    message: baseMessage,
    retryable: false,
    cause
  };
}
1174
/**
 * Classify a provider failure by status and body type into a structured
 * subagent error. Rate limits, timeouts/network errors and 5xx responses
 * are retryable (rate limits and 5xx carry a suggested backoff); auth
 * failures are not; anything else inherits `err.retryable`.
 */
function providerErrorToSubagentError(err, message, cause) {
  const { status } = err;
  const bodyType = err.body?.type;

  const rateLimited = status === 429 || bodyType === "rate_limit_error";
  if (rateLimited) {
    // Conservative default: 5s. Provider-specific code can override
    // by emitting an error whose body carries an explicit hint.
    return { kind: "provider_rate_limit", message, retryable: true, backoffMs: 5e3, cause };
  }

  const authFailed = status === 401 || status === 403 || bodyType === "authentication_error";
  if (authFailed) {
    return { kind: "provider_auth", message, retryable: false, cause };
  }

  const timedOut = status === 408 || status === 0;
  if (timedOut) {
    return { kind: "provider_timeout", message, retryable: true, cause };
  }

  const serverError = status >= 500 && status < 600;
  if (serverError) {
    return { kind: "provider_5xx", message, retryable: true, backoffMs: 3e3, cause };
  }

  return { kind: "unknown", message, retryable: err.retryable, cause };
}
750
1204
 
751
1205
  // src/coordination/director.ts
752
1206
  var DirectorBudgetError = class extends Error {
@@ -810,6 +1264,27 @@ var Director = class {
810
1264
  spawnDepth;
811
1265
  /** Live spawn counter for `maxSpawns` enforcement. */
812
1266
  spawnCount = 0;
1267
+ /** Optional checkpoint mirror — writes the live task graph + roster to disk. */
1268
+ stateCheckpoint;
1269
+ /** Optional session writer for emitting task_* / agent_* lifecycle events. */
1270
+ sessionWriter;
1271
+ /** Debounce timer for periodic manifest writes. */
1272
+ manifestTimer = null;
1273
+ manifestDebounceMs;
1274
+ /** Resolves task descriptions back from `assign()` so completion events
1275
+ * can also carry a human-readable title. */
1276
+ taskDescriptions = /* @__PURE__ */ new Map();
1277
+ /** Snapshot of which subagent owns each task — drives state-checkpoint
1278
+ * status updates without re-walking the manifest. */
1279
+ taskOwners = /* @__PURE__ */ new Map();
1280
+ /**
1281
+ * Handle to the coordinator-side `task.completed` listener so we can
1282
+ * unsubscribe in `shutdown()`. Without this, repeated Director
1283
+ * construction (e.g. tests, hot reloads) accumulates listeners on a
1284
+ * cached coordinator and slowly drifts the EventEmitter past its
1285
+ * default cap.
1286
+ */
1287
+ taskCompletedListener = null;
813
1288
  constructor(opts) {
814
1289
  this.id = opts.config.coordinatorId || randomUUID();
815
1290
  this.manifestPath = opts.manifestPath;
@@ -820,8 +1295,16 @@ var Director = class {
820
1295
  this.maxSpawns = opts.maxSpawns ?? Number.POSITIVE_INFINITY;
821
1296
  this.maxSpawnDepth = opts.maxSpawnDepth ?? 2;
822
1297
  this.spawnDepth = opts.spawnDepth ?? 0;
1298
+ this.sessionWriter = opts.sessionWriter ?? null;
1299
+ this.manifestDebounceMs = opts.manifestDebounceMs ?? 2e3;
1300
+ this.stateCheckpoint = opts.stateCheckpointPath ? new DirectorStateCheckpoint(opts.stateCheckpointPath, {
1301
+ directorRunId: this.id,
1302
+ maxSpawns: opts.maxSpawns,
1303
+ spawnDepth: this.spawnDepth,
1304
+ maxSpawnDepth: this.maxSpawnDepth
1305
+ }) : null;
823
1306
  if (this.sharedScratchpadPath) {
824
- void fsp2.mkdir(this.sharedScratchpadPath, { recursive: true }).catch(() => void 0);
1307
+ void fsp4.mkdir(this.sharedScratchpadPath, { recursive: true }).catch(() => void 0);
825
1308
  }
826
1309
  this.transport = new InMemoryBridgeTransport();
827
1310
  this.bridge = new InMemoryAgentBridge(
@@ -838,7 +1321,7 @@ var Director = class {
838
1321
  { ...opts.config, coordinatorId: this.id },
839
1322
  { runner: opts.runner }
840
1323
  );
841
- this.coordinator.on("task.completed", (payload) => {
1324
+ this.taskCompletedListener = (payload) => {
842
1325
  const r = payload.result;
843
1326
  this.completed.set(r.taskId, r);
844
1327
  const waiter = this.taskWaiters.get(r.taskId);
@@ -846,7 +1329,54 @@ var Director = class {
846
1329
  waiter.resolve(r);
847
1330
  this.taskWaiters.delete(r.taskId);
848
1331
  }
849
- });
1332
+ const title = this.taskDescriptions.get(r.taskId) ?? payload.task.description ?? r.taskId;
1333
+ const failed = r.status !== "success";
1334
+ const errorString = r.error ? `${r.error.kind}: ${r.error.message}` : void 0;
1335
+ this.stateCheckpoint?.recordTaskStatus(r.taskId, {
1336
+ status: failed ? r.status : "completed",
1337
+ completedAt: (/* @__PURE__ */ new Date()).toISOString(),
1338
+ iterations: r.iterations,
1339
+ toolCalls: r.toolCalls,
1340
+ durationMs: r.durationMs,
1341
+ error: errorString
1342
+ });
1343
+ this.stateCheckpoint?.setUsage(this.usage.snapshot());
1344
+ void this.appendSessionEvent(
1345
+ failed ? {
1346
+ type: "task_failed",
1347
+ ts: (/* @__PURE__ */ new Date()).toISOString(),
1348
+ taskId: r.taskId,
1349
+ title,
1350
+ error: errorString ?? r.status
1351
+ } : {
1352
+ type: "task_completed",
1353
+ ts: (/* @__PURE__ */ new Date()).toISOString(),
1354
+ taskId: r.taskId,
1355
+ title
1356
+ }
1357
+ );
1358
+ this.scheduleManifest();
1359
+ };
1360
+ this.coordinator.on("task.completed", this.taskCompletedListener);
1361
+ }
1362
+ /** Best-effort session-writer append. Swallows failures — the director
1363
+ * must not break a fleet run because the session JSONL handle closed. */
1364
+ async appendSessionEvent(event) {
1365
+ if (!this.sessionWriter) return;
1366
+ try {
1367
+ await this.sessionWriter.append(event);
1368
+ } catch {
1369
+ }
1370
+ }
1371
+ /** Debounced manifest writer. A burst of spawn/assign/complete events
1372
+ * collapses into one write. Set `manifestDebounceMs` to 0 to disable. */
1373
+ scheduleManifest() {
1374
+ if (!this.manifestPath || this.manifestDebounceMs <= 0) return;
1375
+ if (this.manifestTimer) return;
1376
+ this.manifestTimer = setTimeout(() => {
1377
+ this.manifestTimer = null;
1378
+ void this.writeManifest().catch(() => void 0);
1379
+ }, this.manifestDebounceMs);
850
1380
  }
851
1381
  /**
852
1382
  * Spawn a subagent. Identical to the coordinator's `spawn()` but
@@ -885,6 +1415,25 @@ var Director = class {
885
1415
  model: config.model,
886
1416
  taskIds: []
887
1417
  });
1418
+ const spawnedAt = (/* @__PURE__ */ new Date()).toISOString();
1419
+ this.stateCheckpoint?.recordSpawn(
1420
+ {
1421
+ id: result.subagentId,
1422
+ name: config.name,
1423
+ role: config.role,
1424
+ provider: config.provider,
1425
+ model: config.model,
1426
+ spawnedAt
1427
+ },
1428
+ this.spawnCount
1429
+ );
1430
+ void this.appendSessionEvent({
1431
+ type: "agent_spawned",
1432
+ ts: spawnedAt,
1433
+ agentId: result.subagentId,
1434
+ role: config.role ?? config.name
1435
+ });
1436
+ this.scheduleManifest();
888
1437
  return result.subagentId;
889
1438
  }
890
1439
  /**
@@ -995,8 +1544,8 @@ var Director = class {
995
1544
  })),
996
1545
  usage: this.usage.snapshot()
997
1546
  };
998
- await fsp2.mkdir(path3.dirname(this.manifestPath), { recursive: true });
999
- await fsp2.writeFile(this.manifestPath, JSON.stringify(manifest, null, 2), { mode: 384 });
1547
+ await fsp4.mkdir(path4.dirname(this.manifestPath), { recursive: true });
1548
+ await fsp4.writeFile(this.manifestPath, JSON.stringify(manifest, null, 2), { mode: 384 });
1000
1549
  return this.manifestPath;
1001
1550
  }
1002
1551
  /**
@@ -1005,13 +1554,42 @@ var Director = class {
1005
1554
  * — calling shutdown twice is a no-op on the second invocation.
1006
1555
  */
1007
1556
  async shutdown() {
1557
+ if (this.manifestTimer) {
1558
+ clearTimeout(this.manifestTimer);
1559
+ this.manifestTimer = null;
1560
+ }
1561
+ if (this.taskCompletedListener) {
1562
+ this.coordinator.off("task.completed", this.taskCompletedListener);
1563
+ this.taskCompletedListener = null;
1564
+ }
1008
1565
  await this.coordinator.stopAll();
1009
1566
  for (const b of this.subagentBridges.values()) {
1010
- await b.stop().catch(() => void 0);
1567
+ await b.stop().catch((err) => this.logShutdownError("subagent_bridge_stop", err));
1011
1568
  }
1012
1569
  this.subagentBridges.clear();
1013
- await this.bridge.stop().catch(() => void 0);
1014
- if (this.manifestPath) await this.writeManifest().catch(() => void 0);
1570
+ await this.bridge.stop().catch((err) => this.logShutdownError("director_bridge_stop", err));
1571
+ if (this.manifestPath)
1572
+ await this.writeManifest().catch((err) => this.logShutdownError("manifest_write", err));
1573
+ if (this.stateCheckpoint) {
1574
+ this.stateCheckpoint.setUsage(this.usage.snapshot());
1575
+ await this.stateCheckpoint.flush().catch((err) => this.logShutdownError("state_checkpoint_flush", err));
1576
+ }
1577
+ }
1578
+ /**
1579
+ * Funnel for shutdown-phase errors. We can't throw — `shutdown()` is
1580
+ * called from process-exit paths where an uncaught throw would lose
1581
+ * the manifest write that comes after. But we MUST NOT silently
1582
+ * swallow either — a persistent bridge-close failure would otherwise
1583
+ * mask a real bug. `process.emitWarning` is the right tier:
1584
+ * surfaces on stderr by default, lets the host plug a warning
1585
+ * listener for structured collection, and never affects exit code.
1586
+ */
1587
+ logShutdownError(phase, err) {
1588
+ const detail = err instanceof Error ? err.message : String(err);
1589
+ process.emitWarning(
1590
+ `Director shutdown phase "${phase}" failed: ${detail}`,
1591
+ "DirectorShutdownWarning"
1592
+ );
1015
1593
  }
1016
1594
  /**
1017
1595
  * Hand a task to the coordinator. Returns the assigned task id so
@@ -1025,6 +1603,23 @@ var Director = class {
1025
1603
  if (entry) entry.taskIds.push(taskWithId.id);
1026
1604
  }
1027
1605
  await this.coordinator.assign(taskWithId);
1606
+ this.taskDescriptions.set(taskWithId.id, taskWithId.description);
1607
+ if (taskWithId.subagentId) this.taskOwners.set(taskWithId.id, taskWithId.subagentId);
1608
+ const assignedAt = (/* @__PURE__ */ new Date()).toISOString();
1609
+ this.stateCheckpoint?.recordTaskAssigned({
1610
+ taskId: taskWithId.id,
1611
+ subagentId: taskWithId.subagentId,
1612
+ description: taskWithId.description,
1613
+ status: "running",
1614
+ assignedAt
1615
+ });
1616
+ void this.appendSessionEvent({
1617
+ type: "task_created",
1618
+ ts: assignedAt,
1619
+ taskId: taskWithId.id,
1620
+ title: taskWithId.description
1621
+ });
1622
+ this.scheduleManifest();
1028
1623
  return taskWithId.id;
1029
1624
  }
1030
1625
  /**
@@ -1084,6 +1679,23 @@ var Director = class {
1084
1679
  snapshot() {
1085
1680
  return this.usage.snapshot();
1086
1681
  }
1682
+ /**
1683
+ * Look up provider/model metadata for a spawned subagent. Returns
1684
+ * undefined when the subagent id is unknown (not yet spawned, or
1685
+ * already torn down). Callers — notably the TUI fleet panel — use
1686
+ * this to render human-readable provider/model tags next to each
1687
+ * subagent row without reaching into private state.
1688
+ */
1689
+ getSubagentMeta(id) {
1690
+ const usage = this.subagentMeta.get(id);
1691
+ const manifest = this.manifestEntries.get(id);
1692
+ if (!usage && !manifest) return void 0;
1693
+ return {
1694
+ provider: usage?.provider ?? manifest?.provider,
1695
+ model: usage?.model ?? manifest?.model,
1696
+ name: manifest?.name
1697
+ };
1698
+ }
1087
1699
  /**
1088
1700
  * Compose the leader/director-agent system prompt: fleet preamble +
1089
1701
  * (optional) roster summary + user base prompt. Pass the result to your
@@ -1383,12 +1995,260 @@ function makeFleetUsageTool(director) {
1383
1995
  }
1384
1996
  };
1385
1997
  }
1998
/**
 * Build the `delegate` tool: spawn one subagent, assign it a single
 * task, and wait for the result under a host-side wall-clock timeout.
 *
 * @param {object} opts
 * @param {object} opts.host - Provides async `ensureDirector()` and
 *   `promoteToDirector()`; may provide `getPromotionBlockReason()`.
 * @param {object} [opts.roster] - Plain object mapping role id to a
 *   baseline subagent config. Only own keys are accepted as roles.
 * @param {number} [opts.defaultTimeoutMs] - Host timeout used when the
 *   caller passes none (or an unusable one). Defaults to 4 hours.
 * @param {string} [opts.sessionsRoot] - Forwarded to `readSubagentPartial`.
 * @param {string} [opts.directorRunId] - Forwarded to `readSubagentPartial`.
 * @returns {object} Tool definition with `name`, `description`,
 *   `usageHint`, `permission`, `mutating`, `inputSchema` and `execute`.
 *   `execute` reports validation, spawn, assign and await failures as
 *   `{ ok: false, ... }` objects; errors thrown by the host's
 *   director-activation calls are not caught here.
 */
function createDelegateTool(opts) {
  // Node treats setTimeout delays above 2^31-1 ms (or below 1, or NaN)
  // as 1 ms, which would turn a "very long" budget into an instant timeout.
  const MAX_TIMER_DELAY_MS = 2147483647;
  const SUBAGENT_TIMEOUT_BUFFER_MS = 3e4;
  // Unique sentinel so a timeout can never be confused with a task result.
  const HOST_TIMEOUT = Symbol("host_timeout");
  // Accept positive numbers (and numeric strings, which the timer would
  // have coerced anyway); everything else falls back. Clamp to the
  // largest delay the timer can honour.
  const resolveTimeoutMs = (requested, fallback) => {
    const value = typeof requested === "string" ? Number(requested) : requested;
    if (typeof value !== "number" || Number.isNaN(value) || value <= 0) return fallback;
    return Math.min(value, MAX_TIMER_DELAY_MS);
  };
  const defaultTimeoutMs = resolveTimeoutMs(opts.defaultTimeoutMs, 4 * 60 * 60 * 1e3);
  const rosterIds = opts.roster ? Object.keys(opts.roster) : [];
  const inputSchema = {
    type: "object",
    properties: {
      task: {
        type: "string",
        description: "What the subagent should do \u2014 natural language, complete sentence(s). The subagent has its own tool slice, its own LLM call, and returns when its task is done."
      },
      role: {
        type: "string",
        description: rosterIds.length > 0 ? `Roster role (preferred). One of: ${rosterIds.join(", ")}. Picks a pre-tuned config (prompt, budgets, tools) for that role.` : "No roster is configured \u2014 pass `name` instead.",
        enum: rosterIds.length > 0 ? rosterIds : void 0
      },
      name: {
        type: "string",
        description: "Display name for the subagent when not using a roster role. Required when `role` is omitted."
      },
      provider: {
        type: "string",
        description: 'Provider id (e.g. "anthropic", "openai"). Defaults to the host provider when omitted.'
      },
      model: {
        type: "string",
        description: "Model id within the provider. Defaults to the host model when omitted."
      },
      systemPromptOverride: {
        type: "string",
        description: "Optional extra prompt text appended to the role baseline."
      },
      timeoutMs: {
        type: "number",
        description: `Wall-clock budget for this delegate in milliseconds. No hard cap \u2014 set as high as the task realistically needs (a monorepo audit can take hours, a single-file lint takes seconds). Default ${Math.round(defaultTimeoutMs / 1e3 / 60)} minutes.`
      },
      maxIterations: {
        type: "number",
        description: "Maximum LLM iterations the subagent may take. Unset = use the role/coordinator default. Raise this for tasks with many tool-think-tool cycles (deep code analysis, multi-file refactors)."
      },
      maxToolCalls: {
        type: "number",
        description: "Maximum number of tool invocations the subagent may make. Unset = use the role/coordinator default. Raise this for tasks that touch many files (large grep + read + report)."
      }
    },
    required: ["task"]
  };
  return {
    name: "delegate",
    description: "Hand a discrete piece of work to a dedicated subagent and wait for its result. The subagent has its own context, its own LLM call, and its own budget \u2014 use this when a task is self-contained, would otherwise blow up your context, or benefits from a specialized role (bug-hunter, security-scanner, refactor-planner, audit-log). YOU decide how big the budget is: pass `timeoutMs`, `maxIterations`, and `maxToolCalls` sized to the actual work. There is no hidden cap forcing a 3-minute / 80-iteration limit \u2014 if a monorepo audit needs 2 hours and 500 tool calls, ask for that. Call multiple delegates in parallel through the provider's parallel-tool-call surface to fan work out across roles.",
    usageHint: "Set `task` to a complete instruction. Either pick `role` from the roster or pass `name` + `provider` + `model`. For non-trivial work, also pass `timeoutMs` (the wall-clock budget you actually need), `maxIterations`, and `maxToolCalls` \u2014 defaults are intentionally generous (4 hours) but the right values depend on scope. Returns the subagent's `TaskResult` \u2014 including the textual `result`, iteration count, tool count, and duration. Auto-promotes the host into director mode on first call.",
    permission: "auto",
    mutating: false,
    inputSchema,
    async execute(input) {
      const i = input ?? {};
      if (typeof i.task !== "string" || !i.task.trim()) {
        return { ok: false, error: "`task` is required." };
      }
      // Reuse an active director if there is one; otherwise try to promote.
      let director = await opts.host.ensureDirector();
      if (!director) {
        director = await opts.host.promoteToDirector();
      }
      if (!director) {
        const reason = opts.host.getPromotionBlockReason?.();
        return {
          ok: false,
          error: reason ?? "Director could not be activated \u2014 multi-agent host already running in legacy non-director mode. Restart with `--director` for fleet support."
        };
      }
      const timeoutMs = resolveTimeoutMs(i.timeoutMs, defaultTimeoutMs);
      let cfg;
      if (i.role) {
        // Own-property check: the role string comes from the model, and a
        // plain bracket lookup would resolve inherited keys such as
        // "constructor" to a truthy non-config value.
        const roster = opts.roster;
        const base = roster && Object.prototype.hasOwnProperty.call(roster, i.role) ? roster[i.role] : void 0;
        if (!base) {
          return {
            ok: false,
            error: `Unknown role "${i.role}". Available: ${rosterIds.join(", ") || "(no roster configured)"}.`
          };
        }
        // Shallow copy so per-call overrides never mutate the shared roster entry.
        cfg = { ...base };
        if (i.systemPromptOverride) cfg.systemPromptOverride = i.systemPromptOverride;
        if (i.provider) cfg.provider = i.provider;
        if (i.model) cfg.model = i.model;
      } else {
        if (!i.name) {
          return {
            ok: false,
            error: "Either `role` (from the roster) or `name` is required."
          };
        }
        cfg = {
          name: i.name,
          provider: i.provider,
          model: i.model,
          systemPromptOverride: i.systemPromptOverride
        };
      }
      if (typeof i.maxIterations === "number" && i.maxIterations > 0) {
        cfg.maxIterations = i.maxIterations;
      }
      if (typeof i.maxToolCalls === "number" && i.maxToolCalls > 0) {
        cfg.maxToolCalls = i.maxToolCalls;
      }
      // Give the subagent a slightly shorter budget than the host timer
      // (never below 30 s) and never lengthen a tighter role-level budget.
      const desiredSubTimeout = Math.max(3e4, timeoutMs - SUBAGENT_TIMEOUT_BUFFER_MS);
      if (!cfg.timeoutMs || cfg.timeoutMs > desiredSubTimeout) {
        cfg.timeoutMs = desiredSubTimeout;
      }
      let hostTimer;
      try {
        const subagentId = await director.spawn(cfg);
        const taskId = await director.assign({
          id: "",
          description: i.task,
          subagentId
        });
        const outcome = await Promise.race([
          director.awaitTasks([taskId]).then((r) => r[0]),
          new Promise((resolve) => {
            hostTimer = setTimeout(() => resolve(HOST_TIMEOUT), timeoutMs);
          })
        ]);
        if (outcome === HOST_TIMEOUT) {
          const partial2 = await readSubagentPartial(opts, subagentId);
          return {
            ok: false,
            stopReason: "host_timeout",
            error: `Subagent did not finish within ${timeoutMs}ms.`,
            hint: "Reduce scope of the next delegate, raise timeoutMs, or use spawn_subagent + await_tasks for long-running work.",
            subagentId,
            taskId,
            partial: partial2
          };
        }
        if (!outcome) {
          // awaitTasks resolved without an entry for our task id.
          return {
            ok: false,
            stopReason: "error",
            error: `Task ${taskId} finished without reporting a result.`,
            subagentId,
            taskId
          };
        }
        const result = outcome;
        // NOTE(review): every status other than success/timeout/stopped is
        // reported as "budget_exhausted" — confirm that is intended for
        // plain failures.
        const baseStopReason = result.status === "success" ? "end_turn" : result.status === "timeout" ? "subagent_timeout" : result.status === "stopped" ? "aborted" : "budget_exhausted";
        const partial = result.status === "success" ? void 0 : await readSubagentPartial(opts, subagentId);
        const errorKind = result.error?.kind;
        const retryable = result.error?.retryable;
        const backoffMs = result.error?.backoffMs;
        const hint = hintForKind(errorKind, retryable, backoffMs);
        return {
          ok: result.status === "success",
          status: result.status,
          stopReason: baseStopReason,
          errorKind,
          retryable,
          backoffMs,
          subagentId: result.subagentId,
          taskId: result.taskId,
          result: result.result,
          error: result.error,
          iterations: result.iterations,
          toolCalls: result.toolCalls,
          durationMs: result.durationMs,
          ...partial ? { partial } : {},
          ...hint ? { hint } : {}
        };
      } catch (err) {
        return {
          ok: false,
          stopReason: "error",
          error: err instanceof Error ? err.message : String(err)
        };
      } finally {
        // Always release the host timer; otherwise a finished delegate
        // leaves a pending timer that keeps the event loop alive.
        if (hostTimer !== void 0) clearTimeout(hostTimer);
      }
    }
  };
}
2163
/**
 * Map a classified subagent error kind to a short recovery hint.
 *
 * @param {string} [kind] - Error classification; a falsy value yields no hint.
 * @param {boolean} [retryable] - Used only for kinds without a dedicated
 *   hint: when truthy a generic "retry" hint is returned.
 * @param {number} [backoffMs] - Suggested wait quoted in the rate-limit
 *   and 5xx hints; defaults to 5000 and 3000 respectively when nullish.
 * @returns {string | undefined} The hint text, or `undefined` when there
 *   is nothing useful to say.
 */
function hintForKind(kind, retryable, backoffMs) {
  if (!kind) return void 0;
  const budgetHint = "Subagent exhausted its budget. Raise the matching `max*` field on the next delegate or narrow task scope.";
  // Builders are lazy so only the matching hint interpolates `backoffMs`.
  const builders = new Map([
    ["provider_rate_limit", () => `Provider rate-limited. Retry safe after ${backoffMs ?? 5e3}ms backoff. Consider a smaller model or fewer parallel delegates.`],
    ["provider_5xx", () => `Provider server error. Retry safe after ${backoffMs ?? 3e3}ms backoff \u2014 usually transient.`],
    ["provider_timeout", () => "Provider network timeout. Retry safe; reduce input size if it persists."],
    ["provider_auth", () => "Provider rejected credentials. Cannot retry \u2014 fix the API key / config and re-invoke."],
    ["context_overflow", () => "Subagent context exceeded the model limit. Narrow the task, use a larger-context model, or split into multiple delegates."],
    ["budget_iterations", () => budgetHint],
    ["budget_tool_calls", () => budgetHint],
    ["budget_tokens", () => budgetHint],
    ["budget_cost", () => budgetHint],
    ["budget_timeout", () => "Subagent hit its wall-clock budget. Raise `timeoutMs` on the next delegate or split the task."],
    ["aborted_by_parent", () => "Subagent was aborted (user Ctrl+C, parent unwound, or sibling failure cascade). Not retryable until the abort condition is resolved."],
    ["empty_response", () => "Subagent ended its turn with no text and no tool calls. Almost always a prompt / config issue \u2014 clarify the task or check the model."],
    ["tool_failed", () => "A tool inside the subagent returned ok:false. Inspect `partial.lastAssistantText` for the agent reasoning, then retry with corrected inputs."],
    ["bridge_failed", () => "Parent-child bridge transport failed. This is rare \u2014 restart the session and retry."]
  ]);
  const build = builders.get(kind);
  if (build) return build();
  // Unknown kind: only speak up when the failure was flagged retryable.
  if (retryable) return "Failure classified as retryable. Try again with the same input.";
  return void 0;
}
2195
/**
 * Best-effort summary of what a subagent got done, read from its JSONL
 * transcript under `opts.sessionsRoot`.
 *
 * With `opts.directorRunId` set, only `<root>/<runId>/<subagentId>.jsonl`
 * is tried; otherwise every entry of the sessions root is probed and the
 * first transcript that can be read wins. Unreadable files and
 * unparsable lines are skipped silently.
 *
 * @param {{sessionsRoot?: string, directorRunId?: string}} opts
 * @param {string} subagentId - Used as the transcript file name stem.
 * @returns {Promise<{lastAssistantText: (string|undefined),
 *   lastStopReason: (string|undefined), toolUsesObserved: number,
 *   events: number} | undefined>} Summary of the first readable
 *   transcript, or `undefined` when no root is configured, the root
 *   cannot be listed, or no transcript could be read.
 */
async function readSubagentPartial(opts, subagentId) {
  if (!opts.sessionsRoot) return void 0;
  const transcriptIn = (runDir) => path4.join(opts.sessionsRoot, runDir, `${subagentId}.jsonl`);
  let transcriptPaths;
  if (opts.directorRunId) {
    transcriptPaths = [transcriptIn(opts.directorRunId)];
  } else {
    try {
      const runDirs = await fsp4.readdir(opts.sessionsRoot);
      transcriptPaths = runDirs.map((runDir) => transcriptIn(runDir));
    } catch {
      return void 0;
    }
  }
  for (const transcriptPath of transcriptPaths) {
    let contents;
    try {
      contents = await fsp4.readFile(transcriptPath, "utf8");
    } catch {
      // Missing or unreadable transcript: try the next candidate.
      continue;
    }
    const records = contents.split("\n").filter((text) => text.trim());
    const summary = {
      lastAssistantText: void 0,
      lastStopReason: void 0,
      toolUsesObserved: 0,
      events: records.length
    };
    records.forEach((record) => {
      try {
        const event = JSON.parse(record);
        if (event.type === "tool_use") summary.toolUsesObserved += 1;
        if (event.type !== "llm_response") return;
        if (typeof event.stopReason === "string") summary.lastStopReason = event.stopReason;
        if (!Array.isArray(event.content)) return;
        const text = event.content
          .filter((block) => block.type === "text")
          .map((block) => block.text ?? "")
          .join("\n")
          .trim();
        if (text) summary.lastAssistantText = text;
      } catch {
        // Malformed line: ignore it but keep whatever was gathered so far.
      }
    });
    return summary;
  }
  return void 0;
}
1386
2244
 
1387
2245
  // src/coordination/agent-subagent-runner.ts
1388
2246
  function makeAgentSubagentRunner(opts) {
1389
2247
  const format = opts.formatTaskInput ?? defaultFormatTaskInput;
1390
2248
  return async (task, ctx) => {
1391
- const { agent, events } = await opts.factory(ctx.config);
2249
+ const factoryResult = await opts.factory(ctx.config);
2250
+ const { agent, events } = factoryResult;
2251
+ const detachFleet = opts.fleetBus?.attach(ctx.subagentId, events, task.id);
1392
2252
  const aborter = new AbortController();
1393
2253
  let budgetError = null;
1394
2254
  const onBudgetError = (err) => {
@@ -1402,13 +2262,19 @@ function makeAgentSubagentRunner(opts) {
1402
2262
  budgetError.message += ` (caused by: ${err.message})`;
1403
2263
  }
1404
2264
  };
2265
+ let lastToolFailed = null;
1405
2266
  const unsub = [];
1406
2267
  unsub.push(
1407
- events.on("tool.started", () => {
2268
+ events.on("tool.executed", (e) => {
1408
2269
  try {
1409
2270
  ctx.budget.recordToolCall();
1410
- } catch (e) {
1411
- onBudgetError(e);
2271
+ } catch (eb) {
2272
+ onBudgetError(eb);
2273
+ }
2274
+ if (e.ok === false) {
2275
+ lastToolFailed = e.name;
2276
+ } else if (e.ok === true) {
2277
+ lastToolFailed = null;
1412
2278
  }
1413
2279
  }),
1414
2280
  events.on("provider.response", (e) => {
@@ -1425,6 +2291,26 @@ function makeAgentSubagentRunner(opts) {
1425
2291
  } catch (e) {
1426
2292
  onBudgetError(e);
1427
2293
  }
2294
+ }),
2295
+ // D3: cooperative timeout enforcement DURING a long tool call.
2296
+ // The iteration-loop checkTimeout() only fires between agent
2297
+ // iterations — a single `bash sleep 3600` call would otherwise
2298
+ // park inside one tool execution while the timeout silently
2299
+ // passes, relying solely on the coordinator's hard Promise.race
2300
+ // to interrupt. Tools that emit `tool.progress` (bash chunks,
2301
+ // fetch byte progress, spawn-stream stdout) give us a heartbeat
2302
+ // we can hang the check on. When the budget trips here:
2303
+ // 1. onBudgetError sets budgetError + aborter.abort()
2304
+ // 2. aborter signal propagates to agent.run → tool executor
2305
+ // 3. tool's own signal listener kills the child process
2306
+ // Cheap: O(1) per progress event, and the budget short-circuits
2307
+ // when timeoutMs is unset (most subagents have one set anyway).
2308
+ events.on("tool.progress", () => {
2309
+ try {
2310
+ ctx.budget.checkTimeout();
2311
+ } catch (e) {
2312
+ onBudgetError(e);
2313
+ }
1428
2314
  })
1429
2315
  );
1430
2316
  const onParentAbort = () => aborter.abort();
@@ -1433,8 +2319,15 @@ function makeAgentSubagentRunner(opts) {
1433
2319
  try {
1434
2320
  result = await agent.run(format(task, ctx.config), { signal: aborter.signal });
1435
2321
  } finally {
2322
+ detachFleet?.();
1436
2323
  ctx.signal.removeEventListener("abort", onParentAbort);
1437
2324
  for (const u of unsub) u();
2325
+ if (factoryResult.dispose) {
2326
+ try {
2327
+ await factoryResult.dispose();
2328
+ } catch {
2329
+ }
2330
+ }
1438
2331
  }
1439
2332
  if (budgetError) throw budgetError;
1440
2333
  if (result.status === "failed") {
@@ -1447,6 +2340,13 @@ function makeAgentSubagentRunner(opts) {
1447
2340
  throw new Error("agent exhausted iteration limit");
1448
2341
  }
1449
2342
  const usage = ctx.budget.usage();
2343
+ const finalText = (result.finalText ?? "").trim();
2344
+ if (finalText.length === 0 && usage.toolCalls === 0) {
2345
+ throw new Error("empty response");
2346
+ }
2347
+ if (finalText.length === 0 && lastToolFailed !== null) {
2348
+ throw new Error(`tool failed: ${lastToolFailed}`);
2349
+ }
1450
2350
  return {
1451
2351
  result: result.finalText,
1452
2352
  iterations: result.iterations,
@@ -1457,11 +2357,6 @@ function makeAgentSubagentRunner(opts) {
1457
2357
  function defaultFormatTaskInput(task) {
1458
2358
  return task.description ?? "";
1459
2359
  }
1460
- async function ensureDir(dir) {
1461
- await fsp2.mkdir(dir, { recursive: true });
1462
- }
1463
-
1464
- // src/storage/session-store.ts
1465
2360
  var DefaultSessionStore = class {
1466
2361
  dir;
1467
2362
  events;
@@ -1473,10 +2368,10 @@ var DefaultSessionStore = class {
1473
2368
  await ensureDir(this.dir);
1474
2369
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
1475
2370
  const id = meta.id ?? `${startedAt.replace(/[:.]/g, "-")}-${randomBytes(2).toString("hex")}`;
1476
- const file = path3.join(this.dir, `${id}.jsonl`);
2371
+ const file = path4.join(this.dir, `${id}.jsonl`);
1477
2372
  let handle;
1478
2373
  try {
1479
- handle = await fsp2.open(file, "a", 384);
2374
+ handle = await fsp4.open(file, "a", 384);
1480
2375
  } catch (err) {
1481
2376
  throw new Error(
1482
2377
  `Failed to open session file: ${err instanceof Error ? err.message : String(err)}`,
@@ -1495,10 +2390,10 @@ var DefaultSessionStore = class {
1495
2390
  }
1496
2391
  async resume(id) {
1497
2392
  const data = await this.load(id);
1498
- const file = path3.join(this.dir, `${id}.jsonl`);
2393
+ const file = path4.join(this.dir, `${id}.jsonl`);
1499
2394
  let handle;
1500
2395
  try {
1501
- handle = await fsp2.open(file, "a", 384);
2396
+ handle = await fsp4.open(file, "a", 384);
1502
2397
  } catch (err) {
1503
2398
  throw new Error(
1504
2399
  `Failed to open session "${id}" for append: ${err instanceof Error ? err.message : String(err)}`,
@@ -1519,8 +2414,8 @@ var DefaultSessionStore = class {
1519
2414
  return { writer, data };
1520
2415
  }
1521
2416
  async load(id) {
1522
- const file = path3.join(this.dir, `${id}.jsonl`);
1523
- const raw = await fsp2.readFile(file, "utf8");
2417
+ const file = path4.join(this.dir, `${id}.jsonl`);
2418
+ const raw = await fsp4.readFile(file, "utf8");
1524
2419
  const lines = raw.split("\n").filter((l) => l.trim());
1525
2420
  const events = [];
1526
2421
  for (const line of lines) {
@@ -1539,7 +2434,7 @@ var DefaultSessionStore = class {
1539
2434
  async list(limit = 20) {
1540
2435
  try {
1541
2436
  await ensureDir(this.dir);
1542
- const files = await fsp2.readdir(this.dir);
2437
+ const files = await fsp4.readdir(this.dir);
1543
2438
  const ids = files.filter((f) => f.endsWith(".jsonl")).map((f) => f.replace(/\.jsonl$/, ""));
1544
2439
  const sessions = await Promise.all(ids.map((id) => this.summaryFor(id).catch(() => null)));
1545
2440
  const out = sessions.filter((s) => s !== null);
@@ -1554,15 +2449,15 @@ var DefaultSessionStore = class {
1554
2449
  }
1555
2450
  }
1556
2451
  async summaryFor(id) {
1557
- const manifest = path3.join(this.dir, `${id}.summary.json`);
2452
+ const manifest = path4.join(this.dir, `${id}.summary.json`);
1558
2453
  try {
1559
- const raw = await fsp2.readFile(manifest, "utf8");
2454
+ const raw = await fsp4.readFile(manifest, "utf8");
1560
2455
  return JSON.parse(raw);
1561
2456
  } catch {
1562
- const full = path3.join(this.dir, `${id}.jsonl`);
1563
- const stat3 = await fsp2.stat(full);
2457
+ const full = path4.join(this.dir, `${id}.jsonl`);
2458
+ const stat3 = await fsp4.stat(full);
1564
2459
  const summary = await this.summarize(id, stat3.mtime.toISOString());
1565
- await fsp2.writeFile(manifest, JSON.stringify(summary), { mode: 384 }).catch((err) => {
2460
+ await fsp4.writeFile(manifest, JSON.stringify(summary), { mode: 384 }).catch((err) => {
1566
2461
  console.warn(
1567
2462
  `[session-store] Failed to write manifest for "${id}":`,
1568
2463
  err instanceof Error ? err.message : String(err)
@@ -1572,8 +2467,8 @@ var DefaultSessionStore = class {
1572
2467
  }
1573
2468
  }
1574
2469
  async delete(id) {
1575
- await fsp2.unlink(path3.join(this.dir, `${id}.jsonl`));
1576
- await fsp2.unlink(path3.join(this.dir, `${id}.summary.json`)).catch(() => void 0);
2470
+ await fsp4.unlink(path4.join(this.dir, `${id}.jsonl`));
2471
+ await fsp4.unlink(path4.join(this.dir, `${id}.summary.json`)).catch(() => void 0);
1577
2472
  }
1578
2473
  async summarize(id, mtime) {
1579
2474
  try {
@@ -1677,7 +2572,7 @@ var FileSessionWriter = class {
1677
2572
  this.startedAt = startedAt;
1678
2573
  this.meta = meta;
1679
2574
  this.resumed = opts.resumed ?? false;
1680
- this.manifestFile = opts.dir ? path3.join(opts.dir, `${id}.summary.json`) : "";
2575
+ this.manifestFile = opts.dir ? path4.join(opts.dir, `${id}.summary.json`) : "";
1681
2576
  this.filePath = opts.filePath ?? "";
1682
2577
  this.summary = {
1683
2578
  id,
@@ -1698,6 +2593,12 @@ var FileSessionWriter = class {
1698
2593
  tokenIn = 0;
1699
2594
  tokenOut = 0;
1700
2595
  filePath;
2596
+ /** Public accessor for the JSONL path — required by SessionWriter so
2597
+ * observability surfaces (`/fleet log`, FleetPanel) can locate the
2598
+ * transcript without recomputing the path from session metadata. */
2599
+ get transcriptPath() {
2600
+ return this.filePath || void 0;
2601
+ }
1701
2602
  initDone = false;
1702
2603
  resumed;
1703
2604
  appendFailCount = 0;
@@ -1715,7 +2616,7 @@ var FileSessionWriter = class {
1715
2616
  `;
1716
2617
  try {
1717
2618
  if (this.filePath) {
1718
- await fsp2.writeFile(this.filePath, record, { flag: "a", mode: 384 });
2619
+ await fsp4.writeFile(this.filePath, record, { flag: "a", mode: 384 });
1719
2620
  }
1720
2621
  } catch {
1721
2622
  }
@@ -1768,7 +2669,7 @@ var FileSessionWriter = class {
1768
2669
  this.closed = true;
1769
2670
  if (this.manifestFile) {
1770
2671
  try {
1771
- await fsp2.writeFile(this.manifestFile, JSON.stringify(this.summary), { mode: 384 });
2672
+ await fsp4.writeFile(this.manifestFile, JSON.stringify(this.summary), { mode: 384 });
1772
2673
  } catch {
1773
2674
  }
1774
2675
  }
@@ -1791,9 +2692,9 @@ function makeDirectorSessionFactory(opts) {
1791
2692
  let dir;
1792
2693
  if (opts.store) {
1793
2694
  store = opts.store;
1794
- dir = opts.sessionsRoot ? path3.join(opts.sessionsRoot, runId) : "(caller-managed)";
2695
+ dir = opts.sessionsRoot ? path4.join(opts.sessionsRoot, runId) : "(caller-managed)";
1795
2696
  } else if (opts.sessionsRoot) {
1796
- dir = path3.join(opts.sessionsRoot, runId);
2697
+ dir = path4.join(opts.sessionsRoot, runId);
1797
2698
  store = new DefaultSessionStore({ dir });
1798
2699
  } else {
1799
2700
  throw new Error("makeDirectorSessionFactory requires either `store` or `sessionsRoot`");
@@ -1840,10 +2741,12 @@ Working rules:
1840
2741
  - Never fabricate numbers \u2014 read the actual logs first
1841
2742
  - Always include file:line references for errors
1842
2743
  - If sessionPath is missing, ask the director to provide it
1843
- - Report confidence level: high (>90% accuracy), medium, low`,
1844
- maxIterations: 50,
1845
- maxToolCalls: 200,
1846
- timeoutMs: 12e4
2744
+ - Report confidence level: high (>90% accuracy), medium, low`
2745
+ // No hardcoded budgets — the orchestrator (delegate tool or
2746
+ // spawn_subagent) decides per-task how much room a subagent gets.
2747
+ // A monorepo audit needs hours; a single-file lint check needs
2748
+ // seconds. Pinning a number here forces the orchestrator to fight
2749
+ // the role's default instead of just asking for what it needs.
1847
2750
  };
1848
2751
  var BUG_HUNTER_AGENT = {
1849
2752
  id: "bug-hunter",
@@ -1883,10 +2786,8 @@ Working rules:
1883
2786
  - Never scan node_modules \u2014 it's noise
1884
2787
  - Always include file:line for every finding
1885
2788
  - If >30% of findings are false positives, note the confidence level
1886
- - Ask director for clarification if paths are ambiguous`,
1887
- maxIterations: 80,
1888
- maxToolCalls: 300,
1889
- timeoutMs: 18e4
2789
+ - Ask director for clarification if paths are ambiguous`
2790
+ // Budgets are set by the orchestrator per task — see fleet.ts header.
1890
2791
  };
1891
2792
  var REFACTOR_PLANNER_AGENT = {
1892
2793
  id: "refactor-planner",
@@ -1926,10 +2827,8 @@ Working rules:
1926
2827
  - Always include rollback strategy \u2014 every refactor can fail
1927
2828
  - Merge tasks that take <1h into a single phase
1928
2829
  - Respect team constraints (reviewer availability, parallelization)
1929
- - Never plan without analyzing the actual code first`,
1930
- maxIterations: 60,
1931
- maxToolCalls: 250,
1932
- timeoutMs: 15e4
2830
+ - Never plan without analyzing the actual code first`
2831
+ // Budgets are set by the orchestrator per task — see fleet.ts header.
1933
2832
  };
1934
2833
  var SECURITY_SCANNER_AGENT = {
1935
2834
  id: "security-scanner",
@@ -1977,10 +2876,8 @@ Working rules:
1977
2876
  - Never scan node_modules \u2014 use npm audit instead
1978
2877
  - Always provide remediation steps, not just findings
1979
2878
  - Verify regex-based secrets before flagging (false positive risk)
1980
- - When in doubt, flag as medium rather than ignoring potential issues`,
1981
- maxIterations: 70,
1982
- maxToolCalls: 280,
1983
- timeoutMs: 16e4
2879
+ - When in doubt, flag as medium rather than ignoring potential issues`
2880
+ // Budgets are set by the orchestrator per task — see fleet.ts header.
1984
2881
  };
1985
2882
  var FLEET_ROSTER = {
1986
2883
  "audit-log": AUDIT_LOG_AGENT,
@@ -1990,6 +2887,6 @@ var FLEET_ROSTER = {
1990
2887
  };
1991
2888
  var ALL_FLEET_AGENTS = Object.values(FLEET_ROSTER);
1992
2889
 
1993
- export { ALL_FLEET_AGENTS, AUDIT_LOG_AGENT, BUG_HUNTER_AGENT, BudgetExceededError, DEFAULT_DIRECTOR_PREAMBLE, DEFAULT_SUBAGENT_BASELINE, DefaultMultiAgentCoordinator, Director, DirectorBudgetError, FLEET_ROSTER, FleetBus, FleetUsageAggregator, InMemoryAgentBridge, InMemoryBridgeTransport, REFACTOR_PLANNER_AGENT, SECURITY_SCANNER_AGENT, SubagentBudget, composeDirectorPrompt, composeSubagentPrompt, createMessage, makeAgentSubagentRunner, makeDirectorSessionFactory, rosterSummaryFromConfigs };
2890
+ export { ALL_FLEET_AGENTS, AUDIT_LOG_AGENT, BUG_HUNTER_AGENT, BudgetExceededError, DEFAULT_DIRECTOR_PREAMBLE, DEFAULT_SUBAGENT_BASELINE, DefaultMultiAgentCoordinator, Director, DirectorBudgetError, FLEET_ROSTER, FleetBus, FleetUsageAggregator, InMemoryAgentBridge, InMemoryBridgeTransport, REFACTOR_PLANNER_AGENT, SECURITY_SCANNER_AGENT, SubagentBudget, composeDirectorPrompt, composeSubagentPrompt, createDelegateTool, createMessage, makeAgentSubagentRunner, makeDirectorSessionFactory, rosterSummaryFromConfigs };
1994
2891
  //# sourceMappingURL=index.js.map
1995
2892
  //# sourceMappingURL=index.js.map