@wrongstack/core 0.1.10 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{agent-bridge-6KPqsFx6.d.ts → agent-bridge-DmBiCipY.d.ts} +1 -1
- package/dist/{compactor-B4mQZXf2.d.ts → compactor-DSl2FK7a.d.ts} +1 -1
- package/dist/{config-BU9f_5yH.d.ts → config-DXrqb41m.d.ts} +1 -1
- package/dist/{context-BmM2xGUZ.d.ts → context-u0bryklF.d.ts} +8 -0
- package/dist/coordination/index.d.ts +210 -12
- package/dist/coordination/index.js +941 -67
- package/dist/coordination/index.js.map +1 -1
- package/dist/defaults/index.d.ts +18 -18
- package/dist/defaults/index.js +953 -41
- package/dist/defaults/index.js.map +1 -1
- package/dist/{events-BMNaEFZl.d.ts → events-B6Q03pTu.d.ts} +73 -1
- package/dist/execution/index.d.ts +11 -11
- package/dist/index.d.ts +61 -28
- package/dist/index.js +1077 -48
- package/dist/index.js.map +1 -1
- package/dist/infrastructure/index.d.ts +6 -6
- package/dist/kernel/index.d.ts +9 -9
- package/dist/kernel/index.js.map +1 -1
- package/dist/{mcp-servers-Dzgg4x1w.d.ts → mcp-servers-BA1Ofmfj.d.ts} +3 -3
- package/dist/models/index.d.ts +2 -2
- package/dist/{multi-agent-fmkRHtof.d.ts → multi-agent-BDfkxL5C.d.ts} +71 -3
- package/dist/observability/index.d.ts +2 -2
- package/dist/{path-resolver-DBjaoXFq.d.ts → path-resolver-Crkt8wTQ.d.ts} +2 -2
- package/dist/{plugin-DJk6LL8B.d.ts → plugin-CoYYZKdn.d.ts} +19 -6
- package/dist/{renderer-rk_1Swwc.d.ts → renderer-0A2ZEtca.d.ts} +1 -1
- package/dist/sdd/index.d.ts +3 -3
- package/dist/{secret-scrubber-CicHLN4G.d.ts → secret-scrubber-3TLUkiCV.d.ts} +1 -1
- package/dist/{secret-scrubber-DF88luOe.d.ts → secret-scrubber-CwYliRWd.d.ts} +1 -1
- package/dist/security/index.d.ts +20 -4
- package/dist/security/index.js +13 -1
- package/dist/security/index.js.map +1 -1
- package/dist/{selector-BbJqiEP4.d.ts → selector-BRqzvugb.d.ts} +1 -1
- package/dist/{session-reader-Drq8RvJu.d.ts → session-reader-C3x96CDR.d.ts} +1 -1
- package/dist/{skill-DhfSizKv.d.ts → skill-Bx8jxznf.d.ts} +1 -1
- package/dist/storage/index.d.ts +164 -6
- package/dist/storage/index.js +273 -1
- package/dist/storage/index.js.map +1 -1
- package/dist/{system-prompt-BC_8ypCG.d.ts → system-prompt-CG9jU5-5.d.ts} +9 -1
- package/dist/{tool-executor-CpuJPYm9.d.ts → tool-executor-CYdZdtno.d.ts} +4 -4
- package/dist/types/index.d.ts +15 -15
- package/dist/utils/index.d.ts +1 -1
- package/package.json +1 -1
|
@@ -1,9 +1,152 @@
|
|
|
1
1
|
import { randomUUID, randomBytes } from 'crypto';
|
|
2
|
-
import * as
|
|
3
|
-
import * as
|
|
2
|
+
import * as fsp4 from 'fs/promises';
|
|
3
|
+
import * as path4 from 'path';
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
|
|
6
6
|
// src/coordination/director.ts
|
|
7
|
+
/**
 * Write `content` to `targetPath` atomically: the data goes to a uniquely
 * named sibling temp file which is then renamed over the target, so readers
 * never observe a half-written file.
 *
 * @param {string} targetPath - Destination file. Its parent directory is created if missing.
 * @param {string | Uint8Array} content - Data to write. Strings are encoded with `opts.encoding` (default "utf8").
 * @param {{ encoding?: string, mode?: number }} [opts] - `mode` is only used when the
 *   target does not exist yet (or cannot be stat'ed); an existing target keeps its permission bits.
 * @returns {Promise<void>}
 * @throws Re-throws any write/chmod/rename failure after removing the temp file.
 */
async function atomicWrite(targetPath, content, opts = {}) {
  const dir = path4.dirname(targetPath);
  await fsp4.mkdir(dir, { recursive: true });
  const tmp = path4.join(dir, `.${path4.basename(targetPath)}.${randomBytes(6).toString("hex")}.tmp`);
  // Resolve the final permission bits BEFORE creating the temp file so it is
  // never created with wider permissions than the file it will replace.
  let mode;
  try {
    const stat3 = await fsp4.stat(targetPath);
    mode = stat3.mode & 0o777;
  } catch {
    mode = opts.mode;
  }
  try {
    // "wx" fails if the temp name already exists; 0o666 is Node's own default.
    const fh = await fsp4.open(tmp, "wx", mode ?? 0o666);
    try {
      if (typeof content === "string") {
        await fh.writeFile(content, { encoding: opts.encoding ?? "utf8" });
      } else {
        await fh.writeFile(content);
      }
      try {
        await fh.sync();
      } catch {
        // fsync is best-effort: a failure here must not abort the write.
      }
    } finally {
      await fh.close();
    }
    if (mode !== void 0) {
      // The creation mode is filtered by the process umask; chmod pins the exact bits.
      await fsp4.chmod(tmp, mode);
    }
    await fsp4.rename(tmp, targetPath);
  } catch (err) {
    try {
      await fsp4.unlink(tmp);
    } catch {
      // The temp file may never have been created; nothing to clean up.
    }
    throw err;
  }
}
|
|
45
|
+
/**
 * Create `dir` together with any missing parent directories.
 * Resolves without error when the directory already exists.
 *
 * @param {string} dir - Directory path to create.
 * @returns {Promise<void>}
 */
async function ensureDir(dir) {
  const options = { recursive: true };
  await fsp4.mkdir(dir, options);
}
|
|
48
|
+
|
|
49
|
+
// src/storage/director-state.ts
|
|
50
|
+
/**
 * Debounced on-disk mirror of a director run (spawn counters, subagent
 * roster, task list, usage). Every mutation replaces the in-memory snapshot
 * and schedules a write; bursts of mutations collapse into one write per
 * `debounceMs`. Write failures are logged with `console.warn` and never thrown.
 */
var DirectorStateCheckpoint = class {
  /** Current snapshot object; replaced (not mutated) on every update. */
  snapshot;
  /** Destination file handed to `atomicWrite`. */
  filePath;
  /** Pending debounce timer, or null when no write is scheduled. */
  timer = null;
  debounceMs;
  /** True while a write is in progress. */
  writing = false;
  /** Set when a write was requested while another one was in progress. */
  rewriteRequested = false;
  /** Promise of the write currently in progress, or null. Lets `flush()` wait for it. */
  inFlight = null;
  /**
   * @param {string} filePath - Checkpoint file location.
   * @param {{ directorRunId: string, maxSpawns?: number, spawnDepth: number, maxSpawnDepth: number }} init
   * @param {number} [debounceMs=250] - Delay between a mutation and its write.
   */
  constructor(filePath, init, debounceMs = 250) {
    this.filePath = filePath;
    this.debounceMs = debounceMs;
    this.snapshot = {
      version: 1,
      directorRunId: init.directorRunId,
      updatedAt: (/* @__PURE__ */ new Date()).toISOString(),
      spawnCount: 0,
      maxSpawns: init.maxSpawns,
      spawnDepth: init.spawnDepth,
      maxSpawnDepth: init.maxSpawnDepth,
      subagents: [],
      tasks: []
    };
  }
  /** Return the latest in-memory snapshot (which may be newer than the file on disk). */
  current() {
    return this.snapshot;
  }
  /** Add or replace the roster entry with `sub.id` and store the new spawn count. */
  recordSpawn(sub, spawnCount) {
    this.snapshot = {
      ...this.snapshot,
      spawnCount,
      subagents: [...this.snapshot.subagents.filter((s) => s.id !== sub.id), sub]
    };
    this.bumpUpdatedAt();
    this.schedule();
  }
  /** Append `task`, or merge it into the existing entry with the same `taskId`. */
  recordTaskAssigned(task) {
    const exists = this.snapshot.tasks.some((t) => t.taskId === task.taskId);
    this.snapshot = {
      ...this.snapshot,
      tasks: exists ? this.snapshot.tasks.map((t) => t.taskId === task.taskId ? { ...t, ...task } : t) : [...this.snapshot.tasks, task]
    };
    this.bumpUpdatedAt();
    this.schedule();
  }
  /** Merge `patch` into the task with `taskId`; unknown ids leave the task list unchanged. */
  recordTaskStatus(taskId, patch) {
    this.snapshot = {
      ...this.snapshot,
      tasks: this.snapshot.tasks.map(
        (t) => t.taskId === taskId ? { ...t, ...patch } : t
      )
    };
    this.bumpUpdatedAt();
    this.schedule();
  }
  /** Replace the usage section of the snapshot. */
  setUsage(usage) {
    this.snapshot = { ...this.snapshot, usage };
    this.bumpUpdatedAt();
    this.schedule();
  }
  /**
   * Write the latest snapshot now and resolve once that write has finished.
   * Used by Director.shutdown().
   *
   * If a write is already in progress it captured an older snapshot, so we
   * wait for it to finish and then write again instead of returning early.
   */
  async flush() {
    this.cancelTimer();
    while (this.inFlight) {
      await this.inFlight;
    }
    // The write that just finished may have re-armed the debounce timer;
    // the write below supersedes it.
    this.cancelTimer();
    this.rewriteRequested = false;
    await this.persist();
  }
  /** Stamp the snapshot with the current time. */
  bumpUpdatedAt() {
    this.snapshot = { ...this.snapshot, updatedAt: (/* @__PURE__ */ new Date()).toISOString() };
  }
  /** Arm the debounce timer unless one is already pending. */
  schedule() {
    if (this.timer) return;
    this.timer = setTimeout(() => {
      this.timer = null;
      void this.persist();
    }, this.debounceMs);
  }
  /** Drop the pending debounce timer, if any. */
  cancelTimer() {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }
  /**
   * Write the snapshot unless a write is already running; in that case only
   * remember that a follow-up write is needed and return immediately.
   */
  async persist() {
    if (this.writing) {
      this.rewriteRequested = true;
      return;
    }
    this.writing = true;
    this.inFlight = this.writeSnapshot();
    try {
      await this.inFlight;
    } finally {
      this.writing = false;
      this.inFlight = null;
      if (this.rewriteRequested) {
        this.rewriteRequested = false;
        this.schedule();
      }
    }
  }
  /** Serialize and write the snapshot (file mode 0600). Never rejects: failures are logged. */
  async writeSnapshot() {
    try {
      await atomicWrite(this.filePath, JSON.stringify(this.snapshot, null, 2), {
        mode: 384
      });
    } catch (err) {
      console.warn(
        "[director-state] checkpoint write failed:",
        err instanceof Error ? err.message : String(err)
      );
    }
  }
};
|
|
7
150
|
|
|
8
151
|
// src/coordination/in-memory-transport.ts
|
|
9
152
|
var InMemoryBridgeTransport = class {
|
|
@@ -260,6 +403,10 @@ var FleetBus = class {
|
|
|
260
403
|
"iteration.started",
|
|
261
404
|
"iteration.completed",
|
|
262
405
|
"provider.text_delta",
|
|
406
|
+
// Subagent extended-thinking output. Forwarded so the FleetPanel /
|
|
407
|
+
// /fleet log can surface "the planner is thinking…" instead of a
|
|
408
|
+
// silent gap between iteration.started and the first text_delta.
|
|
409
|
+
"provider.thinking_delta",
|
|
263
410
|
"provider.response",
|
|
264
411
|
"provider.retry",
|
|
265
412
|
"provider.error",
|
|
@@ -416,6 +563,108 @@ var FleetUsageAggregator = class {
|
|
|
416
563
|
}
|
|
417
564
|
};
|
|
418
565
|
|
|
566
|
+
// src/types/errors.ts
|
|
567
|
+
/**
 * Base error type carrying a machine-readable `code`, the originating
 * `subsystem`, a `severity` (default "error"), a `recoverable` flag
 * (default false) and an optional free-form `context` record.
 */
var WrongStackError = class extends Error {
  code;
  subsystem;
  severity;
  recoverable;
  context;
  /**
   * @param {{ message: string, code: string, subsystem: string, severity?: string,
   *           recoverable?: boolean, context?: Record<string, unknown>, cause?: unknown }} opts
   */
  constructor(opts) {
    super(opts.message, { cause: opts.cause });
    this.name = "WrongStackError";
    this.code = opts.code;
    this.subsystem = opts.subsystem;
    this.severity = opts.severity ?? "error";
    this.recoverable = opts.recoverable ?? false;
    this.context = opts.context;
  }
  /**
   * Render a one-line user-facing description.
   * Subclasses should override for domain-specific formatting.
   *
   * The context suffix is appended only when it is non-empty, so an empty
   * context (or one holding only undefined values) leaves no trailing space.
   */
  describe() {
    const head = `${this.code}: ${this.message}`;
    const ctx = this.context ? formatContext(this.context) : "";
    return ctx ? `${head} ${ctx}` : head;
  }
};
/**
 * Format up to the first three defined entries of `ctx` as `[k=v k=v k=v]`.
 * Returns "" when there is nothing to show.
 */
function formatContext(ctx) {
  const parts = Object.entries(ctx).filter(([, v]) => v !== void 0).slice(0, 3).map(([k, v]) => `${k}=${String(v)}`);
  return parts.length > 0 ? `[${parts.join(" ")}]` : "";
}
|
|
595
|
+
|
|
596
|
+
// src/types/provider.ts
|
|
597
|
+
/**
 * Error raised for a failed provider call. Keeps the HTTP `status`, whether
 * the call is `retryable`, the `providerId` and the parsed error `body`
 * (if any) next to the generic WrongStackError fields.
 */
var ProviderError = class extends WrongStackError {
  status;
  retryable;
  providerId;
  body;
  /**
   * @param {string} message
   * @param {number} status - HTTP status of the failed call.
   * @param {boolean} retryable - Also stored as the base class' `recoverable`.
   * @param {string} providerId
   * @param {{ body?: object, cause?: unknown }} [opts]
   */
  constructor(message, status, retryable, providerId, opts = {}) {
    const { body, cause } = opts;
    // Statuses of 500 and above are errors; everything else is a warning.
    const severity = status >= 500 ? "error" : "warning";
    super({
      message,
      code: providerStatusToCode(status, body?.type),
      subsystem: "provider",
      severity,
      recoverable: retryable,
      context: { providerId, status },
      cause
    });
    this.name = "ProviderError";
    this.status = status;
    this.retryable = retryable;
    this.providerId = providerId;
    this.body = body;
  }
  /**
   * Build a single-line description in the form
   * `<providerId> <status summary>[: <message>][ [req <id>]]`.
   * The body message is trimmed and capped at 240 characters; the request id
   * is capped at 16 characters, with an ellipsis marking a cut.
   *
   * Examples:
   *   "minimax-coding-plan overloaded (529): High traffic detected. Upgrade for highspeed model. [req 06534785201de9c0…]"
   *   "openai rate limited (429): Retry after 12s"
   *   "groq HTTP 500 (server error)"
   */
  describe() {
    const body = this.body;
    let line = `${this.providerId} ${describeStatus(this.status, body?.type)}`;
    const detail = body?.message?.trim();
    if (detail) {
      line += `: ${truncate(detail, 240)}`;
    }
    const requestId = body?.requestId;
    if (requestId) {
      const ellipsis = requestId.length > 16 ? "\u2026" : "";
      line += ` [req ${requestId.slice(0, 16)}${ellipsis}]`;
    }
    return line;
  }
};
|
|
641
|
+
/**
 * Turn an HTTP status (and optional provider error `type`) into a short
 * human-readable label such as "rate limited (429)".
 *
 * Status 0 always wins ("network error"). After that the known
 * type/status pairs are tried in order, then 408, then the 5xx range;
 * anything else falls back to the raw type or a bare "HTTP <status>".
 */
function describeStatus(status, type) {
  if (status === 0) return "network error";
  // [error type, status code, label]; order matters, first match wins.
  const known = [
    ["overloaded_error", 529, "overloaded"],
    ["rate_limit_error", 429, "rate limited"],
    ["authentication_error", 401, "auth failed"],
    ["permission_error", 403, "forbidden"],
    ["not_found_error", 404, "not found"],
    ["invalid_request_error", 400, "invalid request"]
  ];
  for (const [errorType, code, label] of known) {
    if (type === errorType || status === code) {
      return `${label} (${status})`;
    }
  }
  if (status === 408) {
    return `timeout (${status})`;
  }
  if (status >= 500 && status < 600) {
    return `HTTP ${status} (server error)`;
  }
  return type ? `${type} (${status})` : `HTTP ${status}`;
}
|
|
654
|
+
/**
 * Shorten `s` to at most `n` UTF-16 code units, ending with an ellipsis
 * ("…") when anything was cut. Strings that already fit are returned as is.
 *
 * @param {string} s
 * @param {number} n - Maximum length of the result, including the ellipsis.
 * @returns {string}
 */
function truncate(s, n) {
  if (s.length <= n) return s;
  // With no room even for the ellipsis, slice(0, n - 1) would use a negative
  // end index and return more than the budget allows.
  if (n <= 0) return "";
  let end = n - 1;
  if (end > 0) {
    const last = s.charCodeAt(end - 1);
    // Never cut between the two halves of a surrogate pair: drop the
    // dangling high surrogate instead of emitting a broken character.
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
  }
  return `${s.slice(0, end)}\u2026`;
}
|
|
657
|
+
/**
 * Map an HTTP status (and optional provider error `type`) to one of the
 * PROVIDER_* error codes. Rules are evaluated top to bottom and the first
 * match wins; anything unmatched is reported as an invalid request.
 */
function providerStatusToCode(status, type) {
  const rules = [
    [status === 0, "PROVIDER_NETWORK_ERROR"],
    [type === "rate_limit_error" || status === 429, "PROVIDER_RATE_LIMITED"],
    [type === "authentication_error" || status === 401, "PROVIDER_AUTH_FAILED"],
    [type === "overloaded_error" || status === 529, "PROVIDER_OVERLOADED"],
    [type === "invalid_request_error" || status === 400, "PROVIDER_INVALID_REQUEST"],
    // A request timeout is grouped with network failures.
    [status === 408, "PROVIDER_NETWORK_ERROR"],
    [status >= 500, "PROVIDER_SERVER_ERROR"]
  ];
  const hit = rules.find(([matches]) => matches);
  return hit ? hit[1] : "PROVIDER_INVALID_REQUEST";
}
|
|
667
|
+
|
|
419
668
|
// src/coordination/subagent-budget.ts
|
|
420
669
|
var BudgetExceededError = class extends Error {
|
|
421
670
|
kind;
|
|
@@ -508,14 +757,34 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
|
|
|
508
757
|
completedResults = [];
|
|
509
758
|
totalIterations = 0;
|
|
510
759
|
inFlight = 0;
|
|
760
|
+
/**
|
|
761
|
+
* Subagents currently being stopped. Set on entry to `stop()`, cleared
|
|
762
|
+
* once `recordCompletion` lands the terminal TaskResult. Used by
|
|
763
|
+
* `runDispatched` and `findIdleSubagent` to refuse mid-flight dispatch
|
|
764
|
+
* to a subagent the caller has already asked to terminate — closes the
|
|
765
|
+
* assign+terminate race where a fresh task could land on a worker that
|
|
766
|
+
* was about to be killed.
|
|
767
|
+
*/
|
|
768
|
+
terminating = /* @__PURE__ */ new Set();
|
|
511
769
|
constructor(config, options = {}) {
|
|
512
770
|
super();
|
|
513
771
|
this.coordinatorId = config.coordinatorId;
|
|
514
772
|
this.config = config;
|
|
515
773
|
this.runner = options.runner;
|
|
516
774
|
}
|
|
775
|
+
/**
|
|
776
|
+
* Replace the runner after construction. Used when the runner depends
|
|
777
|
+
* on infrastructure (e.g. FleetBus) that isn't available until after
|
|
778
|
+
* the coordinator's owning Director is built.
|
|
779
|
+
*/
|
|
780
|
+
setRunner(runner) {
|
|
781
|
+
this.runner = runner;
|
|
782
|
+
}
|
|
517
783
|
async spawn(subagent) {
|
|
518
784
|
const id = subagent.id || randomUUID();
|
|
785
|
+
if (this.subagents.has(id)) {
|
|
786
|
+
throw new Error(`Subagent id "${id}" already exists \u2014 refusing to overwrite`);
|
|
787
|
+
}
|
|
519
788
|
const context = {
|
|
520
789
|
subagentId: id,
|
|
521
790
|
tasks: [],
|
|
@@ -559,6 +828,7 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
|
|
|
559
828
|
async stop(subagentId) {
|
|
560
829
|
const subagent = this.subagents.get(subagentId);
|
|
561
830
|
if (!subagent) return;
|
|
831
|
+
this.terminating.add(subagentId);
|
|
562
832
|
subagent.abortController.abort();
|
|
563
833
|
subagent.status = "stopped";
|
|
564
834
|
subagent.currentTask = void 0;
|
|
@@ -566,6 +836,7 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
|
|
|
566
836
|
this.emit("subagent.stopped", { subagentId, reason: "stopped by coordinator" });
|
|
567
837
|
}
|
|
568
838
|
async stopAll() {
|
|
839
|
+
this.drainPendingAsAborted("Coordinator stopAll() drained the pending queue");
|
|
569
840
|
await Promise.allSettled([...this.subagents.keys()].map((id) => this.stop(id)));
|
|
570
841
|
}
|
|
571
842
|
getStatus() {
|
|
@@ -599,7 +870,14 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
|
|
|
599
870
|
tryDispatchNext() {
|
|
600
871
|
while (this.canDispatch()) {
|
|
601
872
|
const subagentId = this.findIdleSubagent();
|
|
602
|
-
if (!subagentId)
|
|
873
|
+
if (!subagentId) {
|
|
874
|
+
if (this.pendingTasks.length > 0 && !this.hasLiveSubagent()) {
|
|
875
|
+
this.drainPendingAsAborted(
|
|
876
|
+
"No live subagent available \u2014 all stopped or mid-termination"
|
|
877
|
+
);
|
|
878
|
+
}
|
|
879
|
+
return;
|
|
880
|
+
}
|
|
603
881
|
const task = this.pendingTasks.shift();
|
|
604
882
|
if (!task) return;
|
|
605
883
|
this.runDispatched(subagentId, task).catch((err) => {
|
|
@@ -607,7 +885,7 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
|
|
|
607
885
|
subagentId,
|
|
608
886
|
taskId: task.id,
|
|
609
887
|
status: "failed",
|
|
610
|
-
error:
|
|
888
|
+
error: classifySubagentError(err),
|
|
611
889
|
iterations: 0,
|
|
612
890
|
toolCalls: 0,
|
|
613
891
|
durationMs: 0
|
|
@@ -621,13 +899,76 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
|
|
|
621
899
|
}
|
|
622
900
|
findIdleSubagent() {
|
|
623
901
|
for (const [id, s] of this.subagents) {
|
|
624
|
-
if (s.status === "idle") return id;
|
|
902
|
+
if (s.status === "idle" && !this.terminating.has(id)) return id;
|
|
625
903
|
}
|
|
626
904
|
return null;
|
|
627
905
|
}
|
|
906
|
+
/**
|
|
907
|
+
* Returns true iff at least one spawned subagent could still
|
|
908
|
+
* process a task. A "live" subagent is one that is not stopped
|
|
909
|
+
* AND not mid-termination — `running` workers count because they
|
|
910
|
+
* will eventually finish and become idle.
|
|
911
|
+
*
|
|
912
|
+
* When no subagent has ever been spawned, returns `true` so a
|
|
913
|
+
* pre-spawn `assign()` simply queues (legacy behaviour). The
|
|
914
|
+
* dead-end detection only fires after `stop()` has retired every
|
|
915
|
+
* spawned worker.
|
|
916
|
+
*
|
|
917
|
+
* Used by `tryDispatchNext` to detect a dead-end pending queue.
|
|
918
|
+
*/
|
|
919
|
+
hasLiveSubagent() {
|
|
920
|
+
if (this.subagents.size === 0) return true;
|
|
921
|
+
for (const [id, s] of this.subagents) {
|
|
922
|
+
if (s.status !== "stopped" && !this.terminating.has(id)) return true;
|
|
923
|
+
}
|
|
924
|
+
return false;
|
|
925
|
+
}
|
|
926
|
+
/**
|
|
927
|
+
* Drain every pending task with a synthetic `aborted_by_parent`
|
|
928
|
+
* completion event. Same shape as the `stopAll()` drain — we go
|
|
929
|
+
* around `recordCompletion` because pending tasks were never
|
|
930
|
+
* counted in `inFlight` and routing them through would trip the
|
|
931
|
+
* underflow guard on every task after the first.
|
|
932
|
+
*/
|
|
933
|
+
drainPendingAsAborted(message) {
|
|
934
|
+
const dropped = this.pendingTasks.splice(0, this.pendingTasks.length);
|
|
935
|
+
for (const t of dropped) {
|
|
936
|
+
const synthetic = {
|
|
937
|
+
subagentId: t.subagentId ?? "unassigned",
|
|
938
|
+
taskId: t.id,
|
|
939
|
+
status: "stopped",
|
|
940
|
+
error: {
|
|
941
|
+
kind: "aborted_by_parent",
|
|
942
|
+
message,
|
|
943
|
+
retryable: false
|
|
944
|
+
},
|
|
945
|
+
iterations: 0,
|
|
946
|
+
toolCalls: 0,
|
|
947
|
+
durationMs: 0
|
|
948
|
+
};
|
|
949
|
+
this.completedResults.push(synthetic);
|
|
950
|
+
this.emit("task.completed", { task: t, result: synthetic });
|
|
951
|
+
}
|
|
952
|
+
}
|
|
628
953
|
async runDispatched(subagentId, task) {
|
|
629
954
|
const subagent = this.subagents.get(subagentId);
|
|
630
955
|
if (!subagent) return;
|
|
956
|
+
if (this.terminating.has(subagentId) || subagent.status === "stopped") {
|
|
957
|
+
this.recordCompletion({
|
|
958
|
+
subagentId,
|
|
959
|
+
taskId: task.id,
|
|
960
|
+
status: "stopped",
|
|
961
|
+
error: {
|
|
962
|
+
kind: "aborted_by_parent",
|
|
963
|
+
message: "Subagent was terminated before task could start",
|
|
964
|
+
retryable: false
|
|
965
|
+
},
|
|
966
|
+
iterations: 0,
|
|
967
|
+
toolCalls: 0,
|
|
968
|
+
durationMs: 0
|
|
969
|
+
});
|
|
970
|
+
return;
|
|
971
|
+
}
|
|
631
972
|
subagent.status = "running";
|
|
632
973
|
subagent.currentTask = task.id;
|
|
633
974
|
task.subagentId = subagentId;
|
|
@@ -673,7 +1014,9 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
|
|
|
673
1014
|
subagentId,
|
|
674
1015
|
taskId: task.id,
|
|
675
1016
|
status,
|
|
676
|
-
error:
|
|
1017
|
+
error: classifySubagentError(err, {
|
|
1018
|
+
parentAborted: subagent.abortController.signal.aborted
|
|
1019
|
+
}),
|
|
677
1020
|
iterations: usage.iterations,
|
|
678
1021
|
toolCalls: usage.toolCalls,
|
|
679
1022
|
durationMs: Date.now() - startTime
|
|
@@ -712,19 +1055,14 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
|
|
|
712
1055
|
}
|
|
713
1056
|
const subagent = this.subagents.get(result.subagentId);
|
|
714
1057
|
if (subagent && subagent.status !== "stopped") {
|
|
715
|
-
|
|
716
|
-
subagent.status =
|
|
1058
|
+
result.status === "failed" || result.status === "timeout";
|
|
1059
|
+
subagent.status = "idle";
|
|
717
1060
|
subagent.currentTask = void 0;
|
|
718
1061
|
if (subagent.abortController.signal.aborted) {
|
|
719
1062
|
subagent.abortController = new AbortController();
|
|
720
1063
|
}
|
|
721
|
-
if (subagent.status === "error") {
|
|
722
|
-
queueMicrotask(() => {
|
|
723
|
-
if (subagent.status === "error") subagent.status = "idle";
|
|
724
|
-
this.tryDispatchNext();
|
|
725
|
-
});
|
|
726
|
-
}
|
|
727
1064
|
}
|
|
1065
|
+
this.terminating.delete(result.subagentId);
|
|
728
1066
|
this.emit("task.completed", {
|
|
729
1067
|
task: subagent?.context.tasks.find((t) => t.id === result.taskId) ?? { id: result.taskId },
|
|
730
1068
|
result
|
|
@@ -747,6 +1085,99 @@ var DefaultMultiAgentCoordinator = class extends EventEmitter {
|
|
|
747
1085
|
return false;
|
|
748
1086
|
}
|
|
749
1087
|
};
|
|
1088
|
+
/**
 * Convert an arbitrary thrown value into a structured subagent error
 * `{ kind, message, retryable, cause }`.
 *
 * Classification order: ProviderError, BudgetExceededError, the
 * `hints.parentAborted` flag, then pattern matching on the message text.
 * Anything unmatched is reported as kind "unknown".
 *
 * @param {unknown} err - The thrown value; non-Error values are stringified.
 * @param {{ parentAborted?: boolean }} [hints]
 * @returns {{ kind: string, message: string, retryable: boolean, backoffMs?: number, cause?: object }}
 */
function classifySubagentError(err, hints = {}) {
  const isError = err instanceof Error;
  const cause = isError ? { name: err.name, message: err.message, stack: err.stack } : void 0;
  const baseMessage = isError ? err.message : String(err);
  if (err instanceof ProviderError) {
    return providerErrorToSubagentError(err, baseMessage, cause);
  }
  if (err instanceof BudgetExceededError) {
    const map = {
      iterations: "budget_iterations",
      tool_calls: "budget_tool_calls",
      tokens: "budget_tokens",
      cost: "budget_cost",
      timeout: "budget_timeout"
    };
    return {
      // A budget kind without a mapping must not produce `kind: undefined`.
      kind: Object.hasOwn(map, err.kind) ? map[err.kind] : "unknown",
      message: baseMessage,
      // Budgets are user-configured ceilings, not transient failures —
      // retrying with the same budget will hit the same ceiling. The
      // orchestrator must raise the budget or narrow the task first.
      retryable: false,
      cause
    };
  }
  // Small factory for the non-retryable results produced below.
  const terminal = (kind) => ({ kind, message: baseMessage, retryable: false, cause });
  if (hints.parentAborted) {
    return terminal("aborted_by_parent");
  }
  if (/agent aborted$/i.test(baseMessage)) {
    return terminal("aborted_by_parent");
  }
  if (/agent exhausted iteration limit$/i.test(baseMessage)) {
    return terminal("budget_iterations");
  }
  if (/empty response$/i.test(baseMessage)) {
    return terminal("empty_response");
  }
  if (/^tool failed: /i.test(baseMessage)) {
    return terminal("tool_failed");
  }
  if (baseMessage.toLowerCase().includes("bridge transport") || /bridge.*(closed|disconnect)/i.test(baseMessage)) {
    return terminal("bridge_failed");
  }
  if (/context length|max.*tokens?.*exceeded|prompt is too long/i.test(baseMessage)) {
    return terminal("context_overflow");
  }
  return terminal("unknown");
}
|
|
1151
|
+
/**
 * Classify a provider failure into a subagent error record.
 *
 * Checked in order: rate limiting (429 or "rate_limit_error"),
 * authentication (401/403 or "authentication_error"), timeouts (408 or
 * status 0) and 5xx responses. Anything else is "unknown" and inherits the
 * provider error's own `retryable` flag.
 */
function providerErrorToSubagentError(err, message, cause) {
  const { status } = err;
  const type = err.body?.type;
  // Builds the result; `backoffMs` is only present when a delay is given.
  const result = (kind, retryable, backoffMs) => backoffMs === void 0 ? { kind, message, retryable, cause } : { kind, message, retryable, backoffMs, cause };
  if (status === 429 || type === "rate_limit_error") {
    // Fixed 5s backoff for rate limits.
    return result("provider_rate_limit", true, 5e3);
  }
  if (status === 401 || status === 403 || type === "authentication_error") {
    return result("provider_auth", false);
  }
  if (status === 408 || status === 0) {
    return result("provider_timeout", true);
  }
  const isServerError = status >= 500 && status < 600;
  if (isServerError) {
    return result("provider_5xx", true, 3e3);
  }
  return result("unknown", err.retryable);
}
|
|
750
1181
|
|
|
751
1182
|
// src/coordination/director.ts
|
|
752
1183
|
var DirectorBudgetError = class extends Error {
|
|
@@ -810,6 +1241,27 @@ var Director = class {
|
|
|
810
1241
|
spawnDepth;
|
|
811
1242
|
/** Live spawn counter for `maxSpawns` enforcement. */
|
|
812
1243
|
spawnCount = 0;
|
|
1244
|
+
/** Optional checkpoint mirror — writes the live task graph + roster to disk. */
|
|
1245
|
+
stateCheckpoint;
|
|
1246
|
+
/** Optional session writer for emitting task_* / agent_* lifecycle events. */
|
|
1247
|
+
sessionWriter;
|
|
1248
|
+
/** Debounce timer for periodic manifest writes. */
|
|
1249
|
+
manifestTimer = null;
|
|
1250
|
+
manifestDebounceMs;
|
|
1251
|
+
/** Resolves task descriptions back from `assign()` so completion events
|
|
1252
|
+
* can also carry a human-readable title. */
|
|
1253
|
+
taskDescriptions = /* @__PURE__ */ new Map();
|
|
1254
|
+
/** Snapshot of which subagent owns each task — drives state-checkpoint
|
|
1255
|
+
* status updates without re-walking the manifest. */
|
|
1256
|
+
taskOwners = /* @__PURE__ */ new Map();
|
|
1257
|
+
/**
|
|
1258
|
+
* Handle to the coordinator-side `task.completed` listener so we can
|
|
1259
|
+
* unsubscribe in `shutdown()`. Without this, repeated Director
|
|
1260
|
+
* construction (e.g. tests, hot reloads) accumulates listeners on a
|
|
1261
|
+
* cached coordinator and slowly drifts the EventEmitter past its
|
|
1262
|
+
* default cap.
|
|
1263
|
+
*/
|
|
1264
|
+
taskCompletedListener = null;
|
|
813
1265
|
constructor(opts) {
|
|
814
1266
|
this.id = opts.config.coordinatorId || randomUUID();
|
|
815
1267
|
this.manifestPath = opts.manifestPath;
|
|
@@ -820,8 +1272,16 @@ var Director = class {
|
|
|
820
1272
|
this.maxSpawns = opts.maxSpawns ?? Number.POSITIVE_INFINITY;
|
|
821
1273
|
this.maxSpawnDepth = opts.maxSpawnDepth ?? 2;
|
|
822
1274
|
this.spawnDepth = opts.spawnDepth ?? 0;
|
|
1275
|
+
this.sessionWriter = opts.sessionWriter ?? null;
|
|
1276
|
+
this.manifestDebounceMs = opts.manifestDebounceMs ?? 2e3;
|
|
1277
|
+
this.stateCheckpoint = opts.stateCheckpointPath ? new DirectorStateCheckpoint(opts.stateCheckpointPath, {
|
|
1278
|
+
directorRunId: this.id,
|
|
1279
|
+
maxSpawns: opts.maxSpawns,
|
|
1280
|
+
spawnDepth: this.spawnDepth,
|
|
1281
|
+
maxSpawnDepth: this.maxSpawnDepth
|
|
1282
|
+
}) : null;
|
|
823
1283
|
if (this.sharedScratchpadPath) {
|
|
824
|
-
void
|
|
1284
|
+
void fsp4.mkdir(this.sharedScratchpadPath, { recursive: true }).catch(() => void 0);
|
|
825
1285
|
}
|
|
826
1286
|
this.transport = new InMemoryBridgeTransport();
|
|
827
1287
|
this.bridge = new InMemoryAgentBridge(
|
|
@@ -838,7 +1298,7 @@ var Director = class {
|
|
|
838
1298
|
{ ...opts.config, coordinatorId: this.id },
|
|
839
1299
|
{ runner: opts.runner }
|
|
840
1300
|
);
|
|
841
|
-
this.
|
|
1301
|
+
this.taskCompletedListener = (payload) => {
|
|
842
1302
|
const r = payload.result;
|
|
843
1303
|
this.completed.set(r.taskId, r);
|
|
844
1304
|
const waiter = this.taskWaiters.get(r.taskId);
|
|
@@ -846,7 +1306,54 @@ var Director = class {
|
|
|
846
1306
|
waiter.resolve(r);
|
|
847
1307
|
this.taskWaiters.delete(r.taskId);
|
|
848
1308
|
}
|
|
849
|
-
|
|
1309
|
+
const title = this.taskDescriptions.get(r.taskId) ?? payload.task.description ?? r.taskId;
|
|
1310
|
+
const failed = r.status !== "success";
|
|
1311
|
+
const errorString = r.error ? `${r.error.kind}: ${r.error.message}` : void 0;
|
|
1312
|
+
this.stateCheckpoint?.recordTaskStatus(r.taskId, {
|
|
1313
|
+
status: failed ? r.status : "completed",
|
|
1314
|
+
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1315
|
+
iterations: r.iterations,
|
|
1316
|
+
toolCalls: r.toolCalls,
|
|
1317
|
+
durationMs: r.durationMs,
|
|
1318
|
+
error: errorString
|
|
1319
|
+
});
|
|
1320
|
+
this.stateCheckpoint?.setUsage(this.usage.snapshot());
|
|
1321
|
+
void this.appendSessionEvent(
|
|
1322
|
+
failed ? {
|
|
1323
|
+
type: "task_failed",
|
|
1324
|
+
ts: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1325
|
+
taskId: r.taskId,
|
|
1326
|
+
title,
|
|
1327
|
+
error: errorString ?? r.status
|
|
1328
|
+
} : {
|
|
1329
|
+
type: "task_completed",
|
|
1330
|
+
ts: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1331
|
+
taskId: r.taskId,
|
|
1332
|
+
title
|
|
1333
|
+
}
|
|
1334
|
+
);
|
|
1335
|
+
this.scheduleManifest();
|
|
1336
|
+
};
|
|
1337
|
+
this.coordinator.on("task.completed", this.taskCompletedListener);
|
|
1338
|
+
}
|
|
1339
|
+
/** Best-effort session-writer append. Swallows failures — the director
|
|
1340
|
+
* must not break a fleet run because the session JSONL handle closed. */
|
|
1341
|
+
async appendSessionEvent(event) {
|
|
1342
|
+
if (!this.sessionWriter) return;
|
|
1343
|
+
try {
|
|
1344
|
+
await this.sessionWriter.append(event);
|
|
1345
|
+
} catch {
|
|
1346
|
+
}
|
|
1347
|
+
}
|
|
1348
|
+
/** Debounced manifest writer. A burst of spawn/assign/complete events
|
|
1349
|
+
* collapses into one write. Set `manifestDebounceMs` to 0 to disable. */
|
|
1350
|
+
scheduleManifest() {
|
|
1351
|
+
if (!this.manifestPath || this.manifestDebounceMs <= 0) return;
|
|
1352
|
+
if (this.manifestTimer) return;
|
|
1353
|
+
this.manifestTimer = setTimeout(() => {
|
|
1354
|
+
this.manifestTimer = null;
|
|
1355
|
+
void this.writeManifest().catch(() => void 0);
|
|
1356
|
+
}, this.manifestDebounceMs);
|
|
850
1357
|
}
|
|
851
1358
|
/**
|
|
852
1359
|
* Spawn a subagent. Identical to the coordinator's `spawn()` but
|
|
@@ -885,6 +1392,25 @@ var Director = class {
|
|
|
885
1392
|
model: config.model,
|
|
886
1393
|
taskIds: []
|
|
887
1394
|
});
|
|
1395
|
+
const spawnedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1396
|
+
this.stateCheckpoint?.recordSpawn(
|
|
1397
|
+
{
|
|
1398
|
+
id: result.subagentId,
|
|
1399
|
+
name: config.name,
|
|
1400
|
+
role: config.role,
|
|
1401
|
+
provider: config.provider,
|
|
1402
|
+
model: config.model,
|
|
1403
|
+
spawnedAt
|
|
1404
|
+
},
|
|
1405
|
+
this.spawnCount
|
|
1406
|
+
);
|
|
1407
|
+
void this.appendSessionEvent({
|
|
1408
|
+
type: "agent_spawned",
|
|
1409
|
+
ts: spawnedAt,
|
|
1410
|
+
agentId: result.subagentId,
|
|
1411
|
+
role: config.role ?? config.name
|
|
1412
|
+
});
|
|
1413
|
+
this.scheduleManifest();
|
|
888
1414
|
return result.subagentId;
|
|
889
1415
|
}
|
|
890
1416
|
/**
|
|
@@ -995,8 +1521,8 @@ var Director = class {
|
|
|
995
1521
|
})),
|
|
996
1522
|
usage: this.usage.snapshot()
|
|
997
1523
|
};
|
|
998
|
-
await
|
|
999
|
-
await
|
|
1524
|
+
await fsp4.mkdir(path4.dirname(this.manifestPath), { recursive: true });
|
|
1525
|
+
await fsp4.writeFile(this.manifestPath, JSON.stringify(manifest, null, 2), { mode: 384 });
|
|
1000
1526
|
return this.manifestPath;
|
|
1001
1527
|
}
|
|
1002
1528
|
/**
|
|
@@ -1005,13 +1531,42 @@ var Director = class {
|
|
|
1005
1531
|
* — calling shutdown twice is a no-op on the second invocation.
|
|
1006
1532
|
*/
|
|
1007
1533
|
async shutdown() {
|
|
1534
|
+
if (this.manifestTimer) {
|
|
1535
|
+
clearTimeout(this.manifestTimer);
|
|
1536
|
+
this.manifestTimer = null;
|
|
1537
|
+
}
|
|
1538
|
+
if (this.taskCompletedListener) {
|
|
1539
|
+
this.coordinator.off("task.completed", this.taskCompletedListener);
|
|
1540
|
+
this.taskCompletedListener = null;
|
|
1541
|
+
}
|
|
1008
1542
|
await this.coordinator.stopAll();
|
|
1009
1543
|
for (const b of this.subagentBridges.values()) {
|
|
1010
|
-
await b.stop().catch(() =>
|
|
1544
|
+
await b.stop().catch((err) => this.logShutdownError("subagent_bridge_stop", err));
|
|
1011
1545
|
}
|
|
1012
1546
|
this.subagentBridges.clear();
|
|
1013
|
-
await this.bridge.stop().catch(() =>
|
|
1014
|
-
if (this.manifestPath)
|
|
1547
|
+
await this.bridge.stop().catch((err) => this.logShutdownError("director_bridge_stop", err));
|
|
1548
|
+
if (this.manifestPath)
|
|
1549
|
+
await this.writeManifest().catch((err) => this.logShutdownError("manifest_write", err));
|
|
1550
|
+
if (this.stateCheckpoint) {
|
|
1551
|
+
this.stateCheckpoint.setUsage(this.usage.snapshot());
|
|
1552
|
+
await this.stateCheckpoint.flush().catch((err) => this.logShutdownError("state_checkpoint_flush", err));
|
|
1553
|
+
}
|
|
1554
|
+
}
|
|
1555
|
+
/**
|
|
1556
|
+
* Funnel for shutdown-phase errors. We can't throw — `shutdown()` is
|
|
1557
|
+
* called from process-exit paths where an uncaught throw would lose
|
|
1558
|
+
* the manifest write that comes after. But we MUST NOT silently
|
|
1559
|
+
* swallow either — a persistent bridge-close failure would otherwise
|
|
1560
|
+
* mask a real bug. `process.emitWarning` is the right tier:
|
|
1561
|
+
* surfaces on stderr by default, lets the host plug a warning
|
|
1562
|
+
* listener for structured collection, and never affects exit code.
|
|
1563
|
+
*/
|
|
1564
|
+
logShutdownError(phase, err) {
|
|
1565
|
+
const detail = err instanceof Error ? err.message : String(err);
|
|
1566
|
+
process.emitWarning(
|
|
1567
|
+
`Director shutdown phase "${phase}" failed: ${detail}`,
|
|
1568
|
+
"DirectorShutdownWarning"
|
|
1569
|
+
);
|
|
1015
1570
|
}
|
|
1016
1571
|
/**
|
|
1017
1572
|
* Hand a task to the coordinator. Returns the assigned task id so
|
|
@@ -1025,6 +1580,23 @@ var Director = class {
|
|
|
1025
1580
|
if (entry) entry.taskIds.push(taskWithId.id);
|
|
1026
1581
|
}
|
|
1027
1582
|
await this.coordinator.assign(taskWithId);
|
|
1583
|
+
this.taskDescriptions.set(taskWithId.id, taskWithId.description);
|
|
1584
|
+
if (taskWithId.subagentId) this.taskOwners.set(taskWithId.id, taskWithId.subagentId);
|
|
1585
|
+
const assignedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1586
|
+
this.stateCheckpoint?.recordTaskAssigned({
|
|
1587
|
+
taskId: taskWithId.id,
|
|
1588
|
+
subagentId: taskWithId.subagentId,
|
|
1589
|
+
description: taskWithId.description,
|
|
1590
|
+
status: "running",
|
|
1591
|
+
assignedAt
|
|
1592
|
+
});
|
|
1593
|
+
void this.appendSessionEvent({
|
|
1594
|
+
type: "task_created",
|
|
1595
|
+
ts: assignedAt,
|
|
1596
|
+
taskId: taskWithId.id,
|
|
1597
|
+
title: taskWithId.description
|
|
1598
|
+
});
|
|
1599
|
+
this.scheduleManifest();
|
|
1028
1600
|
return taskWithId.id;
|
|
1029
1601
|
}
|
|
1030
1602
|
/**
|
|
@@ -1084,6 +1656,23 @@ var Director = class {
|
|
|
1084
1656
|
snapshot() {
|
|
1085
1657
|
return this.usage.snapshot();
|
|
1086
1658
|
}
|
|
1659
|
+
/**
|
|
1660
|
+
* Look up provider/model metadata for a spawned subagent. Returns
|
|
1661
|
+
* undefined when the subagent id is unknown (not yet spawned, or
|
|
1662
|
+
* already torn down). Callers — notably the TUI fleet panel — use
|
|
1663
|
+
* this to render human-readable provider/model tags next to each
|
|
1664
|
+
* subagent row without reaching into private state.
|
|
1665
|
+
*/
|
|
1666
|
+
getSubagentMeta(id) {
|
|
1667
|
+
const usage = this.subagentMeta.get(id);
|
|
1668
|
+
const manifest = this.manifestEntries.get(id);
|
|
1669
|
+
if (!usage && !manifest) return void 0;
|
|
1670
|
+
return {
|
|
1671
|
+
provider: usage?.provider ?? manifest?.provider,
|
|
1672
|
+
model: usage?.model ?? manifest?.model,
|
|
1673
|
+
name: manifest?.name
|
|
1674
|
+
};
|
|
1675
|
+
}
|
|
1087
1676
|
/**
|
|
1088
1677
|
* Compose the leader/director-agent system prompt: fleet preamble +
|
|
1089
1678
|
* (optional) roster summary + user base prompt. Pass the result to your
|
|
@@ -1383,12 +1972,260 @@ function makeFleetUsageTool(director) {
|
|
|
1383
1972
|
}
|
|
1384
1973
|
};
|
|
1385
1974
|
}
|
|
1975
|
+
/**
 * Build the `delegate` tool: spawn one subagent, assign it a single task and
 * wait for the outcome (or for the host-side timeout, whichever comes first).
 *
 * @param {object} opts
 * @param {object} opts.host - Must provide `ensureDirector()` and
 *   `promoteToDirector()`; `getPromotionBlockReason()` is optional and, when
 *   present, supplies the error text if no director can be obtained.
 * @param {object} [opts.roster] - Map of role id -> base subagent config.
 * @param {number} [opts.defaultTimeoutMs] - Host-side wait budget used when
 *   the caller passes no usable `timeoutMs`. Defaults to 4 hours; values that
 *   are not positive finite numbers also fall back to 4 hours.
 * @param {string} [opts.sessionsRoot] - Read by `readSubagentPartial`.
 * @param {string} [opts.directorRunId] - Read by `readSubagentPartial`.
 * @returns {object} Tool descriptor (`name`, `description`, `usageHint`,
 *   `permission`, `mutating`, `inputSchema`, `execute`). `execute` resolves to
 *   a plain result object with `ok` plus status details; failures raised by
 *   spawn/assign/await are reported as `{ ok: false, stopReason: "error" }`.
 */
function createDelegateTool(opts) {
  // Node's timers treat any delay above 2^31-1 ms as 1 ms, so larger
  // budgets must be clamped or the "timeout" would fire immediately.
  const MAX_TIMER_DELAY_MS = 2147483647;
  const FALLBACK_TIMEOUT_MS = 4 * 60 * 60 * 1e3;
  const SUBAGENT_TIMEOUT_BUFFER_MS = 3e4;
  // Unique sentinel for the timeout branch of the race; it cannot collide
  // with a real task result.
  const TIMED_OUT = Symbol("delegate-host-timeout");
  const isUsableMs = (value) => typeof value === "number" && Number.isFinite(value) && value > 0;

  const defaultTimeoutMs = isUsableMs(opts.defaultTimeoutMs) ? opts.defaultTimeoutMs : FALLBACK_TIMEOUT_MS;
  const rosterIds = opts.roster ? Object.keys(opts.roster) : [];
  const inputSchema = {
    type: "object",
    properties: {
      task: {
        type: "string",
        description: "What the subagent should do \u2014 natural language, complete sentence(s). The subagent has its own tool slice, its own LLM call, and returns when its task is done."
      },
      role: {
        type: "string",
        description: rosterIds.length > 0 ? `Roster role (preferred). One of: ${rosterIds.join(", ")}. Picks a pre-tuned config (prompt, budgets, tools) for that role.` : "No roster is configured \u2014 pass `name` instead.",
        enum: rosterIds.length > 0 ? rosterIds : void 0
      },
      name: {
        type: "string",
        description: "Display name for the subagent when not using a roster role. Required when `role` is omitted."
      },
      provider: {
        type: "string",
        description: 'Provider id (e.g. "anthropic", "openai"). Defaults to the host provider when omitted.'
      },
      model: {
        type: "string",
        description: "Model id within the provider. Defaults to the host model when omitted."
      },
      systemPromptOverride: {
        type: "string",
        description: "Optional extra prompt text appended to the role baseline."
      },
      timeoutMs: {
        type: "number",
        description: `Wall-clock budget for this delegate in milliseconds. No hard cap \u2014 set as high as the task realistically needs (a monorepo audit can take hours, a single-file lint takes seconds). Default ${Math.round(defaultTimeoutMs / 1e3 / 60)} minutes.`
      },
      maxIterations: {
        type: "number",
        description: "Maximum LLM iterations the subagent may take. Unset = use the role/coordinator default. Raise this for tasks with many tool-think-tool cycles (deep code analysis, multi-file refactors)."
      },
      maxToolCalls: {
        type: "number",
        description: "Maximum number of tool invocations the subagent may make. Unset = use the role/coordinator default. Raise this for tasks that touch many files (large grep + read + report)."
      }
    },
    required: ["task"]
  };
  return {
    name: "delegate",
    description: "Hand a discrete piece of work to a dedicated subagent and wait for its result. The subagent has its own context, its own LLM call, and its own budget \u2014 use this when a task is self-contained, would otherwise blow up your context, or benefits from a specialized role (bug-hunter, security-scanner, refactor-planner, audit-log). YOU decide how big the budget is: pass `timeoutMs`, `maxIterations`, and `maxToolCalls` sized to the actual work. There is no hidden cap forcing a 3-minute / 80-iteration limit \u2014 if a monorepo audit needs 2 hours and 500 tool calls, ask for that. Call multiple delegates in parallel through the provider's parallel-tool-call surface to fan work out across roles.",
    usageHint: "Set `task` to a complete instruction. Either pick `role` from the roster or pass `name` + `provider` + `model`. For non-trivial work, also pass `timeoutMs` (the wall-clock budget you actually need), `maxIterations`, and `maxToolCalls` \u2014 defaults are intentionally generous (4 hours) but the right values depend on scope. Returns the subagent's `TaskResult` \u2014 including the textual `result`, iteration count, tool count, and duration. Auto-promotes the host into director mode on first call.",
    permission: "auto",
    mutating: false,
    inputSchema,
    async execute(input) {
      const i = input ?? {};
      if (typeof i.task !== "string" || !i.task.trim()) {
        return { ok: false, error: "`task` is required." };
      }
      let director = await opts.host.ensureDirector();
      if (!director) {
        director = await opts.host.promoteToDirector();
      }
      if (!director) {
        const reason = opts.host.getPromotionBlockReason?.();
        return {
          ok: false,
          error: reason ?? "Director could not be activated \u2014 multi-agent host already running in legacy non-director mode. Restart with `--director` for fleet support."
        };
      }
      // Ignore unusable budgets (strings, NaN, <= 0) the same way unusable
      // maxIterations / maxToolCalls are ignored below, then clamp to what a
      // Node timer can represent.
      const requestedTimeoutMs = isUsableMs(i.timeoutMs) ? i.timeoutMs : defaultTimeoutMs;
      const timeoutMs = Math.min(requestedTimeoutMs, MAX_TIMER_DELAY_MS);
      let cfg;
      if (i.role) {
        // Own-property check: a role such as "toString" must not resolve to
        // something inherited from Object.prototype.
        const isRosterRole = opts.roster != null && Object.prototype.hasOwnProperty.call(opts.roster, i.role);
        const base = isRosterRole ? opts.roster[i.role] : void 0;
        if (!base) {
          return {
            ok: false,
            error: `Unknown role "${i.role}". Available: ${rosterIds.join(", ") || "(no roster configured)"}.`
          };
        }
        // Copy so per-call overrides never mutate the shared roster entry.
        cfg = { ...base };
        if (i.systemPromptOverride) cfg.systemPromptOverride = i.systemPromptOverride;
        if (i.provider) cfg.provider = i.provider;
        if (i.model) cfg.model = i.model;
      } else {
        if (!i.name) {
          return {
            ok: false,
            error: "Either `role` (from the roster) or `name` is required."
          };
        }
        cfg = {
          name: i.name,
          provider: i.provider,
          model: i.model,
          systemPromptOverride: i.systemPromptOverride
        };
      }
      if (typeof i.maxIterations === "number" && i.maxIterations > 0) {
        cfg.maxIterations = i.maxIterations;
      }
      if (typeof i.maxToolCalls === "number" && i.maxToolCalls > 0) {
        cfg.maxToolCalls = i.maxToolCalls;
      }
      // Give the subagent slightly less time than the host waits, so its own
      // timeout normally trips first.
      const desiredSubTimeout = Math.max(3e4, timeoutMs - SUBAGENT_TIMEOUT_BUFFER_MS);
      if (!cfg.timeoutMs || cfg.timeoutMs > desiredSubTimeout) {
        cfg.timeoutMs = desiredSubTimeout;
      }
      try {
        const subagentId = await director.spawn(cfg);
        const taskId = await director.assign({
          id: "",
          description: i.task,
          subagentId
        });
        let timer;
        let result;
        try {
          result = await Promise.race([
            director.awaitTasks([taskId]).then((r) => r?.[0]),
            new Promise((resolve) => {
              timer = setTimeout(() => resolve(TIMED_OUT), timeoutMs);
            })
          ]);
        } finally {
          // Always release the timer; otherwise a finished delegate keeps
          // the event loop alive for the rest of the budget.
          clearTimeout(timer);
        }
        if (result === TIMED_OUT) {
          const partial2 = await readSubagentPartial(opts, subagentId);
          return {
            ok: false,
            stopReason: "host_timeout",
            error: `Subagent did not finish within ${timeoutMs}ms.`,
            hint: "Reduce scope of the next delegate, raise timeoutMs, or use spawn_subagent + await_tasks for long-running work.",
            subagentId,
            taskId,
            partial: partial2
          };
        }
        if (!result) {
          return {
            ok: false,
            stopReason: "error",
            error: `Coordinator returned no result for task "${taskId}".`,
            subagentId,
            taskId
          };
        }
        const baseStopReason = result.status === "success" ? "end_turn" : result.status === "timeout" ? "subagent_timeout" : result.status === "stopped" ? "aborted" : "budget_exhausted";
        const partial = result.status === "success" ? void 0 : await readSubagentPartial(opts, subagentId);
        const errorKind = result.error?.kind;
        const retryable = result.error?.retryable;
        const backoffMs = result.error?.backoffMs;
        const hint = hintForKind(errorKind, retryable, backoffMs);
        return {
          ok: result.status === "success",
          status: result.status,
          stopReason: baseStopReason,
          errorKind,
          retryable,
          backoffMs,
          subagentId: result.subagentId,
          taskId: result.taskId,
          result: result.result,
          error: result.error,
          iterations: result.iterations,
          toolCalls: result.toolCalls,
          durationMs: result.durationMs,
          ...partial ? { partial } : {},
          ...hint ? { hint } : {}
        };
      } catch (err) {
        return {
          ok: false,
          stopReason: "error",
          error: err instanceof Error ? err.message : String(err)
        };
      }
    }
  };
}
|
|
2140
|
+
/**
 * Translate a classified subagent failure into a short recovery hint.
 *
 * @param {string|undefined} kind - Error classification; a falsy value yields no hint.
 * @param {boolean|undefined} retryable - Only consulted for kinds without a dedicated hint.
 * @param {number|undefined} backoffMs - Suggested wait, used by the rate-limit
 *   and 5xx hints (defaults: 5000 ms and 3000 ms respectively).
 * @returns {string|undefined} Hint text, or `undefined` when nothing useful can be said.
 */
function hintForKind(kind, retryable, backoffMs) {
  if (!kind) return void 0;

  // All four non-timeout budget kinds share one message.
  const budgetExhausted = () => "Subagent exhausted its budget. Raise the matching `max*` field on the next delegate or narrow task scope.";

  // A Map (not a plain object) so kinds like "constructor" never match.
  const hintBuilders = new Map([
    ["provider_rate_limit", () => `Provider rate-limited. Retry safe after ${backoffMs ?? 5e3}ms backoff. Consider a smaller model or fewer parallel delegates.`],
    ["provider_5xx", () => `Provider server error. Retry safe after ${backoffMs ?? 3e3}ms backoff \u2014 usually transient.`],
    ["provider_timeout", () => "Provider network timeout. Retry safe; reduce input size if it persists."],
    ["provider_auth", () => "Provider rejected credentials. Cannot retry \u2014 fix the API key / config and re-invoke."],
    ["context_overflow", () => "Subagent context exceeded the model limit. Narrow the task, use a larger-context model, or split into multiple delegates."],
    ["budget_iterations", budgetExhausted],
    ["budget_tool_calls", budgetExhausted],
    ["budget_tokens", budgetExhausted],
    ["budget_cost", budgetExhausted],
    ["budget_timeout", () => "Subagent hit its wall-clock budget. Raise `timeoutMs` on the next delegate or split the task."],
    ["aborted_by_parent", () => "Subagent was aborted (user Ctrl+C, parent unwound, or sibling failure cascade). Not retryable until the abort condition is resolved."],
    ["empty_response", () => "Subagent ended its turn with no text and no tool calls. Almost always a prompt / config issue \u2014 clarify the task or check the model."],
    ["tool_failed", () => "A tool inside the subagent returned ok:false. Inspect `partial.lastAssistantText` for the agent reasoning, then retry with corrected inputs."],
    ["bridge_failed", () => "Parent-child bridge transport failed. This is rare \u2014 restart the session and retry."]
  ]);

  const buildHint = hintBuilders.get(kind);
  if (buildHint) return buildHint();

  // Unknown kind: the retryable flag is all we have to go on.
  if (retryable) return "Failure classified as retryable. Try again with the same input.";
  return void 0;
}
|
|
2172
|
+
/**
 * Summarise whatever a subagent managed to write to its JSONL transcript.
 *
 * The transcript is looked up at `<sessionsRoot>/<runDir>/<subagentId>.jsonl`.
 * With `opts.directorRunId` set only that run directory is tried; otherwise
 * every entry of `sessionsRoot` is tried in listing order and the first
 * readable transcript wins.
 *
 * @param {object} opts - Reads `sessionsRoot` and `directorRunId`.
 * @param {string} subagentId - Transcript base name.
 * @returns {Promise<{lastAssistantText: (string|undefined), lastStopReason: (string|undefined), toolUsesObserved: number, events: number}|undefined>}
 *   `undefined` when `sessionsRoot` is unset, cannot be listed, or no
 *   transcript could be read. `events` counts non-blank lines, including
 *   ones that fail to parse.
 */
async function readSubagentPartial(opts, subagentId) {
  const { sessionsRoot, directorRunId } = opts;
  if (!sessionsRoot) return void 0;

  const transcriptIn = (runDir) => path4.join(sessionsRoot, runDir, `${subagentId}.jsonl`);

  let candidates;
  if (directorRunId) {
    candidates = [transcriptIn(directorRunId)];
  } else {
    try {
      const runDirs = await fsp4.readdir(sessionsRoot);
      candidates = runDirs.map((runDir) => transcriptIn(runDir));
    } catch {
      return void 0;
    }
  }

  for (const file of candidates) {
    let raw;
    try {
      raw = await fsp4.readFile(file, "utf8");
    } catch {
      // Missing or unreadable in this run directory — try the next one.
      continue;
    }

    const lines = raw.split("\n").filter((l) => l.trim());
    const summary = {
      lastAssistantText: void 0,
      lastStopReason: void 0,
      toolUsesObserved: 0,
      events: lines.length
    };

    for (const line of lines) {
      try {
        const ev = JSON.parse(line);
        switch (ev.type) {
          case "tool_use":
            summary.toolUsesObserved += 1;
            break;
          case "llm_response": {
            if (typeof ev.stopReason === "string") summary.lastStopReason = ev.stopReason;
            if (Array.isArray(ev.content)) {
              const pieces = [];
              for (const block of ev.content) {
                if (block.type === "text") pieces.push(block.text ?? "");
              }
              const text = pieces.join("\n").trim();
              // Keep the previous text when this response carried none.
              if (text) summary.lastAssistantText = text;
            }
            break;
          }
          default:
            break;
        }
      } catch {
        // Malformed line or unexpected event shape: skip it.
      }
    }
    return summary;
  }
  return void 0;
}
|
|
1386
2221
|
|
|
1387
2222
|
// src/coordination/agent-subagent-runner.ts
|
|
1388
2223
|
function makeAgentSubagentRunner(opts) {
|
|
1389
2224
|
const format = opts.formatTaskInput ?? defaultFormatTaskInput;
|
|
1390
2225
|
return async (task, ctx) => {
|
|
1391
|
-
const
|
|
2226
|
+
const factoryResult = await opts.factory(ctx.config);
|
|
2227
|
+
const { agent, events } = factoryResult;
|
|
2228
|
+
const detachFleet = opts.fleetBus?.attach(ctx.subagentId, events, task.id);
|
|
1392
2229
|
const aborter = new AbortController();
|
|
1393
2230
|
let budgetError = null;
|
|
1394
2231
|
const onBudgetError = (err) => {
|
|
@@ -1402,13 +2239,19 @@ function makeAgentSubagentRunner(opts) {
|
|
|
1402
2239
|
budgetError.message += ` (caused by: ${err.message})`;
|
|
1403
2240
|
}
|
|
1404
2241
|
};
|
|
2242
|
+
let lastToolFailed = null;
|
|
1405
2243
|
const unsub = [];
|
|
1406
2244
|
unsub.push(
|
|
1407
|
-
events.on("tool.
|
|
2245
|
+
events.on("tool.executed", (e) => {
|
|
1408
2246
|
try {
|
|
1409
2247
|
ctx.budget.recordToolCall();
|
|
1410
|
-
} catch (
|
|
1411
|
-
onBudgetError(
|
|
2248
|
+
} catch (eb) {
|
|
2249
|
+
onBudgetError(eb);
|
|
2250
|
+
}
|
|
2251
|
+
if (e.ok === false) {
|
|
2252
|
+
lastToolFailed = e.name;
|
|
2253
|
+
} else if (e.ok === true) {
|
|
2254
|
+
lastToolFailed = null;
|
|
1412
2255
|
}
|
|
1413
2256
|
}),
|
|
1414
2257
|
events.on("provider.response", (e) => {
|
|
@@ -1425,6 +2268,26 @@ function makeAgentSubagentRunner(opts) {
|
|
|
1425
2268
|
} catch (e) {
|
|
1426
2269
|
onBudgetError(e);
|
|
1427
2270
|
}
|
|
2271
|
+
}),
|
|
2272
|
+
// D3: cooperative timeout enforcement DURING a long tool call.
|
|
2273
|
+
// The iteration-loop checkTimeout() only fires between agent
|
|
2274
|
+
// iterations — a single `bash sleep 3600` call would otherwise
|
|
2275
|
+
// park inside one tool execution while the timeout silently
|
|
2276
|
+
// passes, relying solely on the coordinator's hard Promise.race
|
|
2277
|
+
// to interrupt. Tools that emit `tool.progress` (bash chunks,
|
|
2278
|
+
// fetch byte progress, spawn-stream stdout) give us a heartbeat
|
|
2279
|
+
// we can hang the check on. When the budget trips here:
|
|
2280
|
+
// 1. onBudgetError sets budgetError + aborter.abort()
|
|
2281
|
+
// 2. aborter signal propagates to agent.run → tool executor
|
|
2282
|
+
// 3. tool's own signal listener kills the child process
|
|
2283
|
+
// Cheap: O(1) per progress event, and the budget short-circuits
|
|
2284
|
+
// when timeoutMs is unset (most subagents have one set anyway).
|
|
2285
|
+
events.on("tool.progress", () => {
|
|
2286
|
+
try {
|
|
2287
|
+
ctx.budget.checkTimeout();
|
|
2288
|
+
} catch (e) {
|
|
2289
|
+
onBudgetError(e);
|
|
2290
|
+
}
|
|
1428
2291
|
})
|
|
1429
2292
|
);
|
|
1430
2293
|
const onParentAbort = () => aborter.abort();
|
|
@@ -1433,8 +2296,15 @@ function makeAgentSubagentRunner(opts) {
|
|
|
1433
2296
|
try {
|
|
1434
2297
|
result = await agent.run(format(task, ctx.config), { signal: aborter.signal });
|
|
1435
2298
|
} finally {
|
|
2299
|
+
detachFleet?.();
|
|
1436
2300
|
ctx.signal.removeEventListener("abort", onParentAbort);
|
|
1437
2301
|
for (const u of unsub) u();
|
|
2302
|
+
if (factoryResult.dispose) {
|
|
2303
|
+
try {
|
|
2304
|
+
await factoryResult.dispose();
|
|
2305
|
+
} catch {
|
|
2306
|
+
}
|
|
2307
|
+
}
|
|
1438
2308
|
}
|
|
1439
2309
|
if (budgetError) throw budgetError;
|
|
1440
2310
|
if (result.status === "failed") {
|
|
@@ -1447,6 +2317,13 @@ function makeAgentSubagentRunner(opts) {
|
|
|
1447
2317
|
throw new Error("agent exhausted iteration limit");
|
|
1448
2318
|
}
|
|
1449
2319
|
const usage = ctx.budget.usage();
|
|
2320
|
+
const finalText = (result.finalText ?? "").trim();
|
|
2321
|
+
if (finalText.length === 0 && usage.toolCalls === 0) {
|
|
2322
|
+
throw new Error("empty response");
|
|
2323
|
+
}
|
|
2324
|
+
if (finalText.length === 0 && lastToolFailed !== null) {
|
|
2325
|
+
throw new Error(`tool failed: ${lastToolFailed}`);
|
|
2326
|
+
}
|
|
1450
2327
|
return {
|
|
1451
2328
|
result: result.finalText,
|
|
1452
2329
|
iterations: result.iterations,
|
|
@@ -1457,11 +2334,6 @@ function makeAgentSubagentRunner(opts) {
|
|
|
1457
2334
|
function defaultFormatTaskInput(task) {
|
|
1458
2335
|
return task.description ?? "";
|
|
1459
2336
|
}
|
|
1460
|
-
async function ensureDir(dir) {
|
|
1461
|
-
await fsp2.mkdir(dir, { recursive: true });
|
|
1462
|
-
}
|
|
1463
|
-
|
|
1464
|
-
// src/storage/session-store.ts
|
|
1465
2337
|
var DefaultSessionStore = class {
|
|
1466
2338
|
dir;
|
|
1467
2339
|
events;
|
|
@@ -1473,10 +2345,10 @@ var DefaultSessionStore = class {
|
|
|
1473
2345
|
await ensureDir(this.dir);
|
|
1474
2346
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1475
2347
|
const id = meta.id ?? `${startedAt.replace(/[:.]/g, "-")}-${randomBytes(2).toString("hex")}`;
|
|
1476
|
-
const file =
|
|
2348
|
+
const file = path4.join(this.dir, `${id}.jsonl`);
|
|
1477
2349
|
let handle;
|
|
1478
2350
|
try {
|
|
1479
|
-
handle = await
|
|
2351
|
+
handle = await fsp4.open(file, "a", 384);
|
|
1480
2352
|
} catch (err) {
|
|
1481
2353
|
throw new Error(
|
|
1482
2354
|
`Failed to open session file: ${err instanceof Error ? err.message : String(err)}`,
|
|
@@ -1495,10 +2367,10 @@ var DefaultSessionStore = class {
|
|
|
1495
2367
|
}
|
|
1496
2368
|
async resume(id) {
|
|
1497
2369
|
const data = await this.load(id);
|
|
1498
|
-
const file =
|
|
2370
|
+
const file = path4.join(this.dir, `${id}.jsonl`);
|
|
1499
2371
|
let handle;
|
|
1500
2372
|
try {
|
|
1501
|
-
handle = await
|
|
2373
|
+
handle = await fsp4.open(file, "a", 384);
|
|
1502
2374
|
} catch (err) {
|
|
1503
2375
|
throw new Error(
|
|
1504
2376
|
`Failed to open session "${id}" for append: ${err instanceof Error ? err.message : String(err)}`,
|
|
@@ -1519,8 +2391,8 @@ var DefaultSessionStore = class {
|
|
|
1519
2391
|
return { writer, data };
|
|
1520
2392
|
}
|
|
1521
2393
|
async load(id) {
|
|
1522
|
-
const file =
|
|
1523
|
-
const raw = await
|
|
2394
|
+
const file = path4.join(this.dir, `${id}.jsonl`);
|
|
2395
|
+
const raw = await fsp4.readFile(file, "utf8");
|
|
1524
2396
|
const lines = raw.split("\n").filter((l) => l.trim());
|
|
1525
2397
|
const events = [];
|
|
1526
2398
|
for (const line of lines) {
|
|
@@ -1539,7 +2411,7 @@ var DefaultSessionStore = class {
|
|
|
1539
2411
|
async list(limit = 20) {
|
|
1540
2412
|
try {
|
|
1541
2413
|
await ensureDir(this.dir);
|
|
1542
|
-
const files = await
|
|
2414
|
+
const files = await fsp4.readdir(this.dir);
|
|
1543
2415
|
const ids = files.filter((f) => f.endsWith(".jsonl")).map((f) => f.replace(/\.jsonl$/, ""));
|
|
1544
2416
|
const sessions = await Promise.all(ids.map((id) => this.summaryFor(id).catch(() => null)));
|
|
1545
2417
|
const out = sessions.filter((s) => s !== null);
|
|
@@ -1554,15 +2426,15 @@ var DefaultSessionStore = class {
|
|
|
1554
2426
|
}
|
|
1555
2427
|
}
|
|
1556
2428
|
async summaryFor(id) {
|
|
1557
|
-
const manifest =
|
|
2429
|
+
const manifest = path4.join(this.dir, `${id}.summary.json`);
|
|
1558
2430
|
try {
|
|
1559
|
-
const raw = await
|
|
2431
|
+
const raw = await fsp4.readFile(manifest, "utf8");
|
|
1560
2432
|
return JSON.parse(raw);
|
|
1561
2433
|
} catch {
|
|
1562
|
-
const full =
|
|
1563
|
-
const stat3 = await
|
|
2434
|
+
const full = path4.join(this.dir, `${id}.jsonl`);
|
|
2435
|
+
const stat3 = await fsp4.stat(full);
|
|
1564
2436
|
const summary = await this.summarize(id, stat3.mtime.toISOString());
|
|
1565
|
-
await
|
|
2437
|
+
await fsp4.writeFile(manifest, JSON.stringify(summary), { mode: 384 }).catch((err) => {
|
|
1566
2438
|
console.warn(
|
|
1567
2439
|
`[session-store] Failed to write manifest for "${id}":`,
|
|
1568
2440
|
err instanceof Error ? err.message : String(err)
|
|
@@ -1572,8 +2444,8 @@ var DefaultSessionStore = class {
|
|
|
1572
2444
|
}
|
|
1573
2445
|
}
|
|
1574
2446
|
async delete(id) {
|
|
1575
|
-
await
|
|
1576
|
-
await
|
|
2447
|
+
await fsp4.unlink(path4.join(this.dir, `${id}.jsonl`));
|
|
2448
|
+
await fsp4.unlink(path4.join(this.dir, `${id}.summary.json`)).catch(() => void 0);
|
|
1577
2449
|
}
|
|
1578
2450
|
async summarize(id, mtime) {
|
|
1579
2451
|
try {
|
|
@@ -1677,7 +2549,7 @@ var FileSessionWriter = class {
|
|
|
1677
2549
|
this.startedAt = startedAt;
|
|
1678
2550
|
this.meta = meta;
|
|
1679
2551
|
this.resumed = opts.resumed ?? false;
|
|
1680
|
-
this.manifestFile = opts.dir ?
|
|
2552
|
+
this.manifestFile = opts.dir ? path4.join(opts.dir, `${id}.summary.json`) : "";
|
|
1681
2553
|
this.filePath = opts.filePath ?? "";
|
|
1682
2554
|
this.summary = {
|
|
1683
2555
|
id,
|
|
@@ -1698,6 +2570,12 @@ var FileSessionWriter = class {
|
|
|
1698
2570
|
tokenIn = 0;
|
|
1699
2571
|
tokenOut = 0;
|
|
1700
2572
|
filePath;
|
|
2573
|
+
/** Public accessor for the JSONL path — required by SessionWriter so
|
|
2574
|
+
* observability surfaces (`/fleet log`, FleetPanel) can locate the
|
|
2575
|
+
* transcript without recomputing the path from session metadata. */
|
|
2576
|
+
get transcriptPath() {
|
|
2577
|
+
return this.filePath || void 0;
|
|
2578
|
+
}
|
|
1701
2579
|
initDone = false;
|
|
1702
2580
|
resumed;
|
|
1703
2581
|
appendFailCount = 0;
|
|
@@ -1715,7 +2593,7 @@ var FileSessionWriter = class {
|
|
|
1715
2593
|
`;
|
|
1716
2594
|
try {
|
|
1717
2595
|
if (this.filePath) {
|
|
1718
|
-
await
|
|
2596
|
+
await fsp4.writeFile(this.filePath, record, { flag: "a", mode: 384 });
|
|
1719
2597
|
}
|
|
1720
2598
|
} catch {
|
|
1721
2599
|
}
|
|
@@ -1768,7 +2646,7 @@ var FileSessionWriter = class {
|
|
|
1768
2646
|
this.closed = true;
|
|
1769
2647
|
if (this.manifestFile) {
|
|
1770
2648
|
try {
|
|
1771
|
-
await
|
|
2649
|
+
await fsp4.writeFile(this.manifestFile, JSON.stringify(this.summary), { mode: 384 });
|
|
1772
2650
|
} catch {
|
|
1773
2651
|
}
|
|
1774
2652
|
}
|
|
@@ -1791,9 +2669,9 @@ function makeDirectorSessionFactory(opts) {
|
|
|
1791
2669
|
let dir;
|
|
1792
2670
|
if (opts.store) {
|
|
1793
2671
|
store = opts.store;
|
|
1794
|
-
dir = opts.sessionsRoot ?
|
|
2672
|
+
dir = opts.sessionsRoot ? path4.join(opts.sessionsRoot, runId) : "(caller-managed)";
|
|
1795
2673
|
} else if (opts.sessionsRoot) {
|
|
1796
|
-
dir =
|
|
2674
|
+
dir = path4.join(opts.sessionsRoot, runId);
|
|
1797
2675
|
store = new DefaultSessionStore({ dir });
|
|
1798
2676
|
} else {
|
|
1799
2677
|
throw new Error("makeDirectorSessionFactory requires either `store` or `sessionsRoot`");
|
|
@@ -1840,10 +2718,12 @@ Working rules:
|
|
|
1840
2718
|
- Never fabricate numbers \u2014 read the actual logs first
|
|
1841
2719
|
- Always include file:line references for errors
|
|
1842
2720
|
- If sessionPath is missing, ask the director to provide it
|
|
1843
|
-
- Report confidence level: high (>90% accuracy), medium, low
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
2721
|
+
- Report confidence level: high (>90% accuracy), medium, low`
|
|
2722
|
+
// No hardcoded budgets — the orchestrator (delegate tool or
|
|
2723
|
+
// spawn_subagent) decides per-task how much room a subagent gets.
|
|
2724
|
+
// A monorepo audit needs hours; a single-file lint check needs
|
|
2725
|
+
// seconds. Pinning a number here forces the orchestrator to fight
|
|
2726
|
+
// the role's default instead of just asking for what it needs.
|
|
1847
2727
|
};
|
|
1848
2728
|
var BUG_HUNTER_AGENT = {
|
|
1849
2729
|
id: "bug-hunter",
|
|
@@ -1883,10 +2763,8 @@ Working rules:
|
|
|
1883
2763
|
- Never scan node_modules \u2014 it's noise
|
|
1884
2764
|
- Always include file:line for every finding
|
|
1885
2765
|
- If >30% of findings are false positives, note the confidence level
|
|
1886
|
-
- Ask director for clarification if paths are ambiguous
|
|
1887
|
-
|
|
1888
|
-
maxToolCalls: 300,
|
|
1889
|
-
timeoutMs: 18e4
|
|
2766
|
+
- Ask director for clarification if paths are ambiguous`
|
|
2767
|
+
// Budgets are set by the orchestrator per task — see fleet.ts header.
|
|
1890
2768
|
};
|
|
1891
2769
|
var REFACTOR_PLANNER_AGENT = {
|
|
1892
2770
|
id: "refactor-planner",
|
|
@@ -1926,10 +2804,8 @@ Working rules:
|
|
|
1926
2804
|
- Always include rollback strategy \u2014 every refactor can fail
|
|
1927
2805
|
- Merge tasks that take <1h into a single phase
|
|
1928
2806
|
- Respect team constraints (reviewer availability, parallelization)
|
|
1929
|
-
- Never plan without analyzing the actual code first
|
|
1930
|
-
|
|
1931
|
-
maxToolCalls: 250,
|
|
1932
|
-
timeoutMs: 15e4
|
|
2807
|
+
- Never plan without analyzing the actual code first`
|
|
2808
|
+
// Budgets are set by the orchestrator per task — see fleet.ts header.
|
|
1933
2809
|
};
|
|
1934
2810
|
var SECURITY_SCANNER_AGENT = {
|
|
1935
2811
|
id: "security-scanner",
|
|
@@ -1977,10 +2853,8 @@ Working rules:
|
|
|
1977
2853
|
- Never scan node_modules \u2014 use npm audit instead
|
|
1978
2854
|
- Always provide remediation steps, not just findings
|
|
1979
2855
|
- Verify regex-based secrets before flagging (false positive risk)
|
|
1980
|
-
- When in doubt, flag as medium rather than ignoring potential issues
|
|
1981
|
-
|
|
1982
|
-
maxToolCalls: 280,
|
|
1983
|
-
timeoutMs: 16e4
|
|
2856
|
+
- When in doubt, flag as medium rather than ignoring potential issues`
|
|
2857
|
+
// Budgets are set by the orchestrator per task — see fleet.ts header.
|
|
1984
2858
|
};
|
|
1985
2859
|
var FLEET_ROSTER = {
|
|
1986
2860
|
"audit-log": AUDIT_LOG_AGENT,
|
|
@@ -1990,6 +2864,6 @@ var FLEET_ROSTER = {
|
|
|
1990
2864
|
};
|
|
1991
2865
|
var ALL_FLEET_AGENTS = Object.values(FLEET_ROSTER);
|
|
1992
2866
|
|
|
1993
|
-
export { ALL_FLEET_AGENTS, AUDIT_LOG_AGENT, BUG_HUNTER_AGENT, BudgetExceededError, DEFAULT_DIRECTOR_PREAMBLE, DEFAULT_SUBAGENT_BASELINE, DefaultMultiAgentCoordinator, Director, DirectorBudgetError, FLEET_ROSTER, FleetBus, FleetUsageAggregator, InMemoryAgentBridge, InMemoryBridgeTransport, REFACTOR_PLANNER_AGENT, SECURITY_SCANNER_AGENT, SubagentBudget, composeDirectorPrompt, composeSubagentPrompt, createMessage, makeAgentSubagentRunner, makeDirectorSessionFactory, rosterSummaryFromConfigs };
|
|
2867
|
+
export { ALL_FLEET_AGENTS, AUDIT_LOG_AGENT, BUG_HUNTER_AGENT, BudgetExceededError, DEFAULT_DIRECTOR_PREAMBLE, DEFAULT_SUBAGENT_BASELINE, DefaultMultiAgentCoordinator, Director, DirectorBudgetError, FLEET_ROSTER, FleetBus, FleetUsageAggregator, InMemoryAgentBridge, InMemoryBridgeTransport, REFACTOR_PLANNER_AGENT, SECURITY_SCANNER_AGENT, SubagentBudget, composeDirectorPrompt, composeSubagentPrompt, createDelegateTool, createMessage, makeAgentSubagentRunner, makeDirectorSessionFactory, rosterSummaryFromConfigs };
|
|
1994
2868
|
//# sourceMappingURL=index.js.map
|
|
1995
2869
|
//# sourceMappingURL=index.js.map
|