@forwardimpact/libeval 0.1.50 → 0.1.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -8
- package/bin/fit-benchmark.js +26 -27
- package/bin/fit-eval.js +36 -30
- package/bin/fit-trace.js +83 -57
- package/package.json +1 -1
- package/src/agent-runner.js +20 -12
- package/src/benchmark/apm-installer.js +48 -44
- package/src/benchmark/env-loader.js +35 -23
- package/src/benchmark/invariants.js +128 -0
- package/src/benchmark/judge.js +18 -19
- package/src/benchmark/npm-installer.js +33 -33
- package/src/benchmark/report.js +40 -26
- package/src/benchmark/result.js +11 -11
- package/src/benchmark/runner.js +90 -46
- package/src/benchmark/task-family.js +78 -65
- package/src/benchmark/workdir.js +100 -93
- package/src/commands/assert.js +30 -22
- package/src/commands/benchmark-invariants.js +74 -0
- package/src/commands/benchmark-report.js +24 -15
- package/src/commands/benchmark-run.js +16 -9
- package/src/commands/by-discussion.js +33 -23
- package/src/commands/callback.js +20 -11
- package/src/commands/discuss.js +31 -13
- package/src/commands/facilitate.js +21 -14
- package/src/commands/output.js +15 -13
- package/src/commands/run.js +28 -14
- package/src/commands/supervise.js +29 -19
- package/src/commands/task-input.js +10 -5
- package/src/commands/tee.js +24 -9
- package/src/commands/trace.js +181 -99
- package/src/discuss-tools.js +48 -2
- package/src/discusser.js +53 -2
- package/src/events/github.js +27 -5
- package/src/facilitator.js +4 -0
- package/src/inbox-poller.js +84 -0
- package/src/judge.js +4 -1
- package/src/message-bus.js +6 -0
- package/src/orchestration-loop.js +14 -4
- package/src/orchestration-toolkit.js +14 -0
- package/src/profile-prompt.js +22 -9
- package/src/redaction.js +31 -9
- package/src/reply-emitter.js +47 -0
- package/src/supervisor.js +4 -0
- package/src/tee-writer.js +4 -2
- package/src/trace-collector.js +9 -2
- package/src/trace-github.js +47 -27
- package/src/benchmark/scorer.js +0 -138
- package/src/commands/benchmark-score.js +0 -68
package/src/commands/trace.js
CHANGED
|
@@ -1,153 +1,230 @@
|
|
|
1
|
-
import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
|
|
2
1
|
import { join, dirname } from "node:path";
|
|
2
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
3
3
|
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
4
4
|
import { createTraceQuery } from "../trace-query.js";
|
|
5
5
|
import { createTraceGitHub } from "../trace-github.js";
|
|
6
6
|
import { stripSignatures } from "../signature-filter.js";
|
|
7
7
|
|
|
8
|
+
// Every handler receives a libcli `InvocationContext`:
|
|
9
|
+
// ctx.options — parsed flag values (`cli.parse().values`)
|
|
10
|
+
// ctx.args — named positionals declared on the subcommand
|
|
11
|
+
// ctx.deps — host-injected collaborators: `{ runtime, config }`
|
|
12
|
+
// Handlers read/write the filesystem and stdout exclusively through
|
|
13
|
+
// `ctx.deps.runtime` and return `{ ok: true }` on success.
|
|
14
|
+
|
|
8
15
|
// --- GitHub commands ---
|
|
9
16
|
|
|
10
17
|
/**
|
|
11
18
|
* List recent workflow runs matching a pattern.
|
|
12
|
-
* @param {
|
|
13
|
-
* @param {string[]} args - [pattern?]
|
|
14
|
-
* @param {{config: import("@forwardimpact/libconfig").Config}} ctx
|
|
19
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
15
20
|
*/
|
|
16
|
-
export async function runRunsCommand(
|
|
21
|
+
export async function runRunsCommand(ctx) {
|
|
22
|
+
const { runtime, config } = ctx.deps;
|
|
17
23
|
const gh = await createTraceGitHub({
|
|
18
|
-
token:
|
|
19
|
-
repo:
|
|
24
|
+
token: config.ghToken(),
|
|
25
|
+
repo: ctx.options.repo,
|
|
26
|
+
runtime,
|
|
20
27
|
});
|
|
21
|
-
const pattern = args
|
|
22
|
-
const lookback =
|
|
28
|
+
const pattern = ctx.args.pattern ?? "agent";
|
|
29
|
+
const lookback = ctx.options.lookback ?? "7d";
|
|
23
30
|
const runs = await gh.listRuns({ pattern, lookback });
|
|
24
|
-
writeJSON(runs,
|
|
31
|
+
writeJSON(runtime, runs, ctx.options);
|
|
32
|
+
return { ok: true };
|
|
25
33
|
}
|
|
26
34
|
|
|
27
35
|
/**
|
|
28
36
|
* Download a trace artifact and auto-convert to structured JSON.
|
|
29
|
-
* @param {
|
|
30
|
-
* @param {string[]} args - [run-id]
|
|
31
|
-
* @param {{config: import("@forwardimpact/libconfig").Config}} ctx
|
|
37
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
32
38
|
*/
|
|
33
|
-
export async function runDownloadCommand(
|
|
39
|
+
export async function runDownloadCommand(ctx) {
|
|
40
|
+
const { runtime, config } = ctx.deps;
|
|
34
41
|
const gh = await createTraceGitHub({
|
|
35
|
-
token:
|
|
36
|
-
repo:
|
|
42
|
+
token: config.ghToken(),
|
|
43
|
+
repo: ctx.options.repo,
|
|
44
|
+
runtime,
|
|
37
45
|
});
|
|
38
|
-
const result = await gh.downloadTrace(args[
|
|
39
|
-
dir:
|
|
40
|
-
name:
|
|
46
|
+
const result = await gh.downloadTrace(ctx.args["run-id"], {
|
|
47
|
+
dir: ctx.options.dir,
|
|
48
|
+
name: ctx.options.artifact,
|
|
41
49
|
});
|
|
42
50
|
|
|
43
51
|
const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
|
|
44
52
|
if (ndjsonFile) {
|
|
45
53
|
const ndjsonPath = join(result.dir, ndjsonFile);
|
|
46
|
-
const collector = createTraceCollector(
|
|
47
|
-
|
|
54
|
+
const collector = createTraceCollector({
|
|
55
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
56
|
+
});
|
|
57
|
+
for (const line of runtime.fsSync
|
|
58
|
+
.readFileSync(ndjsonPath, "utf8")
|
|
59
|
+
.split("\n")) {
|
|
48
60
|
collector.addLine(line);
|
|
49
61
|
}
|
|
50
62
|
const structuredPath = join(result.dir, "structured.json");
|
|
51
|
-
|
|
63
|
+
runtime.fsSync.writeFileSync(
|
|
64
|
+
structuredPath,
|
|
65
|
+
JSON.stringify(collector.toJSON()) + "\n",
|
|
66
|
+
);
|
|
52
67
|
result.files.push("structured.json");
|
|
53
68
|
}
|
|
54
69
|
|
|
55
|
-
writeJSON(result,
|
|
70
|
+
writeJSON(runtime, result, ctx.options);
|
|
71
|
+
return { ok: true };
|
|
56
72
|
}
|
|
57
73
|
|
|
58
74
|
// --- Query commands ---
|
|
59
75
|
|
|
60
|
-
/** @param {
|
|
61
|
-
export async function runOverviewCommand(
|
|
62
|
-
|
|
76
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
77
|
+
export async function runOverviewCommand(ctx) {
|
|
78
|
+
const { runtime } = ctx.deps;
|
|
79
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).overview(), ctx.options);
|
|
80
|
+
return { ok: true };
|
|
63
81
|
}
|
|
64
82
|
|
|
65
|
-
/** @param {
|
|
66
|
-
export async function runCountCommand(
|
|
67
|
-
|
|
83
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
84
|
+
export async function runCountCommand(ctx) {
|
|
85
|
+
const { runtime } = ctx.deps;
|
|
86
|
+
runtime.proc.stdout.write(
|
|
87
|
+
String(loadTrace(runtime, ctx.args.file).count()) + "\n",
|
|
88
|
+
);
|
|
89
|
+
return { ok: true };
|
|
68
90
|
}
|
|
69
91
|
|
|
70
|
-
/** @param {
|
|
71
|
-
export async function runBatchCommand(
|
|
92
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
93
|
+
export async function runBatchCommand(ctx) {
|
|
94
|
+
const { runtime } = ctx.deps;
|
|
72
95
|
writeJSON(
|
|
73
|
-
|
|
74
|
-
|
|
96
|
+
runtime,
|
|
97
|
+
loadTrace(runtime, ctx.args.file).batch(
|
|
98
|
+
parseInt(ctx.args.from, 10),
|
|
99
|
+
parseInt(ctx.args.to, 10),
|
|
100
|
+
),
|
|
101
|
+
ctx.options,
|
|
75
102
|
);
|
|
103
|
+
return { ok: true };
|
|
76
104
|
}
|
|
77
105
|
|
|
78
|
-
/** @param {
|
|
79
|
-
export async function runHeadCommand(
|
|
80
|
-
const
|
|
81
|
-
|
|
106
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
107
|
+
export async function runHeadCommand(ctx) {
|
|
108
|
+
const { runtime } = ctx.deps;
|
|
109
|
+
const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
|
|
110
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).head(n), ctx.options);
|
|
111
|
+
return { ok: true };
|
|
82
112
|
}
|
|
83
113
|
|
|
84
|
-
/** @param {
|
|
85
|
-
export async function runTailCommand(
|
|
86
|
-
const
|
|
87
|
-
|
|
114
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
115
|
+
export async function runTailCommand(ctx) {
|
|
116
|
+
const { runtime } = ctx.deps;
|
|
117
|
+
const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
|
|
118
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).tail(n), ctx.options);
|
|
119
|
+
return { ok: true };
|
|
88
120
|
}
|
|
89
121
|
|
|
90
|
-
/** @param {
|
|
91
|
-
export async function runSearchCommand(
|
|
92
|
-
const
|
|
93
|
-
const
|
|
94
|
-
const
|
|
122
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
123
|
+
export async function runSearchCommand(ctx) {
|
|
124
|
+
const { runtime } = ctx.deps;
|
|
125
|
+
const limit = ctx.options.limit ? parseInt(ctx.options.limit, 10) : 50;
|
|
126
|
+
const context = ctx.options.context ? parseInt(ctx.options.context, 10) : 0;
|
|
127
|
+
const full = ctx.options.full ?? false;
|
|
95
128
|
writeJSON(
|
|
96
|
-
|
|
97
|
-
|
|
129
|
+
runtime,
|
|
130
|
+
loadTrace(runtime, ctx.args.file).search(ctx.args.pattern, {
|
|
131
|
+
limit,
|
|
132
|
+
context,
|
|
133
|
+
full,
|
|
134
|
+
}),
|
|
135
|
+
ctx.options,
|
|
98
136
|
);
|
|
137
|
+
return { ok: true };
|
|
99
138
|
}
|
|
100
139
|
|
|
101
|
-
/** @param {
|
|
102
|
-
export async function runToolsCommand(
|
|
103
|
-
|
|
140
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
141
|
+
export async function runToolsCommand(ctx) {
|
|
142
|
+
const { runtime } = ctx.deps;
|
|
143
|
+
writeJSON(
|
|
144
|
+
runtime,
|
|
145
|
+
loadTrace(runtime, ctx.args.file).toolFrequency(),
|
|
146
|
+
ctx.options,
|
|
147
|
+
);
|
|
148
|
+
return { ok: true };
|
|
104
149
|
}
|
|
105
150
|
|
|
106
|
-
/** @param {
|
|
107
|
-
export async function runToolCommand(
|
|
108
|
-
|
|
151
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
152
|
+
export async function runToolCommand(ctx) {
|
|
153
|
+
const { runtime } = ctx.deps;
|
|
154
|
+
writeJSON(
|
|
155
|
+
runtime,
|
|
156
|
+
loadTrace(runtime, ctx.args.file).tool(ctx.args.name),
|
|
157
|
+
ctx.options,
|
|
158
|
+
);
|
|
159
|
+
return { ok: true };
|
|
109
160
|
}
|
|
110
161
|
|
|
111
|
-
/** @param {
|
|
112
|
-
export async function runErrorsCommand(
|
|
113
|
-
|
|
162
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
163
|
+
export async function runErrorsCommand(ctx) {
|
|
164
|
+
const { runtime } = ctx.deps;
|
|
165
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).errors(), ctx.options);
|
|
166
|
+
return { ok: true };
|
|
114
167
|
}
|
|
115
168
|
|
|
116
|
-
/** @param {
|
|
117
|
-
export async function runReasoningCommand(
|
|
118
|
-
const
|
|
119
|
-
const
|
|
120
|
-
|
|
169
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
170
|
+
export async function runReasoningCommand(ctx) {
|
|
171
|
+
const { runtime } = ctx.deps;
|
|
172
|
+
const from = ctx.options.from ? parseInt(ctx.options.from, 10) : undefined;
|
|
173
|
+
const to = ctx.options.to ? parseInt(ctx.options.to, 10) : undefined;
|
|
174
|
+
writeJSON(
|
|
175
|
+
runtime,
|
|
176
|
+
loadTrace(runtime, ctx.args.file).reasoning({ from, to }),
|
|
177
|
+
ctx.options,
|
|
178
|
+
);
|
|
179
|
+
return { ok: true };
|
|
121
180
|
}
|
|
122
181
|
|
|
123
|
-
/** @param {
|
|
124
|
-
export async function runTimelineCommand(
|
|
125
|
-
const
|
|
126
|
-
|
|
182
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
183
|
+
export async function runTimelineCommand(ctx) {
|
|
184
|
+
const { runtime } = ctx.deps;
|
|
185
|
+
const lines = loadTrace(runtime, ctx.args.file).timeline();
|
|
186
|
+
runtime.proc.stdout.write(lines.join("\n") + "\n");
|
|
187
|
+
return { ok: true };
|
|
127
188
|
}
|
|
128
189
|
|
|
129
|
-
/** @param {
|
|
130
|
-
export async function runStatsCommand(
|
|
131
|
-
|
|
190
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
191
|
+
export async function runStatsCommand(ctx) {
|
|
192
|
+
const { runtime } = ctx.deps;
|
|
193
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).stats(), ctx.options);
|
|
194
|
+
return { ok: true };
|
|
132
195
|
}
|
|
133
196
|
|
|
134
|
-
/** @param {
|
|
135
|
-
export async function runInitCommand(
|
|
136
|
-
|
|
197
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
198
|
+
export async function runInitCommand(ctx) {
|
|
199
|
+
const { runtime } = ctx.deps;
|
|
200
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).init(), ctx.options);
|
|
201
|
+
return { ok: true };
|
|
137
202
|
}
|
|
138
203
|
|
|
139
|
-
/** @param {
|
|
140
|
-
export async function runTurnCommand(
|
|
141
|
-
|
|
204
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
205
|
+
export async function runTurnCommand(ctx) {
|
|
206
|
+
const { runtime } = ctx.deps;
|
|
207
|
+
writeJSON(
|
|
208
|
+
runtime,
|
|
209
|
+
loadTrace(runtime, ctx.args.file).turn(parseInt(ctx.args.index, 10)),
|
|
210
|
+
ctx.options,
|
|
211
|
+
);
|
|
212
|
+
return { ok: true };
|
|
142
213
|
}
|
|
143
214
|
|
|
144
|
-
/** @param {
|
|
145
|
-
export async function runFilterCommand(
|
|
215
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
216
|
+
export async function runFilterCommand(ctx) {
|
|
217
|
+
const { runtime } = ctx.deps;
|
|
146
218
|
const opts = {};
|
|
147
|
-
if (
|
|
148
|
-
if (
|
|
149
|
-
if (
|
|
150
|
-
writeJSON(
|
|
219
|
+
if (ctx.options.role) opts.role = ctx.options.role;
|
|
220
|
+
if (ctx.options.tool) opts.toolName = ctx.options.tool;
|
|
221
|
+
if (ctx.options.error) opts.isError = true;
|
|
222
|
+
writeJSON(
|
|
223
|
+
runtime,
|
|
224
|
+
loadTrace(runtime, ctx.args.file).filter(opts),
|
|
225
|
+
ctx.options,
|
|
226
|
+
);
|
|
227
|
+
return { ok: true };
|
|
151
228
|
}
|
|
152
229
|
|
|
153
230
|
// --- Split command ---
|
|
@@ -168,24 +245,24 @@ const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]);
|
|
|
168
245
|
* `staff-engineer`) classify as agents with the profile in the participant
|
|
169
246
|
* slot. Orchestrator events and invalid source names are dropped.
|
|
170
247
|
*
|
|
171
|
-
* @param {
|
|
172
|
-
* @param {string[]} args - [file]
|
|
248
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
173
249
|
*/
|
|
174
|
-
export async function runSplitCommand(
|
|
175
|
-
const
|
|
176
|
-
|
|
250
|
+
export async function runSplitCommand(ctx) {
|
|
251
|
+
const { runtime } = ctx.deps;
|
|
252
|
+
const file = ctx.args.file;
|
|
253
|
+
if (!file) return { ok: false, code: 1, error: "split: missing input file" };
|
|
177
254
|
|
|
178
|
-
const mode =
|
|
179
|
-
if (!mode)
|
|
255
|
+
const mode = ctx.options.mode;
|
|
256
|
+
if (!mode) return { ok: false, code: 1, error: "split: --mode is required" };
|
|
180
257
|
if (!["run", "supervise", "facilitate"].includes(mode)) {
|
|
181
|
-
|
|
258
|
+
return { ok: false, code: 1, error: `split: invalid --mode "${mode}"` };
|
|
182
259
|
}
|
|
183
260
|
|
|
184
|
-
const caseId =
|
|
185
|
-
const outputDir =
|
|
186
|
-
mkdirSync(outputDir, { recursive: true });
|
|
261
|
+
const caseId = ctx.options.case ?? "default";
|
|
262
|
+
const outputDir = ctx.options["output-dir"] || dirname(file);
|
|
263
|
+
runtime.fsSync.mkdirSync(outputDir, { recursive: true });
|
|
187
264
|
|
|
188
|
-
const buckets = parseBuckets(readFileSync(file, "utf8"));
|
|
265
|
+
const buckets = parseBuckets(runtime.fsSync.readFileSync(file, "utf8"));
|
|
189
266
|
|
|
190
267
|
for (const [source, lines] of buckets.entries()) {
|
|
191
268
|
if (!VALID_SOURCE_NAME.test(source)) continue;
|
|
@@ -194,8 +271,9 @@ export async function runSplitCommand(values, args) {
|
|
|
194
271
|
outputDir,
|
|
195
272
|
`trace--${caseId}--${source}.${role}.ndjson`,
|
|
196
273
|
);
|
|
197
|
-
writeFileSync(outPath, lines.join("\n") + "\n");
|
|
274
|
+
runtime.fsSync.writeFileSync(outPath, lines.join("\n") + "\n");
|
|
198
275
|
}
|
|
276
|
+
return { ok: true };
|
|
199
277
|
}
|
|
200
278
|
|
|
201
279
|
/**
|
|
@@ -234,11 +312,12 @@ function parseBuckets(content) {
|
|
|
234
312
|
|
|
235
313
|
/**
|
|
236
314
|
* Load a trace file. Supports structured JSON and raw NDJSON.
|
|
315
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
237
316
|
* @param {string} file
|
|
238
317
|
* @returns {import("../trace-query.js").TraceQuery}
|
|
239
318
|
*/
|
|
240
|
-
function loadTrace(file) {
|
|
241
|
-
const content = readFileSync(file, "utf8");
|
|
319
|
+
function loadTrace(runtime, file) {
|
|
320
|
+
const content = runtime.fsSync.readFileSync(file, "utf8");
|
|
242
321
|
|
|
243
322
|
try {
|
|
244
323
|
const parsed = JSON.parse(content);
|
|
@@ -249,7 +328,9 @@ function loadTrace(file) {
|
|
|
249
328
|
// Not valid JSON — fall through to NDJSON.
|
|
250
329
|
}
|
|
251
330
|
|
|
252
|
-
const collector = createTraceCollector(
|
|
331
|
+
const collector = createTraceCollector({
|
|
332
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
333
|
+
});
|
|
253
334
|
for (const line of content.split("\n")) {
|
|
254
335
|
collector.addLine(line);
|
|
255
336
|
}
|
|
@@ -260,10 +341,11 @@ function loadTrace(file) {
|
|
|
260
341
|
* Write JSON output to stdout. By default strips `thinking.signature`
|
|
261
342
|
* base64 blobs from the payload so they don't dominate terminal output;
|
|
262
343
|
* pass `--signatures` (surfaced as `values.signatures`) to keep them.
|
|
344
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
263
345
|
* @param {*} data
|
|
264
346
|
* @param {object} [values]
|
|
265
347
|
*/
|
|
266
|
-
function writeJSON(data, values = {}) {
|
|
348
|
+
function writeJSON(runtime, data, values = {}) {
|
|
267
349
|
const output = values.signatures ? data : stripSignatures(data);
|
|
268
|
-
|
|
350
|
+
runtime.proc.stdout.write(JSON.stringify(output, null, 2) + "\n");
|
|
269
351
|
}
|
package/src/discuss-tools.js
CHANGED
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
RECESS_DESC,
|
|
28
28
|
requestForCommentTool,
|
|
29
29
|
requireNoPendingAsks,
|
|
30
|
+
requireNoUnprocessedInbox,
|
|
30
31
|
} from "./orchestration-toolkit.js";
|
|
31
32
|
|
|
32
33
|
/** System prompt for discuss-mode agent participants. L0 mechanics only per COALIGNED. */
|
|
@@ -63,6 +64,26 @@ const RESUME_TRIGGER_SCHEMA = z.discriminatedUnion("kind", [
|
|
|
63
64
|
export function createDiscussLeadToolServer(ctx) {
|
|
64
65
|
return orchestrationServer([
|
|
65
66
|
...baseTools(ctx, { from: "lead", defaultTo: undefined, broadcast: true }),
|
|
67
|
+
tool(
|
|
68
|
+
"Acknowledge",
|
|
69
|
+
"Post a brief message directly to the discussion thread. Use when responding to a human follow-up or providing a status update while participants are working.",
|
|
70
|
+
{
|
|
71
|
+
message: z.string().describe("Message to post on the thread"),
|
|
72
|
+
},
|
|
73
|
+
async ({ message }) => {
|
|
74
|
+
const seq =
|
|
75
|
+
ctx.emitter?.emit({ kind: "ack", body: message, agent: "lead" }) ??
|
|
76
|
+
-1;
|
|
77
|
+
ctx.replies.push({
|
|
78
|
+
body: message,
|
|
79
|
+
agent: "lead",
|
|
80
|
+
kind: "ack",
|
|
81
|
+
seq,
|
|
82
|
+
...(ctx.discussionId && { thread_id: ctx.discussionId }),
|
|
83
|
+
});
|
|
84
|
+
return { content: [{ type: "text", text: "Posted." }] };
|
|
85
|
+
},
|
|
86
|
+
),
|
|
66
87
|
tool(
|
|
67
88
|
"Recess",
|
|
68
89
|
RECESS_DESC,
|
|
@@ -82,11 +103,36 @@ export function createDiscussLeadToolServer(ctx) {
|
|
|
82
103
|
]);
|
|
83
104
|
}
|
|
84
105
|
|
|
106
|
+
const ACKNOWLEDGE_DESC =
|
|
107
|
+
"Acknowledge an Ask before starting work. Posts a visible comment on the thread. Does not discharge the Ask — you still owe an Answer.";
|
|
108
|
+
|
|
85
109
|
/** Discuss-mode agent tool server. */
|
|
86
110
|
export function createDiscussAgentToolServer(ctx, { from }) {
|
|
87
111
|
return orchestrationServer([
|
|
88
112
|
...baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
|
|
89
113
|
requestForCommentTool(ctx),
|
|
114
|
+
tool(
|
|
115
|
+
"Acknowledge",
|
|
116
|
+
ACKNOWLEDGE_DESC,
|
|
117
|
+
{
|
|
118
|
+
message: z
|
|
119
|
+
.string()
|
|
120
|
+
.describe("Brief acknowledgement to post on the thread"),
|
|
121
|
+
askId: z.number().optional().describe("The ask being acknowledged"),
|
|
122
|
+
},
|
|
123
|
+
async ({ message }) => {
|
|
124
|
+
const seq =
|
|
125
|
+
ctx.emitter?.emit({ kind: "ack", body: message, agent: from }) ?? -1;
|
|
126
|
+
ctx.replies.push({
|
|
127
|
+
body: message,
|
|
128
|
+
agent: from,
|
|
129
|
+
kind: "ack",
|
|
130
|
+
seq,
|
|
131
|
+
...(ctx.discussionId && { thread_id: ctx.discussionId }),
|
|
132
|
+
});
|
|
133
|
+
return { content: [{ type: "text", text: "Acknowledged." }] };
|
|
134
|
+
},
|
|
135
|
+
),
|
|
90
136
|
]);
|
|
91
137
|
}
|
|
92
138
|
|
|
@@ -99,7 +145,7 @@ export function createDiscussAgentToolServer(ctx, { from }) {
|
|
|
99
145
|
*/
|
|
100
146
|
export function createRecessHandler(ctx) {
|
|
101
147
|
return async ({ reason, trigger }) => {
|
|
102
|
-
const guard = requireNoPendingAsks(ctx);
|
|
148
|
+
const guard = requireNoPendingAsks(ctx) ?? requireNoUnprocessedInbox(ctx);
|
|
103
149
|
if (guard) return guard;
|
|
104
150
|
ctx.recessTrigger = trigger;
|
|
105
151
|
concludeSession(ctx, {
|
|
@@ -114,7 +160,7 @@ export function createRecessHandler(ctx) {
|
|
|
114
160
|
/** Adjourn handler — ends the discussion with a verdict. */
|
|
115
161
|
export function createAdjournHandler(ctx) {
|
|
116
162
|
return async ({ verdict, summary, outcome }) => {
|
|
117
|
-
const guard = requireNoPendingAsks(ctx);
|
|
163
|
+
const guard = requireNoPendingAsks(ctx) ?? requireNoUnprocessedInbox(ctx);
|
|
118
164
|
if (guard) return guard;
|
|
119
165
|
if (outcome !== undefined) ctx.outcome = outcome;
|
|
120
166
|
concludeSession(ctx, {
|
package/src/discusser.js
CHANGED
|
@@ -17,6 +17,8 @@ import { Writable } from "node:stream";
|
|
|
17
17
|
import { resolve } from "node:path";
|
|
18
18
|
|
|
19
19
|
import { createAgentRunner } from "./agent-runner.js";
|
|
20
|
+
import { InboxPoller } from "./inbox-poller.js";
|
|
21
|
+
import { ReplyEmitter } from "./reply-emitter.js";
|
|
20
22
|
import { composeSystemPrompt } from "./profile-prompt.js";
|
|
21
23
|
import { SequenceCounter } from "./sequence-counter.js";
|
|
22
24
|
import { createMessageBus } from "./message-bus.js";
|
|
@@ -40,6 +42,7 @@ export const DISCUSS_SYSTEM_PROMPT =
|
|
|
40
42
|
"Answers arrive on your next turn as `[answer#N] <participant>: <text>` in your inbox.\n" +
|
|
41
43
|
"End your turn while Asks are pending. The system resumes you when answers arrive.\n" +
|
|
42
44
|
"Multiple `Ask` calls in one turn run participants in parallel.\n" +
|
|
45
|
+
"Use `Acknowledge` to post a brief message directly to the discussion thread — use it to respond to human follow-ups or give status updates while participants are working.\n" +
|
|
43
46
|
"End the discussion by calling `Adjourn` with a verdict and summary, or `Recess` only to wait on an external reply or duration.";
|
|
44
47
|
|
|
45
48
|
/**
|
|
@@ -79,7 +82,15 @@ export class Discusser {
|
|
|
79
82
|
* @param {string|null} [deps.discussionId]
|
|
80
83
|
* @param {SequenceCounter} [deps.counter]
|
|
81
84
|
*/
|
|
82
|
-
constructor({
|
|
85
|
+
constructor({
|
|
86
|
+
loop,
|
|
87
|
+
ctx,
|
|
88
|
+
output,
|
|
89
|
+
discussionId,
|
|
90
|
+
counter,
|
|
91
|
+
redactor,
|
|
92
|
+
inboxPoller,
|
|
93
|
+
}) {
|
|
83
94
|
if (!loop) throw new Error("loop is required");
|
|
84
95
|
if (!ctx) throw new Error("ctx is required");
|
|
85
96
|
if (!output) throw new Error("output is required");
|
|
@@ -90,6 +101,7 @@ export class Discusser {
|
|
|
90
101
|
this.discussionId = discussionId ?? null;
|
|
91
102
|
this.counter = counter ?? new SequenceCounter();
|
|
92
103
|
this.redactor = redactor;
|
|
104
|
+
this.inboxPoller = inboxPoller ?? null;
|
|
93
105
|
}
|
|
94
106
|
|
|
95
107
|
/**
|
|
@@ -150,6 +162,7 @@ export class Discusser {
|
|
|
150
162
|
...(this.ctx.rfcs?.length && { rfcs: this.ctx.rfcs }),
|
|
151
163
|
...(this.ctx.recessTrigger && { trigger: this.ctx.recessTrigger }),
|
|
152
164
|
...(this.discussionId && { discussion_id: this.discussionId }),
|
|
165
|
+
lastActedSeq: this.inboxPoller?.lastActedSeq ?? -1,
|
|
153
166
|
};
|
|
154
167
|
this.output.write(
|
|
155
168
|
JSON.stringify(
|
|
@@ -184,10 +197,14 @@ export class Discusser {
|
|
|
184
197
|
* @param {function} deps.query
|
|
185
198
|
* @param {import("stream").Writable} deps.output
|
|
186
199
|
* @param {number} [deps.maxTurns]
|
|
200
|
+
* @param {number} [deps.maxLeadTurns]
|
|
187
201
|
* @param {string} [deps.leadCwd]
|
|
188
202
|
* @param {string} [deps.profilesDir]
|
|
189
203
|
* @param {string} [deps.taskAmend]
|
|
190
204
|
* @param {object} deps.redactor
|
|
205
|
+
* @param {string|null} [deps.callbackUrl]
|
|
206
|
+
* @param {string|null} [deps.inboxUrl]
|
|
207
|
+
* @param {string|null} [deps.correlationId]
|
|
191
208
|
* @returns {Discusser}
|
|
192
209
|
*/
|
|
193
210
|
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: factory wires N runners + resume hydration paths
|
|
@@ -201,12 +218,18 @@ export function createDiscusser({
|
|
|
201
218
|
query,
|
|
202
219
|
output,
|
|
203
220
|
maxTurns,
|
|
221
|
+
maxLeadTurns,
|
|
204
222
|
leadCwd,
|
|
205
223
|
profilesDir,
|
|
206
224
|
taskAmend,
|
|
207
225
|
redactor,
|
|
226
|
+
callbackUrl,
|
|
227
|
+
inboxUrl,
|
|
228
|
+
correlationId,
|
|
229
|
+
runtime,
|
|
208
230
|
}) {
|
|
209
231
|
if (!redactor) throw new Error("redactor is required");
|
|
232
|
+
if (!runtime) throw new Error("runtime is required");
|
|
210
233
|
const resolvedLeadCwd = resolve(leadCwd ?? ".");
|
|
211
234
|
const resolvedProfilesDir =
|
|
212
235
|
profilesDir ?? resolve(resolvedLeadCwd, ".claude/agents");
|
|
@@ -236,13 +259,34 @@ export function createDiscusser({
|
|
|
236
259
|
participants: ["lead", ...resolvedConfigs.map((a) => a.name)],
|
|
237
260
|
});
|
|
238
261
|
|
|
262
|
+
const loopCounter = new SequenceCounter();
|
|
263
|
+
const emitter = new ReplyEmitter({
|
|
264
|
+
callbackUrl: callbackUrl ?? null,
|
|
265
|
+
correlationId: correlationId ?? null,
|
|
266
|
+
counter: loopCounter,
|
|
267
|
+
});
|
|
268
|
+
ctx.emitter = emitter;
|
|
269
|
+
|
|
270
|
+
const abortController = new AbortController();
|
|
271
|
+
const inboxPoller = inboxUrl
|
|
272
|
+
? new InboxPoller({
|
|
273
|
+
inboxUrl,
|
|
274
|
+
messageBus,
|
|
275
|
+
leadName: "lead",
|
|
276
|
+
signal: abortController.signal,
|
|
277
|
+
})
|
|
278
|
+
: null;
|
|
279
|
+
|
|
239
280
|
// Intercept answers routed to the lead — each becomes a discussion reply.
|
|
240
281
|
const originalAnswer = messageBus.answer.bind(messageBus);
|
|
241
282
|
messageBus.answer = (from, to, text, askId) => {
|
|
242
283
|
if (to === "lead" && from !== "@orchestrator") {
|
|
284
|
+
const seq = emitter.emit({ kind: "reply", body: text, agent: from });
|
|
243
285
|
ctx.replies.push({
|
|
244
286
|
body: text,
|
|
245
287
|
agent: from,
|
|
288
|
+
kind: "reply",
|
|
289
|
+
seq,
|
|
246
290
|
...(ctx.discussionId && { thread_id: ctx.discussionId }),
|
|
247
291
|
});
|
|
248
292
|
}
|
|
@@ -284,6 +328,7 @@ export function createDiscusser({
|
|
|
284
328
|
profile: config.agentProfile,
|
|
285
329
|
profilesDir: resolvedProfilesDir,
|
|
286
330
|
trailer: agentTrailer,
|
|
331
|
+
runtime,
|
|
287
332
|
}),
|
|
288
333
|
redactor,
|
|
289
334
|
});
|
|
@@ -316,6 +361,7 @@ export function createDiscusser({
|
|
|
316
361
|
profile: leadProfile,
|
|
317
362
|
profilesDir: resolvedProfilesDir,
|
|
318
363
|
trailer: DISCUSS_SYSTEM_PROMPT,
|
|
364
|
+
runtime,
|
|
319
365
|
}),
|
|
320
366
|
redactor,
|
|
321
367
|
});
|
|
@@ -327,10 +373,14 @@ export function createDiscusser({
|
|
|
327
373
|
output,
|
|
328
374
|
leadName: "lead",
|
|
329
375
|
mode: "discussion",
|
|
376
|
+
maxLeadTurns: maxLeadTurns ?? undefined,
|
|
330
377
|
ctx,
|
|
331
378
|
taskAmend,
|
|
332
379
|
redactor,
|
|
380
|
+
inboxPoller,
|
|
381
|
+
abortController,
|
|
333
382
|
});
|
|
383
|
+
loop.counter = loopCounter;
|
|
334
384
|
|
|
335
385
|
discusser = new Discusser({
|
|
336
386
|
loop,
|
|
@@ -338,7 +388,8 @@ export function createDiscusser({
|
|
|
338
388
|
output,
|
|
339
389
|
discussionId: discussionId ?? null,
|
|
340
390
|
redactor,
|
|
341
|
-
counter:
|
|
391
|
+
counter: loopCounter,
|
|
392
|
+
inboxPoller,
|
|
342
393
|
});
|
|
343
394
|
return discusser;
|
|
344
395
|
}
|