@forwardimpact/libeval 0.1.49 → 0.1.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -8
- package/bin/fit-benchmark.js +26 -27
- package/bin/fit-eval.js +76 -78
- package/bin/fit-trace.js +83 -57
- package/package.json +2 -2
- package/src/agent-runner.js +23 -13
- package/src/benchmark/env-loader.js +35 -23
- package/src/benchmark/{scorer.js → invariants.js} +14 -12
- package/src/benchmark/judge.js +5 -8
- package/src/benchmark/npm-installer.js +87 -0
- package/src/benchmark/report.js +15 -15
- package/src/benchmark/result.js +11 -11
- package/src/benchmark/runner.js +17 -11
- package/src/benchmark/task-family.js +6 -4
- package/src/benchmark/workdir.js +23 -3
- package/src/commands/assert.js +30 -22
- package/src/commands/benchmark-invariants.js +74 -0
- package/src/commands/benchmark-report.js +23 -15
- package/src/commands/benchmark-run.js +22 -7
- package/src/commands/by-discussion.js +29 -18
- package/src/commands/callback.js +20 -11
- package/src/commands/discuss.js +30 -21
- package/src/commands/facilitate.js +20 -21
- package/src/commands/output.js +11 -12
- package/src/commands/run.js +24 -21
- package/src/commands/supervise.js +27 -27
- package/src/commands/task-input.js +54 -0
- package/src/commands/trace.js +174 -97
- package/src/discuss-tools.js +48 -2
- package/src/discusser.js +49 -2
- package/src/events/github.js +155 -0
- package/src/inbox-poller.js +84 -0
- package/src/index.js +10 -0
- package/src/judge.js +1 -1
- package/src/message-bus.js +6 -0
- package/src/orchestration-loop.js +19 -5
- package/src/orchestration-toolkit.js +14 -0
- package/src/redaction.js +31 -9
- package/src/reply-emitter.js +47 -0
- package/src/commands/benchmark-score.js +0 -68
package/src/commands/trace.js
CHANGED
|
@@ -1,153 +1,227 @@
|
|
|
1
|
-
import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
|
|
2
1
|
import { join, dirname } from "node:path";
|
|
3
2
|
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
4
3
|
import { createTraceQuery } from "../trace-query.js";
|
|
5
4
|
import { createTraceGitHub } from "../trace-github.js";
|
|
6
5
|
import { stripSignatures } from "../signature-filter.js";
|
|
7
6
|
|
|
7
|
+
// Every handler receives a libcli `InvocationContext`:
|
|
8
|
+
// ctx.options — parsed flag values (`cli.parse().values`)
|
|
9
|
+
// ctx.args — named positionals declared on the subcommand
|
|
10
|
+
// ctx.deps — host-injected collaborators: `{ runtime, config }`
|
|
11
|
+
// Handlers read/write the filesystem and stdout exclusively through
|
|
12
|
+
// `ctx.deps.runtime` and return `{ ok: true }` on success.
|
|
13
|
+
|
|
8
14
|
// --- GitHub commands ---
|
|
9
15
|
|
|
10
16
|
/**
|
|
11
17
|
* List recent workflow runs matching a pattern.
|
|
12
|
-
* @param {
|
|
13
|
-
* @param {string[]} args - [pattern?]
|
|
14
|
-
* @param {{config: import("@forwardimpact/libconfig").Config}} ctx
|
|
18
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
15
19
|
*/
|
|
16
|
-
export async function runRunsCommand(
|
|
20
|
+
export async function runRunsCommand(ctx) {
|
|
21
|
+
const { runtime, config } = ctx.deps;
|
|
17
22
|
const gh = await createTraceGitHub({
|
|
18
|
-
token:
|
|
19
|
-
repo:
|
|
23
|
+
token: config.ghToken(),
|
|
24
|
+
repo: ctx.options.repo,
|
|
25
|
+
runtime,
|
|
20
26
|
});
|
|
21
|
-
const pattern = args
|
|
22
|
-
const lookback =
|
|
27
|
+
const pattern = ctx.args.pattern ?? "agent";
|
|
28
|
+
const lookback = ctx.options.lookback ?? "7d";
|
|
23
29
|
const runs = await gh.listRuns({ pattern, lookback });
|
|
24
|
-
writeJSON(runs,
|
|
30
|
+
writeJSON(runtime, runs, ctx.options);
|
|
31
|
+
return { ok: true };
|
|
25
32
|
}
|
|
26
33
|
|
|
27
34
|
/**
|
|
28
35
|
* Download a trace artifact and auto-convert to structured JSON.
|
|
29
|
-
* @param {
|
|
30
|
-
* @param {string[]} args - [run-id]
|
|
31
|
-
* @param {{config: import("@forwardimpact/libconfig").Config}} ctx
|
|
36
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
32
37
|
*/
|
|
33
|
-
export async function runDownloadCommand(
|
|
38
|
+
export async function runDownloadCommand(ctx) {
|
|
39
|
+
const { runtime, config } = ctx.deps;
|
|
34
40
|
const gh = await createTraceGitHub({
|
|
35
|
-
token:
|
|
36
|
-
repo:
|
|
41
|
+
token: config.ghToken(),
|
|
42
|
+
repo: ctx.options.repo,
|
|
43
|
+
runtime,
|
|
37
44
|
});
|
|
38
|
-
const result = await gh.downloadTrace(args[
|
|
39
|
-
dir:
|
|
40
|
-
name:
|
|
45
|
+
const result = await gh.downloadTrace(ctx.args["run-id"], {
|
|
46
|
+
dir: ctx.options.dir,
|
|
47
|
+
name: ctx.options.artifact,
|
|
41
48
|
});
|
|
42
49
|
|
|
43
50
|
const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
|
|
44
51
|
if (ndjsonFile) {
|
|
45
52
|
const ndjsonPath = join(result.dir, ndjsonFile);
|
|
46
53
|
const collector = createTraceCollector();
|
|
47
|
-
for (const line of
|
|
54
|
+
for (const line of runtime.fsSync
|
|
55
|
+
.readFileSync(ndjsonPath, "utf8")
|
|
56
|
+
.split("\n")) {
|
|
48
57
|
collector.addLine(line);
|
|
49
58
|
}
|
|
50
59
|
const structuredPath = join(result.dir, "structured.json");
|
|
51
|
-
|
|
60
|
+
runtime.fsSync.writeFileSync(
|
|
61
|
+
structuredPath,
|
|
62
|
+
JSON.stringify(collector.toJSON()) + "\n",
|
|
63
|
+
);
|
|
52
64
|
result.files.push("structured.json");
|
|
53
65
|
}
|
|
54
66
|
|
|
55
|
-
writeJSON(result,
|
|
67
|
+
writeJSON(runtime, result, ctx.options);
|
|
68
|
+
return { ok: true };
|
|
56
69
|
}
|
|
57
70
|
|
|
58
71
|
// --- Query commands ---
|
|
59
72
|
|
|
60
|
-
/** @param {
|
|
61
|
-
export async function runOverviewCommand(
|
|
62
|
-
|
|
73
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
74
|
+
export async function runOverviewCommand(ctx) {
|
|
75
|
+
const { runtime } = ctx.deps;
|
|
76
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).overview(), ctx.options);
|
|
77
|
+
return { ok: true };
|
|
63
78
|
}
|
|
64
79
|
|
|
65
|
-
/** @param {
|
|
66
|
-
export async function runCountCommand(
|
|
67
|
-
|
|
80
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
81
|
+
export async function runCountCommand(ctx) {
|
|
82
|
+
const { runtime } = ctx.deps;
|
|
83
|
+
runtime.proc.stdout.write(
|
|
84
|
+
String(loadTrace(runtime, ctx.args.file).count()) + "\n",
|
|
85
|
+
);
|
|
86
|
+
return { ok: true };
|
|
68
87
|
}
|
|
69
88
|
|
|
70
|
-
/** @param {
|
|
71
|
-
export async function runBatchCommand(
|
|
89
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
90
|
+
export async function runBatchCommand(ctx) {
|
|
91
|
+
const { runtime } = ctx.deps;
|
|
72
92
|
writeJSON(
|
|
73
|
-
|
|
74
|
-
|
|
93
|
+
runtime,
|
|
94
|
+
loadTrace(runtime, ctx.args.file).batch(
|
|
95
|
+
parseInt(ctx.args.from, 10),
|
|
96
|
+
parseInt(ctx.args.to, 10),
|
|
97
|
+
),
|
|
98
|
+
ctx.options,
|
|
75
99
|
);
|
|
100
|
+
return { ok: true };
|
|
76
101
|
}
|
|
77
102
|
|
|
78
|
-
/** @param {
|
|
79
|
-
export async function runHeadCommand(
|
|
80
|
-
const
|
|
81
|
-
|
|
103
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
104
|
+
export async function runHeadCommand(ctx) {
|
|
105
|
+
const { runtime } = ctx.deps;
|
|
106
|
+
const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
|
|
107
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).head(n), ctx.options);
|
|
108
|
+
return { ok: true };
|
|
82
109
|
}
|
|
83
110
|
|
|
84
|
-
/** @param {
|
|
85
|
-
export async function runTailCommand(
|
|
86
|
-
const
|
|
87
|
-
|
|
111
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
112
|
+
export async function runTailCommand(ctx) {
|
|
113
|
+
const { runtime } = ctx.deps;
|
|
114
|
+
const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
|
|
115
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).tail(n), ctx.options);
|
|
116
|
+
return { ok: true };
|
|
88
117
|
}
|
|
89
118
|
|
|
90
|
-
/** @param {
|
|
91
|
-
export async function runSearchCommand(
|
|
92
|
-
const
|
|
93
|
-
const
|
|
94
|
-
const
|
|
119
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
120
|
+
export async function runSearchCommand(ctx) {
|
|
121
|
+
const { runtime } = ctx.deps;
|
|
122
|
+
const limit = ctx.options.limit ? parseInt(ctx.options.limit, 10) : 50;
|
|
123
|
+
const context = ctx.options.context ? parseInt(ctx.options.context, 10) : 0;
|
|
124
|
+
const full = ctx.options.full ?? false;
|
|
95
125
|
writeJSON(
|
|
96
|
-
|
|
97
|
-
|
|
126
|
+
runtime,
|
|
127
|
+
loadTrace(runtime, ctx.args.file).search(ctx.args.pattern, {
|
|
128
|
+
limit,
|
|
129
|
+
context,
|
|
130
|
+
full,
|
|
131
|
+
}),
|
|
132
|
+
ctx.options,
|
|
98
133
|
);
|
|
134
|
+
return { ok: true };
|
|
99
135
|
}
|
|
100
136
|
|
|
101
|
-
/** @param {
|
|
102
|
-
export async function runToolsCommand(
|
|
103
|
-
|
|
137
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
138
|
+
export async function runToolsCommand(ctx) {
|
|
139
|
+
const { runtime } = ctx.deps;
|
|
140
|
+
writeJSON(
|
|
141
|
+
runtime,
|
|
142
|
+
loadTrace(runtime, ctx.args.file).toolFrequency(),
|
|
143
|
+
ctx.options,
|
|
144
|
+
);
|
|
145
|
+
return { ok: true };
|
|
104
146
|
}
|
|
105
147
|
|
|
106
|
-
/** @param {
|
|
107
|
-
export async function runToolCommand(
|
|
108
|
-
|
|
148
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
149
|
+
export async function runToolCommand(ctx) {
|
|
150
|
+
const { runtime } = ctx.deps;
|
|
151
|
+
writeJSON(
|
|
152
|
+
runtime,
|
|
153
|
+
loadTrace(runtime, ctx.args.file).tool(ctx.args.name),
|
|
154
|
+
ctx.options,
|
|
155
|
+
);
|
|
156
|
+
return { ok: true };
|
|
109
157
|
}
|
|
110
158
|
|
|
111
|
-
/** @param {
|
|
112
|
-
export async function runErrorsCommand(
|
|
113
|
-
|
|
159
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
160
|
+
export async function runErrorsCommand(ctx) {
|
|
161
|
+
const { runtime } = ctx.deps;
|
|
162
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).errors(), ctx.options);
|
|
163
|
+
return { ok: true };
|
|
114
164
|
}
|
|
115
165
|
|
|
116
|
-
/** @param {
|
|
117
|
-
export async function runReasoningCommand(
|
|
118
|
-
const
|
|
119
|
-
const
|
|
120
|
-
|
|
166
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
167
|
+
export async function runReasoningCommand(ctx) {
|
|
168
|
+
const { runtime } = ctx.deps;
|
|
169
|
+
const from = ctx.options.from ? parseInt(ctx.options.from, 10) : undefined;
|
|
170
|
+
const to = ctx.options.to ? parseInt(ctx.options.to, 10) : undefined;
|
|
171
|
+
writeJSON(
|
|
172
|
+
runtime,
|
|
173
|
+
loadTrace(runtime, ctx.args.file).reasoning({ from, to }),
|
|
174
|
+
ctx.options,
|
|
175
|
+
);
|
|
176
|
+
return { ok: true };
|
|
121
177
|
}
|
|
122
178
|
|
|
123
|
-
/** @param {
|
|
124
|
-
export async function runTimelineCommand(
|
|
125
|
-
const
|
|
126
|
-
|
|
179
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
180
|
+
export async function runTimelineCommand(ctx) {
|
|
181
|
+
const { runtime } = ctx.deps;
|
|
182
|
+
const lines = loadTrace(runtime, ctx.args.file).timeline();
|
|
183
|
+
runtime.proc.stdout.write(lines.join("\n") + "\n");
|
|
184
|
+
return { ok: true };
|
|
127
185
|
}
|
|
128
186
|
|
|
129
|
-
/** @param {
|
|
130
|
-
export async function runStatsCommand(
|
|
131
|
-
|
|
187
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
188
|
+
export async function runStatsCommand(ctx) {
|
|
189
|
+
const { runtime } = ctx.deps;
|
|
190
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).stats(), ctx.options);
|
|
191
|
+
return { ok: true };
|
|
132
192
|
}
|
|
133
193
|
|
|
134
|
-
/** @param {
|
|
135
|
-
export async function runInitCommand(
|
|
136
|
-
|
|
194
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
195
|
+
export async function runInitCommand(ctx) {
|
|
196
|
+
const { runtime } = ctx.deps;
|
|
197
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).init(), ctx.options);
|
|
198
|
+
return { ok: true };
|
|
137
199
|
}
|
|
138
200
|
|
|
139
|
-
/** @param {
|
|
140
|
-
export async function runTurnCommand(
|
|
141
|
-
|
|
201
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
202
|
+
export async function runTurnCommand(ctx) {
|
|
203
|
+
const { runtime } = ctx.deps;
|
|
204
|
+
writeJSON(
|
|
205
|
+
runtime,
|
|
206
|
+
loadTrace(runtime, ctx.args.file).turn(parseInt(ctx.args.index, 10)),
|
|
207
|
+
ctx.options,
|
|
208
|
+
);
|
|
209
|
+
return { ok: true };
|
|
142
210
|
}
|
|
143
211
|
|
|
144
|
-
/** @param {
|
|
145
|
-
export async function runFilterCommand(
|
|
212
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
213
|
+
export async function runFilterCommand(ctx) {
|
|
214
|
+
const { runtime } = ctx.deps;
|
|
146
215
|
const opts = {};
|
|
147
|
-
if (
|
|
148
|
-
if (
|
|
149
|
-
if (
|
|
150
|
-
writeJSON(
|
|
216
|
+
if (ctx.options.role) opts.role = ctx.options.role;
|
|
217
|
+
if (ctx.options.tool) opts.toolName = ctx.options.tool;
|
|
218
|
+
if (ctx.options.error) opts.isError = true;
|
|
219
|
+
writeJSON(
|
|
220
|
+
runtime,
|
|
221
|
+
loadTrace(runtime, ctx.args.file).filter(opts),
|
|
222
|
+
ctx.options,
|
|
223
|
+
);
|
|
224
|
+
return { ok: true };
|
|
151
225
|
}
|
|
152
226
|
|
|
153
227
|
// --- Split command ---
|
|
@@ -168,24 +242,24 @@ const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]);
|
|
|
168
242
|
* `staff-engineer`) classify as agents with the profile in the participant
|
|
169
243
|
* slot. Orchestrator events and invalid source names are dropped.
|
|
170
244
|
*
|
|
171
|
-
* @param {
|
|
172
|
-
* @param {string[]} args - [file]
|
|
245
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
173
246
|
*/
|
|
174
|
-
export async function runSplitCommand(
|
|
175
|
-
const
|
|
176
|
-
|
|
247
|
+
export async function runSplitCommand(ctx) {
|
|
248
|
+
const { runtime } = ctx.deps;
|
|
249
|
+
const file = ctx.args.file;
|
|
250
|
+
if (!file) return { ok: false, code: 1, error: "split: missing input file" };
|
|
177
251
|
|
|
178
|
-
const mode =
|
|
179
|
-
if (!mode)
|
|
252
|
+
const mode = ctx.options.mode;
|
|
253
|
+
if (!mode) return { ok: false, code: 1, error: "split: --mode is required" };
|
|
180
254
|
if (!["run", "supervise", "facilitate"].includes(mode)) {
|
|
181
|
-
|
|
255
|
+
return { ok: false, code: 1, error: `split: invalid --mode "${mode}"` };
|
|
182
256
|
}
|
|
183
257
|
|
|
184
|
-
const caseId =
|
|
185
|
-
const outputDir =
|
|
186
|
-
mkdirSync(outputDir, { recursive: true });
|
|
258
|
+
const caseId = ctx.options.case ?? "default";
|
|
259
|
+
const outputDir = ctx.options["output-dir"] || dirname(file);
|
|
260
|
+
runtime.fsSync.mkdirSync(outputDir, { recursive: true });
|
|
187
261
|
|
|
188
|
-
const buckets = parseBuckets(readFileSync(file, "utf8"));
|
|
262
|
+
const buckets = parseBuckets(runtime.fsSync.readFileSync(file, "utf8"));
|
|
189
263
|
|
|
190
264
|
for (const [source, lines] of buckets.entries()) {
|
|
191
265
|
if (!VALID_SOURCE_NAME.test(source)) continue;
|
|
@@ -194,8 +268,9 @@ export async function runSplitCommand(values, args) {
|
|
|
194
268
|
outputDir,
|
|
195
269
|
`trace--${caseId}--${source}.${role}.ndjson`,
|
|
196
270
|
);
|
|
197
|
-
writeFileSync(outPath, lines.join("\n") + "\n");
|
|
271
|
+
runtime.fsSync.writeFileSync(outPath, lines.join("\n") + "\n");
|
|
198
272
|
}
|
|
273
|
+
return { ok: true };
|
|
199
274
|
}
|
|
200
275
|
|
|
201
276
|
/**
|
|
@@ -234,11 +309,12 @@ function parseBuckets(content) {
|
|
|
234
309
|
|
|
235
310
|
/**
|
|
236
311
|
* Load a trace file. Supports structured JSON and raw NDJSON.
|
|
312
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
237
313
|
* @param {string} file
|
|
238
314
|
* @returns {import("../trace-query.js").TraceQuery}
|
|
239
315
|
*/
|
|
240
|
-
function loadTrace(file) {
|
|
241
|
-
const content = readFileSync(file, "utf8");
|
|
316
|
+
function loadTrace(runtime, file) {
|
|
317
|
+
const content = runtime.fsSync.readFileSync(file, "utf8");
|
|
242
318
|
|
|
243
319
|
try {
|
|
244
320
|
const parsed = JSON.parse(content);
|
|
@@ -260,10 +336,11 @@ function loadTrace(file) {
|
|
|
260
336
|
* Write JSON output to stdout. By default strips `thinking.signature`
|
|
261
337
|
* base64 blobs from the payload so they don't dominate terminal output;
|
|
262
338
|
* pass `--signatures` (surfaced as `values.signatures`) to keep them.
|
|
339
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
263
340
|
* @param {*} data
|
|
264
341
|
* @param {object} [values]
|
|
265
342
|
*/
|
|
266
|
-
function writeJSON(data, values = {}) {
|
|
343
|
+
function writeJSON(runtime, data, values = {}) {
|
|
267
344
|
const output = values.signatures ? data : stripSignatures(data);
|
|
268
|
-
|
|
345
|
+
runtime.proc.stdout.write(JSON.stringify(output, null, 2) + "\n");
|
|
269
346
|
}
|
package/src/discuss-tools.js
CHANGED
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
RECESS_DESC,
|
|
28
28
|
requestForCommentTool,
|
|
29
29
|
requireNoPendingAsks,
|
|
30
|
+
requireNoUnprocessedInbox,
|
|
30
31
|
} from "./orchestration-toolkit.js";
|
|
31
32
|
|
|
32
33
|
/** System prompt for discuss-mode agent participants. L0 mechanics only per COALIGNED. */
|
|
@@ -63,6 +64,26 @@ const RESUME_TRIGGER_SCHEMA = z.discriminatedUnion("kind", [
|
|
|
63
64
|
export function createDiscussLeadToolServer(ctx) {
|
|
64
65
|
return orchestrationServer([
|
|
65
66
|
...baseTools(ctx, { from: "lead", defaultTo: undefined, broadcast: true }),
|
|
67
|
+
tool(
|
|
68
|
+
"Acknowledge",
|
|
69
|
+
"Post a brief message directly to the discussion thread. Use when responding to a human follow-up or providing a status update while participants are working.",
|
|
70
|
+
{
|
|
71
|
+
message: z.string().describe("Message to post on the thread"),
|
|
72
|
+
},
|
|
73
|
+
async ({ message }) => {
|
|
74
|
+
const seq =
|
|
75
|
+
ctx.emitter?.emit({ kind: "ack", body: message, agent: "lead" }) ??
|
|
76
|
+
-1;
|
|
77
|
+
ctx.replies.push({
|
|
78
|
+
body: message,
|
|
79
|
+
agent: "lead",
|
|
80
|
+
kind: "ack",
|
|
81
|
+
seq,
|
|
82
|
+
...(ctx.discussionId && { thread_id: ctx.discussionId }),
|
|
83
|
+
});
|
|
84
|
+
return { content: [{ type: "text", text: "Posted." }] };
|
|
85
|
+
},
|
|
86
|
+
),
|
|
66
87
|
tool(
|
|
67
88
|
"Recess",
|
|
68
89
|
RECESS_DESC,
|
|
@@ -82,11 +103,36 @@ export function createDiscussLeadToolServer(ctx) {
|
|
|
82
103
|
]);
|
|
83
104
|
}
|
|
84
105
|
|
|
106
|
+
const ACKNOWLEDGE_DESC =
|
|
107
|
+
"Acknowledge an Ask before starting work. Posts a visible comment on the thread. Does not discharge the Ask — you still owe an Answer.";
|
|
108
|
+
|
|
85
109
|
/** Discuss-mode agent tool server. */
|
|
86
110
|
export function createDiscussAgentToolServer(ctx, { from }) {
|
|
87
111
|
return orchestrationServer([
|
|
88
112
|
...baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
|
|
89
113
|
requestForCommentTool(ctx),
|
|
114
|
+
tool(
|
|
115
|
+
"Acknowledge",
|
|
116
|
+
ACKNOWLEDGE_DESC,
|
|
117
|
+
{
|
|
118
|
+
message: z
|
|
119
|
+
.string()
|
|
120
|
+
.describe("Brief acknowledgement to post on the thread"),
|
|
121
|
+
askId: z.number().optional().describe("The ask being acknowledged"),
|
|
122
|
+
},
|
|
123
|
+
async ({ message }) => {
|
|
124
|
+
const seq =
|
|
125
|
+
ctx.emitter?.emit({ kind: "ack", body: message, agent: from }) ?? -1;
|
|
126
|
+
ctx.replies.push({
|
|
127
|
+
body: message,
|
|
128
|
+
agent: from,
|
|
129
|
+
kind: "ack",
|
|
130
|
+
seq,
|
|
131
|
+
...(ctx.discussionId && { thread_id: ctx.discussionId }),
|
|
132
|
+
});
|
|
133
|
+
return { content: [{ type: "text", text: "Acknowledged." }] };
|
|
134
|
+
},
|
|
135
|
+
),
|
|
90
136
|
]);
|
|
91
137
|
}
|
|
92
138
|
|
|
@@ -99,7 +145,7 @@ export function createDiscussAgentToolServer(ctx, { from }) {
|
|
|
99
145
|
*/
|
|
100
146
|
export function createRecessHandler(ctx) {
|
|
101
147
|
return async ({ reason, trigger }) => {
|
|
102
|
-
const guard = requireNoPendingAsks(ctx);
|
|
148
|
+
const guard = requireNoPendingAsks(ctx) ?? requireNoUnprocessedInbox(ctx);
|
|
103
149
|
if (guard) return guard;
|
|
104
150
|
ctx.recessTrigger = trigger;
|
|
105
151
|
concludeSession(ctx, {
|
|
@@ -114,7 +160,7 @@ export function createRecessHandler(ctx) {
|
|
|
114
160
|
/** Adjourn handler — ends the discussion with a verdict. */
|
|
115
161
|
export function createAdjournHandler(ctx) {
|
|
116
162
|
return async ({ verdict, summary, outcome }) => {
|
|
117
|
-
const guard = requireNoPendingAsks(ctx);
|
|
163
|
+
const guard = requireNoPendingAsks(ctx) ?? requireNoUnprocessedInbox(ctx);
|
|
118
164
|
if (guard) return guard;
|
|
119
165
|
if (outcome !== undefined) ctx.outcome = outcome;
|
|
120
166
|
concludeSession(ctx, {
|
package/src/discusser.js
CHANGED
|
@@ -17,6 +17,8 @@ import { Writable } from "node:stream";
|
|
|
17
17
|
import { resolve } from "node:path";
|
|
18
18
|
|
|
19
19
|
import { createAgentRunner } from "./agent-runner.js";
|
|
20
|
+
import { InboxPoller } from "./inbox-poller.js";
|
|
21
|
+
import { ReplyEmitter } from "./reply-emitter.js";
|
|
20
22
|
import { composeSystemPrompt } from "./profile-prompt.js";
|
|
21
23
|
import { SequenceCounter } from "./sequence-counter.js";
|
|
22
24
|
import { createMessageBus } from "./message-bus.js";
|
|
@@ -40,6 +42,7 @@ export const DISCUSS_SYSTEM_PROMPT =
|
|
|
40
42
|
"Answers arrive on your next turn as `[answer#N] <participant>: <text>` in your inbox.\n" +
|
|
41
43
|
"End your turn while Asks are pending. The system resumes you when answers arrive.\n" +
|
|
42
44
|
"Multiple `Ask` calls in one turn run participants in parallel.\n" +
|
|
45
|
+
"Use `Acknowledge` to post a brief message directly to the discussion thread — use it to respond to human follow-ups or give status updates while participants are working.\n" +
|
|
43
46
|
"End the discussion by calling `Adjourn` with a verdict and summary, or `Recess` only to wait on an external reply or duration.";
|
|
44
47
|
|
|
45
48
|
/**
|
|
@@ -79,7 +82,15 @@ export class Discusser {
|
|
|
79
82
|
* @param {string|null} [deps.discussionId]
|
|
80
83
|
* @param {SequenceCounter} [deps.counter]
|
|
81
84
|
*/
|
|
82
|
-
constructor({
|
|
85
|
+
constructor({
|
|
86
|
+
loop,
|
|
87
|
+
ctx,
|
|
88
|
+
output,
|
|
89
|
+
discussionId,
|
|
90
|
+
counter,
|
|
91
|
+
redactor,
|
|
92
|
+
inboxPoller,
|
|
93
|
+
}) {
|
|
83
94
|
if (!loop) throw new Error("loop is required");
|
|
84
95
|
if (!ctx) throw new Error("ctx is required");
|
|
85
96
|
if (!output) throw new Error("output is required");
|
|
@@ -90,6 +101,7 @@ export class Discusser {
|
|
|
90
101
|
this.discussionId = discussionId ?? null;
|
|
91
102
|
this.counter = counter ?? new SequenceCounter();
|
|
92
103
|
this.redactor = redactor;
|
|
104
|
+
this.inboxPoller = inboxPoller ?? null;
|
|
93
105
|
}
|
|
94
106
|
|
|
95
107
|
/**
|
|
@@ -150,6 +162,7 @@ export class Discusser {
|
|
|
150
162
|
...(this.ctx.rfcs?.length && { rfcs: this.ctx.rfcs }),
|
|
151
163
|
...(this.ctx.recessTrigger && { trigger: this.ctx.recessTrigger }),
|
|
152
164
|
...(this.discussionId && { discussion_id: this.discussionId }),
|
|
165
|
+
lastActedSeq: this.inboxPoller?.lastActedSeq ?? -1,
|
|
153
166
|
};
|
|
154
167
|
this.output.write(
|
|
155
168
|
JSON.stringify(
|
|
@@ -184,10 +197,14 @@ export class Discusser {
|
|
|
184
197
|
* @param {function} deps.query
|
|
185
198
|
* @param {import("stream").Writable} deps.output
|
|
186
199
|
* @param {number} [deps.maxTurns]
|
|
200
|
+
* @param {number} [deps.maxLeadTurns]
|
|
187
201
|
* @param {string} [deps.leadCwd]
|
|
188
202
|
* @param {string} [deps.profilesDir]
|
|
189
203
|
* @param {string} [deps.taskAmend]
|
|
190
204
|
* @param {object} deps.redactor
|
|
205
|
+
* @param {string|null} [deps.callbackUrl]
|
|
206
|
+
* @param {string|null} [deps.inboxUrl]
|
|
207
|
+
* @param {string|null} [deps.correlationId]
|
|
191
208
|
* @returns {Discusser}
|
|
192
209
|
*/
|
|
193
210
|
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: factory wires N runners + resume hydration paths
|
|
@@ -201,10 +218,14 @@ export function createDiscusser({
|
|
|
201
218
|
query,
|
|
202
219
|
output,
|
|
203
220
|
maxTurns,
|
|
221
|
+
maxLeadTurns,
|
|
204
222
|
leadCwd,
|
|
205
223
|
profilesDir,
|
|
206
224
|
taskAmend,
|
|
207
225
|
redactor,
|
|
226
|
+
callbackUrl,
|
|
227
|
+
inboxUrl,
|
|
228
|
+
correlationId,
|
|
208
229
|
}) {
|
|
209
230
|
if (!redactor) throw new Error("redactor is required");
|
|
210
231
|
const resolvedLeadCwd = resolve(leadCwd ?? ".");
|
|
@@ -236,13 +257,34 @@ export function createDiscusser({
|
|
|
236
257
|
participants: ["lead", ...resolvedConfigs.map((a) => a.name)],
|
|
237
258
|
});
|
|
238
259
|
|
|
260
|
+
const loopCounter = new SequenceCounter();
|
|
261
|
+
const emitter = new ReplyEmitter({
|
|
262
|
+
callbackUrl: callbackUrl ?? null,
|
|
263
|
+
correlationId: correlationId ?? null,
|
|
264
|
+
counter: loopCounter,
|
|
265
|
+
});
|
|
266
|
+
ctx.emitter = emitter;
|
|
267
|
+
|
|
268
|
+
const abortController = new AbortController();
|
|
269
|
+
const inboxPoller = inboxUrl
|
|
270
|
+
? new InboxPoller({
|
|
271
|
+
inboxUrl,
|
|
272
|
+
messageBus,
|
|
273
|
+
leadName: "lead",
|
|
274
|
+
signal: abortController.signal,
|
|
275
|
+
})
|
|
276
|
+
: null;
|
|
277
|
+
|
|
239
278
|
// Intercept answers routed to the lead — each becomes a discussion reply.
|
|
240
279
|
const originalAnswer = messageBus.answer.bind(messageBus);
|
|
241
280
|
messageBus.answer = (from, to, text, askId) => {
|
|
242
281
|
if (to === "lead" && from !== "@orchestrator") {
|
|
282
|
+
const seq = emitter.emit({ kind: "reply", body: text, agent: from });
|
|
243
283
|
ctx.replies.push({
|
|
244
284
|
body: text,
|
|
245
285
|
agent: from,
|
|
286
|
+
kind: "reply",
|
|
287
|
+
seq,
|
|
246
288
|
...(ctx.discussionId && { thread_id: ctx.discussionId }),
|
|
247
289
|
});
|
|
248
290
|
}
|
|
@@ -327,10 +369,14 @@ export function createDiscusser({
|
|
|
327
369
|
output,
|
|
328
370
|
leadName: "lead",
|
|
329
371
|
mode: "discussion",
|
|
372
|
+
maxLeadTurns: maxLeadTurns ?? undefined,
|
|
330
373
|
ctx,
|
|
331
374
|
taskAmend,
|
|
332
375
|
redactor,
|
|
376
|
+
inboxPoller,
|
|
377
|
+
abortController,
|
|
333
378
|
});
|
|
379
|
+
loop.counter = loopCounter;
|
|
334
380
|
|
|
335
381
|
discusser = new Discusser({
|
|
336
382
|
loop,
|
|
@@ -338,7 +384,8 @@ export function createDiscusser({
|
|
|
338
384
|
output,
|
|
339
385
|
discussionId: discussionId ?? null,
|
|
340
386
|
redactor,
|
|
341
|
-
counter:
|
|
387
|
+
counter: loopCounter,
|
|
388
|
+
inboxPoller,
|
|
342
389
|
});
|
|
343
390
|
return discusser;
|
|
344
391
|
}
|