@forwardimpact/libeval 0.1.50 → 0.1.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +11 -8
  2. package/bin/fit-benchmark.js +26 -27
  3. package/bin/fit-eval.js +36 -30
  4. package/bin/fit-trace.js +83 -57
  5. package/package.json +1 -1
  6. package/src/agent-runner.js +20 -12
  7. package/src/benchmark/apm-installer.js +48 -44
  8. package/src/benchmark/env-loader.js +35 -23
  9. package/src/benchmark/invariants.js +128 -0
  10. package/src/benchmark/judge.js +18 -19
  11. package/src/benchmark/npm-installer.js +33 -33
  12. package/src/benchmark/report.js +40 -26
  13. package/src/benchmark/result.js +11 -11
  14. package/src/benchmark/runner.js +90 -46
  15. package/src/benchmark/task-family.js +78 -65
  16. package/src/benchmark/workdir.js +100 -93
  17. package/src/commands/assert.js +30 -22
  18. package/src/commands/benchmark-invariants.js +74 -0
  19. package/src/commands/benchmark-report.js +24 -15
  20. package/src/commands/benchmark-run.js +16 -9
  21. package/src/commands/by-discussion.js +33 -23
  22. package/src/commands/callback.js +20 -11
  23. package/src/commands/discuss.js +31 -13
  24. package/src/commands/facilitate.js +21 -14
  25. package/src/commands/output.js +15 -13
  26. package/src/commands/run.js +28 -14
  27. package/src/commands/supervise.js +29 -19
  28. package/src/commands/task-input.js +10 -5
  29. package/src/commands/tee.js +24 -9
  30. package/src/commands/trace.js +181 -99
  31. package/src/discuss-tools.js +48 -2
  32. package/src/discusser.js +53 -2
  33. package/src/events/github.js +27 -5
  34. package/src/facilitator.js +4 -0
  35. package/src/inbox-poller.js +84 -0
  36. package/src/judge.js +4 -1
  37. package/src/message-bus.js +6 -0
  38. package/src/orchestration-loop.js +14 -4
  39. package/src/orchestration-toolkit.js +14 -0
  40. package/src/profile-prompt.js +22 -9
  41. package/src/redaction.js +31 -9
  42. package/src/reply-emitter.js +47 -0
  43. package/src/supervisor.js +4 -0
  44. package/src/tee-writer.js +4 -2
  45. package/src/trace-collector.js +9 -2
  46. package/src/trace-github.js +47 -27
  47. package/src/benchmark/scorer.js +0 -138
  48. package/src/commands/benchmark-score.js +0 -68
package/src/commands/trace.js CHANGED
@@ -1,153 +1,230 @@
1
- import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
2
1
  import { join, dirname } from "node:path";
2
+ import { isoTimestamp } from "@forwardimpact/libutil";
3
3
  import { createTraceCollector } from "@forwardimpact/libeval";
4
4
  import { createTraceQuery } from "../trace-query.js";
5
5
  import { createTraceGitHub } from "../trace-github.js";
6
6
  import { stripSignatures } from "../signature-filter.js";
7
7
 
8
+ // Every handler receives a libcli `InvocationContext`:
9
+ // ctx.options — parsed flag values (`cli.parse().values`)
10
+ // ctx.args — named positionals declared on the subcommand
11
+ // ctx.deps — host-injected collaborators: `{ runtime, config }`
12
+ // Handlers read/write the filesystem and stdout exclusively through
13
+ // `ctx.deps.runtime` and return `{ ok: true }` on success.
14
+
8
15
  // --- GitHub commands ---
9
16
 
10
17
  /**
11
18
  * List recent workflow runs matching a pattern.
12
- * @param {object} values - Parsed option values
13
- * @param {string[]} args - [pattern?]
14
- * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
19
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
15
20
  */
16
- export async function runRunsCommand(values, args, ctx) {
21
+ export async function runRunsCommand(ctx) {
22
+ const { runtime, config } = ctx.deps;
17
23
  const gh = await createTraceGitHub({
18
- token: ctx.config.ghToken(),
19
- repo: values.repo,
24
+ token: config.ghToken(),
25
+ repo: ctx.options.repo,
26
+ runtime,
20
27
  });
21
- const pattern = args[0] ?? "agent";
22
- const lookback = values.lookback ?? "7d";
28
+ const pattern = ctx.args.pattern ?? "agent";
29
+ const lookback = ctx.options.lookback ?? "7d";
23
30
  const runs = await gh.listRuns({ pattern, lookback });
24
- writeJSON(runs, values);
31
+ writeJSON(runtime, runs, ctx.options);
32
+ return { ok: true };
25
33
  }
26
34
 
27
35
  /**
28
36
  * Download a trace artifact and auto-convert to structured JSON.
29
- * @param {object} values - Parsed option values
30
- * @param {string[]} args - [run-id]
31
- * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
37
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
32
38
  */
33
- export async function runDownloadCommand(values, args, ctx) {
39
+ export async function runDownloadCommand(ctx) {
40
+ const { runtime, config } = ctx.deps;
34
41
  const gh = await createTraceGitHub({
35
- token: ctx.config.ghToken(),
36
- repo: values.repo,
42
+ token: config.ghToken(),
43
+ repo: ctx.options.repo,
44
+ runtime,
37
45
  });
38
- const result = await gh.downloadTrace(args[0], {
39
- dir: values.dir,
40
- name: values.artifact,
46
+ const result = await gh.downloadTrace(ctx.args["run-id"], {
47
+ dir: ctx.options.dir,
48
+ name: ctx.options.artifact,
41
49
  });
42
50
 
43
51
  const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
44
52
  if (ndjsonFile) {
45
53
  const ndjsonPath = join(result.dir, ndjsonFile);
46
- const collector = createTraceCollector();
47
- for (const line of readFileSync(ndjsonPath, "utf8").split("\n")) {
54
+ const collector = createTraceCollector({
55
+ now: () => isoTimestamp(runtime.clock.now()),
56
+ });
57
+ for (const line of runtime.fsSync
58
+ .readFileSync(ndjsonPath, "utf8")
59
+ .split("\n")) {
48
60
  collector.addLine(line);
49
61
  }
50
62
  const structuredPath = join(result.dir, "structured.json");
51
- writeFileSync(structuredPath, JSON.stringify(collector.toJSON()) + "\n");
63
+ runtime.fsSync.writeFileSync(
64
+ structuredPath,
65
+ JSON.stringify(collector.toJSON()) + "\n",
66
+ );
52
67
  result.files.push("structured.json");
53
68
  }
54
69
 
55
- writeJSON(result, values);
70
+ writeJSON(runtime, result, ctx.options);
71
+ return { ok: true };
56
72
  }
57
73
 
58
74
  // --- Query commands ---
59
75
 
60
- /** @param {object} values @param {string[]} args - [file] */
61
- export async function runOverviewCommand(values, args) {
62
- writeJSON(loadTrace(args[0]).overview(), values);
76
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
77
+ export async function runOverviewCommand(ctx) {
78
+ const { runtime } = ctx.deps;
79
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).overview(), ctx.options);
80
+ return { ok: true };
63
81
  }
64
82
 
65
- /** @param {object} values @param {string[]} args - [file] */
66
- export async function runCountCommand(values, args) {
67
- process.stdout.write(String(loadTrace(args[0]).count()) + "\n");
83
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
84
+ export async function runCountCommand(ctx) {
85
+ const { runtime } = ctx.deps;
86
+ runtime.proc.stdout.write(
87
+ String(loadTrace(runtime, ctx.args.file).count()) + "\n",
88
+ );
89
+ return { ok: true };
68
90
  }
69
91
 
70
- /** @param {object} values @param {string[]} args - [file, from, to] */
71
- export async function runBatchCommand(values, args) {
92
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
93
+ export async function runBatchCommand(ctx) {
94
+ const { runtime } = ctx.deps;
72
95
  writeJSON(
73
- loadTrace(args[0]).batch(parseInt(args[1], 10), parseInt(args[2], 10)),
74
- values,
96
+ runtime,
97
+ loadTrace(runtime, ctx.args.file).batch(
98
+ parseInt(ctx.args.from, 10),
99
+ parseInt(ctx.args.to, 10),
100
+ ),
101
+ ctx.options,
75
102
  );
103
+ return { ok: true };
76
104
  }
77
105
 
78
- /** @param {object} values @param {string[]} args - [file, N?] */
79
- export async function runHeadCommand(values, args) {
80
- const n = args[1] ? parseInt(args[1], 10) : 10;
81
- writeJSON(loadTrace(args[0]).head(n), values);
106
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
107
+ export async function runHeadCommand(ctx) {
108
+ const { runtime } = ctx.deps;
109
+ const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
110
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).head(n), ctx.options);
111
+ return { ok: true };
82
112
  }
83
113
 
84
- /** @param {object} values @param {string[]} args - [file, N?] */
85
- export async function runTailCommand(values, args) {
86
- const n = args[1] ? parseInt(args[1], 10) : 10;
87
- writeJSON(loadTrace(args[0]).tail(n), values);
114
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
115
+ export async function runTailCommand(ctx) {
116
+ const { runtime } = ctx.deps;
117
+ const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
118
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).tail(n), ctx.options);
119
+ return { ok: true };
88
120
  }
89
121
 
90
- /** @param {object} values @param {string[]} args - [file, pattern] */
91
- export async function runSearchCommand(values, args) {
92
- const limit = values.limit ? parseInt(values.limit, 10) : 50;
93
- const context = values.context ? parseInt(values.context, 10) : 0;
94
- const full = values.full ?? false;
122
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
123
+ export async function runSearchCommand(ctx) {
124
+ const { runtime } = ctx.deps;
125
+ const limit = ctx.options.limit ? parseInt(ctx.options.limit, 10) : 50;
126
+ const context = ctx.options.context ? parseInt(ctx.options.context, 10) : 0;
127
+ const full = ctx.options.full ?? false;
95
128
  writeJSON(
96
- loadTrace(args[0]).search(args[1], { limit, context, full }),
97
- values,
129
+ runtime,
130
+ loadTrace(runtime, ctx.args.file).search(ctx.args.pattern, {
131
+ limit,
132
+ context,
133
+ full,
134
+ }),
135
+ ctx.options,
98
136
  );
137
+ return { ok: true };
99
138
  }
100
139
 
101
- /** @param {object} values @param {string[]} args - [file] */
102
- export async function runToolsCommand(values, args) {
103
- writeJSON(loadTrace(args[0]).toolFrequency(), values);
140
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
141
+ export async function runToolsCommand(ctx) {
142
+ const { runtime } = ctx.deps;
143
+ writeJSON(
144
+ runtime,
145
+ loadTrace(runtime, ctx.args.file).toolFrequency(),
146
+ ctx.options,
147
+ );
148
+ return { ok: true };
104
149
  }
105
150
 
106
- /** @param {object} values @param {string[]} args - [file, name] */
107
- export async function runToolCommand(values, args) {
108
- writeJSON(loadTrace(args[0]).tool(args[1]), values);
151
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
152
+ export async function runToolCommand(ctx) {
153
+ const { runtime } = ctx.deps;
154
+ writeJSON(
155
+ runtime,
156
+ loadTrace(runtime, ctx.args.file).tool(ctx.args.name),
157
+ ctx.options,
158
+ );
159
+ return { ok: true };
109
160
  }
110
161
 
111
- /** @param {object} values @param {string[]} args - [file] */
112
- export async function runErrorsCommand(values, args) {
113
- writeJSON(loadTrace(args[0]).errors(), values);
162
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
163
+ export async function runErrorsCommand(ctx) {
164
+ const { runtime } = ctx.deps;
165
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).errors(), ctx.options);
166
+ return { ok: true };
114
167
  }
115
168
 
116
- /** @param {object} values @param {string[]} args - [file] */
117
- export async function runReasoningCommand(values, args) {
118
- const from = values.from ? parseInt(values.from, 10) : undefined;
119
- const to = values.to ? parseInt(values.to, 10) : undefined;
120
- writeJSON(loadTrace(args[0]).reasoning({ from, to }), values);
169
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
170
+ export async function runReasoningCommand(ctx) {
171
+ const { runtime } = ctx.deps;
172
+ const from = ctx.options.from ? parseInt(ctx.options.from, 10) : undefined;
173
+ const to = ctx.options.to ? parseInt(ctx.options.to, 10) : undefined;
174
+ writeJSON(
175
+ runtime,
176
+ loadTrace(runtime, ctx.args.file).reasoning({ from, to }),
177
+ ctx.options,
178
+ );
179
+ return { ok: true };
121
180
  }
122
181
 
123
- /** @param {object} values @param {string[]} args - [file] */
124
- export async function runTimelineCommand(values, args) {
125
- const lines = loadTrace(args[0]).timeline();
126
- process.stdout.write(lines.join("\n") + "\n");
182
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
183
+ export async function runTimelineCommand(ctx) {
184
+ const { runtime } = ctx.deps;
185
+ const lines = loadTrace(runtime, ctx.args.file).timeline();
186
+ runtime.proc.stdout.write(lines.join("\n") + "\n");
187
+ return { ok: true };
127
188
  }
128
189
 
129
- /** @param {object} values @param {string[]} args - [file] */
130
- export async function runStatsCommand(values, args) {
131
- writeJSON(loadTrace(args[0]).stats(), values);
190
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
191
+ export async function runStatsCommand(ctx) {
192
+ const { runtime } = ctx.deps;
193
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).stats(), ctx.options);
194
+ return { ok: true };
132
195
  }
133
196
 
134
- /** @param {object} values @param {string[]} args - [file] */
135
- export async function runInitCommand(values, args) {
136
- writeJSON(loadTrace(args[0]).init(), values);
197
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
198
+ export async function runInitCommand(ctx) {
199
+ const { runtime } = ctx.deps;
200
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).init(), ctx.options);
201
+ return { ok: true };
137
202
  }
138
203
 
139
- /** @param {object} values @param {string[]} args - [file, index] */
140
- export async function runTurnCommand(values, args) {
141
- writeJSON(loadTrace(args[0]).turn(parseInt(args[1], 10)), values);
204
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
205
+ export async function runTurnCommand(ctx) {
206
+ const { runtime } = ctx.deps;
207
+ writeJSON(
208
+ runtime,
209
+ loadTrace(runtime, ctx.args.file).turn(parseInt(ctx.args.index, 10)),
210
+ ctx.options,
211
+ );
212
+ return { ok: true };
142
213
  }
143
214
 
144
- /** @param {object} values @param {string[]} args - [file] */
145
- export async function runFilterCommand(values, args) {
215
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
216
+ export async function runFilterCommand(ctx) {
217
+ const { runtime } = ctx.deps;
146
218
  const opts = {};
147
- if (values.role) opts.role = values.role;
148
- if (values.tool) opts.toolName = values.tool;
149
- if (values.error) opts.isError = true;
150
- writeJSON(loadTrace(args[0]).filter(opts), values);
219
+ if (ctx.options.role) opts.role = ctx.options.role;
220
+ if (ctx.options.tool) opts.toolName = ctx.options.tool;
221
+ if (ctx.options.error) opts.isError = true;
222
+ writeJSON(
223
+ runtime,
224
+ loadTrace(runtime, ctx.args.file).filter(opts),
225
+ ctx.options,
226
+ );
227
+ return { ok: true };
151
228
  }
152
229
 
153
230
  // --- Split command ---
@@ -168,24 +245,24 @@ const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]);
168
245
  * `staff-engineer`) classify as agents with the profile in the participant
169
246
  * slot. Orchestrator events and invalid source names are dropped.
170
247
  *
171
- * @param {object} values - Parsed option values
172
- * @param {string[]} args - [file]
248
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
173
249
  */
174
- export async function runSplitCommand(values, args) {
175
- const file = args[0];
176
- if (!file) throw new Error("split: missing input file");
250
+ export async function runSplitCommand(ctx) {
251
+ const { runtime } = ctx.deps;
252
+ const file = ctx.args.file;
253
+ if (!file) return { ok: false, code: 1, error: "split: missing input file" };
177
254
 
178
- const mode = values.mode;
179
- if (!mode) throw new Error("split: --mode is required");
255
+ const mode = ctx.options.mode;
256
+ if (!mode) return { ok: false, code: 1, error: "split: --mode is required" };
180
257
  if (!["run", "supervise", "facilitate"].includes(mode)) {
181
- throw new Error(`split: invalid --mode "${mode}"`);
258
+ return { ok: false, code: 1, error: `split: invalid --mode "${mode}"` };
182
259
  }
183
260
 
184
- const caseId = values.case ?? "default";
185
- const outputDir = values["output-dir"] || dirname(file);
186
- mkdirSync(outputDir, { recursive: true });
261
+ const caseId = ctx.options.case ?? "default";
262
+ const outputDir = ctx.options["output-dir"] || dirname(file);
263
+ runtime.fsSync.mkdirSync(outputDir, { recursive: true });
187
264
 
188
- const buckets = parseBuckets(readFileSync(file, "utf8"));
265
+ const buckets = parseBuckets(runtime.fsSync.readFileSync(file, "utf8"));
189
266
 
190
267
  for (const [source, lines] of buckets.entries()) {
191
268
  if (!VALID_SOURCE_NAME.test(source)) continue;
@@ -194,8 +271,9 @@ export async function runSplitCommand(values, args) {
194
271
  outputDir,
195
272
  `trace--${caseId}--${source}.${role}.ndjson`,
196
273
  );
197
- writeFileSync(outPath, lines.join("\n") + "\n");
274
+ runtime.fsSync.writeFileSync(outPath, lines.join("\n") + "\n");
198
275
  }
276
+ return { ok: true };
199
277
  }
200
278
 
201
279
  /**
@@ -234,11 +312,12 @@ function parseBuckets(content) {
234
312
 
235
313
  /**
236
314
  * Load a trace file. Supports structured JSON and raw NDJSON.
315
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
237
316
  * @param {string} file
238
317
  * @returns {import("../trace-query.js").TraceQuery}
239
318
  */
240
- function loadTrace(file) {
241
- const content = readFileSync(file, "utf8");
319
+ function loadTrace(runtime, file) {
320
+ const content = runtime.fsSync.readFileSync(file, "utf8");
242
321
 
243
322
  try {
244
323
  const parsed = JSON.parse(content);
@@ -249,7 +328,9 @@ function loadTrace(file) {
249
328
  // Not valid JSON — fall through to NDJSON.
250
329
  }
251
330
 
252
- const collector = createTraceCollector();
331
+ const collector = createTraceCollector({
332
+ now: () => isoTimestamp(runtime.clock.now()),
333
+ });
253
334
  for (const line of content.split("\n")) {
254
335
  collector.addLine(line);
255
336
  }
@@ -260,10 +341,11 @@ function loadTrace(file) {
260
341
  * Write JSON output to stdout. By default strips `thinking.signature`
261
342
  * base64 blobs from the payload so they don't dominate terminal output;
262
343
  * pass `--signatures` (surfaced as `values.signatures`) to keep them.
344
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
263
345
  * @param {*} data
264
346
  * @param {object} [values]
265
347
  */
266
- function writeJSON(data, values = {}) {
348
+ function writeJSON(runtime, data, values = {}) {
267
349
  const output = values.signatures ? data : stripSignatures(data);
268
- process.stdout.write(JSON.stringify(output, null, 2) + "\n");
350
+ runtime.proc.stdout.write(JSON.stringify(output, null, 2) + "\n");
269
351
  }
package/src/discuss-tools.js CHANGED
@@ -27,6 +27,7 @@ import {
27
27
  RECESS_DESC,
28
28
  requestForCommentTool,
29
29
  requireNoPendingAsks,
30
+ requireNoUnprocessedInbox,
30
31
  } from "./orchestration-toolkit.js";
31
32
 
32
33
  /** System prompt for discuss-mode agent participants. L0 mechanics only per COALIGNED. */
@@ -63,6 +64,26 @@ const RESUME_TRIGGER_SCHEMA = z.discriminatedUnion("kind", [
63
64
  export function createDiscussLeadToolServer(ctx) {
64
65
  return orchestrationServer([
65
66
  ...baseTools(ctx, { from: "lead", defaultTo: undefined, broadcast: true }),
67
+ tool(
68
+ "Acknowledge",
69
+ "Post a brief message directly to the discussion thread. Use when responding to a human follow-up or providing a status update while participants are working.",
70
+ {
71
+ message: z.string().describe("Message to post on the thread"),
72
+ },
73
+ async ({ message }) => {
74
+ const seq =
75
+ ctx.emitter?.emit({ kind: "ack", body: message, agent: "lead" }) ??
76
+ -1;
77
+ ctx.replies.push({
78
+ body: message,
79
+ agent: "lead",
80
+ kind: "ack",
81
+ seq,
82
+ ...(ctx.discussionId && { thread_id: ctx.discussionId }),
83
+ });
84
+ return { content: [{ type: "text", text: "Posted." }] };
85
+ },
86
+ ),
66
87
  tool(
67
88
  "Recess",
68
89
  RECESS_DESC,
@@ -82,11 +103,36 @@ export function createDiscussLeadToolServer(ctx) {
82
103
  ]);
83
104
  }
84
105
 
106
+ const ACKNOWLEDGE_DESC =
107
+ "Acknowledge an Ask before starting work. Posts a visible comment on the thread. Does not discharge the Ask — you still owe an Answer.";
108
+
85
109
  /** Discuss-mode agent tool server. */
86
110
  export function createDiscussAgentToolServer(ctx, { from }) {
87
111
  return orchestrationServer([
88
112
  ...baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
89
113
  requestForCommentTool(ctx),
114
+ tool(
115
+ "Acknowledge",
116
+ ACKNOWLEDGE_DESC,
117
+ {
118
+ message: z
119
+ .string()
120
+ .describe("Brief acknowledgement to post on the thread"),
121
+ askId: z.number().optional().describe("The ask being acknowledged"),
122
+ },
123
+ async ({ message }) => {
124
+ const seq =
125
+ ctx.emitter?.emit({ kind: "ack", body: message, agent: from }) ?? -1;
126
+ ctx.replies.push({
127
+ body: message,
128
+ agent: from,
129
+ kind: "ack",
130
+ seq,
131
+ ...(ctx.discussionId && { thread_id: ctx.discussionId }),
132
+ });
133
+ return { content: [{ type: "text", text: "Acknowledged." }] };
134
+ },
135
+ ),
90
136
  ]);
91
137
  }
92
138
 
@@ -99,7 +145,7 @@ export function createDiscussAgentToolServer(ctx, { from }) {
99
145
  */
100
146
  export function createRecessHandler(ctx) {
101
147
  return async ({ reason, trigger }) => {
102
- const guard = requireNoPendingAsks(ctx);
148
+ const guard = requireNoPendingAsks(ctx) ?? requireNoUnprocessedInbox(ctx);
103
149
  if (guard) return guard;
104
150
  ctx.recessTrigger = trigger;
105
151
  concludeSession(ctx, {
@@ -114,7 +160,7 @@ export function createRecessHandler(ctx) {
114
160
  /** Adjourn handler — ends the discussion with a verdict. */
115
161
  export function createAdjournHandler(ctx) {
116
162
  return async ({ verdict, summary, outcome }) => {
117
- const guard = requireNoPendingAsks(ctx);
163
+ const guard = requireNoPendingAsks(ctx) ?? requireNoUnprocessedInbox(ctx);
118
164
  if (guard) return guard;
119
165
  if (outcome !== undefined) ctx.outcome = outcome;
120
166
  concludeSession(ctx, {
package/src/discusser.js CHANGED
@@ -17,6 +17,8 @@ import { Writable } from "node:stream";
17
17
  import { resolve } from "node:path";
18
18
 
19
19
  import { createAgentRunner } from "./agent-runner.js";
20
+ import { InboxPoller } from "./inbox-poller.js";
21
+ import { ReplyEmitter } from "./reply-emitter.js";
20
22
  import { composeSystemPrompt } from "./profile-prompt.js";
21
23
  import { SequenceCounter } from "./sequence-counter.js";
22
24
  import { createMessageBus } from "./message-bus.js";
@@ -40,6 +42,7 @@ export const DISCUSS_SYSTEM_PROMPT =
40
42
  "Answers arrive on your next turn as `[answer#N] <participant>: <text>` in your inbox.\n" +
41
43
  "End your turn while Asks are pending. The system resumes you when answers arrive.\n" +
42
44
  "Multiple `Ask` calls in one turn run participants in parallel.\n" +
45
+ "Use `Acknowledge` to post a brief message directly to the discussion thread — use it to respond to human follow-ups or give status updates while participants are working.\n" +
43
46
  "End the discussion by calling `Adjourn` with a verdict and summary, or `Recess` only to wait on an external reply or duration.";
44
47
 
45
48
  /**
@@ -79,7 +82,15 @@ export class Discusser {
79
82
  * @param {string|null} [deps.discussionId]
80
83
  * @param {SequenceCounter} [deps.counter]
81
84
  */
82
- constructor({ loop, ctx, output, discussionId, counter, redactor }) {
85
+ constructor({
86
+ loop,
87
+ ctx,
88
+ output,
89
+ discussionId,
90
+ counter,
91
+ redactor,
92
+ inboxPoller,
93
+ }) {
83
94
  if (!loop) throw new Error("loop is required");
84
95
  if (!ctx) throw new Error("ctx is required");
85
96
  if (!output) throw new Error("output is required");
@@ -90,6 +101,7 @@ export class Discusser {
90
101
  this.discussionId = discussionId ?? null;
91
102
  this.counter = counter ?? new SequenceCounter();
92
103
  this.redactor = redactor;
104
+ this.inboxPoller = inboxPoller ?? null;
93
105
  }
94
106
 
95
107
  /**
@@ -150,6 +162,7 @@ export class Discusser {
150
162
  ...(this.ctx.rfcs?.length && { rfcs: this.ctx.rfcs }),
151
163
  ...(this.ctx.recessTrigger && { trigger: this.ctx.recessTrigger }),
152
164
  ...(this.discussionId && { discussion_id: this.discussionId }),
165
+ lastActedSeq: this.inboxPoller?.lastActedSeq ?? -1,
153
166
  };
154
167
  this.output.write(
155
168
  JSON.stringify(
@@ -184,10 +197,14 @@ export class Discusser {
184
197
  * @param {function} deps.query
185
198
  * @param {import("stream").Writable} deps.output
186
199
  * @param {number} [deps.maxTurns]
200
+ * @param {number} [deps.maxLeadTurns]
187
201
  * @param {string} [deps.leadCwd]
188
202
  * @param {string} [deps.profilesDir]
189
203
  * @param {string} [deps.taskAmend]
190
204
  * @param {object} deps.redactor
205
+ * @param {string|null} [deps.callbackUrl]
206
+ * @param {string|null} [deps.inboxUrl]
207
+ * @param {string|null} [deps.correlationId]
191
208
  * @returns {Discusser}
192
209
  */
193
210
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: factory wires N runners + resume hydration paths
@@ -201,12 +218,18 @@ export function createDiscusser({
201
218
  query,
202
219
  output,
203
220
  maxTurns,
221
+ maxLeadTurns,
204
222
  leadCwd,
205
223
  profilesDir,
206
224
  taskAmend,
207
225
  redactor,
226
+ callbackUrl,
227
+ inboxUrl,
228
+ correlationId,
229
+ runtime,
208
230
  }) {
209
231
  if (!redactor) throw new Error("redactor is required");
232
+ if (!runtime) throw new Error("runtime is required");
210
233
  const resolvedLeadCwd = resolve(leadCwd ?? ".");
211
234
  const resolvedProfilesDir =
212
235
  profilesDir ?? resolve(resolvedLeadCwd, ".claude/agents");
@@ -236,13 +259,34 @@ export function createDiscusser({
236
259
  participants: ["lead", ...resolvedConfigs.map((a) => a.name)],
237
260
  });
238
261
 
262
+ const loopCounter = new SequenceCounter();
263
+ const emitter = new ReplyEmitter({
264
+ callbackUrl: callbackUrl ?? null,
265
+ correlationId: correlationId ?? null,
266
+ counter: loopCounter,
267
+ });
268
+ ctx.emitter = emitter;
269
+
270
+ const abortController = new AbortController();
271
+ const inboxPoller = inboxUrl
272
+ ? new InboxPoller({
273
+ inboxUrl,
274
+ messageBus,
275
+ leadName: "lead",
276
+ signal: abortController.signal,
277
+ })
278
+ : null;
279
+
239
280
  // Intercept answers routed to the lead — each becomes a discussion reply.
240
281
  const originalAnswer = messageBus.answer.bind(messageBus);
241
282
  messageBus.answer = (from, to, text, askId) => {
242
283
  if (to === "lead" && from !== "@orchestrator") {
284
+ const seq = emitter.emit({ kind: "reply", body: text, agent: from });
243
285
  ctx.replies.push({
244
286
  body: text,
245
287
  agent: from,
288
+ kind: "reply",
289
+ seq,
246
290
  ...(ctx.discussionId && { thread_id: ctx.discussionId }),
247
291
  });
248
292
  }
@@ -284,6 +328,7 @@ export function createDiscusser({
284
328
  profile: config.agentProfile,
285
329
  profilesDir: resolvedProfilesDir,
286
330
  trailer: agentTrailer,
331
+ runtime,
287
332
  }),
288
333
  redactor,
289
334
  });
@@ -316,6 +361,7 @@ export function createDiscusser({
316
361
  profile: leadProfile,
317
362
  profilesDir: resolvedProfilesDir,
318
363
  trailer: DISCUSS_SYSTEM_PROMPT,
364
+ runtime,
319
365
  }),
320
366
  redactor,
321
367
  });
@@ -327,10 +373,14 @@ export function createDiscusser({
327
373
  output,
328
374
  leadName: "lead",
329
375
  mode: "discussion",
376
+ maxLeadTurns: maxLeadTurns ?? undefined,
330
377
  ctx,
331
378
  taskAmend,
332
379
  redactor,
380
+ inboxPoller,
381
+ abortController,
333
382
  });
383
+ loop.counter = loopCounter;
334
384
 
335
385
  discusser = new Discusser({
336
386
  loop,
@@ -338,7 +388,8 @@ export function createDiscusser({
338
388
  output,
339
389
  discussionId: discussionId ?? null,
340
390
  redactor,
341
- counter: loop.counter,
391
+ counter: loopCounter,
392
+ inboxPoller,
342
393
  });
343
394
  return discusser;
344
395
  }