@forwardimpact/libeval 0.1.49 → 0.1.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +11 -8
  2. package/bin/fit-benchmark.js +26 -27
  3. package/bin/fit-eval.js +76 -78
  4. package/bin/fit-trace.js +83 -57
  5. package/package.json +2 -2
  6. package/src/agent-runner.js +23 -13
  7. package/src/benchmark/env-loader.js +35 -23
  8. package/src/benchmark/{scorer.js → invariants.js} +14 -12
  9. package/src/benchmark/judge.js +5 -8
  10. package/src/benchmark/npm-installer.js +87 -0
  11. package/src/benchmark/report.js +15 -15
  12. package/src/benchmark/result.js +11 -11
  13. package/src/benchmark/runner.js +17 -11
  14. package/src/benchmark/task-family.js +6 -4
  15. package/src/benchmark/workdir.js +23 -3
  16. package/src/commands/assert.js +30 -22
  17. package/src/commands/benchmark-invariants.js +74 -0
  18. package/src/commands/benchmark-report.js +23 -15
  19. package/src/commands/benchmark-run.js +22 -7
  20. package/src/commands/by-discussion.js +29 -18
  21. package/src/commands/callback.js +20 -11
  22. package/src/commands/discuss.js +30 -21
  23. package/src/commands/facilitate.js +20 -21
  24. package/src/commands/output.js +11 -12
  25. package/src/commands/run.js +24 -21
  26. package/src/commands/supervise.js +27 -27
  27. package/src/commands/task-input.js +54 -0
  28. package/src/commands/trace.js +174 -97
  29. package/src/discuss-tools.js +48 -2
  30. package/src/discusser.js +49 -2
  31. package/src/events/github.js +155 -0
  32. package/src/inbox-poller.js +84 -0
  33. package/src/index.js +10 -0
  34. package/src/judge.js +1 -1
  35. package/src/message-bus.js +6 -0
  36. package/src/orchestration-loop.js +19 -5
  37. package/src/orchestration-toolkit.js +14 -0
  38. package/src/redaction.js +31 -9
  39. package/src/reply-emitter.js +47 -0
  40. package/src/commands/benchmark-score.js +0 -68
package/src/commands/trace.js CHANGED
@@ -1,153 +1,227 @@
1
- import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
2
1
  import { join, dirname } from "node:path";
3
2
  import { createTraceCollector } from "@forwardimpact/libeval";
4
3
  import { createTraceQuery } from "../trace-query.js";
5
4
  import { createTraceGitHub } from "../trace-github.js";
6
5
  import { stripSignatures } from "../signature-filter.js";
7
6
 
7
+ // Every handler receives a libcli `InvocationContext`:
8
+ // ctx.options — parsed flag values (`cli.parse().values`)
9
+ // ctx.args — named positionals declared on the subcommand
10
+ // ctx.deps — host-injected collaborators: `{ runtime, config }`
11
+ // Handlers read/write the filesystem and stdout exclusively through
12
+ // `ctx.deps.runtime` and return `{ ok: true }` on success.
13
+
8
14
  // --- GitHub commands ---
9
15
 
10
16
  /**
11
17
  * List recent workflow runs matching a pattern.
12
- * @param {object} values - Parsed option values
13
- * @param {string[]} args - [pattern?]
14
- * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
18
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
15
19
  */
16
- export async function runRunsCommand(values, args, ctx) {
20
+ export async function runRunsCommand(ctx) {
21
+ const { runtime, config } = ctx.deps;
17
22
  const gh = await createTraceGitHub({
18
- token: ctx.config.ghToken(),
19
- repo: values.repo,
23
+ token: config.ghToken(),
24
+ repo: ctx.options.repo,
25
+ runtime,
20
26
  });
21
- const pattern = args[0] ?? "agent";
22
- const lookback = values.lookback ?? "7d";
27
+ const pattern = ctx.args.pattern ?? "agent";
28
+ const lookback = ctx.options.lookback ?? "7d";
23
29
  const runs = await gh.listRuns({ pattern, lookback });
24
- writeJSON(runs, values);
30
+ writeJSON(runtime, runs, ctx.options);
31
+ return { ok: true };
25
32
  }
26
33
 
27
34
  /**
28
35
  * Download a trace artifact and auto-convert to structured JSON.
29
- * @param {object} values - Parsed option values
30
- * @param {string[]} args - [run-id]
31
- * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
36
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
32
37
  */
33
- export async function runDownloadCommand(values, args, ctx) {
38
+ export async function runDownloadCommand(ctx) {
39
+ const { runtime, config } = ctx.deps;
34
40
  const gh = await createTraceGitHub({
35
- token: ctx.config.ghToken(),
36
- repo: values.repo,
41
+ token: config.ghToken(),
42
+ repo: ctx.options.repo,
43
+ runtime,
37
44
  });
38
- const result = await gh.downloadTrace(args[0], {
39
- dir: values.dir,
40
- name: values.artifact,
45
+ const result = await gh.downloadTrace(ctx.args["run-id"], {
46
+ dir: ctx.options.dir,
47
+ name: ctx.options.artifact,
41
48
  });
42
49
 
43
50
  const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
44
51
  if (ndjsonFile) {
45
52
  const ndjsonPath = join(result.dir, ndjsonFile);
46
53
  const collector = createTraceCollector();
47
- for (const line of readFileSync(ndjsonPath, "utf8").split("\n")) {
54
+ for (const line of runtime.fsSync
55
+ .readFileSync(ndjsonPath, "utf8")
56
+ .split("\n")) {
48
57
  collector.addLine(line);
49
58
  }
50
59
  const structuredPath = join(result.dir, "structured.json");
51
- writeFileSync(structuredPath, JSON.stringify(collector.toJSON()) + "\n");
60
+ runtime.fsSync.writeFileSync(
61
+ structuredPath,
62
+ JSON.stringify(collector.toJSON()) + "\n",
63
+ );
52
64
  result.files.push("structured.json");
53
65
  }
54
66
 
55
- writeJSON(result, values);
67
+ writeJSON(runtime, result, ctx.options);
68
+ return { ok: true };
56
69
  }
57
70
 
58
71
  // --- Query commands ---
59
72
 
60
- /** @param {object} values @param {string[]} args - [file] */
61
- export async function runOverviewCommand(values, args) {
62
- writeJSON(loadTrace(args[0]).overview(), values);
73
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
74
+ export async function runOverviewCommand(ctx) {
75
+ const { runtime } = ctx.deps;
76
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).overview(), ctx.options);
77
+ return { ok: true };
63
78
  }
64
79
 
65
- /** @param {object} values @param {string[]} args - [file] */
66
- export async function runCountCommand(values, args) {
67
- process.stdout.write(String(loadTrace(args[0]).count()) + "\n");
80
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
81
+ export async function runCountCommand(ctx) {
82
+ const { runtime } = ctx.deps;
83
+ runtime.proc.stdout.write(
84
+ String(loadTrace(runtime, ctx.args.file).count()) + "\n",
85
+ );
86
+ return { ok: true };
68
87
  }
69
88
 
70
- /** @param {object} values @param {string[]} args - [file, from, to] */
71
- export async function runBatchCommand(values, args) {
89
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
90
+ export async function runBatchCommand(ctx) {
91
+ const { runtime } = ctx.deps;
72
92
  writeJSON(
73
- loadTrace(args[0]).batch(parseInt(args[1], 10), parseInt(args[2], 10)),
74
- values,
93
+ runtime,
94
+ loadTrace(runtime, ctx.args.file).batch(
95
+ parseInt(ctx.args.from, 10),
96
+ parseInt(ctx.args.to, 10),
97
+ ),
98
+ ctx.options,
75
99
  );
100
+ return { ok: true };
76
101
  }
77
102
 
78
- /** @param {object} values @param {string[]} args - [file, N?] */
79
- export async function runHeadCommand(values, args) {
80
- const n = args[1] ? parseInt(args[1], 10) : 10;
81
- writeJSON(loadTrace(args[0]).head(n), values);
103
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
104
+ export async function runHeadCommand(ctx) {
105
+ const { runtime } = ctx.deps;
106
+ const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
107
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).head(n), ctx.options);
108
+ return { ok: true };
82
109
  }
83
110
 
84
- /** @param {object} values @param {string[]} args - [file, N?] */
85
- export async function runTailCommand(values, args) {
86
- const n = args[1] ? parseInt(args[1], 10) : 10;
87
- writeJSON(loadTrace(args[0]).tail(n), values);
111
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
112
+ export async function runTailCommand(ctx) {
113
+ const { runtime } = ctx.deps;
114
+ const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
115
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).tail(n), ctx.options);
116
+ return { ok: true };
88
117
  }
89
118
 
90
- /** @param {object} values @param {string[]} args - [file, pattern] */
91
- export async function runSearchCommand(values, args) {
92
- const limit = values.limit ? parseInt(values.limit, 10) : 50;
93
- const context = values.context ? parseInt(values.context, 10) : 0;
94
- const full = values.full ?? false;
119
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
120
+ export async function runSearchCommand(ctx) {
121
+ const { runtime } = ctx.deps;
122
+ const limit = ctx.options.limit ? parseInt(ctx.options.limit, 10) : 50;
123
+ const context = ctx.options.context ? parseInt(ctx.options.context, 10) : 0;
124
+ const full = ctx.options.full ?? false;
95
125
  writeJSON(
96
- loadTrace(args[0]).search(args[1], { limit, context, full }),
97
- values,
126
+ runtime,
127
+ loadTrace(runtime, ctx.args.file).search(ctx.args.pattern, {
128
+ limit,
129
+ context,
130
+ full,
131
+ }),
132
+ ctx.options,
98
133
  );
134
+ return { ok: true };
99
135
  }
100
136
 
101
- /** @param {object} values @param {string[]} args - [file] */
102
- export async function runToolsCommand(values, args) {
103
- writeJSON(loadTrace(args[0]).toolFrequency(), values);
137
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
138
+ export async function runToolsCommand(ctx) {
139
+ const { runtime } = ctx.deps;
140
+ writeJSON(
141
+ runtime,
142
+ loadTrace(runtime, ctx.args.file).toolFrequency(),
143
+ ctx.options,
144
+ );
145
+ return { ok: true };
104
146
  }
105
147
 
106
- /** @param {object} values @param {string[]} args - [file, name] */
107
- export async function runToolCommand(values, args) {
108
- writeJSON(loadTrace(args[0]).tool(args[1]), values);
148
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
149
+ export async function runToolCommand(ctx) {
150
+ const { runtime } = ctx.deps;
151
+ writeJSON(
152
+ runtime,
153
+ loadTrace(runtime, ctx.args.file).tool(ctx.args.name),
154
+ ctx.options,
155
+ );
156
+ return { ok: true };
109
157
  }
110
158
 
111
- /** @param {object} values @param {string[]} args - [file] */
112
- export async function runErrorsCommand(values, args) {
113
- writeJSON(loadTrace(args[0]).errors(), values);
159
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
160
+ export async function runErrorsCommand(ctx) {
161
+ const { runtime } = ctx.deps;
162
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).errors(), ctx.options);
163
+ return { ok: true };
114
164
  }
115
165
 
116
- /** @param {object} values @param {string[]} args - [file] */
117
- export async function runReasoningCommand(values, args) {
118
- const from = values.from ? parseInt(values.from, 10) : undefined;
119
- const to = values.to ? parseInt(values.to, 10) : undefined;
120
- writeJSON(loadTrace(args[0]).reasoning({ from, to }), values);
166
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
167
+ export async function runReasoningCommand(ctx) {
168
+ const { runtime } = ctx.deps;
169
+ const from = ctx.options.from ? parseInt(ctx.options.from, 10) : undefined;
170
+ const to = ctx.options.to ? parseInt(ctx.options.to, 10) : undefined;
171
+ writeJSON(
172
+ runtime,
173
+ loadTrace(runtime, ctx.args.file).reasoning({ from, to }),
174
+ ctx.options,
175
+ );
176
+ return { ok: true };
121
177
  }
122
178
 
123
- /** @param {object} values @param {string[]} args - [file] */
124
- export async function runTimelineCommand(values, args) {
125
- const lines = loadTrace(args[0]).timeline();
126
- process.stdout.write(lines.join("\n") + "\n");
179
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
180
+ export async function runTimelineCommand(ctx) {
181
+ const { runtime } = ctx.deps;
182
+ const lines = loadTrace(runtime, ctx.args.file).timeline();
183
+ runtime.proc.stdout.write(lines.join("\n") + "\n");
184
+ return { ok: true };
127
185
  }
128
186
 
129
- /** @param {object} values @param {string[]} args - [file] */
130
- export async function runStatsCommand(values, args) {
131
- writeJSON(loadTrace(args[0]).stats(), values);
187
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
188
+ export async function runStatsCommand(ctx) {
189
+ const { runtime } = ctx.deps;
190
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).stats(), ctx.options);
191
+ return { ok: true };
132
192
  }
133
193
 
134
- /** @param {object} values @param {string[]} args - [file] */
135
- export async function runInitCommand(values, args) {
136
- writeJSON(loadTrace(args[0]).init(), values);
194
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
195
+ export async function runInitCommand(ctx) {
196
+ const { runtime } = ctx.deps;
197
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).init(), ctx.options);
198
+ return { ok: true };
137
199
  }
138
200
 
139
- /** @param {object} values @param {string[]} args - [file, index] */
140
- export async function runTurnCommand(values, args) {
141
- writeJSON(loadTrace(args[0]).turn(parseInt(args[1], 10)), values);
201
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
202
+ export async function runTurnCommand(ctx) {
203
+ const { runtime } = ctx.deps;
204
+ writeJSON(
205
+ runtime,
206
+ loadTrace(runtime, ctx.args.file).turn(parseInt(ctx.args.index, 10)),
207
+ ctx.options,
208
+ );
209
+ return { ok: true };
142
210
  }
143
211
 
144
- /** @param {object} values @param {string[]} args - [file] */
145
- export async function runFilterCommand(values, args) {
212
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
213
+ export async function runFilterCommand(ctx) {
214
+ const { runtime } = ctx.deps;
146
215
  const opts = {};
147
- if (values.role) opts.role = values.role;
148
- if (values.tool) opts.toolName = values.tool;
149
- if (values.error) opts.isError = true;
150
- writeJSON(loadTrace(args[0]).filter(opts), values);
216
+ if (ctx.options.role) opts.role = ctx.options.role;
217
+ if (ctx.options.tool) opts.toolName = ctx.options.tool;
218
+ if (ctx.options.error) opts.isError = true;
219
+ writeJSON(
220
+ runtime,
221
+ loadTrace(runtime, ctx.args.file).filter(opts),
222
+ ctx.options,
223
+ );
224
+ return { ok: true };
151
225
  }
152
226
 
153
227
  // --- Split command ---
@@ -168,24 +242,24 @@ const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]);
168
242
  * `staff-engineer`) classify as agents with the profile in the participant
169
243
  * slot. Orchestrator events and invalid source names are dropped.
170
244
  *
171
- * @param {object} values - Parsed option values
172
- * @param {string[]} args - [file]
245
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
173
246
  */
174
- export async function runSplitCommand(values, args) {
175
- const file = args[0];
176
- if (!file) throw new Error("split: missing input file");
247
+ export async function runSplitCommand(ctx) {
248
+ const { runtime } = ctx.deps;
249
+ const file = ctx.args.file;
250
+ if (!file) return { ok: false, code: 1, error: "split: missing input file" };
177
251
 
178
- const mode = values.mode;
179
- if (!mode) throw new Error("split: --mode is required");
252
+ const mode = ctx.options.mode;
253
+ if (!mode) return { ok: false, code: 1, error: "split: --mode is required" };
180
254
  if (!["run", "supervise", "facilitate"].includes(mode)) {
181
- throw new Error(`split: invalid --mode "${mode}"`);
255
+ return { ok: false, code: 1, error: `split: invalid --mode "${mode}"` };
182
256
  }
183
257
 
184
- const caseId = values.case ?? "default";
185
- const outputDir = values["output-dir"] || dirname(file);
186
- mkdirSync(outputDir, { recursive: true });
258
+ const caseId = ctx.options.case ?? "default";
259
+ const outputDir = ctx.options["output-dir"] || dirname(file);
260
+ runtime.fsSync.mkdirSync(outputDir, { recursive: true });
187
261
 
188
- const buckets = parseBuckets(readFileSync(file, "utf8"));
262
+ const buckets = parseBuckets(runtime.fsSync.readFileSync(file, "utf8"));
189
263
 
190
264
  for (const [source, lines] of buckets.entries()) {
191
265
  if (!VALID_SOURCE_NAME.test(source)) continue;
@@ -194,8 +268,9 @@ export async function runSplitCommand(values, args) {
194
268
  outputDir,
195
269
  `trace--${caseId}--${source}.${role}.ndjson`,
196
270
  );
197
- writeFileSync(outPath, lines.join("\n") + "\n");
271
+ runtime.fsSync.writeFileSync(outPath, lines.join("\n") + "\n");
198
272
  }
273
+ return { ok: true };
199
274
  }
200
275
 
201
276
  /**
@@ -234,11 +309,12 @@ function parseBuckets(content) {
234
309
 
235
310
  /**
236
311
  * Load a trace file. Supports structured JSON and raw NDJSON.
312
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
237
313
  * @param {string} file
238
314
  * @returns {import("../trace-query.js").TraceQuery}
239
315
  */
240
- function loadTrace(file) {
241
- const content = readFileSync(file, "utf8");
316
+ function loadTrace(runtime, file) {
317
+ const content = runtime.fsSync.readFileSync(file, "utf8");
242
318
 
243
319
  try {
244
320
  const parsed = JSON.parse(content);
@@ -260,10 +336,11 @@ function loadTrace(file) {
260
336
  * Write JSON output to stdout. By default strips `thinking.signature`
261
337
  * base64 blobs from the payload so they don't dominate terminal output;
262
338
  * pass `--signatures` (surfaced as `values.signatures`) to keep them.
339
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
263
340
  * @param {*} data
264
341
  * @param {object} [values]
265
342
  */
266
- function writeJSON(data, values = {}) {
343
+ function writeJSON(runtime, data, values = {}) {
267
344
  const output = values.signatures ? data : stripSignatures(data);
268
- process.stdout.write(JSON.stringify(output, null, 2) + "\n");
345
+ runtime.proc.stdout.write(JSON.stringify(output, null, 2) + "\n");
269
346
  }
package/src/discuss-tools.js CHANGED
@@ -27,6 +27,7 @@ import {
27
27
  RECESS_DESC,
28
28
  requestForCommentTool,
29
29
  requireNoPendingAsks,
30
+ requireNoUnprocessedInbox,
30
31
  } from "./orchestration-toolkit.js";
31
32
 
32
33
  /** System prompt for discuss-mode agent participants. L0 mechanics only per COALIGNED. */
@@ -63,6 +64,26 @@ const RESUME_TRIGGER_SCHEMA = z.discriminatedUnion("kind", [
63
64
  export function createDiscussLeadToolServer(ctx) {
64
65
  return orchestrationServer([
65
66
  ...baseTools(ctx, { from: "lead", defaultTo: undefined, broadcast: true }),
67
+ tool(
68
+ "Acknowledge",
69
+ "Post a brief message directly to the discussion thread. Use when responding to a human follow-up or providing a status update while participants are working.",
70
+ {
71
+ message: z.string().describe("Message to post on the thread"),
72
+ },
73
+ async ({ message }) => {
74
+ const seq =
75
+ ctx.emitter?.emit({ kind: "ack", body: message, agent: "lead" }) ??
76
+ -1;
77
+ ctx.replies.push({
78
+ body: message,
79
+ agent: "lead",
80
+ kind: "ack",
81
+ seq,
82
+ ...(ctx.discussionId && { thread_id: ctx.discussionId }),
83
+ });
84
+ return { content: [{ type: "text", text: "Posted." }] };
85
+ },
86
+ ),
66
87
  tool(
67
88
  "Recess",
68
89
  RECESS_DESC,
@@ -82,11 +103,36 @@ export function createDiscussLeadToolServer(ctx) {
82
103
  ]);
83
104
  }
84
105
 
106
+ const ACKNOWLEDGE_DESC =
107
+ "Acknowledge an Ask before starting work. Posts a visible comment on the thread. Does not discharge the Ask — you still owe an Answer.";
108
+
85
109
  /** Discuss-mode agent tool server. */
86
110
  export function createDiscussAgentToolServer(ctx, { from }) {
87
111
  return orchestrationServer([
88
112
  ...baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
89
113
  requestForCommentTool(ctx),
114
+ tool(
115
+ "Acknowledge",
116
+ ACKNOWLEDGE_DESC,
117
+ {
118
+ message: z
119
+ .string()
120
+ .describe("Brief acknowledgement to post on the thread"),
121
+ askId: z.number().optional().describe("The ask being acknowledged"),
122
+ },
123
+ async ({ message }) => {
124
+ const seq =
125
+ ctx.emitter?.emit({ kind: "ack", body: message, agent: from }) ?? -1;
126
+ ctx.replies.push({
127
+ body: message,
128
+ agent: from,
129
+ kind: "ack",
130
+ seq,
131
+ ...(ctx.discussionId && { thread_id: ctx.discussionId }),
132
+ });
133
+ return { content: [{ type: "text", text: "Acknowledged." }] };
134
+ },
135
+ ),
90
136
  ]);
91
137
  }
92
138
 
@@ -99,7 +145,7 @@ export function createDiscussAgentToolServer(ctx, { from }) {
99
145
  */
100
146
  export function createRecessHandler(ctx) {
101
147
  return async ({ reason, trigger }) => {
102
- const guard = requireNoPendingAsks(ctx);
148
+ const guard = requireNoPendingAsks(ctx) ?? requireNoUnprocessedInbox(ctx);
103
149
  if (guard) return guard;
104
150
  ctx.recessTrigger = trigger;
105
151
  concludeSession(ctx, {
@@ -114,7 +160,7 @@ export function createRecessHandler(ctx) {
114
160
  /** Adjourn handler — ends the discussion with a verdict. */
115
161
  export function createAdjournHandler(ctx) {
116
162
  return async ({ verdict, summary, outcome }) => {
117
- const guard = requireNoPendingAsks(ctx);
163
+ const guard = requireNoPendingAsks(ctx) ?? requireNoUnprocessedInbox(ctx);
118
164
  if (guard) return guard;
119
165
  if (outcome !== undefined) ctx.outcome = outcome;
120
166
  concludeSession(ctx, {
package/src/discusser.js CHANGED
@@ -17,6 +17,8 @@ import { Writable } from "node:stream";
17
17
  import { resolve } from "node:path";
18
18
 
19
19
  import { createAgentRunner } from "./agent-runner.js";
20
+ import { InboxPoller } from "./inbox-poller.js";
21
+ import { ReplyEmitter } from "./reply-emitter.js";
20
22
  import { composeSystemPrompt } from "./profile-prompt.js";
21
23
  import { SequenceCounter } from "./sequence-counter.js";
22
24
  import { createMessageBus } from "./message-bus.js";
@@ -40,6 +42,7 @@ export const DISCUSS_SYSTEM_PROMPT =
40
42
  "Answers arrive on your next turn as `[answer#N] <participant>: <text>` in your inbox.\n" +
41
43
  "End your turn while Asks are pending. The system resumes you when answers arrive.\n" +
42
44
  "Multiple `Ask` calls in one turn run participants in parallel.\n" +
45
+ "Use `Acknowledge` to post a brief message directly to the discussion thread — use it to respond to human follow-ups or give status updates while participants are working.\n" +
43
46
  "End the discussion by calling `Adjourn` with a verdict and summary, or `Recess` only to wait on an external reply or duration.";
44
47
 
45
48
  /**
@@ -79,7 +82,15 @@ export class Discusser {
79
82
  * @param {string|null} [deps.discussionId]
80
83
  * @param {SequenceCounter} [deps.counter]
81
84
  */
82
- constructor({ loop, ctx, output, discussionId, counter, redactor }) {
85
+ constructor({
86
+ loop,
87
+ ctx,
88
+ output,
89
+ discussionId,
90
+ counter,
91
+ redactor,
92
+ inboxPoller,
93
+ }) {
83
94
  if (!loop) throw new Error("loop is required");
84
95
  if (!ctx) throw new Error("ctx is required");
85
96
  if (!output) throw new Error("output is required");
@@ -90,6 +101,7 @@ export class Discusser {
90
101
  this.discussionId = discussionId ?? null;
91
102
  this.counter = counter ?? new SequenceCounter();
92
103
  this.redactor = redactor;
104
+ this.inboxPoller = inboxPoller ?? null;
93
105
  }
94
106
 
95
107
  /**
@@ -150,6 +162,7 @@ export class Discusser {
150
162
  ...(this.ctx.rfcs?.length && { rfcs: this.ctx.rfcs }),
151
163
  ...(this.ctx.recessTrigger && { trigger: this.ctx.recessTrigger }),
152
164
  ...(this.discussionId && { discussion_id: this.discussionId }),
165
+ lastActedSeq: this.inboxPoller?.lastActedSeq ?? -1,
153
166
  };
154
167
  this.output.write(
155
168
  JSON.stringify(
@@ -184,10 +197,14 @@ export class Discusser {
184
197
  * @param {function} deps.query
185
198
  * @param {import("stream").Writable} deps.output
186
199
  * @param {number} [deps.maxTurns]
200
+ * @param {number} [deps.maxLeadTurns]
187
201
  * @param {string} [deps.leadCwd]
188
202
  * @param {string} [deps.profilesDir]
189
203
  * @param {string} [deps.taskAmend]
190
204
  * @param {object} deps.redactor
205
+ * @param {string|null} [deps.callbackUrl]
206
+ * @param {string|null} [deps.inboxUrl]
207
+ * @param {string|null} [deps.correlationId]
191
208
  * @returns {Discusser}
192
209
  */
193
210
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: factory wires N runners + resume hydration paths
@@ -201,10 +218,14 @@ export function createDiscusser({
201
218
  query,
202
219
  output,
203
220
  maxTurns,
221
+ maxLeadTurns,
204
222
  leadCwd,
205
223
  profilesDir,
206
224
  taskAmend,
207
225
  redactor,
226
+ callbackUrl,
227
+ inboxUrl,
228
+ correlationId,
208
229
  }) {
209
230
  if (!redactor) throw new Error("redactor is required");
210
231
  const resolvedLeadCwd = resolve(leadCwd ?? ".");
@@ -236,13 +257,34 @@ export function createDiscusser({
236
257
  participants: ["lead", ...resolvedConfigs.map((a) => a.name)],
237
258
  });
238
259
 
260
+ const loopCounter = new SequenceCounter();
261
+ const emitter = new ReplyEmitter({
262
+ callbackUrl: callbackUrl ?? null,
263
+ correlationId: correlationId ?? null,
264
+ counter: loopCounter,
265
+ });
266
+ ctx.emitter = emitter;
267
+
268
+ const abortController = new AbortController();
269
+ const inboxPoller = inboxUrl
270
+ ? new InboxPoller({
271
+ inboxUrl,
272
+ messageBus,
273
+ leadName: "lead",
274
+ signal: abortController.signal,
275
+ })
276
+ : null;
277
+
239
278
  // Intercept answers routed to the lead — each becomes a discussion reply.
240
279
  const originalAnswer = messageBus.answer.bind(messageBus);
241
280
  messageBus.answer = (from, to, text, askId) => {
242
281
  if (to === "lead" && from !== "@orchestrator") {
282
+ const seq = emitter.emit({ kind: "reply", body: text, agent: from });
243
283
  ctx.replies.push({
244
284
  body: text,
245
285
  agent: from,
286
+ kind: "reply",
287
+ seq,
246
288
  ...(ctx.discussionId && { thread_id: ctx.discussionId }),
247
289
  });
248
290
  }
@@ -327,10 +369,14 @@ export function createDiscusser({
327
369
  output,
328
370
  leadName: "lead",
329
371
  mode: "discussion",
372
+ maxLeadTurns: maxLeadTurns ?? undefined,
330
373
  ctx,
331
374
  taskAmend,
332
375
  redactor,
376
+ inboxPoller,
377
+ abortController,
333
378
  });
379
+ loop.counter = loopCounter;
334
380
 
335
381
  discusser = new Discusser({
336
382
  loop,
@@ -338,7 +384,8 @@ export function createDiscusser({
338
384
  output,
339
385
  discussionId: discussionId ?? null,
340
386
  redactor,
341
- counter: loop.counter,
387
+ counter: loopCounter,
388
+ inboxPoller,
342
389
  });
343
390
  return discusser;
344
391
  }