npm - @forwardimpact/libeval - Versions diffs - 0.1.42 → 0.1.44 - Mend

@forwardimpact/libeval 0.1.42 → 0.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/bin/fit-benchmark.js +2 -2
package/bin/fit-eval.js +103 -22
package/bin/fit-trace.js +14 -0
package/package.json +1 -1
package/src/commands/benchmark-run.js +1 -1
package/src/commands/by-discussion.js +84 -0
package/src/commands/callback.js +104 -0
package/src/commands/discuss.js +116 -0
package/src/commands/facilitate.js +16 -8
package/src/commands/supervise.js +4 -4
package/src/discuss-tools.js +203 -0
package/src/discusser.js +332 -0
package/src/facilitator.js +40 -334
package/src/index.js +14 -0
package/src/orchestration-loop.js +369 -0
package/src/redaction.js +10 -0
package/src/render/orchestrator-filter.js +1 -0
package/src/supervisor.js +17 -5
package/src/trace-collector.js +4 -0

package/src/orchestration-loop.js ADDED Viewed

@@ -0,0 +1,369 @@
+/**
+ * OrchestrationLoop — N agent sessions + one lead LLM session. The
+ * Ask/Answer contract is enforced at turn boundaries via checkPendingAsk:
+ * one synthetic reminder, then a `protocol_violation` event plus a
+ * null-answer injection so the session advances instead of deadlocking.
+ *
+ * Mode-specific concepts (Conclude vs. Adjourn/Recess, lead role name,
+ * system prompts, tool sets) live in mode-specific wrappers
+ * (`Facilitator` for facilitate mode, `Discusser` for discuss mode). This
+ * file owns only the loop itself.
+ */
+import { SequenceCounter } from "./sequence-counter.js";
+import {
+  createOrchestrationContext,
+  checkPendingAsk,
+} from "./orchestration-toolkit.js";
+import { createAsyncQueue, formatMessages } from "./orchestrator-helpers.js";
+/**
+ * Orchestrate N agent sessions coordinated by a single lead LLM session.
+ * Mode-neutral. Callers parameterise the lead participant's name and the
+ * `protocol_violation` mode tag so the same loop powers both facilitate
+ * and discuss modes without either knowing about the other.
+ */
+export class OrchestrationLoop {
+  /**
+   * @param {object} deps
+   * @param {import("./agent-runner.js").AgentRunner} deps.leadRunner
+   * @param {Array<{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}>} deps.agents
+   * @param {import("./message-bus.js").MessageBus} deps.messageBus
+   * @param {import("stream").Writable} deps.output
+   * @param {string} [deps.leadName] - Canonical name of the lead participant on the messageBus (default "lead").
+   * @param {"facilitated"|"discussion"|"supervised"} [deps.mode] - Mode tag emitted on `protocol_violation` events.
+   * @param {number} [deps.maxTurns]
+   * @param {object} [deps.ctx]
+   * @param {object} [deps.eventQueue]
+   * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
+   * @param {object} deps.redactor
+   */
+  constructor({
+    leadRunner,
+    agents,
+    messageBus,
+    output,
+    leadName,
+    mode,
+    maxTurns,
+    ctx,
+    eventQueue,
+    taskAmend,
+    redactor,
+  }) {
+    if (!redactor) throw new Error("redactor is required");
+    this.redactor = redactor;
+    this.leadRunner = leadRunner;
+    this.leadName = leadName ?? "lead";
+    this.mode = mode ?? "facilitated";
+    this.agents = agents;
+    this.messageBus = messageBus;
+    this.output = output;
+    this.maxTurns = maxTurns ?? 20;
+    this.ctx = ctx ?? createOrchestrationContext();
+    this.counter = new SequenceCounter();
+    this.eventQueue = eventQueue ?? createAsyncQueue();
+    this.leadTurns = 0;
+    this.taskAmend = taskAmend ?? null;
+    let resolve;
+    const promise = new Promise((r) => {
+      resolve = r;
+    });
+    this.concludePromise = promise;
+    this.concludeResolve = resolve;
+  }
+  /**
+   * Run the full orchestrated session.
+   * @param {string} task
+   * @returns {Promise<{success: boolean, turns: number}>}
+   */
+  async run(task) {
+    this.emitOrchestratorEvent({ type: "session_start" });
+    const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
+    // Launch agent loops first — they wait for messages via messageBus.
+    // This lets agents process Ask/Announce messages that arrive during
+    // the lead's initial run, rather than after it completes.
+    const agentPromises = this.agents.map((a) => this.#runAgent(a));
+    // Turn 0: lead receives the task
+    this.leadTurns++;
+    await this.leadRunner.run(initialTask);
+    // Handle redirect after turn 0
+    await this.#processRedirect();
+    if (this.ctx.concluded) {
+      // Lead concluded during its initial run. Let agents finish any
+      // in-progress work before returning — they may have received Ask/Answer
+      // messages and started processing concurrently.
+      this.concludeResolve();
+      await Promise.allSettled(agentPromises);
+      const success = this.ctx.verdict === "success";
+      this.emitSummary({
+        success,
+        verdict: this.ctx.verdict,
+        turns: this.leadTurns,
+        summary: this.ctx.summary,
+      });
+      return { success, turns: this.leadTurns };
+    }
+    // Abort agents promptly when the session concludes during the event loop
+    this.concludePromise.then(() => {
+      for (const agent of this.agents) {
+        agent.runner.currentAbortController?.abort();
+      }
+    });
+    // Concurrent phase: lead event loop + already-running agent loops
+    const leadPromise = this.#leadLoop();
+    try {
+      await Promise.all([...agentPromises, leadPromise]);
+    } catch (err) {
+      for (const agent of this.agents) {
+        agent.runner.currentAbortController?.abort();
+      }
+      this.leadRunner.currentAbortController?.abort();
+      throw err;
+    }
+    const success = this.ctx.concluded && this.ctx.verdict === "success";
+    const result = {
+      success,
+      turns: this.leadTurns,
+    };
+    this.emitSummary({
+      success,
+      verdict: this.ctx.verdict,
+      turns: result.turns,
+      summary: this.ctx.summary,
+    });
+    return result;
+  }
+  #checkAsk(name) {
+    return checkPendingAsk({
+      ctx: this.ctx,
+      messageBus: this.messageBus,
+      addresseeName: name,
+      mode: this.mode,
+      emitViolation: (e) => this.emitOrchestratorEvent(e),
+    });
+  }
+  async #enforcePendingAsk(agent) {
+    if (this.#checkAsk(agent.name) !== "recheck") return;
+    if (this.ctx.concluded) return;
+    const reminders = this.messageBus.drain(agent.name);
+    if (reminders.length === 0) return;
+    await agent.runner.resume(formatMessages(reminders));
+    if (this.ctx.concluded) return;
+    this.#checkAsk(agent.name);
+  }
+  /**
+   * Agent outer loop — waits for messages, runs/resumes the agent.
+   * @param {{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}} agent
+   */
+  async #runAgent(agent) {
+    // Wait for first message (lazy start)
+    await Promise.race([
+      this.messageBus.waitForMessages(agent.name),
+      this.concludePromise,
+    ]);
+    if (this.ctx.concluded) return;
+    let messages = this.messageBus.drain(agent.name);
+    if (messages.length === 0) return;
+    this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
+    await agent.runner.run(formatMessages(messages));
+    if (await this.#settleAgentTurn(agent)) return;
+    // Loop: check for new messages, resume if any
+    while (!this.ctx.concluded) {
+      messages = await this.#awaitAgentMessages(agent.name);
+      if (messages.length === 0) break;
+      await agent.runner.resume(formatMessages(messages));
+      if (await this.#settleAgentTurn(agent)) break;
+    }
+  }
+  /**
+   * Enforce pending-ask and emit turn_complete. Returns true when the
+   * session has concluded and the caller should stop.
+   */
+  async #settleAgentTurn(agent) {
+    if (this.ctx.concluded) return true;
+    await this.#enforcePendingAsk(agent);
+    if (this.ctx.concluded) return true;
+    this.eventQueue.enqueue({
+      type: "lifecycle",
+      agent: agent.name,
+      status: "turn_complete",
+    });
+    return false;
+  }
+  /**
+   * Wait for messages addressed to `name`, returning an empty array when
+   * the session concludes first.
+   */
+  async #awaitAgentMessages(name) {
+    const messages = this.messageBus.drain(name);
+    if (messages.length > 0) return messages;
+    await Promise.race([
+      this.messageBus.waitForMessages(name),
+      this.concludePromise,
+    ]);
+    if (this.ctx.concluded) return [];
+    return this.messageBus.drain(name);
+  }
+  /**
+   * Lead event loop — only runs when input arrives.
+   */
+  async #leadLoop() {
+    while (!this.ctx.concluded) {
+      const event = await this.eventQueue.dequeue();
+      if (this.ctx.concluded || event === null) break;
+      await this.#handleEvent(event);
+    }
+  }
+  async #handleEvent(event) {
+    switch (event.type) {
+      case "messages":
+      case "lifecycle": {
+        const msgs = this.messageBus.drain(this.leadName);
+        if (msgs.length === 0) break;
+        this.leadTurns++;
+        await this.leadRunner.resume(formatMessages(msgs));
+        await this.#processRedirect();
+        if (!this.ctx.concluded) await this.#enforceLeadPendingAsk();
+        break;
+      }
+    }
+    if (this.ctx.concluded) {
+      this.concludeResolve();
+      this.eventQueue.close();
+    }
+  }
+  async #enforceLeadPendingAsk() {
+    if (this.#checkAsk(this.leadName) !== "recheck") return;
+    if (this.ctx.concluded) return;
+    const reminders = this.messageBus.drain(this.leadName);
+    if (reminders.length === 0) return;
+    this.leadTurns++;
+    await this.leadRunner.resume(formatMessages(reminders));
+    await this.#processRedirect();
+    if (this.ctx.concluded) return;
+    this.#checkAsk(this.leadName);
+  }
+  /**
+   * Process a pending redirect after a lead turn.
+   */
+  async #processRedirect() {
+    if (!this.ctx.redirect) return;
+    const redirect = this.ctx.redirect;
+    this.ctx.redirect = null;
+    this.emitOrchestratorEvent({
+      type: "redirect",
+      to: redirect.to,
+    });
+    if (redirect.to === "all") {
+      // Abort all agents and deliver redirect via broadcast
+      for (const agent of this.agents) {
+        agent.runner.currentAbortController?.abort();
+      }
+      this.messageBus.announce(this.leadName, redirect.message);
+    } else if (redirect.to) {
+      // Abort specific agent and deliver via direct message
+      const target = this.agents.find((a) => a.name === redirect.to);
+      if (target) {
+        target.runner.currentAbortController?.abort();
+      }
+      this.messageBus.direct(this.leadName, redirect.to, redirect.message);
+    }
+  }
+  /** Return the last assistant text block from a runner's buffer, or the fallback if none exists. */
+  extractLastText(runner, fallback) {
+    const lines = runner.buffer;
+    for (let i = lines.length - 1; i >= 0; i--) {
+      const event = JSON.parse(lines[i]);
+      if (event.type !== "assistant") continue;
+      const content = event.message?.content ?? event.content;
+      if (!Array.isArray(content)) continue;
+      for (let j = content.length - 1; j >= 0; j--) {
+        if (content[j].type === "text" && content[j].text) {
+          return content[j].text;
+        }
+      }
+    }
+    return fallback;
+  }
+  /**
+   * Emit a single NDJSON line tagged with source and seq.
+   * @param {string} source - Participant name
+   * @param {string} line - Raw NDJSON line
+   */
+  emitLine(source, line) {
+    const event = JSON.parse(line);
+    this.output.write(
+      JSON.stringify(
+        this.redactor.redactValue({
+          source,
+          seq: this.counter.next(),
+          event,
+        }),
+      ) + "\n",
+    );
+  }
+  /**
+   * @param {{type: string}} event
+   */
+  emitOrchestratorEvent(event) {
+    this.output.write(
+      JSON.stringify(
+        this.redactor.redactValue({
+          source: "orchestrator",
+          seq: this.counter.next(),
+          event,
+        }),
+      ) + "\n",
+    );
+  }
+  /**
+   * @param {{success: boolean, verdict?: string|null, turns: number, summary?: string}} result
+   */
+  emitSummary(result) {
+    this.output.write(
+      JSON.stringify(
+        this.redactor.redactValue({
+          source: "orchestrator",
+          seq: this.counter.next(),
+          event: {
+            type: "summary",
+            success: result.success,
+            ...(result.verdict && { verdict: result.verdict }),
+            turns: result.turns,
+            ...(result.summary && { summary: result.summary }),
+          },
+        }),
+      ) + "\n",
+    );
+  }
+}

package/src/redaction.js CHANGED Viewed

@@ -10,8 +10,18 @@
 export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
   "ANTHROPIC_API_KEY",
+  "AWS_ACCESS_KEY_ID",
+  "AWS_SECRET_ACCESS_KEY",
+  "DATABASE_PASSWORD",
   "GH_TOKEN",
   "GITHUB_TOKEN",
+  "MCP_TOKEN",
+  "MICROSOFT_APP_PASSWORD",
+  "PRODUCT_LANDMARK_TOKEN",
+  "SERVICE_SECRET",
+  "SUPABASE_ANON_KEY",
+  "SUPABASE_JWT_SECRET",
+  "SUPABASE_SERVICE_ROLE_KEY",
 ]);
 // Anchored prefixes per

package/src/render/orchestrator-filter.js CHANGED Viewed

@@ -13,6 +13,7 @@ const SUPPRESSED = new Set([
   "ask_answered",
   "redirect",
   "summary",
+  "meta",
 ]);
 /**

package/src/supervisor.js CHANGED Viewed

@@ -50,10 +50,17 @@ export const AGENT_SYSTEM_PROMPT =
  * Maximum number of mid-turn interventions allowed within a single agent turn.
  * Bounded so a looping supervisor exhausts its quota fast (observability) but
  * leaves headroom for legitimate "intervene, observe, intervene again" patterns.
- * The outer maxTurns budget still bounds overall runtime.
+ * The outer exchange budget still bounds overall runtime.
  */
 const MAX_INTERVENTIONS_PER_TURN = 5;
+/**
+ * Default cap on supervisor↔agent exchanges in a single run. Not exposed via
+ * CLI — `--max-turns` governs the per-runner invocation budget instead. When
+ * a `--max-exchanges` flag is added this becomes the default for that flag.
+ */
+const DEFAULT_MAX_EXCHANGES = 100;
 /** Orchestrate a relay loop between a supervisor LLM and an agent LLM with mid-turn review. */
 export class Supervisor {
   /**
@@ -485,7 +492,7 @@ const devNull = new Writable({
  * @param {string} [deps.model] - Default model for both runners.
  * @param {string} [deps.agentModel] - Agent model override (falls back to `model`).
  * @param {string} [deps.supervisorModel] - Supervisor model override (falls back to `model`).
- * @param {number} [deps.maxTurns]
+ * @param {number} [deps.maxTurns] - Per-runner invocation budget for both the supervisor and the agent (default 200; 0 = unlimited). Outer supervisor↔agent exchanges are bounded separately by `DEFAULT_MAX_EXCHANGES` (passes through to unlimited when `maxTurns === 0`).
  * @param {string[]} [deps.allowedTools]
  * @param {string[]} [deps.supervisorAllowedTools]
  * @param {string[]} [deps.supervisorDisallowedTools]
@@ -544,8 +551,13 @@ export function createSupervisor({
   const onLine = (line) => supervisor.emitLine(line);
-  const perInvocationTurns =
-    maxTurns === 0 ? 0 : Math.max(maxTurns ?? 100, 200);
+  // `maxTurns` is the per-runner invocation budget — matches `run` and
+  // `facilitate` semantics. The outer supervisor↔agent exchange loop is
+  // bounded separately by `DEFAULT_MAX_EXCHANGES`; when --max-exchanges is
+  // added it will become a parameter. `maxTurns === 0` propagates through
+  // to mean unlimited on both axes.
+  const perInvocationTurns = maxTurns ?? 200;
+  const exchangeBudget = maxTurns === 0 ? 0 : DEFAULT_MAX_EXCHANGES;
   const agentRunner = createAgentRunner({
     cwd: agentCwd,
@@ -595,7 +607,7 @@ export function createSupervisor({
     agentRunner,
     supervisorRunner,
     output,
-    maxTurns,
+    maxTurns: exchangeBudget,
     ctx,
     messageBus,
     taskAmend,

package/src/trace-collector.js CHANGED Viewed

@@ -40,6 +40,7 @@ export class TraceCollector {
    * Malformed lines are silently skipped.
    * @param {string} line - A single JSON line from stream-json output
    */
+  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: NDJSON envelope unwrap + orchestrator/system/assistant/user dispatch
   addLine(line) {
     const trimmed = line.trim();
     if (!trimmed) return;
@@ -74,6 +75,9 @@ export class TraceCollector {
           ...(typeof event.turns === "number" && { turns: event.turns }),
         };
       }
+      if (event.type === "meta" && typeof event.discussion_id === "string") {
+        this.discussionId = event.discussion_id;
+      }
       return;
     }