npm - @intx/harness - Versions diffs - 0.1.2 - Mend

@intx/harness 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +38 -0
package/package.json +19 -0
package/src/config.ts +135 -0
package/src/connector-router.test.ts +718 -0
package/src/connector-router.ts +304 -0
package/src/deploy-tree.test.ts +51 -0
package/src/deploy-tree.ts +35 -0
package/src/harness.test.ts +1747 -0
package/src/harness.ts +379 -0
package/src/index.ts +31 -0
package/src/merge-tool-runners.test.ts +149 -0
package/src/merge-tool-runners.ts +90 -0
package/src/runtime-capabilities.test.ts +19 -0
package/src/runtime-capabilities.ts +22 -0
package/tsconfig.json +4 -0
package/tsconfig.tsbuildinfo +1 -0

package/src/harness.ts ADDED Viewed

@@ -0,0 +1,379 @@
+// Agent harness: supervisor, connector, and reactor wiring.
+//
+// The harness is the supervisor layer between the message transport and the
+// reactor. It watches the agent's INBOX, routes messages by thread, and
+// manages the connector lifecycle.
+//
+// Connector semantics:
+//   - Messages in the active connector thread are fetched, delivered to the
+//     reactor, and deleted from the INBOX (consumed).
+//   - All other inbound messages (replies to agent sends, unsolicited
+//     inter-agent mail) are delivered to the reactor and stay in the INBOX.
+//   - Outbound replies are sent by the harness when the reactor emits a
+//     connector.reply event, with correct threading headers.
+//
+// The INBOX is a delivery queue — the persistent conversation record lives in
+// the context store (git), not the mailbox.
+//
+// (ARCHITECTURE.md § Agent Harness, INFERENCE.md § Relationship to Harness)
+import { getLogger } from "@intx/log";
+import { createReactorAssembly, createDefaultDirector } from "@intx/inference";
+import type { ReactorEmittedEvent } from "@intx/inference";
+import {
+  InferenceSource,
+  applyInferenceSourceFields,
+  type BlobReader,
+  type ContextStore,
+  type InboundMessage,
+  type Unsubscribe,
+  type ReactorDirector,
+} from "@intx/types/runtime";
+import type { ErrorRecord } from "@intx/types/audit";
+import type { HarnessConfig } from "./config";
+import { validateConfig } from "./config";
+import { createConnectorRouter, type RouteDecision } from "./connector-router";
+import { type } from "arktype";
+const logger = getLogger(["interchange", "harness"]); // Module-level logger shared by every harness created from this file.
+export type Harness = {
+  /**
+   * Begin watching the agent's INBOX and start the reactor event loop.
+   * Must be called exactly once: the `createHarness` implementation throws
+   * "Harness is already started" on a second call.
+   */
+  start(): void;
+  /**
+   * Initiate graceful shutdown: abort the reactor, unsubscribe from the
+   * transport watch, and flush state to the context store.
+   * In the `createHarness` implementation, calls after the first are no-ops.
+   */
+  stop(): void;
+  /**
+   * Inject an already-fetched inbound message directly into the reactor.
+   * Useful for testing and for messages the harness receives through channels
+   * other than the INBOX watch.
+   * The `createHarness` implementation hands the message straight to the
+   * reactor: connector routing and INBOX deletion are not applied here.
+   */
+  deliver(message: InboundMessage): void;
+  /**
+   * Hot-swap the active inference source. Takes effect on the next
+   * inference call — in-flight calls continue with the previous source.
+   * The `createHarness` implementation validates the new source first and
+   * throws `Invalid InferenceSource: …` when validation fails.
+   */
+  setSource(source: InferenceSource): void;
+  /**
+   * Read-only blob reader backed by this harness's context store. Pass it to
+   * the tool factory (e.g. `createPosixTools({ blobReader })`) so the agent
+   * can resolve `tool-output:///{callId}` URIs through the same store the
+   * reactor commits to.
+   */
+  readonly blobReader: BlobReader;
+};
+/**
+ * Build a harness that wires the message transport, connector router,
+ * context store, and reactor together for one agent session.
+ *
+ * Construction validates the config, picks the director (the caller's, or a
+ * default one built from `systemPrompt`, `tools.definitions`, and
+ * `defaultDirectorPolicy`), generates a fresh session id, and assembles the
+ * reactor. The INBOX watch and `reactor.start()` only happen in `start()`.
+ *
+ * @param config - Transport, storage, inference source, tools, event sink,
+ *   and the optional director / audit / authorization / extension hooks.
+ * @returns The harness control surface (`start`, `stop`, `deliver`,
+ *   `setSource`) plus the `blobReader` produced by the reactor assembly.
+ * @throws Whatever `validateConfig` raises for a rejected config
+ *   (NOTE(review): its failure mode is defined in ./config — confirm there).
+ */
+export function createHarness(config: HarnessConfig): Harness {
+  validateConfig(config);
+  const { transport, storage, source, tools, onEvent } = config;
+  let director: ReactorDirector;
+  if (config.director !== undefined) {
+    director = config.director;
+  } else {
+    // The caller-supplied tools runner carries the full set of tool
+    // definitions the model should see; pass them through to the
+    // director as-is.
+    director = createDefaultDirector(
+      config.systemPrompt,
+      tools.definitions,
+      config.defaultDirectorPolicy ?? {},
+    );
+  }
+  const sessionId = crypto.randomUUID();
+  const auditStore = config.auditStore;
+  // Error records buffered between audit flushes; errorSeq numbers them in
+  // the order they were observed within this session.
+  const accumulatedErrors: ErrorRecord[] = [];
+  let errorSeq = 0;
+  // -------------------------------------------------------------------------
+  // Connector state: track which thread(s) this reactor owns.
+  // -------------------------------------------------------------------------
+  const connectorRouter = createConnectorRouter(
+    config.onConnectorStateChanged !== undefined
+      ? { onStateChanged: config.onConnectorStateChanged }
+      : undefined,
+  );
+  // Wrap the context store so load() restores connector state and the reactor's
+  // per-cycle writeMetadata picks up the live connector state via the underlying
+  // store's setConnectorState buffer (Phase 4: connector state rides along with
+  // metadata.json rather than being injected during commit).
+  // Every other method is a straight pass-through to `storage`.
+  const wrappedStore: ContextStore = {
+    async load(signal) {
+      const loaded = await storage.load(signal);
+      connectorRouter.restore(loaded.connectorState);
+      return loaded;
+    },
+    setConnectorState(state) {
+      storage.setConnectorState(state);
+    },
+    async commit(options, signal) {
+      return storage.commit(options, signal);
+    },
+    async branch(name, signal) {
+      return storage.branch(name, signal);
+    },
+    async log(limit, signal) {
+      return storage.log(limit, signal);
+    },
+    async readAt(hash, signal) {
+      return storage.readAt(hash, signal);
+    },
+    async writeBlob(key, bytes, contentType, signal) {
+      return storage.writeBlob(key, bytes, contentType, signal);
+    },
+    async readBlob(key, signal) {
+      return storage.readBlob(key, signal);
+    },
+    async writePrompt(turns, signal) {
+      return storage.writePrompt(turns, signal);
+    },
+    async writeResponse(turn, signal) {
+      return storage.writeResponse(turn, signal);
+    },
+    async writeManifest(records, signal) {
+      return storage.writeManifest(records, signal);
+    },
+    async writeTurns(turns, signal) {
+      return storage.writeTurns(turns, signal);
+    },
+    async writeMetadata(metadata, signal) {
+      // Flush the current in-memory connector state into the wrapped store's
+      // buffer so writeMetadata picks it up alongside pendingOperations and
+      // tokenUsage. This is the reactor's per-cycle moment to durably record
+      // connector thread state.
+      storage.setConnectorState(connectorRouter.snapshot());
+      return storage.writeMetadata(metadata, signal);
+    },
+    async readManifestHistory(limit, signal) {
+      return storage.readManifestHistory(limit, signal);
+    },
+  };
+  /**
+   * Delete a message from the INBOX after it has been delivered to the reactor.
+   * Best-effort: a failure to flag or expunge is logged as a warning and
+   * swallowed, so this function never rejects.
+   */
+  async function consumeFromInbox(message: InboundMessage): Promise<void> {
+    try {
+      await transport.setFlags(message.ref, ["\\Deleted"]);
+      await transport.expunge("INBOX");
+    } catch (cause) {
+      logger.warn`Failed to consume message uid=${message.ref.uid} from INBOX: ${cause}`;
+    }
+  }
+  // -------------------------------------------------------------------------
+  // Event interception
+  // -------------------------------------------------------------------------
+  /**
+   * Reactor event sink. Sends connector replies over the transport, buffers
+   * inference/reactor errors for the audit store (when one is configured),
+   * and forwards every event except `message.received` to the caller's
+   * `onEvent`.
+   */
+  function handleEvent(event: ReactorEmittedEvent): void {
+    // Handle connector.reply: send the reply via transport.
+    if (event.type === "connector.reply") {
+      const replyContent = event.data.content;
+      // Fire-and-forget: the send runs in the background and any failure is
+      // logged rather than propagated into the reactor's event dispatch.
+      void (async () => {
+        try {
+          const parts = connectorRouter.composeReply();
+          const receipt = await transport.send({
+            ...parts,
+            content: replyContent,
+            type: "conversation.message",
+          });
+          connectorRouter.onReplySent(receipt);
+        } catch (cause) {
+          logger.error`Failed to send connector reply: ${cause}`;
+        }
+      })();
+    }
+    // message.received is reactor-internal; do not forward to the caller.
+    if (event.type === "message.received") return;
+    if (event.type === "inference.error" && auditStore) {
+      accumulatedErrors.push({
+        source: "inference",
+        category: event.data.error.category,
+        message: event.data.error.message,
+        fatal: false,
+        timestamp: new Date().toISOString(),
+        sessionId,
+        seq: errorSeq++,
+        ...(event.data.error.statusCode !== undefined
+          ? { statusCode: event.data.error.statusCode }
+          : {}),
+      });
+    }
+    if (event.type === "reactor.error" && auditStore) {
+      accumulatedErrors.push({
+        source: "reactor",
+        category: "reactor_error",
+        message: event.data.error,
+        fatal: event.data.fatal,
+        timestamp: new Date().toISOString(),
+        sessionId,
+        seq: errorSeq++,
+      });
+    }
+    onEvent(event);
+  }
+  // -------------------------------------------------------------------------
+  // Reactor
+  // -------------------------------------------------------------------------
+  /**
+   * Commit the buffered error records to the audit store. Records are only
+   * removed from the buffer after commitErrors resolves, so a failed commit
+   * leaves them in place; records pushed while the commit is in flight sit
+   * past `count` and are kept for the next flush.
+   */
+  async function flushErrors(): Promise<void> {
+    if (accumulatedErrors.length === 0) return;
+    if (auditStore === undefined) return;
+    const count = accumulatedErrors.length;
+    await auditStore.commitErrors(accumulatedErrors.slice(0, count));
+    accumulatedErrors.splice(0, count);
+  }
+  // activeSource is held as a single mutable object whose reference is
+  // shared with the reactor's config (via the assembly helper). The reactor
+  // reads the source lazily at each inference call, so mutating the
+  // fields on this object hot-swaps credentials and model without
+  // restarting.
+  const activeSource: InferenceSource = { ...source };
+  const { reactor, blobReader } = createReactorAssembly({
+    sessionId,
+    director,
+    source: activeSource,
+    toolRunner: tools,
+    contextStore: wrappedStore,
+    onEvent: handleEvent,
+    ...(config.authorize !== undefined ? { authorize: config.authorize } : {}),
+    ...(config.auditStore !== undefined
+      ? { auditStore: config.auditStore }
+      : {}),
+    ...(config.beforeToolExtensions !== undefined
+      ? { beforeToolExtensions: config.beforeToolExtensions }
+      : {}),
+    // flushErrors only runs when audit is wired — preserves today's
+    // behavior where harness.ts only invokes flushErrors inside the
+    // auditCollector branch.
+    ...(config.auditStore !== undefined
+      ? { afterCheckpoint: flushErrors, onShutdown: flushErrors }
+      : {}),
+  });
+  let unsubscribe: Unsubscribe | null = null;
+  let started = false;
+  let stopped = false;
+  /**
+   * Subscribe to the INBOX and start the reactor. Throws if called twice.
+   * NOTE(review): a start() issued after stop() is not rejected here.
+   */
+  function start(): void {
+    if (started) {
+      throw new Error("Harness is already started");
+    }
+    started = true;
+    // Subscribe to the INBOX before starting the reactor so no messages are
+    // missed in the window between subscription and first watch callback.
+    unsubscribe = transport.watch("INBOX", (event) => {
+      if (stopped) return;
+      if (event.type !== "exists") {
+        return;
+      }
+      const ref = { uid: event.uid, mailbox: "INBOX" };
+      void (async () => {
+        let message;
+        try {
+          message = await transport.fetchFull(ref);
+        } catch (cause) {
+          logger.error`Failed to fetch message uid=${event.uid}: ${cause}`;
+          return;
+        }
+        // stop() may have been called while the fetch was in flight.
+        if (stopped) return;
+        // Everything below runs inside a detached promise. Guard it so a
+        // throw from the router or the reactor is logged instead of
+        // surfacing as an unhandled rejection (mirrors the connector.reply
+        // sender in handleEvent).
+        try {
+          // Only connector-thread messages are consumed from the INBOX.
+          // Everything else is delivered to the reactor and stays in the
+          // INBOX so message tools can access it.
+          //
+          // route() can throw on malformed inbound headers (e.g. a From
+          // header that is not a valid RFC 5322 address). Surface the
+          // failure via the logger and fall through to passthrough so
+          // the message still reaches the reactor for inspection, but
+          // do not advance router state or consume.
+          let decision: RouteDecision;
+          try {
+            decision = connectorRouter.route(message);
+          } catch (cause) {
+            logger.warn`Connector router rejected message uid=${message.ref.uid} for agent ${config.address}: ${cause instanceof Error ? cause.message : String(cause)}`;
+            reactor.deliver(message);
+            return;
+          }
+          if (decision.kind === "passthrough") {
+            // Non-connector mail (replies to agent sends, unsolicited
+            // inter-agent mail, etc.). Deliver to reactor for notification
+            // but leave in INBOX for message tools.
+            reactor.deliver(message);
+            return;
+          }
+          // start or continue: commit router state synchronously before
+          // any await so that a concurrent watch callback fired during
+          // consumeFromInbox observes the updated state.
+          connectorRouter.commit(decision);
+          reactor.deliver(message);
+          await consumeFromInbox(message);
+        } catch (cause) {
+          logger.error`Failed to deliver message uid=${event.uid}: ${cause}`;
+        }
+      })();
+    });
+    reactor.start();
+  }
+  /**
+   * Abort the reactor with reason "user_disconnect" and drop the INBOX
+   * subscription. Idempotent: only the first call has any effect.
+   */
+  function stop(): void {
+    if (stopped) return;
+    stopped = true;
+    reactor.abort("user_disconnect");
+    if (unsubscribe !== null) {
+      unsubscribe();
+      unsubscribe = null;
+    }
+  }
+  /** Hand a message straight to the reactor, bypassing routing and consume. */
+  function deliver(message: InboundMessage): void {
+    reactor.deliver(message);
+  }
+  /**
+   * Validate `newSource` and copy its fields onto the shared activeSource.
+   * Throws `Invalid InferenceSource: …` when validation fails, leaving the
+   * current source untouched.
+   */
+  function setSource(newSource: InferenceSource): void {
+    const parsed = InferenceSource(newSource);
+    if (parsed instanceof type.errors) {
+      throw new Error(`Invalid InferenceSource: ${parsed.summary}`);
+    }
+    // Mutate the shared activeSource object in place so the reactor's
+    // next inference call (which reads the source lazily through the
+    // same reference held by the assembly helper) observes the new
+    // fields. Defaults and capabilities rotate alongside the
+    // credentials.
+    applyInferenceSourceFields(activeSource, parsed);
+  }
+  return { start, stop, deliver, setSource, blobReader };
+}

package/src/index.ts ADDED Viewed

@@ -0,0 +1,31 @@
+export { createHarness } from "./harness";
+export type { Harness } from "./harness";
+export type { HarnessConfig } from "./config";
+export { validateConfig } from "./config";
+export type { BeforeToolExtension } from "@intx/types/runtime";
+export {
+  createDefaultDirector,
+  DefaultDirector,
+  type DefaultDirectorPolicy,
+} from "@intx/inference";
+export { mergeToolRunners } from "./merge-tool-runners";
+export { createHarnessRuntimeCapabilities } from "./runtime-capabilities";
+export type { HarnessRuntimeCapabilitiesOptions } from "./runtime-capabilities";
+export { readDeployTree } from "./deploy-tree";
+export type { DeployTree } from "./deploy-tree";
+export {
+  createConnectorRouter,
+  NoActiveConnectorThreadError,
+} from "./connector-router";
+export type {
+  ConnectorRouter,
+  ConnectorReplyParts,
+  ConnectorRouterOptions,
+  RouteDecision,
+} from "./connector-router";

package/src/merge-tool-runners.test.ts ADDED Viewed

@@ -0,0 +1,149 @@
+import { describe, test, expect } from "bun:test";
+import type {
+  ToolCall,
+  ToolDefinition,
+  ToolResult,
+  ToolRunner,
+} from "@intx/types/runtime";
+import { mergeToolRunners } from "./merge-tool-runners";
+// Test double: a runner that owns `definitions` and answers every call with
+// "<label>:<tool name>", so assertions can tell which runner handled it.
+function makeRunner(
+  label: string,
+  definitions: ToolDefinition[],
+): ToolRunner & { definitions: ToolDefinition[] } {
+  const run = async (call: ToolCall): Promise<ToolResult> => {
+    const content = `${label}:${call.name}`;
+    return { callId: call.id, content };
+  };
+  return { definitions, run };
+}
+// Minimal tool definition for `name`: generated description, empty object schema.
+const TOOL_DEF = (name: string): ToolDefinition => {
+  const description = `Tool ${name}`;
+  return {
+    name,
+    description,
+    inputSchema: { type: "object", properties: {} },
+  };
+};
+const signal = AbortSignal.timeout(5000); // Shared by the run() calls below; created once at module load, so the 5 s timeout counts from import, not per test.
+describe("mergeToolRunners dispatch", () => {
+  // Every test here dispatches purely by tool name, so arguments stay empty.
+  const callFor = (id: string, name: string): ToolCall => ({
+    id,
+    name,
+    arguments: {},
+  });
+  test("routes each call to the runner whose definitions declare it", async () => {
+    const merged = mergeToolRunners([
+      makeRunner("a", [TOOL_DEF("read_file")]),
+      makeRunner("b", [TOOL_DEF("mail_send")]),
+    ]);
+    const fromA = await merged.run(callFor("c1", "read_file"), signal);
+    const fromB = await merged.run(callFor("c2", "mail_send"), signal);
+    expect(fromA.content).toBe("a:read_file");
+    expect(fromB.content).toBe("b:mail_send");
+  });
+  test("returns Unknown tool error for a name not declared by any runner", async () => {
+    const merged = mergeToolRunners([makeRunner("a", [TOOL_DEF("read_file")])]);
+    const outcome = await merged.run(callFor("c1", "nonexistent"), signal);
+    expect(outcome.callId).toBe("c1");
+    expect(outcome.isError).toBe(true);
+    expect(outcome.content).toEqual({ error: `Unknown tool: "nonexistent"` });
+  });
+  test("three-way merge dispatches to each runner", async () => {
+    const merged = mergeToolRunners([
+      makeRunner("a", [TOOL_DEF("alpha")]),
+      makeRunner("b", [TOOL_DEF("beta")]),
+      makeRunner("c", [TOOL_DEF("gamma")]),
+    ]);
+    const first = await merged.run(callFor("1", "alpha"), signal);
+    const second = await merged.run(callFor("2", "beta"), signal);
+    const third = await merged.run(callFor("3", "gamma"), signal);
+    expect(first.content).toBe("a:alpha");
+    expect(second.content).toBe("b:beta");
+    expect(third.content).toBe("c:gamma");
+  });
+  test("combined definitions preserve input order and within-runner order", () => {
+    const merged = mergeToolRunners([
+      makeRunner("a", [TOOL_DEF("a1"), TOOL_DEF("a2")]),
+      makeRunner("b", [TOOL_DEF("b1"), TOOL_DEF("b2")]),
+    ]);
+    const names = merged.definitions.map((def) => def.name);
+    expect(names).toEqual(["a1", "a2", "b1", "b2"]);
+  });
+  test("forwards the caller's AbortSignal to the underlying runner", async () => {
+    // The spy records whichever signal the merged runner hands it.
+    let seen: AbortSignal | undefined;
+    const spy: ToolRunner & { definitions: ToolDefinition[] } = {
+      definitions: [TOOL_DEF("capture")],
+      async run(call: ToolCall, sig: AbortSignal): Promise<ToolResult> {
+        seen = sig;
+        return { callId: call.id, content: "ok" };
+      },
+    };
+    const merged = mergeToolRunners([spy]);
+    const controller = new AbortController();
+    await merged.run(callFor("1", "capture"), controller.signal);
+    expect(seen).toBe(controller.signal);
+  });
+});
+describe("mergeToolRunners collision detection", () => {
+  test("throws on duplicate name across two runners, naming both indices", () => {
+    const runners = [
+      makeRunner("a", [TOOL_DEF("shared")]),
+      makeRunner("b", [TOOL_DEF("shared")]),
+    ];
+    const expected = new Error(
+      'Tool name collision on "shared": registered by both runners[0] and runners[1]',
+    );
+    expect(() => mergeToolRunners(runners)).toThrow(expected);
+  });
+  test("throws when a single runner declares the same name twice", () => {
+    const selfColliding = makeRunner("a", [TOOL_DEF("dup"), TOOL_DEF("dup")]);
+    const expected = new Error(
+      'Tool name collision on "dup": runners[0] declares it twice',
+    );
+    expect(() => mergeToolRunners([selfColliding])).toThrow(expected);
+  });
+});
+describe("mergeToolRunners empty input", () => {
+  test("throws on an empty runners list", () => {
+    // An empty list is rejected at construction.
+    const expected = new Error("mergeToolRunners called with no runners");
+    const mergeNothing = () => mergeToolRunners([]);
+    expect(mergeNothing).toThrow(expected);
+  });
+});

package/src/merge-tool-runners.ts ADDED Viewed

@@ -0,0 +1,90 @@
+// Generic, mail-agnostic merger for tool runners. Takes an arbitrary list
+// of (runner + its declared definitions) and produces a single runner
+// whose definitions list is the concatenation, with name-collision
+// detection at construction.
+//
+// Used by hosts (e.g. the sidecar) that want to compose multiple tool
+// packages — mail, posix, lsp, third-party — into the single
+// `ToolRunner & { definitions }` shape the harness accepts.
+import type {
+  ToolCall,
+  ToolDefinition,
+  ToolResult,
+  ToolRunner,
+} from "@intx/types/runtime";
+/**
+ * Combine several tool runners into one runner exposing a single, merged
+ * `definitions` list.
+ *
+ * Ordering: definitions appear in the merged list in input-runner order,
+ * and in declaration order within each runner. The director builds the
+ * model-facing prompt from `definitions`, so callers choose the
+ * model-visible order by how they sequence the input array.
+ *
+ * Collision: tool names must be unique across the whole input. A name
+ * declared by two different runners throws at construction, citing both
+ * runner indices and the name. A name declared twice by the same runner
+ * is that runner's bug and throws with its own distinct message.
+ *
+ * Dispatch: `run` forwards the call and the caller's signal to the
+ * runner that declared the tool. A name nobody declared does not throw;
+ * it resolves to `{ isError: true }` with object-shaped content
+ * `{ error: 'Unknown tool: "<name>"' }` — the same shape per-handler
+ * errors use elsewhere in the codebase, so callers handle one error
+ * shape whether dispatch or a handler failed.
+ *
+ * Empty input throws, because merging zero runners is almost always a
+ * wiring mistake; a caller that really wants an empty runner should
+ * build one explicitly. A runner with an empty `definitions` array is
+ * accepted and simply contributes nothing.
+ */
+export function mergeToolRunners(
+  runners: readonly (ToolRunner & { definitions: ToolDefinition[] })[],
+): ToolRunner & { definitions: ToolDefinition[] } {
+  if (runners.length === 0) {
+    throw new Error("mergeToolRunners called with no runners");
+  }
+  const merged: ToolDefinition[] = [];
+  // Tool name -> the runner that handles it, plus that runner's position
+  // in the input so a collision message can point at both parties.
+  const ownerByName = new Map<string, { runner: ToolRunner; index: number }>();
+  runners.forEach((runner, index) => {
+    for (const def of runner.definitions) {
+      const prior = ownerByName.get(def.name);
+      if (prior === undefined) {
+        ownerByName.set(def.name, { runner, index });
+        merged.push(def);
+        continue;
+      }
+      if (prior.index === index) {
+        throw new Error(
+          `Tool name collision on "${def.name}": runners[${index}] declares it twice`,
+        );
+      }
+      throw new Error(
+        `Tool name collision on "${def.name}": registered by both runners[${prior.index}] and runners[${index}]`,
+      );
+    }
+  });
+  return {
+    definitions: merged,
+    async run(call: ToolCall, signal: AbortSignal): Promise<ToolResult> {
+      const owner = ownerByName.get(call.name);
+      if (owner !== undefined) {
+        return owner.runner.run(call, signal);
+      }
+      // Undeclared tool: report it as an error result instead of throwing.
+      return {
+        callId: call.id,
+        content: { error: `Unknown tool: "${call.name}"` },
+        isError: true,
+      };
+    },
+  };
+}

package/src/runtime-capabilities.test.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import { describe, test, expect } from "bun:test";
+import type { MessageTransport } from "@intx/types/runtime";
+import { createHarnessRuntimeCapabilities } from "./runtime-capabilities";
+// Minimal stand-in for MessageTransport: an empty object cast to the interface. The factory passes the handle
+// through; it does not invoke any methods on it, so the test below only checks reference identity (toBe).
+// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion -- test-only stand-in; factory never calls these methods
+const stubTransport = {} as unknown as MessageTransport;
+describe("createHarnessRuntimeCapabilities", () => {
+  test("resolve('mail.transport') returns the supplied transport reference", () => {
+    // The factory must hand back the exact transport object it was given.
+    const options = { transport: stubTransport };
+    const resolved =
+      createHarnessRuntimeCapabilities(options).resolve("mail.transport");
+    expect(resolved).toBe(stubTransport);
+  });
+});

package/src/runtime-capabilities.ts ADDED Viewed

@@ -0,0 +1,22 @@
+// Harness-side factory for the RuntimeCapabilities that tool packages
+// consume. The wrapper exists so callers (sidecar, alternate runtimes)
+// pass a config object keyed by domain (`transport`) and the harness
+// owns the translation to RuntimeCapabilityMap keys (`mail.transport`).
+// When new capabilities are added, callers' shapes evolve through this
+// wrapper, not at the call site.
+import {
+  createRuntimeCapabilities,
+  type RuntimeCapabilities,
+} from "@intx/types/runtime-capabilities";
+import type { MessageTransport } from "@intx/types/runtime";
+export interface HarnessRuntimeCapabilitiesOptions {
+  transport: MessageTransport; // Registered under the "mail.transport" capability key by createHarnessRuntimeCapabilities.
+}
+// Translate the domain-keyed options object into the capability-map keys
+// that tool packages resolve (`transport` -> "mail.transport").
+export function createHarnessRuntimeCapabilities(
+  opts: HarnessRuntimeCapabilitiesOptions,
+): RuntimeCapabilities {
+  const { transport } = opts;
+  return createRuntimeCapabilities({ "mail.transport": transport });
+}

package/tsconfig.json ADDED Viewed

@@ -0,0 +1,4 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "include": ["src/**/*.ts"]
+}