npm - niahere - Versions diffs - 0.3.12 → 0.4.1 - Mend

niahere 0.3.12 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/package.json +1 -1
package/src/agent/backends/claude-normalize.ts +142 -0
package/src/agent/backends/claude.ts +181 -0
package/src/agent/backends/codex-normalize.ts +76 -0
package/src/agent/backends/codex.ts +208 -0
package/src/agent/index.ts +12 -0
package/src/agent/mcp-endpoint.ts +102 -0
package/src/agent/message-stream.ts +106 -0
package/src/agent/registry.ts +51 -0
package/src/agent/types.ts +126 -0
package/src/chat/engine.ts +148 -480
package/src/commands/validate.ts +13 -3
package/src/core/daemon.ts +8 -0
package/src/core/runner.ts +94 -225
package/src/mcp/server.ts +10 -367
package/src/mcp/tools/table.ts +258 -0
package/src/mcp/tools/types.ts +16 -0
package/src/types/config.ts +7 -1
package/src/utils/config.ts +6 -2
package/src/utils/retry.ts +10 -0

package/src/agent/mcp-endpoint.ts ADDED Viewed

@@ -0,0 +1,102 @@
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
+import { randomBytes, randomUUID } from "crypto";
+import type { NiaTool } from "../mcp/tools/types";
+import type { McpSourceContext } from "../mcp";
+import { log } from "../utils/log";
+/**
+ * Loopback MCP endpoint — how out-of-process CLI backends (Codex/Gemini) reach
+ * Nia's tools. The daemon hosts ONE 127.0.0.1 HTTP server; each agent run mints
+ * a bearer token bound to an IMMUTABLE `McpSourceContext` snapshot and gets its
+ * own MCP server instance (so `send_message` routing is frozen per run, exactly
+ * like the in-process per-query closure — no shared mutable routing state, no
+ * cross-run race). Tool handlers run IN the daemon process, keeping their
+ * channel/phone/DB singleton access.
+ *
+ * Round-trip verified end-to-end against real codex 0.142.0 (see the spec).
+ */
+/** Everything the endpoint keeps for one live run; stored in `runs` under that run's bearer token. */
+interface RunEntry {
+  /** Context passed to every tool handler invoked on behalf of this run. */
+  ctx: McpSourceContext;
+  /** The MCP server instance created for this run. */
+  server: McpServer;
+  /** Transport the run's MCP server is connected to; authorized HTTP requests are delegated to it. */
+  transport: WebStandardStreamableHTTPServerTransport;
+}
+/** Live runs, keyed by the bearer token minted for each run. */
+const runs = new Map<string, RunEntry>();
+/** Handle of the loopback HTTP server; `null` while the endpoint is not started. */
+let server: ReturnType<typeof Bun.serve> | null = null;
+/** Port the loopback server reported after starting; reset to 0 when the endpoint is stopped. */
+let port = 0;
+// Injected by the daemon (the composition root) so this module never imports the
+// tool table — which would create a cycle (handlers → runner → agent → here).
+let endpointTools: NiaTool[] = [];
+/**
+ * Create the MCP server for a single run. Each tool is registered with a handler
+ * that closes over `ctx`, so every call made through this server is answered
+ * with the context captured here.
+ */
+function buildRunServer(ctx: McpSourceContext, tools: NiaTool[]): McpServer {
+  const runServer = new McpServer({ name: "nia", version: "0.1.0" });
+  tools.forEach((tool) => {
+    const { name, description, schema } = tool;
+    runServer.registerTool(name, { description, inputSchema: schema }, async (args: unknown) => {
+      // The handler's string result is wrapped as a single text content item.
+      const text = await tool.handler(args, ctx);
+      return { content: [{ type: "text" as const, text }] };
+    });
+  });
+  return runServer;
+}
+/**
+ * Start the loopback endpoint (idempotent). The daemon passes `NIA_TOOLS`.
+ *
+ * @param tools Tool table used by runs that are minted without their own tool
+ *   list. When omitted, the table injected by an earlier call is kept — a
+ *   repeat call with no argument must not silently wipe the daemon's tools.
+ *   Passing an explicit array (even an empty one) still replaces the table.
+ * @returns Resolves after the server has been created, or immediately when it
+ *   already exists.
+ */
+export async function startMcpEndpoint(tools?: NiaTool[]): Promise<void> {
+  if (tools !== undefined) endpointTools = tools;
+  if (server) return;
+  server = Bun.serve({
+    hostname: "127.0.0.1",
+    port: 0, // OS-assigned ephemeral port
+    // MCP Streamable-HTTP keeps a long-lived server→client stream open; without
+    // a high idle timeout Bun cuts it (default 10s) mid-job. 255s is Bun's max.
+    idleTimeout: 255,
+    async fetch(req) {
+      const url = new URL(req.url);
+      if (url.pathname !== "/mcp") return new Response("not found", { status: 404 });
+      // The bearer token doubles as the lookup key for the run. A missing or
+      // malformed header yields "", which never matches a minted token.
+      const auth = req.headers.get("authorization") ?? "";
+      const token = auth.startsWith("Bearer ") ? auth.slice(7) : "";
+      const entry = runs.get(token);
+      if (!entry) return new Response("unauthorized", { status: 401 });
+      return entry.transport.handleRequest(req);
+    },
+  });
+  port = server.port ?? 0;
+  log.info({ port }, "mcp-endpoint: listening on loopback");
+}
+/**
+ * Shut the endpoint down: revoke every live run, stop the loopback server
+ * (calling `stop(true)`), and reset `server`/`port` to their not-started values.
+ */
+export function stopMcpEndpoint(): void {
+  // Snapshot the tokens first — revokeRun deletes from `runs` as it goes.
+  const liveTokens = Array.from(runs.keys());
+  liveTokens.forEach((token) => revokeRun(token));
+  if (server) server.stop(true);
+  server = null;
+  port = 0;
+}
+/**
+ * Register a new run: generate a random bearer token, build an MCP server
+ * bound to `ctx`, connect it to a fresh Streamable-HTTP transport, and record
+ * the trio under the token.
+ *
+ * @param ctx Source context baked into every tool handler of this run.
+ * @param tools Tool list for this run; falls back to the table injected at startup.
+ * @returns The endpoint URL and the token to hand to the CLI backend (e.g.
+ *   `mcp_servers.nia.url` + a bearer env var).
+ * @throws Error if the endpoint isn't started.
+ */
+export async function mintRun(ctx: McpSourceContext, tools?: NiaTool[]): Promise<{ url: string; token: string }> {
+  if (!server) {
+    throw new Error("mcp-endpoint not started");
+  }
+  const token = randomBytes(32).toString("base64url");
+  const runTools = tools ?? endpointTools;
+  const runServer = buildRunServer(ctx, runTools);
+  const runTransport = new WebStandardStreamableHTTPServerTransport({
+    sessionIdGenerator: () => randomUUID(),
+  });
+  await runServer.connect(runTransport);
+  // Only a fully connected run becomes reachable through the token map.
+  const entry: RunEntry = { ctx, server: runServer, transport: runTransport };
+  runs.set(token, entry);
+  const url = `http://127.0.0.1:${port}/mcp`;
+  return { url, token };
+}
+/**
+ * Forget a run's token and close its transport and server. Unknown tokens are
+ * ignored, so calling this twice for the same run is harmless. Close failures
+ * are deliberately swallowed (best-effort teardown).
+ */
+export function revokeRun(token: string): void {
+  const entry = runs.get(token);
+  if (entry === undefined) return;
+  // Drop the token before closing so no new request can reach a closing run.
+  runs.delete(token);
+  const ignoreFailure = (): void => {};
+  entry.transport.close().catch(ignoreFailure);
+  entry.server.close().catch(ignoreFailure);
+}
+/**
+ * Test/diagnostic: number of live runs, i.e. how many tokens have been minted
+ * and not yet revoked.
+ */
+export function liveRunCount(): number {
+  return runs.size;
+}

package/src/agent/message-stream.ts ADDED Viewed

@@ -0,0 +1,106 @@
+// @ts-ignore — SDK re-exports this type but tsc can't resolve the path under Bun
+import type { MessageParam } from "@anthropic-ai/sdk/resources";
+import type { Attachment } from "../types/attachment";
+/** Envelope for one user turn, in the shape `MessageStream` queues and yields. */
+export interface SDKUserMessage {
+  /** Discriminator; always the literal "user". */
+  type: "user";
+  /** Message payload in Anthropic `MessageParam` form. */
+  message: MessageParam;
+  /** Always `null` in this shape. */
+  parent_tool_use_id: null;
+  /** Session identifier; `MessageStream.push` fills it with an empty string. */
+  session_id: string;
+}
+/**
+ * Turn a message's text plus provider-agnostic attachments into Anthropic
+ * message content.
+ *
+ * With no attachments the text is returned unchanged as a plain string.
+ * Otherwise an array of blocks is produced, in this order:
+ *  1. one text block listing the local path of every attachment that has a
+ *     `sourcePath` (numbered by its 1-based position in `attachments`);
+ *  2. for each attachment without a `sourcePath`: a base64 image block for
+ *     images, or a labelled text block for documents (other types are skipped);
+ *  3. the message text itself, when non-empty.
+ */
+export function buildContentBlocks(text: string, attachments?: Attachment[]): MessageParam["content"] {
+  if (attachments == null || attachments.length === 0) return text;
+  type TextBlock = { type: "text"; text: string };
+  type ImageBlock = { type: "image"; source: { type: "base64"; media_type: string; data: string } };
+  const blocks: Array<TextBlock | ImageBlock> = [];
+  // Path-backed attachments are referenced by path rather than inlined.
+  const hintLines: string[] = [];
+  attachments.forEach((att, idx) => {
+    if (!att.sourcePath) return;
+    const ordinal = idx + 1;
+    const label = att.filename || `${att.type}-${ordinal}`;
+    hintLines.push(`- ${ordinal}. ${label} (${att.type}, ${att.mimeType}) -> ${att.sourcePath}`);
+  });
+  if (hintLines.length > 0) {
+    const header =
+      "[Attachment local paths]\n" +
+      "Use these absolute paths to inspect attachments. To resend/forward one, call send_message with media_path set to its path.\n";
+    blocks.push({ type: "text", text: header + hintLines.join("\n") });
+  }
+  // Attachments without a path carry their bytes in `data` and are inlined.
+  for (const att of attachments.filter((candidate) => !candidate.sourcePath)) {
+    switch (att.type) {
+      case "image": {
+        const encoded = att.data.toString("base64");
+        blocks.push({ type: "image", source: { type: "base64", media_type: att.mimeType, data: encoded } });
+        break;
+      }
+      case "document": {
+        const heading = att.filename ? `[${att.filename}]` : "[document]";
+        blocks.push({ type: "text", text: `${heading}\n${att.data.toString("utf8")}` });
+        break;
+      }
+      default:
+        break;
+    }
+  }
+  if (text) blocks.push({ type: "text", text });
+  return blocks as MessageParam["content"];
+}
+/**
+ * A push-driven async iterable of user messages for the SDK. One `query()`
+ * consumes the stream for the whole life of a session while each turn pushes
+ * a single user message onto it, which keeps the query subprocess alive
+ * between messages (the warm-session optimization).
+ */
+export class MessageStream {
+  private queue: SDKUserMessage[] = [];
+  private waiting: (() => void) | null = null;
+  private done = false;
+  /** Queue one user turn and wake the consumer if it is parked. */
+  push(text: string, attachments?: Attachment[]): void {
+    const content = buildContentBlocks(text, attachments);
+    const envelope: SDKUserMessage = {
+      type: "user",
+      message: { role: "user", content },
+      parent_tool_use_id: null,
+      session_id: "",
+    };
+    this.queue.push(envelope);
+    if (this.waiting) this.waiting();
+  }
+  /** Mark the stream finished; the iterator returns once the queue is drained. */
+  end(): void {
+    this.done = true;
+    if (this.waiting) this.waiting();
+  }
+  /** Yield queued messages in order, parking between bursts until `push`/`end` wakes it. */
+  async *[Symbol.asyncIterator](): AsyncGenerator<SDKUserMessage> {
+    for (;;) {
+      // Drain whatever is queued before looking at the done flag.
+      for (let next = this.queue.shift(); next !== undefined; next = this.queue.shift()) {
+        yield next;
+      }
+      if (this.done) return;
+      // Park until push()/end() invokes the stored resolver.
+      await new Promise<void>((resolve) => {
+        this.waiting = resolve;
+      });
+      this.waiting = null;
+    }
+  }
+}

package/src/agent/registry.ts ADDED Viewed

@@ -0,0 +1,51 @@
+import type { AgentBackend } from "./types";
+import { ClaudeBackend } from "./backends/claude";
+import { CodexBackend } from "./backends/codex";
+import { getConfig } from "../utils/config";
+/**
+ * Backend selection — the ONE place backend identity is resolved. Consumers call
+ * `getBackend()` and depend only on the `AgentBackend` interface, so no
+ * `if (backend === …)` ever leaks into the orchestration loop.
+ *
+ * Status in this file: Claude and Codex are selectable by name; any other name
+ * (including "gemini") currently resolves to the Claude backend. The ordered
+ * failover chain is built by `resolveBackends()` from the configured runner and
+ * fallbacks.
+ */
+/** Claude backend instance, created on first use by `getBackend()`. */
+let claudeBackend: ClaudeBackend | null = null;
+/** Codex backend instance, created on first use by `getBackend("codex")`. */
+let codexBackend: CodexBackend | null = null;
+/** Test seam set via `setBackend()`; when non-null it wins over name-based selection. */
+let override: AgentBackend | null = null;
+/** Test seam set via `setBackendChain()`; when non-null `resolveBackends()` returns it as-is. */
+let chainOverride: AgentBackend[] | null = null;
+/**
+ * Resolve a backend by name. A backend forced through `setBackend()` always
+ * wins. Otherwise "codex" yields the Codex backend and every other value
+ * (including an omitted name) yields the Claude backend. Each backend is
+ * constructed on first request and reused afterwards.
+ */
+export function getBackend(name?: "claude" | "codex" | "gemini"): AgentBackend {
+  if (override) return override;
+  switch (name) {
+    case "codex":
+      codexBackend ??= new CodexBackend();
+      return codexBackend;
+    default:
+      claudeBackend ??= new ClaudeBackend();
+      return claudeBackend;
+  }
+}
+/**
+ * Test seam: force `getBackend()` to return a specific backend; pass null to reset.
+ * While set, `resolveBackends()` also returns just this backend unless a chain
+ * override is in place.
+ */
+export function setBackend(backend: AgentBackend | null): void {
+  override = backend;
+}
+/**
+ * Test seam: force `resolveBackends()` to return a specific chain; null resets.
+ * The array is stored and later returned by reference, not copied.
+ */
+export function setBackendChain(backends: AgentBackend[] | null): void {
+  chainOverride = backends;
+}
+/**
+ * The ordered backend chain for a run: the configured primary first, then any
+ * fallbacks (provider-down failover), de-duplicated. Consumers try each in order
+ * until one isn't provider-down.
+ *
+ * De-duplication is done on the RESOLVED backend, not the configured name:
+ * `getBackend()` maps several names to the same instance, and a chain that
+ * lists one backend twice would "fail over" to the provider that just failed.
+ *
+ * @returns The chain override if one is set, else the single backend override
+ *   if one is set, else the unique backends for `runner` + `fallback` in
+ *   configuration order.
+ */
+export function resolveBackends(): AgentBackend[] {
+  if (chainOverride) return chainOverride;
+  if (override) return [override];
+  const cfg = getConfig();
+  // Tolerate a config without a fallback list (treated as "no fallbacks").
+  const names = [cfg.runner, ...(cfg.fallback ?? [])];
+  // A Set keeps first-insertion order, so the primary stays at the front.
+  const chain = new Set<AgentBackend>();
+  for (const name of names) chain.add(getBackend(name));
+  return [...chain];
+}

package/src/agent/types.ts ADDED Viewed

@@ -0,0 +1,126 @@
+import type { Attachment } from "../types/attachment";
+import type { McpSourceContext } from "../mcp";
+/**
+ * The harness-agnostic execution seam. The orchestrator (engine.ts / runner.ts)
+ * depends only on these abstractions and the `AgentEvent` stream — it never
+ * branches on which backend is running. Everything backend-specific lives inside
+ * one adapter under `src/agent/backends/`.
+ */
+/** A subagent definition, mirroring `getAgentDefinitions()` (Claude-only feature). */
+export interface AgentDef {
+  /** Human-readable description of the subagent — presumably what the parent agent uses to pick it; TODO confirm. */
+  description: string;
+  /** Prompt text for the subagent. */
+  prompt: string;
+  /** Optional model for the subagent; behavior when omitted is decided by the backend (not visible here). */
+  model?: string;
+}
+/** Normalized token/cost usage. Every field is optional so a tokens-only backend
+ *  (Codex/Gemini) is first-class, not a special case. */
+export interface AgentUsage {
+  /** Cost in US dollars, when the backend reports one. */
+  costUsd?: number;
+  /** Input/output token counts, when the backend reports them. */
+  tokens?: { input: number; output: number };
+  /** Turn count, when the backend reports one — presumably agent turns within the run; TODO confirm. */
+  turns?: number;
+}
+/**
+ * The normalized event vocabulary every backend maps its native stream into.
+ * Adapters emit these; consumers switch on `type` and nothing else.
+ *
+ * - `session`: emitted exactly ONCE per `send()`, even across internal retries,
+ *   so the consumer can persist the user message idempotently.
+ * - `text`/`thinking`: streamed reply / status (→ onStream / onActivity).
+ * - `tool`: a tool-call activity line (`summary` is optional).
+ * - `result`/`error`: terminal events ending a turn. `result` carries the final
+ *   text, usage and `backendSessionId`; `terminalReason` is optional on both.
+ * - `error.retryable` (transient API failure → the backend may retry internally)
+ *   and `error.providerDown` (the provider is unavailable → failover trigger) are
+ *   INDEPENDENT predicates.
+ */
+export type AgentEvent =
+  | { type: "session"; backendSessionId: string }
+  | { type: "text"; delta: string }
+  | { type: "thinking"; delta: string }
+  | { type: "tool"; name: string; summary?: string }
+  | {
+      type: "result";
+      text: string;
+      usage: AgentUsage;
+      backendSessionId: string;
+      terminalReason?: string;
+      /** Backend-native metadata the consumer persists to the session/message DB
+       *  row (Claude: total_cost_usd, num_turns, duration_ms, usage, modelUsage…).
+       *  Opaque to the orchestrator. */
+      metadata?: Record<string, unknown>;
+    }
+  | { type: "error"; message: string; retryable: boolean; providerDown: boolean; terminalReason?: string };
+/** Type guard: true when `ev` is the `result` variant, narrowing it for the caller. */
+export function isResultEvent(ev: AgentEvent): ev is Extract<AgentEvent, { type: "result" }> {
+  return ev.type === "result";
+}
+/** Per-session configuration handed to a backend when a session opens. */
+export interface AgentSessionContext {
+  /** Room identifier for the session — NOTE(review): exact format is defined by the caller; confirm. */
+  room: string;
+  /** Channel the session belongs to — presumably the messaging channel name; TODO confirm. */
+  channel: string;
+  /** System prompt for the session. */
+  systemPrompt: string;
+  /** Working directory for the session. */
+  cwd: string;
+  /** Optional model; the default when omitted is chosen by the backend (not visible here). */
+  model?: string;
+  /**
+   * MCP wiring. There are two real call paths in the codebase and the adapter
+   * uses whichever is present (it must NOT rebuild this itself, or chat loses
+   * its Slack thread context):
+   *  - chat passes a pre-built server blob down via `EngineOptions.mcpServers`
+   *    (built by the channel through `getMcpServers(slackCtx)`);
+   *  - jobs pass a raw `McpSourceContext` and let the backend wire MCP.
+   */
+  mcpServers?: Record<string, unknown>;
+  source?: McpSourceContext;
+  /** Resume control — presumably a string names a specific prior session id and a
+   *  boolean toggles resuming at all; TODO confirm against the backends. */
+  resume: boolean | string;
+  /** Capability-gated; consumed only by backends that support subagents (Claude). */
+  subagents?: Record<string, AgentDef>;
+  /**
+   * True for warm, interactive chat sessions; false/undefined for headless
+   * one-shot jobs. Backends use it to choose interactive options — e.g. the
+   * Claude backend loads project/user settings and streams partial messages
+   * only when interactive (jobs keep the leaner one-shot option set).
+   */
+  interactive?: boolean;
+}
+/**
+ * A live agent session. Chat keeps one open across many turns; a job opens it,
+ * sends once, and closes.
+ */
+export interface AgentSession {
+  /**
+   * Re-read AFTER each `send()` drains: a new session assigns it on the first
+   * turn, and an internal retry may rotate it. The consumer threads this value
+   * into finalizer/DB — it must never cache the id from before the send.
+   * `null` until the backend has assigned an id.
+   */
+  readonly backendSessionId: string | null;
+  /** Streams the turn's events; ends with `result` or `error`. Emits exactly one
+   *  `session` event even across internal retries. */
+  send(text: string, attachments?: Attachment[]): AsyncIterable<AgentEvent>;
+  /** Interrupt an in-flight send. Retry teardown+restart is atomic w.r.t. this.
+   *  The consumer registers it via `registerActiveHandle`. */
+  abort(reason: string): void;
+  /** Close the session — presumably releases the backend's resources for it;
+   *  TODO confirm per backend. */
+  close(): Promise<void>;
+}
+/** A pluggable agent backend: identifies itself by name, opens sessions, and
+ *  reports whether a previous session can be resumed. */
+export interface AgentBackend {
+  /** Backend identifier; one of the three supported literals. */
+  readonly name: "claude" | "codex" | "gemini";
+  /** Open a session configured by `ctx`. */
+  openSession(ctx: AgentSessionContext): Promise<AgentSession>;
+  /** Whether a prior session id can be resumed on this backend in this cwd.
+   *  Opaque to the consumer — Claude probes a jsonl file, Codex a thread id, etc.
+   *  Unknowns return false → fresh session with replayed context. */
+  canResume(backendSessionId: string, cwd: string): Promise<boolean>;
+}
+/**
+ * Shared contract for the per-backend stream normalizers. Each backend has one
+ * (SdkNormalizer, CodexNormalizer, GeminiNormalizer). Normalizers are PURE — no
+ * I/O, no timers — so the session is just orchestration.
+ */
+export interface Normalizer {
+  /** Map one native backend message (untyped here) to zero or more normalized events. */
+  consume(message: unknown): AgentEvent[];
+}