niahere 0.3.11 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
3
+ import { randomBytes, randomUUID } from "crypto";
4
+ import type { NiaTool } from "../mcp/tools/types";
5
+ import type { McpSourceContext } from "../mcp";
6
+ import { log } from "../utils/log";
7
+
8
+ /**
9
+ * Loopback MCP endpoint — how out-of-process CLI backends (Codex/Gemini) reach
10
+ * Nia's tools. The daemon hosts ONE 127.0.0.1 HTTP server; each agent run mints
11
+ * a bearer token bound to an IMMUTABLE `McpSourceContext` snapshot and gets its
12
+ * own MCP server instance (so `send_message` routing is frozen per run, exactly
13
+ * like the in-process per-query closure — no shared mutable routing state, no
14
+ * cross-run race). Tool handlers run IN the daemon process, keeping their
15
+ * channel/phone/DB singleton access.
16
+ *
17
+ * Round-trip verified end-to-end against real codex 0.142.0 (see the spec).
18
+ */
19
+
20
/** Everything held for one live run; stored in `runs` under the run's bearer token. */
interface RunEntry {
  /** The context the run was minted with; the same object is passed to every tool handler. */
  ctx: McpSourceContext;
  /** The run's own MCP server instance. */
  server: McpServer;
  /** The transport the run's server is connected to; incoming HTTP requests are handed to it. */
  transport: WebStandardStreamableHTTPServerTransport;
}
25
+
26
// Live runs, keyed by bearer token.
const runs = new Map<string, RunEntry>();
// The Bun HTTP server handle; null while the endpoint is not started.
let server: ReturnType<typeof Bun.serve> | null = null;
// Port the server is bound to; 0 while the endpoint is not started.
let port = 0;
// Injected by the daemon (the composition root) so this module never imports the
// tool table — which would create a cycle (handlers → runner → agent → here).
let endpointTools: NiaTool[] = [];
32
+
33
/**
 * Create a dedicated MCP server for one run. Every tool in `tools` is
 * registered with a handler that closes over `ctx`, so the context a tool sees
 * is fixed when the server is built. The awaited handler result is returned to
 * the client as the text of a single text content item.
 */
function buildRunServer(ctx: McpSourceContext, tools: NiaTool[]): McpServer {
  const runServer = new McpServer({ name: "nia", version: "0.1.0" });
  tools.forEach((tool) => {
    const { name, description, schema } = tool;
    runServer.registerTool(name, { description, inputSchema: schema }, async (args: unknown) => {
      const text = await tool.handler(args, ctx);
      return { content: [{ type: "text" as const, text }] };
    });
  });
  return runServer;
}
43
+
44
/**
 * Start the loopback endpoint (idempotent). The daemon passes `NIA_TOOLS`.
 *
 * Binds 127.0.0.1 on an OS-assigned port and serves only `/mcp`. A request is
 * routed to the transport of the run whose token matches its
 * `Authorization: Bearer <token>` header; anything else gets 404 (wrong path)
 * or 401 (missing or unknown token).
 *
 * @param tools Default tool table used by `mintRun` when a run does not supply
 *   its own. While the endpoint is already running, an omitted/empty table
 *   leaves the existing one untouched, so a bare repeat call cannot wipe the
 *   daemon's tools; a non-empty table still replaces it.
 */
export async function startMcpEndpoint(tools: NiaTool[] = []): Promise<void> {
  // Only overwrite the injected table on first start or when real tools are given.
  if (!server || tools.length > 0) endpointTools = tools;
  if (server) return;
  server = Bun.serve({
    hostname: "127.0.0.1",
    port: 0, // OS-assigned ephemeral port
    // MCP Streamable-HTTP keeps a long-lived server→client stream open; without
    // a high idle timeout Bun cuts it (default 10s) mid-job. 255s is Bun's max.
    idleTimeout: 255,
    async fetch(req) {
      const url = new URL(req.url);
      if (url.pathname !== "/mcp") return new Response("not found", { status: 404 });
      const auth = req.headers.get("authorization") ?? "";
      // HTTP auth scheme names are case-insensitive; the token itself is compared exactly.
      const token = auth.slice(0, 7).toLowerCase() === "bearer " ? auth.slice(7) : "";
      // An empty token never matches: minted tokens are always non-empty.
      const entry = runs.get(token);
      if (!entry) return new Response("unauthorized", { status: 401 });
      return entry.transport.handleRequest(req);
    },
  });
  port = server.port ?? 0;
  log.info({ port }, "mcp-endpoint: listening on loopback");
}
67
+
68
/**
 * Shut the endpoint down: revoke every live run, stop the HTTP server (if one
 * is running) and reset the module state to "not started".
 */
export function stopMcpEndpoint(): void {
  // Snapshot the keys first — revokeRun deletes from `runs` while we iterate.
  const liveTokens = Array.from(runs.keys());
  liveTokens.forEach((token) => revokeRun(token));
  if (server) server.stop(true);
  server = null;
  port = 0;
}
74
+
75
/**
 * Register a new run: generate a random bearer token, build an MCP server whose
 * tools are bound to `ctx`, connect it to a fresh transport, and record the
 * trio under the token.
 *
 * @param ctx Context handed to every tool handler of this run.
 * @param tools Tool table for this run; defaults to the table given to
 *   `startMcpEndpoint`.
 * @returns The loopback URL and the token to hand to the CLI backend (e.g.
 *   `mcp_servers.nia.url` + a bearer env var).
 * @throws Error if the endpoint isn't started.
 */
export async function mintRun(ctx: McpSourceContext, tools?: NiaTool[]): Promise<{ url: string; token: string }> {
  if (!server) {
    throw new Error("mcp-endpoint not started");
  }
  const token = randomBytes(32).toString("base64url");
  const runTools = tools == null ? endpointTools : tools;
  const runServer = buildRunServer(ctx, runTools);
  const transport = new WebStandardStreamableHTTPServerTransport({ sessionIdGenerator: () => randomUUID() });
  await runServer.connect(transport);
  // Only a fully connected run becomes reachable through its token.
  runs.set(token, { ctx, server: runServer, transport });
  const url = `http://127.0.0.1:${port}/mcp`;
  return { url, token };
}
89
+
90
/**
 * Forget a run's token and close its transport and server. Unknown tokens are
 * ignored, so calling this twice for the same run is harmless. Close failures
 * are deliberately discarded (best-effort teardown).
 */
export function revokeRun(token: string): void {
  const entry = runs.get(token);
  if (!entry) return;
  const { transport, server: runServer } = entry;
  // Drop the token before closing so no new request can be routed to the run.
  runs.delete(token);
  const ignore = (): void => {};
  transport.close().catch(ignore);
  runServer.close().catch(ignore);
}
98
+
99
/** Test/diagnostic helper: how many runs are currently registered. */
export function liveRunCount(): number {
  const { size } = runs;
  return size;
}
@@ -0,0 +1,106 @@
1
+ // @ts-ignore — SDK re-exports this type but tsc can't resolve the path under Bun
2
+ import type { MessageParam } from "@anthropic-ai/sdk/resources";
3
+ import type { Attachment } from "../types/attachment";
4
+
5
/** One user turn in the shape that `MessageStream` queues and yields. */
export interface SDKUserMessage {
  /** Discriminator; always the literal "user". */
  type: "user";
  /** The Anthropic message payload (role + content). */
  message: MessageParam;
  /** Always null for messages of this shape. */
  parent_tool_use_id: null;
  /** Session id; `MessageStream.push` fills in an empty string. */
  session_id: string;
}
11
+
12
/**
 * Turn a text prompt plus optional attachments into Anthropic message content.
 *
 * With no attachments the text is returned unchanged as a plain string.
 * Otherwise the result is a list of blocks, in this order:
 *   1. one text block listing the local path of every attachment that has a
 *      `sourcePath` (numbered by its position in `attachments`);
 *   2. for each attachment WITHOUT a `sourcePath`: a base64 image block for
 *      images, or a text block with the UTF-8 decoded body for documents
 *      (other attachment types are skipped);
 *   3. the prompt text, if non-empty.
 */
export function buildContentBlocks(text: string, attachments?: Attachment[]): MessageParam["content"] {
  if (!attachments || attachments.length === 0) return text;

  type TextBlock = { type: "text"; text: string };
  type ImageBlock = { type: "image"; source: { type: "base64"; media_type: string; data: string } };
  const out: Array<TextBlock | ImageBlock> = [];

  // Attachments that live on disk are referenced by path instead of being inlined.
  const hints: string[] = [];
  attachments.forEach((attachment, position) => {
    if (!attachment.sourcePath) return;
    const ordinal = position + 1;
    const label = attachment.filename || `${attachment.type}-${ordinal}`;
    hints.push(`- ${ordinal}. ${label} (${attachment.type}, ${attachment.mimeType}) -> ${attachment.sourcePath}`);
  });

  if (hints.length > 0) {
    const header =
      "[Attachment local paths]\n" +
      "Use these absolute paths to inspect attachments. To resend/forward one, call send_message with media_path set to its path.\n";
    out.push({ type: "text", text: header + hints.join("\n") });
  }

  for (const attachment of attachments) {
    if (attachment.sourcePath) continue;

    switch (attachment.type) {
      case "image":
        out.push({
          type: "image",
          source: { type: "base64", media_type: attachment.mimeType, data: attachment.data.toString("base64") },
        });
        break;
      case "document": {
        const heading = attachment.filename ? `[${attachment.filename}]` : "[document]";
        const body = attachment.data.toString("utf8");
        out.push({ type: "text", text: `${heading}\n${body}` });
        break;
      }
    }
  }

  if (text) out.push({ type: "text", text });

  return out as MessageParam["content"];
}
67
+
68
/**
 * A push-driven async iterable of user messages. Producers call `push()` once
 * per turn; a single consumer iterates it for the life of a session, which is
 * what keeps the query subprocess alive between messages (the warm-session
 * optimization). `end()` lets the iterator finish once the queue is drained.
 */
export class MessageStream {
  private queue: SDKUserMessage[] = [];
  private waiting: (() => void) | null = null;
  private done = false;

  /** Queue one user message built from `text` and `attachments`, then wake the consumer. */
  push(text: string, attachments?: Attachment[]): void {
    const content = buildContentBlocks(text, attachments);
    const message: SDKUserMessage = {
      type: "user",
      message: { role: "user", content },
      parent_tool_use_id: null,
      session_id: "",
    };
    this.queue.push(message);
    this.wake();
  }

  /** Mark the stream finished and wake the consumer so it can return. */
  end(): void {
    this.done = true;
    this.wake();
  }

  /** Yield queued messages in order; park when the queue is empty until push()/end(). */
  async *[Symbol.asyncIterator](): AsyncGenerator<SDKUserMessage> {
    for (;;) {
      for (let next = this.queue.shift(); next !== undefined; next = this.queue.shift()) {
        yield next;
      }
      // The queue is drained before `done` is checked, so nothing pushed before end() is lost.
      if (this.done) return;
      await new Promise<void>((resolve) => {
        this.waiting = resolve;
      });
      this.waiting = null;
    }
  }

  /** Resolve the consumer's pending wait, if it is parked. */
  private wake(): void {
    if (this.waiting) this.waiting();
  }
}
@@ -0,0 +1,51 @@
1
+ import type { AgentBackend } from "./types";
2
+ import { ClaudeBackend } from "./backends/claude";
3
+ import { CodexBackend } from "./backends/codex";
4
+ import { getConfig } from "../utils/config";
5
+
6
/**
 * Backend selection — the ONE place backend identity is resolved. Consumers call
 * `getBackend()` and depend only on the `AgentBackend` interface, so no
 * `if (backend === …)` ever leaks into the orchestration loop.
 *
 * Current state: `getBackend()` serves the Claude and Codex backends (any other
 * name, including "gemini", resolves to Claude), and `resolveBackends()` builds
 * the ordered failover chain from config.
 */
// Backend singletons, created on first use by `getBackend()`.
let claudeBackend: ClaudeBackend | null = null;
let codexBackend: CodexBackend | null = null;
// Test seams: when set, they take precedence over normal resolution.
let override: AgentBackend | null = null;
let chainOverride: AgentBackend[] | null = null;
18
+
19
/**
 * Resolve a backend by name. A backend installed through `setBackend()` wins
 * over everything. Otherwise "codex" yields the Codex singleton and every other
 * value — "claude", "gemini", or no name at all — yields the Claude singleton.
 * Singletons are constructed on first request.
 */
export function getBackend(name?: "claude" | "codex" | "gemini"): AgentBackend {
  if (override) return override;
  switch (name) {
    case "codex":
      codexBackend ??= new CodexBackend();
      return codexBackend;
    default:
      claudeBackend ??= new ClaudeBackend();
      return claudeBackend;
  }
}
28
+
29
/**
 * Test seam: force `getBackend()` to return a specific backend; pass null to reset.
 * While set, `resolveBackends()` also returns just this backend, unless a chain
 * override is installed.
 */
export function setBackend(backend: AgentBackend | null): void {
  override = backend;
}
33
+
34
/**
 * Test seam: force `resolveBackends()` to return a specific chain; null resets.
 * The array is returned as-is (not copied) and takes precedence over `setBackend()`.
 */
export function setBackendChain(backends: AgentBackend[] | null): void {
  chainOverride = backends;
}
38
+
39
/**
 * The ordered backend chain for a run: the configured primary first, then any
 * fallbacks (provider-down failover), de-duplicated. Consumers try each in order
 * until one isn't provider-down.
 *
 * Test overrides win: a chain set via `setBackendChain()` is returned as-is,
 * otherwise a backend set via `setBackend()` is returned as a one-element chain.
 *
 * De-duplication is done on the RESOLVED backends, not on the configured names:
 * `getBackend()` maps names without their own adapter to the Claude backend, so
 * two different names can resolve to the same instance, and retrying the same
 * backend would defeat failover. A missing `fallback` list is treated as empty.
 */
export function resolveBackends(): AgentBackend[] {
  if (chainOverride) return chainOverride;
  if (override) return [override];
  const cfg = getConfig();
  const names = [cfg.runner, ...(cfg.fallback ?? [])];
  // A Set keeps first-insertion order, so the primary stays first.
  return [...new Set(names.map((n) => getBackend(n)))];
}
@@ -0,0 +1,126 @@
1
+ import type { Attachment } from "../types/attachment";
2
+ import type { McpSourceContext } from "../mcp";
3
+
4
+ /**
5
+ * The harness-agnostic execution seam. The orchestrator (engine.ts / runner.ts)
6
+ * depends only on these abstractions and the `AgentEvent` stream — it never
7
+ * branches on which backend is running. Everything backend-specific lives inside
8
+ * one adapter under `src/agent/backends/`.
9
+ */
10
+
11
/**
 * A subagent definition, mirroring `getAgentDefinitions()` (Claude-only feature).
 * `description` and `prompt` are required; `model` may be omitted.
 */
export interface AgentDef {
  description: string;
  prompt: string;
  model?: string;
}
17
+
18
/**
 * Normalized token/cost usage. Every field is optional, so a tokens-only
 * backend (Codex/Gemini) is first-class, not a special case.
 */
export interface AgentUsage {
  costUsd?: number;
  tokens?: { input: number; output: number };
  turns?: number;
}
25
+
26
/**
 * The normalized event vocabulary every backend maps its native stream into.
 * Adapters emit these; consumers switch on `type` and nothing else.
 *
 * - `session`: emitted exactly ONCE per `send()`, even across internal retries,
 *   so the consumer can persist the user message idempotently.
 * - `text`/`thinking`: streamed reply / status (→ onStream / onActivity).
 * - `tool`: a tool-call activity line.
 * - `result`/`error`: terminal events ending a turn.
 * - `error.retryable` (transient API failure → the backend may retry internally)
 *   and `error.providerDown` (the provider is unavailable → failover trigger) are
 *   INDEPENDENT predicates.
 *
 * This is a discriminated union on `type`; use `isResultEvent` to narrow to the
 * `result` variant.
 */
export type AgentEvent =
  | { type: "session"; backendSessionId: string }
  | { type: "text"; delta: string }
  | { type: "thinking"; delta: string }
  | { type: "tool"; name: string; summary?: string }
  | {
      type: "result";
      text: string;
      usage: AgentUsage;
      backendSessionId: string;
      terminalReason?: string;
      /** Backend-native metadata the consumer persists to the session/message DB
       * row (Claude: total_cost_usd, num_turns, duration_ms, usage, modelUsage…).
       * Opaque to the orchestrator. */
      metadata?: Record<string, unknown>;
    }
  | { type: "error"; message: string; retryable: boolean; providerDown: boolean; terminalReason?: string };
56
+
57
/** Type guard: true exactly when `ev` is the terminal `result` variant of `AgentEvent`. */
export function isResultEvent(ev: AgentEvent): ev is Extract<AgentEvent, { type: "result" }> {
  const { type } = ev;
  return type === "result";
}
60
+
61
/** Per-session configuration handed to a backend when a session opens. */
export interface AgentSessionContext {
  room: string;
  channel: string;
  systemPrompt: string;
  cwd: string;
  model?: string;
  /**
   * MCP wiring. There are two real call paths in the codebase and the adapter
   * uses whichever is present (it must NOT rebuild this itself, or chat loses
   * its Slack thread context):
   *   - chat passes a pre-built server blob down via `EngineOptions.mcpServers`
   *     (built by the channel through `getMcpServers(slackCtx)`);
   *   - jobs pass a raw `McpSourceContext` and let the backend wire MCP.
   */
  mcpServers?: Record<string, unknown>;
  source?: McpSourceContext;
  // NOTE(review): the meaning of the two forms is not visible in this file —
  // presumably `true` resumes the latest session and a string names a specific
  // backend session id; confirm against the backend adapters.
  resume: boolean | string;
  /** Capability-gated; consumed only by backends that support subagents (Claude). */
  subagents?: Record<string, AgentDef>;
  /**
   * True for warm, interactive chat sessions; false/undefined for headless
   * one-shot jobs. Backends use it to choose interactive options — e.g. the
   * Claude backend loads project/user settings and streams partial messages
   * only when interactive (jobs keep the leaner one-shot option set).
   */
  interactive?: boolean;
}
89
+
90
/**
 * A live agent session. Chat keeps one open across many turns; a job opens it,
 * sends once, and closes.
 */
export interface AgentSession {
  /**
   * Re-read AFTER each `send()` drains: a new session assigns it on the first
   * turn, and an internal retry may rotate it. The consumer threads this value
   * into finalizer/DB — it must never cache the id from before the send.
   * Null is part of the type; per the note above, the id is assigned on the
   * first turn of a new session.
   */
  readonly backendSessionId: string | null;
  /** Streams the turn's events; ends with `result` or `error`. Emits exactly one
   * `session` event even across internal retries. */
  send(text: string, attachments?: Attachment[]): AsyncIterable<AgentEvent>;
  /** Interrupt an in-flight send. Retry teardown+restart is atomic w.r.t. this.
   * The consumer registers it via `registerActiveHandle`. */
  abort(reason: string): void;
  /** Close the session; resolves when the backend has finished closing. */
  close(): Promise<void>;
}
109
+
110
/** A backend adapter: opens sessions and reports whether an old session can be resumed. */
export interface AgentBackend {
  /** Backend identity; one of the three names in the union. */
  readonly name: "claude" | "codex" | "gemini";
  /** Open a session configured by `ctx`. */
  openSession(ctx: AgentSessionContext): Promise<AgentSession>;
  /** Whether a prior session id can be resumed on this backend in this cwd.
   * Opaque to the consumer — Claude probes a jsonl file, Codex a thread id, etc.
   * Unknowns return false → fresh session with replayed context. */
  canResume(backendSessionId: string, cwd: string): Promise<boolean>;
}
118
+
119
/**
 * Shared contract for the per-backend stream normalizers. Each backend has one
 * (SdkNormalizer, CodexNormalizer, GeminiNormalizer). Normalizers are PURE — no
 * I/O, no timers — so the session is just orchestration.
 */
export interface Normalizer {
  /** Map one backend-native message to zero or more normalized events. */
  consume(message: unknown): AgentEvent[];
}
@@ -15,6 +15,21 @@ function cleanSentinel(text: string): string {
15
15
  return text.replace(/`/g, "").trim();
16
16
  }
17
17
 
18
/**
 * The minimal slice of a Slack client that `reactToSlackMessage` needs: just
 * `reactions.add`. Any object with this shape works, including a test fake.
 */
interface SlackReactionClient {
  reactions: {
    add(args: { channel: string; timestamp: string; name: string }): Promise<unknown>;
  };
}
23
+
24
/**
 * Add the reaction `name` to the message identified by `channel` + `timestamp`.
 * The result of `reactions.add` is discarded; a rejection propagates to the
 * caller, who decides whether the reaction is best-effort.
 */
export async function reactToSlackMessage(
  client: SlackReactionClient,
  channel: string,
  timestamp: string,
  name: string,
): Promise<void> {
  const reaction = { channel, timestamp, name };
  await client.reactions.add(reaction);
}
32
+
18
33
  class SlackChannel implements Channel {
19
34
  name = "slack" as const;
20
35
  private app: App | null = null;
@@ -415,12 +430,11 @@ class SlackChannel implements Channel {
415
430
  }
416
431
 
417
432
  // Add thinking reaction inside the lock so cleanup is guaranteed
418
- await client.reactions
419
- .add({ channel: msg.channel, timestamp: msg.ts, name: "thinking_face" })
433
+ await reactToSlackMessage(client, msg.channel, msg.ts, "thinking_face")
420
434
  .catch((err) => log.debug({ err, channel: msg.channel }, "slack: failed to add thinking reaction"));
421
435
 
422
436
  try {
423
- const { result, messageId } = await state.engine.send(
437
+ const { result, messageId, signal } = await state.engine.send(
424
438
  text,
425
439
  {
426
440
  onActivity(status) {
@@ -430,6 +444,15 @@ class SlackChannel implements Channel {
430
444
  attachments,
431
445
  );
432
446
 
447
+ if (signal === "provider_down") {
448
+ await reactToSlackMessage(client, msg.channel, msg.ts, "skull").catch((err) =>
449
+ log.debug({ err, channel: msg.channel }, "slack: failed to add provider-down reaction"),
450
+ );
451
+ if (messageId) await Message.updateDeliveryStatus(messageId, "sent").catch(() => {});
452
+ log.info({ channel: msg.channel, key, reaction: "skull" }, "slack provider failure sent as reaction");
453
+ return;
454
+ }
455
+
433
456
  const reply = result.trim();
434
457
  const cleaned = cleanSentinel(reply);
435
458