npm - @hevmind/ask - Versions diffs - 0.1.1 → 0.3.0 - Mend

@hevmind/ask 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/README.md +33 -13
package/openapi.yaml +53 -7
package/package.json +6 -6
package/skills/build-digest/SKILL.md +70 -120
package/src/digest/build.ts +54 -16
package/src/digest/cli.ts +19 -7
package/src/digest/frontmatter.ts +7 -0
package/src/digest/schema.ts +3 -0
package/src/digest/tree.ts +259 -0
package/src/digest/verify.ts +2 -11
package/src/endpoint.ts +121 -5
package/src/index.ts +1 -1
package/src/integration.ts +16 -14
package/src/llm-openai.ts +330 -0
package/src/observability.ts +3 -1
package/src/providers.ts +81 -0
package/src/search/loop.ts +219 -4
package/src/types.ts +34 -6

package/src/llm-openai.ts ADDED Viewed

@@ -0,0 +1,330 @@
+// OpenAI-compatible Chat Completions client over fetch. One translation layer
+// covers OpenAI, OpenRouter, and any Chat Completions-compatible endpoint: the
+// rest of the package keeps speaking the internal (Anthropic-shaped) block
+// types, and this module converts both ways. Like `llm.ts`, it stays free of
+// runtime dependencies and edge-runtime friendly.
+import type {
+  AnthropicResponse,
+  AnthropicTextBlock,
+  AnthropicUsage,
+  CallClaudeOptions,
+  StreamEvent,
+} from './llm.ts';
+/**
+ * Connection details for one Chat Completions-compatible API: where to send
+ * requests, which request field carries the output-token limit, and the label
+ * used when reporting errors.
+ */
+export interface OpenAiEndpoint {
+  /** API base, e.g. `https://api.openai.com/v1` or `https://openrouter.ai/api/v1`. */
+  baseUrl: string;
+  /**
+   * OpenAI's reasoning models reject `max_tokens` and want
+   * `max_completion_tokens`; OpenRouter normalizes `max_tokens` for every
+   * underlying provider.
+   */
+  tokenParam: 'max_tokens' | 'max_completion_tokens';
+  /** Human label used in error messages, e.g. `OpenAI` or `OpenRouter`. */
+  label: string;
+}
+/**
+ * A function tool call in Chat Completions wire format. `function.arguments`
+ * carries the tool input as a JSON-encoded string rather than an object.
+ */
+interface OpenAiToolCall {
+  id: string;
+  type: 'function';
+  function: { name: string; arguments: string };
+}
+/** One Chat Completions message, as produced by `toOpenAiMessages`. */
+interface OpenAiMessage {
+  role: 'system' | 'user' | 'assistant' | 'tool';
+  /** Message text; `null` on an assistant turn that carries no text. */
+  content: string | null;
+  /** Present on assistant turns that issued tool calls. */
+  tool_calls?: OpenAiToolCall[];
+  /** Present on `tool` messages: id of the tool call being answered. */
+  tool_call_id?: string;
+}
+/**
+ * Flattens the system prompt into one string. A plain string passes through
+ * unchanged; an array of text blocks is reduced to the block texts separated
+ * by a blank line.
+ */
+function systemText(system: string | AnthropicTextBlock[]): string {
+  if (typeof system === 'string') return system;
+  // cache_control is Anthropic-specific; OpenAI-compatible APIs cache on their
+  // own, so only each block's text is carried over.
+  const parts: string[] = [];
+  for (const block of system) parts.push(block.text);
+  return parts.join('\n\n');
+}
+/**
+ * Translates the internal (Anthropic-shaped) conversation into Chat Completions
+ * messages.
+ *
+ * The system prompt always becomes the first message. String-content turns are
+ * copied as-is. Assistant block turns collapse into one message (concatenated
+ * text, plus `tool_calls` when any tool_use blocks exist). User block turns
+ * expand into one `tool` message per tool_result, followed by a single user
+ * message when the turn also has text. Turns whose content is neither a string
+ * nor an array are skipped.
+ */
+export function toOpenAiMessages(opts: Pick<CallClaudeOptions, 'system' | 'messages'>): OpenAiMessage[] {
+  type Block = Record<string, unknown>;
+  // Concatenates the text of every text block, in order, with no separator.
+  const textOf = (blocks: Block[]): string => {
+    let joined = '';
+    for (const block of blocks) {
+      if (block.type === 'text') joined += String(block.text ?? '');
+    }
+    return joined;
+  };
+  const result: OpenAiMessage[] = [{ role: 'system', content: systemText(opts.system) }];
+  for (const turn of opts.messages) {
+    const { content } = turn;
+    if (typeof content === 'string') {
+      result.push({ role: turn.role, content });
+      continue;
+    }
+    if (!Array.isArray(content)) continue;
+    const blocks = content as Block[];
+    if (turn.role === 'assistant') {
+      const calls: OpenAiToolCall[] = [];
+      for (const block of blocks) {
+        if (block.type !== 'tool_use') continue;
+        calls.push({
+          id: String(block.id ?? ''),
+          type: 'function',
+          function: { name: String(block.name ?? ''), arguments: JSON.stringify(block.input ?? {}) },
+        });
+      }
+      // Empty text is sent as null; tool_calls is omitted entirely when empty.
+      const reply: OpenAiMessage = { role: 'assistant', content: textOf(blocks) || null };
+      if (calls.length) reply.tool_calls = calls;
+      result.push(reply);
+      continue;
+    }
+    // User turns: every tool_result becomes a role:"tool" message placed right
+    // after the assistant turn that issued the calls; text blocks, if any,
+    // follow as one plain user message.
+    for (const block of blocks) {
+      if (block.type === 'tool_result') {
+        const payload = block.content;
+        result.push({
+          role: 'tool',
+          tool_call_id: String(block.tool_use_id ?? ''),
+          content: typeof payload === 'string' ? payload : JSON.stringify(payload ?? ''),
+        });
+      }
+    }
+    const userText = textOf(blocks);
+    if (userText) result.push({ role: 'user', content: userText });
+  }
+  return result;
+}
+/**
+ * Assembles the Chat Completions request body from internal call options.
+ *
+ * @param opts Internal call options (model, token limit, conversation, tools).
+ * @param endpoint Supplies the name of the token-limit field.
+ * @param stream When true, requests a streamed response that includes usage.
+ * @returns The JSON-serializable request body.
+ */
+export function toOpenAiRequest(
+  opts: CallClaudeOptions,
+  endpoint: OpenAiEndpoint,
+  stream: boolean,
+): Record<string, unknown> {
+  const body: Record<string, unknown> = {
+    model: opts.model,
+    // The token-limit field name differs per endpoint; 2048 is the fallback limit.
+    [endpoint.tokenParam]: opts.maxTokens ?? 2048,
+    messages: toOpenAiMessages(opts),
+  };
+  const tools = opts.tools;
+  if (tools?.length) {
+    body.tools = tools.map((tool) => ({
+      type: 'function',
+      function: { name: tool.name, description: tool.description, parameters: tool.input_schema },
+    }));
+  }
+  const choice = opts.toolChoice;
+  if (choice) {
+    // A named tool is forced; every other choice type is sent as 'auto'.
+    body.tool_choice =
+      choice.type === 'tool' ? { type: 'function', function: { name: choice.name } } : 'auto';
+  }
+  if (stream) {
+    body.stream = true;
+    body.stream_options = { include_usage: true };
+  }
+  return body;
+}
+/**
+ * Maps a Chat Completions `finish_reason` to the internal stop reason. Values
+ * without a known mapping pass through unchanged; a missing value becomes null.
+ */
+function mapStopReason(finishReason: string | null | undefined): string | null {
+  switch (finishReason) {
+    case 'tool_calls':
+      return 'tool_use';
+    case 'stop':
+      return 'end_turn';
+    case 'length':
+      return 'max_tokens';
+    default:
+      return finishReason ?? null;
+  }
+}
+/**
+ * Decodes a tool call's JSON argument string. An empty string is read as `{}`,
+ * and text that is not valid JSON also yields `{}` instead of throwing.
+ */
+function parseToolInput(args: string): unknown {
+  const source = args || '{}';
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(source);
+  } catch {
+    parsed = {};
+  }
+  return parsed;
+}
+/**
+ * Converts a Chat Completions `usage` object into the internal usage shape.
+ * Returns undefined when neither token count is a number; otherwise a missing
+ * count is reported as 0.
+ */
+function mapUsage(usage: unknown): AnthropicUsage | undefined {
+  const raw = usage as { prompt_tokens?: number; completion_tokens?: number } | null | undefined;
+  const prompt = raw?.prompt_tokens;
+  const completion = raw?.completion_tokens;
+  const hasAnyCount = typeof prompt === 'number' || typeof completion === 'number';
+  if (!hasAnyCount) return undefined;
+  return { input_tokens: prompt ?? 0, output_tokens: completion ?? 0 };
+}
+/**
+ * Builds the fetch options for a Chat Completions POST: JSON content type,
+ * bearer authorization from `opts.apiKey`, the serialized request body, and
+ * the caller's abort signal.
+ */
+function requestInit(opts: CallClaudeOptions, endpoint: OpenAiEndpoint, stream: boolean): RequestInit {
+  const headers = {
+    'content-type': 'application/json',
+    authorization: `Bearer ${opts.apiKey}`,
+  };
+  const body = JSON.stringify(toOpenAiRequest(opts, endpoint, stream));
+  return { method: 'POST', headers, body, signal: opts.signal };
+}
+/** Joins the endpoint's base URL (trailing slashes removed) with `/chat/completions`. */
+function completionsUrl(endpoint: OpenAiEndpoint): string {
+  const base = endpoint.baseUrl.replace(/\/+$/, '');
+  return base + '/chat/completions';
+}
+/**
+ * Performs one non-streaming Chat Completions request and converts the first
+ * choice into the internal response shape: optional text block, one tool_use
+ * block per tool call, the mapped stop reason, and usage when reported.
+ *
+ * @throws Error naming the endpoint label and HTTP status (with up to 500
+ *   characters of the response body) when the response is not ok.
+ */
+export async function callOpenAi(opts: CallClaudeOptions, endpoint: OpenAiEndpoint): Promise<AnthropicResponse> {
+  const res = await fetch(completionsUrl(endpoint), requestInit(opts, endpoint, false));
+  if (!res.ok) {
+    // Reading the error body is best-effort; an unreadable body becomes ''.
+    let detail = '';
+    try {
+      detail = await res.text();
+    } catch {
+      detail = '';
+    }
+    throw new Error(`${endpoint.label} API ${res.status}: ${detail.slice(0, 500)}`);
+  }
+  type Completion = {
+    choices?: Array<{ message?: { content?: string | null; tool_calls?: OpenAiToolCall[] }; finish_reason?: string | null }>;
+    usage?: unknown;
+  };
+  const payload = (await res.json()) as Completion;
+  const choice = payload.choices?.[0];
+  const message = choice?.message;
+  const content: AnthropicResponse['content'] = [];
+  if (message?.content) content.push({ type: 'text', text: message.content });
+  (message?.tool_calls ?? []).forEach((call) => {
+    content.push({ type: 'tool_use', id: call.id, name: call.function.name, input: parseToolInput(call.function.arguments) });
+  });
+  const stopReason = mapStopReason(choice?.finish_reason);
+  const usage = mapUsage(payload.usage);
+  // `usage` is only attached when the payload reported at least one count.
+  return usage ? { content, stop_reason: stopReason, usage } : { content, stop_reason: stopReason };
+}
+/**
+ * Streams a Chat Completions response, yielding text deltas as they arrive and
+ * fully-reconstructed tool_use blocks (plus one `stop` event) at the end.
+ *
+ * The response body reader is always released. When iteration ends before the
+ * stream is exhausted (the consumer stops early, or an error is thrown), the
+ * underlying stream is cancelled so the connection is not left open.
+ *
+ * @throws Error naming the endpoint label and HTTP status when the response is
+ *   not ok or has no body.
+ */
+export async function* streamOpenAi(opts: CallClaudeOptions, endpoint: OpenAiEndpoint): AsyncGenerator<StreamEvent> {
+  const res = await fetch(completionsUrl(endpoint), requestInit(opts, endpoint, true));
+  if (!res.ok || !res.body) {
+    const detail = res.ok ? 'no response body' : await res.text().catch(() => '');
+    throw new Error(`${endpoint.label} API ${res.status}: ${detail.slice(0, 500)}`);
+  }
+  const reader = res.body.getReader();
+  const decoder = new TextDecoder('utf-8');
+  let state = newOpenAiSseState();
+  let exhausted = false;
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      const out = parseOpenAiSseChunk(decoder.decode(value, { stream: true }), state);
+      state = out.state;
+      for (const event of out.events) yield event;
+    }
+    exhausted = true;
+    // Flush any bytes the decoder is still holding, and terminate the buffer
+    // with a blank line so a final frame that arrived without one is parsed
+    // instead of being dropped. A truncated frame fails JSON parsing and is
+    // ignored by the parser.
+    const tail = parseOpenAiSseChunk(`${decoder.decode()}\n\n`, state);
+    state = tail.state;
+    for (const event of tail.events) yield event;
+    // Streams normally end with `data: [DONE]`; flush here in case one doesn't.
+    for (const event of flushOpenAiSse(state)) yield event;
+  } finally {
+    // Early return/throw: stop the download. Cancellation errors are ignored
+    // because the generator is already unwinding.
+    if (!exhausted) await reader.cancel().catch(() => undefined);
+    reader.releaseLock();
+  }
+}
+/**
+ * A tool call being reassembled from stream deltas. `args` accumulates the
+ * JSON argument text fragment by fragment and is parsed only when flushed.
+ */
+interface SseToolCall {
+  id: string;
+  name: string;
+  args: string;
+}
+/**
+ * Parser state carried between calls to `parseOpenAiSseChunk`; create a fresh
+ * one per stream with `newOpenAiSseState`.
+ */
+export interface OpenAiSseState {
+  /** Bytes not yet terminated by a blank line. */
+  buffer: string;
+  /** Tool calls accumulated by their stream index. */
+  toolCalls: Record<number, SseToolCall>;
+  /** Most recent usage reported by the stream; zeros until one arrives. */
+  usage: AnthropicUsage;
+  /** Last `finish_reason` seen on a choice, or null if none yet. */
+  finishReason: string | null;
+  /** Tool-use and stop events were already emitted (on `[DONE]`). */
+  flushed: boolean;
+}
+/**
+ * Creates the parser state a new stream starts from: empty buffer, no tool
+ * calls, zeroed usage, no finish reason, and not yet flushed.
+ */
+export function newOpenAiSseState(): OpenAiSseState {
+  const usage: AnthropicUsage = { input_tokens: 0, output_tokens: 0 };
+  const initial: OpenAiSseState = { buffer: '', toolCalls: {}, usage, finishReason: null, flushed: false };
+  return initial;
+}
+/**
+ * Pure, network-free Chat Completions SSE parser. Text deltas surface
+ * immediately; tool calls and usage accumulate until `[DONE]` flushes them.
+ *
+ * `prev` is never mutated: the returned state owns copies of the tool-call and
+ * usage containers. Frames may be delimited with LF or CRLF line endings.
+ *
+ * @param chunk Newly decoded text from the response body (may end mid-frame).
+ * @param prev State returned by the previous call, or a fresh state.
+ * @returns Events completed by this chunk and the state to pass to the next call.
+ */
+export function parseOpenAiSseChunk(
+  chunk: string,
+  prev: OpenAiSseState,
+): { events: StreamEvent[]; state: OpenAiSseState } {
+  const events: StreamEvent[] = [];
+  // Copy the nested containers: writing tool calls through a shared reference
+  // would silently modify the caller's previous state.
+  const state: OpenAiSseState = { ...prev, toolCalls: { ...prev.toolCalls }, usage: { ...prev.usage } };
+  // Normalize CRLF so `\r\n\r\n`-delimited frames are split too. A lone `\r`
+  // at the end of a chunk stays in the buffer and is paired with its `\n` on
+  // the next call.
+  state.buffer = (prev.buffer + chunk).replace(/\r\n/g, '\n');
+  let sep: number;
+  while ((sep = state.buffer.indexOf('\n\n')) !== -1) {
+    const frame = state.buffer.slice(0, sep);
+    state.buffer = state.buffer.slice(sep + 2);
+    // Non-`data:` lines (OpenRouter emits `: PROCESSING` comments) are dropped.
+    const data = frame
+      .split('\n')
+      .filter((line) => line.startsWith('data:'))
+      .map((line) => line.slice(5).trim())
+      .join('');
+    if (!data) continue;
+    if (data === '[DONE]') {
+      events.push(...flushOpenAiSse(state));
+      continue;
+    }
+    let payload: Record<string, unknown>;
+    try {
+      payload = JSON.parse(data) as Record<string, unknown>;
+    } catch {
+      // Malformed frame: skip it rather than abort the whole stream.
+      continue;
+    }
+    const mappedUsage = mapUsage(payload.usage);
+    if (mappedUsage) state.usage = mappedUsage;
+    const choice = (payload.choices as Array<Record<string, unknown>> | undefined)?.[0];
+    if (!choice) continue;
+    if (typeof choice.finish_reason === 'string') state.finishReason = choice.finish_reason;
+    const delta = choice.delta as
+      | { content?: string | null; tool_calls?: Array<{ index?: number; id?: string; function?: { name?: string; arguments?: string } }> }
+      | undefined;
+    if (typeof delta?.content === 'string' && delta.content) {
+      events.push({ type: 'text', text: delta.content });
+    }
+    for (const call of delta?.tool_calls ?? []) {
+      const index = call.index ?? 0;
+      const existing = state.toolCalls[index] ?? { id: '', name: '', args: '' };
+      state.toolCalls[index] = {
+        // `||` rather than `??`: a later delta carrying an empty-string id must
+        // not erase the id received on the first delta.
+        id: call.id || existing.id,
+        name: existing.name + (call.function?.name ?? ''),
+        args: existing.args + (call.function?.arguments ?? ''),
+      };
+    }
+  }
+  return { events, state };
+}
+/**
+ * Emits the accumulated tool_use blocks (ordered by stream index) followed by
+ * the final stop event, exactly once. The first call marks `state` as flushed;
+ * every later call returns an empty list.
+ */
+export function flushOpenAiSse(state: OpenAiSseState): StreamEvent[] {
+  if (state.flushed) return [];
+  state.flushed = true;
+  const orderedIndexes = Object.keys(state.toolCalls)
+    .map((key) => Number(key))
+    .sort((left, right) => left - right);
+  const events: StreamEvent[] = orderedIndexes.map((index): StreamEvent => {
+    const { id, name, args } = state.toolCalls[index];
+    return { type: 'tool_use', id, name, input: parseToolInput(args) };
+  });
+  const stopReason = mapStopReason(state.finishReason);
+  // Usage is attached only when at least one count is non-zero.
+  const reportsUsage = state.usage.input_tokens > 0 || state.usage.output_tokens > 0;
+  if (reportsUsage) {
+    events.push({ type: 'stop', stopReason, usage: { ...state.usage } });
+  } else {
+    events.push({ type: 'stop', stopReason });
+  }
+  return events;
+}

package/src/observability.ts CHANGED Viewed

@@ -66,6 +66,8 @@ export interface TelemetryOptions {
   distinctId?: string;
   /** Optional label attached to every event as `agent_scope`. */
   scope?: string;
+  /** Inference provider reported as `$ai_provider`; defaults to `anthropic`. */
+  provider?: string;
   /** Reuse an existing trace id; one is generated otherwise. */
   traceId?: string;
   /** Cloudflare-style keep-alive so in-flight captures survive response end. */
@@ -113,7 +115,7 @@ export function makeTelemetry(options: TelemetryOptions = {}): Telemetry {
       distinct_id: distinctId,
       properties: {
         $ai_trace_id: traceId,
-        $ai_provider: 'anthropic',
+        $ai_provider: options.provider ?? 'anthropic',
         $process_person_profile: false, // anonymous — no person profile
         ...(scope ? { agent_scope: scope } : {}),
         ...properties,

package/src/providers.ts ADDED Viewed

@@ -0,0 +1,81 @@
+// Inference provider registry. Anthropic keeps its native Messages client;
+// OpenAI and OpenRouter share the Chat Completions client in `llm-openai.ts`,
+// differing only in base URL, key env var, token param, and default models.
+import { callClaude, streamClaude } from './llm.ts';
+import { callOpenAi, streamOpenAi, type OpenAiEndpoint } from './llm-openai.ts';
+import type { ProviderName } from './types.ts';
+export type { ProviderName };
+/** Static description of one inference provider, as stored in `PROVIDERS`. */
+export interface ProviderInfo {
+  name: ProviderName;
+  /** Human label for log and error messages. */
+  label: string;
+  /** Environment variable the API key is read from. */
+  envKey: string;
+  /** Default API base URL (OpenAI-compatible providers only). */
+  baseUrl?: string;
+  /** Default model for the agentic search loop. */
+  defaultModel: string;
+  /** Default model for the offline digest builder. */
+  defaultDigestModel: string;
+}
+/**
+ * Built-in provider table, keyed by provider name. Only the OpenAI-compatible
+ * entries carry a `baseUrl`; the `anthropic` entry has none.
+ */
+export const PROVIDERS: Record<ProviderName, ProviderInfo> = {
+  anthropic: {
+    name: 'anthropic',
+    label: 'Anthropic',
+    envKey: 'ANTHROPIC_API_KEY',
+    defaultModel: 'claude-haiku-4-5',
+    defaultDigestModel: 'claude-opus-4-8',
+  },
+  openai: {
+    name: 'openai',
+    label: 'OpenAI',
+    envKey: 'OPENAI_API_KEY',
+    baseUrl: 'https://api.openai.com/v1',
+    defaultModel: 'gpt-4.1-mini',
+    defaultDigestModel: 'gpt-5.1',
+  },
+  openrouter: {
+    name: 'openrouter',
+    label: 'OpenRouter',
+    envKey: 'OPENROUTER_API_KEY',
+    baseUrl: 'https://openrouter.ai/api/v1',
+    defaultModel: 'anthropic/claude-haiku-4.5',
+    defaultDigestModel: 'anthropic/claude-opus-4.8',
+  },
+};
+/**
+ * Validates a configured provider name, defaulting to `anthropic`.
+ *
+ * @param value Configured name; undefined or empty selects the default.
+ * @returns The validated provider name.
+ * @throws Error listing the accepted names when `value` is not a known provider.
+ */
+export function resolveProviderName(value?: string): ProviderName {
+  if (!value) return 'anthropic';
+  // Own-property check: the `in` operator also matches inherited keys such as
+  // "toString" or "constructor", which would be accepted as provider names.
+  if (Object.prototype.hasOwnProperty.call(PROVIDERS, value)) return value as ProviderName;
+  throw new Error(`Unknown provider "${value}". Expected one of: ${Object.keys(PROVIDERS).join(', ')}.`);
+}
+/**
+ * The call/stream function pair for one provider. Both members use the
+ * signatures of the Anthropic client functions, so every provider is
+ * invoked the same way.
+ */
+export interface LlmClient {
+  call: typeof callClaude;
+  stream: typeof streamClaude;
+}
+/**
+ * Selects the call/stream pair for a provider. Anthropic gets its native
+ * client; every other provider gets the Chat Completions client bound to an
+ * endpoint. `baseUrl`, when given, replaces the provider's default API base,
+ * so any Chat Completions-compatible endpoint works.
+ */
+export function clientFor(provider: ProviderName, baseUrl?: string): LlmClient {
+  if (provider === 'anthropic') {
+    return { call: callClaude, stream: streamClaude };
+  }
+  const { label, baseUrl: defaultBaseUrl } = PROVIDERS[provider];
+  // OpenAI's reasoning models reject `max_tokens`; OpenRouter normalizes it.
+  const tokenParam: OpenAiEndpoint['tokenParam'] =
+    provider === 'openai' ? 'max_completion_tokens' : 'max_tokens';
+  const endpoint: OpenAiEndpoint = { baseUrl: baseUrl ?? defaultBaseUrl!, tokenParam, label };
+  const call: LlmClient['call'] = (opts) => callOpenAi(opts, endpoint);
+  const stream: LlmClient['stream'] = (opts) => streamOpenAi(opts, endpoint);
+  return { call, stream };
+}

package/src/search/loop.ts CHANGED Viewed

@@ -112,13 +112,37 @@ async function* tracedStream(
 }
 /**
- * Entry point. When the committed digest carries distilled `nodes`, the
- * agent navigates that shadow digest (digest path). A node-less (v1 / degraded)
- * digest falls back to the original keyword-search loop, unchanged.
+ * Cap on the characters the digest path inlines into the system prompt (the
+ * `<map>` + `<summaries>` blocks). Below it, every section summary is inlined so
+ * the agent navigates from a complete map — best for small/medium sites. Above
+ * it (large docs, e.g. a CLI/API reference with thousands of sections), inlining
+ * everything would blow the context window, so the loop switches to search-routed
+ * navigation: a compact page map plus a search tool that surfaces ids on demand.
+ * ~200 KB ≈ ~50k tokens; a ~500-section site stays fully inlined as before.
+ */
+export const INLINE_DIGEST_BUDGET = 200_000;
+/**
+ * Cheap estimate of what `buildDigestSystemPrompt` would inline, without
+ * building it: the overview length plus, per node, its id length, summary
+ * length, and a fixed 24-character allowance.
+ */
+export function digestInlineSize(digest: Digest): number {
+  return digest.nodes.reduce(
+    (total, node) => total + node.id.length + node.summary.length + 24,
+    digest.overview.length,
+  );
+}
+/**
+ * Entry point. When the committed digest carries distilled `nodes`, the agent
+ * navigates that shadow digest: small digests are inlined whole (digest path);
+ * digests larger than {@link INLINE_DIGEST_BUDGET} are navigated by search so the
+ * prompt stays bounded (routed path). A node-less (v1 / degraded) digest falls
+ * back to the original keyword-search loop, unchanged.
  */
 export async function* runAgenticAnswerLoop(args: AnswerLoopArgs): AsyncGenerator<AgenticEvent> {
   if (args.digest.nodes && args.digest.nodes.length > 0) {
-    yield* digestAnswerLoop(args);
+    if (digestInlineSize(args.digest) <= INLINE_DIGEST_BUDGET) {
+      yield* digestAnswerLoop(args);
+    } else {
+      yield* routedDigestAnswerLoop(args);
+    }
   } else {
     yield* legacyAnswerLoop(args);
   }
@@ -300,6 +324,197 @@ function renderNodeMap(nodes: DigestNode[]): string {
   return nodes.map((node) => `- ${node.heading ?? node.title} — \`${node.id}\``).join('\n');
 }
+// ---------------------------------------------------------------------------
+// Routed path: navigate a large digest by search instead of inlining it whole.
+// ---------------------------------------------------------------------------
+/**
+ * Tool definition offered to the model on the routed path; it takes a single
+ * required string `query`.
+ */
+const SEARCH_SECTIONS_TOOL: AnthropicTool = {
+  name: 'search_sections',
+  description:
+    'Search the documentation for sections relevant to a focused sub-query. Returns matching section ids with their group, heading, and a one-line summary. Use it to find the ids you then read with open_section.',
+  input_schema: {
+    type: 'object',
+    properties: {
+      query: { type: 'string', description: 'Focused keyword query or synonym expansion to search for.' },
+    },
+    required: ['query'],
+  },
+};
+/**
+ * Compact group → page map: orientation only, so the prompt stays bounded.
+ * Renders a `## group` heading per group (nodes without a group fall under
+ * `Docs`), each followed by its distinct page titles as bullets, in
+ * first-seen order.
+ */
+function routedDigestMap(nodes: DigestNode[]): string {
+  const pagesByGroup = new Map<string, Set<string>>();
+  for (const node of nodes) {
+    const key = node.group ?? 'Docs';
+    let pages = pagesByGroup.get(key);
+    if (!pages) {
+      pages = new Set<string>();
+      pagesByGroup.set(key, pages);
+    }
+    pages.add(node.title);
+  }
+  const rendered: string[] = [];
+  pagesByGroup.forEach((pages, group) => {
+    rendered.push(`## ${group}`);
+    pages.forEach((page) => {
+      rendered.push(`- ${page}`);
+    });
+  });
+  return rendered.join('\n');
+}
+/**
+ * Builds the two-block system prompt for the routed path: first the fixed
+ * answering instructions, then the digest's context and compact page map. The
+ * second block is marked with ephemeral `cache_control`.
+ */
+function routedDigestSystemPrompt(digest: Digest): AnthropicTextBlock[] {
+  return [
+    {
+      type: 'text',
+      text: `You are the documentation assistant for this site. Answer the user's question using ONLY documentation sections you retrieve.
+The documentation is large, so it is not all shown here. Use search_sections to find sections relevant to the question, then read the ones you need with open_section for their summary and exact facts. Run a few searches with varied terms if the first does not surface what you need. Open every section your answer draws on — you may only link to sections you opened.
+Write a short, direct answer in Markdown:
+- Start IMMEDIATELY with the substance. Your first sentence must answer the question. Never open with "Based on…", "Here is…", "Sure", a restatement of the question, or any summary/preamble.
+- Keep it tight: one or two short paragraphs, plus a short bullet list only if it genuinely helps. This renders in a small search popover, so do NOT use headings (#, ##) or horizontal rules (---).
+- For exact strings (flags, commands, identifiers, versions), quote the section's \`facts\` verbatim — never reword them.
+- When you reference a section, link to it inline using its exact \`url\`, e.g. [autoscaling](/docs/concepts#kubernetes-autoscaling). Never invent a URL or anchor.
+- If the documentation does not cover the question, say so plainly in one sentence and do not fabricate an answer.`,
+    },
+    {
+      type: 'text',
+      // Falls back to a fixed notice when the digest has no context text.
+      text: `<domain_context>\n${digest.context || 'No digest context is available.'}\n</domain_context>\n\n<map>\n${routedDigestMap(digest.nodes)}\n</map>`,
+      cache_control: { type: 'ephemeral' },
+    },
+  ];
+}
+/**
+ * Searches the digest's nodes for a sub-query and returns distilled candidates
+ * (id, url, group, heading, summary) in ranking order. Candidates with no
+ * matching digest node are dropped; `source-primary` nodes are additionally
+ * flagged with `reference: true`.
+ */
+function searchSections(
+  searchQuery: string,
+  chunks: Chunk[],
+  nodesById: Map<string, DigestNode>,
+  digest: Digest,
+  config: SearchLoopConfig,
+) {
+  const describe = (node: DigestNode) => ({
+    id: node.id,
+    url: node.url,
+    group: node.group,
+    heading: node.heading,
+    summary: node.summary,
+    ...(node.mode === 'source-primary' ? { reference: true } : {}),
+  });
+  const candidates = prefilter(
+    chunks,
+    searchQuery,
+    digest.glossary,
+    config.candidatePerSearch,
+    config.perDocCap,
+    digest.nodes,
+  );
+  const hits: Array<ReturnType<typeof describe>> = [];
+  for (const candidate of candidates) {
+    const node = nodesById.get(candidate.id);
+    if (node === undefined) continue;
+    hits.push(describe(node));
+  }
+  return hits;
+}
+/**
+ * Routed answer loop for digests too large to inline. The model first runs a
+ * bounded number of non-streaming tool turns (searching for sections and
+ * opening them), then writes the answer in a streamed turn that has no tools.
+ *
+ * Yields `search` events as searches run, one `sources` event listing the
+ * opened sections, `token` events for the streamed answer text, and finally
+ * `done`. If the model opened nothing, the best keyword matches for the
+ * original query are opened on its behalf so the answer stays grounded.
+ */
+async function* routedDigestAnswerLoop({
+  apiKey,
+  query,
+  chunks,
+  digest,
+  config,
+  signal,
+  call = callClaude,
+  stream = streamClaude,
+  telemetry = makeTelemetry(),
+}: AnswerLoopArgs): AsyncGenerator<AgenticEvent> {
+  const byId = new Map(chunks.map((chunk) => [chunk.id, chunk]));
+  const nodesById = new Map(digest.nodes.map((node) => [node.id, node]));
+  const opened = new Map<string, DigestNode>();
+  const messages: AnthropicMessage[] = [{ role: 'user', content: `Query: ${query}` }];
+  const system = routedDigestSystemPrompt(digest);
+  // Looks a section up by id and records it as opened when it exists.
+  const open = (id: string): DigestNode | null => {
+    const node = nodesById.get(id);
+    if (node) opened.set(id, node);
+    return node ?? null;
+  };
+  // Phase 1: bounded loop of searches and section opens (non-streaming tool turns).
+  for (let i = 0; i < config.maxIterations; i += 1) {
+    const response = await tracedCall(
+      call,
+      {
+        apiKey,
+        model: config.model,
+        system,
+        messages,
+        tools: [SEARCH_SECTIONS_TOOL, OPEN_SECTION_TOOL],
+        toolChoice: { type: 'auto' },
+        maxTokens: 1024,
+        signal,
+      },
+      telemetry,
+      i,
+    );
+    messages.push({ role: 'assistant', content: response.content });
+    const toolResults: AnthropicToolResultBlock[] = [];
+    for (const block of response.content) {
+      if (block.type !== 'tool_use') continue;
+      if (block.name === 'search_sections') {
+        // An empty or unusable tool query falls back to the user's question.
+        const searchQuery = normalizeToolQuery(block.input) || query;
+        yield { type: 'search', query: searchQuery };
+        toolResults.push({
+          type: 'tool_result',
+          tool_use_id: block.id,
+          content: JSON.stringify(searchSections(searchQuery, chunks, nodesById, digest, config)),
+        });
+      } else if (block.name === 'open_section') {
+        const id = normalizeId(block.input);
+        const node = open(id);
+        toolResults.push({
+          type: 'tool_result',
+          tool_use_id: block.id,
+          content: node
+            ? JSON.stringify(openSectionResult(node, byId))
+            : JSON.stringify({ error: `No section "${id}". Search first, then open an exact id from the results.` }),
+        });
+      } else {
+        // Every tool_use must be answered: a call to a tool that does not exist
+        // would otherwise stay in the conversation without a tool_result and
+        // break the next request. Reply with an error so the model can retry.
+        toolResults.push({
+          type: 'tool_result',
+          tool_use_id: block.id,
+          content: JSON.stringify({ error: `Unknown tool "${block.name}". Use search_sections or open_section.` }),
+        });
+      }
+    }
+    if (!toolResults.length) break; // model is ready to answer
+    messages.push({ role: 'user', content: toolResults });
+  }
+  // Fallback: ground the answer even if the model opened nothing, by opening the
+  // best keyword matches for the original query.
+  if (!opened.size) {
+    for (const candidate of prefilter(chunks, query, digest.glossary, config.maxResults, config.perDocCap, digest.nodes)) {
+      const node = open(candidate.id);
+      if (node) yield { type: 'search', query: node.heading ?? node.title };
+    }
+    if (opened.size && lastRole(messages) !== 'user') {
+      const sections = [...opened.values()].map((node) => openSectionResult(node, byId));
+      messages.push({ role: 'user', content: `Opened sections:\n${JSON.stringify(sections)}` });
+    }
+  }
+  // The answer turn needs a user message last; add an explicit instruction
+  // when the conversation currently ends on an assistant turn.
+  if (lastRole(messages) === 'assistant') {
+    messages.push({
+      role: 'user',
+      content:
+        'Write the answer now. Begin directly with the answer itself — no preamble, no "based on…" opener, no headings. Link only to sections you opened, using their exact url.',
+    });
+  }
+  const sources = sourcesFromNodes(opened, config.maxResults);
+  yield { type: 'sources', sources };
+  // Phase 2: streamed answer turn — no tools, so the model can only answer.
+  for await (const event of tracedStream(
+    stream,
+    {
+      apiKey,
+      model: config.model,
+      system: answerSystem(system, sources),
+      messages,
+      maxTokens: config.answerMaxTokens,
+      signal,
+    },
+    telemetry,
+  )) {
+    if (event.type === 'text' && event.text) yield { type: 'token', text: event.text };
+  }
+  yield { type: 'done' };
+}
 function sourcesFromNodes(opened: Map<string, DigestNode>, maxResults: number): Source[] {
   const sources: Source[] = [];
   const urls = new Set<string>();