@hevmind/ask 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,330 @@
1
+ // OpenAI-compatible Chat Completions client over fetch. One translation layer
2
+ // covers OpenAI, OpenRouter, and any Chat Completions-compatible endpoint: the
3
+ // rest of the package keeps speaking the internal (Anthropic-shaped) block
4
+ // types, and this module converts both ways. Like `llm.ts`, it stays free of
5
+ // runtime dependencies and edge-runtime friendly.
6
+
7
+ import type {
8
+ AnthropicResponse,
9
+ AnthropicTextBlock,
10
+ AnthropicUsage,
11
+ CallClaudeOptions,
12
+ StreamEvent,
13
+ } from './llm.ts';
14
+
15
/**
 * Everything the Chat Completions client needs to know about one endpoint:
 * where to send requests, which field limits output length, and what to call
 * the provider when reporting errors.
 */
export interface OpenAiEndpoint {
  /** Root of the API; e.g. `https://api.openai.com/v1` or `https://openrouter.ai/api/v1`. */
  baseUrl: string;
  /**
   * Request field that carries the output-token limit. OpenAI's reasoning
   * models reject `max_tokens` and require `max_completion_tokens`, while
   * OpenRouter normalizes `max_tokens` for every underlying provider.
   */
  tokenParam: 'max_tokens' | 'max_completion_tokens';
  /** Provider name interpolated into error messages, e.g. `OpenAI` or `OpenRouter`. */
  label: string;
}
27
+
28
/** One function call as it appears in a Chat Completions assistant message. */
interface OpenAiToolCall {
  /** Call id; echoed back as `tool_call_id` on the matching tool message. */
  id: string;
  /** Always the literal `function`. */
  type: 'function';
  /** Function name plus its arguments as a JSON-encoded string. */
  function: {
    name: string;
    arguments: string;
  };
}
33
+
34
/** A single Chat Completions message in the request `messages` array. */
interface OpenAiMessage {
  /** Speaker of the message. */
  role: 'system' | 'user' | 'assistant' | 'tool';
  /** Message text; `null` is used for assistant turns that only carry tool calls. */
  content: string | null;
  /** Tool calls issued by an assistant turn. */
  tool_calls?: OpenAiToolCall[];
  /** On `tool` messages, the id of the call this message answers. */
  tool_call_id?: string;
}
40
+
41
/**
 * Flattens the system prompt into one string. Block arrays are joined with a
 * blank line between blocks; only each block's `text` is kept, so
 * `cache_control` (an Anthropic-specific hint) is dropped.
 */
function systemText(system: string | AnthropicTextBlock[]): string {
  if (typeof system === 'string') return system;
  return system.map((block) => block.text).join('\n\n');
}

/** Concatenates, in order, the text of every `text` block in a content array. */
function joinTextBlocks(blocks: Array<Record<string, unknown>>): string {
  return blocks
    .filter((block) => block.type === 'text')
    .map((block) => String(block.text ?? ''))
    .join('');
}

/**
 * Converts the internal (Anthropic-shaped) conversation into Chat Completions
 * messages.
 *
 * - The system prompt becomes the leading `system` message.
 * - String content is passed through with its role.
 * - Assistant block arrays become one assistant message: `text` blocks are
 *   concatenated and `tool_use` blocks become `tool_calls`.
 * - User block arrays emit one `tool` message per `tool_result` block first,
 *   then a `user` message with any remaining text.
 * - Content that is neither a string nor an array is skipped.
 *
 * @param opts The `system` prompt and `messages` of the internal call options.
 * @returns Messages ready for the `messages` field of a Chat Completions request.
 */
export function toOpenAiMessages(opts: Pick<CallClaudeOptions, 'system' | 'messages'>): OpenAiMessage[] {
  const out: OpenAiMessage[] = [{ role: 'system', content: systemText(opts.system) }];

  for (const message of opts.messages) {
    if (typeof message.content === 'string') {
      out.push({ role: message.role, content: message.content });
      continue;
    }
    if (!Array.isArray(message.content)) continue;
    const blocks = message.content as Array<Record<string, unknown>>;
    const text = joinTextBlocks(blocks);

    if (message.role === 'assistant') {
      const toolCalls: OpenAiToolCall[] = blocks
        .filter((block) => block.type === 'tool_use')
        .map((block) => ({
          id: String(block.id ?? ''),
          type: 'function',
          function: { name: String(block.name ?? ''), arguments: JSON.stringify(block.input ?? {}) },
        }));
      if (toolCalls.length) {
        out.push({ role: 'assistant', content: text || null, tool_calls: toolCalls });
      } else {
        // `content` may only be null when tool_calls is present; an assistant
        // turn with neither text nor tool calls must still send a string.
        out.push({ role: 'assistant', content: text });
      }
      continue;
    }

    // User turns: tool_result blocks must become role:"tool" messages directly
    // after the assistant turn that issued the calls; any text follows as a
    // plain user message.
    for (const block of blocks) {
      if (block.type !== 'tool_result') continue;
      out.push({
        role: 'tool',
        tool_call_id: String(block.tool_use_id ?? ''),
        content: typeof block.content === 'string' ? block.content : JSON.stringify(block.content ?? ''),
      });
    }
    if (text) out.push({ role: 'user', content: text });
  }

  return out;
}
98
+
99
/**
 * Maps an internal tool choice onto the Chat Completions `tool_choice` value:
 * a named tool becomes a function selector, `any` becomes `required`, `none`
 * stays `none`, and everything else is `auto`.
 */
function toOpenAiToolChoice(choice: { type: string; name?: string }): unknown {
  switch (choice.type) {
    case 'tool':
      return { type: 'function', function: { name: choice.name } };
    case 'any':
      return 'required';
    case 'none':
      return 'none';
    default:
      return 'auto';
  }
}

/**
 * Builds the full Chat Completions request body from internal call options.
 *
 * @param opts Internal call options (model, messages, optional tools and limits).
 * @param endpoint Endpoint settings; `tokenParam` names the output-limit field.
 * @param stream When true, requests a streamed response that includes usage.
 * @returns A JSON-serializable request body. The output limit defaults to 2048
 *   when `opts.maxTokens` is unset; `tools` and `tool_choice` are only present
 *   when the options provide them.
 */
export function toOpenAiRequest(
  opts: CallClaudeOptions,
  endpoint: OpenAiEndpoint,
  stream: boolean,
): Record<string, unknown> {
  const body: Record<string, unknown> = {
    model: opts.model,
    [endpoint.tokenParam]: opts.maxTokens ?? 2048,
    messages: toOpenAiMessages(opts),
  };

  if (opts.tools?.length) {
    body.tools = opts.tools.map((tool) => ({
      type: 'function',
      function: { name: tool.name, description: tool.description, parameters: tool.input_schema },
    }));
  }
  if (opts.toolChoice) {
    body.tool_choice = toOpenAiToolChoice(opts.toolChoice);
  }
  if (stream) {
    body.stream = true;
    // Ask for a usage payload at the end of the stream.
    body.stream_options = { include_usage: true };
  }
  return body;
}
128
+
129
/**
 * Translates a Chat Completions `finish_reason` into the internal stop reason.
 * Unrecognized reasons are passed through unchanged; a missing reason is `null`.
 */
function mapStopReason(finishReason: string | null | undefined): string | null {
  switch (finishReason) {
    case 'tool_calls':
      return 'tool_use';
    case 'stop':
      return 'end_turn';
    case 'length':
      return 'max_tokens';
    default:
      return finishReason ?? null;
  }
}
135
+
136
/**
 * Parses the JSON-encoded arguments of a tool call into a tool input object.
 *
 * This is deliberately best-effort: an empty string, malformed JSON, or JSON
 * that is not a plain object (`null`, a primitive, an array) all yield `{}` so
 * callers always receive an object.
 *
 * @param args Raw `arguments` string from a tool call.
 * @returns The parsed object, or `{}` when no usable object could be parsed.
 */
function parseToolInput(args: string): unknown {
  let parsed: unknown;
  try {
    parsed = JSON.parse(args || '{}');
  } catch {
    return {};
  }
  const isPlainObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
  return isPlainObject ? parsed : {};
}
143
+
144
/**
 * Converts a Chat Completions `usage` payload into the internal usage shape.
 * Returns `undefined` unless at least one of `prompt_tokens` /
 * `completion_tokens` is a number; a missing counterpart is reported as 0.
 */
function mapUsage(usage: unknown): AnthropicUsage | undefined {
  const raw = usage as { prompt_tokens?: number; completion_tokens?: number } | null | undefined;
  const hasPrompt = typeof raw?.prompt_tokens === 'number';
  const hasCompletion = typeof raw?.completion_tokens === 'number';
  if (!(hasPrompt || hasCompletion)) return undefined;

  const input_tokens = raw?.prompt_tokens ?? 0;
  const output_tokens = raw?.completion_tokens ?? 0;
  return { input_tokens, output_tokens };
}
149
+
150
/**
 * Assembles the `fetch` init for a Chat Completions POST: JSON content type,
 * bearer authorization from `opts.apiKey`, the serialized request body, and
 * the caller's abort signal.
 */
function requestInit(opts: CallClaudeOptions, endpoint: OpenAiEndpoint, stream: boolean): RequestInit {
  const headers = {
    'content-type': 'application/json',
    authorization: `Bearer ${opts.apiKey}`,
  };
  const body = JSON.stringify(toOpenAiRequest(opts, endpoint, stream));
  return { method: 'POST', headers, body, signal: opts.signal };
}
161
+
162
/** Returns the chat-completions URL for an endpoint, ignoring trailing slashes on its base URL. */
function completionsUrl(endpoint: OpenAiEndpoint): string {
  const base = endpoint.baseUrl.replace(/\/+$/, '');
  return base + '/chat/completions';
}
165
+
166
/**
 * Performs one non-streaming Chat Completions call and converts the first
 * choice into the internal (Anthropic-shaped) response.
 *
 * @param opts Internal call options.
 * @param endpoint Endpoint to call.
 * @returns Text and tool_use blocks from the first choice, the mapped stop
 *   reason, and usage when the payload reports it.
 * @throws Error when the HTTP status is not OK, or when an OK response carries
 *   an `error` payload and no choices. The message includes the endpoint label,
 *   the status, and up to 500 characters of detail.
 */
export async function callOpenAi(opts: CallClaudeOptions, endpoint: OpenAiEndpoint): Promise<AnthropicResponse> {
  const res = await fetch(completionsUrl(endpoint), requestInit(opts, endpoint, false));

  if (!res.ok) {
    const detail = await res.text().catch(() => '');
    throw new Error(`${endpoint.label} API ${res.status}: ${detail.slice(0, 500)}`);
  }

  const payload = (await res.json()) as {
    choices?: Array<{ message?: { content?: string | null; tool_calls?: OpenAiToolCall[] }; finish_reason?: string | null }>;
    usage?: unknown;
    error?: unknown;
  };
  const choice = payload.choices?.[0];

  // Some compatible endpoints report failures inside a 2xx body; surface them
  // instead of returning an empty answer.
  if (!choice && payload.error != null) {
    const detail = typeof payload.error === 'string' ? payload.error : JSON.stringify(payload.error);
    throw new Error(`${endpoint.label} API ${res.status}: ${detail.slice(0, 500)}`);
  }

  const content: AnthropicResponse['content'] = [];
  if (choice?.message?.content) content.push({ type: 'text', text: choice.message.content });
  for (const call of choice?.message?.tool_calls ?? []) {
    content.push({ type: 'tool_use', id: call.id, name: call.function.name, input: parseToolInput(call.function.arguments) });
  }

  const usage = mapUsage(payload.usage);
  return {
    content,
    stop_reason: mapStopReason(choice?.finish_reason),
    ...(usage ? { usage } : {}),
  };
}
191
+
192
/**
 * Streams a Chat Completions response, yielding text deltas as they arrive and
 * fully-reconstructed tool_use blocks (plus one `stop` event) at the end.
 *
 * The response body reader is cancelled when the generator finishes for any
 * reason (normal end, a thrown error, or the consumer stopping early), so an
 * abandoned stream does not keep the connection open.
 *
 * @param opts Internal call options.
 * @param endpoint Endpoint to call.
 * @throws Error when the HTTP status is not OK or the response has no body.
 */
export async function* streamOpenAi(opts: CallClaudeOptions, endpoint: OpenAiEndpoint): AsyncGenerator<StreamEvent> {
  const res = await fetch(completionsUrl(endpoint), requestInit(opts, endpoint, true));

  if (!res.ok || !res.body) {
    const detail = res.ok ? 'no response body' : await res.text().catch(() => '');
    throw new Error(`${endpoint.label} API ${res.status}: ${detail.slice(0, 500)}`);
  }

  const reader = res.body.getReader();
  const decoder = new TextDecoder('utf-8');
  let state = newOpenAiSseState();

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      const out = parseOpenAiSseChunk(decoder.decode(value, { stream: true }), state);
      state = out.state;
      for (const event of out.events) yield event;
    }

    // Drain whatever the decoder still holds and terminate a final frame that
    // arrived without its trailing blank line, so it is parsed rather than lost.
    const tail = parseOpenAiSseChunk(`${decoder.decode()}\n\n`, state);
    state = tail.state;
    for (const event of tail.events) yield event;

    // Streams normally end with `data: [DONE]`; flush here in case one doesn't.
    for (const event of flushOpenAiSse(state)) yield event;
  } finally {
    try {
      await reader.cancel();
    } catch {
      // The stream already errored; there is nothing left to release.
    }
  }
}
218
+
219
/** A tool call being reassembled from streamed deltas. */
interface SseToolCall {
  /** Call id reported by the stream. */
  id: string;
  /** Function name, concatenated from name fragments. */
  name: string;
  /** JSON argument text, concatenated from argument fragments. */
  args: string;
}
224
+
225
/** Parser state carried from one SSE chunk to the next. */
export interface OpenAiSseState {
  /** Received text that has not yet been terminated by a blank line. */
  buffer: string;
  /** Partially assembled tool calls, keyed by their index in the stream. */
  toolCalls: Record<number, SseToolCall>;
  /** Most recent usage figures seen in the stream (zeros until reported). */
  usage: AnthropicUsage;
  /** Most recent `finish_reason` seen, or `null` if none yet. */
  finishReason: string | null;
  /** True once the tool-use and stop events have been emitted (on `[DONE]`). */
  flushed: boolean;
}
235
+
236
/** Creates a fresh parser state: empty buffer, no tool calls, zero usage, not flushed. */
export function newOpenAiSseState(): OpenAiSseState {
  const initial: OpenAiSseState = {
    buffer: '',
    toolCalls: {},
    usage: { input_tokens: 0, output_tokens: 0 },
    finishReason: null,
    flushed: false,
  };
  return initial;
}
245
+
246
/**
 * Pure, network-free Chat Completions SSE parser. Text deltas surface
 * immediately; tool calls and usage accumulate until `[DONE]` flushes them.
 *
 * The previous state is never modified: the returned state owns its own
 * `toolCalls` record. Frames may be delimited by LF or CRLF line endings; a
 * `\r` left at the end of the buffer is kept until the next chunk completes it.
 *
 * @param chunk Newly decoded text from the response body.
 * @param prev State returned by the previous call (or `newOpenAiSseState()`).
 * @returns Events produced by the complete frames found so far, and the state
 *   to pass to the next call.
 */
export function parseOpenAiSseChunk(
  chunk: string,
  prev: OpenAiSseState,
): { events: StreamEvent[]; state: OpenAiSseState } {
  const events: StreamEvent[] = [];
  const state: OpenAiSseState = {
    ...prev,
    // Copy so accumulating tool calls below cannot write through to `prev`.
    toolCalls: { ...prev.toolCalls },
    // Normalize CRLF so the blank-line frame separator is always "\n\n".
    buffer: (prev.buffer + chunk).replace(/\r\n/g, '\n'),
  };

  let sep: number;
  while ((sep = state.buffer.indexOf('\n\n')) !== -1) {
    const frame = state.buffer.slice(0, sep);
    state.buffer = state.buffer.slice(sep + 2);

    // Non-`data:` lines (OpenRouter emits `: PROCESSING` comments) are dropped.
    const data = frame
      .split('\n')
      .filter((line) => line.startsWith('data:'))
      .map((line) => line.slice(5).trim())
      .join('');
    if (!data) continue;
    if (data === '[DONE]') {
      events.push(...flushOpenAiSse(state));
      continue;
    }

    let payload: Record<string, unknown>;
    try {
      payload = JSON.parse(data) as Record<string, unknown>;
    } catch {
      // Unparseable frames are skipped rather than aborting the stream.
      continue;
    }

    const mappedUsage = mapUsage(payload.usage);
    if (mappedUsage) state.usage = mappedUsage;

    const choice = (payload.choices as Array<Record<string, unknown>> | undefined)?.[0];
    if (!choice) continue;
    if (typeof choice.finish_reason === 'string') state.finishReason = choice.finish_reason;

    const delta = choice.delta as
      | { content?: string | null; tool_calls?: Array<{ index?: number; id?: string; function?: { name?: string; arguments?: string } }> }
      | undefined;
    if (typeof delta?.content === 'string' && delta.content) {
      events.push({ type: 'text', text: delta.content });
    }
    for (const call of delta?.tool_calls ?? []) {
      const index = call.index ?? 0;
      const existing = state.toolCalls[index] ?? { id: '', name: '', args: '' };
      state.toolCalls[index] = {
        // Later deltas may repeat the id as an empty string; keep the real one.
        id: call.id || existing.id,
        name: existing.name + (call.function?.name ?? ''),
        args: existing.args + (call.function?.arguments ?? ''),
      };
    }
  }

  return { events, state };
}
308
+
309
/**
 * Emits the accumulated tool_use blocks (in ascending stream-index order)
 * followed by the final stop event. Marks `state` as flushed, so any later call
 * with the same state returns an empty list. Usage is attached to the stop
 * event only when at least one token count is positive.
 */
export function flushOpenAiSse(state: OpenAiSseState): StreamEvent[] {
  if (state.flushed) return [];
  state.flushed = true;

  const ordered = Object.keys(state.toolCalls)
    .map(Number)
    .sort((a, b) => a - b);
  const events: StreamEvent[] = ordered.map((index) => {
    const call = state.toolCalls[index];
    return { type: 'tool_use', id: call.id, name: call.name, input: parseToolInput(call.args) };
  });

  const { input_tokens, output_tokens } = state.usage;
  const stopReason = mapStopReason(state.finishReason);
  const stop: StreamEvent =
    input_tokens > 0 || output_tokens > 0
      ? { type: 'stop', stopReason, usage: { ...state.usage } }
      : { type: 'stop', stopReason };
  events.push(stop);
  return events;
}
@@ -66,6 +66,8 @@ export interface TelemetryOptions {
66
66
  distinctId?: string;
67
67
  /** Optional label attached to every event as `agent_scope`. */
68
68
  scope?: string;
69
+ /** Inference provider reported as `$ai_provider`; defaults to `anthropic`. */
70
+ provider?: string;
69
71
  /** Reuse an existing trace id; one is generated otherwise. */
70
72
  traceId?: string;
71
73
  /** Cloudflare-style keep-alive so in-flight captures survive response end. */
@@ -113,7 +115,7 @@ export function makeTelemetry(options: TelemetryOptions = {}): Telemetry {
113
115
  distinct_id: distinctId,
114
116
  properties: {
115
117
  $ai_trace_id: traceId,
116
- $ai_provider: 'anthropic',
118
+ $ai_provider: options.provider ?? 'anthropic',
117
119
  $process_person_profile: false, // anonymous — no person profile
118
120
  ...(scope ? { agent_scope: scope } : {}),
119
121
  ...properties,
@@ -0,0 +1,81 @@
1
+ // Inference provider registry. Anthropic keeps its native Messages client;
2
+ // OpenAI and OpenRouter share the Chat Completions client in `llm-openai.ts`,
3
+ // differing only in base URL, key env var, token param, and default models.
4
+
5
+ import { callClaude, streamClaude } from './llm.ts';
6
+ import { callOpenAi, streamOpenAi, type OpenAiEndpoint } from './llm-openai.ts';
7
+ import type { ProviderName } from './types.ts';
8
+
9
+ export type { ProviderName };
10
+
11
/** Static description of one inference provider. */
export interface ProviderInfo {
  /** Registry key of the provider. */
  name: ProviderName;
  /** Display name used in log and error messages. */
  label: string;
  /** Name of the environment variable that holds the API key. */
  envKey: string;
  /** Default API base URL; only set for OpenAI-compatible providers. */
  baseUrl?: string;
  /** Model used by default for the agentic search loop. */
  defaultModel: string;
  /** Model used by default for the offline digest builder. */
  defaultDigestModel: string;
}
24
+
25
/** Registry of supported providers, keyed by provider name. */
export const PROVIDERS: Record<ProviderName, ProviderInfo> = {
  anthropic: {
    name: 'anthropic',
    label: 'Anthropic',
    envKey: 'ANTHROPIC_API_KEY',
    defaultModel: 'claude-haiku-4-5',
    defaultDigestModel: 'claude-opus-4-8',
  },
  openai: {
    name: 'openai',
    label: 'OpenAI',
    envKey: 'OPENAI_API_KEY',
    baseUrl: 'https://api.openai.com/v1',
    defaultModel: 'gpt-4.1-mini',
    defaultDigestModel: 'gpt-5.1',
  },
  openrouter: {
    name: 'openrouter',
    label: 'OpenRouter',
    envKey: 'OPENROUTER_API_KEY',
    baseUrl: 'https://openrouter.ai/api/v1',
    defaultModel: 'anthropic/claude-haiku-4.5',
    defaultDigestModel: 'anthropic/claude-opus-4.8',
  },
};

/**
 * Validates a configured provider name, defaulting to `anthropic`.
 *
 * Only the registry's own keys are accepted; names that merely exist on the
 * object prototype (such as `toString`) are rejected like any other unknown
 * value.
 *
 * @param value Configured provider name; empty or missing selects `anthropic`.
 * @returns The validated provider name.
 * @throws Error listing the supported providers when `value` is not one of them.
 */
export function resolveProviderName(value?: string): ProviderName {
  if (!value) return 'anthropic';
  if (Object.prototype.hasOwnProperty.call(PROVIDERS, value)) return value as ProviderName;
  throw new Error(`Unknown provider "${value}". Expected one of: ${Object.keys(PROVIDERS).join(', ')}.`);
}
57
+
58
/**
 * A provider's pair of entry points. Both share the signatures of the native
 * Anthropic client functions, whatever provider backs them.
 */
export interface LlmClient {
  /** Non-streaming call. */
  call: typeof callClaude;
  /** Streaming call. */
  stream: typeof streamClaude;
}
62
+
63
/**
 * Returns the call/stream pair for a provider. `baseUrl` overrides the
 * provider's API base, so any Chat Completions-compatible endpoint works.
 *
 * `anthropic` always uses the native client and ignores `baseUrl`. For the
 * other providers an empty `baseUrl` is treated as "not set" and the
 * provider's default is used.
 *
 * @param provider Provider to build a client for.
 * @param baseUrl Optional API base override for OpenAI-compatible providers.
 * @throws Error when neither an override nor a provider default base URL exists.
 */
export function clientFor(provider: ProviderName, baseUrl?: string): LlmClient {
  if (provider === 'anthropic') return { call: callClaude, stream: streamClaude };

  const info = PROVIDERS[provider];
  const resolvedBaseUrl = baseUrl || info.baseUrl;
  if (!resolvedBaseUrl) {
    throw new Error(`${info.label} has no API base URL configured.`);
  }

  const endpoint: OpenAiEndpoint = {
    baseUrl: resolvedBaseUrl,
    // OpenAI's reasoning models reject `max_tokens`; OpenRouter normalizes it.
    tokenParam: provider === 'openai' ? 'max_completion_tokens' : 'max_tokens',
    label: info.label,
  };
  return {
    call: (opts) => callOpenAi(opts, endpoint),
    stream: (opts) => streamOpenAi(opts, endpoint),
  };
}
@@ -112,13 +112,37 @@ async function* tracedStream(
112
112
  }
113
113
 
114
114
  /**
115
- * Entry point. When the committed digest carries distilled `nodes`, the
116
- * agent navigates that shadow digest (digest path). A node-less (v1 / degraded)
117
- * digest falls back to the original keyword-search loop, unchanged.
115
+ * Cap on the characters the digest path inlines into the system prompt (the
116
+ * `<map>` + `<summaries>` blocks). Below it, every section summary is inlined so
117
+ * the agent navigates from a complete map — best for small/medium sites. Above
118
+ * it (large docs, e.g. a CLI/API reference with thousands of sections), inlining
119
+ * everything would blow the context window, so the loop switches to search-routed
120
+ * navigation: a compact page map plus a search tool that surfaces ids on demand.
121
+ * ~200 KB ≈ ~50k tokens; a ~500-section site stays fully inlined as before.
122
+ */
123
export const INLINE_DIGEST_BUDGET = 200_000;

/**
 * Cheap character-count estimate of what `buildDigestSystemPrompt` would
 * inline, computed without building the prompt: the overview length plus, for
 * every node, its id length, its summary length, and a flat allowance of 24
 * (presumably per-node markup overhead — confirm against the prompt builder).
 */
export function digestInlineSize(digest: Digest): number {
  return digest.nodes.reduce(
    (total, node) => total + node.id.length + node.summary.length + 24,
    digest.overview.length,
  );
}
131
+
132
+ /**
133
+ * Entry point. When the committed digest carries distilled `nodes`, the agent
134
+ * navigates that shadow digest: small digests are inlined whole (digest path);
135
+ * digests larger than {@link INLINE_DIGEST_BUDGET} are navigated by search so the
136
+ * prompt stays bounded (routed path). A node-less (v1 / degraded) digest falls
137
+ * back to the original keyword-search loop, unchanged.
118
138
  */
119
139
  export async function* runAgenticAnswerLoop(args: AnswerLoopArgs): AsyncGenerator<AgenticEvent> {
120
140
  if (args.digest.nodes && args.digest.nodes.length > 0) {
121
- yield* digestAnswerLoop(args);
141
+ if (digestInlineSize(args.digest) <= INLINE_DIGEST_BUDGET) {
142
+ yield* digestAnswerLoop(args);
143
+ } else {
144
+ yield* routedDigestAnswerLoop(args);
145
+ }
122
146
  } else {
123
147
  yield* legacyAnswerLoop(args);
124
148
  }
@@ -300,6 +324,197 @@ function renderNodeMap(nodes: DigestNode[]): string {
300
324
  return nodes.map((node) => `- ${node.heading ?? node.title} — \`${node.id}\``).join('\n');
301
325
  }
302
326
 
327
+ // ---------------------------------------------------------------------------
328
+ // Routed path: navigate a large digest by search instead of inlining it whole.
329
+ // ---------------------------------------------------------------------------
330
+
331
/**
 * Tool definition offered to the model on the routed path: takes a single
 * required `query` string and is described as returning matching section ids.
 */
const SEARCH_SECTIONS_TOOL: AnthropicTool = {
  name: 'search_sections',
  description: [
    'Search the documentation for sections relevant to a focused sub-query.',
    'Returns matching section ids with their group, heading, and a one-line summary.',
    'Use it to find the ids you then read with open_section.',
  ].join(' '),
  input_schema: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'Focused keyword query or synonym expansion to search for.',
      },
    },
    required: ['query'],
  },
};
343
+
344
/**
 * Renders a compact group → page outline: one `## group` heading per group
 * followed by a `- title` bullet for each distinct page title in it. Nodes
 * without a group are listed under `Docs`. Groups and pages keep the order in
 * which they are first seen.
 */
function routedDigestMap(nodes: DigestNode[]): string {
  const pagesByGroup = new Map<string, Set<string>>();
  nodes.forEach((node) => {
    const key = node.group ?? 'Docs';
    const pages = pagesByGroup.get(key) ?? new Set<string>();
    pages.add(node.title);
    pagesByGroup.set(key, pages);
  });

  return Array.from(pagesByGroup)
    .flatMap(([group, pages]) => [`## ${group}`, ...Array.from(pages, (page) => `- ${page}`)])
    .join('\n');
}
359
+
360
/**
 * Builds the two-block system prompt for the routed path: fixed answering
 * instructions, then the digest's domain context and compact page map. The
 * second block is marked with ephemeral `cache_control`.
 */
function routedDigestSystemPrompt(digest: Digest): AnthropicTextBlock[] {
  const instructions = `You are the documentation assistant for this site. Answer the user's question using ONLY documentation sections you retrieve.

The documentation is large, so it is not all shown here. Use search_sections to find sections relevant to the question, then read the ones you need with open_section for their summary and exact facts. Run a few searches with varied terms if the first does not surface what you need. Open every section your answer draws on — you may only link to sections you opened.

Write a short, direct answer in Markdown:
- Start IMMEDIATELY with the substance. Your first sentence must answer the question. Never open with "Based on…", "Here is…", "Sure", a restatement of the question, or any summary/preamble.
- Keep it tight: one or two short paragraphs, plus a short bullet list only if it genuinely helps. This renders in a small search popover, so do NOT use headings (#, ##) or horizontal rules (---).
- For exact strings (flags, commands, identifiers, versions), quote the section's \`facts\` verbatim — never reword them.
- When you reference a section, link to it inline using its exact \`url\`, e.g. [autoscaling](/docs/concepts#kubernetes-autoscaling). Never invent a URL or anchor.
- If the documentation does not cover the question, say so plainly in one sentence and do not fabricate an answer.`;

  const domainContext = digest.context || 'No digest context is available.';
  const pageMap = routedDigestMap(digest.nodes);

  return [
    { type: 'text', text: instructions },
    {
      type: 'text',
      text: `<domain_context>\n${domainContext}\n</domain_context>\n\n<map>\n${pageMap}\n</map>`,
      cache_control: { type: 'ephemeral' },
    },
  ];
}
382
+
383
/**
 * Runs the keyword prefilter for a sub-query and returns the matching digest
 * nodes in distilled form (id, url, group, heading, summary). Candidates whose
 * id has no digest node are dropped; nodes in `source-primary` mode are
 * additionally flagged with `reference: true`.
 */
function searchSections(
  searchQuery: string,
  chunks: Chunk[],
  nodesById: Map<string, DigestNode>,
  digest: Digest,
  config: SearchLoopConfig,
) {
  const candidates = prefilter(
    chunks,
    searchQuery,
    digest.glossary,
    config.candidatePerSearch,
    config.perDocCap,
    digest.nodes,
  );

  return candidates.flatMap((candidate) => {
    const node = nodesById.get(candidate.id);
    if (node === undefined) return [];
    const { id, url, group, heading, summary } = node;
    return [{ id, url, group, heading, summary, ...(node.mode === 'source-primary' ? { reference: true } : {}) }];
  });
}
403
+
404
/**
 * Answer loop for digests too large to inline: the model finds sections with
 * `search_sections`, reads them with `open_section`, then writes the answer.
 *
 * Phase 1 runs at most `config.maxIterations` non-streaming tool turns. Every
 * tool_use block receives a tool_result — including calls to a tool name this
 * loop does not offer, which are answered with an error payload so the
 * transcript never contains an unanswered tool call. If nothing was opened,
 * the best keyword matches for the original query are opened as a fallback.
 * Phase 2 streams the final answer with no tools available.
 *
 * Yields `search` events while navigating, one `sources` event before the
 * answer, `token` events for the streamed text, and a final `done` event.
 */
async function* routedDigestAnswerLoop({
  apiKey,
  query,
  chunks,
  digest,
  config,
  signal,
  call = callClaude,
  stream = streamClaude,
  telemetry = makeTelemetry(),
}: AnswerLoopArgs): AsyncGenerator<AgenticEvent> {
  const byId = new Map(chunks.map((chunk) => [chunk.id, chunk]));
  const nodesById = new Map(digest.nodes.map((node) => [node.id, node]));
  const opened = new Map<string, DigestNode>();
  const messages: AnthropicMessage[] = [{ role: 'user', content: `Query: ${query}` }];
  const system = routedDigestSystemPrompt(digest);

  // Looks a node up by id and records it as opened when it exists.
  const open = (id: string): DigestNode | null => {
    const node = nodesById.get(id);
    if (node) opened.set(id, node);
    return node ?? null;
  };

  // Phase 1: bounded loop of searches and section opens (non-streaming tool turns).
  for (let i = 0; i < config.maxIterations; i += 1) {
    const response = await tracedCall(
      call,
      {
        apiKey,
        model: config.model,
        system,
        messages,
        tools: [SEARCH_SECTIONS_TOOL, OPEN_SECTION_TOOL],
        toolChoice: { type: 'auto' },
        maxTokens: 1024,
        signal,
      },
      telemetry,
      i,
    );

    messages.push({ role: 'assistant', content: response.content });
    const toolResults: AnthropicToolResultBlock[] = [];

    for (const block of response.content) {
      if (block.type !== 'tool_use') continue;
      if (block.name === 'search_sections') {
        const searchQuery = normalizeToolQuery(block.input) || query;
        yield { type: 'search', query: searchQuery };
        toolResults.push({
          type: 'tool_result',
          tool_use_id: block.id,
          content: JSON.stringify(searchSections(searchQuery, chunks, nodesById, digest, config)),
        });
      } else if (block.name === 'open_section') {
        const id = normalizeId(block.input);
        const node = open(id);
        toolResults.push({
          type: 'tool_result',
          tool_use_id: block.id,
          content: node
            ? JSON.stringify(openSectionResult(node, byId))
            : JSON.stringify({ error: `No section "${id}". Search first, then open an exact id from the results.` }),
        });
      } else {
        // A call to a tool that was never offered still needs a result;
        // otherwise the next request carries an unanswered tool_use block.
        toolResults.push({
          type: 'tool_result',
          tool_use_id: block.id,
          content: JSON.stringify({ error: `Unknown tool "${block.name}". Use search_sections or open_section.` }),
        });
      }
    }

    if (!toolResults.length) break; // model is ready to answer
    messages.push({ role: 'user', content: toolResults });
  }

  // Fallback: ground the answer even if the model opened nothing, by opening the
  // best keyword matches for the original query.
  if (!opened.size) {
    for (const candidate of prefilter(chunks, query, digest.glossary, config.maxResults, config.perDocCap, digest.nodes)) {
      const node = open(candidate.id);
      if (node) yield { type: 'search', query: node.heading ?? node.title };
    }
    if (opened.size && lastRole(messages) !== 'user') {
      const sections = [...opened.values()].map((node) => openSectionResult(node, byId));
      messages.push({ role: 'user', content: `Opened sections:\n${JSON.stringify(sections)}` });
    }
  }

  // The answer turn needs a user message last; add an explicit nudge if the
  // transcript currently ends on the assistant.
  if (lastRole(messages) === 'assistant') {
    messages.push({
      role: 'user',
      content:
        'Write the answer now. Begin directly with the answer itself — no preamble, no "based on…" opener, no headings. Link only to sections you opened, using their exact url.',
    });
  }

  const sources = sourcesFromNodes(opened, config.maxResults);
  yield { type: 'sources', sources };

  // Phase 2: streamed answer turn — no tools, so the model can only answer.
  for await (const event of tracedStream(
    stream,
    {
      apiKey,
      model: config.model,
      system: answerSystem(system, sources),
      messages,
      maxTokens: config.answerMaxTokens,
      signal,
    },
    telemetry,
  )) {
    if (event.type === 'text' && event.text) yield { type: 'token', text: event.text };
  }

  yield { type: 'done' };
}
517
+
303
518
  function sourcesFromNodes(opened: Map<string, DigestNode>, maxResults: number): Source[] {
304
519
  const sources: Source[] = [];
305
520
  const urls = new Set<string>();