@hevmind/ask 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/skills/build-digest/SKILL.md +69 -119
- package/src/search/loop.ts +230 -4
- package/src/search/prefilter.ts +13 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hevmind/ask",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "hev ask: a heading-anchored, agentic search overlay for Astro docs sites.",
|
|
6
6
|
"keywords": [
|
|
@@ -28,11 +28,11 @@
|
|
|
28
28
|
"ask": "./bin/ask.mjs"
|
|
29
29
|
},
|
|
30
30
|
"optionalDependencies": {
|
|
31
|
-
"@hevmind/ask-darwin-arm64": "0.
|
|
32
|
-
"@hevmind/ask-
|
|
33
|
-
"@hevmind/ask-
|
|
34
|
-
"@hevmind/ask-
|
|
35
|
-
"@hevmind/ask-
|
|
31
|
+
"@hevmind/ask-darwin-arm64": "0.3.1",
|
|
32
|
+
"@hevmind/ask-darwin-x64": "0.3.1",
|
|
33
|
+
"@hevmind/ask-linux-arm64": "0.3.1",
|
|
34
|
+
"@hevmind/ask-win32-x64": "0.3.1",
|
|
35
|
+
"@hevmind/ask-linux-x64": "0.3.1"
|
|
36
36
|
},
|
|
37
37
|
"exports": {
|
|
38
38
|
".": "./src/index.ts",
|
package/skills/build-digest/SKILL.md
CHANGED
|
@@ -1,120 +1,84 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: build-digest
|
|
3
3
|
description: >-
|
|
4
|
-
Build the @hevmind/ask ask digest (.hev-ask/ markdown tree) for
|
|
5
|
-
using your
|
|
6
|
-
|
|
7
|
-
graph
|
|
8
|
-
|
|
9
|
-
`ask digest assemble --input-dir`.
|
|
4
|
+
Build the @hevmind/ask ask digest (the committed .hev-ask/ markdown tree) for a
|
|
5
|
+
docs site using your own agent subscription instead of a provider API key. Use
|
|
6
|
+
when asked to build, rebuild, or refresh the hev ask digest, search index, or
|
|
7
|
+
knowledge graph (KG), or after docs content changes. Shards the corpus, distils
|
|
8
|
+
each shard in a fresh context, then assembles and verifies.
|
|
10
9
|
---
|
|
11
10
|
|
|
12
11
|
# Build the hev ask digest
|
|
13
12
|
|
|
14
|
-
`@hevmind/ask`
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
the user's subscription, so it costs **no API tokens on their own key**.
|
|
20
|
-
|
|
21
|
-
The corpus is split into **shards** (~200KB of text each, along slug-prefix
|
|
22
|
-
boundaries) and each shard is distilled in its own fresh context. Corpus size
|
|
23
|
-
is therefore never a context-limit problem — a bigger site just means more
|
|
24
|
-
shards. All state lives on disk in `.hev-ask/shards/`, so the build can be
|
|
25
|
-
stopped, resumed, and incrementally refreshed: after content edits, only the
|
|
26
|
-
shards whose content changed need re-distilling.
|
|
27
|
-
|
|
28
|
-
Run every command from the **site root** (the directory whose `astro.config.*`
|
|
29
|
-
registers `hevAsk()`). Prefer `pnpm exec ask digest …`; fall back to
|
|
30
|
-
`npx -p @hevmind/ask ask digest …` if pnpm isn't used. Pass the same content flags
|
|
31
|
-
the site's integration uses if they differ from the defaults (`--collection`,
|
|
32
|
-
`--base-path`, `--chunk-heading-depth`, `--content-glob`, `--digest-dir`);
|
|
33
|
-
they must match across `corpus` and `assemble`.
|
|
34
|
-
|
|
35
|
-
**Never read a shard input file into the orchestrating context** (they hold
|
|
36
|
-
the full corpus text). The orchestrator works only from command output,
|
|
37
|
-
`manifest.json`, and `status`; shard contents are read by the per-shard
|
|
38
|
-
distillation agents.
|
|
13
|
+
`@hevmind/ask`'s agentic loop, keyword ranking, and suggested questions run off a
|
|
14
|
+
committed digest tree at `.hev-ask/`. Only the **distillation** needs a model —
|
|
15
|
+
the CLI computes the node structure, verbatim facts, overview map, and content
|
|
16
|
+
hashes deterministically. This skill does that distillation here in your
|
|
17
|
+
subscription, so it costs **no provider API tokens**.
|
|
39
18
|
|
|
40
|
-
|
|
19
|
+
`ask` is the `@hevmind/ask` binary: install it on PATH, or it resolves via the
|
|
20
|
+
package bin / `HEV_ASK_BINARY` (see `api/cli.mdx`). Run every command from the
|
|
21
|
+
**site root** (the dir whose config registers `hevAsk()`). If the integration
|
|
22
|
+
overrides any content flags (`--collection`, `--base-path`,
|
|
23
|
+
`--chunk-heading-depth`, `--content-glob`, `--digest-dir`), pass the same ones to
|
|
24
|
+
`corpus` and `assemble` — they must match.
|
|
41
25
|
|
|
42
|
-
|
|
26
|
+
The corpus splits into ~200KB **shards** (`--shard-bytes` tunes it), each
|
|
27
|
+
distilled in its own context, so corpus size is never a context limit. State
|
|
28
|
+
lives in `.hev-ask/shards/`, so the build resumes and refreshes incrementally:
|
|
29
|
+
after edits, only changed shards re-distil.
|
|
43
30
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
```
|
|
31
|
+
**Never read a shard input file yourself** — they hold the full corpus text.
|
|
32
|
+
Work from command output and `status`; the per-shard agents read the shards.
|
|
47
33
|
|
|
48
|
-
|
|
49
|
-
plus `manifest.json`, and reports `(N sections, M shards, P pending,
|
|
50
|
-
up-to-date|needs-rebuild)`. Re-running after content edits is safe and is
|
|
51
|
-
the refresh mechanism: unchanged shards keep their distillations; changed
|
|
52
|
-
ones are marked pending again.
|
|
53
|
-
|
|
54
|
-
2. **Check state.** If the corpus reported `up-to-date` AND `0 pending`, the
|
|
55
|
-
committed digest already matches the content — **stop here** and tell the
|
|
56
|
-
user nothing needs rebuilding. Otherwise:
|
|
34
|
+
## Steps
|
|
57
35
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
36
|
+
1. **Shard.** `ask digest corpus --shards-dir .hev-ask/shards`
|
|
37
|
+
Deterministic and keyless. Reports `(N sections, M shards, P pending,
|
|
38
|
+
up-to-date|needs-rebuild)`. Safe to re-run; this is the refresh mechanism.
|
|
61
39
|
|
|
62
|
-
|
|
63
|
-
content
|
|
40
|
+
2. **Check.** If corpus said `up-to-date` AND `0 pending`, the digest already
|
|
41
|
+
matches the content — **stop and tell the user nothing needs rebuilding.**
|
|
42
|
+
Otherwise `ask digest status --shards-dir .hev-ask/shards` lists the
|
|
43
|
+
`pending`/`stale` shards (both need distilling).
|
|
64
44
|
|
|
65
45
|
3. **Distil each pending/stale shard in a fresh context.** Spawn one agent per
|
|
66
|
-
shard (
|
|
67
|
-
|
|
46
|
+
shard (a few in parallel; don't read shards yourself). Give each this prompt
|
|
47
|
+
with `<id>` filled in:
|
|
68
48
|
|
|
69
|
-
> Read ONLY `.hev-ask/shards/input-<id>.json` (from the site root)
|
|
70
|
-
> `shardId`, `shardHash`, and a `sections` array of `{ id, url, title,
|
|
71
|
-
>
|
|
72
|
-
> shape:
|
|
49
|
+
> Read ONLY `.hev-ask/shards/input-<id>.json` (from the site root): it has
|
|
50
|
+
> `shardId`, `shardHash`, and a `sections` array of `{ id, url, title, text }`.
|
|
51
|
+
> Write `.hev-ask/shards/distill-<id>.json` with exactly this shape:
|
|
73
52
|
>
|
|
74
53
|
> ```json
|
|
75
54
|
> {
|
|
76
|
-
> "shardHash": "<copy
|
|
77
|
-
> "notes": "5-10 lines: what this shard covers, its key concepts, and how users
|
|
78
|
-
> "glossary": [
|
|
79
|
-
>
|
|
80
|
-
> ],
|
|
81
|
-
> "summaries": [
|
|
82
|
-
> { "id": "<exact section id from sections>", "summary": "1-3 sentence distillation." }
|
|
83
|
-
> ]
|
|
55
|
+
> "shardHash": "<copy shardHash verbatim>",
|
|
56
|
+
> "notes": "5-10 lines: what this shard covers, its key concepts, and how users phrase them.",
|
|
57
|
+
> "glossary": [{ "term": "...", "aliases": ["..."], "definition": "One line." }],
|
|
58
|
+
> "summaries": [{ "id": "<exact section id>", "summary": "1-3 sentences." }]
|
|
84
59
|
> }
|
|
85
60
|
> ```
|
|
86
61
|
>
|
|
87
|
-
>
|
|
88
|
-
> -
|
|
89
|
-
>
|
|
90
|
-
>
|
|
91
|
-
>
|
|
92
|
-
>
|
|
93
|
-
>
|
|
94
|
-
> -
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
4. **Synthesize the global context.** Once every shard is distilled, extract
|
|
104
|
-
only the `notes` fields (small) — never the full distill files:
|
|
105
|
-
|
|
106
|
-
```sh
|
|
107
|
-
python3 -c "import json,glob; [print('##', f.split('distill-')[1].removesuffix('.json'), '\n' + json.load(open(f)).get('notes','')) for f in sorted(glob.glob('.hev-ask/shards/distill-*.json'))]"
|
|
108
|
-
```
|
|
109
|
-
|
|
110
|
-
From those notes, write `.hev-ask/shards/global.json`:
|
|
62
|
+
> - One `summaries` entry for every `id` in `sections` — exact ids, no more, no fewer.
|
|
63
|
+
> - Summaries are what the search agent reasons from: faithful, self-contained.
|
|
64
|
+
> Paraphrase prose; **never restate code, flags, commands, or identifiers** —
|
|
65
|
+
> the CLI extracts those verbatim, and they'd only drift if retyped.
|
|
66
|
+
> - `glossary`: ≤10 terms a real user would actually type (aliases like
|
|
67
|
+
> `k8s`→`kubernetes`); one-line definitions. The CLI dedupes and caps them.
|
|
68
|
+
> - `notes` is not user-facing; it feeds the global synthesis pass.
|
|
69
|
+
> - Reply with just the shard id and how many summaries you wrote.
|
|
70
|
+
|
|
71
|
+
Interrupted? Re-run from step 1 — disk is the source of truth and `status`
|
|
72
|
+
shows what's left.
|
|
73
|
+
|
|
74
|
+
4. **Synthesize the global context.** Once every shard is distilled, read the
|
|
75
|
+
`notes` field from each `.hev-ask/shards/distill-*.json` (small — never the
|
|
76
|
+
full files) and write `.hev-ask/shards/global.json`:
|
|
111
77
|
|
|
112
78
|
```json
|
|
113
79
|
{
|
|
114
80
|
"context": "Compact markdown orientation: what the product/site is, its core concepts and feature areas, and how users talk about them.",
|
|
115
|
-
"suggestions": [
|
|
116
|
-
"A natural question a reader might type that these docs answer."
|
|
117
|
-
],
|
|
81
|
+
"suggestions": ["A natural question a reader might type that these docs answer."],
|
|
118
82
|
"glossary": []
|
|
119
83
|
}
|
|
120
84
|
```
|
|
@@ -122,28 +86,17 @@ distillation agents.
|
|
|
122
86
|
`suggestions`: 3-5 questions phrased the way a user would ask them, each
|
|
123
87
|
genuinely answerable from these docs (they show in the overlay on open).
|
|
124
88
|
|
|
125
|
-
5. **Assemble.**
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
Merges every current shard distillation with the global synthesis, derives
|
|
132
|
-
the deterministic parts, and writes the `.hev-ask/` markdown tree. Sections from
|
|
133
|
-
undistilled shards fall back to plain excerpts and are reported — the
|
|
134
|
-
digest stays usable mid-wave, but aim for 0 pending before committing.
|
|
135
|
-
|
|
136
|
-
6. **Verify.**
|
|
137
|
-
|
|
138
|
-
```sh
|
|
139
|
-
pnpm exec ask digest verify
|
|
140
|
-
```
|
|
89
|
+
5. **Assemble.** `ask digest assemble --input-dir .hev-ask/shards`
|
|
90
|
+
Merges every shard distillation with the global synthesis, derives the
|
|
91
|
+
deterministic parts, and writes the `.hev-ask/` tree. Undistilled shards fall
|
|
92
|
+
back to excerpts and are reported — usable mid-wave, but aim for 0 pending
|
|
93
|
+
before committing.
|
|
141
94
|
|
|
142
|
-
|
|
95
|
+
6. **Verify.** `ask digest verify` — anchor drift is fatal; coverage/fidelity
|
|
96
|
+
warnings are informational.
|
|
143
97
|
|
|
144
|
-
7. **
|
|
145
|
-
|
|
146
|
-
input files (regenerated by `corpus` any time):
|
|
98
|
+
7. **Commit.** The shards dir is the local resume/refresh cache — keep it on disk
|
|
99
|
+
but out of git, and drop the bulky input files (`corpus` regenerates them):
|
|
147
100
|
|
|
148
101
|
```sh
|
|
149
102
|
rm -f .hev-ask/shards/input-*.json
|
|
@@ -151,14 +104,11 @@ distillation agents.
|
|
|
151
104
|
git add .gitignore .hev-ask
|
|
152
105
|
```
|
|
153
106
|
|
|
154
|
-
Only the `.hev-ask/` tree is committed; `.hev-ask/shards/`
|
|
107
|
+
The `.hev-ask/` tree is committed, except `.hev-ask/shards/`, which stays local and out of git.
|
|
155
108
|
|
|
156
109
|
## Notes
|
|
157
110
|
|
|
158
|
-
- A small site may produce a single shard
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
unusually dense.
|
|
163
|
-
- If `corpus` fails because no content is found, you're likely not in the
|
|
164
|
-
site root, or the collection name isn't `docs` — pass `--collection <name>`.
|
|
111
|
+
- A small site may produce a single shard — distil it yourself instead of
|
|
112
|
+
spawning an agent (its input is small enough to read directly).
|
|
113
|
+
- If `corpus` finds no content, you're likely not in the site root, or the
|
|
114
|
+
collection isn't named `docs` — pass `--collection <name>`.
|
package/src/search/loop.ts
CHANGED
|
@@ -112,13 +112,37 @@ async function* tracedStream(
|
|
|
112
112
|
}
|
|
113
113
|
|
|
114
114
|
/**
|
|
115
|
-
*
|
|
116
|
-
*
|
|
117
|
-
*
|
|
115
|
+
* Cap on the characters the digest path inlines into the system prompt (the
|
|
116
|
+
* `<map>` + `<summaries>` blocks). Below it, every section summary is inlined so
|
|
117
|
+
* the agent navigates from a complete map — best for small/medium sites. Above
|
|
118
|
+
* it (large docs, e.g. a CLI/API reference with thousands of sections), inlining
|
|
119
|
+
* everything would blow the context window, so the loop switches to search-routed
|
|
120
|
+
* navigation: a compact page map plus a search tool that surfaces ids on demand.
|
|
121
|
+
* ~200 KB ≈ ~50k tokens; a ~500-section site stays fully inlined as before.
|
|
122
|
+
*/
|
|
123
|
+
export const INLINE_DIGEST_BUDGET = 200_000;
|
|
124
|
+
|
|
125
|
+
/** Cheap estimate of what `buildDigestSystemPrompt` would inline, without building it. */
|
|
126
|
+
export function digestInlineSize(digest: Digest): number {
|
|
127
|
+
let size = digest.overview.length;
|
|
128
|
+
for (const node of digest.nodes) size += node.id.length + node.summary.length + 24;
|
|
129
|
+
return size;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Entry point. When the committed digest carries distilled `nodes`, the agent
|
|
134
|
+
* navigates that shadow digest: small digests are inlined whole (digest path);
|
|
135
|
+
* digests larger than {@link INLINE_DIGEST_BUDGET} are navigated by search so the
|
|
136
|
+
* prompt stays bounded (routed path). A node-less (v1 / degraded) digest falls
|
|
137
|
+
* back to the original keyword-search loop, unchanged.
|
|
118
138
|
*/
|
|
119
139
|
export async function* runAgenticAnswerLoop(args: AnswerLoopArgs): AsyncGenerator<AgenticEvent> {
|
|
120
140
|
if (args.digest.nodes && args.digest.nodes.length > 0) {
|
|
121
|
-
|
|
141
|
+
if (digestInlineSize(args.digest) <= INLINE_DIGEST_BUDGET) {
|
|
142
|
+
yield* digestAnswerLoop(args);
|
|
143
|
+
} else {
|
|
144
|
+
yield* routedDigestAnswerLoop(args);
|
|
145
|
+
}
|
|
122
146
|
} else {
|
|
123
147
|
yield* legacyAnswerLoop(args);
|
|
124
148
|
}
|
|
@@ -300,6 +324,208 @@ function renderNodeMap(nodes: DigestNode[]): string {
|
|
|
300
324
|
return nodes.map((node) => `- ${node.heading ?? node.title} — \`${node.id}\``).join('\n');
|
|
301
325
|
}
|
|
302
326
|
|
|
327
|
+
// ---------------------------------------------------------------------------
|
|
328
|
+
// Routed path: navigate a large digest by search instead of inlining it whole.
|
|
329
|
+
// ---------------------------------------------------------------------------
|
|
330
|
+
|
|
331
|
+
const SEARCH_SECTIONS_TOOL: AnthropicTool = {
|
|
332
|
+
name: 'search_sections',
|
|
333
|
+
description:
|
|
334
|
+
'Search the documentation for sections relevant to a focused sub-query. Returns matching section ids with their group, heading, and a one-line summary. Use it to find the ids you then read with open_section.',
|
|
335
|
+
input_schema: {
|
|
336
|
+
type: 'object',
|
|
337
|
+
properties: {
|
|
338
|
+
query: { type: 'string', description: 'Focused keyword query or synonym expansion to search for.' },
|
|
339
|
+
},
|
|
340
|
+
required: ['query'],
|
|
341
|
+
},
|
|
342
|
+
};
|
|
343
|
+
|
|
344
|
+
/** Compact group → page map: orientation only, so the prompt stays bounded. */
|
|
345
|
+
function routedDigestMap(nodes: DigestNode[]): string {
|
|
346
|
+
const byGroup = new Map<string, Set<string>>();
|
|
347
|
+
for (const node of nodes) {
|
|
348
|
+
const group = node.group ?? 'Docs';
|
|
349
|
+
if (!byGroup.has(group)) byGroup.set(group, new Set());
|
|
350
|
+
byGroup.get(group)!.add(node.title);
|
|
351
|
+
}
|
|
352
|
+
const lines: string[] = [];
|
|
353
|
+
for (const [group, pages] of byGroup) {
|
|
354
|
+
lines.push(`## ${group}`);
|
|
355
|
+
for (const page of pages) lines.push(`- ${page}`);
|
|
356
|
+
}
|
|
357
|
+
return lines.join('\n');
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
function routedDigestSystemPrompt(digest: Digest): AnthropicTextBlock[] {
|
|
361
|
+
return [
|
|
362
|
+
{
|
|
363
|
+
type: 'text',
|
|
364
|
+
text: `You are the documentation assistant for this site. Answer the user's question using ONLY documentation sections you retrieve.
|
|
365
|
+
|
|
366
|
+
The documentation is large, so it is not all shown here. Use search_sections to find relevant sections — each result includes a short summary you can answer from directly. When you need a section's exact facts (flags, commands, identifiers), open_section it. One or two focused searches is plenty: once the results cover the question, STOP searching and answer. Do not keep searching for a perfect match.
|
|
367
|
+
|
|
368
|
+
Write a short, direct answer in Markdown:
|
|
369
|
+
- Start IMMEDIATELY with the substance. Your first sentence must answer the question. Never open with "Based on…", "Here is…", "Sure", a restatement of the question, or any summary/preamble.
|
|
370
|
+
- Keep it tight: one or two short paragraphs, plus a short bullet list only if it genuinely helps. This renders in a small search popover, so do NOT use headings (#, ##) or horizontal rules (---).
|
|
371
|
+
- For exact strings (flags, commands, identifiers, versions), quote a section's \`facts\` verbatim — never reword them.
|
|
372
|
+
- When you reference a section, link to it inline using its exact \`url\` from your search results or open_section, e.g. [autoscaling](/docs/concepts#kubernetes-autoscaling). Never invent a URL or anchor.
|
|
373
|
+
- If the documentation does not cover the question, say so plainly in one sentence and do not fabricate an answer.`,
|
|
374
|
+
},
|
|
375
|
+
{
|
|
376
|
+
type: 'text',
|
|
377
|
+
text: `<domain_context>\n${digest.context || 'No digest context is available.'}\n</domain_context>\n\n<map>\n${routedDigestMap(digest.nodes)}\n</map>`,
|
|
378
|
+
cache_control: { type: 'ephemeral' },
|
|
379
|
+
},
|
|
380
|
+
];
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
/** Search the digest's nodes for a sub-query; returns distilled candidates. */
|
|
384
|
+
function searchSections(
|
|
385
|
+
searchQuery: string,
|
|
386
|
+
chunks: Chunk[],
|
|
387
|
+
nodesById: Map<string, DigestNode>,
|
|
388
|
+
digest: Digest,
|
|
389
|
+
config: SearchLoopConfig,
|
|
390
|
+
) {
|
|
391
|
+
return prefilter(chunks, searchQuery, digest.glossary, config.candidatePerSearch, config.perDocCap, digest.nodes)
|
|
392
|
+
.map((candidate) => nodesById.get(candidate.id))
|
|
393
|
+
.filter((node): node is DigestNode => node !== undefined)
|
|
394
|
+
.map((node) => ({
|
|
395
|
+
id: node.id,
|
|
396
|
+
url: node.url,
|
|
397
|
+
group: node.group,
|
|
398
|
+
heading: node.heading,
|
|
399
|
+
summary: node.summary,
|
|
400
|
+
...(node.mode === 'source-primary' ? { reference: true } : {}),
|
|
401
|
+
}));
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
async function* routedDigestAnswerLoop({
|
|
405
|
+
apiKey,
|
|
406
|
+
query,
|
|
407
|
+
chunks,
|
|
408
|
+
digest,
|
|
409
|
+
config,
|
|
410
|
+
signal,
|
|
411
|
+
call = callClaude,
|
|
412
|
+
stream = streamClaude,
|
|
413
|
+
telemetry = makeTelemetry(),
|
|
414
|
+
}: AnswerLoopArgs): AsyncGenerator<AgenticEvent> {
|
|
415
|
+
const byId = new Map(chunks.map((chunk) => [chunk.id, chunk]));
|
|
416
|
+
const nodesById = new Map(digest.nodes.map((node) => [node.id, node]));
|
|
417
|
+
const opened = new Map<string, DigestNode>();
|
|
418
|
+
const seen = new Map<string, DigestNode>(); // sections surfaced by search, in rank order
|
|
419
|
+
const messages: AnthropicMessage[] = [{ role: 'user', content: `Query: ${query}` }];
|
|
420
|
+
const system = routedDigestSystemPrompt(digest);
|
|
421
|
+
|
|
422
|
+
const open = (id: string): DigestNode | null => {
|
|
423
|
+
const node = nodesById.get(id);
|
|
424
|
+
if (node) opened.set(id, node);
|
|
425
|
+
return node ?? null;
|
|
426
|
+
};
|
|
427
|
+
|
|
428
|
+
// Phase 1: bounded loop of searches and section opens (non-streaming tool turns).
|
|
429
|
+
for (let i = 0; i < config.maxIterations; i += 1) {
|
|
430
|
+
const response = await tracedCall(
|
|
431
|
+
call,
|
|
432
|
+
{
|
|
433
|
+
apiKey,
|
|
434
|
+
model: config.model,
|
|
435
|
+
system,
|
|
436
|
+
messages,
|
|
437
|
+
tools: [SEARCH_SECTIONS_TOOL, OPEN_SECTION_TOOL],
|
|
438
|
+
toolChoice: { type: 'auto' },
|
|
439
|
+
maxTokens: 1024,
|
|
440
|
+
signal,
|
|
441
|
+
},
|
|
442
|
+
telemetry,
|
|
443
|
+
i,
|
|
444
|
+
);
|
|
445
|
+
|
|
446
|
+
messages.push({ role: 'assistant', content: response.content });
|
|
447
|
+
const toolResults: AnthropicToolResultBlock[] = [];
|
|
448
|
+
|
|
449
|
+
for (const block of response.content) {
|
|
450
|
+
if (block.type !== 'tool_use') continue;
|
|
451
|
+
if (block.name === 'search_sections') {
|
|
452
|
+
const searchQuery = normalizeToolQuery(block.input) || query;
|
|
453
|
+
yield { type: 'search', query: searchQuery };
|
|
454
|
+
const results = searchSections(searchQuery, chunks, nodesById, digest, config);
|
|
455
|
+
for (const result of results) {
|
|
456
|
+
const node = nodesById.get(result.id);
|
|
457
|
+
if (node && !seen.has(node.id)) seen.set(node.id, node);
|
|
458
|
+
}
|
|
459
|
+
toolResults.push({
|
|
460
|
+
type: 'tool_result',
|
|
461
|
+
tool_use_id: block.id,
|
|
462
|
+
content: JSON.stringify(results),
|
|
463
|
+
});
|
|
464
|
+
} else if (block.name === 'open_section') {
|
|
465
|
+
const id = normalizeId(block.input);
|
|
466
|
+
const node = open(id);
|
|
467
|
+
toolResults.push({
|
|
468
|
+
type: 'tool_result',
|
|
469
|
+
tool_use_id: block.id,
|
|
470
|
+
content: node
|
|
471
|
+
? JSON.stringify(openSectionResult(node, byId))
|
|
472
|
+
: JSON.stringify({ error: `No section "${id}". Search first, then open an exact id from the results.` }),
|
|
473
|
+
});
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
if (!toolResults.length) break; // model is ready to answer
|
|
478
|
+
messages.push({ role: 'user', content: toolResults });
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
// Fallback: if the loop surfaced no sections (the model called no tools, or its
|
|
482
|
+
// searches/opens matched nothing), ground on the best keyword matches for the original query so the answer isn't empty.
|
|
483
|
+
if (!opened.size && !seen.size) {
|
|
484
|
+
for (const candidate of prefilter(chunks, query, digest.glossary, config.maxResults, config.perDocCap, digest.nodes)) {
|
|
485
|
+
const node = nodesById.get(candidate.id);
|
|
486
|
+
if (node && !seen.has(node.id)) seen.set(node.id, node);
|
|
487
|
+
}
|
|
488
|
+
if (seen.size && lastRole(messages) !== 'user') {
|
|
489
|
+
const sections = [...seen.values()].map((node) => openSectionResult(node, byId));
|
|
490
|
+
messages.push({ role: 'user', content: `Relevant sections:\n${JSON.stringify(sections)}` });
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
// The answer is grounded in everything the model surfaced: sections it opened
|
|
495
|
+
// (full facts) ranked first, then the summaries from its searches. This lets it
|
|
496
|
+
// answer from search results without a separate open per section.
|
|
497
|
+
const grounded = new Map<string, DigestNode>([...opened, ...seen]);
|
|
498
|
+
|
|
499
|
+
if (lastRole(messages) === 'assistant') {
|
|
500
|
+
messages.push({
|
|
501
|
+
role: 'user',
|
|
502
|
+
content:
|
|
503
|
+
'Write the answer now, grounded in the sections from your search results and any you opened. Begin directly with the answer — no preamble. Do NOT say you will search, check, or look further: you cannot, and you already have the context you will get. If the sections do not cover the question, say so in one sentence. Link only to section urls you have seen.',
|
|
504
|
+
});
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
const sources = sourcesFromNodes(grounded, config.maxResults);
|
|
508
|
+
yield { type: 'sources', sources };
|
|
509
|
+
|
|
510
|
+
// Phase 2: streamed answer turn — no tools, so the model can only answer.
|
|
511
|
+
for await (const event of tracedStream(
|
|
512
|
+
stream,
|
|
513
|
+
{
|
|
514
|
+
apiKey,
|
|
515
|
+
model: config.model,
|
|
516
|
+
system: answerSystem(system, sources),
|
|
517
|
+
messages,
|
|
518
|
+
maxTokens: config.answerMaxTokens,
|
|
519
|
+
signal,
|
|
520
|
+
},
|
|
521
|
+
telemetry,
|
|
522
|
+
)) {
|
|
523
|
+
if (event.type === 'text' && event.text) yield { type: 'token', text: event.text };
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
yield { type: 'done' };
|
|
527
|
+
}
|
|
528
|
+
|
|
303
529
|
function sourcesFromNodes(opened: Map<string, DigestNode>, maxResults: number): Source[] {
|
|
304
530
|
const sources: Source[] = [];
|
|
305
531
|
const urls = new Set<string>();
|
package/src/search/prefilter.ts
CHANGED
|
@@ -48,13 +48,24 @@ export function prefilter(
|
|
|
48
48
|
if (!terms.length) return [];
|
|
49
49
|
|
|
50
50
|
const signal = nodeSignal(nodes);
|
|
51
|
+
// Inverse document frequency: down-weight terms common across the corpus
|
|
52
|
+
// (stopwords, ubiquitous words like "authentication" or "setup") so a rare,
|
|
53
|
+
// on-topic term ("oidc") dominates ranking. Without it, plain overlap buries
|
|
54
|
+
// the specific section under pages that merely share several common words —
|
|
55
|
+
// which degrades badly as the corpus grows (hundreds → thousands of sections).
|
|
56
|
+
const df = new Map<string, number>();
|
|
57
|
+
for (const chunk of chunks) for (const token of chunk.tokens) df.set(token, (df.get(token) ?? 0) + 1);
|
|
58
|
+
const total = chunks.length;
|
|
59
|
+
const weights = new Map(terms.map((term) => [term, Math.log(1 + total / (1 + (df.get(term) ?? 0)))]));
|
|
60
|
+
|
|
51
61
|
const scored = chunks
|
|
52
62
|
.map((chunk) => {
|
|
53
63
|
const boost = signal.get(chunk.id);
|
|
54
64
|
let score = 0;
|
|
55
65
|
for (const term of terms) {
|
|
56
|
-
|
|
57
|
-
if (
|
|
66
|
+
const weight = weights.get(term) ?? 0;
|
|
67
|
+
if (chunk.tokens.has(term)) score += weight;
|
|
68
|
+
if (boost?.has(term)) score += weight;
|
|
58
69
|
}
|
|
59
70
|
return { chunk, score };
|
|
60
71
|
})
|