@hevmind/ask 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/search/loop.ts +38 -21
- package/src/search/prefilter.ts +41 -12
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hevmind/ask",
|
|
3
|
-
"version": "0.3.1",
|
|
3
|
+
"version": "0.3.3",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "hev ask: a heading-anchored, agentic search overlay for Astro docs sites.",
|
|
6
6
|
"keywords": [
|
|
@@ -28,11 +28,11 @@
|
|
|
28
28
|
"ask": "./bin/ask.mjs"
|
|
29
29
|
},
|
|
30
30
|
"optionalDependencies": {
|
|
31
|
-
"@hevmind/ask-darwin-
|
|
32
|
-
"@hevmind/ask-darwin-
|
|
33
|
-
"@hevmind/ask-linux-
|
|
34
|
-
"@hevmind/ask-win32-x64": "0.3.1",
|
|
35
|
-
"@hevmind/ask-linux-
|
|
31
|
+
"@hevmind/ask-darwin-x64": "0.3.3",
|
|
32
|
+
"@hevmind/ask-darwin-arm64": "0.3.3",
|
|
33
|
+
"@hevmind/ask-linux-x64": "0.3.3",
|
|
34
|
+
"@hevmind/ask-win32-x64": "0.3.3",
|
|
35
|
+
"@hevmind/ask-linux-arm64": "0.3.3"
|
|
36
36
|
},
|
|
37
37
|
"exports": {
|
|
38
38
|
".": "./src/index.ts",
|
package/src/search/loop.ts
CHANGED
|
@@ -485,36 +485,32 @@ async function* routedDigestAnswerLoop({
|
|
|
485
485
|
const node = nodesById.get(candidate.id);
|
|
486
486
|
if (node && !seen.has(node.id)) seen.set(node.id, node);
|
|
487
487
|
}
|
|
488
|
-
if (seen.size && lastRole(messages) !== 'user') {
|
|
489
|
-
const sections = [...seen.values()].map((node) => openSectionResult(node, byId));
|
|
490
|
-
messages.push({ role: 'user', content: `Relevant sections:\n${JSON.stringify(sections)}` });
|
|
491
|
-
}
|
|
492
488
|
}
|
|
493
489
|
|
|
494
|
-
//
|
|
495
|
-
//
|
|
496
|
-
|
|
497
|
-
const
|
|
498
|
-
|
|
499
|
-
if (lastRole(messages) === 'assistant') {
|
|
500
|
-
messages.push({
|
|
501
|
-
role: 'user',
|
|
502
|
-
content:
|
|
503
|
-
'Write the answer now, grounded in the sections from your search results and any you opened. Begin directly with the answer — no preamble. Do NOT say you will search, check, or look further: you cannot, and you already have the context you will get. If the sections do not cover the question, say so in one sentence. Link only to section urls you have seen.',
|
|
504
|
-
});
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
const sources = sourcesFromNodes(grounded, config.maxResults);
|
|
490
|
+
// Ground the answer in everything surfaced: opened sections (full facts) first,
|
|
491
|
+
// then searched summaries, capped to maxResults.
|
|
492
|
+
const grounded = [...new Map<string, DigestNode>([...opened, ...seen]).values()].slice(0, config.maxResults);
|
|
493
|
+
const sources = sourcesFromNodes(new Map(grounded.map((node) => [node.id, node])), config.maxResults);
|
|
508
494
|
yield { type: 'sources', sources };
|
|
509
495
|
|
|
510
|
-
// Phase 2:
|
|
496
|
+
// Phase 2: a clean answer turn. Replaying the tool transcript keeps the model in
|
|
497
|
+
// "let me search more" mode (and it tries to call tools that no longer exist), so
|
|
498
|
+
// instead hand it just the question and the gathered sections — now it can only
|
|
499
|
+
// write the final prose answer.
|
|
500
|
+
const answerContext = grounded.map((node) => openSectionResult(node, byId));
|
|
501
|
+
const answerMessages: AnthropicMessage[] = [
|
|
502
|
+
{
|
|
503
|
+
role: 'user',
|
|
504
|
+
content: `Question: ${query}\n\nAnswer using only these documentation sections:\n${JSON.stringify(answerContext)}`,
|
|
505
|
+
},
|
|
506
|
+
];
|
|
511
507
|
for await (const event of tracedStream(
|
|
512
508
|
stream,
|
|
513
509
|
{
|
|
514
510
|
apiKey,
|
|
515
511
|
model: config.model,
|
|
516
|
-
system:
|
|
517
|
-
messages,
|
|
512
|
+
system: routedAnswerSystemPrompt(digest),
|
|
513
|
+
messages: answerMessages,
|
|
518
514
|
maxTokens: config.answerMaxTokens,
|
|
519
515
|
signal,
|
|
520
516
|
},
|
|
@@ -526,6 +522,27 @@ async function* routedDigestAnswerLoop({
|
|
|
526
522
|
yield { type: 'done' };
|
|
527
523
|
}
|
|
528
524
|
|
|
525
|
+
/** Answer-only system prompt for the routed loop's final turn (no tools). */
|
|
526
|
+
function routedAnswerSystemPrompt(digest: Digest): AnthropicTextBlock[] {
|
|
527
|
+
return [
|
|
528
|
+
{
|
|
529
|
+
type: 'text',
|
|
530
|
+
text: `You are the documentation assistant for this site. Write the answer to the user's question using ONLY the documentation sections provided in the next message. You have no tools — produce the final prose answer now.
|
|
531
|
+
|
|
532
|
+
- Start IMMEDIATELY with the substance. Your first sentence must answer the question. Never open with "Based on…", "Here is…", "Sure", "Let me…", or any preamble or statement about searching, opening, or checking further.
|
|
533
|
+
- Keep it tight: one or two short paragraphs, plus a short bullet list only if it genuinely helps. This renders in a small search popover, so do NOT use headings (#, ##) or horizontal rules (---).
|
|
534
|
+
- For exact strings (flags, commands, identifiers, versions), quote a section's \`facts\` verbatim — never reword them.
|
|
535
|
+
- Link to sections inline using their exact \`url\` from the provided sections, e.g. [autoscaling](/docs/concepts#kubernetes-autoscaling). Never invent a URL or anchor.
|
|
536
|
+
- If the provided sections do not answer the question, say so plainly in one sentence and do not fabricate an answer.`,
|
|
537
|
+
},
|
|
538
|
+
{
|
|
539
|
+
type: 'text',
|
|
540
|
+
text: `<domain_context>\n${digest.context || ''}\n</domain_context>`,
|
|
541
|
+
cache_control: { type: 'ephemeral' },
|
|
542
|
+
},
|
|
543
|
+
];
|
|
544
|
+
}
|
|
545
|
+
|
|
529
546
|
function sourcesFromNodes(opened: Map<string, DigestNode>, maxResults: number): Source[] {
|
|
530
547
|
const sources: Source[] = [];
|
|
531
548
|
const urls = new Set<string>();
|
package/src/search/prefilter.ts
CHANGED
|
@@ -11,6 +11,16 @@ export interface Candidate {
|
|
|
11
11
|
snippet: string;
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
+
/** Grammatical/generic words that carry no topical signal in a docs query. */
|
|
15
|
+
const STOPWORDS = new Set<string>([
|
|
16
|
+
'the', 'a', 'an', 'and', 'or', 'but', 'of', 'to', 'in', 'on', 'at', 'by', 'for', 'with', 'from', 'into', 'as',
|
|
17
|
+
'is', 'are', 'be', 'was', 'were', 'been', 'being', 'do', 'does', 'did', 'how', 'what', 'when', 'where', 'which',
|
|
18
|
+
'who', 'whom', 'why', 'can', 'could', 'should', 'would', 'shall', 'may', 'might', 'my', 'your', 'yours', 'you',
|
|
19
|
+
'i', 'it', 'its', 'this', 'that', 'these', 'those', 'there', 'here', 'we', 'our', 'us', 'me', 'if', 'then',
|
|
20
|
+
'else', 'not', 'no', 'so', 'than', 'too', 'very', 'just', 'also', 'only', 'out', 'about', 'up', 'set', 'get',
|
|
21
|
+
'use', 'using', 'used', 'via', 'want', 'need', 'make', 'have', 'has', 'will',
|
|
22
|
+
]);
|
|
23
|
+
|
|
14
24
|
/**
|
|
15
25
|
* Distinctive tokens the digest carries for a section: its `terms`, the
|
|
16
26
|
* tokens of its distilled `summary`, and the tokens of its verbatim `facts`. A
|
|
@@ -44,33 +54,52 @@ export function prefilter(
|
|
|
44
54
|
perDocCap: number,
|
|
45
55
|
nodes?: DigestNode[],
|
|
46
56
|
): Candidate[] {
|
|
47
|
-
const terms = expandQueryTerms(query, glossary);
|
|
48
|
-
if (!terms.length) return [];
|
|
57
|
+
const expanded = expandQueryTerms(query, glossary);
|
|
58
|
+
if (!expanded.length) return [];
|
|
59
|
+
// Drop grammatical/generic stopwords so a few ubiquitous words ("how", "set
|
|
60
|
+
// up", "with") don't dominate ranking. Fall back to the raw terms if the query
|
|
61
|
+
// is nothing but stopwords.
|
|
62
|
+
const filtered = expanded.filter((term) => term.length > 1 && !STOPWORDS.has(term));
|
|
63
|
+
const terms = filtered.length ? filtered : expanded;
|
|
49
64
|
|
|
50
65
|
const signal = nodeSignal(nodes);
|
|
51
66
|
// Inverse document frequency: down-weight terms common across the corpus
|
|
52
|
-
// (
|
|
53
|
-
//
|
|
54
|
-
//
|
|
55
|
-
// which degrades badly as the corpus grows (hundreds → thousands of sections).
|
|
67
|
+
// (ubiquitous words like "authentication") so a rare, on-topic term ("oidc")
|
|
68
|
+
// dominates. Without it, plain overlap buries the specific section under pages
|
|
69
|
+
// that merely share several common words — which degrades as the corpus grows.
|
|
56
70
|
const df = new Map<string, number>();
|
|
57
|
-
|
|
71
|
+
let lengthSum = 0;
|
|
72
|
+
for (const chunk of chunks) {
|
|
73
|
+
lengthSum += chunk.tokens.size;
|
|
74
|
+
for (const token of chunk.tokens) df.set(token, (df.get(token) ?? 0) + 1);
|
|
75
|
+
}
|
|
58
76
|
const total = chunks.length;
|
|
77
|
+
const avgLen = total ? lengthSum / total : 1;
|
|
59
78
|
const weights = new Map(terms.map((term) => [term, Math.log(1 + total / (1 + (df.get(term) ?? 0)))]));
|
|
79
|
+
const b = 0.75; // BM25 length-normalization strength
|
|
60
80
|
|
|
61
81
|
const scored = chunks
|
|
62
82
|
.map((chunk) => {
|
|
63
83
|
const boost = signal.get(chunk.id);
|
|
64
|
-
|
|
84
|
+
// A query term in the section heading or page title is a strong topical
|
|
85
|
+
// signal — the page titled "OIDC Authentication" is what someone asking
|
|
86
|
+
// about "oidc" wants, far more than a page that merely mentions it.
|
|
87
|
+
const headingTokens = new Set([...tokenize(chunk.heading ?? ''), ...tokenize(chunk.docTitle ?? '')]);
|
|
88
|
+
let raw = 0;
|
|
65
89
|
for (const term of terms) {
|
|
66
90
|
const weight = weights.get(term) ?? 0;
|
|
67
|
-
if (chunk.tokens.has(term))
|
|
68
|
-
if (boost?.has(term))
|
|
91
|
+
if (chunk.tokens.has(term)) raw += weight;
|
|
92
|
+
if (boost?.has(term)) raw += weight;
|
|
93
|
+
if (headingTokens.has(term)) raw += weight * 3;
|
|
69
94
|
}
|
|
70
|
-
|
|
95
|
+
// Length-penalize (not -reward): a huge page (e.g. an autogenerated CLI flag
|
|
96
|
+
// dump that mentions nearly every term) is divided down, but short sections
|
|
97
|
+
// are not boosted — the floor at 1 avoids over-rewarding tiny reference stubs.
|
|
98
|
+
const norm = Math.max(1, 1 - b + (b * chunk.tokens.size) / (avgLen || 1));
|
|
99
|
+
return { chunk, score: raw / norm };
|
|
71
100
|
})
|
|
72
101
|
.filter((candidate) => candidate.score > 0)
|
|
73
|
-
.sort((a,
|
|
102
|
+
.sort((a, b2) => b2.score - a.score || a.chunk.id.localeCompare(b2.chunk.id));
|
|
74
103
|
|
|
75
104
|
const perDoc = new Map<string, number>();
|
|
76
105
|
const capped = [];
|