@hevmind/ask 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hevmind/ask",
3
- "version": "0.3.0",
3
+ "version": "0.3.2",
4
4
  "type": "module",
5
5
  "description": "hev ask: a heading-anchored, agentic search overlay for Astro docs sites.",
6
6
  "keywords": [
@@ -28,11 +28,11 @@
28
28
  "ask": "./bin/ask.mjs"
29
29
  },
30
30
  "optionalDependencies": {
31
- "@hevmind/ask-win32-x64": "0.3.0",
32
- "@hevmind/ask-linux-arm64": "0.3.0",
33
- "@hevmind/ask-darwin-arm64": "0.3.0",
34
- "@hevmind/ask-linux-x64": "0.3.0",
35
- "@hevmind/ask-darwin-x64": "0.3.0"
31
+ "@hevmind/ask-darwin-arm64": "0.3.2",
32
+ "@hevmind/ask-linux-x64": "0.3.2",
33
+ "@hevmind/ask-linux-arm64": "0.3.2",
34
+ "@hevmind/ask-darwin-x64": "0.3.2",
35
+ "@hevmind/ask-win32-x64": "0.3.2"
36
36
  },
37
37
  "exports": {
38
38
  ".": "./src/index.ts",
@@ -363,13 +363,13 @@ function routedDigestSystemPrompt(digest: Digest): AnthropicTextBlock[] {
363
363
  type: 'text',
364
364
  text: `You are the documentation assistant for this site. Answer the user's question using ONLY documentation sections you retrieve.
365
365
 
366
- The documentation is large, so it is not all shown here. Use search_sections to find sections relevant to the question, then read the ones you need with open_section for their summary and exact facts. Run a few searches with varied terms if the first does not surface what you need. Open every section your answer draws on you may only link to sections you opened.
366
+ The documentation is large, so it is not all shown here. Use search_sections to find relevant sections each result includes a short summary you can answer from directly. When you need a section's exact facts (flags, commands, identifiers), open_section it. One or two focused searches is plenty: once the results cover the question, STOP searching and answer. Do not keep searching for a perfect match.
367
367
 
368
368
  Write a short, direct answer in Markdown:
369
369
  - Start IMMEDIATELY with the substance. Your first sentence must answer the question. Never open with "Based on…", "Here is…", "Sure", a restatement of the question, or any summary/preamble.
370
370
  - Keep it tight: one or two short paragraphs, plus a short bullet list only if it genuinely helps. This renders in a small search popover, so do NOT use headings (#, ##) or horizontal rules (---).
371
- - For exact strings (flags, commands, identifiers, versions), quote the section's \`facts\` verbatim — never reword them.
372
- - When you reference a section, link to it inline using its exact \`url\`, e.g. [autoscaling](/docs/concepts#kubernetes-autoscaling). Never invent a URL or anchor.
371
+ - For exact strings (flags, commands, identifiers, versions), quote a section's \`facts\` verbatim — never reword them.
372
+ - When you reference a section, link to it inline using its exact \`url\` from your search results or open_section, e.g. [autoscaling](/docs/concepts#kubernetes-autoscaling). Never invent a URL or anchor.
373
373
  - If the documentation does not cover the question, say so plainly in one sentence and do not fabricate an answer.`,
374
374
  },
375
375
  {
@@ -415,6 +415,7 @@ async function* routedDigestAnswerLoop({
415
415
  const byId = new Map(chunks.map((chunk) => [chunk.id, chunk]));
416
416
  const nodesById = new Map(digest.nodes.map((node) => [node.id, node]));
417
417
  const opened = new Map<string, DigestNode>();
418
+ const seen = new Map<string, DigestNode>(); // sections surfaced by search, in rank order
418
419
  const messages: AnthropicMessage[] = [{ role: 'user', content: `Query: ${query}` }];
419
420
  const system = routedDigestSystemPrompt(digest);
420
421
 
@@ -450,10 +451,15 @@ async function* routedDigestAnswerLoop({
450
451
  if (block.name === 'search_sections') {
451
452
  const searchQuery = normalizeToolQuery(block.input) || query;
452
453
  yield { type: 'search', query: searchQuery };
454
+ const results = searchSections(searchQuery, chunks, nodesById, digest, config);
455
+ for (const result of results) {
456
+ const node = nodesById.get(result.id);
457
+ if (node && !seen.has(node.id)) seen.set(node.id, node);
458
+ }
453
459
  toolResults.push({
454
460
  type: 'tool_result',
455
461
  tool_use_id: block.id,
456
- content: JSON.stringify(searchSections(searchQuery, chunks, nodesById, digest, config)),
462
+ content: JSON.stringify(results),
457
463
  });
458
464
  } else if (block.name === 'open_section') {
459
465
  const id = normalizeId(block.input);
@@ -472,38 +478,39 @@ async function* routedDigestAnswerLoop({
472
478
  messages.push({ role: 'user', content: toolResults });
473
479
  }
474
480
 
475
- // Fallback: ground the answer even if the model opened nothing, by opening the
476
- // best keyword matches for the original query.
477
- if (!opened.size) {
481
+ // Fallback: if the model never searched or opened anything, ground on the best
482
+ // keyword matches for the original query so the answer isn't empty.
483
+ if (!opened.size && !seen.size) {
478
484
  for (const candidate of prefilter(chunks, query, digest.glossary, config.maxResults, config.perDocCap, digest.nodes)) {
479
- const node = open(candidate.id);
480
- if (node) yield { type: 'search', query: node.heading ?? node.title };
481
- }
482
- if (opened.size && lastRole(messages) !== 'user') {
483
- const sections = [...opened.values()].map((node) => openSectionResult(node, byId));
484
- messages.push({ role: 'user', content: `Opened sections:\n${JSON.stringify(sections)}` });
485
+ const node = nodesById.get(candidate.id);
486
+ if (node && !seen.has(node.id)) seen.set(node.id, node);
485
487
  }
486
488
  }
487
489
 
488
- if (lastRole(messages) === 'assistant') {
489
- messages.push({
490
- role: 'user',
491
- content:
492
- 'Write the answer now. Begin directly with the answer itself — no preamble, no "based on…" opener, no headings. Link only to sections you opened, using their exact url.',
493
- });
494
- }
495
-
496
- const sources = sourcesFromNodes(opened, config.maxResults);
490
+ // Ground the answer in everything surfaced: opened sections (full facts) first,
491
+ // then searched summaries, capped to maxResults.
492
+ const grounded = [...new Map<string, DigestNode>([...opened, ...seen]).values()].slice(0, config.maxResults);
493
+ const sources = sourcesFromNodes(new Map(grounded.map((node) => [node.id, node])), config.maxResults);
497
494
  yield { type: 'sources', sources };
498
495
 
499
- // Phase 2: streamed answer turn — no tools, so the model can only answer.
496
+ // Phase 2: a clean answer turn. Replaying the tool transcript keeps the model in
497
+ // "let me search more" mode (and it tries to call tools that no longer exist), so
498
+ // instead hand it just the question and the gathered sections — now it can only
499
+ // write the final prose answer.
500
+ const answerContext = grounded.map((node) => openSectionResult(node, byId));
501
+ const answerMessages: AnthropicMessage[] = [
502
+ {
503
+ role: 'user',
504
+ content: `Question: ${query}\n\nAnswer using only these documentation sections:\n${JSON.stringify(answerContext)}`,
505
+ },
506
+ ];
500
507
  for await (const event of tracedStream(
501
508
  stream,
502
509
  {
503
510
  apiKey,
504
511
  model: config.model,
505
- system: answerSystem(system, sources),
506
- messages,
512
+ system: routedAnswerSystemPrompt(digest),
513
+ messages: answerMessages,
507
514
  maxTokens: config.answerMaxTokens,
508
515
  signal,
509
516
  },
@@ -515,6 +522,27 @@ async function* routedDigestAnswerLoop({
515
522
  yield { type: 'done' };
516
523
  }
517
524
 
525
+ /** Answer-only system prompt for the routed loop's final turn (no tools). */
526
+ function routedAnswerSystemPrompt(digest: Digest): AnthropicTextBlock[] {
527
+ return [
528
+ {
529
+ type: 'text',
530
+ text: `You are the documentation assistant for this site. Write the answer to the user's question using ONLY the documentation sections provided in the next message. You have no tools — produce the final prose answer now.
531
+
532
+ - Start IMMEDIATELY with the substance. Your first sentence must answer the question. Never open with "Based on…", "Here is…", "Sure", "Let me…", or any preamble or statement about searching, opening, or checking further.
533
+ - Keep it tight: one or two short paragraphs, plus a short bullet list only if it genuinely helps. This renders in a small search popover, so do NOT use headings (#, ##) or horizontal rules (---).
534
+ - For exact strings (flags, commands, identifiers, versions), quote a section's \`facts\` verbatim — never reword them.
535
+ - Link to sections inline using their exact \`url\` from the provided sections, e.g. [autoscaling](/docs/concepts#kubernetes-autoscaling). Never invent a URL or anchor.
536
+ - If the provided sections do not answer the question, say so plainly in one sentence and do not fabricate an answer.`,
537
+ },
538
+ {
539
+ type: 'text',
540
+ text: `<domain_context>\n${digest.context || ''}\n</domain_context>`,
541
+ cache_control: { type: 'ephemeral' },
542
+ },
543
+ ];
544
+ }
545
+
518
546
  function sourcesFromNodes(opened: Map<string, DigestNode>, maxResults: number): Source[] {
519
547
  const sources: Source[] = [];
520
548
  const urls = new Set<string>();
@@ -11,6 +11,16 @@ export interface Candidate {
11
11
  snippet: string;
12
12
  }
13
13
 
14
+ /** Grammatical/generic words that carry no topical signal in a docs query. */
15
+ const STOPWORDS = new Set<string>([
16
+ 'the', 'a', 'an', 'and', 'or', 'but', 'of', 'to', 'in', 'on', 'at', 'by', 'for', 'with', 'from', 'into', 'as',
17
+ 'is', 'are', 'be', 'was', 'were', 'been', 'being', 'do', 'does', 'did', 'how', 'what', 'when', 'where', 'which',
18
+ 'who', 'whom', 'why', 'can', 'could', 'should', 'would', 'shall', 'may', 'might', 'my', 'your', 'yours', 'you',
19
+ 'i', 'it', 'its', 'this', 'that', 'these', 'those', 'there', 'here', 'we', 'our', 'us', 'me', 'if', 'then',
20
+ 'else', 'not', 'no', 'so', 'than', 'too', 'very', 'just', 'also', 'only', 'out', 'about', 'up', 'set', 'get',
21
+ 'use', 'using', 'used', 'via', 'want', 'need', 'make', 'have', 'has', 'will',
22
+ ]);
23
+
14
24
  /**
15
25
  * Distinctive tokens the digest carries for a section: its `terms`, the
16
26
  * tokens of its distilled `summary`, and the tokens of its verbatim `facts`. A
@@ -44,22 +54,47 @@ export function prefilter(
44
54
  perDocCap: number,
45
55
  nodes?: DigestNode[],
46
56
  ): Candidate[] {
47
- const terms = expandQueryTerms(query, glossary);
48
- if (!terms.length) return [];
57
+ const expanded = expandQueryTerms(query, glossary);
58
+ if (!expanded.length) return [];
59
+ // Drop grammatical/generic stopwords so a few ubiquitous words ("how", "set
60
+ // up", "with") don't dominate ranking. Fall back to the raw terms if the query
61
+ // is nothing but stopwords.
62
+ const filtered = expanded.filter((term) => term.length > 1 && !STOPWORDS.has(term));
63
+ const terms = filtered.length ? filtered : expanded;
49
64
 
50
65
  const signal = nodeSignal(nodes);
66
+ // Inverse document frequency: down-weight terms common across the corpus
67
+ // (ubiquitous words like "authentication") so a rare, on-topic term ("oidc")
68
+ // dominates. Without it, plain overlap buries the specific section under pages
69
+ // that merely share several common words — which degrades as the corpus grows.
70
+ const df = new Map<string, number>();
71
+ let lengthSum = 0;
72
+ for (const chunk of chunks) {
73
+ lengthSum += chunk.tokens.size;
74
+ for (const token of chunk.tokens) df.set(token, (df.get(token) ?? 0) + 1);
75
+ }
76
+ const total = chunks.length;
77
+ const avgLen = total ? lengthSum / total : 1;
78
+ const weights = new Map(terms.map((term) => [term, Math.log(1 + total / (1 + (df.get(term) ?? 0)))]));
79
+ const b = 0.75; // BM25 length-normalization strength
80
+
51
81
  const scored = chunks
52
82
  .map((chunk) => {
53
83
  const boost = signal.get(chunk.id);
54
- let score = 0;
84
+ let raw = 0;
55
85
  for (const term of terms) {
56
- if (chunk.tokens.has(term)) score += 1;
57
- if (boost?.has(term)) score += 1;
86
+ const weight = weights.get(term) ?? 0;
87
+ if (chunk.tokens.has(term)) raw += weight;
88
+ if (boost?.has(term)) raw += weight;
58
89
  }
59
- return { chunk, score };
90
+ // Length-penalize (not -reward): a huge page (e.g. an autogenerated CLI flag
91
+ // dump that mentions nearly every term) is divided down, but short sections
92
+ // are not boosted — the floor at 1 avoids over-rewarding tiny reference stubs.
93
+ const norm = Math.max(1, 1 - b + (b * chunk.tokens.size) / (avgLen || 1));
94
+ return { chunk, score: raw / norm };
60
95
  })
61
96
  .filter((candidate) => candidate.score > 0)
62
- .sort((a, b) => b.score - a.score || a.chunk.id.localeCompare(b.chunk.id));
97
+ .sort((a, b2) => b2.score - a.score || a.chunk.id.localeCompare(b2.chunk.id));
63
98
 
64
99
  const perDoc = new Map<string, number>();
65
100
  const capped = [];