npm - @hevmind/ask - Versions diffs - 0.3.2 → 0.3.4 - Mend

@hevmind/ask 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hevmind/ask",
-  "version": "0.3.2",
+  "version": "0.3.4",
   "type": "module",
   "description": "hev ask: a heading-anchored, agentic search overlay for Astro docs sites.",
   "keywords": [
@@ -28,11 +28,11 @@
     "ask": "./bin/ask.mjs"
   },
   "optionalDependencies": {
-    "@hevmind/ask-darwin-arm64": "0.3.2",
-    "@hevmind/ask-linux-x64": "0.3.2",
-    "@hevmind/ask-linux-arm64": "0.3.2",
-    "@hevmind/ask-darwin-x64": "0.3.2",
-    "@hevmind/ask-win32-x64": "0.3.2"
+    "@hevmind/ask-darwin-arm64": "0.3.4",
+    "@hevmind/ask-linux-arm64": "0.3.4",
+    "@hevmind/ask-darwin-x64": "0.3.4",
+    "@hevmind/ask-win32-x64": "0.3.4",
+    "@hevmind/ask-linux-x64": "0.3.4"
   },
   "exports": {
     ".": "./src/index.ts",

package/src/digest/expand.ts CHANGED Viewed

@@ -2,23 +2,28 @@ import { tokenize } from '../search/chunk.ts';
 import type { GlossaryEntry } from './schema';
 export function expandQueryTerms(query: string, glossary: GlossaryEntry[], cap = 24): string[] {
-  const terms = new Set(tokenize(query));
-  if (!terms.size) return [];
+  const queryTokens = new Set(tokenize(query));
+  if (!queryTokens.size) return [];
+  const terms = new Set(queryTokens);
   for (const entry of glossary) {
     if (terms.size >= cap) break;
-    const entryTerms = new Set([...tokenize(entry.term), ...entry.aliases.flatMap((alias) => tokenize(alias))]);
-    if (!intersects(terms, entryTerms)) continue;
-    for (const term of entryTerms) {
-      terms.add(term);
+    // Expand only when the query contains a full glossary phrase — the term or one
+    // of its aliases, every token present. Matching on any shared token (e.g. the
+    // ubiquitous "authentication") drags in every entry that merely mentions it,
+    // which floods results once the glossary is large.
+    const phrases = [entry.term, ...entry.aliases].map((phrase) => tokenize(phrase)).filter((tokens) => tokens.length);
+    const matched = phrases.some((phrase) => phrase.every((token) => queryTokens.has(token)));
+    if (!matched) continue;
+    for (const phrase of phrases) {
+      for (const token of phrase) {
+        if (token.length < 3) continue; // skip noisy short tokens like "ad", "id"
+        terms.add(token);
+        if (terms.size >= cap) break;
+      }
       if (terms.size >= cap) break;
     }
   }
   return [...terms];
 }
-function intersects(a: Set<string>, b: Set<string>): boolean {
-  for (const item of b) if (a.has(item)) return true;
-  return false;
-}

package/src/search/prefilter.ts CHANGED Viewed

@@ -81,11 +81,16 @@ export function prefilter(
   const scored = chunks
     .map((chunk) => {
       const boost = signal.get(chunk.id);
+      // A query term in the section heading or page title is a strong topical
+      // signal — the page titled "OIDC Authentication" is what someone asking
+      // about "oidc" wants, far more than a page that merely mentions it.
+      const headingTokens = new Set([...tokenize(chunk.heading ?? ''), ...tokenize(chunk.docTitle ?? '')]);
       let raw = 0;
       for (const term of terms) {
         const weight = weights.get(term) ?? 0;
         if (chunk.tokens.has(term)) raw += weight;
         if (boost?.has(term)) raw += weight;
+        if (headingTokens.has(term)) raw += weight * 3;
       }
       // Length-penalize (not -reward): a huge page (e.g. an autogenerated CLI flag
       // dump that mentions nearly every term) is divided down, but short sections