@hevmind/ask 0.3.2 → 0.3.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hevmind/ask",
3
- "version": "0.3.2",
3
+ "version": "0.3.4",
4
4
  "type": "module",
5
5
  "description": "hev ask: a heading-anchored, agentic search overlay for Astro docs sites.",
6
6
  "keywords": [
@@ -28,11 +28,11 @@
28
28
  "ask": "./bin/ask.mjs"
29
29
  },
30
30
  "optionalDependencies": {
31
- "@hevmind/ask-darwin-arm64": "0.3.2",
32
- "@hevmind/ask-linux-x64": "0.3.2",
33
- "@hevmind/ask-linux-arm64": "0.3.2",
34
- "@hevmind/ask-darwin-x64": "0.3.2",
35
- "@hevmind/ask-win32-x64": "0.3.2"
31
+ "@hevmind/ask-darwin-arm64": "0.3.4",
32
+ "@hevmind/ask-linux-arm64": "0.3.4",
33
+ "@hevmind/ask-darwin-x64": "0.3.4",
34
+ "@hevmind/ask-win32-x64": "0.3.4",
35
+ "@hevmind/ask-linux-x64": "0.3.4"
36
36
  },
37
37
  "exports": {
38
38
  ".": "./src/index.ts",
@@ -2,23 +2,28 @@ import { tokenize } from '../search/chunk.ts';
2
2
  import type { GlossaryEntry } from './schema';
3
3
 
4
4
  export function expandQueryTerms(query: string, glossary: GlossaryEntry[], cap = 24): string[] {
5
- const terms = new Set(tokenize(query));
6
- if (!terms.size) return [];
5
+ const queryTokens = new Set(tokenize(query));
6
+ if (!queryTokens.size) return [];
7
+ const terms = new Set(queryTokens);
7
8
 
8
9
  for (const entry of glossary) {
9
10
  if (terms.size >= cap) break;
10
- const entryTerms = new Set([...tokenize(entry.term), ...entry.aliases.flatMap((alias) => tokenize(alias))]);
11
- if (!intersects(terms, entryTerms)) continue;
12
- for (const term of entryTerms) {
13
- terms.add(term);
11
+ // Expand only when the query contains a full glossary phrase — the term or one
12
+ // of its aliases, every token present. Matching on any shared token (e.g. the
13
+ // ubiquitous "authentication") drags in every entry that merely mentions it,
14
+ // which floods results once the glossary is large.
15
+ const phrases = [entry.term, ...entry.aliases].map((phrase) => tokenize(phrase)).filter((tokens) => tokens.length);
16
+ const matched = phrases.some((phrase) => phrase.every((token) => queryTokens.has(token)));
17
+ if (!matched) continue;
18
+ for (const phrase of phrases) {
19
+ for (const token of phrase) {
20
+ if (token.length < 3) continue; // skip noisy short tokens like "ad", "id"
21
+ terms.add(token);
22
+ if (terms.size >= cap) break;
23
+ }
14
24
  if (terms.size >= cap) break;
15
25
  }
16
26
  }
17
27
 
18
28
  return [...terms];
19
29
  }
20
-
21
- function intersects(a: Set<string>, b: Set<string>): boolean {
22
- for (const item of b) if (a.has(item)) return true;
23
- return false;
24
- }
@@ -81,11 +81,16 @@ export function prefilter(
81
81
  const scored = chunks
82
82
  .map((chunk) => {
83
83
  const boost = signal.get(chunk.id);
84
+ // A query term in the section heading or page title is a strong topical
85
+ // signal — the page titled "OIDC Authentication" is what someone asking
86
+ // about "oidc" wants, far more than a page that merely mentions it.
87
+ const headingTokens = new Set([...tokenize(chunk.heading ?? ''), ...tokenize(chunk.docTitle ?? '')]);
84
88
  let raw = 0;
85
89
  for (const term of terms) {
86
90
  const weight = weights.get(term) ?? 0;
87
91
  if (chunk.tokens.has(term)) raw += weight;
88
92
  if (boost?.has(term)) raw += weight;
93
+ if (headingTokens.has(term)) raw += weight * 3;
89
94
  }
90
95
  // Length-penalize (not -reward): a huge page (e.g. an autogenerated CLI flag
91
96
  // dump that mentions nearly every term) is divided down, but short sections