@hevmind/ask 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/digest/expand.ts +16 -11
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hevmind/ask",
|
|
3
|
-
"version": "0.3.3",
|
|
3
|
+
"version": "0.3.4",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "hev ask: a heading-anchored, agentic search overlay for Astro docs sites.",
|
|
6
6
|
"keywords": [
|
|
@@ -28,11 +28,11 @@
|
|
|
28
28
|
"ask": "./bin/ask.mjs"
|
|
29
29
|
},
|
|
30
30
|
"optionalDependencies": {
|
|
31
|
-
"@hevmind/ask-darwin-
|
|
32
|
-
"@hevmind/ask-
|
|
33
|
-
"@hevmind/ask-
|
|
34
|
-
"@hevmind/ask-win32-x64": "0.3.3",
|
|
35
|
-
"@hevmind/ask-linux-
|
|
31
|
+
"@hevmind/ask-darwin-arm64": "0.3.4",
|
|
32
|
+
"@hevmind/ask-linux-arm64": "0.3.4",
|
|
33
|
+
"@hevmind/ask-darwin-x64": "0.3.4",
|
|
34
|
+
"@hevmind/ask-win32-x64": "0.3.4",
|
|
35
|
+
"@hevmind/ask-linux-x64": "0.3.4"
|
|
36
36
|
},
|
|
37
37
|
"exports": {
|
|
38
38
|
".": "./src/index.ts",
|
package/src/digest/expand.ts
CHANGED
|
@@ -2,23 +2,28 @@ import { tokenize } from '../search/chunk.ts';
|
|
|
2
2
|
import type { GlossaryEntry } from './schema';
|
|
3
3
|
|
|
4
4
|
export function expandQueryTerms(query: string, glossary: GlossaryEntry[], cap = 24): string[] {
|
|
5
|
-
const
|
|
6
|
-
if (!
|
|
5
|
+
const queryTokens = new Set(tokenize(query));
|
|
6
|
+
if (!queryTokens.size) return [];
|
|
7
|
+
const terms = new Set(queryTokens);
|
|
7
8
|
|
|
8
9
|
for (const entry of glossary) {
|
|
9
10
|
if (terms.size >= cap) break;
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
// Expand only when the query contains a full glossary phrase — the term or one
|
|
12
|
+
// of its aliases, every token present. Matching on any shared token (e.g. the
|
|
13
|
+
// ubiquitous "authentication") drags in every entry that merely mentions it,
|
|
14
|
+
// which floods results once the glossary is large.
|
|
15
|
+
const phrases = [entry.term, ...entry.aliases].map((phrase) => tokenize(phrase)).filter((tokens) => tokens.length);
|
|
16
|
+
const matched = phrases.some((phrase) => phrase.every((token) => queryTokens.has(token)));
|
|
17
|
+
if (!matched) continue;
|
|
18
|
+
for (const phrase of phrases) {
|
|
19
|
+
for (const token of phrase) {
|
|
20
|
+
if (token.length < 3) continue; // skip noisy short tokens like "ad", "id"
|
|
21
|
+
terms.add(token);
|
|
22
|
+
if (terms.size >= cap) break;
|
|
23
|
+
}
|
|
14
24
|
if (terms.size >= cap) break;
|
|
15
25
|
}
|
|
16
26
|
}
|
|
17
27
|
|
|
18
28
|
return [...terms];
|
|
19
29
|
}
|
|
20
|
-
|
|
21
|
-
function intersects(a: Set<string>, b: Set<string>): boolean {
|
|
22
|
-
for (const item of b) if (a.has(item)) return true;
|
|
23
|
-
return false;
|
|
24
|
-
}
|