@hevmind/ask 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/digest/expand.ts +16 -11
- package/src/search/prefilter.ts +5 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hevmind/ask",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.4",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "hev ask: a heading-anchored, agentic search overlay for Astro docs sites.",
|
|
6
6
|
"keywords": [
|
|
@@ -28,11 +28,11 @@
|
|
|
28
28
|
"ask": "./bin/ask.mjs"
|
|
29
29
|
},
|
|
30
30
|
"optionalDependencies": {
|
|
31
|
-
"@hevmind/ask-darwin-arm64": "0.3.
|
|
32
|
-
"@hevmind/ask-linux-
|
|
33
|
-
"@hevmind/ask-
|
|
34
|
-
"@hevmind/ask-
|
|
35
|
-
"@hevmind/ask-
|
|
31
|
+
"@hevmind/ask-darwin-arm64": "0.3.4",
|
|
32
|
+
"@hevmind/ask-linux-arm64": "0.3.4",
|
|
33
|
+
"@hevmind/ask-darwin-x64": "0.3.4",
|
|
34
|
+
"@hevmind/ask-win32-x64": "0.3.4",
|
|
35
|
+
"@hevmind/ask-linux-x64": "0.3.4"
|
|
36
36
|
},
|
|
37
37
|
"exports": {
|
|
38
38
|
".": "./src/index.ts",
|
package/src/digest/expand.ts
CHANGED
|
@@ -2,23 +2,28 @@ import { tokenize } from '../search/chunk.ts';
|
|
|
2
2
|
import type { GlossaryEntry } from './schema';
|
|
3
3
|
|
|
4
4
|
export function expandQueryTerms(query: string, glossary: GlossaryEntry[], cap = 24): string[] {
|
|
5
|
-
const
|
|
6
|
-
if (!
|
|
5
|
+
const queryTokens = new Set(tokenize(query));
|
|
6
|
+
if (!queryTokens.size) return [];
|
|
7
|
+
const terms = new Set(queryTokens);
|
|
7
8
|
|
|
8
9
|
for (const entry of glossary) {
|
|
9
10
|
if (terms.size >= cap) break;
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
// Expand only when the query contains a full glossary phrase — the term or one
|
|
12
|
+
// of its aliases, every token present. Matching on any shared token (e.g. the
|
|
13
|
+
// ubiquitous "authentication") drags in every entry that merely mentions it,
|
|
14
|
+
// which floods results once the glossary is large.
|
|
15
|
+
const phrases = [entry.term, ...entry.aliases].map((phrase) => tokenize(phrase)).filter((tokens) => tokens.length);
|
|
16
|
+
const matched = phrases.some((phrase) => phrase.every((token) => queryTokens.has(token)));
|
|
17
|
+
if (!matched) continue;
|
|
18
|
+
for (const phrase of phrases) {
|
|
19
|
+
for (const token of phrase) {
|
|
20
|
+
if (token.length < 3) continue; // skip noisy short tokens like "ad", "id"
|
|
21
|
+
terms.add(token);
|
|
22
|
+
if (terms.size >= cap) break;
|
|
23
|
+
}
|
|
14
24
|
if (terms.size >= cap) break;
|
|
15
25
|
}
|
|
16
26
|
}
|
|
17
27
|
|
|
18
28
|
return [...terms];
|
|
19
29
|
}
|
|
20
|
-
|
|
21
|
-
function intersects(a: Set<string>, b: Set<string>): boolean {
|
|
22
|
-
for (const item of b) if (a.has(item)) return true;
|
|
23
|
-
return false;
|
|
24
|
-
}
|
package/src/search/prefilter.ts
CHANGED
|
@@ -81,11 +81,16 @@ export function prefilter(
|
|
|
81
81
|
const scored = chunks
|
|
82
82
|
.map((chunk) => {
|
|
83
83
|
const boost = signal.get(chunk.id);
|
|
84
|
+
// A query term in the section heading or page title is a strong topical
|
|
85
|
+
// signal — the page titled "OIDC Authentication" is what someone asking
|
|
86
|
+
// about "oidc" wants, far more than a page that merely mentions it.
|
|
87
|
+
const headingTokens = new Set([...tokenize(chunk.heading ?? ''), ...tokenize(chunk.docTitle ?? '')]);
|
|
84
88
|
let raw = 0;
|
|
85
89
|
for (const term of terms) {
|
|
86
90
|
const weight = weights.get(term) ?? 0;
|
|
87
91
|
if (chunk.tokens.has(term)) raw += weight;
|
|
88
92
|
if (boost?.has(term)) raw += weight;
|
|
93
|
+
if (headingTokens.has(term)) raw += weight * 3;
|
|
89
94
|
}
|
|
90
95
|
// Length-penalize (not -reward): a huge page (e.g. an autogenerated CLI flag
|
|
91
96
|
// dump that mentions nearly every term) is divided down, but short sections
|