daftari 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/LICENSE +21 -0
- package/README.md +259 -0
- package/dist/access/locks.d.ts +19 -0
- package/dist/access/locks.d.ts.map +1 -0
- package/dist/access/locks.js +112 -0
- package/dist/access/locks.js.map +1 -0
- package/dist/access/rbac.d.ts +18 -0
- package/dist/access/rbac.d.ts.map +1 -0
- package/dist/access/rbac.js +48 -0
- package/dist/access/rbac.js.map +1 -0
- package/dist/cli.d.ts +4 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +216 -0
- package/dist/cli.js.map +1 -0
- package/dist/curation/lint.d.ts +20 -0
- package/dist/curation/lint.d.ts.map +1 -0
- package/dist/curation/lint.js +176 -0
- package/dist/curation/lint.js.map +1 -0
- package/dist/curation/provenance.d.ts +21 -0
- package/dist/curation/provenance.d.ts.map +1 -0
- package/dist/curation/provenance.js +80 -0
- package/dist/curation/provenance.js.map +1 -0
- package/dist/curation/staleness.d.ts +19 -0
- package/dist/curation/staleness.d.ts.map +1 -0
- package/dist/curation/staleness.js +67 -0
- package/dist/curation/staleness.js.map +1 -0
- package/dist/curation/tension.d.ts +20 -0
- package/dist/curation/tension.d.ts.map +1 -0
- package/dist/curation/tension.js +134 -0
- package/dist/curation/tension.js.map +1 -0
- package/dist/frontmatter/parser.d.ts +10 -0
- package/dist/frontmatter/parser.d.ts.map +1 -0
- package/dist/frontmatter/parser.js +29 -0
- package/dist/frontmatter/parser.js.map +1 -0
- package/dist/frontmatter/schema.d.ts +7 -0
- package/dist/frontmatter/schema.d.ts.map +1 -0
- package/dist/frontmatter/schema.js +115 -0
- package/dist/frontmatter/schema.js.map +1 -0
- package/dist/frontmatter/types.d.ts +41 -0
- package/dist/frontmatter/types.d.ts.map +1 -0
- package/dist/frontmatter/types.js +8 -0
- package/dist/frontmatter/types.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +94 -0
- package/dist/index.js.map +1 -0
- package/dist/search/bm25.d.ts +19 -0
- package/dist/search/bm25.d.ts.map +1 -0
- package/dist/search/bm25.js +115 -0
- package/dist/search/bm25.js.map +1 -0
- package/dist/search/hybrid.d.ts +38 -0
- package/dist/search/hybrid.d.ts.map +1 -0
- package/dist/search/hybrid.js +162 -0
- package/dist/search/hybrid.js.map +1 -0
- package/dist/search/reindex.d.ts +15 -0
- package/dist/search/reindex.d.ts.map +1 -0
- package/dist/search/reindex.js +189 -0
- package/dist/search/reindex.js.map +1 -0
- package/dist/search/vector.d.ts +9 -0
- package/dist/search/vector.d.ts.map +1 -0
- package/dist/search/vector.js +128 -0
- package/dist/search/vector.js.map +1 -0
- package/dist/server.d.ts +6 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +72 -0
- package/dist/server.js.map +1 -0
- package/dist/storage/index-db.d.ts +37 -0
- package/dist/storage/index-db.d.ts.map +1 -0
- package/dist/storage/index-db.js +145 -0
- package/dist/storage/index-db.js.map +1 -0
- package/dist/storage/local.d.ts +6 -0
- package/dist/storage/local.d.ts.map +1 -0
- package/dist/storage/local.js +57 -0
- package/dist/storage/local.js.map +1 -0
- package/dist/tools/curation.d.ts +22 -0
- package/dist/tools/curation.d.ts.map +1 -0
- package/dist/tools/curation.js +202 -0
- package/dist/tools/curation.js.map +1 -0
- package/dist/tools/read.d.ts +74 -0
- package/dist/tools/read.d.ts.map +1 -0
- package/dist/tools/read.js +254 -0
- package/dist/tools/read.js.map +1 -0
- package/dist/tools/search.d.ts +13 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +190 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/write.d.ts +18 -0
- package/dist/tools/write.d.ts.map +1 -0
- package/dist/tools/write.js +465 -0
- package/dist/tools/write.js.map +1 -0
- package/dist/utils/config.d.ts +12 -0
- package/dist/utils/config.d.ts.map +1 -0
- package/dist/utils/config.js +94 -0
- package/dist/utils/config.js.map +1 -0
- package/dist/utils/git.d.ts +23 -0
- package/dist/utils/git.d.ts.map +1 -0
- package/dist/utils/git.js +114 -0
- package/dist/utils/git.js.map +1 -0
- package/package.json +69 -0
- package/templates/config.yaml +31 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
// BM25 lexical ranking, hand-rolled and dependency-free.
|
|
2
|
+
//
|
|
3
|
+
// BM25 scores a document for a query by summing, over each query term, an IDF
|
|
4
|
+
// weight times a saturating term-frequency factor. It is the lexical half of
|
|
5
|
+
// hybrid search: it rewards exact word overlap, which embeddings tend to blur.
|
|
6
|
+
//
|
|
7
|
+
// The vault is a curated knowledge base (tens to low-hundreds of documents),
|
|
8
|
+
// so the whole corpus is ranked in memory on every query. No inverted index.
|
|
9
|
+
// Okapi BM25 free parameters. k1 controls term-frequency saturation; b controls
|
|
10
|
+
// how strongly document length normalises the score. These are the standard
|
|
11
|
+
// defaults and need no tuning at this corpus size.
|
|
12
|
+
const K1 = 1.5;
const B = 0.75;
// Common English words carry no discriminating signal; dropping them keeps IDF
// meaningful and snippets pointed at content words.
const STOPWORDS = new Set([
    "a",
    "an",
    "and",
    "are",
    "as",
    "at",
    "be",
    "but",
    "by",
    "for",
    "from",
    "has",
    "have",
    "he",
    "her",
    "his",
    "in",
    "is",
    "it",
    "its",
    "of",
    "on",
    "or",
    "she",
    "that",
    "the",
    "their",
    "them",
    "they",
    "this",
    "to",
    "was",
    "were",
    "will",
    "with",
    "you",
    "your",
]);
// Token separator: any run of characters that are not letters, digits, or
// combining marks. Unicode-aware on purpose — an ASCII-only class would cut
// "café" down to "caf" and drop non-Latin text entirely. For plain ASCII input
// it splits exactly where /[^a-z0-9]+/ would.
const TOKEN_SEPARATOR = /[^\p{L}\p{N}\p{M}]+/u;
// Lowercases, splits on any run of non-letter/non-digit characters, and drops
// stopwords and 1-character fragments. Used identically for documents and
// queries so the term spaces line up.
//
// text: raw text (title, tags, body, or a user query).
// Returns the surviving tokens in their original order; duplicates are kept so
// callers can count term frequency.
export function tokenize(text) {
    return text
        .toLowerCase()
        .split(TOKEN_SEPARATOR)
        .filter((t) => t.length > 1 && !STOPWORDS.has(t));
}
|
|
64
|
+
// Builds the in-memory BM25 model for a corpus.
//
// docs: array of { path, tokens } where tokens is the already-tokenized text.
// Returns:
//   termFreqs    path -> (term -> occurrences in that document)
//   docFreqs     term -> number of documents containing the term
//   docLengths   path -> token count
//   docCount     number of input documents
//   avgDocLength mean token count, or 0 for an empty corpus
export function buildBm25(docs) {
    const termFreqs = new Map();
    const docFreqs = new Map();
    const docLengths = new Map();
    let tokenTotal = 0;
    for (const { path, tokens } of docs) {
        const counts = new Map();
        for (const token of tokens) {
            const seen = counts.get(token);
            if (seen === undefined) {
                // First sighting in this document: it counts once toward the
                // term's document frequency.
                counts.set(token, 1);
                docFreqs.set(token, (docFreqs.get(token) ?? 0) + 1);
            }
            else {
                counts.set(token, seen + 1);
            }
        }
        termFreqs.set(path, counts);
        docLengths.set(path, tokens.length);
        tokenTotal += tokens.length;
    }
    const docCount = docs.length;
    const avgDocLength = docCount > 0 ? tokenTotal / docCount : 0;
    return { termFreqs, docFreqs, docLengths, docCount, avgDocLength };
}
|
|
89
|
+
// Ranks every document in the model against the query and returns
// { path, score } entries ordered best-first. Only documents that share at
// least one term with the query (score > 0) are returned; the rest are left
// out rather than reported with a zero score.
//
// model: the object produced by buildBm25.
// queryTokens: tokenized query; repeated tokens count once.
export function searchBm25(model, queryTokens) {
    // The IDF of a term does not depend on the document, so work it out once
    // per distinct query term. The +1 inside the log keeps it non-negative
    // even for terms present in more than half the corpus.
    const idfByTerm = new Map();
    for (const term of new Set(queryTokens)) {
        const df = model.docFreqs.get(term) ?? 0;
        idfByTerm.set(term, Math.log(1 + (model.docCount - df + 0.5) / (df + 0.5)));
    }
    const ranked = [];
    for (const [path, counts] of model.termFreqs) {
        const docLength = model.docLengths.get(path) ?? 0;
        let total = 0;
        for (const [term, idf] of idfByTerm) {
            const freq = counts.get(term) ?? 0;
            if (freq !== 0) {
                // Saturating term-frequency factor with length normalisation;
                // `|| 1` guards the division when the corpus has no tokens.
                const denom = freq + K1 * (1 - B + (B * docLength) / (model.avgDocLength || 1));
                total += idf * ((freq * (K1 + 1)) / denom);
            }
        }
        if (total > 0) {
            ranked.push({ path, score: total });
        }
    }
    ranked.sort((left, right) => right.score - left.score);
    return ranked;
}
|
|
115
|
+
//# sourceMappingURL=bm25.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,EAAE;AACF,8EAA8E;AAC9E,6EAA6E;AAC7E,+EAA+E;AAC/E,EAAE;AACF,6EAA6E;AAC7E,6EAA6E;AAE7E,gFAAgF;AAChF,4EAA4E;AAC5E,mDAAmD;AACnD,MAAM,EAAE,GAAG,GAAG,CAAC;AACf,MAAM,CAAC,GAAG,IAAI,CAAC;AAEf,+EAA+E;AAC/E,oDAAoD;AACpD,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACxB,GAAG;IACH,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,MAAM;IACN,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,IAAI;IACJ,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;CACP,CAAC,CAAC;AAEH,0EAA0E;AAC1E,2EAA2E;AAC3E,uBAAuB;AACvB,MAAM,UAAU,QAAQ,CAAC,IAAY;IACnC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,YAAY,CAAC;SACnB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AACtD,CAAC;AAkBD,MAAM,UAAU,SAAS,CAAC,IAAoB;IAC5C,MAAM,SAAS,GAAG,IAAI,GAAG,EAA+B,CAAC;IACzD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC3C,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC7C,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;QACrC,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC;YAC9B,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACxC,CAAC;QACD,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC5B,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAC5C,WAAW,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;YAC7B,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED,OAAO;QACL,SAAS;QACT,QAAQ;QACR,UAAU;QACV,QAAQ,EAAE,IAAI,CAAC,MAAM;QACrB,YAAY,EAAE,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;KAC9D,CAAC;AACJ
,CAAC;AAOD,wEAAwE;AACxE,6EAA6E;AAC7E,2DAA2D;AAC3D,MAAM,UAAU,UAAU,CAAC,KAAgB,EAAE,WAAqB;IAChE,MAAM,IAAI,GAAc,EAAE,CAAC;IAC3B,MAAM,gBAAgB,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC;IAEnD,KAAK,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,IAAI,IAAI,gBAAgB,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC/B,IAAI,IAAI,KAAK,CAAC;gBAAE,SAAS;YACzB,MAAM,EAAE,GAAG,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACzC,sEAAsE;YACtE,4CAA4C;YAC5C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,GAAG,EAAE,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC;YACnE,MAAM,KAAK,GAAG,IAAI,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC;YAChF,KAAK,IAAI,GAAG,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC;QAC7C,CAAC;QACD,IAAI,KAAK,GAAG,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACvC,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { type Result } from "../frontmatter/types.js";
|
|
2
|
+
import { type IndexDb } from "../storage/index-db.js";
|
|
3
|
+
/** Multipliers applied to the normalised lexical and semantic scores when they are mixed into one hit score. */
export interface HybridWeights {
    /** Weight of the normalised BM25 (lexical) score. */
    bm25: number;
    /** Weight of the normalised vector (semantic) score. */
    vector: number;
}
/** Weights used when the caller supplies none: an even 0.5 / 0.5 split. */
export declare const DEFAULT_WEIGHTS: HybridWeights;
/** One ranked document. */
export interface HybridHit {
    /** Path of the indexed document. */
    path: string;
    /** Title copied from the indexed document. */
    title: string;
    /** Collection copied from the indexed document. */
    collection: string;
    /** Status copied from the indexed document. */
    status: string;
    /** Combined score: `weights.bm25 * bm25Score + weights.vector * vectorScore`. */
    score: number;
    /** BM25 score divided by the top BM25 score; 0 when the document had no lexical match. */
    bm25Score: number;
    /** Best chunk similarity divided by the top similarity; 0 when no vector score exists for the document. */
    vectorScore: number;
    /** Excerpt of the document body around the earliest query-term match, or the head of the body when no term is found. */
    snippet: string;
}
/** Payload returned by `hybridSearch`. */
export interface HybridSearchResult {
    /** The query string as received. */
    query: string;
    /** Number of entries in `hits`. */
    count: number;
    /** True when vector similarity contributed to the ranking. */
    vectorUsed: boolean;
    /** Weights actually applied; `{ bm25: 1, vector: 0 }` when `vectorUsed` is false. */
    weights: HybridWeights;
    /** Hits ordered by descending `score`, truncated to the requested limit. */
    hits: HybridHit[];
}
/** Optional tuning for `hybridSearch` and `relatedSearch`. */
export interface HybridSearchOptions {
    /** Score-mixing weights; defaults to `DEFAULT_WEIGHTS`. */
    weights?: HybridWeights;
    /** Maximum number of hits to return; defaults to 10. */
    limit?: number;
}
/**
 * Ranks indexed documents against a free-text query by mixing BM25 and vector
 * similarity. When the query cannot be embedded, or the index holds no
 * embeddings, the search falls back to lexical-only ranking and reports
 * `vectorUsed: false`.
 */
export declare function hybridSearch(db: IndexDb, query: string, options?: HybridSearchOptions): Promise<Result<HybridSearchResult, Error>>;
/** Payload returned by `relatedSearch`. */
export interface RelatedSearchResult {
    /** Path of the source document the results relate to. */
    path: string;
    /** Number of entries in `hits`. */
    count: number;
    /** True when vector similarity contributed to the ranking. */
    vectorUsed: boolean;
    /** Weights actually applied; `{ bm25: 1, vector: 0 }` when `vectorUsed` is false. */
    weights: HybridWeights;
    /** Related documents ordered by descending `score`; the source document is never included. */
    hits: HybridHit[];
}
/**
 * Finds documents related to an already-indexed document, using that
 * document's stored tokens and the mean of its stored chunk embeddings as the
 * query. Returns an error result when `path` is not in the index.
 */
export declare function relatedSearch(db: IndexDb, path: string, options?: HybridSearchOptions): Result<RelatedSearchResult, Error>;
|
|
38
|
+
//# sourceMappingURL=hybrid.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":"AAUA,OAAO,EAAM,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAKL,KAAK,OAAO,EACb,MAAM,wBAAwB,CAAC;AAIhC,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,eAAO,MAAM,eAAe,EAAE,aAA0C,CAAC;AAEzE,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,OAAO,CAAC;IACpB,OAAO,EAAE,aAAa,CAAC;IACvB,IAAI,EAAE,SAAS,EAAE,CAAC;CACnB;AA8GD,MAAM,WAAW,mBAAmB;IAClC,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAGD,wBAAsB,YAAY,CAChC,EAAE,EAAE,OAAO,EACX,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,mBAAwB,GAChC,OAAO,CAAC,MAAM,CAAC,kBAAkB,EAAE,KAAK,CAAC,CAAC,CAqB5C;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,OAAO,CAAC;IACpB,OAAO,EAAE,aAAa,CAAC;IACvB,IAAI,EAAE,SAAS,EAAE,CAAC;CACnB;AAOD,wBAAgB,aAAa,CAC3B,EAAE,EAAE,OAAO,EACX,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,mBAAwB,GAChC,MAAM,CAAC,mBAAmB,EAAE,KAAK,CAAC,CA8BpC"}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
// Hybrid search: combine BM25 lexical ranking with vector semantic ranking.
|
|
2
|
+
//
|
|
3
|
+
// Each ranker produces raw scores on its own scale, so both are max-normalised
|
|
4
|
+
// to [0, 1] (divide by the top score) before being mixed by weight. Default
|
|
5
|
+
// weighting is an even 0.5 / 0.5 split.
|
|
6
|
+
//
|
|
7
|
+
// Vector ranking is best-effort. If the query cannot be embedded (model
|
|
8
|
+
// unavailable) or the index holds no embeddings, the search degrades to
|
|
9
|
+
// lexical-only and reports vectorUsed: false rather than failing.
|
|
10
|
+
import { ok } from "../frontmatter/types.js";
|
|
11
|
+
import { getAllChunks, getAllDocuments, getChunksForPath, getDocument, } from "../storage/index-db.js";
|
|
12
|
+
import { buildBm25, searchBm25, tokenize } from "./bm25.js";
|
|
13
|
+
import { cosineSimilarity, embedQuery, meanEmbedding } from "./vector.js";
|
|
14
|
+
export const DEFAULT_WEIGHTS = { bm25: 0.5, vector: 0.5 };
const SNIPPET_RADIUS = 140;
// Pulls a readable excerpt from a document body, centred on the earliest
// whole-word occurrence of any query term. Falls back to the document head
// when no term is found (e.g. a purely semantic match).
//
// content: raw document body; whitespace runs are collapsed to single spaces.
// queryTokens: lowercase tokens as produced by tokenize(); duplicates are fine.
// Returns "" for a blank body, otherwise up to SNIPPET_RADIUS characters either
// side of the match (or the first 2 * SNIPPET_RADIUS characters when nothing
// matches), with "…" marking each truncated end.
function makeSnippet(content, queryTokens) {
    const collapsed = content.replace(/\s+/g, " ").trim();
    if (collapsed.length === 0)
        return "";
    const lower = collapsed.toLowerCase();
    // A match only counts when it is not glued to an ASCII letter or digit on
    // either side; otherwise "ai" would centre the snippet on "said".
    const wordChar = /[a-z0-9]/;
    const isWordChar = (ch) => ch !== undefined && wordChar.test(ch);
    let hitAt = -1;
    // relatedSearch passes a whole document's token list, so dedupe before
    // scanning the body once per term.
    for (const term of new Set(queryTokens)) {
        if (term.length === 0)
            continue;
        let at = lower.indexOf(term);
        // Only positions earlier than the best hit so far are worth checking.
        while (at !== -1 && (hitAt === -1 || at < hitAt)) {
            if (!isWordChar(lower[at - 1]) && !isWordChar(lower[at + term.length])) {
                hitAt = at;
                break;
            }
            at = lower.indexOf(term, at + 1);
        }
        if (hitAt === 0)
            break; // nothing can start earlier than the first character
    }
    if (hitAt === -1) {
        return collapsed.length > SNIPPET_RADIUS * 2
            ? `${collapsed.slice(0, SNIPPET_RADIUS * 2)}…`
            : collapsed;
    }
    const start = Math.max(0, hitAt - SNIPPET_RADIUS);
    const end = Math.min(collapsed.length, hitAt + SNIPPET_RADIUS);
    let snippet = collapsed.slice(start, end);
    if (start > 0)
        snippet = `…${snippet}`;
    if (end < collapsed.length)
        snippet = `${snippet}…`;
    return snippet;
}
|
|
44
|
+
// Rescales a path -> score map so that the largest score becomes 1.0.
// When the map is empty, or no score is above zero, every entry maps to 0.
// Scores below zero are divided like any other and therefore stay negative.
function normalize(scores) {
    const entries = Array.from(scores);
    const top = entries.reduce((best, [, value]) => (value > best ? value : best), 0);
    const rescale = top === 0 ? () => 0 : (value) => value / top;
    return new Map(entries.map(([key, value]) => [key, rescale(value)]));
}
|
|
55
|
+
// Core ranker shared by query search and related-document search. queryTokens
// drives BM25; queryEmbedding (when present) drives vector similarity against
// every embedded chunk, taking each document's best-matching chunk.
//
// opts.weights     score-mixing weights used when a vector signal exists
// opts.limit       maximum number of hits returned
// opts.excludePath path left out of the results (the source document in a
//                  related-document search), or undefined
//
// Returns { hits, vectorUsed }: hits sorted by descending combined score and
// cut to opts.limit; vectorUsed is true when a query embedding was supplied
// and at least one stored chunk carries an embedding.
function rankDocuments(db, queryTokens, queryEmbedding, opts) {
    const documents = getAllDocuments(db);
    const byPath = new Map(documents.map((d) => [d.path, d]));
    const bm25Model = buildBm25(documents.map((d) => ({ path: d.path, tokens: d.tokens })));
    // Only rankable candidates may feed normalisation. In a related-document
    // search the excluded source matches itself better than anything else, so
    // leaving it in would make it the maximum of both rankers: no returned hit
    // could reach 1.0 and the BM25 / vector balance would be distorted.
    const bm25Raw = new Map();
    for (const hit of searchBm25(bm25Model, queryTokens)) {
        if (hit.path === opts.excludePath)
            continue;
        bm25Raw.set(hit.path, hit.score);
    }
    const vectorRaw = new Map();
    let vectorUsed = false;
    if (queryEmbedding) {
        for (const chunk of getAllChunks(db)) {
            if (!chunk.embedding)
                continue;
            vectorUsed = true;
            // Skip the excluded document and chunks whose document row is
            // missing; neither can appear in the results.
            if (chunk.path === opts.excludePath || !byPath.has(chunk.path))
                continue;
            const sim = cosineSimilarity(queryEmbedding, chunk.embedding);
            const prev = vectorRaw.get(chunk.path) ?? -Infinity;
            if (sim > prev)
                vectorRaw.set(chunk.path, sim);
        }
    }
    const bm25Norm = normalize(bm25Raw);
    const vectorNorm = normalize(vectorRaw);
    // With no usable vector signal, lexical ranking carries the full weight.
    const weights = vectorUsed ? opts.weights : { bm25: 1, vector: 0 };
    const candidates = new Set([...bm25Norm.keys(), ...vectorNorm.keys()]);
    const hits = [];
    for (const path of candidates) {
        const doc = byPath.get(path);
        if (!doc)
            continue;
        const bm25Score = bm25Norm.get(path) ?? 0;
        const vectorScore = vectorNorm.get(path) ?? 0;
        const score = weights.bm25 * bm25Score + weights.vector * vectorScore;
        // Documents with no positive evidence are dropped rather than listed.
        if (score <= 0)
            continue;
        hits.push({
            path,
            title: doc.title,
            collection: doc.collection,
            status: doc.status,
            score,
            bm25Score,
            vectorScore,
            snippet: makeSnippet(doc.content, queryTokens),
        });
    }
    hits.sort((a, b) => b.score - a.score);
    return { hits: hits.slice(0, opts.limit), vectorUsed };
}
|
|
110
|
+
// Free-text search over the vault index.
//
// The query is tokenized for BM25 and embedded for vector similarity. A failed
// embedding is not an error: ranking proceeds without a query vector and the
// result reports vectorUsed: false with lexical-only weights.
//
// options.weights defaults to DEFAULT_WEIGHTS, options.limit to 10.
export async function hybridSearch(db, query, options = {}) {
    const requestedWeights = options.weights ?? DEFAULT_WEIGHTS;
    const maxHits = options.limit ?? 10;
    const queryTokens = tokenize(query);
    const embedded = await embedQuery(query);
    const ranked = rankDocuments(db, queryTokens, embedded.ok ? embedded.value : null, {
        weights: requestedWeights,
        limit: maxHits,
        excludePath: undefined,
    });
    // Report the weights that were really applied, not the ones requested.
    const appliedWeights = ranked.vectorUsed ? requestedWeights : { bm25: 1, vector: 0 };
    return ok({
        query,
        count: ranked.hits.length,
        vectorUsed: ranked.vectorUsed,
        weights: appliedWeights,
        hits: ranked.hits,
    });
}
|
|
130
|
+
// "More like this" for a document that is already in the index.
//
// The source document acts as the query: its stored tokens feed BM25 and the
// mean of its stored chunk embeddings feeds vector similarity, so no embedding
// call is made here. The source is passed as excludePath so it never appears
// in its own results. An unindexed path yields an error result.
//
// options.weights defaults to DEFAULT_WEIGHTS, options.limit to 10.
export function relatedSearch(db, path, options = {}) {
    const requestedWeights = options.weights ?? DEFAULT_WEIGHTS;
    const maxHits = options.limit ?? 10;
    const source = getDocument(db, path);
    if (!source) {
        const error = new Error(`document not indexed: ${path} (try vault_reindex)`);
        return { ok: false, error };
    }
    // Keep only the chunks that actually carry an embedding.
    const storedVectors = [];
    for (const chunk of getChunksForPath(db, path)) {
        if (chunk.embedding !== null) {
            storedVectors.push(chunk.embedding);
        }
    }
    const ranked = rankDocuments(db, source.tokens, meanEmbedding(storedVectors), {
        weights: requestedWeights,
        limit: maxHits,
        excludePath: path,
    });
    // Report the weights that were really applied, not the ones requested.
    const appliedWeights = ranked.vectorUsed ? requestedWeights : { bm25: 1, vector: 0 };
    return ok({
        path,
        count: ranked.hits.length,
        vectorUsed: ranked.vectorUsed,
        weights: appliedWeights,
        hits: ranked.hits,
    });
}
|
|
162
|
+
//# sourceMappingURL=hybrid.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid.js","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,EAAE;AACF,+EAA+E;AAC/E,4EAA4E;AAC5E,wCAAwC;AACxC,EAAE;AACF,wEAAwE;AACxE,wEAAwE;AACxE,kEAAkE;AAElE,OAAO,EAAE,EAAE,EAAe,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EACL,YAAY,EACZ,eAAe,EACf,gBAAgB,EAChB,WAAW,GAEZ,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC5D,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAO1E,MAAM,CAAC,MAAM,eAAe,GAAkB,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC;AAqBzE,MAAM,cAAc,GAAG,GAAG,CAAC;AAE3B,yEAAyE;AACzE,6EAA6E;AAC7E,2CAA2C;AAC3C,SAAS,WAAW,CAAC,OAAe,EAAE,WAAqB;IACzD,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACtD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACtC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;IAEtC,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC;IACf,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,MAAM,EAAE,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,CAAC,IAAI,EAAE,GAAG,KAAK,CAAC;YAAE,KAAK,GAAG,EAAE,CAAC;IAC5D,CAAC;IAED,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;QACjB,OAAO,SAAS,CAAC,MAAM,GAAG,cAAc,GAAG,CAAC;YAC1C,CAAC,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,GAAG,CAAC,CAAC,GAAG;YAC9C,CAAC,CAAC,SAAS,CAAC;IAChB,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,cAAc,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,EAAE,KAAK,GAAG,cAAc,CAAC,CAAC;IAC/D,IAAI,OAAO,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAC1C,IAAI,KAAK,GAAG,CAAC;QAAE,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IACvC,IAAI,GAAG,GAAG,SAAS,CAAC,MAAM;QAAE,OAAO,GAAG,GAAG,OAAO,GAAG,CAAC;IACpD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,6EAA6E;AAC7E,wCAAwC;AACxC,SAAS,SAAS,CAAC,MAA2B;IAC5C,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE;QAAE,IAAI,CAAC,GAAG,GAAG;YAAE,GAAG,GAAG,CAAC,CAAC;IACtD,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC
,CAAC,CAAC,CAAC,CAAC;IAChE,OAAO,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;AAC5D,CAAC;AAQD,8EAA8E;AAC9E,8EAA8E;AAC9E,oEAAoE;AACpE,SAAS,aAAa,CACpB,EAAW,EACX,WAAqB,EACrB,cAAmC,EACnC,IAAiB;IAEjB,MAAM,SAAS,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAE1D,MAAM,SAAS,GAAG,SAAS,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC;IACxF,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC1C,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,WAAW,CAAC,EAAE,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC5C,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,IAAI,cAAc,EAAE,CAAC;QACnB,KAAK,MAAM,KAAK,IAAI,YAAY,CAAC,EAAE,CAAC,EAAE,CAAC;YACrC,IAAI,CAAC,KAAK,CAAC,SAAS;gBAAE,SAAS;YAC/B,UAAU,GAAG,IAAI,CAAC;YAClB,MAAM,GAAG,GAAG,gBAAgB,CAAC,cAAc,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;YAC9D,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC;YACpD,IAAI,GAAG,GAAG,IAAI;gBAAE,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;IACpC,MAAM,UAAU,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;IAExC,yEAAyE;IACzE,MAAM,OAAO,GAAkB,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IAElF,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,EAAE,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAE/E,MAAM,IAAI,GAAgB,EAAE,CAAC;IAC7B,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,IAAI,KAAK,IAAI,CAAC,WAAW;YAAE,SAAS;QACxC,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC7B,IAAI,CAAC,GAAG;YAAE,SAAS;QACnB,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9C,MAAM,KAAK,GAAG,OAAO
,CAAC,IAAI,GAAG,SAAS,GAAG,OAAO,CAAC,MAAM,GAAG,WAAW,CAAC;QACtE,IAAI,KAAK,IAAI,CAAC;YAAE,SAAS;QACzB,IAAI,CAAC,IAAI,CAAC;YACR,IAAI;YACJ,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,KAAK;YACL,SAAS;YACT,WAAW;YACX,OAAO,EAAE,WAAW,CAAC,GAAG,CAAC,OAAO,EAAE,WAAW,CAAC;SAC/C,CAAC,CAAC;IACL,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACvC,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,UAAU,EAAE,CAAC;AACzD,CAAC;AAOD,mDAAmD;AACnD,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,EAAW,EACX,KAAa,EACb,UAA+B,EAAE;IAEjC,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,eAAe,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IAClC,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAEpC,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,CAAC;IAC5C,MAAM,cAAc,GAAG,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;IAEjE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,aAAa,CAAC,EAAE,EAAE,WAAW,EAAE,cAAc,EAAE;QAC1E,OAAO;QACP,KAAK;QACL,WAAW,EAAE,SAAS;KACvB,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;QACR,KAAK;QACL,KAAK,EAAE,IAAI,CAAC,MAAM;QAClB,UAAU;QACV,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE;QACtD,IAAI;KACL,CAAC,CAAC;AACL,CAAC;AAUD,8EAA8E;AAC9E,6EAA6E;AAC7E,6EAA6E;AAC7E,6EAA6E;AAC7E,uBAAuB;AACvB,MAAM,UAAU,aAAa,CAC3B,EAAW,EACX,IAAY,EACZ,UAA+B,EAAE;IAEjC,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,eAAe,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IAElC,MAAM,GAAG,GAAG,WAAW,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC;IAClC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO;YACL,EAAE,EAAE,KAAK;YACT,KAAK,EAAE,IAAI,KAAK,CAAC,yBAAyB,IAAI,sBAAsB,CAAC;SACtE,CAAC;IACJ,CAAC;IAED,MAAM,YAAY,GAAG,gBAAgB,CAAC,EAAE,EAAE,IAAI,CAAC;SAC5C,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;SACvB,MAAM,CAAC,CAAC,CAAC,EAAqB,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAChD,MAAM,cAAc,GAAG,aAAa,CAAC,YAAY,CAAC,CAAC;IAEnD,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,aAAa,CAAC,EAAE,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,EAAE;QACzE,OA
AO;QACP,KAAK;QACL,WAAW,EAAE,IAAI;KAClB,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;QACR,IAAI;QACJ,KAAK,EAAE,IAAI,CAAC,MAAM;QAClB,UAAU;QACV,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE;QACtD,IAAI;KACL,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { type Result } from "../frontmatter/types.js";
|
|
2
|
+
/** Summary returned by `reindexVault`. */
export interface ReindexResult {
    /** Number of documents in the index — presumably after the rebuild; confirm against the implementation. */
    documentCount: number;
    /** Number of chunks written to the index. */
    chunkCount: number;
    /** False when the embedding model was unavailable and the vector column was left NULL. */
    vectorEnabled: boolean;
    /** Vault-relative paths of files that could not be staged (unreadable or unparseable) and were left out. */
    skipped: string[];
    /** NOTE(review): looks like a timestamp of the rebuild — confirm the format against the implementation. */
    indexedAt: string;
}
/** Rebuilds the search index for the vault rooted at `vaultRoot` from the markdown files on disk. */
export declare function reindexVault(vaultRoot: string): Promise<Result<ReindexResult, Error>>;
/** Summary returned by `indexDocument`. */
export interface IndexDocumentResult {
    /** Number of chunks written for the document — TODO confirm against the implementation. */
    chunkCount: number;
    /** Presumably false when embeddings could not be produced for the document; confirm against the implementation. */
    vectorEnabled: boolean;
}
/** Indexes the single document at `relPath` (relative to `vaultRoot`); details live in the implementation. */
export declare function indexDocument(vaultRoot: string, relPath: string): Promise<Result<IndexDocumentResult, Error>>;
|
|
15
|
+
//# sourceMappingURL=reindex.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reindex.d.ts","sourceRoot":"","sources":["../../src/search/reindex.ts"],"names":[],"mappings":"AAWA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAgB/D,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,OAAO,CAAC;IACvB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAyFD,wBAAsB,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC,CAsC3F;AAED,MAAM,WAAW,mBAAmB;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,OAAO,CAAC;CACxB;AAQD,wBAAsB,aAAa,CACjC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,MAAM,CAAC,mBAAmB,EAAE,KAAK,CAAC,CAAC,CAmD7C"}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
// Rebuilds the SQLite search index from the markdown files on disk.
|
|
2
|
+
//
|
|
3
|
+
// The index is a derived cache, so a reindex always clears it and rebuilds
|
|
4
|
+
// from scratch — there is no incremental update path to drift out of sync.
|
|
5
|
+
// Called on server start and by the vault_reindex tool / --reindex CLI flag.
|
|
6
|
+
//
|
|
7
|
+
// Embedding is best-effort: every chunk across the whole vault is embedded in
|
|
8
|
+
// one batch. If the model is unavailable the documents (and their BM25 tokens)
|
|
9
|
+
// still index; only the vector column is left NULL and vectorEnabled is false.
|
|
10
|
+
import { parseDocument } from "../frontmatter/parser.js";
|
|
11
|
+
import { err, ok } from "../frontmatter/types.js";
|
|
12
|
+
import { clearIndex, deleteDocument, documentCount, insertChunk, insertDocument, openIndexDb, setMeta, } from "../storage/index-db.js";
|
|
13
|
+
import { listFiles, readFile, resolveVaultPath } from "../storage/local.js";
|
|
14
|
+
import { tokenize } from "./bm25.js";
|
|
15
|
+
import { chunkText, EMBEDDING_DIM, embed } from "./vector.js";
|
|
16
|
+
// Reads and parses a single markdown file into the shape the index needs.
// Returns null when the file should be skipped (path cannot be resolved, file
// unreadable, or the document fails to parse) so a reindex never aborts on one
// bad file.
//
// vaultRoot: root directory of the vault.
// relPath: vault-relative path of the markdown file.
// Returns { doc, chunks }: doc is the row for the documents table (including
// its BM25 tokens) and chunks is the body split by chunkText.
async function stageOne(vaultRoot, relPath) {
    const resolved = resolveVaultPath(vaultRoot, relPath);
    if (!resolved.ok)
        return null;
    const file = await readFile(resolved.value);
    if (!file.ok)
        return null;
    const parsed = parseDocument(file.value);
    if (!parsed.ok)
        return null;
    const fm = parsed.value.frontmatter;
    const body = parsed.value.content;
    // Frontmatter is hand-written YAML: `tags` may be absent or not a list.
    // Calling .join on such a value would throw and take the whole reindex
    // down, so anything that is not an array is treated as "no tags".
    const tags = Array.isArray(fm.tags) ? fm.tags : [];
    // BM25 indexes title, tags, and body together so a title- or tag-only
    // match still ranks. A missing title contributes nothing instead of the
    // literal token "undefined".
    const tokens = tokenize(`${fm.title ?? ""} ${tags.join(" ")} ${body}`);
    return {
        doc: {
            path: relPath,
            title: fm.title,
            // Fall back to the first path segment when no collection is set.
            collection: fm.collection || (relPath.split("/")[0] ?? ""),
            domain: fm.domain,
            status: fm.status,
            confidence: fm.confidence,
            updated: fm.updated,
            tags,
            content: body,
            tokens,
        },
        chunks: chunkText(body),
    };
}
|
|
50
|
+
// Stages every file that listFiles reports for the vault. Files that stageOne
// declines are collected in `skipped` and do not stop the run; a failed
// listing is returned to the caller unchanged.
async function stageDocuments(vaultRoot) {
    const listing = await listFiles(vaultRoot);
    if (!listing.ok) {
        return listing;
    }
    const staged = [];
    const skipped = [];
    for (const relPath of listing.value) {
        const entry = await stageOne(vaultRoot, relPath);
        if (!entry) {
            skipped.push(relPath);
            continue;
        }
        staged.push(entry);
    }
    return ok({ staged, skipped });
}
|
|
68
|
+
// Clears the index and writes every staged document and its chunks inside a
// single db.transaction call. `embeddings` is a flat list in the same order
// the chunks are visited here (document by document, chunk by chunk); a
// missing entry is stored as null. Returns the number of chunks written.
function writeIndex(db, staged, embeddings) {
    // Doubles as the position in the flat `embeddings` list and as the
    // running total that is returned.
    let written = 0;
    const rebuild = db.transaction(() => {
        clearIndex(db);
        for (const entry of staged) {
            insertDocument(db, entry.doc);
            for (const [chunkIndex, text] of entry.chunks.entries()) {
                insertChunk(db, {
                    path: entry.doc.path,
                    chunkIndex,
                    text,
                    embedding: embeddings[written] ?? null,
                });
                written += 1;
            }
        }
    });
    rebuild();
    return written;
}
|
|
90
|
+
// Rebuilds the whole index for `vaultRoot`: stages every file, embeds all
// chunk texts in one embed() call, then rewrites the index and its metadata.
//
// A failed embed() does not fail the rebuild; the chunks are written with null
// embeddings and `vectorEnabled` is reported as false. Failures while staging
// or opening the database are returned as-is, and anything thrown during the
// write comes back as err("reindex write failed: ...").
export async function reindexVault(vaultRoot) {
    const staging = await stageDocuments(vaultRoot);
    if (!staging.ok) {
        return staging;
    }
    const { staged, skipped } = staging.value;
    // Flat list of chunk texts, in the order writeIndex walks them.
    const chunkTexts = staged.flatMap((entry) => entry.chunks);
    const embedded = await embed(chunkTexts);
    const vectorEnabled = embedded.ok;
    let embeddings;
    if (embedded.ok) {
        embeddings = embedded.value;
    }
    else {
        embeddings = new Array(chunkTexts.length).fill(null);
    }
    const opened = openIndexDb(vaultRoot);
    if (!opened.ok) {
        return opened;
    }
    const db = opened.value;
    const indexedAt = new Date().toISOString();
    try {
        const chunkCount = writeIndex(db, staged, embeddings);
        setMeta(db, "indexed_at", indexedAt);
        setMeta(db, "vector_enabled", String(vectorEnabled));
        setMeta(db, "embedding_dim", String(EMBEDDING_DIM));
        const summary = {
            documentCount: staged.length,
            chunkCount,
            vectorEnabled,
            skipped,
            indexedAt,
        };
        return ok(summary);
    }
    catch (cause) {
        const reason = cause instanceof Error ? cause.message : String(cause);
        return err(new Error(`reindex write failed: ${reason}`));
    }
    finally {
        // Runs on both the success and the failure path.
        db.close();
    }
}
|
|
130
|
+
// Incrementally updates the index for a single document after a write.
//
// If the index holds no documents yet it falls back to a full reindexVault, so
// the first write to a fresh vault still produces a complete index rather than
// a one-document one; in that case the returned chunkCount is the vault-wide
// total. Otherwise it re-stages just `relPath`, evicts its stale rows, and
// re-inserts, embedding only that document's chunks.
//
// Returns ok({ chunkCount, vectorEnabled }) on success. Returns an err Result
// when the database cannot be opened, the emptiness probe or the write throws,
// or stageOne rejects the file.
export async function indexDocument(vaultRoot, relPath) {
    const dbCheck = openIndexDb(vaultRoot);
    if (!dbCheck.ok)
        return dbCheck;
    let indexEmpty;
    try {
        indexEmpty = documentCount(dbCheck.value) === 0;
    }
    catch (e) {
        // Report a failed probe through the Result channel, like the write
        // failures below, instead of letting it escape as a rejection.
        const reason = e instanceof Error ? e.message : String(e);
        return err(new Error(`index update failed: ${reason}`));
    }
    finally {
        // Always release the probe handle, even when documentCount throws.
        dbCheck.value.close();
    }
    if (indexEmpty) {
        const full = await reindexVault(vaultRoot);
        if (!full.ok)
            return full;
        return ok({
            chunkCount: full.value.chunkCount,
            vectorEnabled: full.value.vectorEnabled,
        });
    }
    const staged = await stageOne(vaultRoot, relPath);
    if (!staged) {
        return err(new Error(`cannot index document: ${relPath}`));
    }
    const { doc, chunks } = staged;
    // Embedding is best-effort: on failure the chunks are still written, with
    // null embeddings, and vectorEnabled is reported as false.
    const embedResult = await embed(chunks);
    const vectorEnabled = embedResult.ok;
    const embeddings = embedResult.ok
        ? embedResult.value
        : chunks.map(() => null);
    const dbResult = openIndexDb(vaultRoot);
    if (!dbResult.ok)
        return dbResult;
    const db = dbResult.value;
    try {
        // Delete and re-insert run inside one db.transaction call.
        const write = db.transaction(() => {
            deleteDocument(db, doc.path);
            insertDocument(db, doc);
            chunks.forEach((text, chunkIndex) => {
                insertChunk(db, {
                    path: doc.path,
                    chunkIndex,
                    text,
                    embedding: embeddings[chunkIndex] ?? null,
                });
            });
        });
        write();
        return ok({ chunkCount: chunks.length, vectorEnabled });
    }
    catch (e) {
        const reason = e instanceof Error ? e.message : String(e);
        return err(new Error(`index update failed: ${reason}`));
    }
    finally {
        db.close();
    }
}
|
|
189
|
+
//# sourceMappingURL=reindex.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reindex.js","sourceRoot":"","sources":["../../src/search/reindex.ts"],"names":[],"mappings":"AAAA,oEAAoE;AACpE,EAAE;AACF,2EAA2E;AAC3E,2EAA2E;AAC3E,6EAA6E;AAC7E,EAAE;AACF,8EAA8E;AAC9E,+EAA+E;AAC/E,+EAA+E;AAE/E,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,yBAAyB,CAAC;AAC/D,OAAO,EACL,UAAU,EACV,cAAc,EACd,aAAa,EAGb,WAAW,EACX,cAAc,EACd,WAAW,EACX,OAAO,GACR,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC5E,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAe9D,0EAA0E;AAC1E,8EAA8E;AAC9E,0DAA0D;AAC1D,KAAK,UAAU,QAAQ,CAAC,SAAiB,EAAE,OAAe;IACxD,MAAM,QAAQ,GAAG,gBAAgB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IACtD,IAAI,CAAC,QAAQ,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC;IAC9B,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC5C,IAAI,CAAC,IAAI,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC;IAC1B,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,IAAI,CAAC,MAAM,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC;IAE5B,MAAM,EAAE,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC;IACpC,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC;IAClC,sEAAsE;IACtE,qBAAqB;IACrB,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;IAEpE,OAAO;QACL,GAAG,EAAE;YACH,IAAI,EAAE,OAAO;YACb,KAAK,EAAE,EAAE,CAAC,KAAK;YACf,UAAU,EAAE,EAAE,CAAC,UAAU,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1D,MAAM,EAAE,EAAE,CAAC,MAAM;YACjB,MAAM,EAAE,EAAE,CAAC,MAAM;YACjB,UAAU,EAAE,EAAE,CAAC,UAAU;YACzB,OAAO,EAAE,EAAE,CAAC,OAAO;YACnB,IAAI,EAAE,EAAE,CAAC,IAAI;YACb,OAAO,EAAE,IAAI;YACb,MAAM;SACP;QACD,MAAM,EAAE,SAAS,CAAC,IAAI,CAAC;KACxB,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,uEAAuE;AACvE,8BAA8B;AAC9B,KAAK,UAAU,cAAc,CAC3B,SAAiB;IAEjB,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,CAAC;IACxC,IAAI,CAAC,IAAI,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC;IAE1B,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACjC,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,SAAS,EA
AE,OAAO,CAAC,CAAC;QAC/C,IAAI,GAAG;YAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;;YACrB,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,EAAE,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,UAAU,CACjB,EAAW,EACX,MAAwB,EACxB,UAAmC;IAEnC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;QAChC,UAAU,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;YACrC,cAAc,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;YACxB,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,UAAU,EAAE,EAAE;gBAClC,WAAW,CAAC,EAAE,EAAE;oBACd,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,UAAU;oBACV,IAAI;oBACJ,SAAS,EAAE,UAAU,CAAC,MAAM,CAAC,IAAI,IAAI;iBACtC,CAAC,CAAC;gBACH,MAAM,IAAI,CAAC,CAAC;gBACZ,UAAU,IAAI,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IACH,KAAK,EAAE,CAAC;IACR,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,SAAiB;IAClD,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,SAAS,CAAC,CAAC;IAChD,IAAI,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,OAAO,CAAC;IAChC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC;IAE1C,mEAAmE;IACnE,MAAM,aAAa,GAAa,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,IAAI,MAAM;QAAE,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;IAExD,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,aAAa,CAAC,CAAC;IAC/C,MAAM,aAAa,GAAG,WAAW,CAAC,EAAE,CAAC;IACrC,MAAM,UAAU,GAA4B,WAAW,CAAC,EAAE;QACxD,CAAC,CAAC,WAAW,CAAC,KAAK;QACnB,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,QAAQ,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IACxC,IAAI,CAAC,QAAQ,CAAC,EAAE;QAAE,OAAO,QAAQ,CAAC;IAClC,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC;IAE1B,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC3C,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,UAAU,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QACtD,OAAO,CAAC,EAAE,EAAE,YAAY,EAAE,SAAS,CAAC,CAAC;QACrC,OAAO,CAAC,EAAE,EAAE,gBAAgB,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC;QACrD,OAAO,CAAC,EAAE,EAAE,eAAe,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC;QACpD,OAAO,EAAE,CAAC;YACR,aAAa,EAAE,MAAM,CAAC,MAAM;YAC5B,UAAU;YACV,aAAa;YACb,OAAO;YACP,SAAS;SACV,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC
;QACX,MAAM,MAAM,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1D,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,yBAAyB,MAAM,EAAE,CAAC,CAAC,CAAC;IAC3D,CAAC;YAAS,CAAC;QACT,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;AACH,CAAC;AAOD,uEAAuE;AACvE,EAAE;AACF,4EAA4E;AAC5E,6EAA6E;AAC7E,4EAA4E;AAC5E,gEAAgE;AAChE,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,SAAiB,EACjB,OAAe;IAEf,MAAM,OAAO,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IACvC,IAAI,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,OAAO,CAAC;IAChC,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACtD,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IAEtB,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC;QAC3C,IAAI,CAAC,IAAI,CAAC,EAAE;YAAE,OAAO,IAAI,CAAC;QAC1B,OAAO,EAAE,CAAC;YACR,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU;YACjC,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa;SACxC,CAAC,CAAC;IACL,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IAClD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,0BAA0B,OAAO,EAAE,CAAC,CAAC,CAAC;IAC7D,CAAC;IACD,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IAE/B,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,MAAM,aAAa,GAAG,WAAW,CAAC,EAAE,CAAC;IACrC,MAAM,UAAU,GAA4B,WAAW,CAAC,EAAE;QACxD,CAAC,CAAC,WAAW,CAAC,KAAK;QACnB,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;IAE3B,MAAM,QAAQ,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IACxC,IAAI,CAAC,QAAQ,CAAC,EAAE;QAAE,OAAO,QAAQ,CAAC;IAClC,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC;IAC1B,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YAChC,cAAc,CAAC,EAAE,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;YAC7B,cAAc,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;YACxB,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,UAAU,EAAE,EAAE;gBAClC,WAAW,CAAC,EAAE,EAAE;oBACd,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,UAAU;oBACV,IAAI;oBACJ,SAAS,EAAE,UAAU,CAAC,UAAU,CAAC,IAAI,IAAI;iBAC1C,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QACH,KAAK,EAAE,CAAC;QACR,OAAO,EAAE,CAAC,EAAE,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,EAAE,CAAC,CAAC;IAC1D,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,MAAM,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAA
C,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1D,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,wBAAwB,MAAM,EAAE,CAAC,CAAC,CAAC;IAC1D,CAAC;YAAS,CAAC;QACT,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { type Result } from "../frontmatter/types.js";
|
|
2
|
+
// Identifier of the embedding model. NOTE(review): presumably the model that
// embed()/embedQuery() load — confirm in src/search/vector.ts.
export declare const EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
|
|
3
|
+
// Recorded by reindex.js in the index metadata under "embedding_dim".
// NOTE(review): presumably the length of each vector embed() returns — confirm.
export declare const EMBEDDING_DIM = 384;
|
|
4
|
+
// Turns a text into a list of chunk strings; reindex.js embeds and stores each
// returned chunk as its own row. Chunk size and boundaries are not visible
// here — see src/search/vector.ts.
export declare function chunkText(text: string): string[];
|
|
5
|
+
// NOTE(review): presumably the cosine similarity of `a` and `b`; handling of
// zero vectors or mismatched lengths is not visible here — confirm.
export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
|
|
6
|
+
// Combines several vectors into one, or returns null. NOTE(review): presumably
// the element-wise mean, with null for an empty list — confirm.
export declare function meanEmbedding(vectors: Float32Array[]): Float32Array | null;
|
|
7
|
+
// Embeds a batch of texts. Resolves to a Result: a Float32Array list on
// success or an Error on failure (reindex.js treats a failure as "vectors
// disabled"). NOTE(review): reindex.js indexes the result by chunk position, so
// presumably there is one vector per input text, in input order — confirm.
export declare function embed(texts: string[]): Promise<Result<Float32Array[], Error>>;
|
|
8
|
+
// Embeds a single text; resolves to a Result holding one Float32Array on
// success or an Error on failure.
export declare function embedQuery(text: string): Promise<Result<Float32Array, Error>>;
|
|
9
|
+
//# sourceMappingURL=vector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"AAYA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAE/D,eAAO,MAAM,eAAe,4BAA4B,CAAC;AACzD,eAAO,MAAM,aAAa,MAAM,CAAC;AAQjC,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CA4BhD;AAGD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAczE;AAID,wBAAgB,aAAa,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,GAAG,IAAI,CAW1E;AAqBD,wBAAsB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,CAAC,CAAC,CAkBnF;AAGD,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC,CAMnF"}
|