@rekal/mem 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db-BMh1OP4b.mjs +294 -0
- package/dist/doc-DnYN4jAU.mjs +116 -0
- package/dist/embed-rUMZxqed.mjs +100 -0
- package/dist/fs-DMp26Byo.mjs +32 -0
- package/dist/glob.d.mts +27 -0
- package/dist/glob.mjs +132 -0
- package/dist/index.d.mts +1465 -0
- package/dist/index.mjs +351 -0
- package/dist/llama-CT3dc9Cn.mjs +75 -0
- package/dist/models-DFQSgBNr.mjs +77 -0
- package/dist/openai-j2_2GM4J.mjs +76 -0
- package/dist/progress-B1JdNapX.mjs +263 -0
- package/dist/query-VFSpErTB.mjs +125 -0
- package/dist/runtime.node-DlQPaGrV.mjs +35 -0
- package/dist/search-BllHWtZF.mjs +166 -0
- package/dist/store-DE7S35SS.mjs +137 -0
- package/dist/transformers-CJ3QA2PK.mjs +55 -0
- package/dist/uri-CehXVDGB.mjs +28 -0
- package/dist/util-DNyrmcA3.mjs +11 -0
- package/dist/vfs-CNQbkhsf.mjs +222 -0
- package/foo.ts +3 -0
- package/foo2.ts +20 -0
- package/package.json +61 -0
- package/src/context.ts +77 -0
- package/src/db.ts +464 -0
- package/src/doc.ts +163 -0
- package/src/embed/base.ts +122 -0
- package/src/embed/index.ts +67 -0
- package/src/embed/llama.ts +111 -0
- package/src/embed/models.ts +104 -0
- package/src/embed/openai.ts +95 -0
- package/src/embed/transformers.ts +81 -0
- package/src/frecency.ts +58 -0
- package/src/fs.ts +36 -0
- package/src/glob.ts +163 -0
- package/src/index.ts +15 -0
- package/src/log.ts +60 -0
- package/src/md.ts +204 -0
- package/src/progress.ts +121 -0
- package/src/query.ts +131 -0
- package/src/runtime.bun.ts +33 -0
- package/src/runtime.node.ts +47 -0
- package/src/search.ts +230 -0
- package/src/snippet.ts +248 -0
- package/src/sqlite.ts +1 -0
- package/src/store.ts +180 -0
- package/src/uri.ts +28 -0
- package/src/util.ts +21 -0
- package/src/vfs.ts +257 -0
- package/test/doc.test.ts +61 -0
- package/test/fixtures/ignore-test/keep.md +0 -0
- package/test/fixtures/ignore-test/skip.log +0 -0
- package/test/fixtures/ignore-test/sub/keep.md +0 -0
- package/test/fixtures/store/agent/index.md +9 -0
- package/test/fixtures/store/agent/lessons.md +21 -0
- package/test/fixtures/store/agent/soul.md +28 -0
- package/test/fixtures/store/agent/tools.md +25 -0
- package/test/fixtures/store/concepts/frecency.md +30 -0
- package/test/fixtures/store/concepts/index.md +9 -0
- package/test/fixtures/store/concepts/memory-coherence.md +33 -0
- package/test/fixtures/store/concepts/rag.md +27 -0
- package/test/fixtures/store/index.md +9 -0
- package/test/fixtures/store/projects/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
- package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
- package/test/fixtures/store/user/family.md +13 -0
- package/test/fixtures/store/user/index.md +9 -0
- package/test/fixtures/store/user/preferences.md +29 -0
- package/test/fixtures/store/user/profile.md +29 -0
- package/test/fs.test.ts +15 -0
- package/test/glob.test.ts +190 -0
- package/test/md.test.ts +177 -0
- package/test/query.test.ts +105 -0
- package/test/uri.test.ts +46 -0
- package/test/util.test.ts +62 -0
- package/test/vfs.test.ts +164 -0
- package/tsconfig.json +3 -0
- package/tsdown.config.ts +8 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import { n as parseYaml } from "./runtime.node-DlQPaGrV.mjs";
|
|
2
|
+
import { EventEmitter } from "node:events";
|
|
3
|
+
import { inspect } from "node:util";
|
|
4
|
+
//#region src/md.ts
|
|
5
|
+
/** Fallback characters-per-token ratio, used by SafeCounter until it has measured enough real token counts. */
const CHARS_PER_TOKEN = 3;
|
|
6
|
+
/**
 * Split a leading YAML frontmatter block (`---` ... `---`) off a document.
 *
 * @param {string} text - Full document text.
 * @returns {{body: string, bodyOffset: number, frontmatter: object, frontmatterText: (string|undefined), text: string}}
 *   `body` is the text after the frontmatter, `bodyOffset` the number of lines the
 *   frontmatter block occupies (0 when there is none), `frontmatter` the parsed YAML
 *   (`{}` when absent or empty), `frontmatterText` the raw matched block and `text`
 *   the unmodified input.
 */
function parseFrontmatter(text) {
	// The closing `---` has to end its line (or the input); otherwise any line that
	// merely starts with `---` would terminate the block early. `\r?` accepts CRLF files.
	const match = text.match(/^---\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/);
	if (!match) return {
		body: text,
		bodyOffset: 0,
		frontmatter: {},
		frontmatterText: void 0,
		text
	};
	const frontmatterText = match[0];
	return {
		body: text.slice(frontmatterText.length),
		bodyOffset: frontmatterText.trim().split("\n").length,
		// An empty or comment-only block may parse to null/undefined; callers get an object either way
		frontmatter: parseYaml(match[1]) ?? {},
		frontmatterText,
		text
	};
}
|
|
16
|
+
/**
 * Parse a markdown document: frontmatter first, then the body's heading sections.
 *
 * @param {string} text - Full document text.
 * @returns {object} Every field returned by parseFrontmatter plus `sections`,
 *   the result of parseSections applied to the body.
 */
function parseMarkdown(text) {
	const parsed = parseFrontmatter(text);
	const sections = parseSections(parsed.body);
	return { ...parsed, sections };
}
|
|
23
|
+
/**
 * Split markdown into sections, one per ATX heading (`#`, `##`, ...).
 *
 * Text before the first heading becomes a level-0 section with an empty heading;
 * it is dropped when a heading appears before any other line. Lines inside fenced
 * code blocks (``` or ~~~) are never treated as headings.
 *
 * @param {string} md - Markdown text (without frontmatter).
 * @returns {Array<{content: string[], context: string[], heading: string, headingText: string, level: number, offset: number}>}
 *   `content` holds the section's lines including the heading line, `context` the
 *   `headingText` of every enclosing (lower-level) heading, `heading` the title
 *   without the `#` markers, `headingText` the trimmed heading line, `level` the
 *   number of `#`, and `offset` the index of the heading line within `md`.
 */
function parseSections(md) {
	const lines = md.split(/\n/);
	let current = {
		content: [],
		context: [],
		heading: "",
		headingText: "",
		level: 0,
		offset: 0
	};
	const sections = [current];
	// Fence run (e.g. "```") that opened the code block we are inside, if any
	let codeBlock = void 0;
	for (const [i, line] of lines.entries()) {
		const match = line.match(/^(#+)\s+(.*)/);
		const fenceMatch = line.match(/^\s*(`{3,}|~{3,})/);
		// Opening fences may be indented (e.g. inside list items), so the closing
		// fence has to be compared after stripping leading whitespace as well;
		// otherwise an indented block never closes and later headings are lost.
		if (codeBlock && line.trimStart().startsWith(codeBlock)) codeBlock = void 0;
		else if (!codeBlock && fenceMatch) codeBlock = fenceMatch[1];
		if (!codeBlock && match) {
			// Only the initial preamble section can be empty here
			if (current.content.length === 0) sections.pop();
			const level = match[1].length;
			current = {
				content: [line],
				context: [],
				heading: match[2].trim(),
				headingText: match[0].trim(),
				level,
				offset: i
			};
			sections.push(current);
		} else current.content.push(line);
	}
	// Second pass: walk sections in order keeping a stack of open ancestor headings
	const stack = [];
	for (const section of sections) {
		while ((stack.at(-1)?.level ?? -1) >= section.level) stack.pop();
		section.context = stack.map((s) => s.headingText);
		if (section.level > 0) stack.push(section);
	}
	return sections;
}
|
|
62
|
+
function findSplit(slice) {
|
|
63
|
+
for (const sub of [
|
|
64
|
+
"\n\n",
|
|
65
|
+
"\n",
|
|
66
|
+
" ",
|
|
67
|
+
" ",
|
|
68
|
+
" "
|
|
69
|
+
]) {
|
|
70
|
+
const i = slice.lastIndexOf(sub);
|
|
71
|
+
if (i > slice.length * .8) return i;
|
|
72
|
+
}
|
|
73
|
+
return slice.length;
|
|
74
|
+
}
|
|
75
|
+
/**
 * Token counter that avoids running the real tokenizer on text that is clearly
 * over the limit.
 *
 * The character/token totals are static, i.e. shared by every instance, and are
 * used to refine the characters-per-token estimate once enough tokens were seen.
 */
var SafeCounter = class SafeCounter {
	static #toks = 0;
	static #chars = 0;
	/**
	 * @param {{toks: function(string): number}} tok - Tokenizer exposing `toks(text)`.
	 * @param {number} [maxTokens=500] - Limit above which counts are only estimated.
	 */
	constructor(tok, maxTokens = 500) {
		this.tok = tok;
		this.maxTokens = maxTokens;
	}
	/** Measured chars/token once more than `2 * maxTokens` tokens were counted, else the CHARS_PER_TOKEN fallback. */
	get charsPerToken() {
		if (SafeCounter.#toks > this.maxTokens * 2) return SafeCounter.#chars / SafeCounter.#toks;
		return CHARS_PER_TOKEN;
	}
	/** Estimate the token count of `text` from its length alone. */
	estimate(text) {
		const ratio = this.charsPerToken;
		return Math.ceil(text.length / ratio);
	}
	/**
	 * Count the tokens in `text`.
	 *
	 * @param {string} text
	 * @returns {{count: number, estimated: boolean}} `estimated` is true when 90% of the
	 *   length-based estimate already exceeds `maxTokens` and the tokenizer was skipped.
	 */
	toks(text) {
		if (text.length === 0) return { count: 0, estimated: false };
		const approx = this.estimate(text) * .9;
		if (approx > this.maxTokens) return { count: approx, estimated: true };
		const exact = this.tok.toks(text);
		// Feed the shared statistics used by charsPerToken
		SafeCounter.#chars += text.length;
		SafeCounter.#toks += exact;
		return { count: exact, estimated: false };
	}
};
|
|
107
|
+
/**
 * Cut plain text into consecutive chunks of at most `size` tokens each.
 *
 * The chunks are exact slices of `text`: concatenating them yields the input.
 * Every chunk contains at least one character, so the function always terminates;
 * a chunk can exceed `size` only when a single character already does (or when
 * `size` is not a positive number).
 *
 * @param {string} text - Text to split.
 * @param {{toks: function(string): number}} tok - Tokenizer handed to SafeCounter.
 * @param {number} [size=500] - Maximum tokens per chunk.
 * @returns {string[]} The chunks in document order.
 */
function chunkText(text, tok, size = 500) {
	const counter = new SafeCounter(tok, size);
	const chunks = [];
	while (text.length) {
		let next = text;
		let toks = counter.toks(next);
		if (toks.count <= size) {
			chunks.push(next);
			break;
		}
		// Start a little below the estimated character budget, then shrink until it fits
		let maxChars = size * counter.charsPerToken * .8;
		while (true) {
			maxChars = Math.min(maxChars, next.length);
			// Never let the window drop below one character (also catches NaN and
			// negative budgets): an empty chunk would leave `text` unchanged and
			// the outer loop would spin forever.
			if (!(maxChars >= 1)) maxChars = 1;
			const split = Math.max(1, findSplit(next.slice(0, maxChars)));
			next = next.slice(0, split);
			toks = counter.toks(next);
			// A single character cannot be shrunk further; accept it as is
			if (toks.count <= size || next.length <= 1) break;
			maxChars *= size / toks.count * .8;
		}
		chunks.push(next);
		text = text.slice(next.length);
	}
	return chunks;
}
|
|
131
|
+
/**
 * Split markdown into chunks of roughly at most `size` tokens, following the
 * heading structure.
 *
 * Consecutive sections are merged into one chunk while they fit. A section that
 * does not fit the current chunk starts a new one, prefixed with its ancestor
 * headings. A section that is too large on its own is cut with chunkText and each
 * piece is prefixed with the ancestor headings.
 *
 * @param {string} md - Markdown text.
 * @param {{toks: function(string): number}} tok - Tokenizer handed to SafeCounter / chunkText.
 * @param {number} [size=500] - Token budget per chunk.
 * @returns {string[]} Non-empty, trimmed chunk texts in document order.
 */
function chunkMarkdown(md, tok, size = 500) {
	const sections = parseSections(md);
	const chunks = [{
		content: [],
		context: [],
		tokens: 0
	}];
	const counter = new SafeCounter(tok, size);
	for (const section of sections) {
		const chunk = chunks.at(-1);
		const content = [...section.context, ...section.content];
		const text = content.join("\n");
		const toks = counter.toks(text).count;
		if (chunk.tokens + toks <= size) {
			// Only repeat the ancestor headings the current chunk has not shown at that depth
			const context = section.context.filter((h, c) => chunk.context[c] !== h);
			chunk.content.push(...context);
			chunk.content.push(...section.content);
			chunk.context = [...section.context, section.headingText];
			chunk.tokens += toks;
		} else if (toks <= size) chunks.push({
			content,
			context: [...section.context, section.headingText],
			tokens: toks
		});
		else {
			const context = section.context.join("\n");
			const toksCtx = counter.toks(context);
			// If the headings alone use up the budget there is no room left for content:
			// a zero or negative budget must not reach chunkText, so drop the prefix instead.
			const keepContext = context.length > 0 && toksCtx.count < size;
			const budget = keepContext ? size - toksCtx.count : size;
			chunks.push(...chunkText(section.content.join("\n"), tok, budget).map((c) => ({
				content: (keepContext ? `${context}\n${c}` : c).split("\n"),
				context: [],
				tokens: 0
			})));
			// Fresh empty chunk so the following section does not merge into a split piece
			chunks.push({
				content: [],
				context: [],
				tokens: 0
			});
		}
	}
	return chunks.map((c) => c.content.join("\n").trim()).filter(Boolean);
}
|
|
172
|
+
//#endregion
|
|
173
|
+
//#region src/progress.ts
|
|
174
|
+
/**
 * Progress tracker that can be a leaf (own value/max) or a group whose value and
 * max are the sums over its children.
 *
 * Emits `"update"` whenever it or one of its children changes, and `"done"` once
 * when it finishes. A finished Progress ignores further `set` calls.
 */
var Progress = class Progress extends EventEmitter {
	#max = 100;
	#value = 0;
	#children = /* @__PURE__ */ new Map();
	#status;
	#done = false;
	/**
	 * @param {string} name - Display name; also the fallback for `status`.
	 * @param {number|{max?: number, status?: string, value?: number}} [opts] - Initial state, passed to `set`.
	 */
	constructor(name, opts = {}) {
		super();
		this.name = name;
		this.set(opts);
	}
	/** True when this Progress has at least one child. */
	get group() {
		return this.#children.size > 0;
	}
	/**
	 * Update value (when given a number) or any of max/status/value (when given an
	 * object), emit `"update"`, and stop once the own value reaches the own max.
	 *
	 * @param {number|{max?: number, status?: string, value?: number}} opts
	 * @returns {this}
	 */
	set(opts) {
		if (this.#done) return this;
		if (typeof opts === "number") this.#value = opts;
		else {
			this.#max = opts.max ?? this.#max;
			this.#status = opts.status ?? this.#status;
			this.#value = opts.value ?? this.#value;
		}
		this.emit("update", this);
		if (this.#value >= this.#max) this.stop();
		return this;
	}
	/** Explicit status text, or the name when none was set. */
	get status() {
		return this.#status ?? this.name;
	}
	set status(status) {
		this.set({ status });
	}
	set value(value) {
		this.set(value);
	}
	/** Own value for a leaf, sum of the children's values for a group. */
	get value() {
		if (!this.group) return this.#value;
		// Spread to an array first: `.reduce` directly on a Map iterator needs the
		// Iterator Helpers feature, which older Node releases do not provide.
		return [...this.#children.values()].reduce((sum, c) => sum + c.value, 0);
	}
	set max(max) {
		this.set({ max });
	}
	/** Own max for a leaf, sum of the children's max for a group. */
	get max() {
		if (!this.group) return this.#max;
		return [...this.#children.values()].reduce((sum, c) => sum + c.max, 0);
	}
	get done() {
		return this.#done;
	}
	/** Completed fraction in [0, 1]; 0 when max is 0. */
	get ratio() {
		return this.max === 0 ? 0 : Math.min(1, this.value / this.max);
	}
	get pct() {
		return this.ratio * 100;
	}
	/** Mark as finished (a leaf jumps to its max), stop all children and emit `"done"` once. */
	stop() {
		if (this.#done) return;
		// Set the flag first so the children's "done" events do not re-enter stop()
		this.#done = true;
		if (!this.group) this.#value = this.#max;
		this.#children.forEach((c) => c.stop());
		this.emit("done", this);
	}
	/** Snapshot array of the direct children. */
	children() {
		return [...this.#children.values()];
	}
	/**
	 * Get or create the child called `name`. `opts` is only used on creation.
	 *
	 * @param {string} name
	 * @param {number|{max?: number, status?: string, value?: number}} [opts]
	 * @returns {Progress}
	 * @throws {Error} When this Progress already recorded a value of its own.
	 */
	child(name, opts = {}) {
		if (this.#value > 0) throw new Error("Cannot add child to Progress that has already made progress");
		let child = this.#children.get(name);
		if (!child) {
			child = new Progress(name, opts);
			child.on("update", () => this.emit("update", this));
			child.on("done", () => {
				// The group finishes when its last child finishes
				if (!this.done && this.children().every((c) => c.done)) this.stop();
			});
			this.#children.set(name, child);
		}
		return child;
	}
	[inspect.custom](_depth, _options) {
		return this.toString();
	}
	/** Render this Progress and its children as an indented text tree, one line each. */
	toString(indent = 0) {
		const pad = " ".repeat(indent);
		const pct = `${this.pct.toFixed(0)}%`.padStart(4);
		const status = this.#status ? ` ${this.#status}` : "";
		const line = `${pad}${pct} ${this.name}${status}`;
		if (!this.group) return line;
		return [line, ...[...this.#children.values()].map((c) => c.toString(indent + 1))].join("\n");
	}
};
|
|
262
|
+
//#endregion
|
|
263
|
+
export { parseMarkdown as a, parseFrontmatter as i, chunkMarkdown as n, parseSections as o, chunkText as r, Progress as t };
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
//#region src/query.ts
|
|
2
|
+
/** Field names that tokenize accepts as a `field:term` prefix. */
const FTS_FIELDS = new Set(["entities", "tags", "description", "title", "body"]);
|
|
9
|
+
/**
 * Turn a user query into a flat token list.
 *
 * Recognised syntax:
 * - `(` and `)`            -> `{type: "paren", value}`
 * - `|`                    -> `{type: "op", value: "OR"}`
 * - `"a phrase"`           -> `{type: "term", value}`; a double quote starts a phrase anywhere
 * - `'a phrase'`           -> same, but only at the start of a word, so apostrophes inside words survive
 * - `word`, `-word`, `+word`, `field:word` -> `{type: "term", value, neg, req[, field]}`;
 *   `field` is only split off when it is one of FTS_FIELDS
 * - `-"a phrase"` / `+"a phrase"` -> phrase term carrying `neg` / `req`
 *
 * @param {string} input - Raw query text.
 * @returns {Array<object>} Tokens in input order.
 */
function tokenize(input) {
	const tokens = [];
	let i = 0;
	// Read a quoted phrase whose opening quote is at `i`; leaves `i` after the closing quote
	const readPhrase = (quote) => {
		i++;
		const start = i;
		while (i < input.length && input[i] !== quote) i++;
		const value = input.slice(start, i);
		if (i < input.length) i++;
		return value;
	};
	while (i < input.length) {
		while (i < input.length && input[i] === " ") i++;
		if (i >= input.length) break;
		const ch = input[i];
		if (ch === "(" || ch === ")") {
			tokens.push({
				type: "paren",
				value: ch
			});
			i++;
		} else if (ch === "|") {
			tokens.push({
				type: "op",
				value: "OR"
			});
			i++;
		} else if (ch === "\"" || ch === "'" && (i === 0 || input[i - 1] === " ")) {
			// `"` terminates bare terms, so it must always be consumed here: sending it
			// to the bare-term branch would make no progress and loop forever
			// (e.g. `foo"bar"` or `("a b")`).
			const value = readPhrase(ch);
			if (value) tokens.push({
				type: "term",
				value
			});
		} else {
			const neg = ch === "-";
			const req = ch === "+";
			if (neg || req) i++;
			if (input[i] === "\"") {
				// Prefixed phrase such as -"exact words"
				const value = readPhrase("\"");
				if (value) tokens.push({
					neg: neg || void 0,
					req: req || void 0,
					type: "term",
					value
				});
				continue;
			}
			const start = i;
			while (i < input.length && !" \"()|".includes(input[i])) i++;
			if (start < i) {
				const raw = input.slice(start, i);
				const colon = raw.indexOf(":");
				if (colon > 0 && FTS_FIELDS.has(raw.slice(0, colon))) tokens.push({
					field: raw.slice(0, colon),
					neg: neg || void 0,
					req: req || void 0,
					type: "term",
					value: raw.slice(colon + 1)
				});
				else tokens.push({
					neg: neg || void 0,
					req: req || void 0,
					type: "term",
					value: raw
				});
			}
		}
	}
	return tokens;
}
|
|
65
|
+
/**
 * Clean a term before it is embedded in an FTS5 expression.
 *
 * Keeps Unicode letters, Unicode digits, whitespace, apostrophes and colons;
 * removes every other character and trims the result.
 */
function sanitize(term) {
	const disallowed = /[^\p{L}\p{N}\s':]/gu;
	const stripped = term.replace(disallowed, "");
	return stripped.trim();
}
|
|
69
|
+
/**
 * Render one term token as an FTS5 expression fragment.
 *
 * The sanitized value is wrapped in double quotes; a trailing `*` on the raw value
 * is kept as a prefix marker, `field` becomes a `field : "..."` scope and `neg`
 * prepends `NOT`.
 *
 * @param {{value: string, field?: string, neg?: boolean}} token
 * @returns {string|undefined} The fragment, or undefined when nothing is left after sanitizing.
 */
function buildTerm(token) {
	const clean = sanitize(token.value);
	if (!clean) return;
	let expr = `"${clean}"`;
	if (token.value.endsWith("*")) expr += "*";
	if (token.field) expr = `${token.field} : ${expr}`;
	if (token.neg) return `NOT ${expr}`;
	return expr;
}
|
|
76
|
+
/** Join expression fragments with ` <op> ` between them. */
function joinParts(parts, op) {
	const separator = ` ${op} `;
	return parts.join(separator);
}
|
|
79
|
+
/**
 * Build an FTS5 query string from user input.
 *
 * Without any `+required` term the tokens are emitted in order: explicit `|`
 * becomes `OR`, adjacent operands are joined with `defaultOp`, parentheses are
 * passed through. An operator is only written between two operands, so leading,
 * trailing or repeated `|` never produce a dangling `OR`.
 *
 * With at least one `+required` term, parentheses and operators are ignored and
 * the result is `<required joined by AND> AND (<all terms joined by OR>)`.
 *
 * NOTE(review): negated terms are rendered by buildTerm as a unary `NOT "x"`, while
 * the FTS5 grammar only defines the binary form `a NOT b` — confirm how the
 * consumer of this string handles that. Unbalanced or empty parentheses are also
 * passed through unchanged.
 *
 * @param {string} input - Raw user query.
 * @param {string} [defaultOp="OR"] - Operator placed between adjacent operands.
 * @returns {string} FTS5 expression; empty when the input has no usable term.
 */
function toFts(input, defaultOp = "OR") {
	const tokens = tokenize(input);
	if (!tokens.some((t) => t.type === "term" && t.req)) {
		const parts = [];
		// True when the last emitted part is an operand (a term or a closing paren)
		let needsOp = false;
		// Explicit operator seen after an operand, written only once the next operand arrives
		let pendingOp;
		for (const token of tokens) {
			if (token.type === "paren") {
				if (token.value === "(") {
					if (needsOp) parts.push(pendingOp ?? defaultOp);
					parts.push(token.value);
					needsOp = false;
				} else {
					// An operator right before `)` has no right-hand side and is dropped
					parts.push(token.value);
					needsOp = true;
				}
				pendingOp = void 0;
				continue;
			}
			if (token.type === "op") {
				// Without a left operand the operator is meaningless and is ignored
				if (needsOp) pendingOp = token.value;
				continue;
			}
			const term = buildTerm(token);
			if (!term) continue;
			if (needsOp) parts.push(pendingOp ?? defaultOp);
			pendingOp = void 0;
			parts.push(term);
			needsOp = true;
		}
		return parts.join(" ").replace(/\( /g, "(").replace(/ \)/g, ")");
	}
	const required = [];
	const all = [];
	for (const token of tokens) {
		if (token.type !== "term") continue;
		const term = buildTerm(token);
		if (!term) continue;
		all.push(term);
		if (token.req) required.push(term);
	}
	const requiredPart = joinParts(required, "AND");
	const allPart = joinParts(all, "OR");
	// Every `+term` may have been sanitized away; avoid emitting ` AND (...)`
	if (required.length === 0) return allPart;
	if (required.length === all.length) return requiredPart;
	return `${requiredPart} AND (${allPart})`;
}
|
|
124
|
+
//#endregion
|
|
125
|
+
export { tokenize as n, toFts as t };
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
//#region src/runtime.node.ts
|
|
2
|
+
/** Database class produced by dbInit(); assigned on the first openDatabase() call and reused afterwards. */
let DB;
|
|
3
|
+
/**
 * Load better-sqlite3 on demand and build a Database subclass that adds a `run`
 * method and a caching `query` method.
 *
 * NOTE(review): the subclass constructor forwards only `filename`; any options a
 * caller passes as a second argument are not handed to better-sqlite3 — confirm
 * this is intended.
 *
 * @returns {Promise<Function>} The subclass constructor.
 */
async function dbInit() {
	const sqlite = await import("better-sqlite3");
	const BetterDatabase = sqlite.default;
	return class extends BetterDatabase {
		/** Prepared statements keyed by their SQL source. */
		prepareCache = /* @__PURE__ */ new Map();
		constructor(filename) {
			super(filename);
		}
		/** Delegates to `exec` with the same arguments. */
		run(...args) {
			return this.exec(...args);
		}
		/** Return the prepared statement for `source`, preparing and caching it on first use. */
		query(source) {
			const cached = this.prepareCache.get(source);
			if (cached) return cached;
			const statement = this.prepare(source);
			this.prepareCache.set(source, statement);
			return statement;
		}
	};
}
|
|
23
|
+
/**
 * Open the database at `path` and load the sqlite-vec extension into it.
 *
 * The database class is created by dbInit() on the first call and kept in `DB`.
 *
 * @param {string} path - Database file path passed to the constructor.
 * @returns {Promise<object>} The opened database instance.
 */
async function openDatabase(path) {
	if (DB === void 0 || DB === null) DB = await dbInit();
	const { load } = await import("sqlite-vec");
	const db = new DB(path, { strict: true });
	load(db);
	return db;
}
|
|
30
|
+
// js-yaml is loaded once, while this module is evaluated (top-level await)
const yamlModule = await import("js-yaml");
const loadYaml = yamlModule.load;
/**
 * Parse a YAML string with js-yaml's `load`.
 *
 * @param {string} content - YAML source.
 * @returns {*} Whatever `load` returns for the document.
 */
function parseYaml(content) {
	const parsed = loadYaml(content);
	return parsed;
}
|
|
34
|
+
//#endregion
|
|
35
|
+
export { parseYaml as n, openDatabase as t };
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import { t as hash } from "./util-DNyrmcA3.mjs";
|
|
2
|
+
import { i as parentUri } from "./uri-CehXVDGB.mjs";
|
|
3
|
+
import { t as toFts } from "./query-VFSpErTB.mjs";
|
|
4
|
+
//#region src/search.ts
|
|
5
|
+
const DESC_BOOST = .2;
|
|
6
|
+
/** Weight of the ancestor score when boosting a vector hit: score += PARENT_BOOST * parentScore * (1 - score). */
const PARENT_BOOST = .3;
|
|
7
|
+
/** Rank offset k in the reciprocal-rank-fusion term 1 / (k + rank). */
const RRF_K = 60;
|
|
8
|
+
/** Minimum number of candidates requested from each sub-search before fusion. */
const RRF_LIMIT = 50;
|
|
9
|
+
/** Multiplier on the vector query limit; several rows can belong to one document and only the best per document is kept. */
const VEC_OVERSAMPLE = 4;
|
|
10
|
+
var Search = class Search {
|
|
11
|
+
constructor(db, ctx) {
|
|
12
|
+
this.db = db;
|
|
13
|
+
this.ctx = ctx;
|
|
14
|
+
}
|
|
15
|
+
static async load(ctx) {
|
|
16
|
+
return new Search(await ctx.db(), ctx);
|
|
17
|
+
}
|
|
18
|
+
async search(query, opts = {}) {
|
|
19
|
+
const mode = opts.mode ?? "hybrid";
|
|
20
|
+
if (mode === "fts") return this.searchFts(query, opts);
|
|
21
|
+
if (mode === "vec") return this.searchVec(query, opts);
|
|
22
|
+
const limit = opts.limit ?? 20;
|
|
23
|
+
const subLimit = Math.max(RRF_LIMIT, limit * 2);
|
|
24
|
+
const [fts, vec] = await Promise.all([this.searchFts(query, {
|
|
25
|
+
...opts,
|
|
26
|
+
limit: subLimit
|
|
27
|
+
}), this.searchVec(query, {
|
|
28
|
+
...opts,
|
|
29
|
+
limit: subLimit,
|
|
30
|
+
slice: false
|
|
31
|
+
})]);
|
|
32
|
+
return this.fuse(fts, vec, limit);
|
|
33
|
+
}
|
|
34
|
+
async searchVec(query, opts = {}) {
|
|
35
|
+
const cacheKey = hash(`embed:${query}`);
|
|
36
|
+
const embedder = await this.ctx.embedder();
|
|
37
|
+
const vfs = await this.ctx.vfs();
|
|
38
|
+
const embedding = this.db.cacheGet(cacheKey) ?? this.db.cacheSet(cacheKey, await embedder.embed(query));
|
|
39
|
+
const scope = vfs.getScope(opts.uri);
|
|
40
|
+
const limit = opts.limit ?? 20;
|
|
41
|
+
const results = this.db.searchVec(embedding, { limit: Math.max(limit, RRF_LIMIT) * VEC_OVERSAMPLE });
|
|
42
|
+
const best = /* @__PURE__ */ new Map();
|
|
43
|
+
for (const vec of results) {
|
|
44
|
+
const uri = scope.map(vec.path);
|
|
45
|
+
if (!uri) continue;
|
|
46
|
+
vec.score = vec.seq === 0 ? vec.score + DESC_BOOST * (1 - vec.score) : vec.score;
|
|
47
|
+
const existing = best.get(vec.doc_id)?.score ?? -Infinity;
|
|
48
|
+
if (vec.score > existing) best.set(vec.doc_id, Object.assign(vec, {
|
|
49
|
+
hiscore: 0,
|
|
50
|
+
uri
|
|
51
|
+
}));
|
|
52
|
+
}
|
|
53
|
+
const scores = new Map(best.values().map((vec) => [vec.uri, vec.score]));
|
|
54
|
+
const parentScores = /* @__PURE__ */ new Map();
|
|
55
|
+
const getParentScore = (uri) => {
|
|
56
|
+
const parent = parentUri(uri);
|
|
57
|
+
if (!parent) return 0;
|
|
58
|
+
let score = parentScores.get(parent);
|
|
59
|
+
if (score !== void 0) return score;
|
|
60
|
+
score = (scores.get(parent) ?? 0) * .5 + getParentScore(parent) * .5;
|
|
61
|
+
parentScores.set(parent, score);
|
|
62
|
+
return score;
|
|
63
|
+
};
|
|
64
|
+
for (const vec of best.values()) {
|
|
65
|
+
const parentScore = getParentScore(vec.uri);
|
|
66
|
+
vec.score += PARENT_BOOST * parentScore * (1 - vec.score);
|
|
67
|
+
}
|
|
68
|
+
let bestResults = [...best.values()].toSorted((a, b) => b.score - a.score);
|
|
69
|
+
bestResults = opts.slice === false ? bestResults : bestResults.slice(0, limit);
|
|
70
|
+
const docs = this.db.getDocs(bestResults.map((r) => r.doc_id));
|
|
71
|
+
const ret = [];
|
|
72
|
+
for (const vec of bestResults) {
|
|
73
|
+
const doc = docs.get(vec.doc_id);
|
|
74
|
+
if (doc) ret.push({
|
|
75
|
+
doc,
|
|
76
|
+
match: { vec },
|
|
77
|
+
path: vec.path,
|
|
78
|
+
scores: { vec: {
|
|
79
|
+
rank: 0,
|
|
80
|
+
score: vec.score
|
|
81
|
+
} },
|
|
82
|
+
uri: vec.uri
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
return this.rank("vec", ret);
|
|
86
|
+
}
|
|
87
|
+
async searchFts(query, opts = {}) {
|
|
88
|
+
const scope = (await this.ctx.vfs()).getScope(opts.uri);
|
|
89
|
+
const results = this.db.searchFts(toFts(query, opts.op ?? "OR"), {
|
|
90
|
+
limit: opts.limit ?? 20,
|
|
91
|
+
scope: scope.paths.map((p) => p.path)
|
|
92
|
+
});
|
|
93
|
+
const docs = this.db.getDocs(results.map((r) => r.rowid));
|
|
94
|
+
const ret = [];
|
|
95
|
+
for (const fts of results) {
|
|
96
|
+
fts.score = Math.abs(fts.score) / (1 + Math.abs(fts.score));
|
|
97
|
+
const doc = docs.get(fts.rowid);
|
|
98
|
+
const uri = scope.map(doc?.path ?? "");
|
|
99
|
+
if (doc && uri) ret.push({
|
|
100
|
+
doc,
|
|
101
|
+
match: { fts },
|
|
102
|
+
path: doc.path,
|
|
103
|
+
scores: { fts: {
|
|
104
|
+
rank: 0,
|
|
105
|
+
score: fts.score
|
|
106
|
+
} },
|
|
107
|
+
uri
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
return this.rank("fts", ret);
|
|
111
|
+
}
|
|
112
|
+
rank(mode, results) {
|
|
113
|
+
const score = (r) => r.scores[mode];
|
|
114
|
+
return results.toSorted((a, b) => score(b).score - score(a).score || (score(b).display_score ?? 0) - (score(a).display_score ?? 0)).map((r, i) => {
|
|
115
|
+
score(r).rank = i + 1;
|
|
116
|
+
return r;
|
|
117
|
+
});
|
|
118
|
+
}
|
|
119
|
+
/** Reciprocal Rank Fusion: merge FTS and vector results */
|
|
120
|
+
fuse(ftsResults, vecResults, limit) {
|
|
121
|
+
const merged = /* @__PURE__ */ new Map();
|
|
122
|
+
const minVecScore = vecResults.length ? vecResults[vecResults.length - 1]?.scores.vec.score : void 0;
|
|
123
|
+
const minFtsScore = ftsResults.length ? ftsResults[ftsResults.length - 1]?.scores.fts.score : void 0;
|
|
124
|
+
const minScore = Math.min(minVecScore ?? 1, minFtsScore ?? 1);
|
|
125
|
+
for (const fts of ftsResults) merged.set(fts.doc.id, {
|
|
126
|
+
fts,
|
|
127
|
+
uri: fts.uri
|
|
128
|
+
});
|
|
129
|
+
for (const vec of vecResults) merged.set(vec.doc.id, {
|
|
130
|
+
...merged.get(vec.doc.id),
|
|
131
|
+
uri: vec.uri,
|
|
132
|
+
vec
|
|
133
|
+
});
|
|
134
|
+
let ret = [...merged.values()].map(({ uri, fts, vec }) => {
|
|
135
|
+
const ftsScore = fts?.scores.fts;
|
|
136
|
+
const vecScore = vec?.scores.vec;
|
|
137
|
+
const score = (ftsScore?.rank !== void 0 ? 1 / (RRF_K + ftsScore.rank) : 0) + (vecScore?.rank !== void 0 ? 1 / (RRF_K + vecScore.rank) : 0);
|
|
138
|
+
const display_score = .6 * (vecScore?.score ?? minScore) + .4 * (ftsScore?.score ?? minScore);
|
|
139
|
+
const doc = fts?.doc ?? vec?.doc;
|
|
140
|
+
return {
|
|
141
|
+
doc,
|
|
142
|
+
match: {
|
|
143
|
+
...fts?.match,
|
|
144
|
+
...vec?.match
|
|
145
|
+
},
|
|
146
|
+
path: doc.path,
|
|
147
|
+
scores: {
|
|
148
|
+
...fts?.scores,
|
|
149
|
+
...vec?.scores,
|
|
150
|
+
hybrid: {
|
|
151
|
+
display_score,
|
|
152
|
+
rank: 0,
|
|
153
|
+
score
|
|
154
|
+
}
|
|
155
|
+
},
|
|
156
|
+
uri
|
|
157
|
+
};
|
|
158
|
+
});
|
|
159
|
+
ret = this.rank("hybrid", ret).slice(0, limit);
|
|
160
|
+
const bestScore = ret[0]?.scores.hybrid.score ?? 1;
|
|
161
|
+
for (const r of ret) r.scores.hybrid.score /= bestScore;
|
|
162
|
+
return ret;
|
|
163
|
+
}
|
|
164
|
+
};
|
|
165
|
+
//#endregion
|
|
166
|
+
export { Search };
|