@rekal/mem 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db-BMh1OP4b.mjs +294 -0
- package/dist/doc-DnYN4jAU.mjs +116 -0
- package/dist/embed-rUMZxqed.mjs +100 -0
- package/dist/fs-DMp26Byo.mjs +32 -0
- package/dist/glob.d.mts +27 -0
- package/dist/glob.mjs +132 -0
- package/dist/index.d.mts +1465 -0
- package/dist/index.mjs +351 -0
- package/dist/llama-CT3dc9Cn.mjs +75 -0
- package/dist/models-DFQSgBNr.mjs +77 -0
- package/dist/openai-j2_2GM4J.mjs +76 -0
- package/dist/progress-B1JdNapX.mjs +263 -0
- package/dist/query-VFSpErTB.mjs +125 -0
- package/dist/runtime.node-DlQPaGrV.mjs +35 -0
- package/dist/search-BllHWtZF.mjs +166 -0
- package/dist/store-DE7S35SS.mjs +137 -0
- package/dist/transformers-CJ3QA2PK.mjs +55 -0
- package/dist/uri-CehXVDGB.mjs +28 -0
- package/dist/util-DNyrmcA3.mjs +11 -0
- package/dist/vfs-CNQbkhsf.mjs +222 -0
- package/foo.ts +3 -0
- package/foo2.ts +20 -0
- package/package.json +61 -0
- package/src/context.ts +77 -0
- package/src/db.ts +464 -0
- package/src/doc.ts +163 -0
- package/src/embed/base.ts +122 -0
- package/src/embed/index.ts +67 -0
- package/src/embed/llama.ts +111 -0
- package/src/embed/models.ts +104 -0
- package/src/embed/openai.ts +95 -0
- package/src/embed/transformers.ts +81 -0
- package/src/frecency.ts +58 -0
- package/src/fs.ts +36 -0
- package/src/glob.ts +163 -0
- package/src/index.ts +15 -0
- package/src/log.ts +60 -0
- package/src/md.ts +204 -0
- package/src/progress.ts +121 -0
- package/src/query.ts +131 -0
- package/src/runtime.bun.ts +33 -0
- package/src/runtime.node.ts +47 -0
- package/src/search.ts +230 -0
- package/src/snippet.ts +248 -0
- package/src/sqlite.ts +1 -0
- package/src/store.ts +180 -0
- package/src/uri.ts +28 -0
- package/src/util.ts +21 -0
- package/src/vfs.ts +257 -0
- package/test/doc.test.ts +61 -0
- package/test/fixtures/ignore-test/keep.md +0 -0
- package/test/fixtures/ignore-test/skip.log +0 -0
- package/test/fixtures/ignore-test/sub/keep.md +0 -0
- package/test/fixtures/store/agent/index.md +9 -0
- package/test/fixtures/store/agent/lessons.md +21 -0
- package/test/fixtures/store/agent/soul.md +28 -0
- package/test/fixtures/store/agent/tools.md +25 -0
- package/test/fixtures/store/concepts/frecency.md +30 -0
- package/test/fixtures/store/concepts/index.md +9 -0
- package/test/fixtures/store/concepts/memory-coherence.md +33 -0
- package/test/fixtures/store/concepts/rag.md +27 -0
- package/test/fixtures/store/index.md +9 -0
- package/test/fixtures/store/projects/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
- package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
- package/test/fixtures/store/user/family.md +13 -0
- package/test/fixtures/store/user/index.md +9 -0
- package/test/fixtures/store/user/preferences.md +29 -0
- package/test/fixtures/store/user/profile.md +29 -0
- package/test/fs.test.ts +15 -0
- package/test/glob.test.ts +190 -0
- package/test/md.test.ts +177 -0
- package/test/query.test.ts +105 -0
- package/test/uri.test.ts +46 -0
- package/test/util.test.ts +62 -0
- package/test/vfs.test.ts +164 -0
- package/tsconfig.json +3 -0
- package/tsdown.config.ts +8 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
import { a as sstat, i as normPath, n as findUp, r as gitRoot, t as astat } from "./fs-DMp26Byo.mjs";
|
|
2
|
+
import { n as parseYaml } from "./runtime.node-DlQPaGrV.mjs";
|
|
3
|
+
import { n as toError, t as hash } from "./util-DNyrmcA3.mjs";
|
|
4
|
+
import { a as parseMarkdown, i as parseFrontmatter, n as chunkMarkdown, o as parseSections, r as chunkText, t as Progress } from "./progress-B1JdNapX.mjs";
|
|
5
|
+
import { i as parentUri, n as assertUri, r as normUri, t as URI_PREFIX } from "./uri-CehXVDGB.mjs";
|
|
6
|
+
import { t as Doc } from "./doc-DnYN4jAU.mjs";
|
|
7
|
+
import { n as tokenize, t as toFts } from "./query-VFSpErTB.mjs";
|
|
8
|
+
import { mkdirSync } from "node:fs";
|
|
9
|
+
import { join } from "pathe";
|
|
10
|
+
import { EventEmitter } from "node:events";
|
|
11
|
+
//#region src/log.ts
/** Names of every logging method a logger instance exposes. */
const LOG_LEVELS = ["cancel", "info", "success", "warn", "error", "debug", "fatal", "prompt", "log", "trace"];
/** Numeric severity per level, used to compare a message against a minimum level. */
const LOG_PRIORITY = {
  trace: 0,
  debug: 1,
  cancel: 2,
  info: 2,
  log: 2,
  prompt: 2,
  success: 2,
  warn: 3,
  error: 4,
  fatal: 5
};
/** True when `level` is one of the known LOG_LEVELS strings. */
function isLogLevel(level) {
  return LOG_LEVELS.includes(level);
}
/**
 * Decide whether a message at `level` passes the `minLevel` filter
 * (defaulting to "log"). Unknown level names always pass.
 */
function shouldLog(level, minLevel) {
  if (!isLogLevel(level)) return true;
  const threshold = LOG_PRIORITY[minLevel ?? "log"];
  return LOG_PRIORITY[level] >= threshold;
}
/**
 * Base class that installs one method per log level, each forwarding to the
 * subclass-provided `_log(level, ...msg)` hook.
 */
var LoggerBase = class {
  cancel;
  info;
  success;
  warn;
  error;
  debug;
  fatal;
  prompt;
  log;
  trace;
  constructor() {
    LOG_LEVELS.forEach((level) => {
      this[level] = (...msg) => this._log(level, ...msg);
    });
  }
};
//#endregion
|
|
59
|
+
//#region src/context.ts
/**
 * Shared runtime context: owns the options bag, the data root directory, an
 * event bus, and lazily-created singletons (db, search, store, embedder, vfs).
 */
var Context = class extends LoggerBase {
  #embedder;
  #root;
  #db;
  #search;
  #store;
  #vfs;
  /** Event bus; "log" and "progress" events are emitted here. */
  events = new EventEmitter();
  constructor(opts = {}) {
    super();
    // NOTE(review): the caller-supplied options object is kept and mutated in
    // place (defaults are filled in on it) — callers observe those defaults.
    this.opts = opts;
    this.opts.embedder ??= {};
    this.opts.embedder.onProgress ??= (progress) => this.events.emit("progress", progress);
    const root = normPath(this.opts.root ?? "~/.rekal-data");
    this.#root = root;
    mkdirSync(root, { recursive: true });
  }
  /** LoggerBase hook: fan every log call out over the event bus. */
  _log(level, ...msg) {
    this.events.emit("log", level, ...msg);
  }
  /** Data directory (created on construction). */
  get root() {
    return this.#root;
  }
  /** Lazily open (once) the sqlite-backed index database. */
  async db() {
    const { Db } = await import("./db-BMh1OP4b.mjs");
    this.#db ??= await Db.load(join(this.root, "index.sqlite3"));
    return this.#db;
  }
  /** Lazily create (once) the search facade. */
  async search() {
    const { Search } = await import("./search-BllHWtZF.mjs");
    this.#search ??= await Search.load(this);
    return this.#search;
  }
  /** Lazily create (once) the document store. */
  async store() {
    const { Store } = await import("./store-DE7S35SS.mjs");
    this.#store ??= await Store.load(this);
    return this.#store;
  }
  /** Lazily create (once) the embedding backend wrapper. */
  async embedder() {
    const { Embedder } = await import("./embed-rUMZxqed.mjs");
    this.#embedder ??= new Embedder(this);
    return this.#embedder;
  }
  /** Lazily create (once) the virtual file system. */
  async vfs() {
    const { Vfs } = await import("./vfs-CNQbkhsf.mjs");
    this.#vfs ??= new Vfs(this);
    return this.#vfs;
  }
};
//#endregion
|
|
104
|
+
//#region src/snippet.ts
/** One or more Unicode letters/digits — the tokenizer's definition of a word. */
const WORD_REGEX = /[\p{L}\p{N}]+/gu;
// Token match scores, strongest to weakest.
const SCORE_EXACT = 3;
const SCORE_LOWER = 2;
const SCORE_QUERY_PREFIX = 1.5;
const SCORE_DOC_PREFIX = 1;
const SCORE_OVERLAP = 1;
/** Shared prefixes shorter than this never count as a match. */
const MIN_PREFIX_LENGTH = 3;
// Caller-supplied stop-word scores are clamped into [STOPWORD_MIN, STOPWORD_MAX].
const STOPWORD_MIN = 0;
const STOPWORD_MAX = 1;
/** Minimum number of neighboring lines a scoring line radiates heat over. */
const HEAT_SPREAD = 5;
// Heat multipliers for blank lines, letter-free lines, and repeated query terms.
const WEIGHT_EMPTY = .1;
const WEIGHT_NONWORD = .3;
const WEIGHT_REPETITION = .7;
/** Linear decay: 1 at distance 0 down to 0 at distance >= r. */
const decayLinear = (d, r) => Math.max(0, 1 - d / r);
/** English stop words ignored in queries unless re-scored via `opts.stopWords`. */
const STOP_WORDS = new Set([
  "a", "an", "and", "are", "as", "at", "be", "but", "by", "do",
  "for", "from", "had", "has", "have", "he", "her", "his", "how", "i",
  "if", "in", "is", "it", "its", "my", "no", "not", "of", "on",
  "or", "our", "she", "so", "than", "that", "the", "their", "them", "then",
  "there", "these", "they", "this", "to", "up", "us", "was", "we", "what",
  "when", "which", "who", "will", "with", "you", "your"
]);
/** True when `word` is a built-in stop word (case-insensitive). */
function isStopWord(word) {
  return STOP_WORDS.has(word.toLowerCase());
}
/**
 * Query-driven snippet extractor: scores each line of a document against the
 * query, spreads the line scores into a heat map, and picks the best
 * `opts.lines`-line window.
 */
var Snippet = class {
  /** Query tokens that survived stop-word filtering, each with a base score. */
  query = [];
  /** Two-character lowercase prefixes of query tokens (cheap line pre-filter). */
  prefixes = /* @__PURE__ */ new Set();
  /** Case-insensitive alternation of `prefixes`; never matches when the query is empty. */
  prefixRegex;
  opts;
  constructor(opts) {
    // Fill defaults without clobbering caller options. (Previously the
    // defaults were spread AFTER `opts`, so caller-supplied `lines` and
    // `stopWords` were always overwritten — user stop-word overrides were
    // silently ignored because the fresh empty Map replaced them.)
    this.opts = {
      ...opts,
      lines: opts.lines ?? 5,
      stopWords: opts.stopWords ?? /* @__PURE__ */ new Map()
    };
    // Built-in stop words start at score 0; caller overrides are clamped into range.
    const stopwords = new Map([...STOP_WORDS].map((w) => [w, 0]));
    this.opts.stopWords.forEach((s, w) => stopwords.set(w, Math.max(STOPWORD_MIN, Math.min(STOPWORD_MAX, s))));
    const tokens = this.tokenize(opts.query, false);
    for (const tok of tokens) {
      const score = stopwords.get(tok.lower) ?? 2;
      if (score === 0) continue;
      this.prefixes.add(tok.lower.slice(0, 2));
      this.query.push({
        ...tok,
        score
      });
    }
    this.prefixRegex = this.prefixes.size > 0 ? new RegExp(`(${[...this.prefixes].join("|")})`, "i") : /(?!)/;
  }
  /** Strip diacritics, possessive "'s", and apostrophes before tokenizing. */
  normalize(text) {
    text = text.normalize("NFD").replace(/\p{M}/gu, "");
    text = text.replace(/['’]s\b/gi, "");
    text = text.replace(/['’]/g, " ");
    return text;
  }
  /**
   * Split `text` into { text, lower } word tokens. With `queryOnly` (default)
   * the whole line is skipped unless it contains a query-token prefix.
   */
  tokenize(text, queryOnly = true) {
    if (queryOnly && !this.prefixRegex.test(text)) return [];
    const tokens = this.normalize(text).match(WORD_REGEX) ?? [];
    const ret = [];
    for (const token of tokens) {
      const lower = token.toLowerCase();
      ret.push({
        lower,
        text: token
      });
    }
    return ret;
  }
  /** Similarity of a document token to one query token (0 = no match). */
  score(token, queryToken) {
    const tl = token.lower.length;
    if (token.text === queryToken.text) return SCORE_EXACT;
    if (token.lower === queryToken.lower) return SCORE_LOWER;
    if (token.lower.startsWith(queryToken.lower)) return SCORE_QUERY_PREFIX;
    if (queryToken.lower.startsWith(token.lower) && tl >= MIN_PREFIX_LENGTH) return SCORE_DOC_PREFIX;
    // Fall back to proportional credit for a shared prefix of >= MIN_PREFIX_LENGTH chars.
    let prefix = 0;
    for (let i = 0; i < token.lower.length; i++) {
      if (token.lower[i] !== queryToken.lower[i]) break;
      prefix++;
    }
    return prefix >= MIN_PREFIX_LENGTH ? SCORE_OVERLAP * (prefix / Math.max(token.lower.length, queryToken.lower.length)) : 0;
  }
  /**
   * Best-matching query token for `input` (string or token), with its base
   * score multiplied by match quality; undefined when nothing matches.
   */
  match(input) {
    let token;
    if (typeof input === "string") token = {
      lower: this.normalize(input).toLowerCase(),
      text: input
    };
    else token = input;
    let [bestScore, bestTok] = [0, this.query[0]];
    for (let t = 0; t < this.query.length; t++) {
      const queryTok = this.query[t];
      const s = this.score(token, queryTok);
      if (s > bestScore) [bestScore, bestTok] = [s, queryTok];
    }
    if (bestScore > 0) return {
      ...bestTok,
      score: bestTok.score * bestScore
    };
  }
  /**
   * Per-line scores plus, per line, the set of query terms covered.
   * Repeated terms within a line are dampened by WEIGHT_REPETITION.
   */
  scores(tokens) {
    const coverage = tokens.map(() => /* @__PURE__ */ new Set());
    return {
      coverage,
      scores: tokens.map((line, l) => {
        let lineScore = 0;
        for (const token of line) {
          const queryTok = this.match(token);
          if (!queryTok) continue;
          let score = queryTok.score;
          if (coverage[l].has(queryTok.lower)) score *= WEIGHT_REPETITION;
          coverage[l].add(queryTok.lower);
          lineScore += score;
        }
        return lineScore;
      })
    };
  }
  /** Spread each line's score over its neighbors with linear decay, dampening blank/letter-free lines. */
  heat(lines, scores) {
    const spread = Math.max(HEAT_SPREAD, Math.ceil(this.opts.lines / 2));
    const heat = new Float64Array(scores.length);
    for (let i = 0; i < scores.length; i++) {
      if (scores[i] === 0) continue;
      const spreadStart = Math.max(0, i - spread);
      const spreadEnd = Math.min(scores.length - 1, i + spread);
      for (let j = spreadStart; j <= spreadEnd; j++) {
        let weight = 1;
        if (!lines[j].trim()) weight *= WEIGHT_EMPTY;
        else if (!lines[j].match(/\p{L}/u)) weight *= WEIGHT_NONWORD;
        weight *= decayLinear(Math.abs(i - j), spread);
        heat[j] += scores[i] * weight;
      }
    }
    return [...heat];
  }
  /**
   * Score `text` against the query and return the best snippet window along
   * with all intermediate data (per-line scores, heat, candidate windows).
   */
  extract(text) {
    const lines = text.split("\n");
    const radius = Math.min(this.opts.lines, lines.length);
    const tokens = lines.map((line) => this.tokenize(line));
    const { scores, coverage } = this.scores(tokens);
    const heat = this.heat(lines, scores);
    const windows = [];
    for (let i = 0; i <= scores.length - radius; i++) {
      // Skip cold windows, but always keep at least one candidate.
      if (heat[i] === 0 && windows.length > 0) continue;
      let heatSum = 0;
      const covered = /* @__PURE__ */ new Set();
      for (let j = i; j < i + radius; j++) {
        heatSum += heat[j];
        coverage[j].forEach((t) => covered.add(t));
      }
      const cov = this.query.length === 0 ? 1 : covered.size / this.query.length;
      windows.push({
        coverage: cov,
        heat: heatSum,
        score: heatSum * cov,
        start: i
      });
    }
    let best = windows[0] ?? {
      coverage: 0,
      heat: 0,
      score: 0,
      start: 0
    };
    for (let i = 1; i < windows.length; i++) if (windows[i].score > best.score) best = windows[i];
    const snippet = lines.slice(best.start, best.start + radius);
    return {
      best,
      heat: [...heat],
      lines,
      scores,
      snippet,
      tokens,
      windows
    };
  }
  /** Print a per-line breakdown of scores/heat/window stats to the console. */
  debug(result) {
    console.info("Options:", this.opts);
    console.info("Query :", this.query);
    const score = (n, f = 1) => n !== void 0 ? n.toFixed(f).padEnd(4) : " ".repeat(4);
    result.lines.forEach((line, i) => {
      const isBest = (l) => l >= result.best.start && l < result.best.start + this.opts.lines;
      const lineScore = score(result.scores[i]);
      const lineHeat = score(result.heat[i]);
      const windowHeat = score(result.windows[i]?.heat ?? 0);
      const windowScore = score(result.windows[i]?.score ?? 0);
      const coverage = ((result.windows[i]?.coverage ?? 0) * 100).toFixed(0).padStart(3);
      console.log(`s:${lineScore} h:${lineHeat} wh:${windowHeat} ws:${windowScore} ${coverage}% ${isBest(i) ? ">" : " "} ${line}`);
    });
  }
  /** Rewrite matching words in `text` via `hl(word, offset)`; non-matching text is untouched. */
  highlight(text, hl) {
    return text.replace(WORD_REGEX, (word, offset) => this.match(word) ? hl(word, offset) : word);
  }
};
//#endregion
|
|
351
|
+
export { Context, Doc, LOG_LEVELS, LoggerBase, Progress, Snippet, URI_PREFIX, WORD_REGEX, assertUri, astat, chunkMarkdown, chunkText, findUp, gitRoot, hash, isLogLevel, isStopWord, normPath, normUri, parentUri, parseFrontmatter, parseMarkdown, parseSections, parseYaml, shouldLog, sstat, toError, toFts, tokenize };
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { n as parseModelUri } from "./models-DFQSgBNr.mjs";
|
|
2
|
+
import { join } from "pathe";
|
|
3
|
+
import { availableParallelism } from "node:os";
|
|
4
|
+
import { LlamaLogLevel } from "node-llama-cpp";
|
|
5
|
+
//#region src/embed/llama.ts
/**
 * Embedding backend built on node-llama-cpp. Maintains a small pool of
 * embedding contexts sized from the CPU core count and embeds texts by
 * round-robining across the pool.
 */
var LlamaBackend = class LlamaBackend {
  /** Shared llama.cpp runtime, created once per process. */
  static llama;
  /** "gpu" when the runtime detected a GPU, else "cpu". */
  device;
  /** The model's training context size — upper bound on tokens per input. */
  maxTokens;
  /** Embedding vector dimensionality. */
  dims;
  #contexts = [];
  #model;
  #poolSize;
  #threadsPerCtx;
  #ctx;
  constructor(llama, model, ctx) {
    this.#model = model;
    this.#ctx = ctx;
    this.maxTokens = model.trainContextSize;
    this.dims = model.embeddingVectorSize;
    // NOTE(review): reports GPU *availability*, not whether ctx.opts.useGpu
    // actually enabled it — confirm that is intended.
    this.device = llama.gpu ? "gpu" : "cpu";
    const useGpu = llama.gpu && ctx.opts.useGpu;
    const cores = availableParallelism();
    // Was a ternary with two byte-identical branches — collapsed.
    // One context per 4 cores, clamped to [1, 8].
    const poolSize = Math.min(8, Math.max(1, Math.floor(cores / 4)));
    // On GPU let llama.cpp pick thread counts (0); on CPU split the configured
    // thread budget evenly across the context pool.
    this.#threadsPerCtx = useGpu ? 0 : Math.max(1, Math.floor(ctx.opts.threads / poolSize));
    this.#poolSize = poolSize;
    ctx.logger.debug({
      cores,
      gpu: llama.gpu,
      poolSize,
      threadsPerCtx: this.#threadsPerCtx
    });
  }
  /**
   * Resolve the model file (downloading into <root>/models if needed), create
   * the shared llama runtime on first use, and load the model.
   */
  static async load(ctx) {
    const { model, variant } = parseModelUri(ctx.opts.model.uri);
    const { getLlama, resolveModelFile } = await import("node-llama-cpp");
    const modelsDir = join(ctx.root, "models");
    const modelPath = await resolveModelFile(`hf:${model}${variant ? `:${variant}` : ""}`, modelsDir);
    LlamaBackend.llama ??= await getLlama({
      gpu: ctx.opts.useGpu ? "auto" : false,
      logLevel: LlamaLogLevel.error,
      logger: (level, message) => {
        const l = level;
        // Route llama.cpp log lines through the context logger when the level maps.
        (ctx.logger[l] ?? ctx.logger.log)(level.toString(), message);
      }
    });
    const lm = await LlamaBackend.llama.loadModel({ modelPath });
    return new LlamaBackend(LlamaBackend.llama, lm, ctx);
  }
  /** Get or create up to `count` embedding contexts */
  async acquire(count) {
    const needed = Math.min(count, this.#poolSize) - this.#contexts.length;
    for (let i = 0; i < needed; i++) try {
      this.#ctx.logger.debug(`Creating embedding context ${this.#contexts.length + 1}/${this.#poolSize}...`);
      this.#contexts.push(await this.#model.createEmbeddingContext({
        contextSize: this.#ctx.opts.maxTokens,
        threads: this.#threadsPerCtx
      }));
    } catch {
      // Creation failed (presumably resource exhaustion — TODO confirm):
      // shrink the pool to what was successfully created and stop trying.
      this.#ctx.logger.warn(`Failed to create embedding context ${this.#contexts.length + 1}. Adjusting pool size down to ${this.#contexts.length}.`);
      this.#poolSize = this.#contexts.length;
      break;
    }
    return this.#contexts;
  }
  /** Embed all texts in parallel, round-robining over the context pool. */
  async embed(texts) {
    const contexts = await this.acquire(texts.length);
    return Promise.all(texts.map((text, idx) => contexts[idx % contexts.length].getEmbeddingFor(text).then((embedding) => [...embedding.vector])));
  }
  /** Token count of `input` under this model's tokenizer. */
  toks(input) {
    return this.#model.tokenize(input).length;
  }
};
//#endregion
|
|
75
|
+
export { LlamaBackend };
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { defu } from "defu";
|
|
2
|
+
//#region src/embed/models.ts
/**
 * Split a model URI of the form "provider:model[:variant]" into its parts.
 * Throws when the provider or model segment is missing.
 */
function parseModelUri(uri) {
  const segments = uri.split(":");
  const [backend, model, variant] = segments;
  if (!backend || !model) {
    throw new Error(`Invalid model URI: ${uri}. Expected format "provider:model[:variant]"`);
  }
  return { backend, model, variant };
}
|
|
12
|
+
/**
 * Normalize a model option (bare URI string or options object) into a full
 * configuration by layering: explicit options > per-URI preset > family
 * preset > DEFAULTS.
 */
function resolveModel(opts) {
  const isUriString = typeof opts === "string";
  const uri = isUriString ? opts : opts?.uri ?? DEFAULTS.uri;
  const options = isUriString ? { uri } : opts ?? { uri };
  const baseKey = options.base ?? MODELS[uri]?.base ?? "";
  const base = MODELS[baseKey] ?? {};
  return defu(options, MODELS[uri] ?? {}, base, DEFAULTS);
}
|
|
18
|
+
/** Dynamically import and instantiate the backend named by the model URI's provider prefix. */
function loadModel(ctx) {
  const { backend } = parseModelUri(ctx.opts.model.uri);
  switch (backend) {
    case "transformers":
      return import("./transformers-CJ3QA2PK.mjs").then(({ TransformersBackend }) => TransformersBackend.load(ctx));
    case "llama":
      return import("./llama-CT3dc9Cn.mjs").then(({ LlamaBackend }) => LlamaBackend.load(ctx));
    case "openai":
      return import("./openai-j2_2GM4J.mjs").then(({ OpenAIBackend }) => OpenAIBackend.load(ctx));
    default:
      throw new Error(`Unsupported model backend: ${String(backend)}`);
  }
}
|
|
25
|
+
/**
 * Baseline model configuration used when a preset does not override a field.
 * `prompt.document` / `prompt.query` turn raw text into the string that is
 * actually embedded.
 */
const DEFAULTS = {
  prompt: {
    document: (doc) => {
      const title = doc.title?.trim();
      if (title?.length) {
        const idx = doc.text.indexOf(title);
        // Prepend the title only if it is absent from (or buried > 10 chars into) the body text.
        if (idx === -1 || idx > 10) return `${title}\n\n${doc.text}`;
      }
      return doc.text;
    },
    query: (query) => query
  },
  uri: "transformers:Snowflake/snowflake-arctic-embed-s:q8"
};
/**
 * Model presets keyed by family name (e.g. "BAAI", "snowflake") or by full
 * model URI. Family entries list their model URIs plus shared pooling/prompt
 * settings; the loop below registers per-URI alias entries.
 */
const MODELS = {
  BAAI: {
    models: [
      "transformers:BAAI/bge-large-en-v1.5",
      "transformers:BAAI/bge-base-en-v1.5",
      "transformers:BAAI/bge-small-en-v1.5"
    ],
    pooling: "cls",
    prompt: { query: (query) => `Represent this sentence for searching relevant passages: ${query}` }
  },
  openai: { models: [
    "openai:text-embedding-3-small",
    "openai:text-embedding-3-large",
    "openai:text-embedding-ada-002"
  ] },
  snowflake: {
    models: [
      "transformers:Snowflake/snowflake-arctic-embed-m-v2.0:q8",
      "transformers:Snowflake/snowflake-arctic-embed-s:q8",
      "transformers:Snowflake/snowflake-arctic-embed-xs:q8",
      "llama:mradermacher/snowflake-arctic-embed-s-GGUF:Q4_K_M"
    ],
    pooling: "cls",
    prompt: { query: (query) => `Represent this sentence for searching relevant passages: ${query}` }
  },
  "transformers:onnx-community/embeddinggemma-300m-ONNX:q8": {
    pooling: "mean",
    prompt: {
      document: (doc) => `title: ${doc.title ?? "none"} | ${doc.text}`,
      query: (query) => `task: search result | query: ${query}`
    }
  }
};
// Register each listed model URI as its own key pointing back at its family
// ({ base, uri }), so per-URI lookups can inherit the family preset.
for (const [base, options] of Object.entries(MODELS)) for (const model of options.models ?? []) MODELS[model] = {
  base,
  uri: model
};
//#endregion
|
|
77
|
+
export { parseModelUri as n, resolveModel as r, loadModel as t };
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { n as parseModelUri } from "./models-DFQSgBNr.mjs";
|
|
2
|
+
//#region src/embed/openai.ts
/** REST endpoint for OpenAI's embeddings API. */
const OPENAI_EMBEDDING_URL = "https://api.openai.com/v1/embeddings";
/**
 * Known context/vector sizes for OpenAI embedding models. Models missing from
 * this table are probed at runtime (see OpenAIBackend.load in this file).
 */
const MODEL_INFO = {
  "text-embedding-3-large": {
    contextSize: 8191,
    vectorSize: 3072
  },
  "text-embedding-3-small": {
    contextSize: 8191,
    vectorSize: 1536
  },
  "text-embedding-ada-002": {
    contextSize: 8191,
    vectorSize: 1536
  }
};
|
|
18
|
+
/**
 * Embedding backend that calls OpenAI's hosted embeddings API.
 * Token counting is performed locally via gpt-tokenizer.
 */
var OpenAIBackend = class OpenAIBackend {
  /** Remote backend — no local compute device. */
  device = "api";
  /** Context window (tokens) of the selected model. */
  maxTokens;
  /** Embedding vector dimensionality. */
  dims;
  #model;
  #apiKey;
  #tokenizer;
  #ctx;
  constructor(model, apiKey, info, tokenizer, ctx) {
    this.#model = model;
    this.#apiKey = apiKey;
    this.maxTokens = info.contextSize;
    this.dims = info.vectorSize;
    this.#tokenizer = tokenizer;
    this.#ctx = ctx;
  }
  /**
   * Build a backend for the model named in ctx.opts.model.uri. Requires the
   * OPENAI_API_KEY environment variable. Models absent from MODEL_INFO are
   * probed with a one-off embedding request to discover their vector size.
   */
  static async load(ctx) {
    const { model } = parseModelUri(ctx.opts.model.uri);
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) throw new Error("Missing `OPENAI_API_KEY` environment variable for OpenAI embeddings.");
    const { encode } = await import(`gpt-tokenizer/model/${model}`);
    let info = MODEL_INFO[model];
    if (!info) {
      ctx.status.status = "probing model dimensions...";
      const probe = new OpenAIBackend(model, apiKey, {
        contextSize: 8191,
        vectorSize: 0
      }, encode, ctx);
      const [vector] = await probe.embed(["test"]);
      info = {
        contextSize: 8191,
        vectorSize: vector.length
      };
    }
    return new OpenAIBackend(model, apiKey, info, encode, ctx);
  }
  /** POST all texts to the embeddings endpoint; vectors are returned in input order. */
  async embed(texts) {
    const response = await fetch(OPENAI_EMBEDDING_URL, {
      body: JSON.stringify({
        dimensions: this.#ctx.opts.maxDims,
        input: texts,
        model: this.#model
      }),
      headers: {
        Authorization: `Bearer ${this.#apiKey}`,
        "Content-Type": "application/json"
      },
      method: "POST"
    });
    if (!response.ok) {
      const error = await response.text();
      throw new Error(`OpenAI embedding API error (${response.status}): ${error}`);
    }
    // The API may return items out of order; re-sort by the echoed input index.
    const { data } = await response.json();
    return data.toSorted((a, b) => a.index - b.index).map((item) => item.embedding);
  }
  /** Local token count for `input` using the model's tokenizer. */
  toks(input) {
    return this.#tokenizer(input).length;
  }
};
//#endregion
|
|
76
|
+
export { OpenAIBackend };
|