@rekal/mem 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/dist/db-BMh1OP4b.mjs +294 -0
  2. package/dist/doc-DnYN4jAU.mjs +116 -0
  3. package/dist/embed-rUMZxqed.mjs +100 -0
  4. package/dist/fs-DMp26Byo.mjs +32 -0
  5. package/dist/glob.d.mts +27 -0
  6. package/dist/glob.mjs +132 -0
  7. package/dist/index.d.mts +1465 -0
  8. package/dist/index.mjs +351 -0
  9. package/dist/llama-CT3dc9Cn.mjs +75 -0
  10. package/dist/models-DFQSgBNr.mjs +77 -0
  11. package/dist/openai-j2_2GM4J.mjs +76 -0
  12. package/dist/progress-B1JdNapX.mjs +263 -0
  13. package/dist/query-VFSpErTB.mjs +125 -0
  14. package/dist/runtime.node-DlQPaGrV.mjs +35 -0
  15. package/dist/search-BllHWtZF.mjs +166 -0
  16. package/dist/store-DE7S35SS.mjs +137 -0
  17. package/dist/transformers-CJ3QA2PK.mjs +55 -0
  18. package/dist/uri-CehXVDGB.mjs +28 -0
  19. package/dist/util-DNyrmcA3.mjs +11 -0
  20. package/dist/vfs-CNQbkhsf.mjs +222 -0
  21. package/foo.ts +3 -0
  22. package/foo2.ts +20 -0
  23. package/package.json +61 -0
  24. package/src/context.ts +77 -0
  25. package/src/db.ts +464 -0
  26. package/src/doc.ts +163 -0
  27. package/src/embed/base.ts +122 -0
  28. package/src/embed/index.ts +67 -0
  29. package/src/embed/llama.ts +111 -0
  30. package/src/embed/models.ts +104 -0
  31. package/src/embed/openai.ts +95 -0
  32. package/src/embed/transformers.ts +81 -0
  33. package/src/frecency.ts +58 -0
  34. package/src/fs.ts +36 -0
  35. package/src/glob.ts +163 -0
  36. package/src/index.ts +15 -0
  37. package/src/log.ts +60 -0
  38. package/src/md.ts +204 -0
  39. package/src/progress.ts +121 -0
  40. package/src/query.ts +131 -0
  41. package/src/runtime.bun.ts +33 -0
  42. package/src/runtime.node.ts +47 -0
  43. package/src/search.ts +230 -0
  44. package/src/snippet.ts +248 -0
  45. package/src/sqlite.ts +1 -0
  46. package/src/store.ts +180 -0
  47. package/src/uri.ts +28 -0
  48. package/src/util.ts +21 -0
  49. package/src/vfs.ts +257 -0
  50. package/test/doc.test.ts +61 -0
  51. package/test/fixtures/ignore-test/keep.md +0 -0
  52. package/test/fixtures/ignore-test/skip.log +0 -0
  53. package/test/fixtures/ignore-test/sub/keep.md +0 -0
  54. package/test/fixtures/store/agent/index.md +9 -0
  55. package/test/fixtures/store/agent/lessons.md +21 -0
  56. package/test/fixtures/store/agent/soul.md +28 -0
  57. package/test/fixtures/store/agent/tools.md +25 -0
  58. package/test/fixtures/store/concepts/frecency.md +30 -0
  59. package/test/fixtures/store/concepts/index.md +9 -0
  60. package/test/fixtures/store/concepts/memory-coherence.md +33 -0
  61. package/test/fixtures/store/concepts/rag.md +27 -0
  62. package/test/fixtures/store/index.md +9 -0
  63. package/test/fixtures/store/projects/index.md +9 -0
  64. package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
  65. package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
  66. package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
  67. package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
  68. package/test/fixtures/store/user/family.md +13 -0
  69. package/test/fixtures/store/user/index.md +9 -0
  70. package/test/fixtures/store/user/preferences.md +29 -0
  71. package/test/fixtures/store/user/profile.md +29 -0
  72. package/test/fs.test.ts +15 -0
  73. package/test/glob.test.ts +190 -0
  74. package/test/md.test.ts +177 -0
  75. package/test/query.test.ts +105 -0
  76. package/test/uri.test.ts +46 -0
  77. package/test/util.test.ts +62 -0
  78. package/test/vfs.test.ts +164 -0
  79. package/tsconfig.json +3 -0
  80. package/tsdown.config.ts +8 -0
@@ -0,0 +1,294 @@
1
+ import { t as openDatabase } from "./runtime.node-DlQPaGrV.mjs";
2
+ //#region src/db.ts
3
/** Default number of rows returned by FTS and vector searches. */
const SEARCH_LIMIT = 20;
/** A term becomes a stopword candidate when it occurs in more than 30% of docs. */
const STOPWORD_THRESHOLD = 0.3;
/** Minimum document count before a term may be treated as a stopword. */
const STOPWORD_MIN_DOCS = 10;
/** Upper bound on the number of vocab rows scanned for stopwords. */
const STOPWORD_LIMIT = 1000;
/** True when the chunk carries a computed embedding array. */
function hasEmbedding(c) {
	return Array.isArray(c.embedding);
}
/** Throw if any chunk in the batch is missing its embedding. */
function assertEmbeddings(chunks) {
	for (const chunk of chunks) {
		if (hasEmbedding(chunk)) continue;
		throw new Error(`Chunk is missing embedding: ${JSON.stringify(chunk)}`);
	}
}
13
/**
 * SQLite persistence layer for the memory store.
 *
 * Schema (created idempotently in init()):
 *  - docs       one row per markdown doc (path, hash, extracted text fields)
 *  - docs_fts   FTS5 external-content index over docs, kept in sync by triggers
 *  - docs_vocab fts5vocab table used for IDF / stopword statistics
 *  - vec        vec0 virtual table for chunk embeddings (created lazily,
 *               once the embedding dimension is known)
 *  - meta/cache small key-value tables
 */
var Db = class Db {
	#db;
	// Lazily-computed info about the vec table: { dims?, exists, init? }.
	#vec;
	constructor(db) {
		this.#db = db;
		this.init();
	}
	/** Open (or create) the database file and initialize the schema. */
	static async load(dbPath) {
		return new Db(await openDatabase(dbPath));
	}
	/** Idempotent schema setup: pragmas, tables, FTS index and sync triggers. */
	init() {
		this.#db.run("PRAGMA journal_mode = WAL");
		this.#db.run("PRAGMA foreign_keys = ON");
		this.#db.run("PRAGMA busy_timeout = 5000");
		this.#db.run(`
			CREATE TABLE IF NOT EXISTS docs (
				id INTEGER PRIMARY KEY AUTOINCREMENT,
				path TEXT NOT NULL UNIQUE,
				hash TEXT NOT NULL,
				vec_hash TEXT,
				body TEXT NOT NULL DEFAULT '',
				description TEXT NOT NULL DEFAULT '',
				title TEXT NOT NULL DEFAULT '',
				tags TEXT NOT NULL DEFAULT '',
				entities TEXT NOT NULL DEFAULT '',
				updated_at TEXT NOT NULL,
				synced_at TEXT,
				deadline REAL
			)
		`);
		this.#db.run(`CREATE INDEX IF NOT EXISTS idx_docs_path ON docs(path)`);
		this.#db.run(`CREATE INDEX IF NOT EXISTS idx_docs_hash ON docs(hash)`);
		// External-content FTS index: rows live in docs, FTS stores only the index.
		this.#db.run(`
			CREATE VIRTUAL TABLE IF NOT EXISTS docs_fts USING fts5(
				entities, tags, description, title, body,
				content='docs',
				content_rowid='id',
				tokenize='porter unicode61'
			)
		`);
		this.#db.run(`
			CREATE TRIGGER IF NOT EXISTS docs_fts_insert AFTER INSERT ON docs BEGIN
				INSERT INTO docs_fts(rowid, entities, tags, description, title, body)
				VALUES (new.id, new.entities, new.tags, new.description, new.title, new.body);
			END
		`);
		this.#db.run(`
			CREATE TRIGGER IF NOT EXISTS docs_fts_delete AFTER DELETE ON docs BEGIN
				INSERT INTO docs_fts(docs_fts, rowid, entities, tags, description, title, body)
				VALUES ('delete', old.id, old.entities, old.tags, old.description, old.title, old.body);
			END
		`);
		// Only re-index on UPDATE when an indexed column actually changed.
		this.#db.run(`
			CREATE TRIGGER IF NOT EXISTS docs_fts_update AFTER UPDATE ON docs
			WHEN old.body != new.body
				OR old.title != new.title
				OR old.description != new.description
				OR old.tags != new.tags
				OR old.entities != new.entities
			BEGIN
				INSERT INTO docs_fts(docs_fts, rowid, entities, tags, description, title, body)
				VALUES ('delete', old.id, old.entities, old.tags, old.description, old.title, old.body);
				INSERT INTO docs_fts(rowid, entities, tags, description, title, body)
				VALUES (new.id, new.entities, new.tags, new.description, new.title, new.body);
			END
		`);
		// Vocabulary view over the FTS index, used for document-frequency stats.
		this.#db.run(`CREATE VIRTUAL TABLE IF NOT EXISTS docs_vocab USING fts5vocab('docs_fts', 'row')`);
		this.#db.run(`
			CREATE TABLE IF NOT EXISTS meta (
				key TEXT PRIMARY KEY,
				value TEXT
			)
		`);
		this.#db.run(`
			CREATE TABLE IF NOT EXISTS cache (
				key TEXT PRIMARY KEY,
				value TEXT NOT NULL,
				accessed_at TEXT NOT NULL
			)
		`);
	}
	/** Drop everything (triggers first, then tables), vacuum, and rebuild the schema. */
	reset() {
		this.#db.run(`DROP TRIGGER IF EXISTS docs_fts_insert`);
		this.#db.run(`DROP TRIGGER IF EXISTS docs_fts_delete`);
		this.#db.run(`DROP TRIGGER IF EXISTS docs_fts_update`);
		this.#db.run(`DROP TABLE IF EXISTS docs_fts`);
		this.#db.run(`DROP TABLE IF EXISTS vec`);
		this.#db.run(`DROP TABLE IF EXISTS cache`);
		this.#db.run(`DROP TABLE IF EXISTS docs`);
		this.#db.run(`DROP TABLE IF EXISTS meta`);
		this.#db.run(`VACUUM`);
		this.#vec = { exists: false };
		this.init();
	}
	/**
	 * Create the vec0 table with the given embedding dimension (once).
	 * Throws if a vec table already exists with a different dimension.
	 */
	initVec(dims) {
		if (this.vec.init) return;
		const existingDims = this.vec.dims;
		if (existingDims && existingDims !== dims) throw new Error(`Vector dimension mismatch: existing **vec** has \`${existingDims}\` dims, but got \`${dims}\`.\nRun \`rekal reset\` and \`rekal sync\` to recreate with the correct dimensions.`);
		// `+path` is an auxiliary (unindexed) column; cosine distance metric.
		this.#db.run(`CREATE VIRTUAL TABLE IF NOT EXISTS vec USING vec0(
			doc_id INTEGER NOT NULL,
			seq INTEGER NOT NULL,
			+path TEXT NOT NULL,
			embedding float[${dims}] distance_metric=cosine
		)`);
		this.#vec = {
			dims,
			exists: true,
			init: true
		};
	}
	/** Fetch one doc row by numeric id or by path. */
	getDoc(from) {
		const field = typeof from === "number" ? "id" : "path";
		return this.#db.query(`SELECT * FROM docs WHERE ${field} = ?`).get(from);
	}
	/**
	 * Fetch many docs (all when `from` is omitted; by ids or paths otherwise).
	 * @returns Map keyed by doc id.
	 */
	getDocs(from) {
		let ret;
		if (!from) ret = this.#db.query(`SELECT * FROM docs`).all();
		else {
			// Assumes `from` is homogeneous: all ids or all paths.
			const field = typeof from[0] === "number" ? "id" : "path";
			const placeholders = from.map(() => "?").join(",");
			ret = this.#db.query(`SELECT * FROM docs WHERE ${field} IN (${placeholders})`).all(...from);
		}
		return new Map(ret.map((row) => [row.id, row]));
	}
	/** Upsert a doc row (keyed by path); returns the row id. */
	addDoc(row) {
		return this.#db.query(`INSERT INTO docs (path, hash, body, description, title, tags, entities, updated_at, synced_at)
			VALUES($path, $hash, $body, $description, $title, $tags, $entities, $updated_at, $synced_at)
			ON CONFLICT(path) DO UPDATE SET
				hash = excluded.hash,
				body = excluded.body,
				description = excluded.description,
				title = excluded.title,
				tags = excluded.tags,
				entities = excluded.entities,
				updated_at = excluded.updated_at,
				synced_at = excluded.synced_at
			RETURNING id`).get(row).id;
	}
	/** Delete a doc's rows; caller picks which tables via the flags object. */
	deleteDoc(id, tables = {}) {
		if (tables.vec) this.deleteEmbeddings(id);
		if (tables.docs) this.#db.query(`DELETE FROM docs WHERE id = ?`).run(id);
	}
	/**
	 * Info about the vec table, computed once by parsing its CREATE statement
	 * out of sqlite_master (the dimension is embedded in the column type).
	 */
	get vec() {
		if (this.#vec) return this.#vec;
		const row = this.#db.query(`SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'vec'`).get();
		const match = row?.sql.match(/embedding float\[(\d+)\]/);
		this.#vec = {
			dims: match ? parseInt(match[1]) : void 0,
			exists: !!row?.sql
		};
		return this.#vec;
	}
	/** Aggregate counters for status/diagnostics output. */
	getStatus() {
		const count = (sql) => this.#db.query(sql).get().n;
		return {
			cache: count(`SELECT count(*) as n FROM cache`),
			dbSize: this.#db.query(`SELECT page_count * page_size as n FROM pragma_page_count, pragma_page_size`).get().n,
			docs: count(`SELECT count(*) as n FROM docs`),
			docsWithDescription: count(`SELECT count(*) as n FROM docs WHERE description != ''`),
			lastSync: this.#db.query(`SELECT max(synced_at) as t FROM docs`).get().t,
			unembedded: count(`SELECT count(*) as n FROM docs WHERE vec_hash IS NULL OR vec_hash != hash`),
			vecDims: this.vec.dims,
			vecs: this.vec.exists ? count(`SELECT count(*) as n FROM vec`) : 0,
			vocabTerms: count(`SELECT count(DISTINCT term) as n FROM docs_vocab`)
		};
	}
	/** Run `fn` inside a database transaction (delegates to the driver). */
	transaction(fn) {
		return this.#db.transaction(fn);
	}
	/** Docs whose embeddings are missing or stale (vec_hash out of date). */
	getUnembeddedDocs() {
		return this.#db.query(`SELECT * FROM docs
			WHERE vec_hash IS NULL OR vec_hash != hash
			ORDER BY path`).all();
	}
	/** Stamp a doc as seen during the current sync. */
	touchDoc(id) {
		this.#db.query(`UPDATE docs SET synced_at = ? WHERE id = ?`).run((/* @__PURE__ */ new Date()).toISOString(), id);
	}
	/** Record the doc hash whose embeddings are now current. */
	markEmbedded(id, docHash) {
		this.#db.query(`UPDATE docs SET vec_hash = ? WHERE id = ?`).run(docHash, id);
	}
	/** Delete docs not seen since the given sync timestamp, optionally scoped to a path prefix. */
	deleteStaleDocs(syncedBefore, prefix) {
		let query = `SELECT id FROM docs WHERE synced_at IS NULL OR synced_at < ?`;
		const params = [syncedBefore];
		if (prefix) {
			query += ` AND path LIKE ? || '%'`;
			params.push(prefix);
		}
		const stale = this.#db.query(query).all(...params);
		for (const { id } of stale) this.deleteDoc(id, {
			docs: true,
			vec: true
		});
		return stale.length;
	}
	/** Scoped FTS search: only match docs whose path starts with one of the given prefixes */
	searchFts(query, opts) {
		// An explicitly empty scope means "match nothing".
		if (opts?.scope?.length === 0) return [];
		const scope = opts?.scope ?? [];
		const scopeQuery = scope.length === 0 ? "" : `AND (${scope.map(() => `d.path LIKE ? || '%'`).join(" OR ")})`;
		// bm25 weights favor entities > tags > description > title > body.
		return this.#db.query(`SELECT f.rowid, bm25(docs_fts, 10, 8, 5, 3, 1) as score
			FROM docs_fts f
			${scope.length > 0 ? "JOIN docs d ON d.id = f.rowid" : ""}
			WHERE docs_fts MATCH ?
			${scopeQuery}
			ORDER BY score
			LIMIT ?`).all(query, ...scope, opts?.limit ?? SEARCH_LIMIT);
	}
	/**
	 * Gets weights for high-frequency terms.
	 * Note: Truly common words will result in an IDF of 0 or less.
	 */
	getStopWords() {
		const totalDocs = this.#db.query("SELECT count(*) as n FROM docs").get()?.n ?? 0;
		if (totalDocs === 0) return /* @__PURE__ */ new Map();
		// Both thresholds must hold: a fraction of the corpus AND an absolute floor.
		const rows = this.#db.query(`SELECT v.term, v.doc
			FROM docs_vocab v
			WHERE v.doc > ? AND v.doc > ?
			ORDER BY v.doc DESC
			LIMIT ?`).all(totalDocs * STOPWORD_THRESHOLD, STOPWORD_MIN_DOCS, STOPWORD_LIMIT);
		return new Map(rows.map((r) => {
			// BM25-style IDF, clamped at 0 so very common terms get zero weight.
			const idf = Math.log((totalDocs - r.doc + .5) / (r.doc + .5));
			return [r.term, Math.max(0, idf)];
		}));
	}
	/** IDF weight per query term (terms absent from the vocab get df = 0). */
	getWeights(terms) {
		if (terms.length === 0) return [];
		const total = this.#db.query(`SELECT count(*) as n FROM docs`).get().n;
		const placeholders = terms.map(() => "?").join(",");
		const rows = this.#db.query(`SELECT term, doc FROM docs_vocab WHERE term IN (${placeholders})`).all(...terms);
		const df = new Map(rows.map((r) => [r.term, r.doc]));
		return terms.map((t) => Math.log((total - (df.get(t) ?? 0) + .5) / ((df.get(t) ?? 0) + .5)));
	}
	/** Insert embeddings into the vec table */
	insertEmbeddings(chunks) {
		assertEmbeddings(chunks);
		if (chunks.length === 0) return;
		// Lazily create the vec table sized to the first chunk's dimension.
		this.initVec(chunks[0].embedding.length);
		const stmt = this.#db.query(`INSERT INTO vec(doc_id, seq, path, embedding) VALUES (?, ?, ?, ?)`);
		for (const chunk of chunks) stmt.run(chunk.doc_id, chunk.seq, chunk.doc.path, JSON.stringify(chunk.embedding));
	}
	/** Delete all vec entries for a doc */
	deleteEmbeddings(docId) {
		if (this.vec.exists) this.#db.query(`DELETE FROM vec WHERE doc_id = ?`).run(docId);
	}
	/** Global KNN search, returns top results across all docs */
	searchVec(embedding, opts) {
		if (!this.vec.exists) return [];
		const limit = opts?.limit ?? SEARCH_LIMIT;
		// Cosine distance is in [0, 2]; map to a [0, 1] similarity score.
		return this.#db.query(`SELECT doc_id, seq, path, distance, (1 - distance/2) as score
			FROM vec
			WHERE embedding MATCH ?
			AND k = ?
			ORDER BY distance`).all(JSON.stringify(embedding), limit);
	}
	/** Set (or clear, with null) the frecency deadline for a doc. */
	setDeadline(docId, deadline) {
		this.#db.query(`UPDATE docs SET deadline = ? WHERE id = ?`).run(deadline, docId);
	}
	/** Read a value from the meta table; undefined when the key is absent. */
	getMeta(key) {
		return this.#db.query(`SELECT value FROM meta WHERE key = ?`).get(key)?.value;
	}
	/** Upsert a key in the meta table (uses excluded.value like cacheSet). */
	setMeta(key, value) {
		this.#db.query(`INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value`).run(key, value);
	}
	/** Read a cached JSON value and bump its accessed_at for LRU pruning. */
	cacheGet(key) {
		const row = this.#db.query(`SELECT value FROM cache WHERE key = ?`).get(key);
		if (!row) return;
		this.#db.query(`UPDATE cache SET accessed_at = ? WHERE key = ?`).run((/* @__PURE__ */ new Date()).toISOString(), key);
		return JSON.parse(row.value);
	}
	/** Store a JSON-serializable value in the cache; returns the value. */
	cacheSet(key, value) {
		this.#db.query(`INSERT INTO cache (key, value, accessed_at) VALUES (?, ?, ?)
			ON CONFLICT(key) DO UPDATE SET value = excluded.value, accessed_at = excluded.accessed_at`).run(key, JSON.stringify(value), (/* @__PURE__ */ new Date()).toISOString());
		return value;
	}
	/** LRU-prune the cache down to at most `maxEntries` rows. */
	cachePrune(maxEntries = 1e4) {
		this.#db.query(`DELETE FROM cache WHERE key NOT IN (
			SELECT key FROM cache ORDER BY accessed_at DESC LIMIT ?
		)`).run(maxEntries);
	}
};
293
+ //#endregion
294
+ export { Db };
@@ -0,0 +1,116 @@
1
+ import { t as astat } from "./fs-DMp26Byo.mjs";
2
+ import { t as hash } from "./util-DNyrmcA3.mjs";
3
+ import { a as parseMarkdown } from "./progress-B1JdNapX.mjs";
4
+ import { r as normUri } from "./uri-CehXVDGB.mjs";
5
+ import { readFile } from "node:fs/promises";
6
+ import { basename, join, resolve } from "pathe";
7
+ //#region src/doc.ts
8
// File name that marks a directory's own markdown document.
const INDEX = "index.md";
// Soft character cap when packing a description from headings.
const MAX_DESC_LENGTH = 120;
/**
 * A markdown document loaded from disk — either a `.md` file or a directory
 * whose content lives in its `index.md`. Holds the raw text, parsed
 * frontmatter/sections, a content hash and the file mtime.
 */
var Doc = class Doc {
	#isDir = false;
	#name = "";
	// Content hash of the normalized text; "" until load() runs.
	hash = "";
	// File mtime; epoch 0 until load() runs (or when no file exists).
	updated = /* @__PURE__ */ new Date(0);
	// Non-empty headings extracted from parsed sections: { level, text }.
	headings = [];
	// Output of parseMarkdown(); starts as an empty placeholder.
	parsed = {
		body: "",
		bodyOffset: 0,
		frontmatter: {},
		sections: [],
		text: ""
	};
	constructor(uri, path) {
		this.uri = uri;
		this.path = path;
		// NOTE(review): the raw assignment above is immediately replaced by the
		// resolved absolute path; the first assignment appears redundant.
		this.path = resolve(path);
		this.#name = basename(this.path, ".md");
	}
	/** Full original markdown text, including frontmatter. */
	get text() {
		return this.parsed.text;
	}
	/** Markdown body without frontmatter. */
	get body() {
		return this.parsed.body;
	}
	/** Parsed frontmatter as an object. */
	get fm() {
		return this.parsed.frontmatter;
	}
	/** Name of the doc, derived from the file or folder name, without extension */
	get name() {
		return this.#name;
	}
	/** Trimmed frontmatter description, or undefined when absent/empty. */
	get $description() {
		const ret = this.fm.description?.trim();
		return ret?.length ? ret : void 0;
	}
	/** Desc from frontmatter or packed from headings. */
	get description() {
		const desc = this.$description;
		if (desc || this.headings.length === 0) return desc;
		// Copy headings so the `used` flag does not leak into this.headings.
		const headings = this.headings.map((h) => ({ ...h }));
		const minLevel = Math.min(...headings.map((h) => h.level));
		const maxLevel = Math.max(...headings.map((h) => h.level));
		let chars = 0;
		// Greedily pack heading texts, shallower levels first, until the
		// budget is spent; the first heading is always taken (chars === 0).
		for (let level = minLevel; level <= maxLevel; level++) for (const h of headings) {
			if (h.level !== level) continue;
			if (chars !== 0 && chars + h.text.length > MAX_DESC_LENGTH) continue;
			h.used = true;
			chars += h.text.length;
		}
		return headings.filter((h) => h.used).map((h) => h.text).join(", ").trim();
	}
	/** Title from frontmatter or first heading */
	get $title() {
		const title = this.fm.title;
		if (typeof title === "string" && title.trim().length > 0) return title.trim();
		return this.headings[0]?.text;
	}
	/** `$title` if it doesn't contain the name, otherwise `name - $title` */
	get title() {
		const title = this.$title;
		if (!(title ?? "").length) return this.name;
		return title?.toLowerCase().includes(this.name.toLowerCase()) ? title : `${this.name} - ${title}`;
	}
	/** Frontmatter tags, defaulting to an empty list. */
	get tags() {
		return this.fm.tags ?? [];
	}
	/** Frontmatter entities, defaulting to an empty list. */
	get entities() {
		return this.fm.entities ?? [];
	}
	/** True when this doc is a directory backed by an index.md. */
	get isDir() {
		return this.#isDir;
	}
	/**
	 * Read and parse the doc from disk.
	 * Returns `this` on success, undefined when nothing exists at the path.
	 * Throws when the path itself points at an index.md file.
	 */
	async load() {
		if (basename(this.path) === INDEX) throw new Error(`Doc path cannot end with \`${INDEX}\`:\n\`${this.path}\``);
		let s = await astat(this.path);
		let mdPath = this.path;
		// A directory doc reads its content from <dir>/index.md (may be absent).
		if (s?.isDirectory()) {
			this.#isDir = true;
			mdPath = join(this.path, INDEX);
			s = await astat(mdPath);
		}
		if (!s && !this.#isDir) return;
		this.uri = normUri(this.uri, this.isDir);
		// Normalize CRLF so the hash is stable across platforms.
		const text = (s ? await readFile(mdPath, "utf8") : "").replace(/\r\n/g, "\n");
		this.updated = s?.mtime ?? /* @__PURE__ */ new Date(0);
		this.hash = hash(text);
		this.parsed = parseMarkdown(text);
		// Keep only real headings (level > 0, non-blank text).
		this.headings = this.parsed.sections.filter((section) => section.level > 0 && section.heading.trim().length > 0).map((section) => ({
			level: section.level,
			text: section.heading
		}));
		return this;
	}
	/**
	 * Convenience factory: accepts (uri, path) or a single { uri, path } entry.
	 * Resolves to undefined when no path is given or nothing exists on disk.
	 */
	static async load(uri, path) {
		const e = typeof uri === "string" ? {
			path,
			uri
		} : uri;
		return e.path ? await new Doc(e.uri, e.path).load() : void 0;
	}
};
115
+ //#endregion
116
+ export { Doc as t };
@@ -0,0 +1,100 @@
1
+ import { n as chunkMarkdown, t as Progress } from "./progress-B1JdNapX.mjs";
2
+ import { n as parseModelUri, r as resolveModel, t as loadModel } from "./models-DFQSgBNr.mjs";
3
+ import { availableParallelism } from "node:os";
4
+ //#region src/embed/base.ts
5
/** Baseline embedder options shared by every backend. */
const defaults = {
	batchSize: 0,
	maxDims: 512,
	maxTokens: 512,
	useGpu: true
};
/** Per-backend overrides layered on top of `defaults`. */
const backend_defaults = {
	llama: {},
	openai: { batchSize: 50 },
	transformers: {}
};
/** True when the input is a pre-built chunk (already carries a prompt string). */
function isChunk(input) {
	if (input == null) return false;
	return typeof input.prompt === "string";
}
19
/**
 * Wraps a pluggable embedding backend (llama / openai / transformers).
 * The backend is loaded lazily and exactly once; options are merged from
 * library defaults, backend defaults and user config.
 */
var Embedder = class {
	// Single-flight promise guarding backend load (set on first backend() call).
	#loading;
	opts;
	model;
	status = new Progress("embedder");
	#backend;
	constructor(ctx) {
		this.ctx = ctx;
		const opts = ctx.opts.embedder ?? {};
		this.model = resolveModel(opts.model);
		const { backend } = parseModelUri(this.model.uri);
		const base = {
			...defaults,
			...backend_defaults[backend]
		};
		// Precedence: user opts > backend defaults > library defaults.
		// Thread count defaults to min(8, cores - 2), floored at 1.
		this.opts = {
			threads: Math.max(1, opts.threads ?? Math.min(8, availableParallelism() - 2)),
			...base,
			...opts,
			model: this.model
		};
	}
	/** Parsed components of the model URI (backend, etc.). */
	info() {
		return parseModelUri(this.model.uri);
	}
	/**
	 * Load the backend on first use (deduped via #loading) and clamp token /
	 * dimension limits to what the loaded model actually supports.
	 */
	async backend() {
		this.#loading ??= (async () => {
			this.status.name = `Loading model \`${this.model.uri}\``;
			// Only surface progress if loading takes longer than 500ms.
			const t = setTimeout(() => {
				this.opts.onProgress?.(this.status);
			}, 500);
			this.#backend = await loadModel({
				logger: this.ctx,
				opts: this.opts,
				root: this.ctx.root,
				status: this.status
			});
			clearTimeout(t);
			this.opts.maxTokens = Math.min(this.opts.maxTokens, this.#backend.maxTokens);
			this.opts.maxDims = Math.min(this.opts.maxDims, this.#backend.dims);
			// batchSize 0 means "auto": larger batches on GPU, one-at-a-time on CPU.
			if (this.opts.batchSize === 0) this.opts.batchSize = this.#backend.device === "gpu" ? 50 : 1;
			this.ctx.debug({
				batchSize: this.opts.batchSize,
				device: this.#backend.device,
				threads: this.opts.threads,
				useGpu: this.opts.useGpu
			});
			this.status.stop();
			return this.#backend;
		})();
		return this.#backend ??= await this.#loading;
	}
	/**
	 * Turn an input into a prompt string: chunks pass through as-is; strings
	 * are treated as queries, objects as documents (per the model's templates).
	 */
	transform(input) {
		if (isChunk(input)) return input.prompt;
		const { prompt } = this.model;
		return typeof input === "string" ? prompt.query(input) : prompt.document(input);
	}
	/** Embed one input or a batch; returns a single vector or an array to match. */
	async embed(input) {
		const single = !Array.isArray(input);
		const todo = single ? [input] : input;
		const ret = await (await this.backend()).embed(todo.map((item) => this.transform(item)));
		return single ? ret[0] : ret;
	}
	/**
	 * Split input into embeddable chunks with prompts attached.
	 * A string input is a query; an object is a document with `text`/`title`.
	 */
	async chunk(input) {
		const backend = await this.backend();
		const isQuery = typeof input === "string";
		// Measure the prompt-template overhead with empty text so the chunker
		// can budget tokens for content only.
		const fixed = this.transform(isQuery ? "" : {
			text: "",
			title: input.title
		});
		return chunkMarkdown(isQuery ? input : input.text, backend, this.opts.maxTokens - backend.toks(fixed)).map((text, seq) => ({
			prompt: isQuery ? this.transform(text) : this.transform({
				text,
				title: input.title
			}),
			seq,
			text
		}));
	}
};
99
+ //#endregion
100
+ export { Embedder };
@@ -0,0 +1,32 @@
1
+ import { existsSync, statSync } from "node:fs";
2
+ import { stat } from "node:fs/promises";
3
+ import { dirname, join, resolve } from "pathe";
4
+ import { homedir } from "node:os";
5
+ //#region src/fs.ts
6
/** Synchronous stat that returns undefined instead of throwing (missing path, permissions, ...). */
function sstat(path) {
	let stats;
	try {
		stats = statSync(path);
	} catch {
		stats = void 0;
	}
	return stats;
}
11
/** Async stat that resolves to undefined instead of rejecting. */
async function astat(path) {
	try {
		return await stat(path);
	} catch {
		return void 0;
	}
}
14
/**
 * Walk up from `root` looking for a file named `name`.
 * Returns the found file's path, or undefined at the filesystem root.
 * When `stop` is given, the walk also halts (empty-handed) in any directory
 * containing an entry of that name.
 */
function findUp(root, name, stop) {
	for (let dir = resolve(root); ; ) {
		const candidate = join(dir, name);
		if (sstat(candidate)?.isFile()) return candidate;
		if (stop && existsSync(join(dir, stop))) return;
		const parent = dirname(dir);
		if (parent === dir) return;
		dir = parent;
	}
}
25
/** Resolve path segments to an absolute path, expanding a leading `~` to the home directory. */
function normPath(...paths) {
	const expanded = paths.map((p) => p.replace(/^~(?=\/|\\|$)/, homedir()));
	return resolve(...expanded);
}
28
/** Locate the nearest `.git` marker file above `path` via findUp, if any. */
function gitRoot(path) {
	const GIT_MARKER = ".git";
	return findUp(path, GIT_MARKER);
}
31
+ //#endregion
32
+ export { sstat as a, normPath as i, findUp as n, gitRoot as r, astat as t };
@@ -0,0 +1,27 @@
1
+ import { Dirent } from "node:fs";
2
+
3
+ //#region src/glob.d.ts
4
/** Comparator applied to directory entries before traversal. */
type GlobSort = (a: Dirent, b: Dirent) => number;
/** Built-in comparators selectable by name via `GlobOptions.sort`. */
declare const sorters: {
	/** Alphabetical by entry name. */
	name: (a: Dirent<string>, b: Dirent<string>) => number;
	/** Preserve readdir order. */
	none: () => number;
	/** Directories first, then alphabetical. */
	type: (a: Dirent<string>, b: Dirent<string>) => number;
};
/** Options accepted by `glob()`; all fields optional at the call site. */
type GlobOptions = {
	/** Root directory (or several roots, walked in order). */
	cwd: string | string[];
	/** Gitignore-style pattern(s) a file must match to be yielded. */
	glob?: string | string[];
	/** Follow symlinks into directories. */
	follow: boolean;
	/** Include dot-entries. */
	hidden: boolean;
	/** Honor ignore files and `exclude` patterns. */
	ignore: boolean;
	/** Restrict output to files or directories only. */
	type?: "file" | "directory";
	/** Also yield directories that contain no yielded children. */
	empty: boolean;
	/** Maximum traversal depth. */
	depth: number;
	/** Ignore-file names to read along the walk (e.g. .gitignore). */
	ignoreFiles: string[];
	/** Patterns always excluded from the walk. */
	exclude: string[];
	/** Called with each visited relative path (before filtering). */
	onVisit?: (rel: string) => void;
	/** Called when a directory cannot be read. */
	onError?: (path: string, error: Error) => void;
	/** Entry comparator, or the name of a built-in sorter. */
	sort?: GlobSort | keyof typeof sorters;
};
/** Walk the filesystem and yield matching paths relative to `cwd`. */
declare function glob(opts?: Partial<GlobOptions>): AsyncGenerator<string>;
26
+ //#endregion
27
+ export { GlobOptions, GlobSort, glob };
package/dist/glob.mjs ADDED
@@ -0,0 +1,132 @@
1
+ import { a as sstat, i as normPath, n as findUp } from "./fs-DMp26Byo.mjs";
2
+ import { n as toError } from "./util-DNyrmcA3.mjs";
3
+ import { readFileSync } from "node:fs";
4
+ import { readdir } from "node:fs/promises";
5
+ import { join } from "pathe";
6
+ //#region src/glob.ts
7
/** Built-in directory-entry comparators, selectable by name in GlobOptions.sort. */
const sorters = {
	// Alphabetical by entry name.
	name: (a, b) => a.name.localeCompare(b.name),
	// Preserve readdir order.
	none: () => 0,
	// Directories first, then alphabetical.
	type: (a, b) => {
		const aDir = a.isDirectory();
		const bDir = b.isDirectory();
		if (aDir !== bDir) return aDir ? -1 : 1;
		return a.name.localeCompare(b.name);
	}
};
16
/** Default glob options; see GlobOptions for field semantics. */
const defaults = {
	cwd: ".",
	depth: Infinity,
	empty: false,
	exclude: [".git", "node_modules/"],
	follow: false,
	hidden: false,
	ignore: true,
	ignoreFiles: [".gitignore", ".ignore"],
	sort: "name"
};
27
/**
 * Chain of ignore matchers, one node per directory that contributed an
 * ignore file. A lookup consults the deepest node first and falls back to
 * ancestors when a rule neither ignores nor un-ignores the path.
 */
var IgnoreTree = class IgnoreTree {
	parent;
	constructor(ig, rel = "") {
		this.ig = ig;
		this.rel = rel;
	}
	/** Create a child node scoped to the directory `rel`, linked back to this node. */
	extend(ig, rel) {
		const node = new IgnoreTree(ig, rel);
		node.parent = this;
		return node;
	}
	/** True when `rel` is ignored by this node or any ancestor. */
	ignores(rel) {
		// Test against this node's matcher with the path made relative to it.
		const verdict = this.ig.test(rel.slice(this.rel.length));
		if (verdict.ignored) return true;
		if (verdict.unignored) return false;
		// No rule matched here: defer to the parent chain; root default is false.
		if (this.parent) return this.parent.ignores(rel);
		return false;
	}
};
45
/**
 * Async filesystem walker yielding paths relative to `cwd` (directories with
 * a trailing "/"). Honors gitignore-style ignore files discovered along the
 * walk, optional glob patterns, depth limits and symlink following.
 * Uses an explicit stack (DFS) rather than recursion.
 */
async function* glob(opts = {}) {
	if (opts.depth && opts.depth < 1) return;
	const { default: ignore } = await import("ignore");
	const o = {
		...defaults,
		...opts
	};
	// Multiple roots: walk each one in turn with the same options.
	if (Array.isArray(o.cwd)) {
		for (const cwd of o.cwd) yield* glob({
			...o,
			cwd
		});
		return;
	}
	const root = normPath(o.cwd);
	const ignoreFiles = new Set(o.ignoreFiles);
	// Hard excludes plus the ignore-file names themselves are always ignored.
	const rootIgnore = ignore().add([...o.exclude, ...ignoreFiles]);
	// Glob patterns are matched via the ignore engine: "ignored" == "matches".
	const globIgnore = ignore().add(o.glob ?? []);
	const sorter = (typeof o.sort === "string" ? sorters[o.sort] : o.sort) ?? sorters.name;
	// Guards against revisiting dirs (e.g. symlink cycles when `follow` is on).
	const visited = /* @__PURE__ */ new Set();
	// Seed root rules from ignore files found above `root` (stopping at .git).
	if (o.ignore) for (const igf of ignoreFiles) {
		const igPath = findUp(root, igf, ".git");
		if (igPath) rootIgnore.add(readFileSync(igPath, "utf8"));
	}
	// Read one directory, apply local ignore files, and push kept children
	// onto the shared `stack` (declared below; only called after it exists).
	async function ls(dir) {
		if (visited.has(dir.path)) return;
		visited.add(dir.path);
		let entries;
		try {
			// Reversed so that stack.pop() yields entries in sorted order.
			entries = (await readdir(dir.path, { withFileTypes: true })).toSorted(sorter).toReversed();
		} catch (error) {
			return o.onError?.(dir.path, toError(error));
		}
		let ig = dir.ignore;
		const children = [];
		for (const entry of entries) {
			const path = join(entry.parentPath, entry.name);
			if (o.ignore && entry.isFile() && ignoreFiles.has(entry.name)) {
				// An ignore file in this dir extends the rule chain for siblings.
				const fig = ignore().add(readFileSync(path, "utf8"));
				ig = ig ? ig.extend(fig, dir.rel) : new IgnoreTree(fig, dir.rel);
			} else if (!o.hidden && entry.name.startsWith(".")) continue;
			else {
				let isDirectory = entry.isDirectory();
				// Treat a symlink-to-directory as a directory when following links.
				isDirectory ||= o.follow && entry.isSymbolicLink() && (sstat(path)?.isDirectory() ?? false);
				const rel = dir.rel + entry.name + (isDirectory ? "/" : "");
				const depth = dir.depth + 1;
				children.push({
					depth,
					dir: isDirectory,
					path,
					rel
				});
			}
		}
		for (const child of children) {
			o.onVisit?.(child.rel);
			if (o.ignore && ig?.ignores(child.rel)) continue;
			// With glob patterns set, files must match one to be kept.
			if (o.glob && !child.dir && !globIgnore.ignores(child.rel)) continue;
			stack.push({
				...child,
				ignore: ig
			});
		}
	}
	const stack = [{
		depth: 0,
		dir: true,
		ignore: new IgnoreTree(rootIgnore),
		path: root,
		rel: ""
	}];
	// Pending directory entries, withheld until proven non-empty (unless o.empty).
	const parents = [];
	while (stack.length > 0) {
		const entry = stack.pop();
		if (o.type !== "file" && entry.depth !== 0) {
			// Leaving a subtree: drop buffered dirs at or below this depth.
			while (!o.empty && parents.length > 0 && parents[parents.length - 1].depth >= entry.depth) parents.pop();
			if (entry.dir && entry.depth < o.depth) parents.push(entry);
			else {
				// A yielded child proves its buffered ancestors non-empty.
				for (const p of parents) yield p.rel;
				parents.length = 0;
				if (o.type !== "directory") yield entry.rel;
			}
		} else if (!entry.dir) yield entry.rel;
		if (entry.dir && entry.depth < o.depth) await ls(entry);
	}
}
131
+ //#endregion
132
+ export { glob };