@rekal/mem 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db-BMh1OP4b.mjs +294 -0
- package/dist/doc-DnYN4jAU.mjs +116 -0
- package/dist/embed-rUMZxqed.mjs +100 -0
- package/dist/fs-DMp26Byo.mjs +32 -0
- package/dist/glob.d.mts +27 -0
- package/dist/glob.mjs +132 -0
- package/dist/index.d.mts +1465 -0
- package/dist/index.mjs +351 -0
- package/dist/llama-CT3dc9Cn.mjs +75 -0
- package/dist/models-DFQSgBNr.mjs +77 -0
- package/dist/openai-j2_2GM4J.mjs +76 -0
- package/dist/progress-B1JdNapX.mjs +263 -0
- package/dist/query-VFSpErTB.mjs +125 -0
- package/dist/runtime.node-DlQPaGrV.mjs +35 -0
- package/dist/search-BllHWtZF.mjs +166 -0
- package/dist/store-DE7S35SS.mjs +137 -0
- package/dist/transformers-CJ3QA2PK.mjs +55 -0
- package/dist/uri-CehXVDGB.mjs +28 -0
- package/dist/util-DNyrmcA3.mjs +11 -0
- package/dist/vfs-CNQbkhsf.mjs +222 -0
- package/foo.ts +3 -0
- package/foo2.ts +20 -0
- package/package.json +61 -0
- package/src/context.ts +77 -0
- package/src/db.ts +464 -0
- package/src/doc.ts +163 -0
- package/src/embed/base.ts +122 -0
- package/src/embed/index.ts +67 -0
- package/src/embed/llama.ts +111 -0
- package/src/embed/models.ts +104 -0
- package/src/embed/openai.ts +95 -0
- package/src/embed/transformers.ts +81 -0
- package/src/frecency.ts +58 -0
- package/src/fs.ts +36 -0
- package/src/glob.ts +163 -0
- package/src/index.ts +15 -0
- package/src/log.ts +60 -0
- package/src/md.ts +204 -0
- package/src/progress.ts +121 -0
- package/src/query.ts +131 -0
- package/src/runtime.bun.ts +33 -0
- package/src/runtime.node.ts +47 -0
- package/src/search.ts +230 -0
- package/src/snippet.ts +248 -0
- package/src/sqlite.ts +1 -0
- package/src/store.ts +180 -0
- package/src/uri.ts +28 -0
- package/src/util.ts +21 -0
- package/src/vfs.ts +257 -0
- package/test/doc.test.ts +61 -0
- package/test/fixtures/ignore-test/keep.md +0 -0
- package/test/fixtures/ignore-test/skip.log +0 -0
- package/test/fixtures/ignore-test/sub/keep.md +0 -0
- package/test/fixtures/store/agent/index.md +9 -0
- package/test/fixtures/store/agent/lessons.md +21 -0
- package/test/fixtures/store/agent/soul.md +28 -0
- package/test/fixtures/store/agent/tools.md +25 -0
- package/test/fixtures/store/concepts/frecency.md +30 -0
- package/test/fixtures/store/concepts/index.md +9 -0
- package/test/fixtures/store/concepts/memory-coherence.md +33 -0
- package/test/fixtures/store/concepts/rag.md +27 -0
- package/test/fixtures/store/index.md +9 -0
- package/test/fixtures/store/projects/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
- package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
- package/test/fixtures/store/user/family.md +13 -0
- package/test/fixtures/store/user/index.md +9 -0
- package/test/fixtures/store/user/preferences.md +29 -0
- package/test/fixtures/store/user/profile.md +29 -0
- package/test/fs.test.ts +15 -0
- package/test/glob.test.ts +190 -0
- package/test/md.test.ts +177 -0
- package/test/query.test.ts +105 -0
- package/test/uri.test.ts +46 -0
- package/test/util.test.ts +62 -0
- package/test/vfs.test.ts +164 -0
- package/tsconfig.json +3 -0
- package/tsdown.config.ts +8 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { t as Progress } from "./progress-B1JdNapX.mjs";
|
|
2
|
+
import { t as Doc } from "./doc-DnYN4jAU.mjs";
|
|
3
|
+
import { performance } from "node:perf_hooks";
|
|
4
|
+
//#region src/store.ts
|
|
5
|
+
var Store = class Store {
|
|
6
|
+
constructor(db, ctx) {
|
|
7
|
+
this.db = db;
|
|
8
|
+
this.ctx = ctx;
|
|
9
|
+
}
|
|
10
|
+
static async load(ctx) {
|
|
11
|
+
return new Store(await ctx.db(), ctx);
|
|
12
|
+
}
|
|
13
|
+
add(doc) {
|
|
14
|
+
const row = this.db.getDoc(doc.path);
|
|
15
|
+
if (row?.hash === doc.hash) {
|
|
16
|
+
this.db.touchDoc(row.id);
|
|
17
|
+
return row.id;
|
|
18
|
+
}
|
|
19
|
+
if (row) this.db.deleteDoc(row.id, { vec: true });
|
|
20
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
21
|
+
return this.db.addDoc({
|
|
22
|
+
body: doc.body,
|
|
23
|
+
description: doc.$description ?? "",
|
|
24
|
+
entities: doc.entities.join(","),
|
|
25
|
+
hash: doc.hash,
|
|
26
|
+
path: doc.path,
|
|
27
|
+
synced_at: now,
|
|
28
|
+
tags: doc.tags.join(","),
|
|
29
|
+
title: doc.title,
|
|
30
|
+
updated_at: now
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
async chunk(id, doc) {
|
|
34
|
+
const title = doc.title.trim();
|
|
35
|
+
const description = (doc.description ?? "").trim();
|
|
36
|
+
const body = doc.body.trim();
|
|
37
|
+
const chunks = [];
|
|
38
|
+
const embedder = await this.ctx.embedder();
|
|
39
|
+
if (description.length > 0) chunks.push(...await embedder.chunk({
|
|
40
|
+
text: description,
|
|
41
|
+
title
|
|
42
|
+
}));
|
|
43
|
+
if (body.length > 0) {
|
|
44
|
+
const offset = Math.max(1, chunks.length);
|
|
45
|
+
const bodyChunks = await embedder.chunk({
|
|
46
|
+
text: body,
|
|
47
|
+
title
|
|
48
|
+
});
|
|
49
|
+
chunks.push(...bodyChunks.map((c) => Object.assign(c, { seq: c.seq + offset })));
|
|
50
|
+
}
|
|
51
|
+
return chunks.map((chunk) => Object.assign(chunk, {
|
|
52
|
+
doc,
|
|
53
|
+
doc_id: id
|
|
54
|
+
}));
|
|
55
|
+
}
|
|
56
|
+
async index() {
|
|
57
|
+
const docs = [];
|
|
58
|
+
const nodes = /* @__PURE__ */ new Map();
|
|
59
|
+
const vfs = await this.ctx.vfs();
|
|
60
|
+
for await (const entry of vfs.find()) docs.push(Doc.load(entry));
|
|
61
|
+
const loaded = await Promise.all(docs);
|
|
62
|
+
this.db.transaction(() => {
|
|
63
|
+
for (const doc of loaded) if (doc) nodes.set(this.add(doc), doc);
|
|
64
|
+
})();
|
|
65
|
+
this.ctx.success(`Indexed ${nodes.size} docs from disk`);
|
|
66
|
+
return nodes;
|
|
67
|
+
}
|
|
68
|
+
async embed(docs) {
|
|
69
|
+
const todo = this.db.getUnembeddedDocs();
|
|
70
|
+
if (todo.length === 0) {
|
|
71
|
+
this.ctx.success("All docs are already embedded");
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
this.ctx.info(`Sync found ${todo.length} unembedded docs`);
|
|
75
|
+
let doneBytes = 0;
|
|
76
|
+
let doneDocs = 0;
|
|
77
|
+
const queue = [];
|
|
78
|
+
const embedder = await this.ctx.embedder();
|
|
79
|
+
await embedder.backend();
|
|
80
|
+
const start = performance.now();
|
|
81
|
+
const progress = new Progress("Embedding", { max: todo.length });
|
|
82
|
+
this.ctx.events.emit("progress", progress);
|
|
83
|
+
const updateProgress = () => {
|
|
84
|
+
const secs = (performance.now() - start) / 1e3;
|
|
85
|
+
const kbPerSec = (doneBytes / secs / 1024).toFixed(0);
|
|
86
|
+
progress.set({
|
|
87
|
+
status: `${progress.max - todo.length}/${progress.max} docs embedded ${kbPerSec}kb/s...`,
|
|
88
|
+
value: doneDocs
|
|
89
|
+
});
|
|
90
|
+
};
|
|
91
|
+
const markEmbedded = (id) => {
|
|
92
|
+
doneDocs++;
|
|
93
|
+
const hash = docs.get(id)?.hash;
|
|
94
|
+
if (hash) this.db.markEmbedded(id, hash);
|
|
95
|
+
};
|
|
96
|
+
const embed = async (flush) => {
|
|
97
|
+
while (queue.length >= (flush ? 1 : embedder.opts.batchSize)) {
|
|
98
|
+
const batch = queue.splice(0, embedder.opts.batchSize);
|
|
99
|
+
const embeddings = await embedder.embed(batch);
|
|
100
|
+
doneBytes += batch.reduce((sum, c) => sum + c.prompt.length, 0);
|
|
101
|
+
batch.forEach((chunk, i) => chunk.embedding = embeddings[i]);
|
|
102
|
+
this.db.insertEmbeddings(batch);
|
|
103
|
+
updateProgress();
|
|
104
|
+
const completed = new Set(batch.map((c) => c.doc_id));
|
|
105
|
+
for (const c of queue) completed.delete(c.doc_id);
|
|
106
|
+
completed.forEach((id) => markEmbedded(id));
|
|
107
|
+
}
|
|
108
|
+
};
|
|
109
|
+
while (todo.length > 0) {
|
|
110
|
+
const { id } = todo.pop();
|
|
111
|
+
const doc = docs.get(id);
|
|
112
|
+
if (!doc) continue;
|
|
113
|
+
this.db.deleteEmbeddings(id);
|
|
114
|
+
const chunks = await this.chunk(id, doc);
|
|
115
|
+
queue.push(...chunks);
|
|
116
|
+
if (chunks.length === 0) markEmbedded(id);
|
|
117
|
+
await embed();
|
|
118
|
+
}
|
|
119
|
+
await embed(true);
|
|
120
|
+
progress.stop();
|
|
121
|
+
this.ctx.success("Sync complete");
|
|
122
|
+
}
|
|
123
|
+
async sync(opts) {
|
|
124
|
+
const syncStart = (/* @__PURE__ */ new Date()).toISOString();
|
|
125
|
+
const docs = await this.index();
|
|
126
|
+
await this.prune(syncStart);
|
|
127
|
+
if (opts?.embed) await this.embed(docs);
|
|
128
|
+
}
|
|
129
|
+
async prune(syncStart) {
|
|
130
|
+
let dels = 0;
|
|
131
|
+
const vfs = await this.ctx.vfs();
|
|
132
|
+
for (const { path } of vfs.folders) dels += this.db.deleteStaleDocs(syncStart, path);
|
|
133
|
+
if (dels > 0) this.ctx.warn(`Removed ${dels} stale docs`);
|
|
134
|
+
}
|
|
135
|
+
};
|
|
136
|
+
//#endregion
|
|
137
|
+
export { Store };
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { n as parseModelUri } from "./models-DFQSgBNr.mjs";
|
|
2
|
+
//#region src/embed/transformers.ts
|
|
3
|
+
var TransformersBackend = class TransformersBackend {
|
|
4
|
+
device = "cpu";
|
|
5
|
+
maxTokens;
|
|
6
|
+
dims;
|
|
7
|
+
#pipeline;
|
|
8
|
+
#ctx;
|
|
9
|
+
normalize;
|
|
10
|
+
constructor(pipeline, ctx) {
|
|
11
|
+
this.#pipeline = pipeline;
|
|
12
|
+
this.#ctx = ctx;
|
|
13
|
+
this.maxTokens = pipeline.model.config.max_position_embeddings;
|
|
14
|
+
this.dims = pipeline.model.config.hidden_size ?? 0;
|
|
15
|
+
this.device = pipeline.model.sessions.model?.config?.device ?? "cpu";
|
|
16
|
+
}
|
|
17
|
+
static async load(ctx) {
|
|
18
|
+
const parsed = parseModelUri(ctx.opts.model.uri);
|
|
19
|
+
const { pipeline, layer_norm } = await import("@huggingface/transformers");
|
|
20
|
+
const backend = new TransformersBackend(await pipeline("feature-extraction", parsed.model, {
|
|
21
|
+
dtype: parsed.variant ?? "auto",
|
|
22
|
+
progress_callback: (event) => TransformersBackend.onProgress(ctx, event),
|
|
23
|
+
session_options: { intraOpNumThreads: ctx.opts.threads }
|
|
24
|
+
}), ctx);
|
|
25
|
+
const dims = ctx.opts.maxDims;
|
|
26
|
+
if (dims < backend.dims) backend.normalize = (output) => layer_norm(output, [output.dims[1] ?? 0]).slice(null, [0, dims]).normalize(2, -1);
|
|
27
|
+
return backend;
|
|
28
|
+
}
|
|
29
|
+
async embed(texts) {
|
|
30
|
+
const output = await this.#pipeline(texts, {
|
|
31
|
+
normalize: !this.normalize,
|
|
32
|
+
pooling: this.#ctx.opts.model.pooling
|
|
33
|
+
});
|
|
34
|
+
return (this.normalize?.(output) ?? output).tolist();
|
|
35
|
+
}
|
|
36
|
+
toks(input) {
|
|
37
|
+
return this.#pipeline.tokenizer.tokenize(input).length;
|
|
38
|
+
}
|
|
39
|
+
static onProgress(ctx, event) {
|
|
40
|
+
if (event.status === "initiate") ctx.status.child(event.name).child(event.file).status = event.status;
|
|
41
|
+
else if (event.status === "download") ctx.status.child(event.name).child(event.file).status = event.status;
|
|
42
|
+
else if (event.status === "progress") ctx.status.child(event.name).child(event.file).set({
|
|
43
|
+
max: event.total,
|
|
44
|
+
status: event.status,
|
|
45
|
+
value: event.loaded
|
|
46
|
+
});
|
|
47
|
+
else if (event.status === "done") ctx.status.child(event.name).child(event.file).set({ status: event.status }).stop();
|
|
48
|
+
else if (event.status === "ready") {
|
|
49
|
+
ctx.status.name = `model \`${ctx.opts.model.uri}\` loaded`;
|
|
50
|
+
ctx.status.child(event.task).set({ status: event.status }).stop();
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
};
|
|
54
|
+
//#endregion
|
|
55
|
+
export { TransformersBackend };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
//#region src/uri.ts
|
|
2
|
+
const URI_PREFIX = "rekal://";
|
|
3
|
+
function assertUri(uri) {
|
|
4
|
+
if (!uri.startsWith("rekal://")) throw new Error(`URI must start with ${URI_PREFIX}, got: ${uri}`);
|
|
5
|
+
}
|
|
6
|
+
function normUri(uri, dir) {
|
|
7
|
+
if (uri === void 0) return URI_PREFIX;
|
|
8
|
+
if (typeof uri !== "string") throw new Error(`URI must be a string, got: ${JSON.stringify(uri)}`);
|
|
9
|
+
uri = uri.trim();
|
|
10
|
+
uri = uri.replace(/^rekall?:/, "");
|
|
11
|
+
uri = uri.replace(/[\\/]+/g, "/");
|
|
12
|
+
uri = uri.replace(/^\/+/, "");
|
|
13
|
+
if (uri === "") return URI_PREFIX;
|
|
14
|
+
uri = URI_PREFIX + uri;
|
|
15
|
+
if (uri.endsWith("/index.md")) return uri.replace(/\/index\.md$/, "/");
|
|
16
|
+
uri = dir ? uri.replace(/\/?$/, "/") : uri;
|
|
17
|
+
uri = dir === false ? uri.replace(/\/?$/, "") : uri;
|
|
18
|
+
return uri;
|
|
19
|
+
}
|
|
20
|
+
function parentUri(uri) {
|
|
21
|
+
uri = normUri(uri);
|
|
22
|
+
if (uri === "rekal://") return;
|
|
23
|
+
uri = uri.replace(/\/?$/, "");
|
|
24
|
+
uri = uri.replace(/\/[^/]+$/, "");
|
|
25
|
+
return uri === "rekal://" ? URI_PREFIX : `${uri}/`;
|
|
26
|
+
}
|
|
27
|
+
//#endregion
|
|
28
|
+
export { parentUri as i, assertUri as n, normUri as r, URI_PREFIX as t };
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import "./runtime.node-DlQPaGrV.mjs";
|
|
2
|
+
import { createHash } from "node:crypto";
|
|
3
|
+
//#region src/util.ts
|
|
4
|
+
/**
 * Computes the SHA-256 digest of `content`.
 * @param content Data fed to the hash.
 * @returns The digest as a hex string.
 */
function hash(content) {
	const sha256 = createHash("sha256");
	sha256.update(content);
	return sha256.digest("hex");
}
|
|
7
|
+
/**
 * Coerces any thrown value into an `Error`.
 * @param err Value to convert.
 * @returns `err` itself when it already is an `Error`; otherwise a new
 *   `Error` whose message is `String(err)`.
 */
function toError(err) {
	if (err instanceof Error) return err;
	return new Error(String(err));
}
|
|
10
|
+
//#endregion
|
|
11
|
+
export { toError as n, hash as t };
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import { a as sstat, i as normPath } from "./fs-DMp26Byo.mjs";
|
|
2
|
+
import { r as normUri, t as URI_PREFIX } from "./uri-CehXVDGB.mjs";
|
|
3
|
+
import { basename, join, relative } from "pathe";
|
|
4
|
+
//#region src/vfs.ts
|
|
5
|
+
const DEFAULT_EXCLUDE = [".git", "node_modules/"];
|
|
6
|
+
var Vfs = class {
|
|
7
|
+
#folders = /* @__PURE__ */ new Map();
|
|
8
|
+
#root = {
|
|
9
|
+
children: /* @__PURE__ */ new Map(),
|
|
10
|
+
depth: 0,
|
|
11
|
+
name: "#root",
|
|
12
|
+
paths: [],
|
|
13
|
+
uri: URI_PREFIX
|
|
14
|
+
};
|
|
15
|
+
constructor(ctx) {
|
|
16
|
+
this.ctx = ctx;
|
|
17
|
+
for (const folder of ctx.opts.folders ?? []) this.addFolder(folder);
|
|
18
|
+
}
|
|
19
|
+
get folders() {
|
|
20
|
+
return [...this.#folders.values()].flat();
|
|
21
|
+
}
|
|
22
|
+
isFolder(path) {
|
|
23
|
+
path = normPath(path).replace(/\/?$/, "/");
|
|
24
|
+
return this.#folders.has(path);
|
|
25
|
+
}
|
|
26
|
+
getScope(uri, opts) {
|
|
27
|
+
uri = normUri(uri, true);
|
|
28
|
+
const view = this.resolve(uri, opts);
|
|
29
|
+
return {
|
|
30
|
+
...view,
|
|
31
|
+
map: (path) => {
|
|
32
|
+
path = normPath(path);
|
|
33
|
+
let best;
|
|
34
|
+
for (const p of view.paths) {
|
|
35
|
+
const rel = relative(p.path, path);
|
|
36
|
+
if (rel.startsWith("..")) continue;
|
|
37
|
+
const candidate = p.node.uri + rel;
|
|
38
|
+
if (!best || candidate.length < best.length) best = candidate;
|
|
39
|
+
}
|
|
40
|
+
return best;
|
|
41
|
+
}
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
getNode(uri, create = false) {
|
|
45
|
+
uri = normUri(uri);
|
|
46
|
+
let node = this.#root;
|
|
47
|
+
const parts = uri.slice(URI_PREFIX.length).split("/").filter(Boolean);
|
|
48
|
+
for (const part of parts) {
|
|
49
|
+
let child = node.children.get(part);
|
|
50
|
+
if (!child) {
|
|
51
|
+
child = {
|
|
52
|
+
children: /* @__PURE__ */ new Map(),
|
|
53
|
+
depth: node.depth + 1,
|
|
54
|
+
name: part,
|
|
55
|
+
parent: node,
|
|
56
|
+
paths: [],
|
|
57
|
+
uri: `${node.uri}${part}/`
|
|
58
|
+
};
|
|
59
|
+
if (create) node.children.set(part, child);
|
|
60
|
+
}
|
|
61
|
+
node = child;
|
|
62
|
+
}
|
|
63
|
+
return node;
|
|
64
|
+
}
|
|
65
|
+
resolve(node, opts) {
|
|
66
|
+
node = typeof node === "string" ? this.getNode(node) : node;
|
|
67
|
+
const nodes = [node];
|
|
68
|
+
let { parent } = node;
|
|
69
|
+
while (parent) {
|
|
70
|
+
nodes.unshift(parent);
|
|
71
|
+
parent = parent.parent;
|
|
72
|
+
}
|
|
73
|
+
const paths = [];
|
|
74
|
+
const folders = [];
|
|
75
|
+
for (const n of nodes) {
|
|
76
|
+
for (const p of paths) p.path = join(p.path, n.name);
|
|
77
|
+
for (const path of n.paths) {
|
|
78
|
+
paths.push({
|
|
79
|
+
node,
|
|
80
|
+
path
|
|
81
|
+
});
|
|
82
|
+
if (this.isFolder(path)) folders.push({
|
|
83
|
+
node: n,
|
|
84
|
+
path
|
|
85
|
+
});
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
const stack = opts?.children === false ? [] : [...node.children.values()];
|
|
89
|
+
while (stack.length > 0) {
|
|
90
|
+
const n = stack.pop();
|
|
91
|
+
for (const path of n.paths) {
|
|
92
|
+
paths.push({
|
|
93
|
+
node: n,
|
|
94
|
+
path
|
|
95
|
+
});
|
|
96
|
+
if (this.isFolder(path)) folders.push({
|
|
97
|
+
node: n,
|
|
98
|
+
path
|
|
99
|
+
});
|
|
100
|
+
}
|
|
101
|
+
stack.push(...n.children.values());
|
|
102
|
+
}
|
|
103
|
+
paths.sort((a, b) => a.node.uri.localeCompare(b.node.uri));
|
|
104
|
+
return {
|
|
105
|
+
node,
|
|
106
|
+
paths,
|
|
107
|
+
uri: node.uri
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
addFolder(folder) {
|
|
111
|
+
folder.uri = normUri(folder.uri, true);
|
|
112
|
+
folder.path = normPath(folder.path).replace(/\/?$/, "/");
|
|
113
|
+
const folders = this.#folders.get(folder.path) ?? [];
|
|
114
|
+
this.#folders.set(folder.path, [...folders, folder]);
|
|
115
|
+
this.getNode(folder.uri, true).paths.push(folder.path);
|
|
116
|
+
}
|
|
117
|
+
matcher(opts) {
|
|
118
|
+
const pattern = opts?.pattern ?? "";
|
|
119
|
+
if (!pattern.length) return () => true;
|
|
120
|
+
const ignoreCase = opts?.ignoreCase ?? !/[A-Z]/.test(pattern);
|
|
121
|
+
const re = new RegExp(pattern, ignoreCase ? "i" : "");
|
|
122
|
+
return (uri) => re.test(uri);
|
|
123
|
+
}
|
|
124
|
+
async *find(opts = {}) {
|
|
125
|
+
const { glob } = await import("./glob.mjs");
|
|
126
|
+
const uri = normUri(opts.uri ?? "rekal://", true);
|
|
127
|
+
const root = this.resolve(uri);
|
|
128
|
+
const maxDepth = root.node.depth + (opts.depth ?? Infinity);
|
|
129
|
+
const visited = /* @__PURE__ */ new Set();
|
|
130
|
+
const filter = this.matcher(opts);
|
|
131
|
+
const stop = () => opts.limit !== void 0 && visited.size >= opts.limit;
|
|
132
|
+
const use = (p) => {
|
|
133
|
+
if (p.uri.endsWith("/") && opts.type === "file") return false;
|
|
134
|
+
if (!filter(p.uri)) return false;
|
|
135
|
+
if (stop()) return false;
|
|
136
|
+
const key = `${p.uri}:${p.path ?? ""}`;
|
|
137
|
+
if (visited.has(key)) return false;
|
|
138
|
+
visited.add(key);
|
|
139
|
+
return true;
|
|
140
|
+
};
|
|
141
|
+
function* yieldVirtual(p) {
|
|
142
|
+
if (p.node === root.node || opts.type === "file") return;
|
|
143
|
+
let parent = p.node.parent;
|
|
144
|
+
while (parent && parent !== root.node && !stop()) {
|
|
145
|
+
if (parent.depth <= maxDepth && !root.paths.some((rp) => parent?.uri.startsWith(rp.node.uri)) && use({ uri: parent.uri })) yield { uri: parent.uri };
|
|
146
|
+
parent = parent.parent;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
scan: for (const p of root.paths) {
|
|
150
|
+
yield* yieldVirtual(p);
|
|
151
|
+
if (p.node.depth > maxDepth) continue;
|
|
152
|
+
const e = {
|
|
153
|
+
path: p.path,
|
|
154
|
+
uri: p.node.uri
|
|
155
|
+
};
|
|
156
|
+
if (p.node !== root.node && use(e)) yield e;
|
|
157
|
+
const cwd = p.path;
|
|
158
|
+
const todo = glob({
|
|
159
|
+
cwd,
|
|
160
|
+
depth: maxDepth - p.node.depth,
|
|
161
|
+
empty: false,
|
|
162
|
+
exclude: DEFAULT_EXCLUDE,
|
|
163
|
+
glob: ["**/*.md"],
|
|
164
|
+
type: opts.type
|
|
165
|
+
});
|
|
166
|
+
for await (const childPath of todo) {
|
|
167
|
+
if (basename(childPath) === "index.md") continue;
|
|
168
|
+
const path = {
|
|
169
|
+
path: join(cwd, childPath),
|
|
170
|
+
uri: p.node.uri + childPath
|
|
171
|
+
};
|
|
172
|
+
if (use(path)) yield path;
|
|
173
|
+
if (stop()) break scan;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
async *ls(opts) {
|
|
178
|
+
yield* this.find({
|
|
179
|
+
...opts,
|
|
180
|
+
depth: 1
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
/** Normalizes the URI and path to a real path if it exists **/
|
|
184
|
+
normPath(p) {
|
|
185
|
+
const transforms = [
|
|
186
|
+
{
|
|
187
|
+
from: /\/index\.md$/,
|
|
188
|
+
to: "/"
|
|
189
|
+
},
|
|
190
|
+
{
|
|
191
|
+
from: /\/index$/,
|
|
192
|
+
to: "/"
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
from: /\.md$/,
|
|
196
|
+
to: ""
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
from: /(?!\.md)$/,
|
|
200
|
+
to: ".md"
|
|
201
|
+
}
|
|
202
|
+
];
|
|
203
|
+
if (!sstat(p.path) || basename(p.path) === "index.md") {
|
|
204
|
+
const root = this.getScope();
|
|
205
|
+
for (const t of transforms) {
|
|
206
|
+
const path = p.path.replace(t.from, t.to);
|
|
207
|
+
const uri = root.map(path);
|
|
208
|
+
if (sstat(path) && uri) return {
|
|
209
|
+
path,
|
|
210
|
+
uri
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
return { uri: p.node.uri };
|
|
214
|
+
}
|
|
215
|
+
return {
|
|
216
|
+
path: p.path,
|
|
217
|
+
uri: p.node.uri
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
};
|
|
221
|
+
//#endregion
|
|
222
|
+
export { Vfs };
|
package/foo.ts
ADDED
package/foo2.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { getLlama, resolveModelFile } from "node-llama-cpp"
|
|
2
|
+
import path from "path"
|
|
3
|
+
import { fileURLToPath } from "url"
|
|
4
|
+
|
|
5
|
+
// Scratch script: resolve a GGUF embedding model from Hugging Face, load it
// with node-llama-cpp and print the embedding vector of a sample sentence.

// Directory containing this module (ESM has no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const modelsDir = path.join(__dirname, "models")

// Hugging Face model reference: repository plus quantisation variant.
const modelUri = "hf:mradermacher/snowflake-arctic-embed-s-GGUF:Q4_K_M"

// resolve a model from Hugging Face to the models directory
const modelPath = await resolveModelFile(modelUri, modelsDir)

const llama = await getLlama()
const model = await llama.loadModel({ modelPath })
const embeddingContext = await model.createEmbeddingContext()

const input = "Hello world"
console.log("Text:", input)

const { vector } = await embeddingContext.getEmbeddingFor(input)
console.log("Embedding vector:", vector)
|
package/package.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@rekal/mem",
|
|
3
|
+
"version": "0.0.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"imports": {
|
|
6
|
+
"#runtime": {
|
|
7
|
+
"bun": "./src/runtime.bun.ts",
|
|
8
|
+
"default": "./src/runtime.node.ts"
|
|
9
|
+
}
|
|
10
|
+
},
|
|
11
|
+
"exports": {
|
|
12
|
+
".": {
|
|
13
|
+
"bun": "./src/index.ts",
|
|
14
|
+
"default": "./dist/index.mjs"
|
|
15
|
+
},
|
|
16
|
+
"./glob": {
|
|
17
|
+
"bun": "./src/glob.ts",
|
|
18
|
+
"default": "./dist/glob.mjs"
|
|
19
|
+
},
|
|
20
|
+
"./package.json": "./package.json"
|
|
21
|
+
},
|
|
22
|
+
"publishConfig": {
|
|
23
|
+
"exports": {
|
|
24
|
+
".": "./dist/index.mjs",
|
|
25
|
+
"./glob": "./dist/glob.mjs",
|
|
26
|
+
"./package.json": "./package.json"
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"scripts": {
|
|
30
|
+
"build": "tsdown --cwd ../../ --filter @rekal/mem",
|
|
31
|
+
"test": "bun test:node && bun test:bun",
|
|
32
|
+
"test:node": "cd ../../; vitest --project @rekal/mem run",
|
|
33
|
+
"test:bun": "bun test --only-failures"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@huggingface/transformers": "^4.0.0",
|
|
37
|
+
"better-sqlite3": "^12.8.0",
|
|
38
|
+
"defu": "^6.1.6",
|
|
39
|
+
"gpt-tokenizer": "^3.4.0",
|
|
40
|
+
"ignore": "^7.0.5",
|
|
41
|
+
"js-yaml": "^4.1.1",
|
|
42
|
+
"node-llama-cpp": "^3.18.1",
|
|
43
|
+
"pathe": "^2.0.3",
|
|
44
|
+
"sqlite-vec": "^0.1.9"
|
|
45
|
+
},
|
|
46
|
+
"devDependencies": {
|
|
47
|
+
"@arethetypeswrong/cli": "^0.18.2",
|
|
48
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
49
|
+
"publint": "^0.3.18",
|
|
50
|
+
"tsdown": "^0.21.7",
|
|
51
|
+
"type-fest": "^5.5.0",
|
|
52
|
+
"typescript": "^6.0.2"
|
|
53
|
+
},
|
|
54
|
+
"inlinedDependencies": {
|
|
55
|
+
"type-fest": "5.5.0"
|
|
56
|
+
},
|
|
57
|
+
"trustedDependencies": [
|
|
58
|
+
"sqlite-vec",
|
|
59
|
+
"better-sqlite3"
|
|
60
|
+
]
|
|
61
|
+
}
|
package/src/context.ts
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import type { Db } from "./db.ts"
|
|
2
|
+
import type { Embedder, EmbedderOptions } from "./embed/index.ts"
|
|
3
|
+
import type { LogLevel } from "./log.ts"
|
|
4
|
+
import type { Progress } from "./progress.ts"
|
|
5
|
+
import type { Search } from "./search.ts"
|
|
6
|
+
import type { Store } from "./store.ts"
|
|
7
|
+
import type { TypedEmitter } from "./util.ts"
|
|
8
|
+
import type { Vfs, VfsFolder } from "./vfs.ts"
|
|
9
|
+
|
|
10
|
+
import { EventEmitter } from "node:events"
|
|
11
|
+
import { mkdirSync } from "node:fs"
|
|
12
|
+
import { join } from "pathe"
|
|
13
|
+
import { normPath } from "./fs.ts"
|
|
14
|
+
import { LoggerBase } from "./log.ts"
|
|
15
|
+
|
|
16
|
+
/**
 * Events emitted on `Context.events`, keyed by event name; each value is the
 * tuple of arguments passed to listeners.
 */
type ContextEvents = {
  /** A log record: the level followed by the message parts. */
  log: [level: LogLevel, ...msg: unknown[]]
  /** A progress tracker that has been started. */
  progress: [progress: Progress]
}
|
|
20
|
+
|
|
21
|
+
/** Options accepted by the `Context` constructor. */
export type ContextOptions = {
  /** Settings handed to the embedder. */
  embedder?: EmbedderOptions
  /** Folders to mount in the virtual file system. */
  folders?: VfsFolder[]
  /** Path to the data folder; defaults to `~/.rekal-data/`. */
  root?: string
}
|
|
26
|
+
|
|
27
|
+
/**
 * Root object wiring together the database, search, store, embedder and
 * virtual file system. Each service is created lazily on first request and
 * reused afterwards; log output is emitted as events instead of being printed.
 */
export class Context extends LoggerBase {
  #embedder?: Embedder
  #root: string
  // The async-loaded services cache their in-flight promise, not the resolved
  // value, so concurrent callers share one load instead of racing.
  #db?: Promise<Db>
  #search?: Promise<Search>
  #store?: Promise<Store>
  #vfs?: Vfs
  /** Typed event bus carrying `log` and `progress` events. */
  events = new EventEmitter() as TypedEmitter<ContextEvents> & EventEmitter

  /**
   * @param opts Context options. `opts.embedder` is created when missing and
   *   its `onProgress` defaults to re-emitting on `events`. The data folder is
   *   created if it does not exist.
   */
  constructor(public opts: ContextOptions = {}) {
    super()
    this.opts.embedder ??= {}
    this.opts.embedder.onProgress ??= (progress) => this.events.emit("progress", progress)
    this.#root = normPath(this.opts.root ?? "~/.rekal-data")
    mkdirSync(this.#root, { recursive: true })
  }

  // Emit log events instead of logging directly
  protected _log(level: LogLevel, ...msg: unknown[]) {
    this.events.emit("log", level, ...msg)
  }

  /** Normalised path of the data folder. */
  get root() {
    return this.#root
  }

  /**
   * Runs `load` and clears the cache slot through `reset` if it rejects, so a
   * failed load does not stay cached and a later call can retry.
   */
  #once<T>(load: () => Promise<T>, reset: () => void): Promise<T> {
    return load().catch((err: unknown) => {
      reset()
      throw err
    })
  }

  /** Database stored at `<root>/index.sqlite3`; loaded once and shared. */
  async db(): Promise<Db> {
    this.#db ??= this.#once(
      async () => {
        const { Db } = await import("./db.ts")
        return Db.load(join(this.root, "index.sqlite3"))
      },
      () => {
        this.#db = undefined
      },
    )
    return this.#db
  }

  /** Search service; loaded once and shared. */
  async search(): Promise<Search> {
    this.#search ??= this.#once(
      async () => {
        const { Search } = await import("./search.ts")
        return Search.load(this)
      },
      () => {
        this.#search = undefined
      },
    )
    return this.#search
  }

  /** Document store; loaded once and shared. */
  async store(): Promise<Store> {
    this.#store ??= this.#once(
      async () => {
        const { Store } = await import("./store.ts")
        return Store.load(this)
      },
      () => {
        this.#store = undefined
      },
    )
    return this.#store
  }

  /** Embedder; constructed on first use. */
  async embedder() {
    const { Embedder } = await import("./embed/index.ts")
    // No await sits between the check and the assignment, so this cannot race.
    return (this.#embedder ??= new Embedder(this))
  }

  /** Virtual file system; constructed on first use. */
  async vfs() {
    const { Vfs } = await import("./vfs.ts")
    return (this.#vfs ??= new Vfs(this))
  }
}
|