@rekal/mem 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db-BMh1OP4b.mjs +294 -0
- package/dist/doc-DnYN4jAU.mjs +116 -0
- package/dist/embed-rUMZxqed.mjs +100 -0
- package/dist/fs-DMp26Byo.mjs +32 -0
- package/dist/glob.d.mts +27 -0
- package/dist/glob.mjs +132 -0
- package/dist/index.d.mts +1465 -0
- package/dist/index.mjs +351 -0
- package/dist/llama-CT3dc9Cn.mjs +75 -0
- package/dist/models-DFQSgBNr.mjs +77 -0
- package/dist/openai-j2_2GM4J.mjs +76 -0
- package/dist/progress-B1JdNapX.mjs +263 -0
- package/dist/query-VFSpErTB.mjs +125 -0
- package/dist/runtime.node-DlQPaGrV.mjs +35 -0
- package/dist/search-BllHWtZF.mjs +166 -0
- package/dist/store-DE7S35SS.mjs +137 -0
- package/dist/transformers-CJ3QA2PK.mjs +55 -0
- package/dist/uri-CehXVDGB.mjs +28 -0
- package/dist/util-DNyrmcA3.mjs +11 -0
- package/dist/vfs-CNQbkhsf.mjs +222 -0
- package/foo.ts +3 -0
- package/foo2.ts +20 -0
- package/package.json +61 -0
- package/src/context.ts +77 -0
- package/src/db.ts +464 -0
- package/src/doc.ts +163 -0
- package/src/embed/base.ts +122 -0
- package/src/embed/index.ts +67 -0
- package/src/embed/llama.ts +111 -0
- package/src/embed/models.ts +104 -0
- package/src/embed/openai.ts +95 -0
- package/src/embed/transformers.ts +81 -0
- package/src/frecency.ts +58 -0
- package/src/fs.ts +36 -0
- package/src/glob.ts +163 -0
- package/src/index.ts +15 -0
- package/src/log.ts +60 -0
- package/src/md.ts +204 -0
- package/src/progress.ts +121 -0
- package/src/query.ts +131 -0
- package/src/runtime.bun.ts +33 -0
- package/src/runtime.node.ts +47 -0
- package/src/search.ts +230 -0
- package/src/snippet.ts +248 -0
- package/src/sqlite.ts +1 -0
- package/src/store.ts +180 -0
- package/src/uri.ts +28 -0
- package/src/util.ts +21 -0
- package/src/vfs.ts +257 -0
- package/test/doc.test.ts +61 -0
- package/test/fixtures/ignore-test/keep.md +0 -0
- package/test/fixtures/ignore-test/skip.log +0 -0
- package/test/fixtures/ignore-test/sub/keep.md +0 -0
- package/test/fixtures/store/agent/index.md +9 -0
- package/test/fixtures/store/agent/lessons.md +21 -0
- package/test/fixtures/store/agent/soul.md +28 -0
- package/test/fixtures/store/agent/tools.md +25 -0
- package/test/fixtures/store/concepts/frecency.md +30 -0
- package/test/fixtures/store/concepts/index.md +9 -0
- package/test/fixtures/store/concepts/memory-coherence.md +33 -0
- package/test/fixtures/store/concepts/rag.md +27 -0
- package/test/fixtures/store/index.md +9 -0
- package/test/fixtures/store/projects/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
- package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
- package/test/fixtures/store/user/family.md +13 -0
- package/test/fixtures/store/user/index.md +9 -0
- package/test/fixtures/store/user/preferences.md +29 -0
- package/test/fixtures/store/user/profile.md +29 -0
- package/test/fs.test.ts +15 -0
- package/test/glob.test.ts +190 -0
- package/test/md.test.ts +177 -0
- package/test/query.test.ts +105 -0
- package/test/uri.test.ts +46 -0
- package/test/util.test.ts +62 -0
- package/test/vfs.test.ts +164 -0
- package/tsconfig.json +3 -0
- package/tsdown.config.ts +8 -0
package/src/store.ts
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import type { Context } from "./context.ts"
|
|
2
|
+
import type { Db } from "./db.ts"
|
|
3
|
+
import type { EmbedderChunk } from "./embed/index.ts"
|
|
4
|
+
|
|
5
|
+
import { performance } from "node:perf_hooks"
|
|
6
|
+
import { Doc } from "./doc.ts"
|
|
7
|
+
import { Progress } from "./progress.ts"
|
|
8
|
+
// An embedder chunk tagged with the doc it came from and that doc's row id in
// the db, so embeddings can be written back to the right rows.
export type StoreChunk = EmbedderChunk & {
  doc_id: number // id of the doc row in the db
  doc: Doc // the loaded source document
}
|
|
12
|
+
|
|
13
|
+
export class Store {
|
|
14
|
+
private constructor(
|
|
15
|
+
public db: Db,
|
|
16
|
+
public ctx: Context
|
|
17
|
+
) {}
|
|
18
|
+
|
|
19
|
+
static async load(ctx: Context) {
|
|
20
|
+
return new Store(await ctx.db(), ctx)
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Add or update a node in the store (docs + FTS via triggers, no embeddings)
|
|
24
|
+
add(doc: Doc) {
|
|
25
|
+
const row = this.db.getDoc(doc.path)
|
|
26
|
+
|
|
27
|
+
if (row?.hash === doc.hash) {
|
|
28
|
+
this.db.touchDoc(row.id)
|
|
29
|
+
return row.id
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// Document changed, so delete old vec
|
|
33
|
+
if (row) this.db.deleteDoc(row.id, { vec: true })
|
|
34
|
+
|
|
35
|
+
const now = new Date().toISOString()
|
|
36
|
+
const id = this.db.addDoc({
|
|
37
|
+
body: doc.body,
|
|
38
|
+
description: doc.$description ?? "",
|
|
39
|
+
entities: doc.entities.join(","),
|
|
40
|
+
hash: doc.hash,
|
|
41
|
+
path: doc.path,
|
|
42
|
+
synced_at: now,
|
|
43
|
+
tags: doc.tags.join(","),
|
|
44
|
+
title: doc.title,
|
|
45
|
+
updated_at: now,
|
|
46
|
+
})
|
|
47
|
+
|
|
48
|
+
return id
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// Chunk a doc for embedding
|
|
52
|
+
async chunk(id: number, doc: Doc): Promise<StoreChunk[]> {
|
|
53
|
+
const title = doc.title.trim()
|
|
54
|
+
const description = (doc.description ?? "").trim()
|
|
55
|
+
const body = doc.body.trim()
|
|
56
|
+
|
|
57
|
+
const chunks: EmbedderChunk[] = []
|
|
58
|
+
|
|
59
|
+
const embedder = await this.ctx.embedder()
|
|
60
|
+
if (description.length > 0) chunks.push(...(await embedder.chunk({ text: description, title })))
|
|
61
|
+
|
|
62
|
+
if (body.length > 0) {
|
|
63
|
+
// seq=0 is reserved for description, so offset body seq by at least 1
|
|
64
|
+
const offset = Math.max(1, chunks.length)
|
|
65
|
+
const bodyChunks = await embedder.chunk({ text: body, title })
|
|
66
|
+
chunks.push(
|
|
67
|
+
...bodyChunks.map((c) =>
|
|
68
|
+
Object.assign(c, {
|
|
69
|
+
seq: c.seq + offset,
|
|
70
|
+
})
|
|
71
|
+
)
|
|
72
|
+
)
|
|
73
|
+
}
|
|
74
|
+
return chunks.map((chunk) => Object.assign(chunk, { doc, doc_id: id }))
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
async index() {
|
|
78
|
+
const docs: Promise<Doc | undefined>[] = []
|
|
79
|
+
const nodes = new Map<number, Doc>()
|
|
80
|
+
|
|
81
|
+
const vfs = await this.ctx.vfs()
|
|
82
|
+
for await (const entry of vfs.find()) {
|
|
83
|
+
docs.push(Doc.load(entry))
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
const loaded = await Promise.all(docs)
|
|
87
|
+
this.db.transaction(() => {
|
|
88
|
+
for (const doc of loaded) {
|
|
89
|
+
if (doc) nodes.set(this.add(doc), doc)
|
|
90
|
+
}
|
|
91
|
+
})()
|
|
92
|
+
|
|
93
|
+
this.ctx.success(`Indexed ${nodes.size} docs from disk`)
|
|
94
|
+
return nodes
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
async embed(docs: Map<number, Doc>) {
|
|
98
|
+
const todo = this.db.getUnembeddedDocs()
|
|
99
|
+
if (todo.length === 0) {
|
|
100
|
+
this.ctx.success("All docs are already embedded")
|
|
101
|
+
return
|
|
102
|
+
}
|
|
103
|
+
this.ctx.info(`Sync found ${todo.length} unembedded docs`)
|
|
104
|
+
|
|
105
|
+
let doneBytes = 0
|
|
106
|
+
let doneDocs = 0
|
|
107
|
+
const queue: StoreChunk[] = []
|
|
108
|
+
const embedder = await this.ctx.embedder()
|
|
109
|
+
await embedder.backend() // load the embedder before starting the progress bar
|
|
110
|
+
const start = performance.now()
|
|
111
|
+
const progress = new Progress("Embedding", { max: todo.length })
|
|
112
|
+
this.ctx.events.emit("progress", progress)
|
|
113
|
+
|
|
114
|
+
const updateProgress = () => {
|
|
115
|
+
const secs = (performance.now() - start) / 1000
|
|
116
|
+
const kbPerSec = (doneBytes / secs / 1024).toFixed(0)
|
|
117
|
+
progress.set({
|
|
118
|
+
status: `${progress.max - todo.length}/${progress.max} docs embedded ${kbPerSec}kb/s...`,
|
|
119
|
+
value: doneDocs,
|
|
120
|
+
})
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const markEmbedded = (id: number) => {
|
|
124
|
+
doneDocs++
|
|
125
|
+
const hash = docs.get(id)?.hash
|
|
126
|
+
if (hash) this.db.markEmbedded(id, hash)
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
const embed = async (flush?: boolean) => {
|
|
130
|
+
while (queue.length >= (flush ? 1 : embedder.opts.batchSize)) {
|
|
131
|
+
const batch = queue.splice(0, embedder.opts.batchSize)
|
|
132
|
+
// oxlint-disable-next-line no-await-in-loop
|
|
133
|
+
const embeddings = await embedder.embed(batch)
|
|
134
|
+
doneBytes += batch.reduce((sum, c) => sum + c.prompt.length, 0)
|
|
135
|
+
batch.forEach((chunk, i) => (chunk.embedding = embeddings[i]))
|
|
136
|
+
this.db.insertEmbeddings(batch)
|
|
137
|
+
|
|
138
|
+
updateProgress()
|
|
139
|
+
|
|
140
|
+
const completed = new Set(batch.map((c) => c.doc_id))
|
|
141
|
+
for (const c of queue) completed.delete(c.doc_id)
|
|
142
|
+
completed.forEach((id) => markEmbedded(id))
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
while (todo.length > 0) {
|
|
147
|
+
const { id } = todo.pop()!
|
|
148
|
+
const doc = docs.get(id)
|
|
149
|
+
if (!doc) continue
|
|
150
|
+
this.db.deleteEmbeddings(id)
|
|
151
|
+
// oxlint-disable-next-line no-await-in-loop
|
|
152
|
+
const chunks = await this.chunk(id, doc)
|
|
153
|
+
queue.push(...chunks)
|
|
154
|
+
if (chunks.length === 0) markEmbedded(id) // mark as embedded even if there are no chunks to embed
|
|
155
|
+
// oxlint-disable-next-line no-await-in-loop
|
|
156
|
+
await embed()
|
|
157
|
+
}
|
|
158
|
+
await embed(true) // embed any remaining chunks in the queue
|
|
159
|
+
|
|
160
|
+
progress.stop()
|
|
161
|
+
this.ctx.success("Sync complete")
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
async sync(opts?: { embed?: boolean }) {
|
|
165
|
+
const syncStart = new Date().toISOString()
|
|
166
|
+
const docs = await this.index()
|
|
167
|
+
await this.prune(syncStart)
|
|
168
|
+
if (opts?.embed) await this.embed(docs)
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Remove docs that no longer exist on disk
|
|
172
|
+
async prune(syncStart: string) {
|
|
173
|
+
let dels = 0
|
|
174
|
+
const vfs = await this.ctx.vfs()
|
|
175
|
+
for (const { path } of vfs.folders) {
|
|
176
|
+
dels += this.db.deleteStaleDocs(syncStart, path)
|
|
177
|
+
}
|
|
178
|
+
if (dels > 0) this.ctx.warn(`Removed ${dels} stale docs`)
|
|
179
|
+
}
|
|
180
|
+
}
|
package/src/uri.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
export const URI_PREFIX = "rekal://"
|
|
2
|
+
|
|
3
|
+
export function assertUri(uri: string) {
|
|
4
|
+
if (!uri.startsWith(URI_PREFIX)) throw new Error(`URI must start with ${URI_PREFIX}, got: ${uri}`)
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
export function normUri(uri?: string, dir?: boolean): string {
|
|
8
|
+
if (uri === undefined) return URI_PREFIX
|
|
9
|
+
if (typeof uri !== "string") throw new Error(`URI must be a string, got: ${JSON.stringify(uri)}`)
|
|
10
|
+
uri = uri.trim()
|
|
11
|
+
uri = uri.replace(/^rekall?:/, "") // protocol
|
|
12
|
+
uri = uri.replace(/[\\/]+/g, "/") // normalize slashes
|
|
13
|
+
uri = uri.replace(/^\/+/, "") // leading slashes
|
|
14
|
+
if (uri === "") return URI_PREFIX
|
|
15
|
+
uri = URI_PREFIX + uri
|
|
16
|
+
if (uri.endsWith("/index.md")) return uri.replace(/\/index\.md$/, "/") // index.md implies directory
|
|
17
|
+
uri = dir ? uri.replace(/\/?$/, "/") : uri // trailing slash for directories
|
|
18
|
+
uri = dir === false ? uri.replace(/\/?$/, "") : uri // remove trailing slash for files
|
|
19
|
+
return uri
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export function parentUri(uri: string): string | undefined {
|
|
23
|
+
uri = normUri(uri)
|
|
24
|
+
if (uri === URI_PREFIX) return
|
|
25
|
+
uri = uri.replace(/\/?$/, "") // remove trailing slash
|
|
26
|
+
uri = uri.replace(/\/[^/]+$/, "") // remove last segment
|
|
27
|
+
return uri === URI_PREFIX ? URI_PREFIX : `${uri}/`
|
|
28
|
+
}
|
package/src/util.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { parseYaml } from "#runtime"
|
|
2
|
+
import { createHash } from "node:crypto"
|
|
3
|
+
|
|
4
|
+
export { parseYaml }
|
|
5
|
+
|
|
6
|
+
export function hash(content: string): string {
|
|
7
|
+
return createHash("sha256").update(content).digest("hex")
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export function toError(err: unknown): Error {
|
|
11
|
+
return err instanceof Error ? err : new Error(String(err))
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
// Event map: event name -> tuple of listener argument types.
export type Events = Record<string, unknown[]>

// Strongly-typed subset of an event-emitter interface, keyed by the event map T.
// on/off/once return the emitter for chaining; emit returns a boolean.
export type TypedEmitter<T extends Events> = {
  on<K extends keyof T>(event: K, fn: (...args: T[K]) => void): TypedEmitter<T>
  off<K extends keyof T>(event: K, fn: (...args: T[K]) => void): TypedEmitter<T>
  once<K extends keyof T>(event: K, fn: (...args: T[K]) => void): TypedEmitter<T>
  emit<K extends keyof T>(event: K, ...args: T[K]): boolean
}
|
package/src/vfs.ts
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
import type { Context } from "./context.ts"
|
|
2
|
+
import type { Doc } from "./doc.ts"
|
|
3
|
+
|
|
4
|
+
import { basename, join, relative } from "pathe"
|
|
5
|
+
import { normPath, sstat } from "./fs.ts"
|
|
6
|
+
import { URI_PREFIX, normUri } from "./uri.ts"
|
|
7
|
+
|
|
8
|
+
export class Node {
|
|
9
|
+
constructor(
|
|
10
|
+
public uri: string,
|
|
11
|
+
public doc: Doc
|
|
12
|
+
) {}
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
// Directory names excluded from every folder scan.
const DEFAULT_EXCLUDE = [".git", "node_modules/"]

// A configured mount: an on-disk folder exposed under a URI.
export type VfsFolder = {
  uri: string
  path: string
  merge?: boolean // TODO: whether this path should be merged with others in the same URI, defaults to false
}

// A result entry: a URI plus, when it maps to a real file, its on-disk path.
export type VfsEntry = {
  uri: string
  path?: string // absent for virtual (purely URI-level) entries
}

// An on-disk path together with the vfs node it belongs to.
export type VfsPath = {
  node: VfsNode
  path: string
}

// A node in the in-memory URI tree built from configured folders.
export type VfsNode = {
  name: string // last URI segment ("#root" for the root node)
  parent?: VfsNode
  uri: string
  paths: string[] // on-disk paths mounted at this node
  depth: number // number of segments below the root
  children: Map<string, VfsNode>
}

export type VfsFindOptions = {
  /** URI to start from, defaults to root */
  uri?: string
  depth?: number // max depth to search, defaults to Infinity
  pattern?: string // extra regex pattern to match URIs against
  ignoreCase?: boolean // whether pattern matching should ignore case. When not set smart case is used.
  limit?: number // max results to return, defaults to Infinity
  type?: "file" | "directory" // filter by type
}

export type VfsView = {
  uri: string // URI of the scope, defaults to rekal://
  node: VfsNode // the node representing the resolved URI
  paths: VfsPath[] // paths leading up to this node and to descendant nodes
}

export type VfsScope = VfsView & {
  // map a path to the shortest URI in this scope, if it exists
  map: (path: string) => string | undefined
}
|
|
62
|
+
|
|
63
|
+
export class Vfs {
|
|
64
|
+
#folders = new Map<string, VfsFolder[]>() // map of paths to folders
|
|
65
|
+
#root: VfsNode = { children: new Map(), depth: 0, name: "#root", paths: [], uri: URI_PREFIX }
|
|
66
|
+
|
|
67
|
+
public constructor(public ctx: Context) {
|
|
68
|
+
for (const folder of ctx.opts.folders ?? []) this.addFolder(folder)
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
get folders(): VfsFolder[] {
|
|
72
|
+
return [...this.#folders.values()].flat()
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
isFolder(path: string): boolean {
|
|
76
|
+
path = normPath(path).replace(/\/?$/, "/")
|
|
77
|
+
return this.#folders.has(path)
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
getScope(uri?: string, opts?: { children?: boolean }): VfsScope {
|
|
81
|
+
uri = normUri(uri, true)
|
|
82
|
+
const view = this.resolve(uri, opts)
|
|
83
|
+
return {
|
|
84
|
+
...view,
|
|
85
|
+
map: (path: string) => {
|
|
86
|
+
path = normPath(path)
|
|
87
|
+
let best: string | undefined
|
|
88
|
+
for (const p of view.paths) {
|
|
89
|
+
const rel = relative(p.path, path)
|
|
90
|
+
if (rel.startsWith("..")) continue
|
|
91
|
+
const candidate = p.node.uri + rel
|
|
92
|
+
if (!best || candidate.length < best.length) best = candidate
|
|
93
|
+
}
|
|
94
|
+
return best
|
|
95
|
+
},
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
getNode(uri: string, create = false) {
|
|
100
|
+
uri = normUri(uri)
|
|
101
|
+
let node = this.#root
|
|
102
|
+
const parts = uri.slice(URI_PREFIX.length).split("/").filter(Boolean)
|
|
103
|
+
for (const part of parts) {
|
|
104
|
+
let child = node.children.get(part)
|
|
105
|
+
if (!child) {
|
|
106
|
+
child = {
|
|
107
|
+
children: new Map(),
|
|
108
|
+
depth: node.depth + 1,
|
|
109
|
+
name: part,
|
|
110
|
+
parent: node,
|
|
111
|
+
paths: [],
|
|
112
|
+
uri: `${node.uri}${part}/`,
|
|
113
|
+
}
|
|
114
|
+
if (create) node.children.set(part, child)
|
|
115
|
+
}
|
|
116
|
+
node = child
|
|
117
|
+
}
|
|
118
|
+
return node
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
resolve(node: VfsNode | string, opts?: { children?: boolean }): VfsView {
|
|
122
|
+
node = typeof node === "string" ? this.getNode(node) : node
|
|
123
|
+
const nodes = [node]
|
|
124
|
+
let { parent } = node
|
|
125
|
+
while (parent) {
|
|
126
|
+
nodes.unshift(parent)
|
|
127
|
+
parent = parent.parent
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
const paths: VfsPath[] = [] // paths to this node or to descendant nodes
|
|
131
|
+
const folders: VfsPath[] = [] // folders used by ancestor and descendant nodes
|
|
132
|
+
|
|
133
|
+
// resolve paths and folders to this node
|
|
134
|
+
for (const n of nodes) {
|
|
135
|
+
for (const p of paths) p.path = join(p.path, n.name)
|
|
136
|
+
for (const path of n.paths) {
|
|
137
|
+
paths.push({ node, path })
|
|
138
|
+
if (this.isFolder(path)) folders.push({ node: n, path })
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// resolve paths and folders to descendant nodes
|
|
143
|
+
const stack = opts?.children === false ? [] : [...node.children.values()]
|
|
144
|
+
while (stack.length > 0) {
|
|
145
|
+
const n = stack.pop()!
|
|
146
|
+
for (const path of n.paths) {
|
|
147
|
+
paths.push({ node: n, path })
|
|
148
|
+
if (this.isFolder(path)) folders.push({ node: n, path })
|
|
149
|
+
}
|
|
150
|
+
stack.push(...n.children.values())
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
paths.sort((a, b) => a.node.uri.localeCompare(b.node.uri))
|
|
154
|
+
return { node, paths, uri: node.uri }
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
addFolder(folder: VfsFolder) {
|
|
158
|
+
folder.uri = normUri(folder.uri, true)
|
|
159
|
+
folder.path = normPath(folder.path).replace(/\/?$/, "/")
|
|
160
|
+
const folders = this.#folders.get(folder.path) ?? []
|
|
161
|
+
this.#folders.set(folder.path, [...folders, folder])
|
|
162
|
+
const node = this.getNode(folder.uri, true)
|
|
163
|
+
node.paths.push(folder.path)
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
matcher(opts?: VfsFindOptions): (uri: string) => boolean {
|
|
167
|
+
const pattern = opts?.pattern ?? ""
|
|
168
|
+
if (!pattern.length) return () => true
|
|
169
|
+
const ignoreCase = opts?.ignoreCase ?? !/[A-Z]/.test(pattern)
|
|
170
|
+
const re = new RegExp(pattern, ignoreCase ? "i" : "")
|
|
171
|
+
return (uri: string) => re.test(uri)
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
async *find(opts: VfsFindOptions = {}): AsyncGenerator<VfsEntry> {
|
|
175
|
+
const { glob } = await import("./glob.ts")
|
|
176
|
+
const uri = normUri(opts.uri ?? URI_PREFIX, true)
|
|
177
|
+
const root = this.resolve(uri)
|
|
178
|
+
const maxDepth = root.node.depth + (opts.depth ?? Infinity)
|
|
179
|
+
const visited = new Set<string>()
|
|
180
|
+
const filter = this.matcher(opts)
|
|
181
|
+
|
|
182
|
+
const stop = () => opts.limit !== undefined && visited.size >= opts.limit
|
|
183
|
+
|
|
184
|
+
const use = (p: VfsEntry) => {
|
|
185
|
+
if (p.uri.endsWith("/") && opts.type === "file") return false
|
|
186
|
+
if (!filter(p.uri)) return false
|
|
187
|
+
if (stop()) return false
|
|
188
|
+
const key = `${p.uri}:${p.path ?? ""}`
|
|
189
|
+
if (visited.has(key)) return false
|
|
190
|
+
visited.add(key)
|
|
191
|
+
return true
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// add virtual internal uris
|
|
195
|
+
function* yieldVirtual(p: VfsPath) {
|
|
196
|
+
if (p.node === root.node || opts.type === "file") return
|
|
197
|
+
let parent = p.node.parent
|
|
198
|
+
while (parent && parent !== root.node && !stop()) {
|
|
199
|
+
const virtual =
|
|
200
|
+
parent.depth <= maxDepth &&
|
|
201
|
+
!root.paths.some((rp) => parent?.uri.startsWith(rp.node.uri)) &&
|
|
202
|
+
use({ uri: parent.uri })
|
|
203
|
+
if (virtual) yield { uri: parent.uri }
|
|
204
|
+
parent = parent.parent
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
scan: for (const p of root.paths) {
|
|
209
|
+
yield* yieldVirtual(p)
|
|
210
|
+
if (p.node.depth > maxDepth) continue
|
|
211
|
+
const e = { path: p.path, uri: p.node.uri }
|
|
212
|
+
if (p.node !== root.node && use(e)) yield e
|
|
213
|
+
|
|
214
|
+
const cwd = p.path
|
|
215
|
+
const todo = glob({
|
|
216
|
+
cwd,
|
|
217
|
+
depth: maxDepth - p.node.depth,
|
|
218
|
+
empty: false,
|
|
219
|
+
exclude: DEFAULT_EXCLUDE,
|
|
220
|
+
glob: ["**/*.md"],
|
|
221
|
+
type: opts.type,
|
|
222
|
+
})
|
|
223
|
+
|
|
224
|
+
// oxlint-disable-next-line no-await-in-loop
|
|
225
|
+
for await (const childPath of todo) {
|
|
226
|
+
if (basename(childPath) === "index.md") continue
|
|
227
|
+
const path = { path: join(cwd, childPath), uri: p.node.uri + childPath }
|
|
228
|
+
if (use(path)) yield path
|
|
229
|
+
if (stop()) break scan
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
async *ls(opts?: Omit<VfsFindOptions, "depth">) {
|
|
235
|
+
yield* this.find({ ...opts, depth: 1 })
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
/** Normalizes the URI and path to a real path if it exists **/
|
|
239
|
+
normPath(p: VfsPath): VfsEntry {
|
|
240
|
+
const transforms = [
|
|
241
|
+
{ from: /\/index\.md$/, to: "/" },
|
|
242
|
+
{ from: /\/index$/, to: "/" },
|
|
243
|
+
{ from: /\.md$/, to: "" },
|
|
244
|
+
{ from: /(?!\.md)$/, to: ".md" },
|
|
245
|
+
]
|
|
246
|
+
if (!sstat(p.path) || basename(p.path) === "index.md") {
|
|
247
|
+
const root = this.getScope()
|
|
248
|
+
for (const t of transforms) {
|
|
249
|
+
const path = p.path.replace(t.from, t.to)
|
|
250
|
+
const uri = root.map(path)
|
|
251
|
+
if (sstat(path) && uri) return { path, uri }
|
|
252
|
+
}
|
|
253
|
+
return { uri: p.node.uri }
|
|
254
|
+
}
|
|
255
|
+
return { path: p.path, uri: p.node.uri }
|
|
256
|
+
}
|
|
257
|
+
}
|
package/test/doc.test.ts
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { fileURLToPath } from "node:url"
|
|
2
|
+
import { join } from "pathe"
|
|
3
|
+
import { describe, expect, test } from "vitest"
|
|
4
|
+
import { Doc } from "../src/doc.ts"
|
|
5
|
+
|
|
6
|
+
const FIXTURES = join(fileURLToPath(import.meta.url), "..", "fixtures/store")
|
|
7
|
+
|
|
8
|
+
describe("Doc.load", () => {
|
|
9
|
+
test("loads a markdown file", async () => {
|
|
10
|
+
const doc = (await Doc.load("", join(FIXTURES, "user/family.md")))!
|
|
11
|
+
expect(doc).toBeDefined()
|
|
12
|
+
expect(doc.description).toContain("Douglas's family")
|
|
13
|
+
expect(doc.tags).toContain("family")
|
|
14
|
+
expect(doc.entities).toContain("Melina")
|
|
15
|
+
expect(doc.entities).toContain("Lori Quaid")
|
|
16
|
+
expect(doc.title).toBeTruthy()
|
|
17
|
+
expect(doc.body).toContain("Lori")
|
|
18
|
+
})
|
|
19
|
+
|
|
20
|
+
test("handles directory with index.md", async () => {
|
|
21
|
+
const doc = await Doc.load("", join(FIXTURES, "concepts"))
|
|
22
|
+
expect(doc?.isDir).toBe(true)
|
|
23
|
+
})
|
|
24
|
+
|
|
25
|
+
test("throws for nonexistent file", async () => {
|
|
26
|
+
expect(await Doc.load("", join(FIXTURES, "nonexistent.md"))).toBeUndefined()
|
|
27
|
+
})
|
|
28
|
+
|
|
29
|
+
test("extracts title from H1", async () => {
|
|
30
|
+
const doc = await Doc.load("", join(FIXTURES, "concepts/frecency.md"))
|
|
31
|
+
expect(doc?.title).toBe("Frecency Algorithm")
|
|
32
|
+
})
|
|
33
|
+
|
|
34
|
+
test("handles file without frontmatter", async () => {
|
|
35
|
+
// Create a doc from raw content to simulate
|
|
36
|
+
const doc = await Doc.load("", join(FIXTURES, "user/family.md"))
|
|
37
|
+
expect(doc?.tags.length).toBeGreaterThan(0)
|
|
38
|
+
})
|
|
39
|
+
})
|
|
40
|
+
|
|
41
|
+
// Derived properties: `name` (basename without extension, or the directory
// name when loaded via index.md) and `isDir` (index.md-backed directory check).
describe("Doc properties", () => {
  test("name for regular file", async () => {
    const doc = await Doc.load("", join(FIXTURES, "user/family.md"))
    expect(doc?.name).toBe("family")
  })

  test("name for directory (index.md)", async () => {
    const doc = await Doc.load("", join(FIXTURES, "user"))
    expect(doc?.name).toBe("user")
  })

  test("isDir for index.md", async () => {
    const doc = await Doc.load("", join(FIXTURES, "user"))
    expect(doc?.isDir).toBe(true)
  })

  test("isDir for regular file", async () => {
    const doc = await Doc.load("", join(FIXTURES, "user/family.md"))
    expect(doc?.isDir).toBe(false)
  })
})
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Agent configuration — personality, operational notes, and accumulated lessons"
|
|
3
|
+
tags: [agent, config]
|
|
4
|
+
entities: [Douglas Quaid]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Agent Configuration
|
|
8
|
+
|
|
9
|
+
Configuration and learned behaviors for the AI agent. Defines personality, tool-specific notes, and lessons from past interactions.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Lessons learned from past agent mistakes — verification, attribution, skill priority"
|
|
3
|
+
tags: [agent, lessons, corrections]
|
|
4
|
+
entities: []
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Lessons Learned from Agent Mistakes
|
|
8
|
+
|
|
9
|
+
Accumulated lessons from past failures. Key themes: always verify before speaking, use specialized tools before general search, and watch for cognitive biases in content generation.
|
|
10
|
+
|
|
11
|
+
## Verification First
|
|
12
|
+
|
|
13
|
+
Never state an action is "done" before actually executing the tool. Always call the tool first and verify facts before speaking. Multiple failures from premature confirmation.
|
|
14
|
+
|
|
15
|
+
## Attribution Accuracy
|
|
16
|
+
|
|
17
|
+
Messed up a news report by confusing two country names — caused by contextual bias from recent conversations. Must carefully cross-reference entity names in headlines before reporting.
|
|
18
|
+
|
|
19
|
+
## Skill Priority
|
|
20
|
+
|
|
21
|
+
Defaulted to generic web search for "AI news" instead of using the specialized news skill. Always check defined skills and sources first to ensure high-signal, persona-aligned updates before falling back to general search.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Agent personality — be genuine, have opinions, earn trust through competence"
|
|
3
|
+
tags: [agent, personality, identity]
|
|
4
|
+
entities: []
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Agent Personality
|
|
8
|
+
|
|
9
|
+
Core principles: genuinely helpful over performatively helpful, resourceful before asking, careful with external actions, bold with internal ones.
|
|
10
|
+
|
|
11
|
+
## Core Truths
|
|
12
|
+
|
|
13
|
+
- Be genuinely helpful, not performatively helpful. Skip filler words, just help.
|
|
14
|
+
- Have opinions. Disagree, prefer things, find stuff amusing or boring.
|
|
15
|
+
- Be resourceful before asking. Read the file, check the context, search for it.
|
|
16
|
+
- Earn trust through competence. Be careful with external actions, bold with internal ones.
|
|
17
|
+
- Remember you're a guest. Access to someone's life is intimacy — treat it with respect.
|
|
18
|
+
|
|
19
|
+
## Boundaries
|
|
20
|
+
|
|
21
|
+
- Private things stay private
|
|
22
|
+
- Ask before acting externally when in doubt
|
|
23
|
+
- Never send half-baked replies to messaging surfaces
|
|
24
|
+
- Not the user's voice — careful in group chats
|
|
25
|
+
|
|
26
|
+
## Vibe
|
|
27
|
+
|
|
28
|
+
Concise when needed, thorough when it matters. Not a corporate drone. Not a sycophant.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Agent tool-specific notes — calendar, email, home automation quirks"
|
|
3
|
+
tags: [agent, tools, operations]
|
|
4
|
+
entities: [Home Assistant]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Agent Tool Notes
|
|
8
|
+
|
|
9
|
+
Operational notes for tools the agent uses. Includes calendar patterns, email filtering, and home automation quirks.
|
|
10
|
+
|
|
11
|
+
## Calendar
|
|
12
|
+
|
|
13
|
+
- Always check all calendars by default
|
|
14
|
+
- Focus on urgent and personal events
|
|
15
|
+
|
|
16
|
+
## Email
|
|
17
|
+
|
|
18
|
+
- Focus on urgent/personal email, ignore newsletters and promotions
|
|
19
|
+
- PDF extraction: convert to Google Docs as workaround for parsing
|
|
20
|
+
|
|
21
|
+
## Home Automation
|
|
22
|
+
|
|
23
|
+
- Use status scripts for entity overview
|
|
24
|
+
- Some integrations go offline periodically — check connection status before reporting errors
|
|
25
|
+
- Ignore known false alarms: offline VMs, sensors on units without hardware
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Frecency algorithm — frequency × recency scoring with exponential decay"
|
|
3
|
+
tags: [algorithm, scoring, frecency]
|
|
4
|
+
entities: [Firefox]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Frecency Algorithm
|
|
8
|
+
|
|
9
|
+
Frecency combines frequency and recency into a single score. Items accessed often and recently rank highest. Used in Firefox's URL bar and rekal's memory injection.
|
|
10
|
+
|
|
11
|
+
## Core Idea
|
|
12
|
+
|
|
13
|
+
Each access bumps a score. Scores decay exponentially over time. The result is a ranking that naturally balances "how often" with "how recently."
|
|
14
|
+
|
|
15
|
+
## Exponential Decay
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
score(t) = Σ e^(-λ * (now - access_i))
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Where λ controls how fast old accesses fade. Higher λ = more recency-biased.
|
|
22
|
+
|
|
23
|
+
## Deadline Trick
|
|
24
|
+
|
|
25
|
+
Instead of recomputing all scores on every access, store a "deadline" — the time at which the score would drop below a threshold. On read, only decay if the deadline has passed. This makes frecency O(1) amortized per access.
|
|
26
|
+
|
|
27
|
+
## Applications
|
|
28
|
+
|
|
29
|
+
- **Firefox:** URL bar suggestions ranked by frecency
|
|
30
|
+
- **rekal:** Memory nodes scored by frecency for auto-injection into agent context
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Technical concepts — frecency, RAG, memory coherence, retrieval methods"
|
|
3
|
+
tags: [concepts, reference]
|
|
4
|
+
entities: []
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Technical Concepts
|
|
8
|
+
|
|
9
|
+
General-purpose technical concepts that appear across multiple projects. Includes retrieval methods, scoring algorithms, memory systems, and AI patterns.
|