@rekal/mem 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/dist/db-BMh1OP4b.mjs +294 -0
  2. package/dist/doc-DnYN4jAU.mjs +116 -0
  3. package/dist/embed-rUMZxqed.mjs +100 -0
  4. package/dist/fs-DMp26Byo.mjs +32 -0
  5. package/dist/glob.d.mts +27 -0
  6. package/dist/glob.mjs +132 -0
  7. package/dist/index.d.mts +1465 -0
  8. package/dist/index.mjs +351 -0
  9. package/dist/llama-CT3dc9Cn.mjs +75 -0
  10. package/dist/models-DFQSgBNr.mjs +77 -0
  11. package/dist/openai-j2_2GM4J.mjs +76 -0
  12. package/dist/progress-B1JdNapX.mjs +263 -0
  13. package/dist/query-VFSpErTB.mjs +125 -0
  14. package/dist/runtime.node-DlQPaGrV.mjs +35 -0
  15. package/dist/search-BllHWtZF.mjs +166 -0
  16. package/dist/store-DE7S35SS.mjs +137 -0
  17. package/dist/transformers-CJ3QA2PK.mjs +55 -0
  18. package/dist/uri-CehXVDGB.mjs +28 -0
  19. package/dist/util-DNyrmcA3.mjs +11 -0
  20. package/dist/vfs-CNQbkhsf.mjs +222 -0
  21. package/foo.ts +3 -0
  22. package/foo2.ts +20 -0
  23. package/package.json +61 -0
  24. package/src/context.ts +77 -0
  25. package/src/db.ts +464 -0
  26. package/src/doc.ts +163 -0
  27. package/src/embed/base.ts +122 -0
  28. package/src/embed/index.ts +67 -0
  29. package/src/embed/llama.ts +111 -0
  30. package/src/embed/models.ts +104 -0
  31. package/src/embed/openai.ts +95 -0
  32. package/src/embed/transformers.ts +81 -0
  33. package/src/frecency.ts +58 -0
  34. package/src/fs.ts +36 -0
  35. package/src/glob.ts +163 -0
  36. package/src/index.ts +15 -0
  37. package/src/log.ts +60 -0
  38. package/src/md.ts +204 -0
  39. package/src/progress.ts +121 -0
  40. package/src/query.ts +131 -0
  41. package/src/runtime.bun.ts +33 -0
  42. package/src/runtime.node.ts +47 -0
  43. package/src/search.ts +230 -0
  44. package/src/snippet.ts +248 -0
  45. package/src/sqlite.ts +1 -0
  46. package/src/store.ts +180 -0
  47. package/src/uri.ts +28 -0
  48. package/src/util.ts +21 -0
  49. package/src/vfs.ts +257 -0
  50. package/test/doc.test.ts +61 -0
  51. package/test/fixtures/ignore-test/keep.md +0 -0
  52. package/test/fixtures/ignore-test/skip.log +0 -0
  53. package/test/fixtures/ignore-test/sub/keep.md +0 -0
  54. package/test/fixtures/store/agent/index.md +9 -0
  55. package/test/fixtures/store/agent/lessons.md +21 -0
  56. package/test/fixtures/store/agent/soul.md +28 -0
  57. package/test/fixtures/store/agent/tools.md +25 -0
  58. package/test/fixtures/store/concepts/frecency.md +30 -0
  59. package/test/fixtures/store/concepts/index.md +9 -0
  60. package/test/fixtures/store/concepts/memory-coherence.md +33 -0
  61. package/test/fixtures/store/concepts/rag.md +27 -0
  62. package/test/fixtures/store/index.md +9 -0
  63. package/test/fixtures/store/projects/index.md +9 -0
  64. package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
  65. package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
  66. package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
  67. package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
  68. package/test/fixtures/store/user/family.md +13 -0
  69. package/test/fixtures/store/user/index.md +9 -0
  70. package/test/fixtures/store/user/preferences.md +29 -0
  71. package/test/fixtures/store/user/profile.md +29 -0
  72. package/test/fs.test.ts +15 -0
  73. package/test/glob.test.ts +190 -0
  74. package/test/md.test.ts +177 -0
  75. package/test/query.test.ts +105 -0
  76. package/test/uri.test.ts +46 -0
  77. package/test/util.test.ts +62 -0
  78. package/test/vfs.test.ts +164 -0
  79. package/tsconfig.json +3 -0
  80. package/tsdown.config.ts +8 -0
package/src/db.ts ADDED
@@ -0,0 +1,464 @@
1
+ import type { EmbedderChunk } from "./embed/index.ts"
2
+ import type { Database } from "./sqlite.ts"
3
+ import type { StoreChunk } from "./store.ts"
4
+
5
+ import { openDatabase } from "./sqlite.ts"
6
+
7
+ export type { Database }
8
+
9
+ export type DocRow = { // one row of the `docs` table
10
+ id: number // INTEGER PRIMARY KEY AUTOINCREMENT; also the rowid used by docs_fts
11
+ path: string // UNIQUE; upsert key in addDoc and target of prefix filters
12
+ hash: string // compared against vec_hash to decide whether the doc needs (re-)embedding
13
+ body: string // indexed in docs_fts
14
+ vec_hash?: string // written by markEmbedded; NULL or != hash means "unembedded"
15
+ description: string // indexed in docs_fts
16
+ title: string // indexed in docs_fts
17
+ tags: string // stored as a single TEXT column; indexed in docs_fts
18
+ entities: string // stored as a single TEXT column; indexed in docs_fts
19
+ updated_at: string
20
+ synced_at?: string // touchDoc writes an ISO-8601 timestamp; deleteStaleDocs compares against it
21
+ deadline?: number // REAL column written by setDeadline (frecency section)
22
+ }
23
+
24
+ export type VecResult = { // one row returned by Db.searchVec
25
+ doc_id: number // doc_id column of the vec table
26
+ path: string // auxiliary path column stored alongside the embedding
27
+ seq: number // seq column of the vec table (chunk.seq at insert time)
28
+ distance: number // distance reported by the vec table (declared with distance_metric=cosine)
29
+ score: number // computed in SQL as 1 - distance/2
30
+ rank?: number // not set by searchVec
31
+ }
32
+
33
+ export type FTSResult = { // one row returned by Db.searchFts
34
+ rowid: number // docs_fts rowid, which is docs.id (content_rowid='id')
35
+ score: number // bm25(docs_fts, 10, 8, 5, 3, 1); results are ordered by this value ascending
36
+ rank?: number // not set by searchFts
37
+ }
38
+
39
+ export type DbSearchOptions = { // options accepted by Db.searchFts and Db.searchVec
40
+ limit?: number // maximum number of rows; falls back to SEARCH_LIMIT when omitted
41
+ scope?: string[] // path prefixes to limit search to (e.g. ["folder1/", "folder2/sub"]); only searchFts reads it, and an empty array yields no results
42
+ }
43
+
44
+ const SEARCH_LIMIT = 20
45
+ const STOPWORD_THRESHOLD = 0.3 // terms in >30% of docs are candidates
46
+ const STOPWORD_MIN_DOCS = 10 // terms must be in more than 10 docs to be considered stop words
47
+ const STOPWORD_LIMIT = 1000 // max number of stop words to return
48
+
49
/**
 * Type guard: tells whether a chunk carries an `embedding` array.
 *
 * @param c - Chunk to inspect.
 * @returns `true` when `c.embedding` is an array (an empty array counts).
 */
export function hasEmbedding<T extends EmbedderChunk>(c: T): c is T & { embedding: number[] } {
  const { embedding } = c
  return Array.isArray(embedding)
}
52
+
53
/**
 * Assertion helper: narrows `chunks` to chunks that all carry an embedding.
 *
 * @param chunks - Chunks to validate; an empty array passes.
 * @throws Error naming (as JSON) the first chunk for which `hasEmbedding` is false.
 */
export function assertEmbeddings<T extends EmbedderChunk>(
  chunks: T[]
): asserts chunks is (T & { embedding: number[] })[] {
  const offender = chunks.find((chunk) => !hasEmbedding(chunk))
  if (offender !== undefined) {
    throw new Error(`Chunk is missing embedding: ${JSON.stringify(offender)}`)
  }
}
60
+
61
/**
 * Wrapper around the SQLite database: the `docs` table, its content-synced
 * FTS5 index (`docs_fts` + `docs_vocab`), the optional `vec` embedding table,
 * and the `meta` / `cache` key-value tables.
 *
 * Instances are created with `Db.load()`; the constructor is private.
 */
export class Db {
  #db: Database
  /** Lazily detected state of the `vec` table; see the `vec` getter. */
  #vec?: { exists: boolean; dims?: number; init?: boolean }

  private constructor(db: Database) {
    this.#db = db
    this.init()
  }

  /** Opens `dbPath` through `openDatabase` and ensures the schema exists. */
  static async load(dbPath: string) {
    return new Db(await openDatabase(dbPath))
  }

  /** Sets connection pragmas and creates every table, index and trigger if missing. */
  private init() {
    this.#db.run("PRAGMA journal_mode = WAL")
    this.#db.run("PRAGMA foreign_keys = ON")
    this.#db.run("PRAGMA busy_timeout = 5000")

    this.#db.run(`
      CREATE TABLE IF NOT EXISTS docs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT NOT NULL UNIQUE,
        hash TEXT NOT NULL,
        vec_hash TEXT,
        body TEXT NOT NULL DEFAULT '',
        description TEXT NOT NULL DEFAULT '',
        title TEXT NOT NULL DEFAULT '',
        tags TEXT NOT NULL DEFAULT '',
        entities TEXT NOT NULL DEFAULT '',
        updated_at TEXT NOT NULL,
        synced_at TEXT,
        deadline REAL
      )
    `)

    this.#db.run(`CREATE INDEX IF NOT EXISTS idx_docs_path ON docs(path)`)
    this.#db.run(`CREATE INDEX IF NOT EXISTS idx_docs_hash ON docs(hash)`)

    // Content-synced FTS5: reads content from docs table, no duplication.
    // Fields ordered by BM25 weight: entities(10), tags(8), description(5), title(3), body(1)
    this.#db.run(`
      CREATE VIRTUAL TABLE IF NOT EXISTS docs_fts USING fts5(
        entities, tags, description, title, body,
        content='docs',
        content_rowid='id',
        tokenize='porter unicode61'
      )
    `)

    // Triggers to keep FTS in sync with docs table
    this.#db.run(`
      CREATE TRIGGER IF NOT EXISTS docs_fts_insert AFTER INSERT ON docs BEGIN
        INSERT INTO docs_fts(rowid, entities, tags, description, title, body)
        VALUES (new.id, new.entities, new.tags, new.description, new.title, new.body);
      END
    `)

    this.#db.run(`
      CREATE TRIGGER IF NOT EXISTS docs_fts_delete AFTER DELETE ON docs BEGIN
        INSERT INTO docs_fts(docs_fts, rowid, entities, tags, description, title, body)
        VALUES ('delete', old.id, old.entities, old.tags, old.description, old.title, old.body);
      END
    `)

    this.#db.run(`
      CREATE TRIGGER IF NOT EXISTS docs_fts_update AFTER UPDATE ON docs
      WHEN old.body != new.body
        OR old.title != new.title
        OR old.description != new.description
        OR old.tags != new.tags
        OR old.entities != new.entities
      BEGIN
        INSERT INTO docs_fts(docs_fts, rowid, entities, tags, description, title, body)
        VALUES ('delete', old.id, old.entities, old.tags, old.description, old.title, old.body);
        INSERT INTO docs_fts(rowid, entities, tags, description, title, body)
        VALUES (new.id, new.entities, new.tags, new.description, new.title, new.body);
      END
    `)

    // FTS5 vocabulary table for IDF-based term weighting
    this.#db.run(`CREATE VIRTUAL TABLE IF NOT EXISTS docs_vocab USING fts5vocab('docs_fts', 'row')`)

    this.#db.run(`
      CREATE TABLE IF NOT EXISTS meta (
        key TEXT PRIMARY KEY,
        value TEXT
      )
    `)

    this.#db.run(`
      CREATE TABLE IF NOT EXISTS cache (
        key TEXT PRIMARY KEY,
        value TEXT NOT NULL,
        accessed_at TEXT NOT NULL
      )
    `)
  }

  /** Drops all data tables and triggers, vacuums, and recreates the empty schema. */
  reset() {
    // Drop triggers first, then FTS (which references docs), then docs
    this.#db.run(`DROP TRIGGER IF EXISTS docs_fts_insert`)
    this.#db.run(`DROP TRIGGER IF EXISTS docs_fts_delete`)
    this.#db.run(`DROP TRIGGER IF EXISTS docs_fts_update`)
    this.#db.run(`DROP TABLE IF EXISTS docs_fts`)
    this.#db.run(`DROP TABLE IF EXISTS vec`)
    this.#db.run(`DROP TABLE IF EXISTS cache`)
    this.#db.run(`DROP TABLE IF EXISTS docs`)
    this.#db.run(`DROP TABLE IF EXISTS meta`)
    this.#db.run(`VACUUM`)
    this.#vec = { exists: false }
    this.init()
  }

  /**
   * Escapes `prefix` for use as a literal prefix in `LIKE ? ESCAPE '\'`
   * and appends the trailing `%` wildcard.
   */
  private static likePrefix(prefix: string): string {
    return prefix.replace(/[\\%_]/g, "\\$&") + "%"
  }

  /**
   * Creates the `vec` table for embeddings of size `dims` if needed.
   *
   * @throws Error when a `vec` table with a different dimension is already known.
   */
  private initVec(dims: number) {
    const { dims: existingDims, init } = this.vec
    // Validate before the "already initialised" shortcut so a later batch with
    // a different embedding size is reported clearly instead of skipped.
    if (existingDims && existingDims !== dims)
      throw new Error(
        `Vector dimension mismatch: existing **vec** has \`${existingDims}\` dims, but got \`${dims}\`.\n` +
          `Run \`rekal reset\` and \`rekal sync\` to recreate with the correct dimensions.`
      )
    if (init) return
    this.#db.run(
      `CREATE VIRTUAL TABLE IF NOT EXISTS vec USING vec0(
        doc_id INTEGER NOT NULL,
        seq INTEGER NOT NULL,
        +path TEXT NOT NULL,
        embedding float[${dims}] distance_metric=cosine
      )`
    )
    this.#vec = { dims, exists: true, init: true }
  }

  // --- Docs ---

  /** Fetches one doc by numeric id or by path; `undefined` when not found. */
  getDoc(from: string | number) {
    // `field` is one of two fixed column names, never caller-supplied text.
    const field = typeof from === "number" ? "id" : "path"
    return this.#db.query(`SELECT * FROM docs WHERE ${field} = ?`).get(from) as DocRow | undefined
  }

  /**
   * Fetches docs keyed by id. Without `from`, returns every doc. With `from`,
   * the lookup column (id vs. path) is chosen from the type of the first element.
   */
  getDocs(from?: (string | number)[]) {
    let ret: DocRow[]

    if (!from) ret = this.#db.query(`SELECT * FROM docs`).all() as DocRow[]
    else if (from.length === 0) ret = [] // nothing requested: skip the `IN ()` query
    else {
      const field = typeof from[0] === "number" ? "id" : "path"
      const placeholders = from.map(() => "?").join(",")
      ret = this.#db
        .query(`SELECT * FROM docs WHERE ${field} IN (${placeholders})`)
        .all(...from) as DocRow[]
    }
    return new Map(ret.map((row) => [row.id, row]))
  }

  /** Inserts a doc, or updates the existing row with the same path. Returns the row id. */
  addDoc(row: Omit<DocRow, "id">) {
    const result = this.#db
      .query(
        `INSERT INTO docs (path, hash, body, description, title, tags, entities, updated_at, synced_at)
         VALUES($path, $hash, $body, $description, $title, $tags, $entities, $updated_at, $synced_at)
         ON CONFLICT(path) DO UPDATE SET
           hash = excluded.hash,
           body = excluded.body,
           description = excluded.description,
           title = excluded.title,
           tags = excluded.tags,
           entities = excluded.entities,
           updated_at = excluded.updated_at,
           synced_at = excluded.synced_at
         RETURNING id`
      )
      .get(row) as { id: number }
    return result.id
  }

  /**
   * Deletes data for doc `id` from the selected tables. With the default
   * `tables = {}` nothing is deleted.
   */
  deleteDoc(id: number, tables: { docs?: boolean; vec?: boolean } = {}) {
    // FTS is auto-synced via triggers when docs are deleted/updated
    if (tables.vec) this.deleteEmbeddings(id)
    if (tables.docs) this.#db.query(`DELETE FROM docs WHERE id = ?`).run(id)
  }

  /**
   * State of the `vec` table: whether it exists and, if so, the embedding
   * dimension parsed from its CREATE statement. Cached after the first read.
   */
  get vec() {
    if (this.#vec) return this.#vec
    const row = this.#db
      .query(`SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'vec'`)
      .get() as { sql: string } | undefined
    const dims = row?.sql.match(/embedding float\[(\d+)\]/)?.[1]
    this.#vec = { dims: dims ? parseInt(dims, 10) : undefined, exists: !!row?.sql }
    return this.#vec
  }

  /** Collects row counts and size figures for the database. */
  getStatus() {
    const count = (sql: string) => (this.#db.query(sql).get() as { n: number }).n
    return {
      cache: count(`SELECT count(*) as n FROM cache`),
      dbSize: (
        this.#db
          .query(`SELECT page_count * page_size as n FROM pragma_page_count, pragma_page_size`)
          .get() as { n: number }
      ).n,
      docs: count(`SELECT count(*) as n FROM docs`),
      docsWithDescription: count(`SELECT count(*) as n FROM docs WHERE description != ''`),
      lastSync: (
        this.#db.query(`SELECT max(synced_at) as t FROM docs`).get() as { t: string | null }
      ).t,
      unembedded: count(
        `SELECT count(*) as n FROM docs WHERE vec_hash IS NULL OR vec_hash != hash`
      ),
      vecDims: this.vec.dims,
      vecs: this.vec.exists ? count(`SELECT count(*) as n FROM vec`) : 0,
      vocabTerms: count(`SELECT count(DISTINCT term) as n FROM docs_vocab`),
    }
  }

  /** Wraps `fn` with the underlying database's `transaction` helper. */
  transaction<A extends any[], T>(fn: (...args: A) => T) {
    return this.#db.transaction(fn)
  }

  /** Docs whose `vec_hash` is missing or differs from `hash`, ordered by path. */
  getUnembeddedDocs(): DocRow[] {
    return this.#db
      .query(
        `SELECT * FROM docs
         WHERE vec_hash IS NULL OR vec_hash != hash
         ORDER BY path`
      )
      .all() as DocRow[]
  }

  /** Sets `synced_at` of doc `id` to the current time (ISO-8601). */
  touchDoc(id: number) {
    this.#db.query(`UPDATE docs SET synced_at = ? WHERE id = ?`).run(new Date().toISOString(), id)
  }

  /** Records `docHash` as the hash the doc's embeddings were computed for. */
  markEmbedded(id: number, docHash: string) {
    this.#db.query(`UPDATE docs SET vec_hash = ? WHERE id = ?`).run(docHash, id)
  }

  /**
   * Delete docs not seen since the given sync timestamp, optionally scoped to a path prefix.
   *
   * @param syncedBefore - Docs with `synced_at` NULL or lower than this value are stale.
   * @param prefix - When given, only docs whose path starts with it are considered.
   *   The prefix is matched literally (`%` and `_` are escaped); note that SQLite's
   *   `LIKE` is case-insensitive for ASCII by default.
   * @returns Number of docs deleted.
   */
  deleteStaleDocs(syncedBefore: string, prefix?: string): number {
    // The OR must be parenthesised: AND binds tighter than OR, so without the
    // parentheses never-synced rows would be deleted regardless of the prefix.
    let query = `SELECT id FROM docs WHERE (synced_at IS NULL OR synced_at < ?)`
    const params = [syncedBefore]
    if (prefix) {
      query += ` AND path LIKE ? ESCAPE '\\'`
      params.push(Db.likePrefix(prefix))
    }
    const stale = this.#db.query(query).all(...params) as { id: number }[]
    for (const { id } of stale) {
      this.deleteDoc(id, { docs: true, vec: true })
    }
    return stale.length
  }

  // --- FTS ---
  // FTS is auto-synced via triggers on the docs table.

  /**
   * Scoped FTS search: only match docs whose path starts with one of the given prefixes.
   *
   * An omitted `scope` searches everything; an empty `scope` array returns no
   * results. Prefixes are matched literally (`%` and `_` are escaped).
   */
  searchFts(query: string, opts?: DbSearchOptions): FTSResult[] {
    if (opts?.scope?.length === 0) return [] // empty scope means no results
    const scope = opts?.scope ?? []
    const scopeQuery =
      scope.length === 0
        ? ""
        : `AND (${scope.map(() => `d.path LIKE ? ESCAPE '\\'`).join(" OR ")})`
    return this.#db
      .query(
        `SELECT f.rowid, bm25(docs_fts, 10, 8, 5, 3, 1) as score
         FROM docs_fts f
         ${scope.length > 0 ? "JOIN docs d ON d.id = f.rowid" : ""}
         WHERE docs_fts MATCH ?
         ${scopeQuery}
         ORDER BY score
         LIMIT ?`
      )
      .all(
        query,
        ...scope.map((p) => Db.likePrefix(p)),
        opts?.limit ?? SEARCH_LIMIT
      ) as FTSResult[]
  }

  /**
   * Gets weights for high-frequency terms.
   * Note: Truly common words will result in an IDF of 0 or less, which is clamped to 0.
   *
   * @returns Map from term to clamped IDF, for terms whose document frequency
   *   exceeds both `totalDocs * STOPWORD_THRESHOLD` and `STOPWORD_MIN_DOCS`
   *   (at most `STOPWORD_LIMIT` terms, most frequent first).
   */
  getStopWords(): Map<string, number> {
    // 1. Get total doc count (N) first
    const totalDocs =
      (this.#db.query("SELECT count(*) as n FROM docs").get() as { n: number } | undefined)?.n ?? 0

    if (totalDocs === 0) return new Map()

    // 2. Fetch the high-frequency terms
    const rows = this.#db
      .query(
        `SELECT v.term, v.doc
         FROM docs_vocab v
         WHERE v.doc > ? AND v.doc > ?
         ORDER BY v.doc DESC
         LIMIT ?`
      )
      .all(totalDocs * STOPWORD_THRESHOLD, STOPWORD_MIN_DOCS, STOPWORD_LIMIT) as {
      term: string
      doc: number
    }[]

    return new Map(
      rows.map((r) => {
        // Calculate IDF
        const idf = Math.log((totalDocs - r.doc + 0.5) / (r.doc + 0.5))

        // For stop words, we usually want to clamp at 0.
        // If a word is in >50% of docs, the formula goes negative.
        return [r.term, Math.max(0, idf)]
      })
    )
  }

  /**
   * IDF weight for each term, in input order. Terms absent from the vocabulary
   * are treated as having document frequency 0. Values are not clamped and can
   * be negative for very common terms.
   */
  getWeights(terms: string[]): number[] {
    if (terms.length === 0) return []
    const total = (this.#db.query(`SELECT count(*) as n FROM docs`).get() as { n: number }).n
    const placeholders = terms.map(() => "?").join(",")
    const rows = this.#db
      .query(`SELECT term, doc FROM docs_vocab WHERE term IN (${placeholders})`)
      .all(...terms) as { term: string; doc: number }[]
    const df = new Map(rows.map((r) => [r.term, r.doc]))
    return terms.map((t) => {
      const n = df.get(t) ?? 0
      return Math.log((total - n + 0.5) / (n + 0.5))
    })
  }

  // --- Vector ---

  /**
   * Insert embeddings into the vec table. The table is created on first use,
   * sized from the first chunk's embedding.
   *
   * @throws Error when a chunk has no embedding or the dimension does not match the table.
   */
  insertEmbeddings(chunks: StoreChunk[]) {
    assertEmbeddings(chunks)
    if (chunks.length === 0) return
    this.initVec(chunks[0].embedding.length)
    const stmt = this.#db.query(`INSERT INTO vec(doc_id, seq, path, embedding) VALUES (?, ?, ?, ?)`)
    for (const chunk of chunks) {
      stmt.run(chunk.doc_id, chunk.seq, chunk.doc.path, JSON.stringify(chunk.embedding))
    }
  }

  /** Delete all vec entries for a doc */
  deleteEmbeddings(docId: number) {
    if (this.vec.exists) this.#db.query(`DELETE FROM vec WHERE doc_id = ?`).run(docId)
  }

  /**
   * Global KNN search, returns top results across all docs.
   * `opts.scope` is not applied here; only `opts.limit` is used.
   */
  searchVec(embedding: number[], opts?: DbSearchOptions): VecResult[] {
    if (!this.vec.exists) return []
    const limit = opts?.limit ?? SEARCH_LIMIT
    return this.#db
      .query(
        `SELECT doc_id, seq, path, distance, (1 - distance/2) as score
         FROM vec
         WHERE embedding MATCH ?
           AND k = ?
         ORDER BY distance`
      )
      .all(JSON.stringify(embedding), limit) as VecResult[]
  }

  // --- Frecency ---

  /** Stores `deadline` on doc `docId`. */
  setDeadline(docId: number, deadline: number) {
    this.#db.query(`UPDATE docs SET deadline = ? WHERE id = ?`).run(deadline, docId)
  }

  // --- Meta ---

  /** Reads a value from the `meta` table; `undefined` when the key is absent. */
  getMeta(key: string) {
    return (
      this.#db.query(`SELECT value FROM meta WHERE key = ?`).get(key) as
        | { value: string }
        | undefined
    )?.value
  }

  /** Inserts or overwrites a value in the `meta` table. */
  setMeta(key: string, value: string) {
    this.#db
      .query(`INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = ?`)
      .run(key, value, value)
  }

  // --- Cache ---

  /** Reads and JSON-parses a cache entry, refreshing its `accessed_at` timestamp. */
  cacheGet<T>(key: string): T | undefined {
    const row = this.#db.query(`SELECT value FROM cache WHERE key = ?`).get(key) as
      | { value: string }
      | undefined
    if (!row) return
    this.#db
      .query(`UPDATE cache SET accessed_at = ? WHERE key = ?`)
      .run(new Date().toISOString(), key)
    return JSON.parse(row.value) as T
  }

  /** Stores `value` as JSON under `key` and returns `value` unchanged. */
  cacheSet<T>(key: string, value: T): T {
    this.#db
      .query(
        `INSERT INTO cache (key, value, accessed_at) VALUES (?, ?, ?)
         ON CONFLICT(key) DO UPDATE SET value = excluded.value, accessed_at = excluded.accessed_at`
      )
      .run(key, JSON.stringify(value), new Date().toISOString())
    return value
  }

  /** Keeps only the `maxEntries` most recently accessed cache rows. */
  cachePrune(maxEntries = 10_000) {
    this.#db
      .query(
        `DELETE FROM cache WHERE key NOT IN (
           SELECT key FROM cache ORDER BY accessed_at DESC LIMIT ?
         )`
      )
      .run(maxEntries)
  }
}
package/src/doc.ts ADDED
@@ -0,0 +1,163 @@
1
+ import type { Frontmatter, MarkdownDoc } from "./md.ts"
2
+ import type { VfsEntry } from "./vfs.ts"
3
+
4
+ import { readFile } from "node:fs/promises"
5
+ import { basename, join, resolve } from "pathe"
6
+ import { astat } from "./fs.ts"
7
+ import { parseMarkdown } from "./md.ts"
8
+ import { normUri } from "./uri.ts"
9
+ import { hash } from "./util.ts"
10
+
11
+ const INDEX = "index.md"
12
+ const MAX_DESC_LENGTH = 30 * 4 // roughly 30 tokens
13
+
14
+ export type DocFrontmatter = { // frontmatter keys Doc reads, on top of the generic Frontmatter
15
+ description?: string // when non-blank after trimming, used as the doc description instead of packed headings
16
+ tags?: string[] // returned by Doc.tags; [] when absent
17
+ entities?: string[] // returned by Doc.entities; [] when absent
18
+ } & Frontmatter
19
+
20
+ type DocHeading = { // a heading kept from the parsed markdown sections
21
+ level: number // section.level; only sections with level > 0 are kept
22
+ text: string // section.heading, stored as-is (sections with blank headings are dropped)
23
+ }
24
+
25
/**
 * A markdown document backed by either a `.md` file or a directory whose
 * content comes from its `index.md`. Create instances with `Doc.load()`.
 */
export class Doc {
  #isDir = false
  #name = ""
  hash = ""
  updated = new Date(0)
  headings: DocHeading[] = []
  parsed: MarkdownDoc = { body: "", bodyOffset: 0, frontmatter: {}, sections: [], text: "" }

  protected constructor(
    public uri: string,
    public path: string
  ) {
    this.path = resolve(path)
    this.#name = basename(this.path, ".md")
  }

  /** The complete markdown source, frontmatter included. */
  get text() {
    return this.parsed.text
  }

  /** The markdown source with the frontmatter removed. */
  get body() {
    return this.parsed.body
  }

  /** The parsed frontmatter object. */
  get fm() {
    return this.parsed.frontmatter as DocFrontmatter
  }

  /** File or folder name of the doc, with a trailing `.md` removed. */
  get name(): string {
    return this.#name
  }

  /** The frontmatter description, trimmed; `undefined` when missing or blank. */
  get $description() {
    const trimmed = this.fm.description?.trim()
    return trimmed ? trimmed : undefined
  }

  /**
   * The frontmatter description when present; otherwise heading texts joined
   * with ", ". Headings are admitted level by level (lowest level number
   * first) while the total text length stays within `MAX_DESC_LENGTH`; the
   * first admitted heading is always accepted. Output keeps document order.
   */
  get description() {
    const explicit = this.$description
    if (explicit || this.headings.length === 0) return explicit

    const levels = this.headings.map((h) => h.level)
    const shallowest = Math.min(...levels)
    const deepest = Math.max(...levels)

    // Indices of the headings that made it into the description.
    const picked = new Set<number>()
    let used = 0
    for (let level = shallowest; level <= deepest; level++) {
      this.headings.forEach((heading, index) => {
        if (heading.level !== level) return
        const overBudget = used !== 0 && used + heading.text.length > MAX_DESC_LENGTH
        if (overBudget) return
        picked.add(index)
        used += heading.text.length
      })
    }

    const parts: string[] = []
    this.headings.forEach((heading, index) => {
      if (picked.has(index)) parts.push(heading.text)
    })
    return parts.join(", ").trim()
  }

  /** Title from frontmatter (trimmed, non-blank) or else the first heading's text. */
  get $title(): string | undefined {
    const { title } = this.fm
    if (typeof title === "string") {
      const trimmed = title.trim()
      if (trimmed.length > 0) return trimmed
    }
    return this.headings[0]?.text
  }

  /**
   * The name when there is no `$title`; `$title` when it already contains the
   * name (case-insensitive); otherwise `name - $title`.
   */
  get title() {
    const resolved = this.$title
    const blank = !(resolved ?? "").length
    if (blank) return this.name
    const mentionsName = resolved?.toLowerCase().includes(this.name.toLowerCase())
    return mentionsName ? resolved : `${this.name} - ${resolved}`
  }

  /** Frontmatter tags, or an empty array. */
  get tags(): string[] {
    return this.fm.tags ?? []
  }

  /** Frontmatter entities, or an empty array. */
  get entities(): string[] {
    return this.fm.entities ?? []
  }

  /** Whether the doc path turned out to be a directory when loaded. */
  get isDir(): boolean {
    return this.#isDir
  }

  /**
   * Reads and parses the doc from disk.
   *
   * @returns `this`, or `undefined` when the path is not a directory and the
   *   file cannot be stat'ed. A directory without `index.md` loads as an empty doc.
   * @throws Error when the path itself ends with `index.md`.
   */
  protected async load(): Promise<Doc | undefined> {
    // Quick validation only: the caller is responsible for passing either a
    // markdown file or a directory.
    if (basename(this.path) === INDEX) {
      throw new Error(`Doc path cannot end with \`${INDEX}\`:\n\`${this.path}\``)
    }

    let mdPath = this.path
    let stat = await astat(mdPath)
    if (stat?.isDirectory()) {
      // Directories take their content from the index file inside them.
      this.#isDir = true
      mdPath = join(this.path, INDEX)
      stat = await astat(mdPath)
    }

    if (!stat && !this.#isDir) return

    this.uri = normUri(this.uri, this.isDir)

    // Read the file (if any) and normalize line endings to LF.
    const raw = stat ? await readFile(mdPath, "utf8") : ""
    const text = raw.replace(/\r\n/g, "\n")

    this.updated = stat?.mtime ?? new Date(0)
    this.hash = hash(text)
    this.parsed = parseMarkdown(text)

    const headings: DocHeading[] = []
    for (const section of this.parsed.sections) {
      if (section.level > 0 && section.heading.trim().length > 0) {
        headings.push({ level: section.level, text: section.heading })
      }
    }
    this.headings = headings
    return this
  }

  static async load(entry: string | VfsEntry): Promise<Doc | undefined>
  static async load(uri: string, path?: string): Promise<Doc | undefined>
  /** Loads a doc from a `VfsEntry` or a uri/path pair; `undefined` when no path is available. */
  static async load(uri: string | VfsEntry, path?: string): Promise<Doc | undefined> {
    const entry = typeof uri === "string" ? { path: path, uri } : uri
    if (!entry.path) return undefined
    return await new Doc(entry.uri, entry.path).load()
  }
}