@100xprompt/chitta 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +203 -0
  3. package/assets/rules/claude-md.md +9 -0
  4. package/assets/skill/SKILL.md +47 -0
  5. package/package.json +48 -0
  6. package/src/README.md +124 -0
  7. package/src/arango-client.ts +67 -0
  8. package/src/arango-graph-provider.ts +364 -0
  9. package/src/bin.ts +27 -0
  10. package/src/config-env.ts +53 -0
  11. package/src/embedded/authorizer.ts +89 -0
  12. package/src/embedded/cli.ts +86 -0
  13. package/src/embedded/code-extractor.ts +9 -0
  14. package/src/embedded/demo.ts +36 -0
  15. package/src/embedded/extract.ts +12 -0
  16. package/src/embedded/extractors/code.ts +308 -0
  17. package/src/embedded/extractors/deterministic.ts +63 -0
  18. package/src/embedded/extractors/llm.ts +151 -0
  19. package/src/embedded/extractors/text-hygiene.ts +54 -0
  20. package/src/embedded/extractors/types.ts +34 -0
  21. package/src/embedded/graph/acl-paths.ts +96 -0
  22. package/src/embedded/graph/adjacency.ts +61 -0
  23. package/src/embedded/graph/centrality.ts +23 -0
  24. package/src/embedded/graph/communities.ts +46 -0
  25. package/src/embedded/graph/cypher.ts +17 -0
  26. package/src/embedded/graph/impact.ts +24 -0
  27. package/src/embedded/graph/knowledge-graph.ts +108 -0
  28. package/src/embedded/graph/pagerank.ts +57 -0
  29. package/src/embedded/graph/sql-access.ts +13 -0
  30. package/src/embedded/graph/traversal.ts +73 -0
  31. package/src/embedded/graph/types.ts +35 -0
  32. package/src/embedded/graph-query.ts +126 -0
  33. package/src/embedded/index.ts +171 -0
  34. package/src/embedded/ingest.ts +262 -0
  35. package/src/embedded/kgqa/answer-paths.ts +197 -0
  36. package/src/embedded/kgqa/entity-link.ts +13 -0
  37. package/src/embedded/kgqa/intent.ts +14 -0
  38. package/src/embedded/kgqa/predicates.ts +9 -0
  39. package/src/embedded/kgqa/preference.ts +20 -0
  40. package/src/embedded/kgqa/select.ts +99 -0
  41. package/src/embedded/kgqa/text.ts +16 -0
  42. package/src/embedded/kgqa/types.ts +6 -0
  43. package/src/embedded/kgqa-service.ts +122 -0
  44. package/src/embedded/llm-extractor.ts +10 -0
  45. package/src/embedded/local-embeddings.ts +36 -0
  46. package/src/embedded/personal.ts +100 -0
  47. package/src/embedded/reranker.ts +62 -0
  48. package/src/embedded/retrieval/decay-stage.ts +59 -0
  49. package/src/embedded/retrieval/diversity.ts +37 -0
  50. package/src/embedded/retrieval/fuse.ts +52 -0
  51. package/src/embedded/retrieval/graph-stage.ts +45 -0
  52. package/src/embedded/retrieval/hybrid-retriever.ts +80 -0
  53. package/src/embedded/retrieval/keyword-stage.ts +27 -0
  54. package/src/embedded/retrieval/passage.ts +44 -0
  55. package/src/embedded/retrieval/rerank-stage.ts +31 -0
  56. package/src/embedded/retrieval/trace.ts +31 -0
  57. package/src/embedded/retrieval/vector-stage.ts +15 -0
  58. package/src/embedded/sqlite-graph-provider.ts +119 -0
  59. package/src/embedded/sqlite-store.ts +95 -0
  60. package/src/embedded/sqlite-vec-service.ts +122 -0
  61. package/src/embedded/store/chunks.ts +61 -0
  62. package/src/embedded/store/fts.ts +50 -0
  63. package/src/embedded/store/nodes-edges.ts +112 -0
  64. package/src/embedded/store/salience.ts +37 -0
  65. package/src/embedded/store/schema.ts +109 -0
  66. package/src/embedded/transformers-embeddings.ts +100 -0
  67. package/src/embeddings.ts +51 -0
  68. package/src/eval/goldset.ts +46 -0
  69. package/src/eval/harness.ts +65 -0
  70. package/src/eval/metrics.ts +38 -0
  71. package/src/http/server.ts +93 -0
  72. package/src/index.ts +44 -0
  73. package/src/install/index.ts +139 -0
  74. package/src/install/platforms.ts +126 -0
  75. package/src/install/skill.ts +46 -0
  76. package/src/install/writers.ts +82 -0
  77. package/src/mcp/backend.ts +129 -0
  78. package/src/mcp/server.ts +83 -0
  79. package/src/mcp/tools/context-about.ts +69 -0
  80. package/src/mcp/tools/context-graph.ts +23 -0
  81. package/src/mcp/tools/context-ingest.ts +88 -0
  82. package/src/mcp/tools/context-rebuild.ts +22 -0
  83. package/src/mcp/tools/context-relate.ts +88 -0
  84. package/src/mcp/tools/get-context.ts +52 -0
  85. package/src/mcp/tools/index.ts +40 -0
  86. package/src/mcp/tools/types.ts +33 -0
  87. package/src/permission.ts +72 -0
  88. package/src/provider.ts +65 -0
  89. package/src/qdrant-vector.ts +76 -0
  90. package/src/retrieval.ts +218 -0
  91. package/src/service.ts +40 -0
  92. package/src/types.ts +91 -0
@@ -0,0 +1,95 @@
1
+ // Embedded store schema (bun:sqlite). A generic property-graph (nodes + edges)
2
+ // plus a chunks table holding payloads and dense vectors. One file = the whole
3
+ // knowledge base - no servers. This is what makes the single-binary path work.
4
+ //
5
+ // Vector search adapts: if an extension-capable SQLite is available it loads
6
+ // sqlite-vec and maintains a `vec_chunks` ANN index (the zvec-style fast path,
7
+ // in-process, same file); otherwise it transparently falls back to brute-force
8
+ // cosine. Either way the public API and the VectorDBService interface are identical.
9
+ //
10
+ // This file is a thin FACADE: it owns the single bun:sqlite Database and delegates
11
+ // to focused modules under ./store/ (schema/migrations, graph nodes+edges, chunks+
12
+ // vec ANN, FTS5, salience). Pure structural refactor - identical SQL, identical
13
+ // behavior. The public surface of SqliteStore is preserved exactly.
14
+
15
+ import { Database } from "bun:sqlite"
16
+ import { migrate, tryEnableExtensions, tryLoadVec } from "./store/schema"
17
+ import * as graph from "./store/nodes-edges"
18
+ import * as fts from "./store/fts"
19
+ import { ChunkRepo } from "./store/chunks"
20
+ import * as salience from "./store/salience"
21
+
22
/** Free-form JSON object; used as the `data` payload type for graph nodes. */
export type Json = Record<string, unknown>
23
+
24
/**
 * Facade that owns one bun:sqlite Database and forwards every call to the
 * focused modules under ./store/ (graph nodes+edges, salience, chunks, FTS).
 */
export class SqliteStore {
  /** The single underlying database handle. */
  readonly db: Database
  /** Result of tryLoadVec(): whether the sqlite-vec extension loaded. */
  readonly vecEnabled: boolean
  /** Result of fts.tryEnableFts(): whether the FTS index is usable. */
  readonly ftsEnabled: boolean
  private readonly chunks: ChunkRepo

  /**
   * Opens the database at `path`, requests WAL journaling, runs migrations and
   * then probes for sqlite-vec and FTS support.
   *
   * @param path Database location; defaults to an in-memory database.
   */
  constructor(path = ":memory:") {
    // Called before the Database is opened.
    tryEnableExtensions()
    const handle = new Database(path)
    handle.exec("PRAGMA journal_mode = WAL;")
    migrate(handle)
    const vecLoaded = tryLoadVec(handle)
    const ftsReady = fts.tryEnableFts(handle)
    this.db = handle
    this.vecEnabled = vecLoaded
    this.ftsEnabled = ftsReady
    this.chunks = new ChunkRepo(handle, vecLoaded, ftsReady)
  }

  // ── Graph: nodes & edges ────────────────────────────────────────────────

  /** Delegates to graph.addNode for node `id` in collection `coll`. */
  addNode(id: string, coll: string, data: Json = {}): void {
    graph.addNode(this.db, id, coll, data)
  }

  /** Delegates to graph.addEdge for the (src, dst, label) edge. */
  addEdge(
    src: string,
    dst: string,
    label: string,
    opts: { weight?: number; validAt?: number; recordId?: string; confidence?: number } = {},
  ): void {
    graph.addEdge(this.db, src, dst, label, opts)
  }

  /** Delegates to graph.clearRecordContributions for `recordId`. */
  clearRecordContributions(recordId: string): void {
    graph.clearRecordContributions(this.db, recordId)
  }

  /**
   * Delegates to graph.supersedeEdge.
   *
   * @returns The count reported by graph.supersedeEdge.
   */
  supersedeEdge(src: string, label: string, keepDst: string, atTime = Date.now()): number {
    const superseded = graph.supersedeEdge(this.db, src, label, keepDst, atTime)
    return superseded
  }

  /**
   * Delegates to graph.backfillEdgeProvenance.
   *
   * @returns The count reported by graph.backfillEdgeProvenance.
   */
  backfillEdgeProvenance(): number {
    const repaired = graph.backfillEdgeProvenance(this.db)
    return repaired
  }

  // ── Salience / decay ────────────────────────────────────────────────────

  /** Delegates to salience.recordSalience for the given record ids. */
  recordSalience(recordIds: string[]): Map<string, { lastAccessedAt: number; accessCount: number; importance: number }> {
    const byId = salience.recordSalience(this.db, recordIds)
    return byId
  }

  /** Delegates to salience.touchRecords for the given record ids. */
  touchRecords(recordIds: string[]): void {
    salience.touchRecords(this.db, recordIds)
  }

  // ── Chunks + vec0 ANN ───────────────────────────────────────────────────

  /** Stores one chunk (payload + embedding) through the ChunkRepo. */
  addChunk(pointId: string, virtualRecordId: string, orgId: string, content: string, embedding: number[]): void {
    this.chunks.addChunk(pointId, virtualRecordId, orgId, content, embedding)
  }

  /** Runs the ChunkRepo KNN search for `queryVec`, asking for `k` results. */
  knnSearch(queryVec: number[], k: number): Array<{ rowid: number; distance: number }> {
    const neighbours = this.chunks.knnSearch(queryVec, k)
    return neighbours
  }

  /** Delegates to ChunkRepo.resetVec. */
  resetVec(): void {
    this.chunks.resetVec()
  }

  // ── FTS5 keyword index ──────────────────────────────────────────────────

  /** Delegates to fts.ftsSearch, passing this store's `ftsEnabled` flag. */
  ftsSearch(query: string, k: number): number[] {
    const rowids = fts.ftsSearch(this.db, this.ftsEnabled, query, k)
    return rowids
  }

  /** Delegates to fts.resetFts, passing this store's `ftsEnabled` flag. */
  resetFts(): void {
    fts.resetFts(this.db, this.ftsEnabled)
  }

  /** Closes the underlying database handle. */
  close(): void {
    this.db.close()
  }
}
@@ -0,0 +1,122 @@
1
+ // VectorDBService over bun:sqlite. Uses the sqlite-vec ANN index when the store
2
+ // has it (fast path), else brute-force cosine - same results, same interface.
3
+ // Honors the must/should filter the retrieval spine builds: a point passes if it
4
+ // matches all `must` AND (no `should` OR matches a `should`). The `should` on
5
+ // virtualRecordId is the ACL restriction to accessible records, applied AFTER the
6
+ // ANN candidates come back (over-fetched) so recall holds under filtering.
7
+
8
+ import type { VectorDBService, VectorPoint, VectorQueryResult } from "../provider"
9
+ import type { SqliteStore } from "./sqlite-store"
10
+
11
/** Filter object produced by filterCollection and read back by queryNearestPoints. */
interface EmbeddedFilter {
  /** Conditions that must all match; this service reads the `orgId` key. */
  must?: Record<string, unknown>
  /** Alternative conditions; this service reads the `virtualRecordId` key. */
  should?: Record<string, unknown>
}
15
+
16
/**
 * Cosine similarity of two vectors, computed over their common prefix (the
 * shorter length).
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns The similarity, or 0 when either vector has zero magnitude over the
 *   compared range (including the empty case).
 */
function cosine(a: number[], b: number[]): number {
  const shared = Math.min(a.length, b.length)
  let dotProduct = 0
  let sumSqA = 0
  let sumSqB = 0
  let idx = 0
  while (idx < shared) {
    const x = a[idx]
    const y = b[idx]
    dotProduct += x * y
    sumSqA += x * x
    sumSqB += y * y
    idx += 1
  }
  // A zero-magnitude vector has no direction; report 0 rather than dividing by 0.
  const degenerate = sumSqA === 0 || sumSqB === 0
  return degenerate ? 0 : dotProduct / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB))
}
29
+
30
/**
 * VectorDBService backed by a SqliteStore. Each request is served from the ANN
 * index when the store has it and it yields results, otherwise by a full scan
 * with cosine similarity computed in TypeScript.
 */
export class SqliteVecService implements VectorDBService {
  constructor(private readonly store: SqliteStore) {}

  /** Packages the given `must` / `should` conditions into a filter object. */
  async filterCollection(args: {
    must?: Record<string, unknown>
    should?: Record<string, unknown>
  }): Promise<EmbeddedFilter> {
    const { must, should } = args
    return { must, should }
  }

  /**
   * Answers every request in `args.requests`, one result object per request.
   *
   * @param args.requests Request objects; each is read for `prefetch`, `filter`
   *   and `limit` (default 20).
   */
  async queryNearestPoints(args: {
    collectionName: string
    requests: unknown[]
  }): Promise<VectorQueryResult[]> {
    return args.requests.map((raw) => this.runRequest(raw))
  }

  /** Serves a single request: ANN first, brute-force when ANN yields nothing. */
  private runRequest(raw: unknown): VectorQueryResult {
    const request = raw as {
      prefetch?: Array<{ query: unknown; using?: string }>
      filter?: EmbeddedFilter
      limit?: number
    }
    // Prefer the prefetch entry tagged "dense"; otherwise take the first entry.
    const prefetch = request.prefetch
    const denseEntry = prefetch?.find((p) => p.using === "dense")
    const dense = (denseEntry?.query ?? prefetch?.[0]?.query) as number[]

    const conditions = request.filter ?? {}
    const limit = request.limit ?? 20
    const mustOrg = conditions.must?.["orgId"] as string | undefined
    const allowedVids = conditions.should?.["virtualRecordId"] as string[] | undefined
    const allowed = allowedVids ? new Set(allowedVids) : undefined

    // An empty ANN answer (index missing, not built, or everything filtered out)
    // triggers the scan so rows present in `chunks` are never missed.
    let points: VectorPoint[] = []
    if (this.store.vecEnabled && dense) points = this.annQuery(dense, mustOrg, allowed, limit)
    if (points.length === 0) points = this.bruteForce(dense, mustOrg, allowed, limit)
    return { points }
  }

  /** True when a chunk row satisfies the org restriction and the allow-list. */
  private isVisible(
    row: { org_id: string; virtual_record_id: string },
    mustOrg: string | undefined,
    allowed: Set<string> | undefined,
  ): boolean {
    if (mustOrg != null && row.org_id !== mustOrg) return false
    return !allowed || allowed.has(row.virtual_record_id)
  }

  /** Builds the point returned to callers for one chunk row. */
  private toPoint(
    row: { point_id: string; virtual_record_id: string; org_id: string; content: string },
    score: number,
  ): VectorPoint {
    return {
      id: row.point_id,
      score,
      payload: { page_content: row.content, metadata: { virtualRecordId: row.virtual_record_id, orgId: row.org_id } },
    }
  }

  /**
   * Fast path: over-fetches ANN candidates (max(limit * 20, 50)), loads their
   * chunk rows, applies the org / allow-list filter, and returns the best
   * `limit` by similarity.
   */
  private annQuery(
    dense: number[],
    mustOrg: string | undefined,
    allowed: Set<string> | undefined,
    limit: number,
  ): VectorPoint[] {
    const neighbours = this.store.knnSearch(dense, Math.max(limit * 20, 50))
    if (neighbours.length === 0) return []

    const distanceOf = new Map<number, number>()
    for (const hit of neighbours) distanceOf.set(hit.rowid, hit.distance)

    const rowids = neighbours.map((hit) => hit.rowid)
    const placeholders = neighbours.map(() => "?").join(",")
    const rows = this.store.db
      .query(`SELECT rowid, point_id, virtual_record_id, org_id, content FROM chunks WHERE rowid IN (${placeholders})`)
      .all(...rowids) as Array<{
      rowid: number
      point_id: string
      virtual_record_id: string
      org_id: string
      content: string
    }>

    const points: VectorPoint[] = rows
      .filter((row) => this.isVisible(row, mustOrg, allowed))
      // Convert cosine distance to similarity; an unknown rowid scores 0.
      .map((row) => this.toPoint(row, 1 - (distanceOf.get(row.rowid) ?? 1)))
    points.sort((left, right) => right.score - left.score)
    return points.slice(0, limit)
  }

  /**
   * Fallback: scans every chunk row, filters, and scores with cosine() in
   * TypeScript. Rows score 0 when no dense query vector was supplied.
   */
  private bruteForce(
    dense: number[] | undefined,
    mustOrg: string | undefined,
    allowed: Set<string> | undefined,
    limit: number,
  ): VectorPoint[] {
    const rows = this.store.db
      .query("SELECT point_id, virtual_record_id, org_id, content, embedding FROM chunks")
      .all() as Array<{ point_id: string; virtual_record_id: string; org_id: string; content: string; embedding: string }>

    const ranked: VectorPoint[] = rows
      .filter((row) => this.isVisible(row, mustOrg, allowed))
      // Embeddings are stored as JSON text and parsed only for rows that pass the filter.
      .map((row) => this.toPoint(row, dense ? cosine(dense, JSON.parse(row.embedding) as number[]) : 0))
    ranked.sort((left, right) => right.score - left.score)
    return ranked.slice(0, limit)
  }
}
@@ -0,0 +1,61 @@
1
+ // Chunk persistence + vec0 ANN index + knnSearch. Pure structural extraction from
2
+ // sqlite-store.ts - identical SQL, identical behavior.
3
+ //
4
+ // The vec0 ANN table is created lazily once we know the embedding dimension, so the
5
+ // mutable `vecDim` state lives in a small repo object owned by the facade.
6
+
7
+ import { Database } from "bun:sqlite"
8
+ import { indexChunkFts } from "./fts"
9
+
10
/**
 * Persists chunks and keeps the optional vec0 ANN index and FTS index in step
 * with the `chunks` table. Holds the lazily discovered embedding dimension.
 */
export class ChunkRepo {
  // 0 until the vec0 table has been ensured by this instance.
  private vecDim = 0

  constructor(
    private readonly db: Database,
    private readonly vecEnabled: boolean,
    private readonly ftsEnabled: boolean,
  ) {}

  /**
   * Creates the vec0 ANN table on first use, once the embedding dimension is
   * known. Later calls are no-ops, even if `dim` differs from the first one.
   */
  private ensureVec(dim: number): void {
    if (!this.vecEnabled || this.vecDim) return
    this.db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(embedding float[${dim}] distance_metric=cosine)`)
    this.vecDim = dim
  }

  /**
   * Inserts or replaces the chunk identified by `pointId` and refreshes its
   * entries in the ANN and FTS indexes.
   *
   * @param pointId Primary key of the chunk.
   * @param virtualRecordId Record the chunk belongs to.
   * @param orgId Owning organisation id.
   * @param content Chunk text (also indexed by FTS when enabled).
   * @param embedding Dense vector, stored as JSON text.
   */
  addChunk(pointId: string, virtualRecordId: string, orgId: string, content: string, embedding: number[]): void {
    // INSERT OR REPLACE removes the conflicting row and inserts a fresh one, which
    // normally gets a NEW rowid. Remember the old rowid so its index entries can
    // be purged too; otherwise every re-ingest leaves orphaned ANN/FTS rows.
    const prior = this.db.query("SELECT rowid FROM chunks WHERE point_id = ?").get(pointId) as
      | { rowid: number }
      | null
      | undefined

    const res = this.db
      .query("INSERT OR REPLACE INTO chunks (point_id, virtual_record_id, org_id, content, embedding) VALUES (?, ?, ?, ?, ?)")
      .run(pointId, virtualRecordId, orgId, content, JSON.stringify(embedding))
    const rowid = Number(res.lastInsertRowid)
    const staleRowid = prior && Number(prior.rowid) !== rowid ? Number(prior.rowid) : undefined

    if (this.vecEnabled) {
      this.ensureVec(embedding.length)
      const dropVec = this.db.query("DELETE FROM vec_chunks WHERE rowid = ?")
      if (staleRowid !== undefined) dropVec.run(staleRowid)
      dropVec.run(rowid)
      this.db.query("INSERT INTO vec_chunks(rowid, embedding) VALUES (?, ?)").run(rowid, JSON.stringify(embedding))
    }
    if (this.ftsEnabled) {
      if (staleRowid !== undefined) this.db.query("DELETE FROM chunks_fts WHERE rowid = ?").run(staleRowid)
      indexChunkFts(this.db, rowid, content)
    }
  }

  /**
   * ANN KNN over the vec0 index.
   *
   * @returns `[{rowid, distance}]` ordered by distance, or `[]` when the index
   *   is disabled or the query fails (for example the table is missing), so
   *   the caller can fall back to brute-force.
   */
  knnSearch(queryVec: number[], k: number): Array<{ rowid: number; distance: number }> {
    if (!this.vecEnabled) return []
    try {
      return this.db
        .query("SELECT rowid, distance FROM vec_chunks WHERE embedding MATCH ? ORDER BY distance LIMIT ?")
        .all(JSON.stringify(queryVec), k) as Array<{ rowid: number; distance: number }>
    } catch {
      return [] // vec_chunks missing or incompatible → brute-force handles it
    }
  }

  /** Drops the ANN table and forgets the dimension so it is recreated on the next addChunk. */
  resetVec(): void {
    if (!this.vecEnabled) return
    this.db.exec("DROP TABLE IF EXISTS vec_chunks")
    this.vecDim = 0
  }
}
@@ -0,0 +1,50 @@
1
+ // FTS5 build/search/reset. Pure structural extraction from sqlite-store.ts -
2
+ // identical SQL, identical behavior.
3
+
4
+ import { Database } from "bun:sqlite"
5
+
6
/**
 * Ensures the `chunks_fts` FTS5 table exists and, when it is empty while
 * `chunks` already has rows, backfills it from `chunks` so keyword search
 * covers prior data without a reindex.
 *
 * @returns true when the index is usable; false when any step throws (for
 *   example FTS5 is unavailable), in which case callers run dense-only.
 */
export function tryEnableFts(db: Database): boolean {
  const countRows = (table: "chunks_fts" | "chunks"): number =>
    (db.query(`SELECT count(*) c FROM ${table}`).get() as { c: number }).c

  try {
    db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(content)")
    const indexed = countRows("chunks_fts")
    const stored = countRows("chunks")
    const needsBackfill = indexed === 0 && stored > 0
    if (needsBackfill) {
      db.exec("INSERT INTO chunks_fts(rowid, content) SELECT rowid, content FROM chunks")
    }
    return true
  } catch {
    return false // FTS5 unavailable → dense-only, hybrid degrades gracefully
  }
}
22
+
23
/**
 * Upserts one chunk into the FTS index: removes any existing entry for `rowid`,
 * then inserts the new content (called as part of addChunk).
 */
export function indexChunkFts(db: Database, rowid: number, content: string): void {
  const steps: Array<[string, Array<number | string>]> = [
    ["DELETE FROM chunks_fts WHERE rowid = ?", [rowid]],
    ["INSERT INTO chunks_fts(rowid, content) VALUES (?, ?)", [rowid, content]],
  ]
  for (const [sql, params] of steps) {
    db.query(sql).run(...params)
  }
}
28
+
29
/**
 * BM25 keyword search over `chunks_fts`.
 *
 * The query is lower-cased and split into tokens made of letters, digits and
 * `£ $ € % . -`. Tokens with fewer than two letters/digits are discarded; the
 * rest are quoted as literals and OR-joined, so special characters are safe and
 * any matching term contributes.
 *
 * @param db Database holding the `chunks_fts` table.
 * @param ftsEnabled Whether the FTS index is usable; when false, returns [].
 * @param query Free-text user query.
 * @param k Maximum number of rowids to return.
 * @returns Chunk rowids, best match first; [] when there are no usable terms or
 *   the FTS query throws.
 */
export function ftsSearch(db: Database, ftsEnabled: boolean, query: string, k: number): number[] {
  if (!ftsEnabled) return []
  const tokens = query.toLowerCase().match(/[\p{L}\p{N}£$€%.\-]+/gu) ?? []
  // Count Unicode letters/digits (by code point), not just ASCII: the tokenizer
  // above accepts any script, so an ASCII-only length check would silently drop
  // every non-Latin term.
  const terms = tokens.filter((t) => Array.from(t.replace(/[^\p{L}\p{N}]/gu, "")).length >= 2)
  if (!terms.length) return []
  const expr = terms.map((t) => `"${t.replace(/"/g, "")}"`).join(" OR ")
  try {
    const hits = db
      .query("SELECT rowid FROM chunks_fts WHERE chunks_fts MATCH ? ORDER BY bm25(chunks_fts) LIMIT ?")
      .all(expr, k) as Array<{ rowid: number }>
    return hits.map((r) => r.rowid)
  } catch {
    // Best-effort: a failed keyword lookup degrades to "no keyword hits".
    return []
  }
}
45
+
46
/**
 * Drops and recreates the (empty) BM25 index, e.g. before a full reindex.
 * Does nothing when FTS is not enabled.
 */
export function resetFts(db: Database, ftsEnabled: boolean): void {
  if (ftsEnabled) {
    db.exec("DROP TABLE IF EXISTS chunks_fts; CREATE VIRTUAL TABLE chunks_fts USING fts5(content);")
  }
}
@@ -0,0 +1,112 @@
1
+ // Graph vertex/edge CRUD + provenance + supersession. Pure structural extraction
2
+ // from sqlite-store.ts - identical SQL, identical behavior.
3
+
4
+ import { Database } from "bun:sqlite"
5
+ import type { Json } from "../sqlite-store"
6
+
7
/**
 * Inserts node `id` in collection `coll`, replacing any existing row with the
 * same id. `data` is stored as JSON text.
 */
export function addNode(db: Database, id: string, coll: string, data: Json = {}): void {
  const upsert = db.query("INSERT OR REPLACE INTO nodes (id, coll, data) VALUES (?, ?, ?)")
  upsert.run(id, coll, JSON.stringify(data))
}
10
+
11
/**
 * Merge-on-upsert of one edge: there is a single row per (src, dst, label).
 * Re-asserting an existing edge adds to its weight, clears `expired_at` and
 * `invalid_at` (reviving a superseded edge), keeps an already-set `valid_at`,
 * and keeps the larger confidence. When `opts.recordId` is given it is unioned
 * into the edge's provenance.
 *
 * @param opts.weight Weight to add (default 1).
 * @param opts.validAt Validity start, stored as NULL when omitted.
 * @param opts.recordId Source record asserting this edge.
 * @param opts.confidence Confidence of the assertion (default 1).
 */
export function addEdge(
  db: Database,
  src: string,
  dst: string,
  label: string,
  opts: { weight?: number; validAt?: number; recordId?: string; confidence?: number } = {},
): void {
  const createdAt = Date.now()
  const upsert = db.query(
    `INSERT INTO edges (src, dst, label, weight, created_at, valid_at, invalid_at, expired_at, provenance, confidence)
     VALUES (?, ?, ?, ?, ?, ?, NULL, NULL, '[]', ?)
     ON CONFLICT(src, dst, label) DO UPDATE SET
       weight = weight + excluded.weight,
       expired_at = NULL,
       invalid_at = NULL,
       valid_at = COALESCE(edges.valid_at, excluded.valid_at),
       confidence = MAX(edges.confidence, excluded.confidence)`,
  )
  const weight = opts.weight ?? 1
  const validAt = opts.validAt ?? null
  const confidence = opts.confidence ?? 1
  upsert.run(src, dst, label, weight, createdAt, validAt, confidence)

  if (opts.recordId) {
    addProvenance(db, src, dst, label, opts.recordId)
  }
}
36
+
37
/**
 * Unions `recordId` into the provenance list of the (src, dst, label) edge,
 * i.e. the list of records that asserted it. No-op when the edge does not
 * exist or already lists the record.
 */
function addProvenance(db: Database, src: string, dst: string, label: string, recordId: string): void {
  const existing = db
    .query("SELECT provenance FROM edges WHERE src = ? AND dst = ? AND label = ?")
    .get(src, dst, label) as { provenance: string } | undefined
  if (!existing) return

  const sources = JSON.parse(existing.provenance) as string[]
  if (sources.includes(recordId)) return

  // De-duplicate what was stored, then append the new record at the end.
  const merged = [...new Set<string>(sources), recordId]
  db.query("UPDATE edges SET provenance = ? WHERE src = ? AND dst = ? AND label = ?").run(
    JSON.stringify(merged),
    src,
    dst,
    label,
  )
}
48
+
49
/**
 * Source-keyed replace-on-reingest: removes everything `recordId` contributed
 * so facts dropped from the source are garbage-collected before re-ingest.
 *
 * Deletes the record's own `mentions` edges, then strips the record from the
 * provenance of every edge that lists it: the edge is deleted when no other
 * record remains, otherwise its weight is set to the number of surviving records.
 */
export function clearRecordContributions(db: Database, recordId: string): void {
  db.query("DELETE FROM edges WHERE src = ? AND label = 'mentions'").run(recordId)

  // LIKE is only a coarse pre-filter on the JSON text; exact membership is
  // re-checked below after parsing.
  const candidates = db
    .query("SELECT src, dst, label, provenance FROM edges WHERE provenance LIKE ?")
    .all(`%${JSON.stringify(recordId).slice(1, -1)}%`) as Array<{ src: string; dst: string; label: string; provenance: string }>

  for (const edge of candidates) {
    const before = JSON.parse(edge.provenance) as string[]
    const remaining = before.filter((source) => source !== recordId)
    const listedRecord = remaining.length !== before.length
    if (!listedRecord) continue // not actually this record

    if (remaining.length > 0) {
      db
        .query("UPDATE edges SET provenance = ?, weight = ? WHERE src = ? AND dst = ? AND label = ?")
        .run(JSON.stringify(remaining), remaining.length, edge.src, edge.dst, edge.label)
    } else {
      db.query("DELETE FROM edges WHERE src = ? AND dst = ? AND label = ?").run(edge.src, edge.dst, edge.label)
    }
  }
}
72
+
73
/**
 * Non-destructive supersession for a single-valued relation: closes every live
 * edge (expired_at IS NULL) from `src` with `label` whose target is not
 * `keepDst`. Rows are kept, with `invalid_at` set to `atTime` and `expired_at`
 * set to the current time, so history stays intact.
 *
 * @param atTime Timestamp stored as `invalid_at` (defaults to now).
 * @returns Number of edges that were closed.
 */
export function supersedeEdge(db: Database, src: string, label: string, keepDst: string, atTime = Date.now()): number {
  const closeLiveEdges = db.query(
    `UPDATE edges SET invalid_at = ?, expired_at = ?
     WHERE src = ? AND label = ? AND dst != ? AND expired_at IS NULL`,
  )
  const outcome = closeLiveEdges.run(atTime, Date.now(), src, label, keepDst)
  return Number(outcome.changes)
}
86
+
87
/**
 * Backfills provenance for legacy concept edges whose provenance is still '[]'
 * (edges labelled mentions / permissions / belongsTo / inheritPermissions are
 * skipped). Each such edge is attributed to the records that `mentions` BOTH of
 * its endpoints; edges with no such record are left untouched.
 *
 * Per the original note, an un-provenanced edge would be hidden by the
 * fail-closed per-edge ACL, so this restores it to the right permission scope.
 *
 * @returns Number of edges that received provenance.
 */
export function backfillEdgeProvenance(db: Database): number {
  const legacyEdges = db
    .query(
      `SELECT src, dst, label FROM edges
       WHERE provenance = '[]' AND label NOT IN ('mentions','permissions','belongsTo','inheritPermissions')`,
    )
    .all() as Array<{ src: string; dst: string; label: string }>
  const coMentioningRecords = db.query(
    `SELECT m1.src AS r FROM edges m1 JOIN edges m2 ON m1.src = m2.src
     WHERE m1.label = 'mentions' AND m2.label = 'mentions' AND m1.dst = ? AND m2.dst = ?`,
  )

  let repaired = 0
  for (const { src, dst, label } of legacyEdges) {
    const hits = coMentioningRecords.all(src, dst) as Array<{ r: string }>
    const records = Array.from(new Set(hits.map((hit) => hit.r)))
    if (records.length === 0) continue
    db.query("UPDATE edges SET provenance = ? WHERE src = ? AND dst = ? AND label = ?").run(
      JSON.stringify(records),
      src,
      dst,
      label,
    )
    repaired += 1
  }
  return repaired
}
@@ -0,0 +1,37 @@
1
+ // Memory salience / decay ops. Pure structural extraction from sqlite-store.ts -
2
+ // identical SQL, identical behavior.
3
+
4
+ import { Database } from "bun:sqlite"
5
+ import { ph } from "./schema"
6
+
7
/**
 * Reads per-record salience (access recency, frequency, importance) for the
 * given ids from `records` nodes; used to re-weight retrieval, never to delete.
 *
 * Missing fields fall back in SQL: `lastAccessedAt` → `createdAt` → 0,
 * `accessCount` → 0, `importance` → 1.
 *
 * @returns Map keyed by record id; ids with no matching node are absent.
 */
export function recordSalience(
  db: Database,
  recordIds: string[],
): Map<string, { lastAccessedAt: number; accessCount: number; importance: number }> {
  const salienceById = new Map<string, { lastAccessedAt: number; accessCount: number; importance: number }>()
  if (recordIds.length > 0) {
    const sql = `SELECT id,
       COALESCE(json_extract(data,'$.lastAccessedAt'), json_extract(data,'$.createdAt'), 0) la,
       COALESCE(json_extract(data,'$.accessCount'), 0) ac,
       COALESCE(json_extract(data,'$.importance'), 1) imp
     FROM nodes WHERE coll = 'records' AND id IN (${ph(recordIds.length)})`
    const rows = db.query(sql).all(...recordIds) as Array<{ id: string; la: number; ac: number; imp: number }>
    for (const { id, la, ac, imp } of rows) {
      salienceById.set(id, { lastAccessedAt: la, accessCount: ac, importance: imp })
    }
  }
  return salienceById
}
28
+
29
/**
 * Marks records as just accessed (called on retrieval): sets `lastAccessedAt`
 * to the current time and increments `accessCount` on each `records` node.
 * All ids in one call share the same timestamp.
 */
export function touchRecords(db: Database, recordIds: string[]): void {
  const touchedAt = Date.now()
  const bump = db.query(
    `UPDATE nodes SET data = json_set(json_set(data,'$.lastAccessedAt', ?), '$.accessCount',
     COALESCE(json_extract(data,'$.accessCount'),0) + 1) WHERE id = ? AND coll = 'records'`,
  )
  for (let i = 0; i < recordIds.length; i++) {
    bump.run(touchedAt, recordIds[i])
  }
}
@@ -0,0 +1,109 @@
1
+ // Schema DDL + migrations + sqlite-vec load detection for the embedded store.
2
+ // Pure structural extraction from sqlite-store.ts - identical SQL, identical behavior.
3
+
4
+ import { Database } from "bun:sqlite"
5
+ import fs from "node:fs"
6
+ import * as sqliteVec from "sqlite-vec"
7
+
8
// Guard so the custom-SQLite probe runs at most once per process.
let triedCustomSqlite = false

/**
 * Points bun at an extension-capable SQLite library (Homebrew / system) so
 * sqlite-vec can load. setCustomSQLite must be called once, before any Database
 * is opened, so only the first call does any work. Silently does nothing when
 * no candidate library exists or the call fails.
 */
export function tryEnableExtensions(): void {
  if (triedCustomSqlite) return
  triedCustomSqlite = true

  const candidates = [
    "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib",
    "/usr/local/opt/sqlite/lib/libsqlite3.dylib",
    "/usr/lib/x86_64-linux-gnu/libsqlite3.so",
    "/usr/lib/aarch64-linux-gnu/libsqlite3.so",
  ]
  // First candidate that exists on disk wins.
  let lib: string | undefined
  for (const candidate of candidates) {
    if (fs.existsSync(candidate)) {
      lib = candidate
      break
    }
  }
  if (!lib) return

  try {
    const withCustom = Database as unknown as { setCustomSQLite(p: string): void }
    withCustom.setCustomSQLite(lib)
  } catch {
    /* already opened a DB, or unsupported - fall back to brute-force */
  }
}
28
+
29
/**
 * Attempts to load the sqlite-vec extension into `db`.
 *
 * @returns true when loading succeeded; false when it threw (no extension
 *   support), in which case vector search uses the brute-force path.
 */
export function tryLoadVec(db: Database): boolean {
  let loaded = true
  try {
    sqliteVec.load(db)
  } catch {
    loaded = false // no extension support → brute-force path
  }
  return loaded
}
37
+
38
/**
 * Brings the schema up to date: creates the `nodes` and `chunks` tables and
 * their indexes if missing, runs the edges migration, then adds the
 * `confidence` column to `edges` when it is not there yet.
 */
export function migrate(db: Database): void {
  const baseSchema = `
    CREATE TABLE IF NOT EXISTS nodes (
      id TEXT PRIMARY KEY,
      coll TEXT NOT NULL,
      data TEXT NOT NULL DEFAULT '{}'
    );
    CREATE INDEX IF NOT EXISTS idx_nodes_coll ON nodes(coll);
    CREATE TABLE IF NOT EXISTS chunks (
      point_id TEXT PRIMARY KEY,
      virtual_record_id TEXT,
      org_id TEXT,
      content TEXT,
      embedding TEXT
    );
    CREATE INDEX IF NOT EXISTS idx_chunks_org ON chunks(org_id);
  `
  db.exec(baseSchema)
  migrateEdges(db)

  // Confidence tier (Graphify EXTRACTED=1.0 / INFERRED≈0.8 / AMBIGUOUS≈0.5) - how
  // sure we are the relationship is real. Added idempotently so existing DBs upgrade.
  const edgeColumns = db.query("PRAGMA table_info(edges)").all() as Array<{ name: string }>
  const hasConfidence = edgeColumns.some((col) => col.name === "confidence")
  if (!hasConfidence) {
    db.exec("ALTER TABLE edges ADD COLUMN confidence REAL NOT NULL DEFAULT 1")
  }
}
61
+
62
/**
 * Creates or upgrades the `edges` table.
 *
 * The table is a property-graph relation store shared by ACL (permissions /
 * belongsTo), structure (mentions) and the concept graph. It has one row per
 * (src, dst, label); `weight` accumulates across mentions, `created_at` is the
 * ingest time, and (valid_at, invalid_at, expired_at) are the validity axes - a
 * "live" edge has expired_at IS NULL.
 *
 * No-op when `edges` already has a `weight` column. Otherwise the table is
 * (re)built via `edges_new`; rows from a legacy `edges` table are folded per
 * (src, dst, label) with the duplicate COUNT as the starting weight.
 *
 * The whole rebuild runs in ONE transaction so an interruption cannot leave a
 * half-migrated database (e.g. a populated `edges_new` next to the old `edges`,
 * which would make the next open fail on a primary-key conflict).
 */
export function migrateEdges(db: Database): void {
  const cols = db.query("PRAGMA table_info(edges)").all() as Array<{ name: string }>
  const hasEdges = cols.length > 0
  const hasWeight = cols.some((c) => c.name === "weight")
  if (hasEdges && hasWeight) return // already current

  const rebuild = db.transaction(() => {
    if (hasEdges) {
      // The legacy table is still the source of truth, so a leftover edges_new
      // from an earlier interrupted run is stale and safe to discard. When
      // `edges` is gone, an existing edges_new holds the data and is kept.
      db.exec("DROP TABLE IF EXISTS edges_new;")
    }
    db.exec(`
      CREATE TABLE IF NOT EXISTS edges_new (
        src TEXT NOT NULL,
        dst TEXT NOT NULL,
        label TEXT NOT NULL,
        weight REAL NOT NULL DEFAULT 1,
        created_at INTEGER NOT NULL DEFAULT 0,
        valid_at INTEGER,
        invalid_at INTEGER,
        expired_at INTEGER,
        provenance TEXT NOT NULL DEFAULT '[]',
        PRIMARY KEY (src, dst, label)
      );
    `)
    if (hasEdges) {
      // Fold any duplicate (src,dst,label) rows from the old schema into one,
      // carrying the duplicate COUNT forward as the starting weight.
      db.exec(`
        INSERT INTO edges_new (src, dst, label, weight, created_at)
        SELECT src, dst, label, COUNT(*), 0 FROM edges GROUP BY src, dst, label;
        DROP TABLE edges;
      `)
    }
    db.exec(`
      ALTER TABLE edges_new RENAME TO edges;
      CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src, label);
      CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst, label);
      CREATE INDEX IF NOT EXISTS idx_edges_live ON edges(label, expired_at);
    `)
  })
  rebuild()
}
105
+
106
/**
 * Builds the placeholder list for an SQL `IN (...)` clause.
 *
 * @param n Number of bound parameters.
 * @returns `n` question marks joined by commas, e.g. `ph(3)` → `"?,?,?"`.
 */
export function ph(n: number): string {
  const marks = Array.from({ length: n }).map(() => "?")
  return marks.join(",")
}