autoctxd 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/CHANGELOG.md +62 -0
  2. package/CONTRIBUTING.md +80 -0
  3. package/LICENSE +21 -0
  4. package/README.md +301 -0
  5. package/SECURITY.md +81 -0
  6. package/package.json +55 -0
  7. package/scripts/install-hooks.ts +80 -0
  8. package/scripts/install.ps1 +71 -0
  9. package/scripts/install.sh +67 -0
  10. package/scripts/uninstall-hooks.ts +57 -0
  11. package/src/ai/active-guard.ts +96 -0
  12. package/src/ai/adaptive-ranker.ts +48 -0
  13. package/src/ai/classifier.ts +256 -0
  14. package/src/ai/compressor.ts +129 -0
  15. package/src/ai/decision-chains.ts +100 -0
  16. package/src/ai/decision-extractor.ts +148 -0
  17. package/src/ai/pattern-detector.ts +147 -0
  18. package/src/ai/proactive.ts +78 -0
  19. package/src/cli/doctor.ts +171 -0
  20. package/src/cli/embeddings.ts +209 -0
  21. package/src/cli/index.ts +574 -0
  22. package/src/cli/reclassify.ts +134 -0
  23. package/src/context/builder.ts +97 -0
  24. package/src/context/formatter.ts +109 -0
  25. package/src/context/ranker.ts +84 -0
  26. package/src/db/sqlite/decisions.ts +56 -0
  27. package/src/db/sqlite/feedback.ts +92 -0
  28. package/src/db/sqlite/observations.ts +58 -0
  29. package/src/db/sqlite/schema.ts +366 -0
  30. package/src/db/sqlite/sessions.ts +50 -0
  31. package/src/db/sqlite/summaries.ts +69 -0
  32. package/src/db/vector/client.ts +134 -0
  33. package/src/db/vector/embeddings.ts +119 -0
  34. package/src/db/vector/providers/factory.ts +99 -0
  35. package/src/db/vector/providers/minilm.ts +90 -0
  36. package/src/db/vector/providers/ollama.ts +92 -0
  37. package/src/db/vector/providers/tfidf.ts +98 -0
  38. package/src/db/vector/providers/types.ts +39 -0
  39. package/src/db/vector/search.ts +131 -0
  40. package/src/hooks/post-tool-use.ts +205 -0
  41. package/src/hooks/pre-tool-use.ts +305 -0
  42. package/src/hooks/stop.ts +334 -0
  43. package/src/mcp/server.ts +293 -0
  44. package/src/server/dashboard.html +268 -0
  45. package/src/server/dashboard.ts +170 -0
  46. package/src/util/debug.ts +56 -0
  47. package/src/util/ignore.ts +171 -0
  48. package/src/util/metrics.ts +236 -0
  49. package/src/util/path.ts +57 -0
  50. package/tsconfig.json +14 -0
@@ -0,0 +1,366 @@
1
+ import { Database } from "bun:sqlite";
2
+ import { join } from "path";
3
+ import { mkdirSync } from "fs";
4
+
5
+ const DATA_DIR = process.env.AUTOCTXD_DATA_DIR || join(import.meta.dir, "..", "..", "..", "data");
6
+ const DB_PATH = process.env.AUTOCTXD_DB_PATH || join(DATA_DIR, "autoctxd.db");
7
+
8
+ let _db: Database | null = null;
9
+
10
+ export function getDb(): Database {
11
+ if (_db) return _db;
12
+
13
+ mkdirSync(DATA_DIR, { recursive: true });
14
+
15
+ _db = new Database(DB_PATH, { create: true });
16
+ _db.exec("PRAGMA journal_mode = WAL");
17
+ _db.exec("PRAGMA busy_timeout = 5000");
18
+ initSchema(_db);
19
+ return _db;
20
+ }
21
+
22
/**
 * Creates all tables, indexes, FTS5 mirrors and sync triggers, then runs a
 * series of best-effort, idempotent migrations for databases created by older
 * versions. Ordering matters: duplicate legacy rows are collapsed BEFORE the
 * unique indexes that would otherwise fail to build over them. Each migration
 * is wrapped in its own try/catch so a failure in one never blocks the rest.
 */
function initSchema(db: Database) {
  // Base schema — every statement is IF NOT EXISTS, so this is a no-op on an
  // already-initialized database.
  db.exec(`
    CREATE TABLE IF NOT EXISTS sessions (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT UNIQUE NOT NULL,
      project_path TEXT,
      git_repo TEXT,
      git_branch TEXT,
      started_at TEXT NOT NULL DEFAULT (datetime('now')),
      ended_at TEXT,
      total_observations INTEGER DEFAULT 0,
      tokens_injected INTEGER DEFAULT 0
    );

    CREATE TABLE IF NOT EXISTS observations (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL,
      type TEXT NOT NULL,
      tool_name TEXT,
      summary TEXT NOT NULL,
      file_paths TEXT,
      timestamp TEXT NOT NULL DEFAULT (datetime('now')),
      importance_score INTEGER DEFAULT 5,
      FOREIGN KEY (session_id) REFERENCES sessions(session_id)
    );

    CREATE TABLE IF NOT EXISTS summaries (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT,
      level INTEGER NOT NULL DEFAULT 1,
      text TEXT NOT NULL,
      embedding_id TEXT,
      project_path TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS decisions (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      project_path TEXT,
      title TEXT NOT NULL,
      decision_text TEXT NOT NULL,
      alternatives TEXT,
      rationale TEXT,
      files_affected TEXT,
      embedding_id TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      last_referenced TEXT
    );

    CREATE TABLE IF NOT EXISTS patterns (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      project_path TEXT,
      pattern_type TEXT NOT NULL,
      description TEXT NOT NULL,
      frequency INTEGER DEFAULT 1,
      last_seen TEXT NOT NULL DEFAULT (datetime('now')),
      examples TEXT
    );

    CREATE TABLE IF NOT EXISTS embeddings_cache (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      text_hash TEXT NOT NULL,
      provider TEXT NOT NULL DEFAULT 'tfidf',
      embedding BLOB NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      UNIQUE(text_hash, provider)
    );

    CREATE TABLE IF NOT EXISTS feedback (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      target_type TEXT NOT NULL,
      target_id TEXT NOT NULL,
      target_text TEXT,
      verdict TEXT NOT NULL,
      reason TEXT,
      project_path TEXT,
      session_id TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS mcp_access_log (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT,
      project_path TEXT,
      tool_name TEXT NOT NULL,
      args TEXT,
      result_count INTEGER,
      ts TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS token_metrics (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT,
      tokens_injected INTEGER DEFAULT 0,
      estimated_tokens_saved INTEGER DEFAULT 0,
      context_relevance_score REAL DEFAULT 0,
      exploration_calls_before_edit INTEGER DEFAULT 0,
      time_to_first_edit_sec REAL,
      context_hit_count INTEGER DEFAULT 0,
      context_miss_count INTEGER DEFAULT 0,
      injected_files TEXT,
      first_edit_recorded INTEGER DEFAULT 0,
      FOREIGN KEY (session_id) REFERENCES sessions(session_id)
    );

    CREATE INDEX IF NOT EXISTS idx_observations_session ON observations(session_id);
    CREATE INDEX IF NOT EXISTS idx_observations_type ON observations(type);
    CREATE INDEX IF NOT EXISTS idx_observations_importance ON observations(importance_score DESC);
    CREATE INDEX IF NOT EXISTS idx_summaries_project ON summaries(project_path);
    CREATE INDEX IF NOT EXISTS idx_summaries_level ON summaries(level);
    CREATE INDEX IF NOT EXISTS idx_decisions_project ON decisions(project_path);
    CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_path);
    CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC);
    CREATE INDEX IF NOT EXISTS idx_feedback_target ON feedback(target_type, target_id);
    CREATE INDEX IF NOT EXISTS idx_feedback_project ON feedback(project_path);
    CREATE INDEX IF NOT EXISTS idx_mcp_log_session ON mcp_access_log(session_id, ts DESC);
  `);

  // Migrate embeddings_cache to include `provider` column (legacy DBs were UNIQUE on
  // text_hash alone, before provider partitioning). Add the column with default
  // 'tfidf' so existing cached vectors are still attributable.
  try {
    const cols = db.prepare("PRAGMA table_info(embeddings_cache)").all() as Array<{ name: string }>;
    const colNames = new Set(cols.map(c => c.name));
    if (!colNames.has("provider")) {
      db.exec(`ALTER TABLE embeddings_cache ADD COLUMN provider TEXT NOT NULL DEFAULT 'tfidf'`);
      // Drop the legacy single-column UNIQUE index; the composite UNIQUE in the
      // CREATE TABLE will not apply to the existing table, but a unique index
      // on (text_hash, provider) keeps semantics intact going forward.
      // NOTE(review): sqlite_autoindex_* indexes generally cannot be dropped
      // via DROP INDEX — this relies on the surrounding try/catch; confirm.
      try { db.exec(`DROP INDEX IF EXISTS sqlite_autoindex_embeddings_cache_1`); } catch {}
      db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS idx_embeddings_cache_text_provider
        ON embeddings_cache(text_hash, provider)`);
    }
  } catch {
    // best-effort migration
  }

  // Migrate token_metrics if columns are missing (existing DBs)
  // (all six columns were added in the same release, so presence of the first
  // implies presence of the rest).
  try {
    const cols = db.prepare("PRAGMA table_info(token_metrics)").all() as Array<{ name: string }>;
    const colNames = new Set(cols.map(c => c.name));
    if (!colNames.has("exploration_calls_before_edit")) {
      db.exec(`ALTER TABLE token_metrics ADD COLUMN exploration_calls_before_edit INTEGER DEFAULT 0`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN time_to_first_edit_sec REAL`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN context_hit_count INTEGER DEFAULT 0`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN context_miss_count INTEGER DEFAULT 0`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN injected_files TEXT`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN first_edit_recorded INTEGER DEFAULT 0`);
    }
  } catch {
    // Migration already applied or table doesn't exist yet
  }

  // Collapse duplicate (session_id, level) summary rows produced before the
  // unique index existed: keep the most recent, drop the rest. Required so
  // CREATE UNIQUE INDEX below succeeds on legacy DBs.
  try {
    db.exec(`
      DELETE FROM summaries WHERE id IN (
        SELECT id FROM summaries s1
        WHERE session_id IS NOT NULL
        AND EXISTS (
          SELECT 1 FROM summaries s2
          WHERE s2.session_id = s1.session_id
          AND s2.level = s1.level
          AND (s2.created_at > s1.created_at OR (s2.created_at = s1.created_at AND s2.id > s1.id))
        )
      )
    `);
  } catch {
    // table missing on first run, nothing to clean
  }

  // One Level-1 (per-session) summary per session. Stop hook can fire multiple
  // times (compaction, sub-agents) — without this, the same session shows up
  // 3x in "RECENT SESSIONS". Digests (level 2/3) have NULL session_id and are
  // not constrained by this partial index.
  try {
    db.exec(`
      CREATE UNIQUE INDEX IF NOT EXISTS idx_summaries_session_level
      ON summaries(session_id, level) WHERE session_id IS NOT NULL
    `);
  } catch {
    // index already exists or unsupported
  }

  // Normalize legacy pattern descriptions: older versions baked a per-session
  // counter into the description ("Heavily uses Bash (32 times this session)",
  // "Session focused on refactor (84/200 actions)"), which broke the upsert
  // dedupe — every session inserted a fresh row. Strip the volatile tail so
  // future upserts collapse correctly, then merge duplicates that already exist.
  try {
    cleanupLegacyPatterns(db);
  } catch {
    // best-effort migration
  }

  // Purge legacy "decisions" that aren't actually decisions. Older versions
  // promoted Agent spawns / tool-prefixed observations to architectural
  // decisions whenever a stray keyword matched. New code blocks this at the
  // source; this cleans up rows already persisted.
  try {
    db.exec(`
      DELETE FROM decisions WHERE
        decision_text LIKE 'Spawned agent:%' OR
        decision_text LIKE 'Wrote %' OR
        decision_text LIKE 'mcp__%' OR
        decision_text LIKE 'WebFetch %' OR
        decision_text LIKE 'WebSearch:%' OR
        decision_text LIKE 'Notebook %'
    `);
  } catch {
    // best-effort
  }

  // Collapse duplicate (project_path, title) decision rows before creating the
  // unique index. Older versions of the extractor inserted the same dep
  // decision on every session — keep the earliest row, drop the rest.
  try {
    db.exec(`
      DELETE FROM decisions WHERE id NOT IN (
        SELECT MIN(id) FROM decisions
        GROUP BY COALESCE(project_path, ''), title
      )
    `);
  } catch {
    // table missing on first run
  }

  // Once duplicates are gone, enforce the constraint going forward. Combined
  // with INSERT OR IGNORE in decisions.ts this is what makes Bug 3
  // (decisions duplicated across sessions) actually go away.
  try {
    db.exec(`
      CREATE UNIQUE INDEX IF NOT EXISTS idx_decisions_project_title
      ON decisions(COALESCE(project_path, ''), title)
    `);
  } catch {
    // index already exists or unsupported expression-index syntax
  }

  // FTS5 virtual tables for full-text search (external-content tables backed
  // by observations/decisions; rowids track the base tables' ids).
  try {
    db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS observations_fts USING fts5(
        summary,
        content='observations',
        content_rowid='id'
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS decisions_fts USING fts5(
        title,
        decision_text,
        content='decisions',
        content_rowid='id'
      );
    `);
  } catch {
    // FTS5 tables may already exist
  }

  // Triggers to keep FTS in sync.
  // NOTE(review): only AFTER INSERT triggers exist — rows updated or deleted
  // in the base tables will leave stale FTS entries; confirm that's acceptable.
  try {
    db.exec(`
      CREATE TRIGGER IF NOT EXISTS observations_ai AFTER INSERT ON observations BEGIN
        INSERT INTO observations_fts(rowid, summary) VALUES (new.id, new.summary);
      END;

      CREATE TRIGGER IF NOT EXISTS decisions_ai AFTER INSERT ON decisions BEGIN
        INSERT INTO decisions_fts(rowid, title, decision_text) VALUES (new.id, new.title, new.decision_text);
      END;
    `);
  } catch {
    // Triggers may already exist
  }
}
298
+
299
+ function cleanupLegacyPatterns(db: Database) {
300
+ const VOLATILE_TAIL = / \((?:\d+ times this session|\d+\/\d+ actions)\)\s*$/;
301
+ const rows = db.prepare(
302
+ `SELECT id, project_path, pattern_type, description, frequency, last_seen, examples FROM patterns`
303
+ ).all() as Array<{
304
+ id: number;
305
+ project_path: string | null;
306
+ pattern_type: string;
307
+ description: string;
308
+ frequency: number;
309
+ last_seen: string | null;
310
+ examples: string | null;
311
+ }>;
312
+
313
+ type Group = { keepId: number; freq: number; idsToDelete: number[]; latestExamples: string | null; latestSeen: string | null };
314
+ const groups = new Map<string, Group>();
315
+
316
+ for (const r of rows) {
317
+ const cleaned = r.description.replace(VOLATILE_TAIL, "").trim();
318
+ if (cleaned !== r.description) {
319
+ db.prepare(`UPDATE patterns SET description = ? WHERE id = ?`).run(cleaned, r.id);
320
+ r.description = cleaned;
321
+ }
322
+ const key = `${r.project_path ?? ""}${r.pattern_type}${r.description}`;
323
+ const existing = groups.get(key);
324
+ if (!existing) {
325
+ groups.set(key, {
326
+ keepId: r.id,
327
+ freq: r.frequency,
328
+ idsToDelete: [],
329
+ latestExamples: r.examples,
330
+ latestSeen: r.last_seen,
331
+ });
332
+ } else {
333
+ existing.freq += r.frequency;
334
+ if ((r.last_seen || "") > (existing.latestSeen || "")) {
335
+ existing.latestSeen = r.last_seen;
336
+ existing.latestExamples = r.examples;
337
+ }
338
+ existing.idsToDelete.push(r.id);
339
+ }
340
+ }
341
+
342
+ for (const g of groups.values()) {
343
+ if (g.idsToDelete.length === 0) continue;
344
+ db.prepare(
345
+ `UPDATE patterns SET frequency = ?, last_seen = COALESCE(?, last_seen), examples = COALESCE(?, examples) WHERE id = ?`
346
+ ).run(g.freq, g.latestSeen, g.latestExamples, g.keepId);
347
+ const placeholders = g.idsToDelete.map(() => "?").join(",");
348
+ db.prepare(`DELETE FROM patterns WHERE id IN (${placeholders})`).run(...g.idsToDelete);
349
+ }
350
+ }
351
+
352
+ export function closeDb() {
353
+ if (_db) {
354
+ _db.close();
355
+ _db = null;
356
+ }
357
+ }
358
+
359
// Run directly to initialize: opening the DB via getDb() also applies every
// migration above, then we print the location and resulting table list.
if (import.meta.main) {
  const db = getDb();
  console.log("Database initialized at", DB_PATH);
  const tables = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all() as Array<{ name: string }>;
  console.log(" Tables:", tables.map(r => r.name).join(", "));
  closeDb();
}
@@ -0,0 +1,50 @@
1
import { getDb } from "./schema";

/**
 * Row shape of the `sessions` table (see schema.ts). Optional fields are
 * nullable columns or carry DB-side defaults.
 */
export interface Session {
  id?: number;                  // AUTOINCREMENT primary key
  session_id: string;           // external session identifier (UNIQUE in the table)
  project_path?: string;
  git_repo?: string;
  git_branch?: string;
  started_at?: string;          // set by DB default datetime('now')
  ended_at?: string;            // set by endSession()
  total_observations?: number;  // defaults to 0; finalized by endSession()
  tokens_injected?: number;     // defaults to 0
}
14
+
15
+ export function createSession(session: Session): void {
16
+ const db = getDb();
17
+ db.prepare(`
18
+ INSERT OR IGNORE INTO sessions (session_id, project_path, git_repo, git_branch)
19
+ VALUES (?, ?, ?, ?)
20
+ `).run(
21
+ session.session_id,
22
+ session.project_path || null,
23
+ session.git_repo || null,
24
+ session.git_branch || null
25
+ );
26
+ }
27
+
28
+ export function endSession(sessionId: string, totalObs: number) {
29
+ const db = getDb();
30
+ db.prepare(`
31
+ UPDATE sessions SET ended_at = datetime('now'), total_observations = ? WHERE session_id = ?
32
+ `).run(totalObs, sessionId);
33
+ }
34
+
35
+ export function getRecentSessions(projectPath: string, limit = 3): Session[] {
36
+ const db = getDb();
37
+ return db.prepare(`
38
+ SELECT * FROM sessions WHERE project_path = ? ORDER BY started_at DESC LIMIT ?
39
+ `).all(projectPath, limit) as Session[];
40
+ }
41
+
42
+ export function getSessionById(sessionId: string): Session | undefined {
43
+ const db = getDb();
44
+ return db.prepare(`SELECT * FROM sessions WHERE session_id = ?`).get(sessionId) as Session | undefined;
45
+ }
46
+
47
+ export function getAllSessions(limit = 50): Session[] {
48
+ const db = getDb();
49
+ return db.prepare(`SELECT * FROM sessions ORDER BY started_at DESC LIMIT ?`).all(limit) as Session[];
50
+ }
@@ -0,0 +1,69 @@
1
import { getDb } from "./schema";

/**
 * Row shape of the `summaries` table. Level-1 rows are per-session summaries
 * (non-null session_id, at most one per (session_id, level) via the partial
 * unique index in schema.ts); digest rows have a null session_id.
 */
export interface Summary {
  id?: number;           // AUTOINCREMENT primary key
  session_id?: string;   // absent/null for digest rows
  level: number;         // 1 = per-session summary; 2/3 = digests (see schema.ts)
  text: string;
  embedding_id?: string; // presumably links to the vector store record — TODO confirm
  project_path?: string;
  created_at?: string;   // set by DB default datetime('now')
}
12
+
13
+ export function insertSummary(summary: Summary): void {
14
+ const db = getDb();
15
+ // For per-session summaries, replace any existing (session_id, level) row so a
16
+ // Stop hook that fires more than once doesn't duplicate. SQLite doesn't
17
+ // accept ON CONFLICT against the partial unique index used here, so we
18
+ // do the delete + insert explicitly inside a transaction. Digests have a
19
+ // null session_id and just append.
20
+ if (summary.session_id) {
21
+ const tx = db.transaction((s: Summary) => {
22
+ db.prepare(`DELETE FROM summaries WHERE session_id = ? AND level = ?`).run(
23
+ s.session_id!,
24
+ s.level
25
+ );
26
+ db.prepare(`
27
+ INSERT INTO summaries (session_id, level, text, embedding_id, project_path)
28
+ VALUES (?, ?, ?, ?, ?)
29
+ `).run(
30
+ s.session_id!,
31
+ s.level,
32
+ s.text,
33
+ s.embedding_id || null,
34
+ s.project_path || null
35
+ );
36
+ });
37
+ tx(summary);
38
+ return;
39
+ }
40
+ db.prepare(`
41
+ INSERT INTO summaries (session_id, level, text, embedding_id, project_path)
42
+ VALUES (?, ?, ?, ?, ?)
43
+ `).run(
44
+ null,
45
+ summary.level,
46
+ summary.text,
47
+ summary.embedding_id || null,
48
+ summary.project_path || null
49
+ );
50
+ }
51
+
52
+ export function getRecentSummaries(projectPath: string, level = 1, limit = 3): Summary[] {
53
+ const db = getDb();
54
+ return db.prepare(`
55
+ SELECT * FROM summaries
56
+ WHERE project_path = ? AND level = ?
57
+ ORDER BY created_at DESC
58
+ LIMIT ?
59
+ `).all(projectPath, level, limit) as Summary[];
60
+ }
61
+
62
+ export function getAllSummariesForDigest(projectPath: string, since: string): Summary[] {
63
+ const db = getDb();
64
+ return db.prepare(`
65
+ SELECT * FROM summaries
66
+ WHERE project_path = ? AND level = 1 AND created_at >= ?
67
+ ORDER BY created_at ASC
68
+ `).all(projectPath, since) as Summary[];
69
+ }
@@ -0,0 +1,134 @@
1
// LanceDB vector database client for semantic search.
//
// The table holds vectors at whatever dimension the active embedding provider
// emits. If a query vector arrives with a mismatched dim (e.g. user just
// switched providers without re-embedding), searchSimilar returns [] rather
// than throwing — the CLI's `embeddings switch` command rebuilds the table.

import * as lancedb from "@lancedb/lancedb";
import { join } from "path";
import { mkdirSync } from "fs";
import { getActiveDim } from "./embeddings";

// Store location: explicit override, else a "vector" subdir of the shared
// data dir, else <package root>/data/vector (import.meta.dir is Bun-specific).
const DATA_DIR = process.env.AUTOCTXD_VECTOR_DIR
  || (process.env.AUTOCTXD_DATA_DIR ? join(process.env.AUTOCTXD_DATA_DIR, "vector") : join(import.meta.dir, "..", "..", "..", "data", "vector"));
const TABLE_NAME = "summaries";

// Cached connection and table handles (typed `any` — LanceDB types are not
// pinned here), plus the dim the cached table was detected/created with.
let _db: any = null;
let _table: any = null;
let _tableDim: number | null = null;
20
+
21
+ export async function getVectorDb() {
22
+ if (_db) return _db;
23
+ mkdirSync(DATA_DIR, { recursive: true });
24
+ _db = await lancedb.connect(DATA_DIR);
25
+ return _db;
26
+ }
27
+
28
+ async function detectTableDim(table: any): Promise<number | null> {
29
+ try {
30
+ const sample = await table.query().limit(1).toArray();
31
+ const v = sample?.[0]?.vector;
32
+ if (Array.isArray(v)) return v.length;
33
+ if (v && typeof v.length === "number") return v.length;
34
+ } catch {
35
+ // ignore
36
+ }
37
+ return null;
38
+ }
39
+
40
/**
 * Returns the cached LanceDB table, (re)opening it when the cache is cold or
 * the active embedding dimension differs from the cached one. When openTable
 * fails, the table is assumed absent and is created with a single "__init__"
 * sentinel row — LanceDB infers the schema (including vector dim) from that
 * first batch; searchSimilar filters the sentinel out of results.
 */
export async function getOrCreateTable() {
  const expectedDim = getActiveDim();
  if (_table && _tableDim === expectedDim) return _table;

  const db = await getVectorDb();

  try {
    _table = await db.openTable(TABLE_NAME);
    _tableDim = await detectTableDim(_table);
    // If existing table was built for a different dim, signal to caller via
    // _tableDim so dropTable() can be invoked from the CLI migration path.
  } catch {
    // openTable failed — create a fresh table seeded with the sentinel row.
    _table = await db.createTable(TABLE_NAME, [
      {
        id: "__init__",
        session_id: "",
        project_path: "",
        text: "",
        level: 0,
        created_at: "",
        vector: new Array(expectedDim).fill(0),
      },
    ]);
    _tableDim = expectedDim;
  }

  return _table;
}
68
+
69
+ export async function dropTable(): Promise<void> {
70
+ const db = await getVectorDb();
71
+ try {
72
+ await db.dropTable(TABLE_NAME);
73
+ } catch {
74
+ // table may not exist
75
+ }
76
+ _table = null;
77
+ _tableDim = null;
78
+ }
79
+
80
/**
 * Dimension of the currently cached table: null before getOrCreateTable() has
 * run or after dropTable(). Lets callers detect a provider/dim mismatch and
 * trigger the rebuild path (see note in getOrCreateTable).
 */
export function getCurrentTableDim(): number | null {
  return _tableDim;
}
83
+
84
/**
 * One row in the LanceDB "summaries" table: summary metadata plus its
 * embedding. NOTE(review): searchSimilar also attaches a `_distance` score to
 * returned records, which this interface does not declare — callers relying
 * on it are off-type.
 */
export interface VectorRecord {
  id: string;            // unique record id; "__init__" is the schema-seeding sentinel
  session_id: string;
  project_path: string;
  text: string;
  level: number;         // summary level (mirrors summaries.level in SQLite)
  created_at: string;
  vector: number[];      // embedding at the active provider's dimension
}
93
+
94
+ export async function addVector(record: VectorRecord): Promise<void> {
95
+ const table = await getOrCreateTable();
96
+ await table.add([record]);
97
+ }
98
+
99
+ export async function searchSimilar(
100
+ queryVector: number[],
101
+ limit = 5,
102
+ projectFilter?: string
103
+ ): Promise<VectorRecord[]> {
104
+ const table = await getOrCreateTable();
105
+
106
+ let query = table.search(queryVector).limit(limit);
107
+
108
+ if (projectFilter) {
109
+ query = query.where(`project_path = '${projectFilter.replace(/'/g, "''")}'`);
110
+ }
111
+
112
+ try {
113
+ const results = await query.toArray();
114
+ return results
115
+ .filter((r: any) => r.id !== "__init__")
116
+ .map((r: any) => ({
117
+ id: r.id,
118
+ session_id: r.session_id,
119
+ project_path: r.project_path,
120
+ text: r.text,
121
+ level: r.level,
122
+ created_at: r.created_at,
123
+ vector: r.vector,
124
+ _distance: r._distance,
125
+ }));
126
+ } catch {
127
+ return [];
128
+ }
129
+ }
130
+
131
+ export async function closeVectorDb(): Promise<void> {
132
+ _table = null;
133
+ _db = null;
134
+ }