autoctxd 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +62 -0
- package/CONTRIBUTING.md +80 -0
- package/LICENSE +21 -0
- package/README.md +301 -0
- package/SECURITY.md +81 -0
- package/package.json +55 -0
- package/scripts/install-hooks.ts +80 -0
- package/scripts/install.ps1 +71 -0
- package/scripts/install.sh +67 -0
- package/scripts/uninstall-hooks.ts +57 -0
- package/src/ai/active-guard.ts +96 -0
- package/src/ai/adaptive-ranker.ts +48 -0
- package/src/ai/classifier.ts +256 -0
- package/src/ai/compressor.ts +129 -0
- package/src/ai/decision-chains.ts +100 -0
- package/src/ai/decision-extractor.ts +148 -0
- package/src/ai/pattern-detector.ts +147 -0
- package/src/ai/proactive.ts +78 -0
- package/src/cli/doctor.ts +171 -0
- package/src/cli/embeddings.ts +209 -0
- package/src/cli/index.ts +574 -0
- package/src/cli/reclassify.ts +134 -0
- package/src/context/builder.ts +97 -0
- package/src/context/formatter.ts +109 -0
- package/src/context/ranker.ts +84 -0
- package/src/db/sqlite/decisions.ts +56 -0
- package/src/db/sqlite/feedback.ts +92 -0
- package/src/db/sqlite/observations.ts +58 -0
- package/src/db/sqlite/schema.ts +366 -0
- package/src/db/sqlite/sessions.ts +50 -0
- package/src/db/sqlite/summaries.ts +69 -0
- package/src/db/vector/client.ts +134 -0
- package/src/db/vector/embeddings.ts +119 -0
- package/src/db/vector/providers/factory.ts +99 -0
- package/src/db/vector/providers/minilm.ts +90 -0
- package/src/db/vector/providers/ollama.ts +92 -0
- package/src/db/vector/providers/tfidf.ts +98 -0
- package/src/db/vector/providers/types.ts +39 -0
- package/src/db/vector/search.ts +131 -0
- package/src/hooks/post-tool-use.ts +205 -0
- package/src/hooks/pre-tool-use.ts +305 -0
- package/src/hooks/stop.ts +334 -0
- package/src/mcp/server.ts +293 -0
- package/src/server/dashboard.html +268 -0
- package/src/server/dashboard.ts +170 -0
- package/src/util/debug.ts +56 -0
- package/src/util/ignore.ts +171 -0
- package/src/util/metrics.ts +236 -0
- package/src/util/path.ts +57 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import { join } from "path";
|
|
3
|
+
import { mkdirSync } from "fs";
|
|
4
|
+
|
|
5
|
+
// Resolve where the SQLite database lives. Both locations are overridable
// via environment variables; the default sits in a data/ directory three
// levels above this file (the installed package root).
const DATA_DIR = process.env.AUTOCTXD_DATA_DIR || join(import.meta.dir, "..", "..", "..", "data");
const DB_PATH = process.env.AUTOCTXD_DB_PATH || join(DATA_DIR, "autoctxd.db");

// Lazily-initialized singleton connection; see getDb()/closeDb().
let _db: Database | null = null;
|
|
9
|
+
|
|
10
|
+
export function getDb(): Database {
|
|
11
|
+
if (_db) return _db;
|
|
12
|
+
|
|
13
|
+
mkdirSync(DATA_DIR, { recursive: true });
|
|
14
|
+
|
|
15
|
+
_db = new Database(DB_PATH, { create: true });
|
|
16
|
+
_db.exec("PRAGMA journal_mode = WAL");
|
|
17
|
+
_db.exec("PRAGMA busy_timeout = 5000");
|
|
18
|
+
initSchema(_db);
|
|
19
|
+
return _db;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// Create all tables and indexes (idempotent), then run a sequence of
// best-effort migrations that bring legacy databases up to the current
// shape. Each migration is wrapped in try/catch so a brand-new or
// partially-migrated DB never blocks startup. The order matters: duplicate
// rows are collapsed before the unique indexes that forbid them are created.
function initSchema(db: Database) {
  // Base schema. CREATE ... IF NOT EXISTS makes this safe to re-run.
  db.exec(`
    CREATE TABLE IF NOT EXISTS sessions (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT UNIQUE NOT NULL,
      project_path TEXT,
      git_repo TEXT,
      git_branch TEXT,
      started_at TEXT NOT NULL DEFAULT (datetime('now')),
      ended_at TEXT,
      total_observations INTEGER DEFAULT 0,
      tokens_injected INTEGER DEFAULT 0
    );

    CREATE TABLE IF NOT EXISTS observations (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL,
      type TEXT NOT NULL,
      tool_name TEXT,
      summary TEXT NOT NULL,
      file_paths TEXT,
      timestamp TEXT NOT NULL DEFAULT (datetime('now')),
      importance_score INTEGER DEFAULT 5,
      FOREIGN KEY (session_id) REFERENCES sessions(session_id)
    );

    CREATE TABLE IF NOT EXISTS summaries (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT,
      level INTEGER NOT NULL DEFAULT 1,
      text TEXT NOT NULL,
      embedding_id TEXT,
      project_path TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS decisions (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      project_path TEXT,
      title TEXT NOT NULL,
      decision_text TEXT NOT NULL,
      alternatives TEXT,
      rationale TEXT,
      files_affected TEXT,
      embedding_id TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      last_referenced TEXT
    );

    CREATE TABLE IF NOT EXISTS patterns (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      project_path TEXT,
      pattern_type TEXT NOT NULL,
      description TEXT NOT NULL,
      frequency INTEGER DEFAULT 1,
      last_seen TEXT NOT NULL DEFAULT (datetime('now')),
      examples TEXT
    );

    CREATE TABLE IF NOT EXISTS embeddings_cache (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      text_hash TEXT NOT NULL,
      provider TEXT NOT NULL DEFAULT 'tfidf',
      embedding BLOB NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      UNIQUE(text_hash, provider)
    );

    CREATE TABLE IF NOT EXISTS feedback (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      target_type TEXT NOT NULL,
      target_id TEXT NOT NULL,
      target_text TEXT,
      verdict TEXT NOT NULL,
      reason TEXT,
      project_path TEXT,
      session_id TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS mcp_access_log (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT,
      project_path TEXT,
      tool_name TEXT NOT NULL,
      args TEXT,
      result_count INTEGER,
      ts TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS token_metrics (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT,
      tokens_injected INTEGER DEFAULT 0,
      estimated_tokens_saved INTEGER DEFAULT 0,
      context_relevance_score REAL DEFAULT 0,
      exploration_calls_before_edit INTEGER DEFAULT 0,
      time_to_first_edit_sec REAL,
      context_hit_count INTEGER DEFAULT 0,
      context_miss_count INTEGER DEFAULT 0,
      injected_files TEXT,
      first_edit_recorded INTEGER DEFAULT 0,
      FOREIGN KEY (session_id) REFERENCES sessions(session_id)
    );

    CREATE INDEX IF NOT EXISTS idx_observations_session ON observations(session_id);
    CREATE INDEX IF NOT EXISTS idx_observations_type ON observations(type);
    CREATE INDEX IF NOT EXISTS idx_observations_importance ON observations(importance_score DESC);
    CREATE INDEX IF NOT EXISTS idx_summaries_project ON summaries(project_path);
    CREATE INDEX IF NOT EXISTS idx_summaries_level ON summaries(level);
    CREATE INDEX IF NOT EXISTS idx_decisions_project ON decisions(project_path);
    CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_path);
    CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC);
    CREATE INDEX IF NOT EXISTS idx_feedback_target ON feedback(target_type, target_id);
    CREATE INDEX IF NOT EXISTS idx_feedback_project ON feedback(project_path);
    CREATE INDEX IF NOT EXISTS idx_mcp_log_session ON mcp_access_log(session_id, ts DESC);
  `);

  // Migrate embeddings_cache to include `provider` column (legacy DBs were UNIQUE on
  // text_hash alone, before provider partitioning). Add the column with default
  // 'tfidf' so existing cached vectors are still attributable.
  try {
    const cols = db.prepare("PRAGMA table_info(embeddings_cache)").all() as Array<{ name: string }>;
    const colNames = new Set(cols.map(c => c.name));
    if (!colNames.has("provider")) {
      db.exec(`ALTER TABLE embeddings_cache ADD COLUMN provider TEXT NOT NULL DEFAULT 'tfidf'`);
      // Drop the legacy single-column UNIQUE index; the composite UNIQUE in the
      // CREATE TABLE will not apply to the existing table, but a unique index
      // on (text_hash, provider) keeps semantics intact going forward.
      // NOTE(review): dropping an sqlite_autoindex is normally disallowed by
      // SQLite — the try/catch makes this a silent no-op; confirm intent.
      try { db.exec(`DROP INDEX IF EXISTS sqlite_autoindex_embeddings_cache_1`); } catch {}
      db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS idx_embeddings_cache_text_provider
               ON embeddings_cache(text_hash, provider)`);
    }
  } catch {
    // best-effort migration
  }

  // Migrate token_metrics if columns are missing (existing DBs). Presence of
  // exploration_calls_before_edit is used as the marker for the whole batch.
  try {
    const cols = db.prepare("PRAGMA table_info(token_metrics)").all() as Array<{ name: string }>;
    const colNames = new Set(cols.map(c => c.name));
    if (!colNames.has("exploration_calls_before_edit")) {
      db.exec(`ALTER TABLE token_metrics ADD COLUMN exploration_calls_before_edit INTEGER DEFAULT 0`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN time_to_first_edit_sec REAL`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN context_hit_count INTEGER DEFAULT 0`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN context_miss_count INTEGER DEFAULT 0`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN injected_files TEXT`);
      db.exec(`ALTER TABLE token_metrics ADD COLUMN first_edit_recorded INTEGER DEFAULT 0`);
    }
  } catch {
    // Migration already applied or table doesn't exist yet
  }

  // Collapse duplicate (session_id, level) summary rows produced before the
  // unique index existed: keep the most recent, drop the rest. Required so
  // CREATE UNIQUE INDEX below succeeds on legacy DBs.
  try {
    db.exec(`
      DELETE FROM summaries WHERE id IN (
        SELECT id FROM summaries s1
        WHERE session_id IS NOT NULL
        AND EXISTS (
          SELECT 1 FROM summaries s2
          WHERE s2.session_id = s1.session_id
          AND s2.level = s1.level
          AND (s2.created_at > s1.created_at OR (s2.created_at = s1.created_at AND s2.id > s1.id))
        )
      )
    `);
  } catch {
    // table missing on first run, nothing to clean
  }

  // One Level-1 (per-session) summary per session. Stop hook can fire multiple
  // times (compaction, sub-agents) — without this, the same session shows up
  // 3x in "RECENT SESSIONS". Digests (level 2/3) have NULL session_id and are
  // not constrained by this partial index.
  try {
    db.exec(`
      CREATE UNIQUE INDEX IF NOT EXISTS idx_summaries_session_level
      ON summaries(session_id, level) WHERE session_id IS NOT NULL
    `);
  } catch {
    // index already exists or unsupported
  }

  // Normalize legacy pattern descriptions: older versions baked a per-session
  // counter into the description ("Heavily uses Bash (32 times this session)",
  // "Session focused on refactor (84/200 actions)"), which broke the upsert
  // dedupe — every session inserted a fresh row. Strip the volatile tail so
  // future upserts collapse correctly, then merge duplicates that already exist.
  try {
    cleanupLegacyPatterns(db);
  } catch {
    // best-effort migration
  }

  // Purge legacy "decisions" that aren't actually decisions. Older versions
  // promoted Agent spawns / tool-prefixed observations to architectural
  // decisions whenever a stray keyword matched. New code blocks this at the
  // source; this cleans up rows already persisted.
  try {
    db.exec(`
      DELETE FROM decisions WHERE
        decision_text LIKE 'Spawned agent:%' OR
        decision_text LIKE 'Wrote %' OR
        decision_text LIKE 'mcp__%' OR
        decision_text LIKE 'WebFetch %' OR
        decision_text LIKE 'WebSearch:%' OR
        decision_text LIKE 'Notebook %'
    `);
  } catch {
    // best-effort
  }

  // Collapse duplicate (project_path, title) decision rows before creating the
  // unique index. Older versions of the extractor inserted the same dep
  // decision on every session — keep the earliest row, drop the rest.
  try {
    db.exec(`
      DELETE FROM decisions WHERE id NOT IN (
        SELECT MIN(id) FROM decisions
        GROUP BY COALESCE(project_path, ''), title
      )
    `);
  } catch {
    // table missing on first run
  }

  // Once duplicates are gone, enforce the constraint going forward. Combined
  // with INSERT OR IGNORE in decisions.ts this is what makes Bug 3
  // (decisions duplicated across sessions) actually go away.
  try {
    db.exec(`
      CREATE UNIQUE INDEX IF NOT EXISTS idx_decisions_project_title
      ON decisions(COALESCE(project_path, ''), title)
    `);
  } catch {
    // index already exists or unsupported expression-index syntax
  }

  // FTS5 virtual tables for full-text search, backed by the content tables
  // (external-content mode: content= / content_rowid=).
  try {
    db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS observations_fts USING fts5(
        summary,
        content='observations',
        content_rowid='id'
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS decisions_fts USING fts5(
        title,
        decision_text,
        content='decisions',
        content_rowid='id'
      );
    `);
  } catch {
    // FTS5 tables may already exist
  }

  // Triggers to keep FTS in sync (insert-only; deletes/updates are not
  // mirrored here).
  try {
    db.exec(`
      CREATE TRIGGER IF NOT EXISTS observations_ai AFTER INSERT ON observations BEGIN
        INSERT INTO observations_fts(rowid, summary) VALUES (new.id, new.summary);
      END;

      CREATE TRIGGER IF NOT EXISTS decisions_ai AFTER INSERT ON decisions BEGIN
        INSERT INTO decisions_fts(rowid, title, decision_text) VALUES (new.id, new.title, new.decision_text);
      END;
    `);
  } catch {
    // Triggers may already exist
  }
}
|
|
298
|
+
|
|
299
|
+
function cleanupLegacyPatterns(db: Database) {
|
|
300
|
+
const VOLATILE_TAIL = / \((?:\d+ times this session|\d+\/\d+ actions)\)\s*$/;
|
|
301
|
+
const rows = db.prepare(
|
|
302
|
+
`SELECT id, project_path, pattern_type, description, frequency, last_seen, examples FROM patterns`
|
|
303
|
+
).all() as Array<{
|
|
304
|
+
id: number;
|
|
305
|
+
project_path: string | null;
|
|
306
|
+
pattern_type: string;
|
|
307
|
+
description: string;
|
|
308
|
+
frequency: number;
|
|
309
|
+
last_seen: string | null;
|
|
310
|
+
examples: string | null;
|
|
311
|
+
}>;
|
|
312
|
+
|
|
313
|
+
type Group = { keepId: number; freq: number; idsToDelete: number[]; latestExamples: string | null; latestSeen: string | null };
|
|
314
|
+
const groups = new Map<string, Group>();
|
|
315
|
+
|
|
316
|
+
for (const r of rows) {
|
|
317
|
+
const cleaned = r.description.replace(VOLATILE_TAIL, "").trim();
|
|
318
|
+
if (cleaned !== r.description) {
|
|
319
|
+
db.prepare(`UPDATE patterns SET description = ? WHERE id = ?`).run(cleaned, r.id);
|
|
320
|
+
r.description = cleaned;
|
|
321
|
+
}
|
|
322
|
+
const key = `${r.project_path ?? ""}${r.pattern_type}${r.description}`;
|
|
323
|
+
const existing = groups.get(key);
|
|
324
|
+
if (!existing) {
|
|
325
|
+
groups.set(key, {
|
|
326
|
+
keepId: r.id,
|
|
327
|
+
freq: r.frequency,
|
|
328
|
+
idsToDelete: [],
|
|
329
|
+
latestExamples: r.examples,
|
|
330
|
+
latestSeen: r.last_seen,
|
|
331
|
+
});
|
|
332
|
+
} else {
|
|
333
|
+
existing.freq += r.frequency;
|
|
334
|
+
if ((r.last_seen || "") > (existing.latestSeen || "")) {
|
|
335
|
+
existing.latestSeen = r.last_seen;
|
|
336
|
+
existing.latestExamples = r.examples;
|
|
337
|
+
}
|
|
338
|
+
existing.idsToDelete.push(r.id);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
for (const g of groups.values()) {
|
|
343
|
+
if (g.idsToDelete.length === 0) continue;
|
|
344
|
+
db.prepare(
|
|
345
|
+
`UPDATE patterns SET frequency = ?, last_seen = COALESCE(?, last_seen), examples = COALESCE(?, examples) WHERE id = ?`
|
|
346
|
+
).run(g.freq, g.latestSeen, g.latestExamples, g.keepId);
|
|
347
|
+
const placeholders = g.idsToDelete.map(() => "?").join(",");
|
|
348
|
+
db.prepare(`DELETE FROM patterns WHERE id IN (${placeholders})`).run(...g.idsToDelete);
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
export function closeDb() {
|
|
353
|
+
if (_db) {
|
|
354
|
+
_db.close();
|
|
355
|
+
_db = null;
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
// Run directly to initialize
|
|
360
|
+
if (import.meta.main) {
|
|
361
|
+
const db = getDb();
|
|
362
|
+
console.log("Database initialized at", DB_PATH);
|
|
363
|
+
const tables = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all() as Array<{ name: string }>;
|
|
364
|
+
console.log(" Tables:", tables.map(r => r.name).join(", "));
|
|
365
|
+
closeDb();
|
|
366
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { getDb } from "./schema";
|
|
2
|
+
|
|
3
|
+
/** Row shape of the `sessions` SQLite table (see schema.ts). */
export interface Session {
  id?: number;                 // autoincrement PK; absent before insert
  session_id: string;          // external session identifier (UNIQUE in the table)
  project_path?: string;
  git_repo?: string;
  git_branch?: string;
  started_at?: string;         // filled by the DB default datetime('now')
  ended_at?: string;           // set by endSession()
  total_observations?: number; // set by endSession()
  tokens_injected?: number;
}
|
|
14
|
+
|
|
15
|
+
export function createSession(session: Session): void {
|
|
16
|
+
const db = getDb();
|
|
17
|
+
db.prepare(`
|
|
18
|
+
INSERT OR IGNORE INTO sessions (session_id, project_path, git_repo, git_branch)
|
|
19
|
+
VALUES (?, ?, ?, ?)
|
|
20
|
+
`).run(
|
|
21
|
+
session.session_id,
|
|
22
|
+
session.project_path || null,
|
|
23
|
+
session.git_repo || null,
|
|
24
|
+
session.git_branch || null
|
|
25
|
+
);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export function endSession(sessionId: string, totalObs: number) {
|
|
29
|
+
const db = getDb();
|
|
30
|
+
db.prepare(`
|
|
31
|
+
UPDATE sessions SET ended_at = datetime('now'), total_observations = ? WHERE session_id = ?
|
|
32
|
+
`).run(totalObs, sessionId);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function getRecentSessions(projectPath: string, limit = 3): Session[] {
|
|
36
|
+
const db = getDb();
|
|
37
|
+
return db.prepare(`
|
|
38
|
+
SELECT * FROM sessions WHERE project_path = ? ORDER BY started_at DESC LIMIT ?
|
|
39
|
+
`).all(projectPath, limit) as Session[];
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export function getSessionById(sessionId: string): Session | undefined {
|
|
43
|
+
const db = getDb();
|
|
44
|
+
return db.prepare(`SELECT * FROM sessions WHERE session_id = ?`).get(sessionId) as Session | undefined;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export function getAllSessions(limit = 50): Session[] {
|
|
48
|
+
const db = getDb();
|
|
49
|
+
return db.prepare(`SELECT * FROM sessions ORDER BY started_at DESC LIMIT ?`).all(limit) as Session[];
|
|
50
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { getDb } from "./schema";
|
|
2
|
+
|
|
3
|
+
/** Row shape of the `summaries` SQLite table (see schema.ts). */
export interface Summary {
  id?: number;          // autoincrement PK; absent before insert
  session_id?: string;  // absent/NULL for digest rows — see insertSummary()
  level: number;        // 1 = per-session summary; digests use other levels
  text: string;
  embedding_id?: string;
  project_path?: string;
  created_at?: string;  // filled by the DB default datetime('now')
}
|
|
12
|
+
|
|
13
|
+
export function insertSummary(summary: Summary): void {
|
|
14
|
+
const db = getDb();
|
|
15
|
+
// For per-session summaries, replace any existing (session_id, level) row so a
|
|
16
|
+
// Stop hook that fires more than once doesn't duplicate. SQLite doesn't
|
|
17
|
+
// accept ON CONFLICT against the partial unique index used here, so we
|
|
18
|
+
// do the delete + insert explicitly inside a transaction. Digests have a
|
|
19
|
+
// null session_id and just append.
|
|
20
|
+
if (summary.session_id) {
|
|
21
|
+
const tx = db.transaction((s: Summary) => {
|
|
22
|
+
db.prepare(`DELETE FROM summaries WHERE session_id = ? AND level = ?`).run(
|
|
23
|
+
s.session_id!,
|
|
24
|
+
s.level
|
|
25
|
+
);
|
|
26
|
+
db.prepare(`
|
|
27
|
+
INSERT INTO summaries (session_id, level, text, embedding_id, project_path)
|
|
28
|
+
VALUES (?, ?, ?, ?, ?)
|
|
29
|
+
`).run(
|
|
30
|
+
s.session_id!,
|
|
31
|
+
s.level,
|
|
32
|
+
s.text,
|
|
33
|
+
s.embedding_id || null,
|
|
34
|
+
s.project_path || null
|
|
35
|
+
);
|
|
36
|
+
});
|
|
37
|
+
tx(summary);
|
|
38
|
+
return;
|
|
39
|
+
}
|
|
40
|
+
db.prepare(`
|
|
41
|
+
INSERT INTO summaries (session_id, level, text, embedding_id, project_path)
|
|
42
|
+
VALUES (?, ?, ?, ?, ?)
|
|
43
|
+
`).run(
|
|
44
|
+
null,
|
|
45
|
+
summary.level,
|
|
46
|
+
summary.text,
|
|
47
|
+
summary.embedding_id || null,
|
|
48
|
+
summary.project_path || null
|
|
49
|
+
);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export function getRecentSummaries(projectPath: string, level = 1, limit = 3): Summary[] {
|
|
53
|
+
const db = getDb();
|
|
54
|
+
return db.prepare(`
|
|
55
|
+
SELECT * FROM summaries
|
|
56
|
+
WHERE project_path = ? AND level = ?
|
|
57
|
+
ORDER BY created_at DESC
|
|
58
|
+
LIMIT ?
|
|
59
|
+
`).all(projectPath, level, limit) as Summary[];
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export function getAllSummariesForDigest(projectPath: string, since: string): Summary[] {
|
|
63
|
+
const db = getDb();
|
|
64
|
+
return db.prepare(`
|
|
65
|
+
SELECT * FROM summaries
|
|
66
|
+
WHERE project_path = ? AND level = 1 AND created_at >= ?
|
|
67
|
+
ORDER BY created_at ASC
|
|
68
|
+
`).all(projectPath, since) as Summary[];
|
|
69
|
+
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
// LanceDB vector database client for semantic search.
|
|
2
|
+
//
|
|
3
|
+
// The table holds vectors at whatever dimension the active embedding provider
|
|
4
|
+
// emits. If a query vector arrives with a mismatched dim (e.g. user just
|
|
5
|
+
// switched providers without re-embedding), searchSimilar returns [] rather
|
|
6
|
+
// than throwing — the CLI's `embeddings switch` command rebuilds the table.
|
|
7
|
+
|
|
8
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
9
|
+
import { join } from "path";
|
|
10
|
+
import { mkdirSync } from "fs";
|
|
11
|
+
import { getActiveDim } from "./embeddings";
|
|
12
|
+
|
|
13
|
+
// Vector store location: explicit override, else a "vector" subdir of the
// configured data dir, else the package-relative default.
const DATA_DIR = process.env.AUTOCTXD_VECTOR_DIR
  || (process.env.AUTOCTXD_DATA_DIR ? join(process.env.AUTOCTXD_DATA_DIR, "vector") : join(import.meta.dir, "..", "..", "..", "data", "vector"));
const TABLE_NAME = "summaries";

// Cached LanceDB handles. `_tableDim` tracks the vector dimension of the
// open table so getOrCreateTable() can notice provider switches.
// NOTE(review): typed as `any`, presumably to avoid coupling to LanceDB's
// TS API surface — confirm before tightening.
let _db: any = null;
let _table: any = null;
let _tableDim: number | null = null;
|
|
20
|
+
|
|
21
|
+
export async function getVectorDb() {
|
|
22
|
+
if (_db) return _db;
|
|
23
|
+
mkdirSync(DATA_DIR, { recursive: true });
|
|
24
|
+
_db = await lancedb.connect(DATA_DIR);
|
|
25
|
+
return _db;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
async function detectTableDim(table: any): Promise<number | null> {
|
|
29
|
+
try {
|
|
30
|
+
const sample = await table.query().limit(1).toArray();
|
|
31
|
+
const v = sample?.[0]?.vector;
|
|
32
|
+
if (Array.isArray(v)) return v.length;
|
|
33
|
+
if (v && typeof v.length === "number") return v.length;
|
|
34
|
+
} catch {
|
|
35
|
+
// ignore
|
|
36
|
+
}
|
|
37
|
+
return null;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
export async function getOrCreateTable() {
|
|
41
|
+
const expectedDim = getActiveDim();
|
|
42
|
+
if (_table && _tableDim === expectedDim) return _table;
|
|
43
|
+
|
|
44
|
+
const db = await getVectorDb();
|
|
45
|
+
|
|
46
|
+
try {
|
|
47
|
+
_table = await db.openTable(TABLE_NAME);
|
|
48
|
+
_tableDim = await detectTableDim(_table);
|
|
49
|
+
// If existing table was built for a different dim, signal to caller via
|
|
50
|
+
// _tableDim so dropTable() can be invoked from the CLI migration path.
|
|
51
|
+
} catch {
|
|
52
|
+
_table = await db.createTable(TABLE_NAME, [
|
|
53
|
+
{
|
|
54
|
+
id: "__init__",
|
|
55
|
+
session_id: "",
|
|
56
|
+
project_path: "",
|
|
57
|
+
text: "",
|
|
58
|
+
level: 0,
|
|
59
|
+
created_at: "",
|
|
60
|
+
vector: new Array(expectedDim).fill(0),
|
|
61
|
+
},
|
|
62
|
+
]);
|
|
63
|
+
_tableDim = expectedDim;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
return _table;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
export async function dropTable(): Promise<void> {
|
|
70
|
+
const db = await getVectorDb();
|
|
71
|
+
try {
|
|
72
|
+
await db.dropTable(TABLE_NAME);
|
|
73
|
+
} catch {
|
|
74
|
+
// table may not exist
|
|
75
|
+
}
|
|
76
|
+
_table = null;
|
|
77
|
+
_tableDim = null;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
 * Vector dimension of the currently open table, or null when no table is
 * open or the dimension could not be detected.
 */
export function getCurrentTableDim(): number | null {
  return _tableDim;
}
|
|
83
|
+
|
|
84
|
+
/** One row in the LanceDB `summaries` table. */
export interface VectorRecord {
  id: string;          // "__init__" is the reserved seed row (see getOrCreateTable)
  session_id: string;
  project_path: string;
  text: string;
  level: number;
  created_at: string;
  vector: number[];    // embedding at the active provider's dimension
}
|
|
93
|
+
|
|
94
|
+
export async function addVector(record: VectorRecord): Promise<void> {
|
|
95
|
+
const table = await getOrCreateTable();
|
|
96
|
+
await table.add([record]);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
export async function searchSimilar(
|
|
100
|
+
queryVector: number[],
|
|
101
|
+
limit = 5,
|
|
102
|
+
projectFilter?: string
|
|
103
|
+
): Promise<VectorRecord[]> {
|
|
104
|
+
const table = await getOrCreateTable();
|
|
105
|
+
|
|
106
|
+
let query = table.search(queryVector).limit(limit);
|
|
107
|
+
|
|
108
|
+
if (projectFilter) {
|
|
109
|
+
query = query.where(`project_path = '${projectFilter.replace(/'/g, "''")}'`);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
try {
|
|
113
|
+
const results = await query.toArray();
|
|
114
|
+
return results
|
|
115
|
+
.filter((r: any) => r.id !== "__init__")
|
|
116
|
+
.map((r: any) => ({
|
|
117
|
+
id: r.id,
|
|
118
|
+
session_id: r.session_id,
|
|
119
|
+
project_path: r.project_path,
|
|
120
|
+
text: r.text,
|
|
121
|
+
level: r.level,
|
|
122
|
+
created_at: r.created_at,
|
|
123
|
+
vector: r.vector,
|
|
124
|
+
_distance: r._distance,
|
|
125
|
+
}));
|
|
126
|
+
} catch {
|
|
127
|
+
return [];
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
export async function closeVectorDb(): Promise<void> {
|
|
132
|
+
_table = null;
|
|
133
|
+
_db = null;
|
|
134
|
+
}
|