@vivantel/virage-core 0.2.17 → 0.2.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config-loader.d.ts.map +1 -1
- package/dist/config-loader.js +1 -0
- package/dist/config-loader.js.map +1 -1
- package/dist/core/embedder.d.ts +4 -4
- package/dist/core/embedder.d.ts.map +1 -1
- package/dist/core/embedder.js.map +1 -1
- package/dist/core/orchestrator.d.ts +2 -0
- package/dist/core/orchestrator.d.ts.map +1 -1
- package/dist/core/orchestrator.js +5 -7
- package/dist/core/orchestrator.js.map +1 -1
- package/dist/core/strategy-registry.d.ts +1 -1
- package/dist/core/strategy-registry.d.ts.map +1 -1
- package/dist/core/strategy-registry.js +17 -0
- package/dist/core/strategy-registry.js.map +1 -1
- package/dist/core/telemetry.d.ts +4 -3
- package/dist/core/telemetry.d.ts.map +1 -1
- package/dist/core/telemetry.js +3 -6
- package/dist/core/telemetry.js.map +1 -1
- package/dist/core/uploader.d.ts +6 -6
- package/dist/core/uploader.d.ts.map +1 -1
- package/dist/core/uploader.js.map +1 -1
- package/dist/core/{embeddings-db.d.ts → virage-db.d.ts} +34 -2
- package/dist/core/virage-db.d.ts.map +1 -0
- package/dist/core/virage-db.js +722 -0
- package/dist/core/virage-db.js.map +1 -0
- package/dist/core/virage-defaults.d.ts +1 -1
- package/dist/core/virage-defaults.d.ts.map +1 -1
- package/dist/core/virage-defaults.js +2 -2
- package/dist/core/virage-defaults.js.map +1 -1
- package/dist/eval/experiment-store.d.ts +4 -3
- package/dist/eval/experiment-store.d.ts.map +1 -1
- package/dist/eval/experiment-store.js +14 -54
- package/dist/eval/experiment-store.js.map +1 -1
- package/dist/index.d.ts +6 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/interfaces/embedder.d.ts +1 -1
- package/dist/interfaces/embedder.d.ts.map +1 -1
- package/dist/telemetry/flusher.d.ts +58 -0
- package/dist/telemetry/flusher.d.ts.map +1 -0
- package/dist/telemetry/flusher.js +173 -0
- package/dist/telemetry/flusher.js.map +1 -0
- package/dist/telemetry/index.d.ts +7 -0
- package/dist/telemetry/index.d.ts.map +1 -0
- package/dist/telemetry/index.js +5 -0
- package/dist/telemetry/index.js.map +1 -0
- package/dist/telemetry/manager.d.ts +13 -0
- package/dist/telemetry/manager.d.ts.map +1 -0
- package/dist/telemetry/manager.js +36 -0
- package/dist/telemetry/manager.js.map +1 -0
- package/dist/telemetry/session.d.ts +30 -0
- package/dist/telemetry/session.d.ts.map +1 -0
- package/dist/telemetry/session.js +158 -0
- package/dist/telemetry/session.js.map +1 -0
- package/dist/telemetry/types.d.ts +97 -0
- package/dist/telemetry/types.d.ts.map +1 -0
- package/dist/telemetry/types.js +46 -0
- package/dist/telemetry/types.js.map +1 -0
- package/package.json +1 -1
- package/dist/core/embeddings-db.d.ts.map +0 -1
- package/dist/core/embeddings-db.js +0 -327
- package/dist/core/embeddings-db.js.map +0 -1
|
@@ -0,0 +1,722 @@
|
|
|
1
|
+
import { createHash } from "crypto";
|
|
2
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, readdirSync, } from "fs";
|
|
3
|
+
import { dirname, join } from "path";
|
|
4
|
+
import { rename } from "fs/promises";
|
|
5
|
+
import Database from "better-sqlite3";
|
|
6
|
+
/**
 * Derive the short content hash used to identify a chunk.
 *
 * @param {string | Buffer} content - Data to hash.
 * @returns {string} The first 16 hex characters of the SHA-256 digest.
 */
function computeContentHash(content) {
    const hasher = createHash("sha256");
    hasher.update(content);
    const hexDigest = hasher.digest("hex");
    return hexDigest.slice(0, 16);
}
|
|
9
|
+
/**
 * Return the hash that identifies a chunk.
 *
 * @param {{ contentHash?: string | null, content: string }} chunk
 * @returns {string} The chunk's own `contentHash` when it is set, otherwise
 *   a hash computed from `chunk.content`.
 */
function chunkContentHash(chunk) {
    const { contentHash } = chunk;
    // Only null/undefined fall through to recomputation.
    if (contentHash !== null && contentHash !== undefined) {
        return contentHash;
    }
    return computeContentHash(chunk.content);
}
|
|
12
|
+
/**
 * Pack an embedding vector into the raw float32 byte layout stored in the
 * `chunks.embedding` BLOB column.
 *
 * @param {ArrayLike<number>} embedding - Vector components.
 * @returns {Buffer} A Buffer viewing the float32 bytes (4 bytes per component).
 */
function embeddingToBlob(embedding) {
    const floats = new Float32Array(embedding);
    return Buffer.from(floats.buffer, floats.byteOffset, floats.byteLength);
}
|
|
15
|
+
/**
 * Decode a float32 BLOB (as written by `embeddingToBlob`) back into a plain
 * array of numbers.
 *
 * A `Float32Array` view can only be created at a byte offset that is a
 * multiple of 4. Node Buffers that are slices of a larger allocation may start
 * at any offset, so unaligned input is copied into a fresh buffer first
 * instead of throwing a RangeError.
 *
 * @param {Buffer | Uint8Array | null | undefined} blob - Raw float32 bytes.
 * @returns {number[]} Decoded components; `[]` for a null/undefined blob.
 *   Trailing bytes that do not form a whole float are ignored.
 */
function blobToEmbedding(blob) {
    if (blob == null) {
        return [];
    }
    const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
    const floatCount = Math.floor(blob.byteLength / bytesPerFloat);
    if (blob.byteOffset % bytesPerFloat === 0) {
        // Aligned: view the existing memory directly, no copy needed.
        return Array.from(new Float32Array(blob.buffer, blob.byteOffset, floatCount));
    }
    // Unaligned: slice() copies the bytes into a new, zero-offset ArrayBuffer.
    const aligned = blob.buffer.slice(blob.byteOffset, blob.byteOffset + floatCount * bytesPerFloat);
    return Array.from(new Float32Array(aligned));
}
|
|
18
|
+
/**
 * Parse a stored `metadata_json` value.
 *
 * @param {string} json - JSON text read from the database.
 * @returns {*} The parsed value, or an empty object when the text is not
 *   valid JSON.
 */
function parseMetadata(json) {
    let metadata = {};
    try {
        metadata = JSON.parse(json);
    }
    catch {
        // Unparseable metadata is deliberately tolerated; the empty default stands.
    }
    return metadata;
}
|
|
26
|
+
// Key/value store; VirageDb keeps a single JSON document under the key 'meta'.
const META_DDL = `
  CREATE TABLE IF NOT EXISTS meta (
    key   TEXT PRIMARY KEY,
    value TEXT NOT NULL
  );
`;
// embedding: raw IEEE-754 float32 little-endian bytes (~4× smaller than JSON).
// NULL until embedded; cleared after upload to reclaim storage.
const CHUNKS_DDL = `
  CREATE TABLE IF NOT EXISTS chunks (
    content_hash  TEXT PRIMARY KEY,
    source_file   TEXT NOT NULL,
    commit_hash   TEXT NOT NULL,
    content       TEXT NOT NULL,
    metadata_json TEXT NOT NULL,
    embedding     BLOB,
    embedded_at   INTEGER,
    uploaded      INTEGER NOT NULL DEFAULT 0
  ) STRICT;
  CREATE INDEX IF NOT EXISTS idx_source_file ON chunks(source_file);
`;
// One row per saved experiment run; structured fields are stored as JSON text.
const EXPERIMENT_RUNS_DDL = `
  CREATE TABLE IF NOT EXISTS experiment_runs (
    id                       TEXT PRIMARY KEY,
    name                     TEXT NOT NULL,
    timestamp                TEXT NOT NULL,
    config_json              TEXT NOT NULL,
    eval_result_json         TEXT NOT NULL,
    ragas_result_json        TEXT,
    per_query_rr_scores_json TEXT
  ) STRICT;
  CREATE INDEX IF NOT EXISTS idx_exp_name ON experiment_runs(name);
  CREATE INDEX IF NOT EXISTS idx_exp_ts   ON experiment_runs(timestamp);
`;
// One evaluation dataset per named slot.
const EVAL_DATASETS_DDL = `
  CREATE TABLE IF NOT EXISTS eval_datasets (
    slot         TEXT PRIMARY KEY,
    version      TEXT,
    queries_json TEXT NOT NULL,
    saved_at     TEXT NOT NULL
  ) STRICT;
`;
// History of pipeline executions with per-stage details as JSON.
const PIPELINE_RUNS_DDL = `
  CREATE TABLE IF NOT EXISTS pipeline_runs (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    run_at      TEXT NOT NULL,
    duration_ms INTEGER NOT NULL,
    stages_json TEXT NOT NULL
  ) STRICT;
  CREATE INDEX IF NOT EXISTS idx_pipeline_run_at ON pipeline_runs(run_at);
`;
// Telemetry buffer. Every event table carries a session_id and a `flushed`
// flag. All event tables are read back per session, so each one gets an index
// on session_id (errors/feedback/cache_stats previously had none and were
// scanned in full on every per-session read).
const TELEMETRY_DDL = `
  CREATE TABLE IF NOT EXISTS telemetry_sessions (
    id                TEXT PRIMARY KEY,
    started_at        TEXT NOT NULL,
    ended_at          TEXT,
    embedding_model   TEXT,
    chunking_strategy TEXT,
    store_type        TEXT,
    node_version      TEXT NOT NULL,
    os                TEXT NOT NULL,
    total_searches    INTEGER NOT NULL DEFAULT 0,
    total_tool_calls  INTEGER NOT NULL DEFAULT 0,
    tools_used_json   TEXT NOT NULL DEFAULT '[]',
    flushed           INTEGER NOT NULL DEFAULT 0
  ) STRICT;

  CREATE TABLE IF NOT EXISTS telemetry_searches (
    id                  TEXT PRIMARY KEY,
    session_id          TEXT NOT NULL,
    occurred_at         TEXT NOT NULL,
    result_count        INTEGER NOT NULL,
    result_count_bucket TEXT NOT NULL,
    empty               INTEGER NOT NULL,
    query_hash          TEXT,
    redundancy_detected INTEGER NOT NULL DEFAULT 0,
    flushed             INTEGER NOT NULL DEFAULT 0
  ) STRICT;
  CREATE INDEX IF NOT EXISTS idx_tel_searches_session ON telemetry_searches(session_id);

  CREATE TABLE IF NOT EXISTS telemetry_latency (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id  TEXT NOT NULL,
    occurred_at TEXT NOT NULL,
    phase       TEXT NOT NULL,
    duration_ms INTEGER NOT NULL,
    flushed     INTEGER NOT NULL DEFAULT 0
  ) STRICT;
  CREATE INDEX IF NOT EXISTS idx_tel_latency_session ON telemetry_latency(session_id);

  CREATE TABLE IF NOT EXISTS telemetry_errors (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id  TEXT NOT NULL,
    occurred_at TEXT NOT NULL,
    error_type  TEXT NOT NULL,
    retry_count INTEGER NOT NULL DEFAULT 0,
    recovered   INTEGER NOT NULL DEFAULT 0,
    flushed     INTEGER NOT NULL DEFAULT 0
  ) STRICT;
  CREATE INDEX IF NOT EXISTS idx_tel_errors_session ON telemetry_errors(session_id);

  CREATE TABLE IF NOT EXISTS telemetry_feedback (
    id                   INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id           TEXT NOT NULL,
    search_id            TEXT NOT NULL,
    occurred_at          TEXT NOT NULL,
    was_useful           INTEGER NOT NULL,
    context_relevance    REAL,
    context_completeness REAL,
    noise_ratio          REAL,
    missing_category     TEXT,
    flushed              INTEGER NOT NULL DEFAULT 0
  ) STRICT;
  CREATE INDEX IF NOT EXISTS idx_tel_feedback_session ON telemetry_feedback(session_id);

  CREATE TABLE IF NOT EXISTS telemetry_cache_stats (
    id                INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id        TEXT NOT NULL,
    recorded_at       TEXT NOT NULL,
    file_hit_rate     REAL,
    semantic_hit_rate REAL,
    flushed           INTEGER NOT NULL DEFAULT 0
  ) STRICT;
  CREATE INDEX IF NOT EXISTS idx_tel_cache_stats_session ON telemetry_cache_stats(session_id);
`;
|
|
148
|
+
/** SQL shared by every method that stores a chunk before it has an embedding. */
const INSERT_PENDING_CHUNK_SQL = `INSERT OR IGNORE INTO chunks
    (content_hash, source_file, commit_hash, content, metadata_json, embedding, embedded_at, uploaded)
    VALUES (?, ?, ?, ?, ?, NULL, NULL, 0)`;
/** Per-event telemetry tables; each row references its session via session_id. */
const TELEMETRY_EVENT_TABLES = [
    "telemetry_searches",
    "telemetry_latency",
    "telemetry_errors",
    "telemetry_feedback",
    "telemetry_cache_stats",
];
/** Columns of telemetry_sessions that updateTelemetrySession is allowed to write. */
const TELEMETRY_SESSION_COLUMNS = new Set([
    "id",
    "started_at",
    "ended_at",
    "embedding_model",
    "chunking_strategy",
    "store_type",
    "node_version",
    "os",
    "total_searches",
    "total_tool_calls",
    "tools_used_json",
    "flushed",
]);
/** Bind parameters for INSERT_PENDING_CHUNK_SQL, in placeholder order. */
function pendingChunkParams(chunk) {
    return [
        chunkContentHash(chunk),
        chunk.sourceFile,
        chunk.commitHash,
        chunk.content,
        JSON.stringify(chunk.metadata),
    ];
}
/** Map a `chunks` row to a Chunk object (without embedding fields). */
function rowToChunk(row) {
    return {
        contentHash: row.content_hash,
        sourceFile: row.source_file,
        commitHash: row.commit_hash,
        content: row.content,
        metadata: parseMetadata(row.metadata_json),
    };
}
/**
 * SQLite-backed local store for chunks and embeddings, experiment runs,
 * evaluation datasets, pipeline run history and buffered telemetry.
 */
export class VirageDb {
    db;
    /**
     * Open (creating if necessary) the database at `dbPath`, run the legacy
     * migrations and make sure every table exists.
     *
     * @param {string} dbPath - Path of the SQLite file.
     */
    constructor(dbPath) {
        mkdirSync(dirname(dbPath), { recursive: true });
        // Backward-compat: if virage.db doesn't exist but embeddings.db does, rename it.
        const legacyPath = dbPath.replace(/virage\.db$/, "embeddings.db");
        if (legacyPath !== dbPath && !existsSync(dbPath) && existsSync(legacyPath)) {
            // Move the WAL sidecars first: a database left in WAL mode may hold
            // un-checkpointed data in "-wal", which would be lost if only the main
            // file were renamed. Doing the main file last keeps a crash in the
            // middle recoverable on the next start.
            for (const suffix of ["-wal", "-shm"]) {
                if (existsSync(legacyPath + suffix)) {
                    renameSync(legacyPath + suffix, dbPath + suffix);
                }
            }
            renameSync(legacyPath, dbPath);
        }
        this.db = new Database(dbPath);
        this.db.pragma("journal_mode = WAL");
        this.db.exec(META_DDL);
        const hasOldTable = this.db
            .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='embeddings'")
            .get();
        if (hasOldTable) {
            this.migrateFromEmbeddingsTable();
        }
        this.db.exec(CHUNKS_DDL);
        this.db.exec(EXPERIMENT_RUNS_DDL);
        this.db.exec(EVAL_DATASETS_DDL);
        this.db.exec(PIPELINE_RUNS_DDL);
        this.db.exec(TELEMETRY_DDL);
        const jsonPath = dbPath.replace(/\.db$/, ".json");
        // When dbPath has no ".db" suffix the replace is a no-op; never try to
        // read the database file itself as JSON.
        if (jsonPath !== dbPath && this.isEmpty() && existsSync(jsonPath)) {
            this.migrateFromJson(jsonPath);
        }
    }
    /** @returns {boolean} True when the `chunks` table has no rows. */
    isEmpty() {
        const row = this.db.prepare("SELECT COUNT(*) as cnt FROM chunks").get();
        return row.cnt === 0;
    }
    /**
     * Copy every row of the legacy `embeddings` table into `chunks`
     * (converting JSON embeddings to float32 BLOBs), then drop the old table
     * and its index. Runs in a single transaction.
     */
    migrateFromEmbeddingsTable() {
        const rows = this.db.prepare("SELECT * FROM embeddings").all();
        const migrate = this.db.transaction(() => {
            this.db
                .prepare(`
          CREATE TABLE IF NOT EXISTS chunks (
            content_hash  TEXT PRIMARY KEY,
            source_file   TEXT NOT NULL,
            commit_hash   TEXT NOT NULL,
            content       TEXT NOT NULL,
            metadata_json TEXT NOT NULL,
            embedding     BLOB,
            embedded_at   INTEGER,
            uploaded      INTEGER NOT NULL DEFAULT 0
          ) STRICT
        `)
                .run();
            const stmt = this.db.prepare(`
        INSERT OR IGNORE INTO chunks
          (content_hash, source_file, commit_hash, content, metadata_json, embedding, embedded_at, uploaded)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
      `);
            for (const row of rows) {
                let blob = null;
                if (row.embedding_json) {
                    try {
                        blob = embeddingToBlob(JSON.parse(row.embedding_json));
                    }
                    catch {
                        /* invalid embedding JSON: the row is kept, with a NULL embedding */
                    }
                }
                stmt.run(row.content_hash, row.source_file, row.commit_hash, row.content, row.metadata_json, blob, row.embedded_at != null ? Math.floor(row.embedded_at) : null, row.uploaded);
            }
            this.db.prepare("DROP TABLE embeddings").run();
            // The constructor re-creates this index on `chunks` right afterwards.
            this.db.prepare("DROP INDEX IF EXISTS idx_source_file").run();
        });
        migrate();
    }
    /** @returns {object | null} The stored meta document, or null if absent or unparseable. */
    getMeta() {
        const row = this.db
            .prepare("SELECT value FROM meta WHERE key = 'meta'")
            .get();
        if (!row)
            return null;
        try {
            return JSON.parse(row.value);
        }
        catch {
            return null;
        }
    }
    /** Store `meta` (JSON-serialized) under the key 'meta', replacing any previous value. */
    setMeta(meta) {
        this.db
            .prepare("INSERT OR REPLACE INTO meta (key, value) VALUES ('meta', ?)")
            .run(JSON.stringify(meta));
    }
    // ── Streaming pipeline methods ─────────────────────────────────────────────
    /** Insert chunk metadata only (no embedding yet). Safe to call inside a transaction. */
    insertChunk(chunk) {
        this.db.prepare(INSERT_PENDING_CHUNK_SQL).run(...pendingChunkParams(chunk));
    }
    /** Batch-insert chunk metadata in a single transaction. */
    insertChunks(chunks) {
        const stmt = this.db.prepare(INSERT_PENDING_CHUNK_SQL);
        const insertAll = this.db.transaction((items) => {
            for (const chunk of items) {
                stmt.run(...pendingChunkParams(chunk));
            }
        });
        insertAll(chunks);
    }
    /** Store the embedding for a previously inserted chunk. */
    updateEmbedding(contentHash, embedding, embeddedAt) {
        this.db
            .prepare("UPDATE chunks SET embedding = ?, embedded_at = ? WHERE content_hash = ?")
            .run(embeddingToBlob(embedding), Math.floor(embeddedAt), contentHash);
    }
    /** Clear embedding data after upload to reclaim storage. */
    clearEmbedding(contentHash) {
        this.db
            .prepare("UPDATE chunks SET embedding = NULL, embedded_at = NULL WHERE content_hash = ?")
            .run(contentHash);
    }
    /** Delete all chunks for a source file (called before re-chunking a changed file). */
    deleteBySourceFile(sourceFile) {
        this.db.prepare("DELETE FROM chunks WHERE source_file = ?").run(sourceFile);
    }
    /**
     * Atomically replace all chunks for a source file with a new set.
     * The delete and insert run in a single SQLite transaction.
     */
    replaceChunks(sourceFile, chunks) {
        const del = this.db.prepare("DELETE FROM chunks WHERE source_file = ?");
        const ins = this.db.prepare(INSERT_PENDING_CHUNK_SQL);
        const txn = this.db.transaction(() => {
            del.run(sourceFile);
            for (const chunk of chunks) {
                ins.run(...pendingChunkParams(chunk));
            }
        });
        txn();
    }
    /** Chunks inserted during chunking that still need an embedding computed. */
    getPendingEmbedChunks() {
        return this.db
            .prepare("SELECT content_hash, source_file, commit_hash, content, metadata_json FROM chunks WHERE embedding IS NULL AND uploaded = 0")
            .all()
            .map(rowToChunk);
    }
    /** Embedded chunks that have not yet been uploaded to the vector store. */
    getPendingUploadChunks() {
        return this.rowsToEmbeddedChunks("SELECT * FROM chunks WHERE uploaded = 0 AND embedding IS NOT NULL");
    }
    /** Returns all chunk rows as Chunk objects, regardless of embedding or upload status. */
    getAllChunks() {
        return this.db
            .prepare("SELECT content_hash, source_file, commit_hash, content, metadata_json FROM chunks")
            .all()
            .map(rowToChunk);
    }
    /**
     * Returns a file → commitHash map for all files tracked in the DB.
     * NOTE(review): commit_hash is a bare column under GROUP BY, so if one file
     * has rows with different commit hashes SQLite picks one of them.
     */
    getFileStates() {
        const rows = this.db
            .prepare("SELECT source_file, commit_hash FROM chunks GROUP BY source_file")
            .all();
        return new Map(rows.map((r) => [r.source_file, r.commit_hash]));
    }
    // ── Preserved methods ──────────────────────────────────────────────────────
    /** Insert fully-embedded chunks (used by migration and legacy callers). */
    insert(chunks) {
        const stmt = this.db.prepare(`
      INSERT OR IGNORE INTO chunks
        (content_hash, source_file, commit_hash, content, metadata_json, embedding, embedded_at, uploaded)
      VALUES (?, ?, ?, ?, ?, ?, ?, 0)
    `);
        const insertMany = this.db.transaction((items) => {
            for (const chunk of items) {
                stmt.run(chunkContentHash(chunk), chunk.sourceFile, chunk.commitHash, chunk.content, JSON.stringify(chunk.metadata), embeddingToBlob(chunk.embedding), Math.floor(chunk.embeddedAt));
            }
        });
        insertMany(chunks);
    }
    /** @returns {boolean} True when a chunk with this content hash exists. */
    has(contentHash) {
        const row = this.db
            .prepare("SELECT 1 FROM chunks WHERE content_hash = ?")
            .get(contentHash);
        return row !== undefined;
    }
    /** Returns all rows that have an embedding (excludes metadata-only rows). */
    getAll() {
        return this.rowsToEmbeddedChunks("SELECT * FROM chunks WHERE embedding IS NOT NULL");
    }
    /** Alias for getPendingUploadChunks() — keeps Uploader compatible. */
    getPending() {
        return this.getPendingUploadChunks();
    }
    /**
     * Flag the given chunks as uploaded.
     *
     * Uses one single-parameter statement inside a transaction rather than a
     * generated `IN (?, ?, …)` list, so the number of hashes is not bounded by
     * SQLite's limit on bound variables per statement.
     *
     * @param {string[]} contentHashes
     */
    markUploaded(contentHashes) {
        if (contentHashes.length === 0)
            return;
        const stmt = this.db.prepare("UPDATE chunks SET uploaded = 1 WHERE content_hash = ?");
        const markAll = this.db.transaction((hashes) => {
            for (const hash of hashes) {
                stmt.run(hash);
            }
        });
        markAll(contentHashes);
    }
    /** Count of embedded chunks not yet uploaded. */
    pendingCount() {
        const row = this.db
            .prepare("SELECT COUNT(*) as cnt FROM chunks WHERE uploaded = 0 AND embedding IS NOT NULL")
            .get();
        return row.cnt;
    }
    /** Delete every chunk and all meta rows, atomically. */
    clearAll() {
        const wipe = this.db.transaction(() => {
            this.db.prepare("DELETE FROM chunks").run();
            this.db.prepare("DELETE FROM meta").run();
        });
        wipe();
    }
    /** Delete all chunks already flagged as uploaded. */
    pruneUploaded() {
        this.db.prepare("DELETE FROM chunks WHERE uploaded = 1").run();
    }
    /**
     * Import a legacy JSON store. The file is either a bare array of embedded
     * chunks or an object `{ chunks, _meta }`. Imported chunks are stored as
     * already uploaded. Unreadable or malformed files are ignored. After the
     * import the file is renamed to `<name>.migrated` (best effort).
     *
     * @param {string} jsonPath
     */
    migrateFromJson(jsonPath) {
        let parsed;
        try {
            parsed = JSON.parse(readFileSync(jsonPath, "utf-8"));
        }
        catch {
            return;
        }
        let chunks;
        let meta = null;
        if (Array.isArray(parsed)) {
            chunks = parsed;
        }
        else {
            // `parsed` can legitimately be null (the JSON literal); treat as empty.
            const file = parsed ?? {};
            chunks = file.chunks ?? [];
            meta = file._meta ?? null;
        }
        if (meta)
            this.setMeta(meta);
        if (Array.isArray(chunks) && chunks.length > 0) {
            const stmt = this.db.prepare(`
        INSERT OR IGNORE INTO chunks
          (content_hash, source_file, commit_hash, content, metadata_json, embedding, embedded_at, uploaded)
        VALUES (?, ?, ?, ?, ?, ?, ?, 1)
      `);
            const insertAll = this.db.transaction((items) => {
                for (const chunk of items) {
                    stmt.run(chunkContentHash(chunk), chunk.sourceFile, chunk.commitHash, chunk.content, JSON.stringify(chunk.metadata), embeddingToBlob(chunk.embedding), Math.floor(chunk.embeddedAt));
                }
            });
            insertAll(chunks);
        }
        // Rename synchronously so the marker is in place before the constructor
        // returns; a fire-and-forget async rename could be lost on early exit and
        // trigger a second import once the table is empty again.
        try {
            renameSync(jsonPath, jsonPath + ".migrated");
        }
        catch {
            /* best effort: the data is already imported */
        }
    }
    /** Close the underlying database connection. */
    close() {
        this.db.close();
    }
    /**
     * Run `sql` against `chunks` and map each row to an embedded-chunk object.
     *
     * @param {string} sql - A SELECT returning full `chunks` rows.
     */
    rowsToEmbeddedChunks(sql) {
        const rows = this.db.prepare(sql).all();
        return rows.map((row) => ({
            ...rowToChunk(row),
            embedding: blobToEmbedding(row.embedding),
            embeddedAt: row.embedded_at,
        }));
    }
    // ── Experiment runs ────────────────────────────────────────────────────────
    /** Insert or replace an experiment run, keyed by `run.id`. */
    saveExperimentRun(run) {
        const ragasJson = run.ragasResult != null ? JSON.stringify(run.ragasResult) : null;
        const rrScoresJson = run.perQueryRrScores != null ? JSON.stringify(run.perQueryRrScores) : null;
        this.db
            .prepare(`INSERT OR REPLACE INTO experiment_runs
           (id, name, timestamp, config_json, eval_result_json, ragas_result_json, per_query_rr_scores_json)
         VALUES (?, ?, ?, ?, ?, ?, ?)`)
            .run(run.id, run.name, run.timestamp, JSON.stringify(run.config), JSON.stringify(run.evalResult), ragasJson, rrScoresJson);
    }
    /**
     * Look a run up by id first, then fall back to the most recent run with
     * that name.
     *
     * @returns {object | null} The run, or null when nothing matches.
     */
    loadExperimentRun(nameOrId) {
        let row = this.db
            .prepare("SELECT * FROM experiment_runs WHERE id = ?")
            .get(nameOrId);
        if (!row) {
            row = this.db
                .prepare("SELECT * FROM experiment_runs WHERE name = ? ORDER BY timestamp DESC LIMIT 1")
                .get(nameOrId);
        }
        if (!row)
            return null;
        return this.rowToExperimentRun(row);
    }
    /** @returns {object[]} All experiment runs, oldest first. */
    listExperimentRuns() {
        const rows = this.db
            .prepare("SELECT * FROM experiment_runs ORDER BY timestamp ASC")
            .all();
        return rows.map((r) => this.rowToExperimentRun(r));
    }
    /**
     * Delete the run with the given id.
     *
     * @throws {Error} When no run has that id.
     */
    deleteExperimentRun(id) {
        const result = this.db
            .prepare("DELETE FROM experiment_runs WHERE id = ?")
            .run(id);
        if (result.changes === 0) {
            throw new Error(`Experiment run "${id}" not found.`);
        }
    }
    /**
     * One-time import of `*.json` experiment files from `dir`. Does nothing
     * when the directory is missing or the table already has rows; files that
     * cannot be read, parsed or stored are skipped.
     */
    migrateExperimentsFromDir(dir) {
        if (!existsSync(dir))
            return;
        const count = this.db.prepare("SELECT COUNT(*) as cnt FROM experiment_runs").get().cnt;
        if (count > 0)
            return;
        let files;
        try {
            files = readdirSync(dir).filter((f) => f.endsWith(".json"));
        }
        catch {
            return;
        }
        for (const file of files) {
            try {
                const raw = readFileSync(join(dir, file), "utf-8");
                const run = JSON.parse(raw);
                this.saveExperimentRun(run);
            }
            catch {
                /* skip malformed files */
            }
        }
    }
    /** Map an `experiment_runs` row to a run object, parsing its JSON columns. */
    rowToExperimentRun(row) {
        return {
            id: row.id,
            name: row.name,
            timestamp: row.timestamp,
            config: JSON.parse(row.config_json),
            evalResult: JSON.parse(row.eval_result_json),
            ragasResult: row.ragas_result_json != null
                ? JSON.parse(row.ragas_result_json)
                : undefined,
            perQueryRrScores: row.per_query_rr_scores_json != null
                ? JSON.parse(row.per_query_rr_scores_json)
                : undefined,
        };
    }
    // ── Eval datasets ──────────────────────────────────────────────────────────
    /** Store `dataset` in `slot`, replacing whatever the slot held. */
    saveEvalDataset(dataset, slot = "default") {
        this.db
            .prepare(`INSERT OR REPLACE INTO eval_datasets (slot, version, queries_json, saved_at)
         VALUES (?, ?, ?, ?)`)
            .run(slot, dataset.version ?? null, JSON.stringify(dataset.queries), new Date().toISOString());
    }
    /** @returns {{ queries: *, version?: string } | null} The dataset in `slot`, or null. */
    loadEvalDataset(slot = "default") {
        const row = this.db
            .prepare("SELECT * FROM eval_datasets WHERE slot = ?")
            .get(slot);
        if (!row)
            return null;
        return {
            queries: JSON.parse(row.queries_json),
            ...(row.version != null ? { version: row.version } : {}),
        };
    }
    // ── Pipeline runs ──────────────────────────────────────────────────────────
    /** Append one pipeline run record. */
    savePipelineRun(data) {
        this.db
            .prepare(`INSERT INTO pipeline_runs (run_at, duration_ms, stages_json)
         VALUES (?, ?, ?)`)
            .run(data.runAt, data.durationMs, JSON.stringify(data.stages));
    }
    /**
     * List pipeline runs ordered by `run_at` ascending.
     * NOTE(review): with ASC + LIMIT this returns the oldest `limit` runs, not
     * the most recent ones — confirm that is what callers expect.
     */
    listPipelineRuns(limit = 100) {
        const rows = this.db
            .prepare("SELECT run_at, duration_ms, stages_json FROM pipeline_runs ORDER BY run_at ASC LIMIT ?")
            .all(limit);
        return rows.map((r) => ({
            runAt: r.run_at,
            durationMs: r.duration_ms,
            stages: JSON.parse(r.stages_json),
        }));
    }
    // ── Telemetry: sessions ────────────────────────────────────────────────────
    /** Insert or replace a telemetry session row (snake_case column names). */
    insertTelemetrySession(row) {
        this.db
            .prepare(`INSERT OR REPLACE INTO telemetry_sessions
           (id, started_at, ended_at, embedding_model, chunking_strategy, store_type,
            node_version, os, total_searches, total_tool_calls, tools_used_json, flushed)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`)
            .run(row.id, row.started_at, row.ended_at ?? null, row.embedding_model ?? null, row.chunking_strategy ?? null, row.store_type ?? null, row.node_version, row.os, row.total_searches, row.total_tool_calls, row.tools_used_json, row.flushed);
    }
    /**
     * Update selected columns of a telemetry session.
     *
     * Keys of `updates` become column names in the generated SQL, so they are
     * checked against the known columns first; values are always bound as
     * parameters.
     *
     * @param {string} id - Session id.
     * @param {Record<string, unknown>} updates - Column → new value. An empty
     *   object is a no-op.
     * @throws {Error} When a key is not a telemetry_sessions column.
     */
    updateTelemetrySession(id, updates) {
        const columns = Object.keys(updates);
        if (columns.length === 0)
            return;
        for (const column of columns) {
            if (!TELEMETRY_SESSION_COLUMNS.has(column)) {
                throw new Error(`Unknown telemetry_sessions column: ${JSON.stringify(column)}`);
            }
        }
        const fields = columns.map((k) => `${k} = ?`).join(", ");
        this.db
            .prepare(`UPDATE telemetry_sessions SET ${fields} WHERE id = ?`)
            .run(...Object.values(updates), id);
    }
    /** @returns {object | null} The session row, or null when absent. */
    getTelemetrySession(id) {
        return (this.db
            .prepare("SELECT * FROM telemetry_sessions WHERE id = ?")
            .get(id) ?? null);
    }
    /** @returns {object[]} Session rows whose `flushed` flag is 0. */
    getUnflushedSessions() {
        return this.db
            .prepare("SELECT * FROM telemetry_sessions WHERE flushed = 0")
            .all();
    }
    /**
     * Mark a session and all of its event rows as flushed. Runs in one
     * transaction so a failure cannot leave the session half-flushed.
     */
    markTelemetryFlushed(sessionId) {
        const flush = this.db.transaction((sid) => {
            this.db
                .prepare("UPDATE telemetry_sessions SET flushed = 1 WHERE id = ?")
                .run(sid);
            for (const table of TELEMETRY_EVENT_TABLES) {
                this.db
                    .prepare(`UPDATE ${table} SET flushed = 1 WHERE session_id = ?`)
                    .run(sid);
            }
        });
        flush(sessionId);
    }
    // ── Telemetry: searches ────────────────────────────────────────────────────
    /** Insert or replace a search event row. */
    insertTelemetrySearch(row) {
        this.db
            .prepare(`INSERT OR REPLACE INTO telemetry_searches
           (id, session_id, occurred_at, result_count, result_count_bucket,
            empty, query_hash, redundancy_detected, flushed)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`)
            .run(row.id, row.session_id, row.occurred_at, row.result_count, row.result_count_bucket, row.empty, row.query_hash ?? null, row.redundancy_detected, row.flushed);
    }
    /** @returns {object[]} Search rows of the session, oldest first. */
    getSearchesForSession(sessionId) {
        return this.db
            .prepare("SELECT * FROM telemetry_searches WHERE session_id = ? ORDER BY occurred_at ASC")
            .all(sessionId);
    }
    // ── Telemetry: latency ─────────────────────────────────────────────────────
    /** Append a latency measurement row. */
    insertTelemetryLatency(row) {
        this.db
            .prepare(`INSERT INTO telemetry_latency
           (session_id, occurred_at, phase, duration_ms, flushed)
         VALUES (?, ?, ?, ?, ?)`)
            .run(row.session_id, row.occurred_at, row.phase, row.duration_ms, row.flushed);
    }
    /** @returns {object[]} Latency rows of the session, oldest first. */
    getLatencyForSession(sessionId) {
        return this.db
            .prepare("SELECT * FROM telemetry_latency WHERE session_id = ? ORDER BY occurred_at ASC")
            .all(sessionId);
    }
    // ── Telemetry: errors ──────────────────────────────────────────────────────
    /** Append an error event row. */
    insertTelemetryError(row) {
        this.db
            .prepare(`INSERT INTO telemetry_errors
           (session_id, occurred_at, error_type, retry_count, recovered, flushed)
         VALUES (?, ?, ?, ?, ?, ?)`)
            .run(row.session_id, row.occurred_at, row.error_type, row.retry_count, row.recovered, row.flushed);
    }
    /** @returns {object[]} Error rows of the session, oldest first. */
    getErrorsForSession(sessionId) {
        return this.db
            .prepare("SELECT * FROM telemetry_errors WHERE session_id = ? ORDER BY occurred_at ASC")
            .all(sessionId);
    }
    // ── Telemetry: feedback ────────────────────────────────────────────────────
    /** Append a feedback row; optional scores are stored as NULL when absent. */
    insertTelemetryFeedback(row) {
        this.db
            .prepare(`INSERT INTO telemetry_feedback
           (session_id, search_id, occurred_at, was_useful,
            context_relevance, context_completeness, noise_ratio,
            missing_category, flushed)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`)
            .run(row.session_id, row.search_id, row.occurred_at, row.was_useful, row.context_relevance ?? null, row.context_completeness ?? null, row.noise_ratio ?? null, row.missing_category ?? null, row.flushed);
    }
    /** @returns {object[]} Feedback rows of the session, oldest first. */
    getFeedbackForSession(sessionId) {
        return this.db
            .prepare("SELECT * FROM telemetry_feedback WHERE session_id = ? ORDER BY occurred_at ASC")
            .all(sessionId);
    }
    // ── Telemetry: cache stats ─────────────────────────────────────────────────
    /** Append a cache statistics row; missing rates are stored as NULL. */
    insertTelemetryCacheStats(row) {
        this.db
            .prepare(`INSERT INTO telemetry_cache_stats
           (session_id, recorded_at, file_hit_rate, semantic_hit_rate, flushed)
         VALUES (?, ?, ?, ?, ?)`)
            .run(row.session_id, row.recorded_at, row.file_hit_rate ?? null, row.semantic_hit_rate ?? null, row.flushed);
    }
    /** @returns {object[]} Cache-stat rows of the session, oldest first. */
    getCacheStatsForSession(sessionId) {
        return this.db
            .prepare("SELECT * FROM telemetry_cache_stats WHERE session_id = ? ORDER BY recorded_at ASC")
            .all(sessionId);
    }
    // ── Telemetry: buffer management ───────────────────────────────────────────
    /**
     * Rough size estimate of the telemetry tables: for every row, the decimal
     * length of its rowid plus a flat 50. This is a heuristic, not the real
     * on-disk size.
     *
     * @returns {number}
     */
    getTelemetryBufferSizeBytes() {
        const tables = ["telemetry_sessions", ...TELEMETRY_EVENT_TABLES];
        let totalBytes = 0;
        for (const table of tables) {
            const row = this.db
                .prepare(`SELECT SUM(LENGTH(CAST(rowid AS TEXT)) + 50) as sz FROM ${table}`)
                .get();
            // SUM over an empty table yields NULL.
            totalBytes += row.sz ?? 0;
        }
        return totalBytes;
    }
    /**
     * When the estimated buffer size exceeds `maxBytes`, drop up to 1000 of the
     * oldest latency rows and, if still over, up to 1000 of the oldest cache
     * stat rows. A single call removes at most one batch per table.
     */
    pruneOldTelemetry(maxBytes) {
        const currentSize = this.getTelemetryBufferSizeBytes();
        if (currentSize <= maxBytes)
            return;
        // Delete oldest latency rows first, then cache stats — never sessions/errors/feedback
        this.db
            .prepare(`DELETE FROM telemetry_latency WHERE id IN (
           SELECT id FROM telemetry_latency ORDER BY occurred_at ASC LIMIT 1000
         )`)
            .run();
        if (this.getTelemetryBufferSizeBytes() > maxBytes) {
            this.db
                .prepare(`DELETE FROM telemetry_cache_stats WHERE id IN (
             SELECT id FROM telemetry_cache_stats ORDER BY recorded_at ASC LIMIT 1000
           )`)
                .run();
        }
    }
    /** @returns {boolean} True when at least one telemetry session row exists. */
    hasTelemetrySessions() {
        const row = this.db
            .prepare("SELECT COUNT(*) as cnt FROM telemetry_sessions")
            .get();
        return row.cnt > 0;
    }
    /** Delete all telemetry rows (event tables first, sessions last) atomically. */
    clearTelemetryData() {
        const tables = [...TELEMETRY_EVENT_TABLES].reverse();
        tables.push("telemetry_sessions");
        const wipe = this.db.transaction(() => {
            for (const table of tables) {
                this.db.prepare(`DELETE FROM ${table}`).run();
            }
        });
        wipe();
    }
}
|
|
722
|
+
//# sourceMappingURL=virage-db.js.map
|