@spark-agents/engram 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
/**
 * Contract for a reranking stage that reorders retrieval candidates by
 * (query, document) relevance. Implementations may be model-backed or
 * heuristic; `close()` releases any held resources.
 */
export interface Reranker {
    /** Rerank candidates by (query, document) relevance. Returns reordered with updated scores. */
    rerank(query: string, candidates: Array<{
        text: string;
        score: number;
    }>): Promise<Array<{
        text: string;
        /** The candidate's original retrieval score, passed through unchanged. */
        score: number;
        /** The reranker's own relevance score used for the new ordering. */
        rerankerScore: number;
    }>>;
    /** Clean up model resources */
    close(): Promise<void>;
}
/** Create a heuristic, dependency-free reranker based on query-term overlap. */
export declare function createLightweightReranker(): Reranker;
/** Create a model-backed cross-encoder reranker; resolves to null when the model cannot be loaded. */
export declare function createReranker(): Promise<Reranker | null>;
@@ -0,0 +1,104 @@
1
+ import { AutoModelForSequenceClassification, AutoTokenizer, env } from "@xenova/transformers";
2
// Cross-encoder checkpoint used for (query, document) relevance scoring.
const MODEL_NAME = "Xenova/ms-marco-MiniLM-L-12-v2";
// Token budget per (query, document) pair handed to the tokenizer.
const MAX_INPUT_LENGTH = 512;
// Terms shorter than this are ignored when computing lexical overlap.
const MIN_QUERY_TERM_LEN = 3;
/**
 * Split text into lowercase alphanumeric terms, dropping tokens shorter
 * than MIN_QUERY_TERM_LEN. Used by the lightweight reranker to measure
 * query/document term overlap.
 */
function toTerms(text) {
    const tokens = text.toLowerCase().split(/\W+/);
    const terms = [];
    for (const token of tokens) {
        if (token.length >= MIN_QUERY_TERM_LEN) {
            terms.push(token);
        }
    }
    return terms;
}
11
/**
 * Pull one relevance score per candidate out of a model logits tensor.
 *
 * Supports single-logit heads (one value per candidate) and multi-class
 * heads, where the positive class is assumed to be the last one. When the
 * tensor reports no dims, the class count is inferred from the flat data
 * length. Missing data yields Number.NEGATIVE_INFINITY placeholders so the
 * candidate sorts last rather than crashing the caller.
 */
function extractRerankerScores(logits, candidateCount) {
    if (candidateCount <= 0) {
        return [];
    }
    const flat = logits?.data ? Array.from(logits.data) : [];
    if (flat.length === 0) {
        return new Array(candidateCount).fill(Number.NEGATIVE_INFINITY);
    }
    const shape = logits?.dims ? Array.from(logits.dims, (value) => Number(value)) : [];
    let classCount;
    if (shape.length >= 2) {
        classCount = Math.max(1, shape[shape.length - 1] ?? 1);
    }
    else {
        // No usable shape information: assume an even split across candidates.
        classCount = Math.max(1, Math.floor(flat.length / candidateCount));
    }
    const result = [];
    for (let index = 0; index < candidateCount; index += 1) {
        const base = index * classCount;
        if (classCount === 1) {
            result.push(flat[base] ?? flat[index] ?? Number.NEGATIVE_INFINITY);
        }
        else {
            // Multi-class heads: take the positive-class logit (the last class).
            result.push(flat[base + classCount - 1] ?? Number.NEGATIVE_INFINITY);
        }
    }
    return result;
}
33
/**
 * Dependency-free fallback reranker.
 *
 * Blends each candidate's incoming score (weight 0.6) with the fraction of
 * query terms present in the candidate text (weight 0.4) and orders by the
 * blended score, descending. close() holds no resources and is a no-op.
 */
export function createLightweightReranker() {
    return {
        async rerank(query, candidates) {
            if (candidates.length === 0) {
                return [];
            }
            const queryTerms = new Set(toTerms(query));
            const termCount = queryTerms.size;
            const scored = candidates.map((candidate) => {
                const docTerms = new Set(toTerms(candidate.text));
                let overlap = 0;
                for (const term of queryTerms) {
                    if (docTerms.has(term)) {
                        overlap += 1;
                    }
                }
                // Fraction of query terms covered by this document (0 when the
                // query produced no usable terms).
                const coverage = termCount > 0 ? overlap / termCount : 0;
                return {
                    text: candidate.text,
                    score: candidate.score,
                    rerankerScore: 0.6 * candidate.score + 0.4 * coverage,
                };
            });
            scored.sort((left, right) => right.rerankerScore - left.rerankerScore);
            return scored;
        },
        async close() { },
    };
}
57
/**
 * Create a cross-encoder reranker backed by the MS MARCO MiniLM model
 * (see MODEL_NAME). Loads the tokenizer and sequence-classification model
 * once up front; rerank() then scores every (query, candidate) pair in a
 * single batched forward pass. Returns null (after a console warning) when
 * the model cannot be loaded — e.g. weights missing while remote downloads
 * are disabled.
 */
export async function createReranker() {
    try {
        // In production, never download model weights at runtime; rely on a
        // locally cached copy only.
        if (process.env.NODE_ENV === "production") {
            env.allowRemoteModels = false;
        }
        const localFilesOnly = env.allowRemoteModels === false;
        const tokenizer = await AutoTokenizer.from_pretrained(MODEL_NAME, {
            local_files_only: localFilesOnly,
        });
        const model = await AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, {
            local_files_only: localFilesOnly,
        });
        return {
            async rerank(query, candidates) {
                if (candidates.length === 0) {
                    return [];
                }
                // Batch as (query, document) text pairs — one row per candidate,
                // truncated/padded to MAX_INPUT_LENGTH tokens.
                const queries = candidates.map(() => query);
                const documents = candidates.map((candidate) => candidate.text);
                const inputs = tokenizer(queries, {
                    text_pair: documents,
                    padding: true,
                    truncation: true,
                    max_length: MAX_INPUT_LENGTH,
                });
                const outputs = await model(inputs);
                const scores = extractRerankerScores(outputs?.logits, candidates.length);
                // Preserve the original retrieval score; expose the model score
                // separately and order by it, descending. Candidates the model
                // produced no logit for sink to the bottom via -Infinity.
                const reranked = candidates.map((candidate, i) => ({
                    text: candidate.text,
                    score: candidate.score,
                    rerankerScore: scores[i] ?? Number.NEGATIVE_INFINITY,
                }));
                reranked.sort((a, b) => b.rerankerScore - a.rerankerScore);
                return reranked;
            },
            async close() {
                // Free native/model resources when the backend supports it.
                if (typeof model.dispose === "function") {
                    await model.dispose();
                }
            },
        };
    }
    catch (error) {
        // Loading failed (missing files, unsupported platform, ...): warn and
        // signal the caller to fall back (e.g. to createLightweightReranker).
        const message = error instanceof Error ? error.message : String(error);
        console.warn(`Engram: Cross-encoder reranker unavailable: ${message}`);
        return null;
    }
}
@@ -0,0 +1,33 @@
1
+ import type { EmbeddingClient, IndexManager, ScoredChunk, SearchOptions, SearchResult } from "./types.js";
2
+ import type { Reranker } from "./reranker.js";
3
/** Create a unique key for a chunk (for deduplication/merging) */
export declare function chunkKey(chunk: ScoredChunk): string;
/** Compute RRF scores for a ranked list of chunks */
export declare function computeRRFScores(chunks: ScoredChunk[], weight: number, k: number): Map<string, {
    chunk: ScoredChunk;
    score: number;
}>;
/** Apply exponential time decay to scores */
export declare function applyTimeDecay(results: Array<{
    chunk: ScoredChunk;
    score: number;
}>, halfLifeDays: number): void;
/** Apply source balancing — demote excess session results */
export declare function applySourceBalancing(results: Array<{
    chunk: ScoredChunk;
    score: number;
}>, maxSessionShare: number, maxResults: number): Array<{
    chunk: ScoredChunk;
    score: number;
}>;
/**
 * Hybrid search: fuses BM25 and vector results via reciprocal rank fusion,
 * optionally reranks the top candidates, applies time decay and source
 * balancing, then returns the top-scoring results.
 */
export declare function search(query: string, embedding: EmbeddingClient, index: IndexManager, options: SearchOptions & {
    /** RRF weight for vector results (default 0.7). */
    vectorWeight?: number;
    /** RRF weight for BM25 results (default 0.3). */
    bm25Weight?: number;
    /** RRF rank-smoothing constant (default 60). */
    rrfK?: number;
    /** Exponential decay of scores by chunk age. */
    timeDecay?: {
        enabled: boolean;
        halfLifeDays: number;
    };
    /** Maximum fraction of results drawn from session sources. */
    maxSessionShare?: number;
    /** Optional reranking stage; null/undefined keeps the fused ordering. */
    reranker?: Reranker | null;
}): Promise<SearchResult[]>;
package/dist/search.js ADDED
@@ -0,0 +1,203 @@
1
// Default fusion weights: vector similarity dominates BM25.
const DEFAULT_VECTOR_WEIGHT = 0.7;
const DEFAULT_BM25_WEIGHT = 0.3;
// Rank-smoothing constant in the reciprocal-rank-fusion denominator.
const DEFAULT_RRF_K = 60;
const DEFAULT_MAX_RESULTS = 10;
const DEFAULT_MIN_SCORE = 0.0;
// Fixed bonus for chunks that appear in both the BM25 and vector lists.
const CO_OCCURRENCE_BONUS = 0.05;
// How many candidates each retriever contributes before fusion.
const CANDIDATE_TOP_K = 50;
// How many fused candidates are handed to the optional reranker.
const RERANK_TOP_K = 20;
9
/** Indexing timestamp recorded on a chunk, or null when absent. */
function getChunkTimestampMs(chunk) {
    const indexedAt = chunk.indexedAt;
    return indexedAt === undefined || indexedAt === null ? null : indexedAt;
}
/** Create a unique key for a chunk (for deduplication/merging) */
export function chunkKey(chunk) {
    return [chunk.fileKey, chunk.startLine, chunk.endLine].join(":");
}
/**
 * Compute reciprocal-rank-fusion scores for an ordered candidate list.
 *
 * Each chunk receives weight / (k + rank) with rank being its 1-based
 * position; duplicate keys keep their first (best-ranked) score. An empty
 * list or non-positive weight yields an empty map; a non-finite or negative
 * k falls back to DEFAULT_RRF_K.
 */
export function computeRRFScores(chunks, weight, k) {
    const byKey = new Map();
    if (chunks.length === 0 || weight <= 0) {
        return byKey;
    }
    const effectiveK = Number.isFinite(k) && k >= 0 ? k : DEFAULT_RRF_K;
    chunks.forEach((chunk, position) => {
        const key = chunkKey(chunk);
        if (!byKey.has(key)) {
            byKey.set(key, { chunk, score: weight / (effectiveK + position + 1) });
        }
    });
    return byKey;
}
/**
 * Apply exponential time decay to scores, in place.
 *
 * A chunk loses half its score every `halfLifeDays`. Entries without a
 * timestamp are untouched; future timestamps count as age zero. Invalid
 * half-lives (non-finite or <= 0) disable decay entirely.
 */
export function applyTimeDecay(results, halfLifeDays) {
    if (!(Number.isFinite(halfLifeDays) && halfLifeDays > 0)) {
        return;
    }
    const now = Date.now();
    const MS_PER_DAY = 24 * 60 * 60 * 1000;
    for (const entry of results) {
        const indexedAtMs = getChunkTimestampMs(entry.chunk);
        if (indexedAtMs === null) {
            continue;
        }
        const ageInDays = Math.max(0, (now - indexedAtMs) / MS_PER_DAY);
        entry.score *= Math.pow(0.5, ageInDays / halfLifeDays);
    }
}
54
/**
 * Apply source balancing — demote excess session results.
 *
 * Non-session entries always pass through in order; entries whose chunk
 * source is "sessions" are kept only up to floor(maxSessionShare * maxResults)
 * slots. A share >= 1 (or non-finite) disables balancing and returns a
 * shallow copy; empty input or a non-positive maxResults yields [].
 */
export function applySourceBalancing(results, maxSessionShare, maxResults) {
    if (results.length === 0 || maxResults <= 0) {
        return [];
    }
    if (!Number.isFinite(maxSessionShare) || maxSessionShare >= 1) {
        return [...results];
    }
    // Session entries may occupy at most this many slots of the final set.
    const sessionBudget = Math.max(0, Math.floor(maxSessionShare * maxResults));
    const kept = [];
    let sessionsKept = 0;
    for (const entry of results) {
        const isSession = entry.chunk.source === "sessions";
        if (isSession && sessionsKept >= sessionBudget) {
            continue; // drop the excess session hit
        }
        if (isSession) {
            sessionsKept += 1;
        }
        kept.push(entry);
    }
    return kept;
}
77
/**
 * Hybrid search pipeline:
 *  1. Embed the query (failures degrade to BM25-only).
 *  2. Run BM25 and vector retrieval in parallel (each failure yields []).
 *  3. Fuse both lists with reciprocal rank fusion plus a co-occurrence bonus.
 *  4. Optionally rerank the top RERANK_TOP_K fused candidates.
 *  5. Apply optional time decay and source balancing.
 *  6. Filter by minScore, cap at maxResults, and map to SearchResult rows.
 */
export async function search(query, embedding, index, options) {
    const normalizedQuery = query.trim();
    if (normalizedQuery.length === 0) {
        return [];
    }
    const maxResults = options.maxResults ?? DEFAULT_MAX_RESULTS;
    if (maxResults <= 0) {
        return [];
    }
    const minScore = options.minScore ?? DEFAULT_MIN_SCORE;
    const vectorWeight = options.vectorWeight ?? DEFAULT_VECTOR_WEIGHT;
    const bm25Weight = options.bm25Weight ?? DEFAULT_BM25_WEIGHT;
    const rrfK = options.rrfK ?? DEFAULT_RRF_K;
    // Embedding failure is non-fatal: fall back to lexical-only retrieval.
    let queryVector = null;
    try {
        queryVector = await embedding.embedText(normalizedQuery, "RETRIEVAL_QUERY");
    }
    catch {
        queryVector = null;
    }
    // Both retrievers run concurrently; each degrades independently to [].
    const bm25Promise = Promise.resolve()
        .then(() => index.searchBM25(normalizedQuery, CANDIDATE_TOP_K))
        .catch(() => []);
    const vectorPromise = queryVector
        ? Promise.resolve()
            .then(() => index.searchVector(queryVector, CANDIDATE_TOP_K))
            .catch(() => [])
        : Promise.resolve([]);
    const [bm25Results, vectorResults] = await Promise.all([bm25Promise, vectorPromise]);
    if (bm25Results.length === 0 && vectorResults.length === 0) {
        return [];
    }
    // Per-list RRF scores keyed by chunk identity (fileKey:startLine:endLine).
    const bm25Scores = computeRRFScores(bm25Results, bm25Weight, rrfK);
    const vectorScores = computeRRFScores(vectorResults, vectorWeight, rrfK);
    const merged = new Map();
    // Sum scores across lists and track which retrievers found each chunk.
    const mergeScores = (scores, flags) => {
        for (const [key, value] of scores) {
            const existing = merged.get(key);
            if (existing) {
                existing.score += value.score;
                existing.inBM25 ||= flags.inBM25;
                existing.inVector ||= flags.inVector;
            }
            else {
                merged.set(key, {
                    chunk: value.chunk,
                    score: value.score,
                    inBM25: flags.inBM25,
                    inVector: flags.inVector,
                });
            }
        }
    };
    mergeScores(bm25Scores, { inBM25: true, inVector: false });
    mergeScores(vectorScores, { inBM25: false, inVector: true });
    // Chunks found by both retrievers receive a small fixed bonus.
    const fused = [];
    for (const entry of merged.values()) {
        if (entry.inBM25 && entry.inVector) {
            entry.score += CO_OCCURRENCE_BONUS;
        }
        fused.push({ chunk: entry.chunk, score: entry.score });
    }
    fused.sort((a, b) => b.score - a.score);
    if (options.reranker && fused.length > 0) {
        const topK = Math.min(RERANK_TOP_K, fused.length);
        const topCandidates = fused.slice(0, topK);
        try {
            const reranked = await options.reranker.rerank(normalizedQuery, topCandidates.map((row) => ({
                text: row.chunk.text,
                score: row.score,
            })));
            if (reranked.length > 0) {
                // The reranker returns only text+score, so map results back to
                // their chunk rows by text; duplicate texts are consumed in order.
                const candidatesByText = new Map();
                for (const row of topCandidates) {
                    const list = candidatesByText.get(row.chunk.text);
                    if (list) {
                        list.push(row);
                    }
                    else {
                        candidatesByText.set(row.chunk.text, [row]);
                    }
                }
                const rerankedRows = [];
                for (const row of reranked) {
                    const matches = candidatesByText.get(row.text);
                    const match = matches?.shift();
                    if (!match) {
                        continue;
                    }
                    // NOTE(review): the fused RRF score is overwritten with the raw
                    // reranker score here; the downstream minScore filter and time
                    // decay then operate on that scale, which can differ from the
                    // RRF scale (e.g. raw logits) — confirm this is intended.
                    match.score = row.rerankerScore;
                    rerankedRows.push(match);
                }
                // Any top candidate the reranker did not return keeps its spot
                // after the reranked rows; the remainder of the fused list follows.
                const consumed = new Set(rerankedRows);
                for (const row of topCandidates) {
                    if (!consumed.has(row)) {
                        rerankedRows.push(row);
                    }
                }
                const rest = fused.slice(topK);
                fused.length = 0;
                fused.push(...rerankedRows, ...rest);
            }
        }
        catch {
            // Ignore reranker failures and keep RRF ordering.
        }
    }
    if (options.timeDecay?.enabled === true) {
        applyTimeDecay(fused, options.timeDecay.halfLifeDays);
        fused.sort((a, b) => b.score - a.score);
    }
    const balanced = options.maxSessionShare === undefined
        ? fused
        : applySourceBalancing(fused, options.maxSessionShare, maxResults);
    return balanced
        .filter((row) => row.score >= minScore)
        .slice(0, maxResults)
        .map(({ chunk, score }) => ({
        path: chunk.fileKey,
        startLine: chunk.startLine,
        endLine: chunk.endLine,
        score,
        snippet: chunk.text,
        source: chunk.source,
        citation: `${chunk.fileKey}#L${chunk.startLine}-L${chunk.endLine}`,
    }));
}
@@ -0,0 +1,6 @@
1
+ import type { IndexManager } from "./types.js";
2
/**
 * Open (or create) the SQLite-backed index at `dbPath` and return the
 * IndexManager used for chunk storage, BM25 (FTS5) and vector search.
 */
export declare function createIndexManager(params: {
    /** SQLite database file path, or ":memory:". */
    dbPath: string;
    /** Embedding dimensionality; must be a positive integer. */
    dimensions: number;
    /** Workspace root used to sandbox readFileContent path resolution. */
    workspaceDir: string;
}): IndexManager;
package/dist/store.js ADDED
@@ -0,0 +1,272 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import Database from "better-sqlite3";
4
+ import * as sqliteVec from "sqlite-vec";
5
/** View a Float32Array's underlying bytes as a Node Buffer without copying. */
function toVectorBlob(vec) {
    const { buffer, byteOffset, byteLength } = vec;
    return Buffer.from(buffer, byteOffset, byteLength);
}
8
/**
 * Map a raw snake_case chunk row from SQLite plus a computed score into the
 * camelCase ScoredChunk shape used by the search layer. A null
 * heading_context becomes undefined.
 */
function toScoredChunk(chunk, score, indexedAt) {
    const headingContext = chunk.heading_context == null ? undefined : chunk.heading_context;
    const scored = {
        fileKey: chunk.file_key,
        startLine: chunk.start_line,
        endLine: chunk.end_line,
        text: chunk.text,
        score,
        source: chunk.source,
        headingContext,
        indexedAt,
    };
    return scored;
}
20
/**
 * Open (or create) a SQLite-backed index and return its IndexManager.
 *
 * Storage layout:
 *  - `files`      one row per indexed file (content hash + indexed_at),
 *  - `chunks`     text chunks with line ranges, mirrored into FTS5 for BM25,
 *  - `chunks_vec` (optional) sqlite-vec table for k-NN vector search.
 *
 * Vector support degrades gracefully: when the sqlite-vec extension fails
 * to load, searchVector returns [] and no vec table is created.
 *
 * Fix over the previous version: the chunk-embedding INSERT is prepared once
 * with bound parameters instead of being re-prepared per chunk with the
 * rowid string-interpolated into the SQL text.
 *
 * @param params.dbPath       SQLite file path or ":memory:".
 * @param params.dimensions   Embedding dimensionality (positive integer).
 * @param params.workspaceDir Root directory for readFileContent sandboxing.
 * @throws Error when dimensions is not a positive integer.
 */
export function createIndexManager(params) {
    const { dbPath, dimensions, workspaceDir } = params;
    if (!Number.isInteger(dimensions) || dimensions <= 0) {
        throw new Error(`Invalid vector dimensions: ${dimensions}`);
    }
    // ":memory:" databases have no containing directory to create.
    if (dbPath !== ":memory:") {
        fs.mkdirSync(path.dirname(dbPath), { recursive: true });
    }
    const db = new Database(dbPath);
    db.pragma("journal_mode = WAL");
    db.pragma("foreign_keys = ON");
    // Attempt to load sqlite-vec; on failure fall back to BM25-only mode.
    let vectorEnabled = true;
    try {
        sqliteVec.load(db);
    }
    catch {
        vectorEnabled = false;
    }
    db.exec(`
    CREATE TABLE IF NOT EXISTS files (
      file_key TEXT PRIMARY KEY,
      content_hash TEXT NOT NULL,
      source TEXT NOT NULL,
      indexed_at INTEGER NOT NULL
    );

    CREATE TABLE IF NOT EXISTS chunks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      file_key TEXT NOT NULL,
      start_line INTEGER NOT NULL,
      end_line INTEGER NOT NULL,
      text TEXT NOT NULL,
      heading_context TEXT,
      source TEXT NOT NULL,
      FOREIGN KEY (file_key) REFERENCES files(file_key)
    );

    CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
      text,
      content='chunks',
      content_rowid='id',
      tokenize='porter unicode61'
    );

    CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
      INSERT INTO chunks_fts(rowid, text) VALUES (new.id, new.text);
    END;

    CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
      INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES ('delete', old.id, old.text);
    END;

    CREATE TABLE IF NOT EXISTS meta (
      key TEXT PRIMARY KEY,
      value TEXT
    );
  `);
    if (vectorEnabled) {
        db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS chunks_vec USING vec0(
        chunk_id INTEGER PRIMARY KEY,
        embedding float[${dimensions}]
      );
    `);
    }
    const deleteVectorsByFileStmt = vectorEnabled
        ? db.prepare(`DELETE FROM chunks_vec WHERE chunk_id IN (SELECT id FROM chunks WHERE file_key = ?);`)
        : null;
    const deleteChunksByFileStmt = db.prepare(`DELETE FROM chunks WHERE file_key = ?;`);
    const deleteFileStmt = db.prepare(`DELETE FROM files WHERE file_key = ?;`);
    const insertFileStmt = db.prepare(`INSERT INTO files (file_key, content_hash, source, indexed_at) VALUES (?, ?, ?, ?);`);
    const insertChunkStmt = db.prepare(`INSERT INTO chunks (file_key, start_line, end_line, text, heading_context, source) VALUES (?, ?, ?, ?, ?, ?);`);
    // Prepared once; both the rowid and the embedding blob are bound at run().
    const insertVectorStmt = vectorEnabled
        ? db.prepare(`INSERT INTO chunks_vec (chunk_id, embedding) VALUES (?, ?);`)
        : null;
    const bm25Stmt = db.prepare(`
    SELECT
      c.id,
      c.file_key,
      c.start_line,
      c.end_line,
      c.text,
      c.heading_context,
      c.source,
      f.indexed_at,
      rank
    FROM chunks_fts fts
    JOIN chunks c ON c.id = fts.rowid
    JOIN files f ON f.file_key = c.file_key
    WHERE chunks_fts MATCH ?
    ORDER BY rank
    LIMIT ?;
  `);
    const vectorSearchStmt = vectorEnabled
        ? db.prepare(`
      SELECT chunk_id, distance
      FROM chunks_vec
      WHERE embedding MATCH ? AND k = ?
      ORDER BY distance;
    `)
        : null;
    const getChunkByIdStmt = db.prepare(`
    SELECT id, file_key, start_line, end_line, text, heading_context, source
    FROM chunks
    WHERE id = ?;
  `);
    const getFileIndexedAtStmt = db.prepare(`SELECT indexed_at FROM files WHERE file_key = ?;`);
    const getFileHashStmt = db.prepare(`SELECT content_hash FROM files WHERE file_key = ?;`);
    const fileCountStmt = db.prepare(`SELECT COUNT(*) AS count FROM files;`);
    const chunkCountStmt = db.prepare(`SELECT COUNT(*) AS count FROM chunks;`);
    const sourceStatsStmt = db.prepare(`
    SELECT
      s.source AS source,
      COALESCE(f.files, 0) AS files,
      COALESCE(c.chunks, 0) AS chunks
    FROM (
      SELECT 'memory' AS source
      UNION ALL
      SELECT 'sessions' AS source
    ) AS s
    LEFT JOIN (
      SELECT source, COUNT(*) AS files
      FROM files
      GROUP BY source
    ) AS f ON f.source = s.source
    LEFT JOIN (
      SELECT source, COUNT(*) AS chunks
      FROM chunks
      GROUP BY source
    ) AS c ON c.source = s.source;
  `);
    // Delete a file plus its chunks and vectors atomically.
    const removeFileTx = db.transaction((fileKey) => {
        deleteVectorsByFileStmt?.run(fileKey);
        deleteChunksByFileStmt.run(fileKey);
        deleteFileStmt.run(fileKey);
    });
    // Replace a file's row, chunks and vectors atomically; chunks[i] pairs
    // with vectors[i] and every vector must match the configured dimensions.
    const indexFileTx = db.transaction((file, chunks, vectors) => {
        if (chunks.length !== vectors.length) {
            throw new Error(`Chunk/vector length mismatch for "${file.fileKey}": ${chunks.length} chunks vs ${vectors.length} vectors`);
        }
        deleteVectorsByFileStmt?.run(file.fileKey);
        deleteChunksByFileStmt.run(file.fileKey);
        deleteFileStmt.run(file.fileKey);
        insertFileStmt.run(file.fileKey, file.contentHash, file.source, file.indexedAt);
        for (let i = 0; i < chunks.length; i += 1) {
            const chunk = chunks[i];
            const vector = vectors[i];
            if (vector.length !== dimensions) {
                throw new Error(`Vector dimension mismatch at chunk ${i} for "${file.fileKey}": expected ${dimensions}, received ${vector.length}`);
            }
            const insertResult = insertChunkStmt.run(file.fileKey, chunk.startLine, chunk.endLine, chunk.text, chunk.headingContext ?? null, file.source);
            if (vectorEnabled) {
                // Bind the fresh rowid instead of interpolating it into SQL text,
                // and reuse the statement prepared above rather than re-preparing
                // it on every chunk.
                const chunkId = Number(insertResult.lastInsertRowid);
                insertVectorStmt?.run(chunkId, toVectorBlob(vector));
            }
        }
    });
    return {
        /** Insert or replace a file together with its chunks and embeddings. */
        indexFile(file, chunks, vectors) {
            indexFileTx(file, chunks, vectors);
        },
        /** Delete a file and every row derived from it. */
        removeFile(fileKey) {
            removeFileTx(fileKey);
        },
        /** Full-text (BM25) search; invalid MATCH queries yield [] rather than throwing. */
        searchBM25(query, topK) {
            if (topK <= 0) {
                return [];
            }
            try {
                const rows = bm25Stmt.all(query, topK);
                return rows.map((row) => {
                    // FTS5 rank: more negative is better — fold into (0, 1].
                    const score = 1 / (1 + Math.abs(row.rank));
                    return toScoredChunk(row, score, row.indexed_at);
                });
            }
            catch {
                // User-supplied query with invalid FTS syntax: treat as no results.
                return [];
            }
        },
        /** k-NN vector search; returns [] when sqlite-vec is unavailable. */
        searchVector(queryVec, topK) {
            if (!vectorEnabled || vectorSearchStmt === null || topK <= 0) {
                return [];
            }
            const hits = vectorSearchStmt.all(toVectorBlob(queryVec), topK);
            if (hits.length === 0) {
                return [];
            }
            const results = [];
            for (const hit of hits) {
                const chunk = getChunkByIdStmt.get(hit.chunk_id);
                if (!chunk) {
                    continue; // chunk deleted since the vector row was written
                }
                const indexedAt = getFileIndexedAtStmt.get(chunk.file_key)?.indexed_at;
                // Map distance (smaller is better) into (0, 1].
                const score = 1 / (1 + hit.distance);
                results.push(toScoredChunk(chunk, score, indexedAt));
            }
            return results;
        },
        /** Content hash recorded at index time, or null when the file is unknown. */
        getFileHash(fileKey) {
            const row = getFileHashStmt.get(fileKey);
            return row?.content_hash ?? null;
        },
        /**
         * Read a workspace file, optionally restricted to a 1-based line window.
         * Rejects paths that normalize or resolve (via symlinks) outside
         * workspaceDir; returns null for missing or out-of-tree paths.
         */
        readFileContent(relPath, from, lines) {
            const normalizedRelPath = path.posix
                .normalize(relPath.replace(/\\/g, "/").replace(/^\.\/+/, ""))
                .replace(/^\/+/, "");
            if (normalizedRelPath.length === 0 ||
                normalizedRelPath === "." ||
                normalizedRelPath === ".." ||
                normalizedRelPath.startsWith("../")) {
                return null;
            }
            const absPath = path.resolve(workspaceDir, normalizedRelPath);
            if (!fs.existsSync(absPath)) {
                return null;
            }
            // realpath comparison defends against symlink escapes from the workspace.
            const realWorkspace = fs.realpathSync(workspaceDir);
            const realAbs = fs.realpathSync(absPath);
            if (realAbs !== realWorkspace && !realAbs.startsWith(`${realWorkspace}${path.sep}`)) {
                return null;
            }
            const text = fs.readFileSync(absPath, "utf8");
            if (from === undefined && lines === undefined) {
                return { text, path: normalizedRelPath };
            }
            const split = text.split(/\r?\n/);
            const start = Math.max((from ?? 1) - 1, 0);
            const count = lines === undefined ? split.length - start : Math.max(lines, 0);
            const slicedText = split.slice(start, start + count).join("\n");
            return { text: slicedText, path: normalizedRelPath };
        },
        /** Aggregate file/chunk counts (overall and per source) for status reporting. */
        stats() {
            const fileCount = fileCountStmt.get()?.count ?? 0;
            const chunkCount = chunkCountStmt.get()?.count ?? 0;
            const sources = sourceStatsStmt.all().map((row) => ({
                source: row.source,
                files: Number(row.files),
                chunks: Number(row.chunks),
            }));
            return {
                files: Number(fileCount),
                chunks: Number(chunkCount),
                sources,
                dbPath,
                vectorDims: vectorEnabled ? dimensions : 0,
            };
        },
        /** Close the underlying database handle (safe to call more than once). */
        close() {
            if (db.open) {
                db.close();
            }
        },
    };
}
package/dist/sync.d.ts ADDED
@@ -0,0 +1,31 @@
1
+ import { type Chunk, type EmbeddingClient, type IndexManager, type MediaModality, type SyncOptions } from "./types.js";
2
/** Keeps the search index in sync with workspace files and session logs. */
export interface SyncManager {
    /** Run a sync pass over indexable content. */
    sync(opts?: SyncOptions): Promise<void>;
    /** Flag the index as stale so the next syncIfDirty() triggers a sync. */
    markDirty(): void;
    /** Whether the index is currently flagged as stale. */
    isDirty(): boolean;
    /** Start watching for changes; debounce and periodic-interval knobs are optional. */
    startWatching(opts?: {
        debounceMs?: number;
        intervalMinutes?: number;
    }): void;
    // NOTE(review): semantics inferred from the name — presumably eagerly
    // indexes the given (or current) session; confirm against the implementation.
    warmSession(sessionKey?: string): Promise<void>;
    /** Sync only when the dirty flag is set. */
    syncIfDirty(): void;
    /** Stop watchers/timers and release resources. */
    close(): void;
}
/**
 * Flatten a JSONL session transcript into plain text suitable for chunking.
 * Returns the flattened text plus `lineMap`, which relates flattened lines
 * back to source lines, or null when the content cannot be flattened.
 * NOTE(review): behavior inferred from the signature — implementation not in view.
 */
export declare function flattenSessionJsonl(content: string): {
    text: string;
    lineMap: number[];
} | null;
/**
 * Rewrite chunk line ranges through `lineMap` (presumably flattened-text
 * lines back to original lines; the void return suggests in-place mutation —
 * confirm against the implementation).
 */
export declare function remapChunkLines(chunks: Chunk[], lineMap: number[]): void;
/**
 * Build a SyncManager bound to a workspace, index, and embedding client.
 * Chunk granularity is controlled by chunkTokens/chunkOverlap; session and
 * multimodal indexing are optional.
 */
export declare function createSyncManager(params: {
    workspaceDir: string;
    index: IndexManager;
    embedding: EmbeddingClient;
    chunkTokens: number;
    chunkOverlap: number;
    sessionsDir?: string;
    multimodal?: {
        enabled: boolean;
        modalities: MediaModality[];
        maxFileBytes?: number;
    };
}): SyncManager;