lmgrep 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +559 -13
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +32 -0
- package/dist/index.js +180 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/build.d.ts +7 -0
- package/dist/lib/build.js +222 -0
- package/dist/lib/build.js.map +1 -0
- package/dist/{chunker → lib/chunker}/context.d.ts +0 -4
- package/dist/{chunker → lib/chunker}/context.js +3 -76
- package/dist/lib/chunker/context.js.map +1 -0
- package/dist/lib/chunker/index.d.ts +7 -0
- package/dist/{chunker → lib/chunker}/index.js +10 -5
- package/dist/lib/chunker/index.js.map +1 -0
- package/dist/lib/chunker/languages.js.map +1 -0
- package/dist/lib/config.d.ts +12 -0
- package/dist/lib/config.js +82 -0
- package/dist/lib/config.js.map +1 -0
- package/dist/lib/embedder.d.ts +55 -0
- package/dist/lib/embedder.js +221 -0
- package/dist/lib/embedder.js.map +1 -0
- package/dist/{providers.js → lib/providers.js} +0 -2
- package/dist/lib/providers.js.map +1 -0
- package/dist/lib/repair.d.ts +3 -0
- package/dist/lib/repair.js +104 -0
- package/dist/lib/repair.js.map +1 -0
- package/dist/lib/scanner.d.ts +34 -0
- package/dist/lib/scanner.js +219 -0
- package/dist/lib/scanner.js.map +1 -0
- package/dist/lib/serve.d.ts +11 -0
- package/dist/lib/serve.js +124 -0
- package/dist/lib/serve.js.map +1 -0
- package/dist/lib/store.d.ts +147 -0
- package/dist/lib/store.js +673 -0
- package/dist/lib/store.js.map +1 -0
- package/dist/lib/types.d.ts +118 -0
- package/dist/lib/types.js +13 -0
- package/dist/lib/types.js.map +1 -0
- package/dist/mcp.js +160 -45
- package/dist/mcp.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunker/context.js.map +0 -1
- package/dist/chunker/index.d.ts +0 -3
- package/dist/chunker/index.js.map +0 -1
- package/dist/chunker/languages.js.map +0 -1
- package/dist/config.d.ts +0 -2
- package/dist/config.js +0 -31
- package/dist/config.js.map +0 -1
- package/dist/embedder.d.ts +0 -3
- package/dist/embedder.js +0 -55
- package/dist/embedder.js.map +0 -1
- package/dist/index-cmd.d.ts +0 -9
- package/dist/index-cmd.js +0 -250
- package/dist/index-cmd.js.map +0 -1
- package/dist/providers.js.map +0 -1
- package/dist/repair-cmd.d.ts +0 -5
- package/dist/repair-cmd.js +0 -112
- package/dist/repair-cmd.js.map +0 -1
- package/dist/search-cmd.d.ts +0 -10
- package/dist/search-cmd.js +0 -60
- package/dist/search-cmd.js.map +0 -1
- package/dist/serve-cmd.d.ts +0 -1
- package/dist/serve-cmd.js +0 -139
- package/dist/serve-cmd.js.map +0 -1
- package/dist/status-cmd.d.ts +0 -5
- package/dist/status-cmd.js +0 -119
- package/dist/status-cmd.js.map +0 -1
- package/dist/store.d.ts +0 -25
- package/dist/store.js +0 -207
- package/dist/store.js.map +0 -1
- package/dist/types.d.ts +0 -40
- package/dist/types.js +0 -2
- package/dist/types.js.map +0 -1
- package/dist/walker.d.ts +0 -3
- package/dist/walker.js +0 -90
- package/dist/walker.js.map +0 -1
- /package/dist/{chunker → lib/chunker}/languages.d.ts +0 -0
- /package/dist/{chunker → lib/chunker}/languages.js +0 -0
- /package/dist/{providers.d.ts → lib/providers.d.ts} +0 -0
@@ -0,0 +1,673 @@
import { connect } from "@lancedb/lancedb";
import { execSync } from "node:child_process";
import { createHash } from "node:crypto";
import { mkdirSync, statSync, writeFileSync, readFileSync, readdirSync, existsSync, unlinkSync, } from "node:fs";
import { homedir } from "node:os";
import { join, resolve } from "node:path";
const CHUNKS_TABLE = "chunks";
const FILES_TABLE = "files";
const DELETE_BATCH_SIZE = 50;
function buildInFilter(column, values) {
    const escaped = values.map((v) => `'${v.replace(/'/g, "''")}'`);
    return `${column} IN (${escaped.join(", ")})`;
}
async function batchDelete(table, column, values) {
    for (let i = 0; i < values.length; i += DELETE_BATCH_SIZE) {
        const batch = values.slice(i, i + DELETE_BATCH_SIZE);
        await table.delete(buildInFilter(column, batch));
    }
}
function git(cwd, ...args) {
    try {
        return execSync(`git ${args.join(" ")}`, {
            cwd,
            stdio: ["ignore", "pipe", "ignore"],
            timeout: 5000,
        })
            .toString()
            .trim();
    }
    catch {
        return undefined;
    }
}
/**
 * Resolve the project identity for a directory.
 *
 * For git repos: uses the remote origin URL as identity so that multiple
 * worktrees of the same repo share one index.
 *
 * For non-git directories: falls back to hashing the absolute path.
 *
 * Returns { id, root } where id is the string to hash for the DB path,
 * and root is the project root directory (git toplevel or cwd).
 */
export function resolveProject(cwd) {
    const absolute = resolve(cwd);
    const gitRoot = git(absolute, "rev-parse", "--show-toplevel");
    if (gitRoot) {
        const branch = git(gitRoot, "rev-parse", "--abbrev-ref", "HEAD") ?? "HEAD";
        const remoteUrl = git(gitRoot, "remote", "get-url", "origin");
        if (remoteUrl) {
            return { id: remoteUrl, root: gitRoot, branch };
        }
        // Git repo with no remote — use the git root path
        return { id: gitRoot, root: gitRoot, branch };
    }
    return { id: absolute, root: absolute, branch: "_default" };
}
function buildSlug(id, root) {
    const hash = createHash("sha256").update(id).digest("hex").slice(0, 8);
    const parts = root.split("/").filter(Boolean);
    const slug = parts.slice(-2).join("-").replace(/[^a-zA-Z0-9_-]/g, "_");
    return `${slug}-${hash}`;
}
export function getDbPath(cwd) {
    const { id, root } = resolveProject(cwd);
    return join(homedir(), ".local", "state", "lmgrep", buildSlug(id, root));
}
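// Illustrative sketch (not part of this file): how resolveProject and
// getDbPath combine into an on-disk index path. The paths are hypothetical,
// and <hash8> stands for the first 8 hex chars of sha256(id) from buildSlug.
//
//   resolveProject("/home/me/src/lmgrep")
//     → { id: <origin URL, or root path if no remote>, root: "/home/me/src/lmgrep", branch: "main" }
//   getDbPath("/home/me/src/lmgrep")
//     → "<homedir>/.local/state/lmgrep/src-lmgrep-<hash8>"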
/**
 * Compute the DB path using the pre-git-aware scheme (absolute path hash).
 * Used by `lmgrep import` to find legacy indexes.
 */
export function getLegacyDbPath(cwd) {
    const absolute = resolve(cwd);
    const hash = createHash("sha256").update(absolute).digest("hex").slice(0, 6);
    const parts = absolute.split("/").filter(Boolean);
    const slug = parts.slice(-2).join("-").replace(/[^a-zA-Z0-9_-]/g, "_");
    return join(homedir(), ".local", "state", "lmgrep", `${slug}-${hash}`);
}
/**
 * Find the project root and compute the prefix (subdirectory offset).
 * For git repos, the root is the git toplevel. For non-git dirs, walks up
 * looking for an existing index.
 */
export function findIndexedAncestor(cwd) {
    const absolute = resolve(cwd);
    const { root } = resolveProject(cwd);
    // For git repos, the root is always the git toplevel
    const dbPath = getDbPath(root);
    try {
        if (statSync(dbPath).isDirectory()) {
            const prefix = root === absolute ? "" : absolute.slice(root.length + 1);
            return { root, prefix };
        }
    }
    catch {
        // no index yet
    }
    // For non-git dirs, walk up looking for an ancestor with an index
    if (root === absolute) {
        let current = resolve(absolute, "..");
        while (true) {
            const ancestorDb = getDbPath(current);
            try {
                if (statSync(ancestorDb).isDirectory()) {
                    const prefix = absolute.slice(current.length + 1);
                    return { root: current, prefix };
                }
            }
            catch {
                // keep climbing
            }
            const parent = resolve(current, "..");
            if (parent === current)
                break;
            current = parent;
        }
    }
    return undefined;
}
// --- Project metadata ---
const METADATA_FILE = "lmgrep.json";
/**
 * Extract the base model family name from a full model string.
 * Strips provider prefix (e.g. "openai:", "ollama:") and quantization/tag
 * suffixes (e.g. ":Q4_K_M", ":latest", ":fp16").
 *
 * Examples:
 *   "openai:nomic-embed-text" → "nomic-embed-text"
 *   "ollama:nomic-embed-text:Q4_K_M" → "nomic-embed-text"
 *   "lmstudio:bge-large-en:fp16" → "bge-large-en"
 *   "openai:text-embedding-3-small" → "text-embedding-3-small"
 */
export function extractModelFamily(model) {
    // Strip provider prefix (first colon-separated segment)
    const colonIdx = model.indexOf(":");
    if (colonIdx === -1)
        return model;
    const rest = model.slice(colonIdx + 1);
    // Strip quant/tag suffix: known patterns like Q4_K_M, Q8_0, fp16, latest, etc.
    // These appear as the last colon-separated segment
    const lastColon = rest.lastIndexOf(":");
    if (lastColon === -1)
        return rest;
    const suffix = rest.slice(lastColon + 1);
    // Match common quantization and tag patterns
    if (/^(Q\d|q\d|fp\d|f\d|latest|gguf|ggml)/i.test(suffix)) {
        return rest.slice(0, lastColon);
    }
    // Not a recognized suffix — keep the whole thing (could be part of model name)
    return rest;
}
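// Illustrative sketch (not part of this file): the docstring examples above,
// restated as runnable assertions against extractModelFamily.
//
//   console.assert(extractModelFamily("openai:nomic-embed-text") === "nomic-embed-text");
//   console.assert(extractModelFamily("ollama:nomic-embed-text:Q4_K_M") === "nomic-embed-text");
//   console.assert(extractModelFamily("lmstudio:bge-large-en:fp16") === "bge-large-en");
//   console.assert(extractModelFamily("openai:text-embedding-3-small") === "text-embedding-3-small");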
export function writeProjectMetadata(cwd, extra) {
    const dbPath = getDbPath(cwd);
    const { id, root, branch } = resolveProject(cwd);
    const gitRoot = git(resolve(cwd), "rev-parse", "--show-toplevel");
    const remote = gitRoot
        ? git(gitRoot, "remote", "get-url", "origin") ?? undefined
        : undefined;
    mkdirSync(dbPath, { recursive: true });
    // Preserve existing model/dimensions if not provided (don't overwrite baseline)
    const existing = readProjectMetadata(dbPath);
    const metadata = {
        root,
        remote,
        branch,
        indexedAt: new Date().toISOString(),
        model: existing?.model ?? extra?.model,
        dimensions: existing?.dimensions ?? extra?.dimensions,
    };
    writeFileSync(join(dbPath, METADATA_FILE), JSON.stringify(metadata, null, 2));
}
export function readProjectMetadata(dbPath) {
    const metaPath = join(dbPath, METADATA_FILE);
    try {
        return JSON.parse(readFileSync(metaPath, "utf-8"));
    }
    catch {
        return undefined;
    }
}
/**
 * Scan all lmgrep indexes and return their metadata.
 */
export function discoverIndexedProjects() {
    const baseDir = join(homedir(), ".local", "state", "lmgrep");
    if (!existsSync(baseDir))
        return [];
    const results = [];
    for (const entry of readdirSync(baseDir, { withFileTypes: true })) {
        if (!entry.isDirectory())
            continue;
        const dbPath = join(baseDir, entry.name);
        const metadata = readProjectMetadata(dbPath);
        if (metadata) {
            results.push({ dbPath, metadata });
        }
    }
    return results;
}
// --- DB-level write lock ---
function isProcessAlive(pid) {
    try {
        process.kill(pid, 0);
        return true;
    }
    catch {
        return false;
    }
}
/**
 * Acquire an exclusive write lock for a project's DB.
 * Returns true if the lock was acquired, false if another process holds it.
 */
export function acquireDbLock(cwd) {
    const lockPath = `${getDbPath(cwd)}.lock`;
    if (existsSync(lockPath)) {
        try {
            const pid = Number.parseInt(readFileSync(lockPath, "utf-8").trim(), 10);
            if (isProcessAlive(pid))
                return false;
        }
        catch {
            // stale lock, take over
        }
    }
    const dbPath = getDbPath(cwd);
    mkdirSync(dbPath, { recursive: true });
    writeFileSync(lockPath, `${process.pid}\n`);
    return true;
}
/**
 * Release the write lock for a project's DB.
 */
export function releaseDbLock(cwd) {
    try {
        unlinkSync(`${getDbPath(cwd)}.lock`);
    }
    catch { }
}
/**
 * Check if a write lock is held by a live process.
 */
export function isDbLocked(cwd) {
    const lockPath = `${getDbPath(cwd)}.lock`;
    if (!existsSync(lockPath))
        return false;
    try {
        const pid = Number.parseInt(readFileSync(lockPath, "utf-8").trim(), 10);
        return isProcessAlive(pid);
    }
    catch {
        return false;
    }
}
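// Illustrative sketch (not part of this file): the intended acquire/release
// pattern around a write, assuming `cwd` is the project directory.
//
//   if (!acquireDbLock(cwd)) {
//       console.error("another lmgrep process is writing to this index");
//       process.exit(1);
//   }
//   try {
//       // ... indexing work ...
//   }
//   finally {
//       releaseDbLock(cwd);
//   }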
function getProcessInfo(pid) {
    try {
        const name = readFileSync(`/proc/${pid}/comm`, "utf-8").trim();
        const cmdline = readFileSync(`/proc/${pid}/cmdline`, "utf-8")
            .replace(/\0/g, " ")
            .trim();
        return { name, cmdline };
    }
    catch {
        return undefined;
    }
}
function classifyProcess(info) {
    if (info.name === "lmgrep-mcp" || info.cmdline.includes("mcp"))
        return "mcp";
    if (info.cmdline.includes("serve"))
        return "serve";
    return "cli";
}
/**
 * Scan all lock files to find running lmgrep processes,
 * which indexes they hold, and whether they are watching for changes.
 */
export function discoverRunningProcesses() {
    const baseDir = join(homedir(), ".local", "state", "lmgrep");
    if (!existsSync(baseDir))
        return [];
    const results = [];
    const seen = new Set();
    for (const entry of readdirSync(baseDir)) {
        if (!entry.endsWith(".lock"))
            continue;
        const lockPath = join(baseDir, entry);
        let pid;
        try {
            pid = Number.parseInt(readFileSync(lockPath, "utf-8").trim(), 10);
        }
        catch {
            continue;
        }
        if (!isProcessAlive(pid) || seen.has(pid))
            continue;
        seen.add(pid);
        const info = getProcessInfo(pid);
        if (!info)
            continue;
        const kind = classifyProcess(info);
        // Resolve which project this lock belongs to
        const dbDir = entry.slice(0, -".lock".length);
        const dbPath = join(baseDir, dbDir);
        const metadata = readProjectMetadata(dbPath);
        results.push({
            pid,
            processName: info.name,
            cmdline: info.cmdline,
            kind,
            projectRoot: metadata?.root,
            // MCP and serve processes watch; plain CLI invocations don't
            watching: kind === "mcp" || kind === "serve",
        });
    }
    return results;
}
export class Store {
    dbPath;
    branch;
    db;
    chunksTable;
    filesTable;
    constructor(dbPath, branch = "_default") {
        this.dbPath = dbPath;
        this.branch = branch;
    }
    static forProject(cwd) {
        const { branch } = resolveProject(cwd);
        return new Store(getDbPath(cwd), branch);
    }
    // --- Connection ---
    async connection() {
        if (this.db)
            return this.db;
        mkdirSync(this.dbPath, { recursive: true });
        this.db = await connect(this.dbPath);
        return this.db;
    }
    async openChunks() {
        if (this.chunksTable)
            return this.chunksTable;
        const conn = await this.connection();
        const tables = await conn.tableNames();
        if (tables.includes(CHUNKS_TABLE)) {
            this.chunksTable = await conn.openTable(CHUNKS_TABLE);
            return this.chunksTable;
        }
        return undefined;
    }
    async openFiles() {
        if (this.filesTable)
            return this.filesTable;
        const conn = await this.connection();
        const tables = await conn.tableNames();
        if (tables.includes(FILES_TABLE)) {
            this.filesTable = await conn.openTable(FILES_TABLE);
            return this.filesTable;
        }
        return undefined;
    }
    // --- Chunks ---
    async addChunks(chunks) {
        if (chunks.length === 0)
            return;
        const conn = await this.connection();
        const records = chunks.map((c) => ({
            id: c.id,
            filePath: c.filePath,
            startLine: c.startLine,
            endLine: c.endLine,
            type: c.type,
            name: c.name,
            content: c.content,
            context: c.context,
            hash: c.hash,
            vector: c.vector,
        }));
        const tables = await conn.tableNames();
        if (tables.includes(CHUNKS_TABLE)) {
            const t = await conn.openTable(CHUNKS_TABLE);
            this.chunksTable = t;
            await t.add(records);
        }
        else {
            this.chunksTable = await conn.createTable(CHUNKS_TABLE, records);
        }
    }
    /**
     * Delete chunks for files that are no longer referenced by ANY branch.
     * If another branch still has a file hash entry for a given path,
     * the chunks are kept (they're shared via content-addressing).
     */
    async deleteChunksByFiles(filePaths) {
        const t = await this.openChunks();
        if (!t || filePaths.length === 0)
            return;
        const filesTable = await this.openFiles();
        if (!filesTable) {
            // No files table means no other branches — safe to delete all
            await batchDelete(t, "filePath", filePaths);
            return;
        }
        // Find which files are still referenced by other branches
        const escaped = this.branch.replace(/'/g, "''");
        const stillReferenced = new Set();
        for (let i = 0; i < filePaths.length; i += DELETE_BATCH_SIZE) {
            const batch = filePaths.slice(i, i + DELETE_BATCH_SIZE);
            const pathFilter = buildInFilter("filePath", batch);
            const refs = await filesTable
                .query()
                .where(`branch != '${escaped}' AND ${pathFilter}`)
                .select(["filePath"])
                .toArray();
            for (const r of refs) {
                stillReferenced.add(r.filePath);
            }
        }
        // Only delete chunks for files not referenced by other branches
        const toDelete = filePaths.filter((fp) => !stillReferenced.has(fp));
        if (toDelete.length > 0) {
            await batchDelete(t, "filePath", toDelete);
        }
    }
    async search(queryVector, limit = 25, filePrefix, typeFilter) {
        const t = await this.openChunks();
        if (!t) {
            throw new Error("No index found. Run `lmgrep index` first.");
        }
        let query = t.search(queryVector).limit(limit);
        const conditions = [];
        if (filePrefix) {
            conditions.push(`filePath LIKE '${filePrefix.replace(/'/g, "''")}%'`);
        }
        if (typeFilter && typeFilter.length > 0) {
            const escaped = typeFilter.map((t) => `'${t.replace(/'/g, "''")}'`);
            conditions.push(`type IN (${escaped.join(", ")})`);
        }
        if (conditions.length > 0) {
            query = query.where(conditions.join(" AND "));
        }
        const results = await query.toArray();
        return results.map((r) => ({
            filePath: r.filePath,
            startLine: r.startLine,
            endLine: r.endLine,
            type: r.type,
            name: r.name,
            content: r.content,
            context: r.context,
            score: r._distance != null ? 1 - r._distance : 0,
        }));
    }
    async getIndexedFiles() {
        const t = await this.openChunks();
        if (!t)
            return new Map();
        const rows = await t.query().select(["filePath", "hash"]).toArray();
        const map = new Map();
        for (const row of rows) {
            const fp = row.filePath;
            const hash = row.hash;
            const existing = map.get(fp) ?? [];
            existing.push(hash);
            map.set(fp, existing);
        }
        return map;
    }
    async getIndexedHashes() {
        const t = await this.openChunks();
        if (!t)
            return new Set();
        const rows = await t.query().select(["hash"]).toArray();
        return new Set(rows.map((r) => r.hash));
    }
    /**
     * Given a set of chunk hashes, return those that already exist in the
     * chunks table. Runs as batched IN() queries in the DB.
     */
    async filterExistingChunkHashes(hashes) {
        const t = await this.openChunks();
        if (!t || hashes.length === 0)
            return new Set();
        const existing = new Set();
        const unique = [...new Set(hashes)];
        for (let i = 0; i < unique.length; i += DELETE_BATCH_SIZE) {
            const batch = unique.slice(i, i + DELETE_BATCH_SIZE);
            const filter = buildInFilter("hash", batch);
            const rows = await t
                .query()
                .where(filter)
                .select(["hash"])
                .toArray();
            for (const r of rows) {
                existing.add(r.hash);
            }
        }
        return existing;
    }
    async chunkCount() {
        const t = await this.openChunks();
        if (!t)
            return 0;
        return await t.countRows();
    }
    // --- File hashes (change detection) ---
    async getFileHashes() {
        const t = await this.openFiles();
        if (!t)
            return new Map();
        const escaped = this.branch.replace(/'/g, "''");
        const rows = await t
            .query()
            .where(`branch = '${escaped}'`)
            .select(["filePath", "fileHash"])
            .toArray();
        const map = new Map();
        for (const row of rows) {
            map.set(row.filePath, row.fileHash);
        }
        return map;
    }
    /**
     * Given a set of file hashes, return those that already exist in the
     * files table on ANY branch. The query runs in the DB, not in JS.
     */
    async filterKnownFileHashes(hashes) {
        const t = await this.openFiles();
        if (!t || hashes.length === 0)
            return new Set();
        const known = new Set();
        for (let i = 0; i < hashes.length; i += DELETE_BATCH_SIZE) {
            const batch = hashes.slice(i, i + DELETE_BATCH_SIZE);
            const filter = buildInFilter("fileHash", batch);
            const rows = await t
                .query()
                .where(filter)
                .select(["fileHash"])
                .toArray();
            for (const r of rows) {
                known.add(r.fileHash);
            }
        }
        return known;
    }
    async upsertFileHashes(entries) {
        if (entries.length === 0)
            return;
        const records = entries.map((e) => ({
            ...e,
            branch: this.branch,
        }));
        const conn = await this.connection();
        const tables = await conn.tableNames();
        if (tables.includes(FILES_TABLE)) {
            const t = await conn.openTable(FILES_TABLE);
            this.filesTable = t;
            // Delete existing entries for this branch + these file paths
            const escaped = this.branch.replace(/'/g, "''");
            for (let i = 0; i < entries.length; i += DELETE_BATCH_SIZE) {
                const batch = entries.slice(i, i + DELETE_BATCH_SIZE);
                const pathFilter = buildInFilter("filePath", batch.map((e) => e.filePath));
                await t.delete(`branch = '${escaped}' AND ${pathFilter}`);
            }
            await t.add(records);
        }
        else {
            this.filesTable = await conn.createTable(FILES_TABLE, records);
        }
    }
    async deleteFileHashes(filePaths) {
        const t = await this.openFiles();
        if (!t || filePaths.length === 0)
            return;
        const escaped = this.branch.replace(/'/g, "''");
        for (let i = 0; i < filePaths.length; i += DELETE_BATCH_SIZE) {
            const batch = filePaths.slice(i, i + DELETE_BATCH_SIZE);
            const pathFilter = buildInFilter("filePath", batch);
            await t.delete(`branch = '${escaped}' AND ${pathFilter}`);
        }
    }
    // --- Admin ---
    async reset() {
        const conn = await this.connection();
        const tables = await conn.tableNames();
        if (tables.includes(CHUNKS_TABLE))
            await conn.dropTable(CHUNKS_TABLE);
        if (tables.includes(FILES_TABLE))
            await conn.dropTable(FILES_TABLE);
        this.chunksTable = undefined;
        this.filesTable = undefined;
    }
    async compact() {
        const t = await this.openChunks();
        if (t)
            await t.optimize();
        const f = await this.openFiles();
        if (f)
            await f.optimize();
    }
    /**
     * Import all chunks and file hashes from another Store's database.
     * Returns { chunks, files } counts of imported records.
     */
    async importFrom(sourcePath) {
        const sourceConn = await connect(sourcePath);
        const sourceTables = await sourceConn.tableNames();
        let chunks = 0;
        let files = 0;
        if (sourceTables.includes(CHUNKS_TABLE)) {
            const sourceChunks = await sourceConn.openTable(CHUNKS_TABLE);
            const rawChunkRows = await sourceChunks.query().toArray();
            if (rawChunkRows.length > 0) {
                // Convert Arrow typed arrays to plain JS objects so LanceDB can
                // infer the schema when creating a new table.
                const rows = rawChunkRows.map((r) => ({
                    id: r.id,
                    filePath: r.filePath,
                    startLine: r.startLine,
                    endLine: r.endLine,
                    type: r.type,
                    name: r.name,
                    content: r.content,
                    context: r.context,
                    hash: r.hash,
                    vector: Array.from(r.vector),
                }));
                const conn = await this.connection();
                const destTables = await conn.tableNames();
                if (destTables.includes(CHUNKS_TABLE)) {
                    const t = await conn.openTable(CHUNKS_TABLE);
                    this.chunksTable = t;
                    await t.add(rows);
                }
                else {
                    this.chunksTable = await conn.createTable(CHUNKS_TABLE, rows);
                }
                chunks = rows.length;
            }
        }
        if (sourceTables.includes(FILES_TABLE)) {
            const sourceFiles = await sourceConn.openTable(FILES_TABLE);
            const rawRows = await sourceFiles.query().toArray();
            if (rawRows.length > 0) {
                // Ensure branch column exists (legacy DBs won't have it)
                const rows = rawRows.map((r) => ({
                    filePath: r.filePath,
                    fileHash: r.fileHash,
                    branch: r.branch ?? this.branch,
                }));
                const conn = await this.connection();
                const destTables = await conn.tableNames();
                if (destTables.includes(FILES_TABLE)) {
                    const t = await conn.openTable(FILES_TABLE);
                    this.filesTable = t;
                    await t.add(rows);
                }
                else {
                    this.filesTable = await conn.createTable(FILES_TABLE, rows);
                }
                files = rows.length;
            }
        }
        return { chunks, files };
    }
    async close() {
        this.chunksTable = undefined;
        this.filesTable = undefined;
        this.db = undefined;
    }
}
//# sourceMappingURL=store.js.map
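For orientation, a minimal consumer of the new Store API might look like the following sketch. It is not taken from the package: `queryVector` stands for an embedding produced elsewhere (see dist/lib/embedder.js in the file list above), and the chunk field values are placeholders matching the record shape that `addChunks` and `search` use.

import { Store, acquireDbLock, releaseDbLock } from "./store.js";

const cwd = process.cwd();
const store = Store.forProject(cwd); // DB path and branch derived from git state

// Write path: hold the per-project lock while adding chunks.
if (acquireDbLock(cwd)) {
    try {
        await store.addChunks([{
            id: "src/app.ts:1",       // placeholder values throughout
            filePath: "src/app.ts",
            startLine: 1,
            endLine: 20,
            type: "function",
            name: "main",
            content: "...",
            context: "...",
            hash: "abc123",
            vector: queryVector,       // embedding of `content`
        }]);
    }
    finally {
        releaseDbLock(cwd);
    }
}

// Read path: vector search, optionally scoped to a path prefix.
const hits = await store.search(queryVector, 10, "src/");
for (const h of hits) {
    console.log(h.score.toFixed(3), `${h.filePath}:${h.startLine}-${h.endLine}`);
}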