@tobilu/qmd 2.1.0 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +88 -0
- package/README.md +3 -0
- package/bin/qmd +111 -32
- package/dist/ast.d.ts +1 -0
- package/dist/ast.js +18 -8
- package/dist/bench/bench.d.ts +2 -0
- package/dist/bench/bench.js +108 -13
- package/dist/bench/score.d.ts +11 -4
- package/dist/bench/score.js +34 -13
- package/dist/bench/types.d.ts +13 -0
- package/dist/cli/qmd.d.ts +26 -0
- package/dist/cli/qmd.js +1172 -121
- package/dist/collections.d.ts +9 -0
- package/dist/collections.js +32 -7
- package/dist/db.d.ts +6 -3
- package/dist/db.js +1 -1
- package/dist/index.d.ts +4 -0
- package/dist/index.js +5 -2
- package/dist/llm.d.ts +65 -3
- package/dist/llm.js +376 -63
- package/dist/mcp/server.d.ts +6 -3
- package/dist/mcp/server.js +41 -26
- package/dist/paths.d.ts +1 -0
- package/dist/paths.js +4 -0
- package/dist/store.d.ts +92 -17
- package/dist/store.js +676 -176
- package/package.json +23 -12
- package/scripts/build.mjs +29 -0
- package/scripts/check-package-grammars.mjs +29 -0
- package/scripts/package-smoke.mjs +65 -0
- package/scripts/test-all.mjs +27 -0
- package/skills/qmd/SKILL.md +203 -0
- package/skills/qmd/references/mcp-setup.md +102 -0
- package/skills/release/SKILL.md +139 -0
- package/skills/release/scripts/install-hooks.sh +38 -0
- package/dist/embedded-skills.d.ts +0 -6
- package/dist/embedded-skills.js +0 -14
package/dist/mcp/server.d.ts
CHANGED
|
@@ -6,7 +6,10 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Follows MCP spec 2025-06-18 for proper response types.
|
|
8
8
|
*/
|
|
9
|
-
export
|
|
9
|
+
export type McpStartupOptions = {
|
|
10
|
+
dbPath?: string;
|
|
11
|
+
};
|
|
12
|
+
export declare function startMcpServer(options?: McpStartupOptions): Promise<void>;
|
|
10
13
|
export type HttpServerHandle = {
|
|
11
14
|
httpServer: import("http").Server;
|
|
12
15
|
port: number;
|
|
@@ -16,6 +19,6 @@ export type HttpServerHandle = {
|
|
|
16
19
|
* Start MCP server over Streamable HTTP (JSON responses, no SSE).
|
|
17
20
|
* Binds to localhost only. Returns a handle for shutdown and port discovery.
|
|
18
21
|
*/
|
|
19
|
-
export declare function startMcpHttpServer(port: number, options?: {
|
|
22
|
+
export declare function startMcpHttpServer(port: number, options?: ({
|
|
20
23
|
quiet?: boolean;
|
|
21
|
-
}): Promise<HttpServerHandle>;
|
|
24
|
+
} & McpStartupOptions)): Promise<HttpServerHandle>;
|
package/dist/mcp/server.js
CHANGED
|
@@ -19,6 +19,7 @@ import { z } from "zod";
|
|
|
19
19
|
import { existsSync } from "fs";
|
|
20
20
|
import { createStore, extractSnippet, addLineNumbers, getDefaultDbPath, DEFAULT_MULTI_GET_MAX_BYTES, } from "../index.js";
|
|
21
21
|
import { getConfigPath } from "../collections.js";
|
|
22
|
+
import { enableProductionMode } from "../store.js";
|
|
22
23
|
// =============================================================================
|
|
23
24
|
// Helper functions
|
|
24
25
|
// =============================================================================
|
|
@@ -63,7 +64,6 @@ function getPackageVersion() {
|
|
|
63
64
|
*/
|
|
64
65
|
async function buildInstructions(store) {
|
|
65
66
|
const status = await store.getStatus();
|
|
66
|
-
const contexts = await store.listContexts();
|
|
67
67
|
const globalCtx = await store.getGlobalContext();
|
|
68
68
|
const lines = [];
|
|
69
69
|
// --- What is this? ---
|
|
@@ -71,15 +71,13 @@ async function buildInstructions(store) {
|
|
|
71
71
|
if (globalCtx)
|
|
72
72
|
lines.push(`Context: ${globalCtx}`);
|
|
73
73
|
// --- What's searchable? ---
|
|
74
|
+
// Emit names only — the per-collection doc counts and descriptions can run to ~1.5 KB
|
|
75
|
+
// across a dozen collections, and the same info is available on demand via the `status` tool.
|
|
74
76
|
if (status.collections.length > 0) {
|
|
75
77
|
lines.push("");
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
const rootCtx = contexts.find(c => c.collection === col.name && (c.path === "" || c.path === "/"));
|
|
80
|
-
const desc = rootCtx ? ` — ${rootCtx.context}` : "";
|
|
81
|
-
lines.push(` - "${col.name}" (${col.documents} docs)${desc}`);
|
|
82
|
-
}
|
|
78
|
+
const names = status.collections.map(c => c.name).join(", ");
|
|
79
|
+
lines.push(`Collections (scope with \`collection\` parameter): ${names}`);
|
|
80
|
+
lines.push("Call the `status` tool for collection descriptions, paths, and per-collection doc counts.");
|
|
83
81
|
}
|
|
84
82
|
// --- Capability gaps ---
|
|
85
83
|
if (!status.hasVectorIndex) {
|
|
@@ -169,6 +167,8 @@ async function createMcpServer(store) {
|
|
|
169
167
|
title: "Query",
|
|
170
168
|
description: `Search the knowledge base using a query document — one or more typed sub-queries combined for best recall.
|
|
171
169
|
|
|
170
|
+
Each result includes a \`line\` field with the absolute 1-indexed line of the best match in the source markdown. To read more context around a hit, call \`get(file, fromLine = max(1, line - 20), maxLines = 80, lineNumbers = true)\`.
|
|
171
|
+
|
|
172
172
|
## Query Types
|
|
173
173
|
|
|
174
174
|
**lex** — BM25 keyword search. Fast, exact, no LLM needed.
|
|
@@ -247,6 +247,7 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
247
247
|
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
|
248
248
|
limit,
|
|
249
249
|
minScore,
|
|
250
|
+
candidateLimit,
|
|
250
251
|
rerank,
|
|
251
252
|
intent,
|
|
252
253
|
});
|
|
@@ -255,13 +256,14 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
255
256
|
|| searches.find(s => s.type === 'vec')?.query
|
|
256
257
|
|| searches[0]?.query || "";
|
|
257
258
|
const filtered = results.map(r => {
|
|
258
|
-
const { line, snippet } = extractSnippet(r.
|
|
259
|
+
const { line, snippet } = extractSnippet(r.body, primaryQuery, 300, r.bestChunkPos, r.bestChunk.length, intent);
|
|
259
260
|
return {
|
|
260
261
|
docid: `#${r.docid}`,
|
|
261
262
|
file: r.displayPath,
|
|
262
263
|
title: r.title,
|
|
263
264
|
score: Math.round(r.score * 100) / 100,
|
|
264
265
|
context: r.context,
|
|
266
|
+
line,
|
|
265
267
|
snippet: addLineNumbers(snippet, line),
|
|
266
268
|
};
|
|
267
269
|
});
|
|
@@ -292,6 +294,8 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
292
294
|
parsedFromLine = parseInt(colonMatch[1], 10);
|
|
293
295
|
lookup = lookup.slice(0, -colonMatch[0].length);
|
|
294
296
|
}
|
|
297
|
+
if (parsedFromLine !== undefined)
|
|
298
|
+
parsedFromLine = Math.max(1, parsedFromLine);
|
|
295
299
|
const result = await store.get(lookup, { includeBody: false });
|
|
296
300
|
if ("error" in result) {
|
|
297
301
|
let msg = `Document not found: ${file}`;
|
|
@@ -412,13 +416,16 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
412
416
|
});
|
|
413
417
|
return server;
|
|
414
418
|
}
|
|
415
|
-
|
|
416
|
-
//
|
|
417
|
-
//
|
|
418
|
-
|
|
419
|
+
export async function startMcpServer(options = {}) {
|
|
420
|
+
// Opt into production mode when the MCP server is actually started, not
|
|
421
|
+
// when this module is merely imported for its exports. Importing the module
|
|
422
|
+
// at the top level flipped the global production flag and broke test
|
|
423
|
+
// isolation for downstream suites that expect the default (development)
|
|
424
|
+
// database path behaviour.
|
|
425
|
+
enableProductionMode();
|
|
419
426
|
const configPath = getConfigPath();
|
|
420
427
|
const store = await createStore({
|
|
421
|
-
dbPath: getDefaultDbPath(),
|
|
428
|
+
dbPath: options.dbPath ?? getDefaultDbPath(),
|
|
422
429
|
...(existsSync(configPath) ? { configPath } : {}),
|
|
423
430
|
});
|
|
424
431
|
const server = await createMcpServer(store);
|
|
@@ -429,10 +436,14 @@ export async function startMcpServer() {
|
|
|
429
436
|
* Start MCP server over Streamable HTTP (JSON responses, no SSE).
|
|
430
437
|
* Binds to localhost only. Returns a handle for shutdown and port discovery.
|
|
431
438
|
*/
|
|
432
|
-
export async function startMcpHttpServer(port, options) {
|
|
439
|
+
export async function startMcpHttpServer(port, options = {}) {
|
|
440
|
+
// See startMcpServer() for the rationale — flip production mode here so the
|
|
441
|
+
// HTTP transport resolves the real database path, without leaking state into
|
|
442
|
+
// callers that only import this module for its exports (e.g. tests).
|
|
443
|
+
enableProductionMode();
|
|
433
444
|
const configPath = getConfigPath();
|
|
434
445
|
const store = await createStore({
|
|
435
|
-
dbPath: getDefaultDbPath(),
|
|
446
|
+
dbPath: options.dbPath ?? getDefaultDbPath(),
|
|
436
447
|
...(existsSync(configPath) ? { configPath } : {}),
|
|
437
448
|
});
|
|
438
449
|
// Pre-fetch default collection names for REST endpoint
|
|
@@ -466,7 +477,7 @@ export async function startMcpHttpServer(port, options) {
|
|
|
466
477
|
}
|
|
467
478
|
/** Extract a human-readable label from a JSON-RPC body */
|
|
468
479
|
function describeRequest(body) {
|
|
469
|
-
const method = body
|
|
480
|
+
const method = typeof body.method === "string" ? body.method : "unknown";
|
|
470
481
|
if (method === "tools/call") {
|
|
471
482
|
const tool = body.params?.name ?? "?";
|
|
472
483
|
const args = body.params?.arguments;
|
|
@@ -517,31 +528,35 @@ export async function startMcpHttpServer(port, options) {
|
|
|
517
528
|
return;
|
|
518
529
|
}
|
|
519
530
|
// Map to internal format
|
|
520
|
-
const
|
|
531
|
+
const searches = params.searches;
|
|
532
|
+
const queries = searches.map((s) => ({
|
|
521
533
|
type: s.type,
|
|
522
534
|
query: String(s.query || ""),
|
|
523
535
|
}));
|
|
524
536
|
// Use default collections if none specified
|
|
525
|
-
const effectiveCollections = params.collections
|
|
537
|
+
const effectiveCollections = Array.isArray(params.collections) ? params.collections.map(String) : defaultCollectionNames;
|
|
526
538
|
const results = await store.search({
|
|
527
539
|
queries,
|
|
528
540
|
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
|
529
|
-
limit: params.limit
|
|
530
|
-
minScore: params.minScore
|
|
531
|
-
|
|
541
|
+
limit: typeof params.limit === "number" ? params.limit : 10,
|
|
542
|
+
minScore: typeof params.minScore === "number" ? params.minScore : 0,
|
|
543
|
+
candidateLimit: typeof params.candidateLimit === "number" ? params.candidateLimit : undefined,
|
|
544
|
+
intent: typeof params.intent === "string" ? params.intent : undefined,
|
|
545
|
+
rerank: typeof params.rerank === "boolean" ? params.rerank : undefined,
|
|
532
546
|
});
|
|
533
547
|
// Use first lex or vec query for snippet extraction
|
|
534
|
-
const primaryQuery =
|
|
535
|
-
||
|
|
536
|
-
||
|
|
548
|
+
const primaryQuery = searches.find((s) => s.type === 'lex')?.query
|
|
549
|
+
|| searches.find((s) => s.type === 'vec')?.query
|
|
550
|
+
|| searches[0]?.query || "";
|
|
537
551
|
const formatted = results.map(r => {
|
|
538
|
-
const { line, snippet } = extractSnippet(r.
|
|
552
|
+
const { line, snippet } = extractSnippet(r.body, String(primaryQuery), 300, r.bestChunkPos, r.bestChunk.length, typeof params.intent === "string" ? params.intent : undefined);
|
|
539
553
|
return {
|
|
540
554
|
docid: `#${r.docid}`,
|
|
541
555
|
file: r.displayPath,
|
|
542
556
|
title: r.title,
|
|
543
557
|
score: Math.round(r.score * 100) / 100,
|
|
544
558
|
context: r.context,
|
|
559
|
+
line,
|
|
545
560
|
snippet: addLineNumbers(snippet, line),
|
|
546
561
|
};
|
|
547
562
|
});
|
package/dist/paths.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function qmdHomedir(): string;
|
package/dist/paths.js
ADDED
package/dist/store.d.ts
CHANGED
|
@@ -13,9 +13,9 @@
|
|
|
13
13
|
import type { Database } from "./db.js";
|
|
14
14
|
import { LlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, type ILLMSession } from "./llm.js";
|
|
15
15
|
import type { NamedCollection, Collection, CollectionConfig } from "./collections.js";
|
|
16
|
-
export declare const DEFAULT_EMBED_MODEL = "embeddinggemma";
|
|
17
|
-
export declare const DEFAULT_RERANK_MODEL = "
|
|
18
|
-
export declare const DEFAULT_QUERY_MODEL = "
|
|
16
|
+
export declare const DEFAULT_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
|
17
|
+
export declare const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
|
|
18
|
+
export declare const DEFAULT_QUERY_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
|
|
19
19
|
export declare const DEFAULT_GLOB = "**/*.md";
|
|
20
20
|
export declare const DEFAULT_MULTI_GET_MAX_BYTES: number;
|
|
21
21
|
export declare const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
|
|
@@ -26,6 +26,7 @@ export declare const CHUNK_SIZE_CHARS: number;
|
|
|
26
26
|
export declare const CHUNK_OVERLAP_CHARS: number;
|
|
27
27
|
export declare const CHUNK_WINDOW_TOKENS = 200;
|
|
28
28
|
export declare const CHUNK_WINDOW_CHARS: number;
|
|
29
|
+
export declare function getEmbeddingFingerprint(model?: string): string;
|
|
29
30
|
/**
|
|
30
31
|
* A potential break point in the document with a base score indicating quality.
|
|
31
32
|
*/
|
|
@@ -142,6 +143,7 @@ export declare function getRealPath(path: string): string;
|
|
|
142
143
|
export type VirtualPath = {
|
|
143
144
|
collectionName: string;
|
|
144
145
|
path: string;
|
|
146
|
+
indexName?: string;
|
|
145
147
|
};
|
|
146
148
|
/**
|
|
147
149
|
* Normalize explicit virtual path formats to standard qmd:// format.
|
|
@@ -164,7 +166,7 @@ export declare function parseVirtualPath(virtualPath: string): VirtualPath | nul
|
|
|
164
166
|
/**
|
|
165
167
|
* Build a virtual path from collection name and relative path.
|
|
166
168
|
*/
|
|
167
|
-
export declare function buildVirtualPath(collectionName: string, path: string): string;
|
|
169
|
+
export declare function buildVirtualPath(collectionName: string, path: string, indexName?: string): string;
|
|
168
170
|
/**
|
|
169
171
|
* Check if a path is explicitly a virtual path.
|
|
170
172
|
* Only recognizes explicit virtual path formats:
|
|
@@ -185,6 +187,12 @@ export declare function resolveVirtualPath(db: Database, virtualPath: string): s
|
|
|
185
187
|
*/
|
|
186
188
|
export declare function toVirtualPath(db: Database, absolutePath: string): string | null;
|
|
187
189
|
export declare function verifySqliteVecLoaded(db: Database): void;
|
|
190
|
+
/**
|
|
191
|
+
* FTS5's unicode61 tokenizer does not segment CJK text into searchable words.
|
|
192
|
+
* Normalize CJK runs by spacing every character so exact CJK queries can be
|
|
193
|
+
* translated into phrase queries while Latin text keeps the default tokenizer.
|
|
194
|
+
*/
|
|
195
|
+
export declare function normalizeCjkForFTS(text: string): string;
|
|
188
196
|
export declare function getStoreCollections(db: Database): NamedCollection[];
|
|
189
197
|
export declare function getStoreCollection(db: Database, name: string): NamedCollection | null;
|
|
190
198
|
export declare function getStoreGlobalContext(db: Database): string | undefined;
|
|
@@ -214,9 +222,9 @@ export type Store = {
|
|
|
214
222
|
llm?: LlamaCpp;
|
|
215
223
|
close: () => void;
|
|
216
224
|
ensureVecTable: (dimensions: number) => void;
|
|
217
|
-
getHashesNeedingEmbedding: () => number;
|
|
218
|
-
getIndexHealth: () => IndexHealthInfo;
|
|
219
|
-
getStatus: () => IndexStatus;
|
|
225
|
+
getHashesNeedingEmbedding: (model?: string) => number;
|
|
226
|
+
getIndexHealth: (model?: string) => IndexHealthInfo;
|
|
227
|
+
getStatus: (model?: string) => IndexStatus;
|
|
220
228
|
getCacheKey: typeof getCacheKey;
|
|
221
229
|
getCachedResult: (cacheKey: string) => string | null;
|
|
222
230
|
setCachedResult: (cacheKey: string, result: string) => void;
|
|
@@ -284,6 +292,11 @@ export type Store = {
|
|
|
284
292
|
hash: string;
|
|
285
293
|
title: string;
|
|
286
294
|
} | null;
|
|
295
|
+
findOrMigrateLegacyDocument: (collectionName: string, path: string) => {
|
|
296
|
+
id: number;
|
|
297
|
+
hash: string;
|
|
298
|
+
title: string;
|
|
299
|
+
} | null;
|
|
287
300
|
updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => void;
|
|
288
301
|
updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => void;
|
|
289
302
|
deactivateDocument: (collectionName: string, path: string) => void;
|
|
@@ -294,7 +307,7 @@ export type Store = {
|
|
|
294
307
|
path: string;
|
|
295
308
|
}[];
|
|
296
309
|
clearAllEmbeddings: () => void;
|
|
297
|
-
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string) => void;
|
|
310
|
+
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string, totalChunks?: number, fingerprint?: string) => void;
|
|
298
311
|
};
|
|
299
312
|
export type ReindexProgress = {
|
|
300
313
|
file: string;
|
|
@@ -316,22 +329,38 @@ export declare function reindexCollection(store: Store, collectionPath: string,
|
|
|
316
329
|
ignorePatterns?: string[];
|
|
317
330
|
onProgress?: (info: ReindexProgress) => void;
|
|
318
331
|
}): Promise<ReindexResult>;
|
|
332
|
+
export type EmbedFailure = {
|
|
333
|
+
path: string;
|
|
334
|
+
hash: string;
|
|
335
|
+
seq: number;
|
|
336
|
+
attempts: number;
|
|
337
|
+
reason: string;
|
|
338
|
+
};
|
|
319
339
|
export type EmbedProgress = {
|
|
320
340
|
chunksEmbedded: number;
|
|
321
341
|
totalChunks: number;
|
|
322
342
|
bytesProcessed: number;
|
|
323
343
|
totalBytes: number;
|
|
344
|
+
/** Active failed chunks still awaiting a successful retry. */
|
|
324
345
|
errors: number;
|
|
346
|
+
failures?: EmbedFailure[];
|
|
325
347
|
};
|
|
326
348
|
export type EmbedResult = {
|
|
327
349
|
docsProcessed: number;
|
|
328
350
|
chunksEmbedded: number;
|
|
351
|
+
/** Active failed chunks that did not recover after retries. */
|
|
329
352
|
errors: number;
|
|
353
|
+
failures?: EmbedFailure[];
|
|
330
354
|
durationMs: number;
|
|
331
355
|
};
|
|
332
356
|
export type EmbedOptions = {
|
|
333
357
|
force?: boolean;
|
|
334
358
|
model?: string;
|
|
359
|
+
/**
|
|
360
|
+
* Restrict embedding to documents in a single collection.
|
|
361
|
+
* When omitted, all pending documents across every collection are embedded.
|
|
362
|
+
*/
|
|
363
|
+
collection?: string;
|
|
335
364
|
maxDocsPerBatch?: number;
|
|
336
365
|
maxBatchBytes?: number;
|
|
337
366
|
chunkStrategy?: ChunkStrategy;
|
|
@@ -454,13 +483,19 @@ export type IndexStatus = {
|
|
|
454
483
|
hasVectorIndex: boolean;
|
|
455
484
|
collections: CollectionInfo[];
|
|
456
485
|
};
|
|
457
|
-
export declare function getHashesNeedingEmbedding(db: Database): number;
|
|
486
|
+
export declare function getHashesNeedingEmbedding(db: Database, collection?: string, model?: string): number;
|
|
458
487
|
export type IndexHealthInfo = {
|
|
459
488
|
needsEmbedding: number;
|
|
460
489
|
totalDocs: number;
|
|
461
490
|
daysStale: number | null;
|
|
462
491
|
};
|
|
463
|
-
export
|
|
492
|
+
export type LegacyFingerprintAdoptionResult = {
|
|
493
|
+
checked: boolean;
|
|
494
|
+
adopted: number;
|
|
495
|
+
reason: string;
|
|
496
|
+
};
|
|
497
|
+
export declare function maybeAdoptLegacyEmbeddingFingerprint(store: Store, model?: string): Promise<LegacyFingerprintAdoptionResult>;
|
|
498
|
+
export declare function getIndexHealth(db: Database, model?: string): IndexHealthInfo;
|
|
464
499
|
export declare function getCacheKey(url: string, body: object): string;
|
|
465
500
|
export declare function getCachedResult(db: Database, cacheKey: string): string | null;
|
|
466
501
|
export declare function setCachedResult(db: Database, cacheKey: string, result: string): void;
|
|
@@ -476,7 +511,9 @@ export declare function deleteLLMCache(db: Database): number;
|
|
|
476
511
|
*/
|
|
477
512
|
export declare function deleteInactiveDocuments(db: Database): number;
|
|
478
513
|
/**
|
|
479
|
-
* Remove orphaned content hashes that are not referenced by any
|
|
514
|
+
* Remove orphaned content hashes that are not referenced by any document.
|
|
515
|
+
* Inactive documents are soft-deleted tombstones, so their content rows must
|
|
516
|
+
* remain referenced until deleteInactiveDocuments() hard-deletes them.
|
|
480
517
|
* Returns the number of orphaned content hashes deleted.
|
|
481
518
|
*/
|
|
482
519
|
export declare function cleanupOrphanedContent(db: Database): number;
|
|
@@ -509,6 +546,20 @@ export declare function findActiveDocument(db: Database, collectionName: string,
|
|
|
509
546
|
hash: string;
|
|
510
547
|
title: string;
|
|
511
548
|
} | null;
|
|
549
|
+
/**
|
|
550
|
+
* Find an active document, falling back to a case-insensitive path match.
|
|
551
|
+
* If found under a different casing, renames it in-place and rebuilds the
|
|
552
|
+
* FTS entry. Embeddings are keyed by content hash, so the rename is
|
|
553
|
+
* safe — no re-embedding required.
|
|
554
|
+
*
|
|
555
|
+
* @internal Used by reindexCollection and indexFiles during qmd update.
|
|
556
|
+
* Returns null if the document does not exist under either path.
|
|
557
|
+
*/
|
|
558
|
+
export declare function findOrMigrateLegacyDocument(db: Database, collectionName: string, path: string): {
|
|
559
|
+
id: number;
|
|
560
|
+
hash: string;
|
|
561
|
+
title: string;
|
|
562
|
+
} | null;
|
|
512
563
|
/**
|
|
513
564
|
* Update the title and modified_at timestamp for a document.
|
|
514
565
|
*/
|
|
@@ -694,16 +745,28 @@ export declare function searchVec(db: Database, query: string, model: string, li
|
|
|
694
745
|
* Get all unique content hashes that need embeddings (from active documents).
|
|
695
746
|
* Returns hash, document body, and a sample path for display purposes.
|
|
696
747
|
*/
|
|
697
|
-
export declare function getHashesForEmbedding(db: Database): {
|
|
748
|
+
export declare function getHashesForEmbedding(db: Database, model?: string): {
|
|
698
749
|
hash: string;
|
|
699
750
|
body: string;
|
|
700
751
|
path: string;
|
|
701
752
|
}[];
|
|
702
753
|
/**
|
|
703
|
-
* Clear
|
|
704
|
-
*
|
|
754
|
+
* Clear embeddings for the whole index, or just for one collection.
|
|
755
|
+
*
|
|
756
|
+
* When `collection` is omitted the entire content_vectors table is emptied and
|
|
757
|
+
* the vectors_vec virtual table is dropped (it is recreated with the right
|
|
758
|
+
* dimensions on the next embed run).
|
|
759
|
+
*
|
|
760
|
+
* When `collection` is provided, only vectors whose hash is referenced
|
|
761
|
+
* exclusively by active documents in that collection are removed. Hashes
|
|
762
|
+
* shared with active documents in other collections are left in place so
|
|
763
|
+
* vector search keeps working there (content_vectors is keyed globally by
|
|
764
|
+
* content hash; identical document bodies across collections share a row).
|
|
765
|
+
* vectors_vec is preserved so other collections keep working unless the scoped
|
|
766
|
+
* clear empties content_vectors entirely, in which case it is dropped so the
|
|
767
|
+
* next embed can recreate the table with the current dimensions.
|
|
705
768
|
*/
|
|
706
|
-
export declare function clearAllEmbeddings(db: Database): void;
|
|
769
|
+
export declare function clearAllEmbeddings(db: Database, collection?: string): void;
|
|
707
770
|
/**
|
|
708
771
|
* Insert a single embedding into both content_vectors and vectors_vec tables.
|
|
709
772
|
* The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
|
|
@@ -714,7 +777,7 @@ export declare function clearAllEmbeddings(db: Database): void;
|
|
|
714
777
|
* vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
|
|
715
778
|
* vec0 virtual tables silently ignore the OR REPLACE conflict clause.
|
|
716
779
|
*/
|
|
717
|
-
export declare function insertEmbedding(db: Database, hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string): void;
|
|
780
|
+
export declare function insertEmbedding(db: Database, hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string, totalChunks?: number, fingerprint?: string): void;
|
|
718
781
|
export declare function expandQuery(query: string, model: string | undefined, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise<ExpandedQuery[]>;
|
|
719
782
|
export declare function rerank(query: string, documents: {
|
|
720
783
|
file: string;
|
|
@@ -759,7 +822,7 @@ export declare function findDocuments(db: Database, pattern: string, options?: {
|
|
|
759
822
|
docs: MultiGetResult[];
|
|
760
823
|
errors: string[];
|
|
761
824
|
};
|
|
762
|
-
export declare function getStatus(db: Database): IndexStatus;
|
|
825
|
+
export declare function getStatus(db: Database, model?: string): IndexStatus;
|
|
763
826
|
export type SnippetResult = {
|
|
764
827
|
line: number;
|
|
765
828
|
snippet: string;
|
|
@@ -831,6 +894,18 @@ export type RankedListMeta = {
|
|
|
831
894
|
queryType: "original" | "lex" | "vec" | "hyde";
|
|
832
895
|
query: string;
|
|
833
896
|
};
|
|
897
|
+
/**
|
|
898
|
+
* RRF list weights for hybridQuery.
|
|
899
|
+
*
|
|
900
|
+
* Original-query retrieval paths are the primary evidence and get 2x weight:
|
|
901
|
+
* - original FTS
|
|
902
|
+
* - original vector search
|
|
903
|
+
*
|
|
904
|
+
* Expansion-derived lists (lex/vec/hyde) stay at 1x regardless of list order,
|
|
905
|
+
* so a lex expansion inserted before original vector search cannot steal the
|
|
906
|
+
* original vector boost.
|
|
907
|
+
*/
|
|
908
|
+
export declare function getHybridRrfWeights(rankedListMeta: RankedListMeta[]): number[];
|
|
834
909
|
/**
|
|
835
910
|
* Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
|
|
836
911
|
*
|