org-qmd 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +529 -0
- package/LICENSE +21 -0
- package/README.md +917 -0
- package/bin/qmd +32 -0
- package/dist/ast.d.ts +64 -0
- package/dist/ast.js +324 -0
- package/dist/cli/formatter.d.ts +120 -0
- package/dist/cli/formatter.js +350 -0
- package/dist/cli/qmd.d.ts +1 -0
- package/dist/cli/qmd.js +2820 -0
- package/dist/collections.d.ts +146 -0
- package/dist/collections.js +385 -0
- package/dist/db.d.ts +41 -0
- package/dist/db.js +75 -0
- package/dist/embedded-skills.d.ts +6 -0
- package/dist/embedded-skills.js +14 -0
- package/dist/index.d.ts +226 -0
- package/dist/index.js +234 -0
- package/dist/llm.d.ts +406 -0
- package/dist/llm.js +1174 -0
- package/dist/maintenance.d.ts +23 -0
- package/dist/maintenance.js +37 -0
- package/dist/mcp/server.d.ts +21 -0
- package/dist/mcp/server.js +653 -0
- package/dist/store.d.ts +993 -0
- package/dist/store.js +3806 -0
- package/package.json +101 -0
package/dist/store.d.ts
ADDED
|
@@ -0,0 +1,993 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* QMD Store - Core data access and retrieval functions
|
|
3
|
+
*
|
|
4
|
+
* This module provides all database operations, search functions, and document
|
|
5
|
+
* retrieval for QMD. It returns raw data structures that can be formatted by
|
|
6
|
+
* CLI or MCP consumers.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* const store = createStore("/path/to/db.sqlite");
|
|
10
|
+
* // or use default path:
|
|
11
|
+
* const store = createStore();
|
|
12
|
+
*/
|
|
13
|
+
import type { Database } from "./db.js";
|
|
14
|
+
import { LlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, type ILLMSession } from "./llm.js";
|
|
15
|
+
import type { NamedCollection, Collection, CollectionConfig } from "./collections.js";
|
|
16
|
+
export declare const DEFAULT_EMBED_MODEL = "embeddinggemma";
|
|
17
|
+
export declare const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
|
|
18
|
+
export declare const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
|
|
19
|
+
export declare const DEFAULT_GLOB = "**/*.{md,org}";
|
|
20
|
+
export declare const DEFAULT_MULTI_GET_MAX_BYTES: number;
|
|
21
|
+
export declare const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
|
|
22
|
+
export declare const DEFAULT_EMBED_MAX_BATCH_BYTES: number;
|
|
23
|
+
export declare const CHUNK_SIZE_TOKENS = 900;
|
|
24
|
+
export declare const CHUNK_OVERLAP_TOKENS: number;
|
|
25
|
+
export declare const CHUNK_SIZE_CHARS: number;
|
|
26
|
+
export declare const CHUNK_OVERLAP_CHARS: number;
|
|
27
|
+
export declare const CHUNK_WINDOW_TOKENS = 200;
|
|
28
|
+
export declare const CHUNK_WINDOW_CHARS: number;
|
|
29
|
+
/**
|
|
30
|
+
* A potential break point in the document with a base score indicating quality.
|
|
31
|
+
*/
|
|
32
|
+
export interface BreakPoint {
|
|
33
|
+
pos: number;
|
|
34
|
+
score: number;
|
|
35
|
+
type: string;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* A region where a code fence exists (between ``` markers in Markdown, or a #+BEGIN_xxx / #+END_xxx block in Org-mode).
|
|
39
|
+
* We should never split inside a code fence.
|
|
40
|
+
*/
|
|
41
|
+
export interface CodeFenceRegion {
|
|
42
|
+
start: number;
|
|
43
|
+
end: number;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Patterns for detecting break points in Markdown documents.
|
|
47
|
+
* Higher scores indicate better places to split.
|
|
48
|
+
* Scores are spread wide so headings decisively beat lower-quality breaks.
|
|
49
|
+
* Order matters for scoring - more specific patterns first.
|
|
50
|
+
*/
|
|
51
|
+
export declare const BREAK_PATTERNS: [RegExp, number, string][];
|
|
52
|
+
/**
|
|
53
|
+
* Patterns for detecting break points in Org-mode documents.
|
|
54
|
+
* Org headings use * (one per level), code blocks use #+BEGIN_SRC/#+END_SRC.
|
|
55
|
+
*
|
|
56
|
+
* Org heading levels:
|
|
57
|
+
* \n\* heading → h1 (100)
|
|
58
|
+
* \n\*\* heading → h2 (90)
|
|
59
|
+
* etc.
|
|
60
|
+
*
|
|
61
|
+
* Note: Org * headings require a space after the stars to distinguish
|
|
62
|
+
* from bold markup (*bold*). The patterns match \n followed by stars and space.
|
|
63
|
+
*/
|
|
64
|
+
export declare const ORG_BREAK_PATTERNS: [RegExp, number, string][];
|
|
65
|
+
/**
|
|
66
|
+
* Select break patterns based on file extension.
|
|
67
|
+
*/
|
|
68
|
+
export declare function getBreakPatterns(filepath?: string): [RegExp, number, string][];
|
|
69
|
+
/**
|
|
70
|
+
* Scan text for all potential break points.
|
|
71
|
+
* Returns sorted array of break points with higher-scoring patterns taking precedence
|
|
72
|
+
* when multiple patterns match the same position.
|
|
73
|
+
*
|
|
74
|
+
* When filepath is provided and ends with .org, uses Org-mode break patterns
|
|
75
|
+
* instead of Markdown patterns.
|
|
76
|
+
*/
|
|
77
|
+
export declare function scanBreakPoints(text: string, filepath?: string): BreakPoint[];
|
|
78
|
+
/**
|
|
79
|
+
* Find all code fence regions in the text.
|
|
80
|
+
* For Markdown: delimited by ```
|
|
81
|
+
* For Org-mode: delimited by #+BEGIN_xxx / #+END_xxx pairs
|
|
82
|
+
* We should never split inside these regions.
|
|
83
|
+
*/
|
|
84
|
+
export declare function findCodeFences(text: string, filepath?: string): CodeFenceRegion[];
|
|
85
|
+
/**
|
|
86
|
+
* Find all #+BEGIN_xxx / #+END_xxx block regions in Org-mode text.
|
|
87
|
+
* These include #+BEGIN_SRC, #+BEGIN_EXAMPLE, #+BEGIN_QUOTE, etc.
|
|
88
|
+
*/
|
|
89
|
+
export declare function findOrgBlocks(text: string): CodeFenceRegion[];
|
|
90
|
+
/**
|
|
91
|
+
* Check if a position is inside a code fence region.
|
|
92
|
+
*/
|
|
93
|
+
export declare function isInsideCodeFence(pos: number, fences: CodeFenceRegion[]): boolean;
|
|
94
|
+
/**
|
|
95
|
+
* Find the best cut position using scored break points with distance decay.
|
|
96
|
+
*
|
|
97
|
+
* Uses squared distance for gentler early decay - headings far back still win
|
|
98
|
+
* over low-quality breaks near the target.
|
|
99
|
+
*
|
|
100
|
+
* @param breakPoints - Pre-scanned break points from scanBreakPoints()
|
|
101
|
+
* @param targetCharPos - The ideal cut position (e.g., maxChars boundary)
|
|
102
|
+
* @param windowChars - How far back (in characters) to search for break points (default is ~200 tokens' worth of characters)
|
|
103
|
+
* @param decayFactor - How much to penalize distance (0.7 = 30% score at window edge)
|
|
104
|
+
* @param codeFences - Code fence regions to avoid splitting inside
|
|
105
|
+
* @returns The best position to cut at
|
|
106
|
+
*/
|
|
107
|
+
export declare function findBestCutoff(breakPoints: BreakPoint[], targetCharPos: number, windowChars?: number, decayFactor?: number, codeFences?: CodeFenceRegion[]): number;
|
|
108
|
+
/**
* Strategy selector for document chunking.
* - "regex": regex-only break point detection.
* - "auto": presumably enables AST-aware break points for supported code files,
*   merged with the regex break points — confirm against chunkDocumentAsync in store.js.
*/
export type ChunkStrategy = "auto" | "regex";
|
|
109
|
+
/**
|
|
110
|
+
* Merge two sets of break points (e.g. regex + AST), keeping the highest
|
|
111
|
+
* score at each position. Result is sorted by position.
|
|
112
|
+
*/
|
|
113
|
+
export declare function mergeBreakPoints(a: BreakPoint[], b: BreakPoint[]): BreakPoint[];
|
|
114
|
+
/**
|
|
115
|
+
* Core chunk algorithm that operates on precomputed break points and code fences.
|
|
116
|
+
* This is the shared implementation used by both regex-only and AST-aware chunking.
|
|
117
|
+
*/
|
|
118
|
+
export declare function chunkDocumentWithBreakPoints(content: string, breakPoints: BreakPoint[], codeFences: CodeFenceRegion[], maxChars?: number, overlapChars?: number, windowChars?: number): {
|
|
119
|
+
text: string;
|
|
120
|
+
pos: number;
|
|
121
|
+
}[];
|
|
122
|
+
export declare const STRONG_SIGNAL_MIN_SCORE = 0.85;
|
|
123
|
+
export declare const STRONG_SIGNAL_MIN_GAP = 0.15;
|
|
124
|
+
export declare const RERANK_CANDIDATE_LIMIT = 40;
|
|
125
|
+
/**
|
|
126
|
+
* A typed query expansion result. Decoupled from llm.ts internal Queryable —
|
|
127
|
+
* same shape, but store.ts owns its own public API type.
|
|
128
|
+
*
|
|
129
|
+
* - lex: keyword variant → routes to FTS only
|
|
130
|
+
* - vec: semantic variant → routes to vector only
|
|
131
|
+
* - hyde: hypothetical document → routes to vector only
|
|
132
|
+
*/
|
|
133
|
+
export type ExpandedQuery = {
|
|
134
|
+
type: 'lex' | 'vec' | 'hyde';
|
|
135
|
+
query: string;
|
|
136
|
+
/** Optional line number for error reporting (CLI parser) */
|
|
137
|
+
line?: number;
|
|
138
|
+
};
|
|
139
|
+
export declare function homedir(): string;
|
|
140
|
+
/**
|
|
141
|
+
* Check if a path is absolute.
|
|
142
|
+
* Supports:
|
|
143
|
+
* - Unix paths: /path/to/file
|
|
144
|
+
* - Windows native: C:\path or C:/path
|
|
145
|
+
* - Git Bash: /c/path or /C/path (C-Z drives, excluding A/B floppy drives)
|
|
146
|
+
*
|
|
147
|
+
* Note: /c without trailing slash is treated as Unix path (directory named "c"),
|
|
148
|
+
* while /c/ or /c/path are treated as Git Bash paths (C: drive).
|
|
149
|
+
*/
|
|
150
|
+
export declare function isAbsolutePath(path: string): boolean;
|
|
151
|
+
/**
|
|
152
|
+
* Normalize path separators to forward slashes.
|
|
153
|
+
* Converts Windows backslashes to forward slashes.
|
|
154
|
+
*/
|
|
155
|
+
export declare function normalizePathSeparators(path: string): string;
|
|
156
|
+
/**
|
|
157
|
+
* Get the relative path from a prefix.
|
|
158
|
+
* Returns null if path is not under prefix.
|
|
159
|
+
* Returns empty string if path equals prefix.
|
|
160
|
+
*/
|
|
161
|
+
export declare function getRelativePathFromPrefix(path: string, prefix: string): string | null;
|
|
162
|
+
export declare function resolve(...paths: string[]): string;
|
|
163
|
+
export declare function enableProductionMode(): void;
|
|
164
|
+
/**
* Get the default database path.
* createStore() documents its default path as ~/.cache/qmd/index.sqlite.
* NOTE(review): presumably `indexName` selects a differently named index file
* in the same location — confirm in store.js.
*/
export declare function getDefaultDbPath(indexName?: string): string;
|
|
165
|
+
export declare function getPwd(): string;
|
|
166
|
+
export declare function getRealPath(path: string): string;
|
|
167
|
+
export type VirtualPath = {
|
|
168
|
+
collectionName: string;
|
|
169
|
+
path: string;
|
|
170
|
+
};
|
|
171
|
+
/**
|
|
172
|
+
* Normalize explicit virtual path formats to standard qmd:// format.
|
|
173
|
+
* Only handles paths that are already explicitly virtual:
|
|
174
|
+
* - qmd://collection/path.md (already normalized)
|
|
175
|
+
* - qmd:////collection/path.md (extra slashes - normalize)
|
|
176
|
+
* - //collection/path.md (missing qmd: prefix - add it)
|
|
177
|
+
*
|
|
178
|
+
* Does NOT handle:
|
|
179
|
+
* - collection/path.md (bare paths - could be filesystem relative)
|
|
180
|
+
* - :linenum suffix (should be parsed separately before calling this)
|
|
181
|
+
*/
|
|
182
|
+
export declare function normalizeVirtualPath(input: string): string;
|
|
183
|
+
/**
|
|
184
|
+
* Parse a virtual path like "qmd://collection-name/path/to/file.md"
|
|
185
|
+
* into its components.
|
|
186
|
+
* Also supports collection root: "qmd://collection-name/" or "qmd://collection-name"
|
|
187
|
+
*/
|
|
188
|
+
export declare function parseVirtualPath(virtualPath: string): VirtualPath | null;
|
|
189
|
+
/**
|
|
190
|
+
* Build a virtual path from collection name and relative path.
|
|
191
|
+
*/
|
|
192
|
+
export declare function buildVirtualPath(collectionName: string, path: string): string;
|
|
193
|
+
/**
|
|
194
|
+
* Check if a path is explicitly a virtual path.
|
|
195
|
+
* Only recognizes explicit virtual path formats:
|
|
196
|
+
* - qmd://collection/path.md
|
|
197
|
+
* - //collection/path.md
|
|
198
|
+
*
|
|
199
|
+
* Does NOT consider bare collection/path.md as virtual - that should be
|
|
200
|
+
* handled separately by checking if the first component is a collection name.
|
|
201
|
+
*/
|
|
202
|
+
export declare function isVirtualPath(path: string): boolean;
|
|
203
|
+
/**
|
|
204
|
+
* Resolve a virtual path to absolute filesystem path.
|
|
205
|
+
*/
|
|
206
|
+
export declare function resolveVirtualPath(db: Database, virtualPath: string): string | null;
|
|
207
|
+
/**
|
|
208
|
+
* Convert an absolute filesystem path to a virtual path.
|
|
209
|
+
* Returns null if the file is not in any indexed collection.
|
|
210
|
+
*/
|
|
211
|
+
export declare function toVirtualPath(db: Database, absolutePath: string): string | null;
|
|
212
|
+
export declare function verifySqliteVecLoaded(db: Database): void;
|
|
213
|
+
export declare function getStoreCollections(db: Database): NamedCollection[];
|
|
214
|
+
export declare function getStoreCollection(db: Database, name: string): NamedCollection | null;
|
|
215
|
+
export declare function getStoreGlobalContext(db: Database): string | undefined;
|
|
216
|
+
export declare function getStoreContexts(db: Database): Array<{
|
|
217
|
+
collection: string;
|
|
218
|
+
path: string;
|
|
219
|
+
context: string;
|
|
220
|
+
}>;
|
|
221
|
+
export declare function upsertStoreCollection(db: Database, name: string, collection: Omit<Collection, 'pattern'> & {
|
|
222
|
+
pattern?: string;
|
|
223
|
+
}): void;
|
|
224
|
+
export declare function deleteStoreCollection(db: Database, name: string): boolean;
|
|
225
|
+
export declare function renameStoreCollection(db: Database, oldName: string, newName: string): boolean;
|
|
226
|
+
export declare function updateStoreContext(db: Database, collectionName: string, path: string, text: string): boolean;
|
|
227
|
+
export declare function removeStoreContext(db: Database, collectionName: string, path: string): boolean;
|
|
228
|
+
export declare function setStoreGlobalContext(db: Database, value: string | undefined): void;
|
|
229
|
+
/**
|
|
230
|
+
* Sync external config (YAML/inline) into SQLite store_collections.
|
|
231
|
+
* External config always wins. Skips sync if config hash hasn't changed.
|
|
232
|
+
*/
|
|
233
|
+
export declare function syncConfigToDb(db: Database, config: CollectionConfig): void;
|
|
234
|
+
export declare function isSqliteVecAvailable(): boolean;
|
|
235
|
+
export type Store = {
|
|
236
|
+
db: Database;
|
|
237
|
+
dbPath: string;
|
|
238
|
+
/** Optional LlamaCpp instance for this store (overrides the global singleton) */
|
|
239
|
+
llm?: LlamaCpp;
|
|
240
|
+
close: () => void;
|
|
241
|
+
ensureVecTable: (dimensions: number) => void;
|
|
242
|
+
getHashesNeedingEmbedding: () => number;
|
|
243
|
+
getIndexHealth: () => IndexHealthInfo;
|
|
244
|
+
getStatus: () => IndexStatus;
|
|
245
|
+
getCacheKey: typeof getCacheKey;
|
|
246
|
+
getCachedResult: (cacheKey: string) => string | null;
|
|
247
|
+
setCachedResult: (cacheKey: string, result: string) => void;
|
|
248
|
+
clearCache: () => void;
|
|
249
|
+
deleteLLMCache: () => number;
|
|
250
|
+
deleteInactiveDocuments: () => number;
|
|
251
|
+
cleanupOrphanedContent: () => number;
|
|
252
|
+
cleanupOrphanedVectors: () => number;
|
|
253
|
+
vacuumDatabase: () => void;
|
|
254
|
+
getContextForFile: (filepath: string) => string | null;
|
|
255
|
+
getContextForPath: (collectionName: string, path: string) => string | null;
|
|
256
|
+
getCollectionByName: (name: string) => {
|
|
257
|
+
name: string;
|
|
258
|
+
pwd: string;
|
|
259
|
+
glob_pattern: string;
|
|
260
|
+
} | null;
|
|
261
|
+
getCollectionsWithoutContext: () => {
|
|
262
|
+
name: string;
|
|
263
|
+
pwd: string;
|
|
264
|
+
doc_count: number;
|
|
265
|
+
}[];
|
|
266
|
+
getTopLevelPathsWithoutContext: (collectionName: string) => string[];
|
|
267
|
+
parseVirtualPath: typeof parseVirtualPath;
|
|
268
|
+
buildVirtualPath: typeof buildVirtualPath;
|
|
269
|
+
isVirtualPath: typeof isVirtualPath;
|
|
270
|
+
resolveVirtualPath: (virtualPath: string) => string | null;
|
|
271
|
+
toVirtualPath: (absolutePath: string) => string | null;
|
|
272
|
+
searchFTS: (query: string, limit?: number, collectionName?: string) => SearchResult[];
|
|
273
|
+
searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => Promise<SearchResult[]>;
|
|
274
|
+
expandQuery: (query: string, model?: string, intent?: string) => Promise<ExpandedQuery[]>;
|
|
275
|
+
rerank: (query: string, documents: {
|
|
276
|
+
file: string;
|
|
277
|
+
text: string;
|
|
278
|
+
}[], model?: string, intent?: string) => Promise<{
|
|
279
|
+
file: string;
|
|
280
|
+
score: number;
|
|
281
|
+
}[]>;
|
|
282
|
+
findDocument: (filename: string, options?: {
|
|
283
|
+
includeBody?: boolean;
|
|
284
|
+
}) => DocumentResult | DocumentNotFound;
|
|
285
|
+
getDocumentBody: (doc: DocumentResult | {
|
|
286
|
+
filepath: string;
|
|
287
|
+
}, fromLine?: number, maxLines?: number) => string | null;
|
|
288
|
+
findDocuments: (pattern: string, options?: {
|
|
289
|
+
includeBody?: boolean;
|
|
290
|
+
maxBytes?: number;
|
|
291
|
+
}) => {
|
|
292
|
+
docs: MultiGetResult[];
|
|
293
|
+
errors: string[];
|
|
294
|
+
};
|
|
295
|
+
findSimilarFiles: (query: string, maxDistance?: number, limit?: number) => string[];
|
|
296
|
+
matchFilesByGlob: (pattern: string) => {
|
|
297
|
+
filepath: string;
|
|
298
|
+
displayPath: string;
|
|
299
|
+
bodyLength: number;
|
|
300
|
+
}[];
|
|
301
|
+
findDocumentByDocid: (docid: string) => {
|
|
302
|
+
filepath: string;
|
|
303
|
+
hash: string;
|
|
304
|
+
} | null;
|
|
305
|
+
insertContent: (hash: string, content: string, createdAt: string) => void;
|
|
306
|
+
insertDocument: (collectionName: string, path: string, title: string, hash: string, createdAt: string, modifiedAt: string) => void;
|
|
307
|
+
findActiveDocument: (collectionName: string, path: string) => {
|
|
308
|
+
id: number;
|
|
309
|
+
hash: string;
|
|
310
|
+
title: string;
|
|
311
|
+
} | null;
|
|
312
|
+
updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => void;
|
|
313
|
+
updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => void;
|
|
314
|
+
deactivateDocument: (collectionName: string, path: string) => void;
|
|
315
|
+
getActiveDocumentPaths: (collectionName: string) => string[];
|
|
316
|
+
getHashesForEmbedding: () => {
|
|
317
|
+
hash: string;
|
|
318
|
+
body: string;
|
|
319
|
+
path: string;
|
|
320
|
+
}[];
|
|
321
|
+
clearAllEmbeddings: () => void;
|
|
322
|
+
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string) => void;
|
|
323
|
+
};
|
|
324
|
+
export type ReindexProgress = {
|
|
325
|
+
file: string;
|
|
326
|
+
current: number;
|
|
327
|
+
total: number;
|
|
328
|
+
};
|
|
329
|
+
export type ReindexResult = {
|
|
330
|
+
indexed: number;
|
|
331
|
+
updated: number;
|
|
332
|
+
unchanged: number;
|
|
333
|
+
removed: number;
|
|
334
|
+
orphanedCleaned: number;
|
|
335
|
+
};
|
|
336
|
+
/**
|
|
337
|
+
* Re-index a single collection by scanning the filesystem and updating the database.
|
|
338
|
+
* No console output and no db lifecycle management (it still reads the filesystem and writes to the database).
|
|
339
|
+
*/
|
|
340
|
+
export declare function reindexCollection(store: Store, collectionPath: string, globPattern: string, collectionName: string, options?: {
|
|
341
|
+
ignorePatterns?: string[];
|
|
342
|
+
onProgress?: (info: ReindexProgress) => void;
|
|
343
|
+
}): Promise<ReindexResult>;
|
|
344
|
+
export type EmbedProgress = {
|
|
345
|
+
chunksEmbedded: number;
|
|
346
|
+
totalChunks: number;
|
|
347
|
+
bytesProcessed: number;
|
|
348
|
+
totalBytes: number;
|
|
349
|
+
errors: number;
|
|
350
|
+
};
|
|
351
|
+
export type EmbedResult = {
|
|
352
|
+
docsProcessed: number;
|
|
353
|
+
chunksEmbedded: number;
|
|
354
|
+
errors: number;
|
|
355
|
+
durationMs: number;
|
|
356
|
+
};
|
|
357
|
+
export type EmbedOptions = {
|
|
358
|
+
force?: boolean;
|
|
359
|
+
model?: string;
|
|
360
|
+
maxDocsPerBatch?: number;
|
|
361
|
+
maxBatchBytes?: number;
|
|
362
|
+
chunkStrategy?: ChunkStrategy;
|
|
363
|
+
onProgress?: (info: EmbedProgress) => void;
|
|
364
|
+
};
|
|
365
|
+
/**
|
|
366
|
+
* Generate vector embeddings for documents that need them.
|
|
367
|
+
* Produces no console output and does no db lifecycle management.
|
|
368
|
+
* Uses the store's LlamaCpp instance if set, otherwise the global singleton.
|
|
369
|
+
*/
|
|
370
|
+
export declare function generateEmbeddings(store: Store, options?: EmbedOptions): Promise<EmbedResult>;
|
|
371
|
+
/**
|
|
372
|
+
* Create a new store instance with the given database path.
|
|
373
|
+
* If no path is provided, uses the default path (~/.cache/qmd/index.sqlite).
|
|
374
|
+
*
|
|
375
|
+
* @param dbPath - Path to the SQLite database file
|
|
376
|
+
* @returns Store instance with all methods bound to the database
|
|
377
|
+
*/
|
|
378
|
+
export declare function createStore(dbPath?: string): Store;
|
|
379
|
+
/**
|
|
380
|
+
* Unified document result type with all metadata.
|
|
381
|
+
* Body is optional - use getDocumentBody() to load it separately if needed.
|
|
382
|
+
*/
|
|
383
|
+
export type DocumentResult = {
|
|
384
|
+
filepath: string;
|
|
385
|
+
displayPath: string;
|
|
386
|
+
title: string;
|
|
387
|
+
context: string | null;
|
|
388
|
+
hash: string;
|
|
389
|
+
docid: string;
|
|
390
|
+
collectionName: string;
|
|
391
|
+
modifiedAt: string;
|
|
392
|
+
bodyLength: number;
|
|
393
|
+
body?: string;
|
|
394
|
+
identifier?: string;
|
|
395
|
+
filetags?: string[];
|
|
396
|
+
properties?: Record<string, string>;
|
|
397
|
+
outboundLinks?: Array<{
|
|
398
|
+
identifier: string;
|
|
399
|
+
text: string;
|
|
400
|
+
}>;
|
|
401
|
+
inboundLinks?: Array<{
|
|
402
|
+
docid: string;
|
|
403
|
+
title: string;
|
|
404
|
+
displayPath: string;
|
|
405
|
+
}>;
|
|
406
|
+
};
|
|
407
|
+
/**
|
|
408
|
+
* Extract short docid from a full hash (first 6 characters).
|
|
409
|
+
*/
|
|
410
|
+
export declare function getDocid(hash: string): string;
|
|
411
|
+
export declare function handelize(path: string): string;
|
|
412
|
+
/**
|
|
413
|
+
* Search result extends DocumentResult with score and source info
|
|
414
|
+
*/
|
|
415
|
+
export type SearchResult = DocumentResult & {
|
|
416
|
+
score: number;
|
|
417
|
+
source: "fts" | "vec";
|
|
418
|
+
chunkPos?: number;
|
|
419
|
+
};
|
|
420
|
+
/**
|
|
421
|
+
* Ranked result for RRF fusion (simplified, used internally)
|
|
422
|
+
*/
|
|
423
|
+
export type RankedResult = {
|
|
424
|
+
file: string;
|
|
425
|
+
displayPath: string;
|
|
426
|
+
title: string;
|
|
427
|
+
body: string;
|
|
428
|
+
score: number;
|
|
429
|
+
};
|
|
430
|
+
export type RRFContributionTrace = {
|
|
431
|
+
listIndex: number;
|
|
432
|
+
source: "fts" | "vec";
|
|
433
|
+
queryType: "original" | "lex" | "vec" | "hyde";
|
|
434
|
+
query: string;
|
|
435
|
+
rank: number;
|
|
436
|
+
weight: number;
|
|
437
|
+
backendScore: number;
|
|
438
|
+
rrfContribution: number;
|
|
439
|
+
};
|
|
440
|
+
export type RRFScoreTrace = {
|
|
441
|
+
contributions: RRFContributionTrace[];
|
|
442
|
+
baseScore: number;
|
|
443
|
+
topRank: number;
|
|
444
|
+
topRankBonus: number;
|
|
445
|
+
totalScore: number;
|
|
446
|
+
};
|
|
447
|
+
export type HybridQueryExplain = {
|
|
448
|
+
ftsScores: number[];
|
|
449
|
+
vectorScores: number[];
|
|
450
|
+
rrf: {
|
|
451
|
+
rank: number;
|
|
452
|
+
positionScore: number;
|
|
453
|
+
weight: number;
|
|
454
|
+
baseScore: number;
|
|
455
|
+
topRankBonus: number;
|
|
456
|
+
totalScore: number;
|
|
457
|
+
contributions: RRFContributionTrace[];
|
|
458
|
+
};
|
|
459
|
+
rerankScore: number;
|
|
460
|
+
blendedScore: number;
|
|
461
|
+
};
|
|
462
|
+
/**
|
|
463
|
+
* Error result when document is not found
|
|
464
|
+
*/
|
|
465
|
+
export type DocumentNotFound = {
|
|
466
|
+
error: "not_found";
|
|
467
|
+
query: string;
|
|
468
|
+
similarFiles: string[];
|
|
469
|
+
};
|
|
470
|
+
/**
|
|
471
|
+
* Result from multi-get operations
|
|
472
|
+
*/
|
|
473
|
+
export type MultiGetResult = {
|
|
474
|
+
doc: DocumentResult;
|
|
475
|
+
skipped: false;
|
|
476
|
+
} | {
|
|
477
|
+
doc: Pick<DocumentResult, "filepath" | "displayPath">;
|
|
478
|
+
skipped: true;
|
|
479
|
+
skipReason: string;
|
|
480
|
+
};
|
|
481
|
+
export type CollectionInfo = {
|
|
482
|
+
name: string;
|
|
483
|
+
path: string | null;
|
|
484
|
+
pattern: string | null;
|
|
485
|
+
documents: number;
|
|
486
|
+
lastUpdated: string;
|
|
487
|
+
};
|
|
488
|
+
export type IndexStatus = {
|
|
489
|
+
totalDocuments: number;
|
|
490
|
+
needsEmbedding: number;
|
|
491
|
+
hasVectorIndex: boolean;
|
|
492
|
+
collections: CollectionInfo[];
|
|
493
|
+
};
|
|
494
|
+
/**
* Returns a number for the given database.
* NOTE(review): presumably the count of content hashes that do not yet have
* embeddings (cf. `needsEmbedding` in IndexStatus / IndexHealthInfo) — confirm in store.js.
*/
export declare function getHashesNeedingEmbedding(db: Database): number;
|
|
495
|
+
export type IndexHealthInfo = {
|
|
496
|
+
needsEmbedding: number;
|
|
497
|
+
totalDocs: number;
|
|
498
|
+
daysStale: number | null;
|
|
499
|
+
};
|
|
500
|
+
/**
* Return index health information for the given database:
* `needsEmbedding`, `totalDocs`, and `daysStale` (which may be null).
* NOTE(review): the condition under which `daysStale` is null is not visible here.
*/
export declare function getIndexHealth(db: Database): IndexHealthInfo;
|
|
501
|
+
export declare function getCacheKey(url: string, body: object): string;
|
|
502
|
+
export declare function getCachedResult(db: Database, cacheKey: string): string | null;
|
|
503
|
+
export declare function setCachedResult(db: Database, cacheKey: string, result: string): void;
|
|
504
|
+
export declare function clearCache(db: Database): void;
|
|
505
|
+
/**
|
|
506
|
+
* Delete cached LLM API responses.
|
|
507
|
+
* Returns the number of cached responses deleted.
|
|
508
|
+
*/
|
|
509
|
+
export declare function deleteLLMCache(db: Database): number;
|
|
510
|
+
/**
|
|
511
|
+
* Remove inactive document records (active = 0).
|
|
512
|
+
* Returns the number of inactive documents deleted.
|
|
513
|
+
*/
|
|
514
|
+
export declare function deleteInactiveDocuments(db: Database): number;
|
|
515
|
+
/**
|
|
516
|
+
* Remove orphaned content hashes that are not referenced by any active document.
|
|
517
|
+
* Returns the number of orphaned content hashes deleted.
|
|
518
|
+
*/
|
|
519
|
+
export declare function cleanupOrphanedContent(db: Database): number;
|
|
520
|
+
/**
|
|
521
|
+
* Remove orphaned vector embeddings that are not referenced by any active document.
|
|
522
|
+
* Returns the number of orphaned embedding chunks deleted.
|
|
523
|
+
*/
|
|
524
|
+
export declare function cleanupOrphanedVectors(db: Database): number;
|
|
525
|
+
/**
|
|
526
|
+
* Run VACUUM to reclaim unused space in the database.
|
|
527
|
+
* This operation rebuilds the database file to eliminate fragmentation.
|
|
528
|
+
*/
|
|
529
|
+
export declare function vacuumDatabase(db: Database): void;
|
|
530
|
+
/**
* Asynchronously compute a hash string for the given content.
* NOTE(review): the hash algorithm and output encoding are not visible in this
* declaration file — confirm in store.js.
*/
export declare function hashContent(content: string): Promise<string>;
|
|
531
|
+
export declare function extractTitle(content: string, filename: string): string;
|
|
532
|
+
/**
|
|
533
|
+
* Extract Denote identifier from Org frontmatter.
|
|
534
|
+
* Matches: #+identifier: 20260405T120000
|
|
535
|
+
*/
|
|
536
|
+
export declare function extractDenoteIdentifier(content: string): string | null;
|
|
537
|
+
/**
|
|
538
|
+
* Extract Denote filetags from Org frontmatter.
|
|
539
|
+
* Matches: #+filetags: :tag1:tag2:tag3:
|
|
540
|
+
* Returns array of tags (e.g., ["tag1", "tag2", "tag3"]).
|
|
541
|
+
*/
|
|
542
|
+
export declare function extractDenoteFiletags(content: string): string[];
|
|
543
|
+
/**
|
|
544
|
+
* Extract all property drawers from Org content.
|
|
545
|
+
* Matches :PROPERTIES: ... :END: blocks and returns key-value pairs.
|
|
546
|
+
* Multiple property drawers are merged (later values overwrite earlier ones).
|
|
547
|
+
*/
|
|
548
|
+
export declare function extractOrgProperties(content: string): Record<string, string>;
|
|
549
|
+
/**
|
|
550
|
+
* Extract Org internal links in Denote format.
|
|
551
|
+
* Matches: [[denote:20260405T120000][Display Text]]
|
|
552
|
+
* Returns array of { identifier, text } objects.
|
|
553
|
+
*/
|
|
554
|
+
export declare function extractOrgLinks(content: string): Array<{
|
|
555
|
+
identifier: string;
|
|
556
|
+
text: string;
|
|
557
|
+
}>;
|
|
558
|
+
/**
|
|
559
|
+
* Extract all Org-mode metadata from content in one pass.
|
|
560
|
+
* Returns null for non-.org files.
|
|
561
|
+
*/
|
|
562
|
+
export declare function extractOrgMetadata(content: string, filepath: string): {
|
|
563
|
+
identifier: string | null;
|
|
564
|
+
filetags: string[];
|
|
565
|
+
properties: Record<string, string>;
|
|
566
|
+
links: Array<{
|
|
567
|
+
identifier: string;
|
|
568
|
+
text: string;
|
|
569
|
+
}>;
|
|
570
|
+
} | null;
|
|
571
|
+
/**
|
|
572
|
+
* Insert content into the content table (content-addressable storage).
|
|
573
|
+
* Uses INSERT OR IGNORE so duplicate hashes are skipped.
|
|
574
|
+
*/
|
|
575
|
+
export declare function insertContent(db: Database, hash: string, content: string, createdAt: string): void;
|
|
576
|
+
/**
|
|
577
|
+
* Insert a new document into the documents table.
|
|
578
|
+
* For .org files, also stores Denote identifier, filetags, and properties.
|
|
579
|
+
*/
|
|
580
|
+
export declare function insertDocument(db: Database, collectionName: string, path: string, title: string, hash: string, createdAt: string, modifiedAt: string, orgMeta?: {
|
|
581
|
+
identifier: string | null;
|
|
582
|
+
filetags: string[];
|
|
583
|
+
properties: Record<string, string>;
|
|
584
|
+
links: Array<{
|
|
585
|
+
identifier: string;
|
|
586
|
+
text: string;
|
|
587
|
+
}>;
|
|
588
|
+
}): void;
|
|
589
|
+
/**
|
|
590
|
+
* Find an active document by collection name and path.
|
|
591
|
+
*/
|
|
592
|
+
export declare function findActiveDocument(db: Database, collectionName: string, path: string): {
|
|
593
|
+
id: number;
|
|
594
|
+
hash: string;
|
|
595
|
+
title: string;
|
|
596
|
+
} | null;
|
|
597
|
+
/**
|
|
598
|
+
* Update the title and modified_at timestamp for a document.
|
|
599
|
+
*/
|
|
600
|
+
export declare function updateDocumentTitle(db: Database, documentId: number, title: string, modifiedAt: string): void;
|
|
601
|
+
/**
|
|
602
|
+
* Update an existing document's hash, title, and modified_at timestamp.
|
|
603
|
+
* Used when content changes but the file path stays the same.
|
|
604
|
+
*/
|
|
605
|
+
export declare function updateDocument(db: Database, documentId: number, title: string, hash: string, modifiedAt: string): void;
|
|
606
|
+
/**
|
|
607
|
+
* Deactivate a document (mark as inactive but don't delete).
|
|
608
|
+
*/
|
|
609
|
+
export declare function deactivateDocument(db: Database, collectionName: string, path: string): void;
|
|
610
|
+
/**
|
|
611
|
+
* Get all active document paths for a collection.
|
|
612
|
+
*/
|
|
613
|
+
export declare function getActiveDocumentPaths(db: Database, collectionName: string): string[];
|
|
614
|
+
export { formatQueryForEmbedding, formatDocForEmbedding };
|
|
615
|
+
/**
|
|
616
|
+
* Chunk a document using regex-only break point detection.
|
|
617
|
+
* This is the sync, backward-compatible API used by tests and legacy callers.
|
|
618
|
+
*/
|
|
619
|
+
export declare function chunkDocument(content: string, maxChars?: number, overlapChars?: number, windowChars?: number, filepath?: string): {
|
|
620
|
+
text: string;
|
|
621
|
+
pos: number;
|
|
622
|
+
}[];
|
|
623
|
+
/**
|
|
624
|
+
* Async AST-aware chunking. Detects language from filepath, computes AST
|
|
625
|
+
* break points for supported code files, merges with regex break points,
|
|
626
|
+
* and delegates to the shared chunk algorithm.
|
|
627
|
+
*
|
|
628
|
+
* Falls back to regex-only when strategy is "regex", filepath is absent,
|
|
629
|
+
* or language is unsupported.
|
|
630
|
+
*/
|
|
631
|
+
export declare function chunkDocumentAsync(content: string, maxChars?: number, overlapChars?: number, windowChars?: number, filepath?: string, chunkStrategy?: ChunkStrategy): Promise<{
|
|
632
|
+
text: string;
|
|
633
|
+
pos: number;
|
|
634
|
+
}[]>;
|
|
635
|
+
/**
|
|
636
|
+
* Chunk a document by actual token count using the LLM tokenizer.
|
|
637
|
+
* More accurate than character-based chunking but requires async.
|
|
638
|
+
*
|
|
639
|
+
* When filepath and chunkStrategy are provided, uses AST-aware break points
|
|
640
|
+
* for supported code files.
|
|
641
|
+
*/
|
|
642
|
+
export declare function chunkDocumentByTokens(content: string, maxTokens?: number, overlapTokens?: number, windowTokens?: number, filepath?: string, chunkStrategy?: ChunkStrategy, signal?: AbortSignal): Promise<{
|
|
643
|
+
text: string;
|
|
644
|
+
pos: number;
|
|
645
|
+
tokens: number;
|
|
646
|
+
}[]>;
|
|
647
|
+
/**
|
|
648
|
+
* Normalize a docid input by stripping surrounding quotes and leading #.
|
|
649
|
+
* Handles: "#abc123", 'abc123', "abc123", #abc123, abc123
|
|
650
|
+
* Returns the bare hex string.
|
|
651
|
+
*/
|
|
652
|
+
export declare function normalizeDocid(docid: string): string;
|
|
653
|
+
/**
|
|
654
|
+
* Check if a string looks like a docid reference.
|
|
655
|
+
* Accepts: #abc123, abc123, "#abc123", "abc123", '#abc123', 'abc123'
|
|
656
|
+
* Returns true if the normalized form is a valid hex string of 6+ chars.
|
|
657
|
+
*/
|
|
658
|
+
export declare function isDocid(input: string): boolean;
|
|
659
|
+
/**
|
|
660
|
+
* Find a document by its short docid (first 6 characters of hash).
|
|
661
|
+
* Returns the document's virtual path (`filepath`) and its `hash` if found, null otherwise.
|
|
662
|
+
* If multiple documents match the same short hash (collision), returns the first one.
|
|
663
|
+
*
|
|
664
|
+
* Accepts lenient input: #abc123, abc123, "#abc123", "abc123"
|
|
665
|
+
*/
|
|
666
|
+
export declare function findDocumentByDocid(db: Database, docid: string): {
|
|
667
|
+
filepath: string;
|
|
668
|
+
hash: string;
|
|
669
|
+
} | null;
|
|
670
|
+
/**
* Look up file paths that are similar to `query`.
*
* NOTE(review): judging by the parameter names, this presumably fuzzy-matches
* `query` against indexed paths, keeps matches within `maxDistance`, and
* returns at most `limit` of them — confirm semantics and defaults against store.js.
*
* @returns A list of path strings.
*/
export declare function findSimilarFiles(db: Database, query: string, maxDistance?: number, limit?: number): string[];
|
|
671
|
+
/**
* Match files known to the database against a glob `pattern`.
*
* NOTE(review): presumably `filepath` is the virtual path, `displayPath` the
* user-facing path, and `bodyLength` the size of the document body — confirm
* the path forms and the unit of `bodyLength` against store.js.
*
* @returns One entry per matching file.
*/
export declare function matchFilesByGlob(db: Database, pattern: string): {
|
|
672
|
+
filepath: string;
|
|
673
|
+
displayPath: string;
|
|
674
|
+
bodyLength: number;
|
|
675
|
+
}[];
|
|
676
|
+
/**
|
|
677
|
+
* Get context for a file path using hierarchical inheritance.
|
|
678
|
+
* Contexts are collection-scoped and inherit from parent directories.
|
|
679
|
+
* For example, context at "/talks" applies to "/talks/2024/keynote.md".
|
|
680
|
+
*
|
|
681
|
+
* @param db Database instance (unused - kept for compatibility)
|
|
682
|
+
* @param collectionName Collection name
|
|
683
|
+
* @param path Relative path within the collection
|
|
684
|
+
* @returns Context string or null if no context is defined
|
|
685
|
+
*/
|
|
686
|
+
export declare function getContextForPath(db: Database, collectionName: string, path: string): string | null;
|
|
687
|
+
/**
|
|
688
|
+
* Get context for a file path (virtual or filesystem).
|
|
689
|
+
* Resolves the collection and relative path from the DB store_collections table.
|
|
690
|
+
*/
|
|
691
|
+
export declare function getContextForFile(db: Database, filepath: string): string | null;
|
|
692
|
+
/**
|
|
693
|
+
* Get collection by name from DB store_collections table.
|
|
694
|
+
*/
|
|
695
|
+
export declare function getCollectionByName(db: Database, name: string): {
|
|
696
|
+
name: string;
|
|
697
|
+
pwd: string;
|
|
698
|
+
glob_pattern: string;
|
|
699
|
+
} | null;
|
|
700
|
+
/**
|
|
701
|
+
* List all collections with document counts from database.
|
|
702
|
+
* Merges store_collections config with database statistics.
|
|
703
|
+
*/
|
|
704
|
+
export declare function listCollections(db: Database): {
|
|
705
|
+
name: string;
|
|
706
|
+
pwd: string;
|
|
707
|
+
glob_pattern: string;
|
|
708
|
+
doc_count: number;
|
|
709
|
+
active_count: number;
|
|
710
|
+
last_modified: string | null;
|
|
711
|
+
includeByDefault: boolean;
|
|
712
|
+
}[];
|
|
713
|
+
/**
|
|
714
|
+
* Remove a collection and clean up its documents.
|
|
715
|
+
* Uses collections.ts to remove from YAML config and cleans up database.
|
|
716
|
+
*/
|
|
717
|
+
export declare function removeCollection(db: Database, collectionName: string): {
|
|
718
|
+
deletedDocs: number;
|
|
719
|
+
cleanedHashes: number;
|
|
720
|
+
};
|
|
721
|
+
/**
|
|
722
|
+
* Rename a collection.
|
|
723
|
+
* Updates both YAML config and database documents table.
|
|
724
|
+
*/
|
|
725
|
+
export declare function renameCollection(db: Database, oldName: string, newName: string): void;
|
|
726
|
+
/**
|
|
727
|
+
* Insert or update a context for a specific collection and path prefix.
|
|
728
|
+
*/
|
|
729
|
+
export declare function insertContext(db: Database, collectionId: number, pathPrefix: string, context: string): void;
|
|
730
|
+
/**
|
|
731
|
+
* Delete a context for a specific collection and path prefix.
|
|
732
|
+
* Returns the number of contexts deleted.
|
|
733
|
+
*/
|
|
734
|
+
export declare function deleteContext(db: Database, collectionName: string, pathPrefix: string): number;
|
|
735
|
+
/**
|
|
736
|
+
* Delete all global contexts (contexts with empty path_prefix).
|
|
737
|
+
* Returns the number of contexts deleted.
|
|
738
|
+
*/
|
|
739
|
+
export declare function deleteGlobalContexts(db: Database): number;
|
|
740
|
+
/**
|
|
741
|
+
* List all contexts, grouped by collection.
|
|
742
|
+
* Returns contexts ordered by collection name, then by path prefix length (longest first).
|
|
743
|
+
*/
|
|
744
|
+
export declare function listPathContexts(db: Database): {
|
|
745
|
+
collection_name: string;
|
|
746
|
+
path_prefix: string;
|
|
747
|
+
context: string;
|
|
748
|
+
}[];
|
|
749
|
+
/**
|
|
750
|
+
* Get all collections (name only - from YAML config).
|
|
751
|
+
*/
|
|
752
|
+
export declare function getAllCollections(db: Database): {
|
|
753
|
+
name: string;
|
|
754
|
+
}[];
|
|
755
|
+
/**
|
|
756
|
+
* Check which collections don't have any context defined.
|
|
757
|
+
* Returns collections that have no context entries at all (not even root context).
|
|
758
|
+
*/
|
|
759
|
+
export declare function getCollectionsWithoutContext(db: Database): {
|
|
760
|
+
name: string;
|
|
761
|
+
pwd: string;
|
|
762
|
+
doc_count: number;
|
|
763
|
+
}[];
|
|
764
|
+
/**
|
|
765
|
+
* Get top-level directories in a collection that don't have context.
|
|
766
|
+
* Useful for suggesting where context might be needed.
|
|
767
|
+
*/
|
|
768
|
+
export declare function getTopLevelPathsWithoutContext(db: Database, collectionName: string): string[];
|
|
769
|
+
/**
|
|
770
|
+
* Validate that a vec/hyde query doesn't use lex-only syntax.
|
|
771
|
+
* Returns error message if invalid, null if valid.
|
|
772
|
+
*/
|
|
773
|
+
export declare function validateSemanticQuery(query: string): string | null;
|
|
774
|
+
/**
* Validate a lex (full-text) query string.
*
* NOTE(review): presumably follows the same contract as validateSemanticQuery
* (error message if invalid, null if valid) — confirm against store.js.
*/
export declare function validateLexQuery(query: string): string | null;
|
|
775
|
+
/**
* Full-text (FTS) search for `query`; this is the target of "lex" queries in
* the hybrid and structured search pipelines.
*
* NOTE(review): `limit` and `collectionName` presumably cap the result count
* and restrict the search to one collection — confirm defaults in store.js.
*
* @returns The matching SearchResult entries.
*/
export declare function searchFTS(db: Database, query: string, limit?: number, collectionName?: string): SearchResult[];
|
|
776
|
+
/**
* Vector search for `query` with the embedding model `model`; this is the
* target of "vec"/"hyde" queries in the search pipelines.
*
* NOTE(review): `session` and `precomputedEmbedding` presumably let the caller
* reuse an existing LLM session or skip embedding the query — confirm
* against store.js.
*
* @returns Promise resolving to the matching SearchResult entries.
*/
export declare function searchVec(db: Database, query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]): Promise<SearchResult[]>;
|
|
777
|
+
/**
|
|
778
|
+
* Get all unique content hashes that need embeddings (from active documents).
|
|
779
|
+
* Returns hash, document body, and a sample path for display purposes.
|
|
780
|
+
*/
|
|
781
|
+
export declare function getHashesForEmbedding(db: Database): {
|
|
782
|
+
hash: string;
|
|
783
|
+
body: string;
|
|
784
|
+
path: string;
|
|
785
|
+
}[];
|
|
786
|
+
/**
|
|
787
|
+
* Clear all embeddings from the database (force re-index).
|
|
788
|
+
* Deletes all rows from content_vectors and drops the vectors_vec table.
|
|
789
|
+
*/
|
|
790
|
+
export declare function clearAllEmbeddings(db: Database): void;
|
|
791
|
+
/**
|
|
792
|
+
* Insert a single embedding into both content_vectors and vectors_vec tables.
|
|
793
|
+
* The vectors_vec key (hash_seq) is the content hash and the chunk sequence number joined by an underscore: "{hash}_{seq}".
|
|
794
|
+
*
|
|
795
|
+
* content_vectors is inserted first so that getHashesForEmbedding (which checks
|
|
796
|
+
* only content_vectors) won't re-select the hash on a crash between the two inserts.
|
|
797
|
+
*
|
|
798
|
+
* vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
|
|
799
|
+
* vec0 virtual tables silently ignore the OR REPLACE conflict clause.
|
|
800
|
+
*/
|
|
801
|
+
export declare function insertEmbedding(db: Database, hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string): void;
|
|
802
|
+
/**
* Expand `query` into typed query variants (lex/vec/hyde).
*
* NOTE(review): `db` is presumably used for caching and `llmOverride` to
* substitute the default LlamaCpp instance; `intent` looks like a
* disambiguation hint — confirm against store.js.
*
* @returns Promise resolving to the expanded queries.
*/
export declare function expandQuery(query: string, model: string | undefined, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise<ExpandedQuery[]>;
|
|
803
|
+
/**
* Rerank `documents` against `query`, producing one score per document file.
*
* Each input carries the `file` it came from and the `text` to score; each
* output pairs a `file` with its `score`.
*
* NOTE(review): output ordering, the score range, and the roles of `db`
* (presumably caching) and `llmOverride` are not visible from this
* declaration — confirm against store.js.
*/
export declare function rerank(query: string, documents: {
|
|
804
|
+
file: string;
|
|
805
|
+
text: string;
|
|
806
|
+
}[], model: string | undefined, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise<{
|
|
807
|
+
file: string;
|
|
808
|
+
score: number;
|
|
809
|
+
}[]>;
|
|
810
|
+
/**
* Fuse several ranked result lists into a single ranked list (RRF fusion step
* of the search pipelines).
*
* NOTE(review): `weights` presumably holds one weight per entry of
* `resultLists` and `k` is the RRF smoothing constant — confirm defaults
* against store.js.
*/
export declare function reciprocalRankFusion(resultLists: RankedResult[][], weights?: number[], k?: number): RankedResult[];
|
|
811
|
+
/**
|
|
812
|
+
* Build per-document RRF contribution traces for explain/debug output.
|
|
813
|
+
*/
|
|
814
|
+
export declare function buildRrfTrace(resultLists: RankedResult[][], weights?: number[], listMeta?: RankedListMeta[], k?: number): Map<string, RRFScoreTrace>;
|
|
815
|
+
/**
|
|
816
|
+
* Find a document by filename/path, docid (#hash), or with fuzzy matching.
|
|
817
|
+
* Returns document metadata without body by default.
|
|
818
|
+
*
|
|
819
|
+
* Supports:
|
|
820
|
+
* - Virtual paths: qmd://collection/path/to/file.md
|
|
821
|
+
* - Absolute paths: /path/to/file.md
|
|
822
|
+
* - Relative paths: path/to/file.md
|
|
823
|
+
* - Short docid: #abc123 (first 6 chars of hash)
|
|
824
|
+
*/
|
|
825
|
+
export declare function findDocument(db: Database, filename: string, options?: {
|
|
826
|
+
includeBody?: boolean;
|
|
827
|
+
}): DocumentResult | DocumentNotFound;
|
|
828
|
+
/**
|
|
829
|
+
* Get the body content for a document
|
|
830
|
+
* Optionally slice by line range
|
|
831
|
+
*/
|
|
832
|
+
export declare function getDocumentBody(db: Database, doc: DocumentResult | {
|
|
833
|
+
filepath: string;
|
|
834
|
+
}, fromLine?: number, maxLines?: number): string | null;
|
|
835
|
+
/**
|
|
836
|
+
* Find multiple documents by glob pattern or comma-separated list
|
|
837
|
+
* Returns documents without body by default (use getDocumentBody to load)
|
|
838
|
+
*/
|
|
839
|
+
export declare function findDocuments(db: Database, pattern: string, options?: {
|
|
840
|
+
includeBody?: boolean;
|
|
841
|
+
maxBytes?: number;
|
|
842
|
+
}): {
|
|
843
|
+
docs: MultiGetResult[];
|
|
844
|
+
errors: string[];
|
|
845
|
+
};
|
|
846
|
+
/**
* Return the index status for the given database as an IndexStatus value.
*/
export declare function getStatus(db: Database): IndexStatus;
|
|
847
|
+
/**
* Return shape of extractSnippet().
*
* NOTE(review): presumably `line` is the line number where the snippet starts,
* and `linesBefore` / `linesAfter` / `snippetLines` count the lines outside
* and inside the snippet — confirm (including 0- vs 1-based numbering)
* against store.js.
*/
export type SnippetResult = {
|
|
848
|
+
line: number;
|
|
849
|
+
snippet: string;
|
|
850
|
+
linesBefore: number;
|
|
851
|
+
linesAfter: number;
|
|
852
|
+
snippetLines: number;
|
|
853
|
+
};
|
|
854
|
+
/** Weight for intent terms relative to query terms (1.0) in snippet scoring */
|
|
855
|
+
export declare const INTENT_WEIGHT_SNIPPET = 0.3;
|
|
856
|
+
/** Weight for intent terms relative to query terms (1.0) in chunk selection */
|
|
857
|
+
export declare const INTENT_WEIGHT_CHUNK = 0.5;
|
|
858
|
+
/**
|
|
859
|
+
* Extract meaningful terms from an intent string, filtering stop words and punctuation.
|
|
860
|
+
* Uses Unicode-aware punctuation stripping so domain terms like "API" survive.
|
|
861
|
+
* Returns lowercase terms suitable for text matching.
|
|
862
|
+
*/
|
|
863
|
+
export declare function extractIntentTerms(intent: string): string[];
|
|
864
|
+
/**
* Extract a snippet from `body` for `query` and describe it as a SnippetResult.
*
* NOTE(review): `chunkPos` / `chunkLen` presumably narrow the search to a
* chunk of `body`, `maxLen` caps the snippet length, and `intent` terms are
* presumably weighted by INTENT_WEIGHT_SNIPPET — confirm against store.js.
*/
export declare function extractSnippet(body: string, query: string, maxLen?: number, chunkPos?: number, chunkLen?: number, intent?: string): SnippetResult;
|
|
865
|
+
/**
|
|
866
|
+
* Add line numbers to text content.
|
|
867
|
+
* Each line becomes: "{lineNum}: {content}"
|
|
868
|
+
*/
|
|
869
|
+
export declare function addLineNumbers(text: string, startLine?: number): string;
|
|
870
|
+
/**
|
|
871
|
+
* Optional progress hooks for search orchestration.
|
|
872
|
+
* CLI wires these to stderr for user feedback; MCP leaves them unset.
|
|
873
|
+
*/
|
|
874
|
+
export interface SearchHooks {
|
|
875
|
+
/** BM25 probe found strong signal — expansion will be skipped */
|
|
876
|
+
onStrongSignal?: (topScore: number) => void;
|
|
877
|
+
/** Query expansion starting */
|
|
878
|
+
onExpandStart?: () => void;
|
|
879
|
+
/** Query expansion complete. Empty array = strong signal skip. elapsedMs = time taken. */
|
|
880
|
+
onExpand?: (original: string, expanded: ExpandedQuery[], elapsedMs: number) => void;
|
|
881
|
+
/** Embedding starting (vec/hyde queries) */
|
|
882
|
+
onEmbedStart?: (count: number) => void;
|
|
883
|
+
/** Embedding complete */
|
|
884
|
+
onEmbedDone?: (elapsedMs: number) => void;
|
|
885
|
+
/** Reranking is about to start */
|
|
886
|
+
onRerankStart?: (chunkCount: number) => void;
|
|
887
|
+
/** Reranking finished */
|
|
888
|
+
onRerankDone?: (elapsedMs: number) => void;
|
|
889
|
+
}
|
|
890
|
+
/**
* Options accepted by hybridQuery(). Every field is optional.
*
* Per the hybridQuery pipeline description, fused candidates are sliced to
* `candidateLimit`, and final results are filtered by `minScore` and sliced
* to `limit`. `hooks` supplies optional progress callbacks (SearchHooks).
*
* NOTE(review): defaults are not visible from this declaration; `collection`
* presumably restricts the search to one collection, `skipRerank` presumably
* bypasses the rerank step, and `explain` presumably populates
* HybridQueryResult.explain — confirm against store.js.
*/
export interface HybridQueryOptions {
|
|
891
|
+
collection?: string;
|
|
892
|
+
limit?: number;
|
|
893
|
+
minScore?: number;
|
|
894
|
+
candidateLimit?: number;
|
|
895
|
+
explain?: boolean;
|
|
896
|
+
intent?: string;
|
|
897
|
+
skipRerank?: boolean;
|
|
898
|
+
chunkStrategy?: ChunkStrategy;
|
|
899
|
+
hooks?: SearchHooks;
|
|
900
|
+
}
|
|
901
|
+
/**
* One result entry returned by hybridQuery() and structuredSearch().
*
* NOTE(review): presumably `bestChunk` is the text of the chunk picked by the
* pipeline's best-chunk selection and `bestChunkPos` its position within
* `body`; `score` is presumably the final blended score; `explain` is
* presumably only set when the `explain` option is enabled — confirm
* against store.js.
*/
export interface HybridQueryResult {
|
|
902
|
+
file: string;
|
|
903
|
+
displayPath: string;
|
|
904
|
+
title: string;
|
|
905
|
+
body: string;
|
|
906
|
+
bestChunk: string;
|
|
907
|
+
bestChunkPos: number;
|
|
908
|
+
score: number;
|
|
909
|
+
context: string | null;
|
|
910
|
+
docid: string;
|
|
911
|
+
explain?: HybridQueryExplain;
|
|
912
|
+
}
|
|
913
|
+
/**
* Metadata describing where one ranked result list came from: the search
* backend (`source`), the kind of query that produced it (`queryType`), and
* the query text. Accepted by buildRrfTrace() as its `listMeta` argument.
*
* NOTE(review): presumably listMeta[i] describes resultLists[i] — confirm
* against store.js.
*/
export type RankedListMeta = {
|
|
914
|
+
source: "fts" | "vec";
|
|
915
|
+
queryType: "original" | "lex" | "vec" | "hyde";
|
|
916
|
+
query: string;
|
|
917
|
+
};
|
|
918
|
+
/**
|
|
919
|
+
* Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
|
|
920
|
+
*
|
|
921
|
+
* Pipeline:
|
|
922
|
+
* 1. BM25 probe → skip expansion if strong signal
|
|
923
|
+
* 2. expandQuery() → typed query variants (lex/vec/hyde)
|
|
924
|
+
* 3. Type-routed search: original→vector, lex→FTS, vec/hyde→vector
|
|
925
|
+
* 4. RRF fusion → slice to candidateLimit
|
|
926
|
+
* 5. chunkDocument() + keyword-best-chunk selection
|
|
927
|
+
* 6. rerank on chunks (NOT full bodies — O(tokens) trap)
|
|
928
|
+
* 7. Position-aware score blending (RRF rank × reranker score)
|
|
929
|
+
* 8. Dedup by file, filter by minScore, slice to limit
|
|
930
|
+
*/
|
|
931
|
+
export declare function hybridQuery(store: Store, query: string, options?: HybridQueryOptions): Promise<HybridQueryResult[]>;
|
|
932
|
+
/**
* Options accepted by vectorSearchQuery(). Every field is optional.
*
* Per the vectorSearchQuery pipeline description, results are filtered by
* `minScore` and sliced to `limit`. Only the `onExpand` hook of SearchHooks
* is accepted here.
*
* NOTE(review): defaults are not visible from this declaration; `collection`
* presumably restricts the search to one collection and `intent` is
* presumably forwarded to query expansion — confirm against store.js.
*/
export interface VectorSearchOptions {
|
|
933
|
+
collection?: string;
|
|
934
|
+
limit?: number;
|
|
935
|
+
minScore?: number;
|
|
936
|
+
intent?: string;
|
|
937
|
+
hooks?: Pick<SearchHooks, 'onExpand'>;
|
|
938
|
+
}
|
|
939
|
+
/**
* One result entry returned by vectorSearchQuery().
*
* NOTE(review): per the vectorSearchQuery pipeline description results are
* deduplicated by filepath keeping the max score, so `score` is presumably
* that maximum; `context` is presumably the path context (null when none is
* defined) — confirm against store.js.
*/
export interface VectorSearchResult {
|
|
940
|
+
file: string;
|
|
941
|
+
displayPath: string;
|
|
942
|
+
title: string;
|
|
943
|
+
body: string;
|
|
944
|
+
score: number;
|
|
945
|
+
context: string | null;
|
|
946
|
+
docid: string;
|
|
947
|
+
}
|
|
948
|
+
/**
|
|
949
|
+
* Vector-only semantic search with query expansion.
|
|
950
|
+
*
|
|
951
|
+
* Pipeline:
|
|
952
|
+
* 1. expandQuery() → typed variants, filter to vec/hyde only (lex irrelevant here)
|
|
953
|
+
* 2. searchVec() for original + vec/hyde variants (sequential — node-llama-cpp embed limitation)
|
|
954
|
+
* 3. Dedup by filepath (keep max score)
|
|
955
|
+
* 4. Sort by score descending, filter by minScore, slice to limit
|
|
956
|
+
*/
|
|
957
|
+
export declare function vectorSearchQuery(store: Store, query: string, options?: VectorSearchOptions): Promise<VectorSearchResult[]>;
|
|
958
|
+
/**
|
|
959
|
+
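* Options for a structured search request (see structuredSearch()).
|
|
960
|
+
* The sub-searches themselves are passed separately as ExpandedQuery[]; each one is a single sub-search that matches the format used in QMD training data.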
|
|
961
|
+
*/
|
|
962
|
+
export interface StructuredSearchOptions {
|
|
963
|
+
collections?: string[];
|
|
964
|
+
limit?: number;
|
|
965
|
+
minScore?: number;
|
|
966
|
+
candidateLimit?: number;
|
|
967
|
+
explain?: boolean;
|
|
968
|
+
/** Domain intent hint for disambiguation — steers reranking and chunk selection */
|
|
969
|
+
intent?: string;
|
|
970
|
+
/** Skip LLM reranking, use only RRF scores */
|
|
971
|
+
skipRerank?: boolean;
|
|
972
|
+
chunkStrategy?: ChunkStrategy;
|
|
973
|
+
hooks?: SearchHooks;
|
|
974
|
+
}
|
|
975
|
+
/**
|
|
976
|
+
* Structured search: execute pre-expanded queries without LLM query expansion.
|
|
977
|
+
*
|
|
978
|
+
* Designed for LLM callers (MCP/HTTP) that generate their own query expansions.
|
|
979
|
+
* Skips the internal expandQuery() step — goes directly to:
|
|
980
|
+
*
|
|
981
|
+
* Pipeline:
|
|
982
|
+
* 1. Route searches: lex→FTS, vec/hyde→vector (batch embed)
|
|
983
|
+
* 2. RRF fusion across all result lists
|
|
984
|
+
* 3. Chunk documents + keyword-best-chunk selection
|
|
985
|
+
* 4. Rerank on chunks
|
|
986
|
+
* 5. Position-aware score blending
|
|
987
|
+
* 6. Dedup, filter, slice
|
|
988
|
+
*
|
|
989
|
+
* This is the recommended endpoint for capable LLMs — they can generate
|
|
990
|
+
* better query variations than our small local model, especially for
|
|
991
|
+
* domain-specific or nuanced queries.
|
|
992
|
+
*/
|
|
993
|
+
export declare function structuredSearch(store: Store, searches: ExpandedQuery[], options?: StructuredSearchOptions): Promise<HybridQueryResult[]>;
|