@ambicuity/kindx 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -0
- package/LICENSE +21 -0
- package/README.md +578 -0
- package/dist/catalogs.d.ts +137 -0
- package/dist/catalogs.js +349 -0
- package/dist/inference.d.ts +398 -0
- package/dist/inference.js +1131 -0
- package/dist/kindx.d.ts +1 -0
- package/dist/kindx.js +2621 -0
- package/dist/protocol.d.ts +21 -0
- package/dist/protocol.js +666 -0
- package/dist/renderer.d.ts +119 -0
- package/dist/renderer.js +350 -0
- package/dist/repository.d.ts +783 -0
- package/dist/repository.js +2787 -0
- package/dist/runtime.d.ts +33 -0
- package/dist/runtime.js +34 -0
- package/package.json +90 -0
|
@@ -0,0 +1,783 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* KINDX Repository - Core data access and retrieval functions
|
|
3
|
+
*
|
|
4
|
+
* This module provides all database operations, search functions, and document
|
|
5
|
+
* retrieval for KINDX. It returns raw data structures that can be formatted by
|
|
6
|
+
* CLI or MCP consumers.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* const store = createStore("/path/to/db.sqlite");
|
|
10
|
+
* // or use default path:
|
|
11
|
+
* const store = createStore();
|
|
12
|
+
*/
|
|
13
|
+
import type { Database } from "./runtime.js";
|
|
14
|
+
import { formatQueryForEmbedding, formatDocForEmbedding, type ILLMSession } from "./inference.js";
|
|
15
|
+
export declare const DEFAULT_EMBED_MODEL = "embeddinggemma";
|
|
16
|
+
export declare const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
|
|
17
|
+
export declare const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
|
|
18
|
+
export declare const DEFAULT_GLOB = "**/*.md";
|
|
19
|
+
export declare const DEFAULT_MULTI_GET_MAX_BYTES: number;
|
|
20
|
+
export declare const CHUNK_SIZE_TOKENS = 900;
|
|
21
|
+
export declare const CHUNK_OVERLAP_TOKENS: number;
|
|
22
|
+
export declare const CHUNK_SIZE_CHARS: number;
|
|
23
|
+
export declare const CHUNK_OVERLAP_CHARS: number;
|
|
24
|
+
export declare const CHUNK_WINDOW_TOKENS = 200;
|
|
25
|
+
export declare const CHUNK_WINDOW_CHARS: number;
|
|
26
|
+
/**
|
|
27
|
+
* A potential break point in the document with a base score indicating quality.
|
|
28
|
+
*/
|
|
29
|
+
export interface BreakPoint {
|
|
30
|
+
pos: number;
|
|
31
|
+
score: number;
|
|
32
|
+
type: string;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* A region where a code fence exists (between ``` markers).
|
|
36
|
+
* We should never split inside a code fence.
|
|
37
|
+
*/
|
|
38
|
+
export interface CodeFenceRegion {
|
|
39
|
+
start: number;
|
|
40
|
+
end: number;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Patterns for detecting break points in markdown documents.
|
|
44
|
+
* Higher scores indicate better places to split.
|
|
45
|
+
* Scores are spread wide so headings decisively beat lower-quality breaks.
|
|
46
|
+
* Order matters for scoring - more specific patterns first.
|
|
47
|
+
*/
|
|
48
|
+
export declare const BREAK_PATTERNS: [RegExp, number, string][];
|
|
49
|
+
/**
|
|
50
|
+
* Scan text for all potential break points.
|
|
51
|
+
* Returns sorted array of break points with higher-scoring patterns taking precedence
|
|
52
|
+
* when multiple patterns match the same position.
|
|
53
|
+
*/
|
|
54
|
+
export declare function scanBreakPoints(text: string): BreakPoint[];
|
|
55
|
+
/**
|
|
56
|
+
* Find all code fence regions in the text.
|
|
57
|
+
* Code fences are delimited by ``` and we should never split inside them.
|
|
58
|
+
*/
|
|
59
|
+
export declare function findCodeFences(text: string): CodeFenceRegion[];
|
|
60
|
+
/**
|
|
61
|
+
* Check if a position is inside a code fence region.
|
|
62
|
+
*/
|
|
63
|
+
export declare function isInsideCodeFence(pos: number, fences: CodeFenceRegion[]): boolean;
|
|
64
|
+
/**
|
|
65
|
+
* Find the best cut position using scored break points with distance decay.
|
|
66
|
+
*
|
|
67
|
+
* Uses squared distance for gentler early decay - headings far back still win
|
|
68
|
+
* over low-quality breaks near the target.
|
|
69
|
+
*
|
|
70
|
+
* @param breakPoints - Pre-scanned break points from scanBreakPoints()
|
|
71
|
+
* @param targetCharPos - The ideal cut position (e.g., maxChars boundary)
|
|
72
|
+
* @param windowChars - How far back to search for break points (default ~200 tokens)
|
|
73
|
+
* @param decayFactor - How much to penalize distance (0.7 = 30% score at window edge)
|
|
74
|
+
* @param codeFences - Code fence regions to avoid splitting inside
|
|
75
|
+
* @returns The best position to cut at
|
|
76
|
+
*/
|
|
77
|
+
export declare function findBestCutoff(breakPoints: BreakPoint[], targetCharPos: number, windowChars?: number, decayFactor?: number, codeFences?: CodeFenceRegion[]): number;
|
|
78
|
+
export declare const STRONG_SIGNAL_MIN_SCORE = 0.85;
|
|
79
|
+
export declare const STRONG_SIGNAL_MIN_GAP = 0.15;
|
|
80
|
+
export declare const RERANK_CANDIDATE_LIMIT = 40;
|
|
81
|
+
/**
|
|
82
|
+
* A typed query expansion result. Decoupled from inference.ts internal Queryable —
|
|
83
|
+
* same shape, but repository.ts owns its own public API type.
|
|
84
|
+
*
|
|
85
|
+
* - lex: keyword variant → routes to FTS only
|
|
86
|
+
* - vec: semantic variant → routes to vector only
|
|
87
|
+
* - hyde: hypothetical document → routes to vector only
|
|
88
|
+
*/
|
|
89
|
+
export type ExpandedQuery = {
|
|
90
|
+
type: 'lex' | 'vec' | 'hyde';
|
|
91
|
+
text: string;
|
|
92
|
+
};
|
|
93
|
+
export declare function homedir(): string;
|
|
94
|
+
/**
|
|
95
|
+
* Check if a path is absolute.
|
|
96
|
+
* Supports:
|
|
97
|
+
* - Unix paths: /path/to/file
|
|
98
|
+
* - Windows native: C:\path or C:/path
|
|
99
|
+
* - Git Bash: /c/path or /C/path (C-Z drives, excluding A/B floppy drives)
|
|
100
|
+
*
|
|
101
|
+
* Note: /c without trailing slash is treated as Unix path (directory named "c"),
|
|
102
|
+
* while /c/ or /c/path are treated as Git Bash paths (C: drive).
|
|
103
|
+
*/
|
|
104
|
+
export declare function isAbsolutePath(path: string): boolean;
|
|
105
|
+
/**
|
|
106
|
+
* Normalize path separators to forward slashes.
|
|
107
|
+
* Converts Windows backslashes to forward slashes.
|
|
108
|
+
*/
|
|
109
|
+
export declare function normalizePathSeparators(path: string): string;
|
|
110
|
+
/**
|
|
111
|
+
* Get the relative path from a prefix.
|
|
112
|
+
* Returns null if path is not under prefix.
|
|
113
|
+
* Returns empty string if path equals prefix.
|
|
114
|
+
*/
|
|
115
|
+
export declare function getRelativePathFromPrefix(path: string, prefix: string): string | null;
|
|
116
|
+
export declare function resolve(...paths: string[]): string;
|
|
117
|
+
export declare function enableProductionMode(): void;
|
|
118
|
+
export declare function getDefaultDbPath(indexName?: string): string;
|
|
119
|
+
export declare function getPwd(): string;
|
|
120
|
+
export declare function getRealPath(path: string): string;
|
|
121
|
+
export type VirtualPath = {
|
|
122
|
+
collectionName: string;
|
|
123
|
+
path: string;
|
|
124
|
+
};
|
|
125
|
+
/**
|
|
126
|
+
* Normalize explicit virtual path formats to standard kindx:// format.
|
|
127
|
+
* Only handles paths that are already explicitly virtual:
|
|
128
|
+
* - kindx://collection/path.md (already normalized)
|
|
129
|
+
* - kindx:////collection/path.md (extra slashes - normalize)
|
|
130
|
+
* - //collection/path.md (missing kindx: prefix - add it)
|
|
131
|
+
*
|
|
132
|
+
* Does NOT handle:
|
|
133
|
+
* - collection/path.md (bare paths - could be filesystem relative)
|
|
134
|
+
* - :linenum suffix (should be parsed separately before calling this)
|
|
135
|
+
*/
|
|
136
|
+
export declare function normalizeVirtualPath(input: string): string;
|
|
137
|
+
/**
|
|
138
|
+
* Parse a virtual path like "kindx://collection-name/path/to/file.md"
|
|
139
|
+
* into its components.
|
|
140
|
+
* Also supports collection root: "kindx://collection-name/" or "kindx://collection-name"
|
|
141
|
+
*/
|
|
142
|
+
export declare function parseVirtualPath(virtualPath: string): VirtualPath | null;
|
|
143
|
+
/**
|
|
144
|
+
* Build a virtual path from collection name and relative path.
|
|
145
|
+
*/
|
|
146
|
+
export declare function buildVirtualPath(collectionName: string, path: string): string;
|
|
147
|
+
/**
|
|
148
|
+
* Check if a path is explicitly a virtual path.
|
|
149
|
+
* Only recognizes explicit virtual path formats:
|
|
150
|
+
* - kindx://collection/path.md
|
|
151
|
+
* - //collection/path.md
|
|
152
|
+
*
|
|
153
|
+
* Does NOT consider bare collection/path.md as virtual - that should be
|
|
154
|
+
* handled separately by checking if the first component is a collection name.
|
|
155
|
+
*/
|
|
156
|
+
export declare function isVirtualPath(path: string): boolean;
|
|
157
|
+
/**
|
|
158
|
+
* Resolve a virtual path to absolute filesystem path.
|
|
159
|
+
*/
|
|
160
|
+
export declare function resolveVirtualPath(db: Database, virtualPath: string): string | null;
|
|
161
|
+
/**
|
|
162
|
+
* Convert an absolute filesystem path to a virtual path.
|
|
163
|
+
* Returns null if the file is not in any indexed collection.
|
|
164
|
+
*/
|
|
165
|
+
export declare function toVirtualPath(db: Database, absolutePath: string): string | null;
|
|
166
|
+
export declare function verifySqliteVecLoaded(db: Database): void;
|
|
167
|
+
export declare function isSqliteVecAvailable(): boolean;
|
|
168
|
+
export type Store = {
|
|
169
|
+
db: Database;
|
|
170
|
+
dbPath: string;
|
|
171
|
+
close: () => void;
|
|
172
|
+
ensureVecTable: (dimensions: number) => void;
|
|
173
|
+
getHashesNeedingEmbedding: () => number;
|
|
174
|
+
getIndexHealth: () => IndexHealthInfo;
|
|
175
|
+
getStatus: () => IndexStatus;
|
|
176
|
+
getCacheKey: typeof getCacheKey;
|
|
177
|
+
getCachedResult: (cacheKey: string) => string | null;
|
|
178
|
+
setCachedResult: (cacheKey: string, result: string) => void;
|
|
179
|
+
clearCache: () => void;
|
|
180
|
+
deleteLLMCache: () => number;
|
|
181
|
+
deleteInactiveDocuments: () => number;
|
|
182
|
+
cleanupOrphanedContent: () => number;
|
|
183
|
+
cleanupOrphanedVectors: () => number;
|
|
184
|
+
vacuumDatabase: () => void;
|
|
185
|
+
getContextForFile: (filepath: string) => string | null;
|
|
186
|
+
getContextForPath: (collectionName: string, path: string) => string | null;
|
|
187
|
+
getCollectionByName: (name: string) => {
|
|
188
|
+
name: string;
|
|
189
|
+
pwd: string;
|
|
190
|
+
glob_pattern: string;
|
|
191
|
+
} | null;
|
|
192
|
+
getCollectionsWithoutContext: () => {
|
|
193
|
+
name: string;
|
|
194
|
+
pwd: string;
|
|
195
|
+
doc_count: number;
|
|
196
|
+
}[];
|
|
197
|
+
getTopLevelPathsWithoutContext: (collectionName: string) => string[];
|
|
198
|
+
parseVirtualPath: typeof parseVirtualPath;
|
|
199
|
+
buildVirtualPath: typeof buildVirtualPath;
|
|
200
|
+
isVirtualPath: typeof isVirtualPath;
|
|
201
|
+
resolveVirtualPath: (virtualPath: string) => string | null;
|
|
202
|
+
toVirtualPath: (absolutePath: string) => string | null;
|
|
203
|
+
searchFTS: (query: string, limit?: number, collectionName?: string) => SearchResult[];
|
|
204
|
+
searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => Promise<SearchResult[]>;
|
|
205
|
+
expandQuery: (query: string, model?: string) => Promise<ExpandedQuery[]>;
|
|
206
|
+
rerank: (query: string, documents: {
|
|
207
|
+
file: string;
|
|
208
|
+
text: string;
|
|
209
|
+
}[], model?: string) => Promise<{
|
|
210
|
+
file: string;
|
|
211
|
+
score: number;
|
|
212
|
+
}[]>;
|
|
213
|
+
findDocument: (filename: string, options?: {
|
|
214
|
+
includeBody?: boolean;
|
|
215
|
+
}) => DocumentResult | DocumentNotFound;
|
|
216
|
+
getDocumentBody: (doc: DocumentResult | {
|
|
217
|
+
filepath: string;
|
|
218
|
+
}, fromLine?: number, maxLines?: number) => string | null;
|
|
219
|
+
findDocuments: (pattern: string, options?: {
|
|
220
|
+
includeBody?: boolean;
|
|
221
|
+
maxBytes?: number;
|
|
222
|
+
}) => {
|
|
223
|
+
docs: MultiGetResult[];
|
|
224
|
+
errors: string[];
|
|
225
|
+
};
|
|
226
|
+
findSimilarFiles: (query: string, maxDistance?: number, limit?: number) => string[];
|
|
227
|
+
matchFilesByGlob: (pattern: string) => {
|
|
228
|
+
filepath: string;
|
|
229
|
+
displayPath: string;
|
|
230
|
+
bodyLength: number;
|
|
231
|
+
}[];
|
|
232
|
+
findDocumentByDocid: (docid: string) => {
|
|
233
|
+
filepath: string;
|
|
234
|
+
hash: string;
|
|
235
|
+
} | null;
|
|
236
|
+
insertContent: (hash: string, content: string, createdAt: string) => void;
|
|
237
|
+
insertDocument: (collectionName: string, path: string, title: string, hash: string, createdAt: string, modifiedAt: string) => void;
|
|
238
|
+
findActiveDocument: (collectionName: string, path: string) => {
|
|
239
|
+
id: number;
|
|
240
|
+
hash: string;
|
|
241
|
+
title: string;
|
|
242
|
+
} | null;
|
|
243
|
+
updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => void;
|
|
244
|
+
updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => void;
|
|
245
|
+
deactivateDocument: (collectionName: string, path: string) => void;
|
|
246
|
+
getActiveDocumentPaths: (collectionName: string) => string[];
|
|
247
|
+
getHashesForEmbedding: () => {
|
|
248
|
+
hash: string;
|
|
249
|
+
body: string;
|
|
250
|
+
path: string;
|
|
251
|
+
}[];
|
|
252
|
+
clearAllEmbeddings: () => void;
|
|
253
|
+
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string) => void;
|
|
254
|
+
};
|
|
255
|
+
/**
|
|
256
|
+
* Create a new store instance with the given database path.
|
|
257
|
+
* If no path is provided, uses the default path (~/.cache/kindx/index.sqlite).
|
|
258
|
+
*
|
|
259
|
+
* @param dbPath - Path to the SQLite database file
|
|
260
|
+
* @returns Store instance with all methods bound to the database
|
|
261
|
+
*/
|
|
262
|
+
export declare function createStore(dbPath?: string): Store;
|
|
263
|
+
/**
|
|
264
|
+
* Unified document result type with all metadata.
|
|
265
|
+
* Body is optional - use getDocumentBody() to load it separately if needed.
|
|
266
|
+
*/
|
|
267
|
+
export type DocumentResult = {
|
|
268
|
+
filepath: string;
|
|
269
|
+
displayPath: string;
|
|
270
|
+
title: string;
|
|
271
|
+
context: string | null;
|
|
272
|
+
hash: string;
|
|
273
|
+
docid: string;
|
|
274
|
+
collectionName: string;
|
|
275
|
+
modifiedAt: string;
|
|
276
|
+
bodyLength: number;
|
|
277
|
+
body?: string;
|
|
278
|
+
};
|
|
279
|
+
/**
|
|
280
|
+
* Extract short docid from a full hash (first 6 characters).
|
|
281
|
+
*/
|
|
282
|
+
export declare function getDocid(hash: string): string;
|
|
283
|
+
export declare function handelize(path: string): string;
|
|
284
|
+
/**
|
|
285
|
+
* Search result extends DocumentResult with score and source info
|
|
286
|
+
*/
|
|
287
|
+
export type SearchResult = DocumentResult & {
|
|
288
|
+
score: number;
|
|
289
|
+
source: "fts" | "vec";
|
|
290
|
+
chunkPos?: number;
|
|
291
|
+
};
|
|
292
|
+
/**
|
|
293
|
+
* Ranked result for reciprocal rank fusion (RRF); simplified, used internally
|
|
294
|
+
*/
|
|
295
|
+
export type RankedResult = {
|
|
296
|
+
file: string;
|
|
297
|
+
displayPath: string;
|
|
298
|
+
title: string;
|
|
299
|
+
body: string;
|
|
300
|
+
score: number;
|
|
301
|
+
};
|
|
302
|
+
export type RRFContributionTrace = {
|
|
303
|
+
listIndex: number;
|
|
304
|
+
source: "fts" | "vec";
|
|
305
|
+
queryType: "original" | "lex" | "vec" | "hyde";
|
|
306
|
+
query: string;
|
|
307
|
+
rank: number;
|
|
308
|
+
weight: number;
|
|
309
|
+
backendScore: number;
|
|
310
|
+
rrfContribution: number;
|
|
311
|
+
};
|
|
312
|
+
export type RRFScoreTrace = {
|
|
313
|
+
contributions: RRFContributionTrace[];
|
|
314
|
+
baseScore: number;
|
|
315
|
+
topRank: number;
|
|
316
|
+
topRankBonus: number;
|
|
317
|
+
totalScore: number;
|
|
318
|
+
};
|
|
319
|
+
export type HybridQueryExplain = {
|
|
320
|
+
ftsScores: number[];
|
|
321
|
+
vectorScores: number[];
|
|
322
|
+
rrf: {
|
|
323
|
+
rank: number;
|
|
324
|
+
positionScore: number;
|
|
325
|
+
weight: number;
|
|
326
|
+
baseScore: number;
|
|
327
|
+
topRankBonus: number;
|
|
328
|
+
totalScore: number;
|
|
329
|
+
contributions: RRFContributionTrace[];
|
|
330
|
+
};
|
|
331
|
+
rerankScore: number;
|
|
332
|
+
blendedScore: number;
|
|
333
|
+
};
|
|
334
|
+
/**
|
|
335
|
+
* Error result when document is not found
|
|
336
|
+
*/
|
|
337
|
+
export type DocumentNotFound = {
|
|
338
|
+
error: "not_found";
|
|
339
|
+
query: string;
|
|
340
|
+
similarFiles: string[];
|
|
341
|
+
};
|
|
342
|
+
/**
|
|
343
|
+
* Result from multi-get operations
|
|
344
|
+
*/
|
|
345
|
+
export type MultiGetResult = {
|
|
346
|
+
doc: DocumentResult;
|
|
347
|
+
skipped: false;
|
|
348
|
+
} | {
|
|
349
|
+
doc: Pick<DocumentResult, "filepath" | "displayPath">;
|
|
350
|
+
skipped: true;
|
|
351
|
+
skipReason: string;
|
|
352
|
+
};
|
|
353
|
+
export type CollectionInfo = {
|
|
354
|
+
name: string;
|
|
355
|
+
path: string;
|
|
356
|
+
pattern: string;
|
|
357
|
+
documents: number;
|
|
358
|
+
lastUpdated: string;
|
|
359
|
+
};
|
|
360
|
+
export type IndexStatus = {
|
|
361
|
+
totalDocuments: number;
|
|
362
|
+
needsEmbedding: number;
|
|
363
|
+
hasVectorIndex: boolean;
|
|
364
|
+
collections: CollectionInfo[];
|
|
365
|
+
};
|
|
366
|
+
export declare function getHashesNeedingEmbedding(db: Database): number;
|
|
367
|
+
export type IndexHealthInfo = {
|
|
368
|
+
needsEmbedding: number;
|
|
369
|
+
totalDocs: number;
|
|
370
|
+
daysStale: number | null;
|
|
371
|
+
};
|
|
372
|
+
export declare function getIndexHealth(db: Database): IndexHealthInfo;
|
|
373
|
+
export declare function getCacheKey(url: string, body: object): string;
|
|
374
|
+
export declare function getCachedResult(db: Database, cacheKey: string): string | null;
|
|
375
|
+
export declare function setCachedResult(db: Database, cacheKey: string, result: string): void;
|
|
376
|
+
export declare function clearCache(db: Database): void;
|
|
377
|
+
/**
|
|
378
|
+
* Delete cached LLM API responses.
|
|
379
|
+
* Returns the number of cached responses deleted.
|
|
380
|
+
*/
|
|
381
|
+
export declare function deleteLLMCache(db: Database): number;
|
|
382
|
+
/**
|
|
383
|
+
* Remove inactive document records (active = 0).
|
|
384
|
+
* Returns the number of inactive documents deleted.
|
|
385
|
+
*/
|
|
386
|
+
export declare function deleteInactiveDocuments(db: Database): number;
|
|
387
|
+
/**
|
|
388
|
+
* Remove orphaned content hashes that are not referenced by any active document.
|
|
389
|
+
* Returns the number of orphaned content hashes deleted.
|
|
390
|
+
*/
|
|
391
|
+
export declare function cleanupOrphanedContent(db: Database): number;
|
|
392
|
+
/**
|
|
393
|
+
* Remove orphaned vector embeddings that are not referenced by any active document.
|
|
394
|
+
* Returns the number of orphaned embedding chunks deleted.
|
|
395
|
+
*/
|
|
396
|
+
export declare function cleanupOrphanedVectors(db: Database): number;
|
|
397
|
+
/**
|
|
398
|
+
* Run VACUUM to reclaim unused space in the database.
|
|
399
|
+
* This operation rebuilds the database file to eliminate fragmentation.
|
|
400
|
+
*/
|
|
401
|
+
export declare function vacuumDatabase(db: Database): void;
|
|
402
|
+
export declare function hashContent(content: string): Promise<string>;
|
|
403
|
+
export declare function extractTitle(content: string, filename: string): string;
|
|
404
|
+
/**
|
|
405
|
+
* Insert content into the content table (content-addressable storage).
|
|
406
|
+
* Uses INSERT OR IGNORE so duplicate hashes are skipped.
|
|
407
|
+
*/
|
|
408
|
+
export declare function insertContent(db: Database, hash: string, content: string, createdAt: string): void;
|
|
409
|
+
/**
|
|
410
|
+
* Insert a new document into the documents table.
|
|
411
|
+
*/
|
|
412
|
+
export declare function insertDocument(db: Database, collectionName: string, path: string, title: string, hash: string, createdAt: string, modifiedAt: string): void;
|
|
413
|
+
/**
|
|
414
|
+
* Find an active document by collection name and path.
|
|
415
|
+
*/
|
|
416
|
+
export declare function findActiveDocument(db: Database, collectionName: string, path: string): {
|
|
417
|
+
id: number;
|
|
418
|
+
hash: string;
|
|
419
|
+
title: string;
|
|
420
|
+
} | null;
|
|
421
|
+
/**
|
|
422
|
+
* Update the title and modified_at timestamp for a document.
|
|
423
|
+
*/
|
|
424
|
+
export declare function updateDocumentTitle(db: Database, documentId: number, title: string, modifiedAt: string): void;
|
|
425
|
+
/**
|
|
426
|
+
* Update an existing document's hash, title, and modified_at timestamp.
|
|
427
|
+
* Used when content changes but the file path stays the same.
|
|
428
|
+
*/
|
|
429
|
+
export declare function updateDocument(db: Database, documentId: number, title: string, hash: string, modifiedAt: string): void;
|
|
430
|
+
/**
|
|
431
|
+
* Deactivate a document (mark as inactive but don't delete).
|
|
432
|
+
*/
|
|
433
|
+
export declare function deactivateDocument(db: Database, collectionName: string, path: string): void;
|
|
434
|
+
/**
|
|
435
|
+
* Get all active document paths for a collection.
|
|
436
|
+
*/
|
|
437
|
+
export declare function getActiveDocumentPaths(db: Database, collectionName: string): string[];
|
|
438
|
+
export { formatQueryForEmbedding, formatDocForEmbedding };
|
|
439
|
+
export declare function chunkDocument(content: string, maxChars?: number, overlapChars?: number, windowChars?: number): {
|
|
440
|
+
text: string;
|
|
441
|
+
pos: number;
|
|
442
|
+
}[];
|
|
443
|
+
/**
|
|
444
|
+
* Chunk a document by actual token count using the LLM tokenizer.
|
|
445
|
+
* More accurate than character-based chunking but requires async.
|
|
446
|
+
*/
|
|
447
|
+
export declare function chunkDocumentByTokens(content: string, maxTokens?: number, overlapTokens?: number, windowTokens?: number): Promise<{
|
|
448
|
+
text: string;
|
|
449
|
+
pos: number;
|
|
450
|
+
tokens: number;
|
|
451
|
+
}[]>;
|
|
452
|
+
/**
|
|
453
|
+
* Normalize a docid input by stripping surrounding quotes and leading #.
|
|
454
|
+
* Handles: "#abc123", 'abc123', "abc123", #abc123, abc123
|
|
455
|
+
* Returns the bare hex string.
|
|
456
|
+
*/
|
|
457
|
+
export declare function normalizeDocid(docid: string): string;
|
|
458
|
+
/**
|
|
459
|
+
* Check if a string looks like a docid reference.
|
|
460
|
+
* Accepts: #abc123, abc123, "#abc123", "abc123", '#abc123', 'abc123'
|
|
461
|
+
* Returns true if the normalized form is a valid hex string of 6+ chars.
|
|
462
|
+
*/
|
|
463
|
+
export declare function isDocid(input: string): boolean;
|
|
464
|
+
/**
|
|
465
|
+
* Find a document by its short docid (first 6 characters of hash).
|
|
466
|
+
* Returns the matching document's filepath and hash if found, null otherwise.
|
|
467
|
+
* If multiple documents match the same short hash (collision), returns the first one.
|
|
468
|
+
*
|
|
469
|
+
* Accepts lenient input: #abc123, abc123, "#abc123", "abc123"
|
|
470
|
+
*/
|
|
471
|
+
export declare function findDocumentByDocid(db: Database, docid: string): {
|
|
472
|
+
filepath: string;
|
|
473
|
+
hash: string;
|
|
474
|
+
} | null;
|
|
475
|
+
export declare function findSimilarFiles(db: Database, query: string, maxDistance?: number, limit?: number): string[];
|
|
476
|
+
export declare function matchFilesByGlob(db: Database, pattern: string): {
|
|
477
|
+
filepath: string;
|
|
478
|
+
displayPath: string;
|
|
479
|
+
bodyLength: number;
|
|
480
|
+
}[];
|
|
481
|
+
/**
|
|
482
|
+
* Get context for a file path using hierarchical inheritance.
|
|
483
|
+
* Contexts are collection-scoped and inherit from parent directories.
|
|
484
|
+
* For example, context at "/talks" applies to "/talks/2024/keynote.md".
|
|
485
|
+
*
|
|
486
|
+
* @param db Database instance (unused - kept for compatibility)
|
|
487
|
+
* @param collectionName Collection name
|
|
488
|
+
* @param path Relative path within the collection
|
|
489
|
+
* @returns Context string or null if no context is defined
|
|
490
|
+
*/
|
|
491
|
+
export declare function getContextForPath(db: Database, collectionName: string, path: string): string | null;
|
|
492
|
+
/**
|
|
493
|
+
* Get context for a file path (virtual or filesystem).
|
|
494
|
+
* Resolves the collection and relative path using the YAML collections config.
|
|
495
|
+
*/
|
|
496
|
+
export declare function getContextForFile(db: Database, filepath: string): string | null;
|
|
497
|
+
/**
|
|
498
|
+
* Get collection by name from YAML config.
|
|
499
|
+
* Returns collection metadata from ~/.config/kindx/index.yml
|
|
500
|
+
*/
|
|
501
|
+
export declare function getCollectionByName(db: Database, name: string): {
|
|
502
|
+
name: string;
|
|
503
|
+
pwd: string;
|
|
504
|
+
glob_pattern: string;
|
|
505
|
+
} | null;
|
|
506
|
+
/**
|
|
507
|
+
* List all collections with document counts from database.
|
|
508
|
+
* Merges YAML config with database statistics.
|
|
509
|
+
*/
|
|
510
|
+
export declare function listCollections(db: Database): {
|
|
511
|
+
name: string;
|
|
512
|
+
pwd: string;
|
|
513
|
+
glob_pattern: string;
|
|
514
|
+
doc_count: number;
|
|
515
|
+
active_count: number;
|
|
516
|
+
last_modified: string | null;
|
|
517
|
+
}[];
|
|
518
|
+
/**
|
|
519
|
+
* Remove a collection and clean up its documents.
|
|
520
|
+
* Uses catalogs.ts to remove from YAML config and cleans up database.
|
|
521
|
+
*/
|
|
522
|
+
export declare function removeCollection(db: Database, collectionName: string): {
|
|
523
|
+
deletedDocs: number;
|
|
524
|
+
cleanedHashes: number;
|
|
525
|
+
};
|
|
526
|
+
/**
|
|
527
|
+
* Rename a collection.
|
|
528
|
+
* Updates both YAML config and database documents table.
|
|
529
|
+
*/
|
|
530
|
+
export declare function renameCollection(db: Database, oldName: string, newName: string): void;
|
|
531
|
+
/**
|
|
532
|
+
* Insert or update a context for a specific collection and path prefix.
|
|
533
|
+
*/
|
|
534
|
+
export declare function insertContext(db: Database, collectionId: number, pathPrefix: string, context: string): void;
|
|
535
|
+
/**
|
|
536
|
+
* Delete a context for a specific collection and path prefix.
|
|
537
|
+
* Returns the number of contexts deleted.
|
|
538
|
+
*/
|
|
539
|
+
export declare function deleteContext(db: Database, collectionName: string, pathPrefix: string): number;
|
|
540
|
+
/**
|
|
541
|
+
* Delete all global contexts (contexts with empty path_prefix).
|
|
542
|
+
* Returns the number of contexts deleted.
|
|
543
|
+
*/
|
|
544
|
+
export declare function deleteGlobalContexts(db: Database): number;
|
|
545
|
+
/**
|
|
546
|
+
* List all contexts, grouped by collection.
|
|
547
|
+
* Returns contexts ordered by collection name, then by path prefix length (longest first).
|
|
548
|
+
*/
|
|
549
|
+
export declare function listPathContexts(db: Database): {
|
|
550
|
+
collection_name: string;
|
|
551
|
+
path_prefix: string;
|
|
552
|
+
context: string;
|
|
553
|
+
}[];
|
|
554
|
+
/**
|
|
555
|
+
* Get all collections (name only - from YAML config).
|
|
556
|
+
*/
|
|
557
|
+
export declare function getAllCollections(db: Database): {
|
|
558
|
+
name: string;
|
|
559
|
+
}[];
|
|
560
|
+
/**
|
|
561
|
+
* Check which collections don't have any context defined.
|
|
562
|
+
* Returns collections that have no context entries at all (not even root context).
|
|
563
|
+
*/
|
|
564
|
+
export declare function getCollectionsWithoutContext(db: Database): {
|
|
565
|
+
name: string;
|
|
566
|
+
pwd: string;
|
|
567
|
+
doc_count: number;
|
|
568
|
+
}[];
|
|
569
|
+
/**
|
|
570
|
+
* Get top-level directories in a collection that don't have context.
|
|
571
|
+
* Useful for suggesting where context might be needed.
|
|
572
|
+
*/
|
|
573
|
+
export declare function getTopLevelPathsWithoutContext(db: Database, collectionName: string): string[];
|
|
574
|
+
/**
|
|
575
|
+
* Validate that a vec/hyde query doesn't use lex-only syntax.
|
|
576
|
+
* Returns error message if invalid, null if valid.
|
|
577
|
+
*/
|
|
578
|
+
export declare function validateSemanticQuery(query: string): string | null;
|
|
579
|
+
export declare function validateLexQuery(query: string): string | null;
|
|
580
|
+
export declare function searchFTS(db: Database, query: string, limit?: number, collectionName?: string): SearchResult[];
|
|
581
|
+
export declare function searchVec(db: Database, query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]): Promise<SearchResult[]>;
|
|
582
|
+
/**
|
|
583
|
+
* Get all unique content hashes that need embeddings (from active documents).
|
|
584
|
+
* Returns hash, document body, and a sample path for display purposes.
|
|
585
|
+
*/
|
|
586
|
+
export declare function getHashesForEmbedding(db: Database): {
|
|
587
|
+
hash: string;
|
|
588
|
+
body: string;
|
|
589
|
+
path: string;
|
|
590
|
+
}[];
|
|
591
|
+
/**
|
|
592
|
+
* Clear all embeddings from the database (force re-index).
|
|
593
|
+
* Deletes all rows from content_vectors and drops the vectors_vec table.
|
|
594
|
+
*/
|
|
595
|
+
export declare function clearAllEmbeddings(db: Database): void;
|
|
596
|
+
/**
|
|
597
|
+
* Insert a single embedding into both content_vectors and vectors_vec tables.
|
|
598
|
+
* The hash_seq key is formatted as "{hash}_{seq}" for the vectors_vec table.
|
|
599
|
+
*/
|
|
600
|
+
export declare function insertEmbedding(db: Database, hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string): void;
|
|
601
|
+
export declare function expandQuery(query: string, model: string | undefined, db: Database): Promise<ExpandedQuery[]>;
|
|
602
|
+
export declare function rerank(query: string, documents: {
|
|
603
|
+
file: string;
|
|
604
|
+
text: string;
|
|
605
|
+
}[], model: string | undefined, db: Database): Promise<{
|
|
606
|
+
file: string;
|
|
607
|
+
score: number;
|
|
608
|
+
}[]>;
|
|
609
|
+
export declare function reciprocalRankFusion(resultLists: RankedResult[][], weights?: number[], k?: number): RankedResult[];
|
|
610
|
+
/**
|
|
611
|
+
* Build per-document RRF contribution traces for explain/debug output.
|
|
612
|
+
*/
|
|
613
|
+
export declare function buildRrfTrace(resultLists: RankedResult[][], weights?: number[], listMeta?: RankedListMeta[], k?: number): Map<string, RRFScoreTrace>;
|
|
614
|
+
/**
|
|
615
|
+
* Find a document by filename/path, docid (#hash), or with fuzzy matching.
|
|
616
|
+
* Returns document metadata without body by default.
|
|
617
|
+
*
|
|
618
|
+
* Supports:
|
|
619
|
+
* - Virtual paths: kindx://collection/path/to/file.md
|
|
620
|
+
* - Absolute paths: /path/to/file.md
|
|
621
|
+
* - Relative paths: path/to/file.md
|
|
622
|
+
* - Short docid: #abc123 (first 6 chars of hash)
|
|
623
|
+
*/
|
|
624
|
+
export declare function findDocument(db: Database, filename: string, options?: {
|
|
625
|
+
includeBody?: boolean;
|
|
626
|
+
}): DocumentResult | DocumentNotFound;
|
|
627
|
+
/**
|
|
628
|
+
* Get the body content for a document
|
|
629
|
+
* Optionally slice by line range (fromLine, maxLines).
|
|
630
|
+
*/
|
|
631
|
+
export declare function getDocumentBody(db: Database, doc: DocumentResult | {
|
|
632
|
+
filepath: string;
|
|
633
|
+
}, fromLine?: number, maxLines?: number): string | null;
|
|
634
|
+
/**
|
|
635
|
+
* Find multiple documents by glob pattern or comma-separated list.
|
|
636
|
+
* Returns documents without body by default (use getDocumentBody to load).
|
|
637
|
+
*
* The result carries both the matched entries (`docs`) and a list of error
* messages (`errors`), so callers should inspect both.
* NOTE(review): `options.maxBytes` is presumably a size cap applied to
* returned documents — confirm its unit and default against the implementation.
*/
|
|
638
|
+
export declare function findDocuments(db: Database, pattern: string, options?: {
|
|
639
|
+
includeBody?: boolean;
|
|
640
|
+
maxBytes?: number;
|
|
641
|
+
}): {
|
|
642
|
+
docs: MultiGetResult[];
|
|
643
|
+
errors: string[];
|
|
644
|
+
};
|
|
645
|
+
/**
* Return the IndexStatus for the given database.
*/
export declare function getStatus(db: Database): IndexStatus;
|
|
646
|
+
/**
* Result of extractSnippet(): the snippet text plus line-position information.
* NOTE(review): field meanings are inferred from their names — `line` is
* presumably the line where the snippet starts, `linesBefore` / `linesAfter`
* the number of document lines outside the snippet on either side, and
* `snippetLines` the number of lines in the snippet. Confirm against the
* implementation.
*/
export type SnippetResult = {
|
|
647
|
+
line: number;
|
|
648
|
+
snippet: string;
|
|
649
|
+
linesBefore: number;
|
|
650
|
+
linesAfter: number;
|
|
651
|
+
snippetLines: number;
|
|
652
|
+
};
|
|
653
|
+
/**
* Extract a snippet from `body` for the given `query` and report where it sits
* in the document (see SnippetResult).
* NOTE(review): `maxLen` presumably caps the snippet length, and `chunkPos` /
* `chunkLen` presumably point at a chunk of `body` to take the snippet from;
* units and defaults are not visible in this declaration — confirm.
*/
export declare function extractSnippet(body: string, query: string, maxLen?: number, chunkPos?: number, chunkLen?: number): SnippetResult;
|
|
654
|
+
/**
|
|
655
|
+
* Add line numbers to text content.
|
|
656
|
+
* Each line becomes: "{lineNum}: {content}"
|
|
657
|
+
*
* `startLine` is the number given to the first line; its default is not
* visible in this declaration.
*/
|
|
658
|
+
export declare function addLineNumbers(text: string, startLine?: number): string;
|
|
659
|
+
/**
|
|
660
|
+
* Optional progress hooks for search orchestration.
|
|
661
|
+
* CLI wires these to stderr for user feedback; MCP leaves them unset.
* Every hook is optional, so callers may supply any subset.
|
|
662
|
+
*/
|
|
663
|
+
export interface SearchHooks {
|
|
664
|
+
/** BM25 probe found strong signal — expansion will be skipped. Receives the top score. */
|
|
665
|
+
onStrongSignal?: (topScore: number) => void;
|
|
666
|
+
/** Query expansion starting */
|
|
667
|
+
onExpandStart?: () => void;
|
|
668
|
+
/** Query expansion complete. Empty array = strong signal skip. elapsedMs = time taken. */
|
|
669
|
+
onExpand?: (original: string, expanded: ExpandedQuery[], elapsedMs: number) => void;
|
|
670
|
+
/** Embedding starting (vec/hyde queries). NOTE(review): count is presumably the number of queries to embed — confirm. */
|
|
671
|
+
onEmbedStart?: (count: number) => void;
|
|
672
|
+
/** Embedding complete. elapsedMs = time taken. */
|
|
673
|
+
onEmbedDone?: (elapsedMs: number) => void;
|
|
674
|
+
/** Reranking is about to start. Receives the chunk count. */
|
|
675
|
+
onRerankStart?: (chunkCount: number) => void;
|
|
676
|
+
/** Reranking finished. elapsedMs = time taken. */
|
|
677
|
+
onRerankDone?: (elapsedMs: number) => void;
|
|
678
|
+
}
|
|
679
|
+
/**
* Options for hybridQuery(). All fields are optional; defaults are not visible
* in this declaration.
* - collection: NOTE(review) presumably restricts the search to one collection — confirm.
* - limit / minScore: final results are filtered by minScore and sliced to limit.
* - candidateLimit: the fused RRF list is sliced to this size before chunking and reranking.
* - explain: NOTE(review) presumably populates HybridQueryResult.explain — confirm.
* - hooks: optional progress callbacks (SearchHooks).
*/
export interface HybridQueryOptions {
|
|
680
|
+
collection?: string;
|
|
681
|
+
limit?: number;
|
|
682
|
+
minScore?: number;
|
|
683
|
+
candidateLimit?: number;
|
|
684
|
+
explain?: boolean;
|
|
685
|
+
hooks?: SearchHooks;
|
|
686
|
+
}
|
|
687
|
+
/**
* One result entry returned by hybridQuery() and structuredSearch().
* NOTE(review): field meanings are inferred from their names — `bestChunk` /
* `bestChunkPos` presumably hold the best-matching chunk's text and its
* position within `body`, `context` may be null, and `explain` is optional
* (presumably only set when the `explain` option is enabled). Confirm against
* the implementation.
*/
export interface HybridQueryResult {
|
|
688
|
+
file: string;
|
|
689
|
+
displayPath: string;
|
|
690
|
+
title: string;
|
|
691
|
+
body: string;
|
|
692
|
+
bestChunk: string;
|
|
693
|
+
bestChunkPos: number;
|
|
694
|
+
score: number;
|
|
695
|
+
context: string | null;
|
|
696
|
+
docid: string;
|
|
697
|
+
explain?: HybridQueryExplain;
|
|
698
|
+
}
|
|
699
|
+
/**
* Metadata describing one ranked result list (accepted by buildRrfTrace as `listMeta`).
* - source: which search produced the list ("fts" or "vec").
* - queryType: the kind of query that was run ("original", "lex", "vec" or "hyde").
* - query: the query text.
*/
export type RankedListMeta = {
|
|
700
|
+
source: "fts" | "vec";
|
|
701
|
+
queryType: "original" | "lex" | "vec" | "hyde";
|
|
702
|
+
query: string;
|
|
703
|
+
};
|
|
704
|
+
/**
|
|
705
|
+
* Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
|
|
706
|
+
*
|
|
707
|
+
* Pipeline:
|
|
708
|
+
* 1. BM25 probe → skip expansion if strong signal
|
|
709
|
+
* 2. expandQuery() → typed query variants (lex/vec/hyde)
|
|
710
|
+
* 3. Type-routed search: original→vector, lex→FTS, vec/hyde→vector
|
|
711
|
+
* 4. RRF fusion → slice to candidateLimit
|
|
712
|
+
* 5. chunkDocument() + keyword-best-chunk selection
|
|
713
|
+
* 6. rerank on chunks (NOT full bodies — O(tokens) trap)
|
|
714
|
+
* 7. Position-aware score blending (RRF rank × reranker score)
|
|
715
|
+
* 8. Dedup by file, filter by minScore, slice to limit
|
|
716
|
+
*
* `options` (HybridQueryOptions) supplies limit, minScore, candidateLimit and
* the optional progress hooks used by the steps above.
* Resolves to an array of HybridQueryResult.
*/
|
|
717
|
+
export declare function hybridQuery(store: Store, query: string, options?: HybridQueryOptions): Promise<HybridQueryResult[]>;
|
|
718
|
+
/**
* Options for vectorSearchQuery(). All fields are optional; defaults are not
* visible in this declaration.
* - collection: NOTE(review) presumably restricts the search to one collection — confirm.
* - limit / minScore: results are filtered by minScore and sliced to limit.
* - hooks: only the `onExpand` hook of SearchHooks is accepted here.
*/
export interface VectorSearchOptions {
|
|
719
|
+
collection?: string;
|
|
720
|
+
limit?: number;
|
|
721
|
+
minScore?: number;
|
|
722
|
+
hooks?: Pick<SearchHooks, 'onExpand'>;
|
|
723
|
+
}
|
|
724
|
+
/**
* One result entry returned by vectorSearchQuery().
* Carries the same fields as HybridQueryResult except the chunk fields
* (`bestChunk`, `bestChunkPos`) and `explain`.
* `context` may be null.
*/
export interface VectorSearchResult {
|
|
725
|
+
file: string;
|
|
726
|
+
displayPath: string;
|
|
727
|
+
title: string;
|
|
728
|
+
body: string;
|
|
729
|
+
score: number;
|
|
730
|
+
context: string | null;
|
|
731
|
+
docid: string;
|
|
732
|
+
}
|
|
733
|
+
/**
|
|
734
|
+
* Vector-only semantic search with query expansion.
|
|
735
|
+
*
|
|
736
|
+
* Pipeline:
|
|
737
|
+
* 1. expandQuery() → typed variants, filter to vec/hyde only (lex irrelevant here)
|
|
738
|
+
* 2. searchVec() for original + vec/hyde variants (sequential — node-llama-cpp embed limitation)
|
|
739
|
+
* 3. Dedup by filepath (keep max score)
|
|
740
|
+
* 4. Sort by score descending, filter by minScore, slice to limit
|
|
741
|
+
*
* `options` (VectorSearchOptions) supplies limit, minScore and the optional
* `onExpand` hook.
* Resolves to an array of VectorSearchResult.
*/
|
|
742
|
+
export declare function vectorSearchQuery(store: Store, query: string, options?: VectorSearchOptions): Promise<VectorSearchResult[]>;
|
|
743
|
+
/**
|
|
744
|
+
* A single sub-search in a structured search request.
|
|
745
|
+
* Matches the format used in KINDX training data.
* An array of these is passed to structuredSearch().
|
|
746
|
+
*/
|
|
747
|
+
export interface StructuredSubSearch {
|
|
748
|
+
/** Search type: 'lex' for BM25, 'vec' for semantic, 'hyde' for hypothetical */
|
|
749
|
+
type: 'lex' | 'vec' | 'hyde';
|
|
750
|
+
/** The search query text */
|
|
751
|
+
query: string;
|
|
752
|
+
/** Optional line number for error reporting (CLI parser) */
|
|
753
|
+
line?: number;
|
|
754
|
+
}
|
|
755
|
+
/**
* Options for structuredSearch(). All fields are optional; defaults are not
* visible in this declaration.
* Unlike HybridQueryOptions.collection (a single string), `collections` is an
* array of collection names.
* NOTE(review): limit, minScore, candidateLimit, explain and hooks presumably
* behave as they do for hybridQuery() — confirm against the implementation.
*/
export interface StructuredSearchOptions {
|
|
756
|
+
collections?: string[];
|
|
757
|
+
limit?: number;
|
|
758
|
+
minScore?: number;
|
|
759
|
+
candidateLimit?: number;
|
|
760
|
+
explain?: boolean;
|
|
761
|
+
/** Future: domain intent hint for routing/boosting */
|
|
762
|
+
intent?: string;
|
|
763
|
+
hooks?: SearchHooks;
|
|
764
|
+
}
|
|
765
|
+
/**
|
|
766
|
+
* Structured search: execute pre-expanded queries without LLM query expansion.
|
|
767
|
+
*
|
|
768
|
+
* Designed for LLM callers (MCP/HTTP) that generate their own query expansions.
|
|
769
|
+
* Skips the internal expandQuery() step and goes directly to the pipeline below.
|
|
770
|
+
*
|
|
771
|
+
* Pipeline:
|
|
772
|
+
* 1. Route searches: lex→FTS, vec/hyde→vector (batch embed)
|
|
773
|
+
* 2. RRF fusion across all result lists
|
|
774
|
+
* 3. Chunk documents + keyword-best-chunk selection
|
|
775
|
+
* 4. Rerank on chunks
|
|
776
|
+
* 5. Position-aware score blending
|
|
777
|
+
* 6. Dedup, filter, slice
|
|
778
|
+
*
|
|
779
|
+
* This is the recommended endpoint for capable LLMs — they can generate
|
|
780
|
+
* better query variations than our small local model, especially for
|
|
781
|
+
* domain-specific or nuanced queries.
|
|
782
|
+
*
* `searches` is the list of pre-expanded sub-searches (StructuredSubSearch).
* Resolves to an array of HybridQueryResult, the same result type as hybridQuery().
*/
|
|
783
|
+
export declare function structuredSearch(store: Store, searches: StructuredSubSearch[], options?: StructuredSearchOptions): Promise<HybridQueryResult[]>;
|