@tobilu/qmd 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/README.md +61 -1
- package/bin/qmd +11 -2
- package/dist/ast.d.ts +64 -0
- package/dist/ast.js +324 -0
- package/dist/bench/bench.d.ts +21 -0
- package/dist/bench/bench.js +185 -0
- package/dist/bench/score.d.ts +26 -0
- package/dist/bench/score.js +67 -0
- package/dist/bench/types.d.ts +67 -0
- package/dist/bench/types.js +8 -0
- package/dist/cli/formatter.js +5 -1
- package/dist/cli/qmd.d.ts +2 -1
- package/dist/cli/qmd.js +171 -9
- package/dist/collections.d.ts +11 -0
- package/dist/db.d.ts +8 -0
- package/dist/db.js +44 -3
- package/dist/index.d.ts +7 -1
- package/dist/index.js +13 -3
- package/dist/llm.d.ts +12 -3
- package/dist/llm.js +94 -24
- package/dist/mcp/server.js +29 -5
- package/dist/store.d.ts +56 -6
- package/dist/store.js +401 -138
- package/package.json +34 -17
package/dist/llm.js
CHANGED
|
@@ -48,7 +48,7 @@ export function formatDocForEmbedding(text, title, modelUri) {
|
|
|
48
48
|
// HuggingFace model URIs for node-llama-cpp
|
|
49
49
|
// Format: hf:<user>/<repo>/<file>
|
|
50
50
|
// Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf)
|
|
51
|
-
const DEFAULT_EMBED_MODEL =
|
|
51
|
+
const DEFAULT_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
|
52
52
|
const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
|
|
53
53
|
// const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
|
|
54
54
|
const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
|
|
@@ -61,7 +61,9 @@ export const DEFAULT_EMBED_MODEL_URI = DEFAULT_EMBED_MODEL;
|
|
|
61
61
|
export const DEFAULT_RERANK_MODEL_URI = DEFAULT_RERANK_MODEL;
|
|
62
62
|
export const DEFAULT_GENERATE_MODEL_URI = DEFAULT_GENERATE_MODEL;
|
|
63
63
|
// Local model cache directory
|
|
64
|
-
const MODEL_CACHE_DIR =
|
|
64
|
+
const MODEL_CACHE_DIR = process.env.XDG_CACHE_HOME
|
|
65
|
+
? join(process.env.XDG_CACHE_HOME, "qmd", "models")
|
|
66
|
+
: join(homedir(), ".cache", "qmd", "models");
|
|
65
67
|
export const DEFAULT_MODEL_CACHE_DIR = MODEL_CACHE_DIR;
|
|
66
68
|
function parseHfUri(model) {
|
|
67
69
|
if (!model.startsWith("hf:"))
|
|
@@ -187,14 +189,17 @@ export class LlamaCpp {
|
|
|
187
189
|
// Track disposal state to prevent double-dispose
|
|
188
190
|
disposed = false;
|
|
189
191
|
constructor(config = {}) {
|
|
190
|
-
this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL;
|
|
191
|
-
this.generateModelUri = config.generateModel || DEFAULT_GENERATE_MODEL;
|
|
192
|
-
this.rerankModelUri = config.rerankModel || DEFAULT_RERANK_MODEL;
|
|
192
|
+
this.embedModelUri = config.embedModel || process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL;
|
|
193
|
+
this.generateModelUri = config.generateModel || process.env.QMD_GENERATE_MODEL || DEFAULT_GENERATE_MODEL;
|
|
194
|
+
this.rerankModelUri = config.rerankModel || process.env.QMD_RERANK_MODEL || DEFAULT_RERANK_MODEL;
|
|
193
195
|
this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR;
|
|
194
196
|
this.expandContextSize = resolveExpandContextSize(config.expandContextSize);
|
|
195
197
|
this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
|
|
196
198
|
this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
|
|
197
199
|
}
|
|
200
|
+
get embedModelName() {
|
|
201
|
+
return this.embedModelUri;
|
|
202
|
+
}
|
|
198
203
|
/**
|
|
199
204
|
* Reset the inactivity timer. Called after each model operation.
|
|
200
205
|
* When timer fires, models are unloaded to free memory (if no active sessions).
|
|
@@ -289,11 +294,29 @@ export class LlamaCpp {
|
|
|
289
294
|
*/
|
|
290
295
|
async ensureLlama() {
|
|
291
296
|
if (!this.llama) {
|
|
292
|
-
|
|
293
|
-
|
|
297
|
+
// Allow override via QMD_LLAMA_GPU: "false" | "off" | "none" forces CPU
|
|
298
|
+
const gpuOverride = (process.env.QMD_LLAMA_GPU ?? "").toLowerCase();
|
|
299
|
+
const forceCpu = ["false", "off", "none", "disable", "disabled", "0"].includes(gpuOverride);
|
|
300
|
+
const loadLlama = async (gpu) => await getLlama({
|
|
294
301
|
build: "autoAttempt",
|
|
295
|
-
logLevel: LlamaLogLevel.error
|
|
302
|
+
logLevel: LlamaLogLevel.error,
|
|
303
|
+
gpu,
|
|
296
304
|
});
|
|
305
|
+
let llama;
|
|
306
|
+
if (forceCpu) {
|
|
307
|
+
llama = await loadLlama(false);
|
|
308
|
+
}
|
|
309
|
+
else {
|
|
310
|
+
try {
|
|
311
|
+
llama = await loadLlama("auto");
|
|
312
|
+
}
|
|
313
|
+
catch (err) {
|
|
314
|
+
// GPU backend (e.g. Vulkan on headless/driverless machines) can throw at init.
|
|
315
|
+
// Fall back to CPU so qmd still works.
|
|
316
|
+
process.stderr.write(`QMD Warning: GPU init failed (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`);
|
|
317
|
+
llama = await loadLlama(false);
|
|
318
|
+
}
|
|
319
|
+
}
|
|
297
320
|
if (llama.gpu === false) {
|
|
298
321
|
process.stderr.write("QMD Warning: no GPU acceleration, running on CPU (slow). Run 'qmd status' for details.\n");
|
|
299
322
|
}
|
|
@@ -394,6 +417,7 @@ export class LlamaCpp {
|
|
|
394
417
|
for (let i = 0; i < n; i++) {
|
|
395
418
|
try {
|
|
396
419
|
this.embedContexts.push(await model.createEmbeddingContext({
|
|
420
|
+
contextSize: LlamaCpp.EMBED_CONTEXT_SIZE,
|
|
397
421
|
...(threads > 0 ? { threads } : {}),
|
|
398
422
|
}));
|
|
399
423
|
}
|
|
@@ -484,9 +508,20 @@ export class LlamaCpp {
|
|
|
484
508
|
* - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
|
|
485
509
|
*/
|
|
486
510
|
// Qwen3 reranker template adds ~200 tokens overhead (system prompt, tags, etc.)
|
|
487
|
-
//
|
|
488
|
-
//
|
|
489
|
-
|
|
511
|
+
// Default 2048 was too small for longer documents (e.g. session transcripts,
|
|
512
|
+
// CJK text, or large markdown files) — callers hit "input lengths exceed
|
|
513
|
+
// context size" errors even after truncation because the overhead estimate
|
|
514
|
+
// was insufficient. 4096 comfortably fits the largest real-world chunks
|
|
515
|
+
// while staying well below the 40 960-token auto size.
|
|
516
|
+
// Override with QMD_RERANK_CONTEXT_SIZE env var if you need more headroom.
|
|
517
|
+
static RERANK_CONTEXT_SIZE = (() => {
|
|
518
|
+
const v = parseInt(process.env.QMD_RERANK_CONTEXT_SIZE ?? "", 10);
|
|
519
|
+
return Number.isFinite(v) && v > 0 ? v : 4096;
|
|
520
|
+
})();
|
|
521
|
+
static EMBED_CONTEXT_SIZE = (() => {
|
|
522
|
+
const v = parseInt(process.env.QMD_EMBED_CONTEXT_SIZE ?? "", 10);
|
|
523
|
+
return Number.isFinite(v) && v > 0 ? v : 2048;
|
|
524
|
+
})();
|
|
490
525
|
async ensureRerankContexts() {
|
|
491
526
|
if (this.rerankContexts.length === 0) {
|
|
492
527
|
const model = await this.ensureRerankModel();
|
|
@@ -555,15 +590,41 @@ export class LlamaCpp {
|
|
|
555
590
|
// ==========================================================================
|
|
556
591
|
// Core API methods
|
|
557
592
|
// ==========================================================================
|
|
593
|
+
/**
|
|
594
|
+
* Truncate text to fit within the embedding model's context window.
|
|
595
|
+
* Uses the model's own tokenizer for accurate token counting, then
|
|
596
|
+
* detokenizes back to text if truncation is needed.
|
|
597
|
+
* Returns the (possibly truncated) text and whether truncation occurred.
|
|
598
|
+
*/
|
|
599
|
+
async truncateToContextSize(text) {
|
|
600
|
+
if (!this.embedModel)
|
|
601
|
+
return { text, truncated: false };
|
|
602
|
+
const maxTokens = this.embedModel.trainContextSize;
|
|
603
|
+
if (maxTokens <= 0)
|
|
604
|
+
return { text, truncated: false };
|
|
605
|
+
const tokens = this.embedModel.tokenize(text);
|
|
606
|
+
if (tokens.length <= maxTokens)
|
|
607
|
+
return { text, truncated: false };
|
|
608
|
+
// Leave a small margin (4 tokens) for BOS/EOS overhead
|
|
609
|
+
const safeLimit = Math.max(1, maxTokens - 4);
|
|
610
|
+
const truncatedTokens = tokens.slice(0, safeLimit);
|
|
611
|
+
const truncatedText = this.embedModel.detokenize(truncatedTokens);
|
|
612
|
+
return { text: truncatedText, truncated: true };
|
|
613
|
+
}
|
|
558
614
|
async embed(text, options = {}) {
|
|
559
615
|
// Ping activity at start to keep models alive during this operation
|
|
560
616
|
this.touchActivity();
|
|
561
617
|
try {
|
|
562
618
|
const context = await this.ensureEmbedContext();
|
|
563
|
-
|
|
619
|
+
// Guard: truncate text that exceeds model context window to prevent GGML crash
|
|
620
|
+
const { text: safeText, truncated } = await this.truncateToContextSize(text);
|
|
621
|
+
if (truncated) {
|
|
622
|
+
console.warn(`⚠ Text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
|
|
623
|
+
}
|
|
624
|
+
const embedding = await context.getEmbeddingFor(safeText);
|
|
564
625
|
return {
|
|
565
626
|
embedding: Array.from(embedding.vector),
|
|
566
|
-
model: this.embedModelUri,
|
|
627
|
+
model: options.model ?? this.embedModelUri,
|
|
567
628
|
};
|
|
568
629
|
}
|
|
569
630
|
catch (error) {
|
|
@@ -575,7 +636,7 @@ export class LlamaCpp {
|
|
|
575
636
|
* Batch embed multiple texts efficiently
|
|
576
637
|
* Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
|
|
577
638
|
*/
|
|
578
|
-
async embedBatch(texts) {
|
|
639
|
+
async embedBatch(texts, options = {}) {
|
|
579
640
|
if (this._ciMode)
|
|
580
641
|
throw new Error("LLM operations are disabled in CI (set CI=true)");
|
|
581
642
|
// Ping activity at start to keep models alive during this operation
|
|
@@ -591,9 +652,13 @@ export class LlamaCpp {
|
|
|
591
652
|
const embeddings = [];
|
|
592
653
|
for (const text of texts) {
|
|
593
654
|
try {
|
|
594
|
-
const
|
|
655
|
+
const { text: safeText, truncated } = await this.truncateToContextSize(text);
|
|
656
|
+
if (truncated) {
|
|
657
|
+
console.warn(`⚠ Batch text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
|
|
658
|
+
}
|
|
659
|
+
const embedding = await context.getEmbeddingFor(safeText);
|
|
595
660
|
this.touchActivity();
|
|
596
|
-
embeddings.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
|
|
661
|
+
embeddings.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
|
|
597
662
|
}
|
|
598
663
|
catch (err) {
|
|
599
664
|
console.error("Embedding error for text:", err);
|
|
@@ -610,9 +675,13 @@ export class LlamaCpp {
|
|
|
610
675
|
const results = [];
|
|
611
676
|
for (const text of chunk) {
|
|
612
677
|
try {
|
|
613
|
-
const
|
|
678
|
+
const { text: safeText, truncated } = await this.truncateToContextSize(text);
|
|
679
|
+
if (truncated) {
|
|
680
|
+
console.warn(`⚠ Batch text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
|
|
681
|
+
}
|
|
682
|
+
const embedding = await ctx.getEmbeddingFor(safeText);
|
|
614
683
|
this.touchActivity();
|
|
615
|
-
results.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
|
|
684
|
+
results.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
|
|
616
685
|
}
|
|
617
686
|
catch (err) {
|
|
618
687
|
console.error("Embedding error for text:", err);
|
|
@@ -767,8 +836,10 @@ export class LlamaCpp {
|
|
|
767
836
|
await genContext.dispose();
|
|
768
837
|
}
|
|
769
838
|
}
|
|
770
|
-
// Qwen3 reranker chat template overhead (system prompt, tags, separators)
|
|
771
|
-
|
|
839
|
+
// Qwen3 reranker chat template overhead (system prompt, tags, separators).
|
|
840
|
+
// Measured at ~350 tokens on real queries; use 512 as a safe upper bound so
|
|
841
|
+
// the truncation budget never lets a document slip past the context limit.
|
|
842
|
+
static RERANK_TEMPLATE_OVERHEAD = 512;
|
|
772
843
|
static RERANK_TARGET_DOCS_PER_CONTEXT = 10;
|
|
773
844
|
async rerank(query, documents, options = {}) {
|
|
774
845
|
if (this._ciMode)
|
|
@@ -1028,8 +1099,8 @@ class LLMSession {
|
|
|
1028
1099
|
async embed(text, options) {
|
|
1029
1100
|
return this.withOperation(() => this.manager.getLlamaCpp().embed(text, options));
|
|
1030
1101
|
}
|
|
1031
|
-
async embedBatch(texts) {
|
|
1032
|
-
return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts));
|
|
1102
|
+
async embedBatch(texts, options) {
|
|
1103
|
+
return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts, options));
|
|
1033
1104
|
}
|
|
1034
1105
|
async expandQuery(query, options) {
|
|
1035
1106
|
return this.withOperation(() => this.manager.getLlamaCpp().expandQuery(query, options));
|
|
@@ -1106,8 +1177,7 @@ let defaultLlamaCpp = null;
|
|
|
1106
1177
|
*/
|
|
1107
1178
|
export function getDefaultLlamaCpp() {
|
|
1108
1179
|
if (!defaultLlamaCpp) {
|
|
1109
|
-
|
|
1110
|
-
defaultLlamaCpp = new LlamaCpp(embedModel ? { embedModel } : {});
|
|
1180
|
+
defaultLlamaCpp = new LlamaCpp();
|
|
1111
1181
|
}
|
|
1112
1182
|
return defaultLlamaCpp;
|
|
1113
1183
|
}
|
package/dist/mcp/server.js
CHANGED
|
@@ -8,13 +8,17 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import { createServer } from "node:http";
|
|
10
10
|
import { randomUUID } from "node:crypto";
|
|
11
|
+
import { readFileSync } from "node:fs";
|
|
12
|
+
import { join, dirname } from "node:path";
|
|
11
13
|
import { fileURLToPath } from "url";
|
|
12
14
|
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
13
15
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
14
16
|
import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
|
|
15
17
|
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
|
|
16
18
|
import { z } from "zod";
|
|
19
|
+
import { existsSync } from "fs";
|
|
17
20
|
import { createStore, extractSnippet, addLineNumbers, getDefaultDbPath, DEFAULT_MULTI_GET_MAX_BYTES, } from "../index.js";
|
|
21
|
+
import { getConfigPath } from "../collections.js";
|
|
18
22
|
// =============================================================================
|
|
19
23
|
// Helper functions
|
|
20
24
|
// =============================================================================
|
|
@@ -39,6 +43,16 @@ function formatSearchSummary(results, query) {
|
|
|
39
43
|
}
|
|
40
44
|
return lines.join('\n');
|
|
41
45
|
}
|
|
46
|
+
function getPackageVersion() {
|
|
47
|
+
try {
|
|
48
|
+
const pkgPath = join(dirname(fileURLToPath(import.meta.url)), "../../package.json");
|
|
49
|
+
const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
|
|
50
|
+
return pkg.version ?? "unknown";
|
|
51
|
+
}
|
|
52
|
+
catch {
|
|
53
|
+
return "unknown";
|
|
54
|
+
}
|
|
55
|
+
}
|
|
42
56
|
// =============================================================================
|
|
43
57
|
// MCP Server
|
|
44
58
|
// =============================================================================
|
|
@@ -108,7 +122,7 @@ async function buildInstructions(store) {
|
|
|
108
122
|
* Shared by both stdio and HTTP transports.
|
|
109
123
|
*/
|
|
110
124
|
async function createMcpServer(store) {
|
|
111
|
-
const server = new McpServer({ name: "qmd", version:
|
|
125
|
+
const server = new McpServer({ name: "qmd", version: getPackageVersion() }, { instructions: await buildInstructions(store) });
|
|
112
126
|
// Pre-fetch default collection names for search tools
|
|
113
127
|
const defaultCollectionNames = await store.getDefaultCollectionNames();
|
|
114
128
|
// ---------------------------------------------------------------------------
|
|
@@ -218,8 +232,9 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
218
232
|
candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
|
|
219
233
|
collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
|
|
220
234
|
intent: z.string().optional().describe("Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."),
|
|
235
|
+
rerank: z.boolean().optional().default(true).describe("Rerank results using LLM (default: true). Set to false for faster results on CPU-only machines."),
|
|
221
236
|
},
|
|
222
|
-
}, async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
|
|
237
|
+
}, async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => {
|
|
223
238
|
// Map to internal format
|
|
224
239
|
const queries = searches.map(s => ({
|
|
225
240
|
type: s.type,
|
|
@@ -232,6 +247,7 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
232
247
|
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
|
233
248
|
limit,
|
|
234
249
|
minScore,
|
|
250
|
+
rerank,
|
|
235
251
|
intent,
|
|
236
252
|
});
|
|
237
253
|
// Use first lex or vec query for snippet extraction
|
|
@@ -387,7 +403,7 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
387
403
|
` Collections: ${status.collections.length}`,
|
|
388
404
|
];
|
|
389
405
|
for (const col of status.collections) {
|
|
390
|
-
summary.push(` - ${col.path} (${col.documents} docs)`);
|
|
406
|
+
summary.push(` - ${col.name}: ${col.path} (${col.documents} docs)`);
|
|
391
407
|
}
|
|
392
408
|
return {
|
|
393
409
|
content: [{ type: "text", text: summary.join('\n') }],
|
|
@@ -400,7 +416,11 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
400
416
|
// Transport: stdio (default)
|
|
401
417
|
// =============================================================================
|
|
402
418
|
export async function startMcpServer() {
|
|
403
|
-
const
|
|
419
|
+
const configPath = getConfigPath();
|
|
420
|
+
const store = await createStore({
|
|
421
|
+
dbPath: getDefaultDbPath(),
|
|
422
|
+
...(existsSync(configPath) ? { configPath } : {}),
|
|
423
|
+
});
|
|
404
424
|
const server = await createMcpServer(store);
|
|
405
425
|
const transport = new StdioServerTransport();
|
|
406
426
|
await server.connect(transport);
|
|
@@ -410,7 +430,11 @@ export async function startMcpServer() {
|
|
|
410
430
|
* Binds to localhost only. Returns a handle for shutdown and port discovery.
|
|
411
431
|
*/
|
|
412
432
|
export async function startMcpHttpServer(port, options) {
|
|
413
|
-
const
|
|
433
|
+
const configPath = getConfigPath();
|
|
434
|
+
const store = await createStore({
|
|
435
|
+
dbPath: getDefaultDbPath(),
|
|
436
|
+
...(existsSync(configPath) ? { configPath } : {}),
|
|
437
|
+
});
|
|
414
438
|
// Pre-fetch default collection names for REST endpoint
|
|
415
439
|
const defaultCollectionNames = await store.getDefaultCollectionNames();
|
|
416
440
|
// Session map: each client gets its own McpServer + Transport pair (MCP spec requirement).
|
package/dist/store.d.ts
CHANGED
|
@@ -18,6 +18,8 @@ export declare const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b
|
|
|
18
18
|
export declare const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
|
|
19
19
|
export declare const DEFAULT_GLOB = "**/*.md";
|
|
20
20
|
export declare const DEFAULT_MULTI_GET_MAX_BYTES: number;
|
|
21
|
+
export declare const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
|
|
22
|
+
export declare const DEFAULT_EMBED_MAX_BATCH_BYTES: number;
|
|
21
23
|
export declare const CHUNK_SIZE_TOKENS = 900;
|
|
22
24
|
export declare const CHUNK_OVERLAP_TOKENS: number;
|
|
23
25
|
export declare const CHUNK_SIZE_CHARS: number;
|
|
@@ -76,6 +78,20 @@ export declare function isInsideCodeFence(pos: number, fences: CodeFenceRegion[]
|
|
|
76
78
|
* @returns The best position to cut at
|
|
77
79
|
*/
|
|
78
80
|
export declare function findBestCutoff(breakPoints: BreakPoint[], targetCharPos: number, windowChars?: number, decayFactor?: number, codeFences?: CodeFenceRegion[]): number;
|
|
81
|
+
export type ChunkStrategy = "auto" | "regex";
|
|
82
|
+
/**
|
|
83
|
+
* Merge two sets of break points (e.g. regex + AST), keeping the highest
|
|
84
|
+
* score at each position. Result is sorted by position.
|
|
85
|
+
*/
|
|
86
|
+
export declare function mergeBreakPoints(a: BreakPoint[], b: BreakPoint[]): BreakPoint[];
|
|
87
|
+
/**
|
|
88
|
+
* Core chunk algorithm that operates on precomputed break points and code fences.
|
|
89
|
+
* This is the shared implementation used by both regex-only and AST-aware chunking.
|
|
90
|
+
*/
|
|
91
|
+
export declare function chunkDocumentWithBreakPoints(content: string, breakPoints: BreakPoint[], codeFences: CodeFenceRegion[], maxChars?: number, overlapChars?: number, windowChars?: number): {
|
|
92
|
+
text: string;
|
|
93
|
+
pos: number;
|
|
94
|
+
}[];
|
|
79
95
|
export declare const STRONG_SIGNAL_MIN_SCORE = 0.85;
|
|
80
96
|
export declare const STRONG_SIGNAL_MIN_GAP = 0.15;
|
|
81
97
|
export declare const RERANK_CANDIDATE_LIMIT = 40;
|
|
@@ -118,6 +134,8 @@ export declare function normalizePathSeparators(path: string): string;
|
|
|
118
134
|
export declare function getRelativePathFromPrefix(path: string, prefix: string): string | null;
|
|
119
135
|
export declare function resolve(...paths: string[]): string;
|
|
120
136
|
export declare function enableProductionMode(): void;
|
|
137
|
+
/** Reset production mode flag — only for testing. */
|
|
138
|
+
export declare function _resetProductionModeForTesting(): void;
|
|
121
139
|
export declare function getDefaultDbPath(indexName?: string): string;
|
|
122
140
|
export declare function getPwd(): string;
|
|
123
141
|
export declare function getRealPath(path: string): string;
|
|
@@ -311,16 +329,20 @@ export type EmbedResult = {
|
|
|
311
329
|
errors: number;
|
|
312
330
|
durationMs: number;
|
|
313
331
|
};
|
|
332
|
+
export type EmbedOptions = {
|
|
333
|
+
force?: boolean;
|
|
334
|
+
model?: string;
|
|
335
|
+
maxDocsPerBatch?: number;
|
|
336
|
+
maxBatchBytes?: number;
|
|
337
|
+
chunkStrategy?: ChunkStrategy;
|
|
338
|
+
onProgress?: (info: EmbedProgress) => void;
|
|
339
|
+
};
|
|
314
340
|
/**
|
|
315
341
|
* Generate vector embeddings for documents that need them.
|
|
316
342
|
* Pure function — no console output, no db lifecycle management.
|
|
317
343
|
* Uses the store's LlamaCpp instance if set, otherwise the global singleton.
|
|
318
344
|
*/
|
|
319
|
-
export declare function generateEmbeddings(store: Store, options?:
|
|
320
|
-
force?: boolean;
|
|
321
|
-
model?: string;
|
|
322
|
-
onProgress?: (info: EmbedProgress) => void;
|
|
323
|
-
}): Promise<EmbedResult>;
|
|
345
|
+
export declare function generateEmbeddings(store: Store, options?: EmbedOptions): Promise<EmbedResult>;
|
|
324
346
|
/**
|
|
325
347
|
* Create a new store instance with the given database path.
|
|
326
348
|
* If no path is provided, uses the default path (~/.cache/qmd/index.sqlite).
|
|
@@ -505,15 +527,34 @@ export declare function deactivateDocument(db: Database, collectionName: string,
|
|
|
505
527
|
*/
|
|
506
528
|
export declare function getActiveDocumentPaths(db: Database, collectionName: string): string[];
|
|
507
529
|
export { formatQueryForEmbedding, formatDocForEmbedding };
|
|
530
|
+
/**
|
|
531
|
+
* Chunk a document using regex-only break point detection.
|
|
532
|
+
* This is the sync, backward-compatible API used by tests and legacy callers.
|
|
533
|
+
*/
|
|
508
534
|
export declare function chunkDocument(content: string, maxChars?: number, overlapChars?: number, windowChars?: number): {
|
|
509
535
|
text: string;
|
|
510
536
|
pos: number;
|
|
511
537
|
}[];
|
|
538
|
+
/**
|
|
539
|
+
* Async AST-aware chunking. Detects language from filepath, computes AST
|
|
540
|
+
* break points for supported code files, merges with regex break points,
|
|
541
|
+
* and delegates to the shared chunk algorithm.
|
|
542
|
+
*
|
|
543
|
+
* Falls back to regex-only when strategy is "regex", filepath is absent,
|
|
544
|
+
* or language is unsupported.
|
|
545
|
+
*/
|
|
546
|
+
export declare function chunkDocumentAsync(content: string, maxChars?: number, overlapChars?: number, windowChars?: number, filepath?: string, chunkStrategy?: ChunkStrategy): Promise<{
|
|
547
|
+
text: string;
|
|
548
|
+
pos: number;
|
|
549
|
+
}[]>;
|
|
512
550
|
/**
|
|
513
551
|
* Chunk a document by actual token count using the LLM tokenizer.
|
|
514
552
|
* More accurate than character-based chunking but requires async.
|
|
553
|
+
*
|
|
554
|
+
* When filepath and chunkStrategy are provided, uses AST-aware break points
|
|
555
|
+
* for supported code files.
|
|
515
556
|
*/
|
|
516
|
-
export declare function chunkDocumentByTokens(content: string, maxTokens?: number, overlapTokens?: number, windowTokens?: number): Promise<{
|
|
557
|
+
export declare function chunkDocumentByTokens(content: string, maxTokens?: number, overlapTokens?: number, windowTokens?: number, filepath?: string, chunkStrategy?: ChunkStrategy, signal?: AbortSignal): Promise<{
|
|
517
558
|
text: string;
|
|
518
559
|
pos: number;
|
|
519
560
|
tokens: number;
|
|
@@ -640,6 +681,7 @@ export declare function getCollectionsWithoutContext(db: Database): {
|
|
|
640
681
|
* Useful for suggesting where context might be needed.
|
|
641
682
|
*/
|
|
642
683
|
export declare function getTopLevelPathsWithoutContext(db: Database, collectionName: string): string[];
|
|
684
|
+
export declare function sanitizeFTS5Term(term: string): string;
|
|
643
685
|
/**
|
|
644
686
|
* Validate that a vec/hyde query doesn't use lex-only syntax.
|
|
645
687
|
* Returns error message if invalid, null if valid.
|
|
@@ -665,6 +707,12 @@ export declare function clearAllEmbeddings(db: Database): void;
|
|
|
665
707
|
/**
|
|
666
708
|
* Insert a single embedding into both content_vectors and vectors_vec tables.
|
|
667
709
|
* The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
|
|
710
|
+
*
|
|
711
|
+
* content_vectors is inserted first so that getHashesForEmbedding (which checks
|
|
712
|
+
* only content_vectors) won't re-select the hash on a crash between the two inserts.
|
|
713
|
+
*
|
|
714
|
+
* vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
|
|
715
|
+
* vec0 virtual tables silently ignore the OR REPLACE conflict clause.
|
|
668
716
|
*/
|
|
669
717
|
export declare function insertEmbedding(db: Database, hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string): void;
|
|
670
718
|
export declare function expandQuery(query: string, model: string | undefined, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise<ExpandedQuery[]>;
|
|
@@ -763,6 +811,7 @@ export interface HybridQueryOptions {
|
|
|
763
811
|
explain?: boolean;
|
|
764
812
|
intent?: string;
|
|
765
813
|
skipRerank?: boolean;
|
|
814
|
+
chunkStrategy?: ChunkStrategy;
|
|
766
815
|
hooks?: SearchHooks;
|
|
767
816
|
}
|
|
768
817
|
export interface HybridQueryResult {
|
|
@@ -836,6 +885,7 @@ export interface StructuredSearchOptions {
|
|
|
836
885
|
intent?: string;
|
|
837
886
|
/** Skip LLM reranking, use only RRF scores */
|
|
838
887
|
skipRerank?: boolean;
|
|
888
|
+
chunkStrategy?: ChunkStrategy;
|
|
839
889
|
hooks?: SearchHooks;
|
|
840
890
|
}
|
|
841
891
|
/**
|