@tobilu/qmd 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/llm.js CHANGED
@@ -48,7 +48,7 @@ export function formatDocForEmbedding(text, title, modelUri) {
48
48
  // HuggingFace model URIs for node-llama-cpp
49
49
  // Format: hf:<user>/<repo>/<file>
50
50
  // Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf)
51
- const DEFAULT_EMBED_MODEL = process.env.QMD_EMBED_MODEL ?? "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
51
+ const DEFAULT_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
52
52
  const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
53
53
  // const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
54
54
  const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
@@ -61,7 +61,9 @@ export const DEFAULT_EMBED_MODEL_URI = DEFAULT_EMBED_MODEL;
61
61
  export const DEFAULT_RERANK_MODEL_URI = DEFAULT_RERANK_MODEL;
62
62
  export const DEFAULT_GENERATE_MODEL_URI = DEFAULT_GENERATE_MODEL;
63
63
  // Local model cache directory
64
- const MODEL_CACHE_DIR = join(homedir(), ".cache", "qmd", "models");
64
+ const MODEL_CACHE_DIR = process.env.XDG_CACHE_HOME
65
+ ? join(process.env.XDG_CACHE_HOME, "qmd", "models")
66
+ : join(homedir(), ".cache", "qmd", "models");
65
67
  export const DEFAULT_MODEL_CACHE_DIR = MODEL_CACHE_DIR;
66
68
  function parseHfUri(model) {
67
69
  if (!model.startsWith("hf:"))
@@ -187,14 +189,17 @@ export class LlamaCpp {
187
189
  // Track disposal state to prevent double-dispose
188
190
  disposed = false;
189
191
  constructor(config = {}) {
190
- this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL;
191
- this.generateModelUri = config.generateModel || DEFAULT_GENERATE_MODEL;
192
- this.rerankModelUri = config.rerankModel || DEFAULT_RERANK_MODEL;
192
+ this.embedModelUri = config.embedModel || process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL;
193
+ this.generateModelUri = config.generateModel || process.env.QMD_GENERATE_MODEL || DEFAULT_GENERATE_MODEL;
194
+ this.rerankModelUri = config.rerankModel || process.env.QMD_RERANK_MODEL || DEFAULT_RERANK_MODEL;
193
195
  this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR;
194
196
  this.expandContextSize = resolveExpandContextSize(config.expandContextSize);
195
197
  this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
196
198
  this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
197
199
  }
200
+ get embedModelName() {
201
+ return this.embedModelUri;
202
+ }
198
203
  /**
199
204
  * Reset the inactivity timer. Called after each model operation.
200
205
  * When timer fires, models are unloaded to free memory (if no active sessions).
@@ -289,11 +294,29 @@ export class LlamaCpp {
289
294
  */
290
295
  async ensureLlama() {
291
296
  if (!this.llama) {
292
- const llama = await getLlama({
293
- // attempt to build
297
+ // Allow override via QMD_LLAMA_GPU: "false" | "off" | "none" | "disable" | "disabled" | "0" forces CPU
298
+ const gpuOverride = (process.env.QMD_LLAMA_GPU ?? "").toLowerCase();
299
+ const forceCpu = ["false", "off", "none", "disable", "disabled", "0"].includes(gpuOverride);
300
+ const loadLlama = async (gpu) => await getLlama({
294
301
  build: "autoAttempt",
295
- logLevel: LlamaLogLevel.error
302
+ logLevel: LlamaLogLevel.error,
303
+ gpu,
296
304
  });
305
+ let llama;
306
+ if (forceCpu) {
307
+ llama = await loadLlama(false);
308
+ }
309
+ else {
310
+ try {
311
+ llama = await loadLlama("auto");
312
+ }
313
+ catch (err) {
314
+ // GPU backend (e.g. Vulkan on headless/driverless machines) can throw at init.
315
+ // Fall back to CPU so qmd still works.
316
+ process.stderr.write(`QMD Warning: GPU init failed (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`);
317
+ llama = await loadLlama(false);
318
+ }
319
+ }
297
320
  if (llama.gpu === false) {
298
321
  process.stderr.write("QMD Warning: no GPU acceleration, running on CPU (slow). Run 'qmd status' for details.\n");
299
322
  }
@@ -394,6 +417,7 @@ export class LlamaCpp {
394
417
  for (let i = 0; i < n; i++) {
395
418
  try {
396
419
  this.embedContexts.push(await model.createEmbeddingContext({
420
+ contextSize: LlamaCpp.EMBED_CONTEXT_SIZE,
397
421
  ...(threads > 0 ? { threads } : {}),
398
422
  }));
399
423
  }
@@ -484,9 +508,20 @@ export class LlamaCpp {
484
508
  * - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
485
509
  */
486
510
  // Qwen3 reranker template adds ~200 tokens overhead (system prompt, tags, etc.)
487
- // Chunks are max 800 tokens, so 800 + 200 + query ≈ 1100 tokens typical.
488
- // Use 2048 for safety margin. Still 17× less than auto (40960).
489
- static RERANK_CONTEXT_SIZE = 2048;
511
+ // Default 2048 was too small for longer documents (e.g. session transcripts,
512
+ // CJK text, or large markdown files); callers hit "input lengths exceed
513
+ // context size" errors even after truncation because the overhead estimate
514
+ // was insufficient. 4096 comfortably fits the largest real-world chunks
515
+ // while staying well below the 40 960-token auto size.
516
+ // Override with QMD_RERANK_CONTEXT_SIZE env var if you need more headroom.
517
+ static RERANK_CONTEXT_SIZE = (() => {
518
+ const v = parseInt(process.env.QMD_RERANK_CONTEXT_SIZE ?? "", 10);
519
+ return Number.isFinite(v) && v > 0 ? v : 4096;
520
+ })();
521
+ static EMBED_CONTEXT_SIZE = (() => {
522
+ const v = parseInt(process.env.QMD_EMBED_CONTEXT_SIZE ?? "", 10);
523
+ return Number.isFinite(v) && v > 0 ? v : 2048;
524
+ })();
490
525
  async ensureRerankContexts() {
491
526
  if (this.rerankContexts.length === 0) {
492
527
  const model = await this.ensureRerankModel();
@@ -555,15 +590,41 @@ export class LlamaCpp {
555
590
  // ==========================================================================
556
591
  // Core API methods
557
592
  // ==========================================================================
593
+ /**
594
+ * Truncate text to fit within the embedding model's context window.
595
+ * Uses the model's own tokenizer for accurate token counting, then
596
+ * detokenizes back to text if truncation is needed.
597
+ * Returns the (possibly truncated) text and whether truncation occurred.
598
+ */
599
+ async truncateToContextSize(text) {
600
+ if (!this.embedModel)
601
+ return { text, truncated: false };
602
+ const maxTokens = this.embedModel.trainContextSize;
603
+ if (maxTokens <= 0)
604
+ return { text, truncated: false };
605
+ const tokens = this.embedModel.tokenize(text);
606
+ if (tokens.length <= maxTokens)
607
+ return { text, truncated: false };
608
+ // Leave a small margin (4 tokens) for BOS/EOS overhead
609
+ const safeLimit = Math.max(1, maxTokens - 4);
610
+ const truncatedTokens = tokens.slice(0, safeLimit);
611
+ const truncatedText = this.embedModel.detokenize(truncatedTokens);
612
+ return { text: truncatedText, truncated: true };
613
+ }
558
614
  async embed(text, options = {}) {
559
615
  // Ping activity at start to keep models alive during this operation
560
616
  this.touchActivity();
561
617
  try {
562
618
  const context = await this.ensureEmbedContext();
563
- const embedding = await context.getEmbeddingFor(text);
619
+ // Guard: truncate text that exceeds model context window to prevent GGML crash
620
+ const { text: safeText, truncated } = await this.truncateToContextSize(text);
621
+ if (truncated) {
622
+ console.warn(`⚠ Text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
623
+ }
624
+ const embedding = await context.getEmbeddingFor(safeText);
564
625
  return {
565
626
  embedding: Array.from(embedding.vector),
566
- model: this.embedModelUri,
627
+ model: options.model ?? this.embedModelUri,
567
628
  };
568
629
  }
569
630
  catch (error) {
@@ -575,7 +636,7 @@ export class LlamaCpp {
575
636
  * Batch embed multiple texts efficiently
576
637
  * Uses Promise.all for parallel embedding - node-llama-cpp handles batching internally
577
638
  */
578
- async embedBatch(texts) {
639
+ async embedBatch(texts, options = {}) {
579
640
  if (this._ciMode)
580
641
  throw new Error("LLM operations are disabled in CI (set CI=true)");
581
642
  // Ping activity at start to keep models alive during this operation
@@ -591,9 +652,13 @@ export class LlamaCpp {
591
652
  const embeddings = [];
592
653
  for (const text of texts) {
593
654
  try {
594
- const embedding = await context.getEmbeddingFor(text);
655
+ const { text: safeText, truncated } = await this.truncateToContextSize(text);
656
+ if (truncated) {
657
+ console.warn(`⚠ Batch text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
658
+ }
659
+ const embedding = await context.getEmbeddingFor(safeText);
595
660
  this.touchActivity();
596
- embeddings.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
661
+ embeddings.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
597
662
  }
598
663
  catch (err) {
599
664
  console.error("Embedding error for text:", err);
@@ -610,9 +675,13 @@ export class LlamaCpp {
610
675
  const results = [];
611
676
  for (const text of chunk) {
612
677
  try {
613
- const embedding = await ctx.getEmbeddingFor(text);
678
+ const { text: safeText, truncated } = await this.truncateToContextSize(text);
679
+ if (truncated) {
680
+ console.warn(`⚠ Batch text truncated to fit embedding context (${this.embedModel?.trainContextSize} tokens)`);
681
+ }
682
+ const embedding = await ctx.getEmbeddingFor(safeText);
614
683
  this.touchActivity();
615
- results.push({ embedding: Array.from(embedding.vector), model: this.embedModelUri });
684
+ results.push({ embedding: Array.from(embedding.vector), model: options.model ?? this.embedModelUri });
616
685
  }
617
686
  catch (err) {
618
687
  console.error("Embedding error for text:", err);
@@ -767,8 +836,10 @@ export class LlamaCpp {
767
836
  await genContext.dispose();
768
837
  }
769
838
  }
770
- // Qwen3 reranker chat template overhead (system prompt, tags, separators)
771
- static RERANK_TEMPLATE_OVERHEAD = 200;
839
+ // Qwen3 reranker chat template overhead (system prompt, tags, separators).
840
+ // Measured at ~350 tokens on real queries; use 512 as a safe upper bound so
841
+ // the truncation budget never lets a document slip past the context limit.
842
+ static RERANK_TEMPLATE_OVERHEAD = 512;
772
843
  static RERANK_TARGET_DOCS_PER_CONTEXT = 10;
773
844
  async rerank(query, documents, options = {}) {
774
845
  if (this._ciMode)
@@ -1028,8 +1099,8 @@ class LLMSession {
1028
1099
  async embed(text, options) {
1029
1100
  return this.withOperation(() => this.manager.getLlamaCpp().embed(text, options));
1030
1101
  }
1031
- async embedBatch(texts) {
1032
- return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts));
1102
+ async embedBatch(texts, options) {
1103
+ return this.withOperation(() => this.manager.getLlamaCpp().embedBatch(texts, options));
1033
1104
  }
1034
1105
  async expandQuery(query, options) {
1035
1106
  return this.withOperation(() => this.manager.getLlamaCpp().expandQuery(query, options));
@@ -1106,8 +1177,7 @@ let defaultLlamaCpp = null;
1106
1177
  */
1107
1178
  export function getDefaultLlamaCpp() {
1108
1179
  if (!defaultLlamaCpp) {
1109
- const embedModel = process.env.QMD_EMBED_MODEL;
1110
- defaultLlamaCpp = new LlamaCpp(embedModel ? { embedModel } : {});
1180
+ defaultLlamaCpp = new LlamaCpp();
1111
1181
  }
1112
1182
  return defaultLlamaCpp;
1113
1183
  }
@@ -8,13 +8,17 @@
8
8
  */
9
9
  import { createServer } from "node:http";
10
10
  import { randomUUID } from "node:crypto";
11
+ import { readFileSync } from "node:fs";
12
+ import { join, dirname } from "node:path";
11
13
  import { fileURLToPath } from "url";
12
14
  import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
13
15
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
14
16
  import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
15
17
  import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
16
18
  import { z } from "zod";
19
+ import { existsSync } from "fs";
17
20
  import { createStore, extractSnippet, addLineNumbers, getDefaultDbPath, DEFAULT_MULTI_GET_MAX_BYTES, } from "../index.js";
21
+ import { getConfigPath } from "../collections.js";
18
22
  // =============================================================================
19
23
  // Helper functions
20
24
  // =============================================================================
@@ -39,6 +43,16 @@ function formatSearchSummary(results, query) {
39
43
  }
40
44
  return lines.join('\n');
41
45
  }
46
+ function getPackageVersion() {
47
+ try {
48
+ const pkgPath = join(dirname(fileURLToPath(import.meta.url)), "../../package.json");
49
+ const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
50
+ return pkg.version ?? "unknown";
51
+ }
52
+ catch {
53
+ return "unknown";
54
+ }
55
+ }
42
56
  // =============================================================================
43
57
  // MCP Server
44
58
  // =============================================================================
@@ -108,7 +122,7 @@ async function buildInstructions(store) {
108
122
  * Shared by both stdio and HTTP transports.
109
123
  */
110
124
  async function createMcpServer(store) {
111
- const server = new McpServer({ name: "qmd", version: "0.9.9" }, { instructions: await buildInstructions(store) });
125
+ const server = new McpServer({ name: "qmd", version: getPackageVersion() }, { instructions: await buildInstructions(store) });
112
126
  // Pre-fetch default collection names for search tools
113
127
  const defaultCollectionNames = await store.getDefaultCollectionNames();
114
128
  // ---------------------------------------------------------------------------
@@ -218,8 +232,9 @@ Intent-aware lex (C++ performance, not sports):
218
232
  candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
219
233
  collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
220
234
  intent: z.string().optional().describe("Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."),
235
+ rerank: z.boolean().optional().default(true).describe("Rerank results using LLM (default: true). Set to false for faster results on CPU-only machines."),
221
236
  },
222
- }, async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
237
+ }, async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => {
223
238
  // Map to internal format
224
239
  const queries = searches.map(s => ({
225
240
  type: s.type,
@@ -232,6 +247,7 @@ Intent-aware lex (C++ performance, not sports):
232
247
  collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
233
248
  limit,
234
249
  minScore,
250
+ rerank,
235
251
  intent,
236
252
  });
237
253
  // Use first lex or vec query for snippet extraction
@@ -387,7 +403,7 @@ Intent-aware lex (C++ performance, not sports):
387
403
  ` Collections: ${status.collections.length}`,
388
404
  ];
389
405
  for (const col of status.collections) {
390
- summary.push(` - ${col.path} (${col.documents} docs)`);
406
+ summary.push(` - ${col.name}: ${col.path} (${col.documents} docs)`);
391
407
  }
392
408
  return {
393
409
  content: [{ type: "text", text: summary.join('\n') }],
@@ -400,7 +416,11 @@ Intent-aware lex (C++ performance, not sports):
400
416
  // Transport: stdio (default)
401
417
  // =============================================================================
402
418
  export async function startMcpServer() {
403
- const store = await createStore({ dbPath: getDefaultDbPath() });
419
+ const configPath = getConfigPath();
420
+ const store = await createStore({
421
+ dbPath: getDefaultDbPath(),
422
+ ...(existsSync(configPath) ? { configPath } : {}),
423
+ });
404
424
  const server = await createMcpServer(store);
405
425
  const transport = new StdioServerTransport();
406
426
  await server.connect(transport);
@@ -410,7 +430,11 @@ export async function startMcpServer() {
410
430
  * Binds to localhost only. Returns a handle for shutdown and port discovery.
411
431
  */
412
432
  export async function startMcpHttpServer(port, options) {
413
- const store = await createStore({ dbPath: getDefaultDbPath() });
433
+ const configPath = getConfigPath();
434
+ const store = await createStore({
435
+ dbPath: getDefaultDbPath(),
436
+ ...(existsSync(configPath) ? { configPath } : {}),
437
+ });
414
438
  // Pre-fetch default collection names for REST endpoint
415
439
  const defaultCollectionNames = await store.getDefaultCollectionNames();
416
440
  // Session map: each client gets its own McpServer + Transport pair (MCP spec requirement).
package/dist/store.d.ts CHANGED
@@ -18,6 +18,8 @@ export declare const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b
18
18
  export declare const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
19
19
  export declare const DEFAULT_GLOB = "**/*.md";
20
20
  export declare const DEFAULT_MULTI_GET_MAX_BYTES: number;
21
+ export declare const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
22
+ export declare const DEFAULT_EMBED_MAX_BATCH_BYTES: number;
21
23
  export declare const CHUNK_SIZE_TOKENS = 900;
22
24
  export declare const CHUNK_OVERLAP_TOKENS: number;
23
25
  export declare const CHUNK_SIZE_CHARS: number;
@@ -76,6 +78,20 @@ export declare function isInsideCodeFence(pos: number, fences: CodeFenceRegion[]
76
78
  * @returns The best position to cut at
77
79
  */
78
80
  export declare function findBestCutoff(breakPoints: BreakPoint[], targetCharPos: number, windowChars?: number, decayFactor?: number, codeFences?: CodeFenceRegion[]): number;
81
+ export type ChunkStrategy = "auto" | "regex";
82
+ /**
83
+ * Merge two sets of break points (e.g. regex + AST), keeping the highest
84
+ * score at each position. Result is sorted by position.
85
+ */
86
+ export declare function mergeBreakPoints(a: BreakPoint[], b: BreakPoint[]): BreakPoint[];
87
+ /**
88
+ * Core chunk algorithm that operates on precomputed break points and code fences.
89
+ * This is the shared implementation used by both regex-only and AST-aware chunking.
90
+ */
91
+ export declare function chunkDocumentWithBreakPoints(content: string, breakPoints: BreakPoint[], codeFences: CodeFenceRegion[], maxChars?: number, overlapChars?: number, windowChars?: number): {
92
+ text: string;
93
+ pos: number;
94
+ }[];
79
95
  export declare const STRONG_SIGNAL_MIN_SCORE = 0.85;
80
96
  export declare const STRONG_SIGNAL_MIN_GAP = 0.15;
81
97
  export declare const RERANK_CANDIDATE_LIMIT = 40;
@@ -118,6 +134,8 @@ export declare function normalizePathSeparators(path: string): string;
118
134
  export declare function getRelativePathFromPrefix(path: string, prefix: string): string | null;
119
135
  export declare function resolve(...paths: string[]): string;
120
136
  export declare function enableProductionMode(): void;
137
+ /** Reset production mode flag — only for testing. */
138
+ export declare function _resetProductionModeForTesting(): void;
121
139
  export declare function getDefaultDbPath(indexName?: string): string;
122
140
  export declare function getPwd(): string;
123
141
  export declare function getRealPath(path: string): string;
@@ -311,16 +329,20 @@ export type EmbedResult = {
311
329
  errors: number;
312
330
  durationMs: number;
313
331
  };
332
+ export type EmbedOptions = {
333
+ force?: boolean;
334
+ model?: string;
335
+ maxDocsPerBatch?: number;
336
+ maxBatchBytes?: number;
337
+ chunkStrategy?: ChunkStrategy;
338
+ onProgress?: (info: EmbedProgress) => void;
339
+ };
314
340
  /**
315
341
  * Generate vector embeddings for documents that need them.
316
342
  * Pure function — no console output, no db lifecycle management.
317
343
  * Uses the store's LlamaCpp instance if set, otherwise the global singleton.
318
344
  */
319
- export declare function generateEmbeddings(store: Store, options?: {
320
- force?: boolean;
321
- model?: string;
322
- onProgress?: (info: EmbedProgress) => void;
323
- }): Promise<EmbedResult>;
345
+ export declare function generateEmbeddings(store: Store, options?: EmbedOptions): Promise<EmbedResult>;
324
346
  /**
325
347
  * Create a new store instance with the given database path.
326
348
  * If no path is provided, uses the default path (~/.cache/qmd/index.sqlite).
@@ -505,15 +527,34 @@ export declare function deactivateDocument(db: Database, collectionName: string,
505
527
  */
506
528
  export declare function getActiveDocumentPaths(db: Database, collectionName: string): string[];
507
529
  export { formatQueryForEmbedding, formatDocForEmbedding };
530
+ /**
531
+ * Chunk a document using regex-only break point detection.
532
+ * This is the sync, backward-compatible API used by tests and legacy callers.
533
+ */
508
534
  export declare function chunkDocument(content: string, maxChars?: number, overlapChars?: number, windowChars?: number): {
509
535
  text: string;
510
536
  pos: number;
511
537
  }[];
538
+ /**
539
+ * Async AST-aware chunking. Detects language from filepath, computes AST
540
+ * break points for supported code files, merges with regex break points,
541
+ * and delegates to the shared chunk algorithm.
542
+ *
543
+ * Falls back to regex-only when strategy is "regex", filepath is absent,
544
+ * or language is unsupported.
545
+ */
546
+ export declare function chunkDocumentAsync(content: string, maxChars?: number, overlapChars?: number, windowChars?: number, filepath?: string, chunkStrategy?: ChunkStrategy): Promise<{
547
+ text: string;
548
+ pos: number;
549
+ }[]>;
512
550
  /**
513
551
  * Chunk a document by actual token count using the LLM tokenizer.
514
552
  * More accurate than character-based chunking but requires async.
553
+ *
554
+ * When filepath and chunkStrategy are provided, uses AST-aware break points
555
+ * for supported code files.
515
556
  */
516
- export declare function chunkDocumentByTokens(content: string, maxTokens?: number, overlapTokens?: number, windowTokens?: number): Promise<{
557
+ export declare function chunkDocumentByTokens(content: string, maxTokens?: number, overlapTokens?: number, windowTokens?: number, filepath?: string, chunkStrategy?: ChunkStrategy, signal?: AbortSignal): Promise<{
517
558
  text: string;
518
559
  pos: number;
519
560
  tokens: number;
@@ -640,6 +681,7 @@ export declare function getCollectionsWithoutContext(db: Database): {
640
681
  * Useful for suggesting where context might be needed.
641
682
  */
642
683
  export declare function getTopLevelPathsWithoutContext(db: Database, collectionName: string): string[];
684
+ export declare function sanitizeFTS5Term(term: string): string;
643
685
  /**
644
686
  * Validate that a vec/hyde query doesn't use lex-only syntax.
645
687
  * Returns error message if invalid, null if valid.
@@ -665,6 +707,12 @@ export declare function clearAllEmbeddings(db: Database): void;
665
707
  /**
666
708
  * Insert a single embedding into both content_vectors and vectors_vec tables.
667
709
  * The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
710
+ *
711
+ * content_vectors is inserted first so that getHashesForEmbedding (which checks
712
+ * only content_vectors) won't re-select the hash on a crash between the two inserts.
713
+ *
714
+ * vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
715
+ * vec0 virtual tables silently ignore the OR REPLACE conflict clause.
668
716
  */
669
717
  export declare function insertEmbedding(db: Database, hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string): void;
670
718
  export declare function expandQuery(query: string, model: string | undefined, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise<ExpandedQuery[]>;
@@ -763,6 +811,7 @@ export interface HybridQueryOptions {
763
811
  explain?: boolean;
764
812
  intent?: string;
765
813
  skipRerank?: boolean;
814
+ chunkStrategy?: ChunkStrategy;
766
815
  hooks?: SearchHooks;
767
816
  }
768
817
  export interface HybridQueryResult {
@@ -836,6 +885,7 @@ export interface StructuredSearchOptions {
836
885
  intent?: string;
837
886
  /** Skip LLM reranking, use only RRF scores */
838
887
  skipRerank?: boolean;
888
+ chunkStrategy?: ChunkStrategy;
839
889
  hooks?: SearchHooks;
840
890
  }
841
891
  /**