@tobilu/qmd 1.1.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,9 +14,7 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
14
14
  import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
15
15
  import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
16
16
  import { z } from "zod";
17
- import { createStore, extractSnippet, addLineNumbers, structuredSearch, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
18
- import { getCollection, getGlobalContext, getDefaultCollectionNames } from "./collections.js";
19
- import { disposeDefaultLlamaCpp } from "./llm.js";
17
+ import { createStore, extractSnippet, addLineNumbers, getDefaultDbPath, DEFAULT_MULTI_GET_MAX_BYTES, } from "../index.js";
20
18
  // =============================================================================
21
19
  // Helper functions
22
20
  // =============================================================================
@@ -49,11 +47,12 @@ function formatSearchSummary(results, query) {
49
47
  * Injected into the LLM's system prompt via MCP initialize response —
50
48
  * gives the LLM immediate context about what's searchable without a tool call.
51
49
  */
52
- function buildInstructions(store) {
53
- const status = store.getStatus();
50
+ async function buildInstructions(store) {
51
+ const status = await store.getStatus();
52
+ const contexts = await store.listContexts();
53
+ const globalCtx = await store.getGlobalContext();
54
54
  const lines = [];
55
55
  // --- What is this? ---
56
- const globalCtx = getGlobalContext();
57
56
  lines.push(`QMD is your local search engine over ${status.totalDocuments} markdown documents.`);
58
57
  if (globalCtx)
59
58
  lines.push(`Context: ${globalCtx}`);
@@ -62,9 +61,9 @@ function buildInstructions(store) {
62
61
  lines.push("");
63
62
  lines.push("Collections (scope with `collection` parameter):");
64
63
  for (const col of status.collections) {
65
- const collConfig = getCollection(col.name);
66
- const rootCtx = collConfig?.context?.[""] || collConfig?.context?.["/"];
67
- const desc = rootCtx ? ` — ${rootCtx}` : "";
64
+ // Find root context for this collection
65
+ const rootCtx = contexts.find(c => c.collection === col.name && (c.path === "" || c.path === "/"));
66
+ const desc = rootCtx ? ` — ${rootCtx.context}` : "";
68
67
  lines.push(` - "${col.name}" (${col.documents} docs)${desc}`);
69
68
  }
70
69
  }
@@ -108,8 +107,10 @@ function buildInstructions(store) {
108
107
  * Create an MCP server with all QMD tools, resources, and prompts registered.
109
108
  * Shared by both stdio and HTTP transports.
110
109
  */
111
- function createMcpServer(store) {
112
- const server = new McpServer({ name: "qmd", version: "0.9.9" }, { instructions: buildInstructions(store) });
110
+ async function createMcpServer(store) {
111
+ const server = new McpServer({ name: "qmd", version: "0.9.9" }, { instructions: await buildInstructions(store) });
112
+ // Pre-fetch default collection names for search tools
113
+ const defaultCollectionNames = await store.getDefaultCollectionNames();
113
114
  // ---------------------------------------------------------------------------
114
115
  // Resource: qmd://{path} - read-only access to documents by path
115
116
  // Note: No list() - documents are discovered via search tools
@@ -122,43 +123,20 @@ function createMcpServer(store) {
122
123
  // Decode URL-encoded path (MCP clients send encoded URIs)
123
124
  const pathStr = Array.isArray(path) ? path.join('/') : (path || '');
124
125
  const decodedPath = decodeURIComponent(pathStr);
125
- // Parse virtual path: collection/relative/path
126
- const parts = decodedPath.split('/');
127
- const collection = parts[0] || '';
128
- const relativePath = parts.slice(1).join('/');
129
- // Find document by collection and path, join with content table
130
- let doc = store.db.prepare(`
131
- SELECT d.collection, d.path, d.title, c.doc as body
132
- FROM documents d
133
- JOIN content c ON c.hash = d.hash
134
- WHERE d.collection = ? AND d.path = ? AND d.active = 1
135
- `).get(collection, relativePath);
136
- // Try suffix match if exact match fails
137
- if (!doc) {
138
- doc = store.db.prepare(`
139
- SELECT d.collection, d.path, d.title, c.doc as body
140
- FROM documents d
141
- JOIN content c ON c.hash = d.hash
142
- WHERE d.path LIKE ? AND d.active = 1
143
- LIMIT 1
144
- `).get(`%${relativePath}`);
145
- }
146
- if (!doc) {
126
+ // Use SDK to find document — findDocument handles collection/path resolution
127
+ const result = await store.get(decodedPath, { includeBody: true });
128
+ if ("error" in result) {
147
129
  return { contents: [{ uri: uri.href, text: `Document not found: ${decodedPath}` }] };
148
130
  }
149
- // Construct virtual path for context lookup
150
- const virtualPath = `qmd://${doc.collection}/${doc.path}`;
151
- const context = store.getContextForFile(virtualPath);
152
- let text = addLineNumbers(doc.body); // Default to line numbers
153
- if (context) {
154
- text = `<!-- Context: ${context} -->\n\n` + text;
131
+ let text = addLineNumbers(result.body || ""); // Default to line numbers
132
+ if (result.context) {
133
+ text = `<!-- Context: ${result.context} -->\n\n` + text;
155
134
  }
156
- const displayName = `${doc.collection}/${doc.path}`;
157
135
  return {
158
136
  contents: [{
159
137
  uri: uri.href,
160
- name: displayName,
161
- title: doc.title || doc.path,
138
+ name: result.displayPath,
139
+ title: result.title || result.displayPath,
162
140
  mimeType: "text/markdown",
163
141
  text,
164
142
  }],
@@ -243,17 +221,17 @@ Intent-aware lex (C++ performance, not sports):
243
221
  },
244
222
  }, async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
245
223
  // Map to internal format
246
- const subSearches = searches.map(s => ({
224
+ const queries = searches.map(s => ({
247
225
  type: s.type,
248
226
  query: s.query,
249
227
  }));
250
228
  // Use default collections if none specified
251
- const effectiveCollections = collections ?? getDefaultCollectionNames();
252
- const results = await structuredSearch(store, subSearches, {
229
+ const effectiveCollections = collections ?? defaultCollectionNames;
230
+ const results = await store.search({
231
+ queries,
253
232
  collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
254
233
  limit,
255
234
  minScore,
256
- candidateLimit,
257
235
  intent,
258
236
  });
259
237
  // Use first lex or vec query for snippet extraction
@@ -298,7 +276,7 @@ Intent-aware lex (C++ performance, not sports):
298
276
  parsedFromLine = parseInt(colonMatch[1], 10);
299
277
  lookup = lookup.slice(0, -colonMatch[0].length);
300
278
  }
301
- const result = store.findDocument(lookup, { includeBody: false });
279
+ const result = await store.get(lookup, { includeBody: false });
302
280
  if ("error" in result) {
303
281
  let msg = `Document not found: ${file}`;
304
282
  if (result.similarFiles.length > 0) {
@@ -309,7 +287,7 @@ Intent-aware lex (C++ performance, not sports):
309
287
  isError: true,
310
288
  };
311
289
  }
312
- const body = store.getDocumentBody(result, parsedFromLine, maxLines) ?? "";
290
+ const body = await store.getDocumentBody(result.filepath, { fromLine: parsedFromLine, maxLines }) ?? "";
313
291
  let text = body;
314
292
  if (lineNumbers) {
315
293
  const startLine = parsedFromLine || 1;
@@ -345,7 +323,7 @@ Intent-aware lex (C++ performance, not sports):
345
323
  lineNumbers: z.boolean().optional().default(false).describe("Add line numbers to output (format: 'N: content')"),
346
324
  },
347
325
  }, async ({ pattern, maxLines, maxBytes, lineNumbers }) => {
348
- const { docs, errors } = store.findDocuments(pattern, { includeBody: true, maxBytes: maxBytes || DEFAULT_MULTI_GET_MAX_BYTES });
326
+ const { docs, errors } = await store.multiGet(pattern, { includeBody: true, maxBytes: maxBytes || DEFAULT_MULTI_GET_MAX_BYTES });
349
327
  if (docs.length === 0 && errors.length === 0) {
350
328
  return {
351
329
  content: [{ type: "text", text: `No files matched pattern: ${pattern}` }],
@@ -400,7 +378,7 @@ Intent-aware lex (C++ performance, not sports):
400
378
  annotations: { readOnlyHint: true, openWorldHint: false },
401
379
  inputSchema: {},
402
380
  }, async () => {
403
- const status = store.getStatus();
381
+ const status = await store.getStatus();
404
382
  const summary = [
405
383
  `QMD Index Status:`,
406
384
  ` Total documents: ${status.totalDocuments}`,
@@ -422,8 +400,8 @@ Intent-aware lex (C++ performance, not sports):
422
400
  // Transport: stdio (default)
423
401
  // =============================================================================
424
402
  export async function startMcpServer() {
425
- const store = createStore();
426
- const server = createMcpServer(store);
403
+ const store = await createStore({ dbPath: getDefaultDbPath() });
404
+ const server = await createMcpServer(store);
427
405
  const transport = new StdioServerTransport();
428
406
  await server.connect(transport);
429
407
  }
@@ -432,7 +410,9 @@ export async function startMcpServer() {
432
410
  * Binds to localhost only. Returns a handle for shutdown and port discovery.
433
411
  */
434
412
  export async function startMcpHttpServer(port, options) {
435
- const store = createStore();
413
+ const store = await createStore({ dbPath: getDefaultDbPath() });
414
+ // Pre-fetch default collection names for REST endpoint
415
+ const defaultCollectionNames = await store.getDefaultCollectionNames();
436
416
  // Session map: each client gets its own McpServer + Transport pair (MCP spec requirement).
437
417
  // The store is shared — it's stateless SQLite, safe for concurrent access.
438
418
  const sessions = new Map();
@@ -445,7 +425,7 @@ export async function startMcpHttpServer(port, options) {
445
425
  log(`${ts()} New session ${sessionId} (${sessions.size} active)`);
446
426
  },
447
427
  });
448
- const server = createMcpServer(store);
428
+ const server = await createMcpServer(store);
449
429
  await server.connect(transport);
450
430
  transport.onclose = () => {
451
431
  if (transport.sessionId) {
@@ -513,17 +493,18 @@ export async function startMcpHttpServer(port, options) {
513
493
  return;
514
494
  }
515
495
  // Map to internal format
516
- const subSearches = params.searches.map((s) => ({
496
+ const queries = params.searches.map((s) => ({
517
497
  type: s.type,
518
498
  query: String(s.query || ""),
519
499
  }));
520
500
  // Use default collections if none specified
521
- const effectiveCollections = params.collections ?? getDefaultCollectionNames();
522
- const results = await structuredSearch(store, subSearches, {
501
+ const effectiveCollections = params.collections ?? defaultCollectionNames;
502
+ const results = await store.search({
503
+ queries,
523
504
  collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
524
505
  limit: params.limit ?? 10,
525
506
  minScore: params.minScore ?? 0,
526
- candidateLimit: params.candidateLimit,
507
+ intent: params.intent,
527
508
  });
528
509
  // Use first lex or vec query for snippet extraction
529
510
  const primaryQuery = params.searches.find((s) => s.type === 'lex')?.query
@@ -649,8 +630,7 @@ export async function startMcpHttpServer(port, options) {
649
630
  }
650
631
  sessions.clear();
651
632
  httpServer.close();
652
- store.close();
653
- await disposeDefaultLlamaCpp();
633
+ await store.close();
654
634
  };
655
635
  process.on("SIGTERM", async () => {
656
636
  console.error("Shutting down (SIGTERM)...");
@@ -666,6 +646,6 @@ export async function startMcpHttpServer(port, options) {
666
646
  return { httpServer, port: actualPort, stop };
667
647
  }
668
648
  // Run if this is the main module
669
- if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsWith("/mcp.ts") || process.argv[1]?.endsWith("/mcp.js")) {
649
+ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsWith("/server.ts") || process.argv[1]?.endsWith("/server.js")) {
670
650
  startMcpServer().catch(console.error);
671
651
  }
package/dist/store.d.ts CHANGED
@@ -11,7 +11,8 @@
11
11
  * const store = createStore();
12
12
  */
13
13
  import type { Database } from "./db.js";
14
- import { formatQueryForEmbedding, formatDocForEmbedding, type ILLMSession } from "./llm.js";
14
+ import { LlamaCpp, formatQueryForEmbedding, formatDocForEmbedding, type ILLMSession } from "./llm.js";
15
+ import type { NamedCollection, Collection, CollectionConfig } from "./collections.js";
15
16
  export declare const DEFAULT_EMBED_MODEL = "embeddinggemma";
16
17
  export declare const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
17
18
  export declare const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
@@ -88,7 +89,9 @@ export declare const RERANK_CANDIDATE_LIMIT = 40;
88
89
  */
89
90
  export type ExpandedQuery = {
90
91
  type: 'lex' | 'vec' | 'hyde';
91
- text: string;
92
+ query: string;
93
+ /** Optional line number for error reporting (CLI parser) */
94
+ line?: number;
92
95
  };
93
96
  export declare function homedir(): string;
94
97
  /**
@@ -164,10 +167,33 @@ export declare function resolveVirtualPath(db: Database, virtualPath: string): s
164
167
  */
165
168
  export declare function toVirtualPath(db: Database, absolutePath: string): string | null;
166
169
  export declare function verifySqliteVecLoaded(db: Database): void;
170
+ export declare function getStoreCollections(db: Database): NamedCollection[];
171
+ export declare function getStoreCollection(db: Database, name: string): NamedCollection | null;
172
+ export declare function getStoreGlobalContext(db: Database): string | undefined;
173
+ export declare function getStoreContexts(db: Database): Array<{
174
+ collection: string;
175
+ path: string;
176
+ context: string;
177
+ }>;
178
+ export declare function upsertStoreCollection(db: Database, name: string, collection: Omit<Collection, 'pattern'> & {
179
+ pattern?: string;
180
+ }): void;
181
+ export declare function deleteStoreCollection(db: Database, name: string): boolean;
182
+ export declare function renameStoreCollection(db: Database, oldName: string, newName: string): boolean;
183
+ export declare function updateStoreContext(db: Database, collectionName: string, path: string, text: string): boolean;
184
+ export declare function removeStoreContext(db: Database, collectionName: string, path: string): boolean;
185
+ export declare function setStoreGlobalContext(db: Database, value: string | undefined): void;
186
+ /**
187
+ * Sync external config (YAML/inline) into SQLite store_collections.
188
+ * External config always wins. Skips sync if config hash hasn't changed.
189
+ */
190
+ export declare function syncConfigToDb(db: Database, config: CollectionConfig): void;
167
191
  export declare function isSqliteVecAvailable(): boolean;
168
192
  export type Store = {
169
193
  db: Database;
170
194
  dbPath: string;
195
+ /** Optional LlamaCpp instance for this store (overrides the global singleton) */
196
+ llm?: LlamaCpp;
171
197
  close: () => void;
172
198
  ensureVecTable: (dimensions: number) => void;
173
199
  getHashesNeedingEmbedding: () => number;
@@ -252,6 +278,49 @@ export type Store = {
252
278
  clearAllEmbeddings: () => void;
253
279
  insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string) => void;
254
280
  };
281
+ export type ReindexProgress = {
282
+ file: string;
283
+ current: number;
284
+ total: number;
285
+ };
286
+ export type ReindexResult = {
287
+ indexed: number;
288
+ updated: number;
289
+ unchanged: number;
290
+ removed: number;
291
+ orphanedCleaned: number;
292
+ };
293
+ /**
294
+ * Re-index a single collection by scanning the filesystem and updating the database.
295
+ * Pure function — no console output, no db lifecycle management.
296
+ */
297
+ export declare function reindexCollection(store: Store, collectionPath: string, globPattern: string, collectionName: string, options?: {
298
+ ignorePatterns?: string[];
299
+ onProgress?: (info: ReindexProgress) => void;
300
+ }): Promise<ReindexResult>;
301
+ export type EmbedProgress = {
302
+ chunksEmbedded: number;
303
+ totalChunks: number;
304
+ bytesProcessed: number;
305
+ totalBytes: number;
306
+ errors: number;
307
+ };
308
+ export type EmbedResult = {
309
+ docsProcessed: number;
310
+ chunksEmbedded: number;
311
+ errors: number;
312
+ durationMs: number;
313
+ };
314
+ /**
315
+ * Generate vector embeddings for documents that need them.
316
+ * Pure function — no console output, no db lifecycle management.
317
+ * Uses the store's LlamaCpp instance if set, otherwise the global singleton.
318
+ */
319
+ export declare function generateEmbeddings(store: Store, options?: {
320
+ force?: boolean;
321
+ model?: string;
322
+ onProgress?: (info: EmbedProgress) => void;
323
+ }): Promise<EmbedResult>;
255
324
  /**
256
325
  * Create a new store instance with the given database path.
257
326
  * If no path is provided, uses the default path (~/.cache/qmd/index.sqlite).
@@ -352,8 +421,8 @@ export type MultiGetResult = {
352
421
  };
353
422
  export type CollectionInfo = {
354
423
  name: string;
355
- path: string;
356
- pattern: string;
424
+ path: string | null;
425
+ pattern: string | null;
357
426
  documents: number;
358
427
  lastUpdated: string;
359
428
  };
@@ -491,12 +560,11 @@ export declare function matchFilesByGlob(db: Database, pattern: string): {
491
560
  export declare function getContextForPath(db: Database, collectionName: string, path: string): string | null;
492
561
  /**
493
562
  * Get context for a file path (virtual or filesystem).
494
- * Resolves the collection and relative path using the YAML collections config.
563
+ * Resolves the collection and relative path from the DB store_collections table.
495
564
  */
496
565
  export declare function getContextForFile(db: Database, filepath: string): string | null;
497
566
  /**
498
- * Get collection by name from YAML config.
499
- * Returns collection metadata from ~/.config/qmd/index.yml
567
+ * Get collection by name from DB store_collections table.
500
568
  */
501
569
  export declare function getCollectionByName(db: Database, name: string): {
502
570
  name: string;
@@ -505,7 +573,7 @@ export declare function getCollectionByName(db: Database, name: string): {
505
573
  } | null;
506
574
  /**
507
575
  * List all collections with document counts from database.
508
- * Merges YAML config with database statistics.
576
+ * Merges store_collections config with database statistics.
509
577
  */
510
578
  export declare function listCollections(db: Database): {
511
579
  name: string;
@@ -514,6 +582,7 @@ export declare function listCollections(db: Database): {
514
582
  doc_count: number;
515
583
  active_count: number;
516
584
  last_modified: string | null;
585
+ includeByDefault: boolean;
517
586
  }[];
518
587
  /**
519
588
  * Remove a collection and clean up its documents.
@@ -598,11 +667,11 @@ export declare function clearAllEmbeddings(db: Database): void;
598
667
  * The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
599
668
  */
600
669
  export declare function insertEmbedding(db: Database, hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string): void;
601
- export declare function expandQuery(query: string, model: string | undefined, db: Database, intent?: string): Promise<ExpandedQuery[]>;
670
+ export declare function expandQuery(query: string, model: string | undefined, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise<ExpandedQuery[]>;
602
671
  export declare function rerank(query: string, documents: {
603
672
  file: string;
604
673
  text: string;
605
- }[], model: string | undefined, db: Database, intent?: string): Promise<{
674
+ }[], model: string | undefined, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise<{
606
675
  file: string;
607
676
  score: number;
608
677
  }[]>;
@@ -693,6 +762,7 @@ export interface HybridQueryOptions {
693
762
  candidateLimit?: number;
694
763
  explain?: boolean;
695
764
  intent?: string;
765
+ skipRerank?: boolean;
696
766
  hooks?: SearchHooks;
697
767
  }
698
768
  export interface HybridQueryResult {
@@ -756,14 +826,6 @@ export declare function vectorSearchQuery(store: Store, query: string, options?:
756
826
  * A single sub-search in a structured search request.
757
827
  * Matches the format used in QMD training data.
758
828
  */
759
- export interface StructuredSubSearch {
760
- /** Search type: 'lex' for BM25, 'vec' for semantic, 'hyde' for hypothetical */
761
- type: 'lex' | 'vec' | 'hyde';
762
- /** The search query text */
763
- query: string;
764
- /** Optional line number for error reporting (CLI parser) */
765
- line?: number;
766
- }
767
829
  export interface StructuredSearchOptions {
768
830
  collections?: string[];
769
831
  limit?: number;
@@ -772,6 +834,8 @@ export interface StructuredSearchOptions {
772
834
  explain?: boolean;
773
835
  /** Domain intent hint for disambiguation — steers reranking and chunk selection */
774
836
  intent?: string;
837
+ /** Skip LLM reranking, use only RRF scores */
838
+ skipRerank?: boolean;
775
839
  hooks?: SearchHooks;
776
840
  }
777
841
  /**
@@ -792,4 +856,4 @@ export interface StructuredSearchOptions {
792
856
  * better query variations than our small local model, especially for
793
857
  * domain-specific or nuanced queries.
794
858
  */
795
- export declare function structuredSearch(store: Store, searches: StructuredSubSearch[], options?: StructuredSearchOptions): Promise<HybridQueryResult[]>;
859
+ export declare function structuredSearch(store: Store, searches: ExpandedQuery[], options?: StructuredSearchOptions): Promise<HybridQueryResult[]>;