@tobilu/qmd 1.0.7 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mcp.js CHANGED
@@ -12,9 +12,10 @@ import { fileURLToPath } from "url";
12
12
  import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
13
13
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
14
14
  import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
15
+ import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
15
16
  import { z } from "zod";
16
- import { createStore, extractSnippet, addLineNumbers, hybridQuery, vectorSearchQuery, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
17
- import { getCollection, getGlobalContext } from "./collections.js";
17
+ import { createStore, extractSnippet, addLineNumbers, structuredSearch, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
18
+ import { getCollection, getGlobalContext, getDefaultCollectionNames } from "./collections.js";
18
19
  import { disposeDefaultLlamaCpp } from "./llm.js";
19
20
  // =============================================================================
20
21
  // Helper functions
@@ -70,19 +71,23 @@ function buildInstructions(store) {
70
71
  // --- Capability gaps ---
71
72
  if (!status.hasVectorIndex) {
72
73
  lines.push("");
73
- lines.push("Note: No vector embeddings. Only `search` (BM25) is available.");
74
+ lines.push("Note: No vector embeddings yet. Run `qmd embed` to enable semantic search (vec/hyde).");
74
75
  }
75
76
  else if (status.needsEmbedding > 0) {
76
77
  lines.push("");
77
78
  lines.push(`Note: ${status.needsEmbedding} documents need embedding. Run \`qmd embed\` to update.`);
78
79
  }
79
- // --- When to use which tool (escalation ladder) ---
80
- // Tool schemas describe parameters; instructions describe strategy.
80
+ // --- Search tool ---
81
81
  lines.push("");
82
- lines.push("Search:");
83
- lines.push(" - `search` (~30ms) — keyword and exact phrase matching.");
84
- lines.push(" - `vector_search` (~2s) — meaning-based, finds adjacent concepts even when vocabulary differs.");
85
- lines.push(" - `deep_search` (~10s) auto-expands the query into variations, searches each by keyword and meaning, reranks for top hits.");
82
+ lines.push("Search: Use `query` with sub-queries (lex/vec/hyde):");
83
+ lines.push(" - type:'lex'BM25 keyword search (exact terms, fast)");
84
+ lines.push(" - type:'vec'semantic vector search (meaning-based)");
85
+ lines.push(" - type:'hyde'hypothetical document (write what the answer looks like)");
86
+ lines.push("");
87
+ lines.push("Examples:");
88
+ lines.push(" Quick keyword lookup: [{type:'lex', query:'error handling'}]");
89
+ lines.push(" Semantic search: [{type:'vec', query:'how to handle errors gracefully'}]");
90
+ lines.push(" Best results: [{type:'lex', query:'error'}, {type:'vec', query:'error handling best practices'}]");
86
91
  // --- Retrieval workflow ---
87
92
  lines.push("");
88
93
  lines.push("Retrieval:");
@@ -157,96 +162,101 @@ function createMcpServer(store) {
157
162
  };
158
163
  });
159
164
  // ---------------------------------------------------------------------------
160
- // Tool: qmd_search (keyword)
165
+ // Tool: query (Primary search tool)
161
166
  // ---------------------------------------------------------------------------
162
- server.registerTool("search", {
163
- title: "Keyword Search",
164
- description: "Search by keyword. Finds documents containing exact words and phrases in the query.",
165
- annotations: { readOnlyHint: true, openWorldHint: false },
166
- inputSchema: {
167
- query: z.string().describe("Search query - keywords or phrases to find"),
168
- limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
169
- minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
170
- collection: z.string().optional().describe("Filter to a specific collection by name"),
171
- },
172
- }, async ({ query, limit, minScore, collection }) => {
173
- const results = store.searchFTS(query, limit || 10, collection);
174
- const filtered = results
175
- .filter(r => r.score >= (minScore || 0))
176
- .map(r => {
177
- const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos);
178
- return {
179
- docid: `#${r.docid}`,
180
- file: r.displayPath,
181
- title: r.title,
182
- score: Math.round(r.score * 100) / 100,
183
- context: store.getContextForFile(r.filepath),
184
- snippet: addLineNumbers(snippet, line), // Default to line numbers
185
- };
186
- });
187
- return {
188
- content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
189
- structuredContent: { results: filtered },
190
- };
167
+ const subSearchSchema = z.object({
168
+ type: z.enum(['lex', 'vec', 'hyde']).describe("lex = BM25 keywords (supports \"phrase\" and -negation); " +
169
+ "vec = semantic question; hyde = hypothetical answer passage"),
170
+ query: z.string().describe("The query text. For lex: use keywords, \"quoted phrases\", and -negation. " +
171
+ "For vec: natural language question. For hyde: 50-100 word answer passage."),
191
172
  });
192
- // ---------------------------------------------------------------------------
193
- // Tool: qmd_vector_search (Vector semantic search)
194
- // ---------------------------------------------------------------------------
195
- server.registerTool("vector_search", {
196
- title: "Vector Search",
197
- description: "Search by meaning. Finds relevant documents even when they use different words than the query — handles synonyms, paraphrases, and related concepts.",
173
+ server.registerTool("query", {
174
+ title: "Query",
175
+ description: `Search the knowledge base using a query document — one or more typed sub-queries combined for best recall.
176
+
177
+ ## Query Types
178
+
179
+ **lex** — BM25 keyword search. Fast, exact, no LLM needed.
180
+ Full lex syntax:
181
+ - \`term\` — prefix match ("perf" matches "performance")
182
+ - \`"exact phrase"\` — phrase must appear verbatim
183
+ - \`-term\` or \`-"phrase"\` — exclude documents containing this
184
+
185
+ Good lex examples:
186
+ - \`"connection pool" timeout -redis\`
187
+ - \`"machine learning" -sports -athlete\`
188
+ - \`handleError async typescript\`
189
+
190
+ **vec** — Semantic vector search. Write a natural language question. Finds documents by meaning, not exact words.
191
+ - \`how does the rate limiter handle burst traffic?\`
192
+ - \`what is the tradeoff between consistency and availability?\`
193
+
194
+ **hyde** — Hypothetical document. Write 50-100 words that look like the answer. Often the most powerful for nuanced topics.
195
+ - \`The rate limiter uses a token bucket algorithm. When a client exceeds 100 req/min, subsequent requests return 429 until the window resets.\`
196
+
197
+ ## Strategy
198
+
199
+ Combine types for best results. First sub-query gets 2× weight — put your strongest signal first.
200
+
201
+ | Goal | Approach |
202
+ |------|----------|
203
+ | Know exact term/name | \`lex\` only |
204
+ | Concept search | \`vec\` only |
205
+ | Best recall | \`lex\` + \`vec\` |
206
+ | Complex/nuanced | \`lex\` + \`vec\` + \`hyde\` |
207
+ | Unknown vocabulary | Use a standalone natural-language query (no typed lines) so the server can auto-expand it |
208
+
209
+ ## Examples
210
+
211
+ Simple lookup:
212
+ \`\`\`json
213
+ [{ "type": "lex", "query": "CAP theorem" }]
214
+ \`\`\`
215
+
216
+ Best recall on a technical topic:
217
+ \`\`\`json
218
+ [
219
+ { "type": "lex", "query": "\\"connection pool\\" timeout -redis" },
220
+ { "type": "vec", "query": "why do database connections time out under load" },
221
+ { "type": "hyde", "query": "Connection pool exhaustion occurs when all connections are in use and new requests must wait. This typically happens under high concurrency when queries run longer than expected." }
222
+ ]
223
+ \`\`\`
224
+
225
+ Intent-aware lex (C++ performance, not sports):
226
+ \`\`\`json
227
+ [
228
+ { "type": "lex", "query": "\\"C++ performance\\" optimization -sports -athlete" },
229
+ { "type": "vec", "query": "how to optimize C++ program performance" }
230
+ ]
231
+ \`\`\``,
198
232
  annotations: { readOnlyHint: true, openWorldHint: false },
199
233
  inputSchema: {
200
- query: z.string().describe("Natural language query - describe what you're looking for"),
201
- limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
202
- minScore: z.number().optional().default(0.3).describe("Minimum relevance score 0-1 (default: 0.3)"),
203
- collection: z.string().optional().describe("Filter to a specific collection by name"),
234
+ searches: z.array(subSearchSchema).min(1).max(10).describe("Typed sub-queries to execute (lex/vec/hyde). First gets 2x weight."),
235
+ limit: z.number().optional().default(10).describe("Max results (default: 10)"),
236
+ minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
237
+ candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
238
+ collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
204
239
  },
205
- }, async ({ query, limit, minScore, collection }) => {
206
- const results = await vectorSearchQuery(store, query, { collection, limit, minScore });
207
- if (results.length === 0) {
208
- // Distinguish "no embeddings" from "no matches" — check if vector table exists
209
- const tableExists = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
210
- if (!tableExists) {
211
- return {
212
- content: [{ type: "text", text: "Vector index not found. Run 'qmd embed' first to create embeddings." }],
213
- isError: true,
214
- };
215
- }
216
- }
217
- const filtered = results.map(r => {
218
- const { line, snippet } = extractSnippet(r.body, query, 300);
219
- return {
220
- docid: `#${r.docid}`,
221
- file: r.displayPath,
222
- title: r.title,
223
- score: Math.round(r.score * 100) / 100,
224
- context: r.context,
225
- snippet: addLineNumbers(snippet, line),
226
- };
240
+ }, async ({ searches, limit, minScore, candidateLimit, collections }) => {
241
+ // Map to internal format
242
+ const subSearches = searches.map(s => ({
243
+ type: s.type,
244
+ query: s.query,
245
+ }));
246
+ // Use default collections if none specified
247
+ const effectiveCollections = collections ?? getDefaultCollectionNames();
248
+ const results = await structuredSearch(store, subSearches, {
249
+ collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
250
+ limit,
251
+ minScore,
252
+ candidateLimit,
227
253
  });
228
- return {
229
- content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
230
- structuredContent: { results: filtered },
231
- };
232
- });
233
- // ---------------------------------------------------------------------------
234
- // Tool: qmd_deep_search (Deep search with expansion + reranking)
235
- // ---------------------------------------------------------------------------
236
- server.registerTool("deep_search", {
237
- title: "Deep Search",
238
- description: "Deep search. Auto-expands the query into variations, searches each by keyword and meaning, and reranks for top hits across all results.",
239
- annotations: { readOnlyHint: true, openWorldHint: false },
240
- inputSchema: {
241
- query: z.string().describe("Natural language query - describe what you're looking for"),
242
- limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
243
- minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
244
- collection: z.string().optional().describe("Filter to a specific collection by name"),
245
- },
246
- }, async ({ query, limit, minScore, collection }) => {
247
- const results = await hybridQuery(store, query, { collection, limit, minScore });
254
+ // Use first lex or vec query for snippet extraction
255
+ const primaryQuery = searches.find(s => s.type === 'lex')?.query
256
+ || searches.find(s => s.type === 'vec')?.query
257
+ || searches[0]?.query || "";
248
258
  const filtered = results.map(r => {
249
- const { line, snippet } = extractSnippet(r.bestChunk, query, 300);
259
+ const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
250
260
  return {
251
261
  docid: `#${r.docid}`,
252
262
  file: r.displayPath,
@@ -257,7 +267,7 @@ function createMcpServer(store) {
257
267
  };
258
268
  });
259
269
  return {
260
- content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
270
+ content: [{ type: "text", text: formatSearchSummary(filtered, primaryQuery) }],
261
271
  structuredContent: { results: filtered },
262
272
  };
263
273
  });
@@ -418,12 +428,27 @@ export async function startMcpServer() {
418
428
  */
419
429
  export async function startMcpHttpServer(port, options) {
420
430
  const store = createStore();
421
- const mcpServer = createMcpServer(store);
422
- const transport = new WebStandardStreamableHTTPServerTransport({
423
- sessionIdGenerator: () => randomUUID(),
424
- enableJsonResponse: true,
425
- });
426
- await mcpServer.connect(transport);
431
+ // Session map: each client gets its own McpServer + Transport pair (MCP spec requirement).
432
+ // The store is shared — it's stateless SQLite, safe for concurrent access.
433
+ const sessions = new Map();
434
+ async function createSession() {
435
+ const transport = new WebStandardStreamableHTTPServerTransport({
436
+ sessionIdGenerator: () => randomUUID(),
437
+ enableJsonResponse: true,
438
+ onsessioninitialized: (sessionId) => {
439
+ sessions.set(sessionId, transport);
440
+ log(`${ts()} New session ${sessionId} (${sessions.size} active)`);
441
+ },
442
+ });
443
+ const server = createMcpServer(store);
444
+ await server.connect(transport);
445
+ transport.onclose = () => {
446
+ if (transport.sessionId) {
447
+ sessions.delete(transport.sessionId);
448
+ }
449
+ };
450
+ return transport;
451
+ }
427
452
  const startTime = Date.now();
428
453
  const quiet = options?.quiet ?? false;
429
454
  /** Format timestamp for request logging */
@@ -471,6 +496,50 @@ export async function startMcpHttpServer(port, options) {
471
496
  log(`${ts()} GET /health (${Date.now() - reqStart}ms)`);
472
497
  return;
473
498
  }
499
+ // REST endpoint: POST /query (alias: /search) — structured search without the MCP protocol.
500
+ // JSON body: required `searches` array; optional `collections`, `limit`, `minScore`, `candidateLimit`.
501
+ if ((pathname === "/query" || pathname === "/search") && nodeReq.method === "POST") {
502
+ const rawBody = await collectBody(nodeReq);
503
+ const params = JSON.parse(rawBody);
504
+ // Validate required fields
505
+ if (!params.searches || !Array.isArray(params.searches)) {
506
+ nodeRes.writeHead(400, { "Content-Type": "application/json" });
507
+ nodeRes.end(JSON.stringify({ error: "Missing required field: searches (array)" }));
508
+ return;
509
+ }
510
+ // Map to internal format
511
+ const subSearches = params.searches.map((s) => ({
512
+ type: s.type,
513
+ query: String(s.query || ""),
514
+ }));
515
+ // Use default collections if none specified
516
+ const effectiveCollections = params.collections ?? getDefaultCollectionNames();
517
+ const results = await structuredSearch(store, subSearches, {
518
+ collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
519
+ limit: params.limit ?? 10,
520
+ minScore: params.minScore ?? 0,
521
+ candidateLimit: params.candidateLimit,
522
+ });
523
+ // Use first lex or vec query for snippet extraction
524
+ const primaryQuery = params.searches.find((s) => s.type === 'lex')?.query
525
+ || params.searches.find((s) => s.type === 'vec')?.query
526
+ || params.searches[0]?.query || "";
527
+ const formatted = results.map(r => {
528
+ const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
529
+ return {
530
+ docid: `#${r.docid}`,
531
+ file: r.displayPath,
532
+ title: r.title,
533
+ score: Math.round(r.score * 100) / 100,
534
+ context: r.context,
535
+ snippet: addLineNumbers(snippet, line),
536
+ };
537
+ });
538
+ nodeRes.writeHead(200, { "Content-Type": "application/json" });
539
+ nodeRes.end(JSON.stringify({ results: formatted }));
540
+ log(`${ts()} POST /query ${params.searches.length} queries (${Date.now() - reqStart}ms)`);
541
+ return;
542
+ }
474
543
  if (pathname === "/mcp" && nodeReq.method === "POST") {
475
544
  const rawBody = await collectBody(nodeReq);
476
545
  const body = JSON.parse(rawBody);
@@ -481,6 +550,34 @@ export async function startMcpHttpServer(port, options) {
481
550
  if (typeof v === "string")
482
551
  headers[k] = v;
483
552
  }
553
+ // Route to existing session or create new one on initialize
554
+ const sessionId = headers["mcp-session-id"];
555
+ let transport;
556
+ if (sessionId) {
557
+ const existing = sessions.get(sessionId);
558
+ if (!existing) {
559
+ nodeRes.writeHead(404, { "Content-Type": "application/json" });
560
+ nodeRes.end(JSON.stringify({
561
+ jsonrpc: "2.0",
562
+ error: { code: -32001, message: "Session not found" },
563
+ id: body?.id ?? null,
564
+ }));
565
+ return;
566
+ }
567
+ transport = existing;
568
+ }
569
+ else if (isInitializeRequest(body)) {
570
+ transport = await createSession();
571
+ }
572
+ else {
573
+ nodeRes.writeHead(400, { "Content-Type": "application/json" });
574
+ nodeRes.end(JSON.stringify({
575
+ jsonrpc: "2.0",
576
+ error: { code: -32000, message: "Bad Request: Missing session ID" },
577
+ id: body?.id ?? null,
578
+ }));
579
+ return;
580
+ }
484
581
  const request = new Request(url, { method: "POST", headers, body: rawBody });
485
582
  const response = await transport.handleRequest(request, { parsedBody: body });
486
583
  nodeRes.writeHead(response.status, Object.fromEntries(response.headers));
@@ -489,12 +586,33 @@ export async function startMcpHttpServer(port, options) {
489
586
  return;
490
587
  }
491
588
  if (pathname === "/mcp") {
492
- const url = `http://localhost:${port}${pathname}`;
493
589
  const headers = {};
494
590
  for (const [k, v] of Object.entries(nodeReq.headers)) {
495
591
  if (typeof v === "string")
496
592
  headers[k] = v;
497
593
  }
594
+ // GET/DELETE must have a valid session
595
+ const sessionId = headers["mcp-session-id"];
596
+ if (!sessionId) {
597
+ nodeRes.writeHead(400, { "Content-Type": "application/json" });
598
+ nodeRes.end(JSON.stringify({
599
+ jsonrpc: "2.0",
600
+ error: { code: -32000, message: "Bad Request: Missing session ID" },
601
+ id: null,
602
+ }));
603
+ return;
604
+ }
605
+ const transport = sessions.get(sessionId);
606
+ if (!transport) {
607
+ nodeRes.writeHead(404, { "Content-Type": "application/json" });
608
+ nodeRes.end(JSON.stringify({
609
+ jsonrpc: "2.0",
610
+ error: { code: -32001, message: "Session not found" },
611
+ id: null,
612
+ }));
613
+ return;
614
+ }
615
+ const url = `http://localhost:${port}${pathname}`;
498
616
  const rawBody = nodeReq.method !== "GET" && nodeReq.method !== "HEAD" ? await collectBody(nodeReq) : undefined;
499
617
  const request = new Request(url, { method: nodeReq.method || "GET", headers, ...(rawBody ? { body: rawBody } : {}) });
500
618
  const response = await transport.handleRequest(request);
@@ -521,7 +639,10 @@ export async function startMcpHttpServer(port, options) {
521
639
  if (stopping)
522
640
  return;
523
641
  stopping = true;
524
- await transport.close();
642
+ for (const transport of sessions.values()) {
643
+ await transport.close();
644
+ }
645
+ sessions.clear();
525
646
  httpServer.close();
526
647
  store.close();
527
648
  await disposeDefaultLlamaCpp();