npm - @tobilu/qmd - Versions diffs - 1.0.7 → 1.1.2 - Mend

@tobilu/qmd 1.0.7 → 1.1.2

This diff shows the differences between the contents of two publicly available package versions, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dist/mcp.js CHANGED Viewed

@@ -12,9 +12,10 @@ import { fileURLToPath } from "url";
 import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
+import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
 import { z } from "zod";
-import { createStore, extractSnippet, addLineNumbers, hybridQuery, vectorSearchQuery, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
-import { getCollection, getGlobalContext } from "./collections.js";
+import { createStore, extractSnippet, addLineNumbers, structuredSearch, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
+import { getCollection, getGlobalContext, getDefaultCollectionNames } from "./collections.js";
 import { disposeDefaultLlamaCpp } from "./llm.js";
 // =============================================================================
 // Helper functions
@@ -70,19 +71,23 @@ function buildInstructions(store) {
     // --- Capability gaps ---
     if (!status.hasVectorIndex) {
         lines.push("");
-        lines.push("Note: No vector embeddings. Only `search` (BM25) is available.");
+        lines.push("Note: No vector embeddings yet. Run `qmd embed` to enable semantic search (vec/hyde).");
     }
     else if (status.needsEmbedding > 0) {
         lines.push("");
         lines.push(`Note: ${status.needsEmbedding} documents need embedding. Run \`qmd embed\` to update.`);
     }
-    // --- When to use which tool (escalation ladder) ---
-    // Tool schemas describe parameters; instructions describe strategy.
+    // --- Search tool ---
     lines.push("");
-    lines.push("Search:");
-    lines.push("  - `search` (~30ms) — keyword and exact phrase matching.");
-    lines.push("  - `vector_search` (~2s) — meaning-based, finds adjacent concepts even when vocabulary differs.");
-    lines.push("  - `deep_search` (~10s) — auto-expands the query into variations, searches each by keyword and meaning, reranks for top hits.");
+    lines.push("Search: Use `query` with sub-queries (lex/vec/hyde):");
+    lines.push("  - type:'lex' — BM25 keyword search (exact terms, fast)");
+    lines.push("  - type:'vec' — semantic vector search (meaning-based)");
+    lines.push("  - type:'hyde' — hypothetical document (write what the answer looks like)");
+    lines.push("");
+    lines.push("Examples:");
+    lines.push("  Quick keyword lookup: [{type:'lex', query:'error handling'}]");
+    lines.push("  Semantic search: [{type:'vec', query:'how to handle errors gracefully'}]");
+    lines.push("  Best results: [{type:'lex', query:'error'}, {type:'vec', query:'error handling best practices'}]");
     // --- Retrieval workflow ---
     lines.push("");
     lines.push("Retrieval:");
@@ -157,96 +162,101 @@ function createMcpServer(store) {
         };
     });
     // ---------------------------------------------------------------------------
-    // Tool: qmd_search (keyword)
+    // Tool: query (Primary search tool)
     // ---------------------------------------------------------------------------
-    server.registerTool("search", {
-        title: "Keyword Search",
-        description: "Search by keyword. Finds documents containing exact words and phrases in the query.",
-        annotations: { readOnlyHint: true, openWorldHint: false },
-        inputSchema: {
-            query: z.string().describe("Search query - keywords or phrases to find"),
-            limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
-            minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
-            collection: z.string().optional().describe("Filter to a specific collection by name"),
-        },
-    }, async ({ query, limit, minScore, collection }) => {
-        const results = store.searchFTS(query, limit || 10, collection);
-        const filtered = results
-            .filter(r => r.score >= (minScore || 0))
-            .map(r => {
-            const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos);
-            return {
-                docid: `#${r.docid}`,
-                file: r.displayPath,
-                title: r.title,
-                score: Math.round(r.score * 100) / 100,
-                context: store.getContextForFile(r.filepath),
-                snippet: addLineNumbers(snippet, line), // Default to line numbers
-            };
-        });
-        return {
-            content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
-            structuredContent: { results: filtered },
-        };
+    const subSearchSchema = z.object({
+        type: z.enum(['lex', 'vec', 'hyde']).describe("lex = BM25 keywords (supports \"phrase\" and -negation); " +
+            "vec = semantic question; hyde = hypothetical answer passage"),
+        query: z.string().describe("The query text. For lex: use keywords, \"quoted phrases\", and -negation. " +
+            "For vec: natural language question. For hyde: 50-100 word answer passage."),
     });
-    // ---------------------------------------------------------------------------
-    // Tool: qmd_vector_search (Vector semantic search)
-    // ---------------------------------------------------------------------------
-    server.registerTool("vector_search", {
-        title: "Vector Search",
-        description: "Search by meaning. Finds relevant documents even when they use different words than the query — handles synonyms, paraphrases, and related concepts.",
+    server.registerTool("query", {
+        title: "Query",
+        description: `Search the knowledge base using a query document — one or more typed sub-queries combined for best recall.
+## Query Types
+**lex** — BM25 keyword search. Fast, exact, no LLM needed.
+Full lex syntax:
+- \`term\` — prefix match ("perf" matches "performance")
+- \`"exact phrase"\` — phrase must appear verbatim
+- \`-term\` or \`-"phrase"\` — exclude documents containing this
+Good lex examples:
+- \`"connection pool" timeout -redis\`
+- \`"machine learning" -sports -athlete\`
+- \`handleError async typescript\`
+**vec** — Semantic vector search. Write a natural language question. Finds documents by meaning, not exact words.
+- \`how does the rate limiter handle burst traffic?\`
+- \`what is the tradeoff between consistency and availability?\`
+**hyde** — Hypothetical document. Write 50-100 words that look like the answer. Often the most powerful for nuanced topics.
+- \`The rate limiter uses a token bucket algorithm. When a client exceeds 100 req/min, subsequent requests return 429 until the window resets.\`
+## Strategy
+Combine types for best results. First sub-query gets 2× weight — put your strongest signal first.
+| Goal | Approach |
+|------|----------|
+| Know exact term/name | \`lex\` only |
+| Concept search | \`vec\` only |
+| Best recall | \`lex\` + \`vec\` |
+| Complex/nuanced | \`lex\` + \`vec\` + \`hyde\` |
+| Unknown vocabulary | Use a standalone natural-language query (no typed lines) so the server can auto-expand it |
+## Examples
+Simple lookup:
+\`\`\`json
+[{ "type": "lex", "query": "CAP theorem" }]
+\`\`\`
+Best recall on a technical topic:
+\`\`\`json
+[
+  { "type": "lex", "query": "\\"connection pool\\" timeout -redis" },
+  { "type": "vec", "query": "why do database connections time out under load" },
+  { "type": "hyde", "query": "Connection pool exhaustion occurs when all connections are in use and new requests must wait. This typically happens under high concurrency when queries run longer than expected." }
+]
+\`\`\`
+Intent-aware lex (C++ performance, not sports):
+\`\`\`json
+[
+  { "type": "lex", "query": "\\"C++ performance\\" optimization -sports -athlete" },
+  { "type": "vec", "query": "how to optimize C++ program performance" }
+]
+\`\`\``,
         annotations: { readOnlyHint: true, openWorldHint: false },
         inputSchema: {
-            query: z.string().describe("Natural language query - describe what you're looking for"),
-            limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
-            minScore: z.number().optional().default(0.3).describe("Minimum relevance score 0-1 (default: 0.3)"),
-            collection: z.string().optional().describe("Filter to a specific collection by name"),
+            searches: z.array(subSearchSchema).min(1).max(10).describe("Typed sub-queries to execute (lex/vec/hyde). First gets 2x weight."),
+            limit: z.number().optional().default(10).describe("Max results (default: 10)"),
+            minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
+            candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
+            collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
         },
-    }, async ({ query, limit, minScore, collection }) => {
-        const results = await vectorSearchQuery(store, query, { collection, limit, minScore });
-        if (results.length === 0) {
-            // Distinguish "no embeddings" from "no matches" — check if vector table exists
-            const tableExists = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
-            if (!tableExists) {
-                return {
-                    content: [{ type: "text", text: "Vector index not found. Run 'qmd embed' first to create embeddings." }],
-                    isError: true,
-                };
-            }
-        }
-        const filtered = results.map(r => {
-            const { line, snippet } = extractSnippet(r.body, query, 300);
-            return {
-                docid: `#${r.docid}`,
-                file: r.displayPath,
-                title: r.title,
-                score: Math.round(r.score * 100) / 100,
-                context: r.context,
-                snippet: addLineNumbers(snippet, line),
-            };
+    }, async ({ searches, limit, minScore, candidateLimit, collections }) => {
+        // Map to internal format
+        const subSearches = searches.map(s => ({
+            type: s.type,
+            query: s.query,
+        }));
+        // Use default collections if none specified
+        const effectiveCollections = collections ?? getDefaultCollectionNames();
+        const results = await structuredSearch(store, subSearches, {
+            collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
+            limit,
+            minScore,
+            candidateLimit,
         });
-        return {
-            content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
-            structuredContent: { results: filtered },
-        };
-    });
-    // ---------------------------------------------------------------------------
-    // Tool: qmd_deep_search (Deep search with expansion + reranking)
-    // ---------------------------------------------------------------------------
-    server.registerTool("deep_search", {
-        title: "Deep Search",
-        description: "Deep search. Auto-expands the query into variations, searches each by keyword and meaning, and reranks for top hits across all results.",
-        annotations: { readOnlyHint: true, openWorldHint: false },
-        inputSchema: {
-            query: z.string().describe("Natural language query - describe what you're looking for"),
-            limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
-            minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
-            collection: z.string().optional().describe("Filter to a specific collection by name"),
-        },
-    }, async ({ query, limit, minScore, collection }) => {
-        const results = await hybridQuery(store, query, { collection, limit, minScore });
+        // Use first lex or vec query for snippet extraction
+        const primaryQuery = searches.find(s => s.type === 'lex')?.query
+            || searches.find(s => s.type === 'vec')?.query
+            || searches[0]?.query || "";
         const filtered = results.map(r => {
-            const { line, snippet } = extractSnippet(r.bestChunk, query, 300);
+            const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
             return {
                 docid: `#${r.docid}`,
                 file: r.displayPath,
@@ -257,7 +267,7 @@ function createMcpServer(store) {
             };
         });
         return {
-            content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
+            content: [{ type: "text", text: formatSearchSummary(filtered, primaryQuery) }],
             structuredContent: { results: filtered },
         };
     });
@@ -418,12 +428,27 @@ export async function startMcpServer() {
  */
 export async function startMcpHttpServer(port, options) {
     const store = createStore();
-    const mcpServer = createMcpServer(store);
-    const transport = new WebStandardStreamableHTTPServerTransport({
-        sessionIdGenerator: () => randomUUID(),
-        enableJsonResponse: true,
-    });
-    await mcpServer.connect(transport);
+    // Session map: each client gets its own McpServer + Transport pair (MCP spec requirement).
+    // The store is shared — it's stateless SQLite, safe for concurrent access.
+    const sessions = new Map();
+    async function createSession() {
+        const transport = new WebStandardStreamableHTTPServerTransport({
+            sessionIdGenerator: () => randomUUID(),
+            enableJsonResponse: true,
+            onsessioninitialized: (sessionId) => {
+                sessions.set(sessionId, transport);
+                log(`${ts()} New session ${sessionId} (${sessions.size} active)`);
+            },
+        });
+        const server = createMcpServer(store);
+        await server.connect(transport);
+        transport.onclose = () => {
+            if (transport.sessionId) {
+                sessions.delete(transport.sessionId);
+            }
+        };
+        return transport;
+    }
     const startTime = Date.now();
     const quiet = options?.quiet ?? false;
     /** Format timestamp for request logging */
@@ -471,6 +496,50 @@ export async function startMcpHttpServer(port, options) {
                 log(`${ts()} GET /health (${Date.now() - reqStart}ms)`);
                 return;
             }
+            // REST endpoint: POST /search — structured search without MCP protocol
+            // REST endpoint: POST /query (alias: /search) — structured search without MCP protocol
+            if ((pathname === "/query" || pathname === "/search") && nodeReq.method === "POST") {
+                const rawBody = await collectBody(nodeReq);
+                const params = JSON.parse(rawBody);
+                // Validate required fields
+                if (!params.searches || !Array.isArray(params.searches)) {
+                    nodeRes.writeHead(400, { "Content-Type": "application/json" });
+                    nodeRes.end(JSON.stringify({ error: "Missing required field: searches (array)" }));
+                    return;
+                }
+                // Map to internal format
+                const subSearches = params.searches.map((s) => ({
+                    type: s.type,
+                    query: String(s.query || ""),
+                }));
+                // Use default collections if none specified
+                const effectiveCollections = params.collections ?? getDefaultCollectionNames();
+                const results = await structuredSearch(store, subSearches, {
+                    collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
+                    limit: params.limit ?? 10,
+                    minScore: params.minScore ?? 0,
+                    candidateLimit: params.candidateLimit,
+                });
+                // Use first lex or vec query for snippet extraction
+                const primaryQuery = params.searches.find((s) => s.type === 'lex')?.query
+                    || params.searches.find((s) => s.type === 'vec')?.query
+                    || params.searches[0]?.query || "";
+                const formatted = results.map(r => {
+                    const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
+                    return {
+                        docid: `#${r.docid}`,
+                        file: r.displayPath,
+                        title: r.title,
+                        score: Math.round(r.score * 100) / 100,
+                        context: r.context,
+                        snippet: addLineNumbers(snippet, line),
+                    };
+                });
+                nodeRes.writeHead(200, { "Content-Type": "application/json" });
+                nodeRes.end(JSON.stringify({ results: formatted }));
+                log(`${ts()} POST /query ${params.searches.length} queries (${Date.now() - reqStart}ms)`);
+                return;
+            }
             if (pathname === "/mcp" && nodeReq.method === "POST") {
                 const rawBody = await collectBody(nodeReq);
                 const body = JSON.parse(rawBody);
@@ -481,6 +550,34 @@ export async function startMcpHttpServer(port, options) {
                     if (typeof v === "string")
                         headers[k] = v;
                 }
+                // Route to existing session or create new one on initialize
+                const sessionId = headers["mcp-session-id"];
+                let transport;
+                if (sessionId) {
+                    const existing = sessions.get(sessionId);
+                    if (!existing) {
+                        nodeRes.writeHead(404, { "Content-Type": "application/json" });
+                        nodeRes.end(JSON.stringify({
+                            jsonrpc: "2.0",
+                            error: { code: -32001, message: "Session not found" },
+                            id: body?.id ?? null,
+                        }));
+                        return;
+                    }
+                    transport = existing;
+                }
+                else if (isInitializeRequest(body)) {
+                    transport = await createSession();
+                }
+                else {
+                    nodeRes.writeHead(400, { "Content-Type": "application/json" });
+                    nodeRes.end(JSON.stringify({
+                        jsonrpc: "2.0",
+                        error: { code: -32000, message: "Bad Request: Missing session ID" },
+                        id: body?.id ?? null,
+                    }));
+                    return;
+                }
                 const request = new Request(url, { method: "POST", headers, body: rawBody });
                 const response = await transport.handleRequest(request, { parsedBody: body });
                 nodeRes.writeHead(response.status, Object.fromEntries(response.headers));
@@ -489,12 +586,33 @@ export async function startMcpHttpServer(port, options) {
                 return;
             }
             if (pathname === "/mcp") {
-                const url = `http://localhost:${port}${pathname}`;
                 const headers = {};
                 for (const [k, v] of Object.entries(nodeReq.headers)) {
                     if (typeof v === "string")
                         headers[k] = v;
                 }
+                // GET/DELETE must have a valid session
+                const sessionId = headers["mcp-session-id"];
+                if (!sessionId) {
+                    nodeRes.writeHead(400, { "Content-Type": "application/json" });
+                    nodeRes.end(JSON.stringify({
+                        jsonrpc: "2.0",
+                        error: { code: -32000, message: "Bad Request: Missing session ID" },
+                        id: null,
+                    }));
+                    return;
+                }
+                const transport = sessions.get(sessionId);
+                if (!transport) {
+                    nodeRes.writeHead(404, { "Content-Type": "application/json" });
+                    nodeRes.end(JSON.stringify({
+                        jsonrpc: "2.0",
+                        error: { code: -32001, message: "Session not found" },
+                        id: null,
+                    }));
+                    return;
+                }
+                const url = `http://localhost:${port}${pathname}`;
                 const rawBody = nodeReq.method !== "GET" && nodeReq.method !== "HEAD" ? await collectBody(nodeReq) : undefined;
                 const request = new Request(url, { method: nodeReq.method || "GET", headers, ...(rawBody ? { body: rawBody } : {}) });
                 const response = await transport.handleRequest(request);
@@ -521,7 +639,10 @@ export async function startMcpHttpServer(port, options) {
         if (stopping)
             return;
         stopping = true;
-        await transport.close();
+        for (const transport of sessions.values()) {
+            await transport.close();
+        }
+        sessions.clear();
         httpServer.close();
         store.close();
         await disposeDefaultLlamaCpp();