npm - @tobilu/qmd - Versions diffs - 1.0.7 → 1.1.1 - Mend

@tobilu/qmd 1.0.7 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,45 @@
 ## [Unreleased]
+## [1.1.1] - 2026-03-06
+### Fixes
+- Reranker: truncate documents exceeding the 2048-token context window
+  instead of silently producing garbage scores. Long chunks (e.g. from
+  PDF ingestion) now get a fair ranking.
+- Nix: add python3 and cctools to build dependencies. #214 (thanks
+  @pcasaretto)
+## [1.1.0] - 2026-02-20
+QMD now speaks in **query documents** — structured multi-line queries where every line is typed (`lex:`, `vec:`, `hyde:`), combining keyword precision with semantic recall. A single plain query still works exactly as before (it's treated as an implicit `expand:` and auto-expanded by the LLM). Lex now supports quoted phrases and negation (`"C++ performance" -sports -athlete`), making intent-aware disambiguation practical. The formal query grammar is documented in `docs/SYNTAX.md`.
+The npm package now uses the standard `#!/usr/bin/env node` bin convention, replacing the custom bash wrapper. This fixes native module ABI mismatches when installed via bun and works on any platform with node >= 22 on PATH.
+### Changes
+- **Query document format**: multi-line queries with typed sub-queries (`lex:`, `vec:`, `hyde:`). Plain queries remain the default: a plain query is treated as an implicit `expand:`, which is never written alongside typed lines inside a query document. The first sub-query gets 2× fusion weight — put your strongest signal first. Formal grammar in `docs/SYNTAX.md`.
+- **Lex syntax**: full BM25 operator support. `"exact phrase"` for verbatim matching; `-term` and `-"phrase"` for exclusions. Essential for disambiguation when a term is overloaded across domains (e.g. `performance -sports -athlete`).
+- **`expand:` shortcut**: send a single plain query (or a one-line document that begins with `expand:`) to have it auto-expanded by the local LLM. Query documents themselves are limited to `lex`, `vec`, and `hyde` lines.
+- **MCP `query` tool** (renamed from `structured_search`): rewrote the tool description to fully teach AI agents the query document format, lex syntax, and combination strategy. Includes worked examples with intent-aware lex.
+- **HTTP `/query` endpoint** (renamed from `/search`; `/search` kept as silent alias).
+- **`collections` array filter**: filter by multiple collections in a single query (`collections: ["notes", "brain"]`). Removed the single `collection` string param — array only.
+- **Collection `include`/`exclude`**: `includeByDefault: false` hides a collection from all queries unless explicitly named via `collections`. CLI: `qmd collection exclude <name>` / `qmd collection include <name>`.
+- **Collection `update-cmd`**: attach a shell command that runs before every `qmd update` (e.g. `git stash && git pull --rebase --ff-only && git stash pop`). CLI: `qmd collection update-cmd <name> '<cmd>'`.
+- **`qmd status` tips**: shows actionable tips when collections lack context descriptions or update commands.
+- **`qmd collection` subcommands**: `show`, `update-cmd`, `include`, `exclude`. Bare `qmd collection` now prints help.
+- **Packaging**: replaced custom bash wrapper with standard `#!/usr/bin/env node` shebang on `dist/qmd.js`. Fixes native module ABI mismatches when installed via bun, and works on any platform where node >= 22 is on PATH.
+- **Removed MCP tools** `search`, `vector_search`, `deep_search` — all superseded by `query`.
+- **Removed** `qmd context check` command.
+- **CLI timing**: each LLM step (expand, embed, rerank) prints elapsed time inline (`Expanding query... (4.2s)`).
+### Fixes
+- `qmd collection list` shows `[excluded]` tag for collections with `includeByDefault: false`.
+- Default searches now respect `includeByDefault` — excluded collections are skipped unless explicitly named.
+- Fix main module detection when installed globally via npm/bun (symlink resolution).
 ## [1.0.7] - 2026-02-18
 ### Changes
@@ -333,4 +372,3 @@ notes, journals, and meeting transcripts.
 [Unreleased]: https://github.com/tobi/qmd/compare/v1.0.0...HEAD
 [1.0.0]: https://github.com/tobi/qmd/releases/tag/v1.0.0
 [0.9.0]: https://github.com/tobi/qmd/compare/v0.8.0...v0.9.0

package/dist/collections.d.ts CHANGED Viewed

@@ -18,6 +18,7 @@ export interface Collection {
     pattern: string;
     context?: ContextMap;
     update?: string;
+    includeByDefault?: boolean;
 }
 /**
  * The complete configuration file structure
@@ -55,6 +56,21 @@ export declare function getCollection(name: string): NamedCollection | null;
  * List all collections
  */
 export declare function listCollections(): NamedCollection[];
+/**
+ * Get collections that are included by default in queries
+ */
+export declare function getDefaultCollections(): NamedCollection[];
+/**
+ * Get collection names that are included by default
+ */
+export declare function getDefaultCollectionNames(): string[];
+/**
+ * Update a collection's settings
+ */
+export declare function updateCollectionSettings(name: string, settings: {
+    update?: string | null;
+    includeByDefault?: boolean;
+}): boolean;
 /**
  * Add or update a collection
  */

package/dist/collections.js CHANGED Viewed

@@ -117,6 +117,46 @@ export function listCollections() {
         ...collection,
     }));
 }
+/**
+ * Get collections that are included by default in queries
+ */
+export function getDefaultCollections() {
+    return listCollections().filter(c => c.includeByDefault !== false);
+}
+/**
+ * Get collection names that are included by default
+ */
+export function getDefaultCollectionNames() {
+    return getDefaultCollections().map(c => c.name);
+}
+/**
+ * Update a collection's settings
+ */
+export function updateCollectionSettings(name, settings) {
+    const config = loadConfig();
+    const collection = config.collections[name];
+    if (!collection)
+        return false;
+    if (settings.update !== undefined) {
+        if (settings.update === null) {
+            delete collection.update;
+        }
+        else {
+            collection.update = settings.update;
+        }
+    }
+    if (settings.includeByDefault !== undefined) {
+        if (settings.includeByDefault === true) {
+            // true is default, remove the field
+            delete collection.includeByDefault;
+        }
+        else {
+            collection.includeByDefault = settings.includeByDefault;
+        }
+    }
+    saveConfig(config);
+    return true;
+}
 /**
  * Add or update a collection
  */

package/dist/llm.d.ts CHANGED Viewed

@@ -318,6 +318,7 @@ export declare class LlamaCpp implements LLM {
         context?: string;
         includeLexical?: boolean;
     }): Promise<Queryable[]>;
+    private static readonly RERANK_TEMPLATE_OVERHEAD;
     rerank(query: string, documents: RerankDocument[], options?: RerankOptions): Promise<RerankResult>;
     /**
      * Get device/GPU info for status display.

package/dist/llm.js CHANGED Viewed

@@ -731,17 +731,31 @@ export class LlamaCpp {
             await genContext.dispose();
         }
     }
+    // Qwen3 reranker chat template overhead (system prompt, tags, separators)
+    static RERANK_TEMPLATE_OVERHEAD = 200;
     async rerank(query, documents, options = {}) {
         // Ping activity at start to keep models alive during this operation
         this.touchActivity();
         const contexts = await this.ensureRerankContexts();
+        const model = await this.ensureRerankModel();
+        // Truncate documents that would exceed the rerank context size.
+        // Budget = contextSize - template overhead - query tokens
+        const queryTokens = model.tokenize(query).length;
+        const maxDocTokens = LlamaCpp.RERANK_CONTEXT_SIZE - LlamaCpp.RERANK_TEMPLATE_OVERHEAD - queryTokens;
+        const truncatedDocs = documents.map((doc) => {
+            const tokens = model.tokenize(doc.text);
+            if (tokens.length <= maxDocTokens)
+                return doc;
+            const truncatedText = model.detokenize(tokens.slice(0, maxDocTokens));
+            return { ...doc, text: truncatedText };
+        });
         // Build a map from document text to original indices (for lookup after sorting)
         const textToDoc = new Map();
-        documents.forEach((doc, index) => {
+        truncatedDocs.forEach((doc, index) => {
             textToDoc.set(doc.text, { file: doc.file, index });
         });
         // Extract just the text for ranking
-        const texts = documents.map((doc) => doc.text);
+        const texts = truncatedDocs.map((doc) => doc.text);
         // Split documents across contexts for parallel evaluation.
         // Each context has its own sequence with a lock, so parallelism comes
         // from multiple contexts evaluating different chunks simultaneously.

package/dist/mcp.js CHANGED Viewed

@@ -13,8 +13,8 @@ import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mc
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
 import { z } from "zod";
-import { createStore, extractSnippet, addLineNumbers, hybridQuery, vectorSearchQuery, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
-import { getCollection, getGlobalContext } from "./collections.js";
+import { createStore, extractSnippet, addLineNumbers, structuredSearch, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
+import { getCollection, getGlobalContext, getDefaultCollectionNames } from "./collections.js";
 import { disposeDefaultLlamaCpp } from "./llm.js";
 // =============================================================================
 // Helper functions
@@ -70,19 +70,23 @@ function buildInstructions(store) {
     // --- Capability gaps ---
     if (!status.hasVectorIndex) {
         lines.push("");
-        lines.push("Note: No vector embeddings. Only `search` (BM25) is available.");
+        lines.push("Note: No vector embeddings yet. Run `qmd embed` to enable semantic search (vec/hyde).");
     }
     else if (status.needsEmbedding > 0) {
         lines.push("");
         lines.push(`Note: ${status.needsEmbedding} documents need embedding. Run \`qmd embed\` to update.`);
     }
-    // --- When to use which tool (escalation ladder) ---
-    // Tool schemas describe parameters; instructions describe strategy.
+    // --- Search tool ---
     lines.push("");
-    lines.push("Search:");
-    lines.push("  - `search` (~30ms) — keyword and exact phrase matching.");
-    lines.push("  - `vector_search` (~2s) — meaning-based, finds adjacent concepts even when vocabulary differs.");
-    lines.push("  - `deep_search` (~10s) — auto-expands the query into variations, searches each by keyword and meaning, reranks for top hits.");
+    lines.push("Search: Use `query` with sub-queries (lex/vec/hyde):");
+    lines.push("  - type:'lex' — BM25 keyword search (exact terms, fast)");
+    lines.push("  - type:'vec' — semantic vector search (meaning-based)");
+    lines.push("  - type:'hyde' — hypothetical document (write what the answer looks like)");
+    lines.push("");
+    lines.push("Examples:");
+    lines.push("  Quick keyword lookup: [{type:'lex', query:'error handling'}]");
+    lines.push("  Semantic search: [{type:'vec', query:'how to handle errors gracefully'}]");
+    lines.push("  Best results: [{type:'lex', query:'error'}, {type:'vec', query:'error handling best practices'}]");
     // --- Retrieval workflow ---
     lines.push("");
     lines.push("Retrieval:");
@@ -157,96 +161,99 @@ function createMcpServer(store) {
         };
     });
     // ---------------------------------------------------------------------------
-    // Tool: qmd_search (keyword)
+    // Tool: query (Primary search tool)
     // ---------------------------------------------------------------------------
-    server.registerTool("search", {
-        title: "Keyword Search",
-        description: "Search by keyword. Finds documents containing exact words and phrases in the query.",
-        annotations: { readOnlyHint: true, openWorldHint: false },
-        inputSchema: {
-            query: z.string().describe("Search query - keywords or phrases to find"),
-            limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
-            minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
-            collection: z.string().optional().describe("Filter to a specific collection by name"),
-        },
-    }, async ({ query, limit, minScore, collection }) => {
-        const results = store.searchFTS(query, limit || 10, collection);
-        const filtered = results
-            .filter(r => r.score >= (minScore || 0))
-            .map(r => {
-            const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos);
-            return {
-                docid: `#${r.docid}`,
-                file: r.displayPath,
-                title: r.title,
-                score: Math.round(r.score * 100) / 100,
-                context: store.getContextForFile(r.filepath),
-                snippet: addLineNumbers(snippet, line), // Default to line numbers
-            };
-        });
-        return {
-            content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
-            structuredContent: { results: filtered },
-        };
+    const subSearchSchema = z.object({
+        type: z.enum(['lex', 'vec', 'hyde']).describe("lex = BM25 keywords (supports \"phrase\" and -negation); " +
+            "vec = semantic question; hyde = hypothetical answer passage"),
+        query: z.string().describe("The query text. For lex: use keywords, \"quoted phrases\", and -negation. " +
+            "For vec: natural language question. For hyde: 50-100 word answer passage."),
     });
-    // ---------------------------------------------------------------------------
-    // Tool: qmd_vector_search (Vector semantic search)
-    // ---------------------------------------------------------------------------
-    server.registerTool("vector_search", {
-        title: "Vector Search",
-        description: "Search by meaning. Finds relevant documents even when they use different words than the query — handles synonyms, paraphrases, and related concepts.",
+    server.registerTool("query", {
+        title: "Query",
+        description: `Search the knowledge base using a query document — one or more typed sub-queries combined for best recall.
+## Query Types
+**lex** — BM25 keyword search. Fast, exact, no LLM needed.
+Full lex syntax:
+- \`term\` — prefix match ("perf" matches "performance")
+- \`"exact phrase"\` — phrase must appear verbatim
+- \`-term\` or \`-"phrase"\` — exclude documents containing this
+Good lex examples:
+- \`"connection pool" timeout -redis\`
+- \`"machine learning" -sports -athlete\`
+- \`handleError async typescript\`
+**vec** — Semantic vector search. Write a natural language question. Finds documents by meaning, not exact words.
+- \`how does the rate limiter handle burst traffic?\`
+- \`what is the tradeoff between consistency and availability?\`
+**hyde** — Hypothetical document. Write 50-100 words that look like the answer. Often the most powerful for nuanced topics.
+- \`The rate limiter uses a token bucket algorithm. When a client exceeds 100 req/min, subsequent requests return 429 until the window resets.\`
+## Strategy
+Combine types for best results. First sub-query gets 2× weight — put your strongest signal first.
+| Goal | Approach |
+|------|----------|
+| Know exact term/name | \`lex\` only |
+| Concept search | \`vec\` only |
+| Best recall | \`lex\` + \`vec\` |
+| Complex/nuanced | \`lex\` + \`vec\` + \`hyde\` |
+| Unknown vocabulary | Use a standalone natural-language query (no typed lines) so the server can auto-expand it |
+## Examples
+Simple lookup:
+\`\`\`json
+[{ "type": "lex", "query": "CAP theorem" }]
+\`\`\`
+Best recall on a technical topic:
+\`\`\`json
+[
+  { "type": "lex", "query": "\\"connection pool\\" timeout -redis" },
+  { "type": "vec", "query": "why do database connections time out under load" },
+  { "type": "hyde", "query": "Connection pool exhaustion occurs when all connections are in use and new requests must wait. This typically happens under high concurrency when queries run longer than expected." }
+]
+\`\`\`
+Intent-aware lex (C++ performance, not sports):
+\`\`\`json
+[
+  { "type": "lex", "query": "\\"C++ performance\\" optimization -sports -athlete" },
+  { "type": "vec", "query": "how to optimize C++ program performance" }
+]
+\`\`\``,
         annotations: { readOnlyHint: true, openWorldHint: false },
         inputSchema: {
-            query: z.string().describe("Natural language query - describe what you're looking for"),
-            limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
-            minScore: z.number().optional().default(0.3).describe("Minimum relevance score 0-1 (default: 0.3)"),
-            collection: z.string().optional().describe("Filter to a specific collection by name"),
+            searches: z.array(subSearchSchema).min(1).max(10).describe("Typed sub-queries to execute (lex/vec/hyde). First gets 2x weight."),
+            limit: z.number().optional().default(10).describe("Max results (default: 10)"),
+            minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
+            collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
         },
-    }, async ({ query, limit, minScore, collection }) => {
-        const results = await vectorSearchQuery(store, query, { collection, limit, minScore });
-        if (results.length === 0) {
-            // Distinguish "no embeddings" from "no matches" — check if vector table exists
-            const tableExists = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
-            if (!tableExists) {
-                return {
-                    content: [{ type: "text", text: "Vector index not found. Run 'qmd embed' first to create embeddings." }],
-                    isError: true,
-                };
-            }
-        }
-        const filtered = results.map(r => {
-            const { line, snippet } = extractSnippet(r.body, query, 300);
-            return {
-                docid: `#${r.docid}`,
-                file: r.displayPath,
-                title: r.title,
-                score: Math.round(r.score * 100) / 100,
-                context: r.context,
-                snippet: addLineNumbers(snippet, line),
-            };
+    }, async ({ searches, limit, minScore, collections }) => {
+        // Map to internal format
+        const subSearches = searches.map(s => ({
+            type: s.type,
+            query: s.query,
+        }));
+        // Use default collections if none specified
+        const effectiveCollections = collections ?? getDefaultCollectionNames();
+        const results = await structuredSearch(store, subSearches, {
+            collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
+            limit,
+            minScore,
         });
-        return {
-            content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
-            structuredContent: { results: filtered },
-        };
-    });
-    // ---------------------------------------------------------------------------
-    // Tool: qmd_deep_search (Deep search with expansion + reranking)
-    // ---------------------------------------------------------------------------
-    server.registerTool("deep_search", {
-        title: "Deep Search",
-        description: "Deep search. Auto-expands the query into variations, searches each by keyword and meaning, and reranks for top hits across all results.",
-        annotations: { readOnlyHint: true, openWorldHint: false },
-        inputSchema: {
-            query: z.string().describe("Natural language query - describe what you're looking for"),
-            limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
-            minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
-            collection: z.string().optional().describe("Filter to a specific collection by name"),
-        },
-    }, async ({ query, limit, minScore, collection }) => {
-        const results = await hybridQuery(store, query, { collection, limit, minScore });
+        // Use first lex or vec query for snippet extraction
+        const primaryQuery = searches.find(s => s.type === 'lex')?.query
+            || searches.find(s => s.type === 'vec')?.query
+            || searches[0]?.query || "";
         const filtered = results.map(r => {
-            const { line, snippet } = extractSnippet(r.bestChunk, query, 300);
+            const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
             return {
                 docid: `#${r.docid}`,
                 file: r.displayPath,
@@ -257,7 +264,7 @@ function createMcpServer(store) {
             };
         });
         return {
-            content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
+            content: [{ type: "text", text: formatSearchSummary(filtered, primaryQuery) }],
             structuredContent: { results: filtered },
         };
     });
@@ -471,6 +478,49 @@ export async function startMcpHttpServer(port, options) {
                 log(`${ts()} GET /health (${Date.now() - reqStart}ms)`);
                 return;
             }
+            // REST endpoint: structured search without the MCP protocol.
+            // Route: POST /query (POST /search is accepted as an alias).
+            if ((pathname === "/query" || pathname === "/search") && nodeReq.method === "POST") {
+                const rawBody = await collectBody(nodeReq);
+                const params = JSON.parse(rawBody);
+                // Validate required fields
+                if (!params.searches || !Array.isArray(params.searches)) {
+                    nodeRes.writeHead(400, { "Content-Type": "application/json" });
+                    nodeRes.end(JSON.stringify({ error: "Missing required field: searches (array)" }));
+                    return;
+                }
+                // Map to internal format
+                const subSearches = params.searches.map((s) => ({
+                    type: s.type,
+                    query: String(s.query || ""),
+                }));
+                // Use default collections if none specified
+                const effectiveCollections = params.collections ?? getDefaultCollectionNames();
+                const results = await structuredSearch(store, subSearches, {
+                    collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
+                    limit: params.limit ?? 10,
+                    minScore: params.minScore ?? 0,
+                });
+                // Use first lex or vec query for snippet extraction
+                const primaryQuery = params.searches.find((s) => s.type === 'lex')?.query
+                    || params.searches.find((s) => s.type === 'vec')?.query
+                    || params.searches[0]?.query || "";
+                const formatted = results.map(r => {
+                    const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
+                    return {
+                        docid: `#${r.docid}`,
+                        file: r.displayPath,
+                        title: r.title,
+                        score: Math.round(r.score * 100) / 100,
+                        context: r.context,
+                        snippet: addLineNumbers(snippet, line),
+                    };
+                });
+                nodeRes.writeHead(200, { "Content-Type": "application/json" });
+                nodeRes.end(JSON.stringify({ results: formatted }));
+                log(`${ts()} POST /query ${params.searches.length} queries (${Date.now() - reqStart}ms)`);
+                return;
+            }
             if (pathname === "/mcp" && nodeReq.method === "POST") {
                 const rawBody = await collectBody(nodeReq);
                 const body = JSON.parse(rawBody);