@tobilu/qmd 1.0.7 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,45 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [1.1.1] - 2026-03-06
6
+
7
+ ### Fixes
8
+
9
+ - Reranker: truncate documents exceeding the 2048-token context window
10
+ instead of silently producing garbage scores. Long chunks (e.g. from
11
+ PDF ingestion) now get a fair ranking.
12
+ - Nix: add python3 and cctools to build dependencies. #214 (thanks
13
+ @pcasaretto)
14
+
15
+ ## [1.1.0] - 2026-02-20
16
+
17
+ QMD now speaks in **query documents** — structured multi-line queries where every line is typed (`lex:`, `vec:`, `hyde:`), combining keyword precision with semantic recall. A single plain query still works exactly as before (it's treated as an implicit `expand:` and auto-expanded by the LLM). Lex now supports quoted phrases and negation (`"C++ performance" -sports -athlete`), making intent-aware disambiguation practical. The formal query grammar is documented in `docs/SYNTAX.md`.
18
+
19
+ The npm package now uses the standard `#!/usr/bin/env node` bin convention, replacing the custom bash wrapper. This fixes native module ABI mismatches when installed via bun and works on any platform with node >= 22 on PATH.
20
+
21
+ ### Changes
22
+
23
+ - **Query document format**: multi-line queries with typed sub-queries (`lex:`, `vec:`, `hyde:`). Plain queries remain the default (`expand:` implicit, but not written inside the document). First sub-query gets 2× fusion weight — put your strongest signal first. Formal grammar in `docs/SYNTAX.md`.
24
+ - **Lex syntax**: full BM25 operator support. `"exact phrase"` for verbatim matching; `-term` and `-"phrase"` for exclusions. Essential for disambiguation when a term is overloaded across domains (e.g. `performance -sports -athlete`).
25
+ - **`expand:` shortcut**: send a single plain query (or start the document with `expand:` on its only line) to auto-expand via the local LLM. Query documents themselves are limited to `lex`, `vec`, and `hyde` lines.
26
+ - **MCP `query` tool** (renamed from `structured_search`): rewrote the tool description to fully teach AI agents the query document format, lex syntax, and combination strategy. Includes worked examples with intent-aware lex.
27
+ - **HTTP `/query` endpoint** (renamed from `/search`; `/search` kept as silent alias).
28
+ - **`collections` array filter**: filter by multiple collections in a single query (`collections: ["notes", "brain"]`). Removed the single `collection` string param — array only.
29
+ - **Collection `include`/`exclude`**: `includeByDefault: false` hides a collection from all queries unless explicitly named via `collections`. CLI: `qmd collection exclude <name>` / `qmd collection include <name>`.
30
+ - **Collection `update-cmd`**: attach a shell command that runs before every `qmd update` (e.g. `git stash && git pull --rebase --ff-only && git stash pop`). CLI: `qmd collection update-cmd <name> '<cmd>'`.
31
+ - **`qmd status` tips**: shows actionable tips when collections lack context descriptions or update commands.
32
+ - **`qmd collection` subcommands**: `show`, `update-cmd`, `include`, `exclude`. Bare `qmd collection` now prints help.
33
+ - **Packaging**: replaced custom bash wrapper with standard `#!/usr/bin/env node` shebang on `dist/qmd.js`. Fixes native module ABI mismatches when installed via bun, and works on any platform where node >= 22 is on PATH.
34
+ - **Removed MCP tools** `search`, `vector_search`, `deep_search` — all superseded by `query`.
35
+ - **Removed** `qmd context check` command.
36
+ - **CLI timing**: each LLM step (expand, embed, rerank) prints elapsed time inline (`Expanding query... (4.2s)`).
37
+
38
+ ### Fixes
39
+
40
+ - `qmd collection list` shows `[excluded]` tag for collections with `includeByDefault: false`.
41
+ - Default searches now respect `includeByDefault` — excluded collections are skipped unless explicitly named.
42
+ - Fix main module detection when installed globally via npm/bun (symlink resolution).
43
+
5
44
  ## [1.0.7] - 2026-02-18
6
45
 
7
46
  ### Changes
@@ -333,4 +372,3 @@ notes, journals, and meeting transcripts.
333
372
  [Unreleased]: https://github.com/tobi/qmd/compare/v1.0.0...HEAD
334
373
  [1.0.0]: https://github.com/tobi/qmd/releases/tag/v1.0.0
335
374
  [0.9.0]: https://github.com/tobi/qmd/compare/v0.8.0...v0.9.0
336
-
package/dist/collections.d.ts CHANGED
@@ -18,6 +18,7 @@ export interface Collection {
18
18
  pattern: string;
19
19
  context?: ContextMap;
20
20
  update?: string;
21
+ includeByDefault?: boolean;
21
22
  }
22
23
  /**
23
24
  * The complete configuration file structure
@@ -55,6 +56,21 @@ export declare function getCollection(name: string): NamedCollection | null;
55
56
  * List all collections
56
57
  */
57
58
  export declare function listCollections(): NamedCollection[];
59
+ /**
60
+ * Get collections that are included by default in queries
61
+ */
62
+ export declare function getDefaultCollections(): NamedCollection[];
63
+ /**
64
+ * Get collection names that are included by default
65
+ */
66
+ export declare function getDefaultCollectionNames(): string[];
67
+ /**
68
+ * Update a collection's settings
69
+ */
70
+ export declare function updateCollectionSettings(name: string, settings: {
71
+ update?: string | null;
72
+ includeByDefault?: boolean;
73
+ }): boolean;
58
74
  /**
59
75
  * Add or update a collection
60
76
  */
package/dist/collections.js CHANGED
@@ -117,6 +117,46 @@ export function listCollections() {
117
117
  ...collection,
118
118
  }));
119
119
  }
120
+ /**
121
+ * Get collections that are included by default in queries
122
+ */
123
+ export function getDefaultCollections() {
124
+ return listCollections().filter(c => c.includeByDefault !== false);
125
+ }
126
+ /**
127
+ * Get collection names that are included by default
128
+ */
129
+ export function getDefaultCollectionNames() {
130
+ return getDefaultCollections().map(c => c.name);
131
+ }
132
+ /**
133
+ * Update a collection's settings
134
+ */
135
+ export function updateCollectionSettings(name, settings) {
136
+ const config = loadConfig();
137
+ const collection = config.collections[name];
138
+ if (!collection)
139
+ return false;
140
+ if (settings.update !== undefined) {
141
+ if (settings.update === null) {
142
+ delete collection.update;
143
+ }
144
+ else {
145
+ collection.update = settings.update;
146
+ }
147
+ }
148
+ if (settings.includeByDefault !== undefined) {
149
+ if (settings.includeByDefault === true) {
150
+ // true is default, remove the field
151
+ delete collection.includeByDefault;
152
+ }
153
+ else {
154
+ collection.includeByDefault = settings.includeByDefault;
155
+ }
156
+ }
157
+ saveConfig(config);
158
+ return true;
159
+ }
120
160
  /**
121
161
  * Add or update a collection
122
162
  */
package/dist/llm.d.ts CHANGED
@@ -318,6 +318,7 @@ export declare class LlamaCpp implements LLM {
318
318
  context?: string;
319
319
  includeLexical?: boolean;
320
320
  }): Promise<Queryable[]>;
321
+ private static readonly RERANK_TEMPLATE_OVERHEAD;
321
322
  rerank(query: string, documents: RerankDocument[], options?: RerankOptions): Promise<RerankResult>;
322
323
  /**
323
324
  * Get device/GPU info for status display.
package/dist/llm.js CHANGED
@@ -731,17 +731,31 @@ export class LlamaCpp {
731
731
  await genContext.dispose();
732
732
  }
733
733
  }
734
+ // Qwen3 reranker chat template overhead (system prompt, tags, separators)
735
+ static RERANK_TEMPLATE_OVERHEAD = 200;
734
736
  async rerank(query, documents, options = {}) {
735
737
  // Ping activity at start to keep models alive during this operation
736
738
  this.touchActivity();
737
739
  const contexts = await this.ensureRerankContexts();
740
+ const model = await this.ensureRerankModel();
741
+ // Truncate documents that would exceed the rerank context size.
742
+ // Budget = contextSize - template overhead - query tokens
743
+ const queryTokens = model.tokenize(query).length;
744
+ const maxDocTokens = LlamaCpp.RERANK_CONTEXT_SIZE - LlamaCpp.RERANK_TEMPLATE_OVERHEAD - queryTokens;
745
+ const truncatedDocs = documents.map((doc) => {
746
+ const tokens = model.tokenize(doc.text);
747
+ if (tokens.length <= maxDocTokens)
748
+ return doc;
749
+ const truncatedText = model.detokenize(tokens.slice(0, maxDocTokens));
750
+ return { ...doc, text: truncatedText };
751
+ });
738
752
  // Build a map from document text to original indices (for lookup after sorting)
739
753
  const textToDoc = new Map();
740
- documents.forEach((doc, index) => {
754
+ truncatedDocs.forEach((doc, index) => {
741
755
  textToDoc.set(doc.text, { file: doc.file, index });
742
756
  });
743
757
  // Extract just the text for ranking
744
- const texts = documents.map((doc) => doc.text);
758
+ const texts = truncatedDocs.map((doc) => doc.text);
745
759
  // Split documents across contexts for parallel evaluation.
746
760
  // Each context has its own sequence with a lock, so parallelism comes
747
761
  // from multiple contexts evaluating different chunks simultaneously.
package/dist/mcp.js CHANGED
@@ -13,8 +13,8 @@ import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mc
13
13
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
14
14
  import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
15
15
  import { z } from "zod";
16
- import { createStore, extractSnippet, addLineNumbers, hybridQuery, vectorSearchQuery, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
17
- import { getCollection, getGlobalContext } from "./collections.js";
16
+ import { createStore, extractSnippet, addLineNumbers, structuredSearch, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
17
+ import { getCollection, getGlobalContext, getDefaultCollectionNames } from "./collections.js";
18
18
  import { disposeDefaultLlamaCpp } from "./llm.js";
19
19
  // =============================================================================
20
20
  // Helper functions
@@ -70,19 +70,23 @@ function buildInstructions(store) {
70
70
  // --- Capability gaps ---
71
71
  if (!status.hasVectorIndex) {
72
72
  lines.push("");
73
- lines.push("Note: No vector embeddings. Only `search` (BM25) is available.");
73
+ lines.push("Note: No vector embeddings yet. Run `qmd embed` to enable semantic search (vec/hyde).");
74
74
  }
75
75
  else if (status.needsEmbedding > 0) {
76
76
  lines.push("");
77
77
  lines.push(`Note: ${status.needsEmbedding} documents need embedding. Run \`qmd embed\` to update.`);
78
78
  }
79
- // --- When to use which tool (escalation ladder) ---
80
- // Tool schemas describe parameters; instructions describe strategy.
79
+ // --- Search tool ---
81
80
  lines.push("");
82
- lines.push("Search:");
83
- lines.push(" - `search` (~30ms) — keyword and exact phrase matching.");
84
- lines.push(" - `vector_search` (~2s) — meaning-based, finds adjacent concepts even when vocabulary differs.");
85
- lines.push(" - `deep_search` (~10s) — auto-expands the query into variations, searches each by keyword and meaning, reranks for top hits.");
81
+ lines.push("Search: Use `query` with sub-queries (lex/vec/hyde):");
82
+ lines.push(" - type:'lex' — BM25 keyword search (exact terms, fast)");
83
+ lines.push(" - type:'vec' — semantic vector search (meaning-based)");
84
+ lines.push(" - type:'hyde' — hypothetical document (write what the answer looks like)");
85
+ lines.push("");
86
+ lines.push("Examples:");
87
+ lines.push(" Quick keyword lookup: [{type:'lex', query:'error handling'}]");
88
+ lines.push(" Semantic search: [{type:'vec', query:'how to handle errors gracefully'}]");
89
+ lines.push(" Best results: [{type:'lex', query:'error'}, {type:'vec', query:'error handling best practices'}]");
86
90
  // --- Retrieval workflow ---
87
91
  lines.push("");
88
92
  lines.push("Retrieval:");
@@ -157,96 +161,99 @@ function createMcpServer(store) {
157
161
  };
158
162
  });
159
163
  // ---------------------------------------------------------------------------
160
- // Tool: qmd_search (keyword)
164
+ // Tool: query (Primary search tool)
161
165
  // ---------------------------------------------------------------------------
162
- server.registerTool("search", {
163
- title: "Keyword Search",
164
- description: "Search by keyword. Finds documents containing exact words and phrases in the query.",
165
- annotations: { readOnlyHint: true, openWorldHint: false },
166
- inputSchema: {
167
- query: z.string().describe("Search query - keywords or phrases to find"),
168
- limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
169
- minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
170
- collection: z.string().optional().describe("Filter to a specific collection by name"),
171
- },
172
- }, async ({ query, limit, minScore, collection }) => {
173
- const results = store.searchFTS(query, limit || 10, collection);
174
- const filtered = results
175
- .filter(r => r.score >= (minScore || 0))
176
- .map(r => {
177
- const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos);
178
- return {
179
- docid: `#${r.docid}`,
180
- file: r.displayPath,
181
- title: r.title,
182
- score: Math.round(r.score * 100) / 100,
183
- context: store.getContextForFile(r.filepath),
184
- snippet: addLineNumbers(snippet, line), // Default to line numbers
185
- };
186
- });
187
- return {
188
- content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
189
- structuredContent: { results: filtered },
190
- };
166
+ const subSearchSchema = z.object({
167
+ type: z.enum(['lex', 'vec', 'hyde']).describe("lex = BM25 keywords (supports \"phrase\" and -negation); " +
168
+ "vec = semantic question; hyde = hypothetical answer passage"),
169
+ query: z.string().describe("The query text. For lex: use keywords, \"quoted phrases\", and -negation. " +
170
+ "For vec: natural language question. For hyde: 50-100 word answer passage."),
191
171
  });
192
- // ---------------------------------------------------------------------------
193
- // Tool: qmd_vector_search (Vector semantic search)
194
- // ---------------------------------------------------------------------------
195
- server.registerTool("vector_search", {
196
- title: "Vector Search",
197
- description: "Search by meaning. Finds relevant documents even when they use different words than the query — handles synonyms, paraphrases, and related concepts.",
172
+ server.registerTool("query", {
173
+ title: "Query",
174
+ description: `Search the knowledge base using a query document — one or more typed sub-queries combined for best recall.
175
+
176
+ ## Query Types
177
+
178
+ **lex** — BM25 keyword search. Fast, exact, no LLM needed.
179
+ Full lex syntax:
180
+ - \`term\` — prefix match ("perf" matches "performance")
181
+ - \`"exact phrase"\` — phrase must appear verbatim
182
+ - \`-term\` or \`-"phrase"\` — exclude documents containing this
183
+
184
+ Good lex examples:
185
+ - \`"connection pool" timeout -redis\`
186
+ - \`"machine learning" -sports -athlete\`
187
+ - \`handleError async typescript\`
188
+
189
+ **vec** — Semantic vector search. Write a natural language question. Finds documents by meaning, not exact words.
190
+ - \`how does the rate limiter handle burst traffic?\`
191
+ - \`what is the tradeoff between consistency and availability?\`
192
+
193
+ **hyde** — Hypothetical document. Write 50-100 words that look like the answer. Often the most powerful for nuanced topics.
194
+ - \`The rate limiter uses a token bucket algorithm. When a client exceeds 100 req/min, subsequent requests return 429 until the window resets.\`
195
+
196
+ ## Strategy
197
+
198
+ Combine types for best results. First sub-query gets 2× weight — put your strongest signal first.
199
+
200
+ | Goal | Approach |
201
+ |------|----------|
202
+ | Know exact term/name | \`lex\` only |
203
+ | Concept search | \`vec\` only |
204
+ | Best recall | \`lex\` + \`vec\` |
205
+ | Complex/nuanced | \`lex\` + \`vec\` + \`hyde\` |
206
+ | Unknown vocabulary | Use a standalone natural-language query (no typed lines) so the server can auto-expand it |
207
+
208
+ ## Examples
209
+
210
+ Simple lookup:
211
+ \`\`\`json
212
+ [{ "type": "lex", "query": "CAP theorem" }]
213
+ \`\`\`
214
+
215
+ Best recall on a technical topic:
216
+ \`\`\`json
217
+ [
218
+ { "type": "lex", "query": "\\"connection pool\\" timeout -redis" },
219
+ { "type": "vec", "query": "why do database connections time out under load" },
220
+ { "type": "hyde", "query": "Connection pool exhaustion occurs when all connections are in use and new requests must wait. This typically happens under high concurrency when queries run longer than expected." }
221
+ ]
222
+ \`\`\`
223
+
224
+ Intent-aware lex (C++ performance, not sports):
225
+ \`\`\`json
226
+ [
227
+ { "type": "lex", "query": "\\"C++ performance\\" optimization -sports -athlete" },
228
+ { "type": "vec", "query": "how to optimize C++ program performance" }
229
+ ]
230
+ \`\`\``,
198
231
  annotations: { readOnlyHint: true, openWorldHint: false },
199
232
  inputSchema: {
200
- query: z.string().describe("Natural language query - describe what you're looking for"),
201
- limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
202
- minScore: z.number().optional().default(0.3).describe("Minimum relevance score 0-1 (default: 0.3)"),
203
- collection: z.string().optional().describe("Filter to a specific collection by name"),
233
+ searches: z.array(subSearchSchema).min(1).max(10).describe("Typed sub-queries to execute (lex/vec/hyde). First gets 2x weight."),
234
+ limit: z.number().optional().default(10).describe("Max results (default: 10)"),
235
+ minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
236
+ collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
204
237
  },
205
- }, async ({ query, limit, minScore, collection }) => {
206
- const results = await vectorSearchQuery(store, query, { collection, limit, minScore });
207
- if (results.length === 0) {
208
- // Distinguish "no embeddings" from "no matches" — check if vector table exists
209
- const tableExists = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
210
- if (!tableExists) {
211
- return {
212
- content: [{ type: "text", text: "Vector index not found. Run 'qmd embed' first to create embeddings." }],
213
- isError: true,
214
- };
215
- }
216
- }
217
- const filtered = results.map(r => {
218
- const { line, snippet } = extractSnippet(r.body, query, 300);
219
- return {
220
- docid: `#${r.docid}`,
221
- file: r.displayPath,
222
- title: r.title,
223
- score: Math.round(r.score * 100) / 100,
224
- context: r.context,
225
- snippet: addLineNumbers(snippet, line),
226
- };
238
+ }, async ({ searches, limit, minScore, collections }) => {
239
+ // Map to internal format
240
+ const subSearches = searches.map(s => ({
241
+ type: s.type,
242
+ query: s.query,
243
+ }));
244
+ // Use default collections if none specified
245
+ const effectiveCollections = collections ?? getDefaultCollectionNames();
246
+ const results = await structuredSearch(store, subSearches, {
247
+ collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
248
+ limit,
249
+ minScore,
227
250
  });
228
- return {
229
- content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
230
- structuredContent: { results: filtered },
231
- };
232
- });
233
- // ---------------------------------------------------------------------------
234
- // Tool: qmd_deep_search (Deep search with expansion + reranking)
235
- // ---------------------------------------------------------------------------
236
- server.registerTool("deep_search", {
237
- title: "Deep Search",
238
- description: "Deep search. Auto-expands the query into variations, searches each by keyword and meaning, and reranks for top hits across all results.",
239
- annotations: { readOnlyHint: true, openWorldHint: false },
240
- inputSchema: {
241
- query: z.string().describe("Natural language query - describe what you're looking for"),
242
- limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
243
- minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
244
- collection: z.string().optional().describe("Filter to a specific collection by name"),
245
- },
246
- }, async ({ query, limit, minScore, collection }) => {
247
- const results = await hybridQuery(store, query, { collection, limit, minScore });
251
+ // Use first lex or vec query for snippet extraction
252
+ const primaryQuery = searches.find(s => s.type === 'lex')?.query
253
+ || searches.find(s => s.type === 'vec')?.query
254
+ || searches[0]?.query || "";
248
255
  const filtered = results.map(r => {
249
- const { line, snippet } = extractSnippet(r.bestChunk, query, 300);
256
+ const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
250
257
  return {
251
258
  docid: `#${r.docid}`,
252
259
  file: r.displayPath,
@@ -257,7 +264,7 @@ function createMcpServer(store) {
257
264
  };
258
265
  });
259
266
  return {
260
- content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
267
+ content: [{ type: "text", text: formatSearchSummary(filtered, primaryQuery) }],
261
268
  structuredContent: { results: filtered },
262
269
  };
263
270
  });
@@ -471,6 +478,49 @@ export async function startMcpHttpServer(port, options) {
471
478
  log(`${ts()} GET /health (${Date.now() - reqStart}ms)`);
472
479
  return;
473
480
  }
481
+ // REST endpoint: POST /search — structured search without MCP protocol
482
+ // REST endpoint: POST /query (alias: /search) — structured search without MCP protocol
483
+ if ((pathname === "/query" || pathname === "/search") && nodeReq.method === "POST") {
484
+ const rawBody = await collectBody(nodeReq);
485
+ const params = JSON.parse(rawBody);
486
+ // Validate required fields
487
+ if (!params.searches || !Array.isArray(params.searches)) {
488
+ nodeRes.writeHead(400, { "Content-Type": "application/json" });
489
+ nodeRes.end(JSON.stringify({ error: "Missing required field: searches (array)" }));
490
+ return;
491
+ }
492
+ // Map to internal format
493
+ const subSearches = params.searches.map((s) => ({
494
+ type: s.type,
495
+ query: String(s.query || ""),
496
+ }));
497
+ // Use default collections if none specified
498
+ const effectiveCollections = params.collections ?? getDefaultCollectionNames();
499
+ const results = await structuredSearch(store, subSearches, {
500
+ collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
501
+ limit: params.limit ?? 10,
502
+ minScore: params.minScore ?? 0,
503
+ });
504
+ // Use first lex or vec query for snippet extraction
505
+ const primaryQuery = params.searches.find((s) => s.type === 'lex')?.query
506
+ || params.searches.find((s) => s.type === 'vec')?.query
507
+ || params.searches[0]?.query || "";
508
+ const formatted = results.map(r => {
509
+ const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
510
+ return {
511
+ docid: `#${r.docid}`,
512
+ file: r.displayPath,
513
+ title: r.title,
514
+ score: Math.round(r.score * 100) / 100,
515
+ context: r.context,
516
+ snippet: addLineNumbers(snippet, line),
517
+ };
518
+ });
519
+ nodeRes.writeHead(200, { "Content-Type": "application/json" });
520
+ nodeRes.end(JSON.stringify({ results: formatted }));
521
+ log(`${ts()} POST /query ${params.searches.length} queries (${Date.now() - reqStart}ms)`);
522
+ return;
523
+ }
474
524
  if (pathname === "/mcp" && nodeReq.method === "POST") {
475
525
  const rawBody = await collectBody(nodeReq);
476
526
  const body = JSON.parse(rawBody);