npm - @tobilu/qmd - Versions diffs - 2.1.0 → 2.5.1 - Mend

@tobilu/qmd 2.1.0 → 2.5.1

This diff compares the contents of two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (37)

package/CHANGELOG.md +81 -0
package/README.md +3 -0
package/bin/qmd +39 -3
package/dist/ast.d.ts +1 -0
package/dist/ast.js +18 -8
package/dist/bench/bench.d.ts +2 -0
package/dist/bench/bench.js +108 -13
package/dist/bench/score.d.ts +11 -4
package/dist/bench/score.js +34 -13
package/dist/bench/types.d.ts +13 -0
package/dist/cli/qmd.d.ts +26 -0
package/dist/cli/qmd.js +1172 -121
package/dist/collections.d.ts +9 -0
package/dist/collections.js +32 -7
package/dist/db.d.ts +6 -3
package/dist/db.js +1 -1
package/dist/index.d.ts +4 -0
package/dist/index.js +5 -2
package/dist/llm.d.ts +65 -3
package/dist/llm.js +376 -63
package/dist/mcp/server.d.ts +6 -3
package/dist/mcp/server.js +41 -26
package/dist/paths.d.ts +1 -0
package/dist/paths.js +4 -0
package/dist/store.d.ts +92 -17
package/dist/store.js +676 -176
package/package.json +23 -12
package/scripts/build.mjs +29 -0
package/scripts/check-package-grammars.mjs +29 -0
package/scripts/package-smoke.mjs +65 -0
package/scripts/test-all.mjs +27 -0
package/skills/qmd/SKILL.md +203 -0
package/skills/qmd/references/mcp-setup.md +102 -0
package/skills/release/SKILL.md +139 -0
package/skills/release/scripts/install-hooks.sh +38 -0
package/dist/embedded-skills.d.ts +0 -6
package/dist/embedded-skills.js +0 -14

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,87 @@
 ## [Unreleased]
+## [2.5.1] - 2026-05-20
+### Changes
+- Release: publish from GitHub Actions via npm Trusted Publishing/OIDC instead of a long-lived `NPM_TOKEN` secret.
+## [2.5.0] - 2026-05-19
+### Changes
+- Dependencies: update core SQLite/config/chunking packages (`better-sqlite3`, `yaml`, `web-tree-sitter`, `tree-sitter-go`, and `tree-sitter-python`) while keeping incompatible `zod`, `tsx`, and `vitest` majors pinned.
+- Agent skills: add `qmd skills list|get|path` to serve version-matched runtime skill instructions from the installed CLI, and make `qmd skill install` write a stable discovery stub so installed agent skills do not go stale after QMD upgrades.
+- CLI: add `qmd doctor` for index/runtime diagnostics, including SQLite/sqlite-vec versions, embedding fingerprint freshness, mixed-fingerprint detection, safe legacy fingerprint adoption, and content-hash sampling.
+### Fixes
+- Launcher: prefer runnable TypeScript source in git checkouts even when ignored `dist/` artifacts exist, while packaged installs continue to run `dist/`.
+- GPU: keep node-llama-cpp's documented `gpu: "auto"` initialization as the primary path, then perform no-build packaged CUDA/Vulkan/Metal probes only if auto falls back to CPU.
+- CLI: move GPU/CPU runtime diagnostics out of `qmd status`; use `qmd doctor` for device probing and related environment guidance.
+- CLI: point unexpected command/setup failures toward `qmd doctor` so diagnostics are the default next step when QMD behaves incorrectly.
+- Doctor: explicitly warn when `content_vectors` contains multiple non-empty embedding fingerprint names, with the per-fingerprint document/chunk breakdown.
+- Embed: make the TTY progress line label byte-based input progress explicitly, show embedded chunks as a count, and shorten the displayed model name.
+- Embed: retain per-chunk failure details, retry failed chunks after later successful embeds and again when no other chunks remain, clear recovered errors, and cap retries to avoid endless loops.
+- Tests: expand the container smoke harness to cover npm-global, npx-style, and Bun-global install scenarios, always checking auto and `QMD_FORCE_CPU=1` doctor modes, with opt-in tiny `qmd embed` and GPU probe runs for supported container runtimes.
+- Embedding: fingerprint vector metadata using the active embedding model and formatting/chunking parameters so stale vectors are treated as pending after search semantics change. Legacy `content_vectors` columns are migrated lazily on first vector-health/write use to preserve fast QMD startup.
+- Skill: expand the packaged QMD skill with retrieval-first workflows, structured query examples, wiki/source collection guidance, and safe fallbacks when model-backed search is unavailable.
+- Tests: make `bun run test` execute the local unit suite under both Node/Vitest and Bun (`test:node` + `test:bun`) so runtime-specific regressions are caught before CI.
+- Model config: centralize embedding/rerank/generation model resolution so `qmd embed`, `status`, `query`, `vsearch`, `pull`, SDK vector search, and `bench` use the same active `.qmd/index.yaml` model hints and environment fallbacks.
+- GPU/status: `qmd status` now uses the same embedding model identity as `qmd embed` when computing pending embeddings, so URI-backed embeddings are not incorrectly reported as pending under the legacy `embeddinggemma` alias.
+- GPU status: `qmd status` now always shows GPU mode/configuration without unsafe native probing, and CPU-fallback warnings point to `QMD_STATUS_DEVICE_PROBE=1 qmd status` for an actual backend probe. The no-GPU warning is emitted once per process instead of once per LLM instance during benchmarks.
+- GPU: add `QMD_FORCE_CPU=1` / `--no-gpu` to bypass CUDA/Vulkan/Metal probing entirely, and route native llama.cpp stdout noise to stderr so JSON output stays parseable during search/query commands.
+- Snippet line numbers: `qmd_query` (MCP), HTTP `/query`, and `qmd query`
+  (CLI JSON output and snippet headers) now return absolute source-file
+  line numbers instead of chunk-local ones, so the `line` field can be
+  passed back to `qmd_get` as `fromLine` without a separate lookup.
+  Snippet selection remains scoped to the best matching chunk
+  (preserves #149).
+- CLI: `qmd query --full` now emits the full document body in all output
+  formats (json, csv, md, xml), restoring the documented behavior of the
+  flag. Previously it returned only the best matching chunk (~3.6KB max
+  per result). Output payload for `--full` queries is now proportional
+  to total document size.
+- macOS Metal: `qmd query --json` now flushes successful JSON output and uses a safe immediate-exit path on Darwin to avoid ggml Metal finalizer aborts; other commands still dispose LLM contexts/models before the llama runtime. #368
+- Embedding: require complete chunk coverage before treating a document as
+  embedded, remove partial vectors when chunk/session failures leave a
+  document incomplete, and keep `qmd status` pending counts honest after
+  interrupted long embed runs. #637 #378
+- Embedding: `qmd embed -c <collection>` now scopes pending-doc selection
+  to the requested collection instead of embedding global pending work.
+  Scoped `--force` clears only collection-owned vectors, preserves shared
+  hashes referenced by sibling collections, and drops `vectors_vec` only
+  when the scoped clear empties all vectors.
+- Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591
+- MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593
+- CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
+- Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519
+- MCP: make `qmd mcp --index <name>` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343
+- Embedding: respect `QMD_EMBED_MODEL` consistently for vector indexing and vector-backed search, with default-model fallback when unset.
+- Config: use one home-directory resolver for YAML config and the default SQLite cache path, avoiding Windows CLI/MCP split-brain when `HOME` is unset.
+- GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
+- Fix: preserve original filename case in `handelize()`. The previous
+  `.toLowerCase()` call made indexed paths unreachable on case-sensitive
+  filesystems (Linux). `qmd update` automatically migrates legacy
+  lowercase paths without re-embedding.
+- CLI: make `qmd status` skip native `node-llama-cpp` device probing by
+  default so status stays safe on machines with broken or unsupported GPU
+  drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in.
+- CLI: lazy-load `node-llama-cpp` so lightweight commands such as
+  `qmd status` do not import native ML dependencies or trigger llama.cpp
+  builds on ARM/no-GPU machines. #491
+- Store: keep content rows referenced by inactive documents during orphan
+  cleanup so `qmd update` preserves soft-deleted tombstones for removed
+  files. #585
+- Packaging: install AST grammar WASM packages as required dependencies so
+  Bun global installs include TypeScript/TSX/JavaScript grammars, and add a
+  `smoke:package-grammars` verification command. #595
+- Launcher: add wrapper smoke coverage for scoped package, npm/npx,
+  Homebrew/Linuxbrew, Bun global symlink layouts, and `$BUN_INSTALL`
+  false-positive runtime selection regressions. #351 #353 #354 #356 #358 #359
 ## [2.1.0] - 2026-04-05
 Code files now chunk at function and class boundaries via tree-sitter,

package/README.md CHANGED Viewed

@@ -797,6 +797,9 @@ llm_cache       -- Cached LLM responses (query expansion, rerank scores)
 | Variable | Default | Description |
 |----------|---------|-------------|
 | `XDG_CACHE_HOME` | `~/.cache` | Cache directory location |
+| `QMD_LLAMA_GPU` | `auto` | Force llama.cpp GPU backend (`metal`, `vulkan`, `cuda`) or disable GPU with `false` |
+| `QMD_FORCE_CPU` | unset | Set to `1`/`true` to force CPU mode before any CUDA/Vulkan/Metal probing. Equivalent CLI flag: `--no-gpu`. |
+| `QMD_EMBED_PARALLELISM` | automatic | Override embedding/reranking context parallelism (1-8). Windows CUDA defaults to `1` because parallel CUDA contexts can crash with `ggml-cuda.cu:98`; use Vulkan or raise this only if your driver is stable. |
 ## How It Works

package/bin/qmd CHANGED Viewed

@@ -15,6 +15,42 @@ done
 # to avoid native module ABI mismatches (e.g., better-sqlite3 compiled for bun vs node)
 DIR="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
+# MCP stdio reserves stdout exclusively for JSON-RPC frames. node-llama-cpp
+# / llama.cpp / ggml can write native logs directly to stdout before JS-level
+# log handlers are attached, so seed the native quiet env before Node/Bun imports
+# the CLI and its LLM modules. Preserve explicit user values when provided.
+if [ "$1" = "mcp" ]; then
+  export LLAMA_LOG_LEVEL="${LLAMA_LOG_LEVEL:-error}"
+  export GGML_LOG_LEVEL="${GGML_LOG_LEVEL:-error}"
+  export GGML_BACKEND_SILENT="${GGML_BACKEND_SILENT:-1}"
+fi
+JS="$DIR/dist/cli/qmd.js"
+TS="$DIR/src/cli/qmd.ts"
+# In published packages, bin/qmd must run dist/. In a git checkout, however,
+# dist/ is often ignored and can be stale after git reset or branch switches.
+# Prefer source mode only for checkouts so ./bin/qmd reflects the checked-out
+# source without changing packaged/runtime behavior.
+if [ -e "$DIR/.git" ] && [ -f "$TS" ]; then
+  if [ -f "$DIR/bun.lock" ] || [ -f "$DIR/bun.lockb" ]; then
+    if command -v bun >/dev/null 2>&1; then
+      exec bun "$TS" "$@"
+    fi
+  fi
+  if [ -f "$DIR/node_modules/tsx/dist/cli.mjs" ]; then
+    exec node "$DIR/node_modules/tsx/dist/cli.mjs" "$TS" "$@"
+  fi
+fi
+if [ ! -f "$JS" ]; then
+  echo "qmd is not built: missing $JS" >&2
+  echo "Run: bun install && bun run build" >&2
+  echo "Or:  npm install && npm run build" >&2
+  echo "After building, run: qmd doctor" >&2
+  exit 1
+fi
 # Detect the package manager that installed dependencies by checking lockfiles.
 # $BUN_INSTALL is intentionally NOT checked — it only indicates that bun exists
 # on the system, not that it was used to install this package (see #361).
@@ -24,9 +60,9 @@ DIR="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
 # builds that use npm would be incorrectly routed to bun, causing ABI
 # mismatches with better-sqlite3 / sqlite-vec (see #381).
 if [ -f "$DIR/package-lock.json" ]; then
-  exec node "$DIR/dist/cli/qmd.js" "$@"
+  exec node "$JS" "$@"
 elif [ -f "$DIR/bun.lock" ] || [ -f "$DIR/bun.lockb" ]; then
-  exec bun "$DIR/dist/cli/qmd.js" "$@"
+  exec bun "$JS" "$@"
 else
-  exec node "$DIR/dist/cli/qmd.js" "$@"
+  exec node "$JS" "$@"
 fi

package/dist/ast.d.ts CHANGED Viewed

@@ -24,6 +24,7 @@ export type SupportedLanguage = "typescript" | "tsx" | "javascript" | "python" |
  * Returns null for unsupported or unknown extensions (including .md).
  */
 export declare function detectLanguage(filepath: string): SupportedLanguage | null;
+export declare function formatGrammarLoadError(language: SupportedLanguage, err: unknown): string;
 /**
  * Parse a source file and return break points at AST node boundaries.
  *

package/dist/ast.js CHANGED Viewed

@@ -47,13 +47,19 @@ export function detectLanguage(filepath) {
  * Maps language to the npm package and wasm filename for the grammar.
  */
 const GRAMMAR_MAP = {
-    typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
-    tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm" },
-    javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
-    python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm" },
-    go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm" },
-    rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm" },
+    typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
+    tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm", version: "0.23.2" },
+    javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
+    python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm", version: "0.23.4" },
+    go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm", version: "0.23.4" },
+    rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm", version: "0.24.0" },
 };
+export function formatGrammarLoadError(language, err) {
+    const grammar = GRAMMAR_MAP[language];
+    const detail = err instanceof Error ? err.message : String(err);
+    return `${grammar.pkg}/${grammar.wasm} failed to load (${detail}); falling back to regex chunking. ` +
+        `Repair a broken global install with: bun add ${grammar.pkg}@${grammar.version}`;
+}
 // =============================================================================
 // Per-Language Query Definitions
 // =============================================================================
@@ -152,6 +158,8 @@ let QueryClass = null;
 let initPromise = null;
 /** Languages that have already failed to load — warn only once per process. */
 const failedLanguages = new Set();
+/** Last grammar load error by language, for status output. */
+const grammarLoadErrors = new Map();
 /** Cached grammar load promises. */
 const grammarCache = new Map();
 /** Cached compiled queries per language. */
@@ -200,7 +208,9 @@ async function loadGrammar(language) {
     catch (err) {
         failedLanguages.add(language);
         grammarCache.delete(wasmKey);
-        console.warn(`[qmd] Failed to load tree-sitter grammar for ${language}: ${err}`);
+        const message = formatGrammarLoadError(language, err);
+        grammarLoadErrors.set(language, message);
+        console.warn(`[qmd] AST grammar unavailable for ${language}: ${message}`);
         return null;
     }
 }
@@ -299,7 +309,7 @@ export async function getASTStatus() {
                 languages.push({ language: lang, available: true });
             }
             else {
-                languages.push({ language: lang, available: false, error: "grammar failed to load" });
+                languages.push({ language: lang, available: false, error: grammarLoadErrors.get(lang) ?? "grammar failed to load" });
             }
         }
         catch (err) {

package/dist/bench/bench.d.ts CHANGED Viewed

@@ -18,4 +18,6 @@ export declare function runBenchmark(fixturePath: string, options?: {
     json?: boolean;
     collection?: string;
     backends?: string[];
+    dbPath?: string;
+    configPath?: string;
 }): Promise<BenchmarkResult>;

package/dist/bench/bench.js CHANGED Viewed

@@ -17,32 +17,113 @@ import { readFileSync } from "node:fs";
 import { resolve } from "node:path";
 import { createStore, getDefaultDbPath, } from "../index.js";
 import { scoreResults } from "./score.js";
+function parseStructuredQuery(query) {
+    const lines = query.split("\n").map((line, idx) => ({
+        trimmed: line.trim(),
+        number: idx + 1,
+    })).filter(line => line.trimmed.length > 0);
+    if (lines.length === 0)
+        return undefined;
+    const prefixRe = /^(lex|vec|hyde):\s*/i;
+    const intentRe = /^intent:\s*/i;
+    const searches = [];
+    let intent;
+    for (const line of lines) {
+        if (intentRe.test(line.trimmed)) {
+            if (intent !== undefined) {
+                throw new Error(`Line ${line.number}: only one intent: line is allowed per benchmark query.`);
+            }
+            intent = line.trimmed.replace(intentRe, "").trim();
+            if (!intent) {
+                throw new Error(`Line ${line.number}: intent: must include text.`);
+            }
+            continue;
+        }
+        const match = line.trimmed.match(prefixRe);
+        if (match) {
+            const type = match[1].toLowerCase();
+            const text = line.trimmed.slice(match[0].length).trim();
+            if (!text) {
+                throw new Error(`Line ${line.number} (${type}:) must include text.`);
+            }
+            searches.push({ type, query: text, line: line.number });
+            continue;
+        }
+        if (lines.length === 1) {
+            return undefined;
+        }
+        throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde:/intent: prefix.`);
+    }
+    if (intent && searches.length === 0) {
+        throw new Error("intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.");
+    }
+    return searches.length > 0 ? { searches, intent } : undefined;
+}
+function uniqueFiles(files, limit) {
+    const seen = new Set();
+    const out = [];
+    for (const file of files) {
+        if (seen.has(file))
+            continue;
+        seen.add(file);
+        out.push(file);
+        if (out.length >= limit)
+            break;
+    }
+    return out;
+}
 const BACKENDS = [
     {
         name: "bm25",
         run: async (store, query, limit, collection) => {
-            const results = await store.searchLex(query, { limit, collection });
+            const structured = parseStructuredQuery(query.query);
+            const lexQueries = structured?.searches.filter(q => q.type === "lex");
+            if (structured) {
+                const files = [];
+                for (const lex of lexQueries ?? []) {
+                    const results = await store.searchLex(lex.query, { limit, collection });
+                    files.push(...results.map((r) => r.filepath));
+                }
+                return uniqueFiles(files, limit);
+            }
+            const results = await store.searchLex(query.query, { limit, collection });
             return results.map((r) => r.filepath);
         },
     },
     {
         name: "vector",
         run: async (store, query, limit, collection) => {
-            const results = await store.searchVector(query, { limit, collection });
+            const structured = parseStructuredQuery(query.query);
+            const vectorQueries = structured?.searches.filter(q => q.type === "vec" || q.type === "hyde");
+            if (structured) {
+                const files = [];
+                for (const vectorQuery of vectorQueries ?? []) {
+                    const results = await store.searchVector(vectorQuery.query, { limit, collection });
+                    files.push(...results.map((r) => r.filepath));
+                }
+                return uniqueFiles(files, limit);
+            }
+            const results = await store.searchVector(query.query, { limit, collection });
             return results.map((r) => r.filepath);
         },
     },
     {
         name: "hybrid",
         run: async (store, query, limit, collection) => {
-            const results = await store.search({ query, limit, collection, rerank: false });
+            const structured = parseStructuredQuery(query.query);
+            const results = structured
+                ? await store.search({ queries: structured.searches, intent: structured.intent, limit, collection, rerank: false })
+                : await store.search({ query: query.query, limit, collection, rerank: false });
             return results.map((r) => r.file);
         },
     },
     {
         name: "full",
         run: async (store, query, limit, collection) => {
-            const results = await store.search({ query, limit, collection, rerank: true });
+            const structured = parseStructuredQuery(query.query);
+            const results = structured
+                ? await store.search({ queries: structured.searches, intent: structured.intent, limit, collection, rerank: true })
+                : await store.search({ query: query.query, limit, collection, rerank: true });
             return results.map((r) => r.file);
         },
     },
@@ -52,19 +133,24 @@ async function runQuery(store, backend, query, collection) {
     const start = Date.now();
     let resultFiles;
     try {
-        resultFiles = await backend.run(store, query.query, limit, collection);
+        resultFiles = await backend.run(store, query, limit, collection);
     }
-    catch (err) {
+    catch {
         // Backend may not be available (e.g., no embeddings for vector search)
         return {
             precision_at_k: 0,
             recall: 0,
+            recall_at_1: 0,
+            recall_at_3: 0,
+            recall_at_5: 0,
             mrr: 0,
             f1: 0,
             hits_at_k: 0,
             total_expected: query.expected_files.length,
             latency_ms: Date.now() - start,
             top_files: [],
+            matched_files: [],
+            unmatched_expected_files: query.expected_files,
         };
     }
     const latency_ms = Date.now() - start;
@@ -80,11 +166,11 @@ function formatTable(results) {
     const lines = [];
     const pad = (s, n) => s.slice(0, n).padEnd(n);
     const num = (n) => n.toFixed(2).padStart(5);
-    lines.push(`${pad("Query", 25)} ${pad("Backend", 8)} ${pad("P@k", 6)} ${pad("Recall", 7)} ${pad("MRR", 6)} ${pad("F1", 6)} ${pad("ms", 8)}`);
-    lines.push("-".repeat(70));
+    lines.push(`${pad("Query", 25)} ${pad("Backend", 8)} ${pad("P@k", 6)} ${pad("R@1", 6)} ${pad("R@3", 6)} ${pad("R@5", 6)} ${pad("MRR", 6)} ${pad("F1", 6)} ${pad("ms", 8)}`);
+    lines.push("-".repeat(88));
     for (const r of results) {
         for (const [backend, br] of Object.entries(r.backends)) {
-            lines.push(`${pad(r.id, 25)} ${pad(backend, 8)} ${num(br.precision_at_k)} ${num(br.recall)}  ${num(br.mrr)} ${num(br.f1)} ${String(Math.round(br.latency_ms)).padStart(7)}ms`);
+            lines.push(`${pad(r.id, 25)} ${pad(backend, 8)} ${num(br.precision_at_k)} ${num(br.recall_at_1)} ${num(br.recall_at_3)} ${num(br.recall_at_5)} ${num(br.mrr)} ${num(br.f1)} ${String(Math.round(br.latency_ms)).padStart(7)}ms`);
         }
         lines.push("");
     }
@@ -99,14 +185,17 @@ function computeSummary(results) {
             backendNames.add(name);
         }
     }
-    for (const name of backendNames) {
-        let totalP = 0, totalR = 0, totalMrr = 0, totalF1 = 0, totalLat = 0, count = 0;
+    for (const name of Array.from(backendNames)) {
+        let totalP = 0, totalR = 0, totalR1 = 0, totalR3 = 0, totalR5 = 0, totalMrr = 0, totalF1 = 0, totalLat = 0, count = 0;
         for (const r of results) {
             const br = r.backends[name];
             if (!br)
                 continue;
             totalP += br.precision_at_k;
             totalR += br.recall;
+            totalR1 += br.recall_at_1;
+            totalR3 += br.recall_at_3;
+            totalR5 += br.recall_at_5;
             totalMrr += br.mrr;
             totalF1 += br.f1;
             totalLat += br.latency_ms;
@@ -116,6 +205,9 @@ function computeSummary(results) {
             summary[name] = {
                 avg_precision: totalP / count,
                 avg_recall: totalR / count,
+                avg_recall_at_1: totalR1 / count,
+                avg_recall_at_3: totalR3 / count,
+                avg_recall_at_5: totalR5 / count,
                 avg_mrr: totalMrr / count,
                 avg_f1: totalF1 / count,
                 avg_latency_ms: totalLat / count,
@@ -132,7 +224,10 @@ export async function runBenchmark(fixturePath, options = {}) {
         throw new Error("Invalid fixture: missing 'queries' array");
     }
     // Open store
-    const store = await createStore({ dbPath: getDefaultDbPath() });
+    const store = await createStore({
+        dbPath: options.dbPath ?? getDefaultDbPath(),
+        ...(options.configPath ? { configPath: options.configPath } : {}),
+    });
     // Filter backends if requested
     const activeBackends = options.backends
         ? BACKENDS.filter(b => options.backends.includes(b.name))
@@ -178,7 +273,7 @@ export async function runBenchmark(fixturePath, options = {}) {
         const pad = (s, n) => s.slice(0, n).padEnd(n);
         const num = (n) => n.toFixed(3).padStart(6);
         for (const [name, s] of Object.entries(summary)) {
-            console.log(`  ${pad(name, 8)} P@k=${num(s.avg_precision)} Recall=${num(s.avg_recall)} MRR=${num(s.avg_mrr)} F1=${num(s.avg_f1)} Avg=${Math.round(s.avg_latency_ms)}ms`);
+            console.log(`  ${pad(name, 8)} P@k=${num(s.avg_precision)} R@1=${num(s.avg_recall_at_1)} R@3=${num(s.avg_recall_at_3)} R@5=${num(s.avg_recall_at_5)} MRR=${num(s.avg_mrr)} F1=${num(s.avg_f1)} Avg=${Math.round(s.avg_latency_ms)}ms`);
         }
     }
     return benchResult;

package/dist/bench/score.d.ts CHANGED Viewed

@@ -14,13 +14,20 @@ export declare function normalizePath(p: string): string;
  * Handles different path formats by comparing normalized suffixes.
  */
 export declare function pathsMatch(result: string, expected: string): boolean;
-/**
- * Score a set of search results against expected files.
- */
-export declare function scoreResults(resultFiles: string[], expectedFiles: string[], topK: number): {
+type ScoreMetrics = {
     precision_at_k: number;
     recall: number;
+    recall_at_1: number;
+    recall_at_3: number;
+    recall_at_5: number;
     mrr: number;
     f1: number;
     hits_at_k: number;
+    matched_files: string[];
+    unmatched_expected_files: string[];
 };
+/**
+ * Score a set of search results against expected files.
+ */
+export declare function scoreResults(resultFiles: string[], expectedFiles: string[], topK: number): ScoreMetrics;
+export {};

package/dist/bench/score.js CHANGED Viewed

@@ -10,7 +10,7 @@
  */
 export function normalizePath(p) {
     if (p.startsWith("qmd://")) {
-        // qmd://collection/path/to/file → path/to/file
+        // qmd://collection/docs/readme.md → docs/readme.md
         const withoutScheme = p.slice("qmd://".length);
         const slashIdx = withoutScheme.indexOf("/");
         p = slashIdx >= 0 ? withoutScheme.slice(slashIdx + 1) : withoutScheme;
@@ -30,23 +30,30 @@ export function pathsMatch(result, expected) {
         return true;
     return false;
 }
+function hitsWithin(resultFiles, expectedFiles, k) {
+    const topKResults = resultFiles.slice(0, k);
+    let hits = 0;
+    for (const expected of expectedFiles) {
+        if (topKResults.some(r => pathsMatch(r, expected))) {
+            hits++;
+        }
+    }
+    return hits;
+}
 /**
  * Score a set of search results against expected files.
  */
 export function scoreResults(resultFiles, expectedFiles, topK) {
     // Count hits in top-k
-    const topKResults = resultFiles.slice(0, topK);
-    let hitsAtK = 0;
-    for (const expected of expectedFiles) {
-        if (topKResults.some(r => pathsMatch(r, expected))) {
-            hitsAtK++;
-        }
-    }
-    // Count total hits anywhere
-    let totalHits = 0;
+    const hitsAtK = hitsWithin(resultFiles, expectedFiles, topK);
+    const matchedFiles = [];
+    const unmatchedExpectedFiles = [];
     for (const expected of expectedFiles) {
         if (resultFiles.some(r => pathsMatch(r, expected))) {
-            totalHits++;
+            matchedFiles.push(expected);
+        }
+        else {
+            unmatchedExpectedFiles.push(expected);
         }
     }
     // MRR: reciprocal rank of first relevant result
@@ -59,9 +66,23 @@ export function scoreResults(resultFiles, expectedFiles, topK) {
     }
     const denominator = Math.min(topK, expectedFiles.length);
     const precision_at_k = denominator > 0 ? hitsAtK / denominator : 0;
-    const recall = expectedFiles.length > 0 ? totalHits / expectedFiles.length : 0;
+    const recall = expectedFiles.length > 0 ? matchedFiles.length / expectedFiles.length : 0;
+    const recall_at_1 = expectedFiles.length > 0 ? hitsWithin(resultFiles, expectedFiles, 1) / expectedFiles.length : 0;
+    const recall_at_3 = expectedFiles.length > 0 ? hitsWithin(resultFiles, expectedFiles, 3) / expectedFiles.length : 0;
+    const recall_at_5 = expectedFiles.length > 0 ? hitsWithin(resultFiles, expectedFiles, 5) / expectedFiles.length : 0;
     const f1 = precision_at_k + recall > 0
         ? 2 * (precision_at_k * recall) / (precision_at_k + recall)
         : 0;
-    return { precision_at_k, recall, mrr, f1, hits_at_k: hitsAtK };
+    return {
+        precision_at_k,
+        recall,
+        recall_at_1,
+        recall_at_3,
+        recall_at_5,
+        mrr,
+        f1,
+        hits_at_k: hitsAtK,
+        matched_files: matchedFiles,
+        unmatched_expected_files: unmatchedExpectedFiles,
+    };
 }

package/dist/bench/types.d.ts CHANGED Viewed

@@ -34,6 +34,12 @@ export interface BackendResult {
     precision_at_k: number;
     /** Fraction of expected files found anywhere in results */
     recall: number;
+    /** Fraction of expected files found in the first result */
+    recall_at_1: number;
+    /** Fraction of expected files found in the top 3 results */
+    recall_at_3: number;
+    /** Fraction of expected files found in the top 5 results */
+    recall_at_5: number;
     /** Reciprocal rank of first relevant result (1/rank, 0 if not found) */
     mrr: number;
     /** Harmonic mean of precision_at_k and recall */
@@ -46,6 +52,10 @@ export interface BackendResult {
     latency_ms: number;
     /** Top result file paths (for inspection) */
     top_files: string[];
+    /** Expected files that were found anywhere in the returned result set */
+    matched_files: string[];
+    /** Expected files missing from the returned result set */
+    unmatched_expected_files: string[];
 }
 export interface QueryResult {
     id: string;
@@ -60,6 +70,9 @@ export interface BenchmarkResult {
     summary: Record<string, {
         avg_precision: number;
         avg_recall: number;
+        avg_recall_at_1: number;
+        avg_recall_at_3: number;
+        avg_recall_at_5: number;
         avg_mrr: number;
         avg_f1: number;
         avg_latency_ms: number;

package/dist/cli/qmd.d.ts CHANGED Viewed

@@ -1,2 +1,28 @@
+import { type OutputFormat } from "./formatter.js";
+type CliLifecycleWritable = {
+    write(chunk: string | Uint8Array, callback?: (error?: Error | null) => void): boolean;
+};
+type FinishSuccessfulCliCommandOptions = {
+    command: string;
+    format?: OutputFormat;
+    cleanup?: () => Promise<void>;
+    exit?: (code: number) => void;
+    immediateExit?: (code: number) => void;
+    stdout?: CliLifecycleWritable;
+    stderr?: CliLifecycleWritable;
+    platform?: NodeJS.Platform;
+};
+/**
+ * Finish a successful CLI command after output has been flushed. On macOS JSON
+ * query runs, skip normal native teardown and use Node/Bun's immediate exit path:
+ * ggml Metal can abort from C++ finalizers after valid JSON has already been
+ * produced (#368). This wrapper is only reached after the command completed, so
+ * real query failures still exit through the normal error path before this runs.
+ */
+export declare function finishSuccessfulCliCommand(options: FinishSuccessfulCliCommandOptions): Promise<void>;
+export declare function resolveEmbedModelForCli(): string;
+export declare function resolveGenerateModelForCli(): string;
+export declare function resolveRerankModelForCli(): string;
 export declare function buildEditorUri(template: string, absolutePath: string, line: number, col: number): string;
 export declare function termLink(text: string, url: string, isTTY?: boolean): string;
+export {};