@tobilu/qmd 2.1.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,87 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [2.5.1] - 2026-05-20
6
+
7
+ ### Changes
8
+
9
+ - Release: publish from GitHub Actions via npm Trusted Publishing/OIDC instead of a long-lived `NPM_TOKEN` secret.
10
+
11
+ ## [2.5.0] - 2026-05-19
12
+
13
+ ### Changes
14
+
15
+ - Dependencies: update core SQLite/config/chunking packages (`better-sqlite3`, `yaml`, `web-tree-sitter`, `tree-sitter-go`, and `tree-sitter-python`) while keeping incompatible `zod`, `tsx`, and `vitest` majors pinned.
16
+ - Agent skills: add `qmd skills list|get|path` to serve version-matched runtime skill instructions from the installed CLI, and make `qmd skill install` write a stable discovery stub so installed agent skills do not go stale after QMD upgrades.
17
+ - CLI: add `qmd doctor` for index/runtime diagnostics, including SQLite/sqlite-vec versions, embedding fingerprint freshness, mixed-fingerprint detection, safe legacy fingerprint adoption, and content-hash sampling.
18
+
19
+ ### Fixes
20
+
21
+ - Launcher: prefer runnable TypeScript source in git checkouts even when ignored `dist/` artifacts exist, while packaged installs continue to run `dist/`.
22
+ - GPU: keep node-llama-cpp's documented `gpu: "auto"` initialization as the primary path, then perform no-build packaged CUDA/Vulkan/Metal probes only if auto falls back to CPU.
23
+ - CLI: move GPU/CPU runtime diagnostics out of `qmd status`; use `qmd doctor` for device probing and related environment guidance.
24
+ - CLI: point unexpected command/setup failures toward `qmd doctor` so diagnostics are the default next step when QMD behaves incorrectly.
25
+ - Doctor: explicitly warn when `content_vectors` contains multiple non-empty embedding fingerprint names, with the per-fingerprint document/chunk breakdown.
26
+ - Embed: make the TTY progress line label byte-based input progress explicitly, show embedded chunks as a count, and shorten the displayed model name.
27
+ - Embed: retain per-chunk failure details, retry failed chunks after later successful embeds and again when no other chunks remain, clear recovered errors, and cap retries to avoid endless loops.
28
+ - Tests: expand the container smoke harness to cover npm-global, npx-style, and Bun-global install scenarios, always checking auto and `QMD_FORCE_CPU=1` doctor modes, with opt-in tiny `qmd embed` and GPU probe runs for supported container runtimes.
29
+ - Embedding: fingerprint vector metadata using the active embedding model and formatting/chunking parameters so stale vectors are treated as pending after search semantics change. Legacy `content_vectors` columns are migrated lazily on first vector-health/write use to preserve fast QMD startup.
30
+
31
+ - Skill: expand the packaged QMD skill with retrieval-first workflows, structured query examples, wiki/source collection guidance, and safe fallbacks when model-backed search is unavailable.
32
+ - Tests: make `bun run test` execute the local unit suite under both Node/Vitest and Bun (`test:node` + `test:bun`) so runtime-specific regressions are caught before CI.
33
+ - Model config: centralize embedding/rerank/generation model resolution so `qmd embed`, `status`, `query`, `vsearch`, `pull`, SDK vector search, and `bench` use the same active `.qmd/index.yaml` model hints and environment fallbacks.
34
+ - GPU/status: `qmd status` now uses the same embedding model identity as `qmd embed` when computing pending embeddings, so URI-backed embeddings are not incorrectly reported as pending under the legacy `embeddinggemma` alias.
35
+ - GPU status: `qmd status` now always shows GPU mode/configuration without unsafe native probing, and CPU-fallback warnings point to `QMD_STATUS_DEVICE_PROBE=1 qmd status` for an actual backend probe. The no-GPU warning is emitted once per process instead of once per LLM instance during benchmarks.
36
+ - GPU: add `QMD_FORCE_CPU=1` / `--no-gpu` to bypass CUDA/Vulkan/Metal probing entirely, and route native llama.cpp stdout noise to stderr so JSON output stays parseable during search/query commands.
37
+ - Snippet line numbers: `qmd_query` (MCP), HTTP `/query`, and `qmd query`
38
+ (CLI JSON output and snippet headers) now return absolute source-file
39
+ line numbers instead of chunk-local ones, so the `line` field can be
40
+ passed back to `qmd_get` as `fromLine` without a separate lookup.
41
+ Snippet selection remains scoped to the best matching chunk
42
+ (preserves #149).
43
+ - CLI: `qmd query --full` now emits the full document body in all output
44
+ formats (json, csv, md, xml), restoring the documented behavior of the
45
+ flag. Previously it returned only the best matching chunk (~3.6KB max
46
+ per result). Output payload for `--full` queries is now proportional
47
+ to total document size.
48
+ - macOS Metal: `qmd query --json` now flushes successful JSON output and uses a safe immediate-exit path on Darwin to avoid ggml Metal finalizer aborts; other commands still dispose LLM contexts/models before the llama runtime. #368
49
+ - Embedding: require complete chunk coverage before treating a document as
50
+ embedded, remove partial vectors when chunk/session failures leave a
51
+ document incomplete, and keep `qmd status` pending counts honest after
52
+ interrupted long embed runs. #637 #378
53
+ - Embedding: `qmd embed -c <collection>` now scopes pending-doc selection
54
+ to the requested collection instead of embedding global pending work.
55
+ Scoped `--force` clears only collection-owned vectors, preserves shared
56
+ hashes referenced by sibling collections, and drops `vectors_vec` only
57
+ when the scoped clear empties all vectors.
58
+ - Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591
59
+ - MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593
60
+ - CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
61
+ - Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519
62
+ - MCP: make `qmd mcp --index <name>` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343
63
+ - Embedding: respect `QMD_EMBED_MODEL` consistently for vector indexing and vector-backed search, with default-model fallback when unset.
64
+ - Config: use one home-directory resolver for YAML config and the default SQLite cache path, avoiding Windows CLI/MCP split-brain when `HOME` is unset.
65
+ - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
66
+ - Fix: preserve original filename case in `handelize()`. The previous
67
+ `.toLowerCase()` call made indexed paths unreachable on case-sensitive
68
+ filesystems (Linux). `qmd update` automatically migrates legacy
69
+ lowercase paths without re-embedding.
70
+ - CLI: make `qmd status` skip native `node-llama-cpp` device probing by
71
+ default so status stays safe on machines with broken or unsupported GPU
72
+ drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in.
73
+ - CLI: lazy-load `node-llama-cpp` so lightweight commands such as
74
+ `qmd status` do not import native ML dependencies or trigger llama.cpp
75
+ builds on ARM/no-GPU machines. #491
76
+ - Store: keep content rows referenced by inactive documents during orphan
77
+ cleanup so `qmd update` preserves soft-deleted tombstones for removed
78
+ files. #585
79
+ - Packaging: install AST grammar WASM packages as required dependencies so
80
+ Bun global installs include TypeScript/TSX/JavaScript grammars, and add a
81
+ `smoke:package-grammars` verification command. #595
82
+ - Launcher: add wrapper smoke coverage for scoped package, npm/npx,
83
+ Homebrew/Linuxbrew, Bun global symlink layouts, and `$BUN_INSTALL`
84
+ false-positive runtime selection regressions. #351 #353 #354 #356 #358 #359
85
+
5
86
  ## [2.1.0] - 2026-04-05
6
87
 
7
88
  Code files now chunk at function and class boundaries via tree-sitter,
package/README.md CHANGED
@@ -797,6 +797,9 @@ llm_cache -- Cached LLM responses (query expansion, rerank scores)
797
797
  | Variable | Default | Description |
798
798
  |----------|---------|-------------|
799
799
  | `XDG_CACHE_HOME` | `~/.cache` | Cache directory location |
800
+ | `QMD_LLAMA_GPU` | `auto` | Force llama.cpp GPU backend (`metal`, `vulkan`, `cuda`) or disable GPU with `false` |
801
+ | `QMD_FORCE_CPU` | unset | Set to `1`/`true` to force CPU mode before any CUDA/Vulkan/Metal probing. Equivalent CLI flag: `--no-gpu`. |
802
+ | `QMD_EMBED_PARALLELISM` | automatic | Override embedding/reranking context parallelism (1-8). Windows CUDA defaults to `1` because parallel CUDA contexts can crash with `ggml-cuda.cu:98`; use Vulkan or raise this only if your driver is stable. |
800
803
 
801
804
  ## How It Works
802
805
 
package/bin/qmd CHANGED
@@ -15,6 +15,42 @@ done
15
15
  # to avoid native module ABI mismatches (e.g., better-sqlite3 compiled for bun vs node)
16
16
  DIR="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
17
17
 
18
+ # MCP stdio reserves stdout exclusively for JSON-RPC frames. node-llama-cpp
19
+ # / llama.cpp / ggml can write native logs directly to stdout before JS-level
20
+ # log handlers are attached, so seed the native quiet env before Node/Bun imports
21
+ # the CLI and its LLM modules. Preserve explicit user values when provided.
22
+ if [ "$1" = "mcp" ]; then
23
+ export LLAMA_LOG_LEVEL="${LLAMA_LOG_LEVEL:-error}"
24
+ export GGML_LOG_LEVEL="${GGML_LOG_LEVEL:-error}"
25
+ export GGML_BACKEND_SILENT="${GGML_BACKEND_SILENT:-1}"
26
+ fi
27
+
28
+ JS="$DIR/dist/cli/qmd.js"
29
+ TS="$DIR/src/cli/qmd.ts"
30
+
31
+ # In published packages, bin/qmd must run dist/. In a git checkout, however,
32
+ # dist/ is often ignored and can be stale after git reset or branch switches.
33
+ # Prefer source mode only for checkouts so ./bin/qmd reflects the checked-out
34
+ # source without changing packaged/runtime behavior.
35
+ if [ -e "$DIR/.git" ] && [ -f "$TS" ]; then
36
+ if [ -f "$DIR/bun.lock" ] || [ -f "$DIR/bun.lockb" ]; then
37
+ if command -v bun >/dev/null 2>&1; then
38
+ exec bun "$TS" "$@"
39
+ fi
40
+ fi
41
+ if [ -f "$DIR/node_modules/tsx/dist/cli.mjs" ]; then
42
+ exec node "$DIR/node_modules/tsx/dist/cli.mjs" "$TS" "$@"
43
+ fi
44
+ fi
45
+
46
+ if [ ! -f "$JS" ]; then
47
+ echo "qmd is not built: missing $JS" >&2
48
+ echo "Run: bun install && bun run build" >&2
49
+ echo "Or: npm install && npm run build" >&2
50
+ echo "After building, run: qmd doctor" >&2
51
+ exit 1
52
+ fi
53
+
18
54
  # Detect the package manager that installed dependencies by checking lockfiles.
19
55
  # $BUN_INSTALL is intentionally NOT checked — it only indicates that bun exists
20
56
  # on the system, not that it was used to install this package (see #361).
@@ -24,9 +60,9 @@ DIR="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
24
60
  # builds that use npm would be incorrectly routed to bun, causing ABI
25
61
  # mismatches with better-sqlite3 / sqlite-vec (see #381).
26
62
  if [ -f "$DIR/package-lock.json" ]; then
27
- exec node "$DIR/dist/cli/qmd.js" "$@"
63
+ exec node "$JS" "$@"
28
64
  elif [ -f "$DIR/bun.lock" ] || [ -f "$DIR/bun.lockb" ]; then
29
- exec bun "$DIR/dist/cli/qmd.js" "$@"
65
+ exec bun "$JS" "$@"
30
66
  else
31
- exec node "$DIR/dist/cli/qmd.js" "$@"
67
+ exec node "$JS" "$@"
32
68
  fi
package/dist/ast.d.ts CHANGED
@@ -24,6 +24,7 @@ export type SupportedLanguage = "typescript" | "tsx" | "javascript" | "python" |
24
24
  * Returns null for unsupported or unknown extensions (including .md).
25
25
  */
26
26
  export declare function detectLanguage(filepath: string): SupportedLanguage | null;
27
+ export declare function formatGrammarLoadError(language: SupportedLanguage, err: unknown): string;
27
28
  /**
28
29
  * Parse a source file and return break points at AST node boundaries.
29
30
  *
package/dist/ast.js CHANGED
@@ -47,13 +47,19 @@ export function detectLanguage(filepath) {
47
47
  * Maps language to the npm package and wasm filename for the grammar.
48
48
  */
49
49
  const GRAMMAR_MAP = {
50
- typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
51
- tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm" },
52
- javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
53
- python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm" },
54
- go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm" },
55
- rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm" },
50
+ typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
51
+ tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm", version: "0.23.2" },
52
+ javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
53
+ python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm", version: "0.23.4" },
54
+ go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm", version: "0.23.4" },
55
+ rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm", version: "0.24.0" },
56
56
  };
57
/**
 * Build the human-readable message reported when a tree-sitter grammar
 * cannot be loaded.
 *
 * @param language Language key used to look up the grammar in GRAMMAR_MAP.
 * @param err      The value caught while loading; may be an Error or anything else.
 * @returns A single-line message naming the package/wasm file, the underlying
 *          failure detail, and the suggested repair command.
 */
export function formatGrammarLoadError(language, err) {
    const detail = err instanceof Error ? err.message : String(err);
    const grammar = GRAMMAR_MAP[language];
    // This function runs on an error path; it must never throw itself, or the
    // original failure would be masked by a TypeError on `grammar.pkg`.
    if (!grammar) {
        return `no tree-sitter grammar is registered for ${String(language)} (${detail}); falling back to regex chunking.`;
    }
    return `${grammar.pkg}/${grammar.wasm} failed to load (${detail}); falling back to regex chunking. ` +
        `Repair a broken global install with: bun add ${grammar.pkg}@${grammar.version}`;
}
57
63
  // =============================================================================
58
64
  // Per-Language Query Definitions
59
65
  // =============================================================================
@@ -152,6 +158,8 @@ let QueryClass = null;
152
158
  let initPromise = null;
153
159
  /** Languages that have already failed to load — warn only once per process. */
154
160
  const failedLanguages = new Set();
161
+ /** Last grammar load error by language, for status output. */
162
+ const grammarLoadErrors = new Map();
155
163
  /** Cached grammar load promises. */
156
164
  const grammarCache = new Map();
157
165
  /** Cached compiled queries per language. */
@@ -200,7 +208,9 @@ async function loadGrammar(language) {
200
208
  catch (err) {
201
209
  failedLanguages.add(language);
202
210
  grammarCache.delete(wasmKey);
203
- console.warn(`[qmd] Failed to load tree-sitter grammar for ${language}: ${err}`);
211
+ const message = formatGrammarLoadError(language, err);
212
+ grammarLoadErrors.set(language, message);
213
+ console.warn(`[qmd] AST grammar unavailable for ${language}: ${message}`);
204
214
  return null;
205
215
  }
206
216
  }
@@ -299,7 +309,7 @@ export async function getASTStatus() {
299
309
  languages.push({ language: lang, available: true });
300
310
  }
301
311
  else {
302
- languages.push({ language: lang, available: false, error: "grammar failed to load" });
312
+ languages.push({ language: lang, available: false, error: grammarLoadErrors.get(lang) ?? "grammar failed to load" });
303
313
  }
304
314
  }
305
315
  catch (err) {
@@ -18,4 +18,6 @@ export declare function runBenchmark(fixturePath: string, options?: {
18
18
  json?: boolean;
19
19
  collection?: string;
20
20
  backends?: string[];
21
+ dbPath?: string;
22
+ configPath?: string;
21
23
  }): Promise<BenchmarkResult>;
@@ -17,32 +17,113 @@ import { readFileSync } from "node:fs";
17
17
  import { resolve } from "node:path";
18
18
  import { createStore, getDefaultDbPath, } from "../index.js";
19
19
  import { scoreResults } from "./score.js";
20
/**
 * Parse a benchmark query written in the structured multi-line form, where
 * each non-blank line is `lex:`, `vec:`, `hyde:` or `intent:` followed by text.
 *
 * @param query Raw query text from the fixture.
 * @returns `{ searches, intent }` when at least one typed line is present;
 *          `undefined` for an empty query or a single un-prefixed line
 *          (i.e. a plain query).
 * @throws Error when a typed/intent line has no text, when more than one
 *         `intent:` line is present, when `intent:` appears without any search
 *         line, or when a multi-line query contains an un-prefixed line.
 */
function parseStructuredQuery(query) {
    const intentPattern = /^intent:\s*/i;
    const typedPattern = /^(lex|vec|hyde):\s*/i;
    // Keep 1-based source line numbers so error messages point at the fixture text.
    const entries = [];
    query.split("\n").forEach((raw, index) => {
        const text = raw.trim();
        if (text.length > 0) {
            entries.push({ text, lineNo: index + 1 });
        }
    });
    if (entries.length === 0) {
        return undefined;
    }
    const searches = [];
    let intent;
    for (const { text, lineNo } of entries) {
        if (intentPattern.test(text)) {
            if (intent !== undefined) {
                throw new Error(`Line ${lineNo}: only one intent: line is allowed per benchmark query.`);
            }
            const value = text.replace(intentPattern, "").trim();
            if (!value) {
                throw new Error(`Line ${lineNo}: intent: must include text.`);
            }
            intent = value;
            continue;
        }
        const typed = typedPattern.exec(text);
        if (typed === null) {
            // A lone un-prefixed line is an ordinary (non-structured) query.
            if (entries.length === 1) {
                return undefined;
            }
            throw new Error(`Line ${lineNo} is missing a lex:/vec:/hyde:/intent: prefix.`);
        }
        const type = typed[1].toLowerCase();
        const body = text.slice(typed[0].length).trim();
        if (!body) {
            throw new Error(`Line ${lineNo} (${type}:) must include text.`);
        }
        searches.push({ type, query: body, line: lineNo });
    }
    if (searches.length === 0) {
        if (intent) {
            throw new Error("intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.");
        }
        return undefined;
    }
    return { searches, intent };
}
62
/**
 * Deduplicate file paths, preserving first-seen order, and keep at most
 * `limit` entries.
 *
 * @param files Iterable of file paths, possibly containing repeats.
 * @param limit Maximum number of distinct paths to return.
 * @returns A new array of distinct paths in first-seen order.
 */
function uniqueFiles(files, limit) {
    const out = [];
    // The cap below is only evaluated after a push, so without this guard a
    // zero or negative limit would still yield one element.
    if (typeof limit === "number" && limit <= 0) {
        return out;
    }
    const seen = new Set();
    for (const file of files) {
        if (seen.has(file)) {
            continue;
        }
        seen.add(file);
        out.push(file);
        if (out.length >= limit) {
            break;
        }
    }
    return out;
}
20
75
  const BACKENDS = [
21
76
  {
22
77
  name: "bm25",
23
78
  run: async (store, query, limit, collection) => {
24
- const results = await store.searchLex(query, { limit, collection });
79
+ const structured = parseStructuredQuery(query.query);
80
+ const lexQueries = structured?.searches.filter(q => q.type === "lex");
81
+ if (structured) {
82
+ const files = [];
83
+ for (const lex of lexQueries ?? []) {
84
+ const results = await store.searchLex(lex.query, { limit, collection });
85
+ files.push(...results.map((r) => r.filepath));
86
+ }
87
+ return uniqueFiles(files, limit);
88
+ }
89
+ const results = await store.searchLex(query.query, { limit, collection });
25
90
  return results.map((r) => r.filepath);
26
91
  },
27
92
  },
28
93
  {
29
94
  name: "vector",
30
95
  run: async (store, query, limit, collection) => {
31
- const results = await store.searchVector(query, { limit, collection });
96
+ const structured = parseStructuredQuery(query.query);
97
+ const vectorQueries = structured?.searches.filter(q => q.type === "vec" || q.type === "hyde");
98
+ if (structured) {
99
+ const files = [];
100
+ for (const vectorQuery of vectorQueries ?? []) {
101
+ const results = await store.searchVector(vectorQuery.query, { limit, collection });
102
+ files.push(...results.map((r) => r.filepath));
103
+ }
104
+ return uniqueFiles(files, limit);
105
+ }
106
+ const results = await store.searchVector(query.query, { limit, collection });
32
107
  return results.map((r) => r.filepath);
33
108
  },
34
109
  },
35
110
  {
36
111
  name: "hybrid",
37
112
  run: async (store, query, limit, collection) => {
38
- const results = await store.search({ query, limit, collection, rerank: false });
113
+ const structured = parseStructuredQuery(query.query);
114
+ const results = structured
115
+ ? await store.search({ queries: structured.searches, intent: structured.intent, limit, collection, rerank: false })
116
+ : await store.search({ query: query.query, limit, collection, rerank: false });
39
117
  return results.map((r) => r.file);
40
118
  },
41
119
  },
42
120
  {
43
121
  name: "full",
44
122
  run: async (store, query, limit, collection) => {
45
- const results = await store.search({ query, limit, collection, rerank: true });
123
+ const structured = parseStructuredQuery(query.query);
124
+ const results = structured
125
+ ? await store.search({ queries: structured.searches, intent: structured.intent, limit, collection, rerank: true })
126
+ : await store.search({ query: query.query, limit, collection, rerank: true });
46
127
  return results.map((r) => r.file);
47
128
  },
48
129
  },
@@ -52,19 +133,24 @@ async function runQuery(store, backend, query, collection) {
52
133
  const start = Date.now();
53
134
  let resultFiles;
54
135
  try {
55
- resultFiles = await backend.run(store, query.query, limit, collection);
136
+ resultFiles = await backend.run(store, query, limit, collection);
56
137
  }
57
- catch (err) {
138
+ catch {
58
139
  // Backend may not be available (e.g., no embeddings for vector search)
59
140
  return {
60
141
  precision_at_k: 0,
61
142
  recall: 0,
143
+ recall_at_1: 0,
144
+ recall_at_3: 0,
145
+ recall_at_5: 0,
62
146
  mrr: 0,
63
147
  f1: 0,
64
148
  hits_at_k: 0,
65
149
  total_expected: query.expected_files.length,
66
150
  latency_ms: Date.now() - start,
67
151
  top_files: [],
152
+ matched_files: [],
153
+ unmatched_expected_files: query.expected_files,
68
154
  };
69
155
  }
70
156
  const latency_ms = Date.now() - start;
@@ -80,11 +166,11 @@ function formatTable(results) {
80
166
  const lines = [];
81
167
  const pad = (s, n) => s.slice(0, n).padEnd(n);
82
168
  const num = (n) => n.toFixed(2).padStart(5);
83
- lines.push(`${pad("Query", 25)} ${pad("Backend", 8)} ${pad("P@k", 6)} ${pad("Recall", 7)} ${pad("MRR", 6)} ${pad("F1", 6)} ${pad("ms", 8)}`);
84
- lines.push("-".repeat(70));
169
+ lines.push(`${pad("Query", 25)} ${pad("Backend", 8)} ${pad("P@k", 6)} ${pad("R@1", 6)} ${pad("R@3", 6)} ${pad("R@5", 6)} ${pad("MRR", 6)} ${pad("F1", 6)} ${pad("ms", 8)}`);
170
+ lines.push("-".repeat(88));
85
171
  for (const r of results) {
86
172
  for (const [backend, br] of Object.entries(r.backends)) {
87
- lines.push(`${pad(r.id, 25)} ${pad(backend, 8)} ${num(br.precision_at_k)} ${num(br.recall)} ${num(br.mrr)} ${num(br.f1)} ${String(Math.round(br.latency_ms)).padStart(7)}ms`);
173
+ lines.push(`${pad(r.id, 25)} ${pad(backend, 8)} ${num(br.precision_at_k)} ${num(br.recall_at_1)} ${num(br.recall_at_3)} ${num(br.recall_at_5)} ${num(br.mrr)} ${num(br.f1)} ${String(Math.round(br.latency_ms)).padStart(7)}ms`);
88
174
  }
89
175
  lines.push("");
90
176
  }
@@ -99,14 +185,17 @@ function computeSummary(results) {
99
185
  backendNames.add(name);
100
186
  }
101
187
  }
102
- for (const name of backendNames) {
103
- let totalP = 0, totalR = 0, totalMrr = 0, totalF1 = 0, totalLat = 0, count = 0;
188
+ for (const name of Array.from(backendNames)) {
189
+ let totalP = 0, totalR = 0, totalR1 = 0, totalR3 = 0, totalR5 = 0, totalMrr = 0, totalF1 = 0, totalLat = 0, count = 0;
104
190
  for (const r of results) {
105
191
  const br = r.backends[name];
106
192
  if (!br)
107
193
  continue;
108
194
  totalP += br.precision_at_k;
109
195
  totalR += br.recall;
196
+ totalR1 += br.recall_at_1;
197
+ totalR3 += br.recall_at_3;
198
+ totalR5 += br.recall_at_5;
110
199
  totalMrr += br.mrr;
111
200
  totalF1 += br.f1;
112
201
  totalLat += br.latency_ms;
@@ -116,6 +205,9 @@ function computeSummary(results) {
116
205
  summary[name] = {
117
206
  avg_precision: totalP / count,
118
207
  avg_recall: totalR / count,
208
+ avg_recall_at_1: totalR1 / count,
209
+ avg_recall_at_3: totalR3 / count,
210
+ avg_recall_at_5: totalR5 / count,
119
211
  avg_mrr: totalMrr / count,
120
212
  avg_f1: totalF1 / count,
121
213
  avg_latency_ms: totalLat / count,
@@ -132,7 +224,10 @@ export async function runBenchmark(fixturePath, options = {}) {
132
224
  throw new Error("Invalid fixture: missing 'queries' array");
133
225
  }
134
226
  // Open store
135
- const store = await createStore({ dbPath: getDefaultDbPath() });
227
+ const store = await createStore({
228
+ dbPath: options.dbPath ?? getDefaultDbPath(),
229
+ ...(options.configPath ? { configPath: options.configPath } : {}),
230
+ });
136
231
  // Filter backends if requested
137
232
  const activeBackends = options.backends
138
233
  ? BACKENDS.filter(b => options.backends.includes(b.name))
@@ -178,7 +273,7 @@ export async function runBenchmark(fixturePath, options = {}) {
178
273
  const pad = (s, n) => s.slice(0, n).padEnd(n);
179
274
  const num = (n) => n.toFixed(3).padStart(6);
180
275
  for (const [name, s] of Object.entries(summary)) {
181
- console.log(` ${pad(name, 8)} P@k=${num(s.avg_precision)} Recall=${num(s.avg_recall)} MRR=${num(s.avg_mrr)} F1=${num(s.avg_f1)} Avg=${Math.round(s.avg_latency_ms)}ms`);
276
+ console.log(` ${pad(name, 8)} P@k=${num(s.avg_precision)} R@1=${num(s.avg_recall_at_1)} R@3=${num(s.avg_recall_at_3)} R@5=${num(s.avg_recall_at_5)} MRR=${num(s.avg_mrr)} F1=${num(s.avg_f1)} Avg=${Math.round(s.avg_latency_ms)}ms`);
182
277
  }
183
278
  }
184
279
  return benchResult;
@@ -14,13 +14,20 @@ export declare function normalizePath(p: string): string;
14
14
  * Handles different path formats by comparing normalized suffixes.
15
15
  */
16
16
  export declare function pathsMatch(result: string, expected: string): boolean;
17
- /**
18
- * Score a set of search results against expected files.
19
- */
20
- export declare function scoreResults(resultFiles: string[], expectedFiles: string[], topK: number): {
17
+ type ScoreMetrics = {
21
18
  precision_at_k: number;
22
19
  recall: number;
20
+ recall_at_1: number;
21
+ recall_at_3: number;
22
+ recall_at_5: number;
23
23
  mrr: number;
24
24
  f1: number;
25
25
  hits_at_k: number;
26
+ matched_files: string[];
27
+ unmatched_expected_files: string[];
26
28
  };
29
+ /**
30
+ * Score a set of search results against expected files.
31
+ */
32
+ export declare function scoreResults(resultFiles: string[], expectedFiles: string[], topK: number): ScoreMetrics;
33
+ export {};
@@ -10,7 +10,7 @@
10
10
  */
11
11
  export function normalizePath(p) {
12
12
  if (p.startsWith("qmd://")) {
13
- // qmd://collection/path/to/file → path/to/file
13
+ // qmd://collection/docs/readme.md → docs/readme.md
14
14
  const withoutScheme = p.slice("qmd://".length);
15
15
  const slashIdx = withoutScheme.indexOf("/");
16
16
  p = slashIdx >= 0 ? withoutScheme.slice(slashIdx + 1) : withoutScheme;
@@ -30,23 +30,30 @@ export function pathsMatch(result, expected) {
30
30
  return true;
31
31
  return false;
32
32
  }
33
/**
 * Count how many expected files are matched by at least one of the first
 * `k` results.
 *
 * @param resultFiles   Ranked result paths.
 * @param expectedFiles Paths that should have been retrieved.
 * @param k             Number of leading results to consider.
 * @returns The number of expected files with a match in the first `k` results.
 */
function hitsWithin(resultFiles, expectedFiles, k) {
    const head = resultFiles.slice(0, k);
    return expectedFiles.filter(expected => head.some(candidate => pathsMatch(candidate, expected))).length;
}
33
43
  /**
34
44
  * Score a set of search results against expected files.
35
45
  */
36
46
  export function scoreResults(resultFiles, expectedFiles, topK) {
37
47
  // Count hits in top-k
38
- const topKResults = resultFiles.slice(0, topK);
39
- let hitsAtK = 0;
40
- for (const expected of expectedFiles) {
41
- if (topKResults.some(r => pathsMatch(r, expected))) {
42
- hitsAtK++;
43
- }
44
- }
45
- // Count total hits anywhere
46
- let totalHits = 0;
48
+ const hitsAtK = hitsWithin(resultFiles, expectedFiles, topK);
49
+ const matchedFiles = [];
50
+ const unmatchedExpectedFiles = [];
47
51
  for (const expected of expectedFiles) {
48
52
  if (resultFiles.some(r => pathsMatch(r, expected))) {
49
- totalHits++;
53
+ matchedFiles.push(expected);
54
+ }
55
+ else {
56
+ unmatchedExpectedFiles.push(expected);
50
57
  }
51
58
  }
52
59
  // MRR: reciprocal rank of first relevant result
@@ -59,9 +66,23 @@ export function scoreResults(resultFiles, expectedFiles, topK) {
59
66
  }
60
67
  const denominator = Math.min(topK, expectedFiles.length);
61
68
  const precision_at_k = denominator > 0 ? hitsAtK / denominator : 0;
62
- const recall = expectedFiles.length > 0 ? totalHits / expectedFiles.length : 0;
69
+ const recall = expectedFiles.length > 0 ? matchedFiles.length / expectedFiles.length : 0;
70
+ const recall_at_1 = expectedFiles.length > 0 ? hitsWithin(resultFiles, expectedFiles, 1) / expectedFiles.length : 0;
71
+ const recall_at_3 = expectedFiles.length > 0 ? hitsWithin(resultFiles, expectedFiles, 3) / expectedFiles.length : 0;
72
+ const recall_at_5 = expectedFiles.length > 0 ? hitsWithin(resultFiles, expectedFiles, 5) / expectedFiles.length : 0;
63
73
  const f1 = precision_at_k + recall > 0
64
74
  ? 2 * (precision_at_k * recall) / (precision_at_k + recall)
65
75
  : 0;
66
- return { precision_at_k, recall, mrr, f1, hits_at_k: hitsAtK };
76
+ return {
77
+ precision_at_k,
78
+ recall,
79
+ recall_at_1,
80
+ recall_at_3,
81
+ recall_at_5,
82
+ mrr,
83
+ f1,
84
+ hits_at_k: hitsAtK,
85
+ matched_files: matchedFiles,
86
+ unmatched_expected_files: unmatchedExpectedFiles,
87
+ };
67
88
  }
@@ -34,6 +34,12 @@ export interface BackendResult {
34
34
  precision_at_k: number;
35
35
  /** Fraction of expected files found anywhere in results */
36
36
  recall: number;
37
+ /** Fraction of expected files found in the first result */
38
+ recall_at_1: number;
39
+ /** Fraction of expected files found in the top 3 results */
40
+ recall_at_3: number;
41
+ /** Fraction of expected files found in the top 5 results */
42
+ recall_at_5: number;
37
43
  /** Reciprocal rank of first relevant result (1/rank, 0 if not found) */
38
44
  mrr: number;
39
45
  /** Harmonic mean of precision_at_k and recall */
@@ -46,6 +52,10 @@ export interface BackendResult {
46
52
  latency_ms: number;
47
53
  /** Top result file paths (for inspection) */
48
54
  top_files: string[];
55
+ /** Expected files that were found anywhere in the returned result set */
56
+ matched_files: string[];
57
+ /** Expected files missing from the returned result set */
58
+ unmatched_expected_files: string[];
49
59
  }
50
60
  export interface QueryResult {
51
61
  id: string;
@@ -60,6 +70,9 @@ export interface BenchmarkResult {
60
70
  summary: Record<string, {
61
71
  avg_precision: number;
62
72
  avg_recall: number;
73
+ avg_recall_at_1: number;
74
+ avg_recall_at_3: number;
75
+ avg_recall_at_5: number;
63
76
  avg_mrr: number;
64
77
  avg_f1: number;
65
78
  avg_latency_ms: number;
package/dist/cli/qmd.d.ts CHANGED
@@ -1,2 +1,28 @@
1
+ import { type OutputFormat } from "./formatter.js";
2
+ type CliLifecycleWritable = {
3
+ write(chunk: string | Uint8Array, callback?: (error?: Error | null) => void): boolean;
4
+ };
5
+ type FinishSuccessfulCliCommandOptions = {
6
+ command: string;
7
+ format?: OutputFormat;
8
+ cleanup?: () => Promise<void>;
9
+ exit?: (code: number) => void;
10
+ immediateExit?: (code: number) => void;
11
+ stdout?: CliLifecycleWritable;
12
+ stderr?: CliLifecycleWritable;
13
+ platform?: NodeJS.Platform;
14
+ };
15
+ /**
16
+ * Finish a successful CLI command after output has been flushed. On macOS JSON
17
+ * query runs, skip normal native teardown and use Node/Bun's immediate exit path:
18
+ * ggml Metal can abort from C++ finalizers after valid JSON has already been
19
+ * produced (#368). This wrapper is only reached after the command completed, so
20
+ * real query failures still exit through the normal error path before this runs.
21
+ */
22
+ export declare function finishSuccessfulCliCommand(options: FinishSuccessfulCliCommandOptions): Promise<void>;
23
+ export declare function resolveEmbedModelForCli(): string;
24
+ export declare function resolveGenerateModelForCli(): string;
25
+ export declare function resolveRerankModelForCli(): string;
1
26
  export declare function buildEditorUri(template: string, absolutePath: string, line: number, col: number): string;
2
27
  export declare function termLink(text: string, url: string, isTTY?: boolean): string;
28
+ export {};