@tobilu/qmd 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,185 @@
1
+ /**
2
+ * QMD Benchmark Harness
3
+ *
4
+ * Runs queries from a fixture file against multiple search backends
5
+ * and measures precision@k, recall, MRR, F1, and latency.
6
+ *
7
+ * Usage:
8
+ * qmd bench <fixture.json> [--json] [--collection <name>]
9
+ *
10
+ * Backends tested:
11
+ * - bm25: BM25 keyword search (searchLex)
12
+ * - vector: Vector similarity search (searchVector)
13
+ * - hybrid: BM25 + vector RRF fusion without reranking
14
+ * - full: Full hybrid pipeline with LLM reranking
15
+ */
16
+ import { readFileSync } from "node:fs";
17
+ import { resolve } from "node:path";
18
+ import { createStore, getDefaultDbPath, } from "../index.js";
19
+ import { scoreResults } from "./score.js";
20
+ const BACKENDS = [
21
+ {
22
+ name: "bm25",
23
+ run: async (store, query, limit, collection) => {
24
+ const results = await store.searchLex(query, { limit, collection });
25
+ return results.map((r) => r.filepath);
26
+ },
27
+ },
28
+ {
29
+ name: "vector",
30
+ run: async (store, query, limit, collection) => {
31
+ const results = await store.searchVector(query, { limit, collection });
32
+ return results.map((r) => r.filepath);
33
+ },
34
+ },
35
+ {
36
+ name: "hybrid",
37
+ run: async (store, query, limit, collection) => {
38
+ const results = await store.search({ query, limit, collection, rerank: false });
39
+ return results.map((r) => r.file);
40
+ },
41
+ },
42
+ {
43
+ name: "full",
44
+ run: async (store, query, limit, collection) => {
45
+ const results = await store.search({ query, limit, collection, rerank: true });
46
+ return results.map((r) => r.file);
47
+ },
48
+ },
49
+ ];
50
+ async function runQuery(store, backend, query, collection) {
51
+ const limit = Math.max(query.expected_in_top_k, 10);
52
+ const start = Date.now();
53
+ let resultFiles;
54
+ try {
55
+ resultFiles = await backend.run(store, query.query, limit, collection);
56
+ }
57
+ catch (err) {
58
+ // Backend may not be available (e.g., no embeddings for vector search)
59
+ return {
60
+ precision_at_k: 0,
61
+ recall: 0,
62
+ mrr: 0,
63
+ f1: 0,
64
+ hits_at_k: 0,
65
+ total_expected: query.expected_files.length,
66
+ latency_ms: Date.now() - start,
67
+ top_files: [],
68
+ };
69
+ }
70
+ const latency_ms = Date.now() - start;
71
+ const scores = scoreResults(resultFiles, query.expected_files, query.expected_in_top_k);
72
+ return {
73
+ ...scores,
74
+ total_expected: query.expected_files.length,
75
+ latency_ms,
76
+ top_files: resultFiles.slice(0, 10),
77
+ };
78
+ }
79
+ function formatTable(results) {
80
+ const lines = [];
81
+ const pad = (s, n) => s.slice(0, n).padEnd(n);
82
+ const num = (n) => n.toFixed(2).padStart(5);
83
+ lines.push(`${pad("Query", 25)} ${pad("Backend", 8)} ${pad("P@k", 6)} ${pad("Recall", 7)} ${pad("MRR", 6)} ${pad("F1", 6)} ${pad("ms", 8)}`);
84
+ lines.push("-".repeat(70));
85
+ for (const r of results) {
86
+ for (const [backend, br] of Object.entries(r.backends)) {
87
+ lines.push(`${pad(r.id, 25)} ${pad(backend, 8)} ${num(br.precision_at_k)} ${num(br.recall)} ${num(br.mrr)} ${num(br.f1)} ${String(Math.round(br.latency_ms)).padStart(7)}ms`);
88
+ }
89
+ lines.push("");
90
+ }
91
+ return lines.join("\n");
92
+ }
93
+ function computeSummary(results) {
94
+ const summary = {};
95
+ // Collect all backend names
96
+ const backendNames = new Set();
97
+ for (const r of results) {
98
+ for (const name of Object.keys(r.backends)) {
99
+ backendNames.add(name);
100
+ }
101
+ }
102
+ for (const name of backendNames) {
103
+ let totalP = 0, totalR = 0, totalMrr = 0, totalF1 = 0, totalLat = 0, count = 0;
104
+ for (const r of results) {
105
+ const br = r.backends[name];
106
+ if (!br)
107
+ continue;
108
+ totalP += br.precision_at_k;
109
+ totalR += br.recall;
110
+ totalMrr += br.mrr;
111
+ totalF1 += br.f1;
112
+ totalLat += br.latency_ms;
113
+ count++;
114
+ }
115
+ if (count > 0) {
116
+ summary[name] = {
117
+ avg_precision: totalP / count,
118
+ avg_recall: totalR / count,
119
+ avg_mrr: totalMrr / count,
120
+ avg_f1: totalF1 / count,
121
+ avg_latency_ms: totalLat / count,
122
+ };
123
+ }
124
+ }
125
+ return summary;
126
+ }
127
+ export async function runBenchmark(fixturePath, options = {}) {
128
+ // Load fixture
129
+ const raw = readFileSync(resolve(fixturePath), "utf-8");
130
+ const fixture = JSON.parse(raw);
131
+ if (!fixture.queries || !Array.isArray(fixture.queries)) {
132
+ throw new Error("Invalid fixture: missing 'queries' array");
133
+ }
134
+ // Open store
135
+ const store = await createStore({ dbPath: getDefaultDbPath() });
136
+ // Filter backends if requested
137
+ const activeBackends = options.backends
138
+ ? BACKENDS.filter(b => options.backends.includes(b.name))
139
+ : BACKENDS;
140
+ const collection = options.collection ?? fixture.collection;
141
+ // Run queries
142
+ const results = [];
143
+ for (const query of fixture.queries) {
144
+ const backends = {};
145
+ for (const backend of activeBackends) {
146
+ if (!options.json) {
147
+ process.stderr.write(` ${query.id} / ${backend.name}...`);
148
+ }
149
+ backends[backend.name] = await runQuery(store, backend, query, collection);
150
+ if (!options.json) {
151
+ process.stderr.write(` ${Math.round(backends[backend.name].latency_ms)}ms\n`);
152
+ }
153
+ }
154
+ results.push({
155
+ id: query.id,
156
+ query: query.query,
157
+ type: query.type,
158
+ backends,
159
+ });
160
+ }
161
+ await store.close();
162
+ const summary = computeSummary(results);
163
+ const timestamp = new Date().toISOString().replace(/[:.]/g, "").slice(0, 15);
164
+ const benchResult = {
165
+ timestamp,
166
+ fixture: fixturePath,
167
+ results,
168
+ summary,
169
+ };
170
+ // Output
171
+ if (options.json) {
172
+ console.log(JSON.stringify(benchResult, null, 2));
173
+ }
174
+ else {
175
+ console.log("\n" + formatTable(results));
176
+ console.log("Summary:");
177
+ console.log("-".repeat(70));
178
+ const pad = (s, n) => s.slice(0, n).padEnd(n);
179
+ const num = (n) => n.toFixed(3).padStart(6);
180
+ for (const [name, s] of Object.entries(summary)) {
181
+ console.log(` ${pad(name, 8)} P@k=${num(s.avg_precision)} Recall=${num(s.avg_recall)} MRR=${num(s.avg_mrr)} F1=${num(s.avg_f1)} Avg=${Math.round(s.avg_latency_ms)}ms`);
182
+ }
183
+ }
184
+ return benchResult;
185
+ }
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Scoring functions for the QMD benchmark harness.
3
+ *
4
+ * Computes precision@k, recall, MRR, and F1 for search results
5
+ * against ground-truth expected files.
6
+ */
7
+ /**
8
+ * Normalize a file path for comparison.
9
+ * Strips qmd:// prefix, lowercases, removes leading/trailing slashes.
10
+ */
11
+ export declare function normalizePath(p: string): string;
12
+ /**
13
+ * Check if two paths refer to the same file.
14
+ * Handles different path formats by comparing normalized suffixes.
15
+ */
16
+ export declare function pathsMatch(result: string, expected: string): boolean;
17
+ /**
18
+ * Score a set of search results against expected files.
19
+ */
20
+ export declare function scoreResults(resultFiles: string[], expectedFiles: string[], topK: number): {
21
+ precision_at_k: number;
22
+ recall: number;
23
+ mrr: number;
24
+ f1: number;
25
+ hits_at_k: number;
26
+ };
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Scoring functions for the QMD benchmark harness.
3
+ *
4
+ * Computes precision@k, recall, MRR, and F1 for search results
5
+ * against ground-truth expected files.
6
+ */
7
+ /**
8
+ * Normalize a file path for comparison.
9
+ * Strips qmd:// prefix, lowercases, removes leading/trailing slashes.
10
+ */
11
+ export function normalizePath(p) {
12
+ if (p.startsWith("qmd://")) {
13
+ // qmd://collection/path/to/file → path/to/file
14
+ const withoutScheme = p.slice("qmd://".length);
15
+ const slashIdx = withoutScheme.indexOf("/");
16
+ p = slashIdx >= 0 ? withoutScheme.slice(slashIdx + 1) : withoutScheme;
17
+ }
18
+ return p.toLowerCase().replace(/^\/+|\/+$/g, "");
19
+ }
20
+ /**
21
+ * Check if two paths refer to the same file.
22
+ * Handles different path formats by comparing normalized suffixes.
23
+ */
24
+ export function pathsMatch(result, expected) {
25
+ const nr = normalizePath(result);
26
+ const ne = normalizePath(expected);
27
+ if (nr === ne)
28
+ return true;
29
+ if (nr.endsWith(ne) || ne.endsWith(nr))
30
+ return true;
31
+ return false;
32
+ }
33
+ /**
34
+ * Score a set of search results against expected files.
35
+ */
36
+ export function scoreResults(resultFiles, expectedFiles, topK) {
37
+ // Count hits in top-k
38
+ const topKResults = resultFiles.slice(0, topK);
39
+ let hitsAtK = 0;
40
+ for (const expected of expectedFiles) {
41
+ if (topKResults.some(r => pathsMatch(r, expected))) {
42
+ hitsAtK++;
43
+ }
44
+ }
45
+ // Count total hits anywhere
46
+ let totalHits = 0;
47
+ for (const expected of expectedFiles) {
48
+ if (resultFiles.some(r => pathsMatch(r, expected))) {
49
+ totalHits++;
50
+ }
51
+ }
52
+ // MRR: reciprocal rank of first relevant result
53
+ let mrr = 0;
54
+ for (let i = 0; i < resultFiles.length; i++) {
55
+ if (expectedFiles.some(e => pathsMatch(resultFiles[i], e))) {
56
+ mrr = 1 / (i + 1);
57
+ break;
58
+ }
59
+ }
60
+ const denominator = Math.min(topK, expectedFiles.length);
61
+ const precision_at_k = denominator > 0 ? hitsAtK / denominator : 0;
62
+ const recall = expectedFiles.length > 0 ? totalHits / expectedFiles.length : 0;
63
+ const f1 = precision_at_k + recall > 0
64
+ ? 2 * (precision_at_k * recall) / (precision_at_k + recall)
65
+ : 0;
66
+ return { precision_at_k, recall, mrr, f1, hits_at_k: hitsAtK };
67
+ }
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Types for the QMD benchmark harness.
3
+ *
4
+ * A benchmark fixture defines queries with expected results.
5
+ * The harness runs each query through multiple search backends
6
+ * and measures precision, recall, MRR, and latency.
7
+ */
8
+ export interface BenchmarkQuery {
9
+ /** Unique identifier for the query */
10
+ id: string;
11
+ /** The search query text */
12
+ query: string;
13
+ /** Query difficulty/type for grouping results */
14
+ type: "exact" | "semantic" | "topical" | "cross-domain" | "alias";
15
+ /** Human-readable description of what this tests */
16
+ description: string;
17
+ /** File paths (relative to collection) that should appear in results */
18
+ expected_files: string[];
19
+ /** Cutoff k for the top-k metrics: expected_files are looked for within the first k results; the harness requests max(k, 10) results */
20
+ expected_in_top_k: number;
21
+ }
22
+ export interface BenchmarkFixture {
23
+ /** Description of the benchmark */
24
+ description: string;
25
+ /** Fixture format version */
26
+ version: number;
27
+ /** Optional collection to search within; a collection given in the run options takes precedence */
28
+ collection?: string;
29
+ /** The test queries; the harness rejects a fixture where this is not an array */
30
+ queries: BenchmarkQuery[];
31
+ }
32
+ export interface BackendResult {
33
+ /** Expected files found within the top-k results, divided by min(k, number of expected files) */
34
+ precision_at_k: number;
35
+ /** Fraction of expected files found anywhere in results */
36
+ recall: number;
37
+ /** Reciprocal rank of first relevant result (1/rank, 0 if not found) */
38
+ mrr: number;
39
+ /** Harmonic mean of precision_at_k and recall (0 when both are 0) */
40
+ f1: number;
41
+ /** Number of expected files found in top-k */
42
+ hits_at_k: number;
43
+ /** Total expected files */
44
+ total_expected: number;
45
+ /** Wall-clock latency of the backend call in milliseconds */
46
+ latency_ms: number;
47
+ /** First 10 result file paths (for inspection); empty when the backend call threw */
48
+ top_files: string[];
49
+ }
50
+ /** Outcome of one fixture query: its id, text and type, plus one BackendResult per backend name. */
+ export interface QueryResult {
51
+ id: string;
52
+ query: string;
53
+ type: string;
54
+ backends: Record<string, BackendResult>;
55
+ }
56
+ /** Output of one benchmark run: when it ran, the fixture path as given, per-query results, and per-backend averages in `summary`. */
+ export interface BenchmarkResult {
57
+ timestamp: string;
58
+ fixture: string;
59
+ results: QueryResult[];
60
+ summary: Record<string, {
61
+ avg_precision: number;
62
+ avg_recall: number;
63
+ avg_mrr: number;
64
+ avg_f1: number;
65
+ avg_latency_ms: number;
66
+ }>;
67
+ }
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Types for the QMD benchmark harness.
3
+ *
4
+ * A benchmark fixture defines queries with expected results.
5
+ * The harness runs each query through multiple search backends
6
+ * and measures precision, recall, MRR, and latency.
7
+ */
8
+ export {};
@@ -54,8 +54,11 @@ export function searchResultsToJson(results, opts = {}) {
54
54
  const query = opts.query || "";
55
55
  const output = results.map(row => {
56
56
  const bodyStr = row.body || "";
57
+ const snippetInfo = bodyStr
58
+ ? extractSnippet(bodyStr, query, 300, row.chunkPos, undefined, opts.intent)
59
+ : undefined;
57
60
  let body = opts.full ? bodyStr : undefined;
58
- let snippet = !opts.full ? extractSnippet(bodyStr, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
61
+ let snippet = !opts.full ? snippetInfo?.snippet : undefined;
59
62
  if (opts.lineNumbers) {
60
63
  if (body)
61
64
  body = addLineNumbers(body);
@@ -66,6 +69,7 @@ export function searchResultsToJson(results, opts = {}) {
66
69
  docid: `#${row.docid}`,
67
70
  score: Math.round(row.score * 100) / 100,
68
71
  file: row.displayPath,
72
+ ...(snippetInfo && { line: snippetInfo.line }),
69
73
  title: row.title,
70
74
  ...(row.context && { context: row.context }),
71
75
  ...(body && { body }),
package/dist/cli/qmd.d.ts CHANGED
@@ -1 +1,2 @@
1
- export {};
1
+ export declare function buildEditorUri(template: string, absolutePath: string, line: number, col: number): string;
2
+ export declare function termLink(text: string, url: string, isTTY?: boolean): string;