@tobilu/qmd 2.0.1 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Scoring functions for the QMD benchmark harness.
3
+ *
4
+ * Computes precision@k, recall, MRR, and F1 for search results
5
+ * against ground-truth expected files.
6
+ */
7
/**
 * Canonicalize a file path so paths from different sources can be compared.
 *
 * For a `qmd://` URI the scheme and the first segment after it (the
 * collection name) are dropped. The remainder is lowercased and any
 * leading or trailing slashes are removed.
 *
 * @param p - A plain file path or a `qmd://` URI.
 * @returns The normalized, lowercase path.
 */
export function normalizePath(p) {
    const scheme = "qmd://";
    let relative = p;
    if (p.startsWith(scheme)) {
        // qmd://collection/docs/readme.md → docs/readme.md
        const afterScheme = p.slice(scheme.length);
        const firstSlash = afterScheme.indexOf("/");
        // With no slash after the scheme there is nothing to split off,
        // so the text following the scheme is kept as-is.
        relative = firstSlash < 0 ? afterScheme : afterScheme.slice(firstSlash + 1);
    }
    const lowered = relative.toLowerCase();
    return lowered.replace(/^\/+|\/+$/g, "");
}
20
/**
 * Check whether two paths refer to the same file.
 *
 * Both inputs are run through `normalizePath`. They match when the
 * normalized forms are equal, or when the shorter one is a trailing run of
 * whole path segments of the longer one (for example `docs/readme.md`
 * matches `collection/docs/readme.md`).
 *
 * The suffix must begin on a `/` boundary: `readme.md` does not match
 * `docs/myreadme.md`. An empty normalized path matches only another empty
 * path.
 *
 * @param result - Path reported by a search backend.
 * @param expected - Ground-truth path from the fixture.
 * @returns `true` when both paths identify the same file.
 */
export function pathsMatch(result, expected) {
    const nr = normalizePath(result);
    const ne = normalizePath(expected);
    if (nr === ne)
        return true;
    // A bare endsWith("") is always true, which would make an empty path
    // match every file; only the equality check above may accept it.
    if (nr === "" || ne === "")
        return false;
    const [longer, shorter] = nr.length >= ne.length ? [nr, ne] : [ne, nr];
    // Prefixing the separator forces the suffix to start at a segment
    // boundary, so partial file-name overlaps are rejected.
    return longer.endsWith("/" + shorter);
}
33
/**
 * Count the expected files that are matched by at least one of the first
 * `k` entries of `resultFiles`.
 *
 * Each expected file is counted at most once, however many results match it.
 *
 * @param resultFiles - Result paths in ranked order.
 * @param expectedFiles - Ground-truth paths.
 * @param k - Number of leading results to consider.
 * @returns The number of expected files found within the first `k` results.
 */
function hitsWithin(resultFiles, expectedFiles, k) {
    const leading = resultFiles.slice(0, k);
    const found = expectedFiles.filter(wanted => leading.some(candidate => pathsMatch(candidate, wanted)));
    return found.length;
}
43
/**
 * Score a ranked list of result files against the expected files.
 *
 * Notes on the metrics as computed here:
 * - `precision_at_k` is hits in the top `topK` divided by
 *   `min(topK, expectedFiles.length)`, or 0 when that denominator is not
 *   positive.
 * - `recall` considers the whole result list; `recall_at_1/3/5` consider
 *   only the first 1, 3 and 5 results. All are 0 when nothing is expected.
 * - `mrr` is `1 / rank` of the first result matching any expected file,
 *   or 0 when none matches.
 * - `f1` is the harmonic mean of `precision_at_k` and `recall`, or 0 when
 *   their sum is not positive.
 *
 * @param resultFiles - Result paths in ranked order.
 * @param expectedFiles - Ground-truth paths.
 * @param topK - Cut-off used for `precision_at_k` and `hits_at_k`.
 * @returns The metrics plus the matched and unmatched expected files.
 */
export function scoreResults(resultFiles, expectedFiles, topK) {
    const expectedCount = expectedFiles.length;
    // Recall restricted to the first `k` results; 0 when nothing is expected.
    const recallWithin = (k) => expectedCount > 0
        ? hitsWithin(resultFiles, expectedFiles, k) / expectedCount
        : 0;
    // Split the expected files by whether any result, at any rank, matches.
    const matchedFiles = [];
    const unmatchedExpectedFiles = [];
    for (const wanted of expectedFiles) {
        const present = resultFiles.some(candidate => pathsMatch(candidate, wanted));
        (present ? matchedFiles : unmatchedExpectedFiles).push(wanted);
    }
    // MRR: reciprocal of the 1-based rank of the first relevant result.
    const firstRelevant = resultFiles.findIndex(candidate => expectedFiles.some(wanted => pathsMatch(candidate, wanted)));
    const mrr = firstRelevant === -1 ? 0 : 1 / (firstRelevant + 1);
    // Precision is normalized by the best achievable hit count within top-k.
    const hitsAtK = hitsWithin(resultFiles, expectedFiles, topK);
    const denominator = Math.min(topK, expectedCount);
    let precision_at_k = 0;
    if (denominator > 0) {
        precision_at_k = hitsAtK / denominator;
    }
    const recall = expectedCount > 0 ? matchedFiles.length / expectedCount : 0;
    const precisionPlusRecall = precision_at_k + recall;
    const f1 = precisionPlusRecall > 0
        ? 2 * (precision_at_k * recall) / precisionPlusRecall
        : 0;
    return {
        precision_at_k,
        recall,
        recall_at_1: recallWithin(1),
        recall_at_3: recallWithin(3),
        recall_at_5: recallWithin(5),
        mrr,
        f1,
        hits_at_k: hitsAtK,
        matched_files: matchedFiles,
        unmatched_expected_files: unmatchedExpectedFiles,
    };
}
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Types for the QMD benchmark harness.
3
+ *
4
+ * A benchmark fixture defines queries with expected results.
5
+ * The harness runs each query through multiple search backends
6
+ * and measures precision, recall, MRR, and latency.
7
+ */
8
/** One benchmark query together with the files it is expected to return. */
export interface BenchmarkQuery {
  /** Identifier that is unique among the queries. */
  id: string;

  /** Text submitted to the search backend. */
  query: string;

  /** Difficulty/kind of the query; used to group results. */
  type:
    | "exact"
    | "semantic"
    | "topical"
    | "cross-domain"
    | "alias";

  /** Human-readable explanation of what the query is testing. */
  description: string;

  /** Paths, relative to the collection, that should show up in the results. */
  expected_files: string[];

  /** Number of `expected_files` that should appear within the top-k results. */
  expected_in_top_k: number;
}
22
/** A benchmark definition: metadata plus the list of queries to run. */
export interface BenchmarkFixture {
  /** What this benchmark covers. */
  description: string;

  /** Version number of the fixture format. */
  version: number;

  /** Collection to restrict the search to; omit to leave it unrestricted. */
  collection?: string;

  /** Queries that make up the benchmark. */
  queries: BenchmarkQuery[];
}
32
/** Metrics recorded for a single query run against a single search backend. */
export interface BackendResult {
  /**
   * Fraction of top-k results that are relevant.
   * NOTE(review): the scoring module divides top-k hits by
   * min(k, number of expected files) rather than by k — confirm which
   * definition is intended.
   */
  precision_at_k: number;

  /** Share of the expected files found anywhere in the results. */
  recall: number;

  /** Share of the expected files matched by the first result. */
  recall_at_1: number;

  /** Share of the expected files found within the top 3 results. */
  recall_at_3: number;

  /** Share of the expected files found within the top 5 results. */
  recall_at_5: number;

  /** 1/rank of the first relevant result; 0 when no result is relevant. */
  mrr: number;

  /** Harmonic mean of `precision_at_k` and `recall`. */
  f1: number;

  /** Count of expected files found within the top-k results. */
  hits_at_k: number;

  /** Total number of expected files for the query. */
  total_expected: number;

  /** Wall-clock latency, in milliseconds. */
  latency_ms: number;

  /** Paths of the top results, kept for inspection. */
  top_files: string[];

  /** Expected files that appeared somewhere in the returned result set. */
  matched_files: string[];

  /** Expected files that the returned result set did not contain. */
  unmatched_expected_files: string[];
}
60
/** Results for one benchmark query, one entry per backend. */
export interface QueryResult {
  /** Query identifier (presumably `BenchmarkQuery.id` — confirm against the harness). */
  id: string;

  /** Query text. */
  query: string;

  /** Query type; typed as a plain string here rather than the fixture's literal union. */
  type: string;

  /** Per-backend metrics (keys are presumably backend names — confirm). */
  backends: Record<string, BackendResult>;
}
66
/** Complete output of a benchmark run: per-query results plus a summary. */
export interface BenchmarkResult {
  /** When the run happened (string format not visible here — confirm). */
  timestamp: string;

  /** Identifies the fixture used (name or path — confirm against the harness). */
  fixture: string;

  /** One entry per query. */
  results: QueryResult[];

  /**
   * Aggregate metrics. Keys are presumably backend names and values are
   * presumably averages over all queries — confirm against the harness.
   */
  summary: Record<
    string,
    {
      avg_precision: number;
      avg_recall: number;
      avg_recall_at_1: number;
      avg_recall_at_3: number;
      avg_recall_at_5: number;
      avg_mrr: number;
      avg_f1: number;
      avg_latency_ms: number;
    }
  >;
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Types for the QMD benchmark harness.
3
+ *
4
+ * A benchmark fixture defines queries with expected results.
5
+ * The harness runs each query through multiple search backends
6
+ * and measures precision, recall, MRR, and latency.
7
+ */
8
+ export {};
@@ -54,8 +54,11 @@ export function searchResultsToJson(results, opts = {}) {
54
54
  const query = opts.query || "";
55
55
  const output = results.map(row => {
56
56
  const bodyStr = row.body || "";
57
+ const snippetInfo = bodyStr
58
+ ? extractSnippet(bodyStr, query, 300, row.chunkPos, undefined, opts.intent)
59
+ : undefined;
57
60
  let body = opts.full ? bodyStr : undefined;
58
- let snippet = !opts.full ? extractSnippet(bodyStr, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
61
+ let snippet = !opts.full ? snippetInfo?.snippet : undefined;
59
62
  if (opts.lineNumbers) {
60
63
  if (body)
61
64
  body = addLineNumbers(body);
@@ -66,6 +69,7 @@ export function searchResultsToJson(results, opts = {}) {
66
69
  docid: `#${row.docid}`,
67
70
  score: Math.round(row.score * 100) / 100,
68
71
  file: row.displayPath,
72
+ ...(snippetInfo && { line: snippetInfo.line }),
69
73
  title: row.title,
70
74
  ...(row.context && { context: row.context }),
71
75
  ...(body && { body }),
package/dist/cli/qmd.d.ts CHANGED
@@ -1 +1,28 @@
1
+ import { type OutputFormat } from "./formatter.js";
2
/**
 * Minimal writable sink: just a `write` method taking a chunk and an
 * optional completion callback.
 */
type CliLifecycleWritable = {
  /**
   * Write a chunk to the sink.
   *
   * @param chunk - Text or bytes to write.
   * @param callback - Optional callback that may receive an error.
   */
  write(
    chunk: string | Uint8Array,
    callback?: (error?: Error | null) => void,
  ): boolean;
};
5
/**
 * Options accepted by `finishSuccessfulCliCommand`. Only `command` is
 * required.
 * NOTE(review): the optional members look like injection points for
 * process-level behavior (exit, streams, platform) — confirm their defaults
 * in the implementation.
 */
type FinishSuccessfulCliCommandOptions = {
  /** Name of the CLI command that completed. */
  command: string;

  /** Output format of the command, when one applies. */
  format?: OutputFormat;

  /** Async cleanup hook (presumably awaited before a normal exit — confirm). */
  cleanup?: () => Promise<void>;

  /** Exit function taking the exit code. */
  exit?: (code: number) => void;

  /** Exit function for the immediate-exit path, taking the exit code. */
  immediateExit?: (code: number) => void;

  /** Sink standing in for standard output. */
  stdout?: CliLifecycleWritable;

  /** Sink standing in for standard error. */
  stderr?: CliLifecycleWritable;

  /** Platform identifier used for the platform-specific exit decision. */
  platform?: NodeJS.Platform;
};
15
/**
 * Finish a CLI command that succeeded, once its output has been flushed.
 *
 * For JSON query runs on macOS the normal native teardown is skipped and
 * Node/Bun's immediate exit path is used instead, because ggml Metal can
 * abort from C++ finalizers after valid JSON has already been produced
 * (#368). This wrapper is reached only after the command has completed, so
 * genuine query failures still exit through the normal error path before
 * it runs.
 *
 * @param options - The completed command and optional overrides.
 * @returns A promise that settles when the finish sequence is done.
 */
export declare function finishSuccessfulCliCommand(
  options: FinishSuccessfulCliCommandOptions,
): Promise<void>;
23
/** Returns the embedding model the CLI should use (resolution rules not visible here — confirm). */
export declare function resolveEmbedModelForCli(): string;

/** Returns the generation model the CLI should use (resolution rules not visible here — confirm). */
export declare function resolveGenerateModelForCli(): string;

/** Returns the rerank model the CLI should use (resolution rules not visible here — confirm). */
export declare function resolveRerankModelForCli(): string;

/**
 * Build an editor URI from a template.
 * NOTE(review): presumably substitutes the path, line and column into
 * `template`; the placeholder syntax is not visible here — confirm.
 *
 * @param template - URI template.
 * @param absolutePath - Absolute path of the file to open.
 * @param line - Line number.
 * @param col - Column number.
 */
export declare function buildEditorUri(
  template: string,
  absolutePath: string,
  line: number,
  col: number,
): string;

/**
 * Produce link text for terminal output.
 * NOTE(review): presumably returns a terminal hyperlink when `isTTY` is
 * true and plain text otherwise — confirm against the implementation.
 *
 * @param text - Visible label.
 * @param url - Link target.
 * @param isTTY - Whether the output is a TTY; optional.
 */
export declare function termLink(
  text: string,
  url: string,
  isTTY?: boolean,
): string;
1
28
  export {};