@gmickel/gno 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -880,11 +880,13 @@ bun run lint && bun run typecheck
880
880
  Use retrieval benchmark commands to track quality and latency over time:
881
881
 
882
882
  ```bash
883
+ gno bench docs/examples/bench-fixture.json
883
884
  bun run eval:hybrid
884
885
  bun run eval:hybrid:baseline
885
886
  bun run eval:hybrid:delta
886
887
  ```
887
888
 
889
+ - Public fixture runner: `gno bench <fixture.json>` reports Precision@K, Recall@K, F1@K, MRR, nDCG@K, and latency across BM25/vector/hybrid modes.
888
890
  - Benchmark guide: [evals/README.md](./evals/README.md)
889
891
  - Latest baseline snapshot: [evals/fixtures/hybrid-baseline/latest.json](./evals/fixtures/hybrid-baseline/latest.json)
890
892
 
@@ -131,6 +131,18 @@ gno query "auth" --json | jq -r '.results[0].uri' | xargs gno get
131
131
  gno search "error handling" --json | jq -r '.results[].uri' | xargs gno multi-get
132
132
  ```
133
133
 
134
+ ## MCP Retrieval Strategy
135
+
136
+ When using GNO through MCP, prefer `gno_query` first for normal questions. It returns snippets plus `uri`, `docid`, and often `line`; follow with `gno_get` using `fromLine`/`lineCount` for a bounded read, or `gno_multi_get` to batch top result refs.
137
+
138
+ Use narrower tools when the request tells you to:
139
+
140
+ - `gno_search`: exact phrase, filename, identifier, stack trace, error text
141
+ - `gno_vsearch`: conceptual similarity when exact wording differs
142
+ - `gno_status`: stale results, missing embeddings, vector unavailable
143
+
144
+ For ambiguous terms, pass `intent` instead of bloating the query text. For typed retrieval, use `queryModes`: `term` for lexical anchors, `intent` for disambiguation, one `hyde` for a hypothetical answer/document.
145
+
134
146
  ## Document Links & Similarity
135
147
 
136
148
  ```bash
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gmickel/gno",
3
- "version": "1.3.1",
3
+ "version": "1.4.1",
4
4
  "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
5
5
  "keywords": [
6
6
  "embeddings",
@@ -69,6 +69,7 @@
69
69
  "eval:hybrid": "bun --bun evalite evals/hybrid.eval.ts",
70
70
  "eval:hybrid:baseline": "bun scripts/hybrid-benchmark.ts --write",
71
71
  "eval:hybrid:delta": "bun scripts/hybrid-benchmark.ts --delta",
72
+ "bench:ast-chunking": "bun scripts/ast-chunking-benchmark.ts",
72
73
  "bench:code-embeddings": "bun scripts/code-embedding-benchmark.ts",
73
74
  "bench:code-embeddings:write": "bun scripts/code-embedding-benchmark.ts --write",
74
75
  "bench:general-embeddings": "bun scripts/general-embedding-benchmark.ts",
@@ -121,7 +122,7 @@
121
122
  "website:og": "bun scripts/og-screenshots.ts",
122
123
  "website:sync-assets": "bun scripts/sync-assets.ts",
123
124
  "sync:agents": "scripts/sync-agents.sh",
124
- "build:css": "bunx @tailwindcss/cli -i src/serve/public/globals.css -o src/serve/public/globals.built.css --minify",
125
+ "build:css": "tailwindcss -i src/serve/public/globals.css -o src/serve/public/globals.built.css --minify",
125
126
  "serve": "bun src/index.ts serve",
126
127
  "serve:dev": "NODE_ENV=development bun --hot src/index.ts serve",
127
128
  "version:patch": "npm version patch --no-git-tag-version",
@@ -133,73 +134,75 @@
133
134
  "prepare": "lefthook install"
134
135
  },
135
136
  "dependencies": {
136
- "@codemirror/lang-markdown": "^6.5.0",
137
- "@codemirror/theme-one-dark": "^6.1.3",
138
- "@modelcontextprotocol/sdk": "^1.27.1",
139
- "@radix-ui/react-collapsible": "^1.1.12",
140
- "@radix-ui/react-dialog": "^1.1.15",
141
- "@radix-ui/react-dropdown-menu": "^2.1.16",
142
- "@radix-ui/react-hover-card": "^1.1.15",
143
- "@radix-ui/react-progress": "^1.1.8",
144
- "@radix-ui/react-scroll-area": "^1.2.10",
145
- "@radix-ui/react-select": "^2.2.6",
146
- "@radix-ui/react-separator": "^1.1.8",
147
- "@radix-ui/react-slot": "^1.2.4",
148
- "@radix-ui/react-tooltip": "^1.2.8",
149
- "ai": "^6.0.5",
150
- "bun-plugin-tailwind": "^0.1.2",
151
- "class-variance-authority": "^0.7.1",
152
- "clsx": "^2.1.1",
153
- "cmdk": "^1.1.1",
154
- "codemirror": "^6.0.2",
155
- "commander": "^14.0.2",
156
- "embla-carousel-react": "^8.6.0",
157
- "franc": "^6.2.0",
158
- "lucide-react": "^1.8.0",
159
- "markitdown-ts": "^0.0.9",
160
- "minimatch": "^10.1.1",
161
- "nanoid": "^5.1.6",
162
- "node-llama-cpp": "^3.17.1",
163
- "officeparser": "^6.0.4",
164
- "picocolors": "^1.1.1",
165
- "react": "^19.2.3",
166
- "react-dom": "^19.2.3",
167
- "react-force-graph-2d": "^1.29.0",
168
- "react-markdown": "^10.1.0",
169
- "rehype-sanitize": "^6.0.0",
170
- "remark-gfm": "^4.0.1",
171
- "shiki": "^4.0.2",
172
- "sqlite-vec": "^0.1.7-alpha.2",
173
- "streamdown": "^2.0.1",
174
- "tailwind-merge": "^3.4.0",
175
- "tailwindcss": "^4.1.18",
176
- "use-stick-to-bottom": "^1.1.1",
177
- "zod": "^4.3.4"
137
+ "@codemirror/lang-markdown": "6.5.0",
138
+ "@codemirror/theme-one-dark": "6.1.3",
139
+ "@modelcontextprotocol/sdk": "1.27.1",
140
+ "@radix-ui/react-collapsible": "1.1.12",
141
+ "@radix-ui/react-dialog": "1.1.15",
142
+ "@radix-ui/react-dropdown-menu": "2.1.16",
143
+ "@radix-ui/react-hover-card": "1.1.15",
144
+ "@radix-ui/react-progress": "1.1.8",
145
+ "@radix-ui/react-scroll-area": "1.2.10",
146
+ "@radix-ui/react-select": "2.2.6",
147
+ "@radix-ui/react-separator": "1.1.8",
148
+ "@radix-ui/react-slot": "1.2.4",
149
+ "@radix-ui/react-tooltip": "1.2.8",
150
+ "ai": "6.0.68",
151
+ "bun-plugin-tailwind": "0.1.2",
152
+ "class-variance-authority": "0.7.1",
153
+ "clsx": "2.1.1",
154
+ "cmdk": "1.1.1",
155
+ "codemirror": "6.0.2",
156
+ "commander": "14.0.3",
157
+ "embla-carousel-react": "8.6.0",
158
+ "franc": "6.2.0",
159
+ "lucide-react": "1.8.0",
160
+ "markitdown-ts": "0.0.9",
161
+ "minimatch": "10.1.1",
162
+ "nanoid": "5.1.6",
163
+ "node-llama-cpp": "3.18.1",
164
+ "officeparser": "6.0.4",
165
+ "picocolors": "1.1.1",
166
+ "react": "19.2.4",
167
+ "react-dom": "19.2.4",
168
+ "react-force-graph-2d": "1.29.0",
169
+ "react-markdown": "10.1.0",
170
+ "rehype-sanitize": "6.0.0",
171
+ "remark-gfm": "4.0.1",
172
+ "shiki": "4.0.2",
173
+ "sqlite-vec": "0.1.9",
174
+ "streamdown": "2.1.0",
175
+ "tailwind-merge": "3.4.0",
176
+ "tailwindcss": "4.1.18",
177
+ "use-stick-to-bottom": "1.1.2",
178
+ "zod": "4.3.6"
178
179
  },
179
180
  "devDependencies": {
180
- "@ai-sdk/openai": "^3.0.2",
181
+ "@ai-sdk/openai": "3.0.25",
181
182
  "@biomejs/biome": "2.3.14",
182
- "@tailwindcss/cli": "^4.1.18",
183
- "@testing-library/react": "^16.3.2",
184
- "@testing-library/user-event": "^14.6.1",
185
- "@types/bun": "latest",
186
- "@types/react": "^19.2.13",
187
- "@types/react-dom": "^19.2.3",
188
- "ajv": "^8.17.1",
189
- "ajv-formats": "^3.0.1",
190
- "docx": "^9.5.1",
191
- "evalite": "^1.0.0-beta.15",
192
- "exceljs": "^4.4.0",
193
- "happy-dom": "^20.8.9",
194
- "lefthook": "^2.1.0",
195
- "oxfmt": "^0.28.0",
196
- "oxlint": "^1.42.0",
197
- "oxlint-tsgolint": "^0.11.5",
198
- "pdf-lib": "^1.17.1",
199
- "playwright": "^1.58.2",
200
- "pptxgenjs": "^4.0.1",
183
+ "@tailwindcss/cli": "4.1.18",
184
+ "@testing-library/react": "16.3.2",
185
+ "@testing-library/user-event": "14.6.1",
186
+ "@types/bun": "1.3.8",
187
+ "@types/react": "19.2.14",
188
+ "@types/react-dom": "19.2.3",
189
+ "@vscode/tree-sitter-wasm": "0.3.1",
190
+ "ajv": "8.17.1",
191
+ "ajv-formats": "3.0.1",
192
+ "docx": "9.5.1",
193
+ "evalite": "1.0.0-beta.16",
194
+ "exceljs": "4.4.0",
195
+ "happy-dom": "20.8.9",
196
+ "lefthook": "2.1.4",
197
+ "oxfmt": "0.28.0",
198
+ "oxlint": "1.43.0",
199
+ "oxlint-tsgolint": "0.11.5",
200
+ "pdf-lib": "1.17.1",
201
+ "playwright": "1.58.2",
202
+ "pptxgenjs": "4.0.1",
201
203
  "ultracite": "7.1.5",
202
- "vitest": "^4.0.16"
204
+ "vitest": "4.0.18",
205
+ "web-tree-sitter": "0.26.8"
203
206
  },
204
207
  "peerDependencies": {
205
208
  "typescript": "^5"
@@ -0,0 +1,247 @@
1
+ import { z } from "zod";
2
+
3
+ import type { BenchFixture, BenchMode, BenchOptions } from "./types";
4
+
5
/**
 * Every mode name accepted as a bare string in a fixture's `modes` list and
 * by `parseModeFlag`. `normalizeMode` expands each one into a `BenchMode`.
 */
const MODE_ALIASES = ["bm25", "vector", "hybrid", "fast", "no-rerank", "thorough"] as const;

/** Union of the literal strings listed in `MODE_ALIASES`. */
type BenchModeAlias = (typeof MODE_ALIASES)[number];
15
+
16
/** A typed query variant: one of three roles plus text that is non-empty after trimming. */
const queryModeInputSchema = z.object({
  mode: z.enum(["term", "intent", "hyde"]),
  text: z.string().trim().min(1),
});

// Field shapes that repeat inside the mode object below.
const optionalModeFlag = z.boolean().optional();
const optionalModeCount = z.number().int().positive().optional();

/**
 * Object form of a fixture mode entry. Every field is optional;
 * `normalizeModeInput` fills in the gaps from the `mode` preset, if any.
 */
const modeObjectSchema = z.object({
  name: z.string().trim().min(1).optional(),
  type: z.enum(["bm25", "vector", "hybrid"]).optional(),
  mode: z.enum(MODE_ALIASES).optional(),
  noExpand: optionalModeFlag,
  noRerank: optionalModeFlag,
  candidateLimit: optionalModeCount,
  limit: optionalModeCount,
  queryModes: z.array(queryModeInputSchema).optional(),
});
31
+
32
/** Optional descriptive block at the top of a fixture; all fields are free-form. */
const fixtureMetadataSchema = z.object({
  name: z.string().optional(),
  description: z.string().optional(),
  tags: z.array(z.string()).optional(),
});

/**
 * One graded relevance entry. The document may be referenced through any of
 * `docid`, `doc`, or `uri`; `relevance` must be zero or greater.
 */
const fixtureJudgmentSchema = z.object({
  docid: z.string().trim().min(1).optional(),
  doc: z.string().trim().min(1).optional(),
  uri: z.string().trim().min(1).optional(),
  relevance: z.number().min(0),
});

/** A single benchmark query with its expected documents and optional overrides. */
const fixtureQuerySchema = z.object({
  id: z.string().trim().min(1),
  query: z.string().trim().min(1),
  expected: z.array(z.string().trim().min(1)).optional(),
  expectedDocuments: z.array(z.string().trim().min(1)).optional(),
  expectedUris: z.array(z.string().trim().min(1)).optional(),
  judgments: z.array(fixtureJudgmentSchema).optional(),
  collection: z.string().trim().min(1).optional(),
  topK: z.number().int().positive().optional(),
  queryModes: z.array(queryModeInputSchema).optional(),
});

/**
 * Shape of a version-1 benchmark fixture file as it appears on disk.
 * At least one query is required; everything else is optional.
 */
const fixtureSchema = z.object({
  version: z.literal(1),
  metadata: fixtureMetadataSchema.optional(),
  collection: z.string().trim().min(1).optional(),
  topK: z.number().int().positive().optional(),
  candidateLimit: z.number().int().positive().optional(),
  modes: z.array(z.union([z.enum(MODE_ALIASES), modeObjectSchema])).optional(),
  queries: z.array(fixtureQuerySchema).min(1),
});

/** One element of the fixture's `modes` array: an alias string or a mode object. */
type FixtureModeInput = NonNullable<z.infer<typeof fixtureSchema>["modes"]>[number];
74
+
75
/**
 * Canonicalize a document reference for comparison.
 *
 * Surrounding whitespace is removed and everything from the first `?`
 * onward is dropped.
 *
 * @param value - Raw reference string.
 * @returns The trimmed reference without its `?...` suffix.
 */
export function normalizeBenchRef(value: string): string {
  const text = value.trim();
  const cut = text.indexOf("?");
  if (cut < 0) {
    return text;
  }
  return text.substring(0, cut);
}
80
+
81
/**
 * Expand a mode alias into its full `BenchMode` preset.
 *
 * `bm25` and `vector` map to the retrieval type of the same name; every
 * other alias is a hybrid configuration with preset flags.
 */
function normalizeMode(alias: BenchModeAlias): BenchMode {
  switch (alias) {
    // Single-retriever modes: the alias doubles as the type.
    case "bm25":
    case "vector":
      return { name: alias, type: alias };

    // Hybrid presets, from plain to most specialised.
    case "hybrid":
      return { name: alias, type: "hybrid" };
    case "no-rerank":
      return { name: alias, type: "hybrid", noRerank: true };
    case "fast":
      return { name: alias, type: "hybrid", noExpand: true, noRerank: true };
    case "thorough":
      return { name: alias, type: "hybrid", depth: "thorough" };
  }
}
102
+
103
/**
 * Turn one fixture `modes` entry into a `BenchMode`.
 *
 * A bare string is treated as a preset alias. For an object, the optional
 * `mode` preset supplies defaults; explicit `type`, `noExpand`, and
 * `noRerank` override it. The display name falls back from `name` to
 * `mode` to the resolved type.
 */
function normalizeModeInput(input: FixtureModeInput): BenchMode {
  if (typeof input === "string") {
    return normalizeMode(input as BenchModeAlias);
  }

  const { mode: alias, type: explicitType, name: explicitName } = input;
  const preset = alias ? normalizeMode(alias) : undefined;
  const resolvedType = explicitType ?? preset?.type ?? "hybrid";

  const merged: BenchMode = {
    ...preset,
    name: explicitName ?? alias ?? resolvedType,
    type: resolvedType,
    depth: preset?.depth,
    noExpand: input.noExpand ?? preset?.noExpand,
    noRerank: input.noRerank ?? preset?.noRerank,
    // Limits and typed queries come only from the object itself.
    candidateLimit: input.candidateLimit,
    limit: input.limit,
    queryModes: input.queryModes,
  };
  return merged;
}
123
+
124
/**
 * Validate a mode name supplied through options and expand it to a preset.
 *
 * @param mode - Raw mode string; surrounding whitespace is ignored for matching.
 * @returns `{ ok: true, value }` for a known alias, otherwise `{ ok: false, error }`
 *          with a message that lists the supported aliases.
 */
function parseModeFlag(
  mode: string
): { ok: true; value: BenchMode } | { ok: false; error: string } {
  const candidate = mode.trim();
  const alias = MODE_ALIASES.find((known) => known === candidate);

  if (alias === undefined) {
    return {
      ok: false,
      error: `Unsupported bench mode: ${mode}. Supported: ${MODE_ALIASES.join(", ")}`,
    };
  }
  return { ok: true, value: normalizeMode(alias) };
}
136
+
137
/**
 * Decide which modes a run uses.
 *
 * A non-empty `optionModes` list replaces the fixture's own modes entirely.
 * Otherwise the fixture's `modes` are used, defaulting to `bm25` alone.
 *
 * @throws Error when an entry of `optionModes` is not a supported alias.
 */
function normalizeModes(
  fixtureModes: z.infer<typeof fixtureSchema>["modes"],
  optionModes?: string[]
): BenchMode[] {
  if (!optionModes || optionModes.length === 0) {
    const declared: FixtureModeInput[] = fixtureModes ?? ["bm25"];
    return declared.map(normalizeModeInput);
  }

  const selected: BenchMode[] = [];
  for (const flag of optionModes) {
    const outcome = parseModeFlag(flag);
    if (!outcome.ok) {
      throw new Error(outcome.error);
    }
    selected.push(outcome.value);
  }
  return selected;
}
153
+
154
/**
 * Merge a schema-validated fixture with run options into a `BenchFixture`.
 *
 * Precedence is options over fixture values. `topK` defaults to 10 when
 * neither side sets it.
 *
 * For each query, `expected`, `expectedDocuments`, and `expectedUris` are
 * merged and normalized. When none of them is given, the expected set is
 * derived from judgments with a relevance greater than zero. A zero grade
 * marks a document as judged but not relevant, so it must not count as a hit.
 * The expected list is de-duplicated so one document is not counted twice.
 *
 * @throws Error when `options.modes` contains an unsupported mode name.
 */
function normalizeFixture(
  parsed: z.infer<typeof fixtureSchema>,
  options: BenchOptions
): BenchFixture {
  const modes = normalizeModes(parsed.modes, options.modes);
  const topK = options.topK ?? parsed.topK ?? 10;
  const candidateLimit = options.candidateLimit ?? parsed.candidateLimit;

  return {
    version: parsed.version,
    metadata: parsed.metadata,
    collection: options.collection ?? parsed.collection,
    topK,
    candidateLimit,
    modes,
    queries: parsed.queries.map((entry) => {
      const explicitExpected = [
        ...(entry.expected ?? []),
        ...(entry.expectedDocuments ?? []),
        ...(entry.expectedUris ?? []),
      ].map((ref) => normalizeBenchRef(ref));

      // A judgment that names no document at all is dropped.
      const judgments =
        entry.judgments?.flatMap((judgment) => {
          const docid = judgment.docid ?? judgment.doc ?? judgment.uri;
          return docid
            ? [
                {
                  docid: normalizeBenchRef(docid),
                  relevance: judgment.relevance,
                },
              ]
            : [];
        }) ?? [];

      // Only positively graded judgments may stand in for an expected list.
      const relevantFromJudgments = judgments
        .filter((judgment) => judgment.relevance > 0)
        .map((judgment) => judgment.docid);

      // Set iteration keeps first-seen order while removing repeats.
      const expected = [
        ...new Set(
          explicitExpected.length > 0 ? explicitExpected : relevantFromJudgments
        ),
      ];

      return {
        id: entry.id,
        query: entry.query,
        expected,
        judgments,
        collection: options.collection ?? entry.collection ?? parsed.collection,
        topK: entry.topK,
        queryModes: entry.queryModes,
      };
    }),
  };
}
204
+
205
/**
 * Read, validate, and normalize a benchmark fixture file.
 *
 * Failures are reported through the returned `{ ok: false, error }` value
 * rather than thrown: a missing file, unreadable or invalid JSON, a schema
 * violation, an unsupported mode option, or a query with no expected
 * documents.
 *
 * @param fixturePath - Path of the fixture JSON file.
 * @param options - Run options that override values in the fixture.
 */
export async function loadBenchFixture(
  fixturePath: string,
  options: BenchOptions
): Promise<{ ok: true; fixture: BenchFixture } | { ok: false; error: string }> {
  const fail = (error: string) => ({ ok: false as const, error });
  const describe = (cause: unknown): string =>
    cause instanceof Error ? cause.message : String(cause);

  const file = Bun.file(fixturePath);
  const present = await file.exists();
  if (!present) {
    return fail(`Fixture not found: ${fixturePath}`);
  }

  let raw: unknown;
  try {
    raw = JSON.parse(await file.text());
  } catch (cause) {
    return fail(`Invalid JSON fixture: ${describe(cause)}`);
  }

  const validation = fixtureSchema.safeParse(raw);
  if (!validation.success) {
    return fail(z.prettifyError(validation.error));
  }

  try {
    const fixture = normalizeFixture(validation.data, options);
    // Report the first query that has nothing to score against.
    for (const entry of fixture.queries) {
      if (entry.expected.length === 0) {
        return fail(
          `Bench query "${entry.id}" must define expected documents, expected URIs, or judgments`
        );
      }
    }
    return { ok: true, fixture };
  } catch (cause) {
    return fail(describe(cause));
  }
}
@@ -0,0 +1,137 @@
1
+ /**
2
+ * Retrieval benchmark metric helpers.
3
+ *
4
+ * @module src/bench/metrics
5
+ */
6
+
7
/** Graded relevance of one document for one query. */
export interface RelevanceJudgment {
  /** Identifier compared against the ids in a ranked result list. */
  docid: string;
  /** Relevance grade; `computeNdcg` turns it into a gain of `2 ** relevance - 1`. */
  relevance: number;
}
11
+
12
/** The five quality scores reported per query and, averaged, per mode. */
export interface RetrievalMetrics {
  /** Precision at the cut-off K. */
  precisionAtK: number;
  /** Recall at the cut-off K. */
  recallAtK: number;
  /** F1 score combining precision and recall at K. */
  f1AtK: number;
  /** Reciprocal rank of the first relevant result. */
  mrr: number;
  /** Normalized discounted cumulative gain at K. */
  ndcgAtK: number;
}
19
+
20
/** Round `value` to `places` decimal digits (default 4) via fixed-point formatting. */
function round(value: number, places = 4): number {
  const fixed = value.toFixed(places);
  return +fixed;
}
23
+
24
/**
 * Precision@K: the share of the first `k` result slots filled by relevant
 * documents. The divisor is always `k`, even when fewer than `k` results
 * were returned.
 *
 * @param output - Ranked document ids, best first.
 * @param expected - Ids of the relevant documents.
 * @param k - Cut-off rank; values of zero or below yield 0.
 */
export function computePrecision(
  output: string[],
  expected: string[],
  k: number
): number {
  if (k <= 0) {
    return 0;
  }
  const relevant = new Set(expected);
  let hitCount = 0;
  for (const docid of output.slice(0, k)) {
    if (relevant.has(docid)) {
      hitCount += 1;
    }
  }
  return hitCount / k;
}
39
+
40
/**
 * Recall@K: the share of distinct relevant documents found in the first
 * `k` results.
 *
 * @param output - Ranked document ids, best first.
 * @param expected - Ids of the relevant documents; repeated ids count once.
 * @param k - Cut-off rank. Zero or negative values behave like an empty
 *            result list.
 * @returns 1 when `expected` is empty, otherwise a value in [0, 1].
 */
export function computeRecall(
  output: string[],
  expected: string[],
  k: number
): number {
  const relevant = new Set(expected);
  if (relevant.size === 0) return 1;

  // Clamp the cut-off: a negative end index makes `slice` count from the
  // end of the array, which would keep leading results instead of none.
  const window = new Set(output.slice(0, Math.max(0, k)));

  let found = 0;
  for (const docid of relevant) {
    if (window.has(docid)) {
      found += 1;
    }
  }
  return found / relevant.size;
}
53
+
54
/**
 * F1@K: harmonic mean of precision and recall.
 *
 * @returns 0 when both inputs are exactly zero, avoiding a 0/0 division.
 */
export function computeF1(precision: number, recall: number): number {
  const bothZero = precision === 0 && recall === 0;
  const total = precision + recall;
  return bothZero ? 0 : (2 * precision * recall) / total;
}
63
+
64
/**
 * nDCG@K: discounted cumulative gain of the ranked list divided by the gain
 * of an ideal ordering, using `2 ** relevance - 1` as gain and
 * `log2(rank + 1)` (1-based rank) as discount.
 *
 * Each document is credited at most once, at its first position, so a
 * result list that repeats a relevant id cannot score above the ideal. The
 * ideal ordering is built from one grade per distinct document; when a
 * document is judged more than once, the last grade wins.
 *
 * @param output - Ranked document ids, best first.
 * @param judgments - Graded relevance per document.
 * @param k - Cut-off rank. Zero or negative values are treated as zero.
 * @returns 1 when there are no judgments or the ideal gain is zero,
 *          otherwise DCG divided by ideal DCG.
 */
export function computeNdcg(
  output: string[],
  judgments: RelevanceJudgment[],
  k: number
): number {
  if (judgments.length === 0) return 1;

  // A negative end index would make `slice` count from the end of the array.
  const cutoff = Math.max(0, k);
  const discountedGain = (relevance: number, index: number): number =>
    (2 ** relevance - 1) / Math.log2(index + 2);

  const relMap = new Map<string, number>();
  for (const judgment of judgments) {
    relMap.set(judgment.docid, judgment.relevance);
  }

  const credited = new Set<string>();
  let dcg = 0;
  output.slice(0, cutoff).forEach((docid, index) => {
    if (credited.has(docid)) {
      return; // a repeated id occupies the slot but earns no further gain
    }
    credited.add(docid);
    dcg += discountedGain(relMap.get(docid) ?? 0, index);
  });

  const idcg = [...relMap.values()]
    .sort((a, b) => b - a)
    .slice(0, cutoff)
    .reduce((sum, relevance, index) => sum + discountedGain(relevance, index), 0);

  return idcg > 0 ? dcg / idcg : 1;
}
84
+
85
/**
 * Reciprocal rank for a single query: `1 / rank` of the first relevant
 * document anywhere in `output` (no cut-off is applied).
 *
 * @returns 1 when `expected` is empty, 0 when no relevant document appears.
 */
export function computeMrr(output: string[], expected: string[]): number {
  if (expected.length === 0) {
    return 1;
  }
  const relevant = new Set(expected);
  const firstHit = output.findIndex((docid) => relevant.has(docid));
  return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
101
+
102
/**
 * Compute all five retrieval metrics for one query, each rounded to four
 * decimals.
 *
 * When no graded judgments are supplied, nDCG treats every expected
 * document as relevance 1.
 */
export function computeRetrievalMetrics(input: {
  output: string[];
  expected: string[];
  judgments: RelevanceJudgment[];
  k: number;
}): RetrievalMetrics {
  const { output, expected, judgments, k } = input;

  const precision = computePrecision(output, expected, k);
  const recall = computeRecall(output, expected, k);

  let graded: RelevanceJudgment[] = judgments;
  if (judgments.length === 0) {
    graded = expected.map((docid) => ({ docid, relevance: 1 }));
  }

  return {
    precisionAtK: round(precision),
    recallAtK: round(recall),
    f1AtK: round(computeF1(precision, recall)),
    mrr: round(computeMrr(output, expected)),
    ndcgAtK: round(computeNdcg(output, graded, k)),
  };
}
123
+
124
/**
 * Average each metric across queries, rounded to four decimals.
 * An empty input yields 0 for every metric.
 */
export function averageMetrics(metrics: RetrievalMetrics[]): RetrievalMetrics {
  const meanOf = (pick: (entry: RetrievalMetrics) => number): number => {
    if (metrics.length === 0) {
      return round(0);
    }
    let total = 0;
    for (const entry of metrics) {
      total += pick(entry);
    }
    return round(total / metrics.length);
  };

  return {
    precisionAtK: meanOf((entry) => entry.precisionAtK),
    recallAtK: meanOf((entry) => entry.recallAtK),
    f1AtK: meanOf((entry) => entry.f1AtK),
    mrr: meanOf((entry) => entry.mrr),
    ndcgAtK: meanOf((entry) => entry.ndcgAtK),
  };
}
@@ -0,0 +1,96 @@
1
+ import type { QueryModeInput } from "../pipeline/types";
2
+ import type { RelevanceJudgment, RetrievalMetrics } from "./metrics";
3
+
4
/** The three retrieval strategies a benchmark mode can run. */
export type BenchModeType =
  | "bm25"
  | "vector"
  | "hybrid";
5
+
6
/** A fully resolved benchmark mode: a retrieval type plus optional tuning. */
export interface BenchMode {
  /** Label for this mode. */
  name: string;
  /** Retrieval strategy to run. */
  type: BenchModeType;
  /** Search depth preset; `"thorough"` is the only accepted value. */
  depth?: "thorough";
  /** NOTE(review): presumably turns off query expansion; confirm against the runner. */
  noExpand?: boolean;
  /** NOTE(review): presumably turns off reranking; confirm against the runner. */
  noRerank?: boolean;
  /** Mode-specific candidate limit. */
  candidateLimit?: number;
  /** Mode-specific result limit. */
  limit?: number;
  /** Typed query variants attached to this mode. */
  queryModes?: QueryModeInput[];
}
16
+
17
/** One normalized benchmark query together with its relevance ground truth. */
export interface BenchCase {
  /** Identifier of the query within the fixture. */
  id: string;
  /** Query text. */
  query: string;
  /** References of the documents counted as relevant. */
  expected: string[];
  /** Graded judgments; may be empty. */
  judgments: RelevanceJudgment[];
  /** Collection for this query, when one is set. */
  collection?: string;
  /** Per-query cut-off, when the fixture sets one. */
  topK?: number;
  /** Typed query variants attached to this query. */
  queryModes?: QueryModeInput[];
}
26
+
27
/** A benchmark fixture after validation and merging with run options. */
export interface BenchFixture {
  /** Fixture format version; only version 1 exists. */
  version: 1;
  /** Optional descriptive information. */
  metadata?: {
    name?: string;
    description?: string;
    tags?: string[];
  };
  /** Default collection for the run, when one is set. */
  collection?: string;
  /** Default cut-off rank for the metrics; always resolved to a number. */
  topK: number;
  /** Fixture-wide candidate limit, when one is set. */
  candidateLimit?: number;
  /** Modes to run. */
  modes: BenchMode[];
  /** Queries to evaluate. */
  queries: BenchCase[];
}
40
+
41
/** Caller-supplied options for a benchmark run; every field is optional. */
export interface BenchOptions {
  /** NOTE(review): presumably a path to a configuration file; confirm against the caller. */
  configPath?: string;
  /** NOTE(review): presumably the name of the index to query; confirm against the caller. */
  indexName?: string;
  /** Collection override. */
  collection?: string;
  /** Cut-off override. */
  topK?: number;
  /** Candidate-limit override. */
  candidateLimit?: number;
  /** Mode names to run; each must be a supported alias. */
  modes?: string[];
  /** NOTE(review): presumably selects JSON output; confirm against the caller. */
  json?: boolean;
}
50
+
51
/** Result of running one query in one mode. */
export interface BenchCaseResult {
  /** Identifier of the query. */
  id: string;
  /** Query text. */
  query: string;
  /** Cut-off rank used for the metrics. */
  topK: number;
  /** References of the documents counted as relevant. */
  expected: string[];
  /** NOTE(review): presumably the expected documents that were retrieved; confirm against the runner. */
  hits: string[];
  /** NOTE(review): presumably the top-ranked document references; confirm against the runner. */
  topDocs: string[];
  /** Quality scores for this query. */
  metrics: RetrievalMetrics;
  /** Latency of this query in milliseconds. */
  latencyMs: number;
  /** Error message, when the query failed. */
  error?: string;
}
62
+
63
/** Aggregated result of running every query in one mode. */
export interface BenchModeResult {
  /** Label of the mode. */
  name: string;
  /** Retrieval strategy that was run. */
  type: BenchModeType;
  /** Overall outcome of the mode. */
  status: "ok" | "failed";
  /** Number of queries in the mode. */
  queryCount: number;
  /** Number of failed queries. */
  failures: number;
  /** Quality scores for the mode as a whole. */
  metrics: RetrievalMetrics;
  /** Latency summary in milliseconds. */
  latency: {
    p50Ms: number;
    p95Ms: number;
    meanMs: number;
  };
  /** Per-query results. */
  cases: BenchCaseResult[];
}
77
+
78
/** Complete report of a benchmark run. */
export interface BenchOutput {
  /** Summary of the fixture that was run. */
  fixture: {
    path: string;
    name?: string;
    version: 1;
    queryCount: number;
    topK: number;
  };
  /** Generation timestamp as a string. */
  generatedAt: string;
  /** One result per mode. */
  modes: BenchModeResult[];
  /** Index and collection the run refers to. */
  meta: {
    indexName: string;
    collection?: string;
  };
}
93
+
94
/**
 * Outcome of a benchmark run, discriminated on `success`.
 * A failure may set `isValidation`.
 */
export type BenchResult =
  | {
      success: true;
      data: BenchOutput;
    }
  | {
      success: false;
      error: string;
      isValidation?: boolean;
    };