@tobilu/qmd 2.0.1 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +177 -0
- package/README.md +64 -1
- package/bin/qmd +49 -4
- package/dist/ast.d.ts +65 -0
- package/dist/ast.js +334 -0
- package/dist/bench/bench.d.ts +23 -0
- package/dist/bench/bench.js +280 -0
- package/dist/bench/score.d.ts +33 -0
- package/dist/bench/score.js +88 -0
- package/dist/bench/types.d.ts +80 -0
- package/dist/bench/types.js +8 -0
- package/dist/cli/formatter.js +5 -1
- package/dist/cli/qmd.d.ts +27 -0
- package/dist/cli/qmd.js +1328 -115
- package/dist/collections.d.ts +20 -0
- package/dist/collections.js +32 -7
- package/dist/db.d.ts +14 -3
- package/dist/db.js +45 -4
- package/dist/index.d.ts +11 -1
- package/dist/index.js +18 -5
- package/dist/llm.d.ts +77 -6
- package/dist/llm.js +445 -62
- package/dist/mcp/server.d.ts +6 -3
- package/dist/mcp/server.js +68 -29
- package/dist/paths.d.ts +1 -0
- package/dist/paths.js +4 -0
- package/dist/store.d.ts +148 -23
- package/dist/store.js +1018 -255
- package/package.json +48 -20
- package/scripts/build.mjs +29 -0
- package/scripts/check-package-grammars.mjs +29 -0
- package/scripts/package-smoke.mjs +65 -0
- package/scripts/test-all.mjs +27 -0
- package/skills/qmd/SKILL.md +203 -0
- package/skills/qmd/references/mcp-setup.md +102 -0
- package/skills/release/SKILL.md +139 -0
- package/skills/release/scripts/install-hooks.sh +38 -0
- package/dist/embedded-skills.d.ts +0 -6
- package/dist/embedded-skills.js +0 -14
package/dist/ast.js
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AST-aware chunking support via web-tree-sitter.
|
|
3
|
+
*
|
|
4
|
+
* Provides language detection, AST break point extraction for supported
|
|
5
|
+
* code file types, and a stub for future symbol extraction.
|
|
6
|
+
*
|
|
7
|
+
* All functions degrade gracefully: parse failures or unsupported languages
|
|
8
|
+
* return empty arrays, falling back to regex-only chunking.
|
|
9
|
+
*
|
|
10
|
+
* ## Dependency Note
|
|
11
|
+
*
|
|
12
|
+
* Grammar packages (tree-sitter-typescript, etc.) are listed as
|
|
13
|
+
* optionalDependencies with pinned versions. They ship native prebuilds
|
|
14
|
+
* and source files (~72 MB total) but QMD only uses the .wasm files
|
|
15
|
+
* (~5 MB). If install size becomes a concern, the .wasm files can be
|
|
16
|
+
* bundled directly in the repo (e.g. assets/grammars/) and resolved
|
|
17
|
+
* via import.meta.url instead of require.resolve(), eliminating the
|
|
18
|
+
* grammar packages entirely.
|
|
19
|
+
*/
|
|
20
|
+
import { createRequire } from "node:module";
|
|
21
|
+
import { extname } from "node:path";
|
|
22
|
+
/** File extension (lower-case, with leading dot) -> grammar language key. */
const EXTENSION_MAP = {
    // TypeScript family
    ".ts": "typescript",
    ".mts": "typescript",
    ".cts": "typescript",
    ".tsx": "tsx",
    // JavaScript family (.jsx goes through the tsx entry)
    ".js": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".jsx": "tsx",
    // Other languages
    ".py": "python",
    ".go": "go",
    ".rs": "rust",
};
/**
 * Work out which language a file is written in from its extension.
 *
 * The extension is compared case-insensitively.
 *
 * @param filepath - Path (or bare file name) of the file.
 * @returns The language key, or null when the extension is not in
 *          EXTENSION_MAP (for example .md or a file with no extension).
 */
export function detectLanguage(filepath) {
    const suffix = extname(filepath).toLowerCase();
    const language = EXTENSION_MAP[suffix];
    return language ?? null;
}
|
|
43
|
+
// =============================================================================
|
|
44
|
+
// Grammar Resolution
|
|
45
|
+
// =============================================================================
|
|
46
|
+
/**
 * Maps each language key to the npm package that ships its grammar, the
 * .wasm file inside that package, and the version named in repair hints.
 *
 * Note that "javascript" points at the TypeScript grammar package and wasm.
 */
const GRAMMAR_MAP = {
    typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
    tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm", version: "0.23.2" },
    javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
    python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm", version: "0.23.4" },
    go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm", version: "0.23.4" },
    rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm", version: "0.24.0" },
};
/**
 * Build the human-readable message shown when a grammar cannot be loaded.
 *
 * This function never throws for an unregistered language: an error
 * formatter that raises its own TypeError would hide the original failure.
 *
 * @param language - Language key (normally a key of GRAMMAR_MAP).
 * @param err - The value that was thrown; non-Error values are stringified.
 * @returns A one-line description including a repair command when the
 *          grammar package is known.
 */
export function formatGrammarLoadError(language, err) {
    const detail = err instanceof Error ? err.message : String(err);
    // Own-property check so keys such as "constructor" are not mistaken for entries.
    const known = Object.prototype.hasOwnProperty.call(GRAMMAR_MAP, language);
    if (!known) {
        return `no grammar is registered for "${language}" (${detail}); falling back to regex chunking.`;
    }
    const grammar = GRAMMAR_MAP[language];
    return `${grammar.pkg}/${grammar.wasm} failed to load (${detail}); falling back to regex chunking. ` +
        `Repair a broken global install with: bun add ${grammar.pkg}@${grammar.version}`;
}
|
|
63
|
+
// =============================================================================
|
|
64
|
+
// Per-Language Query Definitions
|
|
65
|
+
// =============================================================================
|
|
66
|
+
/**
 * Query shared by the "typescript" and "tsx" entries below, which use the
 * same pattern list. export_statement is captured as a whole so the break
 * lands before the `export` keyword; bare declarations cover non-exported code.
 */
const TYPESCRIPT_FAMILY_QUERY = `
(export_statement) @export
(class_declaration) @class
(function_declaration) @func
(method_definition) @method
(interface_declaration) @iface
(type_alias_declaration) @type
(enum_declaration) @enum
(import_statement) @import
(lexical_declaration (variable_declarator value: (arrow_function))) @func
(lexical_declaration (variable_declarator value: (function_expression))) @func
`;
/**
 * Tree-sitter S-expression queries, one per language key.
 *
 * Every capture name (@class, @func, ...) is looked up in SCORE_MAP to
 * obtain the break point score for the captured node.
 */
const LANGUAGE_QUERIES = {
    typescript: TYPESCRIPT_FAMILY_QUERY,
    tsx: TYPESCRIPT_FAMILY_QUERY,
    // Same as the TypeScript list minus interface/type-alias/enum patterns.
    javascript: `
(export_statement) @export
(class_declaration) @class
(function_declaration) @func
(method_definition) @method
(import_statement) @import
(lexical_declaration (variable_declarator value: (arrow_function))) @func
(lexical_declaration (variable_declarator value: (function_expression))) @func
`,
    python: `
(class_definition) @class
(function_definition) @func
(decorated_definition) @decorated
(import_statement) @import
(import_from_statement) @import
`,
    go: `
(type_declaration) @type
(function_declaration) @func
(method_declaration) @method
(import_declaration) @import
`,
    rust: `
(struct_item) @struct
(impl_item) @impl
(function_item) @func
(trait_item) @trait
(enum_item) @enum
(use_declaration) @import
(type_item) @type
(mod_item) @mod
`,
};
|
|
132
|
+
/**
 * Break point score tiers. The values follow the markdown BREAK_PATTERNS
 * scale (h1=100, h2=90, etc.) so findBestCutoff() decay works unchanged.
 */
const CONTAINER_SCORE = 100;
const CALLABLE_SCORE = 90;
const TYPE_SCORE = 80;
const IMPORT_SCORE = 60;
/** Capture name (as used in LANGUAGE_QUERIES) -> break point score. */
const SCORE_MAP = {
    // Containers
    class: CONTAINER_SCORE,
    iface: CONTAINER_SCORE,
    struct: CONTAINER_SCORE,
    trait: CONTAINER_SCORE,
    impl: CONTAINER_SCORE,
    mod: CONTAINER_SCORE,
    // Exports and callables
    export: CALLABLE_SCORE,
    func: CALLABLE_SCORE,
    method: CALLABLE_SCORE,
    decorated: CALLABLE_SCORE,
    // Type-level declarations
    type: TYPE_SCORE,
    enum: TYPE_SCORE,
    // Imports
    import: IMPORT_SCORE,
};
|
|
152
|
+
// =============================================================================
|
|
153
|
+
// Parser Caching & Initialization
|
|
154
|
+
// =============================================================================
|
|
155
|
+
// web-tree-sitter classes, filled in by the first successful import.
let ParserClass = null;
let LanguageClass = null;
let QueryClass = null;
// Promise of the one-time initialization; null until ensureInit() first runs.
let initPromise = null;
/** Languages whose grammar already failed to load, so the warning is not repeated. */
const failedLanguages = new Set();
/** Most recent grammar load error message per language, surfaced in status output. */
const grammarLoadErrors = new Map();
/** In-flight or completed grammar load promises, keyed by .wasm file name. */
const grammarCache = new Map();
/** Compiled queries, keyed by language. */
const queryCache = new Map();
/** Import web-tree-sitter, remember its classes, and run Parser.init(). */
async function importTreeSitter() {
    const mod = await import("web-tree-sitter");
    ParserClass = mod.Parser;
    LanguageClass = mod.Language;
    QueryClass = mod.Query;
    await ParserClass.init();
}
/**
 * Make sure web-tree-sitter is initialized.
 *
 * The initialization is started at most once; every call returns the same
 * promise, including when that promise rejected.
 */
async function ensureInit() {
    if (initPromise)
        return initPromise;
    initPromise = importTreeSitter();
    return initPromise;
}
|
|
182
|
+
/**
 * Locate the grammar .wasm file for a language on disk.
 *
 * The path is resolved as "<package>/<wasm file>" through a require
 * function created relative to this module.
 *
 * @param language - Key of GRAMMAR_MAP.
 * @returns Path of the .wasm file.
 */
function resolveGrammarPath(language) {
    const grammar = GRAMMAR_MAP[language];
    const specifier = `${grammar.pkg}/${grammar.wasm}`;
    const localRequire = createRequire(import.meta.url);
    return localRequire.resolve(specifier);
}
|
|
191
|
+
/**
 * Load the grammar for a language, sharing one load per .wasm file.
 *
 * Languages that use the same .wasm file (see GRAMMAR_MAP) share a single
 * cached load promise. On failure the language is remembered in
 * failedLanguages, the message is stored in grammarLoadErrors, and a
 * warning is printed once per language, even when several callers were
 * awaiting the same failed load.
 *
 * @param language - Key of GRAMMAR_MAP.
 * @returns The loaded grammar, or null when loading failed (now or earlier).
 */
async function loadGrammar(language) {
    if (failedLanguages.has(language))
        return null;
    const wasmKey = GRAMMAR_MAP[language].wasm;
    let pending = grammarCache.get(wasmKey);
    if (!pending) {
        // Path resolution happens inside the async function so that a
        // resolution error becomes a rejection handled below.
        pending = (async () => LanguageClass.load(resolveGrammarPath(language)))();
        grammarCache.set(wasmKey, pending);
    }
    try {
        return await pending;
    }
    catch (err) {
        // Evict only the promise that failed for us; another caller may
        // already have replaced it with a fresh attempt.
        if (grammarCache.get(wasmKey) === pending) {
            grammarCache.delete(wasmKey);
        }
        // Concurrent callers all land here for the same rejection; only
        // the first one per language records and reports it.
        if (!failedLanguages.has(language)) {
            failedLanguages.add(language);
            const message = formatGrammarLoadError(language, err);
            grammarLoadErrors.set(language, message);
            console.warn(`[qmd] AST grammar unavailable for ${language}: ${message}`);
        }
        return null;
    }
}
|
|
217
|
+
/**
 * Return the compiled query for a language, compiling it on first use.
 *
 * @param language - Key of LANGUAGE_QUERIES; also the cache key.
 * @param grammar - Loaded grammar the query is compiled against.
 * @returns The cached or newly compiled query object.
 */
function getQuery(language, grammar) {
    const cached = queryCache.get(language);
    if (cached !== undefined)
        return cached;
    const compiled = new QueryClass(grammar, LANGUAGE_QUERIES[language]);
    queryCache.set(language, compiled);
    return compiled;
}
|
|
228
|
+
// =============================================================================
|
|
229
|
+
// AST Break Point Extraction
|
|
230
|
+
// =============================================================================
|
|
231
|
+
/**
 * Parse a source file and return break points at AST node boundaries.
 *
 * Returns an empty array for unsupported languages, grammar loading
 * failures, and parse or query failures (the latter are logged as a
 * warning). The parser and tree are released on every path, including
 * when query compilation or capture extraction throws.
 *
 * @param content - The file content to parse.
 * @param filepath - The file path (used for language detection and logging).
 * @returns Break points as { pos, score, type } objects sorted by pos,
 *          where type is "ast:<capture name>".
 */
export async function getASTBreakPoints(content, filepath) {
    const language = detectLanguage(filepath);
    if (!language)
        return [];
    try {
        await ensureInit();
        const grammar = await loadGrammar(language);
        if (!grammar)
            return [];
        const parser = new ParserClass();
        try {
            parser.setLanguage(grammar);
            const tree = parser.parse(content);
            if (!tree)
                return [];
            try {
                const query = getQuery(language, grammar);
                // Several captures can start at the same index (for example an
                // export_statement and a pattern that begins with it). Keep one
                // entry per start index: the highest score wins, and on a tie
                // the capture seen first is kept.
                const bestByPos = new Map();
                for (const cap of query.captures(tree.rootNode)) {
                    const pos = cap.node.startIndex;
                    // Capture names missing from SCORE_MAP get a low default score.
                    const score = SCORE_MAP[cap.name] ?? 20;
                    const existing = bestByPos.get(pos);
                    if (!existing || score > existing.score) {
                        bestByPos.set(pos, { pos, score, type: `ast:${cap.name}` });
                    }
                }
                return Array.from(bestByPos.values()).sort((a, b) => a.pos - b.pos);
            }
            finally {
                tree.delete();
            }
        }
        finally {
            parser.delete();
        }
    }
    catch (err) {
        console.warn(`[qmd] AST parse failed for ${filepath}, falling back to regex: ${err instanceof Error ? err.message : err}`);
        return [];
    }
}
|
|
281
|
+
// =============================================================================
|
|
282
|
+
// Health / Status
|
|
283
|
+
// =============================================================================
|
|
284
|
+
/**
 * Report which tree-sitter grammars can be used.
 *
 * Each language in GRAMMAR_MAP is probed in turn: its grammar is loaded and
 * its query compiled. If web-tree-sitter itself cannot be initialized,
 * every language is reported unavailable with the same init error.
 *
 * @returns { available, languages } where `available` is true when at
 *          least one language is usable and `languages` holds one
 *          { language, available, error? } entry per supported language.
 */
export async function getASTStatus() {
    const supported = Object.keys(GRAMMAR_MAP);
    try {
        await ensureInit();
    }
    catch (err) {
        const unavailable = supported.map(lang => ({
            language: lang,
            available: false,
            error: `web-tree-sitter init failed: ${err instanceof Error ? err.message : err}`,
        }));
        return { available: false, languages: unavailable };
    }
    const languages = [];
    for (const lang of supported) {
        let entry;
        try {
            const grammar = await loadGrammar(lang);
            if (!grammar) {
                entry = { language: lang, available: false, error: grammarLoadErrors.get(lang) ?? "grammar failed to load" };
            }
            else {
                // A grammar only counts as available if its query compiles too.
                getQuery(lang, grammar);
                entry = { language: lang, available: true };
            }
        }
        catch (err) {
            entry = {
                language: lang,
                available: false,
                error: err instanceof Error ? err.message : String(err),
            };
        }
        languages.push(entry);
    }
    const available = languages.some(l => l.available);
    return { available, languages };
}
|
|
328
|
+
/**
 * Placeholder for symbol extraction over a range of a source file.
 *
 * Not implemented yet (planned for Phase 2): all arguments are ignored and
 * a new empty array is returned on every call.
 */
export function extractSymbols(_content, _language, _startPos, _endPos) {
    const symbols = [];
    return symbols;
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* QMD Benchmark Harness
|
|
3
|
+
*
|
|
4
|
+
* Runs queries from a fixture file against multiple search backends
|
|
5
|
+
* and measures precision@k, recall, MRR, F1, and latency.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* qmd bench <fixture.json> [--json] [--collection <name>]
|
|
9
|
+
*
|
|
10
|
+
* Backends tested:
|
|
11
|
+
* - bm25: BM25 keyword search (searchLex)
|
|
12
|
+
* - vector: Vector similarity search (searchVector)
|
|
13
|
+
* - hybrid: BM25 + vector RRF fusion without reranking
|
|
14
|
+
* - full: Full hybrid pipeline with LLM reranking
|
|
15
|
+
*/
|
|
16
|
+
import type { BenchmarkResult } from "./types.js";
|
|
17
|
+
export declare function runBenchmark(fixturePath: string, options?: {
|
|
18
|
+
json?: boolean;
|
|
19
|
+
collection?: string;
|
|
20
|
+
backends?: string[];
|
|
21
|
+
dbPath?: string;
|
|
22
|
+
configPath?: string;
|
|
23
|
+
}): Promise<BenchmarkResult>;
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* QMD Benchmark Harness
|
|
3
|
+
*
|
|
4
|
+
* Runs queries from a fixture file against multiple search backends
|
|
5
|
+
* and measures precision@k, recall, MRR, F1, and latency.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* qmd bench <fixture.json> [--json] [--collection <name>]
|
|
9
|
+
*
|
|
10
|
+
* Backends tested:
|
|
11
|
+
* - bm25: BM25 keyword search (searchLex)
|
|
12
|
+
* - vector: Vector similarity search (searchVector)
|
|
13
|
+
* - hybrid: BM25 + vector RRF fusion without reranking
|
|
14
|
+
* - full: Full hybrid pipeline with LLM reranking
|
|
15
|
+
*/
|
|
16
|
+
import { readFileSync } from "node:fs";
|
|
17
|
+
import { resolve } from "node:path";
|
|
18
|
+
import { createStore, getDefaultDbPath, } from "../index.js";
|
|
19
|
+
import { scoreResults } from "./score.js";
|
|
20
|
+
/**
 * Parse a benchmark query written in the structured multi-line form.
 *
 * Each non-blank line must start with `lex:`, `vec:`, `hyde:` or `intent:`
 * (case-insensitive). At most one `intent:` line is allowed and it cannot
 * be the only line. A query consisting of a single unprefixed line is
 * treated as a plain query.
 *
 * @param query - Raw query text from the fixture.
 * @returns `{ searches, intent }` for a structured query, where each search
 *          is `{ type, query, line }` with a 1-based line number; or
 *          undefined for an empty or plain single-line query.
 * @throws Error when a line lacks a prefix, a prefix has no text, or the
 *         intent rules are violated.
 */
function parseStructuredQuery(query) {
    const intentPrefix = /^intent:\s*/i;
    const searchPrefix = /^(lex|vec|hyde):\s*/i;
    // Keep the original 1-based line numbers for error messages.
    const entries = [];
    query.split("\n").forEach((rawLine, index) => {
        const text = rawLine.trim();
        if (text.length > 0) {
            entries.push({ text, number: index + 1 });
        }
    });
    if (entries.length === 0)
        return undefined;
    const searches = [];
    let intent;
    for (const { text, number } of entries) {
        if (intentPrefix.test(text)) {
            if (intent !== undefined) {
                throw new Error(`Line ${number}: only one intent: line is allowed per benchmark query.`);
            }
            intent = text.replace(intentPrefix, "").trim();
            if (!intent) {
                throw new Error(`Line ${number}: intent: must include text.`);
            }
            continue;
        }
        const prefixed = searchPrefix.exec(text);
        if (prefixed === null) {
            // A lone unprefixed line is an ordinary (unstructured) query.
            if (entries.length === 1) {
                return undefined;
            }
            throw new Error(`Line ${number} is missing a lex:/vec:/hyde:/intent: prefix.`);
        }
        const type = prefixed[1].toLowerCase();
        const body = text.slice(prefixed[0].length).trim();
        if (!body) {
            throw new Error(`Line ${number} (${type}:) must include text.`);
        }
        searches.push({ type, query: body, line: number });
    }
    if (intent && searches.length === 0) {
        throw new Error("intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.");
    }
    if (searches.length === 0)
        return undefined;
    return { searches, intent };
}
|
|
62
|
+
/**
 * Remove duplicates from a list of file paths, keeping first occurrences
 * in order, and stop once `limit` distinct entries have been collected.
 *
 * @param files - File paths, possibly with repeats.
 * @param limit - Maximum number of entries to return. Zero or a negative
 *                value yields an empty array.
 * @returns A new array of at most `limit` distinct paths.
 */
function uniqueFiles(files, limit) {
    // Without this guard the first file would be pushed before the limit
    // is ever compared, so a limit of 0 would still return one entry.
    if (limit <= 0)
        return [];
    const seen = new Set();
    const out = [];
    for (const file of files) {
        if (seen.has(file))
            continue;
        seen.add(file);
        out.push(file);
        if (out.length >= limit)
            break;
    }
    return out;
}
|
|
75
|
+
/**
 * Shared driver for the single-index backends.
 *
 * For a plain query, runs one search with the raw query text. For a
 * structured query, runs one search per sub-query whose type is listed in
 * `types` and returns the de-duplicated file paths, capped at `limit`
 * (an empty list when no sub-query has a matching type).
 */
const runTypedSearches = async (store, query, limit, collection, types, searchOne) => {
    const structured = parseStructuredQuery(query.query);
    if (!structured) {
        const hits = await searchOne(store, query.query, { limit, collection });
        return hits.map((r) => r.filepath);
    }
    const files = [];
    for (const sub of structured.searches) {
        if (!types.includes(sub.type))
            continue;
        const hits = await searchOne(store, sub.query, { limit, collection });
        files.push(...hits.map((r) => r.filepath));
    }
    return uniqueFiles(files, limit);
};
/**
 * Shared driver for the store.search() backends; `rerank` is passed
 * through to store.search() unchanged.
 */
const runPipelineSearch = async (store, query, limit, collection, rerank) => {
    const structured = parseStructuredQuery(query.query);
    const request = structured
        ? { queries: structured.searches, intent: structured.intent, limit, collection, rerank }
        : { query: query.query, limit, collection, rerank };
    const results = await store.search(request);
    return results.map((r) => r.file);
};
/**
 * Search backends exercised by the benchmark. Each `run` resolves to the
 * list of result file paths for one fixture query.
 */
const BACKENDS = [
    {
        // Keyword search: uses only lex: sub-queries of a structured query.
        name: "bm25",
        run: (store, query, limit, collection) => runTypedSearches(store, query, limit, collection, ["lex"], (s, text, opts) => s.searchLex(text, opts)),
    },
    {
        // Vector search: uses vec: and hyde: sub-queries of a structured query.
        name: "vector",
        run: (store, query, limit, collection) => runTypedSearches(store, query, limit, collection, ["vec", "hyde"], (s, text, opts) => s.searchVector(text, opts)),
    },
    {
        // store.search() with reranking disabled.
        name: "hybrid",
        run: (store, query, limit, collection) => runPipelineSearch(store, query, limit, collection, false),
    },
    {
        // store.search() with reranking enabled.
        name: "full",
        run: (store, query, limit, collection) => runPipelineSearch(store, query, limit, collection, true),
    },
];
|
|
131
|
+
async function runQuery(store, backend, query, collection) {
|
|
132
|
+
const limit = Math.max(query.expected_in_top_k, 10);
|
|
133
|
+
const start = Date.now();
|
|
134
|
+
let resultFiles;
|
|
135
|
+
try {
|
|
136
|
+
resultFiles = await backend.run(store, query, limit, collection);
|
|
137
|
+
}
|
|
138
|
+
catch {
|
|
139
|
+
// Backend may not be available (e.g., no embeddings for vector search)
|
|
140
|
+
return {
|
|
141
|
+
precision_at_k: 0,
|
|
142
|
+
recall: 0,
|
|
143
|
+
recall_at_1: 0,
|
|
144
|
+
recall_at_3: 0,
|
|
145
|
+
recall_at_5: 0,
|
|
146
|
+
mrr: 0,
|
|
147
|
+
f1: 0,
|
|
148
|
+
hits_at_k: 0,
|
|
149
|
+
total_expected: query.expected_files.length,
|
|
150
|
+
latency_ms: Date.now() - start,
|
|
151
|
+
top_files: [],
|
|
152
|
+
matched_files: [],
|
|
153
|
+
unmatched_expected_files: query.expected_files,
|
|
154
|
+
};
|
|
155
|
+
}
|
|
156
|
+
const latency_ms = Date.now() - start;
|
|
157
|
+
const scores = scoreResults(resultFiles, query.expected_files, query.expected_in_top_k);
|
|
158
|
+
return {
|
|
159
|
+
...scores,
|
|
160
|
+
total_expected: query.expected_files.length,
|
|
161
|
+
latency_ms,
|
|
162
|
+
top_files: resultFiles.slice(0, 10),
|
|
163
|
+
};
|
|
164
|
+
}
|
|
165
|
+
/**
 * Render per-query benchmark results as a fixed-width text table.
 *
 * One row is written per (query, backend) pair, with a blank line after
 * each query. Numeric columns are right-aligned and share their width with
 * the header so the columns line up; text cells are truncated to fit.
 *
 * @param results - Entries with an `id` and a `backends` object mapping a
 *                  backend name to its metrics.
 * @returns The table as a single newline-joined string.
 */
function formatTable(results) {
    const NUM_WIDTH = 6;
    const MS_WIDTH = 8;
    // String() so a numeric id from the JSON fixture cannot crash .slice().
    const pad = (s, n) => String(s).slice(0, n).padEnd(n);
    const num = (n) => n.toFixed(2).padStart(NUM_WIDTH);
    const numHeader = (label) => label.padStart(NUM_WIDTH);
    const header = `${pad("Query", 25)} ${pad("Backend", 8)} ` +
        ["P@k", "R@1", "R@3", "R@5", "MRR", "F1"].map(label => `${numHeader(label)} `).join("") +
        "ms".padStart(MS_WIDTH);
    const lines = [header, "-".repeat(header.length)];
    for (const r of results) {
        for (const [backend, br] of Object.entries(r.backends)) {
            const metrics = [br.precision_at_k, br.recall_at_1, br.recall_at_3, br.recall_at_5, br.mrr, br.f1];
            // "ms" suffix takes two of the MS_WIDTH columns.
            const latency = `${String(Math.round(br.latency_ms)).padStart(MS_WIDTH - 2)}ms`;
            lines.push(`${pad(r.id, 25)} ${pad(backend, 8)} ` +
                metrics.map(value => `${num(value)} `).join("") +
                latency);
        }
        lines.push("");
    }
    return lines.join("\n");
}
|
|
179
|
+
function computeSummary(results) {
|
|
180
|
+
const summary = {};
|
|
181
|
+
// Collect all backend names
|
|
182
|
+
const backendNames = new Set();
|
|
183
|
+
for (const r of results) {
|
|
184
|
+
for (const name of Object.keys(r.backends)) {
|
|
185
|
+
backendNames.add(name);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
for (const name of Array.from(backendNames)) {
|
|
189
|
+
let totalP = 0, totalR = 0, totalR1 = 0, totalR3 = 0, totalR5 = 0, totalMrr = 0, totalF1 = 0, totalLat = 0, count = 0;
|
|
190
|
+
for (const r of results) {
|
|
191
|
+
const br = r.backends[name];
|
|
192
|
+
if (!br)
|
|
193
|
+
continue;
|
|
194
|
+
totalP += br.precision_at_k;
|
|
195
|
+
totalR += br.recall;
|
|
196
|
+
totalR1 += br.recall_at_1;
|
|
197
|
+
totalR3 += br.recall_at_3;
|
|
198
|
+
totalR5 += br.recall_at_5;
|
|
199
|
+
totalMrr += br.mrr;
|
|
200
|
+
totalF1 += br.f1;
|
|
201
|
+
totalLat += br.latency_ms;
|
|
202
|
+
count++;
|
|
203
|
+
}
|
|
204
|
+
if (count > 0) {
|
|
205
|
+
summary[name] = {
|
|
206
|
+
avg_precision: totalP / count,
|
|
207
|
+
avg_recall: totalR / count,
|
|
208
|
+
avg_recall_at_1: totalR1 / count,
|
|
209
|
+
avg_recall_at_3: totalR3 / count,
|
|
210
|
+
avg_recall_at_5: totalR5 / count,
|
|
211
|
+
avg_mrr: totalMrr / count,
|
|
212
|
+
avg_f1: totalF1 / count,
|
|
213
|
+
avg_latency_ms: totalLat / count,
|
|
214
|
+
};
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
return summary;
|
|
218
|
+
}
|
|
219
|
+
/**
 * Run every query of a fixture file against the selected search backends,
 * print the outcome, and return it.
 *
 * Progress lines go to stderr and the table/summary to stdout; with
 * `options.json` only the JSON result is printed. The store is closed
 * whether or not the run succeeds.
 *
 * @param fixturePath - Path of the JSON fixture (must contain a `queries` array).
 * @param options - Optional settings: `json`, `collection` (overrides the
 *                  fixture's collection), `backends` (names to run),
 *                  `dbPath`, `configPath`.
 * @returns `{ timestamp, fixture, results, summary }`.
 * @throws Error when the fixture has no `queries` array; file read and
 *         JSON parse errors propagate unchanged.
 */
export async function runBenchmark(fixturePath, options = {}) {
    // Load fixture
    const raw = readFileSync(resolve(fixturePath), "utf-8");
    const fixture = JSON.parse(raw);
    // Optional chaining: a fixture that parses to null must produce the
    // same validation error rather than a raw TypeError.
    if (!Array.isArray(fixture?.queries)) {
        throw new Error("Invalid fixture: missing 'queries' array");
    }
    // Open store
    const store = await createStore({
        dbPath: options.dbPath ?? getDefaultDbPath(),
        ...(options.configPath ? { configPath: options.configPath } : {}),
    });
    // Filter backends if requested
    const activeBackends = options.backends
        ? BACKENDS.filter(b => options.backends.includes(b.name))
        : BACKENDS;
    const collection = options.collection ?? fixture.collection;
    // Run queries
    const results = [];
    try {
        for (const query of fixture.queries) {
            const backends = {};
            for (const backend of activeBackends) {
                if (!options.json) {
                    process.stderr.write(`  ${query.id} / ${backend.name}...`);
                }
                backends[backend.name] = await runQuery(store, backend, query, collection);
                if (!options.json) {
                    process.stderr.write(` ${Math.round(backends[backend.name].latency_ms)}ms\n`);
                }
            }
            results.push({
                id: query.id,
                query: query.query,
                type: query.type,
                backends,
            });
        }
    }
    finally {
        // Release the store even when a malformed fixture entry makes
        // runQuery throw part-way through.
        await store.close();
    }
    const summary = computeSummary(results);
    // Produces e.g. "2024-01-01T1234": dashes are kept and the 15-character
    // cut drops the seconds.
    // NOTE(review): the length 15 matches "YYYYMMDDTHHMMSS"; confirm whether
    // dashes were meant to be stripped as well.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "").slice(0, 15);
    const benchResult = {
        timestamp,
        fixture: fixturePath,
        results,
        summary,
    };
    // Output
    if (options.json) {
        console.log(JSON.stringify(benchResult, null, 2));
    }
    else {
        console.log("\n" + formatTable(results));
        console.log("Summary:");
        console.log("-".repeat(70));
        const pad = (s, n) => s.slice(0, n).padEnd(n);
        const num = (n) => n.toFixed(3).padStart(6);
        for (const [name, s] of Object.entries(summary)) {
            console.log(`  ${pad(name, 8)}  P@k=${num(s.avg_precision)}  R@1=${num(s.avg_recall_at_1)}  R@3=${num(s.avg_recall_at_3)}  R@5=${num(s.avg_recall_at_5)}  MRR=${num(s.avg_mrr)}  F1=${num(s.avg_f1)}  Avg=${Math.round(s.avg_latency_ms)}ms`);
        }
    }
    return benchResult;
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scoring functions for the QMD benchmark harness.
|
|
3
|
+
*
|
|
4
|
+
* Computes precision@k, recall, MRR, and F1 for search results
|
|
5
|
+
* against ground-truth expected files.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Normalize a file path for comparison.
|
|
9
|
+
* Strips qmd:// prefix, lowercases, removes leading/trailing slashes.
|
|
10
|
+
*/
|
|
11
|
+
export declare function normalizePath(p: string): string;
|
|
12
|
+
/**
|
|
13
|
+
* Check if two paths refer to the same file.
|
|
14
|
+
* Handles different path formats by comparing normalized suffixes.
|
|
15
|
+
*/
|
|
16
|
+
export declare function pathsMatch(result: string, expected: string): boolean;
|
|
17
|
+
type ScoreMetrics = {
|
|
18
|
+
precision_at_k: number;
|
|
19
|
+
recall: number;
|
|
20
|
+
recall_at_1: number;
|
|
21
|
+
recall_at_3: number;
|
|
22
|
+
recall_at_5: number;
|
|
23
|
+
mrr: number;
|
|
24
|
+
f1: number;
|
|
25
|
+
hits_at_k: number;
|
|
26
|
+
matched_files: string[];
|
|
27
|
+
unmatched_expected_files: string[];
|
|
28
|
+
};
|
|
29
|
+
/**
|
|
30
|
+
* Score a set of search results against expected files.
|
|
31
|
+
*/
|
|
32
|
+
export declare function scoreResults(resultFiles: string[], expectedFiles: string[], topK: number): ScoreMetrics;
|
|
33
|
+
export {};
|