@tobilu/qmd 1.1.1 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +82 -0
- package/README.md +29 -3
- package/dist/collections.d.ts +1 -0
- package/dist/formatter.d.ts +1 -0
- package/dist/formatter.js +4 -4
- package/dist/llm.d.ts +20 -5
- package/dist/llm.js +102 -51
- package/dist/mcp.js +86 -10
- package/dist/qmd.js +145 -60
- package/dist/store.d.ts +62 -15
- package/dist/store.js +224 -33
- package/package.json +4 -3
package/dist/mcp.js
CHANGED
|
@@ -12,6 +12,7 @@ import { fileURLToPath } from "url";
|
|
|
12
12
|
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
13
13
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
14
14
|
import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
|
|
15
|
+
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
|
|
15
16
|
import { z } from "zod";
|
|
16
17
|
import { createStore, extractSnippet, addLineNumbers, structuredSearch, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js";
|
|
17
18
|
import { getCollection, getGlobalContext, getDefaultCollectionNames } from "./collections.js";
|
|
@@ -83,10 +84,13 @@ function buildInstructions(store) {
|
|
|
83
84
|
lines.push(" - type:'vec' — semantic vector search (meaning-based)");
|
|
84
85
|
lines.push(" - type:'hyde' — hypothetical document (write what the answer looks like)");
|
|
85
86
|
lines.push("");
|
|
87
|
+
lines.push(" Always provide `intent` on every search call to disambiguate and improve snippets.");
|
|
88
|
+
lines.push("");
|
|
86
89
|
lines.push("Examples:");
|
|
87
90
|
lines.push(" Quick keyword lookup: [{type:'lex', query:'error handling'}]");
|
|
88
91
|
lines.push(" Semantic search: [{type:'vec', query:'how to handle errors gracefully'}]");
|
|
89
92
|
lines.push(" Best results: [{type:'lex', query:'error'}, {type:'vec', query:'error handling best practices'}]");
|
|
93
|
+
lines.push(" With intent: searches=[{type:'lex', query:'performance'}], intent='web page load times'");
|
|
90
94
|
// --- Retrieval workflow ---
|
|
91
95
|
lines.push("");
|
|
92
96
|
lines.push("Retrieval:");
|
|
@@ -233,9 +237,11 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
233
237
|
searches: z.array(subSearchSchema).min(1).max(10).describe("Typed sub-queries to execute (lex/vec/hyde). First gets 2x weight."),
|
|
234
238
|
limit: z.number().optional().default(10).describe("Max results (default: 10)"),
|
|
235
239
|
minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
|
|
240
|
+
candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
|
|
236
241
|
collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
|
|
242
|
+
intent: z.string().optional().describe("Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."),
|
|
237
243
|
},
|
|
238
|
-
}, async ({ searches, limit, minScore, collections }) => {
|
|
244
|
+
}, async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
|
|
239
245
|
// Map to internal format
|
|
240
246
|
const subSearches = searches.map(s => ({
|
|
241
247
|
type: s.type,
|
|
@@ -247,13 +253,15 @@ Intent-aware lex (C++ performance, not sports):
|
|
|
247
253
|
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
|
248
254
|
limit,
|
|
249
255
|
minScore,
|
|
256
|
+
candidateLimit,
|
|
257
|
+
intent,
|
|
250
258
|
});
|
|
251
259
|
// Use first lex or vec query for snippet extraction
|
|
252
260
|
const primaryQuery = searches.find(s => s.type === 'lex')?.query
|
|
253
261
|
|| searches.find(s => s.type === 'vec')?.query
|
|
254
262
|
|| searches[0]?.query || "";
|
|
255
263
|
const filtered = results.map(r => {
|
|
256
|
-
const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
|
|
264
|
+
const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300, undefined, undefined, intent);
|
|
257
265
|
return {
|
|
258
266
|
docid: `#${r.docid}`,
|
|
259
267
|
file: r.displayPath,
|
|
@@ -425,12 +433,27 @@ export async function startMcpServer() {
|
|
|
425
433
|
*/
|
|
426
434
|
export async function startMcpHttpServer(port, options) {
|
|
427
435
|
const store = createStore();
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
436
|
+
// Session map: each client gets its own McpServer + Transport pair (MCP spec requirement).
|
|
437
|
+
// The store is shared — it's stateless SQLite, safe for concurrent access.
|
|
438
|
+
const sessions = new Map();
|
|
439
|
+
/**
 * Create a new MCP session: a dedicated transport plus a dedicated McpServer,
 * both backed by the shared `store`.
 *
 * The transport registers itself in the `sessions` map once the SDK has
 * assigned it a session ID, and removes itself again when it is closed.
 *
 * @returns {Promise<WebStandardStreamableHTTPServerTransport>} the connected transport,
 *   ready to have `handleRequest()` called on it.
 */
async function createSession() {
    const transport = new WebStandardStreamableHTTPServerTransport({
        sessionIdGenerator: () => randomUUID(),
        enableJsonResponse: true,
        onsessioninitialized: (sessionId) => {
            sessions.set(sessionId, transport);
            log(`${ts()} New session ${sessionId} (${sessions.size} active)`);
        },
    });
    // Register the cleanup hook BEFORE connecting. The SDK's connect() installs its
    // own close handler on the transport and chains whatever handler is already
    // present; assigning onclose afterwards would replace that handler and skip the
    // server-side close processing.
    transport.onclose = () => {
        if (transport.sessionId) {
            sessions.delete(transport.sessionId);
        }
    };
    const server = createMcpServer(store);
    await server.connect(transport);
    return transport;
}
|
|
434
457
|
const startTime = Date.now();
|
|
435
458
|
const quiet = options?.quiet ?? false;
|
|
436
459
|
/** Format timestamp for request logging */
|
|
@@ -500,6 +523,7 @@ export async function startMcpHttpServer(port, options) {
|
|
|
500
523
|
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
|
501
524
|
limit: params.limit ?? 10,
|
|
502
525
|
minScore: params.minScore ?? 0,
|
|
526
|
+
candidateLimit: params.candidateLimit,
|
|
503
527
|
});
|
|
504
528
|
// Use first lex or vec query for snippet extraction
|
|
505
529
|
const primaryQuery = params.searches.find((s) => s.type === 'lex')?.query
|
|
@@ -531,6 +555,34 @@ export async function startMcpHttpServer(port, options) {
|
|
|
531
555
|
if (typeof v === "string")
|
|
532
556
|
headers[k] = v;
|
|
533
557
|
}
|
|
558
|
+
// Route to existing session or create new one on initialize
|
|
559
|
+
const sessionId = headers["mcp-session-id"];
|
|
560
|
+
let transport;
|
|
561
|
+
if (sessionId) {
|
|
562
|
+
const existing = sessions.get(sessionId);
|
|
563
|
+
if (!existing) {
|
|
564
|
+
nodeRes.writeHead(404, { "Content-Type": "application/json" });
|
|
565
|
+
nodeRes.end(JSON.stringify({
|
|
566
|
+
jsonrpc: "2.0",
|
|
567
|
+
error: { code: -32001, message: "Session not found" },
|
|
568
|
+
id: body?.id ?? null,
|
|
569
|
+
}));
|
|
570
|
+
return;
|
|
571
|
+
}
|
|
572
|
+
transport = existing;
|
|
573
|
+
}
|
|
574
|
+
else if (isInitializeRequest(body)) {
|
|
575
|
+
transport = await createSession();
|
|
576
|
+
}
|
|
577
|
+
else {
|
|
578
|
+
nodeRes.writeHead(400, { "Content-Type": "application/json" });
|
|
579
|
+
nodeRes.end(JSON.stringify({
|
|
580
|
+
jsonrpc: "2.0",
|
|
581
|
+
error: { code: -32000, message: "Bad Request: Missing session ID" },
|
|
582
|
+
id: body?.id ?? null,
|
|
583
|
+
}));
|
|
584
|
+
return;
|
|
585
|
+
}
|
|
534
586
|
const request = new Request(url, { method: "POST", headers, body: rawBody });
|
|
535
587
|
const response = await transport.handleRequest(request, { parsedBody: body });
|
|
536
588
|
nodeRes.writeHead(response.status, Object.fromEntries(response.headers));
|
|
@@ -539,12 +591,33 @@ export async function startMcpHttpServer(port, options) {
|
|
|
539
591
|
return;
|
|
540
592
|
}
|
|
541
593
|
if (pathname === "/mcp") {
|
|
542
|
-
const url = `http://localhost:${port}${pathname}`;
|
|
543
594
|
const headers = {};
|
|
544
595
|
for (const [k, v] of Object.entries(nodeReq.headers)) {
|
|
545
596
|
if (typeof v === "string")
|
|
546
597
|
headers[k] = v;
|
|
547
598
|
}
|
|
599
|
+
// GET/DELETE must have a valid session
|
|
600
|
+
const sessionId = headers["mcp-session-id"];
|
|
601
|
+
if (!sessionId) {
|
|
602
|
+
nodeRes.writeHead(400, { "Content-Type": "application/json" });
|
|
603
|
+
nodeRes.end(JSON.stringify({
|
|
604
|
+
jsonrpc: "2.0",
|
|
605
|
+
error: { code: -32000, message: "Bad Request: Missing session ID" },
|
|
606
|
+
id: null,
|
|
607
|
+
}));
|
|
608
|
+
return;
|
|
609
|
+
}
|
|
610
|
+
const transport = sessions.get(sessionId);
|
|
611
|
+
if (!transport) {
|
|
612
|
+
nodeRes.writeHead(404, { "Content-Type": "application/json" });
|
|
613
|
+
nodeRes.end(JSON.stringify({
|
|
614
|
+
jsonrpc: "2.0",
|
|
615
|
+
error: { code: -32001, message: "Session not found" },
|
|
616
|
+
id: null,
|
|
617
|
+
}));
|
|
618
|
+
return;
|
|
619
|
+
}
|
|
620
|
+
const url = `http://localhost:${port}${pathname}`;
|
|
548
621
|
const rawBody = nodeReq.method !== "GET" && nodeReq.method !== "HEAD" ? await collectBody(nodeReq) : undefined;
|
|
549
622
|
const request = new Request(url, { method: nodeReq.method || "GET", headers, ...(rawBody ? { body: rawBody } : {}) });
|
|
550
623
|
const response = await transport.handleRequest(request);
|
|
@@ -571,7 +644,10 @@ export async function startMcpHttpServer(port, options) {
|
|
|
571
644
|
if (stopping)
|
|
572
645
|
return;
|
|
573
646
|
stopping = true;
|
|
574
|
-
|
|
647
|
+
for (const transport of sessions.values()) {
|
|
648
|
+
await transport.close();
|
|
649
|
+
}
|
|
650
|
+
sessions.clear();
|
|
575
651
|
httpServer.close();
|
|
576
652
|
store.close();
|
|
577
653
|
await disposeDefaultLlamaCpp();
|
package/dist/qmd.js
CHANGED
|
@@ -74,19 +74,24 @@ const cursor = {
|
|
|
74
74
|
// Ensure cursor is restored on exit
|
|
75
75
|
process.on('SIGINT', () => { cursor.show(); process.exit(130); });
|
|
76
76
|
process.on('SIGTERM', () => { cursor.show(); process.exit(143); });
|
|
77
|
-
// Terminal progress bar using OSC 9;4 escape sequence
|
|
77
|
+
// Terminal progress bar using OSC 9;4 escape sequence (TTY only)
|
|
78
|
+
const isTTY = process.stderr.isTTY;
|
|
78
79
|
/**
 * Terminal progress reporting via OSC 9;4 escape sequences written to stderr.
 * Every method is a no-op when `isTTY` is falsy, so redirected output stays clean.
 */
const progress = {
    /**
     * Report a determinate progress value.
     * @param {number} percent - progress percentage; rounded and clamped to 0..100.
     *   Non-finite values (NaN, ±Infinity) are ignored so that a malformed
     *   escape sequence is never emitted.
     */
    set(percent) {
        if (!isTTY || !Number.isFinite(percent))
            return;
        const clamped = Math.min(100, Math.max(0, Math.round(percent)));
        process.stderr.write(`\x1b]9;4;1;${clamped}\x07`);
    },
    /** Remove the progress indicator (state 0). */
    clear() {
        if (isTTY)
            process.stderr.write(`\x1b]9;4;0\x07`);
    },
    /** Show an indeterminate indicator (state 3). */
    indeterminate() {
        if (isTTY)
            process.stderr.write(`\x1b]9;4;3\x07`);
    },
    /** Put the indicator into its error state (state 2). */
    error() {
        if (isTTY)
            process.stderr.write(`\x1b]9;4;2\x07`);
    },
};
|
|
92
97
|
// Format seconds into human-readable ETA
|
|
@@ -398,7 +403,7 @@ async function updateCollections() {
|
|
|
398
403
|
process.exit(1);
|
|
399
404
|
}
|
|
400
405
|
}
|
|
401
|
-
await indexFiles(col.pwd, col.glob_pattern, col.name, true);
|
|
406
|
+
await indexFiles(col.pwd, col.glob_pattern, col.name, true, yamlCol?.ignore);
|
|
402
407
|
console.log("");
|
|
403
408
|
}
|
|
404
409
|
// Check if any documents need embedding (show once at end)
|
|
@@ -1103,6 +1108,9 @@ function collectionList() {
|
|
|
1103
1108
|
const excludeTag = excluded ? ` ${c.yellow}[excluded]${c.reset}` : '';
|
|
1104
1109
|
console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}${excludeTag}`);
|
|
1105
1110
|
console.log(` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`);
|
|
1111
|
+
if (yamlColl?.ignore?.length) {
|
|
1112
|
+
console.log(` ${c.dim}Ignore:${c.reset} ${yamlColl.ignore.join(', ')}`);
|
|
1113
|
+
}
|
|
1106
1114
|
console.log(` ${c.dim}Files:${c.reset} ${coll.active_count}`);
|
|
1107
1115
|
console.log(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
|
|
1108
1116
|
console.log();
|
|
@@ -1138,7 +1146,8 @@ async function collectionAdd(pwd, globPattern, name) {
|
|
|
1138
1146
|
addCollection(collName, pwd, globPattern);
|
|
1139
1147
|
// Create the collection and index files
|
|
1140
1148
|
console.log(`Creating collection '${collName}'...`);
|
|
1141
|
-
|
|
1149
|
+
const newColl = getCollectionFromYaml(collName);
|
|
1150
|
+
await indexFiles(pwd, globPattern, collName, false, newColl?.ignore);
|
|
1142
1151
|
console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`);
|
|
1143
1152
|
}
|
|
1144
1153
|
function collectionRemove(name) {
|
|
@@ -1179,7 +1188,7 @@ function collectionRename(oldName, newName) {
|
|
|
1179
1188
|
console.log(`${c.green}✓${c.reset} Renamed collection '${oldName}' to '${newName}'`);
|
|
1180
1189
|
console.log(` Virtual paths updated: ${c.cyan}qmd://${oldName}/${c.reset} → ${c.cyan}qmd://${newName}/${c.reset}`);
|
|
1181
1190
|
}
|
|
1182
|
-
async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppressEmbedNotice = false) {
|
|
1191
|
+
async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppressEmbedNotice = false, ignorePatterns) {
|
|
1183
1192
|
const db = getDb();
|
|
1184
1193
|
const resolvedPwd = pwd || getPwd();
|
|
1185
1194
|
const now = new Date().toISOString();
|
|
@@ -1192,12 +1201,16 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
|
|
|
1192
1201
|
}
|
|
1193
1202
|
console.log(`Collection: ${resolvedPwd} (${globPattern})`);
|
|
1194
1203
|
progress.indeterminate();
|
|
1204
|
+
const allIgnore = [
|
|
1205
|
+
...excludeDirs.map(d => `**/${d}/**`),
|
|
1206
|
+
...(ignorePatterns || []),
|
|
1207
|
+
];
|
|
1195
1208
|
const allFiles = await fastGlob(globPattern, {
|
|
1196
1209
|
cwd: resolvedPwd,
|
|
1197
1210
|
onlyFiles: true,
|
|
1198
1211
|
followSymbolicLinks: false,
|
|
1199
1212
|
dot: false,
|
|
1200
|
-
ignore:
|
|
1213
|
+
ignore: allIgnore,
|
|
1201
1214
|
});
|
|
1202
1215
|
// Filter hidden files/folders (dot: false handles top-level but not nested)
|
|
1203
1216
|
const files = allFiles.filter(file => {
|
|
@@ -1205,11 +1218,11 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
|
|
|
1205
1218
|
return !parts.some(part => part.startsWith("."));
|
|
1206
1219
|
});
|
|
1207
1220
|
const total = files.length;
|
|
1208
|
-
|
|
1221
|
+
const hasNoFiles = total === 0;
|
|
1222
|
+
if (hasNoFiles) {
|
|
1209
1223
|
progress.clear();
|
|
1210
1224
|
console.log("No files found matching pattern.");
|
|
1211
|
-
|
|
1212
|
-
return;
|
|
1225
|
+
// Continue so the deactivation pass can mark previously indexed docs as inactive.
|
|
1213
1226
|
}
|
|
1214
1227
|
let indexed = 0, updated = 0, unchanged = 0, processed = 0;
|
|
1215
1228
|
const seenPaths = new Set();
|
|
@@ -1218,7 +1231,16 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
|
|
|
1218
1231
|
const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
|
|
1219
1232
|
const path = handelize(relativeFile); // Normalize path for token-friendliness
|
|
1220
1233
|
seenPaths.add(path);
|
|
1221
|
-
|
|
1234
|
+
let content;
|
|
1235
|
+
try {
|
|
1236
|
+
content = readFileSync(filepath, "utf-8");
|
|
1237
|
+
}
|
|
1238
|
+
catch (err) {
|
|
1239
|
+
// Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
|
|
1240
|
+
processed++;
|
|
1241
|
+
progress.set((processed / total) * 100);
|
|
1242
|
+
continue;
|
|
1243
|
+
}
|
|
1222
1244
|
// Skip empty files - nothing useful to index
|
|
1223
1245
|
if (!content.trim()) {
|
|
1224
1246
|
processed++;
|
|
@@ -1260,7 +1282,8 @@ async function indexFiles(pwd, globPattern = DEFAULT_GLOB, collectionName, suppr
|
|
|
1260
1282
|
const rate = processed / elapsed;
|
|
1261
1283
|
const remaining = (total - processed) / rate;
|
|
1262
1284
|
const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
|
|
1263
|
-
|
|
1285
|
+
if (isTTY)
|
|
1286
|
+
process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
|
|
1264
1287
|
}
|
|
1265
1288
|
// Deactivate documents in this collection that no longer exist
|
|
1266
1289
|
const allActive = getActiveDocumentPaths(db, collectionName);
|
|
@@ -1423,7 +1446,8 @@ async function vectorIndex(model = DEFAULT_EMBED_MODEL, force = false) {
|
|
|
1423
1446
|
const throughput = `${formatBytes(bytesPerSec)}/s`;
|
|
1424
1447
|
const eta = elapsed > 2 ? formatETA(etaSec) : "...";
|
|
1425
1448
|
const errStr = errors > 0 ? ` ${c.yellow}${errors} err${c.reset}` : "";
|
|
1426
|
-
|
|
1449
|
+
if (isTTY)
|
|
1450
|
+
process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${chunksEmbedded}/${totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
|
|
1427
1451
|
}
|
|
1428
1452
|
progress.clear();
|
|
1429
1453
|
cursor.show();
|
|
@@ -1496,6 +1520,9 @@ function formatScore(score) {
|
|
|
1496
1520
|
return `${c.yellow}${pct}%${c.reset}`;
|
|
1497
1521
|
return `${c.dim}${pct}%${c.reset}`;
|
|
1498
1522
|
}
|
|
1523
|
+
/**
 * Format a score for `--explain` output with four decimal places.
 *
 * @param {number} value - the score to format.
 * @returns {string} `value.toFixed(4)`, or "n/a" when `value` is not a number
 *   (e.g. an explain field that was never populated), so that diagnostic
 *   output cannot abort result printing with a TypeError.
 */
function formatExplainNumber(value) {
    if (typeof value !== "number") {
        return "n/a";
    }
    return value.toFixed(4);
}
|
|
1499
1526
|
// Shorten directory path for display - relative to $HOME (used for context paths, not documents)
|
|
1500
1527
|
function shortPath(dirpath) {
|
|
1501
1528
|
const home = homedir();
|
|
@@ -1504,10 +1531,33 @@ function shortPath(dirpath) {
|
|
|
1504
1531
|
}
|
|
1505
1532
|
return dirpath;
|
|
1506
1533
|
}
|
|
1534
|
+
/**
 * Print the "nothing matched" output for a search command in a shape that is
 * still valid for the requested output format.
 *
 * - "json"  -> an empty JSON array
 * - "csv"   -> the header row only
 * - "xml"   -> an empty <results> element
 * - "md" / "files" -> nothing at all
 * - anything else  -> a human-readable message chosen by `reason`
 *
 * @param {string} format - requested output format.
 * @param {string} [reason="no_results"] - "min_score" selects the threshold message.
 */
function printEmptySearchResults(format, reason = "no_results") {
    switch (format) {
        case "json":
            console.log("[]");
            return;
        case "csv":
            console.log("docid,score,file,title,context,line,snippet");
            return;
        case "xml":
            console.log("<results></results>");
            return;
        case "md":
        case "files":
            // These formats have no wrapper or header, so empty output is empty.
            return;
        default:
            break;
    }
    const message = reason === "min_score"
        ? "No results found above minimum score threshold."
        : "No results found.";
    console.log(message);
}
|
|
1507
1557
|
function outputResults(results, query, opts) {
|
|
1508
1558
|
const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
|
|
1509
1559
|
if (filtered.length === 0) {
|
|
1510
|
-
|
|
1560
|
+
printEmptySearchResults(opts.format, "min_score");
|
|
1511
1561
|
return;
|
|
1512
1562
|
}
|
|
1513
1563
|
// Helper to create qmd:// URI from displayPath
|
|
@@ -1517,7 +1567,7 @@ function outputResults(results, query, opts) {
|
|
|
1517
1567
|
const output = filtered.map(row => {
|
|
1518
1568
|
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
|
|
1519
1569
|
let body = opts.full ? row.body : undefined;
|
|
1520
|
-
let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos).snippet : undefined;
|
|
1570
|
+
let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos, undefined, opts.intent).snippet : undefined;
|
|
1521
1571
|
if (opts.lineNumbers) {
|
|
1522
1572
|
if (body)
|
|
1523
1573
|
body = addLineNumbers(body);
|
|
@@ -1532,6 +1582,7 @@ function outputResults(results, query, opts) {
|
|
|
1532
1582
|
...(row.context && { context: row.context }),
|
|
1533
1583
|
...(body && { body }),
|
|
1534
1584
|
...(snippet && { snippet }),
|
|
1585
|
+
...(opts.explain && row.explain && { explain: row.explain }),
|
|
1535
1586
|
};
|
|
1536
1587
|
});
|
|
1537
1588
|
console.log(JSON.stringify(output, null, 2));
|
|
@@ -1549,7 +1600,7 @@ function outputResults(results, query, opts) {
|
|
|
1549
1600
|
const row = filtered[i];
|
|
1550
1601
|
if (!row)
|
|
1551
1602
|
continue;
|
|
1552
|
-
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
|
|
1603
|
+
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
|
|
1553
1604
|
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
|
|
1554
1605
|
// Line 1: filepath with docid
|
|
1555
1606
|
const path = toQmdPath(row.displayPath);
|
|
@@ -1570,6 +1621,27 @@ function outputResults(results, query, opts) {
|
|
|
1570
1621
|
// Line 4: Score
|
|
1571
1622
|
const score = formatScore(row.score);
|
|
1572
1623
|
console.log(`Score: ${c.bold}${score}${c.reset}`);
|
|
1624
|
+
if (opts.explain && row.explain) {
|
|
1625
|
+
const explain = row.explain;
|
|
1626
|
+
const ftsScores = explain.ftsScores.length > 0
|
|
1627
|
+
? explain.ftsScores.map(formatExplainNumber).join(", ")
|
|
1628
|
+
: "none";
|
|
1629
|
+
const vecScores = explain.vectorScores.length > 0
|
|
1630
|
+
? explain.vectorScores.map(formatExplainNumber).join(", ")
|
|
1631
|
+
: "none";
|
|
1632
|
+
const contribSummary = explain.rrf.contributions
|
|
1633
|
+
.slice()
|
|
1634
|
+
.sort((a, b) => b.rrfContribution - a.rrfContribution)
|
|
1635
|
+
.slice(0, 3)
|
|
1636
|
+
.map(c => `${c.source}/${c.queryType}#${c.rank}:${formatExplainNumber(c.rrfContribution)}`)
|
|
1637
|
+
.join(" | ");
|
|
1638
|
+
console.log(`${c.dim}Explain: fts=[${ftsScores}] vec=[${vecScores}]${c.reset}`);
|
|
1639
|
+
console.log(`${c.dim} RRF: total=${formatExplainNumber(explain.rrf.totalScore)} base=${formatExplainNumber(explain.rrf.baseScore)} bonus=${formatExplainNumber(explain.rrf.topRankBonus)} rank=${explain.rrf.rank}${c.reset}`);
|
|
1640
|
+
console.log(`${c.dim} Blend: ${Math.round(explain.rrf.weight * 100)}%*${formatExplainNumber(explain.rrf.positionScore)} + ${Math.round((1 - explain.rrf.weight) * 100)}%*${formatExplainNumber(explain.rerankScore)} = ${formatExplainNumber(explain.blendedScore)}${c.reset}`);
|
|
1641
|
+
if (contribSummary.length > 0) {
|
|
1642
|
+
console.log(`${c.dim} Top RRF contributions: ${contribSummary}${c.reset}`);
|
|
1643
|
+
}
|
|
1644
|
+
}
|
|
1573
1645
|
console.log();
|
|
1574
1646
|
// Snippet with highlighting (diff-style header included)
|
|
1575
1647
|
let displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
|
|
@@ -1587,7 +1659,7 @@ function outputResults(results, query, opts) {
|
|
|
1587
1659
|
continue;
|
|
1588
1660
|
const heading = row.title || row.displayPath;
|
|
1589
1661
|
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
|
|
1590
|
-
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
|
|
1662
|
+
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
|
|
1591
1663
|
if (opts.lineNumbers) {
|
|
1592
1664
|
content = addLineNumbers(content);
|
|
1593
1665
|
}
|
|
@@ -1601,7 +1673,7 @@ function outputResults(results, query, opts) {
|
|
|
1601
1673
|
const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '"')}"` : "";
|
|
1602
1674
|
const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '"')}"` : "";
|
|
1603
1675
|
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
|
|
1604
|
-
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
|
|
1676
|
+
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
|
|
1605
1677
|
if (opts.lineNumbers) {
|
|
1606
1678
|
content = addLineNumbers(content);
|
|
1607
1679
|
}
|
|
@@ -1612,7 +1684,7 @@ function outputResults(results, query, opts) {
|
|
|
1612
1684
|
// CSV format
|
|
1613
1685
|
console.log("docid,score,file,title,context,line,snippet");
|
|
1614
1686
|
for (const row of filtered) {
|
|
1615
|
-
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
|
|
1687
|
+
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
|
|
1616
1688
|
let content = opts.full ? row.body : snippet;
|
|
1617
1689
|
if (opts.lineNumbers) {
|
|
1618
1690
|
content = addLineNumbers(content, line);
|
|
@@ -1655,21 +1727,6 @@ function filterByCollections(results, collectionNames) {
|
|
|
1655
1727
|
return prefixes.some(p => path.startsWith(p));
|
|
1656
1728
|
});
|
|
1657
1729
|
}
|
|
1658
|
-
/**
|
|
1659
|
-
* Parse structured search query syntax.
|
|
1660
|
-
* Lines starting with lex:, vec:, or hyde: are routed directly.
|
|
1661
|
-
* Plain lines without prefix go through query expansion.
|
|
1662
|
-
*
|
|
1663
|
-
* Returns null if this is a plain query (single line, no prefix).
|
|
1664
|
-
* Returns StructuredSubSearch[] if structured syntax detected.
|
|
1665
|
-
* Throws if multiple plain lines (ambiguous).
|
|
1666
|
-
*
|
|
1667
|
-
* Examples:
|
|
1668
|
-
* "CAP theorem" -> null (plain query, use expansion)
|
|
1669
|
-
* "lex: CAP theorem" -> [{ type: 'lex', query: 'CAP theorem' }]
|
|
1670
|
-
* "lex: CAP\nvec: consistency" -> [{ type: 'lex', ... }, { type: 'vec', ... }]
|
|
1671
|
-
* "CAP\nconsistency" -> throws (multiple plain lines)
|
|
1672
|
-
*/
|
|
1673
1730
|
function parseStructuredQuery(query) {
|
|
1674
1731
|
const rawLines = query.split('\n').map((line, idx) => ({
|
|
1675
1732
|
raw: line,
|
|
@@ -1680,7 +1737,9 @@ function parseStructuredQuery(query) {
|
|
|
1680
1737
|
return null;
|
|
1681
1738
|
const prefixRe = /^(lex|vec|hyde):\s*/i;
|
|
1682
1739
|
const expandRe = /^expand:\s*/i;
|
|
1740
|
+
const intentRe = /^intent:\s*/i;
|
|
1683
1741
|
const typed = [];
|
|
1742
|
+
let intent;
|
|
1684
1743
|
for (const line of rawLines) {
|
|
1685
1744
|
if (expandRe.test(line.trimmed)) {
|
|
1686
1745
|
if (rawLines.length > 1) {
|
|
@@ -1692,6 +1751,18 @@ function parseStructuredQuery(query) {
|
|
|
1692
1751
|
}
|
|
1693
1752
|
return null; // treat as standalone expand query
|
|
1694
1753
|
}
|
|
1754
|
+
// Parse intent: lines
|
|
1755
|
+
if (intentRe.test(line.trimmed)) {
|
|
1756
|
+
if (intent !== undefined) {
|
|
1757
|
+
throw new Error(`Line ${line.number}: only one intent: line is allowed per query document.`);
|
|
1758
|
+
}
|
|
1759
|
+
const text = line.trimmed.replace(intentRe, '').trim();
|
|
1760
|
+
if (!text) {
|
|
1761
|
+
throw new Error(`Line ${line.number}: intent: must include text.`);
|
|
1762
|
+
}
|
|
1763
|
+
intent = text;
|
|
1764
|
+
continue;
|
|
1765
|
+
}
|
|
1695
1766
|
const match = line.trimmed.match(prefixRe);
|
|
1696
1767
|
if (match) {
|
|
1697
1768
|
const type = match[1].toLowerCase();
|
|
@@ -1709,9 +1780,13 @@ function parseStructuredQuery(query) {
|
|
|
1709
1780
|
// Single plain line -> implicit expand
|
|
1710
1781
|
return null;
|
|
1711
1782
|
}
|
|
1712
|
-
throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde: prefix. Each line in a query document must start with one.`);
|
|
1783
|
+
throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
|
|
1713
1784
|
}
|
|
1714
|
-
|
|
1785
|
+
// intent: alone is not a valid query — must have at least one search
|
|
1786
|
+
if (intent && typed.length === 0) {
|
|
1787
|
+
throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
|
|
1788
|
+
}
|
|
1789
|
+
return typed.length > 0 ? { searches: typed, intent } : null;
|
|
1715
1790
|
}
|
|
1716
1791
|
function search(query, opts) {
|
|
1717
1792
|
const db = getDb();
|
|
@@ -1735,12 +1810,7 @@ function search(query, opts) {
|
|
|
1735
1810
|
}));
|
|
1736
1811
|
closeDb();
|
|
1737
1812
|
if (resultsWithContext.length === 0) {
|
|
1738
|
-
|
|
1739
|
-
console.log("[]");
|
|
1740
|
-
}
|
|
1741
|
-
else {
|
|
1742
|
-
console.log("No results found.");
|
|
1743
|
-
}
|
|
1813
|
+
printEmptySearchResults(opts.format);
|
|
1744
1814
|
return;
|
|
1745
1815
|
}
|
|
1746
1816
|
outputResults(resultsWithContext, query, opts);
|
|
@@ -1773,6 +1843,7 @@ async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
|
|
|
1773
1843
|
collection: singleCollection,
|
|
1774
1844
|
limit: opts.all ? 500 : (opts.limit || 10),
|
|
1775
1845
|
minScore: opts.minScore || 0.3,
|
|
1846
|
+
intent: opts.intent,
|
|
1776
1847
|
hooks: {
|
|
1777
1848
|
onExpand: (original, expanded) => {
|
|
1778
1849
|
logExpansionTree(original, expanded);
|
|
@@ -1789,12 +1860,7 @@ async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
|
|
|
1789
1860
|
}
|
|
1790
1861
|
closeDb();
|
|
1791
1862
|
if (results.length === 0) {
|
|
1792
|
-
|
|
1793
|
-
console.log("[]");
|
|
1794
|
-
}
|
|
1795
|
-
else {
|
|
1796
|
-
console.log("No results found.");
|
|
1797
|
-
}
|
|
1863
|
+
printEmptySearchResults(opts.format);
|
|
1798
1864
|
return;
|
|
1799
1865
|
}
|
|
1800
1866
|
outputResults(results.map(r => ({
|
|
@@ -1815,14 +1881,20 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
|
|
|
1815
1881
|
const collectionNames = resolveCollectionFilter(opts.collection, true);
|
|
1816
1882
|
const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
|
|
1817
1883
|
checkIndexHealth(store.db);
|
|
1818
|
-
// Check for structured query syntax (lex:/vec:/hyde: prefixes)
|
|
1819
|
-
const
|
|
1884
|
+
// Check for structured query syntax (lex:/vec:/hyde:/intent: prefixes)
|
|
1885
|
+
const parsed = parseStructuredQuery(query);
|
|
1886
|
+
// Intent can come from --intent flag or from intent: line in query document
|
|
1887
|
+
const intent = opts.intent || parsed?.intent;
|
|
1820
1888
|
await withLLMSession(async () => {
|
|
1821
1889
|
let results;
|
|
1822
|
-
if (
|
|
1890
|
+
if (parsed) {
|
|
1891
|
+
const structuredQueries = parsed.searches;
|
|
1823
1892
|
// Structured search — user provided their own query expansions
|
|
1824
1893
|
const typeLabels = structuredQueries.map(s => s.type).join('+');
|
|
1825
1894
|
process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);
|
|
1895
|
+
if (intent) {
|
|
1896
|
+
process.stderr.write(`${c.dim}├─ intent: ${intent}${c.reset}\n`);
|
|
1897
|
+
}
|
|
1826
1898
|
// Log each sub-query
|
|
1827
1899
|
for (const s of structuredQueries) {
|
|
1828
1900
|
let preview = s.query.replace(/\n/g, ' ');
|
|
@@ -1835,6 +1907,9 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
|
|
|
1835
1907
|
collections: singleCollection ? [singleCollection] : undefined,
|
|
1836
1908
|
limit: opts.all ? 500 : (opts.limit || 10),
|
|
1837
1909
|
minScore: opts.minScore || 0,
|
|
1910
|
+
candidateLimit: opts.candidateLimit,
|
|
1911
|
+
explain: !!opts.explain,
|
|
1912
|
+
intent,
|
|
1838
1913
|
hooks: {
|
|
1839
1914
|
onEmbedStart: (count) => {
|
|
1840
1915
|
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
|
|
@@ -1859,6 +1934,9 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
|
|
|
1859
1934
|
collection: singleCollection,
|
|
1860
1935
|
limit: opts.all ? 500 : (opts.limit || 10),
|
|
1861
1936
|
minScore: opts.minScore || 0,
|
|
1937
|
+
candidateLimit: opts.candidateLimit,
|
|
1938
|
+
explain: !!opts.explain,
|
|
1939
|
+
intent,
|
|
1862
1940
|
hooks: {
|
|
1863
1941
|
onStrongSignal: (score) => {
|
|
1864
1942
|
process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
|
|
@@ -1897,15 +1975,11 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
|
|
|
1897
1975
|
}
|
|
1898
1976
|
closeDb();
|
|
1899
1977
|
if (results.length === 0) {
|
|
1900
|
-
|
|
1901
|
-
console.log("[]");
|
|
1902
|
-
}
|
|
1903
|
-
else {
|
|
1904
|
-
console.log("No results found.");
|
|
1905
|
-
}
|
|
1978
|
+
printEmptySearchResults(opts.format);
|
|
1906
1979
|
return;
|
|
1907
1980
|
}
|
|
1908
1981
|
// Use first lex/vec query for output context, or original query
|
|
1982
|
+
const structuredQueries = parsed?.searches;
|
|
1909
1983
|
const displayQuery = structuredQueries
|
|
1910
1984
|
? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
|
|
1911
1985
|
: query;
|
|
@@ -1919,6 +1993,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
|
|
|
1919
1993
|
score: r.score,
|
|
1920
1994
|
context: r.context,
|
|
1921
1995
|
docid: r.docid,
|
|
1996
|
+
explain: r.explain,
|
|
1922
1997
|
})), displayQuery, { ...opts, limit: results.length });
|
|
1923
1998
|
}, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
|
|
1924
1999
|
}
|
|
@@ -1947,6 +2022,7 @@ function parseCLI() {
|
|
|
1947
2022
|
xml: { type: "boolean" },
|
|
1948
2023
|
files: { type: "boolean" },
|
|
1949
2024
|
json: { type: "boolean" },
|
|
2025
|
+
explain: { type: "boolean" },
|
|
1950
2026
|
collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s)
|
|
1951
2027
|
// Collection options
|
|
1952
2028
|
name: { type: "string" }, // collection name
|
|
@@ -1961,6 +2037,9 @@ function parseCLI() {
|
|
|
1961
2037
|
from: { type: "string" }, // start line
|
|
1962
2038
|
"max-bytes": { type: "string" }, // max bytes for multi-get
|
|
1963
2039
|
"line-numbers": { type: "boolean" }, // add line numbers to output
|
|
2040
|
+
// Query options
|
|
2041
|
+
"candidate-limit": { type: "string", short: "C" },
|
|
2042
|
+
intent: { type: "string" },
|
|
1964
2043
|
// MCP HTTP transport options
|
|
1965
2044
|
http: { type: "boolean" },
|
|
1966
2045
|
daemon: { type: "boolean" },
|
|
@@ -1999,6 +2078,9 @@ function parseCLI() {
|
|
|
1999
2078
|
all: isAll,
|
|
2000
2079
|
collection: values.collection,
|
|
2001
2080
|
lineNumbers: !!values["line-numbers"],
|
|
2081
|
+
candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
|
|
2082
|
+
explain: !!values.explain,
|
|
2083
|
+
intent: values.intent,
|
|
2002
2084
|
};
|
|
2003
2085
|
return {
|
|
2004
2086
|
command: positionals[0] || "",
|
|
@@ -2057,7 +2139,8 @@ function showHelp() {
|
|
|
2057
2139
|
`query = expand_query | query_document ;`,
|
|
2058
2140
|
`expand_query = text | explicit_expand ;`,
|
|
2059
2141
|
`explicit_expand= "expand:" text ;`,
|
|
2060
|
-
`query_document = { typed_line } ;`,
|
|
2142
|
+
`query_document = [ intent_line ] { typed_line } ;`,
|
|
2143
|
+
`intent_line = "intent:" text newline ;`,
|
|
2061
2144
|
`typed_line = type ":" text newline ;`,
|
|
2062
2145
|
`type = "lex" | "vec" | "hyde" ;`,
|
|
2063
2146
|
`text = quoted_phrase | plain_text ;`,
|
|
@@ -2094,7 +2177,9 @@ function showHelp() {
|
|
|
2094
2177
|
console.log(" --all - Return all matches (pair with --min-score)");
|
|
2095
2178
|
console.log(" --min-score <num> - Minimum similarity score");
|
|
2096
2179
|
console.log(" --full - Output full document instead of snippet");
|
|
2180
|
+
console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
|
|
2097
2181
|
console.log(" --line-numbers - Include line numbers in output");
|
|
2182
|
+
console.log(" --explain - Include retrieval score traces (query --json/CLI)");
|
|
2098
2183
|
console.log(" --files | --json | --csv | --md | --xml - Output format");
|
|
2099
2184
|
console.log(" -c, --collection <name> - Filter by one or more collections");
|
|
2100
2185
|
console.log("");
|