@joycodetech/qmd-ja 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +819 -0
- package/LICENSE +21 -0
- package/README.md +1143 -0
- package/bin/qmd +162 -0
- package/dist/ast.d.ts +65 -0
- package/dist/ast.js +334 -0
- package/dist/bench/bench.d.ts +23 -0
- package/dist/bench/bench.js +280 -0
- package/dist/bench/score.d.ts +33 -0
- package/dist/bench/score.js +88 -0
- package/dist/bench/types.d.ts +80 -0
- package/dist/bench/types.js +8 -0
- package/dist/cli/formatter.d.ts +120 -0
- package/dist/cli/formatter.js +355 -0
- package/dist/cli/qmd.d.ts +43 -0
- package/dist/cli/qmd.js +4159 -0
- package/dist/collections.d.ts +166 -0
- package/dist/collections.js +410 -0
- package/dist/db.d.ts +44 -0
- package/dist/db.js +75 -0
- package/dist/index.d.ts +230 -0
- package/dist/index.js +242 -0
- package/dist/llm.d.ts +500 -0
- package/dist/llm.js +1615 -0
- package/dist/maintenance.d.ts +23 -0
- package/dist/maintenance.js +37 -0
- package/dist/mcp/server.d.ts +24 -0
- package/dist/mcp/server.js +702 -0
- package/dist/paths.d.ts +1 -0
- package/dist/paths.js +4 -0
- package/dist/store.d.ts +996 -0
- package/dist/store.js +4208 -0
- package/models/vaporetto-bccwj.model +0 -0
- package/package.json +130 -0
- package/scripts/build.mjs +30 -0
- package/scripts/check-package-grammars.mjs +29 -0
- package/scripts/package-smoke.mjs +65 -0
- package/scripts/test-all.mjs +38 -0
- package/skills/qmd/SKILL.md +295 -0
- package/skills/qmd/references/mcp-setup.md +102 -0
- package/skills/release/SKILL.md +139 -0
- package/skills/release/scripts/install-hooks.sh +38 -0
- package/vendor/vaporetto-node-wasm/package.json +11 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.d.ts +19 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.js +202 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm +0 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm.d.ts +13 -0
|
@@ -0,0 +1,702 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* QMD MCP Server - Model Context Protocol server for QMD
|
|
3
|
+
*
|
|
4
|
+
* Exposes QMD search and document retrieval as MCP tools and resources.
|
|
5
|
+
* Documents are accessible via qmd:// URIs.
|
|
6
|
+
*
|
|
7
|
+
* Follows MCP spec 2025-06-18 for proper response types.
|
|
8
|
+
*/
|
|
9
|
+
import { createServer } from "node:http";
|
|
10
|
+
import { randomUUID } from "node:crypto";
|
|
11
|
+
import { readFileSync } from "node:fs";
|
|
12
|
+
import { join, dirname } from "node:path";
|
|
13
|
+
import { fileURLToPath } from "url";
|
|
14
|
+
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
15
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
16
|
+
import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
|
|
17
|
+
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
|
|
18
|
+
import { z } from "zod";
|
|
19
|
+
import { existsSync } from "fs";
|
|
20
|
+
import { createStore, extractSnippet, addLineNumbers, getDefaultDbPath, DEFAULT_MULTI_GET_MAX_BYTES, } from "../index.js";
|
|
21
|
+
import { getConfigPath } from "../collections.js";
|
|
22
|
+
import { enableProductionMode } from "../store.js";
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// Helper functions
|
|
25
|
+
// =============================================================================
|
|
26
|
+
/**
 * Percent-encode a document path so it can be embedded in a qmd:// URI.
 *
 * The path is split on "/" and every segment is passed through
 * encodeURIComponent individually, so the slashes themselves survive
 * unescaped while everything else that needs escaping is escaped.
 *
 * @param {string} path - Slash-separated document path.
 * @returns {string} The same path with each segment percent-encoded.
 */
function encodeQmdPath(path) {
    const encodedSegments = [];
    for (const part of path.split('/')) {
        encodedSegments.push(encodeURIComponent(part));
    }
    return encodedSegments.join('/');
}
|
|
34
|
+
/**
 * Render search hits as a plain-text summary for humans.
 *
 * @param {Array<{docid: string, score: number, file: string, title: string}>} results
 *   Hits to list; `score` is multiplied by 100 and rounded to a percentage.
 * @param {string} query - Query text echoed back in the heading.
 * @returns {string} A "No results" sentence, or a heading followed by a blank
 *   line and one line per hit.
 */
function formatSearchSummary(results, query) {
    const count = results.length;
    if (count === 0) {
        return `No results found for "${query}"`;
    }
    const plural = count === 1 ? '' : 's';
    // The heading carries its own trailing newline, which yields the blank
    // separator line once everything is joined.
    const heading = `Found ${count} result${plural} for "${query}":\n`;
    const rows = Array.from(results, (hit) => `${hit.docid} ${Math.round(hit.score * 100)}% ${hit.file} - ${hit.title}`);
    return [heading, ...rows].join('\n');
}
|
|
47
|
+
/**
 * Read the `version` field from the package.json located two directories
 * above this module.
 *
 * Best-effort by design: any failure (missing file, unreadable file, invalid
 * JSON) and a missing `version` field all yield the string "unknown".
 *
 * @returns {string} The package version, or "unknown".
 */
function getPackageVersion() {
    const FALLBACK = "unknown";
    try {
        const moduleDir = dirname(fileURLToPath(import.meta.url));
        const manifestText = readFileSync(join(moduleDir, "../../package.json"), "utf-8");
        const { version } = JSON.parse(manifestText);
        return version ?? FALLBACK;
    }
    catch {
        return FALLBACK;
    }
}
|
|
57
|
+
// =============================================================================
|
|
58
|
+
// MCP Server
|
|
59
|
+
// =============================================================================
|
|
60
|
+
/**
 * Compose the server instructions text from the current index state.
 *
 * The returned string is handed to the McpServer constructor as its
 * `instructions` option, so a client learns what is searchable without
 * having to call a tool first.
 *
 * @param {object} store - Must provide async `getStatus()` (read here for
 *   `totalDocuments`, `collections[].name`, `hasVectorIndex`,
 *   `needsEmbedding`) and async `getGlobalContext()`.
 * @returns {Promise<string>} Newline-joined instructions.
 */
async function buildInstructions(store) {
    const indexStatus = await store.getStatus();
    const globalContext = await store.getGlobalContext();
    // --- What is this? ---
    const out = [
        `QMD is your local search engine over ${indexStatus.totalDocuments} markdown documents.`,
    ];
    if (globalContext) {
        out.push(`Context: ${globalContext}`);
    }
    // --- What's searchable? ---
    // Names only: per-collection doc counts and descriptions can run to ~1.5 KB
    // across a dozen collections, and the `status` tool serves them on demand.
    if (indexStatus.collections.length > 0) {
        const nameList = indexStatus.collections.map((col) => col.name).join(", ");
        out.push(
            "",
            `Collections (scope with \`collections\` parameter): ${nameList}`,
            "Call the `status` tool for collection descriptions, paths, and per-collection doc counts."
        );
    }
    // --- Capability gaps ---
    if (!indexStatus.hasVectorIndex) {
        out.push("", "Note: No vector embeddings yet. Run `qmd embed` to enable semantic search (vec/hyde).");
    }
    else if (indexStatus.needsEmbedding > 0) {
        out.push("", `Note: ${indexStatus.needsEmbedding} documents need embedding. Run \`qmd embed\` to update.`);
    }
    // --- Search tool ---
    out.push(
        "",
        "Search: Use `query` with sub-queries (lex/vec/hyde):",
        " - type:'lex' — BM25 keyword search (exact terms, fast)",
        " - type:'vec' — semantic vector search (meaning-based)",
        " - type:'hyde' — hypothetical document (write what the answer looks like)",
        "",
        " Always provide `intent` on every search call to disambiguate and improve snippets.",
        "",
        "Examples:",
        " Quick keyword lookup: [{type:'lex', query:'error handling'}]",
        " Semantic search: [{type:'vec', query:'how to handle errors gracefully'}]",
        " Best results: [{type:'lex', query:'error'}, {type:'vec', query:'error handling best practices'}]",
        " With intent: searches=[{type:'lex', query:'performance'}], intent='web page load times'"
    );
    // --- Retrieval workflow ---
    out.push(
        "",
        "Retrieval:",
        " - `get` — single document by path or docid (#abc123). Supports a line-range suffix: `file.md:100` (from line 100) or `file.md:100:40` (40 lines from line 100).",
        " - `multi_get` — batch retrieve by glob (`journals/2025-05*.md`) or comma-separated list."
    );
    // --- Non-obvious things that prevent mistakes ---
    out.push(
        "",
        "Tips:",
        " - File paths in results are relative to their collection.",
        " - Use `minScore: 0.5` to filter low-confidence results.",
        " - Results include a `context` field describing the content type."
    );
    return out.join("\n");
}
|
|
118
|
+
/**
 * Create an MCP server with the QMD document resource and the `query`, `get`,
 * `multi_get` and `status` tools registered.
 * Shared by both stdio and HTTP transports.
 *
 * @param {object} store - QMD store. The handlers below call `getStatus`,
 *   `getGlobalContext` (via buildInstructions), `getDefaultCollectionNames`,
 *   `get`, `getDocumentBody`, `multiGet` and `search` on it.
 * @returns {Promise<McpServer>} A configured server that is not yet connected
 *   to any transport.
 */
async function createMcpServer(store) {
    const server = new McpServer({ name: "qmd", version: getPackageVersion() }, { instructions: await buildInstructions(store) });
    // Pre-fetch default collection names for search tools
    const defaultCollectionNames = await store.getDefaultCollectionNames();
    // ---------------------------------------------------------------------------
    // Resource: qmd://{path} - read-only access to documents by path
    // Note: No list() - documents are discovered via search tools
    // ---------------------------------------------------------------------------
    server.registerResource("document", new ResourceTemplate("qmd://{+path}", { list: undefined }), {
        title: "QMD Document",
        description: "A markdown document from your QMD knowledge base. Use search tools to discover documents.",
        mimeType: "text/markdown",
    }, async (uri, { path }) => {
        // Decode URL-encoded path (MCP clients send encoded URIs)
        const pathStr = Array.isArray(path) ? path.join('/') : (path || '');
        let decodedPath;
        try {
            decodedPath = decodeURIComponent(pathStr);
        }
        catch {
            // A malformed escape sequence (e.g. a lone "%") makes
            // decodeURIComponent throw URIError. Fall back to the raw path so
            // the request ends in the regular "not found" response below.
            decodedPath = pathStr;
        }
        // Use SDK to find document — findDocument handles collection/path resolution
        const result = await store.get(decodedPath, { includeBody: true });
        if ("error" in result) {
            return { contents: [{ uri: uri.href, text: `Document not found: ${decodedPath}` }] };
        }
        let text = addLineNumbers(result.body || ""); // Default to line numbers
        if (result.context) {
            text = `<!-- Context: ${result.context} -->\n\n` + text;
        }
        return {
            contents: [{
                    uri: uri.href,
                    name: result.displayPath,
                    title: result.title || result.displayPath,
                    mimeType: "text/markdown",
                    text,
                }],
        };
    });
    // ---------------------------------------------------------------------------
    // Tool: query (Primary search tool)
    // ---------------------------------------------------------------------------
    const subSearchSchema = z.object({
        type: z.enum(['lex', 'vec', 'hyde']).describe("lex = BM25 keywords (supports \"phrase\" and -negation); " +
            "vec = semantic question; hyde = hypothetical answer passage"),
        query: z.string().describe("The query text. For lex: use keywords, \"quoted phrases\", and -negation. " +
            "For vec: natural language question. For hyde: 50-100 word answer passage."),
    });
    server.registerTool("query", {
        title: "Query",
        description: `Search the knowledge base using a query document — one or more typed sub-queries combined for best recall.

Each result includes a \`line\` field with the absolute 1-indexed line of the best match in the source markdown. To read more context around a hit, call \`get(file, fromLine = max(1, line - 20), maxLines = 80, lineNumbers = true)\`.

## Query Types

**lex** — BM25 keyword search. Fast, exact, no LLM needed.
Full lex syntax:
- \`term\` — prefix match ("perf" matches "performance")
- \`"exact phrase"\` — phrase must appear verbatim
- \`-term\` or \`-"phrase"\` — exclude documents containing this

Good lex examples:
- \`"connection pool" timeout -redis\`
- \`"machine learning" -sports -athlete\`
- \`handleError async typescript\`

**vec** — Semantic vector search. Write a natural language question. Finds documents by meaning, not exact words.
- \`how does the rate limiter handle burst traffic?\`
- \`what is the tradeoff between consistency and availability?\`

**hyde** — Hypothetical document. Write 50-100 words that look like the answer. Often the most powerful for nuanced topics.
- \`The rate limiter uses a token bucket algorithm. When a client exceeds 100 req/min, subsequent requests return 429 until the window resets.\`

## Strategy

Combine types for best results. First sub-query gets 2× weight — put your strongest signal first.

| Goal | Approach |
|------|----------|
| Know exact term/name | \`lex\` only |
| Concept search | \`vec\` only |
| Best recall | \`lex\` + \`vec\` |
| Complex/nuanced | \`lex\` + \`vec\` + \`hyde\` |
| Unknown vocabulary | Use a standalone natural-language query (no typed lines) so the server can auto-expand it |

## Examples

Simple lookup:
\`\`\`json
[{ "type": "lex", "query": "CAP theorem" }]
\`\`\`

Best recall on a technical topic:
\`\`\`json
[
{ "type": "lex", "query": "\\"connection pool\\" timeout -redis" },
{ "type": "vec", "query": "why do database connections time out under load" },
{ "type": "hyde", "query": "Connection pool exhaustion occurs when all connections are in use and new requests must wait. This typically happens under high concurrency when queries run longer than expected." }
]
\`\`\`

Intent-aware lex (C++ performance, not sports):
\`\`\`json
[
{ "type": "lex", "query": "\\"C++ performance\\" optimization -sports -athlete" },
{ "type": "vec", "query": "how to optimize C++ program performance" }
]
\`\`\``,
        annotations: { readOnlyHint: true, openWorldHint: false },
        inputSchema: {
            searches: z.array(subSearchSchema).min(1).max(10).describe("Typed sub-queries to execute (lex/vec/hyde). First gets 2x weight."),
            limit: z.number().optional().default(10).describe("Max results (default: 10)"),
            minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
            candidateLimit: z.number().optional().describe("Maximum candidates to rerank (default: 40, lower = faster but may miss results)"),
            collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
            intent: z.string().optional().describe("Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."),
            rerank: z.boolean().optional().default(true).describe("Rerank results using LLM (default: true). Set to false for faster results on CPU-only machines."),
        },
    }, async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => {
        // Map to internal format
        const queries = searches.map(s => ({
            type: s.type,
            query: s.query,
        }));
        // Use default collections if none specified
        const effectiveCollections = collections ?? defaultCollectionNames;
        const results = await store.search({
            queries,
            // An empty list is passed as undefined rather than as "no collections".
            collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
            limit,
            minScore,
            candidateLimit,
            rerank,
            intent,
        });
        // Use first lex or vec query for snippet extraction
        const primaryQuery = searches.find(s => s.type === 'lex')?.query
            || searches.find(s => s.type === 'vec')?.query
            || searches[0]?.query || "";
        const filtered = results.map(r => {
            const { line, snippet } = extractSnippet(r.body, primaryQuery, 300, r.bestChunkPos, r.bestChunk.length, intent);
            return {
                docid: `#${r.docid}`,
                file: r.displayPath,
                title: r.title,
                score: Math.round(r.score * 100) / 100,
                context: r.context,
                line,
                snippet: addLineNumbers(snippet, line),
            };
        });
        return {
            content: [{ type: "text", text: formatSearchSummary(filtered, primaryQuery) }],
            structuredContent: { results: filtered },
        };
    });
    // ---------------------------------------------------------------------------
    // Tool: get (Retrieve document)
    // ---------------------------------------------------------------------------
    server.registerTool("get", {
        title: "Get Document",
        description: "Retrieve the full content of a document by its file path or docid. Use paths or docids (#abc123) from search results. Suggests similar files if not found.",
        annotations: { readOnlyHint: true, openWorldHint: false },
        inputSchema: {
            file: z.string().describe("File path or docid from search results. Supports a line-range suffix: 'pages/meeting.md:100' starts at line 100; 'pages/meeting.md:100:40' (or '#abc123:100:40') reads 40 lines from line 100."),
            fromLine: z.number().optional().describe("Start from this line number (1-indexed)"),
            maxLines: z.number().optional().describe("Maximum number of lines to return"),
            lineNumbers: z.boolean().optional().default(true).describe("Add line numbers to output (format: 'N: content'). On by default; set false for raw content."),
        },
    }, async ({ file, fromLine, maxLines, lineNumbers }) => {
        // Support :line and :from:count suffixes in `file` (e.g. "foo.md:120" or
        // "foo.md:120:40"). Explicit fromLine/maxLines args take precedence over
        // the suffix values, but the suffix is always removed from the lookup
        // path so that "foo.md:120" plus an explicit fromLine still resolves.
        let parsedFromLine = fromLine;
        let parsedMaxLines = maxLines;
        let lookup = file;
        const rangeMatch = lookup.match(/:(\d+):(\d+)$/);
        if (rangeMatch) {
            if (parsedFromLine === undefined)
                parsedFromLine = parseInt(rangeMatch[1], 10);
            if (parsedMaxLines === undefined)
                parsedMaxLines = parseInt(rangeMatch[2], 10);
            lookup = lookup.slice(0, -rangeMatch[0].length);
        }
        else {
            const colonMatch = lookup.match(/:(\d+)$/);
            if (colonMatch) {
                if (parsedFromLine === undefined)
                    parsedFromLine = parseInt(colonMatch[1], 10);
                lookup = lookup.slice(0, -colonMatch[0].length);
            }
        }
        // Line numbers are 1-indexed; clamp 0 and negatives up to the first line.
        if (parsedFromLine !== undefined)
            parsedFromLine = Math.max(1, parsedFromLine);
        const result = await store.get(lookup, { includeBody: false });
        if ("error" in result) {
            let msg = `Document not found: ${file}`;
            if (result.similarFiles.length > 0) {
                msg += `\n\nDid you mean one of these?\n${result.similarFiles.map(s => ` - ${s}`).join('\n')}`;
            }
            return {
                content: [{ type: "text", text: msg }],
                isError: true,
            };
        }
        const body = await store.getDocumentBody(result.filepath, { fromLine: parsedFromLine, maxLines: parsedMaxLines }) ?? "";
        let text = body;
        if (lineNumbers) {
            // Number from the requested start line so numbers match the source file.
            const startLine = parsedFromLine || 1;
            text = addLineNumbers(text, startLine);
        }
        if (result.context) {
            text = `<!-- Context: ${result.context} -->\n\n` + text;
        }
        return {
            content: [{
                    type: "resource",
                    resource: {
                        uri: `qmd://${encodeQmdPath(result.displayPath)}`,
                        name: result.displayPath,
                        title: result.title,
                        mimeType: "text/markdown",
                        text,
                    },
                }],
        };
    });
    // ---------------------------------------------------------------------------
    // Tool: multi_get (Retrieve multiple documents)
    // ---------------------------------------------------------------------------
    server.registerTool("multi_get", {
        title: "Multi-Get Documents",
        description: "Retrieve multiple documents by glob pattern (e.g., 'journals/2025-05*.md') or comma-separated list. Skips files larger than maxBytes.",
        annotations: { readOnlyHint: true, openWorldHint: false },
        inputSchema: {
            pattern: z.string().describe("Glob pattern or comma-separated list of file paths"),
            maxLines: z.number().optional().describe("Maximum lines per file"),
            maxBytes: z.number().optional().default(10240).describe("Skip files larger than this (default: 10240 = 10KB)"),
            lineNumbers: z.boolean().optional().default(true).describe("Add line numbers to output (format: 'N: content'). On by default; set false for raw content."),
        },
    }, async ({ pattern, maxLines, maxBytes, lineNumbers }) => {
        const { docs, errors } = await store.multiGet(pattern, { includeBody: true, maxBytes: maxBytes || DEFAULT_MULTI_GET_MAX_BYTES });
        if (docs.length === 0 && errors.length === 0) {
            return {
                content: [{ type: "text", text: `No files matched pattern: ${pattern}` }],
                isError: true,
            };
        }
        const content = [];
        if (errors.length > 0) {
            content.push({ type: "text", text: `Errors:\n${errors.join('\n')}` });
        }
        for (const result of docs) {
            if (result.skipped) {
                // The hint must name the tool as registered above ("get").
                content.push({
                    type: "text",
                    text: `[SKIPPED: ${result.doc.displayPath} - ${result.skipReason}. Use 'get' with file="${result.doc.displayPath}" to retrieve.]`,
                });
                continue;
            }
            let text = result.doc.body || "";
            if (maxLines !== undefined) {
                const lines = text.split("\n");
                text = lines.slice(0, maxLines).join("\n");
                if (lines.length > maxLines) {
                    text += `\n\n[... truncated ${lines.length - maxLines} more lines]`;
                }
            }
            if (lineNumbers) {
                text = addLineNumbers(text);
            }
            if (result.doc.context) {
                text = `<!-- Context: ${result.doc.context} -->\n\n` + text;
            }
            content.push({
                type: "resource",
                resource: {
                    uri: `qmd://${encodeQmdPath(result.doc.displayPath)}`,
                    name: result.doc.displayPath,
                    title: result.doc.title,
                    mimeType: "text/markdown",
                    text,
                },
            });
        }
        return { content };
    });
    // ---------------------------------------------------------------------------
    // Tool: status (Index status)
    // ---------------------------------------------------------------------------
    server.registerTool("status", {
        title: "Index Status",
        description: "Show the status of the QMD index: collections, document counts, and health information.",
        annotations: { readOnlyHint: true, openWorldHint: false },
        inputSchema: {},
    }, async () => {
        const status = await store.getStatus();
        const summary = [
            `QMD Index Status:`,
            ` Total documents: ${status.totalDocuments}`,
            ` Needs embedding: ${status.needsEmbedding}`,
            ` Vector index: ${status.hasVectorIndex ? 'yes' : 'no'}`,
            ` Collections: ${status.collections.length}`,
        ];
        for (const col of status.collections) {
            summary.push(` - ${col.name}: ${col.path} (${col.documents} docs)`);
        }
        return {
            content: [{ type: "text", text: summary.join('\n') }],
            structuredContent: status,
        };
    });
    return server;
}
|
|
431
|
+
/**
 * Start the QMD MCP server on the stdio transport.
 *
 * @param {{dbPath?: string}} [options] - `dbPath` overrides the default
 *   database location returned by getDefaultDbPath().
 * @returns {Promise<void>} Resolves once the server is connected to stdio.
 */
export async function startMcpServer(options = {}) {
    // Production mode is switched on here, when the server is actually
    // started, rather than as a side effect of importing this module. Flipping
    // the global flag at import time broke test isolation for downstream
    // suites that rely on the default (development) database path behaviour.
    enableProductionMode();
    const configPath = getConfigPath();
    const storeOptions = { dbPath: options.dbPath ?? getDefaultDbPath() };
    // Only hand over a config path that actually exists on disk.
    if (existsSync(configPath)) {
        storeOptions.configPath = configPath;
    }
    const store = await createStore(storeOptions);
    const mcpServer = await createMcpServer(store);
    await mcpServer.connect(new StdioServerTransport());
}
|
|
447
|
+
/**
|
|
448
|
+
* Start MCP server over Streamable HTTP (JSON responses, no SSE).
|
|
449
|
+
* Binds to localhost only. Returns a handle for shutdown and port discovery.
|
|
450
|
+
*/
|
|
451
|
+
export async function startMcpHttpServer(port, options = {}) {
|
|
452
|
+
// See startMcpServer() for the rationale — flip production mode here so the
|
|
453
|
+
// HTTP transport resolves the real database path, without leaking state into
|
|
454
|
+
// callers that only import this module for its exports (e.g. tests).
|
|
455
|
+
enableProductionMode();
|
|
456
|
+
const configPath = getConfigPath();
|
|
457
|
+
const store = await createStore({
|
|
458
|
+
dbPath: options.dbPath ?? getDefaultDbPath(),
|
|
459
|
+
...(existsSync(configPath) ? { configPath } : {}),
|
|
460
|
+
});
|
|
461
|
+
// Pre-fetch default collection names for REST endpoint
|
|
462
|
+
const defaultCollectionNames = await store.getDefaultCollectionNames();
|
|
463
|
+
// Session map: each client gets its own McpServer + Transport pair (MCP spec requirement).
|
|
464
|
+
// The store is shared — it's stateless SQLite, safe for concurrent access.
|
|
465
|
+
const sessions = new Map();
|
|
466
|
+
/**
 * Create a transport plus a dedicated McpServer for one HTTP client session.
 *
 * The transport is added to the `sessions` map once the SDK reports the
 * session as initialized, and removed again when the transport closes.
 *
 * @returns {Promise<WebStandardStreamableHTTPServerTransport>} The connected transport.
 */
async function createSession() {
    const transport = new WebStandardStreamableHTTPServerTransport({
        sessionIdGenerator: () => randomUUID(),
        enableJsonResponse: true,
        onsessioninitialized: (sessionId) => {
            sessions.set(sessionId, transport);
            log(`${ts()} New session ${sessionId} (${sessions.size} active)`);
        },
    });
    // Install the cleanup hook BEFORE connect(). Assigning transport.onclose
    // after connect() replaces whatever close handler the SDK installed while
    // connecting, so the server side would never be told about the close.
    // NOTE(review): relies on the SDK chaining a pre-existing onclose inside
    // connect(), as its Streamable HTTP examples do — confirm for the pinned
    // SDK version.
    transport.onclose = () => {
        if (transport.sessionId) {
            sessions.delete(transport.sessionId);
        }
    };
    const server = await createMcpServer(store);
    await server.connect(transport);
    return transport;
}
|
|
484
|
+
const startTime = Date.now();
|
|
485
|
+
const quiet = options?.quiet ?? false;
|
|
486
|
+
/**
 * Timestamp used as a prefix for request log lines.
 *
 * @returns {string} The time-of-day part of the current ISO-8601 timestamp
 *   (UTC), formatted as HH:mm:ss.SSS.
 */
function ts() {
    const iso = new Date().toISOString(); // e.g. 2025-01-02T03:04:05.678Z
    return iso.substring(11, 23);
}
|
|
490
|
+
/**
 * Extract a short human-readable label from a parsed JSON-RPC body, for logging.
 *
 * For `tools/call` requests the label contains the tool name and, when
 * present, the most descriptive argument: a query string (either `query` or
 * the first entry of `searches`, truncated to 80 characters), or a document
 * target (`path`, `file` or `pattern`). Every other request is labelled with
 * its method name.
 *
 * @param {unknown} body - Parsed request body; may be anything JSON can hold.
 * @returns {string} The label, or "unknown" when no string `method` is present.
 */
function describeRequest(body) {
    // `body` is untrusted parsed JSON and may be null or a non-object.
    const method = typeof body?.method === "string" ? body.method : "unknown";
    if (method !== "tools/call") {
        return method;
    }
    const tool = body.params?.name ?? "?";
    const args = body.params?.arguments;
    // The `query` tool carries its text inside `searches[]`, not in `query`.
    const firstSearch = Array.isArray(args?.searches) ? args.searches[0]?.query : undefined;
    const queryText = args?.query || firstSearch;
    if (queryText) {
        // Show query string if present, truncated
        const q = String(queryText).slice(0, 80);
        return `tools/call ${tool} "${q}"`;
    }
    // The `get` tool names its target `file`; `multi_get` uses `pattern`.
    const target = args?.path || args?.file || args?.pattern;
    return target ? `tools/call ${tool} ${target}` : `tools/call ${tool}`;
}
|
|
509
|
+
/**
 * Write a log line to stderr unless the `quiet` option is set.
 *
 * @param {string} msg - Line to emit.
 */
function log(msg) {
    if (quiet) {
        return;
    }
    console.error(msg);
}
|
|
513
|
+
/**
 * Read an entire request body into a string.
 *
 * @param {AsyncIterable<Uint8Array>} req - Incoming request, iterated chunk
 *   by chunk until it ends.
 * @returns {Promise<string>} All chunks concatenated and decoded with
 *   Buffer's default encoding.
 */
async function collectBody(req) {
    const pieces = [];
    for await (const piece of req) {
        pieces.push(piece);
    }
    const whole = Buffer.concat(pieces);
    return whole.toString();
}
|
|
520
|
+
const httpServer = createServer(async (nodeReq, nodeRes) => {
|
|
521
|
+
const reqStart = Date.now();
|
|
522
|
+
const pathname = nodeReq.url || "/";
|
|
523
|
+
try {
|
|
524
|
+
if (pathname === "/health" && nodeReq.method === "GET") {
|
|
525
|
+
const body = JSON.stringify({ status: "ok", uptime: Math.floor((Date.now() - startTime) / 1000) });
|
|
526
|
+
nodeRes.writeHead(200, { "Content-Type": "application/json" });
|
|
527
|
+
nodeRes.end(body);
|
|
528
|
+
log(`${ts()} GET /health (${Date.now() - reqStart}ms)`);
|
|
529
|
+
return;
|
|
530
|
+
}
|
|
531
|
+
// REST endpoint: POST /search — structured search without MCP protocol
|
|
532
|
+
// REST endpoint: POST /query (alias: /search) — structured search without MCP protocol
|
|
533
|
+
if ((pathname === "/query" || pathname === "/search") && nodeReq.method === "POST") {
|
|
534
|
+
const rawBody = await collectBody(nodeReq);
|
|
535
|
+
const params = JSON.parse(rawBody);
|
|
536
|
+
// Validate required fields
|
|
537
|
+
if (!params.searches || !Array.isArray(params.searches)) {
|
|
538
|
+
nodeRes.writeHead(400, { "Content-Type": "application/json" });
|
|
539
|
+
nodeRes.end(JSON.stringify({ error: "Missing required field: searches (array)" }));
|
|
540
|
+
return;
|
|
541
|
+
}
|
|
542
|
+
// Map to internal format
|
|
543
|
+
const searches = params.searches;
|
|
544
|
+
const queries = searches.map((s) => ({
|
|
545
|
+
type: s.type,
|
|
546
|
+
query: String(s.query || ""),
|
|
547
|
+
}));
|
|
548
|
+
// Use default collections if none specified
|
|
549
|
+
const effectiveCollections = Array.isArray(params.collections) ? params.collections.map(String) : defaultCollectionNames;
|
|
550
|
+
const results = await store.search({
|
|
551
|
+
queries,
|
|
552
|
+
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
|
553
|
+
limit: typeof params.limit === "number" ? params.limit : 10,
|
|
554
|
+
minScore: typeof params.minScore === "number" ? params.minScore : 0,
|
|
555
|
+
candidateLimit: typeof params.candidateLimit === "number" ? params.candidateLimit : undefined,
|
|
556
|
+
intent: typeof params.intent === "string" ? params.intent : undefined,
|
|
557
|
+
rerank: typeof params.rerank === "boolean" ? params.rerank : undefined,
|
|
558
|
+
});
|
|
559
|
+
// Use first lex or vec query for snippet extraction
|
|
560
|
+
const primaryQuery = searches.find((s) => s.type === 'lex')?.query
|
|
561
|
+
|| searches.find((s) => s.type === 'vec')?.query
|
|
562
|
+
|| searches[0]?.query || "";
|
|
563
|
+
const formatted = results.map(r => {
|
|
564
|
+
const { line, snippet } = extractSnippet(r.body, String(primaryQuery), 300, r.bestChunkPos, r.bestChunk.length, typeof params.intent === "string" ? params.intent : undefined);
|
|
565
|
+
return {
|
|
566
|
+
docid: `#${r.docid}`,
|
|
567
|
+
file: `qmd://${encodeQmdPath(r.displayPath)}`,
|
|
568
|
+
title: r.title,
|
|
569
|
+
score: Math.round(r.score * 100) / 100,
|
|
570
|
+
context: r.context,
|
|
571
|
+
line,
|
|
572
|
+
snippet: addLineNumbers(snippet, line),
|
|
573
|
+
};
|
|
574
|
+
});
|
|
575
|
+
nodeRes.writeHead(200, { "Content-Type": "application/json" });
|
|
576
|
+
nodeRes.end(JSON.stringify({ results: formatted }));
|
|
577
|
+
log(`${ts()} POST /query ${params.searches.length} queries (${Date.now() - reqStart}ms)`);
|
|
578
|
+
return;
|
|
579
|
+
}
|
|
580
|
+
if (pathname === "/mcp" && nodeReq.method === "POST") {
|
|
581
|
+
const rawBody = await collectBody(nodeReq);
|
|
582
|
+
const body = JSON.parse(rawBody);
|
|
583
|
+
const label = describeRequest(body);
|
|
584
|
+
const url = `http://localhost:${port}${pathname}`;
|
|
585
|
+
const headers = {};
|
|
586
|
+
for (const [k, v] of Object.entries(nodeReq.headers)) {
|
|
587
|
+
if (typeof v === "string")
|
|
588
|
+
headers[k] = v;
|
|
589
|
+
}
|
|
590
|
+
// Route to existing session or create new one on initialize
|
|
591
|
+
const sessionId = headers["mcp-session-id"];
|
|
592
|
+
let transport;
|
|
593
|
+
if (sessionId) {
|
|
594
|
+
const existing = sessions.get(sessionId);
|
|
595
|
+
if (!existing) {
|
|
596
|
+
nodeRes.writeHead(404, { "Content-Type": "application/json" });
|
|
597
|
+
nodeRes.end(JSON.stringify({
|
|
598
|
+
jsonrpc: "2.0",
|
|
599
|
+
error: { code: -32001, message: "Session not found" },
|
|
600
|
+
id: body?.id ?? null,
|
|
601
|
+
}));
|
|
602
|
+
return;
|
|
603
|
+
}
|
|
604
|
+
transport = existing;
|
|
605
|
+
}
|
|
606
|
+
else if (isInitializeRequest(body)) {
|
|
607
|
+
transport = await createSession();
|
|
608
|
+
}
|
|
609
|
+
else {
|
|
610
|
+
nodeRes.writeHead(400, { "Content-Type": "application/json" });
|
|
611
|
+
nodeRes.end(JSON.stringify({
|
|
612
|
+
jsonrpc: "2.0",
|
|
613
|
+
error: { code: -32000, message: "Bad Request: Missing session ID" },
|
|
614
|
+
id: body?.id ?? null,
|
|
615
|
+
}));
|
|
616
|
+
return;
|
|
617
|
+
}
|
|
618
|
+
const request = new Request(url, { method: "POST", headers, body: rawBody });
|
|
619
|
+
const response = await transport.handleRequest(request, { parsedBody: body });
|
|
620
|
+
nodeRes.writeHead(response.status, Object.fromEntries(response.headers));
|
|
621
|
+
nodeRes.end(Buffer.from(await response.arrayBuffer()));
|
|
622
|
+
log(`${ts()} POST /mcp ${label} (${Date.now() - reqStart}ms)`);
|
|
623
|
+
return;
|
|
624
|
+
}
|
|
625
|
+
if (pathname === "/mcp") {
|
|
626
|
+
const headers = {};
|
|
627
|
+
for (const [k, v] of Object.entries(nodeReq.headers)) {
|
|
628
|
+
if (typeof v === "string")
|
|
629
|
+
headers[k] = v;
|
|
630
|
+
}
|
|
631
|
+
// GET/DELETE must have a valid session
|
|
632
|
+
const sessionId = headers["mcp-session-id"];
|
|
633
|
+
if (!sessionId) {
|
|
634
|
+
nodeRes.writeHead(400, { "Content-Type": "application/json" });
|
|
635
|
+
nodeRes.end(JSON.stringify({
|
|
636
|
+
jsonrpc: "2.0",
|
|
637
|
+
error: { code: -32000, message: "Bad Request: Missing session ID" },
|
|
638
|
+
id: null,
|
|
639
|
+
}));
|
|
640
|
+
return;
|
|
641
|
+
}
|
|
642
|
+
const transport = sessions.get(sessionId);
|
|
643
|
+
if (!transport) {
|
|
644
|
+
nodeRes.writeHead(404, { "Content-Type": "application/json" });
|
|
645
|
+
nodeRes.end(JSON.stringify({
|
|
646
|
+
jsonrpc: "2.0",
|
|
647
|
+
error: { code: -32001, message: "Session not found" },
|
|
648
|
+
id: null,
|
|
649
|
+
}));
|
|
650
|
+
return;
|
|
651
|
+
}
|
|
652
|
+
const url = `http://localhost:${port}${pathname}`;
|
|
653
|
+
const rawBody = nodeReq.method !== "GET" && nodeReq.method !== "HEAD" ? await collectBody(nodeReq) : undefined;
|
|
654
|
+
const request = new Request(url, { method: nodeReq.method || "GET", headers, ...(rawBody ? { body: rawBody } : {}) });
|
|
655
|
+
const response = await transport.handleRequest(request);
|
|
656
|
+
nodeRes.writeHead(response.status, Object.fromEntries(response.headers));
|
|
657
|
+
nodeRes.end(Buffer.from(await response.arrayBuffer()));
|
|
658
|
+
return;
|
|
659
|
+
}
|
|
660
|
+
nodeRes.writeHead(404);
|
|
661
|
+
nodeRes.end("Not Found");
|
|
662
|
+
}
|
|
663
|
+
catch (err) {
|
|
664
|
+
console.error("HTTP handler error:", err);
|
|
665
|
+
nodeRes.writeHead(500);
|
|
666
|
+
nodeRes.end("Internal Server Error");
|
|
667
|
+
}
|
|
668
|
+
});
|
|
669
|
+
await new Promise((resolve, reject) => {
|
|
670
|
+
httpServer.on("error", reject);
|
|
671
|
+
httpServer.listen(port, "localhost", () => resolve());
|
|
672
|
+
});
|
|
673
|
+
const actualPort = httpServer.address().port;
|
|
674
|
+
let stopping = false;
|
|
675
|
+
/**
 * Shut the HTTP MCP server down.
 *
 * Only the first invocation does any work; once `stopping` has been set,
 * later calls return immediately (without waiting for the first call to
 * finish). The shutdown sequence is:
 *   1. close every session transport, one at a time, awaiting each;
 *   2. empty the session map;
 *   3. call `httpServer.close()` (not awaited);
 *   4. await `store.close()`.
 *
 * @returns {Promise<void>}
 */
const stop = async () => {
    if (!stopping) {
        stopping = true;
        // Transports are closed sequentially, in the map's iteration order.
        const openTransports = sessions.values();
        for (const openTransport of openTransports) {
            await openTransport.close();
        }
        sessions.clear();
        // Fire-and-forget: the listener close is not awaited before the store is closed.
        httpServer.close();
        await store.close();
    }
};
|
|
686
|
+
process.on("SIGTERM", async () => {
|
|
687
|
+
console.error("Shutting down (SIGTERM)...");
|
|
688
|
+
await stop();
|
|
689
|
+
process.exit(0);
|
|
690
|
+
});
|
|
691
|
+
process.on("SIGINT", async () => {
|
|
692
|
+
console.error("Shutting down (SIGINT)...");
|
|
693
|
+
await stop();
|
|
694
|
+
process.exit(0);
|
|
695
|
+
});
|
|
696
|
+
log(`QMD MCP server listening on http://localhost:${actualPort}/mcp`);
|
|
697
|
+
return { httpServer, port: actualPort, stop };
|
|
698
|
+
}
|
|
699
|
+
// Start the MCP server when this file is the process entry script: either the
// module's own file path equals process.argv[1], or process.argv[1] ends with
// "/server.ts" or "/server.js". Startup failures are reported via console.error.
{
    const entryScript = process.argv[1];
    const isEntryPoint = fileURLToPath(import.meta.url) === entryScript
        || ["/server.ts", "/server.js"].some((suffix) => entryScript?.endsWith(suffix));
    if (isEntryPoint) {
        startMcpServer().catch(console.error);
    }
}
|