@speakeasy-api/docs-mcp-eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/runner.js ADDED
@@ -0,0 +1,311 @@
1
+ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
2
+ import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
3
+ import { execFile, spawn } from "node:child_process";
4
+ import { performance } from "node:perf_hooks";
5
+ import { promisify } from "node:util";
6
+ import { summarizeCases } from "./metrics.js";
7
+ const execFileAsync = promisify(execFile);
8
/**
 * Assembles an evaluation report from cases that have already been ranked.
 *
 * @param {object} input
 * @param {Array} input.cases - Ranked cases, handed to `summarizeCases` unchanged.
 * @param {object} input.timings - Timing data, handed to `summarizeCases` unchanged.
 * @param {boolean} [input.deterministic] - Reported as `true` when null/undefined.
 * @param {object} [input.model] - Optional descriptor; its `provider` and `model`
 *   fields are reported as `null` when absent.
 * @returns {{ summary: *, metadata: { deterministic: *, provider: *, model: * } }}
 */
export function runEvaluation(input) {
  const { cases, timings, deterministic, model: modelInfo } = input;
  const metadata = {
    deterministic: deterministic ?? true,
    provider: modelInfo?.provider ?? null,
    model: modelInfo?.model ?? null
  };
  return { summary: summarizeCases(cases, timings), metadata };
}
19
/**
 * Runs every evaluation case against an MCP server launched over stdio and
 * returns the evaluation report together with the raw measurements.
 *
 * Steps, in order: optional build step (timed), spawn/connect to the server,
 * start RSS sampling of the server process, list tools, optional warmup
 * queries, execute each case sequentially, stop sampling, summarize.
 *
 * @param {object} input
 * @param {object} [input.build] - When truthy, passed to `runBuildStep`; its
 *   duration becomes `buildTimeMs` (otherwise 0).
 * @param {object} input.server - Launch settings: `command` plus optional
 *   `args`, `cwd` and `env`, forwarded only when not `undefined`.
 * @param {Array} input.cases - Cases passed one by one to `executeCase`.
 * @param {number} [input.warmupQueries] - Number of warmup searches (default 0).
 * @param {object} [input.model] - Forwarded to `runEvaluation` when truthy.
 * @param {boolean} [input.deterministic] - Forwarded when not `undefined`.
 * @returns {Promise<object>} The `runEvaluation` output extended with
 *   `rankedCases` and a `stats` object holding the latency arrays,
 *   `buildTimeMs` and `peakRssMb`.
 */
export async function runEvaluationAgainstServer(input) {
  const { server, cases } = input;

  let buildTimeMs = 0;
  if (input.build) {
    buildTimeMs = await runBuildStep(input.build);
  }

  // Optional launch settings are only forwarded when the caller supplied them.
  const serverParams = {
    command: server.command,
    ...(server.args !== undefined ? { args: server.args } : {}),
    ...(server.cwd !== undefined ? { cwd: server.cwd } : {}),
    ...(server.env !== undefined ? { env: server.env } : {})
  };
  const transport = new StdioClientTransport(serverParams);

  const clientInfo = {
    name: "@speakeasy-api/docs-mcp-eval",
    version: "0.1.0"
  };
  const client = new Client(clientInfo, { capabilities: {} });
  await client.connect(transport);

  const rssSampler = createRssSampler(transport.pid);
  await rssSampler.start();

  try {
    await client.listTools();
    await warmupServer(client, cases, input.warmupQueries ?? 0);

    const rankedCases = [];
    const searchLatenciesMs = [];
    const getDocLatenciesMs = [];
    for (const testCase of cases) {
      const {
        rankedCase,
        searchLatenciesMs: caseSearchMs,
        getDocLatenciesMs: caseGetDocMs
      } = await executeCase(client, testCase);
      rankedCases.push(rankedCase);
      searchLatenciesMs.push(...caseSearchMs);
      getDocLatenciesMs.push(...caseGetDocMs);
    }

    const peakRssMb = await rssSampler.stop();

    const evaluationInput = {
      cases: rankedCases,
      timings: {
        searchLatenciesMs,
        getDocLatenciesMs,
        buildTimeMs,
        peakRssMb
      }
    };
    if (input.model) {
      evaluationInput.model = input.model;
    }
    if (input.deterministic !== undefined) {
      evaluationInput.deterministic = input.deterministic;
    }

    return {
      ...runEvaluation(evaluationInput),
      rankedCases,
      stats: {
        searchLatenciesMs,
        getDocLatenciesMs,
        buildTimeMs,
        peakRssMb
      }
    };
  } finally {
    // Runs on success and failure alike: stop sampling, then shut the transport.
    await rssSampler.stop();
    await transport.close();
  }
}
83
/**
 * Executes one evaluation case: pages through `search_docs` results until the
 * expected chunk shows up, the server stops returning a cursor, or the round
 * budget is exhausted. When the expected chunk is found, a single `get_doc`
 * call is issued for it and timed.
 *
 * @param {object} client - Object exposing `callTool({ name, arguments })`.
 * @param {object} testCase - Reads `query`, `expectedChunkId`, and the optional
 *   `limit` (default 5), `maxRounds` (default 3) and `filters` (spread into the
 *   search arguments).
 * @returns {Promise<{ rankedCase: object, searchLatenciesMs: number[], getDocLatenciesMs: number[] }>}
 * @throws {Error} When a tool result has the compatibility `toolResult` shape
 *   or is flagged `isError`.
 */
async function executeCase(client, testCase) {
  const { expectedChunkId } = testCase;
  const maxRounds = testCase.maxRounds ?? 3;

  const rankedChunkIds = [];
  const seenChunkIds = new Set();
  const searchLatenciesMs = [];
  const getDocLatenciesMs = [];

  // Calls a tool and records the elapsed time; nothing is recorded if the call throws.
  const timedCall = async (name, toolArguments, latencies) => {
    const startedAt = performance.now();
    const result = await client.callTool({ name, arguments: toolArguments });
    latencies.push(performance.now() - startedAt);
    return result;
  };

  // Rejects legacy-shaped results and server-reported errors.
  const ensureSuccess = (result, fallbackMessage) => {
    if ("toolResult" in result) {
      throw new Error("Unexpected compatibility tool result shape from server");
    }
    if (result.isError) {
      throw new Error(readTextContent(result.content) || fallbackMessage);
    }
  };

  let roundsExecuted = 0;
  let cursor;
  while (roundsExecuted < maxRounds) {
    roundsExecuted += 1;

    const searchArgs = {
      query: testCase.query,
      limit: testCase.limit ?? 5,
      ...(testCase.filters ?? {})
    };
    if (cursor) {
      searchArgs.cursor = cursor;
    }

    const searchResult = await timedCall("search_docs", searchArgs, searchLatenciesMs);
    ensureSuccess(searchResult, "search_docs returned an unknown error");

    const { hits, next_cursor: nextCursor } = parseSearchResultText(
      readTextContent(searchResult.content)
    );

    // Keep first-seen order across pages, dropping repeated chunk ids.
    for (const { chunk_id: chunkId } of hits) {
      if (seenChunkIds.has(chunkId)) {
        continue;
      }
      seenChunkIds.add(chunkId);
      rankedChunkIds.push(chunkId);
    }

    const matchingHit = hits.find((hit) => hit.chunk_id === expectedChunkId);
    if (matchingHit) {
      const docResult = await timedCall(
        "get_doc",
        { chunk_id: matchingHit.chunk_id, context: 0 },
        getDocLatenciesMs
      );
      ensureSuccess(docResult, "get_doc returned an unknown error");
      break;
    }

    cursor = nextCursor ?? undefined;
    if (!cursor) {
      break;
    }
  }

  const roundsToRightDoc = computeRoundsToRightDoc({
    found: seenChunkIds.has(expectedChunkId),
    roundsExecuted,
    maxRounds
  });

  return {
    rankedCase: { expectedChunkId, rankedChunkIds, roundsToRightDoc },
    searchLatenciesMs,
    getDocLatenciesMs
  };
}
159
/**
 * Scores how many search rounds it took to surface the expected document.
 *
 * @param {{ found: boolean, roundsExecuted: number, maxRounds: number }} input
 * @returns {number} `roundsExecuted` when the document was found; otherwise
 *   `maxRounds + 1`, i.e. one past the round budget.
 */
export function computeRoundsToRightDoc(input) {
  if (input.found) {
    return input.roundsExecuted;
  }
  return input.maxRounds + 1;
}
162
/**
 * Issues throwaway `search_docs` calls before measurement begins, cycling
 * through the cases in order. Failures are ignored on purpose.
 *
 * @param {object} client - Object exposing `callTool({ name, arguments })`.
 * @param {Array<object>} cases - Cases whose `query`, `limit` and `filters` seed the calls.
 * @param {number} warmupQueries - How many warmup calls to issue; `<= 0` disables warmup.
 * @returns {Promise<void>}
 */
async function warmupServer(client, cases, warmupQueries) {
  const nothingToDo = warmupQueries <= 0 || cases.length === 0;
  if (nothingToDo) {
    return;
  }

  let issued = 0;
  while (issued < warmupQueries) {
    const candidate = cases[issued % cases.length];
    issued += 1;
    if (!candidate) {
      continue;
    }

    // Warmup never requests more than 5 hits, even if the case asks for more.
    const request = {
      name: "search_docs",
      arguments: {
        query: candidate.query,
        limit: Math.min(5, candidate.limit ?? 5),
        ...(candidate.filters ?? {})
      }
    };

    try {
      await client.callTool(request);
    } catch {
      // Warmup is best-effort.
    }
  }
}
187
/**
 * Joins the text parts of a tool result's `content` array into one string.
 *
 * Entries whose `type` is not `"text"` are skipped; a text entry without a
 * `text` value contributes an empty line. Parts are joined with newlines and
 * the result is trimmed.
 *
 * A missing or non-array `content`, as well as null/undefined entries, yield
 * no text instead of raising a TypeError, so callers that fall back to a
 * default message on an empty string still report the intended error.
 *
 * @param {Array<{ type?: string, text?: string }>|null|undefined} content
 * @returns {string} The combined, trimmed text (possibly empty).
 */
function readTextContent(content) {
  if (!Array.isArray(content)) {
    return "";
  }
  return content
    .filter((entry) => entry?.type === "text")
    .map((entry) => entry.text ?? "")
    .join("\n")
    .trim();
}
194
/**
 * Decodes the JSON text returned by `search_docs` into the minimal shape the
 * runner needs.
 *
 * @param {string} text - JSON text expected to decode to an object with a
 *   `hits` array and an optional `next_cursor`.
 * @returns {{ hits: Array<{ chunk_id: string }>, next_cursor: (string|null) }}
 *   Only hits that are objects with a non-empty string `chunk_id` are kept
 *   (reduced to that single field); `next_cursor` is `null` unless it is a string.
 * @throws {Error} When the text is not valid JSON, does not decode to an
 *   object, or has no `hits` array.
 */
function parseSearchResultText(text) {
  let decoded;
  try {
    decoded = JSON.parse(text);
  } catch {
    throw new Error("search_docs result was not valid JSON text");
  }

  const isObject = decoded !== null && typeof decoded === "object";
  if (!isObject) {
    throw new Error("search_docs result was not an object");
  }
  if (!Array.isArray(decoded.hits)) {
    throw new Error("search_docs result is missing hits[]");
  }

  const hits = decoded.hits
    .map((entry) => (entry && typeof entry === "object" ? entry.chunk_id : undefined))
    .filter((chunkId) => typeof chunkId === "string" && chunkId !== "")
    .map((chunkId) => ({ chunk_id: chunkId }));

  const rawCursor = decoded.next_cursor;
  return {
    hits,
    next_cursor: typeof rawCursor === "string" ? rawCursor : null
  };
}
228
/**
 * Runs the configured build command to completion and reports how long it took.
 *
 * The command is spawned without a shell, inherits this process's stdio, and
 * sees the current environment overlaid with `config.env`.
 *
 * @param {object} config
 * @param {string} config.command - Executable to run.
 * @param {string[]} [config.args] - Arguments (defaults to none).
 * @param {string} [config.cwd] - Working directory for the child.
 * @param {object} [config.env] - Extra environment variables; these win over `process.env`.
 * @returns {Promise<number>} Elapsed time in milliseconds, measured with `performance.now()`.
 * @throws {Error} When the process cannot be spawned, exits with a non-zero
 *   code, or is terminated by a signal.
 */
async function runBuildStep(config) {
  const began = performance.now();

  const describeFailure = (code, signal) =>
    signal ? `signal ${signal}` : `exit code ${String(code)}`;

  await new Promise((resolve, reject) => {
    const spawnOptions = {
      cwd: config.cwd,
      env: { ...process.env, ...config.env },
      stdio: "inherit",
      shell: false
    };
    const buildProcess = spawn(config.command, config.args ?? [], spawnOptions);

    buildProcess.on("error", reject);
    buildProcess.on("exit", (code, signal) => {
      if (code !== 0) {
        reject(new Error(`Build command failed with ${describeFailure(code, signal)}`));
        return;
      }
      resolve();
    });
  });

  return performance.now() - began;
}
251
/**
 * Creates a best-effort sampler that tracks the peak resident set size of a
 * process by polling `readProcessRssMb` every 200 ms.
 *
 * `stop()` is idempotent: the first call takes one last sample and fixes the
 * result (rounded to 6 decimals); every later call, including calls made
 * while the first is still in flight, resolves to that same value without
 * sampling again.
 *
 * @param {number|undefined} pid - Process to observe. When falsy, a no-op
 *   sampler is returned whose `stop()` resolves to 0.
 * @returns {{ start: () => Promise<void>, stop: () => Promise<number> }}
 */
function createRssSampler(pid) {
  if (!pid) {
    return {
      async start() {
        // noop
      },
      async stop() {
        return 0;
      }
    };
  }

  let interval;
  let peakRssMb = 0;
  let stopped = false;
  // Promise for the final, rounded peak; created by the first stop() call.
  let stopResult;

  const sample = async () => {
    try {
      const rssMb = await readProcessRssMb(pid);
      if (rssMb > peakRssMb) {
        peakRssMb = rssMb;
      }
    } catch {
      // RSS sampling is best-effort.
    }
  };

  return {
    async start() {
      if (stopped || interval) {
        return;
      }
      await sample();
      // stop() may have run while the first sample was pending; do not arm
      // a timer that nothing would ever clear.
      if (stopped || interval) {
        return;
      }
      interval = setInterval(() => {
        void sample();
      }, 200);
      // Do not let the polling timer keep the event loop alive.
      interval.unref();
    },
    async stop() {
      if (!stopResult) {
        stopped = true;
        if (interval) {
          clearInterval(interval);
          interval = undefined;
        }
        // Capture the value right after the final sample so that a straggling
        // interval sample cannot change what later stop() calls report.
        stopResult = sample().then(() => Number(peakRssMb.toFixed(6)));
      }
      return stopResult;
    }
  };
}
301
/**
 * Reads the current resident set size of a process by running
 * `ps -o rss= -p <pid>`.
 *
 * The output is parsed as a number of kilobytes and converted to megabytes by
 * dividing by 1024.
 *
 * @param {number} pid - Process id to query.
 * @returns {Promise<number>} RSS in megabytes, or 0 when the output is not a
 *   finite positive number.
 */
async function readProcessRssMb(pid) {
  const psArgs = ["-o", "rss=", "-p", String(pid)];
  const result = await execFileAsync("ps", psArgs, { windowsHide: true });
  const kilobytes = Number.parseFloat(result.stdout.trim());
  const usable = Number.isFinite(kilobytes) && kilobytes > 0;
  return usable ? kilobytes / 1024 : 0;
}
311
+ //# sourceMappingURL=runner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner.js","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE9C,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAsE1C,MAAM,UAAU,aAAa,CAAC,KAAmB;IAC/C,MAAM,OAAO,GAAG,cAAc,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;IAC3D,OAAO;QACL,OAAO;QACP,QAAQ,EAAE;YACR,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,IAAI;YAC1C,QAAQ,EAAE,KAAK,CAAC,KAAK,EAAE,QAAQ,IAAI,IAAI;YACvC,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,IAAI,IAAI;SAClC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC9C,KAAuB;IAEvB,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEtE,MAAM,YAAY,GAKd;QACF,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,OAAO;KAC9B,CAAC;IAEF,IAAI,KAAK,CAAC,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACpC,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC;IACxC,CAAC;IACD,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACnC,YAAY,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC;IACtC,CAAC;IACD,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACnC,YAAY,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC;IACtC,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,oBAAoB,CAAC,YAAY,CAAC,CAAC;IAEzD,MAAM,MAAM,GAAG,IAAI,MAAM,CACvB;QACE,IAAI,EAAE,8BAA8B;QACpC,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE,EAAE;KACjB,CACF,CAAC;IAEF,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,MAAM,UAAU,GAAG,gBAAgB,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,UAAU,CAAC,KAAK,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;QACzB,MAAM,YAAY,CAAC,MAAM,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,aAAa,IAAI,CAAC,CAAC,CAAC;QAElE,MAAM,WAAW,GAAiB,EAAE,CAAC;QACrC,MAAM,iBAAiB,GAAa,EAAE,CAAC;QACvC,MAAM,iBAAiB,GAAa,EAAE,CAAC;QAEvC,KAAK,MAAM,QAAQ,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;YACnC,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;YACrD,WAAW,CAA
C,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;YACtC,iBAAiB,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;YACtD,iBAAiB,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QACxD,CAAC;QACD,MAAM,SAAS,GAAG,MAAM,UAAU,CAAC,IAAI,EAAE,CAAC;QAE1C,MAAM,MAAM,GAAG,aAAa,CAAC;YAC3B,KAAK,EAAE,WAAW;YAClB,OAAO,EAAE;gBACP,iBAAiB;gBACjB,iBAAiB;gBACjB,WAAW;gBACX,SAAS;aACV;YACD,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9C,GAAG,CAAC,KAAK,CAAC,aAAa,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,KAAK,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACrF,CAAC,CAAC;QAEH,OAAO;YACL,GAAG,MAAM;YACT,WAAW;YACX,KAAK,EAAE;gBACL,iBAAiB;gBACjB,iBAAiB;gBACjB,WAAW;gBACX,SAAS;aACV;SACF,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,MAAM,UAAU,CAAC,IAAI,EAAE,CAAC;QACxB,MAAM,SAAS,CAAC,KAAK,EAAE,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,KAAK,UAAU,WAAW,CACxB,MAAc,EACd,QAAuB;IAMvB,MAAM,cAAc,GAAa,EAAE,CAAC;IACpC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,iBAAiB,GAAa,EAAE,CAAC;IACvC,MAAM,iBAAiB,GAAa,EAAE,CAAC;IAEvC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,MAA0B,CAAC;IAC/B,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,IAAI,CAAC,CAAC;IAE1C,OAAO,MAAM,GAAG,SAAS,EAAE,CAAC;QAC1B,MAAM,IAAI,CAAC,CAAC;QAEZ,MAAM,IAAI,GAA4B;YACpC,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,CAAC;YAC1B,GAAG,CAAC,QAAQ,CAAC,OAAO,IAAI,EAAE,CAAC;SAC5B,CAAC;QAEF,IAAI,MAAM,EAAE,CAAC;YACX,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACvB,CAAC;QAED,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACtC,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC;YACvC,IAAI,EAAE,aAAa;YACnB,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC,CAAC;QAExD,IAAI,YAAY,IAAI,UAAU,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;QAC5E,CAAC;QAED,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,uCAAuC,CAAC,CAAC;QAClG,CAAC;QAED,MAAM,OAAO,GAAG,qBAAqB,CAAC,eAAe,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;QAC3E,KAAK,MAAM,GAAG,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YAC/B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,cAA
c,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAClC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,KAAK,QAAQ,CAAC,eAAe,CAAC,CAAC;QACxF,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACtC,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC;gBACzC,IAAI,EAAE,SAAS;gBACf,SAAS,EAAE;oBACT,QAAQ,EAAE,SAAS,CAAC,QAAQ;oBAC5B,OAAO,EAAE,CAAC;iBACX;aACF,CAAC,CAAC;YACH,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC,CAAC;YAExD,IAAI,YAAY,IAAI,YAAY,EAAE,CAAC;gBACjC,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;YAC5E,CAAC;YACD,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;gBACzB,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,mCAAmC,CAAC,CAAC;YAChG,CAAC;YAED,MAAM;QACR,CAAC;QAED,MAAM,GAAG,OAAO,CAAC,WAAW,IAAI,SAAS,CAAC;QAC1C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM;QACR,CAAC;IACH,CAAC;IAED,MAAM,gBAAgB,GAAG,uBAAuB,CAAC;QAC/C,KAAK,EAAE,cAAc,CAAC,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC;QACxD,cAAc,EAAE,MAAM;QACtB,SAAS;KACV,CAAC,CAAC;IAEH,OAAO;QACL,UAAU,EAAE;YACV,eAAe,EAAE,QAAQ,CAAC,eAAe;YACzC,cAAc;YACd,gBAAgB;SACjB;QACD,iBAAiB;QACjB,iBAAiB;KAClB,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,KAIvC;IACC,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC;AAClE,CAAC;AAED,KAAK,UAAU,YAAY,CACzB,MAAc,EACd,KAAsB,EACtB,aAAqB;IAErB,IAAI,aAAa,IAAI,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7C,OAAO;IACT,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QACzC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,SAAS;QACX,CAAC;QAED,MAAM,IAAI,GAA4B;YACpC,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,KAAK,IAAI,CAAC,CAAC;YACvC,GAAG,CAAC,QAAQ,CAAC,OAAO,IAAI,EAAE,CAAC;SAC5B,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,QAAQ,CAAC;gBACpB,IAAI,EAAE,aAAa;gBACnB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,
OAA+C;IACtE,OAAO,OAAO;SACX,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;SACxC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC;SAChC,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,qBAAqB,CAAC,IAAY;IAIzC,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC1C,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,MAAM,GAAG,MAAiC,CAAC;IACjD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,IAAI,GAAgC,EAAE,CAAC;IAC7C,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChC,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YACxC,SAAS;QACX,CAAC;QACD,MAAM,OAAO,GAAI,KAAiC,CAAC,QAAQ,CAAC;QAC5D,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,OAAO,EAAE,CAAC;YAC3C,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IAED,MAAM,aAAa,GAAG,MAAM,CAAC,WAAW,CAAC;IACzC,MAAM,UAAU,GAAG,aAAa,KAAK,IAAI,IAAI,OAAO,aAAa,KAAK,QAAQ;QAC5E,CAAC,CAAC,aAAa;QACf,CAAC,CAAC,IAAI,CAAC;IAET,OAAO;QACL,IAAI;QACJ,WAAW,EAAE,UAAU;KACxB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,MAAuB;IACjD,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC1C,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE,EAAE;YACrD,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,GAAG,EAAE;gBACH,GAAG,OAAO,CAAC,GAAG;gBACd,GAAG,MAAM,CAAC,GAAG;aACd;YACD,KAAK,EAAE,SAAS;YAChB,KAAK,EAAE,KAAK;SACb,CAAC,CAAC;QAEH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;YAChC,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACf,OAAO,EAAE,CAAC;gBACV,OAAO;YACT,CAAC;YACD,MAAM,CACJ,IAAI,KAAK,CACP,6BAA6B,MAAM,CAAC,CAAC,CAAC,UAAU,MAAM,EAAE,CAAC,CAAC,CAAC,aAAa,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,CACzF,CACF,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CA
AC;AACvC,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAkB;IAI1C,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO;YACL,KAAK,CAAC,KAAK;gBACT,OAAO;YACT,CAAC;YACD,KAAK,CAAC,IAAI;gBACR,OAAO,CAAC,CAAC;YACX,CAAC;SACF,CAAC;IACJ,CAAC;IAED,IAAI,QAAoD,CAAC;IACzD,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,MAAM,MAAM,GAAG,KAAK,IAAmB,EAAE;QACvC,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC,GAAG,CAAC,CAAC;YAC1C,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;gBACtB,SAAS,GAAG,KAAK,CAAC;YACpB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,+BAA+B;QACjC,CAAC;IACH,CAAC,CAAC;IAEF,OAAO;QACL,KAAK,CAAC,KAAK;YACT,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;gBACxB,OAAO;YACT,CAAC;YACD,MAAM,MAAM,EAAE,CAAC;YACf,QAAQ,GAAG,WAAW,CAAC,GAAG,EAAE;gBAC1B,KAAK,MAAM,EAAE,CAAC;YAChB,CAAC,EAAE,GAAG,CAAC,CAAC;YACR,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,CAAC;QACD,KAAK,CAAC,IAAI;YACR,IAAI,OAAO,EAAE,CAAC;gBACZ,OAAO,SAAS,CAAC;YACnB,CAAC;YAED,OAAO,GAAG,IAAI,CAAC;YACf,IAAI,QAAQ,EAAE,CAAC;gBACb,aAAa,CAAC,QAAQ,CAAC,CAAC;gBACxB,QAAQ,GAAG,SAAS,CAAC;YACvB,CAAC;YACD,MAAM,MAAM,EAAE,CAAC;YACf,OAAO,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QACtC,CAAC;KACF,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW;IACzC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE;QAC9E,WAAW,EAAE,IAAI;KAClB,CAAC,CAAC;IACH,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;QACpC,OAAO,CAAC,CAAC;IACX,CAAC;IACD,OAAO,EAAE,GAAG,IAAI,CAAC;AACnB,CAAC"}
package/package.json ADDED
@@ -0,0 +1,44 @@
1
+ {
2
+ "name": "@speakeasy-api/docs-mcp-eval",
3
+ "version": "0.1.0",
4
+ "description": "Evaluation and benchmarking harness for docs-mcp search quality metrics",
5
+ "license": "AGPL-3.0-only",
6
+ "type": "module",
7
+ "author": "Speakeasy <info@speakeasy.com> (https://speakeasy.com)",
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "https://github.com/speakeasy-api/docs-mcp.git",
11
+ "directory": "packages/eval"
12
+ },
13
+ "homepage": "https://github.com/speakeasy-api/docs-mcp",
14
+ "bugs": {
15
+ "url": "https://github.com/speakeasy-api/docs-mcp/issues"
16
+ },
17
+ "files": [
18
+ "dist"
19
+ ],
20
+ "bin": {
21
+ "docs-mcp-eval": "dist/bin.js"
22
+ },
23
+ "main": "dist/index.js",
24
+ "types": "dist/index.d.ts",
25
+ "exports": {
26
+ ".": {
27
+ "types": "./dist/index.d.ts",
28
+ "import": "./dist/index.js"
29
+ }
30
+ },
31
+ "engines": {
32
+ "node": ">=22.0.0"
33
+ },
34
+ "dependencies": {
35
+ "@modelcontextprotocol/sdk": "^1.26.0",
36
+ "commander": "^13.1.0"
37
+ },
38
+ "scripts": {
39
+ "build": "tsc -p tsconfig.json",
40
+ "typecheck": "tsc -p tsconfig.json --noEmit",
41
+ "test": "vitest run",
42
+ "lint": "eslint src test"
43
+ }
44
+ }