@speakeasy-api/docs-mcp-eval 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +30 -0
- package/dist/bin.d.ts +3 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +79 -0
- package/dist/bin.js.map +1 -0
- package/dist/delta.d.ts +28 -0
- package/dist/delta.d.ts.map +1 -0
- package/dist/delta.js +109 -0
- package/dist/delta.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -0
- package/dist/metrics.d.ts +27 -0
- package/dist/metrics.d.ts.map +1 -0
- package/dist/metrics.js +64 -0
- package/dist/metrics.js.map +1 -0
- package/dist/runner.d.ts +70 -0
- package/dist/runner.d.ts.map +1 -0
- package/dist/runner.js +311 -0
- package/dist/runner.js.map +1 -0
- package/package.json +44 -0
package/dist/runner.js
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
|
2
|
+
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
|
|
3
|
+
import { execFile, spawn } from "node:child_process";
|
|
4
|
+
import { performance } from "node:perf_hooks";
|
|
5
|
+
import { promisify } from "node:util";
|
|
6
|
+
import { summarizeCases } from "./metrics.js";
|
|
7
|
+
// Promise-returning form of child_process.execFile; readProcessRssMb awaits it to run `ps`.
const execFileAsync = promisify(execFile);
|
|
8
|
+
/**
 * Builds the evaluation report for a set of already-executed cases.
 *
 * @param {object} input
 * @param {Array} input.cases - Ranked case results, passed straight to summarizeCases.
 * @param {object} input.timings - Timing/memory figures, passed straight to summarizeCases.
 * @param {boolean} [input.deterministic] - Reported as-is; defaults to true when nullish.
 * @param {{provider?: string, model?: string}} [input.model] - Optional model descriptor.
 * @returns {{summary: *, metadata: {deterministic: boolean, provider: *, model: *}}}
 *   The summary from summarizeCases plus run metadata (provider/model are null when absent).
 */
export function runEvaluation(input) {
    const { cases, timings } = input;
    const summary = summarizeCases(cases, timings);

    const modelInfo = input.model;
    const metadata = {
        deterministic: input.deterministic ?? true,
        provider: modelInfo?.provider ?? null,
        model: modelInfo?.model ?? null
    };

    return { summary, metadata };
}
|
|
19
|
+
/**
 * Runs every case in `input.cases` against a server reached through
 * StdioClientTransport and returns the evaluation report plus raw stats.
 *
 * Sequence: optional build step (timed) -> connect client -> start RSS
 * sampling for `transport.pid` -> listTools -> warmup queries -> execute each
 * case sequentially -> stop sampling -> summarize with runEvaluation.
 * The sampler is stopped and the transport closed in all cases once the
 * try block has been entered.
 *
 * @param {object} input
 * @param {{command: string, args?: string[], cwd?: string, env?: object}} input.server
 *   Launch parameters forwarded to StdioClientTransport; optional fields are
 *   only forwarded when they are not undefined.
 * @param {object} [input.build] - When truthy, passed to runBuildStep before connecting.
 * @param {Array} input.cases - Test cases handed to warmupServer and executeCase.
 * @param {number} [input.warmupQueries] - Number of warmup queries; defaults to 0.
 * @param {object} [input.model] - Forwarded to runEvaluation when truthy.
 * @param {boolean} [input.deterministic] - Forwarded to runEvaluation when not undefined.
 * @returns {Promise<object>} runEvaluation's output extended with `rankedCases` and `stats`.
 */
export async function runEvaluationAgainstServer(input) {
    const { server, build } = input;
    const buildTimeMs = build ? await runBuildStep(build) : 0;

    // Optional launch settings are only included when explicitly provided.
    const serverParams = {
        command: server.command,
        ...(server.args !== undefined ? { args: server.args } : {}),
        ...(server.cwd !== undefined ? { cwd: server.cwd } : {}),
        ...(server.env !== undefined ? { env: server.env } : {})
    };

    const transport = new StdioClientTransport(serverParams);
    const clientInfo = {
        name: "@speakeasy-api/docs-mcp-eval",
        version: "0.1.0"
    };
    const client = new Client(clientInfo, { capabilities: {} });
    await client.connect(transport);

    const rssSampler = createRssSampler(transport.pid);
    await rssSampler.start();

    try {
        await client.listTools();
        await warmupServer(client, input.cases, input.warmupQueries ?? 0);

        const rankedCases = [];
        const searchLatenciesMs = [];
        const getDocLatenciesMs = [];

        // Cases run one at a time so each latency sample reflects a single in-flight request.
        for (const testCase of input.cases) {
            const {
                rankedCase,
                searchLatenciesMs: caseSearchMs,
                getDocLatenciesMs: caseGetDocMs
            } = await executeCase(client, testCase);
            rankedCases.push(rankedCase);
            searchLatenciesMs.push(...caseSearchMs);
            getDocLatenciesMs.push(...caseGetDocMs);
        }

        const peakRssMb = await rssSampler.stop();
        const stats = {
            searchLatenciesMs,
            getDocLatenciesMs,
            buildTimeMs,
            peakRssMb
        };

        // runEvaluation receives its own timings object (same values as `stats`).
        const evaluationInput = {
            cases: rankedCases,
            timings: { ...stats }
        };
        if (input.model) {
            evaluationInput.model = input.model;
        }
        if (input.deterministic !== undefined) {
            evaluationInput.deterministic = input.deterministic;
        }

        const output = runEvaluation(evaluationInput);
        return {
            ...output,
            rankedCases,
            stats
        };
    }
    finally {
        await rssSampler.stop();
        await transport.close();
    }
}
|
|
83
|
+
/**
 * Executes one test case: pages through `search_docs` results until the
 * expected chunk shows up, the server stops returning a cursor, or the round
 * budget is used up. When the expected chunk is found on a page, a single
 * `get_doc` call (context 0) is issued for it and the loop ends.
 *
 * @param {{callTool: Function}} client - Object whose callTool({name, arguments}) returns a tool result.
 * @param {object} testCase
 * @param {string} testCase.query - Query sent to search_docs.
 * @param {string} testCase.expectedChunkId - Chunk id that counts as the right document.
 * @param {number} [testCase.limit] - Page size; defaults to 5.
 * @param {number} [testCase.maxRounds] - Maximum number of search rounds; defaults to 3.
 * @param {object} [testCase.filters] - Extra arguments spread into the search_docs arguments.
 * @returns {Promise<{rankedCase: object, searchLatenciesMs: number[], getDocLatenciesMs: number[]}>}
 * @throws {Error} When a tool result has the compatibility shape or reports isError.
 */
async function executeCase(client, testCase) {
    const { expectedChunkId } = testCase;
    const maxRounds = testCase.maxRounds ?? 3;

    const rankedChunkIds = [];
    const seenChunkIds = new Set();
    const searchLatenciesMs = [];
    const getDocLatenciesMs = [];

    // Shared validation for both tool calls; `fallbackMessage` is used when the
    // error result carries no text content.
    const ensureToolSucceeded = (result, fallbackMessage) => {
        if ("toolResult" in result) {
            throw new Error("Unexpected compatibility tool result shape from server");
        }
        if (result.isError) {
            throw new Error(readTextContent(result.content) || fallbackMessage);
        }
    };

    let roundsExecuted = 0;
    let cursor;

    while (roundsExecuted < maxRounds) {
        roundsExecuted += 1;

        const searchArgs = {
            query: testCase.query,
            limit: testCase.limit ?? 5,
            ...(testCase.filters ?? {}),
            ...(cursor ? { cursor } : {})
        };

        // Latency is recorded before validation, so failed calls are timed too.
        const searchStartedAt = performance.now();
        const searchResult = await client.callTool({
            name: "search_docs",
            arguments: searchArgs
        });
        searchLatenciesMs.push(performance.now() - searchStartedAt);
        ensureToolSucceeded(searchResult, "search_docs returned an unknown error");

        const page = parseSearchResultText(readTextContent(searchResult.content));

        // Keep first-seen order across pages; later duplicates are ignored.
        for (const { chunk_id: chunkId } of page.hits) {
            if (seenChunkIds.has(chunkId)) {
                continue;
            }
            seenChunkIds.add(chunkId);
            rankedChunkIds.push(chunkId);
        }

        const foundOnThisPage = page.hits.some((hit) => hit.chunk_id === expectedChunkId);
        if (foundOnThisPage) {
            const getDocStartedAt = performance.now();
            const getDocResult = await client.callTool({
                name: "get_doc",
                arguments: {
                    chunk_id: expectedChunkId,
                    context: 0
                }
            });
            getDocLatenciesMs.push(performance.now() - getDocStartedAt);
            ensureToolSucceeded(getDocResult, "get_doc returned an unknown error");
            break;
        }

        cursor = page.next_cursor ?? undefined;
        if (!cursor) {
            // No further pages to request.
            break;
        }
    }

    const roundsToRightDoc = computeRoundsToRightDoc({
        found: seenChunkIds.has(expectedChunkId),
        roundsExecuted,
        maxRounds
    });

    return {
        rankedCase: {
            expectedChunkId,
            rankedChunkIds,
            roundsToRightDoc
        },
        searchLatenciesMs,
        getDocLatenciesMs
    };
}
|
|
159
|
+
/**
 * Scores how many search rounds were needed to reach the expected document.
 *
 * @param {{found: boolean, roundsExecuted: number, maxRounds: number}} input
 * @returns {number} `roundsExecuted` when the document was found; otherwise
 *   `maxRounds + 1`, i.e. one more than the round budget.
 */
export function computeRoundsToRightDoc(input) {
    if (input.found) {
        return input.roundsExecuted;
    }
    return input.maxRounds + 1;
}
|
|
162
|
+
/**
 * Sends `warmupQueries` untimed `search_docs` calls before measurement,
 * cycling through `cases` in order. Failures are ignored on purpose.
 *
 * @param {{callTool: Function}} client - Object exposing callTool({name, arguments}).
 * @param {Array<object>} cases - Test cases supplying query, limit and filters.
 * @param {number} warmupQueries - How many warmup calls to issue; <= 0 disables warmup.
 * @returns {Promise<void>}
 */
async function warmupServer(client, cases, warmupQueries) {
    if (warmupQueries <= 0 || cases.length === 0) {
        return;
    }

    let issued = 0;
    while (issued < warmupQueries) {
        const warmupCase = cases[issued % cases.length];
        issued += 1;
        if (!warmupCase) {
            // Holes / falsy entries are skipped but still count as a slot.
            continue;
        }

        const request = {
            name: "search_docs",
            arguments: {
                query: warmupCase.query,
                // Warmup never asks for more than 5 hits, even if the case does.
                limit: Math.min(5, warmupCase.limit ?? 5),
                ...(warmupCase.filters ?? {})
            }
        };

        try {
            await client.callTool(request);
        }
        catch {
            // Warmup is best-effort.
        }
    }
}
|
|
187
|
+
/**
 * Concatenates the text of all `type === "text"` entries of a tool result's
 * content list, joined by newlines and trimmed.
 *
 * @param {Array<{type?: string, text?: string}>|null|undefined} content
 *   Content entries of a tool result. A missing or non-array value is treated
 *   as "no text" so callers can fall back to their own default message
 *   instead of failing with a TypeError.
 * @returns {string} The joined text, or "" when there is no text content.
 */
function readTextContent(content) {
    if (!Array.isArray(content)) {
        return "";
    }
    const parts = [];
    for (const entry of content) {
        // Optional chaining tolerates null/undefined entries in the list.
        if (entry?.type === "text") {
            parts.push(entry.text ?? "");
        }
    }
    return parts.join("\n").trim();
}
|
|
194
|
+
/**
 * Parses the JSON text returned by `search_docs` into a normalized payload.
 *
 * Only hits that are objects with a non-empty string `chunk_id` are kept, and
 * each kept hit is reduced to `{ chunk_id }`. `next_cursor` is passed through
 * when it is a string and is `null` otherwise.
 *
 * @param {string} text - Raw text content of the tool result.
 * @returns {{hits: Array<{chunk_id: string}>, next_cursor: string|null}}
 * @throws {Error} When the text is not valid JSON (the parser error is
 *   attached as `cause`), is not a JSON object, or has no `hits` array.
 */
function parseSearchResultText(text) {
    let parsed;
    try {
        parsed = JSON.parse(text);
    }
    catch (error) {
        // Keep the parser's error so the offending position is not lost.
        throw new Error("search_docs result was not valid JSON text", { cause: error });
    }

    if (parsed === null || typeof parsed !== "object") {
        throw new Error("search_docs result was not an object");
    }
    if (!Array.isArray(parsed.hits)) {
        throw new Error("search_docs result is missing hits[]");
    }

    const hits = parsed.hits
        .filter((entry) => entry !== null
            && typeof entry === "object"
            && typeof entry.chunk_id === "string"
            && entry.chunk_id !== "")
        .map((entry) => ({ chunk_id: entry.chunk_id }));

    const rawCursor = parsed.next_cursor;
    return {
        hits,
        next_cursor: typeof rawCursor === "string" ? rawCursor : null
    };
}
|
|
228
|
+
/**
 * Runs the configured build command as a child process (no shell, stdio
 * inherited from this process) and measures how long it took.
 *
 * @param {object} config
 * @param {string} config.command - Executable to spawn.
 * @param {string[]} [config.args] - Arguments; defaults to none.
 * @param {string} [config.cwd] - Working directory for the child.
 * @param {object} [config.env] - Variables layered on top of process.env.
 * @returns {Promise<number>} Elapsed wall-clock time in milliseconds.
 * @throws {Error} When the process cannot be spawned, exits non-zero, or is
 *   terminated by a signal.
 */
async function runBuildStep(config) {
    const startedAt = performance.now();

    const child = spawn(config.command, config.args ?? [], {
        cwd: config.cwd,
        env: { ...process.env, ...config.env },
        stdio: "inherit",
        shell: false
    });

    await new Promise((resolve, reject) => {
        // Spawn failures (e.g. command not found) surface through "error".
        child.on("error", reject);
        child.on("exit", (code, signal) => {
            if (code !== 0) {
                const reason = signal ? `signal ${signal}` : `exit code ${String(code)}`;
                reject(new Error(`Build command failed with ${reason}`));
                return;
            }
            resolve();
        });
    });

    return performance.now() - startedAt;
}
|
|
251
|
+
/**
 * Creates a best-effort sampler that tracks the peak RSS (in MB) of a process
 * by polling readProcessRssMb every 200 ms.
 *
 * Without a pid the returned sampler is a no-op whose stop() resolves to 0.
 *
 * @param {number|undefined} pid - Process id to observe.
 * @returns {{start: () => Promise<void>, stop: () => Promise<number>}}
 *   `start()` takes one sample immediately and then begins polling; it does
 *   nothing if the sampler is already running or has been stopped.
 *   `stop()` cancels polling, takes one last sample and resolves to the peak
 *   rounded to 6 decimals. Every call to `stop()` resolves to that same
 *   value, including repeated or concurrent calls.
 */
function createRssSampler(pid) {
    if (!pid) {
        return {
            async start() {
                // noop
            },
            async stop() {
                return 0;
            }
        };
    }

    let timer;
    let peakRssMb = 0;
    // Promise for the final, rounded peak. Created by the first stop() call and
    // reused afterwards so late in-flight samples cannot change the result.
    let finalPeak;

    const sample = async () => {
        try {
            const rssMb = await readProcessRssMb(pid);
            if (rssMb > peakRssMb) {
                peakRssMb = rssMb;
            }
        }
        catch {
            // RSS sampling is best-effort.
        }
    };

    return {
        async start() {
            if (finalPeak || timer) {
                return;
            }
            await sample();
            if (finalPeak || timer) {
                // stop() (or another start()) ran while the first sample was pending.
                return;
            }
            timer = setInterval(() => {
                void sample();
            }, 200);
            // Polling must never keep the Node process alive on its own.
            timer.unref();
        },
        async stop() {
            finalPeak ??= (async () => {
                if (timer) {
                    clearInterval(timer);
                    timer = undefined;
                }
                await sample();
                return Number(peakRssMb.toFixed(6));
            })();
            return finalPeak;
        }
    };
}
|
|
301
|
+
/**
 * Reads the current resident set size of a process by running
 * `ps -o rss= -p <pid>` and converting the reported number to megabytes
 * (value / 1024).
 *
 * @param {number} pid - Process id passed to `ps`.
 * @returns {Promise<number>} RSS in MB, or 0 when the output is not a
 *   positive finite number.
 * @throws Whatever execFileAsync rejects with (e.g. `ps` unavailable or the
 *   process no longer exists).
 */
async function readProcessRssMb(pid) {
    const psArgs = ["-o", "rss=", "-p", String(pid)];
    const result = await execFileAsync("ps", psArgs, { windowsHide: true });

    const kb = Number.parseFloat(result.stdout.trim());
    const isUsable = Number.isFinite(kb) && kb > 0;
    return isUsable ? kb / 1024 : 0;
}
|
|
311
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE9C,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAsE1C,MAAM,UAAU,aAAa,CAAC,KAAmB;IAC/C,MAAM,OAAO,GAAG,cAAc,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;IAC3D,OAAO;QACL,OAAO;QACP,QAAQ,EAAE;YACR,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,IAAI;YAC1C,QAAQ,EAAE,KAAK,CAAC,KAAK,EAAE,QAAQ,IAAI,IAAI;YACvC,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,IAAI,IAAI;SAClC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC9C,KAAuB;IAEvB,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEtE,MAAM,YAAY,GAKd;QACF,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,OAAO;KAC9B,CAAC;IAEF,IAAI,KAAK,CAAC,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACpC,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC;IACxC,CAAC;IACD,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACnC,YAAY,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC;IACtC,CAAC;IACD,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACnC,YAAY,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC;IACtC,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,oBAAoB,CAAC,YAAY,CAAC,CAAC;IAEzD,MAAM,MAAM,GAAG,IAAI,MAAM,CACvB;QACE,IAAI,EAAE,8BAA8B;QACpC,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE,EAAE;KACjB,CACF,CAAC;IAEF,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,MAAM,UAAU,GAAG,gBAAgB,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,UAAU,CAAC,KAAK,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;QACzB,MAAM,YAAY,CAAC,MAAM,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,aAAa,IAAI,CAAC,CAAC,CAAC;QAElE,MAAM,WAAW,GAAiB,EAAE,CAAC;QACrC,MAAM,iBAAiB,GAAa,EAAE,CAAC;QACvC,MAAM,iBAAiB,GAAa,EAAE,CAAC;QAEvC,KAAK,MAAM,QAAQ,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;YACnC,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;YACrD,WAAW,CAAC,
IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;YACtC,iBAAiB,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;YACtD,iBAAiB,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QACxD,CAAC;QACD,MAAM,SAAS,GAAG,MAAM,UAAU,CAAC,IAAI,EAAE,CAAC;QAE1C,MAAM,MAAM,GAAG,aAAa,CAAC;YAC3B,KAAK,EAAE,WAAW;YAClB,OAAO,EAAE;gBACP,iBAAiB;gBACjB,iBAAiB;gBACjB,WAAW;gBACX,SAAS;aACV;YACD,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9C,GAAG,CAAC,KAAK,CAAC,aAAa,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,KAAK,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACrF,CAAC,CAAC;QAEH,OAAO;YACL,GAAG,MAAM;YACT,WAAW;YACX,KAAK,EAAE;gBACL,iBAAiB;gBACjB,iBAAiB;gBACjB,WAAW;gBACX,SAAS;aACV;SACF,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,MAAM,UAAU,CAAC,IAAI,EAAE,CAAC;QACxB,MAAM,SAAS,CAAC,KAAK,EAAE,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,KAAK,UAAU,WAAW,CACxB,MAAc,EACd,QAAuB;IAMvB,MAAM,cAAc,GAAa,EAAE,CAAC;IACpC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,iBAAiB,GAAa,EAAE,CAAC;IACvC,MAAM,iBAAiB,GAAa,EAAE,CAAC;IAEvC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,MAA0B,CAAC;IAC/B,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,IAAI,CAAC,CAAC;IAE1C,OAAO,MAAM,GAAG,SAAS,EAAE,CAAC;QAC1B,MAAM,IAAI,CAAC,CAAC;QAEZ,MAAM,IAAI,GAA4B;YACpC,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,CAAC;YAC1B,GAAG,CAAC,QAAQ,CAAC,OAAO,IAAI,EAAE,CAAC;SAC5B,CAAC;QAEF,IAAI,MAAM,EAAE,CAAC;YACX,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACvB,CAAC;QAED,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACtC,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC;YACvC,IAAI,EAAE,aAAa;YACnB,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC,CAAC;QAExD,IAAI,YAAY,IAAI,UAAU,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;QAC5E,CAAC;QAED,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,uCAAuC,CAAC,CAAC;QAClG,CAAC;QAED,MAAM,OAAO,GAAG,qBAAqB,CAAC,eAAe,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;QAC3E,KAAK,MAAM,GAAG,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YAC/B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,cAAc,
CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAClC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,KAAK,QAAQ,CAAC,eAAe,CAAC,CAAC;QACxF,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACtC,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC;gBACzC,IAAI,EAAE,SAAS;gBACf,SAAS,EAAE;oBACT,QAAQ,EAAE,SAAS,CAAC,QAAQ;oBAC5B,OAAO,EAAE,CAAC;iBACX;aACF,CAAC,CAAC;YACH,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC,CAAC;YAExD,IAAI,YAAY,IAAI,YAAY,EAAE,CAAC;gBACjC,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;YAC5E,CAAC;YACD,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;gBACzB,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,mCAAmC,CAAC,CAAC;YAChG,CAAC;YAED,MAAM;QACR,CAAC;QAED,MAAM,GAAG,OAAO,CAAC,WAAW,IAAI,SAAS,CAAC;QAC1C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM;QACR,CAAC;IACH,CAAC;IAED,MAAM,gBAAgB,GAAG,uBAAuB,CAAC;QAC/C,KAAK,EAAE,cAAc,CAAC,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC;QACxD,cAAc,EAAE,MAAM;QACtB,SAAS;KACV,CAAC,CAAC;IAEH,OAAO;QACL,UAAU,EAAE;YACV,eAAe,EAAE,QAAQ,CAAC,eAAe;YACzC,cAAc;YACd,gBAAgB;SACjB;QACD,iBAAiB;QACjB,iBAAiB;KAClB,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,KAIvC;IACC,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC;AAClE,CAAC;AAED,KAAK,UAAU,YAAY,CACzB,MAAc,EACd,KAAsB,EACtB,aAAqB;IAErB,IAAI,aAAa,IAAI,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7C,OAAO;IACT,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QACzC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,SAAS;QACX,CAAC;QAED,MAAM,IAAI,GAA4B;YACpC,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,KAAK,IAAI,CAAC,CAAC;YACvC,GAAG,CAAC,QAAQ,CAAC,OAAO,IAAI,EAAE,CAAC;SAC5B,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,QAAQ,CAAC;gBACpB,IAAI,EAAE,aAAa;gBACnB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,OA
A+C;IACtE,OAAO,OAAO;SACX,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;SACxC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC;SAChC,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,qBAAqB,CAAC,IAAY;IAIzC,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC1C,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,MAAM,GAAG,MAAiC,CAAC;IACjD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,IAAI,GAAgC,EAAE,CAAC;IAC7C,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChC,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YACxC,SAAS;QACX,CAAC;QACD,MAAM,OAAO,GAAI,KAAiC,CAAC,QAAQ,CAAC;QAC5D,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,OAAO,EAAE,CAAC;YAC3C,IAAI,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IAED,MAAM,aAAa,GAAG,MAAM,CAAC,WAAW,CAAC;IACzC,MAAM,UAAU,GAAG,aAAa,KAAK,IAAI,IAAI,OAAO,aAAa,KAAK,QAAQ;QAC5E,CAAC,CAAC,aAAa;QACf,CAAC,CAAC,IAAI,CAAC;IAET,OAAO;QACL,IAAI;QACJ,WAAW,EAAE,UAAU;KACxB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,MAAuB;IACjD,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC1C,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE,EAAE;YACrD,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,GAAG,EAAE;gBACH,GAAG,OAAO,CAAC,GAAG;gBACd,GAAG,MAAM,CAAC,GAAG;aACd;YACD,KAAK,EAAE,SAAS;YAChB,KAAK,EAAE,KAAK;SACb,CAAC,CAAC;QAEH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;YAChC,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACf,OAAO,EAAE,CAAC;gBACV,OAAO;YACT,CAAC;YACD,MAAM,CACJ,IAAI,KAAK,CACP,6BAA6B,MAAM,CAAC,CAAC,CAAC,UAAU,MAAM,EAAE,CAAC,CAAC,CAAC,aAAa,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,CACzF,CACF,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC
;AACvC,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAkB;IAI1C,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO;YACL,KAAK,CAAC,KAAK;gBACT,OAAO;YACT,CAAC;YACD,KAAK,CAAC,IAAI;gBACR,OAAO,CAAC,CAAC;YACX,CAAC;SACF,CAAC;IACJ,CAAC;IAED,IAAI,QAAoD,CAAC;IACzD,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,MAAM,MAAM,GAAG,KAAK,IAAmB,EAAE;QACvC,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC,GAAG,CAAC,CAAC;YAC1C,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;gBACtB,SAAS,GAAG,KAAK,CAAC;YACpB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,+BAA+B;QACjC,CAAC;IACH,CAAC,CAAC;IAEF,OAAO;QACL,KAAK,CAAC,KAAK;YACT,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;gBACxB,OAAO;YACT,CAAC;YACD,MAAM,MAAM,EAAE,CAAC;YACf,QAAQ,GAAG,WAAW,CAAC,GAAG,EAAE;gBAC1B,KAAK,MAAM,EAAE,CAAC;YAChB,CAAC,EAAE,GAAG,CAAC,CAAC;YACR,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,CAAC;QACD,KAAK,CAAC,IAAI;YACR,IAAI,OAAO,EAAE,CAAC;gBACZ,OAAO,SAAS,CAAC;YACnB,CAAC;YAED,OAAO,GAAG,IAAI,CAAC;YACf,IAAI,QAAQ,EAAE,CAAC;gBACb,aAAa,CAAC,QAAQ,CAAC,CAAC;gBACxB,QAAQ,GAAG,SAAS,CAAC;YACvB,CAAC;YACD,MAAM,MAAM,EAAE,CAAC;YACf,OAAO,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QACtC,CAAC;KACF,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW;IACzC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE;QAC9E,WAAW,EAAE,IAAI;KAClB,CAAC,CAAC;IACH,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;QACpC,OAAO,CAAC,CAAC;IACX,CAAC;IACD,OAAO,EAAE,GAAG,IAAI,CAAC;AACnB,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@speakeasy-api/docs-mcp-eval",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Evaluation and benchmarking harness for docs-mcp search quality metrics",
|
|
5
|
+
"license": "AGPL-3.0-only",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"author": "Speakeasy <info@speakeasy.com> (https://speakeasy.com)",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "https://github.com/speakeasy-api/docs-mcp.git",
|
|
11
|
+
"directory": "packages/eval"
|
|
12
|
+
},
|
|
13
|
+
"homepage": "https://github.com/speakeasy-api/docs-mcp",
|
|
14
|
+
"bugs": {
|
|
15
|
+
"url": "https://github.com/speakeasy-api/docs-mcp/issues"
|
|
16
|
+
},
|
|
17
|
+
"files": [
|
|
18
|
+
"dist"
|
|
19
|
+
],
|
|
20
|
+
"bin": {
|
|
21
|
+
"docs-mcp-eval": "dist/bin.js"
|
|
22
|
+
},
|
|
23
|
+
"main": "dist/index.js",
|
|
24
|
+
"types": "dist/index.d.ts",
|
|
25
|
+
"exports": {
|
|
26
|
+
".": {
|
|
27
|
+
"types": "./dist/index.d.ts",
|
|
28
|
+
"import": "./dist/index.js"
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"engines": {
|
|
32
|
+
"node": ">=22.0.0"
|
|
33
|
+
},
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
36
|
+
"commander": "^13.1.0"
|
|
37
|
+
},
|
|
38
|
+
"scripts": {
|
|
39
|
+
"build": "tsc -p tsconfig.json",
|
|
40
|
+
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
41
|
+
"test": "vitest run",
|
|
42
|
+
"lint": "eslint src test"
|
|
43
|
+
}
|
|
44
|
+
}
|