nodebench-mcp 2.65.0 → 2.67.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmarks/claudeCodeIntegration.d.ts +12 -0
- package/dist/benchmarks/claudeCodeIntegration.js +382 -0
- package/dist/benchmarks/claudeCodeIntegration.js.map +1 -0
- package/dist/benchmarks/investorDemo.d.ts +18 -0
- package/dist/benchmarks/investorDemo.js +397 -0
- package/dist/benchmarks/investorDemo.js.map +1 -0
- package/dist/benchmarks/llmJudgeEval.d.ts +3 -0
- package/dist/benchmarks/llmJudgeEval.js +232 -15
- package/dist/benchmarks/llmJudgeEval.js.map +1 -1
- package/dist/benchmarks/searchQualityEval.js +10 -10
- package/dist/benchmarks/searchQualityEval.js.map +1 -1
- package/dist/tools/causalMemoryTools.js +11 -0
- package/dist/tools/causalMemoryTools.js.map +1 -1
- package/dist/tools/progressiveDiscoveryTools.d.ts +2 -1
- package/dist/tools/progressiveDiscoveryTools.js +202 -2
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* investorDemo.ts — Self-running investor proof script for NodeBench MCP.
|
|
4
|
+
*
|
|
5
|
+
* Proves the full stack works end-to-end in under 60 seconds:
|
|
6
|
+
* 1. Setup sanity (starter preset, discovery)
|
|
7
|
+
* 2. Progressive discovery (dynamic toolset expansion)
|
|
8
|
+
* 3. Real intelligence (Gemini-synthesized analysis)
|
|
9
|
+
* 4. Memory compounding (compaction-resilient state)
|
|
10
|
+
* 5. Agent compatibility (multi-agent validation)
|
|
11
|
+
*
|
|
12
|
+
* Usage:
|
|
13
|
+
* cd packages/mcp-local
|
|
14
|
+
* npx tsx src/benchmarks/investorDemo.ts
|
|
15
|
+
*
|
|
16
|
+
* Env: GEMINI_API_KEY (loads from .env.local if not in environment)
|
|
17
|
+
*/
|
|
18
|
+
import { readFileSync } from "fs";
|
|
19
|
+
import { join } from "path";
|
|
20
|
+
import { getDb } from "../db.js";
|
|
21
|
+
import { _setDbAccessor } from "../tools/toolRegistry.js";
|
|
22
|
+
import { loadToolsets, ALL_DOMAIN_KEYS, TOOLSET_MAP, TOOL_TO_TOOLSET, } from "../toolsetRegistry.js";
|
|
23
|
+
import { createProgressiveDiscoveryTools } from "../tools/progressiveDiscoveryTools.js";
|
|
24
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
25
|
+
// ENV
|
|
26
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
27
|
+
/**
 * Best-effort loader for GEMINI_API_KEY.
 *
 * Does nothing when the variable is already set in the environment. Otherwise
 * it looks through a fixed list of dotenv-style files, resolved against the
 * current working directory, and copies the first non-empty GEMINI_API_KEY
 * assignment it finds into `process.env`.
 *
 * Accepted line shapes (one assignment per line):
 *   GEMINI_API_KEY=value
 *   GEMINI_API_KEY = "value"
 *   export GEMINI_API_KEY='value'
 *
 * Both LF and CRLF line endings are handled; a single pair of matching
 * surrounding quotes is stripped from the value.
 */
function loadEnv() {
    if (process.env.GEMINI_API_KEY)
        return;
    const candidates = [".env.local", ".env", "../.env.local", "../../.env.local"];
    const assignment = /^\s*(?:export\s+)?(GEMINI_API_KEY)\s*=\s*(.*?)\s*$/;
    for (const relPath of candidates) {
        let content;
        try {
            content = readFileSync(join(process.cwd(), relPath), "utf-8");
        }
        catch {
            // Missing or unreadable file: deliberately ignored, try the next candidate.
            continue;
        }
        // Split on \r?\n so a trailing "\r" from CRLF files cannot defeat the match.
        for (const line of content.split(/\r?\n/)) {
            const match = assignment.exec(line);
            if (!match)
                continue;
            const value = match[2].replace(/^(["'])(.*)\1$/, "$2").trim();
            // An empty assignment is treated as "not set" so later files still get a chance.
            if (!value)
                continue;
            process.env[match[1]] = value;
            return;
        }
    }
}
loadEnv();
|
|
48
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
49
|
+
// ANSI helpers (no external deps)
|
|
50
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
51
|
+
/**
 * ANSI SGR escape sequences used to style terminal output.
 * Each entry is the string `ESC[<code>m` for the numeric code shown.
 */
const C = (() => {
    const sgr = (code) => `\x1b[${code}m`;
    return {
        reset: sgr(0),
        bold: sgr(1),
        dim: sgr(2),
        underline: sgr(4),
        // Foreground
        red: sgr(31),
        green: sgr(32),
        yellow: sgr(33),
        blue: sgr(34),
        magenta: sgr(35),
        cyan: sgr(36),
        white: sgr(37),
        // Bright
        brightRed: sgr(91),
        brightGreen: sgr(92),
        brightYellow: sgr(93),
        brightCyan: sgr(96),
        brightWhite: sgr(97),
        // Background
        bgRed: sgr(41),
        bgGreen: sgr(42),
        bgBlue: sgr(44),
        bgMagenta: sgr(45),
    };
})();
|
|
76
|
+
/**
 * Write one message to standard output via `console.log`.
 * Single choke point for all demo output.
 */
function log(msg) {
    console.log(msg);
}
|
|
79
|
+
/**
 * Print the heading for a demo step: a blank line, a bold title line that
 * carries the step number and its time budget, then a dim horizontal rule.
 */
function stepHeader(num, title, budget) {
    const heading = `${C.bold}${C.brightCyan} [${num}] ${title}${C.reset}`;
    const budgetNote = `${C.dim}(budget: ${budget})${C.reset}`;
    const rule = "─".repeat(56);
    log("");
    log(`${heading} ${budgetNote}`);
    log(`${C.dim} ${rule}${C.reset}`);
}
|
|
84
|
+
/** Print a `label: value` line with the label in yellow and the value in white. */
function bullet(label, value) {
    const key = `${C.yellow}${label}:${C.reset}`;
    const shown = `${C.white}${value}${C.reset}`;
    log(` ${key} ${shown}`);
}
|
|
87
|
+
/** Print a message prefixed with a bright-green `[PASS]` tag. */
function success(msg) {
    const tag = `${C.brightGreen}[PASS]${C.reset}`;
    log(` ${tag} ${msg}`);
}
|
|
90
|
+
/** Print a secondary, dimmed informational message. */
function info(msg) {
    const dimmed = `${C.dim}${msg}${C.reset}`;
    log(` ${dimmed}`);
}
|
|
93
|
+
/** Print a message prefixed with a bright-yellow `[WARN]` tag. */
function warn(msg) {
    const tag = `${C.brightYellow}[WARN]${C.reset}`;
    log(` ${tag} ${msg}`);
}
|
|
96
|
+
/**
 * Format the time elapsed since `startMs` (a `Date.now()` timestamp) as
 * seconds with one decimal place, e.g. `"1.5s"`.
 */
function elapsed(startMs) {
    const seconds = (Date.now() - startMs) / 1000;
    return seconds.toFixed(1) + "s";
}
|
|
99
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
100
|
+
// DEMO RUNNER
|
|
101
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
102
|
+
/** Extract a printable message from any thrown value (a rejection is not always an Error). */
function describeError(err) {
    return err && typeof err.message === "string" ? err.message : String(err);
}
/** Shorten `text` to at most `max` characters, appending "..." only when something was cut. */
function clip(text, max) {
    const str = typeof text === "string" ? text : String(text);
    return str.length > max ? `${str.slice(0, max)}...` : str;
}
/** Look up a tool by its `name` in a list of tool descriptors. */
function findTool(tools, name) {
    return tools.find((t) => t.name === name);
}
/** Total number of tools currently registered across every entry of TOOLSET_MAP. */
function countRegisteredTools() {
    return Object.keys(TOOLSET_MAP).reduce((sum, k) => sum + (TOOLSET_MAP[k]?.length ?? 0), 0);
}
/** Print up to `limit` discover_tools hits as `name score=… cat=…` lines. */
function printDiscoveryMatches(results, limit = 3) {
    for (const r of results.slice(0, limit)) {
        const score = r.relevanceScore ?? 0;
        log(` ${C.cyan}${r.name}${C.reset} ${C.dim}score=${score.toFixed(2)} cat=${r.category}${C.reset}`);
    }
}
/**
 * Run the five-step demo and print a summary box.
 *
 *   1. Setup sanity          — load the starter domain, build discovery tools, run one query
 *   2. Progressive discovery — load more domains, rebuild discovery, run a second query
 *   3. Real intelligence     — call `founder_local_synthesize` if it can be found
 *   4. Memory compounding    — record_event, track_intent, summarize_session, get_compaction_recovery
 *   5. Agent compatibility   — dynamically import and run `runAgentValidation`
 *
 * Failures inside steps 3-5 are reported with `warn` and do not abort the run.
 * A missing `discover_tools` in step 1 throws.
 */
async function main() {
    const demoStart = Date.now();
    // Ensure DB is wired up for toolRegistry
    const db = getDb();
    _setDbAccessor(() => db);
    // ── HEADER ──────────────────────────────────────────────────────────────
    log("");
    log(`${C.bold}${C.brightWhite} ╔══════════════════════════════════════════════════════════╗${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.bold}${C.magenta}NODEBENCH MCP${C.reset} ${C.dim}— Investor Proof Demo${C.reset} ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.dim}Operating memory for agent-native businesses${C.reset} ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.dim}${ALL_DOMAIN_KEYS.length} domains | Progressive discovery | Local-first${C.reset} ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ╚══════════════════════════════════════════════════════════╝${C.reset}`);
    log("");
    // ══════════════════════════════════════════════════════════════════════════
    // STEP 1: Setup Sanity
    // ══════════════════════════════════════════════════════════════════════════
    stepHeader(1, "Setup Sanity", "< 3s");
    const step1Start = Date.now();
    // Starter preset: only the deep_sim domain is loaded up front.
    const starterDomains = ["deep_sim"];
    const starterTools = await loadToolsets(starterDomains);
    // Discovery tools are built on top of whatever is currently loaded.
    const discoveryTools = createProgressiveDiscoveryTools(starterTools.map((t) => ({ name: t.name, description: t.description })), {
        getLoadedToolNames: () => new Set(starterTools.map((t) => t.name)),
        getToolToToolset: () => TOOL_TO_TOOLSET,
    });
    const allStarterTools = [...starterTools, ...discoveryTools];
    bullet("Preset", "starter");
    bullet("Tools loaded", allStarterTools.length);
    bullet("Domains", starterDomains.join(", "));
    // Call discover_tools with query "company analysis"
    const discoverTool = findTool(discoveryTools, "discover_tools");
    if (!discoverTool)
        throw new Error("discover_tools not found");
    const discoveryResult = await discoverTool.handler({
        query: "company analysis",
        limit: 3,
    });
    if (discoveryResult.results && discoveryResult.results.length > 0) {
        log("");
        info('discover_tools("company analysis") — top 3:');
        printDiscoveryMatches(discoveryResult.results);
    }
    success(`Setup sanity — ${elapsed(step1Start)}`);
    // ══════════════════════════════════════════════════════════════════════════
    // STEP 2: Progressive Discovery
    // ══════════════════════════════════════════════════════════════════════════
    stepHeader(2, "Progressive Discovery", "< 5s");
    const step2Start = Date.now();
    const toolsBefore = countRegisteredTools();
    // Load founder preset domains (the returned list is not needed; TOOLSET_MAP is read instead).
    const founderDomains = ["founder", "learning", "local_dashboard"];
    await loadToolsets(founderDomains);
    const toolsAfter = countRegisteredTools();
    bullet("Before load_toolset(founder)", `${toolsBefore} tools`);
    bullet("After load_toolset(founder)", `${toolsAfter} tools (+${toolsAfter - toolsBefore})`);
    // Rebuild discovery with expanded tool list
    const allLoadedTools = Object.values(TOOLSET_MAP).flat();
    const expandedDiscovery = createProgressiveDiscoveryTools(allLoadedTools.map((t) => ({ name: t.name, description: t.description })), {
        getLoadedToolNames: () => new Set(allLoadedTools.map((t) => t.name)),
        getToolToToolset: () => TOOL_TO_TOOLSET,
    });
    const discoverTool2 = findTool(expandedDiscovery, "discover_tools");
    if (discoverTool2) {
        const weeklyResult = await discoverTool2.handler({
            query: "weekly reset",
            limit: 3,
        });
        if (weeklyResult.results && weeklyResult.results.length > 0) {
            log("");
            info('discover_tools("weekly reset") — finds founder tools:');
            printDiscoveryMatches(weeklyResult.results);
        }
    }
    // Total tools across all domains
    const fullTools = await loadToolsets(ALL_DOMAIN_KEYS);
    const totalToolCount = fullTools.length;
    const totalDomainCount = ALL_DOMAIN_KEYS.length;
    log("");
    info(`Self-guided: agents start with ${allStarterTools.length} tools, discover ${totalToolCount} on demand`);
    success(`Progressive discovery — ${elapsed(step2Start)}`);
    // ══════════════════════════════════════════════════════════════════════════
    // STEP 3: Real Intelligence
    // ══════════════════════════════════════════════════════════════════════════
    stepHeader(3, "Real Intelligence", "< 10s");
    const step3Start = Date.now();
    // Resolve founder_local_synthesize once: first from the step-2 snapshot,
    // then, after an explicit reload, from the founder toolset.
    let synthTool = findTool(allLoadedTools, "founder_local_synthesize");
    if (!synthTool) {
        warn("founder_local_synthesize not found — loading founder domain");
        await loadToolsets(["founder"]);
        synthTool = findTool(TOOLSET_MAP["founder"] ?? [], "founder_local_synthesize");
    }
    if (!synthTool) {
        // Exactly one status line is printed for the step: "skipped" here,
        // the regular one in the else-branch.
        warn("Skipping Step 3: founder_local_synthesize unavailable");
        success(`Real intelligence — skipped (${elapsed(step3Start)})`);
    }
    else {
        info(process.env.GEMINI_API_KEY
            ? "GEMINI_API_KEY found — running live Gemini synthesis"
            : "No GEMINI_API_KEY — using heuristic fallback");
        try {
            const synthResult = await synthTool.handler({
                query: "Analyze NodeBench competitive position vs Supermemory and Mem0",
                packetType: "competitor_brief",
            });
            if (synthResult.error) {
                warn(`Synthesis returned error: ${synthResult.message}`);
            }
            else {
                log("");
                // Fall back to a raw JSON excerpt when the result has no summary field.
                const summary = synthResult.summary ??
                    synthResult.sessionSummary ??
                    JSON.stringify(synthResult).slice(0, 200);
                bullet("Summary", clip(summary, 120));
                const keyFindings = synthResult.keyFindings;
                if (keyFindings && keyFindings.length > 0) {
                    bullet("Key findings", `${keyFindings.length} items`);
                    for (const finding of keyFindings.slice(0, 3)) {
                        log(` ${C.dim}- ${clip(finding, 100)}${C.reset}`);
                    }
                }
                const entities = synthResult.entities;
                if (entities && entities.length > 0) {
                    bullet("Entities detected", entities.join(", "));
                }
                const source = synthResult.source;
                bullet("Source", source === "gemini"
                    ? "Gemini 3.1 Flash Lite (live)"
                    : source ?? "heuristic fallback");
            }
        }
        catch (err) {
            warn(`Synthesis error: ${describeError(err)}`);
        }
        success(`Real intelligence — ${elapsed(step3Start)}`);
    }
    // ══════════════════════════════════════════════════════════════════════════
    // STEP 4: Memory Compounding
    // ══════════════════════════════════════════════════════════════════════════
    stepHeader(4, "Memory Compounding", "< 5s");
    const step4Start = Date.now();
    await loadToolsets(["session_memory"]);
    // NOTE(review): session_memory is loaded above, but every lookup below only
    // searches the "founder" toolset — confirm that is where these tools live.
    const founderTrackingTools = TOOLSET_MAP["founder"] ?? [];
    // 4a. record_event
    let eventRecorded = false;
    const recordEventTool = findTool(founderTrackingTools, "record_event");
    if (recordEventTool) {
        try {
            await recordEventTool.handler({
                eventType: "investigation_started",
                actorType: "agent",
                entityType: "company",
                entityId: "nodebench",
                summary: "Investor demo: competitive analysis of NodeBench vs Supermemory",
            });
            eventRecorded = true;
            bullet("record_event", "Logged competitive analysis event to causal ledger");
        }
        catch (err) {
            warn(`record_event error: ${describeError(err)}`);
        }
    }
    else {
        warn("record_event not found in founder tools");
    }
    // 4b. track_intent
    let intentTracked = false;
    const trackIntentTool = findTool(founderTrackingTools, "track_intent");
    if (trackIntentTool) {
        try {
            const intentResult = await trackIntentTool.handler({
                intent: "Complete investor demo and validate all 5 proof steps",
                status: "active",
                context: "Running automated investor proof script",
            });
            intentTracked = true;
            bullet("track_intent", `Active intent tracked (total active: ${intentResult.totalActive ?? "?"})`);
        }
        catch (err) {
            warn(`track_intent error: ${describeError(err)}`);
        }
    }
    else {
        warn("track_intent not found in founder tools");
    }
    // 4c. summarize_session
    let sessionSummarized = false;
    const summarizeTool = findTool(founderTrackingTools, "summarize_session");
    if (summarizeTool) {
        try {
            const summaryResult = await summarizeTool.handler({
                maxTokens: 300,
            });
            sessionSummarized = true;
            const summaryText = summaryResult.sessionSummary ?? "Session summarized";
            bullet("summarize_session", clip(summaryText, 100));
        }
        catch (err) {
            warn(`summarize_session error: ${describeError(err)}`);
        }
    }
    else {
        warn("summarize_session not found in founder tools");
    }
    // 4d. get_compaction_recovery (reported, but not counted in the ops tally below)
    const recoveryTool = findTool(founderTrackingTools, "get_compaction_recovery");
    if (recoveryTool) {
        try {
            const recoveryResult = await recoveryTool.handler({
                maxTokens: 500,
            });
            if (recoveryResult.injectionPrompt) {
                log("");
                info("After context compaction, NodeBench remembers:");
                // The ellipsis is appended only when the prompt was actually cut at 140 chars.
                log(` ${C.green}"${clip(recoveryResult.injectionPrompt, 140)}"${C.reset}`);
                bullet("Token cost", `~${recoveryResult.tokenEstimate ?? "?"} tokens`);
            }
            else {
                bullet("get_compaction_recovery", "Recovery context generated (empty session)");
            }
        }
        catch (err) {
            warn(`get_compaction_recovery error: ${describeError(err)}`);
        }
    }
    else {
        warn("get_compaction_recovery not found in founder tools");
    }
    const memoryOps = [eventRecorded, intentTracked, sessionSummarized].filter(Boolean).length;
    success(`Memory compounding — ${memoryOps}/3 ops — ${elapsed(step4Start)}`);
    // ══════════════════════════════════════════════════════════════════════════
    // STEP 5: Agent Compatibility
    // ══════════════════════════════════════════════════════════════════════════
    stepHeader(5, "Agent Compatibility", "< 5s");
    const step5Start = Date.now();
    try {
        // Imported lazily so a failure to load the validator only degrades this step.
        const { runAgentValidation } = await import("./agentValidation.js");
        const validationResult = await runAgentValidation();
        const pct = (fraction) => (fraction * 100).toFixed(0);
        for (const persona of validationResult.personas) {
            const passIcon = persona.passed
                ? `${C.brightGreen}PASS${C.reset}`
                : `${C.brightRed}FAIL${C.reset}`;
            log(` ${passIcon} ${C.bold}${persona.name}${C.reset} ` +
                `${C.dim}discovery=${pct(persona.scores.toolDiscovery)}% ` +
                `workflow=${pct(persona.scores.workflowCompletion)}% ` +
                `preset=${pct(persona.scores.presetFit)}%${C.reset}`);
        }
        log("");
        bullet("Overall pass rate", `${pct(validationResult.overallPassRate)}%`);
        info("Works in: Claude Code, Cursor (<=40 tools), OpenClaw, Windsurf, Generic MCP");
    }
    catch (err) {
        warn(`Agent validation error: ${describeError(err)}`);
        info("Works in: Claude Code (full), Cursor (<=40 tools), OpenClaw, Windsurf");
    }
    success(`Agent compatibility — ${elapsed(step5Start)}`);
    // ══════════════════════════════════════════════════════════════════════════
    // FINAL SUMMARY
    // ══════════════════════════════════════════════════════════════════════════
    const totalTime = ((Date.now() - demoStart) / 1000).toFixed(1);
    const hasGemini = !!process.env.GEMINI_API_KEY;
    log("");
    log(`${C.bold}${C.brightWhite} ╔══════════════════════════════════════════════════════════╗${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.bold}${C.magenta}NODEBENCH MCP${C.reset} ${C.dim}— INVESTOR PROOF${C.reset} ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.yellow}Tools:${C.reset} ${String(totalToolCount).padEnd(5)} across ${totalDomainCount} domains ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.yellow}Starter:${C.reset} ${String(allStarterTools.length).padEnd(5)} tools (progressive discovery) ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.yellow}Personas:${C.reset} founder, banker, operator, researcher ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.yellow}LLM:${C.reset} ${hasGemini ? `${C.brightGreen}Gemini live${C.reset}` : `${C.dim}heuristic fallback${C.reset}`} ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.yellow}Agents:${C.reset} Claude Code, Cursor, OpenClaw, Windsurf ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.yellow}Memory:${C.reset} survives compaction, compounds over time ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.yellow}Demo time:${C.reset} ${C.brightGreen}${totalTime}s${C.reset} ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ║${C.reset} ${C.bold}${C.brightWhite}║${C.reset}`);
    log(`${C.bold}${C.brightWhite} ╚══════════════════════════════════════════════════════════╝${C.reset}`);
    log("");
}
|
|
392
|
+
// Entry point: run the demo; on an unhandled failure print a [FATAL] line
// (plus the stack when one exists) and exit with status 1.
main().catch((err) => {
    // A rejection is not guaranteed to be an Error, so avoid printing "undefined".
    const isError = err instanceof Error;
    const message = isError ? err.message : String(err);
    console.error(`\n${C.brightRed}[FATAL]${C.reset} ${message}`);
    if (isError && err.stack) {
        console.error(err.stack);
    }
    process.exit(1);
});
|
|
397
|
+
//# sourceMappingURL=investorDemo.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"investorDemo.js","sourceRoot":"","sources":["../../src/benchmarks/investorDemo.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EACL,YAAY,EACZ,eAAe,EACf,WAAW,EACX,eAAe,GAChB,MAAM,uBAAuB,CAAC;AAM/B,OAAO,EAAE,+BAA+B,EAAE,MAAM,uCAAuC,CAAC;AAGxF,iFAAiF;AACjF,MAAM;AACN,iFAAiF;AAEjF,SAAS,OAAO;IACd,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc;QAAE,OAAO;IACvC,MAAM,KAAK,GAAG,CAAC,YAAY,EAAE,MAAM,EAAE,eAAe,EAAE,kBAAkB,CAAC,CAAC;IAC1E,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAC9D,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;gBAC1D,IAAI,KAAK,EAAE,CAAC;oBACV,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;oBACxC,OAAO;gBACT,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,oBAAoB;QACtB,CAAC;IACH,CAAC;AACH,CAAC;AAED,OAAO,EAAE,CAAC;AAEV,iFAAiF;AACjF,kCAAkC;AAClC,iFAAiF;AAEjF,MAAM,CAAC,GAAG;IACR,KAAK,EAAE,SAAS;IAChB,IAAI,EAAE,SAAS;IACf,GAAG,EAAE,SAAS;IACd,SAAS,EAAE,SAAS;IACpB,aAAa;IACb,GAAG,EAAE,UAAU;IACf,KAAK,EAAE,UAAU;IACjB,MAAM,EAAE,UAAU;IAClB,IAAI,EAAE,UAAU;IAChB,OAAO,EAAE,UAAU;IACnB,IAAI,EAAE,UAAU;IAChB,KAAK,EAAE,UAAU;IACjB,SAAS;IACT,SAAS,EAAE,UAAU;IACrB,WAAW,EAAE,UAAU;IACvB,YAAY,EAAE,UAAU;IACxB,UAAU,EAAE,UAAU;IACtB,WAAW,EAAE,UAAU;IACvB,aAAa;IACb,KAAK,EAAE,UAAU;IACjB,OAAO,EAAE,UAAU;IACnB,MAAM,EAAE,UAAU;IAClB,SAAS,EAAE,UAAU;CACtB,CAAC;AAEF,SAAS,GAAG,CAAC,GAAW;IACtB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;AACnB,CAAC;AAED,SAAS,UAAU,CAAC,GAAW,EAAE,KAAa,EAAE,MAAc;IAC5D,GAAG,CAAC,EAAE,CAAC,CAAC;IACR,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,UAAU,MAAO,GAAI,KAAK,KAAK,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,GAAG,YAAY,MAAM,IAAI,CAAC,CAAC,KAAK,EAAE,CACjG,CAAC;IACF,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,KAAK,GA
AG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,MAAM,CAAC,KAAa,EAAE,KAAsB;IACnD,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,KAAK,IAAI,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,GAAG,KAAK,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,SAAS,OAAO,CAAC,GAAW;IAC1B,GAAG,CAAC,OAAO,CAAC,CAAC,WAAW,SAAS,CAAC,CAAC,KAAK,IAAI,GAAG,EAAE,CAAC,CAAC;AACrD,CAAC;AAED,SAAS,IAAI,CAAC,GAAW;IACvB,GAAG,CAAC,OAAO,CAAC,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;AACtC,CAAC;AAED,SAAS,IAAI,CAAC,GAAW;IACvB,GAAG,CAAC,OAAO,CAAC,CAAC,YAAY,SAAS,CAAC,CAAC,KAAK,IAAI,GAAG,EAAE,CAAC,CAAC;AACtD,CAAC;AAED,SAAS,OAAO,CAAC,OAAe;IAC9B,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AAC1D,CAAC;AAED,iFAAiF;AACjF,cAAc;AACd,iFAAiF;AAEjF,KAAK,UAAU,IAAI;IACjB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,yCAAyC;IACzC,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;IACnB,cAAc,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;IAEzB,2EAA2E;IAE3E,GAAG,CAAC,EAAE,CAAC,CAAC;IACR,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,iEAAiE,CAAC,CAAC,KAAK,EAAE,CACpG,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,OAAO,gBAAgB,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,GAAG,wBAAwB,CAAC,CAAC,KAAK,sBAAsB,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CACvL,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,GAAG,+CAA+C,CAAC,CAAC,KAAK,aAAa,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CACvJ,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,GAAG,GAAG,eAAe,CAAC,MAAM,iDAAiD,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CAC1K,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,iEAAiE,CAAC,CAAC,KAAK,EAAE,CACpG,CAAC;IACF,GAAG,CAAC,EAAE,CAAC,CAAC;IAER,6EAA6E;IAC7E,uBAAuB;IACvB,6EAA6E;IAE7E,UAAU,CAAC,CAAC,EAAE,cAAc,EAAE,MAAM,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,
CAAC;IAE9B,oEAAoE;IACpE,MAAM,cAAc,GAAG,CAAC,UAAU,CAAC,CAAC;IACpC,MAAM,YAAY,GAAG,MAAM,YAAY,CAAC,cAAc,CAAC,CAAC;IAExD,oEAAoE;IACpE,MAAM,cAAc,GAAG,+BAA+B,CACpD,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,EACvE;QACE,kBAAkB,EAAE,GAAG,EAAE,CACvB,IAAI,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC1C,gBAAgB,EAAE,GAAG,EAAE,CAAC,eAAe;KACxC,CACF,CAAC;IAEF,MAAM,eAAe,GAAG,CAAC,GAAG,YAAY,EAAE,GAAG,cAAc,CAAC,CAAC;IAE7D,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC5B,MAAM,CAAC,cAAc,EAAE,eAAe,CAAC,MAAM,CAAC,CAAC;IAC/C,MAAM,CAAC,SAAS,EAAE,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAE7C,oDAAoD;IACpD,MAAM,YAAY,GAAG,cAAc,CAAC,IAAI,CACtC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,gBAAgB,CACnC,CAAC;IACF,IAAI,CAAC,YAAY;QAAE,MAAM,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;IAE/D,MAAM,eAAe,GAAG,CAAC,MAAM,YAAY,CAAC,OAAO,CAAC;QAClD,KAAK,EAAE,kBAAkB;QACzB,KAAK,EAAE,CAAC;KACT,CAAC,CAED,CAAC;IAEF,IAAI,eAAe,CAAC,OAAO,IAAI,eAAe,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClE,GAAG,CAAC,EAAE,CAAC,CAAC;QACR,IAAI,CAAC,6CAA6C,CAAC,CAAC;QACpD,KAAK,MAAM,CAAC,IAAI,eAAe,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YACpD,MAAM,KAAK,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;YACpC,GAAG,CACD,SAAS,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,GAAG,SAAS,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,KAAK,EAAE,CACrG,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,CAAC,kBAAkB,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;IAEjD,6EAA6E;IAC7E,gCAAgC;IAChC,6EAA6E;IAE7E,UAAU,CAAC,CAAC,EAAE,uBAAuB,EAAE,MAAM,CAAC,CAAC;IAC/C,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE9B,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,MAAM,CACjD,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC,EAC/C,CAAC,CACF,CAAC;IAEF,8BAA8B;IAC9B,MAAM,cAAc,GAAG,CAAC,SAAS,EAAE,UAAU,EAAE,iBAAiB,CAAC,CAAC;IAClE,MAAM,YAAY,GAAG,MAAM,YAAY,CAAC,cAAc,CAAC,CAAC;IAExD,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,
WAAW,CAAC,CAAC,MAAM,CAChD,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC,EAC/C,CAAC,CACF,CAAC;IAEF,MAAM,CAAC,8BAA8B,EAAE,GAAG,WAAW,QAAQ,CAAC,CAAC;IAC/D,MAAM,CAAC,6BAA6B,EAAE,GAAG,UAAU,YAAY,UAAU,GAAG,WAAW,GAAG,CAAC,CAAC;IAE5F,4CAA4C;IAC5C,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;IACzD,MAAM,iBAAiB,GAAG,+BAA+B,CACvD,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,EACzE;QACE,kBAAkB,EAAE,GAAG,EAAE,CACvB,IAAI,GAAG,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC5C,gBAAgB,EAAE,GAAG,EAAE,CAAC,eAAe;KACxC,CACF,CAAC;IAEF,MAAM,aAAa,GAAG,iBAAiB,CAAC,IAAI,CAC1C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,gBAAgB,CACnC,CAAC;IACF,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,YAAY,GAAG,CAAC,MAAM,aAAa,CAAC,OAAO,CAAC;YAChD,KAAK,EAAE,cAAc;YACrB,KAAK,EAAE,CAAC;SACT,CAAC,CAED,CAAC;QAEF,IAAI,YAAY,CAAC,OAAO,IAAI,YAAY,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5D,GAAG,CAAC,EAAE,CAAC,CAAC;YACR,IAAI,CAAC,uDAAuD,CAAC,CAAC;YAC9D,KAAK,MAAM,CAAC,IAAI,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBACjD,MAAM,KAAK,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;gBACpC,GAAG,CACD,SAAS,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,GAAG,SAAS,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,KAAK,EAAE,CACrG,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,eAAe,CAAC,CAAC;IACtD,MAAM,cAAc,GAAG,SAAS,CAAC,MAAM,CAAC;IACxC,MAAM,gBAAgB,GAAG,eAAe,CAAC,MAAM,CAAC;IAEhD,GAAG,CAAC,EAAE,CAAC,CAAC;IACR,IAAI,CACF,kCAAkC,eAAe,CAAC,MAAM,oBAAoB,cAAc,YAAY,CACvG,CAAC;IAEF,OAAO,CAAC,2BAA2B,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;IAE1D,6EAA6E;IAC7E,4BAA4B;IAC5B,6EAA6E;IAE7E,UAAU,CAAC,CAAC,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE9B,gCAAgC;IAChC,MAAM,cAAc,GAAG,cAAc,CAAC,IAAI,CACxC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,0BAA0B,CAC7C,CAAC;IAEF,IAAI,CAAC,c
AAc,EAAE,CAAC;QACpB,IAAI,CAAC,6DAA6D,CAAC,CAAC;QACpE,MAAM,YAAY,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;QAChC,WAAW;QACX,MAAM,UAAU,GAAG,WAAW,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAChD,MAAM,SAAS,GAAG,UAAU,CAAC,IAAI,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,0BAA0B,CAC7C,CAAC;QACF,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,IAAI,CAAC,uDAAuD,CAAC,CAAC;YAC9D,OAAO,CAAC,gCAAgC,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,cAAc,CAAC,IAAI,CACnC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,0BAA0B,CAC7C,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CACtC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,0BAA0B,CAC7C,CAAC;IAEF,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;QAC5C,IAAI,CACF,MAAM;YACJ,CAAC,CAAC,sDAAsD;YACxD,CAAC,CAAC,8CAA8C,CACnD,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,CAAC,MAAM,SAAS,CAAC,OAAO,CAAC;gBAC3C,KAAK,EACH,gEAAgE;gBAClE,UAAU,EAAE,kBAAkB;aAC/B,CAAC,CAA4B,CAAC;YAE/B,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;gBACtB,IAAI,CAAC,6BAA6B,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC;YAC3D,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,EAAE,CAAC,CAAC;gBACR,MAAM,OAAO,GACV,WAAW,CAAC,OAAkB;oBAC9B,WAAW,CAAC,cAAyB;oBACtC,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAC5C,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAE/E,MAAM,WAAW,GAAG,WAAW,CAAC,WAAmC,CAAC;gBACpE,IAAI,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC1C,MAAM,CAAC,cAAc,EAAE,GAAG,WAAW,CAAC,MAAM,QAAQ,CAAC,CAAC;oBACtD,KAAK,MAAM,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;wBACxC,GAAG,CACD,SAAS,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,KAAK,EAAE,CAC7E,CAAC;oBACJ,CAAC;gBACH,CAAC;gBAED,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAgC,CAAC;gBAC9D,IAAI,QAAQ,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACpC,MAAM,CAAC,mBAAmB,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;gBACnD,CAAC;gBAED,MAA
M,MAAM,GAAG,WAAW,CAAC,MAA4B,CAAC;gBACxD,MAAM,CACJ,QAAQ,EACR,MAAM,KAAK,QAAQ;oBACjB,CAAC,CAAC,8BAA8B;oBAChC,CAAC,CAAC,MAAM,IAAI,oBAAoB,CACnC,CAAC;YACJ,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,oBAAqB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,OAAO,CAAC,uBAAuB,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;IAEtD,6EAA6E;IAC7E,6BAA6B;IAC7B,6EAA6E;IAE7E,UAAU,CAAC,CAAC,EAAE,oBAAoB,EAAE,MAAM,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE9B,qDAAqD;IACrD,MAAM,YAAY,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC;IAEvC,MAAM,oBAAoB,GAAc,CAAC,WAAW,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IAEvE,wFAAwF;IACxF,IAAI,aAAa,GAAG,KAAK,CAAC;IAC1B,MAAM,eAAe,GAAG,WAAW,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;IACrD,MAAM,eAAe,GAAG,eAAe,CAAC,IAAI,CAC1C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,cAAc,CACjC,CAAC;IACF,IAAI,eAAe,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,eAAe,CAAC,OAAO,CAAC;gBAC5B,SAAS,EAAE,uBAAuB;gBAClC,SAAS,EAAE,OAAO;gBAClB,UAAU,EAAE,SAAS;gBACrB,QAAQ,EAAE,WAAW;gBACrB,OAAO,EAAE,iEAAiE;aAC3E,CAAC,CAAC;YACH,aAAa,GAAG,IAAI,CAAC;YACrB,MAAM,CAAC,cAAc,EAAE,oDAAoD,CAAC,CAAC;QAC/E,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,uBAAwB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;SAAM,CAAC;QACN,IAAI,CAAC,yCAAyC,CAAC,CAAC;IAClD,CAAC;IAED,mBAAmB;IACnB,IAAI,aAAa,GAAG,KAAK,CAAC;IAC1B,MAAM,eAAe,GAAG,oBAAoB,CAAC,IAAI,CAC/C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,cAAc,CACjC,CAAC;IACF,IAAI,eAAe,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,YAAY,GAAG,CAAC,MAAM,eAAe,CAAC,OAAO,CAAC;gBAClD,MAAM,EAAE,uDAAuD;gBAC/D,MAAM,EAAE,QAAQ;gBAChB,OAAO,EAAE,yCAAyC;aACnD,CAAC,CAA4B,CAAC;YAC/B,aAAa,GAAG,IAAI,CAAC;YACrB,MAAM,CACJ,cAAc,EACd,wCAAwC,YAAY,CAAC,WAAW,IAAI,GAAG,GAAG,CAC3E,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,uBAAwB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;SAAM,CAAC;QACN,IAAI,CAAC,yCAAyC,CAAC,CAAC;IAClD,CAAC;IAED,wBAAwB;IACxB,IAAI,iBAAiB,GAAG,KAAK,CAAC;IAC9B,MAAM,aAAa,GAAG,oBAAoB,CAAC,IAAI,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,mBAAmB,CACtC,CAAC;IACF,IAAI,aAAa,EAAE,CAAC;QAClB,IAAI,
CAAC;YACH,MAAM,aAAa,GAAG,CAAC,MAAM,aAAa,CAAC,OAAO,CAAC;gBACjD,SAAS,EAAE,GAAG;aACf,CAAC,CAA4B,CAAC;YAC/B,iBAAiB,GAAG,IAAI,CAAC;YACzB,MAAM,WAAW,GACd,aAAa,CAAC,cAAyB,IAAI,oBAAoB,CAAC;YACnE,MAAM,CACJ,mBAAmB,EACnB,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CACpE,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,4BAA6B,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,IAAI,CAAC,8CAA8C,CAAC,CAAC;IACvD,CAAC;IAED,8BAA8B;IAC9B,MAAM,YAAY,GAAG,oBAAoB,CAAC,IAAI,CAC5C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,yBAAyB,CAC5C,CAAC;IACF,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,cAAc,GAAG,CAAC,MAAM,YAAY,CAAC,OAAO,CAAC;gBACjD,SAAS,EAAE,GAAG;aACf,CAAC,CAAyD,CAAC;YAE5D,IAAI,cAAc,CAAC,eAAe,EAAE,CAAC;gBACnC,MAAM,OAAO,GAAG,cAAc,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAC7D,GAAG,CAAC,EAAE,CAAC,CAAC;gBACR,IAAI,CAAC,gDAAgD,CAAC,CAAC;gBACvD,GAAG,CAAC,SAAS,CAAC,CAAC,KAAK,IAAI,OAAO,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;gBACjD,MAAM,CAAC,YAAY,EAAE,IAAI,cAAc,CAAC,aAAa,IAAI,GAAG,SAAS,CAAC,CAAC;YACzE,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,yBAAyB,EAAE,4CAA4C,CAAC,CAAC;YAClF,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,kCAAmC,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;SAAM,CAAC;QACN,IAAI,CAAC,oDAAoD,CAAC,CAAC;IAC7D,CAAC;IAED,MAAM,SAAS,GAAG,CAAC,aAAa,EAAE,aAAa,EAAE,iBAAiB,CAAC,CAAC,MAAM,CACxE,OAAO,CACR,CAAC,MAAM,CAAC;IACT,OAAO,CAAC,wBAAwB,SAAS,YAAY,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;IAE5E,6EAA6E;IAC7E,8BAA8B;IAC9B,6EAA6E;IAE7E,UAAU,CAAC,CAAC,EAAE,qBAAqB,EAAE,MAAM,CAAC,CAAC;IAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;QACpE,MAAM,gBAAgB,GAAG,MAAM,kBAAkB,EAAE,CAAC;QAEpD,KAAK,MAAM,OAAO,IAAI,gBAAgB,CAAC,QAAQ,EAAE,CAAC;YAChD,MAAM,QAAQ,GACZ,OAAO,CAAC,MAAM;gBACZ,CAAC,CAAC,GAAG,CAAC,CAAC,WAAW,OAAO,CAAC,CAAC,KAAK,EAAE;gBAClC,CAAC,CAAC,GAAG,CAAC,CAAC,SAAS,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC;YACrC,GAAG,CACD,OAAO,QAAQ,KAAK
,CAAC,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC,KAAK,IAAI;gBACrD,GAAG,CAAC,CAAC,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,CAAC,aAAa,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACzE,YAAY,CAAC,OAAO,CAAC,MAAM,CAAC,kBAAkB,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACrE,UAAU,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CACrE,CAAC;QACJ,CAAC;QAED,GAAG,CAAC,EAAE,CAAC,CAAC;QACR,MAAM,CACJ,mBAAmB,EACnB,GAAG,CAAC,gBAAgB,CAAC,eAAe,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC1D,CAAC;QACF,IAAI,CACF,6EAA6E,CAC9E,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,2BAA4B,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QAC1D,IAAI,CACF,uEAAuE,CACxE,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,yBAAyB,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;IAExD,6EAA6E;IAC7E,gBAAgB;IAChB,6EAA6E;IAE7E,MAAM,SAAS,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;IAE/C,GAAG,CAAC,EAAE,CAAC,CAAC;IACR,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,iEAAiE,CAAC,CAAC,KAAK,EAAE,CACpG,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,OAAO,gBAAgB,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,mBAAmB,CAAC,CAAC,KAAK,6BAA6B,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CACxL,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,6DAA6D,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CACvI,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,MAAM,SAAS,CAAC,CAAC,KAAK,QAAQ,MAAM,CAAC,cAAc,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,gBAAgB,6BAA6B,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CACvM,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,MAAM,WAAW,CAAC,CAAC,KAAK,MAAM,MAAM,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,2CAA2C,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CAClM,CAAC;
IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,MAAM,YAAY,CAAC,CAAC,KAAK,iDAAiD,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CAC3J,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,KAAK,UAAU,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,WAAW,cAAc,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,qBAAqB,CAAC,CAAC,KAAK,EAAE,iCAAiC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CAC5O,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,MAAM,UAAU,CAAC,CAAC,KAAK,kDAAkD,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CAC1J,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,MAAM,UAAU,CAAC,CAAC,KAAK,mDAAmD,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CAC3J,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,MAAM,aAAa,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,WAAW,GAAG,SAAS,IAAI,CAAC,CAAC,KAAK,4CAA4C,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CAC/L,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,KAAK,6DAA6D,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CACvI,CAAC;IACF,GAAG,CACD,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,WAAW,iEAAiE,CAAC,CAAC,KAAK,EAAE,CACpG,CAAC;IACF,GAAG,CAAC,EAAE,CAAC,CAAC;AACV,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,SAAS,UAAU,CAAC,CAAC,KAAK,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IAClE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACzB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -84,6 +84,7 @@ export interface RegressionItem {
|
|
|
84
84
|
export declare function generateQueryCorpus(): EvalQuery[];
|
|
85
85
|
export declare function detectRegressions(currentRunId: string, baselineRunId: string): RegressionItem[];
|
|
86
86
|
export declare function detectImprovements(currentRunId: string, baselineRunId: string): RegressionItem[];
|
|
87
|
+
export type Surface = "mcp" | "app";
|
|
87
88
|
export interface RunOptions {
|
|
88
89
|
queryLimit: number;
|
|
89
90
|
persona?: Persona;
|
|
@@ -93,6 +94,8 @@ export interface RunOptions {
|
|
|
93
94
|
dryRun?: boolean;
|
|
94
95
|
/** If true, run self-improving flywheel loop: eval → diagnose → grow → re-eval */
|
|
95
96
|
flywheel?: boolean;
|
|
97
|
+
/** Which surface to test: "mcp" (tool handlers) or "app" (web /search endpoint). Default: "mcp" */
|
|
98
|
+
surface?: Surface;
|
|
96
99
|
}
|
|
97
100
|
export declare function runLlmJudgeEval(options: RunOptions): Promise<RunSummary>;
|
|
98
101
|
export type FailureRootCause = "tool_not_found" | "tool_error" | "empty_output" | "criteria_mismatch" | "heuristic_too_strict";
|