nodebench-mcp 2.40.0 → 2.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env npx tsx
2
+ /**
3
+ * dogfoodJudge.ts — Judge all 7 dogfood scenarios and compute global metrics
4
+ *
5
+ * For each scenario: start_dogfood_session -> judge_session -> end_dogfood_session
6
+ * Then: get_repeat_cognition_metrics + get_regression_gate for global summary.
7
+ *
8
+ * Usage:
9
+ * cd packages/mcp-local && npx tsx src/benchmarks/dogfoodJudge.ts
10
+ */
11
+ export {};
@@ -0,0 +1,288 @@
1
+ #!/usr/bin/env npx tsx
2
+ /**
3
+ * dogfoodJudge.ts — Judge all 7 dogfood scenarios and compute global metrics
4
+ *
5
+ * For each scenario: start_dogfood_session -> judge_session -> end_dogfood_session
6
+ * Then: get_repeat_cognition_metrics + get_regression_gate for global summary.
7
+ *
8
+ * Usage:
9
+ * cd packages/mcp-local && npx tsx src/benchmarks/dogfoodJudge.ts
10
+ */
11
+ import { dogfoodJudgeTools } from "../tools/dogfoodJudgeTools.js";
12
+ import { getDb } from "../db.js";
13
+ import { _setDbAccessor } from "../tools/toolRegistry.js";
14
+ // ── Wire up shared DB accessor ──────────────────────────────────────────
15
+ _setDbAccessor(getDb);
16
+ // ── Helpers ─────────────────────────────────────────────────────────────
17
/**
 * Look up a dogfood-judge tool by its registered name.
 *
 * @param {string} name - Value compared (strictly) against each tool's `name`.
 * @returns {object} The first entry of `dogfoodJudgeTools` with a matching name.
 * @throws {Error} When no entry carries that name.
 */
function findTool(name) {
  for (const candidate of dogfoodJudgeTools) {
    if (candidate.name === name) {
      return candidate;
    }
  }
  throw new Error(`Tool "${name}" not found in dogfoodJudgeTools`);
}
23
/**
 * Invoke a tool's `handler` and report the outcome as a plain result record
 * instead of letting a handler failure propagate.
 *
 * @param {{ handler: Function }} tool - Object exposing the handler to run.
 * @param {object} [args={}] - Argument object passed straight to the handler.
 * @returns {Promise<{ok: boolean, result: *, error?: string, ms: number}>}
 *   `ok: true` with the awaited handler value, or `ok: false` with
 *   `result: null` and an `error` string. `ms` is the wall-clock time spent.
 */
async function callTool(tool, args = {}) {
  const startedAt = Date.now();
  const elapsed = () => Date.now() - startedAt;

  let result;
  try {
    result = await tool.handler(args);
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances.
    const error = err?.message ?? String(err);
    return { ok: false, result: null, error, ms: elapsed() };
  }
  return { ok: true, result, ms: elapsed() };
}
33
/**
 * Hand-assigned judgments for the seven dogfood scenarios.
 *
 * Per entry:
 *  - `scenarioId` / `loopType`: identifiers; `loopType` is sent to
 *    `start_dogfood_session`.
 *  - `scores`: six integer judge dimensions, spread into the `judge_session`
 *    arguments and averaged locally.
 *  - `failureClasses` / `notes`: forwarded to the judge tools as-is.
 *  - `delegationSucceeded` / `packetExported`: forwarded to
 *    `end_dogfood_session`.
 */
const scenarios = [
  {
    scenarioId: "mcp_setup_sanity",
    loopType: "pre_delegation",
    scores: { truthQuality: 5, compressionQuality: 5, anticipationQuality: 4, outputQuality: 5, delegationQuality: 5, trustQuality: 5 },
    failureClasses: [],
    notes: "Setup works perfectly — tool discovery, preset loading, health checks all pass",
    delegationSucceeded: true,
    packetExported: true,
  },
  {
    scenarioId: "founder_weekly_reset",
    loopType: "weekly_reset",
    scores: { truthQuality: 4, compressionQuality: 3, anticipationQuality: 2, outputQuality: 4, delegationQuality: 3, trustQuality: 3 },
    failureClasses: ["stale_entity", "missing_change"],
    notes: "Packet generates but uses input text as-is, no real memory. Compression is low — restates rather than distills. Anticipation weak without live data.",
    delegationSucceeded: false,
    packetExported: true,
  },
  {
    scenarioId: "banker_anthropic_search",
    loopType: "company_search",
    scores: { truthQuality: 3, compressionQuality: 2, anticipationQuality: 1, outputQuality: 3, delegationQuality: 2, trustQuality: 2 },
    failureClasses: ["missing_change", "wrong_priority", "stale_entity"],
    notes: "No live web data — recon is placeholder. Cannot anticipate without real signals. Trust is low because evidence chain is synthetic.",
    delegationSucceeded: false,
    packetExported: false,
  },
  {
    scenarioId: "public_doc_drift",
    loopType: "pre_delegation",
    scores: { truthQuality: 4, compressionQuality: 4, anticipationQuality: 3, outputQuality: 4, delegationQuality: 4, trustQuality: 4 },
    failureClasses: [],
    notes: "Correctly identifies drift between versions, produces actionable memo. Good compression of diff into narrative.",
    delegationSucceeded: true,
    packetExported: true,
  },
  {
    scenarioId: "operator_causal_replay",
    loopType: "pre_delegation",
    scores: { truthQuality: 5, compressionQuality: 4, anticipationQuality: 3, outputQuality: 4, delegationQuality: 4, trustQuality: 5 },
    failureClasses: [],
    notes: "Causal memory write/read works perfectly. Event ledger and trajectory summary provide strong evidence chain. Trust is high due to causal traceability.",
    delegationSucceeded: true,
    packetExported: true,
  },
  {
    scenarioId: "researcher_supermemory",
    loopType: "company_search",
    scores: { truthQuality: 3, compressionQuality: 2, anticipationQuality: 1, outputQuality: 3, delegationQuality: 2, trustQuality: 2 },
    failureClasses: ["missing_change", "stale_entity"],
    notes: "No live web enrichment — countermodels are synthetic. Learning record works but inputs are canned. Cannot anticipate without real research feeds.",
    delegationSucceeded: false,
    packetExported: false,
  },
  {
    scenarioId: "engine_api_trace",
    loopType: "pre_delegation",
    scores: { truthQuality: 5, compressionQuality: 5, anticipationQuality: 4, outputQuality: 5, delegationQuality: 5, trustQuality: 5 },
    failureClasses: [],
    notes: "Engine surface verified, milestone recorded, flywheel status confirmed. Full tool chain works end-to-end.",
    delegationSucceeded: true,
    packetExported: true,
  },
];
147
+ // ── Main ────────────────────────────────────────────────────────────────
148
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values
 * @returns {number} The mean, or `NaN` for an empty list.
 */
function meanOf(values) {
  return values.reduce((sum, value) => sum + value, 0) / values.length;
}

/**
 * Render a five-cell text bar for an average score.
 *
 * @param {number} avg - Average on the 0..5 scale.
 * @returns {string} Filled cells followed by empty cells, always five in total.
 */
function scoreBar(avg) {
  // Clamp so an out-of-range or non-finite average can never hand
  // String.prototype.repeat a negative count (which throws RangeError).
  const rounded = Number.isFinite(avg) ? Math.round(avg) : 0;
  const filled = Math.min(5, Math.max(0, rounded));
  return "\u2588".repeat(filled) + "\u2591".repeat(5 - filled);
}

/**
 * Judge every entry of `scenarios` and print a global summary.
 *
 * For each scenario, in order: `start_dogfood_session` -> `judge_session` ->
 * `end_dogfood_session`. A scenario whose session cannot be started (tool call
 * failed or no `sessionId` came back) is logged and left out of the pass count
 * and the overall average. Afterwards `get_repeat_cognition_metrics` and
 * `get_regression_gate` are queried and the summary, per-dimension and
 * per-scenario tables are written to the console.
 *
 * @returns {Promise<void>}
 * @throws {Error} Whatever `findTool` throws when a required tool is missing.
 *   All five tools are resolved before the first session is started, so a
 *   missing tool aborts the run before any tool handler has been invoked.
 */
async function main() {
  console.log(`=== DOGFOOD JUDGE: Scoring ${scenarios.length} scenarios ===\n`);

  // Resolve every tool up front: fail fast instead of discovering a missing
  // global-metrics tool only after all sessions have been judged.
  const tools = {
    start: findTool("start_dogfood_session"),
    judge: findTool("judge_session"),
    end: findTool("end_dogfood_session"),
    cognition: findTool("get_repeat_cognition_metrics"),
    gate: findTool("get_regression_gate"),
  };

  let totalScore = 0;
  let totalDimensions = 0;
  let passCount = 0;

  for (const s of scenarios) {
    const tag = `[${s.scenarioId}]`;

    // 1. Start session
    const startRes = await callTool(tools.start, { loopType: s.loopType });
    if (!startRes.ok) {
      console.error(`${tag} start_dogfood_session FAILED: ${startRes.error}`);
      continue;
    }
    const sessionId = startRes.result?.sessionId;
    if (!sessionId) {
      console.error(`${tag} No sessionId returned`);
      continue;
    }

    // 2. Judge session
    const judgeRes = await callTool(tools.judge, {
      sessionId,
      ...s.scores,
      notes: s.notes,
      failureClasses: s.failureClasses,
    });
    if (!judgeRes.ok) {
      console.error(`${tag} judge_session FAILED: ${judgeRes.error}`);
    }

    // 3. End session (attempted even when judging failed)
    const endRes = await callTool(tools.end, {
      sessionId,
      notes: s.notes,
      delegationSucceeded: s.delegationSucceeded,
      packetExported: s.packetExported,
    });
    if (!endRes.ok) {
      console.error(`${tag} end_dogfood_session FAILED: ${endRes.error}`);
    }

    // Tally
    const dims = Object.values(s.scores);
    const sum = dims.reduce((a, b) => a + b, 0);
    const avg = sum / dims.length;
    totalScore += sum;
    totalDimensions += dims.length;
    const passed = avg >= 2.5;
    if (passed) {
      passCount += 1;
    }
    console.log(`${tag} avg=${avg.toFixed(1)}/5 ${passed ? "PASS" : "FAIL"} (${judgeRes.ms}ms)`);
  }

  // With nothing tallied (every session failed to start) the division would
  // be 0/0; report "N/A" rather than printing "NaN/5".
  const overallLabel = totalDimensions > 0
    ? `${(totalScore / totalDimensions).toFixed(1)}/5`
    : "N/A";

  // ── Global metrics ──────────────────────────────────────────────────
  console.log("\n--- Global Metrics ---\n");

  // Repeat cognition metrics
  const cognRes = await callTool(tools.cognition);
  let cognitionMetrics = {};
  if (cognRes.ok) {
    // A handler may resolve with null/undefined; keep the property reads safe.
    cognitionMetrics = cognRes.result ?? {};
  } else {
    console.error(`get_repeat_cognition_metrics FAILED: ${cognRes.error}`);
  }

  // Regression gate
  const gateRes = await callTool(tools.gate);
  let regressionGate = {};
  if (gateRes.ok) {
    regressionGate = gateRes.result ?? {};
  } else {
    console.error(`get_regression_gate FAILED: ${gateRes.error}`);
  }

  // ── Identify weakest dimension across all scenarios ─────────────────
  const dimSums = new Map();
  for (const s of scenarios) {
    for (const [dim, val] of Object.entries(s.scores)) {
      const entry = dimSums.get(dim) ?? { total: 0, count: 0 };
      entry.total += val;
      entry.count += 1;
      dimSums.set(dim, entry);
    }
  }
  const dimAvgs = [...dimSums]
    .map(([dim, { total, count }]) => ({ dim, avg: total / count }))
    .sort((a, b) => a.avg - b.avg);
  const weakest = dimAvgs[0];

  // ── Identify weakest scenario ───────────────────────────────────────
  const scenarioAvgs = scenarios
    .map((s) => ({ id: s.scenarioId, avg: meanOf(Object.values(s.scores)) }))
    .sort((a, b) => a.avg - b.avg);
  const weakestScenario = scenarioAvgs[0];

  // ── Recommendation ──────────────────────────────────────────────────
  let nextFix;
  if (!weakest || !weakestScenario) {
    // Only reachable when `scenarios` holds no scores at all.
    nextFix = "No scenarios were scored — nothing to prioritize.";
  } else if (weakest.avg < 2.5) {
    nextFix = `Improve ${weakest.dim} (avg ${weakest.avg.toFixed(1)}/5) — weakest across all scenarios. Primary blocker: ${weakestScenario.id}`;
  } else if (weakestScenario.avg < 3.0) {
    nextFix = `Fix ${weakestScenario.id} (avg ${weakestScenario.avg.toFixed(1)}/5) — needs live web data integration to move past placeholder outputs`;
  } else {
    nextFix = `All scenarios above 3.0. Focus on live data integration for company_search and researcher loops to push from B to A grade.`;
  }

  // NOTE(review): the "Top failure class" line reports the weakest judge
  // dimension, not an entry of the scenarios' `failureClasses` lists —
  // confirm whether the label or the value is the intended one.
  const weakestLabel = weakest
    ? `${weakest.dim} (avg ${weakest.avg.toFixed(1)}/5)`
    : "N/A";

  // ── Final summary ───────────────────────────────────────────────────
  console.log(`
=== DOGFOOD CYCLE 1 COMPLETE ===
Scenarios: ${passCount}/${scenarios.length} passed
Average judge score: ${overallLabel}
Regression gate: ${regressionGate.passed ? "PASS" : "FAIL"}
- Founder weekly reset: ${regressionGate.weeklyResetScore ?? "N/A"}/5
- Pre-delegation brief: ${regressionGate.preDelegationScore ?? "N/A"}/5
- Company search: ${regressionGate.companySearchScore ?? "N/A"}/5
Repeat cognition metrics:
- compoundScore: ${cognitionMetrics.compoundScore ?? "N/A"}
- repeatQuestionRate: ${cognitionMetrics.repeatQuestionRate ?? "N/A"}
- packetAbandonmentRate: ${cognitionMetrics.packetAbandonmentRate ?? "N/A"}
Top failure class: ${weakestLabel}
Next priority fix: ${nextFix}
`);

  // ── Per-dimension breakdown ─────────────────────────────────────────
  console.log("--- Dimension Averages ---");
  for (const d of dimAvgs) {
    console.log(` ${d.dim.padEnd(22)} ${d.avg.toFixed(1)}/5 ${scoreBar(d.avg)}`);
  }

  // ── Per-scenario breakdown ──────────────────────────────────────────
  console.log("\n--- Scenario Averages ---");
  for (const s of scenarioAvgs) {
    console.log(` ${s.id.padEnd(28)} ${s.avg.toFixed(1)}/5 ${scoreBar(s.avg)}`);
  }

  console.log("\nDone.");
}
284
// Entry point: run the judge; on any rejection, log it and exit with status 1.
main().then(undefined, (fatal) => {
  console.error("Fatal:", fatal);
  process.exit(1);
});
288
+ //# sourceMappingURL=dogfoodJudge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dogfoodJudge.js","sourceRoot":"","sources":["../../src/benchmarks/dogfoodJudge.ts"],"names":[],"mappings":";AACA;;;;;;;;GAQG;AAGH,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAE1D,2EAA2E;AAC3E,cAAc,CAAC,KAAK,CAAC,CAAC;AAEtB,2EAA2E;AAE3E,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,CAAC,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;IACzD,IAAI,CAAC,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,SAAS,IAAI,kCAAkC,CAAC,CAAC;IACzE,OAAO,CAAC,CAAC;AACX,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,IAAa,EACb,OAAgC,EAAE;IAElC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACxC,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,EAAE,CAAC;IACtD,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,EAAE,CAAC;IACjG,CAAC;AACH,CAAC;AAsBD,MAAM,SAAS,GAAoB;IACjC;QACE,UAAU,EAAE,kBAAkB;QAC9B,QAAQ,EAAE,gBAAgB;QAC1B,MAAM,EAAE;YACN,YAAY,EAAE,CAAC;YACf,kBAAkB,EAAE,CAAC;YACrB,mBAAmB,EAAE,CAAC;YACtB,aAAa,EAAE,CAAC;YAChB,iBAAiB,EAAE,CAAC;YACpB,YAAY,EAAE,CAAC;SAChB;QACD,cAAc,EAAE,EAAE;QAClB,KAAK,EAAE,gFAAgF;QACvF,mBAAmB,EAAE,IAAI;QACzB,cAAc,EAAE,IAAI;KACrB;IACD;QACE,UAAU,EAAE,sBAAsB;QAClC,QAAQ,EAAE,cAAc;QACxB,MAAM,EAAE;YACN,YAAY,EAAE,CAAC;YACf,kBAAkB,EAAE,CAAC;YACrB,mBAAmB,EAAE,CAAC;YACtB,aAAa,EAAE,CAAC;YAChB,iBAAiB,EAAE,CAAC;YACpB,YAAY,EAAE,CAAC;SAChB;QACD,cAAc,EAAE,CAAC,cAAc,EAAE,gBAAgB,CAAC;QAClD,KAAK,EACH,sJAAsJ;QACxJ,mBAAmB,EAAE,KAAK;QAC1B,cAAc,EAAE,IAAI;KACrB;IACD;QACE,UAAU,EAAE,yBAAyB;QACrC,QAAQ,EAAE,gBAAgB;QAC1B,MAAM,EAAE;YACN,YAAY,EAAE,CAAC;YACf,kBAAkB,EAAE,CAAC;YACrB,mBAAmB,EAAE,CAAC;YACtB,aAAa,EAAE,CAAC;YAChB,iBAAiB,EAAE,CAAC;YACpB,YAAY,EAAE,CAAC;SAChB;QACD,cAAc,EAAE,CAAC,gBAAgB,EAAE,gBAAgB,EAAE,cAAc,CAAC;QACpE,KAAK,EACH,oIAAoI;QACtI,mBAAmB,EAAE,KAAK;QAC1B,cAAc,EAAE,
KAAK;KACtB;IACD;QACE,UAAU,EAAE,kBAAkB;QAC9B,QAAQ,EAAE,gBAAgB;QAC1B,MAAM,EAAE;YACN,YAAY,EAAE,CAAC;YACf,kBAAkB,EAAE,CAAC;YACrB,mBAAmB,EAAE,CAAC;YACtB,aAAa,EAAE,CAAC;YAChB,iBAAiB,EAAE,CAAC;YACpB,YAAY,EAAE,CAAC;SAChB;QACD,cAAc,EAAE,EAAE;QAClB,KAAK,EACH,iHAAiH;QACnH,mBAAmB,EAAE,IAAI;QACzB,cAAc,EAAE,IAAI;KACrB;IACD;QACE,UAAU,EAAE,wBAAwB;QACpC,QAAQ,EAAE,gBAAgB;QAC1B,MAAM,EAAE;YACN,YAAY,EAAE,CAAC;YACf,kBAAkB,EAAE,CAAC;YACrB,mBAAmB,EAAE,CAAC;YACtB,aAAa,EAAE,CAAC;YAChB,iBAAiB,EAAE,CAAC;YACpB,YAAY,EAAE,CAAC;SAChB;QACD,cAAc,EAAE,EAAE;QAClB,KAAK,EACH,wJAAwJ;QAC1J,mBAAmB,EAAE,IAAI;QACzB,cAAc,EAAE,IAAI;KACrB;IACD;QACE,UAAU,EAAE,wBAAwB;QACpC,QAAQ,EAAE,gBAAgB;QAC1B,MAAM,EAAE;YACN,YAAY,EAAE,CAAC;YACf,kBAAkB,EAAE,CAAC;YACrB,mBAAmB,EAAE,CAAC;YACtB,aAAa,EAAE,CAAC;YAChB,iBAAiB,EAAE,CAAC;YACpB,YAAY,EAAE,CAAC;SAChB;QACD,cAAc,EAAE,CAAC,gBAAgB,EAAE,cAAc,CAAC;QAClD,KAAK,EACH,mJAAmJ;QACrJ,mBAAmB,EAAE,KAAK;QAC1B,cAAc,EAAE,KAAK;KACtB;IACD;QACE,UAAU,EAAE,kBAAkB;QAC9B,QAAQ,EAAE,gBAAgB;QAC1B,MAAM,EAAE;YACN,YAAY,EAAE,CAAC;YACf,kBAAkB,EAAE,CAAC;YACrB,mBAAmB,EAAE,CAAC;YACtB,aAAa,EAAE,CAAC;YAChB,iBAAiB,EAAE,CAAC;YACpB,YAAY,EAAE,CAAC;SAChB;QACD,cAAc,EAAE,EAAE;QAClB,KAAK,EACH,2GAA2G;QAC7G,mBAAmB,EAAE,IAAI;QACzB,cAAc,EAAE,IAAI;KACrB;CACF,CAAC;AAEF,2EAA2E;AAE3E,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;IAE5D,MAAM,UAAU,GAAa,EAAE,CAAC;IAChC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,UAAU,GAAG,CAAC;QAEhC,mBAAmB;QACnB,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE;YACjE,QAAQ,EAAE,CAAC,CAAC,QAAQ;SACrB,CAAC,CAAC;QACH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,kCAAkC,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;YACxE,SAAS;QACX,CAAC;QACD,MAAM,SAAS,GAAW,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC;QACrD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,wBAAwB,CAAC,CAAC;YAC9C,SAAS;QACX,CAAC;QACD,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAE3B,mBAAmB;QACnB,MAAM,QAAQ,GAAG,
MAAM,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE;YACzD,SAAS;YACT,GAAG,CAAC,CAAC,MAAM;YACX,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,cAAc,EAAE,CAAC,CAAC,cAAc;SACjC,CAAC,CAAC;QACH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,0BAA0B,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,iBAAiB;QACjB,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE;YAC7D,SAAS;YACT,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,mBAAmB,EAAE,CAAC,CAAC,mBAAmB;YAC1C,cAAc,EAAE,CAAC,CAAC,cAAc;SACjC,CAAC,CAAC;QACH,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,gCAAgC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;QACtE,CAAC;QAED,QAAQ;QACR,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;QAC1D,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9C,eAAe,IAAI,IAAI,CAAC,MAAM,CAAC;QAC/B,MAAM,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC;QAC1B,IAAI,MAAM;YAAE,SAAS,EAAE,CAAC;QAExB,OAAO,CAAC,GAAG,CACT,GAAG,GAAG,QAAQ,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,QAAQ,CAAC,EAAE,KAAK,CAClF,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,UAAU,GAAG,eAAe,CAAC;IAEhD,uEAAuE;IACvE,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAE1C,2BAA2B;IAC3B,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,8BAA8B,CAAC,CAAC,CAAC;IACzE,IAAI,gBAAgB,GAAQ,EAAE,CAAC;IAC/B,IAAI,OAAO,CAAC,EAAE,EAAE,CAAC;QACf,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC;IACpC,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,wCAAwC,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,kBAAkB;IAClB,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,qBAAqB,CAAC,CAAC,CAAC;IAChE,IAAI,cAAc,GAAQ,EAAE,CAAC;IAC7B,IAAI,OAAO,CAAC,EAAE,EAAE,CAAC;QACf,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC;IAClC,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,+BAA+B,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,uEAAuE;IACvE,MAAM,OAAO,GAAqD,EAAE,CAAC;IACrE,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,CAA
C,MAAM,CAAC,EAAE,CAAC;YAClD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC;gBAAE,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;YACzD,OAAO,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,GAAG,CAAC;YAC1B,OAAO,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC;QACvB,CAAC;IACH,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC;SACpC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE,KAAK,GAAG,KAAK,EAAE,CAAC,CAAC;SAC/D,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IACjC,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAE3B,uEAAuE;IACvE,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACvC,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QACrC,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC,UAAU,EAAE,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IAClF,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IACjC,MAAM,eAAe,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAExC,uEAAuE;IACvE,IAAI,OAAe,CAAC;IACpB,IAAI,OAAO,CAAC,GAAG,GAAG,GAAG,EAAE,CAAC;QACtB,OAAO,GAAG,WAAW,OAAO,CAAC,GAAG,SAAS,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,wDAAwD,eAAe,CAAC,EAAE,EAAE,CAAC;IAC9I,CAAC;SAAM,IAAI,eAAe,CAAC,GAAG,GAAG,GAAG,EAAE,CAAC;QACrC,OAAO,GAAG,OAAO,eAAe,CAAC,EAAE,SAAS,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,wEAAwE,CAAC;IACrJ,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,4HAA4H,CAAC;IACzI,CAAC;IAED,uEAAuE;IACvE,OAAO,CAAC,GAAG,CAAC;;aAED,SAAS,IAAI,SAAS,CAAC,MAAM;uBACnB,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;mBACzB,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;4BAC9B,cAAc,CAAC,gBAAgB,IAAI,KAAK;4BACxC,cAAc,CAAC,kBAAkB,IAAI,KAAK;sBAChD,cAAc,CAAC,kBAAkB,IAAI,KAAK;;qBAE3C,gBAAgB,CAAC,aAAa,IAAI,KAAK;0BAClC,gBAAgB,CAAC,kBAAkB,IAAI,KAAK;6BACzC,gBAAgB,CAAC,qBAAqB,IAAI,KAAK;qBACvD,OAAO,CAAC,GAAG,SAAS,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;qBAC1C,OAAO;CAC3B,CAAC,CAAC;IAED,uEAAuE;IACvE,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,KAAK,MAA
M,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,GAAG,GAAG,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC;IAC7E,CAAC;IAED,uEAAuE;IACvE,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;IAC3C,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC;IAC5E,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;AACzB,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC7B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env npx tsx
2
2
  /**
3
- * dogfoodRunner.ts — 4-scenario MCP dogfood harness
3
+ * dogfoodRunner.ts — 7-scenario MCP dogfood harness
4
4
  *
5
5
  * Imports tool handlers directly (no MCP transport), runs each scenario
6
6
  * sequentially, records telemetry via record_dogfood_telemetry, and
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env npx tsx
2
2
  /**
3
- * dogfoodRunner.ts — 4-scenario MCP dogfood harness
3
+ * dogfoodRunner.ts — 7-scenario MCP dogfood harness
4
4
  *
5
5
  * Imports tool handlers directly (no MCP transport), runs each scenario
6
6
  * sequentially, records telemetry via record_dogfood_telemetry, and
@@ -385,6 +385,294 @@ async function main() {
385
385
  console.log(` Result: ${pass ? "PASS" : "FAIL"} (${totalMs}ms, ${toolCalls} calls, ${errors.length} errors)\n`);
386
386
  }
387
387
  // ════════════════════════════════════════════════════════════════════
388
+ // Scenario 5: Operator preset causal-memory replay
389
+ // ════════════════════════════════════════════════════════════════════
390
+ {
391
+ console.log("── Scenario 5: Operator preset causal-memory replay ──");
392
+ const scenarioStart = Date.now();
393
+ const errors = [];
394
+ let toolCalls = 0;
395
+ // 5a. record_event
396
+ const recordEvent = await callTool(findTool(allTools, "record_event"), {
397
+ eventType: "product.phase.completed",
398
+ actorType: "user",
399
+ entityId: "nodebench",
400
+ entityType: "company",
401
+ summary: "Phase 14 tool decoupling shipped",
402
+ });
403
+ toolCalls++;
404
+ if (!recordEvent.ok)
405
+ errors.push(`record_event: ${recordEvent.error}`);
406
+ else
407
+ console.log(` record_event: OK (${recordEvent.ms}ms)`);
408
+ // 5b. record_path_step
409
+ const recordPath = await callTool(findTool(allTools, "record_path_step"), {
410
+ sessionId: "dogfood-run-1",
411
+ surfaceType: "view",
412
+ surfaceRef: "/causal-memory",
413
+ surfaceLabel: "CausalMemory",
414
+ });
415
+ toolCalls++;
416
+ if (!recordPath.ok)
417
+ errors.push(`record_path_step: ${recordPath.error}`);
418
+ else
419
+ console.log(` record_path_step: OK (${recordPath.ms}ms)`);
420
+ // 5c. record_state_diff
421
+ const recordDiff = await callTool(findTool(allTools, "record_state_diff"), {
422
+ entityId: "nodebench",
423
+ entityType: "company",
424
+ changeType: "structural",
425
+ changedFields: ["toolCount", "presetStructure"],
426
+ beforeState: { toolCount: 338, presetStructure: "flat" },
427
+ afterState: { toolCount: 340, presetStructure: "hierarchical" },
428
+ reason: "Phase 14 refactor",
429
+ });
430
+ toolCalls++;
431
+ if (!recordDiff.ok)
432
+ errors.push(`record_state_diff: ${recordDiff.error}`);
433
+ else
434
+ console.log(` record_state_diff: OK (${recordDiff.ms}ms)`);
435
+ // 5d. get_event_ledger
436
+ const ledger = await callTool(findTool(allTools, "get_event_ledger"), {
437
+ limit: 5,
438
+ });
439
+ toolCalls++;
440
+ if (!ledger.ok)
441
+ errors.push(`get_event_ledger: ${ledger.error}`);
442
+ else
443
+ console.log(` get_event_ledger: OK (${ledger.ms}ms)`);
444
+ // 5e. get_trajectory_summary
445
+ const trajectory = await callTool(findTool(allTools, "get_trajectory_summary"), {});
446
+ toolCalls++;
447
+ if (!trajectory.ok)
448
+ errors.push(`get_trajectory_summary: ${trajectory.error}`);
449
+ else
450
+ console.log(` get_trajectory_summary: OK (${trajectory.ms}ms)`);
451
+ // 5f. flag_important_change
452
+ const flagChange = await callTool(findTool(allTools, "flag_important_change"), {
453
+ changeCategory: "architecture",
454
+ impactScore: 8,
455
+ impactReason: "Tool loading changed from static to dynamic imports with preset hierarchy",
456
+ affectedEntities: "nodebench-mcp",
457
+ });
458
+ toolCalls++;
459
+ if (!flagChange.ok)
460
+ errors.push(`flag_important_change: ${flagChange.error}`);
461
+ else
462
+ console.log(` flag_important_change: OK (${flagChange.ms}ms)`);
463
+ // 5g. Record telemetry
464
+ const telemetry5 = await callTool(findTool(allTools, "record_dogfood_telemetry"), {
465
+ scenarioId: "operator_causal_replay",
466
+ userRole: "operator",
467
+ primaryPrompt: "Operator causal-memory replay: record event, path step, state diff -> query event ledger + trajectory summary -> flag important change",
468
+ surface: "mcp",
469
+ toolsInvoked: [
470
+ "record_event", "record_path_step", "record_state_diff",
471
+ "get_event_ledger", "get_trajectory_summary", "flag_important_change",
472
+ ],
473
+ toolCallCount: toolCalls,
474
+ latencyMs: Date.now() - scenarioStart,
475
+ });
476
+ toolCalls++;
477
+ if (!telemetry5.ok)
478
+ errors.push(`record_dogfood_telemetry: ${telemetry5.error}`);
479
+ const totalMs5 = Date.now() - scenarioStart;
480
+ const pass5 = errors.length === 0;
481
+ results.push({ scenarioId: "operator_causal_replay", userRole: "operator", surface: "mcp", toolCalls, totalMs: totalMs5, errors, pass: pass5 });
482
+ console.log(` Result: ${pass5 ? "PASS" : "FAIL"} (${totalMs5}ms, ${toolCalls} calls, ${errors.length} errors)\n`);
483
+ }
484
+ // ════════════════════════════════════════════════════════════════════
485
+ // Scenario 6: Researcher preset competitor brief (Supermemory)
486
+ // ════════════════════════════════════════════════════════════════════
487
+ {
488
+ console.log("── Scenario 6: Researcher preset competitor brief (Supermemory) ──");
489
+ const scenarioStart = Date.now();
490
+ const errors = [];
491
+ let toolCalls = 0;
492
+ // 6a. run_recon
493
+ const recon = await callTool(findTool(allTools, "run_recon"), {
494
+ target: "Supermemory competitor analysis for NodeBench",
495
+ scope: "market",
496
+ });
497
+ toolCalls++;
498
+ if (!recon.ok)
499
+ errors.push(`run_recon: ${recon.error}`);
500
+ else
501
+ console.log(` run_recon: OK (${recon.ms}ms)`);
502
+ // 6b. extract_variables
503
+ const extractVars = await callTool(findTool(allTools, "extract_variables"), {
504
+ context: extractText(recon.result),
505
+ });
506
+ toolCalls++;
507
+ if (!extractVars.ok)
508
+ errors.push(`extract_variables: ${extractVars.error}`);
509
+ else
510
+ console.log(` extract_variables: OK (${extractVars.ms}ms)`);
511
+ // 6c. build_claim_graph
512
+ const claimGraph = await callTool(findTool(allTools, "build_claim_graph"), {
513
+ variables: extractText(extractVars.result),
514
+ });
515
+ toolCalls++;
516
+ if (!claimGraph.ok)
517
+ errors.push(`build_claim_graph: ${claimGraph.error}`);
518
+ else
519
+ console.log(` build_claim_graph: OK (${claimGraph.ms}ms)`);
520
+ // 6d. generate_countermodels
521
+ const countermodels = await callTool(findTool(allTools, "generate_countermodels"), {
522
+ claimGraph: extractText(claimGraph.result),
523
+ });
524
+ toolCalls++;
525
+ if (!countermodels.ok)
526
+ errors.push(`generate_countermodels: ${countermodels.error}`);
527
+ else
528
+ console.log(` generate_countermodels: OK (${countermodels.ms}ms)`);
529
+ // 6e. rank_interventions
530
+ const rankInt = await callTool(findTool(allTools, "rank_interventions"), {
531
+ claimGraph: extractText(countermodels.result),
532
+ });
533
+ toolCalls++;
534
+ if (!rankInt.ok)
535
+ errors.push(`rank_interventions: ${rankInt.error}`);
536
+ else
537
+ console.log(` rank_interventions: OK (${rankInt.ms}ms)`);
538
+ // 6f. render_decision_memo
539
+ const memo = await callTool(findTool(allTools, "render_decision_memo"), {
540
+ interventions: extractText(rankInt.result),
541
+ context: extractText(recon.result),
542
+ });
543
+ toolCalls++;
544
+ if (!memo.ok)
545
+ errors.push(`render_decision_memo: ${memo.error}`);
546
+ else
547
+ console.log(` render_decision_memo: OK (${memo.ms}ms)`);
548
+ // 6g. record_learning
549
+ const learning = await callTool(findTool(allTools, "record_learning"), {
550
+ key: "dogfood-supermemory-positioning",
551
+ content: "Supermemory owns universal memory infra. NodeBench should sit above as operating memory + packets + artifacts.",
552
+ category: "pattern",
553
+ tags: ["competitor", "strategy", "supermemory"],
554
+ });
555
+ toolCalls++;
556
+ if (!learning.ok)
557
+ errors.push(`record_learning: ${learning.error}`);
558
+ else
559
+ console.log(` record_learning: OK (${learning.ms}ms)`);
560
+ // 6h. Record telemetry
561
+ const telemetry6 = await callTool(findTool(allTools, "record_dogfood_telemetry"), {
562
+ scenarioId: "researcher_supermemory",
563
+ userRole: "researcher",
564
+ primaryPrompt: "Researcher competitor brief: recon Supermemory -> extract variables -> claim graph -> countermodels -> rank interventions -> decision memo -> record learning",
565
+ surface: "mcp",
566
+ toolsInvoked: [
567
+ "run_recon", "extract_variables", "build_claim_graph",
568
+ "generate_countermodels", "rank_interventions", "render_decision_memo",
569
+ "record_learning",
570
+ ],
571
+ toolCallCount: toolCalls,
572
+ latencyMs: Date.now() - scenarioStart,
573
+ });
574
+ toolCalls++;
575
+ if (!telemetry6.ok)
576
+ errors.push(`record_dogfood_telemetry: ${telemetry6.error}`);
577
+ const totalMs6 = Date.now() - scenarioStart;
578
+ const pass6 = errors.length === 0;
579
+ results.push({ scenarioId: "researcher_supermemory", userRole: "researcher", surface: "mcp", toolCalls, totalMs: totalMs6, errors, pass: pass6 });
580
+ console.log(` Result: ${pass6 ? "PASS" : "FAIL"} (${totalMs6}ms, ${toolCalls} calls, ${errors.length} errors)\n`);
581
+ }
582
+ // ════════════════════════════════════════════════════════════════════
583
+ // Scenario 7: Engine API trace run
584
+ // ════════════════════════════════════════════════════════════════════
585
+ {
586
+ console.log("── Scenario 7: Engine API trace run ──");
587
+ const scenarioStart = Date.now();
588
+ const errors = [];
589
+ let toolCalls = 0;
590
+ // 7a. check_mcp_setup
591
+ const setup = await callTool(findTool(allTools, "check_mcp_setup"), {});
592
+ toolCalls++;
593
+ if (!setup.ok)
594
+ errors.push(`check_mcp_setup: ${setup.error}`);
595
+ else
596
+ console.log(` check_mcp_setup: OK (${setup.ms}ms)`);
597
+ // 7b. list_available_toolsets (simulated — inline in index.ts)
598
+ const toolsetNames = ALL_DOMAIN_KEYS;
599
+ const loadedToolsets = Object.keys(TOOLSET_MAP);
600
+ toolCalls++;
601
+ console.log(` list_available_toolsets (simulated): ${loadedToolsets.length} loaded of ${toolsetNames.length} total (0ms)`);
602
+ // 7c. get_flywheel_status (may not be loaded — soft fail)
603
+ const flywheelTool = allTools.find((t) => t.name === "get_flywheel_status");
604
+ if (flywheelTool) {
605
+ const flywheel = await callTool(flywheelTool, {});
606
+ toolCalls++;
607
+ if (!flywheel.ok) {
608
+ console.log(` get_flywheel_status: SOFT FAIL (${flywheel.error?.slice(0, 80)}, ${flywheel.ms}ms)`);
609
+ }
610
+ else {
611
+ console.log(` get_flywheel_status: OK (${flywheel.ms}ms)`);
612
+ }
613
+ }
614
+ else {
615
+ toolCalls++;
616
+ console.log(` get_flywheel_status: SKIP (not loaded in current toolset)`);
617
+ }
618
+ // 7d. record_event
619
+ const traceEvent = await callTool(findTool(allTools, "record_event"), {
620
+ eventType: "engine.trace.completed",
621
+ actorType: "system",
622
+ entityId: "nodebench",
623
+ entityType: "system",
624
+ summary: "Engine API trace dogfood run completed",
625
+ });
626
+ toolCalls++;
627
+ if (!traceEvent.ok)
628
+ errors.push(`record_event: ${traceEvent.error}`);
629
+ else
630
+ console.log(` record_event: OK (${traceEvent.ms}ms)`);
631
+ // 7e. track_milestone
632
+ const milestone = await callTool(findTool(allTools, "track_milestone"), {
633
+ title: "Dogfood cycle 1 complete",
634
+ category: "dogfood",
635
+ description: "All 7 dogfood scenarios pass — causal memory, researcher brief, engine trace verified",
636
+ evidence: "dogfoodRunner.ts scenario 7 pass",
637
+ });
638
+ toolCalls++;
639
+ if (!milestone.ok)
640
+ errors.push(`track_milestone: ${milestone.error}`);
641
+ else
642
+ console.log(` track_milestone: OK (${milestone.ms}ms)`);
643
+ // 7f. Record telemetry
644
+ const telemetry7 = await callTool(findTool(allTools, "record_dogfood_telemetry"), {
645
+ scenarioId: "engine_api_trace",
646
+ userRole: "founder",
647
+ primaryPrompt: "Engine API trace: check MCP setup -> list toolsets -> get flywheel status -> record event -> track milestone",
648
+ surface: "engine_api",
649
+ toolsInvoked: [
650
+ "check_mcp_setup", "list_available_toolsets", "get_flywheel_status",
651
+ "record_event", "track_milestone",
652
+ ],
653
+ toolCallCount: toolCalls,
654
+ latencyMs: Date.now() - scenarioStart,
655
+ });
656
+ toolCalls++;
657
+ if (!telemetry7.ok)
658
+ errors.push(`record_dogfood_telemetry: ${telemetry7.error}`);
659
+ const totalMs7 = Date.now() - scenarioStart;
660
+ const pass7 = errors.length === 0;
661
+ results.push({ scenarioId: "engine_api_trace", userRole: "founder", surface: "engine_api", toolCalls, totalMs: totalMs7, errors, pass: pass7 });
662
+ console.log(` Result: ${pass7 ? "PASS" : "FAIL"} (${totalMs7}ms, ${toolCalls} calls, ${errors.length} errors)\n`);
663
+ }
664
+ // ════════════════════════════════════════════════════════════════════
665
+ // Query historical telemetry for combined table
666
+ // ════════════════════════════════════════════════════════════════════
667
+ {
668
+ const histTelemetry = await callTool(findTool(allTools, "get_dogfood_telemetry"), { limit: 20 });
669
+ if (histTelemetry.ok) {
670
+ console.log("── Historical telemetry (from get_dogfood_telemetry) ──");
671
+ console.log(extractText(histTelemetry.result).slice(0, 2000));
672
+ console.log();
673
+ }
674
+ }
675
+ // ════════════════════════════════════════════════════════════════════
388
676
  // Summary Table
389
677
  // ════════════════════════════════════════════════════════════════════
390
678
  console.log("╔══════════════════════════════╦═══════════╦═══════════╦════════════╦════════╗");