elasticdash-test 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +36 -5
  2. package/dist/dashboard-server.d.ts +9 -0
  3. package/dist/dashboard-server.d.ts.map +1 -1
  4. package/dist/dashboard-server.js +209 -22
  5. package/dist/dashboard-server.js.map +1 -1
  6. package/dist/html/dashboard.html +158 -8
  7. package/dist/index.cjs +828 -108
  8. package/dist/index.d.ts +3 -2
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +2 -2
  11. package/dist/index.js.map +1 -1
  12. package/dist/interceptors/telemetry-push.d.ts +47 -0
  13. package/dist/interceptors/telemetry-push.d.ts.map +1 -1
  14. package/dist/interceptors/telemetry-push.js +139 -6
  15. package/dist/interceptors/telemetry-push.js.map +1 -1
  16. package/dist/interceptors/tool.d.ts.map +1 -1
  17. package/dist/interceptors/tool.js +2 -1
  18. package/dist/interceptors/tool.js.map +1 -1
  19. package/dist/interceptors/workflow-ai.d.ts.map +1 -1
  20. package/dist/interceptors/workflow-ai.js +28 -4
  21. package/dist/interceptors/workflow-ai.js.map +1 -1
  22. package/dist/internals/mock-resolver.d.ts +42 -5
  23. package/dist/internals/mock-resolver.d.ts.map +1 -1
  24. package/dist/internals/mock-resolver.js +124 -5
  25. package/dist/internals/mock-resolver.js.map +1 -1
  26. package/dist/workflow-runner-worker.js +8 -2
  27. package/dist/workflow-runner-worker.js.map +1 -1
  28. package/package.json +3 -2
  29. package/src/dashboard-server.ts +86 -17
  30. package/src/html/dashboard.html +158 -8
  31. package/src/index.ts +3 -2
  32. package/src/interceptors/telemetry-push.ts +158 -7
  33. package/src/interceptors/tool.ts +2 -1
  34. package/src/interceptors/workflow-ai.ts +30 -4
  35. package/src/internals/mock-resolver.ts +131 -5
  36. package/src/workflow-runner-worker.ts +23 -2
package/README.md CHANGED
@@ -28,7 +28,7 @@ An AI-native test runner for ElasticDash workflow testing. Built for async AI pi
28
28
  - 🎯 **Trace-first testing** — every test gets a `trace` context to record and assert on LLM calls and tool invocations
29
29
  - 🔍 **Automatic AI interception** — captures OpenAI, Gemini, and Grok calls without code changes
30
30
  - 🧪 **AI-specific matchers** — semantic output matching, LLM-judged evaluations, prompt assertions
31
- - 🛠️ **Tool recording & replay** — automatically trace tool calls with checkpoint-based replay
31
+ - 🛠️ **Tool & LLM recording & replay** — automatically trace tool and AI calls with checkpoint-based replay and mock support
32
32
  - 📊 **Interactive dashboard** — browse workflows, debug traces, validate fixes visually
33
33
  - 🤖 **Agent mid-trace replay** — resume long-running agents from any task without re-execution
34
34
  - 🌐 **HTTP workflow mode** — run workflows against your live dev server for framework-heavy apps (Next.js, Remix, etc.) with full AI and tool call observability
@@ -266,6 +266,8 @@ export const callClaude = wrapAI('claude-sonnet-4-5', async (messages: Anthropic
266
266
 
267
267
  Use `wrapAI` when you have a custom AI wrapper or a provider not covered by automatic interception. For direct OpenAI/Anthropic/Gemini SDK calls inside a subprocess workflow, automatic interception via `installAIInterceptor` already handles recording without any code changes.
268
268
 
269
+ **AI mocking (subprocess / test runner mode):** `wrapAI` also checks `resolveAIMock` at call time, so the dashboard can mock LLM responses the same way it mocks tool calls — without modifying your server code. Configure an `AIMockConfig` in the dashboard UI or pass it programmatically via the `aiMockConfig` option when running a workflow.
270
+
269
271
  ### HTTP Streaming Capture and Replay
270
272
 
271
273
  ElasticDash also captures non-AI `fetch` responses that stream over HTTP (for example SSE and NDJSON endpoints) in the HTTP interceptor.
@@ -354,6 +356,25 @@ The dashboard injects `x-elasticdash-run-id` and `x-elasticdash-server` headers
354
356
 
355
357
  > **Note:** Use `setHttpRunContext` (synchronous) if you only need observability and do not need step freezing. `initHttpRunContext` is required for the dashboard's breakpoint/replay functionality to work.
356
358
 
359
+ ### Dashboard Auto-Detection (env var mode)
360
+
361
+ As an alternative to calling `initHttpRunContext` in your request handler, you can set two environment variables before starting your server or script. Every `wrapTool` and `wrapAI` call will then connect to the dashboard automatically — no code changes needed:
362
+
363
+ ```bash
364
+ # Required: URL of the running ElasticDash dashboard
365
+ ELASTICDASH_SERVER=http://localhost:4573
366
+
367
+ # Optional: pre-registered run ID to fetch frozen steps for
368
+ ELASTICDASH_RUN_ID=<run-id-from-dashboard>
369
+ ```
370
+
371
+ - If only `ELASTICDASH_SERVER` is set, a fresh run ID is generated and all calls push live telemetry to the dashboard (observability only, no step freezing).
372
+ - If both variables are set, frozen steps are fetched from the dashboard at startup and replayed as configured.
373
+ - If the dashboard is unreachable the SDK falls through to live execution silently.
374
+ - The initialization runs **once per process** — subsequent `wrapTool`/`wrapAI` calls reuse the cached context.
375
+
376
+ This mode is intended for local development and testing scenarios. For production HTTP servers with concurrent requests, continue using `initHttpRunContext` inside your request handler.
377
+
357
378
  **Subprocess vs HTTP mode comparison:**
358
379
 
359
380
  | | Subprocess (default) | HTTP mode |
@@ -361,9 +382,10 @@ The dashboard injects `x-elasticdash-run-id` and `x-elasticdash-server` headers
361
382
  | Works with simple apps | Yes | Yes |
362
383
  | Works with Next.js / Remix | No | Yes |
363
384
  | Requires dev server running | No | Yes |
364
- | App code changes needed | Extract handler to `ed_workflows.ts` | Add `initHttpRunContext` to request handler |
385
+ | App code changes needed | Extract handler to `ed_workflows.ts` | Add `initHttpRunContext` to request handler (or use env vars for auto-detect) |
365
386
  | AI / tool call observability | Automatic via interceptors | Via `wrapAI` / `wrapTool` push |
366
- | Step freezing / breakpoints | Yes | Yes (requires `initHttpRunContext`) |
387
+ | Step freezing / breakpoints | Yes | Yes (`initHttpRunContext`, or `ELASTICDASH_SERVER` + `ELASTICDASH_RUN_ID` env vars) |
388
+ | LLM response mocking | Yes (via `aiMockConfig`) | Yes (via frozen AI events) |
367
389
 
368
390
  ---
369
391
 
@@ -420,16 +442,25 @@ reportResults(results)
420
442
  **HTTP mode context (call inside your request handler):**
421
443
 
422
444
  ```ts
423
- import { initHttpRunContext } from 'elasticdash-test'
445
+ import { initHttpRunContext, setHttpRunContext } from 'elasticdash-test'
424
446
 
425
447
  // Async — fetches frozen steps from dashboard to enable step freezing/breakpoints
426
448
  await initHttpRunContext(runId, dashboardUrl)
427
449
 
428
450
  // Synchronous alternative — observability only, no step freezing
429
- import { setHttpRunContext } from 'elasticdash-test'
430
451
  setHttpRunContext(runId, dashboardUrl)
431
452
  ```
432
453
 
454
+ **Dashboard auto-detection (env var mode — no code changes needed):**
455
+
456
+ ```bash
457
+ # Set before starting your server or script
458
+ ELASTICDASH_SERVER=http://localhost:4573 # required
459
+ ELASTICDASH_RUN_ID=<run-id-from-dashboard> # optional, enables step freezing
460
+ ```
461
+
462
+ `wrapTool` and `wrapAI` will auto-connect on their first call. See [Dashboard Auto-Detection](#dashboard-auto-detection-env-var-mode) for details.
463
+
433
464
  ---
434
465
 
435
466
  ## License
@@ -40,6 +40,15 @@ export interface ToolMockEntry {
40
40
  export interface ToolMockConfig {
41
41
  [toolName: string]: ToolMockEntry;
42
42
  }
43
+ /** Per-model AI mock configuration sent from the dashboard UI */
44
+ export interface AIMockEntry {
45
+ mode: 'live' | 'mock-all' | 'mock-specific';
46
+ callIndices?: number[];
47
+ mockData?: Record<number, unknown>;
48
+ }
49
+ export interface AIMockConfig {
50
+ [modelName: string]: AIMockEntry;
51
+ }
43
52
  export interface HttpWorkflowConfig {
44
53
  mode: 'http';
45
54
  url: string;
@@ -1 +1 @@
1
- {"version":3,"file":"dashboard-server.d.ts","sourceRoot":"","sources":["../src/dashboard-server.ts"],"names":[],"mappings":"AAeA,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,OAAO,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,OAAO,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,YAAY,EAAE,CAAA;IACzB,KAAK,EAAE,QAAQ,EAAE,CAAA;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,QAAQ,CAAC,EAAE,OAAO,CAAA;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACvB;AA2CD,6DAA6D;AAC7D,MAAM,WAAW,aAAa;IAC5B,oHAAoH;IACpH,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,eAAe,CAAA;IAC3C,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,wEAAwE;IACxE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,CAAA;CAClC;AA+1FD,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAChC,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IACtC,cAAc,CAAC,EAAE,kBAAkB,GAAG,MAAM,CAAA;CAC7C;AA0ID;;GAEG;AACH,wBAAsB,oBAAoB,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,sBAA2B,GACnC,OAAO,CAAC,eAAe,CAAC,CAoZ1B;AAiFD,eAAO,MAAM,aAAa,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAa,CAAC"}
1
+ {"version":3,"file":"dashboard-server.d.ts","sourceRoot":"","sources":["../src/dashboard-server.ts"],"names":[],"mappings":"AAeA,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,OAAO,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,OAAO,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,YAAY,EAAE,CAAA;IACzB,KAAK,EAAE,QAAQ,EAAE,CAAA;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,QAAQ,CAAC,EAAE,OAAO,CAAA;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACvB;AA2CD,6DAA6D;AAC7D,MAAM,WAAW,aAAa;IAC5B,oHAAoH;IACpH,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,eAAe,CAAA;IAC3C,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,wEAAwE;IACxE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,CAAA;CAClC;AAED,iEAAiE;AACjE,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,eAAe,CAAA;IAC3C,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,YAAY;IAC3B,CAAC,SAAS,EAAE,MAAM,GAAG,WAAW,CAAA;CACjC;AAqgGD,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAChC,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IACtC,cAAc,CAAC,EAAE,kBAAkB,GAAG,MAAM,CAAA;CAC7C;AA4ID;;GAEG;AACH,wBAAsB,oBAAoB,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,sBAA2B,GACnC,OAAO,CAAC,eAAe,CAAC,CA4b1B;AAiFD,eAAO,MAAM,aAAa,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAa,CAAC"}
@@ -311,6 +311,8 @@ function runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowN
311
311
  ...(options?.history !== undefined ? { history: options.history } : {}),
312
312
  ...(options?.agentState !== undefined ? { agentState: options.agentState } : {}),
313
313
  ...(options?.toolMockConfig !== undefined ? { toolMockConfig: options.toolMockConfig } : {}),
314
+ ...(options?.aiMockConfig !== undefined ? { aiMockConfig: options.aiMockConfig } : {}),
315
+ ...(options?.promptMockConfig !== undefined ? { promptMockConfig: options.promptMockConfig } : {}),
314
316
  });
315
317
  child.stdin.write(payload);
316
318
  child.stdin.end(); // Always close stdin to avoid subprocess hang
@@ -633,6 +635,14 @@ async function validateWorkflowRuns(cwd, body) {
633
635
  const toolMockConfig = body.toolMockConfig && typeof body.toolMockConfig === 'object' && !Array.isArray(body.toolMockConfig)
634
636
  ? body.toolMockConfig
635
637
  : undefined;
638
+ // Parse AI mock config if provided
639
+ const aiMockConfig = body.aiMockConfig && typeof body.aiMockConfig === 'object' && !Array.isArray(body.aiMockConfig)
640
+ ? body.aiMockConfig
641
+ : undefined;
642
+ // Parse prompt mock config if provided
643
+ const promptMockConfig = body.promptMockConfig && typeof body.promptMockConfig === 'object' && !Array.isArray(body.promptMockConfig)
644
+ ? body.promptMockConfig
645
+ : undefined;
636
646
  const workflowsModulePath = resolveWorkflowModule(cwd);
637
647
  if (!workflowsModulePath) {
638
648
  return {
@@ -648,7 +658,7 @@ async function validateWorkflowRuns(cwd, body) {
648
658
  console.log(`[elasticdash] Running workflow "${workflowName}" ${runCount} time(s) in ${mode} mode via subprocess`);
649
659
  async function runOne(runNumber) {
650
660
  console.log(`[elasticdash] === Run ${runNumber}: Starting workflow "${workflowName}" ===`);
651
- const result = await runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowName, workflowArgs, workflowInput, toolMockConfig ? { toolMockConfig } : undefined)
661
+ const result = await runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowName, workflowArgs, workflowInput, (toolMockConfig || aiMockConfig || promptMockConfig) ? { ...(toolMockConfig ? { toolMockConfig } : {}), ...(aiMockConfig ? { aiMockConfig } : {}), ...(promptMockConfig ? { promptMockConfig } : {}) } : undefined)
652
662
  .catch(err => {
653
663
  throw { ok: false, error: `Workflow subprocess failed: ${formatError(err)}` };
654
664
  });
@@ -1267,6 +1277,10 @@ function getDashboardHtml() {
1267
1277
  // ---- Tool Mock Config State ----
1268
1278
  window._toolMockConfig = {}; // { toolName: { mode: 'live'|'mock-all'|'mock-specific', callIndices: [], mockData: {} } }
1269
1279
 
1280
+ // ---- Prompt Mock Config State ----
1281
+ // { [originalSystemPrompt]: newSystemPrompt } — only keys where user enabled the override
1282
+ window._promptMockConfig = {};
1283
+
1270
1284
  function getToolsFromTrace() {
1271
1285
  // Extract unique tool names and their call details from the uploaded trace observations
1272
1286
  const toolCalls = {};
@@ -1422,6 +1436,136 @@ function getDashboardHtml() {
1422
1436
  return html;
1423
1437
  }
1424
1438
 
1439
+ // ---- Prompt Mock Helpers ----
1440
+
1441
+ /** Extract the system prompt string from an LLM call input object or JSON string. */
1442
+ function extractSystemPromptFromInput(input) {
1443
+ // Input may arrive as a JSON-encoded string (e.g. from Langfuse traces)
1444
+ if (typeof input === 'string') {
1445
+ try { input = JSON.parse(input); } catch(e) { return null; }
1446
+ }
1447
+ if (!input || typeof input !== 'object') return null;
1448
+ // Anthropic style: { system: "...", messages: [...] }
1449
+ if (typeof input.system === 'string') return input.system;
1450
+ // Custom wrapAI callers: { systemPrompt: "...", messages: [...] }
1451
+ if (typeof input.systemPrompt === 'string' && input.systemPrompt.length > 0) return input.systemPrompt;
1452
+ // OpenAI / plain array: messages with role === "system"
1453
+ var msgs = Array.isArray(input.messages) ? input.messages : (Array.isArray(input) ? input : null);
1454
+ if (msgs) {
1455
+ for (var i = 0; i < msgs.length; i++) {
1456
+ var m = msgs[i];
1457
+ if (m && typeof m === 'object' && m.role === 'system' && typeof m.content === 'string') {
1458
+ return m.content;
1459
+ }
1460
+ }
1461
+ }
1462
+ return null;
1463
+ }
1464
+
1465
+ /**
1466
+ * Returns an array of unique system prompts observed across all GENERATION observations.
1467
+ * Each entry: { systemPrompt, modelName, count, rowIndex }
1468
+ */
1469
+ function getSystemPromptsFromTrace() {
1470
+ var seen = []; // [{ systemPrompt, modelName, count }]
1471
+ var seenMap = {}; // systemPrompt -> index in seen
1472
+ currentObservations.forEach(function(obs) {
1473
+ if (obs.type !== 'GENERATION') return;
1474
+ var sp = extractSystemPromptFromInput(obs.input);
1475
+ if (!sp) return;
1476
+ if (seenMap[sp] === undefined) {
1477
+ seenMap[sp] = seen.length;
1478
+ seen.push({ systemPrompt: sp, modelName: obs.model || obs.name || '(unknown)', count: 0 });
1479
+ }
1480
+ seen[seenMap[sp]].count++;
1481
+ });
1482
+ return seen.map(function(e, i) { return Object.assign({}, e, { rowIndex: i }); });
1483
+ }
1484
+
1485
+ function renderPromptMockSection() {
1486
+ const prompts = getSystemPromptsFromTrace();
1487
+ if (prompts.length === 0) {
1488
+ return '<div style="color:#999;font-size:13px;padding:6px 0;">No system prompts detected in trace. Only AI calls with a system prompt can be mocked here.</div>';
1489
+ }
1490
+
1491
+ let html = '<div style="max-height:360px;overflow-y:auto;border:1px solid #e0e0e0;border-radius:6px;">';
1492
+ html += '<table style="width:100%;border-collapse:collapse;font-size:13px;">';
1493
+ html += '<thead><tr style="background:#f5f5f5;">';
1494
+ html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;width:24px;"></th>';
1495
+ html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;">Model</th>';
1496
+ html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;">Uses</th>';
1497
+ html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;">System Prompt (override applies to all calls using this prompt)</th>';
1498
+ html += '</tr></thead><tbody>';
1499
+
1500
+ prompts.forEach(function(row) {
1501
+ const key = row.systemPrompt;
1502
+ const isEnabled = window._promptMockConfig[key] !== undefined;
1503
+ const currentVal = isEnabled ? window._promptMockConfig[key] : row.systemPrompt;
1504
+ const preview = key.length > 80 ? key.slice(0, 80) + '…' : key;
1505
+
1506
+ html += '<tr class="prompt-mock-row" data-row-index="' + row.rowIndex + '" style="border-bottom:1px solid #f0f0f0;vertical-align:top;">';
1507
+
1508
+ // Checkbox column
1509
+ html += '<td style="padding:8px 10px;white-space:nowrap;">';
1510
+ html += '<input type="checkbox" class="prompt-mock-enable" title="Override this system prompt"' + (isEnabled ? ' checked' : '') + ' onchange="window.onPromptMockToggle(' + row.rowIndex + ', this.checked)">';
1511
+ html += '</td>';
1512
+
1513
+ // Model column
1514
+ html += '<td style="padding:8px 10px;font-family:Monaco,monospace;font-size:12px;white-space:nowrap;">' + esc(row.modelName) + '</td>';
1515
+
1516
+ // Uses count column
1517
+ html += '<td style="padding:8px 10px;color:#555;white-space:nowrap;">' + row.count + 'x</td>';
1518
+
1519
+ // System prompt column
1520
+ html += '<td style="padding:8px 10px;width:100%;">';
1521
+ if (!isEnabled) {
1522
+ html += '<div style="font-size:11px;color:#888;font-style:italic;font-family:Monaco,monospace;max-width:380px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;" title="' + esc(key) + '">' + esc(preview) + '</div>';
1523
+ } else {
1524
+ html += '<textarea class="prompt-mock-input" data-row-index="' + row.rowIndex + '" style="width:100%;box-sizing:border-box;font-size:11px;font-family:Monaco,monospace;padding:4px;border:1px solid #ddd;border-radius:4px;min-height:72px;resize:vertical;" oninput="window.onPromptMockInput(' + row.rowIndex + ', this.value)">' + esc(currentVal) + '</textarea>';
1525
+ }
1526
+ html += '</td>';
1527
+
1528
+ html += '</tr>';
1529
+ });
1530
+
1531
+ html += '</tbody></table></div>';
1532
+ return html;
1533
+ }
1534
+
1535
+ function buildPromptMockConfigFromUI() {
1536
+ // Return _promptMockConfig as-is (string key → string value), filtering out blank values
1537
+ const config = {};
1538
+ Object.keys(window._promptMockConfig).forEach(function(key) {
1539
+ const val = window._promptMockConfig[key];
1540
+ if (typeof val === 'string' && val.trim()) config[key] = val;
1541
+ });
1542
+ return config;
1543
+ }
1544
+
1545
+ window.onPromptMockToggle = function(rowIndex, enabled) {
1546
+ const prompts = getSystemPromptsFromTrace();
1547
+ const row = prompts[rowIndex];
1548
+ if (!row) return;
1549
+ const key = row.systemPrompt;
1550
+ if (!enabled) {
1551
+ delete window._promptMockConfig[key];
1552
+ } else {
1553
+ // Pre-fill with the original system prompt so user can edit from there
1554
+ if (window._promptMockConfig[key] === undefined) {
1555
+ window._promptMockConfig[key] = row.systemPrompt;
1556
+ }
1557
+ }
1558
+ const container = document.getElementById('promptMockContainer');
1559
+ if (container) container.innerHTML = renderPromptMockSection();
1560
+ };
1561
+
1562
+ window.onPromptMockInput = function(rowIndex, value) {
1563
+ const prompts = getSystemPromptsFromTrace();
1564
+ const row = prompts[rowIndex];
1565
+ if (!row) return;
1566
+ window._promptMockConfig[row.systemPrompt] = value;
1567
+ };
1568
+
1425
1569
  window.onToolMockModeChange = function(toolName, mode) {
1426
1570
  if (!window._toolMockConfig[toolName]) window._toolMockConfig[toolName] = { mode: 'live' };
1427
1571
  // Save current mock data before switching
@@ -1452,7 +1596,8 @@ function getDashboardHtml() {
1452
1596
 
1453
1597
  window.openLiveValidationDialog = function() {
1454
1598
  if (window.liveValidationDialog) return;
1455
- window._toolMockConfig = {}; // Reset mock config each time dialog opens
1599
+ window._toolMockConfig = {}; // Reset mock configs each time dialog opens
1600
+ window._promptMockConfig = {};
1456
1601
 
1457
1602
  const hasTraceTools = currentObservations.some(function(o) { return o.type === 'TOOL'; });
1458
1603
  const hasRegisteredTools = codeIndex.tools && codeIndex.tools.length > 0;
@@ -1478,8 +1623,15 @@ function getDashboardHtml() {
1478
1623
  Show all registered tools
1479
1624
  </label>
1480
1625
  </div>
1481
- <div id="toolMockContainer"></div>
1626
+ <div id="toolMockContainer" style="max-height:160px;overflow-y:auto;"></div>
1482
1627
  </div>\` : ''}
1628
+ <div style="border-top:1px solid #eee;padding-top:16px;margin-bottom:16px;">
1629
+ <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:6px;">
1630
+ <div style="font-size:15px;font-weight:600;">Prompt Mocking</div>
1631
+ <span style="font-size:12px;color:#888;">Check a row to replace the system prompt for all calls using it</span>
1632
+ </div>
1633
+ <div id="promptMockContainer" style="max-height:160px;overflow-y:auto;"></div>
1634
+ </div>
1483
1635
  <div style="display:flex;gap:12px;justify-content:space-between;align-items:center;">
1484
1636
  <span id="liveValidationProgress" style="font-size:14px;color:#555;"></span>
1485
1637
  <div style="display:flex;gap:12px;">
@@ -1490,11 +1642,15 @@ function getDashboardHtml() {
1490
1642
  </div>
1491
1643
  \`;
1492
1644
  document.body.appendChild(window.liveValidationDialog);
1493
- // Render the tool mock section after DOM insertion
1645
+ // Render mock sections after DOM insertion
1494
1646
  const toolMockContainer = document.getElementById('toolMockContainer');
1495
1647
  if (toolMockContainer) {
1496
1648
  toolMockContainer.innerHTML = renderToolMockSection(false);
1497
1649
  }
1650
+ const promptMockContainer = document.getElementById('promptMockContainer');
1651
+ if (promptMockContainer) {
1652
+ promptMockContainer.innerHTML = renderPromptMockSection();
1653
+ }
1498
1654
  document.getElementById('cancelLiveValidation').onclick = function() {
1499
1655
  window.liveValidationDialog.remove();
1500
1656
  window.liveValidationDialog = null;
@@ -1503,9 +1659,11 @@ function getDashboardHtml() {
1503
1659
  const count = parseInt(document.getElementById('liveValidationCount').value, 10);
1504
1660
  const sequential = document.getElementById('liveValidationSequential').checked;
1505
1661
  if (count >= 1) {
1506
- // Build the tool mock config from UI state and persist for "Run from here"
1662
+ // Build mock configs from UI state and persist for "Run from here"
1507
1663
  const toolMockConfig = buildToolMockConfigFromUI();
1508
1664
  window._toolMockConfig = toolMockConfig;
1665
+ const promptMockConfig = buildPromptMockConfigFromUI();
1666
+ window._promptMockConfig = promptMockConfig;
1509
1667
  const submitBtn = document.getElementById('submitLiveValidation');
1510
1668
  submitBtn.disabled = true;
1511
1669
  submitBtn.textContent = 'Validating...';
@@ -1542,7 +1700,7 @@ function getDashboardHtml() {
1542
1700
  const collectedTraces = [];
1543
1701
  let fatalError = null;
1544
1702
  for (let i = 0; i < count; i++) {
1545
- const singlePayload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations, toolMockConfig };
1703
+ const singlePayload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations, toolMockConfig, promptMockConfig };
1546
1704
  try {
1547
1705
  const response = await fetch('/api/validate-workflow', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(singlePayload) });
1548
1706
  const data = await response.json();
@@ -1561,7 +1719,7 @@ function getDashboardHtml() {
1561
1719
  } else {
1562
1720
  // Parallel mode: single bulk request
1563
1721
  if (progressEl) progressEl.textContent = \`Running \${count} workflow run\${count !== 1 ? 's' : ''} in parallel…\`;
1564
- const payload = { workflowName: selectedWorkflow?.name, runCount: count, sequential: false, observations: currentObservations, toolMockConfig };
1722
+ const payload = { workflowName: selectedWorkflow?.name, runCount: count, sequential: false, observations: currentObservations, toolMockConfig, promptMockConfig };
1565
1723
  try {
1566
1724
  const response = await fetch('/api/validate-workflow', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payload) });
1567
1725
  const data = await response.json();
@@ -2096,7 +2254,7 @@ function getDashboardHtml() {
2096
2254
  traceLayoutEl.classList.remove("step-4");
2097
2255
  let headerHtml = '';
2098
2256
  if (currentStep === 3) {
2099
- headerHtml = '<tr><th style="width: 40px;">Check</th><th>Name</th><th>Type</th></tr>';
2257
+ headerHtml = '<tr><th style="width: 40px;">Check</th><th>Name</th><th>Type</th><th>Duration</th></tr>';
2100
2258
  }
2101
2259
  traceLayoutEl.innerHTML = \`
2102
2260
  <div class="trace-left">
@@ -2265,6 +2423,7 @@ function getDashboardHtml() {
2265
2423
  snapshotId: liveTrace.snapshotId,
2266
2424
  observations: currentObservations,
2267
2425
  toolMockConfig: window._toolMockConfig || {},
2426
+ promptMockConfig: window._promptMockConfig || {},
2268
2427
  };
2269
2428
  const response = await fetch('/api/run-from-breakpoint', {
2270
2429
  method: 'POST',
@@ -2296,7 +2455,7 @@ function getDashboardHtml() {
2296
2455
  step5RerunInFlight = true;
2297
2456
  renderObservationTable();
2298
2457
  try {
2299
- const payload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations };
2458
+ const payload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations, toolMockConfig: window._toolMockConfig || {}, promptMockConfig: window._promptMockConfig || {} };
2300
2459
  const response = await fetch('/api/validate-workflow', {
2301
2460
  method: 'POST',
2302
2461
  headers: { 'Content-Type': 'application/json' },
@@ -2347,6 +2506,7 @@ function getDashboardHtml() {
2347
2506
  },
2348
2507
  snapshotId: liveTrace.snapshotId,
2349
2508
  toolMockConfig: window._toolMockConfig || {},
2509
+ promptMockConfig: window._promptMockConfig || {},
2350
2510
  };
2351
2511
  const response = await fetch('/api/resume-agent-from-task', {
2352
2512
  method: 'POST',
@@ -2867,11 +3027,11 @@ function resolveTemplateValue(value, input) {
2867
3027
  return value;
2868
3028
  }
2869
3029
  async function runHttpWorkflow(opts) {
2870
- const { workflowName, workflowInput, frozenEvents = [], pushedEvents, runConfigs, config, dashboardPort } = opts;
3030
+ const { workflowName, workflowInput, frozenEvents = [], promptMocks = {}, pushedEvents, runConfigs, config, dashboardPort } = opts;
2871
3031
  const runId = randomUUID();
2872
- // Register run config so the user's server can fetch frozen events
3032
+ // Register run config so the user's server can fetch frozen events and prompt mocks
2873
3033
  pushedEvents.set(runId, []);
2874
- runConfigs.set(runId, { frozenEvents });
3034
+ runConfigs.set(runId, { frozenEvents, promptMocks });
2875
3035
  try {
2876
3036
  const parsedInput = parseObservationInput(workflowInput);
2877
3037
  const inputObj = parsedInput && typeof parsedInput === 'object' && !Array.isArray(parsedInput) ? parsedInput : {};
@@ -2928,6 +3088,7 @@ async function runHttpWorkflow(opts) {
2928
3088
  const drainMs = parseInt(process.env.ELASTICDASH_HTTP_DRAIN_MS ?? '300', 10);
2929
3089
  await new Promise(resolve => setTimeout(resolve, drainMs));
2930
3090
  const events = (pushedEvents.get(runId) ?? []).sort((a, b) => a.timestamp - b.timestamp);
3091
+ console.log(`[elasticdash] runHttpWorkflow drain complete: ${events.length} events collected for runId=${runId}`);
2931
3092
  const workflowTrace = { traceId: runId, events };
2932
3093
  return { ok: true, currentOutput, workflowTrace, steps: [], llmSteps: [], toolCalls: [], customSteps: [] };
2933
3094
  }
@@ -2948,8 +3109,8 @@ export async function startDashboardServer(cwd, options = {}) {
2948
3109
  // In-memory store for telemetry events pushed from HTTP workflow mode runs.
2949
3110
  // Maps runId -> accumulated WorkflowEvent[]
2950
3111
  const pushedEvents = new Map();
2951
- // Per-run config for HTTP workflow mode (frozen events for replay).
2952
- // Maps runId -> { frozenEvents }
3112
+ // Per-run config for HTTP workflow mode (frozen events + prompt mocks for replay).
3113
+ // Maps runId -> { frozenEvents, promptMocks }
2953
3114
  const runConfigs = new Map();
2954
3115
  // Scan workflows, tools, and config once at startup
2955
3116
  const workflows = scanWorkflows(cwd);
@@ -3015,10 +3176,14 @@ export async function startDashboardServer(cwd, options = {}) {
3015
3176
  const resolvedInput = resolveWorkflowArgsFromObservations(body, workflowName);
3016
3177
  const workflowInput = resolvedInput.input ?? null;
3017
3178
  const traces = [];
3179
+ const promptMocks = body.promptMockConfig && typeof body.promptMockConfig === 'object' && !Array.isArray(body.promptMockConfig)
3180
+ ? body.promptMockConfig
3181
+ : {};
3018
3182
  const runOne = async (runNumber) => {
3019
3183
  const result = await runHttpWorkflow({
3020
3184
  workflowName, workflowInput, pushedEvents, runConfigs,
3021
3185
  config: httpConfig, dashboardPort: port,
3186
+ promptMocks,
3022
3187
  });
3023
3188
  const traceStub = { getSteps: () => [], getLLMSteps: () => [], getToolCalls: () => [], getCustomSteps: () => [], recordLLMStep: () => { }, recordToolCall: () => { }, recordCustomStep: () => { } };
3024
3189
  return {
@@ -3085,12 +3250,15 @@ export async function startDashboardServer(cwd, options = {}) {
3085
3250
  const frozenEventIds = new Set(frozenEvents.map((e) => e.id));
3086
3251
  const httpConfig = elasticdashConfig.workflows?.[workflowName];
3087
3252
  if (httpConfig?.mode === 'http') {
3088
- // HTTP workflow mode — call user's dev server with frozen events for step replay
3253
+ // HTTP workflow mode — call user's dev server with frozen events + prompt mocks for step replay
3254
+ const bpPromptMocks = body.promptMockConfig && typeof body.promptMockConfig === 'object' && !Array.isArray(body.promptMockConfig)
3255
+ ? body.promptMockConfig
3256
+ : {};
3089
3257
  console.log(`[elasticdash] Run from breakpoint (HTTP mode): workflow="${workflowName}" checkpoint=${checkpoint} frozen=${frozenEvents.length}`);
3090
3258
  const result = await runHttpWorkflow({
3091
3259
  workflowName, workflowInput, pushedEvents, runConfigs,
3092
3260
  config: httpConfig, dashboardPort: port,
3093
- frozenEvents,
3261
+ frozenEvents, promptMocks: bpPromptMocks,
3094
3262
  });
3095
3263
  const traceStub = { getSteps: () => [], getLLMSteps: () => [], getToolCalls: () => [], getCustomSteps: () => [], recordLLMStep: () => { }, recordToolCall: () => { }, recordCustomStep: () => { } };
3096
3264
  const snapshotId = result.workflowTrace ? saveSnapshot(cwd, result.workflowTrace) : undefined;
@@ -3118,8 +3286,14 @@ export async function startDashboardServer(cwd, options = {}) {
3118
3286
  const toolMockConfig = body.toolMockConfig && typeof body.toolMockConfig === 'object' && !Array.isArray(body.toolMockConfig)
3119
3287
  ? body.toolMockConfig
3120
3288
  : undefined;
3289
+ const aiMockConfig = body.aiMockConfig && typeof body.aiMockConfig === 'object' && !Array.isArray(body.aiMockConfig)
3290
+ ? body.aiMockConfig
3291
+ : undefined;
3292
+ const promptMockConfig = body.promptMockConfig && typeof body.promptMockConfig === 'object' && !Array.isArray(body.promptMockConfig)
3293
+ ? body.promptMockConfig
3294
+ : undefined;
3121
3295
  console.log(`[elasticdash] Run from breakpoint: workflow="${workflowName}" checkpoint=${checkpoint} historyLen=${history.length}`);
3122
- const result = await runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowName, workflowArgs, workflowInput, { replayMode: true, checkpoint, history, ...(toolMockConfig ? { toolMockConfig } : {}) });
3296
+ const result = await runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowName, workflowArgs, workflowInput, { replayMode: true, checkpoint, history, ...(toolMockConfig ? { toolMockConfig } : {}), ...(aiMockConfig ? { aiMockConfig } : {}), ...(promptMockConfig ? { promptMockConfig } : {}) });
3123
3297
  const traceStub = {
3124
3298
  getSteps: () => (result.steps ?? []),
3125
3299
  getLLMSteps: () => (result.llmSteps ?? []),
@@ -3190,8 +3364,14 @@ export async function startDashboardServer(cwd, options = {}) {
3190
3364
  const toolMockConfig = body.toolMockConfig && typeof body.toolMockConfig === 'object' && !Array.isArray(body.toolMockConfig)
3191
3365
  ? body.toolMockConfig
3192
3366
  : undefined;
3367
+ const aiMockConfig = body.aiMockConfig && typeof body.aiMockConfig === 'object' && !Array.isArray(body.aiMockConfig)
3368
+ ? body.aiMockConfig
3369
+ : undefined;
3370
+ const promptMockConfig = body.promptMockConfig && typeof body.promptMockConfig === 'object' && !Array.isArray(body.promptMockConfig)
3371
+ ? body.promptMockConfig
3372
+ : undefined;
3193
3373
  console.log(`[elasticdash] Resume agent from task: workflow="${workflowName}" taskIndex=${taskIndex}`);
3194
- const result = await runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowName, [], null, { replayMode: history.length > 0, checkpoint: 0, history, agentState, ...(toolMockConfig ? { toolMockConfig } : {}) });
3374
+ const result = await runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowName, [], null, { replayMode: history.length > 0, checkpoint: 0, history, agentState, ...(toolMockConfig ? { toolMockConfig } : {}), ...(aiMockConfig ? { aiMockConfig } : {}), ...(promptMockConfig ? { promptMockConfig } : {}) });
3195
3375
  const traceStub = {
3196
3376
  getSteps: () => (result.steps ?? []),
3197
3377
  getLLMSteps: () => (result.llmSteps ?? []),
@@ -3242,7 +3422,7 @@ export async function startDashboardServer(cwd, options = {}) {
3242
3422
  const runId = url.pathname.slice('/api/run-configs/'.length);
3243
3423
  const cfg = runConfigs.get(runId);
3244
3424
  res.writeHead(cfg ? 200 : 404, { 'Content-Type': 'application/json' });
3245
- res.end(JSON.stringify({ frozenEvents: cfg?.frozenEvents ?? [] }));
3425
+ res.end(JSON.stringify({ frozenEvents: cfg?.frozenEvents ?? [], promptMocks: cfg?.promptMocks ?? {} }));
3246
3426
  }
3247
3427
  else if (url.pathname === '/api/trace-events' && req.method === 'POST') {
3248
3428
  // Receive telemetry events pushed from wrapAI / wrapTool in HTTP workflow mode
@@ -3255,9 +3435,16 @@ export async function startDashboardServer(cwd, options = {}) {
3255
3435
  res.end(JSON.stringify({ ok: false, error: 'runId (string) and event (object) are required.' }));
3256
3436
  return;
3257
3437
  }
3258
- const existing = pushedEvents.get(body.runId) ?? [];
3259
- existing.push(body.event);
3260
- pushedEvents.set(body.runId, existing);
3438
+ const existing = pushedEvents.get(body.runId);
3439
+ if (!existing) {
3440
+ console.log(`[elasticdash] /api/trace-events: unknown runId=${body.runId}, known runIds=[${[...pushedEvents.keys()].join(',')}]`);
3441
+ res.writeHead(404, { 'Content-Type': 'application/json' });
3442
+ res.end(JSON.stringify({ ok: false, error: 'unknown runId' }));
3443
+ return;
3444
+ }
3445
+ const evt = body.event;
3446
+ existing.push(evt);
3447
+ console.log(`[elasticdash] /api/trace-events: stored event type=${evt.type} name=${('name' in evt ? evt.name : '?')} runId=${body.runId} total=${existing.length}`);
3261
3448
  res.writeHead(200, { 'Content-Type': 'application/json' });
3262
3449
  res.end(JSON.stringify({ ok: true }));
3263
3450
  }