elasticdash-test 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/README.md +36 -5
  2. package/dist/dashboard-server.d.ts +9 -0
  3. package/dist/dashboard-server.d.ts.map +1 -1
  4. package/dist/dashboard-server.js +209 -22
  5. package/dist/dashboard-server.js.map +1 -1
  6. package/dist/html/dashboard.html +158 -8
  7. package/dist/index.cjs +828 -108
  8. package/dist/index.d.ts +3 -2
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +2 -2
  11. package/dist/index.js.map +1 -1
  12. package/dist/interceptors/telemetry-push.d.ts +47 -0
  13. package/dist/interceptors/telemetry-push.d.ts.map +1 -1
  14. package/dist/interceptors/telemetry-push.js +139 -6
  15. package/dist/interceptors/telemetry-push.js.map +1 -1
  16. package/dist/interceptors/tool.d.ts.map +1 -1
  17. package/dist/interceptors/tool.js +2 -1
  18. package/dist/interceptors/tool.js.map +1 -1
  19. package/dist/interceptors/workflow-ai.d.ts.map +1 -1
  20. package/dist/interceptors/workflow-ai.js +28 -4
  21. package/dist/interceptors/workflow-ai.js.map +1 -1
  22. package/dist/internals/mock-resolver.d.ts +42 -5
  23. package/dist/internals/mock-resolver.d.ts.map +1 -1
  24. package/dist/internals/mock-resolver.js +124 -5
  25. package/dist/internals/mock-resolver.js.map +1 -1
  26. package/dist/workflow-runner-worker.js +8 -2
  27. package/dist/workflow-runner-worker.js.map +1 -1
  28. package/package.json +3 -2
  29. package/src/dashboard-server.ts +86 -17
  30. package/src/html/dashboard.html +158 -8
  31. package/src/index.ts +3 -2
  32. package/src/interceptors/telemetry-push.ts +158 -7
  33. package/src/interceptors/tool.ts +2 -1
  34. package/src/interceptors/workflow-ai.ts +30 -4
  35. package/src/internals/mock-resolver.ts +131 -5
  36. package/src/workflow-runner-worker.ts +23 -2
@@ -313,6 +313,10 @@
313
313
  // ---- Tool Mock Config State ----
314
314
  window._toolMockConfig = {}; // { toolName: { mode: 'live'|'mock-all'|'mock-specific', callIndices: [], mockData: {} } }
315
315
 
316
+ // ---- Prompt Mock Config State ----
317
+ // { [originalSystemPrompt]: newSystemPrompt } — only keys where user enabled the override
318
+ window._promptMockConfig = {};
319
+
316
320
  function getToolsFromTrace() {
317
321
  // Extract unique tool names and their call details from the uploaded trace observations
318
322
  const toolCalls = {};
@@ -468,6 +472,136 @@
468
472
  return html;
469
473
  }
470
474
 
475
+ // ---- Prompt Mock Helpers ----
476
+
477
+ /** Extract the system prompt string from an LLM call input object or JSON string. */
478
+ function extractSystemPromptFromInput(input) {
479
+ // Input may arrive as a JSON-encoded string (e.g. from Langfuse traces)
480
+ if (typeof input === 'string') {
481
+ try { input = JSON.parse(input); } catch(e) { return null; }
482
+ }
483
+ if (!input || typeof input !== 'object') return null;
484
+ // Anthropic style: { system: "...", messages: [...] }
485
+ if (typeof input.system === 'string') return input.system;
486
+ // Custom wrapAI callers: { systemPrompt: "...", messages: [...] }
487
+ if (typeof input.systemPrompt === 'string' && input.systemPrompt.length > 0) return input.systemPrompt;
488
+ // OpenAI / plain array: messages with role === "system"
489
+ var msgs = Array.isArray(input.messages) ? input.messages : (Array.isArray(input) ? input : null);
490
+ if (msgs) {
491
+ for (var i = 0; i < msgs.length; i++) {
492
+ var m = msgs[i];
493
+ if (m && typeof m === 'object' && m.role === 'system' && typeof m.content === 'string') {
494
+ return m.content;
495
+ }
496
+ }
497
+ }
498
+ return null;
499
+ }
500
+
501
+ /**
502
+ * Returns an array of unique system prompts observed across all GENERATION observations.
503
+ * Each entry: { systemPrompt, modelName, count, rowIndex }
504
+ */
505
+ function getSystemPromptsFromTrace() {
506
+ var seen = []; // [{ systemPrompt, modelName, count }]
507
+ var seenMap = {}; // systemPrompt -> index in seen
508
+ currentObservations.forEach(function(obs) {
509
+ if (obs.type !== 'GENERATION') return;
510
+ var sp = extractSystemPromptFromInput(obs.input);
511
+ if (!sp) return;
512
+ if (seenMap[sp] === undefined) {
513
+ seenMap[sp] = seen.length;
514
+ seen.push({ systemPrompt: sp, modelName: obs.model || obs.name || '(unknown)', count: 0 });
515
+ }
516
+ seen[seenMap[sp]].count++;
517
+ });
518
+ return seen.map(function(e, i) { return Object.assign({}, e, { rowIndex: i }); });
519
+ }
520
+
521
+ function renderPromptMockSection() {
522
+ const prompts = getSystemPromptsFromTrace();
523
+ if (prompts.length === 0) {
524
+ return '<div style="color:#999;font-size:13px;padding:6px 0;">No system prompts detected in trace. Only AI calls with a system prompt can be mocked here.</div>';
525
+ }
526
+
527
+ let html = '<div style="max-height:360px;overflow-y:auto;border:1px solid #e0e0e0;border-radius:6px;">';
528
+ html += '<table style="width:100%;border-collapse:collapse;font-size:13px;">';
529
+ html += '<thead><tr style="background:#f5f5f5;">';
530
+ html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;width:24px;"></th>';
531
+ html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;">Model</th>';
532
+ html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;">Uses</th>';
533
+ html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;">System Prompt (override applies to all calls using this prompt)</th>';
534
+ html += '</tr></thead><tbody>';
535
+
536
+ prompts.forEach(function(row) {
537
+ const key = row.systemPrompt;
538
+ const isEnabled = window._promptMockConfig[key] !== undefined;
539
+ const currentVal = isEnabled ? window._promptMockConfig[key] : row.systemPrompt;
540
+ const preview = key.length > 80 ? key.slice(0, 80) + '…' : key;
541
+
542
+ html += '<tr class="prompt-mock-row" data-row-index="' + row.rowIndex + '" style="border-bottom:1px solid #f0f0f0;vertical-align:top;">';
543
+
544
+ // Checkbox column
545
+ html += '<td style="padding:8px 10px;white-space:nowrap;">';
546
+ html += '<input type="checkbox" class="prompt-mock-enable" title="Override this system prompt"' + (isEnabled ? ' checked' : '') + ' onchange="window.onPromptMockToggle(' + row.rowIndex + ', this.checked)">';
547
+ html += '</td>';
548
+
549
+ // Model column
550
+ html += '<td style="padding:8px 10px;font-family:Monaco,monospace;font-size:12px;white-space:nowrap;">' + esc(row.modelName) + '</td>';
551
+
552
+ // Uses count column
553
+ html += '<td style="padding:8px 10px;color:#555;white-space:nowrap;">' + row.count + 'x</td>';
554
+
555
+ // System prompt column
556
+ html += '<td style="padding:8px 10px;width:100%;">';
557
+ if (!isEnabled) {
558
+ html += '<div style="font-size:11px;color:#888;font-style:italic;font-family:Monaco,monospace;max-width:380px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;" title="' + esc(key) + '">' + esc(preview) + '</div>';
559
+ } else {
560
+ html += '<textarea class="prompt-mock-input" data-row-index="' + row.rowIndex + '" style="width:100%;box-sizing:border-box;font-size:11px;font-family:Monaco,monospace;padding:4px;border:1px solid #ddd;border-radius:4px;min-height:72px;resize:vertical;" oninput="window.onPromptMockInput(' + row.rowIndex + ', this.value)">' + esc(currentVal) + '</textarea>';
561
+ }
562
+ html += '</td>';
563
+
564
+ html += '</tr>';
565
+ });
566
+
567
+ html += '</tbody></table></div>';
568
+ return html;
569
+ }
570
+
571
+ function buildPromptMockConfigFromUI() {
572
+ // Return _promptMockConfig as-is (string key → string value), filtering out blank values
573
+ const config = {};
574
+ Object.keys(window._promptMockConfig).forEach(function(key) {
575
+ const val = window._promptMockConfig[key];
576
+ if (typeof val === 'string' && val.trim()) config[key] = val;
577
+ });
578
+ return config;
579
+ }
580
+
581
+ window.onPromptMockToggle = function(rowIndex, enabled) {
582
+ const prompts = getSystemPromptsFromTrace();
583
+ const row = prompts[rowIndex];
584
+ if (!row) return;
585
+ const key = row.systemPrompt;
586
+ if (!enabled) {
587
+ delete window._promptMockConfig[key];
588
+ } else {
589
+ // Pre-fill with the original system prompt so user can edit from there
590
+ if (window._promptMockConfig[key] === undefined) {
591
+ window._promptMockConfig[key] = row.systemPrompt;
592
+ }
593
+ }
594
+ const container = document.getElementById('promptMockContainer');
595
+ if (container) container.innerHTML = renderPromptMockSection();
596
+ };
597
+
598
+ window.onPromptMockInput = function(rowIndex, value) {
599
+ const prompts = getSystemPromptsFromTrace();
600
+ const row = prompts[rowIndex];
601
+ if (!row) return;
602
+ window._promptMockConfig[row.systemPrompt] = value;
603
+ };
604
+
471
605
  window.onToolMockModeChange = function(toolName, mode) {
472
606
  if (!window._toolMockConfig[toolName]) window._toolMockConfig[toolName] = { mode: 'live' };
473
607
  // Save current mock data before switching
@@ -498,7 +632,8 @@
498
632
 
499
633
  window.openLiveValidationDialog = function() {
500
634
  if (window.liveValidationDialog) return;
501
- window._toolMockConfig = {}; // Reset mock config each time dialog opens
635
+ window._toolMockConfig = {}; // Reset mock configs each time dialog opens
636
+ window._promptMockConfig = {};
502
637
 
503
638
  const hasTraceTools = currentObservations.some(function(o) { return o.type === 'TOOL'; });
504
639
  const hasRegisteredTools = codeIndex.tools && codeIndex.tools.length > 0;
@@ -524,8 +659,15 @@
524
659
  Show all registered tools
525
660
  </label>
526
661
  </div>
527
- <div id="toolMockContainer"></div>
662
+ <div id="toolMockContainer" style="max-height:160px;overflow-y:auto;"></div>
528
663
  </div>` : ''}
664
+ <div style="border-top:1px solid #eee;padding-top:16px;margin-bottom:16px;">
665
+ <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:6px;">
666
+ <div style="font-size:15px;font-weight:600;">Prompt Mocking</div>
667
+ <span style="font-size:12px;color:#888;">Check a row to replace the system prompt for all calls using it</span>
668
+ </div>
669
+ <div id="promptMockContainer" style="max-height:160px;overflow-y:auto;"></div>
670
+ </div>
529
671
  <div style="display:flex;gap:12px;justify-content:space-between;align-items:center;">
530
672
  <span id="liveValidationProgress" style="font-size:14px;color:#555;"></span>
531
673
  <div style="display:flex;gap:12px;">
@@ -536,11 +678,15 @@
536
678
  </div>
537
679
  `;
538
680
  document.body.appendChild(window.liveValidationDialog);
539
- // Render the tool mock section after DOM insertion
681
+ // Render mock sections after DOM insertion
540
682
  const toolMockContainer = document.getElementById('toolMockContainer');
541
683
  if (toolMockContainer) {
542
684
  toolMockContainer.innerHTML = renderToolMockSection(false);
543
685
  }
686
+ const promptMockContainer = document.getElementById('promptMockContainer');
687
+ if (promptMockContainer) {
688
+ promptMockContainer.innerHTML = renderPromptMockSection();
689
+ }
544
690
  document.getElementById('cancelLiveValidation').onclick = function() {
545
691
  window.liveValidationDialog.remove();
546
692
  window.liveValidationDialog = null;
@@ -549,9 +695,11 @@
549
695
  const count = parseInt(document.getElementById('liveValidationCount').value, 10);
550
696
  const sequential = document.getElementById('liveValidationSequential').checked;
551
697
  if (count >= 1) {
552
- // Build the tool mock config from UI state and persist for "Run from here"
698
+ // Build mock configs from UI state and persist for "Run from here"
553
699
  const toolMockConfig = buildToolMockConfigFromUI();
554
700
  window._toolMockConfig = toolMockConfig;
701
+ const promptMockConfig = buildPromptMockConfigFromUI();
702
+ window._promptMockConfig = promptMockConfig;
555
703
  const submitBtn = document.getElementById('submitLiveValidation');
556
704
  submitBtn.disabled = true;
557
705
  submitBtn.textContent = 'Validating...';
@@ -588,7 +736,7 @@
588
736
  const collectedTraces = [];
589
737
  let fatalError = null;
590
738
  for (let i = 0; i < count; i++) {
591
- const singlePayload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations, toolMockConfig };
739
+ const singlePayload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations, toolMockConfig, promptMockConfig };
592
740
  try {
593
741
  const response = await fetch('/api/validate-workflow', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(singlePayload) });
594
742
  const data = await response.json();
@@ -607,7 +755,7 @@
607
755
  } else {
608
756
  // Parallel mode: single bulk request
609
757
  if (progressEl) progressEl.textContent = `Running ${count} workflow run${count !== 1 ? 's' : ''} in parallel…`;
610
- const payload = { workflowName: selectedWorkflow?.name, runCount: count, sequential: false, observations: currentObservations, toolMockConfig };
758
+ const payload = { workflowName: selectedWorkflow?.name, runCount: count, sequential: false, observations: currentObservations, toolMockConfig, promptMockConfig };
611
759
  try {
612
760
  const response = await fetch('/api/validate-workflow', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payload) });
613
761
  const data = await response.json();
@@ -1142,7 +1290,7 @@
1142
1290
  traceLayoutEl.classList.remove("step-4");
1143
1291
  let headerHtml = '';
1144
1292
  if (currentStep === 3) {
1145
- headerHtml = '<tr><th style="width: 40px;">Check</th><th>Name</th><th>Type</th></tr>';
1293
+ headerHtml = '<tr><th style="width: 40px;">Check</th><th>Name</th><th>Type</th><th>Duration</th></tr>';
1146
1294
  }
1147
1295
  traceLayoutEl.innerHTML = `
1148
1296
  <div class="trace-left">
@@ -1311,6 +1459,7 @@
1311
1459
  snapshotId: liveTrace.snapshotId,
1312
1460
  observations: currentObservations,
1313
1461
  toolMockConfig: window._toolMockConfig || {},
1462
+ promptMockConfig: window._promptMockConfig || {},
1314
1463
  };
1315
1464
  const response = await fetch('/api/run-from-breakpoint', {
1316
1465
  method: 'POST',
@@ -1342,7 +1491,7 @@
1342
1491
  step5RerunInFlight = true;
1343
1492
  renderObservationTable();
1344
1493
  try {
1345
- const payload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations };
1494
+ const payload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations, toolMockConfig: window._toolMockConfig || {}, promptMockConfig: window._promptMockConfig || {} };
1346
1495
  const response = await fetch('/api/validate-workflow', {
1347
1496
  method: 'POST',
1348
1497
  headers: { 'Content-Type': 'application/json' },
@@ -1393,6 +1542,7 @@
1393
1542
  },
1394
1543
  snapshotId: liveTrace.snapshotId,
1395
1544
  toolMockConfig: window._toolMockConfig || {},
1545
+ promptMockConfig: window._promptMockConfig || {},
1396
1546
  };
1397
1547
  const response = await fetch('/api/resume-agent-from-task', {
1398
1548
  method: 'POST',
package/src/index.ts CHANGED
@@ -9,7 +9,8 @@ export { installAIInterceptor, uninstallAIInterceptor } from './interceptors/ai-
9
9
  export { startLLMProxy, fetchCapturedTrace } from './proxy/llm-capture.js'
10
10
  export { recordToolCall } from './tracing.js'
11
11
  export { safeRecordToolCall, isWorker } from './internals/conditional-recorder.js'
12
- export { resolveMock } from './internals/mock-resolver.js'
12
+ export { resolveMock, resolveAIMock, resolvePromptMock } from './internals/mock-resolver.js'
13
+ export type { AIMockEntry, AIMockConfig } from './internals/mock-resolver.js'
13
14
  export type { TestResult, FileResult, RunnerOptions } from './runner.js'
14
15
  export type { AITestContext, TraceHandle, LLMStep, ToolCall, CustomStep, TraceStep, RunnerHooks } from './trace-adapter/context.js'
15
16
  // Workflow capture & replay
@@ -19,7 +20,7 @@ export type { CaptureContext } from './capture/recorder.js'
19
20
  export { ReplayController } from './capture/replay.js'
20
21
  export { wrapTool } from './interceptors/tool.js'
21
22
  export { wrapAI } from './interceptors/workflow-ai.js'
22
- export { setHttpRunContext, initHttpRunContext, getHttpRunContext } from './interceptors/telemetry-push.js'
23
+ export { setHttpRunContext, initHttpRunContext, getHttpRunContext, getHttpFrozenEvent, getHttpPromptMock, pushTelemetryEvent, tryAutoInitHttpContext, runInHttpContext, runWithInitializedHttpContext } from './interceptors/telemetry-push.js'
23
24
  export { wrapDB, wrapPgClient, wrapKnex, wrapMongoCollection, wrapRedisClient } from './interceptors/db.js'
24
25
  export { installDBAutoInterceptor, uninstallDBAutoInterceptor } from './interceptors/db-auto.js'
25
26
  export { interceptFetch, restoreFetch, readVercelAIStream } from './interceptors/http.js'
@@ -1,11 +1,15 @@
1
1
  import { AsyncLocalStorage } from 'node:async_hooks'
2
+ import { randomUUID } from 'node:crypto'
2
3
  import type { WorkflowEvent } from '../capture/event.js'
4
+ import { extractSystemPrompt, replaceSystemPrompt } from '../internals/mock-resolver.js'
3
5
 
4
6
  interface HttpRunContext {
5
7
  runId: string
6
8
  dashboardUrl: string
7
9
  nextId: () => number
8
10
  frozenEvents: Map<number, WorkflowEvent>
11
+ /** System-prompt-keyed overrides: original system prompt → replacement system prompt */
12
+ promptMocks: Map<string, string>
9
13
  }
10
14
 
11
15
  const g = globalThis as Record<string, unknown>
@@ -15,16 +19,22 @@ const httpRunAls: AsyncLocalStorage<HttpRunContext | undefined> =
15
19
  new AsyncLocalStorage<HttpRunContext | undefined>()
16
20
  if (!g[HTTP_RUN_ALS_KEY]) g[HTTP_RUN_ALS_KEY] = httpRunAls
17
21
 
18
- function buildContext(runId: string, dashboardUrl: string, frozenEvents: WorkflowEvent[]): HttpRunContext {
22
+ function buildContext(
23
+ runId: string,
24
+ dashboardUrl: string,
25
+ frozenEvents: WorkflowEvent[],
26
+ promptMocksRecord: Record<string, string> = {},
27
+ ): HttpRunContext {
19
28
  let counter = 0
20
29
  const frozenMap = new Map<number, WorkflowEvent>()
21
30
  for (const e of frozenEvents) frozenMap.set(e.id, e)
22
- return { runId, dashboardUrl, nextId: () => ++counter, frozenEvents: frozenMap }
31
+ const promptMocksMap = new Map<string, string>(Object.entries(promptMocksRecord))
32
+ return { runId, dashboardUrl, nextId: () => ++counter, frozenEvents: frozenMap, promptMocks: promptMocksMap }
23
33
  }
24
34
 
25
35
  /** Synchronous setup — use when there are no frozen events (live run with no replay). */
26
36
  export function setHttpRunContext(runId: string, dashboardUrl: string): void {
27
- httpRunAls.enterWith(buildContext(runId, dashboardUrl, []))
37
+ httpRunAls.enterWith(buildContext(runId, dashboardUrl, [], {}))
28
38
  }
29
39
 
30
40
  /**
@@ -34,16 +44,19 @@ export function setHttpRunContext(runId: string, dashboardUrl: string): void {
34
44
  */
35
45
  export async function initHttpRunContext(runId: string, dashboardUrl: string): Promise<void> {
36
46
  let frozenEvents: WorkflowEvent[] = []
47
+ let promptMocks: Record<string, string> = {}
37
48
  try {
38
49
  const res = await fetch(`${dashboardUrl}/api/run-configs/${runId}`)
39
50
  if (res.ok) {
40
- const data = await res.json() as { frozenEvents?: WorkflowEvent[] }
51
+ const data = await res.json() as { frozenEvents?: WorkflowEvent[]; promptMocks?: Record<string, string> }
41
52
  frozenEvents = Array.isArray(data.frozenEvents) ? data.frozenEvents : []
53
+ promptMocks = (data.promptMocks && typeof data.promptMocks === 'object' && !Array.isArray(data.promptMocks))
54
+ ? data.promptMocks : {}
42
55
  }
43
56
  } catch {
44
57
  // Dashboard unreachable or run config not registered — proceed with live execution
45
58
  }
46
- httpRunAls.enterWith(buildContext(runId, dashboardUrl, frozenEvents))
59
+ httpRunAls.enterWith(buildContext(runId, dashboardUrl, frozenEvents, promptMocks))
47
60
  }
48
61
 
49
62
  export function getHttpRunContext(): HttpRunContext | undefined {
@@ -55,13 +68,151 @@ export function getHttpFrozenEvent(id: number): WorkflowEvent | undefined {
55
68
  return httpRunAls.getStore()?.frozenEvents.get(id)
56
69
  }
57
70
 
71
+ /**
72
+ * If a prompt mock is configured for the system prompt found in `input`, returns
73
+ * a copy of `input` with the system prompt replaced. Otherwise returns `undefined`.
74
+ */
75
+ export function getHttpPromptMock(input: unknown): unknown | undefined {
76
+ const ctx = httpRunAls.getStore()
77
+ if (!ctx || ctx.promptMocks.size === 0) {
78
+ console.log(`[elasticdash] getHttpPromptMock: skip — promptMocks.size=${ctx?.promptMocks.size ?? 'no ctx'}`)
79
+ return undefined
80
+ }
81
+ const systemPrompt = extractSystemPrompt(input)
82
+ if (systemPrompt === undefined) {
83
+ const inputKeys = (input && typeof input === 'object') ? Object.keys(input as object).join(',') : typeof input
84
+ console.log(`[elasticdash] getHttpPromptMock: no system prompt found in input (keys: ${inputKeys})`)
85
+ return undefined
86
+ }
87
+ const newSystemPrompt = ctx.promptMocks.get(systemPrompt)
88
+ console.log(`[elasticdash] getHttpPromptMock: extracted system prompt (len=${systemPrompt.length}, first50=${JSON.stringify(systemPrompt.slice(0,50))}) — mock found=${newSystemPrompt !== undefined}`)
89
+ if (newSystemPrompt !== undefined) {
90
+ console.log(`[elasticdash] getHttpPromptMock: available mock keys=${JSON.stringify([...ctx.promptMocks.keys()].map(k => k.slice(0,50)))}`)
91
+ }
92
+ if (newSystemPrompt === undefined) {
93
+ console.log(`[elasticdash] getHttpPromptMock: no mock for this prompt. Available mock keys (first 50 chars each): ${JSON.stringify([...ctx.promptMocks.keys()].map(k => k.slice(0,50)))}`)
94
+ return undefined
95
+ }
96
+ return replaceSystemPrompt(input, newSystemPrompt)
97
+ }
98
+
58
99
  export function pushTelemetryEvent(event: WorkflowEvent): void {
59
100
  const ctx = httpRunAls.getStore()
60
- if (!ctx) return
101
+ if (!ctx) {
102
+ console.log(`[elasticdash] pushTelemetryEvent: no HTTP context, dropping event type=${event.type} name=${('name' in event ? event.name : '?')}`)
103
+ return
104
+ }
61
105
  const { runId, dashboardUrl } = ctx
106
+ console.log(`[elasticdash] pushTelemetryEvent: posting event type=${event.type} name=${('name' in event ? event.name : '?')} runId=${runId} to ${dashboardUrl}`)
62
107
  fetch(`${dashboardUrl}/api/trace-events`, {
63
108
  method: 'POST',
64
109
  headers: { 'Content-Type': 'application/json' },
65
110
  body: JSON.stringify({ runId, event }),
66
- }).catch(() => {})
111
+ }).then(r => {
112
+ console.log(`[elasticdash] pushTelemetryEvent: response status=${r.status} for type=${event.type} name=${('name' in event ? event.name : '?')}`)
113
+ }).catch(e => {
114
+ console.log(`[elasticdash] pushTelemetryEvent: fetch failed: ${e instanceof Error ? e.message : String(e)}`)
115
+ })
116
+ }
117
+
118
+ const AUTO_INIT_KEY = '__elasticdash_auto_init_promise__'
119
+
120
+ /**
121
+ * Lazily initialises an HTTP run context from environment variables when none
122
+ * has been set up explicitly. Only activates when `ELASTICDASH_SERVER` is set.
123
+ *
124
+ * - If `ELASTICDASH_RUN_ID` is also set, calls `initHttpRunContext` so that
125
+ * frozen steps are fetched from the dashboard (enables step freezing).
126
+ * - Otherwise calls `setHttpRunContext` with a fresh UUID (live/telemetry mode).
127
+ *
128
+ * The initialisation runs at most once per process — subsequent calls are
129
+ * no-ops once the context is established. Errors (e.g. dashboard unreachable)
130
+ * are swallowed so that live execution always continues unaffected.
131
+ *
132
+ * Typical usage: set `ELASTICDASH_SERVER=http://localhost:4573` and optionally
133
+ * `ELASTICDASH_RUN_ID=<id>` before starting your server or script. Every
134
+ * `wrapTool` / `wrapAI` call will then auto-connect to the dashboard without
135
+ * any explicit `initHttpRunContext` call in your code.
136
+ */
137
+ /**
138
+ * Runs `callback` inside a fresh HTTP run context scoped to `runId` / `dashboardUrl`.
139
+ * Uses `als.run()` which guarantees the store is inherited by all async descendants of
140
+ * `callback`, even when intermediate code (e.g. Langfuse / OTel) spawns its own async
141
+ * contexts via `als.run()`. Prefer this over `setHttpRunContext` when wrapping a long-lived
142
+ * async pipeline such as a streaming route handler.
143
+ */
144
+ export function runInHttpContext<T>(
145
+ runId: string,
146
+ dashboardUrl: string,
147
+ callback: () => Promise<T>,
148
+ ): Promise<T> {
149
+ return httpRunAls.run(buildContext(runId, dashboardUrl, [], {}), callback)
150
+ }
151
+
152
+ /**
153
+ * Fetches frozen events and prompt mocks from the dashboard, then runs `callback`
154
+ * inside an HTTP run context using `als.run()`. This is the preferred function for
155
+ * streaming route handlers that sit behind Langfuse / OTel instrumentation:
156
+ *
157
+ * - `als.run()` guarantees the elasticdash store is inherited through any nested
158
+ * `als.run()` calls made by third-party libraries (e.g. `startActiveObservation`).
159
+ * - Frozen events and prompt mocks are fetched before the callback so step replay
160
+ * and prompt mocking work correctly on reruns.
161
+ *
162
+ * Falls back to an empty context (live execution, no replay) if the dashboard is
163
+ * unreachable or the run config is not found.
164
+ */
165
+ export async function runWithInitializedHttpContext<T>(
166
+ runId: string,
167
+ dashboardUrl: string,
168
+ callback: () => Promise<T>,
169
+ ): Promise<T> {
170
+ let frozenEvents: WorkflowEvent[] = []
171
+ let promptMocks: Record<string, string> = {}
172
+ try {
173
+ const res = await fetch(`${dashboardUrl}/api/run-configs/${runId}`)
174
+ if (res.ok) {
175
+ const data = await res.json() as { frozenEvents?: WorkflowEvent[]; promptMocks?: Record<string, string> }
176
+ frozenEvents = Array.isArray(data.frozenEvents) ? data.frozenEvents : []
177
+ promptMocks = (data.promptMocks && typeof data.promptMocks === 'object' && !Array.isArray(data.promptMocks))
178
+ ? data.promptMocks : {}
179
+ const mockKeys = Object.keys(promptMocks)
180
+ console.log(`[elasticdash] runWithInitializedHttpContext: fetched ${mockKeys.length} prompt mocks, ${frozenEvents.length} frozen events`)
181
+ if (mockKeys.length > 0) {
182
+ console.log(`[elasticdash] runWithInitializedHttpContext: mock keys (first 80 chars each): ${JSON.stringify(mockKeys.map(k => k.slice(0,80)))}`)
183
+ }
184
+ } else {
185
+ console.log(`[elasticdash] runWithInitializedHttpContext: run-configs fetch returned ${res.status}`)
186
+ }
187
+ } catch {
188
+ // Dashboard unreachable or run config not registered — proceed with live execution
189
+ }
190
+ return httpRunAls.run(buildContext(runId, dashboardUrl, frozenEvents, promptMocks), callback)
191
+ }
192
+
193
+ export async function tryAutoInitHttpContext(): Promise<void> {
194
+ // Fast path: already initialised in this async context
195
+ if (getHttpRunContext()) return
196
+
197
+ const serverUrl = (typeof process !== 'undefined' && process.env?.ELASTICDASH_SERVER) ?? ''
198
+ if (!serverUrl) return
199
+
200
+ // Deduplicate concurrent first calls within the same process
201
+ const g = globalThis as Record<string, unknown>
202
+ if (!g[AUTO_INIT_KEY]) {
203
+ g[AUTO_INIT_KEY] = (async () => {
204
+ try {
205
+ const runId = (typeof process !== 'undefined' && process.env?.ELASTICDASH_RUN_ID) ?? ''
206
+ if (runId) {
207
+ await initHttpRunContext(runId, serverUrl)
208
+ } else {
209
+ setHttpRunContext(randomUUID(), serverUrl)
210
+ }
211
+ } catch {
212
+ // Dashboard unreachable — fall through to live execution
213
+ }
214
+ })()
215
+ }
216
+
217
+ await (g[AUTO_INIT_KEY] as Promise<void>)
67
218
  }
@@ -1,7 +1,7 @@
1
1
  import { getCaptureContext } from '../capture/recorder.js'
2
2
  import { getCurrentTrace } from '../trace-adapter/context.js'
3
3
  import { rawDateNow } from './side-effects.js'
4
- import { getHttpRunContext, getHttpFrozenEvent, pushTelemetryEvent } from './telemetry-push.js'
4
+ import { getHttpRunContext, getHttpFrozenEvent, pushTelemetryEvent, tryAutoInitHttpContext } from './telemetry-push.js'
5
5
 
6
6
  const TOOL_WRAPPER_ACTIVE_KEY = '__elasticdash_tool_wrapper_active__'
7
7
 
@@ -85,6 +85,7 @@ export function wrapTool<Args extends unknown[], R>(
85
85
  fn: (...args: Args) => Promise<R>,
86
86
  ): (...args: Args) => Promise<R> {
87
87
  return async (...args: Args): Promise<R> => {
88
+ await tryAutoInitHttpContext()
88
89
  const ctx = getCaptureContext()
89
90
  const httpCtx = getHttpRunContext()
90
91
  console.log(`[elasticdash] Tool called: ${name}`, { args })
@@ -1,6 +1,7 @@
1
1
  import { getCaptureContext } from '../capture/recorder.js'
2
2
  import { rawDateNow } from './side-effects.js'
3
- import { getHttpRunContext, getHttpFrozenEvent, pushTelemetryEvent } from './telemetry-push.js'
3
+ import { getHttpRunContext, getHttpFrozenEvent, getHttpPromptMock, pushTelemetryEvent, tryAutoInitHttpContext } from './telemetry-push.js'
4
+ import { resolveAIMock, resolvePromptMock } from '../internals/mock-resolver.js'
4
5
  import type { WorkflowEvent } from '../capture/event.js'
5
6
 
6
7
  type UsageInfo = { inputTokens?: number; outputTokens?: number; totalTokens?: number }
@@ -44,13 +45,13 @@ export function wrapAI<Args extends unknown[], R>(
44
45
  callFn: (...args: Args) => Promise<R>,
45
46
  ): (...args: Args) => Promise<R> {
46
47
  return async (...args: Args): Promise<R> => {
48
+ await tryAutoInitHttpContext()
47
49
  const ctx = getCaptureContext()
48
50
  const httpCtx = getHttpRunContext()
49
51
 
50
52
  if (!ctx && !httpCtx) return callFn(...args)
51
53
 
52
54
  const start = rawDateNow()
53
- const input = args.length === 1 ? args[0] : args
54
55
 
55
56
  if (ctx) {
56
57
  const { recorder, replay } = ctx
@@ -60,8 +61,27 @@ export function wrapAI<Args extends unknown[], R>(
60
61
  return replay.getRecordedResult(id) as R
61
62
  }
62
63
 
64
+ // Check AI mock (output mock — skip real call, return recorded result)
65
+ const aiMock = resolveAIMock(modelName)
66
+ if (aiMock.mocked) {
67
+ const input = args.length === 1 ? args[0] : args
68
+ const event: WorkflowEvent = {
69
+ id, type: 'ai', name: modelName, input,
70
+ output: aiMock.result, timestamp: start, durationMs: 0,
71
+ }
72
+ recorder.record(event)
73
+ if (httpCtx) pushTelemetryEvent(event)
74
+ return aiMock.result as R
75
+ }
76
+
77
+ // Check prompt mock (system prompt replacement — call real LLM with modified system prompt)
78
+ const rawInput = args.length === 1 ? args[0] : args
79
+ const modifiedInput = resolvePromptMock(rawInput)
80
+ const effectiveArgs: Args = modifiedInput !== undefined ? [modifiedInput] as unknown as Args : args
81
+ const input = modifiedInput !== undefined ? modifiedInput : rawInput
82
+
63
83
  try {
64
- const output = await callFn(...args)
84
+ const output = await callFn(...effectiveArgs)
65
85
  const durationMs = rawDateNow() - start
66
86
  const usage = extractUsage(output)
67
87
  const event: WorkflowEvent = {
@@ -94,8 +114,14 @@ export function wrapAI<Args extends unknown[], R>(
94
114
  return frozen.output as R
95
115
  }
96
116
 
117
+ // Check prompt mock (system prompt replacement in HTTP mode)
118
+ const rawHttpInput = args.length === 1 ? args[0] : args
119
+ const httpModifiedInput = getHttpPromptMock(rawHttpInput)
120
+ const httpEffectiveArgs: Args = httpModifiedInput !== undefined ? [httpModifiedInput] as unknown as Args : args
121
+ const input = httpModifiedInput !== undefined ? httpModifiedInput : rawHttpInput
122
+
97
123
  try {
98
- const output = await callFn(...args)
124
+ const output = await callFn(...httpEffectiveArgs)
99
125
  const durationMs = rawDateNow() - start
100
126
  const usage = extractUsage(output)
101
127
  const event: WorkflowEvent = {