elasticdash-test 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -5
- package/dist/dashboard-server.d.ts +9 -0
- package/dist/dashboard-server.d.ts.map +1 -1
- package/dist/dashboard-server.js +209 -22
- package/dist/dashboard-server.js.map +1 -1
- package/dist/html/dashboard.html +158 -8
- package/dist/index.cjs +828 -108
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/interceptors/telemetry-push.d.ts +47 -0
- package/dist/interceptors/telemetry-push.d.ts.map +1 -1
- package/dist/interceptors/telemetry-push.js +139 -6
- package/dist/interceptors/telemetry-push.js.map +1 -1
- package/dist/interceptors/tool.d.ts.map +1 -1
- package/dist/interceptors/tool.js +2 -1
- package/dist/interceptors/tool.js.map +1 -1
- package/dist/interceptors/workflow-ai.d.ts.map +1 -1
- package/dist/interceptors/workflow-ai.js +28 -4
- package/dist/interceptors/workflow-ai.js.map +1 -1
- package/dist/internals/mock-resolver.d.ts +42 -5
- package/dist/internals/mock-resolver.d.ts.map +1 -1
- package/dist/internals/mock-resolver.js +124 -5
- package/dist/internals/mock-resolver.js.map +1 -1
- package/dist/workflow-runner-worker.js +8 -2
- package/dist/workflow-runner-worker.js.map +1 -1
- package/package.json +3 -2
- package/src/dashboard-server.ts +86 -17
- package/src/html/dashboard.html +158 -8
- package/src/index.ts +3 -2
- package/src/interceptors/telemetry-push.ts +158 -7
- package/src/interceptors/tool.ts +2 -1
- package/src/interceptors/workflow-ai.ts +30 -4
- package/src/internals/mock-resolver.ts +131 -5
- package/src/workflow-runner-worker.ts +23 -2
package/src/html/dashboard.html
CHANGED
|
@@ -313,6 +313,10 @@
|
|
|
313
313
|
// ---- Tool Mock Config State ----
|
|
314
314
|
window._toolMockConfig = {}; // { toolName: { mode: 'live'|'mock-all'|'mock-specific', callIndices: [], mockData: {} } }
|
|
315
315
|
|
|
316
|
+
// ---- Prompt Mock Config State ----
|
|
317
|
+
// { [originalSystemPrompt]: newSystemPrompt } — only keys where user enabled the override
|
|
318
|
+
window._promptMockConfig = {};
|
|
319
|
+
|
|
316
320
|
function getToolsFromTrace() {
|
|
317
321
|
// Extract unique tool names and their call details from the uploaded trace observations
|
|
318
322
|
const toolCalls = {};
|
|
@@ -468,6 +472,136 @@
|
|
|
468
472
|
return html;
|
|
469
473
|
}
|
|
470
474
|
|
|
475
|
+
// ---- Prompt Mock Helpers ----
|
|
476
|
+
|
|
477
|
+
/**
 * Pulls the system prompt text out of a recorded LLM call input.
 *
 * Accepts either an already-parsed value or a JSON-encoded string. Shapes are
 * probed in this order:
 *   1. `{ system: "..." }`                    (returned even when empty)
 *   2. `{ systemPrompt: "..." }`              (only when non-empty)
 *   3. `{ messages: [...] }` or a bare array  (first `role === "system"` entry
 *      whose `content` is a string)
 *
 * @param {*} input Parsed input object/array, or its JSON string form.
 * @returns {string|null} The system prompt, or null when none can be found
 *   (including when a string input is not valid JSON).
 */
function extractSystemPromptFromInput(input) {
  let payload = input;
  if (typeof payload === 'string') {
    // Inputs may be stored JSON-encoded; anything unparseable has no prompt.
    try {
      payload = JSON.parse(payload);
    } catch (parseError) {
      return null;
    }
  }
  if (payload === null || typeof payload !== 'object') return null;

  if (typeof payload.system === 'string') return payload.system;

  if (typeof payload.systemPrompt === 'string' && payload.systemPrompt !== '') {
    return payload.systemPrompt;
  }

  let messages = null;
  if (Array.isArray(payload.messages)) {
    messages = payload.messages;
  } else if (Array.isArray(payload)) {
    messages = payload;
  }
  if (messages === null) return null;

  const systemMessage = messages.find(function(entry) {
    return entry && typeof entry === 'object' && entry.role === 'system' && typeof entry.content === 'string';
  });
  return systemMessage ? systemMessage.content : null;
}
|
|
500
|
+
|
|
501
|
+
/**
|
|
502
|
+
* Returns an array of unique system prompts observed across all GENERATION observations.
|
|
503
|
+
* Each entry: { systemPrompt, modelName, count, rowIndex }
|
|
504
|
+
*/
|
|
505
|
+
function getSystemPromptsFromTrace() {
|
|
506
|
+
var seen = []; // [{ systemPrompt, modelName, count }]
|
|
507
|
+
var seenMap = {}; // systemPrompt -> index in seen
|
|
508
|
+
currentObservations.forEach(function(obs) {
|
|
509
|
+
if (obs.type !== 'GENERATION') return;
|
|
510
|
+
var sp = extractSystemPromptFromInput(obs.input);
|
|
511
|
+
if (!sp) return;
|
|
512
|
+
if (seenMap[sp] === undefined) {
|
|
513
|
+
seenMap[sp] = seen.length;
|
|
514
|
+
seen.push({ systemPrompt: sp, modelName: obs.model || obs.name || '(unknown)', count: 0 });
|
|
515
|
+
}
|
|
516
|
+
seen[seenMap[sp]].count++;
|
|
517
|
+
});
|
|
518
|
+
return seen.map(function(e, i) { return Object.assign({}, e, { rowIndex: i }); });
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
function renderPromptMockSection() {
|
|
522
|
+
const prompts = getSystemPromptsFromTrace();
|
|
523
|
+
if (prompts.length === 0) {
|
|
524
|
+
return '<div style="color:#999;font-size:13px;padding:6px 0;">No system prompts detected in trace. Only AI calls with a system prompt can be mocked here.</div>';
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
let html = '<div style="max-height:360px;overflow-y:auto;border:1px solid #e0e0e0;border-radius:6px;">';
|
|
528
|
+
html += '<table style="width:100%;border-collapse:collapse;font-size:13px;">';
|
|
529
|
+
html += '<thead><tr style="background:#f5f5f5;">';
|
|
530
|
+
html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;width:24px;"></th>';
|
|
531
|
+
html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;">Model</th>';
|
|
532
|
+
html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;">Uses</th>';
|
|
533
|
+
html += '<th style="padding:6px 10px;text-align:left;border-bottom:1px solid #e0e0e0;">System Prompt (override applies to all calls using this prompt)</th>';
|
|
534
|
+
html += '</tr></thead><tbody>';
|
|
535
|
+
|
|
536
|
+
prompts.forEach(function(row) {
|
|
537
|
+
const key = row.systemPrompt;
|
|
538
|
+
const isEnabled = window._promptMockConfig[key] !== undefined;
|
|
539
|
+
const currentVal = isEnabled ? window._promptMockConfig[key] : row.systemPrompt;
|
|
540
|
+
const preview = key.length > 80 ? key.slice(0, 80) + '…' : key;
|
|
541
|
+
|
|
542
|
+
html += '<tr class="prompt-mock-row" data-row-index="' + row.rowIndex + '" style="border-bottom:1px solid #f0f0f0;vertical-align:top;">';
|
|
543
|
+
|
|
544
|
+
// Checkbox column
|
|
545
|
+
html += '<td style="padding:8px 10px;white-space:nowrap;">';
|
|
546
|
+
html += '<input type="checkbox" class="prompt-mock-enable" title="Override this system prompt"' + (isEnabled ? ' checked' : '') + ' onchange="window.onPromptMockToggle(' + row.rowIndex + ', this.checked)">';
|
|
547
|
+
html += '</td>';
|
|
548
|
+
|
|
549
|
+
// Model column
|
|
550
|
+
html += '<td style="padding:8px 10px;font-family:Monaco,monospace;font-size:12px;white-space:nowrap;">' + esc(row.modelName) + '</td>';
|
|
551
|
+
|
|
552
|
+
// Uses count column
|
|
553
|
+
html += '<td style="padding:8px 10px;color:#555;white-space:nowrap;">' + row.count + 'x</td>';
|
|
554
|
+
|
|
555
|
+
// System prompt column
|
|
556
|
+
html += '<td style="padding:8px 10px;width:100%;">';
|
|
557
|
+
if (!isEnabled) {
|
|
558
|
+
html += '<div style="font-size:11px;color:#888;font-style:italic;font-family:Monaco,monospace;max-width:380px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;" title="' + esc(key) + '">' + esc(preview) + '</div>';
|
|
559
|
+
} else {
|
|
560
|
+
html += '<textarea class="prompt-mock-input" data-row-index="' + row.rowIndex + '" style="width:100%;box-sizing:border-box;font-size:11px;font-family:Monaco,monospace;padding:4px;border:1px solid #ddd;border-radius:4px;min-height:72px;resize:vertical;" oninput="window.onPromptMockInput(' + row.rowIndex + ', this.value)">' + esc(currentVal) + '</textarea>';
|
|
561
|
+
}
|
|
562
|
+
html += '</td>';
|
|
563
|
+
|
|
564
|
+
html += '</tr>';
|
|
565
|
+
});
|
|
566
|
+
|
|
567
|
+
html += '</tbody></table></div>';
|
|
568
|
+
return html;
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
function buildPromptMockConfigFromUI() {
|
|
572
|
+
// Return _promptMockConfig as-is (string key → string value), filtering out blank values
|
|
573
|
+
const config = {};
|
|
574
|
+
Object.keys(window._promptMockConfig).forEach(function(key) {
|
|
575
|
+
const val = window._promptMockConfig[key];
|
|
576
|
+
if (typeof val === 'string' && val.trim()) config[key] = val;
|
|
577
|
+
});
|
|
578
|
+
return config;
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
/**
 * Checkbox handler for a prompt-mock row.
 *
 * Unchecking removes the override for that row's system prompt. Checking adds
 * one, pre-filled with the original prompt so the user can edit from there (an
 * existing override is left untouched). The table inside `#promptMockContainer`
 * is then re-rendered so the row switches between preview and textarea.
 *
 * @param {number} rowIndex Index into `getSystemPromptsFromTrace()`.
 * @param {boolean} enabled New checkbox state.
 */
window.onPromptMockToggle = function(rowIndex, enabled) {
  const row = getSystemPromptsFromTrace()[rowIndex];
  if (!row) return;
  const key = row.systemPrompt;
  const config = window._promptMockConfig;
  if (!enabled) {
    delete config[key];
  } else if (!Object.prototype.hasOwnProperty.call(config, key)) {
    // Own-property check rather than `=== undefined`: a prompt whose text
    // matches an Object.prototype member (e.g. "constructor") would otherwise
    // look already configured and never receive its pre-filled value.
    config[key] = row.systemPrompt;
  }
  const container = document.getElementById('promptMockContainer');
  if (container) container.innerHTML = renderPromptMockSection();
};
|
|
597
|
+
|
|
598
|
+
/**
 * Textarea handler for a prompt-mock row: stores the edited text as the
 * override for that row's original system prompt. Unknown row indices are
 * ignored. No re-render is triggered here.
 *
 * @param {number} rowIndex Index into `getSystemPromptsFromTrace()`.
 * @param {string} value Current textarea content.
 */
window.onPromptMockInput = function(rowIndex, value) {
  const target = getSystemPromptsFromTrace()[rowIndex];
  if (target) {
    window._promptMockConfig[target.systemPrompt] = value;
  }
};
|
|
604
|
+
|
|
471
605
|
window.onToolMockModeChange = function(toolName, mode) {
|
|
472
606
|
if (!window._toolMockConfig[toolName]) window._toolMockConfig[toolName] = { mode: 'live' };
|
|
473
607
|
// Save current mock data before switching
|
|
@@ -498,7 +632,8 @@
|
|
|
498
632
|
|
|
499
633
|
window.openLiveValidationDialog = function() {
|
|
500
634
|
if (window.liveValidationDialog) return;
|
|
501
|
-
window._toolMockConfig = {};
|
|
635
|
+
window._toolMockConfig = {}; // Reset mock configs each time dialog opens
|
|
636
|
+
window._promptMockConfig = {};
|
|
502
637
|
|
|
503
638
|
const hasTraceTools = currentObservations.some(function(o) { return o.type === 'TOOL'; });
|
|
504
639
|
const hasRegisteredTools = codeIndex.tools && codeIndex.tools.length > 0;
|
|
@@ -524,8 +659,15 @@
|
|
|
524
659
|
Show all registered tools
|
|
525
660
|
</label>
|
|
526
661
|
</div>
|
|
527
|
-
<div id="toolMockContainer"></div>
|
|
662
|
+
<div id="toolMockContainer" style="max-height:160px;overflow-y:auto;"></div>
|
|
528
663
|
</div>` : ''}
|
|
664
|
+
<div style="border-top:1px solid #eee;padding-top:16px;margin-bottom:16px;">
|
|
665
|
+
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:6px;">
|
|
666
|
+
<div style="font-size:15px;font-weight:600;">Prompt Mocking</div>
|
|
667
|
+
<span style="font-size:12px;color:#888;">Check a row to replace the system prompt for all calls using it</span>
|
|
668
|
+
</div>
|
|
669
|
+
<div id="promptMockContainer" style="max-height:160px;overflow-y:auto;"></div>
|
|
670
|
+
</div>
|
|
529
671
|
<div style="display:flex;gap:12px;justify-content:space-between;align-items:center;">
|
|
530
672
|
<span id="liveValidationProgress" style="font-size:14px;color:#555;"></span>
|
|
531
673
|
<div style="display:flex;gap:12px;">
|
|
@@ -536,11 +678,15 @@
|
|
|
536
678
|
</div>
|
|
537
679
|
`;
|
|
538
680
|
document.body.appendChild(window.liveValidationDialog);
|
|
539
|
-
// Render
|
|
681
|
+
// Render mock sections after DOM insertion
|
|
540
682
|
const toolMockContainer = document.getElementById('toolMockContainer');
|
|
541
683
|
if (toolMockContainer) {
|
|
542
684
|
toolMockContainer.innerHTML = renderToolMockSection(false);
|
|
543
685
|
}
|
|
686
|
+
const promptMockContainer = document.getElementById('promptMockContainer');
|
|
687
|
+
if (promptMockContainer) {
|
|
688
|
+
promptMockContainer.innerHTML = renderPromptMockSection();
|
|
689
|
+
}
|
|
544
690
|
document.getElementById('cancelLiveValidation').onclick = function() {
|
|
545
691
|
window.liveValidationDialog.remove();
|
|
546
692
|
window.liveValidationDialog = null;
|
|
@@ -549,9 +695,11 @@
|
|
|
549
695
|
const count = parseInt(document.getElementById('liveValidationCount').value, 10);
|
|
550
696
|
const sequential = document.getElementById('liveValidationSequential').checked;
|
|
551
697
|
if (count >= 1) {
|
|
552
|
-
// Build
|
|
698
|
+
// Build mock configs from UI state and persist for "Run from here"
|
|
553
699
|
const toolMockConfig = buildToolMockConfigFromUI();
|
|
554
700
|
window._toolMockConfig = toolMockConfig;
|
|
701
|
+
const promptMockConfig = buildPromptMockConfigFromUI();
|
|
702
|
+
window._promptMockConfig = promptMockConfig;
|
|
555
703
|
const submitBtn = document.getElementById('submitLiveValidation');
|
|
556
704
|
submitBtn.disabled = true;
|
|
557
705
|
submitBtn.textContent = 'Validating...';
|
|
@@ -588,7 +736,7 @@
|
|
|
588
736
|
const collectedTraces = [];
|
|
589
737
|
let fatalError = null;
|
|
590
738
|
for (let i = 0; i < count; i++) {
|
|
591
|
-
const singlePayload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations, toolMockConfig };
|
|
739
|
+
const singlePayload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations, toolMockConfig, promptMockConfig };
|
|
592
740
|
try {
|
|
593
741
|
const response = await fetch('/api/validate-workflow', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(singlePayload) });
|
|
594
742
|
const data = await response.json();
|
|
@@ -607,7 +755,7 @@
|
|
|
607
755
|
} else {
|
|
608
756
|
// Parallel mode: single bulk request
|
|
609
757
|
if (progressEl) progressEl.textContent = `Running ${count} workflow run${count !== 1 ? 's' : ''} in parallel…`;
|
|
610
|
-
const payload = { workflowName: selectedWorkflow?.name, runCount: count, sequential: false, observations: currentObservations, toolMockConfig };
|
|
758
|
+
const payload = { workflowName: selectedWorkflow?.name, runCount: count, sequential: false, observations: currentObservations, toolMockConfig, promptMockConfig };
|
|
611
759
|
try {
|
|
612
760
|
const response = await fetch('/api/validate-workflow', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payload) });
|
|
613
761
|
const data = await response.json();
|
|
@@ -1142,7 +1290,7 @@
|
|
|
1142
1290
|
traceLayoutEl.classList.remove("step-4");
|
|
1143
1291
|
let headerHtml = '';
|
|
1144
1292
|
if (currentStep === 3) {
|
|
1145
|
-
headerHtml = '<tr><th style="width: 40px;">Check</th><th>Name</th><th>Type</th></tr>';
|
|
1293
|
+
headerHtml = '<tr><th style="width: 40px;">Check</th><th>Name</th><th>Type</th><th>Duration</th></tr>';
|
|
1146
1294
|
}
|
|
1147
1295
|
traceLayoutEl.innerHTML = `
|
|
1148
1296
|
<div class="trace-left">
|
|
@@ -1311,6 +1459,7 @@
|
|
|
1311
1459
|
snapshotId: liveTrace.snapshotId,
|
|
1312
1460
|
observations: currentObservations,
|
|
1313
1461
|
toolMockConfig: window._toolMockConfig || {},
|
|
1462
|
+
promptMockConfig: window._promptMockConfig || {},
|
|
1314
1463
|
};
|
|
1315
1464
|
const response = await fetch('/api/run-from-breakpoint', {
|
|
1316
1465
|
method: 'POST',
|
|
@@ -1342,7 +1491,7 @@
|
|
|
1342
1491
|
step5RerunInFlight = true;
|
|
1343
1492
|
renderObservationTable();
|
|
1344
1493
|
try {
|
|
1345
|
-
const payload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations };
|
|
1494
|
+
const payload = { workflowName: selectedWorkflow?.name, runCount: 1, sequential: false, observations: currentObservations, toolMockConfig: window._toolMockConfig || {}, promptMockConfig: window._promptMockConfig || {} };
|
|
1346
1495
|
const response = await fetch('/api/validate-workflow', {
|
|
1347
1496
|
method: 'POST',
|
|
1348
1497
|
headers: { 'Content-Type': 'application/json' },
|
|
@@ -1393,6 +1542,7 @@
|
|
|
1393
1542
|
},
|
|
1394
1543
|
snapshotId: liveTrace.snapshotId,
|
|
1395
1544
|
toolMockConfig: window._toolMockConfig || {},
|
|
1545
|
+
promptMockConfig: window._promptMockConfig || {},
|
|
1396
1546
|
};
|
|
1397
1547
|
const response = await fetch('/api/resume-agent-from-task', {
|
|
1398
1548
|
method: 'POST',
|
package/src/index.ts
CHANGED
|
@@ -9,7 +9,8 @@ export { installAIInterceptor, uninstallAIInterceptor } from './interceptors/ai-
|
|
|
9
9
|
export { startLLMProxy, fetchCapturedTrace } from './proxy/llm-capture.js'
|
|
10
10
|
export { recordToolCall } from './tracing.js'
|
|
11
11
|
export { safeRecordToolCall, isWorker } from './internals/conditional-recorder.js'
|
|
12
|
-
export { resolveMock } from './internals/mock-resolver.js'
|
|
12
|
+
export { resolveMock, resolveAIMock, resolvePromptMock } from './internals/mock-resolver.js'
|
|
13
|
+
export type { AIMockEntry, AIMockConfig } from './internals/mock-resolver.js'
|
|
13
14
|
export type { TestResult, FileResult, RunnerOptions } from './runner.js'
|
|
14
15
|
export type { AITestContext, TraceHandle, LLMStep, ToolCall, CustomStep, TraceStep, RunnerHooks } from './trace-adapter/context.js'
|
|
15
16
|
// Workflow capture & replay
|
|
@@ -19,7 +20,7 @@ export type { CaptureContext } from './capture/recorder.js'
|
|
|
19
20
|
export { ReplayController } from './capture/replay.js'
|
|
20
21
|
export { wrapTool } from './interceptors/tool.js'
|
|
21
22
|
export { wrapAI } from './interceptors/workflow-ai.js'
|
|
22
|
-
export { setHttpRunContext, initHttpRunContext, getHttpRunContext } from './interceptors/telemetry-push.js'
|
|
23
|
+
export { setHttpRunContext, initHttpRunContext, getHttpRunContext, getHttpFrozenEvent, getHttpPromptMock, pushTelemetryEvent, tryAutoInitHttpContext, runInHttpContext, runWithInitializedHttpContext } from './interceptors/telemetry-push.js'
|
|
23
24
|
export { wrapDB, wrapPgClient, wrapKnex, wrapMongoCollection, wrapRedisClient } from './interceptors/db.js'
|
|
24
25
|
export { installDBAutoInterceptor, uninstallDBAutoInterceptor } from './interceptors/db-auto.js'
|
|
25
26
|
export { interceptFetch, restoreFetch, readVercelAIStream } from './interceptors/http.js'
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import { AsyncLocalStorage } from 'node:async_hooks'
|
|
2
|
+
import { randomUUID } from 'node:crypto'
|
|
2
3
|
import type { WorkflowEvent } from '../capture/event.js'
|
|
4
|
+
import { extractSystemPrompt, replaceSystemPrompt } from '../internals/mock-resolver.js'
|
|
3
5
|
|
|
4
6
|
interface HttpRunContext {
|
|
5
7
|
runId: string
|
|
6
8
|
dashboardUrl: string
|
|
7
9
|
nextId: () => number
|
|
8
10
|
frozenEvents: Map<number, WorkflowEvent>
|
|
11
|
+
/** System-prompt-keyed overrides: original system prompt → replacement system prompt */
|
|
12
|
+
promptMocks: Map<string, string>
|
|
9
13
|
}
|
|
10
14
|
|
|
11
15
|
const g = globalThis as Record<string, unknown>
|
|
@@ -15,16 +19,22 @@ const httpRunAls: AsyncLocalStorage<HttpRunContext | undefined> =
|
|
|
15
19
|
new AsyncLocalStorage<HttpRunContext | undefined>()
|
|
16
20
|
if (!g[HTTP_RUN_ALS_KEY]) g[HTTP_RUN_ALS_KEY] = httpRunAls
|
|
17
21
|
|
|
18
|
-
function buildContext(
|
|
22
|
+
/**
 * Builds the per-run state object that is stored in the HTTP run
 * AsyncLocalStorage.
 *
 * @param runId Identifier of the run the context belongs to.
 * @param dashboardUrl Base URL of the dashboard server.
 * @param frozenEvents Events to index by their `id` for later lookup.
 * @param promptMocksRecord Original system prompt → replacement system prompt.
 *   Entries whose value is not a string are ignored.
 * @returns A context whose `nextId()` yields 1, 2, 3, … independently per context.
 */
function buildContext(
  runId: string,
  dashboardUrl: string,
  frozenEvents: WorkflowEvent[],
  promptMocksRecord: Record<string, string> = {},
): HttpRunContext {
  let counter = 0
  const frozenMap = new Map<number, WorkflowEvent>()
  for (const e of frozenEvents) frozenMap.set(e.id, e)
  // Callers feed this from a JSON response that is only checked to be a
  // non-array object, so the static type is not a guarantee: keep string
  // replacements only, otherwise a non-string would be handed on as a prompt.
  const promptMocksMap = new Map<string, string>()
  for (const [original, replacement] of Object.entries(promptMocksRecord as Record<string, unknown>)) {
    if (typeof replacement === 'string') promptMocksMap.set(original, replacement)
  }
  return { runId, dashboardUrl, nextId: () => ++counter, frozenEvents: frozenMap, promptMocks: promptMocksMap }
}
|
|
24
34
|
|
|
25
35
|
/**
 * Synchronously enters an HTTP run context that has no frozen events and no
 * prompt mocks — i.e. a plain live run with nothing to replay or override.
 *
 * @param runId Identifier of the run.
 * @param dashboardUrl Base URL of the dashboard server.
 */
export function setHttpRunContext(runId: string, dashboardUrl: string): void {
  const liveContext = buildContext(runId, dashboardUrl, [], {})
  httpRunAls.enterWith(liveContext)
}
|
|
29
39
|
|
|
30
40
|
/**
|
|
@@ -34,16 +44,19 @@ export function setHttpRunContext(runId: string, dashboardUrl: string): void {
|
|
|
34
44
|
*/
|
|
35
45
|
export async function initHttpRunContext(runId: string, dashboardUrl: string): Promise<void> {
  // Defaults describe a plain live run; they are only replaced by well-formed
  // values from the dashboard's run config.
  let replayEvents: WorkflowEvent[] = []
  let overrides: Record<string, string> = {}
  try {
    const response = await fetch(`${dashboardUrl}/api/run-configs/${runId}`)
    if (response.ok) {
      const config = await response.json() as { frozenEvents?: WorkflowEvent[]; promptMocks?: Record<string, string> }
      if (Array.isArray(config.frozenEvents)) replayEvents = config.frozenEvents
      const candidate = config.promptMocks
      // Accept only a non-null, non-array object as the prompt-mock record.
      if (candidate && typeof candidate === 'object' && !Array.isArray(candidate)) overrides = candidate
    }
  } catch {
    // Dashboard unreachable or run config not registered — proceed with live execution
  }
  httpRunAls.enterWith(buildContext(runId, dashboardUrl, replayEvents, overrides))
}
|
|
48
61
|
|
|
49
62
|
export function getHttpRunContext(): HttpRunContext | undefined {
|
|
@@ -55,13 +68,151 @@ export function getHttpFrozenEvent(id: number): WorkflowEvent | undefined {
|
|
|
55
68
|
return httpRunAls.getStore()?.frozenEvents.get(id)
|
|
56
69
|
}
|
|
57
70
|
|
|
71
|
+
/**
 * Applies a configured system-prompt override to an AI call input.
 *
 * Looks up the system prompt extracted from `input` in the current HTTP run
 * context's `promptMocks`. On a hit, returns whatever `replaceSystemPrompt`
 * produces for the input and the replacement prompt. Returns `undefined` when
 * there is no context, no mocks are configured, no system prompt can be
 * extracted, or no mock matches. Each decision is logged via `console.log`.
 *
 * @param input The AI call input to inspect.
 * @returns The input with its system prompt replaced, or `undefined`.
 */
export function getHttpPromptMock(input: unknown): unknown | undefined {
  const store = httpRunAls.getStore()
  const mockCount = store?.promptMocks.size
  if (!store || mockCount === 0) {
    console.log(`[elasticdash] getHttpPromptMock: skip — promptMocks.size=${mockCount ?? 'no ctx'}`)
    return undefined
  }

  const original = extractSystemPrompt(input)
  if (original === undefined) {
    const inputKeys = (input && typeof input === 'object') ? Object.keys(input as object).join(',') : typeof input
    console.log(`[elasticdash] getHttpPromptMock: no system prompt found in input (keys: ${inputKeys})`)
    return undefined
  }

  const replacement = store.promptMocks.get(original)
  console.log(`[elasticdash] getHttpPromptMock: extracted system prompt (len=${original.length}, first50=${JSON.stringify(original.slice(0,50))}) — mock found=${replacement !== undefined}`)

  // Both outcomes log the configured keys (truncated to 50 chars), so build that once.
  const keyPreviews = JSON.stringify([...store.promptMocks.keys()].map(k => k.slice(0,50)))
  if (replacement === undefined) {
    console.log(`[elasticdash] getHttpPromptMock: no mock for this prompt. Available mock keys (first 50 chars each): ${keyPreviews}`)
    return undefined
  }
  console.log(`[elasticdash] getHttpPromptMock: available mock keys=${keyPreviews}`)
  return replaceSystemPrompt(input, replacement)
}
|
|
98
|
+
|
|
58
99
|
/**
 * Posts a workflow event to the dashboard's `/api/trace-events` endpoint for
 * the run in the current HTTP run context.
 *
 * Fire-and-forget: the request is not awaited, and its outcome is only logged.
 * Nothing is sent when there is no HTTP run context or when the event cannot
 * be serialised to JSON. This function never throws, so a telemetry problem
 * cannot fail the call that is being recorded.
 *
 * @param event The event to report.
 */
export function pushTelemetryEvent(event: WorkflowEvent): void {
  const label = `type=${event.type} name=${('name' in event ? event.name : '?')}`
  const ctx = httpRunAls.getStore()
  if (!ctx) {
    console.log(`[elasticdash] pushTelemetryEvent: no HTTP context, dropping event ${label}`)
    return
  }
  const { runId, dashboardUrl } = ctx

  // Event input/output come from arbitrary tool and model calls; JSON.stringify
  // throws synchronously on circular references and BigInt values.
  let body: string
  try {
    body = JSON.stringify({ runId, event })
  } catch (e: unknown) {
    console.log(`[elasticdash] pushTelemetryEvent: could not serialise event ${label}, dropping: ${e instanceof Error ? e.message : String(e)}`)
    return
  }

  console.log(`[elasticdash] pushTelemetryEvent: posting event ${label} runId=${runId} to ${dashboardUrl}`)
  void fetch(`${dashboardUrl}/api/trace-events`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body,
  }).then(r => {
    console.log(`[elasticdash] pushTelemetryEvent: response status=${r.status} for ${label}`)
  }).catch(e => {
    console.log(`[elasticdash] pushTelemetryEvent: fetch failed: ${e instanceof Error ? e.message : String(e)}`)
  })
}
|
|
117
|
+
|
|
118
|
+
const AUTO_INIT_KEY = '__elasticdash_auto_init_promise__'
|
|
119
|
+
|
|
120
|
+
/*
 * NOTE: this comment describes `tryAutoInitHttpContext` (defined further down
 * in this file), not the function that immediately follows it.
 *
 * Lazily initialises an HTTP run context from environment variables when none
 * has been set up explicitly. Only activates when `ELASTICDASH_SERVER` is set.
 *
 * - If `ELASTICDASH_RUN_ID` is also set, calls `initHttpRunContext` so that
 *   frozen steps are fetched from the dashboard (enables step freezing).
 * - Otherwise calls `setHttpRunContext` with a fresh UUID (live/telemetry mode).
 *
 * The initialisation runs at most once per process — subsequent calls are
 * no-ops once the context is established. Errors (e.g. dashboard unreachable)
 * are swallowed so that live execution always continues unaffected.
 *
 * Typical usage: set `ELASTICDASH_SERVER=http://localhost:4573` and optionally
 * `ELASTICDASH_RUN_ID=<id>` before starting your server or script. Every
 * `wrapTool` / `wrapAI` call will then auto-connect to the dashboard without
 * any explicit `initHttpRunContext` call in your code.
 */
|
|
137
|
+
/**
 * Runs `callback` inside a fresh HTTP run context for `runId` / `dashboardUrl`
 * that carries no frozen events and no prompt mocks.
 *
 * The context is installed with `AsyncLocalStorage.run()`, so it is scoped to
 * `callback` and the asynchronous work started from it, instead of being
 * entered for the remainder of the caller's execution as `setHttpRunContext`
 * does. That makes it the better fit for wrapping a long-lived async pipeline
 * such as a streaming route handler.
 *
 * @param runId Identifier of the run.
 * @param dashboardUrl Base URL of the dashboard server.
 * @param callback Work to execute within the context.
 * @returns Whatever `callback` returns.
 */
export function runInHttpContext<T>(
  runId: string,
  dashboardUrl: string,
  callback: () => Promise<T>,
): Promise<T> {
  const liveContext = buildContext(runId, dashboardUrl, [], {})
  return httpRunAls.run(liveContext, callback)
}
|
|
151
|
+
|
|
152
|
+
/**
 * Loads the run config for `runId` from the dashboard, then runs `callback`
 * inside an HTTP run context built from it.
 *
 * - Frozen events and prompt mocks are fetched from
 *   `${dashboardUrl}/api/run-configs/${runId}` before `callback` starts.
 * - The context is installed with `AsyncLocalStorage.run()`, so it is scoped to
 *   `callback` and the asynchronous work started from it.
 * - If the request fails, returns a non-OK status, or yields malformed fields,
 *   the affected values fall back to empty and `callback` runs as a plain live
 *   execution.
 *
 * @param runId Identifier of the run whose config should be loaded.
 * @param dashboardUrl Base URL of the dashboard server.
 * @param callback Work to execute within the context.
 * @returns Whatever `callback` resolves to.
 */
export async function runWithInitializedHttpContext<T>(
  runId: string,
  dashboardUrl: string,
  callback: () => Promise<T>,
): Promise<T> {
  let replayEvents: WorkflowEvent[] = []
  let overrides: Record<string, string> = {}
  try {
    const response = await fetch(`${dashboardUrl}/api/run-configs/${runId}`)
    if (!response.ok) {
      console.log(`[elasticdash] runWithInitializedHttpContext: run-configs fetch returned ${response.status}`)
    } else {
      const config = await response.json() as { frozenEvents?: WorkflowEvent[]; promptMocks?: Record<string, string> }
      if (Array.isArray(config.frozenEvents)) replayEvents = config.frozenEvents
      const candidate = config.promptMocks
      // Accept only a non-null, non-array object as the prompt-mock record.
      if (candidate && typeof candidate === 'object' && !Array.isArray(candidate)) overrides = candidate
      const overrideKeys = Object.keys(overrides)
      console.log(`[elasticdash] runWithInitializedHttpContext: fetched ${overrideKeys.length} prompt mocks, ${replayEvents.length} frozen events`)
      if (overrideKeys.length > 0) {
        console.log(`[elasticdash] runWithInitializedHttpContext: mock keys (first 80 chars each): ${JSON.stringify(overrideKeys.map(k => k.slice(0,80)))}`)
      }
    }
  } catch {
    // Dashboard unreachable or run config not registered — proceed with live execution
  }
  return httpRunAls.run(buildContext(runId, dashboardUrl, replayEvents, overrides), callback)
}
|
|
192
|
+
|
|
193
|
+
/**
 * Lazily sets up an HTTP run context from environment variables when the
 * current async context does not have one yet.
 *
 * - Does nothing unless `ELASTICDASH_SERVER` is set to a non-empty value.
 * - With `ELASTICDASH_RUN_ID` set, awaits `initHttpRunContext(runId, server)`.
 * - Otherwise calls `setHttpRunContext` with a fresh random UUID.
 *
 * The initialisation promise is cached on `globalThis`, so the work is started
 * at most once per process and concurrent first calls share it. Errors thrown
 * during initialisation are swallowed so live execution continues.
 *
 * NOTE(review): the cached promise is process-wide, whereas the context itself
 * is entered via `AsyncLocalStorage.enterWith` by the two helpers above.
 * Callers running in a different async context than the one that triggered the
 * initialisation may therefore still see no context afterwards — confirm
 * whether that is acceptable for multi-request servers.
 */
export async function tryAutoInitHttpContext(): Promise<void> {
  // Fast path: already initialised in this async context
  if (getHttpRunContext()) return

  // Read the environment defensively: `process` may not exist in this runtime.
  // (A `cond && value ?? ''` chain would yield `false`, not '', when it does not.)
  const env = typeof process !== 'undefined' ? process.env : undefined
  const serverUrl = env?.ELASTICDASH_SERVER ?? ''
  if (!serverUrl) return

  // Deduplicate concurrent first calls within the same process
  const g = globalThis as Record<string, unknown>
  if (!g[AUTO_INIT_KEY]) {
    g[AUTO_INIT_KEY] = (async () => {
      try {
        const runId = env?.ELASTICDASH_RUN_ID ?? ''
        if (runId) {
          await initHttpRunContext(runId, serverUrl)
        } else {
          setHttpRunContext(randomUUID(), serverUrl)
        }
      } catch {
        // Dashboard unreachable — fall through to live execution
      }
    })()
  }

  await (g[AUTO_INIT_KEY] as Promise<void>)
}
|
package/src/interceptors/tool.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { getCaptureContext } from '../capture/recorder.js'
|
|
2
2
|
import { getCurrentTrace } from '../trace-adapter/context.js'
|
|
3
3
|
import { rawDateNow } from './side-effects.js'
|
|
4
|
-
import { getHttpRunContext, getHttpFrozenEvent, pushTelemetryEvent } from './telemetry-push.js'
|
|
4
|
+
import { getHttpRunContext, getHttpFrozenEvent, pushTelemetryEvent, tryAutoInitHttpContext } from './telemetry-push.js'
|
|
5
5
|
|
|
6
6
|
const TOOL_WRAPPER_ACTIVE_KEY = '__elasticdash_tool_wrapper_active__'
|
|
7
7
|
|
|
@@ -85,6 +85,7 @@ export function wrapTool<Args extends unknown[], R>(
|
|
|
85
85
|
fn: (...args: Args) => Promise<R>,
|
|
86
86
|
): (...args: Args) => Promise<R> {
|
|
87
87
|
return async (...args: Args): Promise<R> => {
|
|
88
|
+
await tryAutoInitHttpContext()
|
|
88
89
|
const ctx = getCaptureContext()
|
|
89
90
|
const httpCtx = getHttpRunContext()
|
|
90
91
|
console.log(`[elasticdash] Tool called: ${name}`, { args })
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { getCaptureContext } from '../capture/recorder.js'
|
|
2
2
|
import { rawDateNow } from './side-effects.js'
|
|
3
|
-
import { getHttpRunContext, getHttpFrozenEvent, pushTelemetryEvent } from './telemetry-push.js'
|
|
3
|
+
import { getHttpRunContext, getHttpFrozenEvent, getHttpPromptMock, pushTelemetryEvent, tryAutoInitHttpContext } from './telemetry-push.js'
|
|
4
|
+
import { resolveAIMock, resolvePromptMock } from '../internals/mock-resolver.js'
|
|
4
5
|
import type { WorkflowEvent } from '../capture/event.js'
|
|
5
6
|
|
|
6
7
|
type UsageInfo = { inputTokens?: number; outputTokens?: number; totalTokens?: number }
|
|
@@ -44,13 +45,13 @@ export function wrapAI<Args extends unknown[], R>(
|
|
|
44
45
|
callFn: (...args: Args) => Promise<R>,
|
|
45
46
|
): (...args: Args) => Promise<R> {
|
|
46
47
|
return async (...args: Args): Promise<R> => {
|
|
48
|
+
await tryAutoInitHttpContext()
|
|
47
49
|
const ctx = getCaptureContext()
|
|
48
50
|
const httpCtx = getHttpRunContext()
|
|
49
51
|
|
|
50
52
|
if (!ctx && !httpCtx) return callFn(...args)
|
|
51
53
|
|
|
52
54
|
const start = rawDateNow()
|
|
53
|
-
const input = args.length === 1 ? args[0] : args
|
|
54
55
|
|
|
55
56
|
if (ctx) {
|
|
56
57
|
const { recorder, replay } = ctx
|
|
@@ -60,8 +61,27 @@ export function wrapAI<Args extends unknown[], R>(
|
|
|
60
61
|
return replay.getRecordedResult(id) as R
|
|
61
62
|
}
|
|
62
63
|
|
|
64
|
+
// Check AI mock (output mock — skip real call, return recorded result)
|
|
65
|
+
const aiMock = resolveAIMock(modelName)
|
|
66
|
+
if (aiMock.mocked) {
|
|
67
|
+
const input = args.length === 1 ? args[0] : args
|
|
68
|
+
const event: WorkflowEvent = {
|
|
69
|
+
id, type: 'ai', name: modelName, input,
|
|
70
|
+
output: aiMock.result, timestamp: start, durationMs: 0,
|
|
71
|
+
}
|
|
72
|
+
recorder.record(event)
|
|
73
|
+
if (httpCtx) pushTelemetryEvent(event)
|
|
74
|
+
return aiMock.result as R
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Check prompt mock (system prompt replacement — call real LLM with modified system prompt)
|
|
78
|
+
const rawInput = args.length === 1 ? args[0] : args
|
|
79
|
+
const modifiedInput = resolvePromptMock(rawInput)
|
|
80
|
+
const effectiveArgs: Args = modifiedInput !== undefined ? [modifiedInput] as unknown as Args : args
|
|
81
|
+
const input = modifiedInput !== undefined ? modifiedInput : rawInput
|
|
82
|
+
|
|
63
83
|
try {
|
|
64
|
-
const output = await callFn(...
|
|
84
|
+
const output = await callFn(...effectiveArgs)
|
|
65
85
|
const durationMs = rawDateNow() - start
|
|
66
86
|
const usage = extractUsage(output)
|
|
67
87
|
const event: WorkflowEvent = {
|
|
@@ -94,8 +114,14 @@ export function wrapAI<Args extends unknown[], R>(
|
|
|
94
114
|
return frozen.output as R
|
|
95
115
|
}
|
|
96
116
|
|
|
117
|
+
// Check prompt mock (system prompt replacement in HTTP mode)
|
|
118
|
+
const rawHttpInput = args.length === 1 ? args[0] : args
|
|
119
|
+
const httpModifiedInput = getHttpPromptMock(rawHttpInput)
|
|
120
|
+
const httpEffectiveArgs: Args = httpModifiedInput !== undefined ? [httpModifiedInput] as unknown as Args : args
|
|
121
|
+
const input = httpModifiedInput !== undefined ? httpModifiedInput : rawHttpInput
|
|
122
|
+
|
|
97
123
|
try {
|
|
98
|
-
const output = await callFn(...
|
|
124
|
+
const output = await callFn(...httpEffectiveArgs)
|
|
99
125
|
const durationMs = rawDateNow() - start
|
|
100
126
|
const usage = extractUsage(output)
|
|
101
127
|
const event: WorkflowEvent = {
|