elasticdash-test 0.1.24 → 0.1.25-alpha-2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/dist/cli.js +53 -6
- package/dist/cli.js.map +1 -1
- package/docs/agent-integration-guide.md +557 -0
- package/docs/agents.md +140 -0
- package/docs/backend_rerun_alignment.md +291 -0
- package/docs/backend_traceid_update.md +141 -0
- package/docs/dashboard.md +394 -0
- package/docs/deno.md +69 -0
- package/docs/instrumentation.md +424 -0
- package/docs/langfuse-trace-structure.md +145 -0
- package/docs/matchers.md +173 -0
- package/docs/observability_backend_contract.md +577 -0
- package/docs/observability_mode.md +195 -0
- package/docs/observability_rerun_backend_plan.md +596 -0
- package/docs/quickstart.md +621 -0
- package/docs/security-compliance.md +566 -0
- package/docs/test-writing-guidelines.md +444 -0
- package/docs/tools.md +165 -0
- package/docs/workflow-modes.md +253 -0
- package/package.json +2 -1
- package/src/cli.ts +60 -7
|
@@ -0,0 +1,557 @@
|
|
|
1
|
+
# ElasticDash SDK — Agent Integration Guide
|
|
2
|
+
|
|
3
|
+
> **Audience**: Coding agents (Claude Code, Cursor, Copilot, Codex, Windsurf, etc.) integrating `elasticdash-test` into a user's project.
|
|
4
|
+
>
|
|
5
|
+
> **When to use**: Follow this guide step-by-step when a user asks you to "integrate ElasticDash", "set up ElasticDash testing", "implement elasticdash-test", or similar. Each step produces a specific file — work through them in order.
|
|
6
|
+
>
|
|
7
|
+
> **How to use**: Read the full guide first to understand the project's structure, then execute each step. Use the decision trees section to choose the right pattern for the user's project (framework type, module system, tool architecture).
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Prerequisites
|
|
12
|
+
|
|
13
|
+
- Node.js >= 20
|
|
14
|
+
- npm, yarn, or pnpm
|
|
15
|
+
- At least one AI workflow function (a callable async function that makes LLM/tool calls)
|
|
16
|
+
- LLM provider API keys for providers used in workflows (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`)
|
|
17
|
+
|
|
18
|
+
### Detect project type before starting
|
|
19
|
+
|
|
20
|
+
Check these before choosing patterns:
|
|
21
|
+
|
|
22
|
+
| Check | How | Result |
|
|
23
|
+
|-------|-----|--------|
|
|
24
|
+
| Module system | `package.json` → `"type"` field | `"module"` = ESM, `"commonjs"` or missing = CJS |
|
|
25
|
+
| Framework | Look for `next.config.*`, `remix.config.*`, `svelte.config.*` | Present = framework project, use HTTP mode |
|
|
26
|
+
| Path aliases | `tsconfig.json` → `"paths"` field | Present = use the path-alias `dashboard:ai` script (see Step 5) |
|
|
27
|
+
| Tool architecture | Does the project use a central `dispatchTool(name, args)` function? | Yes = Pattern A, No = Pattern B |
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Step 1: Install
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
npm install elasticdash-test
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Add to `.gitignore`:
|
|
38
|
+
|
|
39
|
+
```gitignore
|
|
40
|
+
.temp/
|
|
41
|
+
.ed_traces/
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Step 2: Create `ed_tools.ts`
|
|
47
|
+
|
|
48
|
+
Create `ed_tools.ts` in the project root. This file wraps each tool function with tracing so ElasticDash can record and replay tool calls.
|
|
49
|
+
|
|
50
|
+
### Choose your pattern
|
|
51
|
+
|
|
52
|
+
- **Pattern B (recommended for most projects)**: inline async with mock support. Use when tools are imported individually, no central dispatcher exists, or you want dashboard mock support.
|
|
53
|
+
- **Pattern A**: `withTrace` HOF with central dispatcher. Use when tools are pure functions called through a single `dispatchTool(name, args)` dispatcher.
|
|
54
|
+
|
|
55
|
+
### Pattern B template (recommended)
|
|
56
|
+
|
|
57
|
+
```ts
|
|
58
|
+
// ed_tools.ts
|
|
59
|
+
// Replace imports with your actual tool functions and source paths
|
|
60
|
+
import { YOUR_TOOL_1 as YOUR_TOOL_1_impl } from './YOUR_SOURCE_PATH_1'
|
|
61
|
+
import { YOUR_TOOL_2 as YOUR_TOOL_2_impl } from './YOUR_SOURCE_PATH_2'
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Helpers — copy as-is, no customization needed
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
function resolveMock(toolName: string): { mocked: true; result: unknown } | { mocked: false } {
|
|
68
|
+
const g = globalThis as any
|
|
69
|
+
const mocks = g.__ELASTICDASH_TOOL_MOCKS__
|
|
70
|
+
if (!mocks) return { mocked: false }
|
|
71
|
+
|
|
72
|
+
const entry = mocks[toolName]
|
|
73
|
+
if (!entry || entry.mode === 'live') return { mocked: false }
|
|
74
|
+
|
|
75
|
+
if (!g.__ELASTICDASH_TOOL_CALL_COUNTERS__) g.__ELASTICDASH_TOOL_CALL_COUNTERS__ = {}
|
|
76
|
+
const counters = g.__ELASTICDASH_TOOL_CALL_COUNTERS__
|
|
77
|
+
counters[toolName] = (counters[toolName] ?? 0) + 1
|
|
78
|
+
const callNumber = counters[toolName]
|
|
79
|
+
|
|
80
|
+
if (entry.mode === 'mock-all') {
|
|
81
|
+
const data = entry.mockData ?? {}
|
|
82
|
+
const result = data[callNumber] !== undefined ? data[callNumber] : data[0]
|
|
83
|
+
return { mocked: true, result }
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (entry.mode === 'mock-specific') {
|
|
87
|
+
const indices = entry.callIndices ?? []
|
|
88
|
+
if (indices.includes(callNumber)) {
|
|
89
|
+
return { mocked: true, result: (entry.mockData ?? {})[callNumber] }
|
|
90
|
+
}
|
|
91
|
+
return { mocked: false }
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
return { mocked: false }
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
async function safeRecordToolCall(tool: string, input: any, result: any) {
|
|
98
|
+
if (!(globalThis as any).__ELASTICDASH_WORKER__) return
|
|
99
|
+
try {
|
|
100
|
+
const { recordToolCall } = await import('elasticdash-test')
|
|
101
|
+
recordToolCall(tool, input, result)
|
|
102
|
+
} catch { /* tracing must never block business logic */ }
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Tools — one export per tool, following this pattern
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
// TEMPLATE: Copy this block for each tool, replacing YOUR_TOOL_1 with the tool's name and YOUR_TOOL_1_impl with its imported implementation
|
|
110
|
+
export const YOUR_TOOL_1 = async (input: any) => {
|
|
111
|
+
const mock = resolveMock('YOUR_TOOL_1')
|
|
112
|
+
if (mock.mocked) {
|
|
113
|
+
await safeRecordToolCall('YOUR_TOOL_1', input, mock.result)
|
|
114
|
+
return mock.result
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
return await YOUR_TOOL_1_impl(input)
|
|
118
|
+
.then(async (res: any) => {
|
|
119
|
+
await safeRecordToolCall('YOUR_TOOL_1', input, res)
|
|
120
|
+
return res
|
|
121
|
+
})
|
|
122
|
+
.catch(async (err: any) => {
|
|
123
|
+
await safeRecordToolCall('YOUR_TOOL_1', input, err)
|
|
124
|
+
throw err
|
|
125
|
+
})
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
export const YOUR_TOOL_2 = async (input: any) => {
|
|
129
|
+
const mock = resolveMock('YOUR_TOOL_2')
|
|
130
|
+
if (mock.mocked) {
|
|
131
|
+
await safeRecordToolCall('YOUR_TOOL_2', input, mock.result)
|
|
132
|
+
return mock.result
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
return await YOUR_TOOL_2_impl(input)
|
|
136
|
+
.then(async (res: any) => {
|
|
137
|
+
await safeRecordToolCall('YOUR_TOOL_2', input, res)
|
|
138
|
+
return res
|
|
139
|
+
})
|
|
140
|
+
.catch(async (err: any) => {
|
|
141
|
+
await safeRecordToolCall('YOUR_TOOL_2', input, err)
|
|
142
|
+
throw err
|
|
143
|
+
})
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Pattern A template (dispatcher-based)
|
|
148
|
+
|
|
149
|
+
```ts
|
|
150
|
+
// ed_tools.ts
|
|
151
|
+
// Replace imports with your actual tool functions and source paths
|
|
152
|
+
import {
|
|
153
|
+
YOUR_TOOL_1 as _YOUR_TOOL_1,
|
|
154
|
+
YOUR_TOOL_2 as _YOUR_TOOL_2,
|
|
155
|
+
dispatchTool as _dispatchTool,
|
|
156
|
+
} from './YOUR_TOOLS_SOURCE'
|
|
157
|
+
|
|
158
|
+
async function withTrace<I, O>(
|
|
159
|
+
toolName: string,
|
|
160
|
+
input: I,
|
|
161
|
+
fn: (input: I) => Promise<O>,
|
|
162
|
+
): Promise<O> {
|
|
163
|
+
const result = await fn(input)
|
|
164
|
+
try {
|
|
165
|
+
const { recordToolCall } = await import('elasticdash-test')
|
|
166
|
+
recordToolCall(toolName, input, result)
|
|
167
|
+
} catch { /* tracing must never block business logic */ }
|
|
168
|
+
return result
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
export function YOUR_TOOL_1(input: Parameters<typeof _YOUR_TOOL_1>[0]) {
|
|
172
|
+
return withTrace('YOUR_TOOL_1', input, _YOUR_TOOL_1)
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
export function YOUR_TOOL_2(input: Parameters<typeof _YOUR_TOOL_2>[0]) {
|
|
176
|
+
return withTrace('YOUR_TOOL_2', input, _YOUR_TOOL_2)
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
export async function dispatchTool(name: string, args: Record<string, unknown>) {
|
|
180
|
+
switch (name) {
|
|
181
|
+
case 'YOUR_TOOL_1': return YOUR_TOOL_1(args as Parameters<typeof _YOUR_TOOL_1>[0])
|
|
182
|
+
case 'YOUR_TOOL_2': return YOUR_TOOL_2(args as Parameters<typeof _YOUR_TOOL_2>[0])
|
|
183
|
+
default: return _dispatchTool(name, args)
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Alternative: `wrapTool` shorthand
|
|
189
|
+
|
|
190
|
+
For simpler setups where you don't need mock support or dashboard replays:
|
|
191
|
+
|
|
192
|
+
```ts
|
|
193
|
+
// ed_tools.ts
|
|
194
|
+
import { wrapTool } from 'elasticdash-test'
|
|
195
|
+
import { YOUR_TOOL_1 as YOUR_TOOL_1_impl } from './YOUR_SOURCE_PATH'
|
|
196
|
+
|
|
197
|
+
export const YOUR_TOOL_1 = wrapTool('YOUR_TOOL_1', YOUR_TOOL_1_impl)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Important rules
|
|
201
|
+
|
|
202
|
+
- The string name passed to `resolveMock()`, `safeRecordToolCall()`, `withTrace()`, or `wrapTool()` **must match** the exported function name exactly.
|
|
203
|
+
- Each tool function must accept a single input object and return a plain value (JSON-serializable).
|
|
204
|
+
- Tool functions must not close over HTTP context, framework state, or database clients — extract pure logic first.
|
|
205
|
+
|
|
206
|
+
### Next.js only
|
|
207
|
+
|
|
208
|
+
Add `elasticdash-test` to `serverExternalPackages` in `next.config.ts`:
|
|
209
|
+
|
|
210
|
+
```ts
|
|
211
|
+
// next.config.ts
|
|
212
|
+
const nextConfig = {
|
|
213
|
+
serverExternalPackages: ['elasticdash-test'],
|
|
214
|
+
}
|
|
215
|
+
export default nextConfig
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Step 3: Create `ed_workflows.ts`
|
|
221
|
+
|
|
222
|
+
Create `ed_workflows.ts` in the project root. This file exports workflow functions for the ElasticDash runner.
|
|
223
|
+
|
|
224
|
+
### Simple case — direct re-export
|
|
225
|
+
|
|
226
|
+
```ts
|
|
227
|
+
// ed_workflows.ts
|
|
228
|
+
// Replace with your actual workflow function and source path
|
|
229
|
+
export { YOUR_WORKFLOW } from './YOUR_SOURCE_PATH'
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### Framework adapter case (Next.js / Remix)
|
|
233
|
+
|
|
234
|
+
When the workflow lives inside a route handler, create a plain-value wrapper:
|
|
235
|
+
|
|
236
|
+
```ts
|
|
237
|
+
// ed_workflows.ts
|
|
238
|
+
import { YOUR_HANDLER as _YOUR_HANDLER } from './app/api/YOUR_ROUTE/route'
|
|
239
|
+
|
|
240
|
+
export async function YOUR_WORKFLOW(input: { message: string; sessionId: string }) {
|
|
241
|
+
return _YOUR_HANDLER(input)
|
|
242
|
+
}
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### Streaming workflow case (Vercel AI SDK)
|
|
246
|
+
|
|
247
|
+
Create a separate handler file that is only imported by `ed_workflows.ts`:
|
|
248
|
+
|
|
249
|
+
```ts
|
|
250
|
+
// app/api/chat-stream/chatStreamHandler.ts
|
|
251
|
+
import { NextRequest } from 'next/server'
|
|
252
|
+
import { readVercelAIStream, recordToolCall } from 'elasticdash-test'
|
|
253
|
+
import type { VercelAIStreamResult } from 'elasticdash-test'
|
|
254
|
+
import { POST } from './route'
|
|
255
|
+
|
|
256
|
+
export async function chatStreamHandler(args: {
|
|
257
|
+
messages: Array<{ role: string; content: string }>
|
|
258
|
+
sessionId?: string
|
|
259
|
+
}): Promise<VercelAIStreamResult> {
|
|
260
|
+
const req = new NextRequest('http://localhost/api/chat-stream', {
|
|
261
|
+
method: 'POST',
|
|
262
|
+
headers: { 'Content-Type': 'application/json' },
|
|
263
|
+
body: JSON.stringify(args),
|
|
264
|
+
})
|
|
265
|
+
|
|
266
|
+
const response = await POST(req)
|
|
267
|
+
|
|
268
|
+
if (response.headers.get('x-vercel-ai-data-stream') !== 'v1') {
|
|
269
|
+
const errorMessage = await response.text().catch(() => `HTTP ${response.status}`)
|
|
270
|
+
return { message: errorMessage, type: 'error', error: errorMessage }
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
const result = await readVercelAIStream(response)
|
|
274
|
+
recordToolCall('chatStream', args, result)
|
|
275
|
+
return result
|
|
276
|
+
}
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
Then re-export from `ed_workflows.ts`:
|
|
280
|
+
|
|
281
|
+
```ts
|
|
282
|
+
// ed_workflows.ts
|
|
283
|
+
export { chatStreamHandler } from './app/api/chat-stream/chatStreamHandler'
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Requirements for all workflow exports
|
|
287
|
+
|
|
288
|
+
- Accept only JSON-serializable inputs (strings, numbers, arrays, plain objects)
|
|
289
|
+
- Return only JSON-serializable outputs
|
|
290
|
+
- Must not depend on framework runtime APIs, HTTP request context, or live service clients
|
|
291
|
+
- If a dependency is non-serializable (e.g., a database client), instantiate it inside `ed_workflows.ts` rather than passing it in as a parameter
|
|
292
|
+
|
|
293
|
+
---
|
|
294
|
+
|
|
295
|
+
## Step 4: Update workflow imports
|
|
296
|
+
|
|
297
|
+
Change your workflow code to import tools from `ed_tools.ts` instead of the original source files:
|
|
298
|
+
|
|
299
|
+
```ts
|
|
300
|
+
// BEFORE
|
|
301
|
+
import { YOUR_TOOL_1 } from './services/YOUR_SOURCE'
|
|
302
|
+
|
|
303
|
+
// AFTER
|
|
304
|
+
import { YOUR_TOOL_1 } from './ed_tools'
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
This single import change makes all tool calls observable by ElasticDash.
|
|
308
|
+
|
|
309
|
+
---
|
|
310
|
+
|
|
311
|
+
## Step 5: Add config and scripts
|
|
312
|
+
|
|
313
|
+
### `elasticdash.config.ts`
|
|
314
|
+
|
|
315
|
+
Create in project root:
|
|
316
|
+
|
|
317
|
+
```ts
|
|
318
|
+
// elasticdash.config.ts
|
|
319
|
+
export default {
|
|
320
|
+
testMatch: ['**/*.ai.test.ts'],
|
|
321
|
+
traceMode: 'local' as const,
|
|
322
|
+
}
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
### `package.json` scripts
|
|
326
|
+
|
|
327
|
+
Add these scripts:
|
|
328
|
+
|
|
329
|
+
```json
|
|
330
|
+
{
|
|
331
|
+
"scripts": {
|
|
332
|
+
"dashboard:ai": "elasticdash dashboard",
|
|
333
|
+
"test:ai": "elasticdash test"
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
**If the project uses TypeScript path aliases** (e.g., `@/lib/...` in `tsconfig.json` `paths`):
|
|
339
|
+
|
|
340
|
+
```json
|
|
341
|
+
{
|
|
342
|
+
"scripts": {
|
|
343
|
+
"dashboard:ai": "NODE_OPTIONS='--import tsx/esm --require tsx/cjs --require tsconfig-paths/register' elasticdash dashboard"
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
---
|
|
349
|
+
|
|
350
|
+
## Step 6: Environment variables
|
|
351
|
+
|
|
352
|
+
Set in `.env` or CI secrets:
|
|
353
|
+
|
|
354
|
+
| Variable | Description | Required |
|
|
355
|
+
|----------|-------------|----------|
|
|
356
|
+
| `ELASTICDASH_API_URL` | Backend server URL (`https://server.elasticdash.com` for cloud) | For upload/CI |
|
|
357
|
+
| `ELASTICDASH_API_KEY` | Project API key from dashboard | For upload/CI |
|
|
358
|
+
| `ELASTICDASH_CAPTURE_TRACE` | Set to `1` to record a trace fixture | For trace recording |
|
|
359
|
+
| `OPENAI_API_KEY` | OpenAI API key | If using OpenAI |
|
|
360
|
+
| `ANTHROPIC_API_KEY` | Anthropic API key | If using Claude |
|
|
361
|
+
| `GEMINI_API_KEY` | Google Gemini API key | If using Gemini |
|
|
362
|
+
| `GROK_API_KEY` | xAI Grok API key | If using Grok |
|
|
363
|
+
|
|
364
|
+
---
|
|
365
|
+
|
|
366
|
+
## Step 7: Write a test
|
|
367
|
+
|
|
368
|
+
### Option A: `aiTest` — live workflow testing
|
|
369
|
+
|
|
370
|
+
Create a file ending in `.ai.test.ts`:
|
|
371
|
+
|
|
372
|
+
```ts
|
|
373
|
+
// tests/YOUR_WORKFLOW.ai.test.ts
|
|
374
|
+
import 'elasticdash-test/dist/test-setup.js'
|
|
375
|
+
import { expect } from 'expect'
|
|
376
|
+
|
|
377
|
+
// Import your workflow from ed_workflows.ts
|
|
378
|
+
import { YOUR_WORKFLOW } from '../ed_workflows'
|
|
379
|
+
|
|
380
|
+
aiTest('YOUR_TEST_NAME', async (ctx) => {
|
|
381
|
+
await YOUR_WORKFLOW({ /* your input */ })
|
|
382
|
+
|
|
383
|
+
// Assert an LLM step occurred
|
|
384
|
+
expect(ctx.trace).toHaveLLMStep({ model: 'gpt-4' })
|
|
385
|
+
|
|
386
|
+
// Assert a tool was called
|
|
387
|
+
expect(ctx.trace).toCallTool('YOUR_TOOL_NAME')
|
|
388
|
+
|
|
389
|
+
// Semantic output matching (LLM-judged)
|
|
390
|
+
expect(ctx.trace).toMatchSemanticOutput('expected output description')
|
|
391
|
+
})
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
### Option B: `defineTest` — CI/CD fixture-based testing
|
|
395
|
+
|
|
396
|
+
First, record a trace:
|
|
397
|
+
|
|
398
|
+
```bash
|
|
399
|
+
ELASTICDASH_CAPTURE_TRACE=1 tsx YOUR_WORKFLOW_SCRIPT.ts
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
Then create `ed_tests.ts`:
|
|
403
|
+
|
|
404
|
+
```ts
|
|
405
|
+
// ed_tests.ts
|
|
406
|
+
import { defineTest } from 'elasticdash-test'
|
|
407
|
+
import { YOUR_WORKFLOW } from './ed_workflows'
|
|
408
|
+
|
|
409
|
+
defineTest({
|
|
410
|
+
name: 'YOUR_TEST_NAME',
|
|
411
|
+
trace: './.ed_traces/YOUR_TRACE_FILE.json',
|
|
412
|
+
target: { type: 'tool_call', step_id: 'tool_call_0' },
|
|
413
|
+
benchmarks: { max_duration_ms: 2000 },
|
|
414
|
+
run: async () => {
|
|
415
|
+
await YOUR_WORKFLOW({ /* your input */ })
|
|
416
|
+
},
|
|
417
|
+
})
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
Run:
|
|
421
|
+
|
|
422
|
+
```bash
|
|
423
|
+
npx ed ed-test --no-upload
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
---
|
|
427
|
+
|
|
428
|
+
## Step 8: Run and verify
|
|
429
|
+
|
|
430
|
+
```bash
|
|
431
|
+
# Run aiTest tests
|
|
432
|
+
npx elasticdash test
|
|
433
|
+
|
|
434
|
+
# Run defineTest benchmarks
|
|
435
|
+
npx ed ed-test --no-upload
|
|
436
|
+
|
|
437
|
+
# Open the dashboard
|
|
438
|
+
npx elasticdash dashboard
|
|
439
|
+
|
|
440
|
+
# Record a trace fixture
|
|
441
|
+
ELASTICDASH_CAPTURE_TRACE=1 tsx your-workflow.ts
|
|
442
|
+
```
|
|
443
|
+
|
|
444
|
+
---
|
|
445
|
+
|
|
446
|
+
## Decision Trees
|
|
447
|
+
|
|
448
|
+
### Subprocess mode vs HTTP mode
|
|
449
|
+
|
|
450
|
+
```
|
|
451
|
+
Does your workflow live inside a framework route handler (Next.js, Remix, SvelteKit)?
|
|
452
|
+
YES → Use HTTP mode:
|
|
453
|
+
1. Configure workflow in elasticdash.config.ts with mode: 'http'
|
|
454
|
+
2. Add initHttpRunContext() to your request handler
|
|
455
|
+
3. Use wrapTool/wrapAI for observability
|
|
456
|
+
NO → Use subprocess mode (default):
|
|
457
|
+
1. Export workflow from ed_workflows.ts
|
|
458
|
+
2. Tools auto-intercepted via ed_tools.ts
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
### HTTP mode config template
|
|
462
|
+
|
|
463
|
+
```ts
|
|
464
|
+
// elasticdash.config.ts
|
|
465
|
+
export default {
|
|
466
|
+
testMatch: ['**/*.ai.test.ts'],
|
|
467
|
+
workflows: {
|
|
468
|
+
YOUR_WORKFLOW: {
|
|
469
|
+
mode: 'http',
|
|
470
|
+
url: 'http://localhost:3001/api/YOUR_ENDPOINT',
|
|
471
|
+
method: 'POST',
|
|
472
|
+
headers: {
|
|
473
|
+
'Content-Type': 'application/json',
|
|
474
|
+
},
|
|
475
|
+
bodyTemplate: {
|
|
476
|
+
messages: [{ role: 'user', content: '{{input.message}}' }],
|
|
477
|
+
},
|
|
478
|
+
responseFormat: 'vercel-ai-stream',
|
|
479
|
+
},
|
|
480
|
+
},
|
|
481
|
+
}
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
### HTTP mode handler setup
|
|
485
|
+
|
|
486
|
+
```ts
|
|
487
|
+
// app/api/YOUR_ENDPOINT/route.ts
|
|
488
|
+
import { initHttpRunContext, wrapTool, wrapAI } from 'elasticdash-test'
|
|
489
|
+
|
|
490
|
+
export async function POST(req: Request) {
|
|
491
|
+
const runId = req.headers.get('x-elasticdash-run-id')
|
|
492
|
+
const serverUrl = req.headers.get('x-elasticdash-server')
|
|
493
|
+
if (runId && serverUrl) {
|
|
494
|
+
await initHttpRunContext(runId, serverUrl)
|
|
495
|
+
}
|
|
496
|
+
// ... rest of handler
|
|
497
|
+
}
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
### AI call recording
|
|
501
|
+
|
|
502
|
+
```
|
|
503
|
+
Does your workflow call LLMs via OpenAI/Gemini/Grok SDKs?
|
|
504
|
+
YES → Automatic interception, no code changes needed
|
|
505
|
+
NO (custom provider or Anthropic SDK) → Use wrapAI:
|
|
506
|
+
import { wrapAI } from 'elasticdash-test'
|
|
507
|
+
export const callLLM = wrapAI('model-name', async (messages) => {
|
|
508
|
+
return await yourLLMClient.call(messages)
|
|
509
|
+
})
|
|
510
|
+
```
|
|
511
|
+
|
|
512
|
+
### Agent setup (`ed_agents.ts`)
|
|
513
|
+
|
|
514
|
+
```
|
|
515
|
+
Does your project use a multi-step agent with a planner/executor pattern?
|
|
516
|
+
YES → Create ed_agents.ts:
|
|
517
|
+
export { plannerAgent, executorAgent } from './your-agent-logic'
|
|
518
|
+
// OR use SDK reference implementations:
|
|
519
|
+
export { plannerAgent, executorAgent, resumeAgentFromTrace } from 'elasticdash-test'
|
|
520
|
+
NO → Skip ed_agents.ts
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
---
|
|
524
|
+
|
|
525
|
+
## Troubleshooting
|
|
526
|
+
|
|
527
|
+
| Error | Cause | Fix |
|
|
528
|
+
|-------|-------|-----|
|
|
529
|
+
| `replay miss: tool_call::YOUR_TOOL` | Trace fixture is stale or workflow changed | Re-record: `ELASTICDASH_CAPTURE_TRACE=1 tsx your-workflow.ts` |
|
|
530
|
+
| `MODULE_NOT_FOUND: elasticdash-test` | SDK not installed or Next.js bundling issue | Run `npm install elasticdash-test`. For Next.js, add to `serverExternalPackages` |
|
|
531
|
+
| `Cannot find module '@/...'` | Path aliases not resolved at runtime | Use advanced dashboard script with `tsconfig-paths/register` |
|
|
532
|
+
| `test has no run function` | `run` field missing in `defineTest` | Add `run: async () => { ... }` to the test definition |
|
|
533
|
+
| `Tool "x" not found in registry` | Tool not exported from `ed_tools.ts` | Export the tool function from `ed_tools.ts` |
|
|
534
|
+
| `ERR_UNKNOWN_FILE_EXTENSION` | ESM/CJS mismatch | Check `package.json` `type` field and `tsconfig.json` `module` setting |
|
|
535
|
+
| Git metadata shows `unknown` | No `.git` directory | Ensure repo is checked out (common in CI with shallow clones) |
|
|
536
|
+
|
|
537
|
+
---
|
|
538
|
+
|
|
539
|
+
## Final checklist
|
|
540
|
+
|
|
541
|
+
After integration, verify these files exist:
|
|
542
|
+
|
|
543
|
+
```
|
|
544
|
+
your-project/
|
|
545
|
+
ed_tools.ts # Instrumented tool wrappers
|
|
546
|
+
ed_workflows.ts # Workflow exports
|
|
547
|
+
elasticdash.config.ts # Test runner config
|
|
548
|
+
package.json # dashboard:ai and test:ai scripts added
|
|
549
|
+
.gitignore # .temp/ and .ed_traces/ added
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
Verify with:
|
|
553
|
+
|
|
554
|
+
```bash
|
|
555
|
+
npx elasticdash test # Should discover and run *.ai.test.ts files
|
|
556
|
+
npx elasticdash dashboard # Should open the dashboard UI
|
|
557
|
+
```
|
package/docs/agents.md
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Agent Mid-Trace Replay
|
|
2
|
+
|
|
3
|
+
ElasticDash supports resuming long-running agents from any task in their plan — without re-executing already-completed steps.
|
|
4
|
+
|
|
5
|
+
## Use Cases
|
|
6
|
+
|
|
7
|
+
- **Resuming after failures**: If task 3 of 5 fails, fix the issue and re-run from task 3 only
|
|
8
|
+
- **Pausing for approval**: Capture state after task 2, get human sign-off, then continue
|
|
9
|
+
- **Debugging in isolation**: Re-run a single task with modified input to diagnose a problem
|
|
10
|
+
|
|
11
|
+
## How It Works
|
|
12
|
+
|
|
13
|
+
Agents are structured as an **AgentPlan** — an ordered list of **AgentTask** objects. When serialized with captured trace events, this forms an **AgentState** that can be saved and replayed later.
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
import { plannerAgent, executorAgent, resumeAgentFromTrace } from './ed_agents'
|
|
19
|
+
import { serializeAgentState, deserializeAgentState } from 'elasticdash-test'
|
|
20
|
+
import fs from 'node:fs'
|
|
21
|
+
|
|
22
|
+
// 1. Generate a plan
|
|
23
|
+
const plan = await plannerAgent('Show me sales for Q1', { userToken: 'tok-abc' })
|
|
24
|
+
|
|
25
|
+
// 2. Execute the plan (runs all tasks sequentially)
|
|
26
|
+
const completedPlan = await executorAgent(plan)
|
|
27
|
+
|
|
28
|
+
// 3. Serialize and save state (e.g., after partial execution)
|
|
29
|
+
const state = serializeAgentState(completedPlan, [] /* pass recorder.events in worker context */)
|
|
30
|
+
fs.writeFileSync('agent-state.json', JSON.stringify(state, null, 2))
|
|
31
|
+
|
|
32
|
+
// 4. Later: load saved state and resume from task 2 (0-based index 1)
|
|
33
|
+
const savedState = JSON.parse(fs.readFileSync('agent-state.json', 'utf8'))
|
|
34
|
+
const stateToResume = deserializeAgentState({ ...savedState, resumeFromTaskIndex: 1 })
|
|
35
|
+
const resumedPlan = await resumeAgentFromTrace(stateToResume)
|
|
36
|
+
|
|
37
|
+
console.log('Resumed plan status:', resumedPlan.status)
|
|
38
|
+
console.log('Task outputs:', resumedPlan.tasks.map((t) => ({ id: t.id, status: t.status })))
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Data Structures
|
|
42
|
+
|
|
43
|
+
### AgentState
|
|
44
|
+
|
|
45
|
+
```ts
|
|
46
|
+
interface AgentState {
|
|
47
|
+
plan: AgentPlan // Full plan with all tasks (completed and pending)
|
|
48
|
+
trace: WorkflowEvent[] // Captured trace events from previous execution
|
|
49
|
+
resumeFromTaskIndex: number // Zero-based index — tasks before this are loaded from cache
|
|
50
|
+
}
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### AgentPlan
|
|
54
|
+
|
|
55
|
+
```ts
|
|
56
|
+
interface AgentPlan {
|
|
57
|
+
id: string
|
|
58
|
+
tasks: AgentTask[]
|
|
59
|
+
status: 'planning' | 'executing' | 'completed' | 'failed' | 'paused'
|
|
60
|
+
currentTaskIndex: number
|
|
61
|
+
context: Record<string, unknown>
|
|
62
|
+
metadata: Record<string, unknown>
|
|
63
|
+
}
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### AgentTask
|
|
67
|
+
|
|
68
|
+
```ts
|
|
69
|
+
interface AgentTask {
|
|
70
|
+
id: string
|
|
71
|
+
status: 'pending' | 'in-progress' | 'completed' | 'failed'
|
|
72
|
+
description: string
|
|
73
|
+
tool: string // Name of the tool function to invoke
|
|
74
|
+
input: unknown // May contain { $ref: "task-N.output.fieldName" } placeholders
|
|
75
|
+
output?: unknown // Populated after execution
|
|
76
|
+
error?: string
|
|
77
|
+
startedAt?: number
|
|
78
|
+
completedAt?: number
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Task Input Placeholders
|
|
83
|
+
|
|
84
|
+
Task inputs can reference previous task outputs using `{ $ref: "taskId.output.fieldPath" }`:
|
|
85
|
+
|
|
86
|
+
```ts
|
|
87
|
+
// task-2 uses the embedding produced by task-1
|
|
88
|
+
{
|
|
89
|
+
id: 'task-2',
|
|
90
|
+
tool: 'taskSelectorService',
|
|
91
|
+
input: {
|
|
92
|
+
queryEmbedding: { $ref: 'task-1.output.embedding' },
|
|
93
|
+
topK: 3,
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Placeholders are resolved at execution time by `resolveTaskInput()`.
|
|
99
|
+
|
|
100
|
+
## Dashboard Integration
|
|
101
|
+
|
|
102
|
+
When running an agent workflow through the dashboard:
|
|
103
|
+
|
|
104
|
+
1. **Agent task observations** are visually highlighted with a purple background and left border
|
|
105
|
+
2. Each observation shows a **T1 / T2 / T3** badge indicating which task it belongs to
|
|
106
|
+
3. In the observation detail panel, a **"Resume from Task N"** button appears (agent steps only)
|
|
107
|
+
4. Clicking it calls `/api/resume-agent-from-task` with the serialized `AgentState` and chosen `taskIndex`
|
|
108
|
+
5. The resumed run is added as a new trace in the comparison table
|
|
109
|
+
|
|
110
|
+
## Best Practices
|
|
111
|
+
|
|
112
|
+
- **Keep tasks idempotent** where possible — if a task must be re-run, ensure it produces the same result
|
|
113
|
+
- **Store minimal outputs** — only record what downstream tasks need, not full API responses
|
|
114
|
+
- **Version your state schema** — if tool interfaces change, old states may need migration
|
|
115
|
+
- **Use sequential tasks** — the current implementation runs tasks one-by-one; parallel task support is planned
|
|
116
|
+
|
|
117
|
+
## Example: Debugging a Failed Task
|
|
118
|
+
|
|
119
|
+
```ts
|
|
120
|
+
// 1. Original execution fails at task 3
|
|
121
|
+
const plan = await plannerAgent('Process refund for order-123')
|
|
122
|
+
const result = await executorAgent(plan)
|
|
123
|
+
// Error: task 3 (calculateRefundAmount) failed
|
|
124
|
+
|
|
125
|
+
// 2. Save the state
|
|
126
|
+
const state = serializeAgentState(result, recorder.events)
|
|
127
|
+
fs.writeFileSync('failed-run.json', JSON.stringify(state))
|
|
128
|
+
|
|
129
|
+
// 3. Fix the issue in your tool/code
|
|
130
|
+
|
|
131
|
+
// 4. Resume from task 3 with corrected state
|
|
132
|
+
const savedState = JSON.parse(fs.readFileSync('failed-run.json', 'utf8'))
|
|
133
|
+
const fixed = await resumeAgentFromTrace({
|
|
134
|
+
...deserializeAgentState(savedState),
|
|
135
|
+
resumeFromTaskIndex: 2 // 0-based: task 3 = index 2
|
|
136
|
+
})
|
|
137
|
+
|
|
138
|
+
// Tasks 1-2 use cached results; tasks 3+ execute with the fixes
|
|
139
|
+
console.log('Fixed plan:', fixed.status)
|
|
140
|
+
```
|