elasticdash-test 0.1.24 → 0.1.25-alpha-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,557 @@
1
+ # ElasticDash SDK — Agent Integration Guide
2
+
3
+ > **Audience**: Coding agents (Claude Code, Cursor, Copilot, Codex, Windsurf, etc.) integrating `elasticdash-test` into a user's project.
4
+ >
5
+ > **When to use**: Follow this guide step-by-step when a user asks you to "integrate ElasticDash", "set up ElasticDash testing", "implement elasticdash-test", or similar. Most steps create or modify a specific file — work through them in order.
6
+ >
7
+ > **How to use**: Read the full guide first, inspect the structure of the user's project, then execute each step. Use the Decision Trees section to choose the right pattern for the user's project (framework type, module system, tool architecture).
8
+
9
+ ---
10
+
11
+ ## Prerequisites
12
+
13
+ - Node.js >= 20
14
+ - npm, yarn, or pnpm
15
+ - At least one AI workflow function (a callable async function that makes LLM/tool calls)
16
+ - LLM provider API keys for providers used in workflows (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`)
17
+
18
+ ### Detect project type before starting
19
+
20
+ Check these before choosing patterns:
21
+
22
+ | Check | How | Result |
23
+ |-------|-----|--------|
24
+ | Module system | `package.json` → `"type"` field | `"module"` = ESM, `"commonjs"` or missing = CJS |
25
+ | Framework | Look for `next.config.*`, `remix.config.*`, `svelte.config.*` | Present = framework project, use HTTP mode |
26
+ | Path aliases | `tsconfig.json` → `"paths"` field | Present = need advanced dashboard script |
27
+ | Tool architecture | Does the project use a central `dispatchTool(name, args)` function? | Yes = Pattern A, No = Pattern B |
28
+
29
+ ---
30
+
31
+ ## Step 1: Install
32
+
33
+ ```bash
34
+ npm install elasticdash-test
35
+ ```
36
+
37
+ Add to `.gitignore`:
38
+
39
+ ```gitignore
40
+ .temp/
41
+ .ed_traces/
42
+ ```
43
+
44
+ ---
45
+
46
+ ## Step 2: Create `ed_tools.ts`
47
+
48
+ Create `ed_tools.ts` in the project root. This file wraps each tool function with tracing so ElasticDash can record and replay tool calls.
49
+
50
+ ### Choose your pattern
51
+
52
+ - **Pattern B (recommended for most projects)**: inline async with mock support. Use when tools are imported individually, no central dispatcher exists, or you want dashboard mock support.
53
+ - **Pattern A**: `withTrace` HOF with central dispatcher. Use when tools are pure functions called through a single `dispatchTool(name, args)` dispatcher.
54
+
55
+ ### Pattern B template (recommended)
56
+
57
+ ```ts
58
+ // ed_tools.ts
59
+ // Replace imports with your actual tool functions and source paths
60
+ import { YOUR_TOOL_1 as YOUR_TOOL_1_impl } from './YOUR_SOURCE_PATH_1'
61
+ import { YOUR_TOOL_2 as YOUR_TOOL_2_impl } from './YOUR_SOURCE_PATH_2'
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // Helpers — copy as-is, no customization needed
65
+ // ---------------------------------------------------------------------------
66
+
67
+ function resolveMock(toolName: string): { mocked: true; result: unknown } | { mocked: false } {
68
+ const g = globalThis as any
69
+ const mocks = g.__ELASTICDASH_TOOL_MOCKS__
70
+ if (!mocks) return { mocked: false }
71
+
72
+ const entry = mocks[toolName]
73
+ if (!entry || entry.mode === 'live') return { mocked: false }
74
+
75
+ if (!g.__ELASTICDASH_TOOL_CALL_COUNTERS__) g.__ELASTICDASH_TOOL_CALL_COUNTERS__ = {}
76
+ const counters = g.__ELASTICDASH_TOOL_CALL_COUNTERS__
77
+ counters[toolName] = (counters[toolName] ?? 0) + 1
78
+ const callNumber = counters[toolName]
79
+
80
+ if (entry.mode === 'mock-all') {
81
+ const data = entry.mockData ?? {}
82
+ const result = data[callNumber] !== undefined ? data[callNumber] : data[0]
83
+ return { mocked: true, result }
84
+ }
85
+
86
+ if (entry.mode === 'mock-specific') {
87
+ const indices = entry.callIndices ?? []
88
+ if (indices.includes(callNumber)) {
89
+ return { mocked: true, result: (entry.mockData ?? {})[callNumber] }
90
+ }
91
+ return { mocked: false }
92
+ }
93
+
94
+ return { mocked: false }
95
+ }
96
+
97
+ async function safeRecordToolCall(tool: string, input: any, result: any) {
98
+ if (!(globalThis as any).__ELASTICDASH_WORKER__) return
99
+ try {
100
+ const { recordToolCall } = await import('elasticdash-test')
101
+ recordToolCall(tool, input, result)
102
+ } catch { /* tracing must never block business logic */ }
103
+ }
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Tools — one export per tool, following this pattern
107
+ // ---------------------------------------------------------------------------
108
+
109
+ // TEMPLATE: Copy this block for each tool, replacing YOUR_TOOL_1 with the tool's exported name and YOUR_TOOL_1_impl with its imported implementation
110
+ export const YOUR_TOOL_1 = async (input: any) => {
111
+ const mock = resolveMock('YOUR_TOOL_1')
112
+ if (mock.mocked) {
113
+ await safeRecordToolCall('YOUR_TOOL_1', input, mock.result)
114
+ return mock.result
115
+ }
116
+
117
+ return await YOUR_TOOL_1_impl(input)
118
+ .then(async (res: any) => {
119
+ await safeRecordToolCall('YOUR_TOOL_1', input, res)
120
+ return res
121
+ })
122
+ .catch(async (err: any) => {
123
+ await safeRecordToolCall('YOUR_TOOL_1', input, err)
124
+ throw err
125
+ })
126
+ }
127
+
128
+ export const YOUR_TOOL_2 = async (input: any) => {
129
+ const mock = resolveMock('YOUR_TOOL_2')
130
+ if (mock.mocked) {
131
+ await safeRecordToolCall('YOUR_TOOL_2', input, mock.result)
132
+ return mock.result
133
+ }
134
+
135
+ return await YOUR_TOOL_2_impl(input)
136
+ .then(async (res: any) => {
137
+ await safeRecordToolCall('YOUR_TOOL_2', input, res)
138
+ return res
139
+ })
140
+ .catch(async (err: any) => {
141
+ await safeRecordToolCall('YOUR_TOOL_2', input, err)
142
+ throw err
143
+ })
144
+ }
145
+ ```
146
+
147
+ ### Pattern A template (dispatcher-based)
148
+
149
+ ```ts
150
+ // ed_tools.ts
151
+ // Replace imports with your actual tool functions and source paths
152
+ import {
153
+ YOUR_TOOL_1 as _YOUR_TOOL_1,
154
+ YOUR_TOOL_2 as _YOUR_TOOL_2,
155
+ dispatchTool as _dispatchTool,
156
+ } from './YOUR_TOOLS_SOURCE'
157
+
158
+ async function withTrace<I, O>(
159
+ toolName: string,
160
+ input: I,
161
+ fn: (input: I) => Promise<O>,
162
+ ): Promise<O> {
163
+ const result = await fn(input)
164
+ try {
165
+ const { recordToolCall } = await import('elasticdash-test')
166
+ recordToolCall(toolName, input, result)
167
+ } catch { /* tracing must never block business logic */ }
168
+ return result
169
+ }
170
+
171
+ export function YOUR_TOOL_1(input: Parameters<typeof _YOUR_TOOL_1>[0]) {
172
+ return withTrace('YOUR_TOOL_1', input, _YOUR_TOOL_1)
173
+ }
174
+
175
+ export function YOUR_TOOL_2(input: Parameters<typeof _YOUR_TOOL_2>[0]) {
176
+ return withTrace('YOUR_TOOL_2', input, _YOUR_TOOL_2)
177
+ }
178
+
179
+ export async function dispatchTool(name: string, args: Record<string, unknown>) {
180
+ switch (name) {
181
+ case 'YOUR_TOOL_1': return YOUR_TOOL_1(args as Parameters<typeof _YOUR_TOOL_1>[0])
182
+ case 'YOUR_TOOL_2': return YOUR_TOOL_2(args as Parameters<typeof _YOUR_TOOL_2>[0])
183
+ default: return _dispatchTool(name, args)
184
+ }
185
+ }
186
+ ```
187
+
188
+ ### Alternative: `wrapTool` shorthand
189
+
190
+ For simpler setups where you don't need mock support or dashboard replays:
191
+
192
+ ```ts
193
+ // ed_tools.ts
194
+ import { wrapTool } from 'elasticdash-test'
195
+ import { YOUR_TOOL_1 as YOUR_TOOL_1_impl } from './YOUR_SOURCE_PATH'
196
+
197
+ export const YOUR_TOOL_1 = wrapTool('YOUR_TOOL_1', YOUR_TOOL_1_impl)
198
+ ```
199
+
200
+ ### Important rules
201
+
202
+ - The string name passed to `resolveMock()`, `safeRecordToolCall()`, or `wrapTool()` **must match** the exported function name exactly.
203
+ - Each tool function must accept a single input object and return a plain value (JSON-serializable).
204
+ - Tool functions must not close over HTTP context, framework state, or database clients — extract pure logic first.
205
+
206
+ ### Next.js only
207
+
208
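+ Add `elasticdash-test` to `serverExternalPackages` in `next.config.ts` (on Next.js 14 and earlier, the equivalent option is `experimental.serverComponentsExternalPackages`):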
209
+
210
+ ```ts
211
+ // next.config.ts
212
+ const nextConfig = {
213
+ serverExternalPackages: ['elasticdash-test'],
214
+ }
215
+ export default nextConfig
216
+ ```
217
+
218
+ ---
219
+
220
+ ## Step 3: Create `ed_workflows.ts`
221
+
222
+ Create `ed_workflows.ts` in the project root. This file exports workflow functions for the ElasticDash runner.
223
+
224
+ ### Simple case — direct re-export
225
+
226
+ ```ts
227
+ // ed_workflows.ts
228
+ // Replace with your actual workflow function and source path
229
+ export { YOUR_WORKFLOW } from './YOUR_SOURCE_PATH'
230
+ ```
231
+
232
+ ### Framework adapter case (Next.js / Remix)
233
+
234
+ When the workflow lives inside a route handler, create a plain-value wrapper:
235
+
236
+ ```ts
237
+ // ed_workflows.ts
238
+ import { YOUR_HANDLER as _YOUR_HANDLER } from './app/api/YOUR_ROUTE/route'
239
+
240
+ export async function YOUR_WORKFLOW(input: { message: string; sessionId: string }) {
241
+ return _YOUR_HANDLER(input)
242
+ }
243
+ ```
244
+
245
+ ### Streaming workflow case (Vercel AI SDK)
246
+
247
+ Create a separate handler file that is only imported by `ed_workflows.ts`:
248
+
249
+ ```ts
250
+ // app/api/chat-stream/chatStreamHandler.ts
251
+ import { NextRequest } from 'next/server'
252
+ import { readVercelAIStream, recordToolCall } from 'elasticdash-test'
253
+ import type { VercelAIStreamResult } from 'elasticdash-test'
254
+ import { POST } from './route'
255
+
256
+ export async function chatStreamHandler(args: {
257
+ messages: Array<{ role: string; content: string }>
258
+ sessionId?: string
259
+ }): Promise<VercelAIStreamResult> {
260
+ const req = new NextRequest('http://localhost/api/chat-stream', {
261
+ method: 'POST',
262
+ headers: { 'Content-Type': 'application/json' },
263
+ body: JSON.stringify(args),
264
+ })
265
+
266
+ const response = await POST(req)
267
+
268
+ if (response.headers.get('x-vercel-ai-data-stream') !== 'v1') {
269
+ const errorMessage = await response.text().catch(() => `HTTP ${response.status}`)
270
+ return { message: errorMessage, type: 'error', error: errorMessage }
271
+ }
272
+
273
+ const result = await readVercelAIStream(response)
274
+ recordToolCall('chatStream', args, result)
275
+ return result
276
+ }
277
+ ```
278
+
279
+ Then re-export from `ed_workflows.ts`:
280
+
281
+ ```ts
282
+ // ed_workflows.ts
283
+ export { chatStreamHandler } from './app/api/chat-stream/chatStreamHandler'
284
+ ```
285
+
286
+ ### Requirements for all workflow exports
287
+
288
+ - Accept only JSON-serializable inputs (strings, numbers, arrays, plain objects)
289
+ - Return only JSON-serializable outputs
290
+ - Must not depend on framework runtime APIs, HTTP request context, or live service clients
291
+ - If a dependency is non-serializable (e.g., a database client), instantiate it inside `ed_workflows.ts` rather than passing it in as a parameter
292
+
293
+ ---
294
+
295
+ ## Step 4: Update workflow imports
296
+
297
+ Change your workflow code to import tools from `ed_tools.ts` instead of the original source files:
298
+
299
+ ```ts
300
+ // BEFORE
301
+ import { YOUR_TOOL_1 } from './services/YOUR_SOURCE'
302
+
303
+ // AFTER
304
+ import { YOUR_TOOL_1 } from './ed_tools'
305
+ ```
306
+
307
+ This single import change makes all tool calls observable by ElasticDash.
308
+
309
+ ---
310
+
311
+ ## Step 5: Add config and scripts
312
+
313
+ ### `elasticdash.config.ts`
314
+
315
+ Create in project root:
316
+
317
+ ```ts
318
+ // elasticdash.config.ts
319
+ export default {
320
+ testMatch: ['**/*.ai.test.ts'],
321
+ traceMode: 'local' as const,
322
+ }
323
+ ```
324
+
325
+ ### `package.json` scripts
326
+
327
+ Add these scripts:
328
+
329
+ ```json
330
+ {
331
+ "scripts": {
332
+ "dashboard:ai": "elasticdash dashboard",
333
+ "test:ai": "elasticdash test"
334
+ }
335
+ }
336
+ ```
337
+
338
+ **If the project uses TypeScript path aliases** (e.g., `@/lib/...` in `tsconfig.json` `paths`):
339
+
340
+ ```json
341
+ {
342
+ "scripts": {
343
+ "dashboard:ai": "NODE_OPTIONS='--import tsx/esm --require tsx/cjs --require tsconfig-paths/register' elasticdash dashboard"
344
+ }
345
+ }
346
+ ```
347
+
348
+ ---
349
+
350
+ ## Step 6: Environment variables
351
+
352
+ Set in `.env` or CI secrets:
353
+
354
+ | Variable | Description | Required |
355
+ |----------|-------------|----------|
356
+ | `ELASTICDASH_API_URL` | Backend server URL (`https://server.elasticdash.com` for cloud) | For upload/CI |
357
+ | `ELASTICDASH_API_KEY` | Project API key from dashboard | For upload/CI |
358
+ | `ELASTICDASH_CAPTURE_TRACE` | Set to `1` to record a trace fixture | For trace recording |
359
+ | `OPENAI_API_KEY` | OpenAI API key | If using OpenAI |
360
+ | `ANTHROPIC_API_KEY` | Anthropic API key | If using Claude |
361
+ | `GEMINI_API_KEY` | Google Gemini API key | If using Gemini |
362
+ | `GROK_API_KEY` | xAI Grok API key | If using Grok |
363
+
364
+ ---
365
+
366
+ ## Step 7: Write a test
367
+
368
+ ### Option A: `aiTest` — live workflow testing
369
+
370
+ Create a file ending in `.ai.test.ts`:
371
+
372
+ ```ts
373
+ // tests/YOUR_WORKFLOW.ai.test.ts
374
+ import 'elasticdash-test/dist/test-setup.js'
375
+ import { expect } from 'expect'
376
+
377
+ // Import your workflow from ed_workflows.ts
378
+ import { YOUR_WORKFLOW } from '../ed_workflows'
379
+
380
+ aiTest('YOUR_TEST_NAME', async (ctx) => {
381
+ await YOUR_WORKFLOW({ /* your input */ })
382
+
383
+ // Assert an LLM step occurred
384
+ expect(ctx.trace).toHaveLLMStep({ model: 'gpt-4' })
385
+
386
+ // Assert a tool was called
387
+ expect(ctx.trace).toCallTool('YOUR_TOOL_NAME')
388
+
389
+ // Semantic output matching (LLM-judged)
390
+ expect(ctx.trace).toMatchSemanticOutput('expected output description')
391
+ })
392
+ ```
393
+
394
+ ### Option B: `defineTest` — CI/CD fixture-based testing
395
+
396
+ First, record a trace:
397
+
398
+ ```bash
399
+ ELASTICDASH_CAPTURE_TRACE=1 tsx YOUR_WORKFLOW_SCRIPT.ts
400
+ ```
401
+
402
+ Then create `ed_tests.ts`:
403
+
404
+ ```ts
405
+ // ed_tests.ts
406
+ import { defineTest } from 'elasticdash-test'
407
+ import { YOUR_WORKFLOW } from './ed_workflows'
408
+
409
+ defineTest({
410
+ name: 'YOUR_TEST_NAME',
411
+ trace: './.ed_traces/YOUR_TRACE_FILE.json',
412
+ target: { type: 'tool_call', step_id: 'tool_call_0' },
413
+ benchmarks: { max_duration_ms: 2000 },
414
+ run: async () => {
415
+ await YOUR_WORKFLOW({ /* your input */ })
416
+ },
417
+ })
418
+ ```
419
+
420
+ Run:
421
+
422
+ ```bash
423
+ npx ed ed-test --no-upload
424
+ ```
425
+
426
+ ---
427
+
428
+ ## Step 8: Run and verify
429
+
430
+ ```bash
431
+ # Run aiTest tests
432
+ npx elasticdash test
433
+
434
+ # Run defineTest benchmarks
435
+ npx ed ed-test --no-upload
436
+
437
+ # Open the dashboard
438
+ npx elasticdash dashboard
439
+
440
+ # Record a trace fixture
441
+ ELASTICDASH_CAPTURE_TRACE=1 tsx your-workflow.ts
442
+ ```
443
+
444
+ ---
445
+
446
+ ## Decision Trees
447
+
448
+ ### Subprocess mode vs HTTP mode
449
+
450
+ ```
451
+ Does your workflow live inside a framework route handler (Next.js, Remix, SvelteKit)?
452
+ YES → Use HTTP mode:
453
+ 1. Configure workflow in elasticdash.config.ts with mode: 'http'
454
+ 2. Add initHttpRunContext() to your request handler
455
+ 3. Use wrapTool/wrapAI for observability
456
+ NO → Use subprocess mode (default):
457
+ 1. Export workflow from ed_workflows.ts
458
+ 2. Tools auto-intercepted via ed_tools.ts
459
+ ```
460
+
461
+ ### HTTP mode config template
462
+
463
+ ```ts
464
+ // elasticdash.config.ts
465
+ export default {
466
+ testMatch: ['**/*.ai.test.ts'],
467
+ workflows: {
468
+ YOUR_WORKFLOW: {
469
+ mode: 'http',
470
+ url: 'http://localhost:3001/api/YOUR_ENDPOINT',
471
+ method: 'POST',
472
+ headers: {
473
+ 'Content-Type': 'application/json',
474
+ },
475
+ bodyTemplate: {
476
+ messages: [{ role: 'user', content: '{{input.message}}' }],
477
+ },
478
+ responseFormat: 'vercel-ai-stream',
479
+ },
480
+ },
481
+ }
482
+ ```
483
+
484
+ ### HTTP mode handler setup
485
+
486
+ ```ts
487
+ // app/api/YOUR_ENDPOINT/route.ts
488
+ import { initHttpRunContext, wrapTool, wrapAI } from 'elasticdash-test'
489
+
490
+ export async function POST(req: Request) {
491
+ const runId = req.headers.get('x-elasticdash-run-id')
492
+ const serverUrl = req.headers.get('x-elasticdash-server')
493
+ if (runId && serverUrl) {
494
+ await initHttpRunContext(runId, serverUrl)
495
+ }
496
+ // ... rest of handler
497
+ }
498
+ ```
499
+
500
+ ### AI call recording
501
+
502
+ ```
503
+ Does your workflow call LLMs via OpenAI/Gemini/Grok SDKs?
504
+ YES → Automatic interception, no code changes needed
505
+ NO (custom provider or Anthropic SDK) → Use wrapAI:
506
+ import { wrapAI } from 'elasticdash-test'
507
+ export const callLLM = wrapAI('model-name', async (messages) => {
508
+ return await yourLLMClient.call(messages)
509
+ })
510
+ ```
511
+
512
+ ### Agent setup (`ed_agents.ts`)
513
+
514
+ ```
515
+ Does your project use a multi-step agent with a planner/executor pattern?
516
+ YES → Create ed_agents.ts:
517
+ export { plannerAgent, executorAgent } from './your-agent-logic'
518
+ // OR use SDK reference implementations:
519
+ export { plannerAgent, executorAgent, resumeAgentFromTrace } from 'elasticdash-test'
520
+ NO → Skip ed_agents.ts
521
+ ```
522
+
523
+ ---
524
+
525
+ ## Troubleshooting
526
+
527
+ | Error | Cause | Fix |
528
+ |-------|-------|-----|
529
+ | `replay miss: tool_call::YOUR_TOOL` | Trace fixture is stale or workflow changed | Re-record: `ELASTICDASH_CAPTURE_TRACE=1 tsx your-workflow.ts` |
530
+ | `MODULE_NOT_FOUND: elasticdash-test` | SDK not installed or Next.js bundling issue | Run `npm install elasticdash-test`. For Next.js, add to `serverExternalPackages` |
531
+ | `Cannot find module '@/...'` | Path aliases not resolved at runtime | Use advanced dashboard script with `tsconfig-paths/register` |
532
+ | `test has no run function` | `run` field missing in `defineTest` | Add `run: async () => { ... }` to the test definition |
533
+ | `Tool "x" not found in registry` | Tool not exported from `ed_tools.ts` | Export the tool function from `ed_tools.ts` |
534
+ | `ERR_UNKNOWN_FILE_EXTENSION` | ESM/CJS mismatch | Check `package.json` `type` field and `tsconfig.json` `module` setting |
535
+ | Git metadata shows `unknown` | No `.git` directory | Ensure repo is checked out (common in CI with shallow clones) |
536
+
537
+ ---
538
+
539
+ ## Final checklist
540
+
541
+ After integration, verify these files exist:
542
+
543
+ ```
544
+ your-project/
545
+ ed_tools.ts # Instrumented tool wrappers
546
+ ed_workflows.ts # Workflow exports
547
+ elasticdash.config.ts # Test runner config
548
+ package.json # dashboard:ai and test:ai scripts added
549
+ .gitignore # .temp/ and .ed_traces/ added
550
+ ```
551
+
552
+ Verify with:
553
+
554
+ ```bash
555
+ npx elasticdash test # Should discover and run *.ai.test.ts files
556
+ npx elasticdash dashboard # Should open the dashboard UI
557
+ ```
package/docs/agents.md ADDED
@@ -0,0 +1,140 @@
1
+ # Agent Mid-Trace Replay
2
+
3
+ ElasticDash supports resuming long-running agents from any task in their plan — without re-executing already-completed steps.
4
+
5
+ ## Use Cases
6
+
7
+ - **Resuming after failures**: If task 3 of 5 fails, fix the issue and re-run from task 3 only
8
+ - **Pausing for approval**: Capture state after task 2, get human sign-off, then continue
9
+ - **Debugging in isolation**: Re-run a single task with modified input to diagnose a problem
10
+
11
+ ## How It Works
12
+
13
+ Agents are structured as an **AgentPlan** — an ordered list of **AgentTask** objects. When serialized with captured trace events, this forms an **AgentState** that can be saved and replayed later.
14
+
15
+ ## Quick Start
16
+
17
+ ```ts
18
+ import { plannerAgent, executorAgent, resumeAgentFromTrace } from './ed_agents'
19
+ import { serializeAgentState, deserializeAgentState } from 'elasticdash-test'
20
+ import fs from 'node:fs'
21
+
22
+ // 1. Generate a plan
23
+ const plan = await plannerAgent('Show me sales for Q1', { userToken: 'tok-abc' })
24
+
25
+ // 2. Execute the plan (runs all tasks sequentially)
26
+ const completedPlan = await executorAgent(plan)
27
+
28
+ // 3. Serialize and save state (e.g., after partial execution)
29
+ const state = serializeAgentState(completedPlan, [] /* pass recorder.events in worker context */)
30
+ fs.writeFileSync('agent-state.json', JSON.stringify(state, null, 2))
31
+
32
+ // 4. Later: load saved state and resume from task 2 (0-based index 1)
33
+ const savedState = JSON.parse(fs.readFileSync('agent-state.json', 'utf8'))
34
+ const stateToResume = deserializeAgentState({ ...savedState, resumeFromTaskIndex: 1 })
35
+ const resumedPlan = await resumeAgentFromTrace(stateToResume)
36
+
37
+ console.log('Resumed plan status:', resumedPlan.status)
38
+ console.log('Task outputs:', resumedPlan.tasks.map((t) => ({ id: t.id, status: t.status })))
39
+ ```
40
+
41
+ ## Data Structures
42
+
43
+ ### AgentState
44
+
45
+ ```ts
46
+ interface AgentState {
47
+ plan: AgentPlan // Full plan with all tasks (completed and pending)
48
+ trace: WorkflowEvent[] // Captured trace events from previous execution
49
+ resumeFromTaskIndex: number // Zero-based index — tasks before this are loaded from cache
50
+ }
51
+ ```
52
+
53
+ ### AgentPlan
54
+
55
+ ```ts
56
+ interface AgentPlan {
57
+ id: string
58
+ tasks: AgentTask[]
59
+ status: 'planning' | 'executing' | 'completed' | 'failed' | 'paused'
60
+ currentTaskIndex: number
61
+ context: Record<string, unknown>
62
+ metadata: Record<string, unknown>
63
+ }
64
+ ```
65
+
66
+ ### AgentTask
67
+
68
+ ```ts
69
+ interface AgentTask {
70
+ id: string
71
+ status: 'pending' | 'in-progress' | 'completed' | 'failed'
72
+ description: string
73
+ tool: string // Name of the tool function to invoke
74
+ input: unknown // May contain { $ref: "task-N.output.fieldName" } placeholders
75
+ output?: unknown // Populated after execution
76
+ error?: string
77
+ startedAt?: number
78
+ completedAt?: number
79
+ }
80
+ ```
81
+
82
+ ## Task Input Placeholders
83
+
84
+ Task inputs can reference previous task outputs using `{ $ref: "taskId.output.fieldPath" }`:
85
+
86
+ ```ts
87
+ // task-2 uses the embedding produced by task-1
88
+ {
89
+ id: 'task-2',
90
+ tool: 'taskSelectorService',
91
+ input: {
92
+ queryEmbedding: { $ref: 'task-1.output.embedding' },
93
+ topK: 3,
94
+ }
95
+ }
96
+ ```
97
+
98
+ Placeholders are resolved at execution time by `resolveTaskInput()`.
99
+
100
+ ## Dashboard Integration
101
+
102
+ When running an agent workflow through the dashboard:
103
+
104
+ 1. **Agent task observations** are visually highlighted with a purple background and left border
105
+ 2. Each observation shows a **T1 / T2 / T3** badge indicating which task it belongs to
106
+ 3. In the observation detail panel, a **"Resume from Task N"** button appears (agent steps only)
107
+ 4. Clicking it calls `/api/resume-agent-from-task` with the serialized `AgentState` and chosen `taskIndex`
108
+ 5. The resumed run is added as a new trace in the comparison table
109
+
110
+ ## Best Practices
111
+
112
+ - **Keep tasks idempotent** where possible — if a task must be re-run, ensure it produces the same result
113
+ - **Store minimal outputs** — only record what downstream tasks need, not full API responses
114
+ - **Version your state schema** — if tool interfaces change, old states may need migration
115
+ - **Use sequential tasks** — the current implementation runs tasks one-by-one; parallel task support is planned
116
+
117
+ ## Example: Debugging a Failed Task
118
+
119
+ ```ts
120
+ // 1. Original execution fails at task 3
121
+ const plan = await plannerAgent('Process refund for order-123')
122
+ const result = await executorAgent(plan)
123
+ // Error: task 3 (calculateRefundAmount) failed
124
+
125
+ // 2. Save the state
126
+ const state = serializeAgentState(result, recorder.events)
127
+ fs.writeFileSync('failed-run.json', JSON.stringify(state))
128
+
129
+ // 3. Fix the issue in your tool/code
130
+
131
+ // 4. Resume from task 3 using the saved state, now that the fix is in place
132
+ const savedState = JSON.parse(fs.readFileSync('failed-run.json', 'utf8'))
133
+ const fixed = await resumeAgentFromTrace({
134
+ ...deserializeAgentState(savedState),
135
+ resumeFromTaskIndex: 2 // 0-based: task 3 = index 2
136
+ })
137
+
138
+ // Tasks 1-2 use cached results; tasks 3 and later re-execute with the fix
139
+ console.log('Fixed plan:', fixed.status)
140
+ ```