@cuylabs/agent-runtime-dapr 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,6 +19,37 @@ It builds on:
19
19
  - `agent-core` for task and turn execution semantics
20
20
  - `agent-runtime` for the outer workload runtime contract
21
21
 
22
+ When paired with `@cuylabs/agent-server`, this package should sit behind the
23
+ same session/turn surface rather than replacing it. Use
24
+ `createDaprAgentServerAdapter(runner)` when you want `agent-server`
25
+ transports like WebSocket to route turns, steering, follow-ups, and
26
+ interactive requests through the same Dapr workflow runtime as the hosted
27
+ HTTP routes.
28
+
29
+ ## Why This Package Is Bigger Than A Simple Driver
30
+
31
+ `agent-runtime-dapr` has two roles:
32
+
33
+ 1. It implements shared runtime contracts from `@cuylabs/agent-runtime`
34
+ 2. It exposes Dapr-native helpers that should stay outside the shared contract
35
+
36
+ The first category is the portability seam:
37
+
38
+ - `DaprRuntimeDriver` implements `RuntimeDriver`
39
+ - `DaprOrchestratorRunStore` implements `OrchestratorRunStore`
40
+ - `createDaprWorkloadRuntime(...)` builds a `WorkloadRuntime` with those pieces
41
+
42
+ The second category is intentionally Dapr-specific:
43
+
44
+ - workflow clients and workflow activities
45
+ - HTTP host/runners
46
+ - sidecar job callbacks
47
+ - execution checkpoint persistence
48
+ - cross-service invocation helpers
49
+
50
+ Those features are not drift in the base runtime contract. They are adapter
51
+ surfaces that exist because Dapr offers more than a generic scheduler/store.
52
+
22
53
  ## Why Dapr?
23
54
 
24
55
  Dapr provides the durable infrastructure while your agent owns the intelligence:
@@ -53,6 +84,40 @@ Under the hood, this package now exposes two layers:
53
84
  - `createDaprAgentRuntime(...)` and `createDaprAgentRunner(...)` as the
54
85
  `agent-core`-specific adapters built on top of that
55
86
 
87
+ The rule is:
88
+
89
+ - if your code only needs portable scheduling/orchestration, target `agent-runtime`
90
+ - if your code wants Dapr durability or Dapr host capabilities, opt into this package explicitly
91
+
92
+ ## Tool Hosts And Durable Turns
93
+
94
+ `ToolHost` configuration still belongs on the agent, not on the Dapr runner.
95
+
96
+ ```ts
97
+ import { WorkflowRuntime } from "@dapr/dapr";
98
+ import { createAgent } from "@cuylabs/agent-core";
99
+ import { dockerHost } from "@cuylabs/agent-sandbox-docker";
100
+ import { createDaprAgentRunner } from "@cuylabs/agent-runtime-dapr";
101
+
102
+ const agent = createAgent({
103
+ model,
104
+ host: dockerHost({ image: "node:22", workspaceDir: "/workspace" }),
105
+ tools,
106
+ });
107
+
108
+ const runner = createDaprAgentRunner({
109
+ agent,
110
+ name: "my-agent",
111
+ workflowRuntime: new WorkflowRuntime(),
112
+ });
113
+ ```
114
+
115
+ In direct mode and durable mode, host-backed tools use the same `agent-core`
116
+ execution seam. Dapr persists workflow state around the tool call, but the
117
+ tool still executes through `agent.getHost()`.
118
+
119
+ For the full explanation, see [Tool Hosts In Durable Workflows](docs/tool-hosts.md).
120
+
56
121
  ## Quick Start
57
122
 
58
123
  ### Step 1: Define your agent
@@ -110,8 +175,8 @@ curl -s http://localhost:3000/agents/run \
110
175
  -H "Content-Type: application/json" \
111
176
  -d '{"message": "Greet Carlos"}' | jq
112
177
 
113
- # Durable workflow (async, crash-recoverable)
114
- curl -s http://localhost:3000/agents/workflow \
178
+ # Durable run (async, crash-recoverable)
179
+ curl -s http://localhost:3000/agents/run-durable \
115
180
  -H "Content-Type: application/json" \
116
181
  -d '{"message": "Greet Carlos"}' | jq
117
182
  ```
@@ -154,17 +219,38 @@ Every agent host exposes two ways to run a turn:
154
219
  | Mode | Endpoint | Behavior |
155
220
  |------|----------|----------|
156
221
  | **Direct** | `POST /agents/run` | Synchronous. Returns result in the HTTP response. State is persisted, but execution is not crash-recoverable. |
157
- | **Workflow** | `POST /agents/workflow` | Asynchronous. Returns `202` with an `instanceId` immediately. The turn runs as a Dapr workflow — crash-safe with activity-level checkpoints. |
222
+ | **Durable** | `POST /agents/run-durable` | Asynchronous. Returns `202` with an `instanceId` immediately. The turn runs as a Dapr workflow — crash-safe with activity-level checkpoints. |
158
223
 
159
- The workflow decomposes each turn into four activities:
224
+ The workflow decomposes each turn into five activities:
160
225
 
161
226
  ```
162
- model-step → tool-call → step-commit → output-commit
227
+ input-commit → model-step → tool-call → step-commit → output-commit
163
228
  ```
164
229
 
165
230
  Each activity is a checkpoint. If the process crashes after `tool-call`, Dapr
166
231
  replays from that point — the model call and tool execution don't repeat.
167
232
 
233
+ ## Team Coordination
234
+
235
+ `createDaprTeamRunner()` applies the same split to multi-agent coordination:
236
+
237
+ - Vocabulary:
238
+ - `run()` = direct, in-process coordinator execution
239
+ - `runDurable()` = start the durable root coordinator workflow
240
+ - child workflow = one durable member task execution started by the root
241
+ - `waitForDurableRun()` = external polling helper for the root workflow
242
+
243
+ - `run(prompt)` keeps the coordinator loop in-process while using Dapr-backed stores.
244
+ - `runDurable(prompt, options?)` starts a durable coordinator workflow and returns `{ teamId, workflowName, coordinatorSessionId, instanceId }`.
245
+ - `getDurableRun(instanceId)` reads workflow status and extracts the final coordinator result when present.
246
+ - `waitForDurableRun(instanceId, options?)` is the explicit edge-level wait helper when you want to block for completion.
247
+
248
+ The HTTP surface mirrors that programmatic contract:
249
+
250
+ - `POST /team/run`
251
+ - `POST /team/run-durable`
252
+ - `GET /team/workflows/:instanceId`
253
+
168
254
  ## HTTP API Reference
169
255
 
170
256
  | Method | Path | Description |
@@ -175,12 +261,34 @@ replays from that point — the model call and tool execution don't repeat.
175
261
  | `GET` | `/readyz` | Readiness alias |
176
262
  | `GET` | `/agents` | List registered agents |
177
263
  | `POST` | `/agents/run` | Run agent turn (direct) |
178
- | `POST` | `/agents/workflow` | Run agent turn (durable workflow) |
264
+ | `POST` | `/agents/run-durable` | Run agent turn (durable) |
179
265
  | `POST` | `/agents/:id/run` | Run specific agent (direct) |
180
- | `POST` | `/agents/:id/workflow` | Run specific agent (durable workflow) |
266
+ | `POST` | `/agents/:id/run-durable` | Run specific agent (durable) |
267
+ | `GET` | `/agents/inputs` | List durable human input requests for the single hosted agent |
268
+ | `GET` | `/agents/inputs/:requestId` | Get durable human input request for the single hosted agent |
269
+ | `POST` | `/agents/inputs/:requestId/respond` | Resolve durable human input request for the single hosted agent |
270
+ | `GET` | `/agents/approvals` | List durable approval requests for the single hosted agent |
271
+ | `GET` | `/agents/approvals/:requestId` | Get durable approval request for the single hosted agent |
272
+ | `POST` | `/agents/approvals/:requestId/respond` | Resolve durable approval for the single hosted agent |
273
+ | `GET` | `/agents/:id/inputs` | List durable human input requests |
274
+ | `GET` | `/agents/:id/inputs/:requestId` | Get durable human input request |
275
+ | `POST` | `/agents/:id/inputs/:requestId/respond` | Resolve durable human input request |
276
+ | `GET` | `/agents/:id/approvals` | List durable approval requests |
277
+ | `GET` | `/agents/:id/approvals/:requestId` | Get durable approval request |
278
+ | `POST` | `/agents/:id/approvals/:requestId/respond` | Resolve durable approval with `allow`, `deny`, or `remember` |
181
279
  | `GET` | `/agents/:id/executions/:sessionId` | Get execution details |
182
280
  | `GET` | `/agents/:id/executions/:sessionId/checkpoints` | Get execution checkpoints |
183
281
  | `GET` | `/agents/:id/workflows/:instanceId` | Get workflow state |
282
+ | `POST` | `/agents/:id/workflows/:instanceId/terminate` | Terminate a running workflow |
283
+ | `POST` | `/agents/steer` | Inject steering message (single-agent host) |
284
+ | `POST` | `/agents/:id/steer` | Inject steering message into running workflow |
285
+ | `POST` | `/agents/follow-up` | Queue follow-up message (single-agent host) |
286
+ | `POST` | `/agents/:id/follow-up` | Queue follow-up for after current turn |
287
+ | `GET` | `/agents/follow-ups` | List follow-up requests (single-agent host) |
288
+ | `GET` | `/agents/:id/follow-ups` | List follow-up requests |
289
+ | `GET` | `/agents/:id/events/:sessionId` | SSE stream of agent events |
290
+ | `GET` | `/dapr/subscribe` | Dapr pub/sub subscription declaration |
291
+ | `POST` | `/dapr/:topic` | Dapr pub/sub event delivery callback |
184
292
  | `POST` | `/job/:name` | Handle Dapr scheduled job trigger |
185
293
 
186
294
  ## Runner Options
@@ -194,6 +302,7 @@ replays from that point — the model call and tool execution don't repeat.
194
302
  | `workflowRuntime` | Yes | — | `new WorkflowRuntime()` from `@dapr/dapr` |
195
303
  | `daprHttpEndpoint` | No | `http://$DAPR_HOST:$DAPR_HTTP_PORT` | Sidecar HTTP endpoint |
196
304
  | `stateStoreName` | No | `"statestore"` | Dapr state store component |
305
+ | `workflowComponent` | No | `"dapr"` | Dapr workflow component name |
197
306
  | `driverOptions` | No | — | Advanced Dapr runtime driver options: API token, retries, timeouts, custom `fetch`, sidecar verification |
198
307
  | `observers` | No | `[]` | Extra execution lifecycle observers |
199
308
  | `logging` | No | `true` | Enable/disable console logging |
@@ -203,10 +312,19 @@ replays from that point — the model call and tool execution don't repeat.
203
312
  The runner returns an object with:
204
313
 
205
314
  - `start()` — start runtime and workflow worker
315
+ - `createHttpHandler(options?)` — build the Dapr host HTTP handler for embedding in a custom server
316
+ - `agentServerCapabilities()` — capabilities patch describing the Dapr-backed runtime
206
317
  - `serve(options?)` — start HTTP server, block on SIGINT/SIGTERM
207
318
  - `run(message, options?)` — run a task programmatically
319
+ - `runDurable(message, options?)` — start a durable turn programmatically
208
320
  - `stop()` — graceful shutdown
209
321
 
322
+ `serve(options?)` also accepts lightweight UI-hosting options:
323
+
324
+ - `staticDir` — serve static files before the built-in agent routes
325
+ - `indexFile` — file served for `/` when `staticDir` is configured
326
+ - `extraRoutes` — exact-match custom routes layered ahead of static assets and agent APIs
327
+
210
328
  Runner startup is transactional: if the workflow worker fails to start, the
211
329
  runtime is stopped before the error is returned.
212
330
 
@@ -236,6 +354,7 @@ invocation), the package also exports the lower-level building blocks:
236
354
  | Helper | Purpose |
237
355
  |--------|---------|
238
356
  | `createDaprAgentWorkflowHost()` | Wrap an Agent into a workflow host |
357
+ | `createDaprAgentServerAdapter()` | Bridge `@cuylabs/agent-server` to the Dapr workflow runtime |
239
358
  | `createDaprWorkflowWorker()` | Register workflow hosts in a WorkflowRuntime |
240
359
  | `createDaprWorkloadRuntime()` | Dapr-backed runtime bundle for generic workloads |
241
360
  | `createDaprAgentRuntime()` | Create runtime bundle (scheduling + runner + store) |
@@ -246,27 +365,40 @@ invocation), the package also exports the lower-level building blocks:
246
365
  | `createDaprExecutionObserver()` | Persist execution events to the store |
247
366
  | `createDaprLoggingObserver()` | Console logging for execution lifecycle |
248
367
  | `DaprServiceInvoker` | Call agents across Dapr service boundaries |
368
+ | `invokeRemoteAgentRun()` | Convenience wrapper for cross-service agent calls |
369
+ | `createRemoteAgentTool()` | Create a tool that invokes a remote Dapr agent |
370
+ | `createDaprWorkflowApprovalRuntime()` | Durable approval runtime |
371
+ | `createDaprWorkflowHumanInputRuntime()` | Durable human-input runtime |
372
+ | `createDaprWorkflowSteerRuntime()` | Durable steering runtime |
373
+ | `createDaprWorkflowFollowUpRuntime()` | Durable follow-up runtime |
374
+ | `createDaprHostHttpHandler()` | Build `Request → Response` handler for custom servers |
375
+ | `createEventBus()` | In-process event bus for SSE streaming |
376
+ | `createDaprPubSubEventBridge()` | Multi-instance event fan-out via Dapr pub/sub |
377
+ | `createDaprDispatchRuntime()` | Dapr-backed async dispatch runtime |
378
+ | `createDaprTeamRunner()` | Multi-agent team runner with durable coordination |
249
379
 
250
380
  See the [docs/](docs/) folder for detailed guides:
251
381
 
252
382
  - [Architecture](docs/architecture.md) — how the three packages compose
253
- - [Workflow Internals](docs/workflow-internals.md) — the 4-activity decomposition
254
- - [API Reference](docs/api-reference.md) — all exported types and functions
383
+ - [Workflow Internals](docs/durability/workflow-internals.md) — the 5-activity decomposition
384
+ - [Durable Tool Approvals](docs/hitl/durable-tool-approvals.md) — how approval middleware pauses and resumes Dapr workflows
385
+ - [Durable Human Input](docs/hitl/durable-human-input.md) — how the built-in `question` tool pauses and resumes Dapr workflows
386
+ - [API Reference](docs/api-reference.md) — all exported types and functions, including event streaming
255
387
  - [Advanced Patterns](docs/advanced-patterns.md) — cross-service invocation, custom observers, etc.
256
388
 
257
389
  ## Runtime Boundary
258
390
 
259
391
  The package layering is:
260
392
 
261
- - `agent-core`: agent turn/task semantics
393
+ - `agent-core`: agent turn/task semantics, EventBus interface, AgentSignal
262
394
  - `agent-runtime`: generic workload orchestration contract
263
- - `agent-runtime-dapr`: Dapr-backed implementation of that contract
395
+ - `agent-runtime-dapr`: Dapr-backed implementation of that contract, plus `DaprPubSubEventBridge` for multi-instance event fan-out
264
396
 
265
397
  `agent-runtime-dapr` integrates with those lower layers in two different ways:
266
398
 
267
399
  - outer workload path: it uses `agent-runtime` to schedule, dispatch, retry,
268
400
  and observe jobs
269
- - inner durable turn path: it uses `agent-core` runtime primitives to split one
401
+ - inner durable turn path: it uses `agent-core` execution primitives to split one
270
402
  agent turn into durable workflow activities such as `model-step`,
271
403
  `tool-call`, `step-commit`, and `output-commit`
272
404
 
@@ -284,12 +416,15 @@ higher-level `createDaprAgentRunner(...)`.
284
416
 
285
417
  The [`examples/`](examples/) directory has complete, runnable scripts:
286
418
 
287
- | Script | Lines | Description |
288
- |--------|-------|-------------|
289
- | [`simple-agent.ts`](examples/simple-agent.ts) | ~55 | Minimal agent with one tool |
290
- | [`coding-agent.ts`](examples/coding-agent.ts) | ~45 | File-system tools via `@cuylabs/agent-code` |
291
- | [`multi-agent.ts`](examples/multi-agent.ts) | ~85 | Two agents in one process |
292
- | [`maintenance-host.ts`](examples/maintenance-host.ts) | ~200 | Scheduled cleanup worker with `/metrics` and Dapr job callbacks |
419
+ | Script | Description |
420
+ |--------|-------------|
421
+ | [`01-simple-agent.ts`](examples/01-simple-agent.ts) | Minimal agent with one tool |
422
+ | [`02-coding-agent.ts`](examples/02-coding-agent.ts) | File-system tools via `@cuylabs/agent-code` |
423
+ | [`03-multi-agent.ts`](examples/03-multi-agent.ts) | Two agents in one process |
424
+ | [`04-crash-recovery.ts`](examples/04-crash-recovery.ts) | Process crash mid-turn, Dapr auto-resumes |
425
+ | [`05-tracing-zipkin.ts`](examples/05-tracing-zipkin.ts) | OpenTelemetry tracing → Zipkin |
426
+ | [`06-tracing-phoenix.ts`](examples/06-tracing-phoenix.ts) | OpenTelemetry tracing → Arize Phoenix |
427
+ | [`07-maintenance-host.ts`](examples/07-maintenance-host.ts) | Retention jobs + Prometheus metrics |
293
428
 
294
429
  See the [examples README](examples/README.md) for step-by-step setup and usage.
295
430
 
@@ -302,7 +437,7 @@ See the [examples README](examples/README.md) for step-by-step setup and usage.
302
437
  - `GET /ready` and `GET /readyz` report runtime, worker, sidecar, and state-store readiness
303
438
  - Dapr Jobs API calls are isolated behind an internal adapter so scheduler changes stay local to the Dapr package
304
439
  - Use `DaprExecutionStore.cleanup(...)` and `DaprOrchestratorRunStore.cleanup(...)` to enforce retention budgets
305
- - For a concrete operational service, see [`examples/maintenance-host.ts`](examples/maintenance-host.ts)
440
+ - For a concrete operational service, see [`examples/07-maintenance-host.ts`](examples/07-maintenance-host.ts)
306
441
  - For containers: run one sidecar per app process, point `daprHttpEndpoint` at the local sidecar
307
442
 
308
443
  ## License
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  DaprSidecarClient,
3
3
  isDaprConflictError
4
- } from "./chunk-A34CHK2E.js";
4
+ } from "./chunk-MQJ4LZOX.js";
5
5
 
6
6
  // src/execution/store.ts
7
7
  var DEFAULT_KEY_PREFIX = "agent-runtime:execution:";
@@ -96,6 +96,57 @@ var DaprExecutionStore = class {
96
96
  "keyPrefix"
97
97
  );
98
98
  }
99
+ // ── ExecutionStore interface (generic) ───────────────────────────────
100
+ async get(sessionId) {
101
+ const record = await this.getExecution(sessionId);
102
+ return record ? toGenericRunRecord(record) : void 0;
103
+ }
104
+ async list(options) {
105
+ const records = await this.listExecutions();
106
+ let filtered = records;
107
+ if (options?.status) {
108
+ const statuses = Array.isArray(options.status) ? options.status : [options.status];
109
+ filtered = filtered.filter((r) => statuses.includes(r.status));
110
+ }
111
+ if (options?.limit !== void 0) {
112
+ filtered = filtered.slice(0, options.limit);
113
+ }
114
+ return filtered.map(toGenericRunRecord);
115
+ }
116
+ async listGenericCheckpoints(sessionId) {
117
+ const records = await this.listCheckpoints(sessionId);
118
+ return records.map(toGenericCheckpointRecord);
119
+ }
120
+ async remove(sessionId) {
121
+ const existing = await this.getExecution(sessionId);
122
+ if (!existing) return false;
123
+ await this.deleteExecutionRecord(sessionId);
124
+ return true;
125
+ }
126
+ // ── Resume helper ────────────────────────────────────────────────────
127
+ /**
128
+ * Build a resume snapshot from a persisted execution record.
129
+ *
130
+ * Returns `undefined` if no execution exists for the session or the
131
+ * execution is not currently in the "running" state (e.g. already completed or failed).
132
+ *
133
+ * The returned snapshot can be passed as `context.restoreFrom` to
134
+ * `createAgentTaskRunner(...)` to resume the direct-path execution
135
+ * from where it left off.
136
+ */
137
+ async buildResumeSnapshot(sessionId) {
138
+ const record = await this.getExecution(sessionId);
139
+ if (!record || record.status !== "running") return void 0;
140
+ return {
141
+ response: record.snapshot.response,
142
+ usage: { ...record.snapshot.usage },
143
+ toolCalls: record.snapshot.toolCalls.map((tc) => ({ ...tc })),
144
+ step: record.snapshot.activeStep ?? 0,
145
+ eventCount: record.snapshot.eventCount,
146
+ startedAt: record.startedAt
147
+ };
148
+ }
149
+ // ── Dapr-specific methods (rich types) ───────────────────────────────
99
150
  async getExecution(sessionId) {
100
151
  const value = await this.client.getState(
101
152
  this.stateKeyForExecution(sessionId)
@@ -147,9 +198,10 @@ var DaprExecutionStore = class {
147
198
  do {
148
199
  const response = await this.client.queryState({
149
200
  filter: {
150
- EQ: {
151
- kind: STORED_EXECUTION_CHECKPOINT_KIND
152
- }
201
+ AND: [
202
+ { EQ: { kind: STORED_EXECUTION_CHECKPOINT_KIND } },
203
+ { EQ: { "checkpoint.sessionId": sessionId } }
204
+ ]
153
205
  },
154
206
  page: {
155
207
  limit: 200,
@@ -193,13 +245,14 @@ var DaprExecutionStore = class {
193
245
  await this.writeCheckpoint(record);
194
246
  await this.addCheckpointToIndex(record.sessionId, record.id).catch(() => {
195
247
  });
196
- const current = await this.getExecution(checkpoint.run.sessionId);
197
- const next = current ?? toExecutionRecord(checkpoint.run, checkpoint.snapshot);
198
- next.updatedAt = checkpoint.snapshot.updatedAt;
199
- next.lastCheckpointReason = checkpoint.reason;
200
- next.checkpointCount = (current?.checkpointCount ?? 0) + 1;
201
- next.snapshot = toSerializableSnapshot(checkpoint.snapshot);
202
- await this.writeExecution(next);
248
+ await this.updateExecution(checkpoint.run.sessionId, (current) => {
249
+ const next = current ?? toExecutionRecord(checkpoint.run, checkpoint.snapshot);
250
+ next.updatedAt = checkpoint.snapshot.updatedAt;
251
+ next.lastCheckpointReason = checkpoint.reason;
252
+ next.checkpointCount = (current?.checkpointCount ?? 0) + 1;
253
+ next.snapshot = toSerializableSnapshot(checkpoint.snapshot);
254
+ return next;
255
+ });
203
256
  }
204
257
  async recordCompletion(run, result, snapshot) {
205
258
  const current = await this.getExecution(run.sessionId);
@@ -403,7 +456,7 @@ var DaprExecutionStore = class {
403
456
  }
404
457
  return present;
405
458
  }
406
- async writeExecution(record) {
459
+ async writeExecution(record, etag) {
407
460
  const envelope = {
408
461
  kind: STORED_EXECUTION_KIND,
409
462
  version: STORED_EXECUTION_VERSION,
@@ -411,9 +464,32 @@ var DaprExecutionStore = class {
411
464
  };
412
465
  await this.client.saveState(
413
466
  this.stateKeyForExecution(record.sessionId),
414
- envelope
467
+ envelope,
468
+ etag ? { etag, concurrency: "first-write" } : {}
415
469
  );
416
470
  }
471
+ /**
472
+ * Read-modify-write the execution record with optimistic concurrency.
473
+ * Retries on etag conflict up to 4 times.
474
+ */
475
+ async updateExecution(sessionId, updater) {
476
+ for (let attempt = 0; attempt < DEFAULT_INDEX_UPDATE_RETRIES; attempt += 1) {
477
+ const entry = await this.client.getStateEntry(
478
+ this.stateKeyForExecution(sessionId)
479
+ );
480
+ const current = this.decodeExecution(entry.value);
481
+ const next = updater(current);
482
+ try {
483
+ await this.writeExecution(next, entry.etag);
484
+ return;
485
+ } catch (error) {
486
+ if (isDaprConflictError(error) && attempt + 1 < DEFAULT_INDEX_UPDATE_RETRIES) {
487
+ continue;
488
+ }
489
+ throw error;
490
+ }
491
+ }
492
+ }
417
493
  async writeCheckpoint(record) {
418
494
  const envelope = {
419
495
  kind: STORED_EXECUTION_CHECKPOINT_KIND,
@@ -462,6 +538,54 @@ var DaprExecutionStore = class {
462
538
  return void 0;
463
539
  }
464
540
  };
541
+ function toGenericRunRecord(record) {
542
+ return {
543
+ sessionId: record.sessionId,
544
+ status: record.status,
545
+ startedAt: record.startedAt,
546
+ updatedAt: record.updatedAt,
547
+ completedAt: record.completedAt,
548
+ checkpointCount: record.checkpointCount,
549
+ lastCheckpointReason: record.lastCheckpointReason,
550
+ snapshot: {
551
+ sessionId: record.snapshot.sessionId,
552
+ response: record.snapshot.response,
553
+ usage: { ...record.snapshot.usage },
554
+ toolCalls: record.snapshot.toolCalls.map((tc) => ({ ...tc })),
555
+ eventCount: record.snapshot.eventCount,
556
+ activeStep: record.snapshot.activeStep,
557
+ error: record.snapshot.error,
558
+ startedAt: record.snapshot.startedAt,
559
+ updatedAt: record.snapshot.updatedAt
560
+ },
561
+ result: record.result ? {
562
+ response: record.result.response,
563
+ sessionId: record.result.sessionId,
564
+ usage: { ...record.result.usage },
565
+ toolCalls: record.result.toolCalls.map((tc) => ({ ...tc }))
566
+ } : void 0,
567
+ error: record.error ? { ...record.error } : void 0
568
+ };
569
+ }
570
+ function toGenericCheckpointRecord(record) {
571
+ return {
572
+ id: record.id,
573
+ sessionId: record.sessionId,
574
+ reason: record.reason,
575
+ snapshot: {
576
+ sessionId: record.snapshot.sessionId,
577
+ response: record.snapshot.response,
578
+ usage: { ...record.snapshot.usage },
579
+ toolCalls: record.snapshot.toolCalls.map((tc) => ({ ...tc })),
580
+ eventCount: record.snapshot.eventCount,
581
+ activeStep: record.snapshot.activeStep,
582
+ error: record.snapshot.error,
583
+ startedAt: record.snapshot.startedAt,
584
+ updatedAt: record.snapshot.updatedAt
585
+ },
586
+ createdAt: record.createdAt
587
+ };
588
+ }
465
589
 
466
590
  // src/execution/observer.ts
467
591
  var DaprExecutionObserver = class {
@@ -546,11 +670,12 @@ var EMPTY_USAGE = {
546
670
  outputTokens: 0,
547
671
  totalTokens: 0
548
672
  };
549
- function buildRun(state, payload, trigger) {
673
+ function buildRun(state, payload, trigger, executionId) {
550
674
  return {
551
675
  payload,
552
676
  context: { trigger },
553
677
  sessionId: state.sessionId,
678
+ executionId: executionId ?? `${state.sessionId}:${state.startedAt}`,
554
679
  startedAt: state.startedAt
555
680
  };
556
681
  }
@@ -558,7 +683,7 @@ function collectToolCalls(state) {
558
683
  const toolCalls = [];
559
684
  for (const msg of state.messages) {
560
685
  if (msg.role === "tool") {
561
- toolCalls.push({ name: msg.toolName, result: msg.content });
686
+ toolCalls.push({ name: msg.toolName, result: msg.result });
562
687
  }
563
688
  }
564
689
  return toolCalls;
@@ -569,16 +694,16 @@ function buildSnapshot(state) {
569
694
  response: state.finalResponse ?? state.lastModelStep?.text ?? "",
570
695
  usage: state.usage ?? EMPTY_USAGE,
571
696
  toolCalls: collectToolCalls(state),
572
- eventCount: 0,
697
+ eventCount: state.turnState?.eventCount ?? state.messages.length,
573
698
  activeStep: state.step,
574
699
  startedAt: state.startedAt,
575
700
  updatedAt: state.updatedAt,
576
701
  turnState: state.turnState ?? {}
577
702
  };
578
703
  }
579
- function buildCheckpoint(reason, state, payload, trigger) {
704
+ function buildCheckpoint(reason, state, payload, trigger, executionId) {
580
705
  return {
581
- run: buildRun(state, payload, trigger),
706
+ run: buildRun(state, payload, trigger, executionId),
582
707
  reason,
583
708
  snapshot: buildSnapshot(state),
584
709
  createdAt: state.updatedAt
@@ -596,6 +721,7 @@ function createWorkflowObserverBridge(options) {
596
721
  const { observers } = options;
597
722
  let payload = options.payload;
598
723
  const trigger = options.trigger ?? "workflow";
724
+ const executionId = options.executionId;
599
725
  if (observers.length === 0) {
600
726
  return {
601
727
  async notifyTaskStart() {
@@ -618,16 +744,22 @@ function createWorkflowObserverBridge(options) {
618
744
  async notifyTaskStart(state) {
619
745
  if (taskStarted) return;
620
746
  taskStarted = true;
621
- const run = buildRun(state, payload, trigger);
747
+ const run = buildRun(state, payload, trigger, executionId);
622
748
  const snapshot = buildSnapshot(state);
623
749
  await notifyAll(observers, (o) => o.onTaskStart?.(run, snapshot));
624
750
  },
625
751
  async notifyCheckpoint(reason, state) {
626
- const checkpoint = buildCheckpoint(reason, state, payload, trigger);
752
+ const checkpoint = buildCheckpoint(
753
+ reason,
754
+ state,
755
+ payload,
756
+ trigger,
757
+ executionId
758
+ );
627
759
  await notifyAll(observers, (o) => o.onCheckpoint?.(checkpoint));
628
760
  },
629
761
  async notifyTaskComplete(state) {
630
- const run = buildRun(state, payload, trigger);
762
+ const run = buildRun(state, payload, trigger, executionId);
631
763
  const snapshot = buildSnapshot(state);
632
764
  const result = {
633
765
  response: state.finalResponse ?? "",
@@ -641,16 +773,19 @@ function createWorkflowObserverBridge(options) {
641
773
  );
642
774
  },
643
775
  async notifyTaskError(state, error) {
644
- const run = buildRun(state, payload, trigger);
776
+ const run = buildRun(state, payload, trigger, executionId);
645
777
  const snapshot = buildSnapshot(state);
646
778
  await notifyAll(observers, (o) => o.onTaskError?.(run, error, snapshot));
647
779
  },
648
780
  updatePayload(newPayload) {
649
781
  payload = newPayload;
650
782
  },
651
- getOtelContext(sessionId) {
783
+ getOtelContext(sessionId, currentExecutionId) {
652
784
  for (const observer of observers) {
653
- const ctx = observer.getOtelContext?.(sessionId);
785
+ const ctx = observer.getOtelContext?.(
786
+ sessionId,
787
+ currentExecutionId ?? executionId
788
+ );
654
789
  if (ctx !== void 0) return ctx;
655
790
  }
656
791
  return void 0;
@@ -659,6 +794,9 @@ function createWorkflowObserverBridge(options) {
659
794
  }
660
795
 
661
796
  // src/execution/telemetry.ts
797
+ import {
798
+ DEFAULT_AGENT_NAME
799
+ } from "@cuylabs/agent-core";
662
800
  var _otel = null;
663
801
  function oiMime(v) {
664
802
  const t = v.trimStart();
@@ -674,7 +812,7 @@ async function getOtel() {
674
812
  }
675
813
  }
676
814
  function createOtelObserver(config = {}) {
677
- const agentName = config.agentName ?? "agent";
815
+ const agentName = config.agentName ?? DEFAULT_AGENT_NAME;
678
816
  const spanTimeoutMs = config.spanTimeoutMs ?? 5 * 60 * 1e3;
679
817
  const turnSpans = /* @__PURE__ */ new Map();
680
818
  let otel = null;
@@ -693,9 +831,13 @@ function createOtelObserver(config = {}) {
693
831
  "gen_ai.usage.output_tokens": usage.outputTokens ?? 0
694
832
  };
695
833
  }
834
+ function makeSpanKey(sessionId, executionId) {
835
+ return executionId ?? sessionId;
836
+ }
696
837
  return {
697
838
  async onTaskStart(run, _snapshot) {
698
- const existing = turnSpans.get(run.sessionId);
839
+ const key = makeSpanKey(run.sessionId, run.executionId);
840
+ const existing = turnSpans.get(key);
699
841
  if (existing) {
700
842
  const inputVal2 = run.payload.message.slice(0, 4096);
701
843
  existing.span.setAttributes({
@@ -724,20 +866,22 @@ function createOtelObserver(config = {}) {
724
866
  });
725
867
  const ctx = otel.trace.setSpan(otel.context.active(), span);
726
868
  const timer = setTimeout(() => {
727
- const entry = turnSpans.get(run.sessionId);
869
+ const entry = turnSpans.get(key);
728
870
  if (entry) {
729
871
  entry.span.setStatus({
730
872
  code: otel?.SpanStatusCode.ERROR ?? 2,
731
873
  message: "Span timed out (possible leak \u2014 task never completed)"
732
874
  });
733
875
  entry.span.end();
734
- turnSpans.delete(run.sessionId);
876
+ turnSpans.delete(key);
735
877
  }
736
878
  }, spanTimeoutMs);
737
- turnSpans.set(run.sessionId, { span, ctx, timer });
879
+ turnSpans.set(key, { span, ctx, timer });
738
880
  },
739
881
  onCheckpoint(checkpoint) {
740
- const entry = turnSpans.get(checkpoint.run.sessionId);
882
+ const entry = turnSpans.get(
883
+ makeSpanKey(checkpoint.run.sessionId, checkpoint.run.executionId)
884
+ );
741
885
  if (!entry) return;
742
886
  const reason = checkpoint.reason;
743
887
  const attrs = {
@@ -758,7 +902,8 @@ function createOtelObserver(config = {}) {
758
902
  entry.span.addEvent(`agent.checkpoint.${reason}`, attrs);
759
903
  },
760
904
  onTaskComplete(run, result, _snapshot) {
761
- const entry = turnSpans.get(run.sessionId);
905
+ const key = makeSpanKey(run.sessionId, run.executionId);
906
+ const entry = turnSpans.get(key);
762
907
  if (!entry) return;
763
908
  if (entry.timer) clearTimeout(entry.timer);
764
909
  entry.span.setAttributes({
@@ -773,10 +918,11 @@ function createOtelObserver(config = {}) {
773
918
  }
774
919
  entry.span.setStatus({ code: otel?.SpanStatusCode.OK ?? 1 });
775
920
  entry.span.end();
776
- turnSpans.delete(run.sessionId);
921
+ turnSpans.delete(key);
777
922
  },
778
923
  onTaskError(run, error, snapshot) {
779
- const entry = turnSpans.get(run.sessionId);
924
+ const key = makeSpanKey(run.sessionId, run.executionId);
925
+ const entry = turnSpans.get(key);
780
926
  if (!entry) return;
781
927
  if (entry.timer) clearTimeout(entry.timer);
782
928
  entry.span.setAttributes(getUsageAttrs(snapshot.usage));
@@ -786,13 +932,13 @@ function createOtelObserver(config = {}) {
786
932
  });
787
933
  entry.span.recordException(error);
788
934
  entry.span.end();
789
- turnSpans.delete(run.sessionId);
935
+ turnSpans.delete(key);
790
936
  },
791
- getOtelContext(sessionId) {
792
- return turnSpans.get(sessionId)?.ctx;
937
+ getOtelContext(sessionId, executionId) {
938
+ return turnSpans.get(makeSpanKey(sessionId, executionId))?.ctx;
793
939
  },
794
- activateContext(sessionId, fn) {
795
- const entry = turnSpans.get(sessionId);
940
+ activateContext(sessionId, executionId, fn) {
941
+ const entry = turnSpans.get(makeSpanKey(sessionId, executionId));
796
942
  if (!entry?.ctx || !otel) return fn();
797
943
  return otel.context.with(entry.ctx, fn);
798
944
  }