@cuylabs/agent-runtime-dapr 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +154 -19
- package/dist/{chunk-2CEICSJH.js → chunk-5CJIC4YB.js} +184 -38
- package/dist/{chunk-A34CHK2E.js → chunk-MQJ4LZOX.js} +30 -4
- package/dist/chunk-O7H3XGY2.js +11222 -0
- package/dist/chunk-YQQTUE6B.js +993 -0
- package/dist/chunk-YS2CWYBQ.js +1358 -0
- package/dist/client-UsEIzDF6.d.ts +322 -0
- package/dist/dispatch/index.d.ts +9 -0
- package/dist/dispatch/index.js +17 -0
- package/dist/execution/index.d.ts +5 -4
- package/dist/execution/index.js +2 -2
- package/dist/host/index.d.ts +8 -4
- package/dist/host/index.js +28 -8
- package/dist/index-BY0FipV1.d.ts +770 -0
- package/dist/index-CFm5LORU.d.ts +63 -0
- package/dist/index-UtePd9on.d.ts +101 -0
- package/dist/index.d.ts +62 -14
- package/dist/index.js +76 -6
- package/dist/invoker-B6ikdYaz.d.ts +50 -0
- package/dist/{store-pRLGfYhN.d.ts → store-BXBIDz40.d.ts} +24 -3
- package/dist/team/index.d.ts +612 -0
- package/dist/team/index.js +30 -0
- package/dist/worker-CXq0IFGX.d.ts +42 -0
- package/dist/workflow/index.d.ts +4 -225
- package/dist/workflow/index.js +2 -2
- package/dist/{workflow-bridge-C8Z1yr0Y.d.ts → workflow-bridge-BcicHH1Y.d.ts} +4 -2
- package/dist/workflow-host-D6W6fXoL.d.ts +459 -0
- package/package.json +16 -6
- package/dist/chunk-DILON56B.js +0 -668
- package/dist/chunk-R47X4FG2.js +0 -2009
- package/dist/index-BCMkUMAf.d.ts +0 -564
package/README.md
CHANGED
|
@@ -19,6 +19,37 @@ It builds on:
|
|
|
19
19
|
- `agent-core` for task and turn execution semantics
|
|
20
20
|
- `agent-runtime` for the outer workload runtime contract
|
|
21
21
|
|
|
22
|
+
When paired with `@cuylabs/agent-server`, this package should sit behind the
|
|
23
|
+
same session/turn surface rather than replacing it. Use
|
|
24
|
+
`createDaprAgentServerAdapter(runner)` when you want `agent-server`
|
|
25
|
+
transports like WebSocket to route turns, steering, follow-ups, and
|
|
26
|
+
interactive requests through the same Dapr workflow runtime as the hosted
|
|
27
|
+
HTTP routes.
|
|
28
|
+
|
|
29
|
+
## Why This Package Is Bigger Than A Simple Driver
|
|
30
|
+
|
|
31
|
+
`agent-runtime-dapr` has two roles:
|
|
32
|
+
|
|
33
|
+
1. It implements shared runtime contracts from `@cuylabs/agent-runtime`
|
|
34
|
+
2. It exposes Dapr-native helpers that should stay outside the shared contract
|
|
35
|
+
|
|
36
|
+
The first category is the portability seam:
|
|
37
|
+
|
|
38
|
+
- `DaprRuntimeDriver` implements `RuntimeDriver`
|
|
39
|
+
- `DaprOrchestratorRunStore` implements `OrchestratorRunStore`
|
|
40
|
+
- `createDaprWorkloadRuntime(...)` builds a `WorkloadRuntime` with those pieces
|
|
41
|
+
|
|
42
|
+
The second category is intentionally Dapr-specific:
|
|
43
|
+
|
|
44
|
+
- workflow clients and workflow activities
|
|
45
|
+
- HTTP host/runners
|
|
46
|
+
- sidecar job callbacks
|
|
47
|
+
- execution checkpoint persistence
|
|
48
|
+
- cross-service invocation helpers
|
|
49
|
+
|
|
50
|
+
Those features are not drift in the base runtime contract. They are adapter
|
|
51
|
+
surfaces that exist because Dapr offers more than a generic scheduler/store.
|
|
52
|
+
|
|
22
53
|
## Why Dapr?
|
|
23
54
|
|
|
24
55
|
Dapr provides the durable infrastructure while your agent owns the intelligence:
|
|
@@ -53,6 +84,40 @@ Under the hood, this package now exposes two layers:
|
|
|
53
84
|
- `createDaprAgentRuntime(...)` and `createDaprAgentRunner(...)` as the
|
|
54
85
|
`agent-core`-specific adapters built on top of that
|
|
55
86
|
|
|
87
|
+
The rule is:
|
|
88
|
+
|
|
89
|
+
- if your code only needs portable scheduling/orchestration, target `agent-runtime`
|
|
90
|
+
- if your code wants Dapr durability or Dapr host capabilities, opt into this package explicitly
|
|
91
|
+
|
|
92
|
+
## Tool Hosts And Durable Turns
|
|
93
|
+
|
|
94
|
+
`ToolHost` configuration still belongs on the agent, not on the Dapr runner.
|
|
95
|
+
|
|
96
|
+
```ts
|
|
97
|
+
import { WorkflowRuntime } from "@dapr/dapr";
|
|
98
|
+
import { createAgent } from "@cuylabs/agent-core";
|
|
99
|
+
import { dockerHost } from "@cuylabs/agent-sandbox-docker";
|
|
100
|
+
import { createDaprAgentRunner } from "@cuylabs/agent-runtime-dapr";
|
|
101
|
+
|
|
102
|
+
const agent = createAgent({
|
|
103
|
+
model,
|
|
104
|
+
host: dockerHost({ image: "node:22", workspaceDir: "/workspace" }),
|
|
105
|
+
tools,
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
const runner = createDaprAgentRunner({
|
|
109
|
+
agent,
|
|
110
|
+
name: "my-agent",
|
|
111
|
+
workflowRuntime: new WorkflowRuntime(),
|
|
112
|
+
});
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
In direct mode and durable mode, host-backed tools use the same `agent-core`
|
|
116
|
+
execution seam. Dapr persists workflow state around the tool call, but the
|
|
117
|
+
tool still executes through `agent.getHost()`.
|
|
118
|
+
|
|
119
|
+
For the full explanation, see [Tool Hosts In Durable Workflows](docs/tool-hosts.md).
|
|
120
|
+
|
|
56
121
|
## Quick Start
|
|
57
122
|
|
|
58
123
|
### Step 1: Define your agent
|
|
@@ -110,8 +175,8 @@ curl -s http://localhost:3000/agents/run \
|
|
|
110
175
|
-H "Content-Type: application/json" \
|
|
111
176
|
-d '{"message": "Greet Carlos"}' | jq
|
|
112
177
|
|
|
113
|
-
# Durable
|
|
114
|
-
curl -s http://localhost:3000/agents/
|
|
178
|
+
# Durable run (async, crash-recoverable)
|
|
179
|
+
curl -s http://localhost:3000/agents/run-durable \
|
|
115
180
|
-H "Content-Type: application/json" \
|
|
116
181
|
-d '{"message": "Greet Carlos"}' | jq
|
|
117
182
|
```
|
|
@@ -154,17 +219,38 @@ Every agent host exposes two ways to run a turn:
|
|
|
154
219
|
| Mode | Endpoint | Behavior |
|
|
155
220
|
|------|----------|----------|
|
|
156
221
|
| **Direct** | `POST /agents/run` | Synchronous. Returns result in the HTTP response. State is persisted, but execution is not crash-recoverable. |
|
|
157
|
-
| **
|
|
222
|
+
| **Durable** | `POST /agents/run-durable` | Asynchronous. Returns `202` with an `instanceId` immediately. The turn runs as a Dapr workflow — crash-safe with activity-level checkpoints. |
|
|
158
223
|
|
|
159
|
-
The workflow decomposes each turn into
|
|
224
|
+
The workflow decomposes each turn into five activities:
|
|
160
225
|
|
|
161
226
|
```
|
|
162
|
-
model-step → tool-call → step-commit → output-commit
|
|
227
|
+
input-commit → model-step → tool-call → step-commit → output-commit
|
|
163
228
|
```
|
|
164
229
|
|
|
165
230
|
Each activity is a checkpoint. If the process crashes after `tool-call`, Dapr
|
|
166
231
|
replays from that point — the model call and tool execution don't repeat.
|
|
167
232
|
|
|
233
|
+
## Team Coordination
|
|
234
|
+
|
|
235
|
+
`createDaprTeamRunner()` applies the same split to multi-agent coordination:
|
|
236
|
+
|
|
237
|
+
- Vocabulary:
|
|
238
|
+
- `run()` = direct, in-process coordinator execution
|
|
239
|
+
- `runDurable()` = start the durable root coordinator workflow
|
|
240
|
+
- child workflow = one durable member task execution started by the root
|
|
241
|
+
- `waitForDurableRun()` = external polling helper for the root workflow
|
|
242
|
+
|
|
243
|
+
- `run(prompt)` keeps the coordinator loop in-process while using Dapr-backed stores.
|
|
244
|
+
- `runDurable(prompt, options?)` starts a durable coordinator workflow and returns `{ teamId, workflowName, coordinatorSessionId, instanceId }`.
|
|
245
|
+
- `getDurableRun(instanceId)` reads workflow status and extracts the final coordinator result when present.
|
|
246
|
+
- `waitForDurableRun(instanceId, options?)` is the explicit edge-level wait helper when you want to block for completion.
|
|
247
|
+
|
|
248
|
+
The HTTP surface mirrors that programmatic contract:
|
|
249
|
+
|
|
250
|
+
- `POST /team/run`
|
|
251
|
+
- `POST /team/run-durable`
|
|
252
|
+
- `GET /team/workflows/:instanceId`
|
|
253
|
+
|
|
168
254
|
## HTTP API Reference
|
|
169
255
|
|
|
170
256
|
| Method | Path | Description |
|
|
@@ -175,12 +261,34 @@ replays from that point — the model call and tool execution don't repeat.
|
|
|
175
261
|
| `GET` | `/readyz` | Readiness alias |
|
|
176
262
|
| `GET` | `/agents` | List registered agents |
|
|
177
263
|
| `POST` | `/agents/run` | Run agent turn (direct) |
|
|
178
|
-
| `POST` | `/agents/
|
|
264
|
+
| `POST` | `/agents/run-durable` | Run agent turn (durable) |
|
|
179
265
|
| `POST` | `/agents/:id/run` | Run specific agent (direct) |
|
|
180
|
-
| `POST` | `/agents/:id/
|
|
266
|
+
| `POST` | `/agents/:id/run-durable` | Run specific agent (durable) |
|
|
267
|
+
| `GET` | `/agents/inputs` | List durable human input requests for the single hosted agent |
|
|
268
|
+
| `GET` | `/agents/inputs/:requestId` | Get durable human input request for the single hosted agent |
|
|
269
|
+
| `POST` | `/agents/inputs/:requestId/respond` | Resolve durable human input request for the single hosted agent |
|
|
270
|
+
| `GET` | `/agents/approvals` | List durable approval requests for the single hosted agent |
|
|
271
|
+
| `GET` | `/agents/approvals/:requestId` | Get durable approval request for the single hosted agent |
|
|
272
|
+
| `POST` | `/agents/approvals/:requestId/respond` | Resolve durable approval for the single hosted agent |
|
|
273
|
+
| `GET` | `/agents/:id/inputs` | List durable human input requests |
|
|
274
|
+
| `GET` | `/agents/:id/inputs/:requestId` | Get durable human input request |
|
|
275
|
+
| `POST` | `/agents/:id/inputs/:requestId/respond` | Resolve durable human input request |
|
|
276
|
+
| `GET` | `/agents/:id/approvals` | List durable approval requests |
|
|
277
|
+
| `GET` | `/agents/:id/approvals/:requestId` | Get durable approval request |
|
|
278
|
+
| `POST` | `/agents/:id/approvals/:requestId/respond` | Resolve durable approval with `allow`, `deny`, or `remember` |
|
|
181
279
|
| `GET` | `/agents/:id/executions/:sessionId` | Get execution details |
|
|
182
280
|
| `GET` | `/agents/:id/executions/:sessionId/checkpoints` | Get execution checkpoints |
|
|
183
281
|
| `GET` | `/agents/:id/workflows/:instanceId` | Get workflow state |
|
|
282
|
+
| `POST` | `/agents/:id/workflows/:instanceId/terminate` | Terminate a running workflow |
|
|
283
|
+
| `POST` | `/agents/steer` | Inject steering message (single-agent host) |
|
|
284
|
+
| `POST` | `/agents/:id/steer` | Inject steering message into running workflow |
|
|
285
|
+
| `POST` | `/agents/follow-up` | Queue follow-up message (single-agent host) |
|
|
286
|
+
| `POST` | `/agents/:id/follow-up` | Queue follow-up for after current turn |
|
|
287
|
+
| `GET` | `/agents/follow-ups` | List follow-up requests (single-agent host) |
|
|
288
|
+
| `GET` | `/agents/:id/follow-ups` | List follow-up requests |
|
|
289
|
+
| `GET` | `/agents/:id/events/:sessionId` | SSE stream of agent events |
|
|
290
|
+
| `GET` | `/dapr/subscribe` | Dapr pub/sub subscription declaration |
|
|
291
|
+
| `POST` | `/dapr/:topic` | Dapr pub/sub event delivery callback |
|
|
184
292
|
| `POST` | `/job/:name` | Handle Dapr scheduled job trigger |
|
|
185
293
|
|
|
186
294
|
## Runner Options
|
|
@@ -194,6 +302,7 @@ replays from that point — the model call and tool execution don't repeat.
|
|
|
194
302
|
| `workflowRuntime` | Yes | — | `new WorkflowRuntime()` from `@dapr/dapr` |
|
|
195
303
|
| `daprHttpEndpoint` | No | `http://$DAPR_HOST:$DAPR_HTTP_PORT` | Sidecar HTTP endpoint |
|
|
196
304
|
| `stateStoreName` | No | `"statestore"` | Dapr state store component |
|
|
305
|
+
| `workflowComponent` | No | `"dapr"` | Dapr workflow component name |
|
|
197
306
|
| `driverOptions` | No | — | Advanced Dapr runtime driver options: API token, retries, timeouts, custom `fetch`, sidecar verification |
|
|
198
307
|
| `observers` | No | `[]` | Extra execution lifecycle observers |
|
|
199
308
|
| `logging` | No | `true` | Enable/disable console logging |
|
|
@@ -203,10 +312,19 @@ replays from that point — the model call and tool execution don't repeat.
|
|
|
203
312
|
The runner returns an object with:
|
|
204
313
|
|
|
205
314
|
- `start()` — start runtime and workflow worker
|
|
315
|
+
- `createHttpHandler(options?)` — build the Dapr host HTTP handler for embedding in a custom server
|
|
316
|
+
- `agentServerCapabilities()` — capabilities patch describing the Dapr-backed runtime
|
|
206
317
|
- `serve(options?)` — start HTTP server, block on SIGINT/SIGTERM
|
|
207
318
|
- `run(message, options?)` — run a task programmatically
|
|
319
|
+
- `runDurable(message, options?)` — start a durable turn programmatically
|
|
208
320
|
- `stop()` — graceful shutdown
|
|
209
321
|
|
|
322
|
+
`serve(options?)` also accepts lightweight UI-hosting options:
|
|
323
|
+
|
|
324
|
+
- `staticDir` — serve static files before the built-in agent routes
|
|
325
|
+
- `indexFile` — file served for `/` when `staticDir` is configured
|
|
326
|
+
- `extraRoutes` — exact-match custom routes layered ahead of static assets and agent APIs
|
|
327
|
+
|
|
210
328
|
Runner startup is transactional: if the workflow worker fails to start, the
|
|
211
329
|
runtime is stopped before the error is returned.
|
|
212
330
|
|
|
@@ -236,6 +354,7 @@ invocation), the package also exports the lower-level building blocks:
|
|
|
236
354
|
| Helper | Purpose |
|
|
237
355
|
|--------|---------|
|
|
238
356
|
| `createDaprAgentWorkflowHost()` | Wrap an Agent into a workflow host |
|
|
357
|
+
| `createDaprAgentServerAdapter()` | Bridge `@cuylabs/agent-server` to the Dapr workflow runtime |
|
|
239
358
|
| `createDaprWorkflowWorker()` | Register workflow hosts in a WorkflowRuntime |
|
|
240
359
|
| `createDaprWorkloadRuntime()` | Dapr-backed runtime bundle for generic workloads |
|
|
241
360
|
| `createDaprAgentRuntime()` | Create runtime bundle (scheduling + runner + store) |
|
|
@@ -246,27 +365,40 @@ invocation), the package also exports the lower-level building blocks:
|
|
|
246
365
|
| `createDaprExecutionObserver()` | Persist execution events to the store |
|
|
247
366
|
| `createDaprLoggingObserver()` | Console logging for execution lifecycle |
|
|
248
367
|
| `DaprServiceInvoker` | Call agents across Dapr service boundaries |
|
|
368
|
+
| `invokeRemoteAgentRun()` | Convenience wrapper for cross-service agent calls |
|
|
369
|
+
| `createRemoteAgentTool()` | Create a tool that invokes a remote Dapr agent |
|
|
370
|
+
| `createDaprWorkflowApprovalRuntime()` | Durable approval runtime |
|
|
371
|
+
| `createDaprWorkflowHumanInputRuntime()` | Durable human-input runtime |
|
|
372
|
+
| `createDaprWorkflowSteerRuntime()` | Durable steering runtime |
|
|
373
|
+
| `createDaprWorkflowFollowUpRuntime()` | Durable follow-up runtime |
|
|
374
|
+
| `createDaprHostHttpHandler()` | Build `Request → Response` handler for custom servers |
|
|
375
|
+
| `createEventBus()` | In-process event bus for SSE streaming |
|
|
376
|
+
| `createDaprPubSubEventBridge()` | Multi-instance event fan-out via Dapr pub/sub |
|
|
377
|
+
| `createDaprDispatchRuntime()` | Dapr-backed async dispatch runtime |
|
|
378
|
+
| `createDaprTeamRunner()` | Multi-agent team runner with durable coordination |
|
|
249
379
|
|
|
250
380
|
See the [docs/](docs/) folder for detailed guides:
|
|
251
381
|
|
|
252
382
|
- [Architecture](docs/architecture.md) — how the three packages compose
|
|
253
|
-
- [Workflow Internals](docs/workflow-internals.md) — the
|
|
254
|
-
- [
|
|
383
|
+
- [Workflow Internals](docs/durability/workflow-internals.md) — the 5-activity decomposition
|
|
384
|
+
- [Durable Tool Approvals](docs/hitl/durable-tool-approvals.md) — how approval middleware pauses and resumes Dapr workflows
|
|
385
|
+
- [Durable Human Input](docs/hitl/durable-human-input.md) — how the built-in `question` tool pauses and resumes Dapr workflows
|
|
386
|
+
- [API Reference](docs/api-reference.md) — all exported types and functions, including event streaming
|
|
255
387
|
- [Advanced Patterns](docs/advanced-patterns.md) — cross-service invocation, custom observers, etc.
|
|
256
388
|
|
|
257
389
|
## Runtime Boundary
|
|
258
390
|
|
|
259
391
|
The package layering is:
|
|
260
392
|
|
|
261
|
-
- `agent-core`: agent turn/task semantics
|
|
393
|
+
- `agent-core`: agent turn/task semantics, EventBus interface, AgentSignal
|
|
262
394
|
- `agent-runtime`: generic workload orchestration contract
|
|
263
|
-
- `agent-runtime-dapr`: Dapr-backed implementation of that contract
|
|
395
|
+
- `agent-runtime-dapr`: Dapr-backed implementation of that contract, plus `DaprPubSubEventBridge` for multi-instance event fan-out
|
|
264
396
|
|
|
265
397
|
`agent-runtime-dapr` integrates with those lower layers in two different ways:
|
|
266
398
|
|
|
267
399
|
- outer workload path: it uses `agent-runtime` to schedule, dispatch, retry,
|
|
268
400
|
and observe jobs
|
|
269
|
-
- inner durable turn path: it uses `agent-core`
|
|
401
|
+
- inner durable turn path: it uses `agent-core` execution primitives to split one
|
|
270
402
|
agent turn into durable workflow activities such as `model-step`,
|
|
271
403
|
`tool-call`, `step-commit`, and `output-commit`
|
|
272
404
|
|
|
@@ -284,12 +416,15 @@ higher-level `createDaprAgentRunner(...)`.
|
|
|
284
416
|
|
|
285
417
|
The [`examples/`](examples/) directory has complete, runnable scripts:
|
|
286
418
|
|
|
287
|
-
| Script |
|
|
288
|
-
|
|
289
|
-
| [`simple-agent.ts`](examples/simple-agent.ts) |
|
|
290
|
-
| [`coding-agent.ts`](examples/coding-agent.ts) |
|
|
291
|
-
| [`multi-agent.ts`](examples/multi-agent.ts) |
|
|
292
|
-
| [`
|
|
419
|
+
| Script | Description |
|
|
420
|
+
|--------|-------------|
|
|
421
|
+
| [`01-simple-agent.ts`](examples/01-simple-agent.ts) | Minimal agent with one tool |
|
|
422
|
+
| [`02-coding-agent.ts`](examples/02-coding-agent.ts) | File-system tools via `@cuylabs/agent-code` |
|
|
423
|
+
| [`03-multi-agent.ts`](examples/03-multi-agent.ts) | Two agents in one process |
|
|
424
|
+
| [`04-crash-recovery.ts`](examples/04-crash-recovery.ts) | Process crash mid-turn, Dapr auto-resumes |
|
|
425
|
+
| [`05-tracing-zipkin.ts`](examples/05-tracing-zipkin.ts) | OpenTelemetry tracing → Zipkin |
|
|
426
|
+
| [`06-tracing-phoenix.ts`](examples/06-tracing-phoenix.ts) | OpenTelemetry tracing → Arize Phoenix |
|
|
427
|
+
| [`07-maintenance-host.ts`](examples/07-maintenance-host.ts) | Retention jobs + Prometheus metrics |
|
|
293
428
|
|
|
294
429
|
See the [examples README](examples/README.md) for step-by-step setup and usage.
|
|
295
430
|
|
|
@@ -302,7 +437,7 @@ See the [examples README](examples/README.md) for step-by-step setup and usage.
|
|
|
302
437
|
- `GET /ready` and `GET /readyz` report runtime, worker, sidecar, and state-store readiness
|
|
303
438
|
- Dapr Jobs API calls are isolated behind an internal adapter so scheduler changes stay local to the Dapr package
|
|
304
439
|
- Use `DaprExecutionStore.cleanup(...)` and `DaprOrchestratorRunStore.cleanup(...)` to enforce retention budgets
|
|
305
|
-
- For a concrete operational service, see [`examples/maintenance-host.ts`](examples/maintenance-host.ts)
|
|
440
|
+
- For a concrete operational service, see [`examples/07-maintenance-host.ts`](examples/07-maintenance-host.ts)
|
|
306
441
|
- For containers: run one sidecar per app process, point `daprHttpEndpoint` at the local sidecar
|
|
307
442
|
|
|
308
443
|
## License
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
DaprSidecarClient,
|
|
3
3
|
isDaprConflictError
|
|
4
|
-
} from "./chunk-A34CHK2E.js";
|
|
4
|
+
} from "./chunk-MQJ4LZOX.js";
|
|
5
5
|
|
|
6
6
|
// src/execution/store.ts
|
|
7
7
|
var DEFAULT_KEY_PREFIX = "agent-runtime:execution:";
|
|
@@ -96,6 +96,57 @@ var DaprExecutionStore = class {
|
|
|
96
96
|
"keyPrefix"
|
|
97
97
|
);
|
|
98
98
|
}
|
|
99
|
+
// ── ExecutionStore interface (generic) ───────────────────────────────
|
|
100
|
+
async get(sessionId) {
|
|
101
|
+
const record = await this.getExecution(sessionId);
|
|
102
|
+
return record ? toGenericRunRecord(record) : void 0;
|
|
103
|
+
}
|
|
104
|
+
async list(options) {
|
|
105
|
+
const records = await this.listExecutions();
|
|
106
|
+
let filtered = records;
|
|
107
|
+
if (options?.status) {
|
|
108
|
+
const statuses = Array.isArray(options.status) ? options.status : [options.status];
|
|
109
|
+
filtered = filtered.filter((r) => statuses.includes(r.status));
|
|
110
|
+
}
|
|
111
|
+
if (options?.limit !== void 0) {
|
|
112
|
+
filtered = filtered.slice(0, options.limit);
|
|
113
|
+
}
|
|
114
|
+
return filtered.map(toGenericRunRecord);
|
|
115
|
+
}
|
|
116
|
+
async listGenericCheckpoints(sessionId) {
|
|
117
|
+
const records = await this.listCheckpoints(sessionId);
|
|
118
|
+
return records.map(toGenericCheckpointRecord);
|
|
119
|
+
}
|
|
120
|
+
async remove(sessionId) {
|
|
121
|
+
const existing = await this.getExecution(sessionId);
|
|
122
|
+
if (!existing) return false;
|
|
123
|
+
await this.deleteExecutionRecord(sessionId);
|
|
124
|
+
return true;
|
|
125
|
+
}
|
|
126
|
+
// ── Resume helper ────────────────────────────────────────────────────
|
|
127
|
+
/**
|
|
128
|
+
* Build a resume snapshot from a persisted execution record.
|
|
129
|
+
*
|
|
130
|
+
* Returns `undefined` if no execution exists for the session or the
|
|
131
|
+
* execution is not in the `running` state (for example, already completed/failed).
|
|
132
|
+
*
|
|
133
|
+
* The returned snapshot can be passed as `context.restoreFrom` to
|
|
134
|
+
* `createAgentTaskRunner(...)` to resume the direct-path execution
|
|
135
|
+
* from where it left off.
|
|
136
|
+
*/
|
|
137
|
+
async buildResumeSnapshot(sessionId) {
|
|
138
|
+
const record = await this.getExecution(sessionId);
|
|
139
|
+
if (!record || record.status !== "running") return void 0;
|
|
140
|
+
return {
|
|
141
|
+
response: record.snapshot.response,
|
|
142
|
+
usage: { ...record.snapshot.usage },
|
|
143
|
+
toolCalls: record.snapshot.toolCalls.map((tc) => ({ ...tc })),
|
|
144
|
+
step: record.snapshot.activeStep ?? 0,
|
|
145
|
+
eventCount: record.snapshot.eventCount,
|
|
146
|
+
startedAt: record.startedAt
|
|
147
|
+
};
|
|
148
|
+
}
|
|
149
|
+
// ── Dapr-specific methods (rich types) ───────────────────────────────
|
|
99
150
|
async getExecution(sessionId) {
|
|
100
151
|
const value = await this.client.getState(
|
|
101
152
|
this.stateKeyForExecution(sessionId)
|
|
@@ -147,9 +198,10 @@ var DaprExecutionStore = class {
|
|
|
147
198
|
do {
|
|
148
199
|
const response = await this.client.queryState({
|
|
149
200
|
filter: {
|
|
150
|
-
|
|
151
|
-
kind: STORED_EXECUTION_CHECKPOINT_KIND
|
|
152
|
-
|
|
201
|
+
AND: [
|
|
202
|
+
{ EQ: { kind: STORED_EXECUTION_CHECKPOINT_KIND } },
|
|
203
|
+
{ EQ: { "checkpoint.sessionId": sessionId } }
|
|
204
|
+
]
|
|
153
205
|
},
|
|
154
206
|
page: {
|
|
155
207
|
limit: 200,
|
|
@@ -193,13 +245,14 @@ var DaprExecutionStore = class {
|
|
|
193
245
|
await this.writeCheckpoint(record);
|
|
194
246
|
await this.addCheckpointToIndex(record.sessionId, record.id).catch(() => {
|
|
195
247
|
});
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
248
|
+
await this.updateExecution(checkpoint.run.sessionId, (current) => {
|
|
249
|
+
const next = current ?? toExecutionRecord(checkpoint.run, checkpoint.snapshot);
|
|
250
|
+
next.updatedAt = checkpoint.snapshot.updatedAt;
|
|
251
|
+
next.lastCheckpointReason = checkpoint.reason;
|
|
252
|
+
next.checkpointCount = (current?.checkpointCount ?? 0) + 1;
|
|
253
|
+
next.snapshot = toSerializableSnapshot(checkpoint.snapshot);
|
|
254
|
+
return next;
|
|
255
|
+
});
|
|
203
256
|
}
|
|
204
257
|
async recordCompletion(run, result, snapshot) {
|
|
205
258
|
const current = await this.getExecution(run.sessionId);
|
|
@@ -403,7 +456,7 @@ var DaprExecutionStore = class {
|
|
|
403
456
|
}
|
|
404
457
|
return present;
|
|
405
458
|
}
|
|
406
|
-
async writeExecution(record) {
|
|
459
|
+
async writeExecution(record, etag) {
|
|
407
460
|
const envelope = {
|
|
408
461
|
kind: STORED_EXECUTION_KIND,
|
|
409
462
|
version: STORED_EXECUTION_VERSION,
|
|
@@ -411,9 +464,32 @@ var DaprExecutionStore = class {
|
|
|
411
464
|
};
|
|
412
465
|
await this.client.saveState(
|
|
413
466
|
this.stateKeyForExecution(record.sessionId),
|
|
414
|
-
envelope
|
|
467
|
+
envelope,
|
|
468
|
+
etag ? { etag, concurrency: "first-write" } : {}
|
|
415
469
|
);
|
|
416
470
|
}
|
|
471
|
+
/**
|
|
472
|
+
* Read-modify-write the execution record with optimistic concurrency.
|
|
473
|
+
* Retries on etag conflict, up to DEFAULT_INDEX_UPDATE_RETRIES (4) attempts in total.
|
|
474
|
+
*/
|
|
475
|
+
async updateExecution(sessionId, updater) {
|
|
476
|
+
for (let attempt = 0; attempt < DEFAULT_INDEX_UPDATE_RETRIES; attempt += 1) {
|
|
477
|
+
const entry = await this.client.getStateEntry(
|
|
478
|
+
this.stateKeyForExecution(sessionId)
|
|
479
|
+
);
|
|
480
|
+
const current = this.decodeExecution(entry.value);
|
|
481
|
+
const next = updater(current);
|
|
482
|
+
try {
|
|
483
|
+
await this.writeExecution(next, entry.etag);
|
|
484
|
+
return;
|
|
485
|
+
} catch (error) {
|
|
486
|
+
if (isDaprConflictError(error) && attempt + 1 < DEFAULT_INDEX_UPDATE_RETRIES) {
|
|
487
|
+
continue;
|
|
488
|
+
}
|
|
489
|
+
throw error;
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
}
|
|
417
493
|
async writeCheckpoint(record) {
|
|
418
494
|
const envelope = {
|
|
419
495
|
kind: STORED_EXECUTION_CHECKPOINT_KIND,
|
|
@@ -462,6 +538,54 @@ var DaprExecutionStore = class {
|
|
|
462
538
|
return void 0;
|
|
463
539
|
}
|
|
464
540
|
};
|
|
541
|
+
function toGenericRunRecord(record) {
|
|
542
|
+
return {
|
|
543
|
+
sessionId: record.sessionId,
|
|
544
|
+
status: record.status,
|
|
545
|
+
startedAt: record.startedAt,
|
|
546
|
+
updatedAt: record.updatedAt,
|
|
547
|
+
completedAt: record.completedAt,
|
|
548
|
+
checkpointCount: record.checkpointCount,
|
|
549
|
+
lastCheckpointReason: record.lastCheckpointReason,
|
|
550
|
+
snapshot: {
|
|
551
|
+
sessionId: record.snapshot.sessionId,
|
|
552
|
+
response: record.snapshot.response,
|
|
553
|
+
usage: { ...record.snapshot.usage },
|
|
554
|
+
toolCalls: record.snapshot.toolCalls.map((tc) => ({ ...tc })),
|
|
555
|
+
eventCount: record.snapshot.eventCount,
|
|
556
|
+
activeStep: record.snapshot.activeStep,
|
|
557
|
+
error: record.snapshot.error,
|
|
558
|
+
startedAt: record.snapshot.startedAt,
|
|
559
|
+
updatedAt: record.snapshot.updatedAt
|
|
560
|
+
},
|
|
561
|
+
result: record.result ? {
|
|
562
|
+
response: record.result.response,
|
|
563
|
+
sessionId: record.result.sessionId,
|
|
564
|
+
usage: { ...record.result.usage },
|
|
565
|
+
toolCalls: record.result.toolCalls.map((tc) => ({ ...tc }))
|
|
566
|
+
} : void 0,
|
|
567
|
+
error: record.error ? { ...record.error } : void 0
|
|
568
|
+
};
|
|
569
|
+
}
|
|
570
|
+
function toGenericCheckpointRecord(record) {
|
|
571
|
+
return {
|
|
572
|
+
id: record.id,
|
|
573
|
+
sessionId: record.sessionId,
|
|
574
|
+
reason: record.reason,
|
|
575
|
+
snapshot: {
|
|
576
|
+
sessionId: record.snapshot.sessionId,
|
|
577
|
+
response: record.snapshot.response,
|
|
578
|
+
usage: { ...record.snapshot.usage },
|
|
579
|
+
toolCalls: record.snapshot.toolCalls.map((tc) => ({ ...tc })),
|
|
580
|
+
eventCount: record.snapshot.eventCount,
|
|
581
|
+
activeStep: record.snapshot.activeStep,
|
|
582
|
+
error: record.snapshot.error,
|
|
583
|
+
startedAt: record.snapshot.startedAt,
|
|
584
|
+
updatedAt: record.snapshot.updatedAt
|
|
585
|
+
},
|
|
586
|
+
createdAt: record.createdAt
|
|
587
|
+
};
|
|
588
|
+
}
|
|
465
589
|
|
|
466
590
|
// src/execution/observer.ts
|
|
467
591
|
var DaprExecutionObserver = class {
|
|
@@ -546,11 +670,12 @@ var EMPTY_USAGE = {
|
|
|
546
670
|
outputTokens: 0,
|
|
547
671
|
totalTokens: 0
|
|
548
672
|
};
|
|
549
|
-
function buildRun(state, payload, trigger) {
|
|
673
|
+
function buildRun(state, payload, trigger, executionId) {
|
|
550
674
|
return {
|
|
551
675
|
payload,
|
|
552
676
|
context: { trigger },
|
|
553
677
|
sessionId: state.sessionId,
|
|
678
|
+
executionId: executionId ?? `${state.sessionId}:${state.startedAt}`,
|
|
554
679
|
startedAt: state.startedAt
|
|
555
680
|
};
|
|
556
681
|
}
|
|
@@ -558,7 +683,7 @@ function collectToolCalls(state) {
|
|
|
558
683
|
const toolCalls = [];
|
|
559
684
|
for (const msg of state.messages) {
|
|
560
685
|
if (msg.role === "tool") {
|
|
561
|
-
toolCalls.push({ name: msg.toolName, result: msg.
|
|
686
|
+
toolCalls.push({ name: msg.toolName, result: msg.result });
|
|
562
687
|
}
|
|
563
688
|
}
|
|
564
689
|
return toolCalls;
|
|
@@ -569,16 +694,16 @@ function buildSnapshot(state) {
|
|
|
569
694
|
response: state.finalResponse ?? state.lastModelStep?.text ?? "",
|
|
570
695
|
usage: state.usage ?? EMPTY_USAGE,
|
|
571
696
|
toolCalls: collectToolCalls(state),
|
|
572
|
-
eventCount:
|
|
697
|
+
eventCount: state.turnState?.eventCount ?? state.messages.length,
|
|
573
698
|
activeStep: state.step,
|
|
574
699
|
startedAt: state.startedAt,
|
|
575
700
|
updatedAt: state.updatedAt,
|
|
576
701
|
turnState: state.turnState ?? {}
|
|
577
702
|
};
|
|
578
703
|
}
|
|
579
|
-
function buildCheckpoint(reason, state, payload, trigger) {
|
|
704
|
+
function buildCheckpoint(reason, state, payload, trigger, executionId) {
|
|
580
705
|
return {
|
|
581
|
-
run: buildRun(state, payload, trigger),
|
|
706
|
+
run: buildRun(state, payload, trigger, executionId),
|
|
582
707
|
reason,
|
|
583
708
|
snapshot: buildSnapshot(state),
|
|
584
709
|
createdAt: state.updatedAt
|
|
@@ -596,6 +721,7 @@ function createWorkflowObserverBridge(options) {
|
|
|
596
721
|
const { observers } = options;
|
|
597
722
|
let payload = options.payload;
|
|
598
723
|
const trigger = options.trigger ?? "workflow";
|
|
724
|
+
const executionId = options.executionId;
|
|
599
725
|
if (observers.length === 0) {
|
|
600
726
|
return {
|
|
601
727
|
async notifyTaskStart() {
|
|
@@ -618,16 +744,22 @@ function createWorkflowObserverBridge(options) {
|
|
|
618
744
|
async notifyTaskStart(state) {
|
|
619
745
|
if (taskStarted) return;
|
|
620
746
|
taskStarted = true;
|
|
621
|
-
const run = buildRun(state, payload, trigger);
|
|
747
|
+
const run = buildRun(state, payload, trigger, executionId);
|
|
622
748
|
const snapshot = buildSnapshot(state);
|
|
623
749
|
await notifyAll(observers, (o) => o.onTaskStart?.(run, snapshot));
|
|
624
750
|
},
|
|
625
751
|
async notifyCheckpoint(reason, state) {
|
|
626
|
-
const checkpoint = buildCheckpoint(
|
|
752
|
+
const checkpoint = buildCheckpoint(
|
|
753
|
+
reason,
|
|
754
|
+
state,
|
|
755
|
+
payload,
|
|
756
|
+
trigger,
|
|
757
|
+
executionId
|
|
758
|
+
);
|
|
627
759
|
await notifyAll(observers, (o) => o.onCheckpoint?.(checkpoint));
|
|
628
760
|
},
|
|
629
761
|
async notifyTaskComplete(state) {
|
|
630
|
-
const run = buildRun(state, payload, trigger);
|
|
762
|
+
const run = buildRun(state, payload, trigger, executionId);
|
|
631
763
|
const snapshot = buildSnapshot(state);
|
|
632
764
|
const result = {
|
|
633
765
|
response: state.finalResponse ?? "",
|
|
@@ -641,16 +773,19 @@ function createWorkflowObserverBridge(options) {
|
|
|
641
773
|
);
|
|
642
774
|
},
|
|
643
775
|
async notifyTaskError(state, error) {
|
|
644
|
-
const run = buildRun(state, payload, trigger);
|
|
776
|
+
const run = buildRun(state, payload, trigger, executionId);
|
|
645
777
|
const snapshot = buildSnapshot(state);
|
|
646
778
|
await notifyAll(observers, (o) => o.onTaskError?.(run, error, snapshot));
|
|
647
779
|
},
|
|
648
780
|
updatePayload(newPayload) {
|
|
649
781
|
payload = newPayload;
|
|
650
782
|
},
|
|
651
|
-
getOtelContext(sessionId) {
|
|
783
|
+
getOtelContext(sessionId, currentExecutionId) {
|
|
652
784
|
for (const observer of observers) {
|
|
653
|
-
const ctx = observer.getOtelContext?.(
|
|
785
|
+
const ctx = observer.getOtelContext?.(
|
|
786
|
+
sessionId,
|
|
787
|
+
currentExecutionId ?? executionId
|
|
788
|
+
);
|
|
654
789
|
if (ctx !== void 0) return ctx;
|
|
655
790
|
}
|
|
656
791
|
return void 0;
|
|
@@ -659,6 +794,9 @@ function createWorkflowObserverBridge(options) {
|
|
|
659
794
|
}
|
|
660
795
|
|
|
661
796
|
// src/execution/telemetry.ts
|
|
797
|
+
import {
|
|
798
|
+
DEFAULT_AGENT_NAME
|
|
799
|
+
} from "@cuylabs/agent-core";
|
|
662
800
|
var _otel = null;
|
|
663
801
|
function oiMime(v) {
|
|
664
802
|
const t = v.trimStart();
|
|
@@ -674,7 +812,7 @@ async function getOtel() {
|
|
|
674
812
|
}
|
|
675
813
|
}
|
|
676
814
|
function createOtelObserver(config = {}) {
|
|
677
|
-
const agentName = config.agentName ??
|
|
815
|
+
const agentName = config.agentName ?? DEFAULT_AGENT_NAME;
|
|
678
816
|
const spanTimeoutMs = config.spanTimeoutMs ?? 5 * 60 * 1e3;
|
|
679
817
|
const turnSpans = /* @__PURE__ */ new Map();
|
|
680
818
|
let otel = null;
|
|
@@ -693,9 +831,13 @@ function createOtelObserver(config = {}) {
|
|
|
693
831
|
"gen_ai.usage.output_tokens": usage.outputTokens ?? 0
|
|
694
832
|
};
|
|
695
833
|
}
|
|
834
|
+
function makeSpanKey(sessionId, executionId) {
|
|
835
|
+
return executionId ?? sessionId;
|
|
836
|
+
}
|
|
696
837
|
return {
|
|
697
838
|
async onTaskStart(run, _snapshot) {
|
|
698
|
-
const
|
|
839
|
+
const key = makeSpanKey(run.sessionId, run.executionId);
|
|
840
|
+
const existing = turnSpans.get(key);
|
|
699
841
|
if (existing) {
|
|
700
842
|
const inputVal2 = run.payload.message.slice(0, 4096);
|
|
701
843
|
existing.span.setAttributes({
|
|
@@ -724,20 +866,22 @@ function createOtelObserver(config = {}) {
|
|
|
724
866
|
});
|
|
725
867
|
const ctx = otel.trace.setSpan(otel.context.active(), span);
|
|
726
868
|
const timer = setTimeout(() => {
|
|
727
|
-
const entry = turnSpans.get(
|
|
869
|
+
const entry = turnSpans.get(key);
|
|
728
870
|
if (entry) {
|
|
729
871
|
entry.span.setStatus({
|
|
730
872
|
code: otel?.SpanStatusCode.ERROR ?? 2,
|
|
731
873
|
message: "Span timed out (possible leak \u2014 task never completed)"
|
|
732
874
|
});
|
|
733
875
|
entry.span.end();
|
|
734
|
-
turnSpans.delete(
|
|
876
|
+
turnSpans.delete(key);
|
|
735
877
|
}
|
|
736
878
|
}, spanTimeoutMs);
|
|
737
|
-
turnSpans.set(
|
|
879
|
+
turnSpans.set(key, { span, ctx, timer });
|
|
738
880
|
},
|
|
739
881
|
onCheckpoint(checkpoint) {
|
|
740
|
-
const entry = turnSpans.get(
|
|
882
|
+
const entry = turnSpans.get(
|
|
883
|
+
makeSpanKey(checkpoint.run.sessionId, checkpoint.run.executionId)
|
|
884
|
+
);
|
|
741
885
|
if (!entry) return;
|
|
742
886
|
const reason = checkpoint.reason;
|
|
743
887
|
const attrs = {
|
|
@@ -758,7 +902,8 @@ function createOtelObserver(config = {}) {
|
|
|
758
902
|
entry.span.addEvent(`agent.checkpoint.${reason}`, attrs);
|
|
759
903
|
},
|
|
760
904
|
onTaskComplete(run, result, _snapshot) {
|
|
761
|
-
const
|
|
905
|
+
const key = makeSpanKey(run.sessionId, run.executionId);
|
|
906
|
+
const entry = turnSpans.get(key);
|
|
762
907
|
if (!entry) return;
|
|
763
908
|
if (entry.timer) clearTimeout(entry.timer);
|
|
764
909
|
entry.span.setAttributes({
|
|
@@ -773,10 +918,11 @@ function createOtelObserver(config = {}) {
|
|
|
773
918
|
}
|
|
774
919
|
entry.span.setStatus({ code: otel?.SpanStatusCode.OK ?? 1 });
|
|
775
920
|
entry.span.end();
|
|
776
|
-
turnSpans.delete(
|
|
921
|
+
turnSpans.delete(key);
|
|
777
922
|
},
|
|
778
923
|
onTaskError(run, error, snapshot) {
|
|
779
|
-
const
|
|
924
|
+
const key = makeSpanKey(run.sessionId, run.executionId);
|
|
925
|
+
const entry = turnSpans.get(key);
|
|
780
926
|
if (!entry) return;
|
|
781
927
|
if (entry.timer) clearTimeout(entry.timer);
|
|
782
928
|
entry.span.setAttributes(getUsageAttrs(snapshot.usage));
|
|
@@ -786,13 +932,13 @@ function createOtelObserver(config = {}) {
|
|
|
786
932
|
});
|
|
787
933
|
entry.span.recordException(error);
|
|
788
934
|
entry.span.end();
|
|
789
|
-
turnSpans.delete(
|
|
935
|
+
turnSpans.delete(key);
|
|
790
936
|
},
|
|
791
|
-
getOtelContext(sessionId) {
|
|
792
|
-
return turnSpans.get(sessionId)?.ctx;
|
|
937
|
+
getOtelContext(sessionId, executionId) {
|
|
938
|
+
return turnSpans.get(makeSpanKey(sessionId, executionId))?.ctx;
|
|
793
939
|
},
|
|
794
|
-
activateContext(sessionId, fn) {
|
|
795
|
-
const entry = turnSpans.get(sessionId);
|
|
940
|
+
activateContext(sessionId, executionId, fn) {
|
|
941
|
+
const entry = turnSpans.get(makeSpanKey(sessionId, executionId));
|
|
796
942
|
if (!entry?.ctx || !otel) return fn();
|
|
797
943
|
return otel.context.with(entry.ctx, fn);
|
|
798
944
|
}
|